mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 06:30:11 +01:00
radeonsi: make NGG streamout output primitive type known at compile time
This compiles an optimized shader variant for NGG streamout where the output primitive is known at compile time. This allows putting stores for all vertices into the same VMEM clause.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32713>
This commit is contained in:
parent
5003465c42
commit
8440184dfd
5 changed files with 42 additions and 8 deletions
|
|
@ -787,6 +787,7 @@ struct si_streamout_target {
|
|||
|
||||
struct si_streamout {
|
||||
enum mesa_prim output_prim;
|
||||
uint8_t num_verts_per_prim;
|
||||
bool begin_emitted;
|
||||
|
||||
unsigned enabled_mask;
|
||||
|
|
|
|||
|
|
@ -1594,13 +1594,15 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
|
|||
if ((stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_TESS_EVAL ||
|
||||
stage == MESA_SHADER_VERTEX) &&
|
||||
!key->ge.as_es && !key->ge.as_ls) {
|
||||
fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->ge.opt.kill_outputs);
|
||||
fprintf(f, " opt.kill_pointsize = 0x%x\n", key->ge.opt.kill_pointsize);
|
||||
fprintf(f, " opt.kill_layer = 0x%x\n", key->ge.opt.kill_layer);
|
||||
fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->ge.opt.kill_clip_distances);
|
||||
fprintf(f, " opt.ngg_culling = 0x%x\n", key->ge.opt.ngg_culling);
|
||||
fprintf(f, " opt.remove_streamout = 0x%x\n", key->ge.opt.remove_streamout);
|
||||
fprintf(f, " mono.remove_streamout = 0x%x\n", key->ge.mono.remove_streamout);
|
||||
fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->ge.opt.kill_outputs);
|
||||
fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->ge.opt.kill_clip_distances);
|
||||
fprintf(f, " opt.kill_pointsize = %u\n", key->ge.opt.kill_pointsize);
|
||||
fprintf(f, " opt.kill_layer = %u\n", key->ge.opt.kill_layer);
|
||||
fprintf(f, " opt.remove_streamout = %u\n", key->ge.opt.remove_streamout);
|
||||
fprintf(f, " opt.ngg_culling = 0x%x\n", key->ge.opt.ngg_culling);
|
||||
fprintf(f, " opt.ngg_vs_streamout_num_verts_per_prim = %u\n",
|
||||
key->ge.opt.ngg_vs_streamout_num_verts_per_prim);
|
||||
}
|
||||
|
||||
if (stage <= MESA_SHADER_GEOMETRY)
|
||||
|
|
|
|||
|
|
@ -780,6 +780,12 @@ struct si_shader_key_ge {
|
|||
/* For NGG VS and TES. */
|
||||
unsigned ngg_culling : 11; /* SI_NGG_CULL_* */
|
||||
|
||||
/* If NGG VS streamout knows the number of vertices per primitive at compile time,
|
||||
* it can put stores for all vertices in the same VMEM clause, instead of storing
|
||||
* vertices for the 2nd and 3rd vertex conditionally because the primitive type is
|
||||
* unknown.
|
||||
*/
|
||||
unsigned ngg_vs_streamout_num_verts_per_prim : 2;
|
||||
|
||||
/* For shaders where monolithic variants have better code.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1369,6 +1369,15 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs, const union si_s
|
|||
if (key->ge.opt.ngg_culling & SI_NGG_CULL_VS_LINES)
|
||||
return MESA_PRIM_LINES;
|
||||
|
||||
switch (key->ge.opt.ngg_vs_streamout_num_verts_per_prim) {
|
||||
case 3:
|
||||
return MESA_PRIM_TRIANGLES;
|
||||
case 2:
|
||||
return MESA_PRIM_LINES;
|
||||
case 1:
|
||||
return MESA_PRIM_POINTS;
|
||||
}
|
||||
|
||||
if (return_unknown)
|
||||
return MESA_PRIM_UNKNOWN;
|
||||
else
|
||||
|
|
@ -2525,8 +2534,21 @@ static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_sele
|
|||
key->ge.opt.ngg_culling = sctx->ngg_culling;
|
||||
key->ge.mono.u.vs_export_prim_id = vs->stage != MESA_SHADER_GEOMETRY &&
|
||||
sctx->shader.ps.cso && sctx->shader.ps.cso->info.uses_primid;
|
||||
key->ge.opt.remove_streamout = vs->info.enabled_streamout_buffer_mask &&
|
||||
!sctx->streamout.enabled_mask;
|
||||
|
||||
if (vs->info.enabled_streamout_buffer_mask) {
|
||||
if (sctx->streamout.enabled_mask) {
|
||||
key->ge.opt.remove_streamout = 0;
|
||||
key->ge.opt.ngg_vs_streamout_num_verts_per_prim =
|
||||
sctx->gfx_level >= GFX11 ? sctx->streamout.num_verts_per_prim : 0;
|
||||
} else {
|
||||
key->ge.opt.remove_streamout = 1;
|
||||
key->ge.opt.ngg_vs_streamout_num_verts_per_prim = 0;
|
||||
}
|
||||
} else {
|
||||
key->ge.opt.remove_streamout = 0;
|
||||
key->ge.opt.ngg_vs_streamout_num_verts_per_prim = 0;
|
||||
}
|
||||
|
||||
if (sctx->gfx_level >= GFX12)
|
||||
key->ge.mono.remove_streamout = key->ge.opt.remove_streamout;
|
||||
}
|
||||
|
|
@ -2538,6 +2560,7 @@ static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_se
|
|||
key->ge.opt.kill_outputs = 0;
|
||||
key->ge.opt.remove_streamout = 0;
|
||||
key->ge.opt.ngg_culling = 0;
|
||||
key->ge.opt.ngg_vs_streamout_num_verts_per_prim = 0;
|
||||
key->ge.mono.u.vs_export_prim_id = 0;
|
||||
key->ge.mono.remove_streamout = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -218,6 +218,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
|
|||
sctx->do_update_shaders = true; /* to keep/remove streamout shader code as an optimization */
|
||||
|
||||
sctx->streamout.output_prim = output_prim;
|
||||
sctx->streamout.num_verts_per_prim = output_prim == MESA_PRIM_UNKNOWN ?
|
||||
0 : mesa_vertices_per_prim(output_prim);
|
||||
sctx->streamout.num_targets = num_targets;
|
||||
sctx->streamout.enabled_mask = enabled_mask;
|
||||
sctx->streamout.append_bitmask = append_bitmask;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue