radeonsi: make NGG streamout output primitive type known at compile time

This compiles an optimized shader variant for NGG streamout where the output
primitive is known at compile time. This allows putting stores for all
vertices into the same VMEM clause.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32713>
This commit is contained in:
Marek Olšák 2024-12-17 01:49:38 -05:00 committed by Marge Bot
parent 5003465c42
commit 8440184dfd
5 changed files with 42 additions and 8 deletions

View file

@ -787,6 +787,7 @@ struct si_streamout_target {
struct si_streamout {
enum mesa_prim output_prim;
uint8_t num_verts_per_prim;
bool begin_emitted;
unsigned enabled_mask;

View file

@ -1594,13 +1594,15 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
if ((stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_TESS_EVAL ||
stage == MESA_SHADER_VERTEX) &&
!key->ge.as_es && !key->ge.as_ls) {
fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->ge.opt.kill_outputs);
fprintf(f, " opt.kill_pointsize = 0x%x\n", key->ge.opt.kill_pointsize);
fprintf(f, " opt.kill_layer = 0x%x\n", key->ge.opt.kill_layer);
fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->ge.opt.kill_clip_distances);
fprintf(f, " opt.ngg_culling = 0x%x\n", key->ge.opt.ngg_culling);
fprintf(f, " opt.remove_streamout = 0x%x\n", key->ge.opt.remove_streamout);
fprintf(f, " mono.remove_streamout = 0x%x\n", key->ge.mono.remove_streamout);
fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->ge.opt.kill_outputs);
fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->ge.opt.kill_clip_distances);
fprintf(f, " opt.kill_pointsize = %u\n", key->ge.opt.kill_pointsize);
fprintf(f, " opt.kill_layer = %u\n", key->ge.opt.kill_layer);
fprintf(f, " opt.remove_streamout = %u\n", key->ge.opt.remove_streamout);
fprintf(f, " opt.ngg_culling = 0x%x\n", key->ge.opt.ngg_culling);
fprintf(f, " opt.ngg_vs_streamout_num_verts_per_prim = %u\n",
key->ge.opt.ngg_vs_streamout_num_verts_per_prim);
}
if (stage <= MESA_SHADER_GEOMETRY)

View file

@ -780,6 +780,12 @@ struct si_shader_key_ge {
/* For NGG VS and TES. */
unsigned ngg_culling : 11; /* SI_NGG_CULL_* */
/* If NGG VS streamout knows the number of vertices per primitive at compile time,
* it can put the stores for all vertices in the same VMEM clause, instead of storing
* the 2nd and 3rd vertices conditionally because the primitive type is unknown.
*/
unsigned ngg_vs_streamout_num_verts_per_prim : 2;
/* For shaders where monolithic variants have better code.
*

View file

@ -1369,6 +1369,15 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs, const union si_s
if (key->ge.opt.ngg_culling & SI_NGG_CULL_VS_LINES)
return MESA_PRIM_LINES;
switch (key->ge.opt.ngg_vs_streamout_num_verts_per_prim) {
case 3:
return MESA_PRIM_TRIANGLES;
case 2:
return MESA_PRIM_LINES;
case 1:
return MESA_PRIM_POINTS;
}
if (return_unknown)
return MESA_PRIM_UNKNOWN;
else
@ -2525,8 +2534,21 @@ static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_sele
key->ge.opt.ngg_culling = sctx->ngg_culling;
key->ge.mono.u.vs_export_prim_id = vs->stage != MESA_SHADER_GEOMETRY &&
sctx->shader.ps.cso && sctx->shader.ps.cso->info.uses_primid;
key->ge.opt.remove_streamout = vs->info.enabled_streamout_buffer_mask &&
!sctx->streamout.enabled_mask;
if (vs->info.enabled_streamout_buffer_mask) {
if (sctx->streamout.enabled_mask) {
key->ge.opt.remove_streamout = 0;
key->ge.opt.ngg_vs_streamout_num_verts_per_prim =
sctx->gfx_level >= GFX11 ? sctx->streamout.num_verts_per_prim : 0;
} else {
key->ge.opt.remove_streamout = 1;
key->ge.opt.ngg_vs_streamout_num_verts_per_prim = 0;
}
} else {
key->ge.opt.remove_streamout = 0;
key->ge.opt.ngg_vs_streamout_num_verts_per_prim = 0;
}
if (sctx->gfx_level >= GFX12)
key->ge.mono.remove_streamout = key->ge.opt.remove_streamout;
}
@ -2538,6 +2560,7 @@ static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_se
key->ge.opt.kill_outputs = 0;
key->ge.opt.remove_streamout = 0;
key->ge.opt.ngg_culling = 0;
key->ge.opt.ngg_vs_streamout_num_verts_per_prim = 0;
key->ge.mono.u.vs_export_prim_id = 0;
key->ge.mono.remove_streamout = 0;
}

View file

@ -218,6 +218,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
sctx->do_update_shaders = true; /* to keep/remove streamout shader code as an optimization */
sctx->streamout.output_prim = output_prim;
sctx->streamout.num_verts_per_prim = output_prim == MESA_PRIM_UNKNOWN ?
0 : mesa_vertices_per_prim(output_prim);
sctx->streamout.num_targets = num_targets;
sctx->streamout.enabled_mask = enabled_mask;
sctx->streamout.append_bitmask = append_bitmask;