radeonsi: gather VS system value usage from shader variants

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34492>
This commit is contained in:
Marek Olšák 2025-04-10 18:02:23 -04:00 committed by Marge Bot
parent 3b276e4ba6
commit b85984d2b5
8 changed files with 58 additions and 67 deletions

View file

@@ -110,16 +110,10 @@ retry_select_mode:
}
} else {
/* VS and TES. */
bool uses_instance_id = gs_sel->info.uses_instanceid;
bool uses_primitive_id = gs_sel->info.uses_primid;
if (gs_stage == MESA_SHADER_VERTEX) {
uses_instance_id |=
shader->key.ge.mono.instance_divisor_is_one ||
shader->key.ge.mono.instance_divisor_is_fetched;
} else {
if (gs_stage == MESA_SHADER_TESS_EVAL)
uses_primitive_id |= shader->key.ge.mono.u.vs_export_prim_id;
}
esvert_lds_size = ac_ngg_nogs_get_pervertex_lds_size(
gs_stage, gs_sel->info.num_outputs,
@@ -127,7 +121,7 @@ retry_select_mode:
shader->key.ge.mono.u.vs_export_prim_id,
gfx10_ngg_writes_user_edgeflags(shader),
si_shader_culling_enabled(shader),
uses_instance_id,
shader->info.uses_instance_id,
uses_primitive_id) / 4;
}

View file

@@ -48,11 +48,8 @@ get_vertex_index(nir_builder *b, int input_index, struct lower_vs_inputs_state *
if (divisor_is_one || divisor_is_fetched) {
nir_def *instance_id = nir_load_instance_id(b);
/* This is used to determine vs vgpr count in si_get_vs_vgpr_comp_cnt(). */
s->shader->info.uses_instanceid = true;
nir_def *index = NULL;
if (divisor_is_one) {
index = instance_id;
} else {

View file

@@ -1091,6 +1091,7 @@ struct si_context {
bool compute_image_sgprs_dirty;
bool vs_uses_base_instance;
bool vs_uses_draw_id;
bool vs_uses_vs_state_indexed;
uint8_t patch_vertices;
bool has_tessellation; /* whether si_screen::tess_rings* are valid */
@@ -1835,6 +1836,18 @@ si_get_vs_inline(struct si_context *sctx, enum si_has_tess has_tess, enum si_has
return &sctx->shader.vs;
}
/* Return the shader variant that acts as the API vertex shader for the
 * current pipeline configuration.
 *
 * NOTE(review): presumably on GFX9+ the VS stage is merged into HS (with
 * tessellation) or GS, so the "API VS" lives in those bind points — verify
 * against the GFX9 merged-shader scheme.
 */
static ALWAYS_INLINE struct si_shader *
si_get_api_vs_inline(struct si_context *sctx, enum amd_gfx_level gfx_level,
enum si_has_tess has_tess, enum si_has_gs has_gs)
{
if (gfx_level >= GFX9 && has_tess)
return sctx->queued.named.hs; /* this can also be the passthrough TCS */
else if (gfx_level >= GFX9 && has_gs)
return sctx->shader.gs.current;
else
return sctx->shader.vs.current;
}
static inline struct si_shader_ctx_state *si_get_vs(struct si_context *sctx)
{
return si_get_vs_inline(sctx, sctx->shader.tes.cso ? TESS_ON : TESS_OFF,

View file

@@ -2762,6 +2762,18 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_instance_id:
shader->info.uses_instance_id = true;
break;
case nir_intrinsic_load_base_vertex:
shader->info.uses_vs_state_indexed = true;
break;
case nir_intrinsic_load_base_instance:
shader->info.uses_base_instance = true;
break;
case nir_intrinsic_load_draw_id:
shader->info.uses_draw_id = true;
break;
case nir_intrinsic_load_frag_coord:
case nir_intrinsic_load_sample_pos:
frag_coord_mask |= nir_def_components_read(&intr->def);
@@ -2770,8 +2782,19 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir)
case nir_intrinsic_load_input_vertex:
case nir_intrinsic_load_per_vertex_input:
case nir_intrinsic_load_interpolated_input: {
if (nir->info.stage == MESA_SHADER_VERTEX ||
nir->info.stage == MESA_SHADER_TESS_EVAL) {
if (nir->info.stage == MESA_SHADER_VERTEX) {
shader->info.uses_vmem_load_other = true;
if (intr->intrinsic == nir_intrinsic_load_input) {
if ((shader->key.ge.mono.instance_divisor_is_one |
shader->key.ge.mono.instance_divisor_is_fetched) &
BITFIELD_BIT(nir_intrinsic_base(intr))) {
/* Instanced attribs. */
shader->info.uses_instance_id = true;
shader->info.uses_base_instance = true;
}
}
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
shader->info.uses_vmem_load_other = true;
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
@@ -2957,18 +2980,6 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *
/* TODO: run linking optimizations here if we have LS+HS or ES+GS */
if (shader->selector->stage <= MESA_SHADER_GEOMETRY) {
shader->info.uses_instanceid |=
shader->key.ge.mono.instance_divisor_is_one ||
shader->key.ge.mono.instance_divisor_is_fetched;
if (linked->producer.nir) {
shader->info.uses_instanceid |=
linked->producer.shader->selector->info.uses_instanceid ||
linked->producer.shader->info.uses_instanceid;
}
}
/* Remove holes after removed PS inputs by renumbering them. Holes can only occur with
* monolithic PS.
*/
@@ -3138,8 +3149,6 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
shader->info.vs_output_ps_input_cntl[i] = SI_PS_INPUT_CNTL_UNUSED;
shader->info.vs_output_ps_input_cntl[VARYING_SLOT_COL0] = SI_PS_INPUT_CNTL_UNUSED_COLOR0;
/* uses_instanceid may be set by si_nir_lower_vs_inputs(). */
shader->info.uses_instanceid |= sel->info.uses_instanceid;
shader->info.private_mem_vgprs = DIV_ROUND_UP(nir->scratch_size, 4);
/* Set the FP ALU behavior. */
@@ -3744,9 +3753,13 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
shader->config.scratch_bytes_per_wave =
MAX2(shader->config.scratch_bytes_per_wave,
shader->previous_stage->config.scratch_bytes_per_wave);
shader->info.uses_instanceid |= shader->previous_stage->info.uses_instanceid;
shader->info.uses_vmem_load_other |= shader->previous_stage->info.uses_vmem_load_other;
shader->info.uses_vmem_sampler_or_bvh |= shader->previous_stage->info.uses_vmem_sampler_or_bvh;
shader->info.uses_instance_id |= shader->previous_stage->info.uses_instance_id;
shader->info.uses_base_instance |= shader->previous_stage->info.uses_base_instance;
shader->info.uses_draw_id |= shader->previous_stage->info.uses_draw_id;
shader->info.uses_vs_state_indexed |= shader->previous_stage->info.uses_vs_state_indexed;
}
if (shader->epilog) {
shader->config.num_sgprs =
@@ -3783,22 +3796,6 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
(si_shader_uses_streamout(shader) ||
shader->uses_vs_state_provoking_vertex);
if (sel->stage == MESA_SHADER_VERTEX) {
shader->uses_base_instance = sel->info.uses_base_instance ||
shader->key.ge.mono.instance_divisor_is_one ||
shader->key.ge.mono.instance_divisor_is_fetched;
} else if (sel->stage == MESA_SHADER_TESS_CTRL) {
shader->uses_base_instance = shader->previous_stage_sel &&
(shader->previous_stage_sel->info.uses_base_instance ||
shader->key.ge.mono.instance_divisor_is_one ||
shader->key.ge.mono.instance_divisor_is_fetched);
} else if (sel->stage == MESA_SHADER_GEOMETRY) {
shader->uses_base_instance = shader->previous_stage_sel &&
(shader->previous_stage_sel->info.uses_base_instance ||
shader->key.ge.mono.instance_divisor_is_one ||
shader->key.ge.mono.instance_divisor_is_fetched);
}
si_fix_resource_usage(sscreen, shader);
/* Upload. */

View file

@@ -539,10 +539,6 @@ struct si_shader_info {
bool uses_linear_sample;
bool uses_interp_at_offset;
bool uses_interp_at_sample;
bool uses_instanceid;
bool uses_base_vertex;
bool uses_base_instance;
bool uses_drawid;
bool uses_primid;
bool uses_frontface;
bool uses_invocationid;
@@ -876,7 +872,10 @@ struct si_shader_binary_info {
bool uses_vmem_sampler_or_bvh : 1;
bool has_non_uniform_tex_access : 1;
bool has_shadow_comparison : 1;
bool uses_instanceid : 1;
bool uses_instance_id : 1;
bool uses_base_instance : 1;
bool uses_draw_id : 1;
bool uses_vs_state_indexed : 1; /* VS_STATE_INDEXED */
uint8_t nr_pos_exports;
uint8_t nr_param_exports;
unsigned private_mem_vgprs;
@@ -957,8 +956,6 @@ struct si_shader {
bool uses_vs_state_provoking_vertex;
bool uses_gs_state_outprim;
bool uses_base_instance;
/* Shader key + LLVM IR + disassembly + statistics.
* Generated for debug contexts only.
*/

View file

@@ -459,9 +459,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE_FSIGN);
info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
info->uses_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
@@ -471,7 +468,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
si_should_clear_lds(sscreen, nir);
}
info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);

View file

@@ -159,12 +159,10 @@ static bool si_update_shaders(struct si_context *sctx)
}
}
if (GFX_VERSION >= GFX9 && HAS_TESS)
sctx->vs_uses_base_instance = sctx->queued.named.hs->uses_base_instance;
else if (GFX_VERSION >= GFX9 && HAS_GS)
sctx->vs_uses_base_instance = sctx->shader.gs.current->uses_base_instance;
else
sctx->vs_uses_base_instance = sctx->shader.vs.current->uses_base_instance;
struct si_shader *api_vs = si_get_api_vs_inline(sctx, GFX_VERSION, HAS_TESS, HAS_GS);
sctx->vs_uses_base_instance = api_vs->info.uses_base_instance;
sctx->vs_uses_draw_id = api_vs->info.uses_draw_id;
sctx->vs_uses_vs_state_indexed = api_vs->info.uses_vs_state_indexed;
/* Update VGT_SHADER_STAGES_EN. */
uint32_t vgt_stages = 0;
@@ -1002,7 +1000,7 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size)
unsigned vs_state = sctx->current_vs_state; /* all VS bits */
unsigned gs_state = sctx->current_gs_state; /* only GS and NGG bits; VS bits will be copied here */
if (sctx->shader.vs.cso->info.uses_base_vertex && index_size)
if (sctx->vs_uses_vs_state_indexed && index_size)
vs_state |= ENCODE_FIELD(VS_STATE_INDEXED, 1);
/* Copy all state bits from vs_state to gs_state. */
@@ -1473,7 +1471,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit((sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit((sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
S_2C3_DRAW_INDEX_ENABLE(sctx->shader.vs.cso->info.uses_drawid) |
S_2C3_DRAW_INDEX_ENABLE(sctx->vs_uses_draw_id) |
S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
radeon_emit(indirect->draw_count);
radeon_emit(count_va);

View file

@@ -660,7 +660,7 @@ static unsigned si_get_vs_vgpr_comp_cnt(struct si_screen *sscreen, struct si_sha
bool is_ls = shader->selector->stage == MESA_SHADER_TESS_CTRL || shader->key.ge.as_ls;
unsigned max = 0;
if (shader->info.uses_instanceid) {
if (shader->info.uses_instance_id) {
if (sscreen->info.gfx_level >= GFX12)
max = MAX2(max, 1);
else if (sscreen->info.gfx_level >= GFX10)
@@ -3836,7 +3836,6 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
sctx->shader.vs.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0;
#endif
sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
if (old_uses_vbos != new_uses_vbos) {
sctx->num_vertex_elements = new_uses_vbos ? sctx->vertex_elements->count : 0;