mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
radeonsi: gather VS system value usage from shader variants
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34492>
This commit is contained in:
parent
3b276e4ba6
commit
b85984d2b5
8 changed files with 58 additions and 67 deletions
|
|
@ -110,16 +110,10 @@ retry_select_mode:
|
|||
}
|
||||
} else {
|
||||
/* VS and TES. */
|
||||
|
||||
bool uses_instance_id = gs_sel->info.uses_instanceid;
|
||||
bool uses_primitive_id = gs_sel->info.uses_primid;
|
||||
if (gs_stage == MESA_SHADER_VERTEX) {
|
||||
uses_instance_id |=
|
||||
shader->key.ge.mono.instance_divisor_is_one ||
|
||||
shader->key.ge.mono.instance_divisor_is_fetched;
|
||||
} else {
|
||||
|
||||
if (gs_stage == MESA_SHADER_TESS_EVAL)
|
||||
uses_primitive_id |= shader->key.ge.mono.u.vs_export_prim_id;
|
||||
}
|
||||
|
||||
esvert_lds_size = ac_ngg_nogs_get_pervertex_lds_size(
|
||||
gs_stage, gs_sel->info.num_outputs,
|
||||
|
|
@ -127,7 +121,7 @@ retry_select_mode:
|
|||
shader->key.ge.mono.u.vs_export_prim_id,
|
||||
gfx10_ngg_writes_user_edgeflags(shader),
|
||||
si_shader_culling_enabled(shader),
|
||||
uses_instance_id,
|
||||
shader->info.uses_instance_id,
|
||||
uses_primitive_id) / 4;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -48,11 +48,8 @@ get_vertex_index(nir_builder *b, int input_index, struct lower_vs_inputs_state *
|
|||
|
||||
if (divisor_is_one || divisor_is_fetched) {
|
||||
nir_def *instance_id = nir_load_instance_id(b);
|
||||
|
||||
/* This is used to determine vs vgpr count in si_get_vs_vgpr_comp_cnt(). */
|
||||
s->shader->info.uses_instanceid = true;
|
||||
|
||||
nir_def *index = NULL;
|
||||
|
||||
if (divisor_is_one) {
|
||||
index = instance_id;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1091,6 +1091,7 @@ struct si_context {
|
|||
bool compute_image_sgprs_dirty;
|
||||
bool vs_uses_base_instance;
|
||||
bool vs_uses_draw_id;
|
||||
bool vs_uses_vs_state_indexed;
|
||||
uint8_t patch_vertices;
|
||||
bool has_tessellation; /* whether si_screen::tess_rings* are valid */
|
||||
|
||||
|
|
@ -1835,6 +1836,18 @@ si_get_vs_inline(struct si_context *sctx, enum si_has_tess has_tess, enum si_has
|
|||
return &sctx->shader.vs;
|
||||
}
|
||||
|
||||
/* Return the shader variant used as the "API VS" for the given configuration:
 *   - gfx_level >= GFX9 with tessellation: the queued HS
 *     (sctx->queued.named.hs), which may be the passthrough TCS;
 *   - gfx_level >= GFX9 with a geometry shader: the current GS variant;
 *   - otherwise: the current VS variant.
 *
 * NOTE(review): presumably the VS is compiled as part of the HS/GS binary on
 * GFX9+, which is why those variants are returned here -- confirm.
 * No NULL checks are performed on the returned pointer.
 */
static ALWAYS_INLINE struct si_shader *
|
||||
si_get_api_vs_inline(struct si_context *sctx, enum amd_gfx_level gfx_level,
|
||||
enum si_has_tess has_tess, enum si_has_gs has_gs)
|
||||
{
|
||||
if (gfx_level >= GFX9 && has_tess)
|
||||
return sctx->queued.named.hs; /* this can also be the passthrough TCS */
|
||||
else if (gfx_level >= GFX9 && has_gs)
|
||||
return sctx->shader.gs.current;
|
||||
else
|
||||
return sctx->shader.vs.current;
|
||||
}
|
||||
|
||||
static inline struct si_shader_ctx_state *si_get_vs(struct si_context *sctx)
|
||||
{
|
||||
return si_get_vs_inline(sctx, sctx->shader.tes.cso ? TESS_ON : TESS_OFF,
|
||||
|
|
|
|||
|
|
@ -2762,6 +2762,18 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir)
|
|||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_instance_id:
|
||||
shader->info.uses_instance_id = true;
|
||||
break;
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
shader->info.uses_vs_state_indexed = true;
|
||||
break;
|
||||
case nir_intrinsic_load_base_instance:
|
||||
shader->info.uses_base_instance = true;
|
||||
break;
|
||||
case nir_intrinsic_load_draw_id:
|
||||
shader->info.uses_draw_id = true;
|
||||
break;
|
||||
case nir_intrinsic_load_frag_coord:
|
||||
case nir_intrinsic_load_sample_pos:
|
||||
frag_coord_mask |= nir_def_components_read(&intr->def);
|
||||
|
|
@ -2770,8 +2782,19 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir)
|
|||
case nir_intrinsic_load_input_vertex:
|
||||
case nir_intrinsic_load_per_vertex_input:
|
||||
case nir_intrinsic_load_interpolated_input: {
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX ||
|
||||
nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
shader->info.uses_vmem_load_other = true;
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_load_input) {
|
||||
if ((shader->key.ge.mono.instance_divisor_is_one |
|
||||
shader->key.ge.mono.instance_divisor_is_fetched) &
|
||||
BITFIELD_BIT(nir_intrinsic_base(intr))) {
|
||||
/* Instanced attribs. */
|
||||
shader->info.uses_instance_id = true;
|
||||
shader->info.uses_base_instance = true;
|
||||
}
|
||||
}
|
||||
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
shader->info.uses_vmem_load_other = true;
|
||||
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
|
|
@ -2957,18 +2980,6 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *
|
|||
|
||||
/* TODO: run linking optimizations here if we have LS+HS or ES+GS */
|
||||
|
||||
if (shader->selector->stage <= MESA_SHADER_GEOMETRY) {
|
||||
shader->info.uses_instanceid |=
|
||||
shader->key.ge.mono.instance_divisor_is_one ||
|
||||
shader->key.ge.mono.instance_divisor_is_fetched;
|
||||
|
||||
if (linked->producer.nir) {
|
||||
shader->info.uses_instanceid |=
|
||||
linked->producer.shader->selector->info.uses_instanceid ||
|
||||
linked->producer.shader->info.uses_instanceid;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove holes after removed PS inputs by renumbering them. Holes can only occur with
|
||||
* monolithic PS.
|
||||
*/
|
||||
|
|
@ -3138,8 +3149,6 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
shader->info.vs_output_ps_input_cntl[i] = SI_PS_INPUT_CNTL_UNUSED;
|
||||
shader->info.vs_output_ps_input_cntl[VARYING_SLOT_COL0] = SI_PS_INPUT_CNTL_UNUSED_COLOR0;
|
||||
|
||||
/* uses_instanceid may be set by si_nir_lower_vs_inputs(). */
|
||||
shader->info.uses_instanceid |= sel->info.uses_instanceid;
|
||||
shader->info.private_mem_vgprs = DIV_ROUND_UP(nir->scratch_size, 4);
|
||||
|
||||
/* Set the FP ALU behavior. */
|
||||
|
|
@ -3744,9 +3753,13 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
|||
shader->config.scratch_bytes_per_wave =
|
||||
MAX2(shader->config.scratch_bytes_per_wave,
|
||||
shader->previous_stage->config.scratch_bytes_per_wave);
|
||||
shader->info.uses_instanceid |= shader->previous_stage->info.uses_instanceid;
|
||||
|
||||
shader->info.uses_vmem_load_other |= shader->previous_stage->info.uses_vmem_load_other;
|
||||
shader->info.uses_vmem_sampler_or_bvh |= shader->previous_stage->info.uses_vmem_sampler_or_bvh;
|
||||
shader->info.uses_instance_id |= shader->previous_stage->info.uses_instance_id;
|
||||
shader->info.uses_base_instance |= shader->previous_stage->info.uses_base_instance;
|
||||
shader->info.uses_draw_id |= shader->previous_stage->info.uses_draw_id;
|
||||
shader->info.uses_vs_state_indexed |= shader->previous_stage->info.uses_vs_state_indexed;
|
||||
}
|
||||
if (shader->epilog) {
|
||||
shader->config.num_sgprs =
|
||||
|
|
@ -3783,22 +3796,6 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
|||
(si_shader_uses_streamout(shader) ||
|
||||
shader->uses_vs_state_provoking_vertex);
|
||||
|
||||
if (sel->stage == MESA_SHADER_VERTEX) {
|
||||
shader->uses_base_instance = sel->info.uses_base_instance ||
|
||||
shader->key.ge.mono.instance_divisor_is_one ||
|
||||
shader->key.ge.mono.instance_divisor_is_fetched;
|
||||
} else if (sel->stage == MESA_SHADER_TESS_CTRL) {
|
||||
shader->uses_base_instance = shader->previous_stage_sel &&
|
||||
(shader->previous_stage_sel->info.uses_base_instance ||
|
||||
shader->key.ge.mono.instance_divisor_is_one ||
|
||||
shader->key.ge.mono.instance_divisor_is_fetched);
|
||||
} else if (sel->stage == MESA_SHADER_GEOMETRY) {
|
||||
shader->uses_base_instance = shader->previous_stage_sel &&
|
||||
(shader->previous_stage_sel->info.uses_base_instance ||
|
||||
shader->key.ge.mono.instance_divisor_is_one ||
|
||||
shader->key.ge.mono.instance_divisor_is_fetched);
|
||||
}
|
||||
|
||||
si_fix_resource_usage(sscreen, shader);
|
||||
|
||||
/* Upload. */
|
||||
|
|
|
|||
|
|
@ -539,10 +539,6 @@ struct si_shader_info {
|
|||
bool uses_linear_sample;
|
||||
bool uses_interp_at_offset;
|
||||
bool uses_interp_at_sample;
|
||||
bool uses_instanceid;
|
||||
bool uses_base_vertex;
|
||||
bool uses_base_instance;
|
||||
bool uses_drawid;
|
||||
bool uses_primid;
|
||||
bool uses_frontface;
|
||||
bool uses_invocationid;
|
||||
|
|
@ -876,7 +872,10 @@ struct si_shader_binary_info {
|
|||
bool uses_vmem_sampler_or_bvh : 1;
|
||||
bool has_non_uniform_tex_access : 1;
|
||||
bool has_shadow_comparison : 1;
|
||||
bool uses_instanceid : 1;
|
||||
bool uses_instance_id : 1;
|
||||
bool uses_base_instance : 1;
|
||||
bool uses_draw_id : 1;
|
||||
bool uses_vs_state_indexed : 1; /* VS_STATE_INDEXED */
|
||||
uint8_t nr_pos_exports;
|
||||
uint8_t nr_param_exports;
|
||||
unsigned private_mem_vgprs;
|
||||
|
|
@ -957,8 +956,6 @@ struct si_shader {
|
|||
bool uses_vs_state_provoking_vertex;
|
||||
bool uses_gs_state_outprim;
|
||||
|
||||
bool uses_base_instance;
|
||||
|
||||
/* Shader key + LLVM IR + disassembly + statistics.
|
||||
* Generated for debug contexts only.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -459,9 +459,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
|
|||
|
||||
info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE) |
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE_FSIGN);
|
||||
info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
|
||||
info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
|
||||
info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
|
||||
info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
|
||||
info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
|
||||
info->uses_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
|
||||
|
|
@ -471,7 +468,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
|
|||
si_should_clear_lds(sscreen, nir);
|
||||
}
|
||||
info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
|
||||
info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
|
||||
info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
|
||||
nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
|
||||
info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
|
||||
|
|
|
|||
|
|
@ -159,12 +159,10 @@ static bool si_update_shaders(struct si_context *sctx)
|
|||
}
|
||||
}
|
||||
|
||||
if (GFX_VERSION >= GFX9 && HAS_TESS)
|
||||
sctx->vs_uses_base_instance = sctx->queued.named.hs->uses_base_instance;
|
||||
else if (GFX_VERSION >= GFX9 && HAS_GS)
|
||||
sctx->vs_uses_base_instance = sctx->shader.gs.current->uses_base_instance;
|
||||
else
|
||||
sctx->vs_uses_base_instance = sctx->shader.vs.current->uses_base_instance;
|
||||
struct si_shader *api_vs = si_get_api_vs_inline(sctx, GFX_VERSION, HAS_TESS, HAS_GS);
|
||||
sctx->vs_uses_base_instance = api_vs->info.uses_base_instance;
|
||||
sctx->vs_uses_draw_id = api_vs->info.uses_draw_id;
|
||||
sctx->vs_uses_vs_state_indexed = api_vs->info.uses_vs_state_indexed;
|
||||
|
||||
/* Update VGT_SHADER_STAGES_EN. */
|
||||
uint32_t vgt_stages = 0;
|
||||
|
|
@ -1002,7 +1000,7 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size)
|
|||
unsigned vs_state = sctx->current_vs_state; /* all VS bits */
|
||||
unsigned gs_state = sctx->current_gs_state; /* only GS and NGG bits; VS bits will be copied here */
|
||||
|
||||
if (sctx->shader.vs.cso->info.uses_base_vertex && index_size)
|
||||
if (sctx->vs_uses_vs_state_indexed && index_size)
|
||||
vs_state |= ENCODE_FIELD(VS_STATE_INDEXED, 1);
|
||||
|
||||
/* Copy all state bits from vs_state to gs_state. */
|
||||
|
|
@ -1473,7 +1471,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
|
|||
radeon_emit((sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
|
||||
radeon_emit((sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
|
||||
radeon_emit(((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
|
||||
S_2C3_DRAW_INDEX_ENABLE(sctx->shader.vs.cso->info.uses_drawid) |
|
||||
S_2C3_DRAW_INDEX_ENABLE(sctx->vs_uses_draw_id) |
|
||||
S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
|
||||
radeon_emit(indirect->draw_count);
|
||||
radeon_emit(count_va);
|
||||
|
|
|
|||
|
|
@ -660,7 +660,7 @@ static unsigned si_get_vs_vgpr_comp_cnt(struct si_screen *sscreen, struct si_sha
|
|||
bool is_ls = shader->selector->stage == MESA_SHADER_TESS_CTRL || shader->key.ge.as_ls;
|
||||
unsigned max = 0;
|
||||
|
||||
if (shader->info.uses_instanceid) {
|
||||
if (shader->info.uses_instance_id) {
|
||||
if (sscreen->info.gfx_level >= GFX12)
|
||||
max = MAX2(max, 1);
|
||||
else if (sscreen->info.gfx_level >= GFX10)
|
||||
|
|
@ -3836,7 +3836,6 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
|
|||
sctx->shader.vs.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0;
|
||||
#endif
|
||||
sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
|
||||
sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
|
||||
|
||||
if (old_uses_vbos != new_uses_vbos) {
|
||||
sctx->num_vertex_elements = new_uses_vbos ? sctx->vertex_elements->count : 0;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue