diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index a0616c0683c..615f135335a 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -110,16 +110,10 @@ retry_select_mode: } } else { /* VS and TES. */ - - bool uses_instance_id = gs_sel->info.uses_instanceid; bool uses_primitive_id = gs_sel->info.uses_primid; - if (gs_stage == MESA_SHADER_VERTEX) { - uses_instance_id |= - shader->key.ge.mono.instance_divisor_is_one || - shader->key.ge.mono.instance_divisor_is_fetched; - } else { + + if (gs_stage == MESA_SHADER_TESS_EVAL) uses_primitive_id |= shader->key.ge.mono.u.vs_export_prim_id; - } esvert_lds_size = ac_ngg_nogs_get_pervertex_lds_size( gs_stage, gs_sel->info.num_outputs, @@ -127,7 +121,7 @@ retry_select_mode: shader->key.ge.mono.u.vs_export_prim_id, gfx10_ngg_writes_user_edgeflags(shader), si_shader_culling_enabled(shader), - uses_instance_id, + shader->info.uses_instance_id, uses_primitive_id) / 4; } diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c index b97d513fa20..e4545b3d3f4 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c @@ -48,11 +48,8 @@ get_vertex_index(nir_builder *b, int input_index, struct lower_vs_inputs_state * if (divisor_is_one || divisor_is_fetched) { nir_def *instance_id = nir_load_instance_id(b); - - /* This is used to determine vs vgpr count in si_get_vs_vgpr_comp_cnt(). */ - s->shader->info.uses_instanceid = true; - nir_def *index = NULL; + if (divisor_is_one) { index = instance_id; } else { diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index c1c4fef235e..8c8944dd5ac 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1091,6 +1091,7 @@ struct si_context { bool compute_image_sgprs_dirty; bool vs_uses_base_instance; bool vs_uses_draw_id; + bool vs_uses_vs_state_indexed; uint8_t patch_vertices; bool has_tessellation; /* whether si_screen::tess_rings* are valid */ @@ -1835,6 +1836,18 @@ si_get_vs_inline(struct si_context *sctx, enum si_has_tess has_tess, enum si_has return &sctx->shader.vs; } +static ALWAYS_INLINE struct si_shader * +si_get_api_vs_inline(struct si_context *sctx, enum amd_gfx_level gfx_level, + enum si_has_tess has_tess, enum si_has_gs has_gs) +{ + if (gfx_level >= GFX9 && has_tess) + return sctx->queued.named.hs; /* this can also be the passthrough TCS */ + else if (gfx_level >= GFX9 && has_gs) + return sctx->shader.gs.current; + else + return sctx->shader.vs.current; +} + static inline struct si_shader_ctx_state *si_get_vs(struct si_context *sctx) { return si_get_vs_inline(sctx, sctx->shader.tes.cso ? TESS_ON : TESS_OFF, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 0121fbb30d1..d1d71a595b4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2762,6 +2762,18 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir) nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); switch (intr->intrinsic) { + case nir_intrinsic_load_instance_id: + shader->info.uses_instance_id = true; + break; + case nir_intrinsic_load_base_vertex: + shader->info.uses_vs_state_indexed = true; + break; + case nir_intrinsic_load_base_instance: + shader->info.uses_base_instance = true; + break; + case nir_intrinsic_load_draw_id: + shader->info.uses_draw_id = true; + break; case nir_intrinsic_load_frag_coord: case nir_intrinsic_load_sample_pos: frag_coord_mask |= nir_def_components_read(&intr->def); @@ -2770,8 +2782,19 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir) case nir_intrinsic_load_input_vertex: case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_interpolated_input: { - if (nir->info.stage == MESA_SHADER_VERTEX || - nir->info.stage == MESA_SHADER_TESS_EVAL) { + if (nir->info.stage == MESA_SHADER_VERTEX) { + shader->info.uses_vmem_load_other = true; + + if (intr->intrinsic == nir_intrinsic_load_input) { + if ((shader->key.ge.mono.instance_divisor_is_one | + shader->key.ge.mono.instance_divisor_is_fetched) & + BITFIELD_BIT(nir_intrinsic_base(intr))) { + /* Instanced attribs. */ + shader->info.uses_instance_id = true; + shader->info.uses_base_instance = true; + } + } + } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { shader->info.uses_vmem_load_other = true; } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { nir_io_semantics sem = nir_intrinsic_io_semantics(intr); @@ -2957,18 +2980,6 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders * /* TODO: run linking optimizations here if we have LS+HS or ES+GS */ - if (shader->selector->stage <= MESA_SHADER_GEOMETRY) { - shader->info.uses_instanceid |= - shader->key.ge.mono.instance_divisor_is_one || - shader->key.ge.mono.instance_divisor_is_fetched; - - if (linked->producer.nir) { - shader->info.uses_instanceid |= - linked->producer.shader->selector->info.uses_instanceid || - linked->producer.shader->info.uses_instanceid; - } - } - /* Remove holes after removed PS inputs by renumbering them. Holes can only occur with * monolithic PS. */ @@ -3138,8 +3149,6 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi shader->info.vs_output_ps_input_cntl[i] = SI_PS_INPUT_CNTL_UNUSED; shader->info.vs_output_ps_input_cntl[VARYING_SLOT_COL0] = SI_PS_INPUT_CNTL_UNUSED_COLOR0; - /* uses_instanceid may be set by si_nir_lower_vs_inputs(). */ - shader->info.uses_instanceid |= sel->info.uses_instanceid; shader->info.private_mem_vgprs = DIV_ROUND_UP(nir->scratch_size, 4); /* Set the FP ALU behavior. */ @@ -3744,9 +3753,13 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler shader->config.scratch_bytes_per_wave = MAX2(shader->config.scratch_bytes_per_wave, shader->previous_stage->config.scratch_bytes_per_wave); - shader->info.uses_instanceid |= shader->previous_stage->info.uses_instanceid; + shader->info.uses_vmem_load_other |= shader->previous_stage->info.uses_vmem_load_other; shader->info.uses_vmem_sampler_or_bvh |= shader->previous_stage->info.uses_vmem_sampler_or_bvh; + shader->info.uses_instance_id |= shader->previous_stage->info.uses_instance_id; + shader->info.uses_base_instance |= shader->previous_stage->info.uses_base_instance; + shader->info.uses_draw_id |= shader->previous_stage->info.uses_draw_id; + shader->info.uses_vs_state_indexed |= shader->previous_stage->info.uses_vs_state_indexed; } if (shader->epilog) { shader->config.num_sgprs = @@ -3783,22 +3796,6 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler (si_shader_uses_streamout(shader) || shader->uses_vs_state_provoking_vertex); - if (sel->stage == MESA_SHADER_VERTEX) { - shader->uses_base_instance = sel->info.uses_base_instance || - shader->key.ge.mono.instance_divisor_is_one || - shader->key.ge.mono.instance_divisor_is_fetched; - } else if (sel->stage == MESA_SHADER_TESS_CTRL) { - shader->uses_base_instance = shader->previous_stage_sel && - (shader->previous_stage_sel->info.uses_base_instance || - shader->key.ge.mono.instance_divisor_is_one || - shader->key.ge.mono.instance_divisor_is_fetched); - } else if (sel->stage == MESA_SHADER_GEOMETRY) { - shader->uses_base_instance = shader->previous_stage_sel && - (shader->previous_stage_sel->info.uses_base_instance || - shader->key.ge.mono.instance_divisor_is_one || - shader->key.ge.mono.instance_divisor_is_fetched); - } - si_fix_resource_usage(sscreen, shader); /* Upload. */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d46cba8d2be..eeda76fac17 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -539,10 +539,6 @@ struct si_shader_info { bool uses_linear_sample; bool uses_interp_at_offset; bool uses_interp_at_sample; - bool uses_instanceid; - bool uses_base_vertex; - bool uses_base_instance; - bool uses_drawid; bool uses_primid; bool uses_frontface; bool uses_invocationid; @@ -876,7 +872,10 @@ struct si_shader_binary_info { bool uses_vmem_sampler_or_bvh : 1; bool has_non_uniform_tex_access : 1; bool has_shadow_comparison : 1; - bool uses_instanceid : 1; + bool uses_instance_id : 1; + bool uses_base_instance : 1; + bool uses_draw_id : 1; + bool uses_vs_state_indexed : 1; /* VS_STATE_INDEXED */ uint8_t nr_pos_exports; uint8_t nr_param_exports; unsigned private_mem_vgprs; @@ -957,8 +956,6 @@ struct si_shader { bool uses_vs_state_provoking_vertex; bool uses_gs_state_outprim; - bool uses_base_instance; - /* Shader key + LLVM IR + disassembly + statistics. * Generated for debug contexts only. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index c63c9165489..b7976afadd2 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -459,9 +459,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir, info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE) | BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE_FSIGN); - info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); - info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX); - info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE); info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID); info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS); info->uses_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS); @@ -471,7 +468,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir, si_should_clear_lds(sscreen, nir); } info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE); - info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID; info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index a06610369bc..6d25ced2e7e 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -159,12 +159,10 @@ static bool si_update_shaders(struct si_context *sctx) } } - if (GFX_VERSION >= GFX9 && HAS_TESS) - sctx->vs_uses_base_instance = sctx->queued.named.hs->uses_base_instance; - else if (GFX_VERSION >= GFX9 && HAS_GS) - sctx->vs_uses_base_instance = sctx->shader.gs.current->uses_base_instance; - else - sctx->vs_uses_base_instance = sctx->shader.vs.current->uses_base_instance; + struct si_shader *api_vs = si_get_api_vs_inline(sctx, GFX_VERSION, HAS_TESS, HAS_GS); + sctx->vs_uses_base_instance = api_vs->info.uses_base_instance; + sctx->vs_uses_draw_id = api_vs->info.uses_draw_id; + sctx->vs_uses_vs_state_indexed = api_vs->info.uses_vs_state_indexed; /* Update VGT_SHADER_STAGES_EN. */ uint32_t vgt_stages = 0; @@ -1002,7 +1000,7 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size) unsigned vs_state = sctx->current_vs_state; /* all VS bits */ unsigned gs_state = sctx->current_gs_state; /* only GS and NGG bits; VS bits will be copied here */ - if (sctx->shader.vs.cso->info.uses_base_vertex && index_size) + if (sctx->vs_uses_vs_state_indexed && index_size) vs_state |= ENCODE_FIELD(VS_STATE_INDEXED, 1); /* Copy all state bits from vs_state to gs_state. */ @@ -1473,7 +1471,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw radeon_emit((sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit((sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit(((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) | - S_2C3_DRAW_INDEX_ENABLE(sctx->shader.vs.cso->info.uses_drawid) | + S_2C3_DRAW_INDEX_ENABLE(sctx->vs_uses_draw_id) | S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count)); radeon_emit(indirect->draw_count); radeon_emit(count_va); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 12038cff747..b20426ccc55 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -660,7 +660,7 @@ static unsigned si_get_vs_vgpr_comp_cnt(struct si_screen *sscreen, struct si_sha bool is_ls = shader->selector->stage == MESA_SHADER_TESS_CTRL || shader->key.ge.as_ls; unsigned max = 0; - if (shader->info.uses_instanceid) { + if (shader->info.uses_instance_id) { if (sscreen->info.gfx_level >= GFX12) max = MAX2(max, 1); else if (sscreen->info.gfx_level >= GFX10) @@ -3836,7 +3836,6 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) sctx->shader.vs.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0; #endif sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0; - sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false; if (old_uses_vbos != new_uses_vbos) { sctx->num_vertex_elements = new_uses_vbos ? sctx->vertex_elements->count : 0;