From 9b7ea720c939a2d341aa558a126b32fbb3af61e7 Mon Sep 17 00:00:00 2001
From: Marek Olšák
Date: Mon, 2 Dec 2024 21:34:06 -0500
Subject: [PATCH] radeonsi: use nir->info instead of sel->info.base

sel->info is out of date after shader variant optimizations. We need to
stop using it.

Reviewed-by: Qiang Yu
Part-of:
---
 src/gallium/drivers/radeonsi/si_compute.c     |  12 +-
 .../drivers/radeonsi/si_nir_lower_abi.c       |  25 +--
 .../drivers/radeonsi/si_nir_lower_resource.c  |   8 +-
 .../drivers/radeonsi/si_nir_lower_vs_inputs.c |   6 +-
 src/gallium/drivers/radeonsi/si_shader.c      | 170 +++++++++---------
 src/gallium/drivers/radeonsi/si_shader_aco.c  |   5 +-
 src/gallium/drivers/radeonsi/si_shader_info.c |  24 +--
 src/gallium/drivers/radeonsi/si_shader_llvm.c |  23 +--
 .../drivers/radeonsi/si_state_shaders.cpp     |  16 +-
 9 files changed, 145 insertions(+), 144 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 95c173a92f2..e33869dda4f 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -103,7 +103,7 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
    assert(program->ir_type == PIPE_SHADER_IR_NIR);
    si_nir_scan_shader(sscreen, sel->nir, &sel->info);

-   if (!sel->info.base.use_aco_amd && !*compiler)
+   if (!sel->nir->info.use_aco_amd && !*compiler)
       *compiler = si_create_llvm_compiler(sscreen);

    si_get_active_slot_masks(sscreen, &sel->info, &sel->active_const_and_shader_buffers,
@@ -117,11 +117,11 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
     */
    unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS + (sel->info.uses_grid_size ? 3 : 0) +
                          (sel->info.uses_variable_block_size ? 1 : 0) +
-                         sel->info.base.cs.user_data_components_amd;
+                         sel->nir->info.cs.user_data_components_amd;

    /* Fast path for compute shaders - some descriptors passed via user SGPRs. */
    /* Shader buffers in user SGPRs. */
-   for (unsigned i = 0; i < MIN2(3, sel->info.base.num_ssbos) && user_sgprs <= 12; i++) {
+   for (unsigned i = 0; i < MIN2(3, sel->nir->info.num_ssbos) && user_sgprs <= 12; i++) {
       user_sgprs = align(user_sgprs, 4);
       if (i == 0)
          sel->cs_shaderbufs_sgpr_index = user_sgprs;
@@ -130,16 +130,16 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
    }

    /* Images in user SGPRs. */
-   unsigned non_fmask_images = u_bit_consecutive(0, sel->info.base.num_images);
+   unsigned non_fmask_images = u_bit_consecutive(0, sel->nir->info.num_images);

    /* Remove images with FMASK from the bitmask. We only care about the first
     * 3 anyway, so we can take msaa_images[0] and ignore the rest. */
    if (sscreen->info.gfx_level < GFX11)
-      non_fmask_images &= ~sel->info.base.msaa_images[0];
+      non_fmask_images &= ~sel->nir->info.msaa_images[0];

    for (unsigned i = 0; i < 3 && non_fmask_images & (1 << i); i++) {
-      unsigned num_sgprs = BITSET_TEST(sel->info.base.image_buffers, i) ?
4 : 8; if (align(user_sgprs, num_sgprs) + num_sgprs > 16) break; diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index e6246bc08db..3a6012928e5 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -38,9 +38,9 @@ static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader, struct si_shader_selector *sel = shader->selector; nir_def *attr_address = - sel->stage == MESA_SHADER_VERTEX && sel->info.base.vs.blit_sgprs_amd ? + b->shader->info.stage == MESA_SHADER_VERTEX && b->shader->info.vs.blit_sgprs_amd ? ac_nir_load_arg_at_offset(b, &args->ac, args->vs_blit_inputs, - sel->info.base.vs.blit_sgprs_amd - 1) : + b->shader->info.vs.blit_sgprs_amd - 1) : ac_nir_load_arg(b, &args->ac, args->gs_attr_address); unsigned stride = 16 * si_shader_num_alloc_param_exports(shader); @@ -178,7 +178,7 @@ static void build_gsvs_ring_desc(nir_builder *b, struct lower_abi_state *s) if (s->shader->is_gs_copy_shader) { s->gsvs_ring[0] = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 4); - } else if (sel->stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg) { + } else if (b->shader->info.stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg) { nir_def *base_addr = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 2); base_addr = nir_pack_64_2x32(b, base_addr); @@ -196,7 +196,7 @@ static void build_gsvs_ring_desc(nir_builder *b, struct lower_abi_state *s) if (!num_components) continue; - unsigned stride = 4 * num_components * sel->info.base.gs.vertices_out; + unsigned stride = 4 * num_components * b->shader->info.gs.vertices_out; /* Limit on the stride field for <= GFX7. */ assert(stride < (1 << 14)); @@ -240,12 +240,13 @@ static void preload_reusable_variables(nir_builder *b, struct lower_abi_state *s b->cursor = nir_before_impl(b->impl); - if (sel->screen->info.gfx_level <= GFX8 && sel->stage <= MESA_SHADER_GEOMETRY && - (key->ge.as_es || sel->stage == MESA_SHADER_GEOMETRY)) { + if (sel->screen->info.gfx_level <= GFX8 && b->shader->info.stage <= MESA_SHADER_GEOMETRY && + (key->ge.as_es || b->shader->info.stage == MESA_SHADER_GEOMETRY)) { s->esgs_ring = build_esgs_ring_desc(b, sel->screen->info.gfx_level, s->args); } - if (sel->stage == MESA_SHADER_TESS_CTRL || sel->stage == MESA_SHADER_TESS_EVAL) + if (b->shader->info.stage == MESA_SHADER_TESS_CTRL || + b->shader->info.stage == MESA_SHADER_TESS_EVAL) s->tess_offchip_ring = build_tess_ring_desc(b, sel->screen, s->args); build_gsvs_ring_desc(b, s); @@ -286,7 +287,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s struct si_shader_args *args = s->args; struct si_shader_selector *sel = shader->selector; union si_shader_key *key = &shader->key; - gl_shader_stage stage = sel->stage; + gl_shader_stage stage = b->shader->info.stage; b->cursor = nir_before_instr(instr); @@ -305,7 +306,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s break; } case nir_intrinsic_load_workgroup_size: { - assert(sel->info.base.workgroup_size_variable && sel->info.uses_variable_block_size); + assert(b->shader->info.workgroup_size_variable && sel->info.uses_variable_block_size); nir_def *block_size = ac_nir_load_arg(b, &args->ac, args->block_size); nir_def *comp[] = { @@ -373,7 +374,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s if (stage == MESA_SHADER_TESS_CTRL) { const unsigned num_hs_out = 
util_last_bit64(sel->info.tcs_outputs_written_for_tes); const unsigned out_vtx_size = num_hs_out * 16; - const unsigned out_vtx_per_patch = sel->info.base.tess.tcs_vertices_out; + const unsigned out_vtx_per_patch = b->shader->info.tess.tcs_vertices_out; per_vtx_out_patch_size = nir_imm_int(b, out_vtx_size * out_vtx_per_patch); } else { nir_def *num_hs_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 23, 6); @@ -681,7 +682,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s if (output_prim == MESA_PRIM_POINTS || output_prim == MESA_PRIM_LINES || output_prim == SI_PRIM_RECTANGLE_LIST) { replacement = nir_imm_int(b, 0); - } else if (shader->selector->stage == MESA_SHADER_VERTEX) { + } else if (stage == MESA_SHADER_VERTEX) { if (sel->screen->info.gfx_level >= GFX12) { replacement = nir_iand_imm(b, ac_nir_load_arg(b, &args->ac, args->ac.gs_vtx_offset[0]), ac_get_all_edge_flag_bits(sel->screen->info.gfx_level)); @@ -716,7 +717,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s break; case nir_intrinsic_load_tess_rel_patch_id_amd: /* LLVM need to replace patch id arg, so have to be done in LLVM backend. */ - if (!sel->info.base.use_aco_amd) + if (!b->shader->info.use_aco_amd) return false; if (stage == MESA_SHADER_TESS_CTRL) { diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c index f8b5d3b0326..e334a016b90 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c @@ -67,10 +67,10 @@ static nir_def *load_ubo_desc(nir_builder *b, nir_def *index, nir_def *addr = ac_nir_load_arg(b, &s->args->ac, s->args->const_and_shader_buffers); - if (sel->info.base.num_ubos == 1 && sel->info.base.num_ssbos == 0) + if (b->shader->info.num_ubos == 1 && b->shader->info.num_ssbos == 0) return load_ubo_desc_fast_path(b, addr, sel); - index = clamp_index(b, index, sel->info.base.num_ubos); + index = clamp_index(b, index, b->shader->info.num_ubos); index = nir_iadd_imm(b, index, SI_NUM_SHADER_BUFFERS); nir_def *offset = nir_ishl_imm(b, index, 4); @@ -90,7 +90,7 @@ static nir_def *load_ssbo_desc(nir_builder *b, nir_src *index, } nir_def *addr = ac_nir_load_arg(b, &s->args->ac, s->args->const_and_shader_buffers); - nir_def *slot = clamp_index(b, index->ssa, sel->info.base.num_ssbos); + nir_def *slot = clamp_index(b, index->ssa, b->shader->info.num_ssbos); slot = nir_isub_imm(b, SI_NUM_SHADER_BUFFERS - 1, slot); nir_def *offset = nir_ishl_imm(b, slot, 4); @@ -219,7 +219,7 @@ static nir_def *load_deref_image_desc(nir_builder *b, nir_deref_instr *deref, { unsigned const_index; nir_def *dynamic_index; - nir_def *index = deref_to_index(b, deref, s->shader->selector->info.base.num_images, + nir_def *index = deref_to_index(b, deref, b->shader->info.num_images, &dynamic_index, &const_index); nir_def *desc; diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c index ee8c32dcc06..baab8df529c 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c @@ -131,7 +131,7 @@ load_vs_input_from_blit_sgpr(nir_builder *b, unsigned input_index, /* Color or texture coordinates: */ assert(input_index == 1); - unsigned vs_blit_property = s->shader->selector->info.base.vs.blit_sgprs_amd; + unsigned vs_blit_property = b->shader->info.vs.blit_sgprs_amd; if (vs_blit_property == 
SI_VS_BLIT_SGPRS_POS_COLOR + has_attribute_ring_address) { for (int i = 0; i < 4; i++) out[i] = ac_nir_load_arg_at_offset(b, &s->args->ac, s->args->vs_blit_inputs, 3 + i); @@ -571,7 +571,7 @@ lower_vs_input_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state) unsigned num_components = intrin->def.num_components; nir_def *comp[4]; - if (s->shader->selector->info.base.vs.blit_sgprs_amd) + if (b->shader->info.vs.blit_sgprs_amd) load_vs_input_from_blit_sgpr(b, input_index, s, comp); else load_vs_input_from_vertex_buffer(b, input_index, s, intrin->def.bit_size, comp); @@ -598,7 +598,7 @@ si_nir_lower_vs_inputs(nir_shader *nir, struct si_shader *shader, struct si_shad .args = args, }; - if (!sel->info.base.vs.blit_sgprs_amd) + if (!nir->info.vs.blit_sgprs_amd) get_vertex_index_for_all_inputs(nir, &state); return nir_shader_intrinsics_pass(nir, lower_vs_input_instr, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index cff3b44fe24..8a94f7d9ec0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -124,13 +124,12 @@ unsigned si_shader_io_get_unique_index(unsigned semantic) } } -static void declare_streamout_params(struct si_shader_args *args, struct si_shader *shader) +static void declare_streamout_params(struct si_shader_args *args, struct si_shader *shader, + const shader_info *info) { - struct si_shader_selector *sel = shader->selector; - if (shader->selector->screen->info.gfx_level >= GFX11) { /* NGG streamout. */ - if (sel->stage == MESA_SHADER_TESS_EVAL) + if (info->stage == MESA_SHADER_TESS_EVAL) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); return; } @@ -142,12 +141,12 @@ static void declare_streamout_params(struct si_shader_args *args, struct si_shad /* A streamout buffer offset is loaded if the stride is non-zero. */ for (int i = 0; i < 4; i++) { - if (!sel->info.base.xfb_stride[i]) + if (!info->xfb_stride[i]) continue; ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]); } - } else if (sel->stage == MESA_SHADER_TESS_EVAL) { + } else if (info->stage == MESA_SHADER_TESS_EVAL) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); } } @@ -196,14 +195,12 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader) return max_work_group_size; } -static void declare_const_and_shader_buffers(struct si_shader_args *args, - struct si_shader *shader, - bool assign_params) +static void declare_const_and_shader_buffers(struct si_shader_args *args, struct si_shader *shader, + const shader_info *info, bool assign_params) { enum ac_arg_type const_shader_buf_type; - if (shader->selector->info.base.num_ubos == 1 && - shader->selector->info.base.num_ssbos == 0) + if (info->num_ubos == 1 && info->num_ssbos == 0) const_shader_buf_type = AC_ARG_CONST_FLOAT_PTR; else const_shader_buf_type = AC_ARG_CONST_DESC_PTR; @@ -219,11 +216,10 @@ static void declare_samplers_and_images(struct si_shader_args *args, bool assign assign_params ? 
&args->samplers_and_images : &args->other_samplers_and_images); } -static void declare_per_stage_desc_pointers(struct si_shader_args *args, - struct si_shader *shader, - bool assign_params) +static void declare_per_stage_desc_pointers(struct si_shader_args *args, struct si_shader *shader, + const shader_info *info, bool assign_params) { - declare_const_and_shader_buffers(args, shader, assign_params); + declare_const_and_shader_buffers(args, shader, info, assign_params); declare_samplers_and_images(args, assign_params); } @@ -290,7 +286,8 @@ static void declare_vs_input_vgprs(struct si_shader_args *args, struct si_shader } } -static void declare_vs_blit_inputs(struct si_shader *shader, struct si_shader_args *args) +static void declare_vs_blit_inputs(struct si_shader *shader, struct si_shader_args *args, + const shader_info *info) { bool has_attribute_ring_address = shader->selector->screen->info.gfx_level >= GFX11; @@ -298,7 +295,7 @@ static void declare_vs_blit_inputs(struct si_shader *shader, struct si_shader_ar ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* i16 x1, y1 */ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* depth */ - if (shader->selector->info.base.vs.blit_sgprs_amd == + if (info->vs.blit_sgprs_amd == SI_VS_BLIT_SGPRS_POS_COLOR + has_attribute_ring_address) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color0 */ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color1 */ @@ -306,7 +303,7 @@ static void declare_vs_blit_inputs(struct si_shader *shader, struct si_shader_ar ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color3 */ if (has_attribute_ring_address) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* attribute ring address */ - } else if (shader->selector->info.base.vs.blit_sgprs_amd == + } else if (info->vs.blit_sgprs_amd == SI_VS_BLIT_SGPRS_POS_TEXCOORD + has_attribute_ring_address) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.x1 */ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.y1 */ @@ -341,12 +338,13 @@ void si_add_arg_checked(struct ac_shader_args *args, enum ac_arg_regfile file, u ac_add_arg(args, file, registers, type, arg); } -static void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) +static void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args, + const shader_info *info) { unsigned i, num_returns, num_return_sgprs; unsigned num_prolog_vgprs = 0; struct si_shader_selector *sel = shader->selector; - unsigned stage = shader->is_gs_copy_shader ? MESA_SHADER_VERTEX : sel->stage; + unsigned stage = shader->is_gs_copy_shader ? 
MESA_SHADER_VERTEX : info->stage; unsigned stage_case = stage; memset(args, 0, sizeof(*args)); @@ -363,14 +361,14 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args case MESA_SHADER_VERTEX: declare_global_desc_pointers(args); - if (sel->info.base.vs.blit_sgprs_amd) { - declare_vs_blit_inputs(shader, args); + if (info->vs.blit_sgprs_amd) { + declare_vs_blit_inputs(shader, args, info); } else { - declare_per_stage_desc_pointers(args, shader, true); + declare_per_stage_desc_pointers(args, shader, info, true); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits); if (shader->is_gs_copy_shader) { - declare_streamout_params(args, shader); + declare_streamout_params(args, shader, info); } else { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); @@ -382,13 +380,13 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args } else if (shader->key.ge.as_ls) { /* no extra parameters */ } else { - declare_streamout_params(args, shader); + declare_streamout_params(args, shader, info); } } } /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ - if (sel->info.base.use_aco_amd && sel->screen->info.gfx_level < GFX11) + if (info->use_aco_amd && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* VGPRs */ @@ -398,7 +396,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args case MESA_SHADER_TESS_CTRL: /* GFX6-GFX8 */ declare_global_desc_pointers(args); - declare_per_stage_desc_pointers(args, shader, true); + declare_per_stage_desc_pointers(args, shader, info, true); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits); @@ -406,7 +404,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset); /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ - if (sel->info.base.use_aco_amd && sel->screen->info.gfx_level < GFX11) + if (info->use_aco_amd && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* VGPRs */ @@ -418,7 +416,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args /* Merged stages have 8 system SGPRs at the beginning. 
*/ /* Gfx9-10: SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */ /* Gfx11+: SPI_SHADER_PGM_LO/HI_HS */ - declare_per_stage_desc_pointers(args, shader, stage == MESA_SHADER_TESS_CTRL); + declare_per_stage_desc_pointers(args, shader, info, stage == MESA_SHADER_TESS_CTRL); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset); @@ -430,7 +428,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */ declare_global_desc_pointers(args); - declare_per_stage_desc_pointers(args, shader, stage == MESA_SHADER_VERTEX); + declare_per_stage_desc_pointers(args, shader, info, stage == MESA_SHADER_VERTEX); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex); @@ -459,7 +457,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args ac_add_return(&args->ac, AC_ARG_VGPR); /* VS outputs passed via VGPRs to TCS. */ - if (shader->key.ge.opt.same_patch_vertices && !sel->info.base.use_aco_amd) { + if (shader->key.ge.opt.same_patch_vertices && !info->use_aco_amd) { unsigned num_outputs = util_last_bit64(shader->selector->info.ls_es_outputs_written); for (i = 0; i < num_outputs * 4; i++) ac_add_return(&args->ac, AC_ARG_VGPR); @@ -467,7 +465,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args } } else { /* TCS inputs are passed via VGPRs from VS. */ - if (shader->key.ge.opt.same_patch_vertices && !sel->info.base.use_aco_amd) { + if (shader->key.ge.opt.same_patch_vertices && !info->use_aco_amd) { unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.ls_es_outputs_written); for (i = 0; i < num_inputs * 4; i++) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); @@ -479,7 +477,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args /* Merged stages have 8 system SGPRs at the beginning. 
*/ /* Gfx9-10: SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */ /* Gfx11+: SPI_SHADER_PGM_LO/HI_GS */ - declare_per_stage_desc_pointers(args, shader, stage == MESA_SHADER_GEOMETRY); + declare_per_stage_desc_pointers(args, shader, info, stage == MESA_SHADER_GEOMETRY); if (shader->key.ge.as_ngg) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info); @@ -496,13 +494,13 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */ declare_global_desc_pointers(args); - if (stage != MESA_SHADER_VERTEX || !sel->info.base.vs.blit_sgprs_amd) { + if (stage != MESA_SHADER_VERTEX || !info->vs.blit_sgprs_amd) { declare_per_stage_desc_pointers( - args, shader, (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL)); + args, shader, info, (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL)); } - if (stage == MESA_SHADER_VERTEX && sel->info.base.vs.blit_sgprs_amd) { - declare_vs_blit_inputs(shader, args); + if (stage == MESA_SHADER_VERTEX && info->vs.blit_sgprs_amd) { + declare_vs_blit_inputs(shader, args, info); } else { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits); @@ -547,7 +545,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args /* Need to keep ES/GS arg index same for shared args when ACO, * so this is not able to be before shared VGPRs. */ - if (!sel->info.base.vs.blit_sgprs_amd) + if (!info->vs.blit_sgprs_amd) declare_vb_descriptor_input_sgprs(args, shader); } else if (stage == MESA_SHADER_TESS_EVAL) { declare_tes_input_vgprs(args); @@ -565,7 +563,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args case MESA_SHADER_TESS_EVAL: declare_global_desc_pointers(args); - declare_per_stage_desc_pointers(args, shader, true); + declare_per_stage_desc_pointers(args, shader, info, true); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr); @@ -575,12 +573,12 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset); } else { - declare_streamout_params(args, shader); + declare_streamout_params(args, shader, info); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); } /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ - if (sel->info.base.use_aco_amd && sel->screen->info.gfx_level < GFX11) + if (info->use_aco_amd && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* VGPRs */ @@ -589,12 +587,12 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args case MESA_SHADER_GEOMETRY: declare_global_desc_pointers(args); - declare_per_stage_desc_pointers(args, shader, true); + declare_per_stage_desc_pointers(args, shader, info, true); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id); /* GFX11 set FLAT_SCRATCH directly instead of using this arg. 
*/ - if (sel->info.base.use_aco_amd && sel->screen->info.gfx_level < GFX11) + if (info->use_aco_amd && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* VGPRs */ @@ -610,7 +608,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args case MESA_SHADER_FRAGMENT: declare_global_desc_pointers(args); - declare_per_stage_desc_pointers(args, shader, true); + declare_per_stage_desc_pointers(args, shader, info, true); si_add_arg_checked(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->alpha_reference, SI_PARAM_ALPHA_REF); si_add_arg_checked(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask, @@ -647,7 +645,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.pos_fixed_pt, SI_PARAM_POS_FIXED_PT); - if (sel->info.base.use_aco_amd) { + if (info->use_aco_amd) { ac_compact_ps_vgpr_args(&args->ac, shader->config.spi_ps_input_addr); /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ @@ -683,14 +681,14 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args case MESA_SHADER_COMPUTE: declare_global_desc_pointers(args); - declare_per_stage_desc_pointers(args, shader, true); + declare_per_stage_desc_pointers(args, shader, info, true); if (shader->selector->info.uses_grid_size) ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups); if (shader->selector->info.uses_variable_block_size) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->block_size); unsigned cs_user_data_dwords = - shader->selector->info.base.cs.user_data_components_amd; + info->cs.user_data_components_amd; if (cs_user_data_dwords) { ac_add_arg(&args->ac, AC_ARG_SGPR, MIN2(cs_user_data_dwords, 4), AC_ARG_INT, &args->cs_user_data[0]); @@ -710,7 +708,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args } /* Images in user SGPRs. */ for (unsigned i = 0; i < shader->selector->cs_num_images_in_user_sgprs; i++) { - unsigned num_sgprs = BITSET_TEST(shader->selector->info.base.image_buffers, i) ? 4 : 8; + unsigned num_sgprs = BITSET_TEST(info->image_buffers, i) ? 4 : 8; while (args->ac.num_sgprs_used % num_sgprs != 0) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); @@ -734,7 +732,7 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size); /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ - if (sel->info.base.use_aco_amd && sel->screen->info.gfx_level < GFX11) + if (info->use_aco_amd && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* Hardware VGPRs. */ @@ -1851,7 +1849,8 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir) struct si_shader_selector *sel = shader->selector; struct si_shader_selector *next_sel = shader->next_shader ? 
shader->next_shader->selector : sel; const union si_shader_key *key = &shader->key; - const bool is_gfx9_mono_tcs = shader->is_monolithic && next_sel->stage == MESA_SHADER_TESS_CTRL && + const bool is_gfx9_mono_tcs = shader->is_monolithic && + next_sel->stage == MESA_SHADER_TESS_CTRL && sel->screen->info.gfx_level >= GFX9; if (nir->info.stage == MESA_SHADER_VERTEX) { @@ -1928,7 +1927,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir) .kill_pointsize = key->ge.opt.kill_pointsize, .kill_layer = key->ge.opt.kill_layer, .force_vrs = sel->screen->options.vrs2x2, - .use_gfx12_xfb_intrinsic = !sel->info.base.use_aco_amd, + .use_gfx12_xfb_intrinsic = !nir->info.use_aco_amd, }; if (nir->info.stage == MESA_SHADER_VERTEX || @@ -1958,7 +1957,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir) options.passthrough = gfx10_is_ngg_passthrough(shader); options.use_edgeflags = gfx10_has_variable_edgeflags(shader); options.has_gen_prim_query = options.has_xfb_prim_query = - sel->screen->info.gfx_level >= GFX11 && !sel->info.base.vs.blit_sgprs_amd; + sel->screen->info.gfx_level >= GFX11 && !nir->info.vs.blit_sgprs_amd; options.export_primitive_id = key->ge.mono.u.vs_export_prim_id; options.instance_rate_inputs = instance_rate_inputs; options.user_clip_plane_enable_mask = clip_plane_enable; @@ -2331,10 +2330,10 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ nir_print_shader(nir, stdout); } - si_init_shader_args(shader, args); + si_init_shader_args(shader, args, &nir->info); /* Kill outputs according to the shader key. */ - if (sel->stage <= MESA_SHADER_GEOMETRY) + if (nir->info.stage <= MESA_SHADER_GEOMETRY) NIR_PASS(progress, nir, si_nir_kill_outputs, key); NIR_PASS(progress, nir, ac_nir_lower_tex, @@ -2348,7 +2347,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ bool inline_uniforms = false; uint32_t *inlined_uniform_values; - si_get_inline_uniform_state((union si_shader_key*)key, sel->stage, + si_get_inline_uniform_state((union si_shader_key*)key, nir->info.stage, &inline_uniforms, &inlined_uniform_values); if (inline_uniforms) { @@ -2396,7 +2395,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ progress = true; } - if (sel->stage == MESA_SHADER_FRAGMENT) { + if (nir->info.stage == MESA_SHADER_FRAGMENT) { /* This uses the epilog key, so only monolithic shaders can call this. */ if (shader->is_monolithic) NIR_PASS(progress, nir, si_nir_kill_ps_outputs, key); @@ -2415,7 +2414,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ NIR_PASS(progress, nir, ac_nir_lower_image_opcodes); /* LLVM does not work well with this, so is handled in llvm backend waterfall. 
*/ - if (sel->info.base.use_aco_amd && sel->info.has_non_uniform_tex_access) { + if (nir->info.use_aco_amd && sel->info.has_non_uniform_tex_access) { nir_lower_non_uniform_access_options options = { .types = nir_lower_non_uniform_texture_access, }; @@ -2423,13 +2422,13 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ } bool is_last_vgt_stage = - (sel->stage == MESA_SHADER_VERTEX || - sel->stage == MESA_SHADER_TESS_EVAL || - (sel->stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg)) && + (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL || + (nir->info.stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg)) && !shader->key.ge.as_ls && !shader->key.ge.as_es; /* Legacy GS is not last VGT stage because it has GS copy shader. */ - bool is_legacy_gs = sel->stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg; + bool is_legacy_gs = nir->info.stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg; if (is_last_vgt_stage || is_legacy_gs) NIR_PASS(progress, nir, si_nir_clamp_vertex_color); @@ -2457,7 +2456,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ */ progress |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level); - if (sel->stage == MESA_SHADER_VERTEX) + if (nir->info.stage == MESA_SHADER_VERTEX) NIR_PASS(progress, nir, si_nir_lower_vs_inputs, shader, args); progress |= si_lower_io_to_mem(shader, nir); @@ -2472,7 +2471,8 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ if (key->ge.as_ngg) { /* Lower last VGT NGG shader stage. */ si_lower_ngg(shader, nir); - } else if (sel->stage == MESA_SHADER_VERTEX || sel->stage == MESA_SHADER_TESS_EVAL) { + } else if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL) { /* Lower last VGT none-NGG VS/TES shader stage. */ unsigned clip_cull_mask = (sel->info.clipdist_mask & ~key->ge.opt.kill_clip_distances) | @@ -2493,7 +2493,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ } else if (is_legacy_gs) { NIR_PASS_V(nir, ac_nir_lower_legacy_gs, false, sel->screen->use_ngg, output_info); progress = true; - } else if (sel->stage == MESA_SHADER_FRAGMENT && shader->is_monolithic) { + } else if (nir->info.stage == MESA_SHADER_FRAGMENT && shader->is_monolithic) { /* Uniform inlining can eliminate PS inputs, and colormask can remove PS outputs, * which can also cause the elimination of PS inputs. Remove holes after removed PS inputs * by renumbering them. This can only happen with monolithic PS. 
Colors are unaffected @@ -2519,7 +2519,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ ac_nir_lower_ps_options options = { .gfx_level = sel->screen->info.gfx_level, .family = sel->screen->info.family, - .use_aco = sel->info.base.use_aco_amd, + .use_aco = nir->info.use_aco_amd, .uses_discard = si_shader_uses_discard(shader), .alpha_to_coverage_via_mrtz = key->ps.part.epilog.alpha_to_coverage_via_mrtz, .dual_src_blend_swizzle = key->ps.part.epilog.dual_src_blend_swizzle, @@ -2561,7 +2561,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ .lower_to_scalar = true, .lower_subgroup_masks = true, .lower_relative_shuffle = true, - .lower_rotate_to_shuffle = !sel->info.base.use_aco_amd, + .lower_rotate_to_shuffle = !nir->info.use_aco_amd, .lower_shuffle_to_32bit = true, .lower_vote_eq = true, .lower_vote_bool_eq = true, @@ -2569,8 +2569,8 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ .lower_quad_broadcast_dynamic_to_const = sel->screen->info.gfx_level <= GFX7, .lower_shuffle_to_swizzle_amd = true, .lower_ballot_bit_count_to_mbcnt_amd = true, - .lower_inverse_ballot = !sel->info.base.use_aco_amd && LLVM_VERSION_MAJOR < 17, - .lower_boolean_reduce = sel->info.base.use_aco_amd, + .lower_inverse_ballot = !nir->info.use_aco_amd && LLVM_VERSION_MAJOR < 17, + .lower_boolean_reduce = nir->info.use_aco_amd, .lower_boolean_shuffle = true, }); @@ -2618,7 +2618,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ /* LLVM keep non-uniform sampler as index, so can't do this in NIR. * Must be done after si_nir_lower_resource(). */ - if (sel->info.base.use_aco_amd && sel->info.has_shadow_comparison && + if (nir->info.use_aco_amd && sel->info.has_shadow_comparison && sel->screen->info.gfx_level >= GFX8 && sel->screen->info.gfx_level <= GFX9) { NIR_PASS(progress, nir, si_nir_clamp_shadow_comparison_value); } @@ -2642,7 +2642,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ /* aco only accept scalar const, must be done after si_nir_late_opts() * which may generate vec const. */ - if (sel->info.base.use_aco_amd) + if (nir->info.use_aco_amd) NIR_PASS_V(nir, nir_lower_load_const_to_scalar); /* This helps LLVM form VMEM clauses and thus get more GPU cache hits. @@ -2739,7 +2739,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, output_info); struct si_shader_args args; - si_init_shader_args(shader, &args); + si_init_shader_args(shader, &args, &gs_nir->info); NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, sscreen->info.gfx_level, AC_HW_VERTEX_SHADER, &args.ac); NIR_PASS_V(nir, si_nir_lower_abi, shader, &args); @@ -2747,7 +2747,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, si_nir_opts(gs_selector->screen, nir, false); /* aco only accept scalar const */ - if (gsinfo->base.use_aco_amd) + if (gs_nir->info.use_aco_amd) NIR_PASS_V(nir, nir_lower_load_const_to_scalar); if (si_can_dump_shader(sscreen, MESA_SHADER_GEOMETRY, SI_DUMP_NIR)) { @@ -2757,7 +2757,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, bool ok = #if AMD_LLVM_AVAILABLE - !gs_selector->info.base.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader, + !gs_nir->info.use_aco_amd ? 
si_llvm_compile_shader(sscreen, compiler, shader, &args, debug, nir) : #endif si_aco_compile_shader(shader, &args, nir, debug); @@ -2994,7 +2994,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi /* Dump NIR before doing NIR->LLVM conversion in case the * conversion fails. */ - if (si_can_dump_shader(sscreen, sel->stage, SI_DUMP_NIR)) { + if (si_can_dump_shader(sscreen, nir->info.stage, SI_DUMP_NIR)) { nir_print_shader(nir, stderr); if (nir->xfb_info) @@ -3041,8 +3041,8 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi ret = #if AMD_LLVM_AVAILABLE - !sel->info.base.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader, &args, - debug, nir) : + !nir->info.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader, &args, + debug, nir) : #endif si_aco_compile_shader(shader, &args, nir, debug); @@ -3052,7 +3052,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi shader->config.float_mode = float_mode; /* The GS copy shader is compiled next. */ - if (sel->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) { + if (nir->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) { shader->gs_copy_shader = si_nir_generate_gs_copy_shader(sscreen, compiler, shader, nir, debug, &legacy_gs_output_info.info); @@ -3064,13 +3064,13 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi } /* Compute vs_output_ps_input_cntl. */ - if ((sel->stage == MESA_SHADER_VERTEX || - sel->stage == MESA_SHADER_TESS_EVAL || - sel->stage == MESA_SHADER_GEOMETRY) && + if ((nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL || + nir->info.stage == MESA_SHADER_GEOMETRY) && !shader->key.ge.as_ls && !shader->key.ge.as_es) { uint8_t *vs_output_param_offset = shader->info.vs_output_param_offset; - if (sel->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) + if (nir->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) vs_output_param_offset = shader->gs_copy_shader->info.vs_output_param_offset; /* We must use the original shader info before the removal of duplicated shader outputs. */ @@ -3102,7 +3102,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi } /* Validate SGPR and VGPR usage for compute to detect compiler bugs. */ - if (sel->stage == MESA_SHADER_COMPUTE) { + if (nir->info.stage == MESA_SHADER_COMPUTE) { unsigned max_vgprs = sscreen->info.num_physical_wave64_vgprs_per_simd * (shader->wave_size == 32 ? 2 : 1); unsigned max_sgprs = sscreen->info.num_physical_sgprs_per_simd; @@ -3133,7 +3133,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi /* Add/remove the scratch offset to/from input SGPRs. */ if (!sel->screen->info.has_scratch_base_registers && !si_is_merged_shader(shader)) { - if (sel->info.base.use_aco_amd) { + if (nir->info.use_aco_amd) { /* When aco scratch_offset arg is added explicitly at the beginning. * After compile if no scratch used, reduce the input sgpr count. */ @@ -3147,14 +3147,14 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi } /* Calculate the number of fragment input VGPRs. 
*/ - if (sel->stage == MESA_SHADER_FRAGMENT) { + if (nir->info.stage == MESA_SHADER_FRAGMENT) { shader->info.num_input_vgprs = ac_get_fs_input_vgpr_cnt( &shader->config, &shader->info.num_fragcoord_components); } si_calculate_max_simd_waves(shader); - if (si_can_dump_shader(sscreen, sel->stage, SI_DUMP_STATS)) { + if (si_can_dump_shader(sscreen, nir->info.stage, SI_DUMP_STATS)) { struct util_debug_callback out_stderr = { .debug_message = debug_message_stderr, }; diff --git a/src/gallium/drivers/radeonsi/si_shader_aco.c b/src/gallium/drivers/radeonsi/si_shader_aco.c index b7f609f2a63..294b0f77af7 100644 --- a/src/gallium/drivers/radeonsi/si_shader_aco.c +++ b/src/gallium/drivers/radeonsi/si_shader_aco.c @@ -155,7 +155,7 @@ si_aco_compile_shader(struct si_shader *shader, const struct si_shader_selector *sel = shader->selector; struct aco_compiler_options options = {0}; - si_fill_aco_options(sel->screen, sel->stage, &options, debug); + si_fill_aco_options(sel->screen, nir->info.stage, &options, debug); struct aco_shader_info info = {0}; si_fill_aco_shader_info(shader, &info, args); @@ -169,8 +169,7 @@ si_aco_compile_shader(struct si_shader *shader, /* For merged shader stage. */ if (shader->is_monolithic && sel->screen->info.gfx_level >= GFX9 && - (sel->stage == MESA_SHADER_TESS_CTRL || sel->stage == MESA_SHADER_GEOMETRY)) { - + (nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_GEOMETRY)) { shaders[num_shaders++] = si_get_prev_stage_nir_shader(shader, &prev_shader, &prev_args, &free_nir); diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index b92167b9d1c..18d443f1c9e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -548,7 +548,7 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir, /* Get options from shader profiles. */ for (unsigned i = 0; i < ARRAY_SIZE(si_shader_profiles); i++) { - if (_mesa_printed_blake3_equal(info->base.source_blake3, si_shader_profiles[i].blake3)) { + if (_mesa_printed_blake3_equal(nir->info.source_blake3, si_shader_profiles[i].blake3)) { info->options = si_shader_profiles[i].options; break; } @@ -681,7 +681,7 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir, BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) || BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION)); - info->uses_vmem_load_other |= info->base.fs.uses_fbfetch_output; + info->uses_vmem_load_other |= nir->info.fs.uses_fbfetch_output; /* Add both front and back color inputs. */ unsigned num_inputs_with_colors = info->num_inputs; @@ -710,7 +710,7 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir, if (nir->info.stage == MESA_SHADER_VERTEX) { info->num_vs_inputs = - nir->info.stage == MESA_SHADER_VERTEX && !info->base.vs.blit_sgprs_amd ? info->num_inputs : 0; + nir->info.stage == MESA_SHADER_VERTEX && !nir->info.vs.blit_sgprs_amd ? 
info->num_inputs : 0; unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.gfx_level); info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs); } @@ -731,23 +731,23 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir, assert(((info->esgs_vertex_stride / 4) & C_028AAC_ITEMSIZE) == 0); } - info->tcs_inputs_via_temp = info->base.tess.tcs_same_invocation_inputs_read; - info->tcs_inputs_via_lds = info->base.tess.tcs_cross_invocation_inputs_read | - (info->base.tess.tcs_same_invocation_inputs_read & - info->base.inputs_read_indirectly); + info->tcs_inputs_via_temp = nir->info.tess.tcs_same_invocation_inputs_read; + info->tcs_inputs_via_lds = nir->info.tess.tcs_cross_invocation_inputs_read | + (nir->info.tess.tcs_same_invocation_inputs_read & + nir->info.inputs_read_indirectly); } if (nir->info.stage == MESA_SHADER_GEOMETRY) { info->gsvs_vertex_size = info->num_outputs * 16; - info->max_gsvs_emit_size = info->gsvs_vertex_size * info->base.gs.vertices_out; + info->max_gsvs_emit_size = info->gsvs_vertex_size * nir->info.gs.vertices_out; info->gs_input_verts_per_prim = - mesa_vertices_per_prim(info->base.gs.input_primitive); + mesa_vertices_per_prim(nir->info.gs.input_primitive); } info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK : - u_bit_consecutive(0, info->base.clip_distance_array_size); - info->culldist_mask = u_bit_consecutive(0, info->base.cull_distance_array_size) << - info->base.clip_distance_array_size; + u_bit_consecutive(0, nir->info.clip_distance_array_size); + info->culldist_mask = u_bit_consecutive(0, nir->info.cull_distance_array_size) << + nir->info.clip_distance_array_size; if (nir->info.stage == MESA_SHADER_FRAGMENT) { for (unsigned i = 0; i < info->num_inputs; i++) { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index f9c0727df99..620953e817f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -548,13 +548,13 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade const struct si_shader_info *info = &sel->info; ctx->shader = shader; - ctx->stage = shader->is_gs_copy_shader ? MESA_SHADER_VERTEX : sel->stage; + ctx->stage = shader->is_gs_copy_shader ? MESA_SHADER_VERTEX : nir->info.stage; - ctx->num_const_buffers = info->base.num_ubos; - ctx->num_shader_buffers = info->base.num_ssbos; + ctx->num_const_buffers = nir->info.num_ubos; + ctx->num_shader_buffers = nir->info.num_ssbos; - ctx->num_samplers = BITSET_LAST_BIT(info->base.textures_used); - ctx->num_images = info->base.num_images; + ctx->num_samplers = BITSET_LAST_BIT(nir->info.textures_used); + ctx->num_images = nir->info.num_images; ctx->abi.intrinsic_load = si_llvm_load_intrinsic; ctx->abi.load_sampler_desc = si_llvm_load_sampler_desc; @@ -710,7 +710,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade * the wave size is a multiple of the number of patch vertices. 
*/ if (!shader->key.ge.opt.same_patch_vertices || - ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0) + ctx->ac.wave_size % nir->info.tess.tcs_vertices_out != 0) ac_build_s_barrier(&ctx->ac, ctx->stage); } } else if (ctx->stage == MESA_SHADER_GEOMETRY) { @@ -810,11 +810,12 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * { struct si_shader_selector *sel = shader->selector; struct si_shader_context ctx; - enum ac_float_mode float_mode = nir->info.stage == MESA_SHADER_KERNEL ? AC_FLOAT_MODE_DEFAULT : AC_FLOAT_MODE_DEFAULT_OPENGL; + enum ac_float_mode float_mode = nir->info.stage == MESA_SHADER_KERNEL ? + AC_FLOAT_MODE_DEFAULT : AC_FLOAT_MODE_DEFAULT_OPENGL; bool exports_color_null = false; bool exports_mrtz = false; - if (sel->stage == MESA_SHADER_FRAGMENT) { + if (nir->info.stage == MESA_SHADER_FRAGMENT) { exports_color_null = sel->info.colors_written; exports_mrtz = shader->ps.writes_z || shader->ps.writes_stencil || shader->ps.writes_samplemask || @@ -834,7 +835,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* For merged shader stage. */ if (shader->is_monolithic && sscreen->info.gfx_level >= GFX9 && - (sel->stage == MESA_SHADER_TESS_CTRL || sel->stage == MESA_SHADER_GEOMETRY)) { + (nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_GEOMETRY)) { /* LS or ES shader. */ struct si_shader prev_shader = {}; @@ -853,7 +854,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* Reset the shader context. */ ctx.shader = shader; - ctx.stage = sel->stage; + ctx.stage = nir->info.stage; bool same_thread_count = shader->key.ge.opt.same_patch_vertices; si_build_wrapper_function(&ctx, parts, same_thread_count); @@ -866,7 +867,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* Compile to bytecode. 
*/ if (!si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler, &ctx.ac, debug, - sel->stage, si_get_shader_name(shader), + nir->info.stage, si_get_shader_name(shader), si_should_optimize_less(compiler, shader->selector))) { si_llvm_dispose(&ctx); fprintf(stderr, "LLVM failed to compile shader\n"); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 479539e5d56..6ca603378b1 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -3620,7 +3620,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, switch (sel->stage) { case MESA_SHADER_GEOMETRY: /* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */ - sel->rast_prim = (enum mesa_prim)sel->info.base.gs.output_primitive; + sel->rast_prim = (enum mesa_prim)sel->nir->info.gs.output_primitive; if (util_rast_prim_is_triangles(sel->rast_prim)) sel->rast_prim = MESA_PRIM_TRIANGLES; @@ -3631,17 +3631,17 @@ static void *si_create_shader_selector(struct pipe_context *ctx, */ sel->tess_turns_off_ngg = sscreen->info.gfx_level >= GFX10 && sscreen->info.gfx_level <= GFX10_3 && - (sel->info.base.gs.invocations * sel->info.base.gs.vertices_out > 256 || - sel->info.base.gs.invocations * sel->info.base.gs.vertices_out * + (sel->nir->info.gs.invocations * sel->nir->info.gs.vertices_out > 256 || + sel->nir->info.gs.invocations * sel->nir->info.gs.vertices_out * (sel->info.num_outputs * 4 + 1) > 6500 /* max dw per GS primitive */); break; case MESA_SHADER_VERTEX: case MESA_SHADER_TESS_EVAL: if (sel->stage == MESA_SHADER_TESS_EVAL) { - if (sel->info.base.tess.point_mode) + if (sel->nir->info.tess.point_mode) sel->rast_prim = MESA_PRIM_POINTS; - else if (sel->info.base.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) + else if (sel->nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) sel->rast_prim = MESA_PRIM_LINE_STRIP; else sel->rast_prim = MESA_PRIM_TRIANGLES; @@ -3657,13 +3657,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sscreen->use_ngg_culling && sel->info.writes_position && !sel->info.writes_viewport_index && /* cull only against viewport 0 */ - !sel->info.base.writes_memory && + !sel->nir->info.writes_memory && /* NGG GS supports culling with streamout because it culls after streamout. */ (sel->stage == MESA_SHADER_GEOMETRY || !sel->info.enabled_streamout_buffer_mask) && (sel->stage != MESA_SHADER_GEOMETRY || sel->info.num_stream_output_components[0]) && (sel->stage != MESA_SHADER_VERTEX || - (!sel->info.base.vs.blit_sgprs_amd && - !sel->info.base.vs.window_space_position)); + (!sel->nir->info.vs.blit_sgprs_amd && + !sel->nir->info.vs.window_space_position)); sel->ngg_cull_vert_threshold = UINT_MAX; /* disabled (changed below) */