From 8ff44f17ef855d7e1fa9a08e09ef052ed1ccca08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Wed, 8 Oct 2025 19:15:50 +0200
Subject: [PATCH] amd/lower_mem_access_bit_sizes: also use SMEM for subdword loads

We can simply extract from the loaded dwords as per
nir_lower_mem_access_bit_sizes() lowering.

Part-of:
---
 src/amd/common/nir/ac_nir.h                             |  2 +-
 src/amd/common/nir/ac_nir_lower_mem_access_bit_sizes.c  | 10 +---------
 .../compiler/instruction_selection/aco_isel_setup.cpp   |  2 +-
 src/amd/vulkan/radv_pipeline.c                          |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c                |  2 +-
 5 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h
index f44bfd4c137..bf729d0cb7d 100644
--- a/src/amd/common/nir/ac_nir.h
+++ b/src/amd/common/nir/ac_nir.h
@@ -413,7 +413,7 @@ bool
 ac_nir_opt_shared_append(nir_shader *shader);
 
 bool
-ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering);
+ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
 
 bool
 ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
diff --git a/src/amd/common/nir/ac_nir_lower_mem_access_bit_sizes.c b/src/amd/common/nir/ac_nir_lower_mem_access_bit_sizes.c
index 868e16cfaf4..644b4509c1e 100644
--- a/src/amd/common/nir/ac_nir_lower_mem_access_bit_sizes.c
+++ b/src/amd/common/nir/ac_nir_lower_mem_access_bit_sizes.c
@@ -13,7 +13,6 @@
 typedef struct {
    enum amd_gfx_level gfx_level;
    bool use_llvm;
-   bool after_lowering;
 } mem_access_cb_data;
 
 static bool
@@ -39,12 +38,6 @@ use_smem_for_load(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data_)
    if (intrin->def.divergent)
       return false;
 
-   /* ACO doesn't support instruction selection for multi-component 8/16-bit SMEM loads. */
-   const bool supports_scalar_subdword = cb_data->gfx_level >= GFX12 && !cb_data->use_llvm;
-   if (cb_data->after_lowering && intrin->def.bit_size < 32 &&
-       (intrin->def.num_components > 1 || !supports_scalar_subdword))
-      return false;
-
    enum gl_access_qualifier access = nir_intrinsic_access(intrin);
    bool glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
    bool reorder = nir_intrinsic_can_reorder(intrin) || ((access & ACCESS_NON_WRITEABLE) && !(access & ACCESS_VOLATILE));
@@ -169,12 +162,11 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
 }
 
 bool
-ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering)
+ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm)
 {
    mem_access_cb_data cb_data = {
       .gfx_level = gfx_level,
       .use_llvm = use_llvm,
-      .after_lowering = after_lowering,
    };
    return nir_shader_intrinsics_pass(shader, &use_smem_for_load, nir_metadata_all, &cb_data);
 }
diff --git a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
index 3909d1a769f..0d090a02d2f 100644
--- a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
+++ b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
@@ -381,7 +381,7 @@ init_context(isel_context* ctx, nir_shader* shader)
    nir_divergence_analysis_impl(impl, (nir_divergence_options)options);
    apply_nuw_to_offsets(ctx, impl);
 
-   ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false, true);
+   ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false);
 
    if (shader->info.stage == MESA_SHADER_FRAGMENT) {
       nir_opt_load_skip_helpers_options skip_helper_options = {};
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 204848f6e1d..8edb7a0fab1 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -354,7 +354,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
       NIR_PASS(_, stage->nir, radv_nir_opt_tid_function, &tid_options);
 
    nir_divergence_analysis(stage->nir);
-   NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm, false);
+   NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm);
 
    NIR_PASS(_, stage->nir, radv_nir_opt_access_can_speculate);
    NIR_PASS(_, stage->nir, nir_lower_memory_model);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 38984cbff16..d3a23d95d66 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1627,7 +1627,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
    nir_divergence_analysis(nir); /* required by ac_nir_flag_smem_for_loads */
    /* This is required by ac_nir_scalarize_overfetching_loads_callback. */
    NIR_PASS(progress, nir, ac_nir_flag_smem_for_loads, sel->screen->info.gfx_level,
-            !sel->info.base.use_aco_amd, false);
+            !sel->info.base.use_aco_amd);
 
    /* Scalarize overfetching loads, so that we don't load more components than necessary.
     * Adjacent loads will be re-vectorized with a conservative overfetching limit. */
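
For readers unfamiliar with the lowering the commit message refers to, here is a
minimal, hypothetical C sketch (not Mesa code, not the actual NIR pass) of the
extraction arithmetic implied by widening a sub-dword load to a dword-granular
SMEM load: load the containing dword, then shift and mask out the wanted bytes.
The helpers read_dword() and load_subdword() are invented names, and the sketch
assumes the access does not straddle a dword boundary and a little-endian host.

/*
 * Hypothetical stand-alone illustration, not Mesa code: shows the kind of
 * extraction nir_lower_mem_access_bit_sizes() style lowering emits when an
 * 8/16-bit load is widened to a dword-granular (SMEM-like) load.
 * Assumptions: the sub-dword access does not cross a dword boundary (the
 * real lowering loads enough dwords to cover the access), little-endian host.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for a dword-granular load from a backing buffer. */
static uint32_t read_dword(const uint8_t *buf, uint32_t dword_index)
{
   uint32_t d;
   memcpy(&d, buf + dword_index * 4u, sizeof(d));
   return d;
}

/* Load bit_size (8 or 16) bits at byte_offset by loading the containing
 * dword and shifting/masking the wanted bytes out of it. */
static uint32_t load_subdword(const uint8_t *buf, uint32_t byte_offset, unsigned bit_size)
{
   uint32_t dword = read_dword(buf, byte_offset / 4u);  /* widened load  */
   unsigned shift = (byte_offset % 4u) * 8u;            /* byte position */
   uint32_t mask = (1u << bit_size) - 1u;               /* 0xff / 0xffff */
   return (dword >> shift) & mask;                      /* extract       */
}

int main(void)
{
   const uint8_t buf[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
   printf("byte at 2: 0x%02x\n", load_subdword(buf, 2, 8));   /* 0x33   */
   printf("half at 4: 0x%04x\n", load_subdword(buf, 4, 16));  /* 0x6655 */
   return 0;
}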