From 883b1ca364df9fe0027d4085393bf1baefa1f95e Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sun, 6 Jul 2025 23:12:58 +0200 Subject: [PATCH] aco: disable wqm for tex loads when not needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By only executing VMEM loads for lanes where the result is used, we can save bandwidth. The NIR pass only handles tex for now, but those are the most common anyway. We can extend it to handle image/ssbo/ubo/global loads in the future. Foz-DB GFX1201: Totals from 32633 (40.66% of 80251) affected shaders: Instrs: 22635910 -> 23193509 (+2.46%); split: -0.00%, +2.46% CodeSize: 122880044 -> 125093428 (+1.80%); split: -0.00%, +1.81% VGPRs: 1481868 -> 1481712 (-0.01%) SpillSGPRs: 3877 -> 4301 (+10.94%); split: -0.52%, +11.45% Latency: 171480552 -> 171685219 (+0.12%); split: -0.18%, +0.30% InvThroughput: 24364743 -> 24373441 (+0.04%); split: -0.08%, +0.12% VClause: 388318 -> 388557 (+0.06%); split: -0.06%, +0.13% SClause: 774781 -> 776492 (+0.22%); split: -0.29%, +0.51% Copies: 1416586 -> 1541199 (+8.80%); split: -0.16%, +8.96% Branches: 419591 -> 419673 (+0.02%); split: -0.02%, +0.04% PreSGPRs: 1330303 -> 1416540 (+6.48%) PreVGPRs: 964864 -> 964863 (-0.00%) VALU: 12919601 -> 12920254 (+0.01%); split: -0.01%, +0.01% SALU: 2685402 -> 3224147 (+20.06%); split: -0.00%, +20.07% Reviewed-by: Daniel Schürmann Part-of: --- .../instruction_selection/aco_isel_setup.cpp | 13 ++++++++----- .../instruction_selection/aco_select_nir.cpp | 7 ++++--- src/amd/compiler/tests/test_d3d11_derivs.cpp | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp index 082845fec4a..9e168388f96 100644 --- a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp +++ b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp @@ -367,6 +367,12 @@ init_context(isel_context* ctx, nir_shader* shader) 
apply_nuw_to_offsets(ctx, impl); ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false, true); + if (shader->info.stage == MESA_SHADER_FRAGMENT) { + nir_opt_load_skip_helpers_options skip_helper_options = {}; + skip_helper_options.no_add_divergence = true; + nir_opt_load_skip_helpers(shader, &skip_helper_options); + } + /* sanitize control flow */ sanitize_cf_list(impl, &impl->body); nir_progress(true, impl, nir_metadata_none); @@ -621,11 +627,8 @@ init_context(isel_context* ctx, nir_shader* shader) } case nir_instr_type_tex: { nir_tex_instr* tex = nir_instr_as_tex(instr); - RegType type = tex->def.divergent ? RegType::vgpr : RegType::sgpr; - - if (tex->op == nir_texop_texture_samples) { - assert(!tex->def.divergent); - } + RegType type = + tex->def.divergent || tex->skip_helpers ? RegType::vgpr : RegType::sgpr; RegClass rc = get_reg_class(ctx, type, tex->def.num_components, tex->def.bit_size); regclasses[tex->def.index] = rc; diff --git a/src/amd/compiler/instruction_selection/aco_select_nir.cpp b/src/amd/compiler/instruction_selection/aco_select_nir.cpp index 3ce03241fc8..e375074cb37 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir.cpp @@ -83,6 +83,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) assert(instr->op != nir_texop_samples_identical); Builder bld(ctx->program, ctx->block); + bool disable_wqm = instr->skip_helpers; bool has_bias = false, has_lod = false, level_zero = false, has_compare = false, has_offset = false, has_ddx = false, has_ddy = false, has_derivs = false, has_sample_index = false, has_clamped_lod = false, has_wqm_coord = false; @@ -338,7 +339,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero()); Temp size = bld.tmp(v2); MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, {size}, resource, - Operand(s4), std::vector{tg4_lod}, false); + Operand(s4), std::vector{tg4_lod}, 
disable_wqm); tex->dim = dim; tex->dmask = 0x3; tex->da = da; @@ -495,7 +496,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) : aco_opcode::image_load_mip; Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1); MIMG_instruction* tex = - emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, false, vdata); + emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, disable_wqm, vdata); if (instr->op == nir_texop_fragment_mask_fetch_amd) tex->dim = da ? ac_image_2darray : ac_image_2d; else @@ -675,7 +676,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1); MIMG_instruction* tex = - emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, false, vdata); + emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, disable_wqm, vdata); tex->dim = dim; tex->dmask = dmask & 0xf; tex->da = da; diff --git a/src/amd/compiler/tests/test_d3d11_derivs.cpp b/src/amd/compiler/tests/test_d3d11_derivs.cpp index b1287ab7327..a05a3110ae7 100644 --- a/src/amd/compiler/tests/test_d3d11_derivs.cpp +++ b/src/amd/compiler/tests/test_d3d11_derivs.cpp @@ -117,7 +117,7 @@ BEGIN_TEST(d3d11_derivs.discard) /* The discard gets emitted as demote_if. */ //>> s2: %_:exec, s1: (kill)%_:scc = s_wqm_b64 %_ //! p_exit_early_if_not %_:exec - //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (kill)%_, (kill)%_ 2d + //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (kill)%_, (kill)%_, %_, (kill)%_ 2d disable_wqm pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR"); END_TEST