aco: disable wqm for tex loads when not needed

By only executing VMEM loads for lanes where the result is used, we can save
bandwidth.

The NIR pass only handles tex for now, but those are the most common anyway.
We can extend it to handle image/ssbo/ubo/global loads in the future.

Foz-DB GFX1201:
Totals from 32633 (40.66% of 80251) affected shaders:
Instrs: 22635910 -> 23193509 (+2.46%); split: -0.00%, +2.46%
CodeSize: 122880044 -> 125093428 (+1.80%); split: -0.00%, +1.81%
VGPRs: 1481868 -> 1481712 (-0.01%)
SpillSGPRs: 3877 -> 4301 (+10.94%); split: -0.52%, +11.45%
Latency: 171480552 -> 171685219 (+0.12%); split: -0.18%, +0.30%
InvThroughput: 24364743 -> 24373441 (+0.04%); split: -0.08%, +0.12%
VClause: 388318 -> 388557 (+0.06%); split: -0.06%, +0.13%
SClause: 774781 -> 776492 (+0.22%); split: -0.29%, +0.51%
Copies: 1416586 -> 1541199 (+8.80%); split: -0.16%, +8.96%
Branches: 419591 -> 419673 (+0.02%); split: -0.02%, +0.04%
PreSGPRs: 1330303 -> 1416540 (+6.48%)
PreVGPRs: 964864 -> 964863 (-0.00%)
VALU: 12919601 -> 12920254 (+0.01%); split: -0.01%, +0.01%
SALU: 2685402 -> 3224147 (+20.06%); split: -0.00%, +20.07%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35970>
This commit is contained in:
Georg Lehmann 2025-07-06 23:12:58 +02:00 committed by Marge Bot
parent 7159fd21f8
commit 883b1ca364
3 changed files with 13 additions and 9 deletions

View file

@ -367,6 +367,12 @@ init_context(isel_context* ctx, nir_shader* shader)
apply_nuw_to_offsets(ctx, impl); apply_nuw_to_offsets(ctx, impl);
ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false, true); ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false, true);
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
nir_opt_load_skip_helpers_options skip_helper_options = {};
skip_helper_options.no_add_divergence = true;
nir_opt_load_skip_helpers(shader, &skip_helper_options);
}
/* sanitize control flow */ /* sanitize control flow */
sanitize_cf_list(impl, &impl->body); sanitize_cf_list(impl, &impl->body);
nir_progress(true, impl, nir_metadata_none); nir_progress(true, impl, nir_metadata_none);
@ -621,11 +627,8 @@ init_context(isel_context* ctx, nir_shader* shader)
} }
case nir_instr_type_tex: { case nir_instr_type_tex: {
nir_tex_instr* tex = nir_instr_as_tex(instr); nir_tex_instr* tex = nir_instr_as_tex(instr);
RegType type = tex->def.divergent ? RegType::vgpr : RegType::sgpr; RegType type =
tex->def.divergent || tex->skip_helpers ? RegType::vgpr : RegType::sgpr;
if (tex->op == nir_texop_texture_samples) {
assert(!tex->def.divergent);
}
RegClass rc = get_reg_class(ctx, type, tex->def.num_components, tex->def.bit_size); RegClass rc = get_reg_class(ctx, type, tex->def.num_components, tex->def.bit_size);
regclasses[tex->def.index] = rc; regclasses[tex->def.index] = rc;

View file

@ -83,6 +83,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
assert(instr->op != nir_texop_samples_identical); assert(instr->op != nir_texop_samples_identical);
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
bool disable_wqm = instr->skip_helpers;
bool has_bias = false, has_lod = false, level_zero = false, has_compare = false, bool has_bias = false, has_lod = false, level_zero = false, has_compare = false,
has_offset = false, has_ddx = false, has_ddy = false, has_derivs = false, has_offset = false, has_ddx = false, has_ddy = false, has_derivs = false,
has_sample_index = false, has_clamped_lod = false, has_wqm_coord = false; has_sample_index = false, has_clamped_lod = false, has_wqm_coord = false;
@ -338,7 +339,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero()); Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero());
Temp size = bld.tmp(v2); Temp size = bld.tmp(v2);
MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, {size}, resource, MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, {size}, resource,
Operand(s4), std::vector<Temp>{tg4_lod}, false); Operand(s4), std::vector<Temp>{tg4_lod}, disable_wqm);
tex->dim = dim; tex->dim = dim;
tex->dmask = 0x3; tex->dmask = 0x3;
tex->da = da; tex->da = da;
@ -495,7 +496,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
: aco_opcode::image_load_mip; : aco_opcode::image_load_mip;
Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1); Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
MIMG_instruction* tex = MIMG_instruction* tex =
emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, false, vdata); emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, disable_wqm, vdata);
if (instr->op == nir_texop_fragment_mask_fetch_amd) if (instr->op == nir_texop_fragment_mask_fetch_amd)
tex->dim = da ? ac_image_2darray : ac_image_2d; tex->dim = da ? ac_image_2darray : ac_image_2d;
else else
@ -675,7 +676,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1); Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
MIMG_instruction* tex = MIMG_instruction* tex =
emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, false, vdata); emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, disable_wqm, vdata);
tex->dim = dim; tex->dim = dim;
tex->dmask = dmask & 0xf; tex->dmask = dmask & 0xf;
tex->da = da; tex->da = da;

View file

@ -117,7 +117,7 @@ BEGIN_TEST(d3d11_derivs.discard)
/* The discard gets emitted as demote_if. */ /* The discard gets emitted as demote_if. */
//>> s2: %_:exec, s1: (kill)%_:scc = s_wqm_b64 %_ //>> s2: %_:exec, s1: (kill)%_:scc = s_wqm_b64 %_
//! p_exit_early_if_not %_:exec //! p_exit_early_if_not %_:exec
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (kill)%_, (kill)%_ 2d //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (kill)%_, (kill)%_, %_, (kill)%_ 2d disable_wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR"); pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
END_TEST END_TEST