aco: implement skip_helpers for image loads

Foz-DB GFX1201:
Totals from 5 (0.01% of 80287) affected shaders:
Instrs: 1406 -> 1417 (+0.78%)
CodeSize: 8012 -> 8056 (+0.55%)
Latency: 7279 -> 7282 (+0.04%)
Copies: 84 -> 85 (+1.19%)
SALU: 170 -> 180 (+5.88%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36785>
This commit is contained in:
Georg Lehmann 2025-08-06 14:06:10 +02:00 committed by Marge Bot
parent bf453a7c6a
commit bdae511b18
2 changed files with 22 additions and 4 deletions

View file

@ -340,6 +340,18 @@ skip_uniformize_merge_phi(nir_def* ssa, unsigned depth)
return true;
}
/* Intrinsic callback installed as nir_opt_load_skip_helpers_options::intrinsic_cb.
 *
 * Returns true only for bindless image loads (plain, fragment-mask and sparse
 * variants) whose access flags do not contain ACCESS_SMEM_AMD. Every other
 * intrinsic yields false. The data pointer is not used.
 */
bool
intrinsic_try_skip_helpers(nir_intrinsic_instr* intr, UNUSED void* data)
{
   /* Reject everything that is not one of the handled image load intrinsics. */
   switch (intr->intrinsic) {
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_fragment_mask_load_amd:
   case nir_intrinsic_bindless_image_sparse_load:
      break;
   default:
      return false;
   }

   /* Loads flagged ACCESS_SMEM_AMD are excluded. */
   return (nir_intrinsic_access(intr) & ACCESS_SMEM_AMD) == 0;
}
} /* end namespace */
void
@ -370,6 +382,7 @@ init_context(isel_context* ctx, nir_shader* shader)
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
nir_opt_load_skip_helpers_options skip_helper_options = {};
skip_helper_options.no_add_divergence = true;
skip_helper_options.intrinsic_cb = intrinsic_try_skip_helpers;
nir_opt_load_skip_helpers(shader, &skip_helper_options);
}

View file

@ -1821,6 +1821,9 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool disable_wqm = access & ACCESS_SKIP_HELPERS;
if (dim == GLSL_SAMPLER_DIM_BUF) {
Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
@ -1842,17 +1845,19 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
default: UNREACHABLE(">4 channel buffer image load");
}
}
aco_ptr<Instruction> load{create_instruction(opcode, Format::MUBUF, 3 + is_sparse, 1)};
aco_ptr<Instruction> load{
create_instruction(opcode, Format::MUBUF, 3 + is_sparse + 2 * disable_wqm, 1)};
load->operands[0] = Operand(resource);
load->operands[1] = Operand(vindex);
load->operands[2] = Operand::c32(0);
load->definitions[0] = Definition(tmp);
load->mubuf().idxen = true;
load->mubuf().cache = get_cache_flags(ctx, nir_intrinsic_access(instr), ac_access_type_load);
load->mubuf().cache = get_cache_flags(ctx, access, ac_access_type_load);
load->mubuf().sync = sync;
load->mubuf().tfe = is_sparse;
if (load->mubuf().tfe)
load->operands[3] = emit_tfe_init(bld, tmp);
init_disable_wqm(bld, load->mubuf(), disable_wqm);
ctx->block->instructions.emplace_back(std::move(load));
} else {
std::vector<Temp> coords = get_image_coords(ctx, instr);
@ -1867,8 +1872,8 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1);
MIMG_instruction* load =
emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, false, vdata);
load->cache = get_cache_flags(ctx, nir_intrinsic_access(instr), ac_access_type_load);
emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, disable_wqm, vdata);
load->cache = get_cache_flags(ctx, access, ac_access_type_load);
load->a16 = instr->src[1].ssa->bit_size == 16;
load->d16 = d16;
load->dmask = dmask;