aco: use new disable_wqm for mimg

Foz-DB GFX1201:
Totals from 88 (0.11% of 80251) affected shaders:
Instrs: 81954 -> 82218 (+0.32%); split: -0.02%, +0.34%
CodeSize: 451824 -> 452880 (+0.23%); split: -0.02%, +0.25%
Latency: 308818 -> 308746 (-0.02%); split: -0.05%, +0.02%
VClause: 1324 -> 1318 (-0.45%)
Copies: 2795 -> 2784 (-0.39%)
PreSGPRs: 4029 -> 4035 (+0.15%)
SALU: 6563 -> 6809 (+3.75%); split: -0.15%, +3.90%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35970>
This commit is contained in:
Georg Lehmann 2025-07-06 21:21:00 +02:00 committed by Marge Bot
parent 922f559c3c
commit 0e66f2b2cc
8 changed files with 56 additions and 31 deletions

View file

@ -62,11 +62,7 @@ struct exec_ctx {
/* Predicate over a single instruction; judging by the name it reports whether the
 * instruction has to run in exact mode (presumably as opposed to WQM - confirm).
 *
 * NOTE(review): this text is a diff view whose +/- markers were lost. The hunk header
 * (-62,11 +62,7) is consistent with the if/else below being the REMOVED pre-change body
 * and the final return being its replacement; verify against the real file.
 */
bool
needs_exact(aco_ptr<Instruction>& instr)
{
/* MIMG instructions answer with their own disable_wqm flag. */
if (instr->isMIMG()) {
return instr->mimg().disable_wqm;
} else {
/* Everything else: true only for exports and the GFX11 dual-source export pseudo-op. */
return instr->isEXP() || instr->opcode == aco_opcode::p_dual_src_export_gfx11;
}
/* Read literally, this statement is unreachable because both branches above return. */
return instr->isEXP() || instr->opcode == aco_opcode::p_dual_src_export_gfx11;
}
WQMState
@ -420,6 +416,8 @@ remove_disable_wqm(Instruction* instr)
instr->mtbuf().disable_wqm = false;
} else if (instr->isFlatLike()) {
instr->flatlike().disable_wqm = false;
} else if (instr->isMIMG()) {
instr->mimg().disable_wqm = false;
}
/* Remove the two masks so that the assembler doesn't need to handle them. */
@ -843,6 +841,8 @@ instr_disables_wqm(Instruction* instr)
return instr->mtbuf().disable_wqm;
} else if (instr->isFlatLike()) {
return instr->flatlike().disable_wqm;
} else if (instr->isMIMG()) {
return instr->mimg().disable_wqm;
}
return false;

View file

@ -2157,9 +2157,18 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
{
Operand linear_vgpr = instr->operands[3];
bool disable_wqm = instr->mimg().disable_wqm;
Operand exact_mask;
Operand wqm_mask;
if (disable_wqm) {
exact_mask = instr_exact_mask(instr.get());
wqm_mask = instr_wqm_mask(instr.get());
}
unsigned nsa_size = ctx->program->dev.max_nsa_vgprs;
unsigned vaddr_size = linear_vgpr.size();
unsigned num_copied_vgprs = instr->operands.size() - 4;
unsigned non_mask_operands = instr->operands.size() - (2 * disable_wqm);
unsigned num_copied_vgprs = non_mask_operands - 4;
nsa_size = num_copied_vgprs > 0 && (ctx->program->gfx_level >= GFX11 || vaddr_size <= nsa_size)
? nsa_size
: 0;
@ -2180,7 +2189,7 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
} else {
PhysReg reg = linear_vgpr.physReg();
std::map<PhysReg, copy_operation> copy_operations;
for (unsigned i = 4; i < instr->operands.size(); i++) {
for (unsigned i = 4; i < non_mask_operands; i++) {
Operand arg = instr->operands[i];
Definition def(reg, RegClass::get(RegType::vgpr, arg.bytes()));
copy_operations[def.physReg()] = {arg, def, def.bytes()};
@ -2193,10 +2202,11 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
}
instr->mimg().strict_wqm = false;
unsigned new_op_count = 3 + num_vaddr + (2 * disable_wqm);
if ((3 + num_vaddr) > instr->operands.size()) {
if (new_op_count > instr->operands.size()) {
Instruction* new_instr =
create_instruction(instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
create_instruction(instr->opcode, Format::MIMG, new_op_count, instr->definitions.size());
std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
new_instr->definitions.begin());
new_instr->operands[0] = instr->operands[0];
@ -2206,10 +2216,15 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
sizeof(MIMG_instruction) - sizeof(Instruction));
instr.reset(new_instr);
} else {
while (instr->operands.size() > (3 + num_vaddr))
while (instr->operands.size() > new_op_count)
instr->operands.pop_back();
}
std::copy(vaddr, vaddr + num_vaddr, std::next(instr->operands.begin(), 3));
if (disable_wqm) {
instr_exact_mask(instr.get()) = exact_mask;
instr_wqm_mask(instr.get()) = wqm_mask;
}
}
} /* end namespace */

View file

@ -111,7 +111,7 @@ struct PhysRegIterator {
struct vector_info {
vector_info() : is_weak(false), num_parts(0), parts(NULL) {}
vector_info(Instruction* instr, unsigned start = 0, bool weak = false)
: is_weak(weak), num_parts(instr->operands.size() - start),
: is_weak(weak), num_parts(instr->operands.size() - start - (instr_disables_wqm(instr) * 2)),
parts(instr->operands.begin() + start)
{
if (parts[0].isVectorAligned()) {
@ -3065,7 +3065,8 @@ get_affinities(ra_ctx& ctx)
!instr->mimg().strict_wqm) {
bool is_vector = false;
for (unsigned i = 3, vector_begin = 3; i < instr->operands.size(); i++) {
unsigned op_count = instr->operands.size() - (instr->mimg().disable_wqm * 2);
for (unsigned i = 3, vector_begin = 3; i < op_count; i++) {
if (is_vector || instr->operands[i].isVectorAligned())
ctx.vectors[instr->operands[i].tempId()] = vector_info(instr.get(), vector_begin);
else if (ctx.program->gfx_level < GFX12 && !instr->operands[3].isVectorAligned())

View file

@ -859,13 +859,15 @@ validate_ir(Program* program)
instr.get());
}
unsigned non_mask_ops = instr->operands.size() - (instr->mimg().disable_wqm * 2);
if (instr->mimg().strict_wqm) {
check(instr->operands[3].hasRegClass() &&
instr->operands[3].regClass().is_linear_vgpr(),
"MIMG operands[3] must be temp linear VGPR.", instr.get());
unsigned total_size = 0;
for (unsigned i = 4; i < instr->operands.size(); i++) {
for (unsigned i = 4; i < non_mask_ops; i++) {
check(instr->operands[i].hasRegClass() && instr->operands[i].regClass() == v1,
"MIMG operands[4+] (VADDR) must be v1", instr.get());
total_size += instr->operands[i].bytes();
@ -873,19 +875,18 @@ validate_ir(Program* program)
check(total_size <= instr->operands[3].bytes(),
"MIMG operands[4+] must fit within operands[3].", instr.get());
} else {
check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
check(non_mask_ops == 4 || program->gfx_level >= GFX10,
"NSA is only supported on GFX10+", instr.get());
for (unsigned i = 3; i < instr->operands.size(); i++) {
for (unsigned i = 3; i < non_mask_ops; i++) {
check(instr->operands[i].hasRegClass() &&
instr->operands[i].regClass().type() == RegType::vgpr,
"MIMG operands[3+] (VADDR) must be VGPR", instr.get());
if (instr->operands.size() > 4) {
if (non_mask_ops > 4) {
if (program->gfx_level < GFX11) {
check(instr->operands[i].regClass() == v1,
"GFX10 MIMG VADDR must be v1 if NSA is used", instr.get());
} else {
unsigned num_scalar =
program->gfx_level >= GFX12 ? (instr->operands.size() - 4) : 4;
unsigned num_scalar = program->gfx_level >= GFX12 ? (non_mask_ops - 4) : 4;
if (instr->opcode != aco_opcode::image_bvh_intersect_ray &&
instr->opcode != aco_opcode::image_bvh64_intersect_ray &&
instr->opcode != aco_opcode::image_bvh_dual_intersect_ray &&

View file

@ -228,7 +228,8 @@ void emit_interp_mov_instr(isel_context* ctx, unsigned idx, unsigned component,
Temp dst, Temp prim_mask, bool high_16bits);
std::vector<Temp> emit_pack_v1(isel_context* ctx, const std::vector<Temp>& unpacked);
MIMG_instruction* emit_mimg(Builder& bld, aco_opcode op, std::vector<Temp> dsts, Temp rsrc,
Operand samp, std::vector<Temp> coords, Operand vdata = Operand(v1));
Operand samp, std::vector<Temp> coords, bool disable_wqm,
Operand vdata = Operand(v1));
Operand emit_tfe_init(Builder& bld, Temp dst);
struct aco_export_mrt {
Operand out[4];

View file

@ -498,7 +498,7 @@ emit_pack_v1(isel_context* ctx, const std::vector<Temp>& unpacked)
MIMG_instruction*
emit_mimg(Builder& bld, aco_opcode op, std::vector<Temp> dsts, Temp rsrc, Operand samp,
std::vector<Temp> coords, Operand vdata)
std::vector<Temp> coords, bool disable_wqm, Operand vdata)
{
bool is_vsample = !samp.isUndefined() || op == aco_opcode::image_msaa_load;
@ -541,7 +541,8 @@ emit_mimg(Builder& bld, aco_opcode op, std::vector<Temp> dsts, Temp rsrc, Operan
coords.resize(nsa_size + 1);
}
aco_ptr<Instruction> mimg{create_instruction(op, Format::MIMG, 3 + coords.size(), dsts.size())};
aco_ptr<Instruction> mimg{
create_instruction(op, Format::MIMG, 3 + coords.size() + disable_wqm * 2, dsts.size())};
for (unsigned i = 0; i < dsts.size(); ++i)
mimg->definitions[i] = Definition(dsts[i]);
mimg->operands[0] = Operand(rsrc);
@ -549,6 +550,14 @@ emit_mimg(Builder& bld, aco_opcode op, std::vector<Temp> dsts, Temp rsrc, Operan
mimg->operands[2] = vdata;
for (unsigned i = 0; i < coords.size(); i++)
mimg->operands[3 + i] = Operand(coords[i]);
if (disable_wqm) {
instr_exact_mask(mimg.get()) = Operand();
instr_wqm_mask(mimg.get()) = Operand();
mimg->mimg().disable_wqm = true;
bld.program->needs_exact = true;
}
mimg->mimg().strict_wqm = strict_wqm;
return &bld.insert(std::move(mimg))->mimg();

View file

@ -338,7 +338,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero());
Temp size = bld.tmp(v2);
MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, {size}, resource,
Operand(s4), std::vector<Temp>{tg4_lod});
Operand(s4), std::vector<Temp>{tg4_lod}, false);
tex->dim = dim;
tex->dmask = 0x3;
tex->da = da;
@ -494,7 +494,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
? aco_opcode::image_load
: aco_opcode::image_load_mip;
Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
MIMG_instruction* tex = emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, vdata);
MIMG_instruction* tex =
emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, false, vdata);
if (instr->op == nir_texop_fragment_mask_fetch_amd)
tex->dim = da ? ac_image_2darray : ac_image_2d;
else
@ -674,7 +675,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
MIMG_instruction* tex =
emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, vdata);
emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, false, vdata);
tex->dim = dim;
tex->dmask = dmask & 0xf;
tex->da = da;

View file

@ -2087,7 +2087,8 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
}
Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1);
MIMG_instruction* load = emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, vdata);
MIMG_instruction* load =
emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, false, vdata);
load->cache = get_cache_flags(ctx, nir_intrinsic_access(instr) | ACCESS_TYPE_LOAD);
load->a16 = instr->src[1].ssa->bit_size == 16;
load->d16 = d16;
@ -2230,7 +2231,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
aco_opcode opcode = level_zero ? aco_opcode::image_store : aco_opcode::image_store_mip;
MIMG_instruction* store =
emit_mimg(bld, opcode, {}, resource, Operand(s4), coords, Operand(data));
emit_mimg(bld, opcode, {}, resource, Operand(s4), coords, true, Operand(data));
store->cache = cache;
store->a16 = instr->src[1].ssa->bit_size == 16;
store->d16 = d16;
@ -2239,9 +2240,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
ac_image_dim sdim = ac_get_image_dim(ctx->options->gfx_level, dim, is_array);
store->dim = sdim;
store->da = should_declare_array(sdim);
store->disable_wqm = true;
store->sync = sync;
ctx->program->needs_exact = true;
return;
}
@ -2389,7 +2388,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
if (return_previous)
tmps = {(cmpswap ? bld.tmp(data.regClass()) : dst)};
MIMG_instruction* mimg =
emit_mimg(bld, image_op, tmps, resource, Operand(s4), coords, Operand(data));
emit_mimg(bld, image_op, tmps, resource, Operand(s4), coords, true, Operand(data));
mimg->cache = get_atomic_cache_flags(ctx, return_previous);
mimg->dmask = (1 << data.size()) - 1;
mimg->a16 = instr->src[1].ssa->bit_size == 16;
@ -2397,9 +2396,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
ac_image_dim sdim = ac_get_image_dim(ctx->options->gfx_level, dim, is_array);
mimg->dim = sdim;
mimg->da = should_declare_array(sdim);
mimg->disable_wqm = true;
mimg->sync = sync;
ctx->program->needs_exact = true;
if (return_previous && cmpswap)
bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), tmps[0], Operand::zero());
return;