mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-27 05:30:24 +01:00
aco: use new disable_wqm for mimg
Foz-DB GFX1201:
Totals from 88 (0.11% of 80251) affected shaders:
Instrs: 81954 -> 82218 (+0.32%); split: -0.02%, +0.34%
CodeSize: 451824 -> 452880 (+0.23%); split: -0.02%, +0.25%
Latency: 308818 -> 308746 (-0.02%); split: -0.05%, +0.02%
VClause: 1324 -> 1318 (-0.45%)
Copies: 2795 -> 2784 (-0.39%)
PreSGPRs: 4029 -> 4035 (+0.15%)
SALU: 6563 -> 6809 (+3.75%); split: -0.15%, +3.90%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35970>
This commit is contained in:
parent
922f559c3c
commit
0e66f2b2cc
8 changed files with 56 additions and 31 deletions
|
|
@ -62,11 +62,7 @@ struct exec_ctx {
|
|||
bool
|
||||
needs_exact(aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (instr->isMIMG()) {
|
||||
return instr->mimg().disable_wqm;
|
||||
} else {
|
||||
return instr->isEXP() || instr->opcode == aco_opcode::p_dual_src_export_gfx11;
|
||||
}
|
||||
return instr->isEXP() || instr->opcode == aco_opcode::p_dual_src_export_gfx11;
|
||||
}
|
||||
|
||||
WQMState
|
||||
|
|
@ -420,6 +416,8 @@ remove_disable_wqm(Instruction* instr)
|
|||
instr->mtbuf().disable_wqm = false;
|
||||
} else if (instr->isFlatLike()) {
|
||||
instr->flatlike().disable_wqm = false;
|
||||
} else if (instr->isMIMG()) {
|
||||
instr->mimg().disable_wqm = false;
|
||||
}
|
||||
|
||||
/* Remove the two masks so that the assembler doesn't need to handle them. */
|
||||
|
|
@ -843,6 +841,8 @@ instr_disables_wqm(Instruction* instr)
|
|||
return instr->mtbuf().disable_wqm;
|
||||
} else if (instr->isFlatLike()) {
|
||||
return instr->flatlike().disable_wqm;
|
||||
} else if (instr->isMIMG()) {
|
||||
return instr->mimg().disable_wqm;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -2157,9 +2157,18 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
|
|||
{
|
||||
Operand linear_vgpr = instr->operands[3];
|
||||
|
||||
bool disable_wqm = instr->mimg().disable_wqm;
|
||||
Operand exact_mask;
|
||||
Operand wqm_mask;
|
||||
if (disable_wqm) {
|
||||
exact_mask = instr_exact_mask(instr.get());
|
||||
wqm_mask = instr_wqm_mask(instr.get());
|
||||
}
|
||||
|
||||
unsigned nsa_size = ctx->program->dev.max_nsa_vgprs;
|
||||
unsigned vaddr_size = linear_vgpr.size();
|
||||
unsigned num_copied_vgprs = instr->operands.size() - 4;
|
||||
unsigned non_mask_operands = instr->operands.size() - (2 * disable_wqm);
|
||||
unsigned num_copied_vgprs = non_mask_operands - 4;
|
||||
nsa_size = num_copied_vgprs > 0 && (ctx->program->gfx_level >= GFX11 || vaddr_size <= nsa_size)
|
||||
? nsa_size
|
||||
: 0;
|
||||
|
|
@ -2180,7 +2189,7 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
|
|||
} else {
|
||||
PhysReg reg = linear_vgpr.physReg();
|
||||
std::map<PhysReg, copy_operation> copy_operations;
|
||||
for (unsigned i = 4; i < instr->operands.size(); i++) {
|
||||
for (unsigned i = 4; i < non_mask_operands; i++) {
|
||||
Operand arg = instr->operands[i];
|
||||
Definition def(reg, RegClass::get(RegType::vgpr, arg.bytes()));
|
||||
copy_operations[def.physReg()] = {arg, def, def.bytes()};
|
||||
|
|
@ -2193,10 +2202,11 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
|
||||
instr->mimg().strict_wqm = false;
|
||||
unsigned new_op_count = 3 + num_vaddr + (2 * disable_wqm);
|
||||
|
||||
if ((3 + num_vaddr) > instr->operands.size()) {
|
||||
if (new_op_count > instr->operands.size()) {
|
||||
Instruction* new_instr =
|
||||
create_instruction(instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
|
||||
create_instruction(instr->opcode, Format::MIMG, new_op_count, instr->definitions.size());
|
||||
std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
|
||||
new_instr->definitions.begin());
|
||||
new_instr->operands[0] = instr->operands[0];
|
||||
|
|
@ -2206,10 +2216,15 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
|
|||
sizeof(MIMG_instruction) - sizeof(Instruction));
|
||||
instr.reset(new_instr);
|
||||
} else {
|
||||
while (instr->operands.size() > (3 + num_vaddr))
|
||||
while (instr->operands.size() > new_op_count)
|
||||
instr->operands.pop_back();
|
||||
}
|
||||
std::copy(vaddr, vaddr + num_vaddr, std::next(instr->operands.begin(), 3));
|
||||
|
||||
if (disable_wqm) {
|
||||
instr_exact_mask(instr.get()) = exact_mask;
|
||||
instr_wqm_mask(instr.get()) = wqm_mask;
|
||||
}
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ struct PhysRegIterator {
|
|||
struct vector_info {
|
||||
vector_info() : is_weak(false), num_parts(0), parts(NULL) {}
|
||||
vector_info(Instruction* instr, unsigned start = 0, bool weak = false)
|
||||
: is_weak(weak), num_parts(instr->operands.size() - start),
|
||||
: is_weak(weak), num_parts(instr->operands.size() - start - (instr_disables_wqm(instr) * 2)),
|
||||
parts(instr->operands.begin() + start)
|
||||
{
|
||||
if (parts[0].isVectorAligned()) {
|
||||
|
|
@ -3065,7 +3065,8 @@ get_affinities(ra_ctx& ctx)
|
|||
!instr->mimg().strict_wqm) {
|
||||
|
||||
bool is_vector = false;
|
||||
for (unsigned i = 3, vector_begin = 3; i < instr->operands.size(); i++) {
|
||||
unsigned op_count = instr->operands.size() - (instr->mimg().disable_wqm * 2);
|
||||
for (unsigned i = 3, vector_begin = 3; i < op_count; i++) {
|
||||
if (is_vector || instr->operands[i].isVectorAligned())
|
||||
ctx.vectors[instr->operands[i].tempId()] = vector_info(instr.get(), vector_begin);
|
||||
else if (ctx.program->gfx_level < GFX12 && !instr->operands[3].isVectorAligned())
|
||||
|
|
|
|||
|
|
@ -859,13 +859,15 @@ validate_ir(Program* program)
|
|||
instr.get());
|
||||
}
|
||||
|
||||
unsigned non_mask_ops = instr->operands.size() - (instr->mimg().disable_wqm * 2);
|
||||
|
||||
if (instr->mimg().strict_wqm) {
|
||||
check(instr->operands[3].hasRegClass() &&
|
||||
instr->operands[3].regClass().is_linear_vgpr(),
|
||||
"MIMG operands[3] must be temp linear VGPR.", instr.get());
|
||||
|
||||
unsigned total_size = 0;
|
||||
for (unsigned i = 4; i < instr->operands.size(); i++) {
|
||||
for (unsigned i = 4; i < non_mask_ops; i++) {
|
||||
check(instr->operands[i].hasRegClass() && instr->operands[i].regClass() == v1,
|
||||
"MIMG operands[4+] (VADDR) must be v1", instr.get());
|
||||
total_size += instr->operands[i].bytes();
|
||||
|
|
@ -873,19 +875,18 @@ validate_ir(Program* program)
|
|||
check(total_size <= instr->operands[3].bytes(),
|
||||
"MIMG operands[4+] must fit within operands[3].", instr.get());
|
||||
} else {
|
||||
check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
|
||||
check(non_mask_ops == 4 || program->gfx_level >= GFX10,
|
||||
"NSA is only supported on GFX10+", instr.get());
|
||||
for (unsigned i = 3; i < instr->operands.size(); i++) {
|
||||
for (unsigned i = 3; i < non_mask_ops; i++) {
|
||||
check(instr->operands[i].hasRegClass() &&
|
||||
instr->operands[i].regClass().type() == RegType::vgpr,
|
||||
"MIMG operands[3+] (VADDR) must be VGPR", instr.get());
|
||||
if (instr->operands.size() > 4) {
|
||||
if (non_mask_ops > 4) {
|
||||
if (program->gfx_level < GFX11) {
|
||||
check(instr->operands[i].regClass() == v1,
|
||||
"GFX10 MIMG VADDR must be v1 if NSA is used", instr.get());
|
||||
} else {
|
||||
unsigned num_scalar =
|
||||
program->gfx_level >= GFX12 ? (instr->operands.size() - 4) : 4;
|
||||
unsigned num_scalar = program->gfx_level >= GFX12 ? (non_mask_ops - 4) : 4;
|
||||
if (instr->opcode != aco_opcode::image_bvh_intersect_ray &&
|
||||
instr->opcode != aco_opcode::image_bvh64_intersect_ray &&
|
||||
instr->opcode != aco_opcode::image_bvh_dual_intersect_ray &&
|
||||
|
|
|
|||
|
|
@ -228,7 +228,8 @@ void emit_interp_mov_instr(isel_context* ctx, unsigned idx, unsigned component,
|
|||
Temp dst, Temp prim_mask, bool high_16bits);
|
||||
std::vector<Temp> emit_pack_v1(isel_context* ctx, const std::vector<Temp>& unpacked);
|
||||
MIMG_instruction* emit_mimg(Builder& bld, aco_opcode op, std::vector<Temp> dsts, Temp rsrc,
|
||||
Operand samp, std::vector<Temp> coords, Operand vdata = Operand(v1));
|
||||
Operand samp, std::vector<Temp> coords, bool disable_wqm,
|
||||
Operand vdata = Operand(v1));
|
||||
Operand emit_tfe_init(Builder& bld, Temp dst);
|
||||
struct aco_export_mrt {
|
||||
Operand out[4];
|
||||
|
|
|
|||
|
|
@ -498,7 +498,7 @@ emit_pack_v1(isel_context* ctx, const std::vector<Temp>& unpacked)
|
|||
|
||||
MIMG_instruction*
|
||||
emit_mimg(Builder& bld, aco_opcode op, std::vector<Temp> dsts, Temp rsrc, Operand samp,
|
||||
std::vector<Temp> coords, Operand vdata)
|
||||
std::vector<Temp> coords, bool disable_wqm, Operand vdata)
|
||||
{
|
||||
bool is_vsample = !samp.isUndefined() || op == aco_opcode::image_msaa_load;
|
||||
|
||||
|
|
@ -541,7 +541,8 @@ emit_mimg(Builder& bld, aco_opcode op, std::vector<Temp> dsts, Temp rsrc, Operan
|
|||
coords.resize(nsa_size + 1);
|
||||
}
|
||||
|
||||
aco_ptr<Instruction> mimg{create_instruction(op, Format::MIMG, 3 + coords.size(), dsts.size())};
|
||||
aco_ptr<Instruction> mimg{
|
||||
create_instruction(op, Format::MIMG, 3 + coords.size() + disable_wqm * 2, dsts.size())};
|
||||
for (unsigned i = 0; i < dsts.size(); ++i)
|
||||
mimg->definitions[i] = Definition(dsts[i]);
|
||||
mimg->operands[0] = Operand(rsrc);
|
||||
|
|
@ -549,6 +550,14 @@ emit_mimg(Builder& bld, aco_opcode op, std::vector<Temp> dsts, Temp rsrc, Operan
|
|||
mimg->operands[2] = vdata;
|
||||
for (unsigned i = 0; i < coords.size(); i++)
|
||||
mimg->operands[3 + i] = Operand(coords[i]);
|
||||
|
||||
if (disable_wqm) {
|
||||
instr_exact_mask(mimg.get()) = Operand();
|
||||
instr_wqm_mask(mimg.get()) = Operand();
|
||||
mimg->mimg().disable_wqm = true;
|
||||
bld.program->needs_exact = true;
|
||||
}
|
||||
|
||||
mimg->mimg().strict_wqm = strict_wqm;
|
||||
|
||||
return &bld.insert(std::move(mimg))->mimg();
|
||||
|
|
|
|||
|
|
@ -338,7 +338,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
|||
Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero());
|
||||
Temp size = bld.tmp(v2);
|
||||
MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, {size}, resource,
|
||||
Operand(s4), std::vector<Temp>{tg4_lod});
|
||||
Operand(s4), std::vector<Temp>{tg4_lod}, false);
|
||||
tex->dim = dim;
|
||||
tex->dmask = 0x3;
|
||||
tex->da = da;
|
||||
|
|
@ -494,7 +494,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
|||
? aco_opcode::image_load
|
||||
: aco_opcode::image_load_mip;
|
||||
Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
|
||||
MIMG_instruction* tex = emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, vdata);
|
||||
MIMG_instruction* tex =
|
||||
emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, false, vdata);
|
||||
if (instr->op == nir_texop_fragment_mask_fetch_amd)
|
||||
tex->dim = da ? ac_image_2darray : ac_image_2d;
|
||||
else
|
||||
|
|
@ -674,7 +675,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
|||
|
||||
Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
|
||||
MIMG_instruction* tex =
|
||||
emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, vdata);
|
||||
emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, false, vdata);
|
||||
tex->dim = dim;
|
||||
tex->dmask = dmask & 0xf;
|
||||
tex->da = da;
|
||||
|
|
|
|||
|
|
@ -2087,7 +2087,8 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
}
|
||||
|
||||
Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1);
|
||||
MIMG_instruction* load = emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, vdata);
|
||||
MIMG_instruction* load =
|
||||
emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, false, vdata);
|
||||
load->cache = get_cache_flags(ctx, nir_intrinsic_access(instr) | ACCESS_TYPE_LOAD);
|
||||
load->a16 = instr->src[1].ssa->bit_size == 16;
|
||||
load->d16 = d16;
|
||||
|
|
@ -2230,7 +2231,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
aco_opcode opcode = level_zero ? aco_opcode::image_store : aco_opcode::image_store_mip;
|
||||
|
||||
MIMG_instruction* store =
|
||||
emit_mimg(bld, opcode, {}, resource, Operand(s4), coords, Operand(data));
|
||||
emit_mimg(bld, opcode, {}, resource, Operand(s4), coords, true, Operand(data));
|
||||
store->cache = cache;
|
||||
store->a16 = instr->src[1].ssa->bit_size == 16;
|
||||
store->d16 = d16;
|
||||
|
|
@ -2239,9 +2240,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
ac_image_dim sdim = ac_get_image_dim(ctx->options->gfx_level, dim, is_array);
|
||||
store->dim = sdim;
|
||||
store->da = should_declare_array(sdim);
|
||||
store->disable_wqm = true;
|
||||
store->sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -2389,7 +2388,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
if (return_previous)
|
||||
tmps = {(cmpswap ? bld.tmp(data.regClass()) : dst)};
|
||||
MIMG_instruction* mimg =
|
||||
emit_mimg(bld, image_op, tmps, resource, Operand(s4), coords, Operand(data));
|
||||
emit_mimg(bld, image_op, tmps, resource, Operand(s4), coords, true, Operand(data));
|
||||
mimg->cache = get_atomic_cache_flags(ctx, return_previous);
|
||||
mimg->dmask = (1 << data.size()) - 1;
|
||||
mimg->a16 = instr->src[1].ssa->bit_size == 16;
|
||||
|
|
@ -2397,9 +2396,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
ac_image_dim sdim = ac_get_image_dim(ctx->options->gfx_level, dim, is_array);
|
||||
mimg->dim = sdim;
|
||||
mimg->da = should_declare_array(sdim);
|
||||
mimg->disable_wqm = true;
|
||||
mimg->sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
if (return_previous && cmpswap)
|
||||
bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), tmps[0], Operand::zero());
|
||||
return;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue