aco: use new disable_wqm for p_dual_src_export_gfx11

No Foz-DB changes on GFX1201.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35970>
This commit is contained in:
Georg Lehmann 2025-07-06 22:07:26 +02:00 committed by Marge Bot
parent 8e53ba9a0a
commit 11cee3d634
4 changed files with 54 additions and 17 deletions

View file

@ -62,7 +62,7 @@ struct exec_ctx {
/* Reports whether `instr` needs exact mode — presumably consulted by the
 * WQM/exact handling in this file; the callers are not visible here, TODO
 * confirm.
 *
 * NOTE(review): two return statements follow. This text looks like a diff
 * hunk rendered without its +/- markers, in which case the opcode comparison
 * is the removed line and `return false` is the added one (the commit subject
 * says p_dual_src_export_gfx11 now uses disable_wqm). Taken literally, only
 * the first return can execute and the second is unreachable — confirm
 * against the real file before relying on either reading.
 */
bool
needs_exact(aco_ptr<Instruction>& instr)
{
return instr->opcode == aco_opcode::p_dual_src_export_gfx11;
return false;
}
WQMState
@ -425,6 +425,8 @@ remove_disable_wqm(Instruction* instr)
/* Remove the two masks so that the assembler doesn't need to handle them. */
instr->operands.pop_back();
instr->operands.pop_back();
assert(!instr_disables_wqm(instr));
}
void
@ -847,6 +849,8 @@ instr_disables_wqm(Instruction* instr)
return instr->mimg().disable_wqm;
} else if (instr->isEXP()) {
return instr->exp().disable_wqm;
} else if (instr->opcode == aco_opcode::p_dual_src_export_gfx11) {
return instr->operands.size() > 8;
}
return false;

View file

@ -2820,10 +2820,16 @@ lower_to_hw_instr(Program* program)
assert(clobber_vcc.regClass() == bld.lm && clobber_vcc.physReg() == vcc);
assert(clobber_scc.isFixed() && clobber_scc.physReg() == scc);
bld.sop1(Builder::s_mov, Definition(exec_tmp.physReg(), bld.lm),
Operand(exec, bld.lm));
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), clobber_scc,
Operand(exec, bld.lm));
bool disable_wqm = instr_disables_wqm(instr.get());
assert(instr->operands.size() == (disable_wqm ? 10 : 8));
/* If WQM was already ended, manually re-enable it. */
if (!disable_wqm) {
bld.sop1(Builder::s_mov, Definition(exec_tmp.physReg(), bld.lm),
Operand(exec, bld.lm));
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), clobber_scc,
Operand(exec, bld.lm));
}
uint8_t enabled_channels = 0;
Operand mrt0[4], mrt1[4];
@ -2864,17 +2870,31 @@ lower_to_hw_instr(Program* program)
dst1 = dst1.advance(4);
}
bld.sop1(Builder::s_mov, Definition(exec, bld.lm),
Operand(exec_tmp.physReg(), bld.lm));
if (!disable_wqm) {
bld.sop1(Builder::s_mov, Definition(exec, bld.lm),
Operand(exec_tmp.physReg(), bld.lm));
}
/* Force export all channels when everything is undefined. */
if (!enabled_channels)
enabled_channels = 0xf;
bld.exp(aco_opcode::exp, mrt0[0], mrt0[1], mrt0[2], mrt0[3], enabled_channels,
V_008DFC_SQ_EXP_MRT + 21, false);
bld.exp(aco_opcode::exp, mrt1[0], mrt1[1], mrt1[2], mrt1[3], enabled_channels,
V_008DFC_SQ_EXP_MRT + 22, false);
Instruction* exp[2];
exp[0] =
bld.exp(aco_opcode::exp, mrt0[0], mrt0[1], mrt0[2], mrt0[3], enabled_channels,
V_008DFC_SQ_EXP_MRT + 21, false, false, false, disable_wqm);
exp[1] =
bld.exp(aco_opcode::exp, mrt1[0], mrt1[1], mrt1[2], mrt1[3], enabled_channels,
V_008DFC_SQ_EXP_MRT + 22, false, false, false, disable_wqm);
if (disable_wqm) {
for (unsigned i = 0; i < 2; i++) {
instr_exact_mask(exp[i]) = instr_exact_mask(instr.get());
instr_wqm_mask(exp[i]) = instr_wqm_mask(instr.get());
}
}
break;
}
case aco_opcode::p_end_with_regs: {

View file

@ -753,12 +753,22 @@ validate_ir(Program* program)
"Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
check(instr->definitions[5].physReg() == scc,
"Sixth definition of p_dual_src_export_gfx11 must be scc", instr.get());
check(instr->operands.size() == 8, "p_dual_src_export_gfx11 must have 8 operands",
instr.get());
check(instr->operands.size() == 8 || instr->operands.size() == 10,
"p_dual_src_export_gfx11 must have 8 or 10 operands", instr.get());
for (unsigned i = 0; i < instr->operands.size(); i++) {
check(
instr->operands[i].isOfType(RegType::vgpr) || instr->operands[i].isUndefined(),
"Operands of p_dual_src_export_gfx11 must be VGPRs or undef", instr.get());
if (i < 8) {
check(instr->operands[i].isOfType(RegType::vgpr) ||
instr->operands[i].isUndefined(),
"Operands of p_dual_src_export_gfx11 must be VGPRs or undef",
instr.get());
} else {
check(instr->operands[i].isUndefined() ||
(instr->operands[i].hasRegClass() &&
instr->operands[i].regClass() == program->lane_mask),
"WQM/exact mask operands of p_dual_src_export_gfx11 must be undef or "
"lane mask",
instr.get());
}
}
}
break;

View file

@ -590,12 +590,15 @@ create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt*
Builder bld(ctx->program, ctx->block);
aco_ptr<Instruction> exp{
create_instruction(aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)};
create_instruction(aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 10, 6)};
for (unsigned i = 0; i < 4; i++) {
exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1);
exp->operands[i + 4] = mrt1 ? mrt1->out[i] : Operand(v1);
}
instr_exact_mask(exp.get()) = Operand();
instr_wqm_mask(exp.get()) = Operand();
RegClass type = RegClass(RegType::vgpr, util_bitcount(mrt0->enabled_channels));
exp->definitions[0] = bld.def(type); /* mrt0 */
exp->definitions[1] = bld.def(type); /* mrt1 */