mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 04:40:09 +01:00
aco: add p_dual_src_export_gfx11 for dual source blending on GFX11
Dual source blending must be in strict WQM mode.

Cc: 22.3 mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19643>
This commit is contained in:
parent
e93de8a75e
commit
bb90d29660
6 changed files with 109 additions and 3 deletions
|
|
@ -103,7 +103,8 @@ needs_exact(aco_ptr<Instruction>& instr)
|
||||||
* emitted inside the same block, the main FS will always jump to the PS
|
* emitted inside the same block, the main FS will always jump to the PS
|
||||||
* epilog without considering the exec mask.
|
* epilog without considering the exec mask.
|
||||||
*/
|
*/
|
||||||
return instr->isEXP() || instr->opcode == aco_opcode::p_jump_to_epilog;
|
return instr->isEXP() || instr->opcode == aco_opcode::p_jump_to_epilog ||
|
||||||
|
instr->opcode == aco_opcode::p_dual_src_export_gfx11;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1857,7 +1857,8 @@ inline bool
|
||||||
is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
|
is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
|
||||||
{
|
{
|
||||||
if (instr->definitions.empty() || instr->isBranch() ||
|
if (instr->definitions.empty() || instr->isBranch() ||
|
||||||
instr->opcode == aco_opcode::p_init_scratch)
|
instr->opcode == aco_opcode::p_init_scratch ||
|
||||||
|
instr->opcode == aco_opcode::p_dual_src_export_gfx11)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
|
if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
|
||||||
|
|
|
||||||
|
|
@ -2432,6 +2432,85 @@ lower_to_hw_instr(Program* program)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case aco_opcode::p_dual_src_export_gfx11: {
|
||||||
|
PhysReg dst0 = instr->definitions[0].physReg();
|
||||||
|
PhysReg dst1 = instr->definitions[1].physReg();
|
||||||
|
Definition tmp = instr->definitions[2];
|
||||||
|
Definition exec_tmp = instr->definitions[3];
|
||||||
|
Definition clobber_vcc = instr->definitions[4];
|
||||||
|
Definition clobber_scc = instr->definitions[5];
|
||||||
|
|
||||||
|
assert(tmp.regClass() == v1);
|
||||||
|
assert(exec_tmp.regClass() == bld.lm);
|
||||||
|
assert(clobber_vcc.regClass() == bld.lm && clobber_vcc.physReg() == vcc);
|
||||||
|
assert(clobber_scc.isFixed() && clobber_scc.physReg() == scc);
|
||||||
|
|
||||||
|
bld.sop1(Builder::s_mov, Definition(exec_tmp.physReg(), bld.lm),
|
||||||
|
Operand(exec, bld.lm));
|
||||||
|
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), clobber_scc,
|
||||||
|
Operand(exec, bld.lm));
|
||||||
|
|
||||||
|
uint8_t enabled_channels = 0;
|
||||||
|
Operand mrt0[4], mrt1[4];
|
||||||
|
|
||||||
|
bld.sop1(aco_opcode::s_mov_b32, Definition(clobber_vcc.physReg(), s1),
|
||||||
|
Operand::c32(0x55555555));
|
||||||
|
if (ctx.program->wave_size == 64)
|
||||||
|
bld.sop1(aco_opcode::s_mov_b32, Definition(clobber_vcc.physReg().advance(4), s1),
|
||||||
|
Operand::c32(0x55555555));
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
|
if (instr->operands[i].isUndefined() && instr->operands[i + 4].isUndefined()) {
|
||||||
|
mrt0[i] = instr->operands[i];
|
||||||
|
mrt1[i] = instr->operands[i + 4];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Operand src0 = instr->operands[i];
|
||||||
|
Operand src1 = instr->operands[i + 4];
|
||||||
|
|
||||||
|
/* Swap odd, even lanes of mrt0. */
|
||||||
|
Builder::Result ret =
|
||||||
|
bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1), src0);
|
||||||
|
for (unsigned j = 0; j < 8; j++) {
|
||||||
|
ret.instr->dpp8().lane_sel[j] = j ^ 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Swap even lanes between mrt0 and mrt1. */
|
||||||
|
bld.vop2(aco_opcode::v_cndmask_b32, tmp, Operand(dst0, v1), src1,
|
||||||
|
Operand(clobber_vcc.physReg(), bld.lm));
|
||||||
|
bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst1, v1), src1, Operand(dst0, v1),
|
||||||
|
Operand(clobber_vcc.physReg(), bld.lm));
|
||||||
|
|
||||||
|
/* Swap odd, even lanes of mrt0 again. */
|
||||||
|
ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1),
|
||||||
|
Operand(tmp.physReg(), v1));
|
||||||
|
for (unsigned j = 0; j < 8; j++) {
|
||||||
|
ret.instr->dpp8().lane_sel[j] = j ^ 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
mrt0[i] = Operand(dst0, v1);
|
||||||
|
mrt1[i] = Operand(dst1, v1);
|
||||||
|
|
||||||
|
enabled_channels |= 1 << i;
|
||||||
|
|
||||||
|
dst0 = dst0.advance(4);
|
||||||
|
dst1 = dst1.advance(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
bld.sop1(Builder::s_mov, Definition(exec, bld.lm),
|
||||||
|
Operand(exec_tmp.physReg(), bld.lm));
|
||||||
|
|
||||||
|
/* Force export all channels when everything is undefined. */
|
||||||
|
if (!enabled_channels)
|
||||||
|
enabled_channels = 0xf;
|
||||||
|
|
||||||
|
bld.exp(aco_opcode::exp, mrt0[0], mrt0[1], mrt0[2], mrt0[3], enabled_channels,
|
||||||
|
V_008DFC_SQ_EXP_MRT + 21, false);
|
||||||
|
bld.exp(aco_opcode::exp, mrt1[0], mrt1[1], mrt1[2], mrt1[3], enabled_channels,
|
||||||
|
V_008DFC_SQ_EXP_MRT + 22, false);
|
||||||
|
break;
|
||||||
|
}
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
} else if (instr->isBranch()) {
|
} else if (instr->isBranch()) {
|
||||||
|
|
|
||||||
|
|
@ -340,6 +340,9 @@ opcode("p_jump_to_epilog")
|
||||||
#dst0=result, dst1=exec_tmp, dst2=clobber_scc, src0=linear_vgpr, src1=attribute, src2=component, src3=dpp_ctrl, src4=m0
|
#dst0=result, dst1=exec_tmp, dst2=clobber_scc, src0=linear_vgpr, src1=attribute, src2=component, src3=dpp_ctrl, src4=m0
|
||||||
opcode("p_interp_gfx11")
|
opcode("p_interp_gfx11")
|
||||||
|
|
||||||
|
# performs dual source MRTs swizzling and emits exports on GFX11
|
||||||
|
opcode("p_dual_src_export_gfx11")
|
||||||
|
|
||||||
# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
|
# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
|
||||||
SOP2 = {
|
SOP2 = {
|
||||||
# GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
|
# GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
|
||||||
|
|
|
||||||
|
|
@ -673,7 +673,8 @@ alu_can_accept_constant(aco_opcode opcode, unsigned operand)
|
||||||
case aco_opcode::v_readfirstlane_b32:
|
case aco_opcode::v_readfirstlane_b32:
|
||||||
case aco_opcode::p_extract:
|
case aco_opcode::p_extract:
|
||||||
case aco_opcode::p_insert: return operand != 0;
|
case aco_opcode::p_insert: return operand != 0;
|
||||||
case aco_opcode::p_interp_gfx11: return false;
|
case aco_opcode::p_interp_gfx11:
|
||||||
|
case aco_opcode::p_dual_src_export_gfx11: return false;
|
||||||
default: return true;
|
default: return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -262,6 +262,7 @@ validate_ir(Program* program)
|
||||||
bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
|
bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
|
||||||
instr->opcode == aco_opcode::p_create_vector ||
|
instr->opcode == aco_opcode::p_create_vector ||
|
||||||
instr->opcode == aco_opcode::p_jump_to_epilog ||
|
instr->opcode == aco_opcode::p_jump_to_epilog ||
|
||||||
|
instr->opcode == aco_opcode::p_dual_src_export_gfx11 ||
|
||||||
(instr->opcode == aco_opcode::p_interp_gfx11 && i == 0) ||
|
(instr->opcode == aco_opcode::p_interp_gfx11 && i == 0) ||
|
||||||
(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
|
(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
|
||||||
((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
|
((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
|
||||||
|
|
@ -526,6 +527,26 @@ validate_ir(Program* program)
|
||||||
instr->operands[i].isUndefined(),
|
instr->operands[i].isUndefined(),
|
||||||
"Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
|
"Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
|
||||||
}
|
}
|
||||||
|
} else if (instr->opcode == aco_opcode::p_dual_src_export_gfx11) {
|
||||||
|
check(instr->definitions.size() == 6,
|
||||||
|
"p_dual_src_export_gfx11 must have 6 definitions", instr.get());
|
||||||
|
check(instr->definitions[2].getTemp().type() == RegType::vgpr &&
|
||||||
|
instr->definitions[2].getTemp().size() == 1,
|
||||||
|
"Third definition of p_dual_src_export_gfx11 must be a v1", instr.get());
|
||||||
|
check(instr->definitions[3].getTemp().type() == RegType::sgpr &&
|
||||||
|
instr->definitions[3].getTemp().size() == 2,
|
||||||
|
"Fourth definition of p_dual_src_export_gfx11 must be a s2", instr.get());
|
||||||
|
check(instr->definitions[4].physReg() == vcc,
|
||||||
|
"Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
|
||||||
|
check(instr->definitions[5].physReg() == scc,
|
||||||
|
"Sixth definition of p_dual_src_export_gfx11 must be scc", instr.get());
|
||||||
|
check(instr->operands.size() == 8, "p_dual_src_export_gfx11 must have 8 operands",
|
||||||
|
instr.get());
|
||||||
|
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||||
|
check(instr->operands[i].getTemp().type() == RegType::vgpr ||
|
||||||
|
instr->operands[i].isUndefined(),
|
||||||
|
"Operands of p_dual_src_export_gfx11 must be VGPRs or undef", instr.get());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue