aco/assembler: support VOP3P with DPP

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22698>
This commit is contained in:
Georg Lehmann 2023-04-23 10:42:58 +02:00 committed by Marge Bot
parent d0e73cb313
commit 2548f28ab3
3 changed files with 42 additions and 46 deletions

View file

@ -765,10 +765,45 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
unreachable("Pseudo instructions should be lowered before assembly.");
break;
default:
/* TODO: VOP3/VOP3P can use DPP8/16 on GFX11 (encoding of src0 and DPP8/16 word seems same
* except abs/neg is ignored). src2 cannot be literal and src0/src1 must be VGPR.
*/
if (instr->isVOP3() && !instr->isDPP()) {
if (instr->isDPP16()) {
assert(ctx.gfx_level >= GFX8);
DPP16_instruction& dpp = instr->dpp16();
/* first emit the instruction without the DPP operand */
Operand dpp_op = instr->operands[0];
instr->operands[0] = Operand(PhysReg{250}, v1);
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
emit_instruction(ctx, out, instr);
uint32_t encoding = (0xF & dpp.row_mask) << 28;
encoding |= (0xF & dpp.bank_mask) << 24;
encoding |= dpp.abs[1] << 23;
encoding |= dpp.neg[1] << 22;
encoding |= dpp.abs[0] << 21;
encoding |= dpp.neg[0] << 20;
if (ctx.gfx_level >= GFX10)
encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
encoding |= dpp.bound_ctrl << 19;
encoding |= dpp.dpp_ctrl << 8;
encoding |= reg(ctx, dpp_op, 8);
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
out.push_back(encoding);
return;
} else if (instr->isDPP8()) {
assert(ctx.gfx_level >= GFX10);
DPP8_instruction& dpp = instr->dpp8();
/* first emit the instruction without the DPP operand */
Operand dpp_op = instr->operands[0];
instr->operands[0] = Operand(PhysReg{234}, v1);
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
emit_instruction(ctx, out, instr);
uint32_t encoding = reg(ctx, dpp_op, 8);
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
for (unsigned i = 0; i < 8; ++i)
encoding |= dpp.lane_sel[i] << (8 + i * 3);
out.push_back(encoding);
return;
} else if (instr->isVOP3()) {
VALU_instruction& vop3 = instr->valu();
if (instr->isVOP2()) {
@ -855,45 +890,6 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
for (unsigned i = 0; i < 3; i++)
encoding |= vop3.neg_lo[i] << (29 + i);
out.push_back(encoding);
} else if (instr->isDPP16()) {
assert(ctx.gfx_level >= GFX8);
DPP16_instruction& dpp = instr->dpp16();
/* first emit the instruction without the DPP operand */
Operand dpp_op = instr->operands[0];
instr->operands[0] = Operand(PhysReg{250}, v1);
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
emit_instruction(ctx, out, instr);
uint32_t encoding = (0xF & dpp.row_mask) << 28;
encoding |= (0xF & dpp.bank_mask) << 24;
encoding |= dpp.abs[1] << 23;
encoding |= dpp.neg[1] << 22;
encoding |= dpp.abs[0] << 21;
encoding |= dpp.neg[0] << 20;
if (ctx.gfx_level >= GFX10)
encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
encoding |= dpp.bound_ctrl << 19;
encoding |= dpp.dpp_ctrl << 8;
encoding |= reg(ctx, dpp_op, 8);
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
out.push_back(encoding);
return;
} else if (instr->isDPP8()) {
assert(ctx.gfx_level >= GFX10);
DPP8_instruction& dpp = instr->dpp8();
/* first emit the instruction without the DPP operand */
Operand dpp_op = instr->operands[0];
instr->operands[0] = Operand(PhysReg{234}, v1);
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
emit_instruction(ctx, out, instr);
uint32_t encoding = reg(ctx, dpp_op, 8);
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
for (unsigned i = 0; i < 8; ++i)
encoding |= dpp.lane_sel[i] << (8 + i * 3);
out.push_back(encoding);
return;
} else if (instr->isSDWA()) {
assert(ctx.gfx_level >= GFX8 && ctx.gfx_level < GFX11);
SDWA_instruction& sdwa = instr->sdwa();

View file

@ -98,8 +98,8 @@ enum class Format : std::uint16_t {
PSEUDO_REDUCTION = 19,
/* Vector ALU Formats */
VOP3P = 20,
VINTERP_INREG = 21,
VOP3P = 1 << 7,
VOP1 = 1 << 8,
VOP2 = 1 << 9,
VOPC = 1 << 10,
@ -1248,7 +1248,7 @@ struct Instruction {
return *(Pseudo_reduction_instruction*)this;
}
constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; }
constexpr bool isVOP3P() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3P; }
VINTERP_inreg_instruction& vinterp_inreg() noexcept
{
assert(isVINTERP_INREG());

View file

@ -69,8 +69,8 @@ class Format(Enum):
PSEUDO_BRANCH = 17
PSEUDO_BARRIER = 18
PSEUDO_REDUCTION = 19
VOP3P = 20
VINTERP_INREG = 21
VOP3P = 1 << 7
VOP1 = 1 << 8
VOP2 = 1 << 9
VOPC = 1 << 10