mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 04:48:08 +02:00
aco/assembler: support VOP3P with DPP
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22698>
This commit is contained in:
parent
d0e73cb313
commit
2548f28ab3
3 changed files with 42 additions and 46 deletions
|
|
@ -765,10 +765,45 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
unreachable("Pseudo instructions should be lowered before assembly.");
|
||||
break;
|
||||
default:
|
||||
/* TODO: VOP3/VOP3P can use DPP8/16 on GFX11 (encoding of src0 and DPP8/16 word seems same
|
||||
* except abs/neg is ignored). src2 cannot be literal and src0/src1 must be VGPR.
|
||||
*/
|
||||
if (instr->isVOP3() && !instr->isDPP()) {
|
||||
if (instr->isDPP16()) {
|
||||
assert(ctx.gfx_level >= GFX8);
|
||||
DPP16_instruction& dpp = instr->dpp16();
|
||||
|
||||
/* first emit the instruction without the DPP operand */
|
||||
Operand dpp_op = instr->operands[0];
|
||||
instr->operands[0] = Operand(PhysReg{250}, v1);
|
||||
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
|
||||
emit_instruction(ctx, out, instr);
|
||||
uint32_t encoding = (0xF & dpp.row_mask) << 28;
|
||||
encoding |= (0xF & dpp.bank_mask) << 24;
|
||||
encoding |= dpp.abs[1] << 23;
|
||||
encoding |= dpp.neg[1] << 22;
|
||||
encoding |= dpp.abs[0] << 21;
|
||||
encoding |= dpp.neg[0] << 20;
|
||||
if (ctx.gfx_level >= GFX10)
|
||||
encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
|
||||
encoding |= dpp.bound_ctrl << 19;
|
||||
encoding |= dpp.dpp_ctrl << 8;
|
||||
encoding |= reg(ctx, dpp_op, 8);
|
||||
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
|
||||
out.push_back(encoding);
|
||||
return;
|
||||
} else if (instr->isDPP8()) {
|
||||
assert(ctx.gfx_level >= GFX10);
|
||||
DPP8_instruction& dpp = instr->dpp8();
|
||||
|
||||
/* first emit the instruction without the DPP operand */
|
||||
Operand dpp_op = instr->operands[0];
|
||||
instr->operands[0] = Operand(PhysReg{234}, v1);
|
||||
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
|
||||
emit_instruction(ctx, out, instr);
|
||||
uint32_t encoding = reg(ctx, dpp_op, 8);
|
||||
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
|
||||
for (unsigned i = 0; i < 8; ++i)
|
||||
encoding |= dpp.lane_sel[i] << (8 + i * 3);
|
||||
out.push_back(encoding);
|
||||
return;
|
||||
} else if (instr->isVOP3()) {
|
||||
VALU_instruction& vop3 = instr->valu();
|
||||
|
||||
if (instr->isVOP2()) {
|
||||
|
|
@ -855,45 +890,6 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
for (unsigned i = 0; i < 3; i++)
|
||||
encoding |= vop3.neg_lo[i] << (29 + i);
|
||||
out.push_back(encoding);
|
||||
|
||||
} else if (instr->isDPP16()) {
|
||||
assert(ctx.gfx_level >= GFX8);
|
||||
DPP16_instruction& dpp = instr->dpp16();
|
||||
|
||||
/* first emit the instruction without the DPP operand */
|
||||
Operand dpp_op = instr->operands[0];
|
||||
instr->operands[0] = Operand(PhysReg{250}, v1);
|
||||
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
|
||||
emit_instruction(ctx, out, instr);
|
||||
uint32_t encoding = (0xF & dpp.row_mask) << 28;
|
||||
encoding |= (0xF & dpp.bank_mask) << 24;
|
||||
encoding |= dpp.abs[1] << 23;
|
||||
encoding |= dpp.neg[1] << 22;
|
||||
encoding |= dpp.abs[0] << 21;
|
||||
encoding |= dpp.neg[0] << 20;
|
||||
if (ctx.gfx_level >= GFX10)
|
||||
encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
|
||||
encoding |= dpp.bound_ctrl << 19;
|
||||
encoding |= dpp.dpp_ctrl << 8;
|
||||
encoding |= reg(ctx, dpp_op, 8);
|
||||
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
|
||||
out.push_back(encoding);
|
||||
return;
|
||||
} else if (instr->isDPP8()) {
|
||||
assert(ctx.gfx_level >= GFX10);
|
||||
DPP8_instruction& dpp = instr->dpp8();
|
||||
|
||||
/* first emit the instruction without the DPP operand */
|
||||
Operand dpp_op = instr->operands[0];
|
||||
instr->operands[0] = Operand(PhysReg{234}, v1);
|
||||
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
|
||||
emit_instruction(ctx, out, instr);
|
||||
uint32_t encoding = reg(ctx, dpp_op, 8);
|
||||
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
|
||||
for (unsigned i = 0; i < 8; ++i)
|
||||
encoding |= dpp.lane_sel[i] << (8 + i * 3);
|
||||
out.push_back(encoding);
|
||||
return;
|
||||
} else if (instr->isSDWA()) {
|
||||
assert(ctx.gfx_level >= GFX8 && ctx.gfx_level < GFX11);
|
||||
SDWA_instruction& sdwa = instr->sdwa();
|
||||
|
|
|
|||
|
|
@ -98,8 +98,8 @@ enum class Format : std::uint16_t {
|
|||
PSEUDO_REDUCTION = 19,
|
||||
|
||||
/* Vector ALU Formats */
|
||||
VOP3P = 20,
|
||||
VINTERP_INREG = 21,
|
||||
VOP3P = 1 << 7,
|
||||
VOP1 = 1 << 8,
|
||||
VOP2 = 1 << 9,
|
||||
VOPC = 1 << 10,
|
||||
|
|
@ -1248,7 +1248,7 @@ struct Instruction {
|
|||
return *(Pseudo_reduction_instruction*)this;
|
||||
}
|
||||
constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
|
||||
constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; }
|
||||
constexpr bool isVOP3P() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3P; }
|
||||
VINTERP_inreg_instruction& vinterp_inreg() noexcept
|
||||
{
|
||||
assert(isVINTERP_INREG());
|
||||
|
|
|
|||
|
|
@ -69,8 +69,8 @@ class Format(Enum):
|
|||
PSEUDO_BRANCH = 17
|
||||
PSEUDO_BARRIER = 18
|
||||
PSEUDO_REDUCTION = 19
|
||||
VOP3P = 20
|
||||
VINTERP_INREG = 21
|
||||
VOP3P = 1 << 7
|
||||
VOP1 = 1 << 8
|
||||
VOP2 = 1 << 9
|
||||
VOPC = 1 << 10
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue