aco: add VOPD format

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23367>
This commit is contained in:
Rhys Perry 2023-05-22 14:15:58 +01:00 committed by Marge Bot
parent 54c52932d4
commit 6547e17e60
6 changed files with 141 additions and 1 deletions

View file

@ -468,6 +468,35 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
out.push_back(encoding);
break;
}
case Format::VOPD: {
   /* A VOPD instruction is emitted as two dwords. The first carries the format
    * tag, both opcodes and the sources of the first operation (instr->opcode);
    * the second carries the sources of the second operation (vopd.opy) and
    * both destinations. */
   VOPD_instruction& dual = instr->vopd();
   const bool first_is_mov = instr->opcode == aco_opcode::v_dual_mov_b32;
   const bool second_is_mov = dual.opy == aco_opcode::v_dual_mov_b32;

   uint32_t dword0 = (0b110010 << 26);
   dword0 |= reg(ctx, instr->operands[0]);
   /* v_dual_mov_b32 is the only opcode for which no second source is encoded. */
   if (!first_is_mov)
      dword0 |= reg(ctx, instr->operands[1], 8) << 9;
   dword0 |= (uint32_t)ctx.opcode[(int)dual.opy] << 17;
   dword0 |= opcode << 22;
   out.push_back(dword0);

   /* Index of the first operand that belongs to the second operation, i.e.
    * how many operands the first operation occupies in instr->operands. */
   unsigned opy_first;
   switch (instr->opcode) {
   case aco_opcode::v_dual_mov_b32: opy_first = 1; break;
   case aco_opcode::v_dual_fmac_f32:
   case aco_opcode::v_dual_fmaak_f32:
   case aco_opcode::v_dual_fmamk_f32:
   case aco_opcode::v_dual_cndmask_b32:
   case aco_opcode::v_dual_dot2acc_f32_f16:
   case aco_opcode::v_dual_dot2acc_f32_bf16: opy_first = 3; break;
   default: opy_first = 2; break;
   }

   uint32_t dword1 = reg(ctx, instr->operands[opy_first]);
   if (!second_is_mov)
      dword1 |= reg(ctx, instr->operands[opy_first + 1], 8) << 9;
   /* The lowest bit of the second destination register is dropped before the
    * value is placed at bit 17. */
   dword1 |= (reg(ctx, instr->definitions[1], 8) >> 1) << 17;
   dword1 |= reg(ctx, instr->definitions[0], 8) << 24;
   out.push_back(dword1);
   break;
}
case Format::DS: {
DS_instruction& ds = instr->ds();
uint32_t encoding = (0b110110 << 26);

View file

@ -578,6 +578,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
("vopc_sdwa", [Format.VOPC, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2])),
("vop3", [Format.VOP3], 'VALU_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
("vop3p", [Format.VOP3P], 'VALU_instruction', [(1, 2), (1, 3)]),
("vopd", [Format.VOPD], 'VOPD_instruction', [(2, 2), (2, 3), (2, 4), (2, 5), (2, 6)]),
("vinterp_inreg", [Format.VINTERP_INREG], 'VINTERP_inreg_instruction', [(1, 3)]),
("vintrp", [Format.VINTRP], 'VINTRP_instruction', [(1, 2), (1, 3)]),
("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]),

View file

@ -957,6 +957,7 @@ struct Pseudo_reduction_instruction;
struct VALU_instruction;
struct VINTERP_inreg_instruction;
struct VINTRP_instruction;
struct VOPD_instruction;
struct DPP16_instruction;
struct DPP8_instruction;
struct SDWA_instruction;
@ -1210,6 +1211,17 @@ struct Instruction {
return *(VINTERP_inreg_instruction*)this;
}
/* True when the format is exactly Format::VINTERP_INREG (an equality test,
 * unlike the bit tests used by isVOP1()/isVOP2()/isVOPC() below). */
constexpr bool isVINTERP_INREG() const noexcept { return format == Format::VINTERP_INREG; }
/* Views this instruction as a VOPD_instruction. Asserts that the format is
 * VOPD before casting. */
VOPD_instruction& vopd() noexcept
{
   assert(isVOPD());
   VOPD_instruction* const as_vopd = (VOPD_instruction*)this;
   return *as_vopd;
}
/* Read-only counterpart of vopd(): views this instruction as a const
 * VOPD_instruction. Asserts that the format is VOPD before casting. */
const VOPD_instruction& vopd() const noexcept
{
   assert(isVOPD());
   const VOPD_instruction* const as_vopd = (const VOPD_instruction*)this;
   return *as_vopd;
}
/* True when the format is exactly Format::VOPD; this is the precondition
 * asserted by the vopd() accessors. */
constexpr bool isVOPD() const noexcept { return format == Format::VOPD; }
/* These three predicates test a single bit of the format value instead of
 * comparing for equality, so they also hold when further format bits are set.
 * NOTE(review): presumably this is what lets combined formats such as
 * VOP1 + DPP16 still count as VOP1 - confirm against the Format enum. */
constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
@ -1278,7 +1290,8 @@ struct Instruction {
}
/* True when the instruction uses any of the vector-ALU formats: VOP1, VOP2,
 * VOPC, VOP3, VOP3P, VINTERP_INREG or VOPD.
 *
 * Only one return statement is kept. With the superseded return (the one
 * without isVOPD()) still in front of it, the VOPD check was unreachable and
 * VOPD instructions were not reported as VALU. */
constexpr bool isVALU() const noexcept
{
   return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P() || isVINTERP_INREG() ||
          isVOPD();
}
constexpr bool isSALU() const noexcept
@ -1368,6 +1381,12 @@ struct VINTERP_inreg_instruction : public VALU_instruction {
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
"Unexpected padding");
/**
 * VOPD Format:
 * One instruction that carries two operations. The first operation is
 * identified by the inherited Instruction::opcode, the second one by `opy`.
 */
struct VOPD_instruction : public VALU_instruction {
/* Opcode of the second operation. */
aco_opcode opy;
/* Explicit filler so the struct grows by exactly 4 bytes (checked below). */
uint16_t padding;
};
static_assert(sizeof(VOPD_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
/**
* Data Parallel Primitives Format:
* This format can be used for VOP1, VOP2 or VOPC instructions.

View file

@ -89,6 +89,7 @@ class Format(IntEnum):
VINTRP = auto()
# Vector ALU Formats
VINTERP_INREG = auto()
VOPD = auto()
VOP1 = 1 << 7
VOP2 = 1 << 8
VOPC = 1 << 9
@ -186,6 +187,8 @@ class Format(IntEnum):
elif self == Format.VOP3P:
return [('uint8_t', 'opsel_lo', None),
('uint8_t', 'opsel_hi', None)]
elif self == Format.VOPD:
return [('aco_opcode', 'opy', None)]
elif self == Format.VINTERP_INREG:
return [('unsigned', 'wait_exp', 7),
('uint8_t', 'opsel', 0)]
@ -1272,6 +1275,29 @@ for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, defs, ops, cls
opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP3, cls, in_mod, out_mod, definitions = defs, operands = ops)
# VOPD opcode table. Each entry is (GFX11 hardware opcode, mnemonic).
# NOTE(review): 0x0e and 0x0f do not appear in the table - presumably unassigned
# encodings; confirm against the ISA documentation.
VOPD = {
(0x00, "v_dual_fmac_f32"),
(0x01, "v_dual_fmaak_f32"),
(0x02, "v_dual_fmamk_f32"),
(0x03, "v_dual_mul_f32"),
(0x04, "v_dual_add_f32"),
(0x05, "v_dual_sub_f32"),
(0x06, "v_dual_subrev_f32"),
(0x07, "v_dual_mul_dx9_zero_f32"),
(0x08, "v_dual_mov_b32"),
(0x09, "v_dual_cndmask_b32"),
(0x0a, "v_dual_max_f32"),
(0x0b, "v_dual_min_f32"),
(0x0c, "v_dual_dot2acc_f32_f16"),
(0x0d, "v_dual_dot2acc_f32_bf16"),
(0x10, "v_dual_add_nc_u32"),
(0x11, "v_dual_lshlrev_b32"),
(0x12, "v_dual_and_b32"),
}
# Register every table entry as a Format.VOPD opcode of class InstrClass.Valu32.
# The three -1 arguments occupy the positions where the VOP3 registration above
# passes its gfx7/gfx9/gfx10 opcodes - presumably -1 marks the opcode as not
# available on those generations; TODO confirm.
for gfx11, name in VOPD:
opcode(name, -1, -1, -1, gfx11, format = Format.VOPD, cls = InstrClass.Valu32)
# DS instructions: 3 inputs (1 addr, 2 data), 1 output
DS = {
(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),

View file

@ -443,6 +443,12 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]);
break;
}
case Format::VOPD: {
   // TODO: beautify
   /* For now only the name of the second opcode (opy) is appended. */
   const char* opy_name = instr_info.name[(int)instr->vopd().opy];
   fprintf(output, " %s", opy_name);
   break;
}
case Format::DS: {
const DS_instruction& ds = instr->ds();
if (ds.offset0)

View file

@ -1055,3 +1055,62 @@ BEGIN_TEST(assembler.vop3_dpp)
finish_assembler_test();
END_TEST
BEGIN_TEST(assembler.vopd)
/* Assembles a series of VOPD instructions for GFX11 and compares the
 * disassembly and raw dwords against the expectation lines below. */
if (!setup_cs(NULL, GFX11))
return;
/* Fixed registers shared by all cases. In the expected output PhysReg(256 + n)
 * appears as vN and PhysReg(0) as s0. */
Definition dst_v0 = bld.def(v1);
dst_v0.setFixed(PhysReg(256));
Definition dst_v1 = bld.def(v1);
dst_v1.setFixed(PhysReg(256 + 1));
Operand op_v0(bld.tmp(v1));
op_v0.setFixed(PhysReg(256 + 0));
Operand op_v1(bld.tmp(v1));
op_v1.setFixed(PhysReg(256 + 1));
Operand op_v2(bld.tmp(v1));
op_v2.setFixed(PhysReg(256 + 2));
Operand op_v3(bld.tmp(v1));
op_v3.setFixed(PhysReg(256 + 3));
Operand op_s0(bld.tmp(s1));
op_s0.setFixed(PhysReg(0));
Operand op_vcc(bld.tmp(s1));
op_vcc.setFixed(vcc);
//>> BB0:
/* Two single-source moves. */
//! v_dual_mov_b32 v0, v0 :: v_dual_mov_b32 v1, v1 ; ca100100 00000101
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1, aco_opcode::v_dual_mov_b32);
/* A 32-bit literal as source of the first operation: the source field reads
 * 0xff and the literal follows as a third dword. */
//! v_dual_mov_b32 v0, 0x60 :: v_dual_mov_b32 v1, s0 ; ca1000ff 00000000 00000060
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, Operand::c32(96), op_s0,
aco_opcode::v_dual_mov_b32);
/* Same literal, this time as source of the second operation. */
//! v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0x60 ; ca100000 000000ff 00000060
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_s0, Operand::c32(96),
aco_opcode::v_dual_mov_b32);
/* Two-source first operation followed by a move. */
//! v_dual_mul_f32 v0, v0, v1 :: v_dual_mov_b32 v1, v2 ; c8d00300 00000102
bld.vopd(aco_opcode::v_dual_mul_f32, dst_v0, dst_v1, op_v0, op_v1, op_v2,
aco_opcode::v_dual_mov_b32);
/* fmac takes three operands in the IR; the third one (op_v0) does not show up
 * in the expected encoding. */
//! v_dual_fmac_f32 v0, v1, v2 :: v_dual_mov_b32 v1, v3 ; c8100501 00000103
bld.vopd(aco_opcode::v_dual_fmac_f32, dst_v0, dst_v1, op_v1, op_v2, op_v0, op_v3,
aco_opcode::v_dual_mov_b32);
/* Move followed by a two-source second operation. */
//! v_dual_mov_b32 v0, v0 :: v_dual_and_b32 v1, v1, v2 ; ca240100 00000501
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1, op_v2,
aco_opcode::v_dual_and_b32);
/* Both operations take vcc as a third operand, which is not part of the
 * expected disassembly. */
//! v_dual_cndmask_b32 v0, v0, v1 :: v_dual_cndmask_b32 v1, v2, v3 ; ca520300 00000702
bld.vopd(aco_opcode::v_dual_cndmask_b32, dst_v0, dst_v1, op_v0, op_v1, op_vcc, op_v2, op_v3,
op_vcc, aco_opcode::v_dual_cndmask_b32);
finish_assembler_test();
END_TEST