mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 07:10:09 +01:00
aco: add VOPD format
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23367>
This commit is contained in:
parent
54c52932d4
commit
6547e17e60
6 changed files with 141 additions and 1 deletions
|
|
@ -468,6 +468,35 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
||||||
out.push_back(encoding);
|
out.push_back(encoding);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// VOPD: one instruction carrying two operations. instr->opcode is the first
// operation and vopd.opy the second. Two dwords are pushed to `out`: the first
// holds the first operation's sources and both opcodes, the second holds the
// second operation's sources and both destinations.
case Format::VOPD: {
|
||||||
|
VOPD_instruction& vopd = instr->vopd();
|
||||||
|
// First dword: fixed 0b110010 pattern in bits 31:26.
uint32_t encoding = (0b110010 << 26);
|
||||||
|
// First operation, source 0: lowest bits of the dword.
encoding |= reg(ctx, instr->operands[0]);
|
||||||
|
// v_dual_mov_b32 is the only first operation encoded without a second source.
if (instr->opcode != aco_opcode::v_dual_mov_b32)
|
||||||
|
encoding |= reg(ctx, instr->operands[1], 8) << 9;
|
||||||
|
// Hardware opcode of the second operation, placed at bit 17.
encoding |= (uint32_t)ctx.opcode[(int)vopd.opy] << 17;
|
||||||
|
// `opcode` is declared outside this hunk -- presumably the hardware opcode
// looked up for instr->opcode; TODO confirm. It is placed at bit 22.
encoding |= opcode << 22;
|
||||||
|
out.push_back(encoding);
|
||||||
|
|
||||||
|
// Index of the first operand that belongs to the second operation: 1 after a
// mov, 2 by default, 3 after the first operations listed in the switch below.
unsigned opy_start = instr->opcode == aco_opcode::v_dual_mov_b32 ? 1 : 2;
|
||||||
|
switch (instr->opcode) {
|
||||||
|
case aco_opcode::v_dual_fmac_f32:
|
||||||
|
case aco_opcode::v_dual_fmaak_f32:
|
||||||
|
case aco_opcode::v_dual_fmamk_f32:
|
||||||
|
case aco_opcode::v_dual_cndmask_b32:
|
||||||
|
case aco_opcode::v_dual_dot2acc_f32_f16:
|
||||||
|
case aco_opcode::v_dual_dot2acc_f32_bf16: opy_start = 3; break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second dword (starts from scratch, hence '=' rather than '|='):
// second operation, source 0 in the lowest bits.
encoding = reg(ctx, instr->operands[opy_start]);
|
||||||
|
// As above, a mov as the second operation has no second source.
if (vopd.opy != aco_opcode::v_dual_mov_b32)
|
||||||
|
encoding |= reg(ctx, instr->operands[opy_start + 1], 8) << 9;
|
||||||
|
// Second destination: the lowest bit of its register number is dropped before
// it is placed at bit 17.
// NOTE(review): presumably the hardware derives that bit itself -- confirm
// against the ISA documentation.
encoding |= (reg(ctx, instr->definitions[1], 8) >> 1) << 17;
|
||||||
|
// First destination, placed at bit 24.
encoding |= reg(ctx, instr->definitions[0], 8) << 24;
|
||||||
|
out.push_back(encoding);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case Format::DS: {
|
case Format::DS: {
|
||||||
DS_instruction& ds = instr->ds();
|
DS_instruction& ds = instr->ds();
|
||||||
uint32_t encoding = (0b110110 << 26);
|
uint32_t encoding = (0b110110 << 26);
|
||||||
|
|
|
||||||
|
|
@ -578,6 +578,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
|
||||||
("vopc_sdwa", [Format.VOPC, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2])),
|
("vopc_sdwa", [Format.VOPC, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2])),
|
||||||
("vop3", [Format.VOP3], 'VALU_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
|
("vop3", [Format.VOP3], 'VALU_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
|
||||||
("vop3p", [Format.VOP3P], 'VALU_instruction', [(1, 2), (1, 3)]),
|
("vop3p", [Format.VOP3P], 'VALU_instruction', [(1, 2), (1, 3)]),
|
||||||
|
("vopd", [Format.VOPD], 'VOPD_instruction', [(2, 2), (2, 3), (2, 4), (2, 5), (2, 6)]),
|
||||||
("vinterp_inreg", [Format.VINTERP_INREG], 'VINTERP_inreg_instruction', [(1, 3)]),
|
("vinterp_inreg", [Format.VINTERP_INREG], 'VINTERP_inreg_instruction', [(1, 3)]),
|
||||||
("vintrp", [Format.VINTRP], 'VINTRP_instruction', [(1, 2), (1, 3)]),
|
("vintrp", [Format.VINTRP], 'VINTRP_instruction', [(1, 2), (1, 3)]),
|
||||||
("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]),
|
("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]),
|
||||||
|
|
|
||||||
|
|
@ -957,6 +957,7 @@ struct Pseudo_reduction_instruction;
|
||||||
struct VALU_instruction;
|
struct VALU_instruction;
|
||||||
struct VINTERP_inreg_instruction;
|
struct VINTERP_inreg_instruction;
|
||||||
struct VINTRP_instruction;
|
struct VINTRP_instruction;
|
||||||
|
struct VOPD_instruction;
|
||||||
struct DPP16_instruction;
|
struct DPP16_instruction;
|
||||||
struct DPP8_instruction;
|
struct DPP8_instruction;
|
||||||
struct SDWA_instruction;
|
struct SDWA_instruction;
|
||||||
|
|
@ -1210,6 +1211,17 @@ struct Instruction {
|
||||||
return *(VINTERP_inreg_instruction*)this;
|
return *(VINTERP_inreg_instruction*)this;
|
||||||
}
|
}
|
||||||
constexpr bool isVINTERP_INREG() const noexcept { return format == Format::VINTERP_INREG; }
|
constexpr bool isVINTERP_INREG() const noexcept { return format == Format::VINTERP_INREG; }
|
||||||
|
// Returns this instruction viewed as a VOPD_instruction.
// Asserts that the format is VOPD; the cast itself is unchecked.
VOPD_instruction& vopd() noexcept
|
||||||
|
{
|
||||||
|
assert(isVOPD());
|
||||||
|
return *(VOPD_instruction*)this;
|
||||||
|
}
|
||||||
|
// Const overload: returns this instruction viewed as a const VOPD_instruction.
// Asserts that the format is VOPD; the cast itself is unchecked.
const VOPD_instruction& vopd() const noexcept
|
||||||
|
{
|
||||||
|
assert(isVOPD());
|
||||||
|
return *(VOPD_instruction*)this;
|
||||||
|
}
|
||||||
|
// True when the format is exactly Format::VOPD (an equality test, not a bit-mask test).
constexpr bool isVOPD() const noexcept { return format == Format::VOPD; }
|
||||||
constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
|
constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
|
||||||
constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
|
constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
|
||||||
constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
|
constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
|
||||||
|
|
@ -1278,7 +1290,8 @@ struct Instruction {
|
||||||
}
|
}
|
||||||
constexpr bool isVALU() const noexcept
|
constexpr bool isVALU() const noexcept
|
||||||
{
|
{
|
||||||
return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P() || isVINTERP_INREG();
|
return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P() || isVINTERP_INREG() ||
|
||||||
|
isVOPD();
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool isSALU() const noexcept
|
constexpr bool isSALU() const noexcept
|
||||||
|
|
@ -1368,6 +1381,12 @@ struct VINTERP_inreg_instruction : public VALU_instruction {
|
||||||
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
|
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
|
||||||
"Unexpected padding");
|
"Unexpected padding");
|
||||||
|
|
||||||
|
/**
 * VOPD format:
 * Extends VALU_instruction with the opcode of a second operation.
 */
struct VOPD_instruction : public VALU_instruction {
|
||||||
|
// Opcode of the second operation; the first is the instruction's own opcode.
aco_opcode opy;
|
||||||
|
// Explicit filler: the static_assert that follows this struct requires its
// size to be exactly sizeof(VALU_instruction) + 4.
uint16_t padding;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(VOPD_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Data Parallel Primitives Format:
|
* Data Parallel Primitives Format:
|
||||||
* This format can be used for VOP1, VOP2 or VOPC instructions.
|
* This format can be used for VOP1, VOP2 or VOPC instructions.
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,7 @@ class Format(IntEnum):
|
||||||
VINTRP = auto()
|
VINTRP = auto()
|
||||||
# Vector ALU Formats
|
# Vector ALU Formats
|
||||||
VINTERP_INREG = auto()
|
VINTERP_INREG = auto()
|
||||||
|
VOPD = auto()
|
||||||
VOP1 = 1 << 7
|
VOP1 = 1 << 7
|
||||||
VOP2 = 1 << 8
|
VOP2 = 1 << 8
|
||||||
VOPC = 1 << 9
|
VOPC = 1 << 9
|
||||||
|
|
@ -186,6 +187,8 @@ class Format(IntEnum):
|
||||||
elif self == Format.VOP3P:
|
elif self == Format.VOP3P:
|
||||||
return [('uint8_t', 'opsel_lo', None),
|
return [('uint8_t', 'opsel_lo', None),
|
||||||
('uint8_t', 'opsel_hi', None)]
|
('uint8_t', 'opsel_hi', None)]
|
||||||
|
elif self == Format.VOPD:
|
||||||
|
return [('aco_opcode', 'opy', None)]
|
||||||
elif self == Format.VINTERP_INREG:
|
elif self == Format.VINTERP_INREG:
|
||||||
return [('unsigned', 'wait_exp', 7),
|
return [('unsigned', 'wait_exp', 7),
|
||||||
('uint8_t', 'opsel', 0)]
|
('uint8_t', 'opsel', 0)]
|
||||||
|
|
@ -1272,6 +1275,29 @@ for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, defs, ops, cls
|
||||||
opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP3, cls, in_mod, out_mod, definitions = defs, operands = ops)
|
opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP3, cls, in_mod, out_mod, definitions = defs, operands = ops)
|
||||||
|
|
||||||
|
|
||||||
|
# VOPD opcodes as (gfx11 hardware opcode, name) pairs.
# The braces make this a set of tuples, so iteration order is not the listed order.
VOPD = {
|
||||||
|
(0x00, "v_dual_fmac_f32"),
|
||||||
|
(0x01, "v_dual_fmaak_f32"),
|
||||||
|
(0x02, "v_dual_fmamk_f32"),
|
||||||
|
(0x03, "v_dual_mul_f32"),
|
||||||
|
(0x04, "v_dual_add_f32"),
|
||||||
|
(0x05, "v_dual_sub_f32"),
|
||||||
|
(0x06, "v_dual_subrev_f32"),
|
||||||
|
(0x07, "v_dual_mul_dx9_zero_f32"),
|
||||||
|
(0x08, "v_dual_mov_b32"),
|
||||||
|
(0x09, "v_dual_cndmask_b32"),
|
||||||
|
(0x0a, "v_dual_max_f32"),
|
||||||
|
(0x0b, "v_dual_min_f32"),
|
||||||
|
(0x0c, "v_dual_dot2acc_f32_f16"),
|
||||||
|
(0x0d, "v_dual_dot2acc_f32_bf16"),
|
||||||
|
(0x10, "v_dual_add_nc_u32"),
|
||||||
|
(0x11, "v_dual_lshlrev_b32"),
|
||||||
|
(0x12, "v_dual_and_b32"),
|
||||||
|
}
|
||||||
|
# Register every entry as a Format.VOPD opcode of class Valu32. The three -1
# arguments fill the gfx7/gfx9/gfx10 opcode slots -- presumably meaning "not
# available on those generations"; TODO confirm against opcode().
for gfx11, name in VOPD:
|
||||||
|
opcode(name, -1, -1, -1, gfx11, format = Format.VOPD, cls = InstrClass.Valu32)
|
||||||
|
|
||||||
|
|
||||||
# DS instructions: 3 inputs (1 addr, 2 data), 1 output
|
# DS instructions: 3 inputs (1 addr, 2 data), 1 output
|
||||||
DS = {
|
DS = {
|
||||||
(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
|
(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
|
||||||
|
|
|
||||||
|
|
@ -443,6 +443,12 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
|
||||||
fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]);
|
fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// VOPD: print a space followed by the name of the second operation (vopd.opy),
// looked up in instr_info.name.
case Format::VOPD: {
|
||||||
|
const VOPD_instruction& vopd = instr->vopd();
|
||||||
|
// TODO: beautify
|
||||||
|
fprintf(output, " %s", instr_info.name[(int)vopd.opy]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case Format::DS: {
|
case Format::DS: {
|
||||||
const DS_instruction& ds = instr->ds();
|
const DS_instruction& ds = instr->ds();
|
||||||
if (ds.offset0)
|
if (ds.offset0)
|
||||||
|
|
|
||||||
|
|
@ -1055,3 +1055,62 @@ BEGIN_TEST(assembler.vop3_dpp)
|
||||||
|
|
||||||
finish_assembler_test();
|
finish_assembler_test();
|
||||||
END_TEST
|
END_TEST
|
||||||
|
|
||||||
|
/* Assembler test for the VOPD format, set up for GFX11 only. Each bld.vopd()
 * call is preceded by a "//!" line giving the expected disassembly and the
 * expected encoded dwords. The cases cover mov/mov with VGPR, SGPR and literal
 * sources, a two-source and a three-operand first operation, a two-source
 * second operation, and cndmask on both sides (with vcc passed as an operand).
 */
BEGIN_TEST(assembler.vopd)
|
||||||
|
if (!setup_cs(NULL, GFX11))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Pin every definition and operand to a fixed physical register. The expected
 * output below prints PhysReg(256 + n) as v<n> and PhysReg(0) as s0. */
Definition dst_v0 = bld.def(v1);
|
||||||
|
dst_v0.setFixed(PhysReg(256));
|
||||||
|
|
||||||
|
Definition dst_v1 = bld.def(v1);
|
||||||
|
dst_v1.setFixed(PhysReg(256 + 1));
|
||||||
|
|
||||||
|
Operand op_v0(bld.tmp(v1));
|
||||||
|
op_v0.setFixed(PhysReg(256 + 0));
|
||||||
|
|
||||||
|
Operand op_v1(bld.tmp(v1));
|
||||||
|
op_v1.setFixed(PhysReg(256 + 1));
|
||||||
|
|
||||||
|
Operand op_v2(bld.tmp(v1));
|
||||||
|
op_v2.setFixed(PhysReg(256 + 2));
|
||||||
|
|
||||||
|
Operand op_v3(bld.tmp(v1));
|
||||||
|
op_v3.setFixed(PhysReg(256 + 3));
|
||||||
|
|
||||||
|
Operand op_s0(bld.tmp(s1));
|
||||||
|
op_s0.setFixed(PhysReg(0));
|
||||||
|
|
||||||
|
Operand op_vcc(bld.tmp(s1));
|
||||||
|
op_vcc.setFixed(vcc);
|
||||||
|
|
||||||
|
//>> BB0:
|
||||||
|
//! v_dual_mov_b32 v0, v0 :: v_dual_mov_b32 v1, v1 ; ca100100 00000101
|
||||||
|
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1, aco_opcode::v_dual_mov_b32);
|
||||||
|
|
||||||
|
//! v_dual_mov_b32 v0, 0x60 :: v_dual_mov_b32 v1, s0 ; ca1000ff 00000000 00000060
|
||||||
|
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, Operand::c32(96), op_s0,
|
||||||
|
aco_opcode::v_dual_mov_b32);
|
||||||
|
|
||||||
|
//! v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0x60 ; ca100000 000000ff 00000060
|
||||||
|
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_s0, Operand::c32(96),
|
||||||
|
aco_opcode::v_dual_mov_b32);
|
||||||
|
|
||||||
|
//! v_dual_mul_f32 v0, v0, v1 :: v_dual_mov_b32 v1, v2 ; c8d00300 00000102
|
||||||
|
bld.vopd(aco_opcode::v_dual_mul_f32, dst_v0, dst_v1, op_v0, op_v1, op_v2,
|
||||||
|
aco_opcode::v_dual_mov_b32);
|
||||||
|
|
||||||
|
//! v_dual_fmac_f32 v0, v1, v2 :: v_dual_mov_b32 v1, v3 ; c8100501 00000103
|
||||||
|
bld.vopd(aco_opcode::v_dual_fmac_f32, dst_v0, dst_v1, op_v1, op_v2, op_v0, op_v3,
|
||||||
|
aco_opcode::v_dual_mov_b32);
|
||||||
|
|
||||||
|
//! v_dual_mov_b32 v0, v0 :: v_dual_and_b32 v1, v1, v2 ; ca240100 00000501
|
||||||
|
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1, op_v2,
|
||||||
|
aco_opcode::v_dual_and_b32);
|
||||||
|
|
||||||
|
//! v_dual_cndmask_b32 v0, v0, v1 :: v_dual_cndmask_b32 v1, v2, v3 ; ca520300 00000702
|
||||||
|
bld.vopd(aco_opcode::v_dual_cndmask_b32, dst_v0, dst_v1, op_v0, op_v1, op_vcc, op_v2, op_v3,
|
||||||
|
op_vcc, aco_opcode::v_dual_cndmask_b32);
|
||||||
|
|
||||||
|
finish_assembler_test();
|
||||||
|
END_TEST
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue