aco: shrink DPP8_instruction

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25525>
This commit is contained in:
Rhys Perry 2023-10-02 15:44:49 +01:00 committed by Marge Bot
parent e90b5385a0
commit 26fce534b5
10 changed files with 25 additions and 29 deletions

View file

@ -814,8 +814,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
emit_instruction(ctx, out, instr);
uint32_t encoding = reg(ctx, dpp_op, 8);
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
for (unsigned i = 0; i < 8; ++i)
encoding |= dpp.lane_sel[i] << (8 + i * 3);
encoding |= dpp.lane_sel << 8;
out.push_back(encoding);
return;
} else if (instr->isVOP3()) {

View file

@ -275,11 +275,10 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
} else if (ctx->options->gfx_level >= GFX11 && and_mask == 0x1f && xor_mask < 0x10) {
dpp_ctrl = dpp_row_xmask(xor_mask);
} else if (ctx->options->gfx_level >= GFX10 && (and_mask & 0x18) == 0x18 && xor_mask < 8) {
Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src);
for (unsigned i = 0; i < 8; i++) {
ret->dpp8().lane_sel[i] = ((i & and_mask) ^ xor_mask);
}
return ret;
uint32_t lane_sel = 0;
for (unsigned i = 0; i < 8; i++)
lane_sel |= ((i & and_mask) ^ xor_mask) << (i * 3);
return bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src, lane_sel);
} else if (ctx->options->gfx_level >= GFX10 && (and_mask & 0x10) == 0x10) {
uint64_t lane_mask = 0;
for (unsigned i = 0; i < 16; i++)

View file

@ -455,8 +455,7 @@ convert_to_DPP(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, bool dpp8)
if (dpp8) {
DPP8_instruction* dpp = &instr->dpp8();
for (unsigned i = 0; i < 8; i++)
dpp->lane_sel[i] = i;
dpp->lane_sel = 0xfac688; /* [0,1,2,3,4,5,6,7] */
} else {
DPP16_instruction* dpp = &instr->dpp16();
dpp->dpp_ctrl = dpp_quad_perm(0, 1, 2, 3);

View file

@ -1461,9 +1461,10 @@ struct DPP16_instruction : public VALU_instruction {
static_assert(sizeof(DPP16_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
struct DPP8_instruction : public VALU_instruction {
uint8_t lane_sel[8];
uint32_t lane_sel : 24;
uint32_t padding : 8;
};
static_assert(sizeof(DPP8_instruction) == sizeof(VALU_instruction) + 8, "Unexpected padding");
static_assert(sizeof(DPP8_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
struct SubdwordSel {
enum sdwa_sel : uint8_t {

View file

@ -2831,12 +2831,12 @@ lower_to_hw_instr(Program* program)
Operand src0 = instr->operands[i];
Operand src1 = instr->operands[i + 4];
uint32_t lane_sel_xor1 = 0;
for (unsigned j = 0; j < 8; j++)
lane_sel_xor1 |= (j ^ 1) << (j * 3);
/* Swap odd, even lanes of mrt0. */
Builder::Result ret =
bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1), src0);
for (unsigned j = 0; j < 8; j++) {
ret->dpp8().lane_sel[j] = j ^ 1;
}
bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1), src0, lane_sel_xor1);
/* Swap even lanes between mrt0 and mrt1. */
bld.vop2(aco_opcode::v_cndmask_b32, tmp, Operand(dst0, v1), src1,
@ -2845,11 +2845,8 @@ lower_to_hw_instr(Program* program)
Operand(clobber_vcc.physReg(), bld.lm));
/* Swap odd, even lanes of mrt0 again. */
ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1),
Operand(tmp.physReg(), v1));
for (unsigned j = 0; j < 8; j++) {
ret->dpp8().lane_sel[j] = j ^ 1;
}
bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1),
Operand(tmp.physReg(), v1), lane_sel_xor1);
mrt0[i] = Operand(dst0, v1);
mrt1[i] = Operand(dst1, v1);

View file

@ -161,6 +161,8 @@ class Format(Enum):
('uint8_t', 'row_mask', '0xF'),
('uint8_t', 'bank_mask', '0xF'),
('bool', 'bound_ctrl', 'true')]
elif self == Format.DPP8:
return [('uint32_t', 'lane_sel', 0)]
elif self == Format.VOP3P:
return [('uint8_t', 'opsel_lo', None),
('uint8_t', 'opsel_hi', None)]

View file

@ -186,8 +186,7 @@ struct InstrPred {
if (a->isDPP8()) {
DPP8_instruction& aDPP = a->dpp8();
DPP8_instruction& bDPP = b->dpp8();
return aDPP.pass_flags == bDPP.pass_flags &&
!memcmp(aDPP.lane_sel, bDPP.lane_sel, sizeof(aDPP.lane_sel));
return aDPP.pass_flags == bDPP.pass_flags && aDPP.lane_sel == bDPP.lane_sel;
}
if (a->isSDWA()) {
SDWA_instruction& aSDWA = a->sdwa();

View file

@ -4865,8 +4865,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (dpp8) {
DPP8_instruction* dpp = &instr->dpp8();
for (unsigned j = 0; j < 8; ++j)
dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
dpp->lane_sel = info.instr->dpp8().lane_sel;
if (mov_uses_mods)
instr->format = asVOP3(instr->format);
} else {

View file

@ -547,7 +547,7 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (dpp8) {
DPP8_instruction* dpp = &instr->dpp8();
memcpy(dpp->lane_sel, mov->dpp8().lane_sel, sizeof(dpp->lane_sel));
dpp->lane_sel = mov->dpp8().lane_sel;
if (mov_uses_mods)
instr->format = asVOP3(instr->format);
} else {

View file

@ -709,9 +709,10 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
fprintf(output, " bound_ctrl:1");
} else if (instr->isDPP8()) {
const DPP8_instruction& dpp = instr->dpp8();
fprintf(output, " dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]", dpp.lane_sel[0], dpp.lane_sel[1],
dpp.lane_sel[2], dpp.lane_sel[3], dpp.lane_sel[4], dpp.lane_sel[5], dpp.lane_sel[6],
dpp.lane_sel[7]);
/* Print the DPP8 lane selectors as "dpp8:[s0,...,s7]". lane_sel packs eight
 * 3-bit selectors, with the selector for lane i stored at bit offset i * 3. */
fprintf(output, " dpp8:[");
for (unsigned i = 0; i < 8; i++) {
   /* Mask with 0x7 to extract the full 3-bit field; 0x8 would only pick up
    * the lowest bit of the following lane's selector. */
   const unsigned sel = (dpp.lane_sel >> (i * 3)) & 0x7;
   fprintf(output, "%s%u", i ? "," : "", sel);
}
fprintf(output, "]");
} else if (instr->isSDWA()) {
const SDWA_instruction& sdwa = instr->sdwa();
if (!instr->isVOPC()) {