mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 15:50:11 +01:00
aco: shrink DPP8_instruction
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25525>
This commit is contained in:
parent
e90b5385a0
commit
26fce534b5
10 changed files with 25 additions and 29 deletions
|
|
@ -814,8 +814,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
emit_instruction(ctx, out, instr);
|
||||
uint32_t encoding = reg(ctx, dpp_op, 8);
|
||||
encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
|
||||
for (unsigned i = 0; i < 8; ++i)
|
||||
encoding |= dpp.lane_sel[i] << (8 + i * 3);
|
||||
encoding |= dpp.lane_sel << 8;
|
||||
out.push_back(encoding);
|
||||
return;
|
||||
} else if (instr->isVOP3()) {
|
||||
|
|
|
|||
|
|
@ -275,11 +275,10 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
|
|||
} else if (ctx->options->gfx_level >= GFX11 && and_mask == 0x1f && xor_mask < 0x10) {
|
||||
dpp_ctrl = dpp_row_xmask(xor_mask);
|
||||
} else if (ctx->options->gfx_level >= GFX10 && (and_mask & 0x18) == 0x18 && xor_mask < 8) {
|
||||
Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src);
|
||||
for (unsigned i = 0; i < 8; i++) {
|
||||
ret->dpp8().lane_sel[i] = ((i & and_mask) ^ xor_mask);
|
||||
}
|
||||
return ret;
|
||||
uint32_t lane_sel = 0;
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
lane_sel |= ((i & and_mask) ^ xor_mask) << (i * 3);
|
||||
return bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src, lane_sel);
|
||||
} else if (ctx->options->gfx_level >= GFX10 && (and_mask & 0x10) == 0x10) {
|
||||
uint64_t lane_mask = 0;
|
||||
for (unsigned i = 0; i < 16; i++)
|
||||
|
|
|
|||
|
|
@ -455,8 +455,7 @@ convert_to_DPP(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, bool dpp8)
|
|||
|
||||
if (dpp8) {
|
||||
DPP8_instruction* dpp = &instr->dpp8();
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
dpp->lane_sel[i] = i;
|
||||
dpp->lane_sel = 0xfac688; /* [0,1,2,3,4,5,6,7] */
|
||||
} else {
|
||||
DPP16_instruction* dpp = &instr->dpp16();
|
||||
dpp->dpp_ctrl = dpp_quad_perm(0, 1, 2, 3);
|
||||
|
|
|
|||
|
|
@ -1461,9 +1461,10 @@ struct DPP16_instruction : public VALU_instruction {
|
|||
static_assert(sizeof(DPP16_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
|
||||
|
||||
struct DPP8_instruction : public VALU_instruction {
|
||||
uint8_t lane_sel[8];
|
||||
uint32_t lane_sel : 24; /* eight 3-bit lane selectors; the selector for lane i is at bits [3*i, 3*i+2] */
|
||||
uint32_t padding : 8; /* fills the remaining bits of the 32-bit word, keeping the struct at VALU_instruction + 4 bytes */
|
||||
};
|
||||
static_assert(sizeof(DPP8_instruction) == sizeof(VALU_instruction) + 8, "Unexpected padding");
|
||||
static_assert(sizeof(DPP8_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
|
||||
|
||||
struct SubdwordSel {
|
||||
enum sdwa_sel : uint8_t {
|
||||
|
|
|
|||
|
|
@ -2831,12 +2831,12 @@ lower_to_hw_instr(Program* program)
|
|||
Operand src0 = instr->operands[i];
|
||||
Operand src1 = instr->operands[i + 4];
|
||||
|
||||
uint32_t lane_sel_xor1 = 0;
|
||||
for (unsigned j = 0; j < 8; j++)
|
||||
lane_sel_xor1 |= (j ^ 1) << (j * 3);
|
||||
|
||||
/* Swap odd, even lanes of mrt0. */
|
||||
Builder::Result ret =
|
||||
bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1), src0);
|
||||
for (unsigned j = 0; j < 8; j++) {
|
||||
ret->dpp8().lane_sel[j] = j ^ 1;
|
||||
}
|
||||
bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1), src0, lane_sel_xor1);
|
||||
|
||||
/* Swap even lanes between mrt0 and mrt1. */
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, tmp, Operand(dst0, v1), src1,
|
||||
|
|
@ -2845,11 +2845,8 @@ lower_to_hw_instr(Program* program)
|
|||
Operand(clobber_vcc.physReg(), bld.lm));
|
||||
|
||||
/* Swap odd, even lanes of mrt0 again. */
|
||||
ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1),
|
||||
Operand(tmp.physReg(), v1));
|
||||
for (unsigned j = 0; j < 8; j++) {
|
||||
ret->dpp8().lane_sel[j] = j ^ 1;
|
||||
}
|
||||
bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1),
|
||||
Operand(tmp.physReg(), v1), lane_sel_xor1);
|
||||
|
||||
mrt0[i] = Operand(dst0, v1);
|
||||
mrt1[i] = Operand(dst1, v1);
|
||||
|
|
|
|||
|
|
@ -161,6 +161,8 @@ class Format(Enum):
|
|||
('uint8_t', 'row_mask', '0xF'),
|
||||
('uint8_t', 'bank_mask', '0xF'),
|
||||
('bool', 'bound_ctrl', 'true')]
|
||||
elif self == Format.DPP8:
|
||||
return [('uint32_t', 'lane_sel', 0)]
|
||||
elif self == Format.VOP3P:
|
||||
return [('uint8_t', 'opsel_lo', None),
|
||||
('uint8_t', 'opsel_hi', None)]
|
||||
|
|
|
|||
|
|
@ -186,8 +186,7 @@ struct InstrPred {
|
|||
if (a->isDPP8()) {
|
||||
DPP8_instruction& aDPP = a->dpp8();
|
||||
DPP8_instruction& bDPP = b->dpp8();
|
||||
return aDPP.pass_flags == bDPP.pass_flags &&
|
||||
!memcmp(aDPP.lane_sel, bDPP.lane_sel, sizeof(aDPP.lane_sel));
|
||||
return aDPP.pass_flags == bDPP.pass_flags && aDPP.lane_sel == bDPP.lane_sel;
|
||||
}
|
||||
if (a->isSDWA()) {
|
||||
SDWA_instruction& aSDWA = a->sdwa();
|
||||
|
|
|
|||
|
|
@ -4865,8 +4865,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
|
||||
if (dpp8) {
|
||||
DPP8_instruction* dpp = &instr->dpp8();
|
||||
for (unsigned j = 0; j < 8; ++j)
|
||||
dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
|
||||
dpp->lane_sel = info.instr->dpp8().lane_sel;
|
||||
if (mov_uses_mods)
|
||||
instr->format = asVOP3(instr->format);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -547,7 +547,7 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
|
||||
if (dpp8) {
|
||||
DPP8_instruction* dpp = &instr->dpp8();
|
||||
memcpy(dpp->lane_sel, mov->dpp8().lane_sel, sizeof(dpp->lane_sel));
|
||||
dpp->lane_sel = mov->dpp8().lane_sel;
|
||||
if (mov_uses_mods)
|
||||
instr->format = asVOP3(instr->format);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -709,9 +709,10 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
|
|||
fprintf(output, " bound_ctrl:1");
|
||||
} else if (instr->isDPP8()) {
|
||||
const DPP8_instruction& dpp = instr->dpp8();
|
||||
fprintf(output, " dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]", dpp.lane_sel[0], dpp.lane_sel[1],
|
||||
dpp.lane_sel[2], dpp.lane_sel[3], dpp.lane_sel[4], dpp.lane_sel[5], dpp.lane_sel[6],
|
||||
dpp.lane_sel[7]);
|
||||
fprintf(output, " dpp8:[");
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
/* Each lane selector is a 3-bit field, so extract it with 0x7. Masking with 0x8 would
 * only pick up bit 0 of the next lane's selector and print 0 or 8 instead of 0..7. */
fprintf(output, "%s%u", i ? "," : "", (dpp.lane_sel >> (i * 3)) & 0x7);
|
||||
fprintf(output, "]");
|
||||
} else if (instr->isSDWA()) {
|
||||
const SDWA_instruction& sdwa = instr->sdwa();
|
||||
if (!instr->isVOPC()) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue