mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-22 01:10:32 +01:00
aco: support DPP8
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13971>
This commit is contained in:
parent
5c3dfb4ef5
commit
da0412e55b
12 changed files with 201 additions and 96 deletions
|
|
@ -662,14 +662,14 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= vop3.neg_lo[i] << (29 + i);
|
||||
out.push_back(encoding);
|
||||
|
||||
} else if (instr->isDPP()) {
|
||||
} else if (instr->isDPP16()) {
|
||||
assert(ctx.chip_class >= GFX8);
|
||||
DPP_instruction& dpp = instr->dpp();
|
||||
DPP16_instruction& dpp = instr->dpp16();
|
||||
|
||||
/* first emit the instruction without the DPP operand */
|
||||
Operand dpp_op = instr->operands[0];
|
||||
instr->operands[0] = Operand(PhysReg{250}, v1);
|
||||
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP);
|
||||
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
|
||||
emit_instruction(ctx, out, instr);
|
||||
uint32_t encoding = (0xF & dpp.row_mask) << 28;
|
||||
encoding |= (0xF & dpp.bank_mask) << 24;
|
||||
|
|
@ -684,6 +684,20 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= (0xFF) & dpp_op.physReg();
|
||||
out.push_back(encoding);
|
||||
return;
|
||||
} else if (instr->isDPP8()) {
|
||||
assert(ctx.chip_class >= GFX10);
|
||||
DPP8_instruction& dpp = instr->dpp8();
|
||||
|
||||
/* first emit the instruction without the DPP operand */
|
||||
Operand dpp_op = instr->operands[0];
|
||||
instr->operands[0] = Operand(PhysReg{234}, v1);
|
||||
instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
|
||||
emit_instruction(ctx, out, instr);
|
||||
uint32_t encoding = (0xFF) & dpp_op.physReg();
|
||||
for (unsigned i = 0; i < 8; ++i)
|
||||
encoding |= dpp.lane_sel[i] << (8 + i * 3);
|
||||
out.push_back(encoding);
|
||||
return;
|
||||
} else if (instr->isSDWA()) {
|
||||
SDWA_instruction& sdwa = instr->sdwa();
|
||||
|
||||
|
|
|
|||
|
|
@ -536,9 +536,12 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
|
|||
("vop3", [Format.VOP3], 'VOP3_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
|
||||
("vop3p", [Format.VOP3P], 'VOP3P_instruction', [(1, 2), (1, 3)]),
|
||||
("vintrp", [Format.VINTRP], 'Interp_instruction', [(1, 2), (1, 3)]),
|
||||
("vop1_dpp", [Format.VOP1, Format.DPP], 'DPP_instruction', [(1, 1)]),
|
||||
("vop2_dpp", [Format.VOP2, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("vopc_dpp", [Format.VOPC, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2])),
|
||||
("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]),
|
||||
("vop2_dpp", [Format.VOP2, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("vopc_dpp", [Format.VOPC, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2])),
|
||||
("vop1_dpp8", [Format.VOP1, Format.DPP8], 'DPP8_instruction', [(1, 1)]),
|
||||
("vop2_dpp8", [Format.VOP2, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("vopc_dpp8", [Format.VOPC, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2])),
|
||||
("vop1_e64", [Format.VOP1, Format.VOP3], 'VOP3_instruction', itertools.product([1], [1])),
|
||||
("vop2_e64", [Format.VOP2, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("vopc_e64", [Format.VOPC, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2])),
|
||||
|
|
|
|||
|
|
@ -292,12 +292,12 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
|
||||
bool
|
||||
can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra, bool dpp8)
|
||||
{
|
||||
assert(instr->isVALU() && !instr->operands.empty());
|
||||
|
||||
if (instr->isDPP())
|
||||
return true;
|
||||
return instr->isDPP8() == dpp8;
|
||||
|
||||
if (instr->operands.size() && instr->operands[0].isLiteral())
|
||||
return false;
|
||||
|
|
@ -316,6 +316,8 @@ can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra)
|
|||
const VOP3_instruction* vop3 = &instr->vop3();
|
||||
if (vop3->clamp || vop3->omod || vop3->opsel)
|
||||
return false;
|
||||
if (dpp8)
|
||||
return false;
|
||||
if (instr->format == Format::VOP3)
|
||||
return false;
|
||||
if (instr->operands.size() > 1 && !instr->operands[1].isOfType(RegType::vgpr))
|
||||
|
|
@ -331,29 +333,39 @@ can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra)
|
|||
}
|
||||
|
||||
aco_ptr<Instruction>
|
||||
convert_to_DPP(aco_ptr<Instruction>& instr)
|
||||
convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8)
|
||||
{
|
||||
if (instr->isDPP())
|
||||
return NULL;
|
||||
|
||||
aco_ptr<Instruction> tmp = std::move(instr);
|
||||
Format format =
|
||||
(Format)(((uint32_t)tmp->format & ~(uint32_t)Format::VOP3) | (uint32_t)Format::DPP);
|
||||
instr.reset(create_instruction<DPP_instruction>(tmp->opcode, format, tmp->operands.size(),
|
||||
tmp->definitions.size()));
|
||||
Format format = (Format)(((uint32_t)tmp->format & ~(uint32_t)Format::VOP3) |
|
||||
(dpp8 ? (uint32_t)Format::DPP8 : (uint32_t)Format::DPP16));
|
||||
if (dpp8)
|
||||
instr.reset(create_instruction<DPP8_instruction>(tmp->opcode, format, tmp->operands.size(),
|
||||
tmp->definitions.size()));
|
||||
else
|
||||
instr.reset(create_instruction<DPP16_instruction>(tmp->opcode, format, tmp->operands.size(),
|
||||
tmp->definitions.size()));
|
||||
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
|
||||
for (unsigned i = 0; i < instr->definitions.size(); i++)
|
||||
instr->definitions[i] = tmp->definitions[i];
|
||||
|
||||
DPP_instruction* dpp = &instr->dpp();
|
||||
dpp->dpp_ctrl = dpp_quad_perm(0, 1, 2, 3);
|
||||
dpp->row_mask = 0xf;
|
||||
dpp->bank_mask = 0xf;
|
||||
if (dpp8) {
|
||||
DPP8_instruction* dpp = &instr->dpp8();
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
dpp->lane_sel[i] = i;
|
||||
} else {
|
||||
DPP16_instruction* dpp = &instr->dpp16();
|
||||
dpp->dpp_ctrl = dpp_quad_perm(0, 1, 2, 3);
|
||||
dpp->row_mask = 0xf;
|
||||
dpp->bank_mask = 0xf;
|
||||
|
||||
if (tmp->isVOP3()) {
|
||||
const VOP3_instruction* vop3 = &tmp->vop3();
|
||||
memcpy(dpp->neg, vop3->neg, sizeof(dpp->neg));
|
||||
memcpy(dpp->abs, vop3->abs, sizeof(dpp->abs));
|
||||
if (tmp->isVOP3()) {
|
||||
const VOP3_instruction* vop3 = &tmp->vop3();
|
||||
memcpy(dpp->neg, vop3->neg, sizeof(dpp->neg));
|
||||
memcpy(dpp->abs, vop3->abs, sizeof(dpp->abs));
|
||||
}
|
||||
}
|
||||
|
||||
if (instr->isVOPC() || instr->definitions.size() > 1)
|
||||
|
|
|
|||
|
|
@ -102,8 +102,9 @@ enum class Format : std::uint16_t {
|
|||
VOP3 = 1 << 11,
|
||||
/* Vector Parameter Interpolation Format */
|
||||
VINTRP = 1 << 12,
|
||||
DPP = 1 << 13,
|
||||
DPP16 = 1 << 13,
|
||||
SDWA = 1 << 14,
|
||||
DPP8 = 1 << 15,
|
||||
};
|
||||
|
||||
enum class instr_class : uint8_t {
|
||||
|
|
@ -294,7 +295,7 @@ asSDWA(Format format)
|
|||
constexpr Format
|
||||
withoutDPP(Format format)
|
||||
{
|
||||
return (Format)((uint32_t)format & ~(uint32_t)Format::DPP);
|
||||
return (Format)((uint32_t)format & ~((uint32_t)Format::DPP16 | (uint32_t)Format::DPP8));
|
||||
}
|
||||
|
||||
enum class RegType {
|
||||
|
|
@ -996,7 +997,8 @@ struct VOP2_instruction;
|
|||
struct VOPC_instruction;
|
||||
struct VOP3_instruction;
|
||||
struct Interp_instruction;
|
||||
struct DPP_instruction;
|
||||
struct DPP16_instruction;
|
||||
struct DPP8_instruction;
|
||||
struct SDWA_instruction;
|
||||
|
||||
struct Instruction {
|
||||
|
|
@ -1282,17 +1284,29 @@ struct Instruction {
|
|||
return *(Interp_instruction*)this;
|
||||
}
|
||||
constexpr bool isVINTRP() const noexcept { return (uint16_t)format & (uint16_t)Format::VINTRP; }
|
||||
DPP_instruction& dpp() noexcept
|
||||
DPP16_instruction& dpp16() noexcept
|
||||
{
|
||||
assert(isDPP());
|
||||
return *(DPP_instruction*)this;
|
||||
assert(isDPP16());
|
||||
return *(DPP16_instruction*)this;
|
||||
}
|
||||
const DPP_instruction& dpp() const noexcept
|
||||
const DPP16_instruction& dpp16() const noexcept
|
||||
{
|
||||
assert(isDPP());
|
||||
return *(DPP_instruction*)this;
|
||||
assert(isDPP16());
|
||||
return *(DPP16_instruction*)this;
|
||||
}
|
||||
constexpr bool isDPP() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP; }
|
||||
constexpr bool isDPP16() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP16; }
|
||||
DPP8_instruction& dpp8() noexcept
|
||||
{
|
||||
assert(isDPP8());
|
||||
return *(DPP8_instruction*)this;
|
||||
}
|
||||
const DPP8_instruction& dpp8() const noexcept
|
||||
{
|
||||
assert(isDPP8());
|
||||
return *(DPP8_instruction*)this;
|
||||
}
|
||||
constexpr bool isDPP8() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP8; }
|
||||
constexpr bool isDPP() const noexcept { return isDPP16() || isDPP8(); }
|
||||
SDWA_instruction& sdwa() noexcept
|
||||
{
|
||||
assert(isSDWA());
|
||||
|
|
@ -1405,7 +1419,7 @@ static_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8, "Unexpected
|
|||
* The swizzle applies to the src0 operand.
|
||||
*
|
||||
*/
|
||||
struct DPP_instruction : public Instruction {
|
||||
struct DPP16_instruction : public Instruction {
|
||||
bool abs[2];
|
||||
bool neg[2];
|
||||
uint16_t dpp_ctrl;
|
||||
|
|
@ -1414,7 +1428,12 @@ struct DPP_instruction : public Instruction {
|
|||
bool bound_ctrl : 1;
|
||||
uint8_t padding : 7;
|
||||
};
|
||||
static_assert(sizeof(DPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
static_assert(sizeof(DPP16_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
|
||||
struct DPP8_instruction : public Instruction {
|
||||
uint8_t lane_sel[8];
|
||||
};
|
||||
static_assert(sizeof(DPP8_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
|
||||
struct SubdwordSel {
|
||||
enum sdwa_sel : uint8_t {
|
||||
|
|
@ -1760,10 +1779,10 @@ bool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);
|
|||
bool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high);
|
||||
bool instr_is_16bit(chip_class chip, aco_opcode op);
|
||||
bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra);
|
||||
bool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra);
|
||||
bool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra, bool dpp8);
|
||||
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
|
||||
aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr);
|
||||
aco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr);
|
||||
aco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8);
|
||||
bool needs_exec_mask(const Instruction* instr);
|
||||
|
||||
aco_opcode get_ordered(aco_opcode op);
|
||||
|
|
|
|||
|
|
@ -74,8 +74,9 @@ class Format(Enum):
|
|||
VOPC = 1 << 10
|
||||
VOP3 = 1 << 11
|
||||
VINTRP = 1 << 12
|
||||
DPP = 1 << 13
|
||||
DPP16 = 1 << 13
|
||||
SDWA = 1 << 14
|
||||
DPP8 = 1 << 15
|
||||
|
||||
def get_builder_fields(self):
|
||||
if self == Format.SOPK:
|
||||
|
|
@ -147,7 +148,7 @@ class Format(Enum):
|
|||
elif self == Format.VINTRP:
|
||||
return [('unsigned', 'attribute', None),
|
||||
('unsigned', 'component', None)]
|
||||
elif self == Format.DPP:
|
||||
elif self == Format.DPP16:
|
||||
return [('uint16_t', 'dpp_ctrl', None),
|
||||
('uint8_t', 'row_mask', '0xF'),
|
||||
('uint8_t', 'bank_mask', '0xF'),
|
||||
|
|
|
|||
|
|
@ -87,8 +87,11 @@ struct InstrHash {
|
|||
if (instr->isVOP3())
|
||||
return hash_murmur_32<VOP3_instruction>(instr);
|
||||
|
||||
if (instr->isDPP())
|
||||
return hash_murmur_32<DPP_instruction>(instr);
|
||||
if (instr->isDPP16())
|
||||
return hash_murmur_32<DPP16_instruction>(instr);
|
||||
|
||||
if (instr->isDPP8())
|
||||
return hash_murmur_32<DPP8_instruction>(instr);
|
||||
|
||||
if (instr->isSDWA())
|
||||
return hash_murmur_32<SDWA_instruction>(instr);
|
||||
|
|
@ -172,15 +175,21 @@ struct InstrPred {
|
|||
}
|
||||
return a3.clamp == b3.clamp && a3.omod == b3.omod && a3.opsel == b3.opsel;
|
||||
}
|
||||
if (a->isDPP()) {
|
||||
DPP_instruction& aDPP = a->dpp();
|
||||
DPP_instruction& bDPP = b->dpp();
|
||||
if (a->isDPP16()) {
|
||||
DPP16_instruction& aDPP = a->dpp16();
|
||||
DPP16_instruction& bDPP = b->dpp16();
|
||||
return aDPP.pass_flags == bDPP.pass_flags && aDPP.dpp_ctrl == bDPP.dpp_ctrl &&
|
||||
aDPP.bank_mask == bDPP.bank_mask && aDPP.row_mask == bDPP.row_mask &&
|
||||
aDPP.bound_ctrl == bDPP.bound_ctrl && aDPP.abs[0] == bDPP.abs[0] &&
|
||||
aDPP.abs[1] == bDPP.abs[1] && aDPP.neg[0] == bDPP.neg[0] &&
|
||||
aDPP.neg[1] == bDPP.neg[1];
|
||||
}
|
||||
if (a->isDPP8()) {
|
||||
DPP8_instruction& aDPP = a->dpp8();
|
||||
DPP8_instruction& bDPP = b->dpp8();
|
||||
return aDPP.pass_flags == bDPP.pass_flags &&
|
||||
!memcmp(aDPP.lane_sel, bDPP.lane_sel, sizeof(aDPP.lane_sel));
|
||||
}
|
||||
if (a->isSDWA()) {
|
||||
SDWA_instruction& aSDWA = a->sdwa();
|
||||
SDWA_instruction& bSDWA = b->sdwa();
|
||||
|
|
|
|||
|
|
@ -120,12 +120,14 @@ enum Label {
|
|||
label_canonicalized = 1ull << 32,
|
||||
label_extract = 1ull << 33,
|
||||
label_insert = 1ull << 34,
|
||||
label_dpp = 1ull << 35,
|
||||
label_dpp16 = 1ull << 35,
|
||||
label_dpp8 = 1ull << 36,
|
||||
};
|
||||
|
||||
static constexpr uint64_t instr_usedef_labels =
|
||||
label_vec | label_mul | label_mad | label_add_sub | label_vop3p | label_bitwise |
|
||||
label_uniform_bitwise | label_minmax | label_vopc | label_usedef | label_extract | label_dpp;
|
||||
label_uniform_bitwise | label_minmax | label_vopc | label_usedef | label_extract | label_dpp16 |
|
||||
label_dpp8;
|
||||
static constexpr uint64_t instr_mod_labels =
|
||||
label_omod2 | label_omod4 | label_omod5 | label_clamp | label_insert;
|
||||
|
||||
|
|
@ -455,13 +457,21 @@ struct ssa_info {
|
|||
|
||||
bool is_insert() { return label & label_insert; }
|
||||
|
||||
void set_dpp(Instruction* mov)
|
||||
void set_dpp16(Instruction* mov)
|
||||
{
|
||||
add_label(label_dpp);
|
||||
add_label(label_dpp16);
|
||||
instr = mov;
|
||||
}
|
||||
|
||||
bool is_dpp() { return label & label_dpp; }
|
||||
void set_dpp8(Instruction* mov)
|
||||
{
|
||||
add_label(label_dpp8);
|
||||
instr = mov;
|
||||
}
|
||||
|
||||
bool is_dpp() { return label & (label_dpp16 | label_dpp8); }
|
||||
bool is_dpp16() { return label & label_dpp16; }
|
||||
bool is_dpp8() { return label & label_dpp8; }
|
||||
};
|
||||
|
||||
struct opt_ctx {
|
||||
|
|
@ -1215,7 +1225,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (instr->isSDWA())
|
||||
can_use_mod = can_use_mod && instr->sdwa().sel[i].size() == 4;
|
||||
else
|
||||
can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr));
|
||||
can_use_mod = can_use_mod && (instr->isDPP16() || can_use_VOP3(ctx, instr));
|
||||
|
||||
if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) {
|
||||
instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32;
|
||||
|
|
@ -1228,8 +1238,8 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (!instr->isDPP() && !instr->isSDWA())
|
||||
to_VOP3(ctx, instr);
|
||||
instr->operands[i].setTemp(info.temp);
|
||||
if (instr->isDPP() && !instr->dpp().abs[i])
|
||||
instr->dpp().neg[i] = true;
|
||||
if (instr->isDPP16() && !instr->dpp16().abs[i])
|
||||
instr->dpp16().neg[i] = true;
|
||||
else if (instr->isSDWA() && !instr->sdwa().abs[i])
|
||||
instr->sdwa().neg[i] = true;
|
||||
else if (instr->isVOP3() && !instr->vop3().abs[i])
|
||||
|
|
@ -1239,8 +1249,8 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (!instr->isDPP() && !instr->isSDWA())
|
||||
to_VOP3(ctx, instr);
|
||||
instr->operands[i] = Operand(info.temp);
|
||||
if (instr->isDPP())
|
||||
instr->dpp().abs[i] = true;
|
||||
if (instr->isDPP16())
|
||||
instr->dpp16().abs[i] = true;
|
||||
else if (instr->isSDWA())
|
||||
instr->sdwa().abs[i] = true;
|
||||
else
|
||||
|
|
@ -1579,10 +1589,12 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
break;
|
||||
case aco_opcode::v_mov_b32:
|
||||
if (instr->isDPP()) {
|
||||
if (instr->isDPP16()) {
|
||||
/* anything else doesn't make sense in SSA */
|
||||
assert(instr->dpp().row_mask == 0xf && instr->dpp().bank_mask == 0xf);
|
||||
ctx.info[instr->definitions[0].tempId()].set_dpp(instr.get());
|
||||
assert(instr->dpp16().row_mask == 0xf && instr->dpp16().bank_mask == 0xf);
|
||||
ctx.info[instr->definitions[0].tempId()].set_dpp16(instr.get());
|
||||
} else if (instr->isDPP8()) {
|
||||
ctx.info[instr->definitions[0].tempId()].set_dpp8(instr.get());
|
||||
}
|
||||
break;
|
||||
case aco_opcode::p_is_helper:
|
||||
|
|
@ -2250,10 +2262,10 @@ combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
new_sdwa->clamp = cmp_sdwa.clamp;
|
||||
new_sdwa->omod = cmp_sdwa.omod;
|
||||
new_instr = new_sdwa;
|
||||
} else if (cmp->isDPP()) {
|
||||
DPP_instruction* new_dpp = create_instruction<DPP_instruction>(
|
||||
new_opcode, (Format)((uint16_t)Format::DPP | (uint16_t)Format::VOPC), 2, 1);
|
||||
DPP_instruction& cmp_dpp = cmp->dpp();
|
||||
} else if (cmp->isDPP16()) {
|
||||
DPP16_instruction* new_dpp = create_instruction<DPP16_instruction>(
|
||||
new_opcode, (Format)((uint16_t)Format::DPP16 | (uint16_t)Format::VOPC), 2, 1);
|
||||
DPP16_instruction& cmp_dpp = cmp->dpp16();
|
||||
memcpy(new_dpp->abs, cmp_dpp.abs, sizeof(new_dpp->abs));
|
||||
memcpy(new_dpp->neg, cmp_dpp.neg, sizeof(new_dpp->neg));
|
||||
new_dpp->dpp_ctrl = cmp_dpp.dpp_ctrl;
|
||||
|
|
@ -2261,6 +2273,12 @@ combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
new_dpp->bank_mask = cmp_dpp.bank_mask;
|
||||
new_dpp->bound_ctrl = cmp_dpp.bound_ctrl;
|
||||
new_instr = new_dpp;
|
||||
} else if (cmp->isDPP8()) {
|
||||
DPP8_instruction* new_dpp = create_instruction<DPP8_instruction>(
|
||||
new_opcode, (Format)((uint16_t)Format::DPP8 | (uint16_t)Format::VOPC), 2, 1);
|
||||
DPP8_instruction& cmp_dpp = cmp->dpp8();
|
||||
memcpy(new_dpp->lane_sel, cmp_dpp.lane_sel, sizeof(new_dpp->lane_sel));
|
||||
new_instr = new_dpp;
|
||||
} else {
|
||||
new_instr = create_instruction<VOPC_instruction>(new_opcode, Format::VOPC, 2, 1);
|
||||
instr->definitions[0].setHint(vcc);
|
||||
|
|
@ -4005,23 +4023,34 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
|
||||
aco_opcode swapped_op;
|
||||
if (info.is_dpp() && info.instr->pass_flags == instr->pass_flags &&
|
||||
(i == 0 || can_swap_operands(instr, &swapped_op)) && can_use_DPP(instr, true) &&
|
||||
!instr->isDPP()) {
|
||||
convert_to_DPP(instr);
|
||||
DPP_instruction* dpp = static_cast<DPP_instruction*>(instr.get());
|
||||
if (i) {
|
||||
instr->opcode = swapped_op;
|
||||
std::swap(instr->operands[0], instr->operands[1]);
|
||||
std::swap(dpp->neg[0], dpp->neg[1]);
|
||||
std::swap(dpp->abs[0], dpp->abs[1]);
|
||||
(i == 0 || can_swap_operands(instr, &swapped_op)) &&
|
||||
can_use_DPP(instr, true, info.is_dpp8()) && !instr->isDPP()) {
|
||||
bool dpp8 = info.is_dpp8();
|
||||
convert_to_DPP(instr, dpp8);
|
||||
if (dpp8) {
|
||||
DPP8_instruction* dpp = &instr->dpp8();
|
||||
for (unsigned j = 0; j < 8; ++j)
|
||||
dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
|
||||
if (i) {
|
||||
instr->opcode = swapped_op;
|
||||
std::swap(instr->operands[0], instr->operands[1]);
|
||||
}
|
||||
} else {
|
||||
DPP16_instruction* dpp = &instr->dpp16();
|
||||
if (i) {
|
||||
instr->opcode = swapped_op;
|
||||
std::swap(instr->operands[0], instr->operands[1]);
|
||||
std::swap(dpp->neg[0], dpp->neg[1]);
|
||||
std::swap(dpp->abs[0], dpp->abs[1]);
|
||||
}
|
||||
dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
|
||||
dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
|
||||
dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
|
||||
dpp->abs[0] |= info.instr->dpp16().abs[0];
|
||||
}
|
||||
if (--ctx.uses[info.instr->definitions[0].tempId()])
|
||||
ctx.uses[info.instr->operands[0].tempId()]++;
|
||||
instr->operands[0].setTemp(info.instr->operands[0].getTemp());
|
||||
dpp->dpp_ctrl = info.instr->dpp().dpp_ctrl;
|
||||
dpp->bound_ctrl = info.instr->dpp().bound_ctrl;
|
||||
dpp->neg[0] ^= info.instr->dpp().neg[0] && !dpp->abs[0];
|
||||
dpp->abs[0] |= info.instr->dpp().abs[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -386,7 +386,7 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
*
|
||||
*/
|
||||
|
||||
if (!instr->isVALU() || instr->isDPP() || !can_use_DPP(instr, false))
|
||||
if (!instr->isVALU() || instr->isDPP())
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < MIN2(2, instr->operands.size()); i++) {
|
||||
|
|
@ -394,9 +394,12 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (!op_instr_idx.found())
|
||||
continue;
|
||||
|
||||
Instruction* mov = ctx.get(op_instr_idx);
|
||||
const Instruction* mov = ctx.get(op_instr_idx);
|
||||
if (mov->opcode != aco_opcode::v_mov_b32 || !mov->isDPP())
|
||||
continue;
|
||||
bool dpp8 = mov->isDPP8();
|
||||
if (!can_use_DPP(instr, false, dpp8))
|
||||
return;
|
||||
|
||||
/* If we aren't going to remove the v_mov_b32, we have to ensure that it doesn't overwrite
|
||||
* its own operand before we use it.
|
||||
|
|
@ -412,25 +415,34 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (i && !can_swap_operands(instr, &instr->opcode))
|
||||
continue;
|
||||
|
||||
/* anything else doesn't make sense in SSA */
|
||||
assert(mov->dpp().row_mask == 0xf && mov->dpp().bank_mask == 0xf);
|
||||
if (!dpp8) /* anything else doesn't make sense in SSA */
|
||||
assert(mov->dpp16().row_mask == 0xf && mov->dpp16().bank_mask == 0xf);
|
||||
|
||||
if (--ctx.uses[mov->definitions[0].tempId()])
|
||||
ctx.uses[mov->operands[0].tempId()]++;
|
||||
|
||||
convert_to_DPP(instr);
|
||||
convert_to_DPP(instr, dpp8);
|
||||
|
||||
DPP_instruction* dpp = &instr->dpp();
|
||||
if (i) {
|
||||
std::swap(dpp->operands[0], dpp->operands[1]);
|
||||
std::swap(dpp->neg[0], dpp->neg[1]);
|
||||
std::swap(dpp->abs[0], dpp->abs[1]);
|
||||
if (dpp8) {
|
||||
DPP8_instruction* dpp = &instr->dpp8();
|
||||
if (i) {
|
||||
std::swap(dpp->operands[0], dpp->operands[1]);
|
||||
}
|
||||
dpp->operands[0] = mov->operands[0];
|
||||
memcpy(dpp->lane_sel, mov->dpp8().lane_sel, sizeof(dpp->lane_sel));
|
||||
} else {
|
||||
DPP16_instruction* dpp = &instr->dpp16();
|
||||
if (i) {
|
||||
std::swap(dpp->operands[0], dpp->operands[1]);
|
||||
std::swap(dpp->neg[0], dpp->neg[1]);
|
||||
std::swap(dpp->abs[0], dpp->abs[1]);
|
||||
}
|
||||
dpp->operands[0] = mov->operands[0];
|
||||
dpp->dpp_ctrl = mov->dpp16().dpp_ctrl;
|
||||
dpp->bound_ctrl = true;
|
||||
dpp->neg[0] ^= mov->dpp16().neg[0] && !dpp->abs[0];
|
||||
dpp->abs[0] |= mov->dpp16().abs[0];
|
||||
}
|
||||
dpp->operands[0] = mov->operands[0];
|
||||
dpp->dpp_ctrl = mov->dpp().dpp_ctrl;
|
||||
dpp->bound_ctrl = true;
|
||||
dpp->neg[0] ^= mov->dpp().neg[0] && !dpp->abs[0];
|
||||
dpp->abs[0] |= mov->dpp().abs[0];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -566,8 +566,8 @@ print_instr_format_specific(const Instruction* instr, FILE* output)
|
|||
fprintf(output, " clamp");
|
||||
if (vop3.opsel & (1 << 3))
|
||||
fprintf(output, " opsel_hi");
|
||||
} else if (instr->isDPP()) {
|
||||
const DPP_instruction& dpp = instr->dpp();
|
||||
} else if (instr->isDPP16()) {
|
||||
const DPP16_instruction& dpp = instr->dpp16();
|
||||
if (dpp.dpp_ctrl <= 0xff) {
|
||||
fprintf(output, " quad_perm:[%d,%d,%d,%d]", dpp.dpp_ctrl & 0x3, (dpp.dpp_ctrl >> 2) & 0x3,
|
||||
(dpp.dpp_ctrl >> 4) & 0x3, (dpp.dpp_ctrl >> 6) & 0x3);
|
||||
|
|
@ -602,6 +602,11 @@ print_instr_format_specific(const Instruction* instr, FILE* output)
|
|||
fprintf(output, " bank_mask:0x%.1x", dpp.bank_mask);
|
||||
if (dpp.bound_ctrl)
|
||||
fprintf(output, " bound_ctrl:1");
|
||||
} else if (instr->isDPP8()) {
|
||||
const DPP8_instruction& dpp = instr->dpp8();
|
||||
fprintf(output, " dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]", dpp.lane_sel[0], dpp.lane_sel[1],
|
||||
dpp.lane_sel[2], dpp.lane_sel[3], dpp.lane_sel[4], dpp.lane_sel[5], dpp.lane_sel[6],
|
||||
dpp.lane_sel[7]);
|
||||
} else if (instr->isSDWA()) {
|
||||
const SDWA_instruction& sdwa = instr->sdwa();
|
||||
switch (sdwa.omod) {
|
||||
|
|
@ -668,8 +673,8 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
|
|||
neg[i] = vop3.neg[i];
|
||||
opsel[i] = vop3.opsel & (1 << i);
|
||||
}
|
||||
} else if (instr->isDPP()) {
|
||||
const DPP_instruction& dpp = instr->dpp();
|
||||
} else if (instr->isDPP16()) {
|
||||
const DPP16_instruction& dpp = instr->dpp16();
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
abs[i] = dpp.abs[i];
|
||||
neg[i] = dpp.neg[i];
|
||||
|
|
|
|||
|
|
@ -116,7 +116,8 @@ validate_ir(Program* program)
|
|||
/* check base format */
|
||||
Format base_format = instr->format;
|
||||
base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
|
||||
base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP);
|
||||
base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP16);
|
||||
base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP8);
|
||||
if ((uint32_t)base_format & (uint32_t)Format::VOP1)
|
||||
base_format = Format::VOP1;
|
||||
else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
|
||||
|
|
|
|||
|
|
@ -988,7 +988,7 @@ BEGIN_TEST(optimizer.dpp)
|
|||
//! v1: %res3 = v_add_f32 -%a, %b row_mirror bound_ctrl:1
|
||||
//! p_unit_test 3, %res3
|
||||
auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
tmp3.instr->dpp().neg[0] = true;
|
||||
tmp3.instr->dpp16().neg[0] = true;
|
||||
Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tmp3, b);
|
||||
writeout(3, res3);
|
||||
|
||||
|
|
@ -1010,7 +1010,7 @@ BEGIN_TEST(optimizer.dpp)
|
|||
//! v1: %res6 = v_add_f32 |%a|, %b row_mirror bound_ctrl:1
|
||||
//! p_unit_test 6, %res6
|
||||
auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
tmp6.instr->dpp().neg[0] = true;
|
||||
tmp6.instr->dpp16().neg[0] = true;
|
||||
auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp6, b);
|
||||
res6.instr->vop3().abs[0] = true;
|
||||
writeout(6, res6);
|
||||
|
|
|
|||
|
|
@ -319,7 +319,7 @@ BEGIN_TEST(optimizer_postRA.dpp)
|
|||
//! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
|
||||
//! p_unit_test 3, %res3:v[2]
|
||||
auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
|
||||
tmp3.instr->dpp().neg[0] = true;
|
||||
tmp3.instr->dpp16().neg[0] = true;
|
||||
Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b);
|
||||
writeout(3, Operand(res3, reg_v2));
|
||||
|
||||
|
|
@ -341,7 +341,7 @@ BEGIN_TEST(optimizer_postRA.dpp)
|
|||
//! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1
|
||||
//! p_unit_test 6, %res6:v[2]
|
||||
auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
|
||||
tmp6.instr->dpp().neg[0] = true;
|
||||
tmp6.instr->dpp16().neg[0] = true;
|
||||
auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b);
|
||||
res6.instr->vop3().abs[0] = true;
|
||||
writeout(6, Operand(res6, reg_v2));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue