aco: rewrite SDWA selector

This commit introduces a new struct SubdwordSel
in order to ease and clean up the usage of SDWA
selections. This includes removing the distinction
between register-allocated and fixed SDWA selections.
Instead, SDWA selections can now also access the high
bits of subdword variables. Alignment and sizes are
validated accordingly. Size, offset and sign_extend
can be evaluated via helper methods.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12640>
This commit is contained in:
Daniel Schürmann 2021-08-30 17:58:36 +02:00
parent cc4682ed47
commit 9e3ff06c38
11 changed files with 164 additions and 187 deletions

View file

@ -61,19 +61,6 @@ struct asm_context {
int subvector_begin_pos = -1;
};
/* Resolve an SDWA selection into the selector value that gets encoded.
 *
 * Selections carrying the sdwa_isra flag only describe an access size
 * (sel & sdwa_rasize); the concrete byte/word is taken from the byte offset
 * of the register 'reg'. Every other selection is returned masked with
 * sdwa_asuint.
 */
static uint32_t
get_sdwa_sel(unsigned sel, PhysReg reg)
{
   /* No sdwa_isra flag: the selection already names a fixed byte/word/dword. */
   if (!(sel & sdwa_isra))
      return sel & sdwa_asuint;

   const unsigned access_size = sdwa_rasize & sel;

   /* Byte access: the register's byte offset is the selector. */
   if (access_size == 1)
      return reg.byte();

   /* Otherwise a word access (size == 2): pick the word containing that byte. */
   return sdwa_isword | (reg.byte() >> 1);
}
unsigned
get_mimg_nsa_dwords(const Instruction* instr)
{
@ -715,23 +702,23 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
}
encoding |= (sdwa.clamp ? 1 : 0) << 13;
} else {
encoding |= get_sdwa_sel(sdwa.dst_sel, instr->definitions[0].physReg()) << 8;
uint32_t dst_u = sdwa.dst_sel & sdwa_sext ? 1 : 0;
if (sdwa.dst_preserve || (sdwa.dst_sel & sdwa_isra))
encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8;
uint32_t dst_u = sdwa.dst_sel.sign_extend() ? 1 : 0;
if (sdwa.dst_preserve)
dst_u = 2;
encoding |= dst_u << 11;
encoding |= (sdwa.clamp ? 1 : 0) << 13;
encoding |= sdwa.omod << 14;
}
encoding |= get_sdwa_sel(sdwa.sel[0], sdwa_op.physReg()) << 16;
encoding |= sdwa.sel[0] & sdwa_sext ? 1 << 19 : 0;
encoding |= sdwa.sel[0].to_sdwa_sel(sdwa_op.physReg().byte()) << 16;
encoding |= sdwa.sel[0].sign_extend() ? 1 << 19 : 0;
encoding |= sdwa.abs[0] << 21;
encoding |= sdwa.neg[0] << 20;
if (instr->operands.size() >= 2) {
encoding |= get_sdwa_sel(sdwa.sel[1], instr->operands[1].physReg()) << 24;
encoding |= sdwa.sel[1] & sdwa_sext ? 1 << 27 : 0;
encoding |= sdwa.sel[1].to_sdwa_sel(instr->operands[1].physReg().byte()) << 24;
encoding |= sdwa.sel[1].sign_extend() ? 1 << 27 : 0;
encoding |= sdwa.abs[1] << 29;
encoding |= sdwa.neg[1] << 28;
}

View file

@ -644,11 +644,9 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(src);
sdwa->definitions[0] = Definition(tmp);
if (sign_extend)
sdwa->sel[0] = src_bits == 8 ? sdwa_sbyte : sdwa_sword;
else
sdwa->sel[0] = src_bits == 8 ? sdwa_ubyte : sdwa_uword;
sdwa->dst_sel = tmp.bytes() == 2 ? sdwa_uword : sdwa_udword;
sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend);
sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : SubdwordSel::dword;
sdwa->dst_preserve = tmp.bytes() == 2;
bld.insert(std::move(sdwa));
} else {
assert(src_bits < 32);

View file

@ -276,24 +276,12 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
if (i >= 2)
break;
switch (instr->operands[i].bytes()) {
case 1: sdwa.sel[i] = sdwa_ubyte; break;
case 2: sdwa.sel[i] = sdwa_uword; break;
case 4: sdwa.sel[i] = sdwa_udword; break;
}
}
switch (instr->definitions[0].bytes()) {
case 1:
sdwa.dst_sel = sdwa_ubyte;
sdwa.dst_preserve = true;
break;
case 2:
sdwa.dst_sel = sdwa_uword;
sdwa.dst_preserve = true;
break;
case 4: sdwa.dst_sel = sdwa_udword; break;
sdwa.sel[i] = SubdwordSel(instr->operands[i].bytes(), 0, false);
}
sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
sdwa.dst_preserve = sdwa.dst_sel.size() < 4;
if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8)
instr->definitions[0].setFixed(vcc);
if (instr->definitions.size() >= 2)

View file

@ -1405,40 +1405,53 @@ struct DPP_instruction : public Instruction {
};
static_assert(sizeof(DPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
enum sdwa_sel : uint8_t {
/* masks */
sdwa_wordnum = 0x1,
sdwa_bytenum = 0x3,
sdwa_asuint = 0x7 | 0x10,
sdwa_rasize = 0x3,
/* SubdwordSel packs a sub-dword selection (size in bytes, byte offset and a
 * sign-extension flag) into a single sdwa_sel byte. */
struct SubdwordSel {
enum sdwa_sel : uint8_t {
/* The size in bytes is stored shifted left by 2 (1 -> 0x4, 2 -> 0x8,
 * 4 -> 0x10); sext is a separate flag bit above the size field. */
ubyte = 0x4,
uword = 0x8,
dword = 0x10,
sext = 0x20,
sbyte = ubyte | sext,
sword = uword | sext,
/* flags */
sdwa_isword = 0x4,
sdwa_sext = 0x8,
sdwa_isra = 0x10,
/* Size combined with a byte offset in the low two bits; note that the
 * second word uses byte offset 2. */
ubyte0 = ubyte,
ubyte1 = ubyte | 1,
ubyte2 = ubyte | 2,
ubyte3 = ubyte | 3,
sbyte0 = sbyte,
sbyte1 = sbyte | 1,
sbyte2 = sbyte | 2,
sbyte3 = sbyte | 3,
uword0 = uword,
uword1 = uword | 2,
sword0 = sword,
sword1 = sword | 2,
};
/* specific values */
sdwa_ubyte0 = 0,
sdwa_ubyte1 = 1,
sdwa_ubyte2 = 2,
sdwa_ubyte3 = 3,
sdwa_uword0 = sdwa_isword | 0,
sdwa_uword1 = sdwa_isword | 1,
sdwa_udword = 6,
/* A default-constructed selection is all-zero and converts to false. */
SubdwordSel() : sel((sdwa_sel)0) {}
constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {}
/* Builds a selection from its parts: offset occupies bits 0-1, size starts
 * at bit 2 and sign_extend sets the sext flag. */
constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend)
: sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset))
{}
constexpr operator sdwa_sel() const { return sel; }
/* True for any selection other than the all-zero default. */
explicit operator bool() const { return sel != 0; }
sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
sdwa_sword0 = sdwa_uword0 | sdwa_sext,
sdwa_sword1 = sdwa_uword1 | sdwa_sext,
sdwa_sdword = sdwa_udword | sdwa_sext,
/* Accessors decoding the packed fields described above. */
constexpr unsigned size() const { return (sel >> 2) & 0x7; }
constexpr unsigned offset() const { return sel & 0x3; }
constexpr bool sign_extend() const { return sel & sext; }
/* Computes the selector value for this selection when the accessed
 * operand/definition starts at byte 'reg_byte_offset' of its register:
 * the absolute byte index for 1-byte selections, 4 + the word index for
 * 2-byte selections and 6 for everything else. */
constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const
{
reg_byte_offset += offset();
if (size() == 1)
return reg_byte_offset;
else if (size() == 2)
return 4 + (reg_byte_offset >> 1);
else
return 6;
}
/* register-allocated */
sdwa_ubyte = 1 | sdwa_isra,
sdwa_uword = 2 | sdwa_isra,
sdwa_sbyte = sdwa_ubyte | sdwa_sext,
sdwa_sword = sdwa_uword | sdwa_sext,
private:
sdwa_sel sel;
};
/**
@ -1452,8 +1465,8 @@ enum sdwa_sel : uint8_t {
struct SDWA_instruction : public Instruction {
/* these destination modifiers aren't available with VOPC except for
* clamp on GFX8 */
uint8_t sel[2];
uint8_t dst_sel;
SubdwordSel sel[2];
SubdwordSel dst_sel;
bool neg[2];
bool abs[2];
bool dst_preserve : 1;

View file

@ -516,11 +516,9 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
if (reduce_op == imin8 || reduce_op == imax8)
sdwa->sel[0] = sdwa_sbyte;
else
sdwa->sel[0] = sdwa_ubyte;
sdwa->dst_sel = sdwa_udword;
bool sext = reduce_op == imin8 || reduce_op == imax8;
sdwa->sel[0] = SubdwordSel(1, 0, sext);
sdwa->dst_sel = SubdwordSel::dword;
bld.insert(std::move(sdwa));
} else {
aco_opcode opcode;
@ -541,11 +539,9 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
if (reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16)
sdwa->sel[0] = sdwa_sword;
else
sdwa->sel[0] = sdwa_uword;
sdwa->dst_sel = sdwa_udword;
bool sext = reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16;
sdwa->sel[0] = SubdwordSel(2, 0, sext);
sdwa->dst_sel = SubdwordSel::dword;
bld.insert(std::move(sdwa));
} else if (ctx->program->chip_class == GFX6 || ctx->program->chip_class == GFX7) {
aco_opcode opcode;
@ -2099,10 +2095,9 @@ lower_to_hw_instr(Program* program)
sdwa->operands[0] = Operand(op.physReg().advance(-op.physReg().byte()),
RegClass::get(op.regClass().type(), 4));
sdwa->definitions[0] = dst;
sdwa->sel[0] = sdwa_ubyte0 + op.physReg().byte() + index;
if (signext)
sdwa->sel[0] |= sdwa_sext;
sdwa->dst_sel = sdwa_uword;
sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext);
sdwa->dst_sel = SubdwordSel::uword;
sdwa->dst_preserve = true;
bld.insert(std::move(sdwa));
}
break;
@ -2143,8 +2138,8 @@ lower_to_hw_instr(Program* program)
(Format)((uint16_t)Format::VOP1 | (uint16_t)Format::SDWA), 1, 1)};
sdwa->operands[0] = op;
sdwa->definitions[0] = dst;
sdwa->sel[0] = sdwa_udword;
sdwa->dst_sel = (bits == 8 ? sdwa_ubyte0 : sdwa_uword0) + (offset / bits);
sdwa->sel[0] = SubdwordSel::dword;
sdwa->dst_sel = SubdwordSel(bits / 8, offset / 8, false);
bld.insert(std::move(sdwa));
} else {
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
@ -2157,7 +2152,7 @@ lower_to_hw_instr(Program* program)
RegClass::get(op.regClass().type(), 4));
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), sdwa_op)
.instr->sdwa()
.sel[1] = sdwa_ubyte0 + op.physReg().byte();
.sel[1] = SubdwordSel(1, op.physReg().byte(), false);
}
break;
}

View file

@ -178,8 +178,8 @@ class Format(Enum):
res = ''
if self == Format.SDWA:
for i in range(min(num_operands, 2)):
res += 'instr->sel[{0}] = op{0}.op.bytes() == 2 ? sdwa_uword : (op{0}.op.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'.format(i)
res += 'instr->dst_sel = def0.bytes() == 2 ? sdwa_uword : (def0.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'
res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
res += 'if (def0.bytes() < 4) instr->dst_preserve = true;'
return res

View file

@ -785,36 +785,33 @@ fixed_to_exec(Operand op)
return op.isFixed() && op.physReg() == exec;
}
/* Derives the source selection described by 'instr'.
 *
 * For p_extract, operands[1] is the element index, operands[2] the element
 * width in bits and operands[3] the sign-extension flag. A p_insert whose
 * index operand is 0 yields an unsigned byte/word selection. Any other
 * instruction yields the "no selection" value (-1 in the int variant, a
 * default-constructed SubdwordSel in the SubdwordSel variant).
 */
int
SubdwordSel
parse_extract(Instruction* instr)
{
if (instr->opcode == aco_opcode::p_extract) {
bool is_byte = instr->operands[2].constantEquals(8);
unsigned index = instr->operands[1].constantValue();
unsigned sel = (is_byte ? sdwa_ubyte0 : sdwa_uword0) + index;
if (!instr->operands[3].constantEquals(0))
sel |= sdwa_sext;
return sel;
/* operands[2] is a width in bits; the selection works in bytes */
unsigned size = instr->operands[2].constantValue() / 8;
/* the index counts elements of 'size' bytes, so scale it to a byte offset */
unsigned offset = instr->operands[1].constantValue() * size;
bool sext = instr->operands[3].constantEquals(1);
return SubdwordSel(size, offset, sext);
} else if (instr->opcode == aco_opcode::p_insert && instr->operands[1].constantEquals(0)) {
return instr->operands[2].constantEquals(8) ? sdwa_ubyte0 : sdwa_uword0;
return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword;
} else {
return -1;
return SubdwordSel();
}
}
/* Derives the destination selection described by 'instr'.
 *
 * A p_extract with index 0 (operands[1]) and operands[3] equal to 0 yields
 * an unsigned byte/word selection, chosen by whether operands[2] is 8.
 * For p_insert, operands[1] is the element index and operands[2] the element
 * width in bits; the result is never sign-extended. Any other instruction
 * yields the "no selection" value (-1 in the int variant, a
 * default-constructed SubdwordSel in the SubdwordSel variant).
 */
int
SubdwordSel
parse_insert(Instruction* instr)
{
if (instr->opcode == aco_opcode::p_extract && instr->operands[3].constantEquals(0) &&
instr->operands[1].constantEquals(0)) {
return instr->operands[2].constantEquals(8) ? sdwa_ubyte0 : sdwa_uword0;
return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword;
} else if (instr->opcode == aco_opcode::p_insert) {
bool is_byte = instr->operands[2].constantEquals(8);
unsigned index = instr->operands[1].constantValue();
unsigned sel = (is_byte ? sdwa_ubyte0 : sdwa_uword0) + index;
return sel;
/* operands[2] is a width in bits; the selection works in bytes */
unsigned size = instr->operands[2].constantValue() / 8;
/* the index counts elements of 'size' bytes, so scale it to a byte offset */
unsigned offset = instr->operands[1].constantValue() * size;
return SubdwordSel(size, offset, false);
} else {
return -1;
return SubdwordSel();
}
}
@ -825,20 +822,21 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
return false;
Temp tmp = info.instr->operands[0].getTemp();
unsigned sel = parse_extract(info.instr);
SubdwordSel sel = parse_extract(info.instr);
if (sel == sdwa_udword || sel == sdwa_sdword) {
if (!sel) {
return false;
} else if (sel.size() == 4) {
return true;
} else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel <= sdwa_ubyte3) {
} else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) {
return true;
} else if (can_use_SDWA(ctx.program->chip_class, instr, true) &&
(tmp.type() == RegType::vgpr || ctx.program->chip_class >= GFX9)) {
if (instr->isSDWA() &&
(static_cast<SDWA_instruction*>(instr.get())->sel[idx] & sdwa_asuint) != sdwa_udword)
if (instr->isSDWA() && instr->sdwa().sel[idx] != SubdwordSel::dword)
return false;
return true;
} else if (instr->isVOP3() && (sel & sdwa_isword) &&
can_use_opsel(ctx.program->chip_class, instr->opcode, idx, (sel & sdwa_wordnum)) &&
} else if (instr->isVOP3() && sel.size() == 2 &&
can_use_opsel(ctx.program->chip_class, instr->opcode, idx, sel.offset()) &&
!(instr->vop3().opsel & (1 << idx))) {
return true;
} else {
@ -853,22 +851,24 @@ void
apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
{
Temp tmp = info.instr->operands[0].getTemp();
unsigned sel = parse_extract(info.instr);
SubdwordSel sel = parse_extract(info.instr);
assert(sel);
if (sel == sdwa_udword || sel == sdwa_sdword) {
} else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel <= sdwa_ubyte3) {
switch (sel) {
case sdwa_ubyte0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break;
case sdwa_ubyte1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break;
case sdwa_ubyte2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break;
case sdwa_ubyte3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break;
if (sel.size() == 4) {
/* full dword selection */
} else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) {
switch (sel.offset()) {
case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break;
case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break;
case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break;
case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break;
}
} else if (can_use_SDWA(ctx.program->chip_class, instr, true) &&
(tmp.type() == RegType::vgpr || ctx.program->chip_class >= GFX9)) {
to_SDWA(ctx, instr);
static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel;
} else if (instr->isVOP3()) {
if (sel & sdwa_wordnum)
if (sel.offset())
instr->vop3().opsel |= 1 << idx;
}
@ -1023,7 +1023,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode];
if (instr->isSDWA())
can_use_mod = can_use_mod && (instr->sdwa().sel[i] & sdwa_asuint) == sdwa_udword;
can_use_mod = can_use_mod && instr->sdwa().sel[i].size() == 4;
else
can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr));
@ -1673,7 +1673,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
case aco_opcode::p_extract: {
if (instr->definitions[0].bytes() == 4) {
ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
if (instr->operands[0].regClass() == v1 && parse_insert(instr.get()) >= 0)
if (instr->operands[0].regClass() == v1 && parse_insert(instr.get()))
ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
}
break;
@ -1682,7 +1682,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (instr->operands[0].bytes() == 4) {
if (instr->operands[0].regClass() == v1)
ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
if (parse_extract(instr.get()) >= 0)
if (parse_extract(instr.get()))
ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
}
@ -2868,20 +2868,21 @@ apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
/* MADs/FMAs are created later, so we don't have to update the original add */
assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
unsigned sel = parse_insert(def_info.instr);
SubdwordSel sel = parse_insert(def_info.instr);
assert(sel);
if (instr->isVOP3() && (sel & sdwa_isword) && !(sel & sdwa_sext) &&
can_use_opsel(ctx.program->chip_class, instr->opcode, 3, (sel & sdwa_wordnum))) {
if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() &&
can_use_opsel(ctx.program->chip_class, instr->opcode, 3, sel.offset())) {
if (instr->vop3().opsel & (1 << 3))
return false;
if (sel & sdwa_wordnum)
if (sel.offset())
instr->vop3().opsel |= 1 << 3;
} else {
if (!can_use_SDWA(ctx.program->chip_class, instr, true))
return false;
to_SDWA(ctx, instr);
if ((static_cast<SDWA_instruction*>(instr.get())->dst_sel & sdwa_asuint) != sdwa_udword)
if (instr->sdwa().dst_sel.size() != 4)
return false;
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
}

View file

@ -629,20 +629,14 @@ print_instr_format_specific(const Instruction* instr, FILE* output)
fprintf(output, " clamp");
if (instr->isVOPC())
return;
switch (sdwa.dst_sel & sdwa_asuint) {
case sdwa_udword: break;
case sdwa_ubyte0:
case sdwa_ubyte1:
case sdwa_ubyte2:
case sdwa_ubyte3:
fprintf(output, " dst_sel:%sbyte%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
sdwa.dst_sel & sdwa_bytenum);
break;
case sdwa_uword0:
case sdwa_uword1:
fprintf(output, " dst_sel:%sword%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
sdwa.dst_sel & sdwa_wordnum);
break;
if (instr->definitions[0].bytes() == 4) {
char sext = sdwa.dst_sel.sign_extend() ? 's' : 'u';
switch (sdwa.dst_sel.size()) {
case 1: fprintf(output, " dst_sel:%cbyte%u", sext, sdwa.dst_sel.offset()); break;
case 2: fprintf(output, " dst_sel:%cword%u", sext, sdwa.dst_sel.offset() >> 1); break;
case 4: break;
default: break;
}
}
if (sdwa.dst_preserve)
fprintf(output, " dst_preserve");
@ -665,12 +659,12 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
bool* const abs = (bool*)alloca(instr->operands.size() * sizeof(bool));
bool* const neg = (bool*)alloca(instr->operands.size() * sizeof(bool));
bool* const opsel = (bool*)alloca(instr->operands.size() * sizeof(bool));
uint8_t* const sel = (uint8_t*)alloca(instr->operands.size() * sizeof(uint8_t));
SubdwordSel* const sel = (SubdwordSel*)alloca(instr->operands.size() * sizeof(SubdwordSel));
for (unsigned i = 0; i < instr->operands.size(); ++i) {
abs[i] = false;
neg[i] = false;
opsel[i] = false;
sel[i] = sdwa_udword;
sel[i] = SubdwordSel::dword;
}
if (instr->isVOP3()) {
const VOP3_instruction& vop3 = instr->vop3();
@ -678,7 +672,6 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
abs[i] = vop3.abs[i];
neg[i] = vop3.neg[i];
opsel[i] = vop3.opsel & (1 << i);
sel[i] = sdwa_udword;
}
} else if (instr->isDPP()) {
const DPP_instruction& dpp = instr->dpp();
@ -686,7 +679,6 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
abs[i] = dpp.abs[i];
neg[i] = dpp.neg[i];
opsel[i] = false;
sel[i] = sdwa_udword;
}
} else if (instr->isSDWA()) {
const SDWA_instruction& sdwa = instr->sdwa();
@ -709,21 +701,15 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
fprintf(output, "|");
if (opsel[i])
fprintf(output, "hi(");
else if (sel[i] & sdwa_sext)
else if (sel[i].sign_extend())
fprintf(output, "sext(");
aco_print_operand(&instr->operands[i], output, flags);
if (opsel[i] || (sel[i] & sdwa_sext))
if (opsel[i] || (sel[i].sign_extend()))
fprintf(output, ")");
if (!(sel[i] & sdwa_isra)) {
if (sel[i] == sdwa_udword || sel[i] == sdwa_sdword) {
/* print nothing */
} else if (sel[i] & sdwa_isword) {
unsigned index = sel[i] & sdwa_wordnum;
fprintf(output, "[%u:%u]", index * 16, index * 16 + 15);
} else {
unsigned index = sel[i] & sdwa_bytenum;
fprintf(output, "[%u:%u]", index * 8, index * 8 + 7);
}
if (instr->isSDWA() && i < 2 && sel[i].size() < 4 && instr->operands[i].bytes() == 4) {
unsigned begin = sel[i].offset() * 8;
unsigned end = begin + sel[i].size() * 8 - 1;
fprintf(output, "[%u:%u]", begin, end);
}
if (abs[i])
fprintf(output, "|");

View file

@ -163,16 +163,30 @@ validate_ir(Program* program)
check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
program->chip_class >= GFX9,
"SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
} else {
const Definition& def = instr->definitions[0];
check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
instr.get());
check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
"SDWA definition selection size must be at most definition size", instr.get());
check(
sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
"SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
instr.get());
check(def.bytes() == 4 || sdwa.dst_preserve,
"SDWA subdword definition needs dst_preserve", instr.get());
}
for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
const Operand& op = instr->operands[i];
check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
if (sdwa.sel[i] & sdwa_isra)
check(op.bytes() >= (sdwa.sel[i] & sdwa_rasize),
"SDWA selection size must be at most operand size", instr.get());
else
check(op.bytes() == 4, "SDWA selection needs dword operand", instr.get());
check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
"SDWA operand selection size must be at most operand size", instr.get());
check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
"SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
instr.get());
}
if (instr->operands.size() >= 3) {
check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
@ -201,10 +215,6 @@ validate_ir(Program* program)
(instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_mac_f16);
/* The two v_mac opcodes are alternatives: a single opcode can never equal
 * both, so they must be OR-ed for feature_mac to ever be true. */
check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
if (instr->definitions[0].regClass().is_subdword())
check((sdwa.dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()),
"Unexpected SDWA sel for sub-dword definition", instr.get());
}
/* check opsel */
@ -689,10 +699,9 @@ validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& instr, un
return byte == 0;
if (instr->isPseudo() && chip >= GFX8)
return true;
if (instr->isSDWA()) {
unsigned size = instr->sdwa().sel[index] & sdwa_rasize;
return byte % size == 0;
}
if (instr->isSDWA())
return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
byte % instr->sdwa().sel[index].size() == 0;
if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))
return true;
@ -742,8 +751,9 @@ validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& instr)
if (instr->isPseudo() && chip >= GFX8)
return true;
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
return true;
if (instr->isSDWA())
return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
byte % instr->sdwa().dst_sel.size() == 0;
if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
return true;
@ -774,9 +784,8 @@ get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr,
return chip >= GFX8 ? def.bytes() : def.size() * 4u;
if (instr->isVALU()) {
assert(def.bytes() <= 2);
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
return def.bytes();
if (instr->isSDWA())
return instr->sdwa().dst_sel.size();
if (instr_is_16bit(chip, instr->opcode))
return 2;

View file

@ -39,11 +39,11 @@ BEGIN_TEST(validate.sdwa.allow)
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
sdwa->dst_preserve = true;
sdwa->dst_sel = sdwa_ubyte0;
sdwa->dst_sel = SubdwordSel::ubyte0;
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
sdwa->sel[0] = sdwa_sbyte2;
sdwa->sel[1] = sdwa_uword1;
sdwa->sel[0] = SubdwordSel::sbyte2;
sdwa->sel[1] = SubdwordSel::uword1;
finish_validator_test();
}

View file

@ -554,15 +554,15 @@ BEGIN_TEST(to_hw_instr.extract)
//>> p_unit_test 4
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7) dst_preserve
EXT(0, 0)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(16:23)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(16:23) dst_preserve
if (i != GFX7)
EXT(0, 2)
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(8:15)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(8:15) dst_preserve
EXT(1, 0)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(24:31)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(24:31) dst_preserve
if (i != GFX7)
EXT(1, 2)