mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
aco: rewrite SDWA selector
This commit introduces a new struct SubdwordSel in order to ease and clean up the usage of SDWA selections. This includes removing the distinction between register-allocated and fixed SDWA selections. Instead, SDWA selections can now also access the high bits of subdword variables. Alignment and sizes are validated accordingly. Size, offset and sign_extend can be evaluated via helper methods.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12640>
This commit is contained in:
parent
cc4682ed47
commit
9e3ff06c38
11 changed files with 164 additions and 187 deletions
|
|
@ -61,19 +61,6 @@ struct asm_context {
|
|||
int subvector_begin_pos = -1;
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
get_sdwa_sel(unsigned sel, PhysReg reg)
|
||||
{
|
||||
if (sel & sdwa_isra) {
|
||||
unsigned size = sdwa_rasize & sel;
|
||||
if (size == 1)
|
||||
return reg.byte();
|
||||
else /* size == 2 */
|
||||
return sdwa_isword | (reg.byte() >> 1);
|
||||
}
|
||||
return sel & sdwa_asuint;
|
||||
}
|
||||
|
||||
unsigned
|
||||
get_mimg_nsa_dwords(const Instruction* instr)
|
||||
{
|
||||
|
|
@ -715,23 +702,23 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
}
|
||||
encoding |= (sdwa.clamp ? 1 : 0) << 13;
|
||||
} else {
|
||||
encoding |= get_sdwa_sel(sdwa.dst_sel, instr->definitions[0].physReg()) << 8;
|
||||
uint32_t dst_u = sdwa.dst_sel & sdwa_sext ? 1 : 0;
|
||||
if (sdwa.dst_preserve || (sdwa.dst_sel & sdwa_isra))
|
||||
encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8;
|
||||
uint32_t dst_u = sdwa.dst_sel.sign_extend() ? 1 : 0;
|
||||
if (sdwa.dst_preserve)
|
||||
dst_u = 2;
|
||||
encoding |= dst_u << 11;
|
||||
encoding |= (sdwa.clamp ? 1 : 0) << 13;
|
||||
encoding |= sdwa.omod << 14;
|
||||
}
|
||||
|
||||
encoding |= get_sdwa_sel(sdwa.sel[0], sdwa_op.physReg()) << 16;
|
||||
encoding |= sdwa.sel[0] & sdwa_sext ? 1 << 19 : 0;
|
||||
encoding |= sdwa.sel[0].to_sdwa_sel(sdwa_op.physReg().byte()) << 16;
|
||||
encoding |= sdwa.sel[0].sign_extend() ? 1 << 19 : 0;
|
||||
encoding |= sdwa.abs[0] << 21;
|
||||
encoding |= sdwa.neg[0] << 20;
|
||||
|
||||
if (instr->operands.size() >= 2) {
|
||||
encoding |= get_sdwa_sel(sdwa.sel[1], instr->operands[1].physReg()) << 24;
|
||||
encoding |= sdwa.sel[1] & sdwa_sext ? 1 << 27 : 0;
|
||||
encoding |= sdwa.sel[1].to_sdwa_sel(instr->operands[1].physReg().byte()) << 24;
|
||||
encoding |= sdwa.sel[1].sign_extend() ? 1 << 27 : 0;
|
||||
encoding |= sdwa.abs[1] << 29;
|
||||
encoding |= sdwa.neg[1] << 28;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -644,11 +644,9 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
|
|||
create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
|
||||
sdwa->operands[0] = Operand(src);
|
||||
sdwa->definitions[0] = Definition(tmp);
|
||||
if (sign_extend)
|
||||
sdwa->sel[0] = src_bits == 8 ? sdwa_sbyte : sdwa_sword;
|
||||
else
|
||||
sdwa->sel[0] = src_bits == 8 ? sdwa_ubyte : sdwa_uword;
|
||||
sdwa->dst_sel = tmp.bytes() == 2 ? sdwa_uword : sdwa_udword;
|
||||
sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend);
|
||||
sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : SubdwordSel::dword;
|
||||
sdwa->dst_preserve = tmp.bytes() == 2;
|
||||
bld.insert(std::move(sdwa));
|
||||
} else {
|
||||
assert(src_bits < 32);
|
||||
|
|
|
|||
|
|
@ -276,24 +276,12 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
|
|||
if (i >= 2)
|
||||
break;
|
||||
|
||||
switch (instr->operands[i].bytes()) {
|
||||
case 1: sdwa.sel[i] = sdwa_ubyte; break;
|
||||
case 2: sdwa.sel[i] = sdwa_uword; break;
|
||||
case 4: sdwa.sel[i] = sdwa_udword; break;
|
||||
}
|
||||
}
|
||||
switch (instr->definitions[0].bytes()) {
|
||||
case 1:
|
||||
sdwa.dst_sel = sdwa_ubyte;
|
||||
sdwa.dst_preserve = true;
|
||||
break;
|
||||
case 2:
|
||||
sdwa.dst_sel = sdwa_uword;
|
||||
sdwa.dst_preserve = true;
|
||||
break;
|
||||
case 4: sdwa.dst_sel = sdwa_udword; break;
|
||||
sdwa.sel[i] = SubdwordSel(instr->operands[i].bytes(), 0, false);
|
||||
}
|
||||
|
||||
sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
|
||||
sdwa.dst_preserve = sdwa.dst_sel.size() < 4;
|
||||
|
||||
if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8)
|
||||
instr->definitions[0].setFixed(vcc);
|
||||
if (instr->definitions.size() >= 2)
|
||||
|
|
|
|||
|
|
@ -1405,40 +1405,53 @@ struct DPP_instruction : public Instruction {
|
|||
};
|
||||
static_assert(sizeof(DPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
|
||||
enum sdwa_sel : uint8_t {
|
||||
/* masks */
|
||||
sdwa_wordnum = 0x1,
|
||||
sdwa_bytenum = 0x3,
|
||||
sdwa_asuint = 0x7 | 0x10,
|
||||
sdwa_rasize = 0x3,
|
||||
struct SubdwordSel {
|
||||
enum sdwa_sel : uint8_t {
|
||||
ubyte = 0x4,
|
||||
uword = 0x8,
|
||||
dword = 0x10,
|
||||
sext = 0x20,
|
||||
sbyte = ubyte | sext,
|
||||
sword = uword | sext,
|
||||
|
||||
/* flags */
|
||||
sdwa_isword = 0x4,
|
||||
sdwa_sext = 0x8,
|
||||
sdwa_isra = 0x10,
|
||||
ubyte0 = ubyte,
|
||||
ubyte1 = ubyte | 1,
|
||||
ubyte2 = ubyte | 2,
|
||||
ubyte3 = ubyte | 3,
|
||||
sbyte0 = sbyte,
|
||||
sbyte1 = sbyte | 1,
|
||||
sbyte2 = sbyte | 2,
|
||||
sbyte3 = sbyte | 3,
|
||||
uword0 = uword,
|
||||
uword1 = uword | 2,
|
||||
sword0 = sword,
|
||||
sword1 = sword | 2,
|
||||
};
|
||||
|
||||
/* specific values */
|
||||
sdwa_ubyte0 = 0,
|
||||
sdwa_ubyte1 = 1,
|
||||
sdwa_ubyte2 = 2,
|
||||
sdwa_ubyte3 = 3,
|
||||
sdwa_uword0 = sdwa_isword | 0,
|
||||
sdwa_uword1 = sdwa_isword | 1,
|
||||
sdwa_udword = 6,
|
||||
SubdwordSel() : sel((sdwa_sel)0) {}
|
||||
constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {}
|
||||
constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend)
|
||||
: sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset))
|
||||
{}
|
||||
constexpr operator sdwa_sel() const { return sel; }
|
||||
explicit operator bool() const { return sel != 0; }
|
||||
|
||||
sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
|
||||
sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
|
||||
sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
|
||||
sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
|
||||
sdwa_sword0 = sdwa_uword0 | sdwa_sext,
|
||||
sdwa_sword1 = sdwa_uword1 | sdwa_sext,
|
||||
sdwa_sdword = sdwa_udword | sdwa_sext,
|
||||
constexpr unsigned size() const { return (sel >> 2) & 0x7; }
|
||||
constexpr unsigned offset() const { return sel & 0x3; }
|
||||
constexpr bool sign_extend() const { return sel & sext; }
|
||||
constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const
|
||||
{
|
||||
reg_byte_offset += offset();
|
||||
if (size() == 1)
|
||||
return reg_byte_offset;
|
||||
else if (size() == 2)
|
||||
return 4 + (reg_byte_offset >> 1);
|
||||
else
|
||||
return 6;
|
||||
}
|
||||
|
||||
/* register-allocated */
|
||||
sdwa_ubyte = 1 | sdwa_isra,
|
||||
sdwa_uword = 2 | sdwa_isra,
|
||||
sdwa_sbyte = sdwa_ubyte | sdwa_sext,
|
||||
sdwa_sword = sdwa_uword | sdwa_sext,
|
||||
private:
|
||||
sdwa_sel sel;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -1452,8 +1465,8 @@ enum sdwa_sel : uint8_t {
|
|||
struct SDWA_instruction : public Instruction {
|
||||
/* these destination modifiers aren't available with VOPC except for
|
||||
* clamp on GFX8 */
|
||||
uint8_t sel[2];
|
||||
uint8_t dst_sel;
|
||||
SubdwordSel sel[2];
|
||||
SubdwordSel dst_sel;
|
||||
bool neg[2];
|
||||
bool abs[2];
|
||||
bool dst_preserve : 1;
|
||||
|
|
|
|||
|
|
@ -516,11 +516,9 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
|
||||
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
|
||||
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
|
||||
if (reduce_op == imin8 || reduce_op == imax8)
|
||||
sdwa->sel[0] = sdwa_sbyte;
|
||||
else
|
||||
sdwa->sel[0] = sdwa_ubyte;
|
||||
sdwa->dst_sel = sdwa_udword;
|
||||
bool sext = reduce_op == imin8 || reduce_op == imax8;
|
||||
sdwa->sel[0] = SubdwordSel(1, 0, sext);
|
||||
sdwa->dst_sel = SubdwordSel::dword;
|
||||
bld.insert(std::move(sdwa));
|
||||
} else {
|
||||
aco_opcode opcode;
|
||||
|
|
@ -541,11 +539,9 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
|
||||
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
|
||||
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
|
||||
if (reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16)
|
||||
sdwa->sel[0] = sdwa_sword;
|
||||
else
|
||||
sdwa->sel[0] = sdwa_uword;
|
||||
sdwa->dst_sel = sdwa_udword;
|
||||
bool sext = reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16;
|
||||
sdwa->sel[0] = SubdwordSel(2, 0, sext);
|
||||
sdwa->dst_sel = SubdwordSel::dword;
|
||||
bld.insert(std::move(sdwa));
|
||||
} else if (ctx->program->chip_class == GFX6 || ctx->program->chip_class == GFX7) {
|
||||
aco_opcode opcode;
|
||||
|
|
@ -2099,10 +2095,9 @@ lower_to_hw_instr(Program* program)
|
|||
sdwa->operands[0] = Operand(op.physReg().advance(-op.physReg().byte()),
|
||||
RegClass::get(op.regClass().type(), 4));
|
||||
sdwa->definitions[0] = dst;
|
||||
sdwa->sel[0] = sdwa_ubyte0 + op.physReg().byte() + index;
|
||||
if (signext)
|
||||
sdwa->sel[0] |= sdwa_sext;
|
||||
sdwa->dst_sel = sdwa_uword;
|
||||
sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext);
|
||||
sdwa->dst_sel = SubdwordSel::uword;
|
||||
sdwa->dst_preserve = true;
|
||||
bld.insert(std::move(sdwa));
|
||||
}
|
||||
break;
|
||||
|
|
@ -2143,8 +2138,8 @@ lower_to_hw_instr(Program* program)
|
|||
(Format)((uint16_t)Format::VOP1 | (uint16_t)Format::SDWA), 1, 1)};
|
||||
sdwa->operands[0] = op;
|
||||
sdwa->definitions[0] = dst;
|
||||
sdwa->sel[0] = sdwa_udword;
|
||||
sdwa->dst_sel = (bits == 8 ? sdwa_ubyte0 : sdwa_uword0) + (offset / bits);
|
||||
sdwa->sel[0] = SubdwordSel::dword;
|
||||
sdwa->dst_sel = SubdwordSel(bits / 8, offset / 8, false);
|
||||
bld.insert(std::move(sdwa));
|
||||
} else {
|
||||
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
|
||||
|
|
@ -2157,7 +2152,7 @@ lower_to_hw_instr(Program* program)
|
|||
RegClass::get(op.regClass().type(), 4));
|
||||
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), sdwa_op)
|
||||
.instr->sdwa()
|
||||
.sel[1] = sdwa_ubyte0 + op.physReg().byte();
|
||||
.sel[1] = SubdwordSel(1, op.physReg().byte(), false);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -178,8 +178,8 @@ class Format(Enum):
|
|||
res = ''
|
||||
if self == Format.SDWA:
|
||||
for i in range(min(num_operands, 2)):
|
||||
res += 'instr->sel[{0}] = op{0}.op.bytes() == 2 ? sdwa_uword : (op{0}.op.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'.format(i)
|
||||
res += 'instr->dst_sel = def0.bytes() == 2 ? sdwa_uword : (def0.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'
|
||||
res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
|
||||
res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
|
||||
res += 'if (def0.bytes() < 4) instr->dst_preserve = true;'
|
||||
return res
|
||||
|
||||
|
|
|
|||
|
|
@ -785,36 +785,33 @@ fixed_to_exec(Operand op)
|
|||
return op.isFixed() && op.physReg() == exec;
|
||||
}
|
||||
|
||||
int
|
||||
SubdwordSel
|
||||
parse_extract(Instruction* instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::p_extract) {
|
||||
bool is_byte = instr->operands[2].constantEquals(8);
|
||||
unsigned index = instr->operands[1].constantValue();
|
||||
unsigned sel = (is_byte ? sdwa_ubyte0 : sdwa_uword0) + index;
|
||||
if (!instr->operands[3].constantEquals(0))
|
||||
sel |= sdwa_sext;
|
||||
return sel;
|
||||
unsigned size = instr->operands[2].constantValue() / 8;
|
||||
unsigned offset = instr->operands[1].constantValue() * size;
|
||||
bool sext = instr->operands[3].constantEquals(1);
|
||||
return SubdwordSel(size, offset, sext);
|
||||
} else if (instr->opcode == aco_opcode::p_insert && instr->operands[1].constantEquals(0)) {
|
||||
return instr->operands[2].constantEquals(8) ? sdwa_ubyte0 : sdwa_uword0;
|
||||
return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword;
|
||||
} else {
|
||||
return -1;
|
||||
return SubdwordSel();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
SubdwordSel
|
||||
parse_insert(Instruction* instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::p_extract && instr->operands[3].constantEquals(0) &&
|
||||
instr->operands[1].constantEquals(0)) {
|
||||
return instr->operands[2].constantEquals(8) ? sdwa_ubyte0 : sdwa_uword0;
|
||||
return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword;
|
||||
} else if (instr->opcode == aco_opcode::p_insert) {
|
||||
bool is_byte = instr->operands[2].constantEquals(8);
|
||||
unsigned index = instr->operands[1].constantValue();
|
||||
unsigned sel = (is_byte ? sdwa_ubyte0 : sdwa_uword0) + index;
|
||||
return sel;
|
||||
unsigned size = instr->operands[2].constantValue() / 8;
|
||||
unsigned offset = instr->operands[1].constantValue() * size;
|
||||
return SubdwordSel(size, offset, false);
|
||||
} else {
|
||||
return -1;
|
||||
return SubdwordSel();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -825,20 +822,21 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
|
|||
return false;
|
||||
|
||||
Temp tmp = info.instr->operands[0].getTemp();
|
||||
unsigned sel = parse_extract(info.instr);
|
||||
SubdwordSel sel = parse_extract(info.instr);
|
||||
|
||||
if (sel == sdwa_udword || sel == sdwa_sdword) {
|
||||
if (!sel) {
|
||||
return false;
|
||||
} else if (sel.size() == 4) {
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel <= sdwa_ubyte3) {
|
||||
} else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) {
|
||||
return true;
|
||||
} else if (can_use_SDWA(ctx.program->chip_class, instr, true) &&
|
||||
(tmp.type() == RegType::vgpr || ctx.program->chip_class >= GFX9)) {
|
||||
if (instr->isSDWA() &&
|
||||
(static_cast<SDWA_instruction*>(instr.get())->sel[idx] & sdwa_asuint) != sdwa_udword)
|
||||
if (instr->isSDWA() && instr->sdwa().sel[idx] != SubdwordSel::dword)
|
||||
return false;
|
||||
return true;
|
||||
} else if (instr->isVOP3() && (sel & sdwa_isword) &&
|
||||
can_use_opsel(ctx.program->chip_class, instr->opcode, idx, (sel & sdwa_wordnum)) &&
|
||||
} else if (instr->isVOP3() && sel.size() == 2 &&
|
||||
can_use_opsel(ctx.program->chip_class, instr->opcode, idx, sel.offset()) &&
|
||||
!(instr->vop3().opsel & (1 << idx))) {
|
||||
return true;
|
||||
} else {
|
||||
|
|
@ -853,22 +851,24 @@ void
|
|||
apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
|
||||
{
|
||||
Temp tmp = info.instr->operands[0].getTemp();
|
||||
unsigned sel = parse_extract(info.instr);
|
||||
SubdwordSel sel = parse_extract(info.instr);
|
||||
assert(sel);
|
||||
|
||||
if (sel == sdwa_udword || sel == sdwa_sdword) {
|
||||
} else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel <= sdwa_ubyte3) {
|
||||
switch (sel) {
|
||||
case sdwa_ubyte0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break;
|
||||
case sdwa_ubyte1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break;
|
||||
case sdwa_ubyte2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break;
|
||||
case sdwa_ubyte3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break;
|
||||
if (sel.size() == 4) {
|
||||
/* full dword selection */
|
||||
} else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) {
|
||||
switch (sel.offset()) {
|
||||
case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break;
|
||||
case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break;
|
||||
case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break;
|
||||
case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break;
|
||||
}
|
||||
} else if (can_use_SDWA(ctx.program->chip_class, instr, true) &&
|
||||
(tmp.type() == RegType::vgpr || ctx.program->chip_class >= GFX9)) {
|
||||
to_SDWA(ctx, instr);
|
||||
static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel;
|
||||
} else if (instr->isVOP3()) {
|
||||
if (sel & sdwa_wordnum)
|
||||
if (sel.offset())
|
||||
instr->vop3().opsel |= 1 << idx;
|
||||
}
|
||||
|
||||
|
|
@ -1023,7 +1023,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode];
|
||||
|
||||
if (instr->isSDWA())
|
||||
can_use_mod = can_use_mod && (instr->sdwa().sel[i] & sdwa_asuint) == sdwa_udword;
|
||||
can_use_mod = can_use_mod && instr->sdwa().sel[i].size() == 4;
|
||||
else
|
||||
can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr));
|
||||
|
||||
|
|
@ -1673,7 +1673,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
case aco_opcode::p_extract: {
|
||||
if (instr->definitions[0].bytes() == 4) {
|
||||
ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
|
||||
if (instr->operands[0].regClass() == v1 && parse_insert(instr.get()) >= 0)
|
||||
if (instr->operands[0].regClass() == v1 && parse_insert(instr.get()))
|
||||
ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
|
||||
}
|
||||
break;
|
||||
|
|
@ -1682,7 +1682,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (instr->operands[0].bytes() == 4) {
|
||||
if (instr->operands[0].regClass() == v1)
|
||||
ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
|
||||
if (parse_extract(instr.get()) >= 0)
|
||||
if (parse_extract(instr.get()))
|
||||
ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
|
||||
ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
|
||||
}
|
||||
|
|
@ -2868,20 +2868,21 @@ apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
/* MADs/FMAs are created later, so we don't have to update the original add */
|
||||
assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
|
||||
|
||||
unsigned sel = parse_insert(def_info.instr);
|
||||
SubdwordSel sel = parse_insert(def_info.instr);
|
||||
assert(sel);
|
||||
|
||||
if (instr->isVOP3() && (sel & sdwa_isword) && !(sel & sdwa_sext) &&
|
||||
can_use_opsel(ctx.program->chip_class, instr->opcode, 3, (sel & sdwa_wordnum))) {
|
||||
if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() &&
|
||||
can_use_opsel(ctx.program->chip_class, instr->opcode, 3, sel.offset())) {
|
||||
if (instr->vop3().opsel & (1 << 3))
|
||||
return false;
|
||||
if (sel & sdwa_wordnum)
|
||||
if (sel.offset())
|
||||
instr->vop3().opsel |= 1 << 3;
|
||||
} else {
|
||||
if (!can_use_SDWA(ctx.program->chip_class, instr, true))
|
||||
return false;
|
||||
|
||||
to_SDWA(ctx, instr);
|
||||
if ((static_cast<SDWA_instruction*>(instr.get())->dst_sel & sdwa_asuint) != sdwa_udword)
|
||||
if (instr->sdwa().dst_sel.size() != 4)
|
||||
return false;
|
||||
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -629,20 +629,14 @@ print_instr_format_specific(const Instruction* instr, FILE* output)
|
|||
fprintf(output, " clamp");
|
||||
if (instr->isVOPC())
|
||||
return;
|
||||
switch (sdwa.dst_sel & sdwa_asuint) {
|
||||
case sdwa_udword: break;
|
||||
case sdwa_ubyte0:
|
||||
case sdwa_ubyte1:
|
||||
case sdwa_ubyte2:
|
||||
case sdwa_ubyte3:
|
||||
fprintf(output, " dst_sel:%sbyte%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
|
||||
sdwa.dst_sel & sdwa_bytenum);
|
||||
break;
|
||||
case sdwa_uword0:
|
||||
case sdwa_uword1:
|
||||
fprintf(output, " dst_sel:%sword%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
|
||||
sdwa.dst_sel & sdwa_wordnum);
|
||||
break;
|
||||
if (instr->definitions[0].bytes() == 4) {
|
||||
char sext = sdwa.dst_sel.sign_extend() ? 's' : 'u';
|
||||
switch (sdwa.dst_sel.size()) {
|
||||
case 1: fprintf(output, " dst_sel:%cbyte%u", sext, sdwa.dst_sel.offset()); break;
|
||||
case 2: fprintf(output, " dst_sel:%cword%u", sext, sdwa.dst_sel.offset() >> 1); break;
|
||||
case 4: break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
if (sdwa.dst_preserve)
|
||||
fprintf(output, " dst_preserve");
|
||||
|
|
@ -665,12 +659,12 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
|
|||
bool* const abs = (bool*)alloca(instr->operands.size() * sizeof(bool));
|
||||
bool* const neg = (bool*)alloca(instr->operands.size() * sizeof(bool));
|
||||
bool* const opsel = (bool*)alloca(instr->operands.size() * sizeof(bool));
|
||||
uint8_t* const sel = (uint8_t*)alloca(instr->operands.size() * sizeof(uint8_t));
|
||||
SubdwordSel* const sel = (SubdwordSel*)alloca(instr->operands.size() * sizeof(SubdwordSel));
|
||||
for (unsigned i = 0; i < instr->operands.size(); ++i) {
|
||||
abs[i] = false;
|
||||
neg[i] = false;
|
||||
opsel[i] = false;
|
||||
sel[i] = sdwa_udword;
|
||||
sel[i] = SubdwordSel::dword;
|
||||
}
|
||||
if (instr->isVOP3()) {
|
||||
const VOP3_instruction& vop3 = instr->vop3();
|
||||
|
|
@ -678,7 +672,6 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
|
|||
abs[i] = vop3.abs[i];
|
||||
neg[i] = vop3.neg[i];
|
||||
opsel[i] = vop3.opsel & (1 << i);
|
||||
sel[i] = sdwa_udword;
|
||||
}
|
||||
} else if (instr->isDPP()) {
|
||||
const DPP_instruction& dpp = instr->dpp();
|
||||
|
|
@ -686,7 +679,6 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
|
|||
abs[i] = dpp.abs[i];
|
||||
neg[i] = dpp.neg[i];
|
||||
opsel[i] = false;
|
||||
sel[i] = sdwa_udword;
|
||||
}
|
||||
} else if (instr->isSDWA()) {
|
||||
const SDWA_instruction& sdwa = instr->sdwa();
|
||||
|
|
@ -709,21 +701,15 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
|
|||
fprintf(output, "|");
|
||||
if (opsel[i])
|
||||
fprintf(output, "hi(");
|
||||
else if (sel[i] & sdwa_sext)
|
||||
else if (sel[i].sign_extend())
|
||||
fprintf(output, "sext(");
|
||||
aco_print_operand(&instr->operands[i], output, flags);
|
||||
if (opsel[i] || (sel[i] & sdwa_sext))
|
||||
if (opsel[i] || (sel[i].sign_extend()))
|
||||
fprintf(output, ")");
|
||||
if (!(sel[i] & sdwa_isra)) {
|
||||
if (sel[i] == sdwa_udword || sel[i] == sdwa_sdword) {
|
||||
/* print nothing */
|
||||
} else if (sel[i] & sdwa_isword) {
|
||||
unsigned index = sel[i] & sdwa_wordnum;
|
||||
fprintf(output, "[%u:%u]", index * 16, index * 16 + 15);
|
||||
} else {
|
||||
unsigned index = sel[i] & sdwa_bytenum;
|
||||
fprintf(output, "[%u:%u]", index * 8, index * 8 + 7);
|
||||
}
|
||||
if (instr->isSDWA() && i < 2 && sel[i].size() < 4 && instr->operands[i].bytes() == 4) {
|
||||
unsigned begin = sel[i].offset() * 8;
|
||||
unsigned end = begin + sel[i].size() * 8 - 1;
|
||||
fprintf(output, "[%u:%u]", begin, end);
|
||||
}
|
||||
if (abs[i])
|
||||
fprintf(output, "|");
|
||||
|
|
|
|||
|
|
@ -163,16 +163,30 @@ validate_ir(Program* program)
|
|||
check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
|
||||
program->chip_class >= GFX9,
|
||||
"SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
|
||||
} else {
|
||||
const Definition& def = instr->definitions[0];
|
||||
check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
|
||||
instr.get());
|
||||
check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
|
||||
"SDWA definition selection size must be at most definition size", instr.get());
|
||||
check(
|
||||
sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
|
||||
"SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
|
||||
check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
|
||||
instr.get());
|
||||
check(def.bytes() == 4 || sdwa.dst_preserve,
|
||||
"SDWA subdword definition needs dst_preserve", instr.get());
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
|
||||
const Operand& op = instr->operands[i];
|
||||
check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
|
||||
if (sdwa.sel[i] & sdwa_isra)
|
||||
check(op.bytes() >= (sdwa.sel[i] & sdwa_rasize),
|
||||
"SDWA selection size must be at most operand size", instr.get());
|
||||
else
|
||||
check(op.bytes() == 4, "SDWA selection needs dword operand", instr.get());
|
||||
check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
|
||||
"SDWA operand selection size must be at most operand size", instr.get());
|
||||
check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
|
||||
"SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
|
||||
check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
|
||||
instr.get());
|
||||
}
|
||||
if (instr->operands.size() >= 3) {
|
||||
check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
|
||||
|
|
@ -201,10 +215,6 @@ validate_ir(Program* program)
|
|||
(instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
|
||||
|
||||
check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
|
||||
|
||||
if (instr->definitions[0].regClass().is_subdword())
|
||||
check((sdwa.dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()),
|
||||
"Unexpected SDWA sel for sub-dword definition", instr.get());
|
||||
}
|
||||
|
||||
/* check opsel */
|
||||
|
|
@ -689,10 +699,9 @@ validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& instr, un
|
|||
return byte == 0;
|
||||
if (instr->isPseudo() && chip >= GFX8)
|
||||
return true;
|
||||
if (instr->isSDWA()) {
|
||||
unsigned size = instr->sdwa().sel[index] & sdwa_rasize;
|
||||
return byte % size == 0;
|
||||
}
|
||||
if (instr->isSDWA())
|
||||
return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
|
||||
byte % instr->sdwa().sel[index].size() == 0;
|
||||
if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))
|
||||
return true;
|
||||
|
||||
|
|
@ -742,8 +751,9 @@ validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& instr)
|
|||
|
||||
if (instr->isPseudo() && chip >= GFX8)
|
||||
return true;
|
||||
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
|
||||
return true;
|
||||
if (instr->isSDWA())
|
||||
return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
|
||||
byte % instr->sdwa().dst_sel.size() == 0;
|
||||
if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
|
||||
return true;
|
||||
|
||||
|
|
@ -774,9 +784,8 @@ get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr,
|
|||
return chip >= GFX8 ? def.bytes() : def.size() * 4u;
|
||||
if (instr->isVALU()) {
|
||||
assert(def.bytes() <= 2);
|
||||
|
||||
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
|
||||
return def.bytes();
|
||||
if (instr->isSDWA())
|
||||
return instr->sdwa().dst_sel.size();
|
||||
|
||||
if (instr_is_16bit(chip, instr->opcode))
|
||||
return 2;
|
||||
|
|
|
|||
|
|
@ -39,11 +39,11 @@ BEGIN_TEST(validate.sdwa.allow)
|
|||
|
||||
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
|
||||
sdwa->dst_preserve = true;
|
||||
sdwa->dst_sel = sdwa_ubyte0;
|
||||
sdwa->dst_sel = SubdwordSel::ubyte0;
|
||||
|
||||
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
|
||||
sdwa->sel[0] = sdwa_sbyte2;
|
||||
sdwa->sel[1] = sdwa_uword1;
|
||||
sdwa->sel[0] = SubdwordSel::sbyte2;
|
||||
sdwa->sel[1] = SubdwordSel::uword1;
|
||||
|
||||
finish_validator_test();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -554,15 +554,15 @@ BEGIN_TEST(to_hw_instr.extract)
|
|||
//>> p_unit_test 4
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
|
||||
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7) dst_preserve
|
||||
EXT(0, 0)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(16:23)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(16:23) dst_preserve
|
||||
if (i != GFX7)
|
||||
EXT(0, 2)
|
||||
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(8:15)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(8:15) dst_preserve
|
||||
EXT(1, 0)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(24:31)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(24:31) dst_preserve
|
||||
if (i != GFX7)
|
||||
EXT(1, 2)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue