aco: Remove use of deprecated Operand constructors

This migration was done with libclang-based automatic tooling, which
performed these replacements:
* Operand(uint8_t) -> Operand::c8
* Operand(uint16_t) -> Operand::c16
* Operand(uint32_t, false) -> Operand::c32
* Operand(uint32_t, bool) -> Operand::c32_or_c64
* Operand(uint64_t) -> Operand::c64
* Operand(0) -> Operand::zero(num_bytes)

Casts that were previously used for constructor selection have been removed
automatically (e.g. Operand((uint16_t)1) -> Operand::c16(1)).

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11653>
This commit is contained in:
Tony Wasserka 2021-07-13 11:22:46 +02:00 committed by Marge Bot
parent 76554419b3
commit 66e51dc474
19 changed files with 1252 additions and 1119 deletions

View file

@ -98,7 +98,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
ctx.constaddrs[instr->operands[1].constantValue()].add_literal = out.size() + 1;
instr->opcode = aco_opcode::s_add_u32;
instr->operands[1] = Operand(0u);
instr->operands[1] = Operand::zero();
instr->operands[1].setFixed(PhysReg(255));
}
@ -904,20 +904,20 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
instr.reset(bld.sop1(aco_opcode::s_getpc_b64, branch->definitions[0]).instr);
emit_instruction(ctx, out, instr.get());
instr.reset(bld.sop2(aco_opcode::s_addc_u32, def_tmp_lo, op_tmp_lo, Operand(0u)).instr);
instr.reset(bld.sop2(aco_opcode::s_addc_u32, def_tmp_lo, op_tmp_lo, Operand::zero()).instr);
instr->operands[1].setFixed(PhysReg{255}); /* this operand has to be a literal */
emit_instruction(ctx, out, instr.get());
branch->pass_flags = out.size();
instr.reset(
bld.sop2(aco_opcode::s_addc_u32, def_tmp_hi, op_tmp_hi, Operand(backwards ? UINT32_MAX : 0u))
.instr);
instr.reset(bld.sop2(aco_opcode::s_addc_u32, def_tmp_hi, op_tmp_hi,
Operand::c32(backwards ? UINT32_MAX : 0u))
.instr);
emit_instruction(ctx, out, instr.get());
/* restore SCC and clear the LSB of the new PC */
instr.reset(bld.sopc(aco_opcode::s_bitcmp1_b32, def_tmp_lo, op_tmp_lo, Operand(0u)).instr);
instr.reset(bld.sopc(aco_opcode::s_bitcmp1_b32, def_tmp_lo, op_tmp_lo, Operand::zero()).instr);
emit_instruction(ctx, out, instr.get());
instr.reset(bld.sop1(aco_opcode::s_bitset0_b32, def_tmp_lo, Operand(0u)).instr);
instr.reset(bld.sop1(aco_opcode::s_bitset0_b32, def_tmp_lo, Operand::zero()).instr);
emit_instruction(ctx, out, instr.get());
/* create the s_setpc_b64 to jump */

View file

@ -710,7 +710,7 @@ handle_instruction_gfx10(Program* program, Block* cur_block, NOP_ctx_gfx10& ctx,
aco_ptr<SOP1_instruction> s_mov{
create_instruction<SOP1_instruction>(aco_opcode::s_mov_b32, Format::SOP1, 1, 1)};
s_mov->definitions[0] = Definition(sgpr_null, s1);
s_mov->operands[0] = Operand(0u);
s_mov->operands[0] = Operand::zero();
new_instructions.emplace_back(std::move(s_mov));
}
} else if (instr->isSALU()) {

View file

@ -418,7 +418,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
/* exec seems to need to be manually initialized with combined shaders */
if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
start_exec = Operand(-1u, bld.lm == s2);
start_exec = Operand::c32_or_c64(-1u, bld.lm == s2);
bld.copy(Definition(exec, bld.lm), start_exec);
}
@ -754,7 +754,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
if (state == Exact) {
instr.reset(create_instruction<SOP1_instruction>(bld.w64or32(Builder::s_mov),
Format::SOP1, 1, 1));
instr->operands[0] = Operand(0u);
instr->operands[0] = Operand::zero();
instr->definitions[0] = dst;
} else {
std::pair<Operand, uint8_t>& exact_mask = ctx.info[block->index].exec[0];
@ -780,7 +780,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
exit_cond = bld.tmp(s1);
cond =
bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.scc(Definition(exit_cond)),
Definition(exec, bld.lm), Operand(0u), Operand(exec, bld.lm));
Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
num = ctx.info[block->index].exec.size() - 2;
if (!(ctx.info[block->index].exec.back().second & mask_type_exact)) {
@ -924,7 +924,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
}
Temp cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
Definition(exec, bld.lm), Operand(0u), Operand(exec, bld.lm));
Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
for (int i = num - 1; i >= 0; i--) {
Instruction* andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
@ -1047,7 +1047,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
Block& succ = ctx.program->blocks[succ_idx];
if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
bld.copy(Definition(exec, bld.lm), Operand(0u, bld.lm == s2));
bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes()));
}
bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond),
@ -1076,7 +1076,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
Block& succ = ctx.program->blocks[succ_idx];
if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
bld.copy(Definition(exec, bld.lm), Operand(0u, bld.lm == s2));
bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes()));
}
bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond),

File diff suppressed because it is too large. Load diff

View file

@ -665,19 +665,19 @@ public:
{
if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) {
/* 1/2 PI can be an inline constant on GFX8+ */
Operand op((uint32_t)val);
Operand op = Operand::c32(val);
op.setFixed(PhysReg{248});
return op;
}
if (bytes == 8)
return Operand(val);
return Operand::c64(val);
else if (bytes == 4)
return Operand((uint32_t)val);
return Operand::c32(val);
else if (bytes == 2)
return Operand((uint16_t)val);
return Operand::c16(val);
assert(bytes == 1);
return Operand((uint8_t)val);
return Operand::c8(val);
}
static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false,

View file

@ -141,7 +141,7 @@ build_merge_code(Program* program, Block* block, Definition dst, Operand prev, O
if (!cur_is_constant)
bld.sop2(Builder::s_orn2, dst, bld.def(s1, scc), cur, Operand(exec, bld.lm));
else if (cur.constantValue())
bld.copy(dst, Operand(UINT32_MAX, bld.lm == s2));
bld.copy(dst, Operand::c32_or_c64(UINT32_MAX, bld.lm == s2));
else
bld.sop1(Builder::s_not, dst, bld.def(s1, scc), Operand(exec, bld.lm));
} else {
@ -150,7 +150,7 @@ build_merge_code(Program* program, Block* block, Definition dst, Operand prev, O
else if (cur.constantValue())
bld.copy(dst, Operand(exec, bld.lm));
else
bld.copy(dst, Operand(0u, bld.lm == s2));
bld.copy(dst, Operand::zero(bld.lm.bytes()));
}
}
@ -294,9 +294,9 @@ lower_subdword_phis(Program* program, Block* block, aco_ptr<Instruction>& phi)
Temp tmp = bld.tmp(RegClass(RegType::vgpr, phi_src.size()));
insert_before_logical_end(pred, bld.copy(Definition(tmp), phi_src).get_ptr());
Temp new_phi_src = bld.tmp(phi->definitions[0].regClass());
insert_before_logical_end(
pred, bld.pseudo(aco_opcode::p_extract_vector, Definition(new_phi_src), tmp, Operand(0u))
.get_ptr());
insert_before_logical_end(pred, bld.pseudo(aco_opcode::p_extract_vector,
Definition(new_phi_src), tmp, Operand::zero())
.get_ptr());
phi->operands[i].setTemp(new_phi_src);
}

View file

@ -480,13 +480,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
Builder bld(ctx->program, &ctx->instructions);
Operand identity[2];
identity[0] = Operand(get_reduction_identity(reduce_op, 0));
identity[1] = Operand(get_reduction_identity(reduce_op, 1));
identity[0] = Operand::c32(get_reduction_identity(reduce_op, 0));
identity[1] = Operand::c32(get_reduction_identity(reduce_op, 1));
Operand vcndmask_identity[2] = {identity[0], identity[1]};
/* First, copy the source to tmp and set inactive lanes to the identity */
bld.sop1(Builder::s_or_saveexec, Definition(stmp, bld.lm), Definition(scc, s1),
Definition(exec, bld.lm), Operand(UINT64_MAX), Operand(exec, bld.lm));
Definition(exec, bld.lm), Operand::c64(UINT64_MAX), Operand(exec, bld.lm));
for (unsigned i = 0; i < src.size(); i++) {
/* p_exclusive_scan needs it to be a sgpr or inline constant for the v_writelane_b32
@ -530,8 +530,8 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
else
opcode = aco_opcode::v_bfe_u32;
bld.vop3(opcode, Definition(PhysReg{tmp}, v1), Operand(PhysReg{tmp}, v1), Operand(0u),
Operand(8u));
bld.vop3(opcode, Definition(PhysReg{tmp}, v1), Operand(PhysReg{tmp}, v1), Operand::zero(),
Operand::c32(8u));
}
} else if (src.regClass() == v2b) {
if (ctx->program->chip_class >= GFX10 &&
@ -555,8 +555,8 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
else
opcode = aco_opcode::v_bfe_u32;
bld.vop3(opcode, Definition(PhysReg{tmp}, v1), Operand(PhysReg{tmp}, v1), Operand(0u),
Operand(16u));
bld.vop3(opcode, Definition(PhysReg{tmp}, v1), Operand(PhysReg{tmp}, v1), Operand::zero(),
Operand::c32(16u));
}
}
@ -590,7 +590,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
emit_op(ctx, tmp, vtmp, tmp, PhysReg{0}, reduce_op, src.size());
for (unsigned i = 0; i < src.size(); i++)
bld.readlane(Definition(PhysReg{dst.physReg() + i}, s1), Operand(PhysReg{tmp + i}, v1),
Operand(0u));
Operand::zero());
// TODO: it would be more effective to do the last reduction step on SALU
emit_op(ctx, tmp, dst.physReg(), tmp, vtmp, reduce_op, src.size());
reduction_needs_last_op = false;
@ -617,7 +617,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
/* GFX10+ doesn't support row_bcast15 and row_bcast31 */
for (unsigned i = 0; i < src.size(); i++)
bld.vop3(aco_opcode::v_permlanex16_b32, Definition(PhysReg{vtmp + i}, v1),
Operand(PhysReg{tmp + i}, v1), Operand(0u), Operand(0u));
Operand(PhysReg{tmp + i}, v1), Operand::zero(), Operand::zero());
if (cluster_size == 32) {
reduction_needs_last_op = true;
@ -627,7 +627,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
for (unsigned i = 0; i < src.size(); i++)
bld.readlane(Definition(PhysReg{dst.physReg() + i}, s1), Operand(PhysReg{tmp + i}, v1),
Operand(0u));
Operand::zero());
// TODO: it would be more effective to do the last reduction step on SALU
emit_op(ctx, tmp, dst.physReg(), tmp, vtmp, reduce_op, src.size());
break;
@ -650,24 +650,25 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
emit_dpp_mov(ctx, vtmp, tmp, src.size(), dpp_row_sr(1), 0xf, 0xf, true);
/* fill in the gaps in rows 1 and 3 */
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0x10000u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(0x10000u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0x10000u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand::c32(0x10000u));
for (unsigned i = 0; i < src.size(); i++) {
Instruction* perm =
bld.vop3(aco_opcode::v_permlanex16_b32, Definition(PhysReg{vtmp + i}, v1),
Operand(PhysReg{tmp + i}, v1), Operand(0xffffffffu), Operand(0xffffffffu))
Operand(PhysReg{tmp + i}, v1), Operand::c32(0xffffffffu),
Operand::c32(0xffffffffu))
.instr;
perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
}
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand::c64(UINT64_MAX));
if (ctx->program->wave_size == 64) {
/* fill in the gap in row 2 */
for (unsigned i = 0; i < src.size(); i++) {
bld.readlane(Definition(PhysReg{sitmp + i}, s1), Operand(PhysReg{tmp + i}, v1),
Operand(31u));
Operand::c32(31u));
bld.writelane(Definition(PhysReg{vtmp + i}, v1), Operand(PhysReg{sitmp + i}, s1),
Operand(32u), Operand(PhysReg{vtmp + i}, v1));
Operand::c32(32u), Operand(PhysReg{vtmp + i}, v1));
}
}
std::swap(tmp, vtmp);
@ -679,39 +680,41 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
emit_ds_swizzle(bld, vtmp, tmp, src.size(), (1 << 15) | dpp_quad_perm(0, 0, 1, 2));
emit_ds_swizzle(bld, tmp, tmp, src.size(),
ds_pattern_bitmode(0x1F, 0x00, 0x07)); /* mirror(8) */
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0x10101010u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0x10101010u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
for (unsigned i = 0; i < src.size(); i++)
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp + i}, v1),
Operand(PhysReg{tmp + i}, v1));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
emit_ds_swizzle(bld, tmp, tmp, src.size(),
ds_pattern_bitmode(0x1F, 0x00, 0x08)); /* swap(8) */
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0x01000100u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0x01000100u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
for (unsigned i = 0; i < src.size(); i++)
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp + i}, v1),
Operand(PhysReg{tmp + i}, v1));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
emit_ds_swizzle(bld, tmp, tmp, src.size(),
ds_pattern_bitmode(0x1F, 0x00, 0x10)); /* swap(16) */
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand(1u), Operand(16u));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand(1u), Operand(16u));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(1u),
Operand::c32(16u));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(1u),
Operand::c32(16u));
for (unsigned i = 0; i < src.size(); i++)
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp + i}, v1),
Operand(PhysReg{tmp + i}, v1));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
for (unsigned i = 0; i < src.size(); i++) {
bld.writelane(Definition(PhysReg{vtmp + i}, v1), identity[i], Operand(0u),
bld.writelane(Definition(PhysReg{vtmp + i}, v1), identity[i], Operand::zero(),
Operand(PhysReg{vtmp + i}, v1));
bld.readlane(Definition(PhysReg{sitmp + i}, s1), Operand(PhysReg{tmp + i}, v1),
Operand(0u));
Operand::zero());
bld.writelane(Definition(PhysReg{vtmp + i}, v1), Operand(PhysReg{sitmp + i}, s1),
Operand(32u), Operand(PhysReg{vtmp + i}, v1));
identity[i] = Operand(0u); /* prevent further uses of identity */
Operand::c32(32u), Operand(PhysReg{vtmp + i}, v1));
identity[i] = Operand::zero(); /* prevent further uses of identity */
}
std::swap(tmp, vtmp);
}
@ -722,7 +725,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
if (ctx->program->chip_class < GFX10)
assert((identity[i].isConstant() && !identity[i].isLiteral()) ||
identity[i].physReg() == PhysReg{sitmp + i});
bld.writelane(Definition(PhysReg{tmp + i}, v1), identity[i], Operand(0u),
bld.writelane(Definition(PhysReg{tmp + i}, v1), identity[i], Operand::zero(),
Operand(PhysReg{tmp + i}, v1));
}
}
@ -731,38 +734,41 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
assert(cluster_size == ctx->program->wave_size);
if (ctx->program->chip_class <= GFX7) {
emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x1e, 0x00, 0x00));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0xAAAAAAAAu));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0xAAAAAAAAu));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x1c, 0x01, 0x00));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0xCCCCCCCCu));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0xCCCCCCCCu));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x18, 0x03, 0x00));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0xF0F0F0F0u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0xF0F0F0F0u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x10, 0x07, 0x00));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0xFF00FF00u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0xFF00FF00u));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x00, 0x0f, 0x00));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand(16u), Operand(16u));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand(16u), Operand(16u));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(16u),
Operand::c32(16u));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
Operand::c32(16u));
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
for (unsigned i = 0; i < src.size(); i++)
bld.readlane(Definition(PhysReg{sitmp + i}, s1), Operand(PhysReg{tmp + i}, v1),
Operand(31u));
bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand(32u), Operand(32u));
Operand::c32(31u));
bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand::c32(32u),
Operand::c32(32u));
emit_op(ctx, tmp, sitmp, tmp, vtmp, reduce_op, src.size());
break;
}
@ -776,22 +782,26 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
emit_dpp_op(ctx, tmp, tmp, tmp, vtmp, reduce_op, src.size(), dpp_row_sr(8), 0xf, 0xf, false,
identity);
if (ctx->program->chip_class >= GFX10) {
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand(16u), Operand(16u));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand(16u), Operand(16u));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(16u),
Operand::c32(16u));
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
Operand::c32(16u));
for (unsigned i = 0; i < src.size(); i++) {
Instruction* perm =
bld.vop3(aco_opcode::v_permlanex16_b32, Definition(PhysReg{vtmp + i}, v1),
Operand(PhysReg{tmp + i}, v1), Operand(0xffffffffu), Operand(0xffffffffu))
Operand(PhysReg{tmp + i}, v1), Operand::c32(0xffffffffu),
Operand::c32(0xffffffffu))
.instr;
perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
}
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
if (ctx->program->wave_size == 64) {
bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand(32u), Operand(32u));
bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand::c32(32u),
Operand::c32(32u));
for (unsigned i = 0; i < src.size(); i++)
bld.readlane(Definition(PhysReg{sitmp + i}, s1), Operand(PhysReg{tmp + i}, v1),
Operand(31u));
Operand::c32(31u));
emit_op(ctx, tmp, sitmp, tmp, vtmp, reduce_op, src.size());
}
} else {
@ -821,7 +831,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
if (dst.regClass().type() == RegType::sgpr) {
for (unsigned k = 0; k < src.size(); k++) {
bld.readlane(Definition(PhysReg{dst.physReg() + k}, s1), Operand(PhysReg{tmp + k}, v1),
Operand(ctx->program->wave_size - 1));
Operand::c32(ctx->program->wave_size - 1));
}
} else if (dst.physReg() != tmp) {
for (unsigned k = 0; k < src.size(); k++) {
@ -875,14 +885,14 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
/* Save EXEC */
bld.sop1(aco_opcode::s_mov_b64, tmp_exec, Operand(exec, s2));
/* Set EXEC to enable LO lanes only */
bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand(32u), Operand(0u));
bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand::c32(32u), Operand::zero());
/* LO: Copy data from low lanes 0-31 to shared vgpr */
bld.vop1(aco_opcode::v_mov_b32, Definition(shared_vgpr_lo, v1), input_data);
/* LO: bpermute shared vgpr (high lanes' data) */
bld.ds(aco_opcode::ds_bpermute_b32, Definition(shared_vgpr_hi, v1), index_x4,
Operand(shared_vgpr_hi, v1));
/* Set EXEC to enable HI lanes only */
bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand(32u), Operand(32u));
bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand::c32(32u), Operand::c32(32u));
/* HI: bpermute shared vgpr (low lanes' data) */
bld.ds(aco_opcode::ds_bpermute_b32, Definition(shared_vgpr_lo, v1), index_x4,
Operand(shared_vgpr_lo, v1));
@ -904,7 +914,8 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
* it's not there already */
if (input_data.physReg().byte()) {
unsigned right_shift = input_data.physReg().byte() * 8;
bld.vop2(aco_opcode::v_lshrrev_b32, dst, Operand(right_shift), Operand(dst.physReg(), v1));
bld.vop2(aco_opcode::v_lshrrev_b32, dst, Operand::c32(right_shift),
Operand(dst.physReg(), v1));
}
}
@ -938,9 +949,10 @@ emit_gfx6_bpermute(Program* program, aco_ptr<Instruction>& instr, Builder& bld)
*/
for (unsigned n = 0; n < program->wave_size; ++n) {
/* Activate the lane which has N for its source index */
bld.vopc(aco_opcode::v_cmpx_eq_u32, Definition(exec, bld.lm), clobber_vcc, Operand(n), index);
bld.vopc(aco_opcode::v_cmpx_eq_u32, Definition(exec, bld.lm), clobber_vcc, Operand::c32(n),
index);
/* Read the data from lane N */
bld.readlane(Definition(vcc, s1), input, Operand(n));
bld.readlane(Definition(vcc, s1), input, Operand::c32(n));
/* On the active lane, move the data we read from lane N to the destination VGPR */
bld.vop1(aco_opcode::v_mov_b32, dst, Operand(vcc, s1));
/* Restore original EXEC */
@ -1026,15 +1038,15 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
} else if (util_bitreverse(imm) <= 64 || util_bitreverse(imm) >= 0xFFFFFFF0) {
uint32_t rev = util_bitreverse(imm);
if (dst.regClass() == s1)
bld.sop1(aco_opcode::s_brev_b32, dst, Operand(rev));
bld.sop1(aco_opcode::s_brev_b32, dst, Operand::c32(rev));
else
bld.vop1(aco_opcode::v_bfrev_b32, dst, Operand(rev));
bld.vop1(aco_opcode::v_bfrev_b32, dst, Operand::c32(rev));
return;
} else if (dst.regClass() == s1 && imm != 0) {
unsigned start = (ffs(imm) - 1) & 0x1f;
unsigned size = util_bitcount(imm) & 0x1f;
if ((((1u << size) - 1u) << start) == imm) {
bld.sop2(aco_opcode::s_bfm_b32, dst, Operand(size), Operand(start));
bld.sop2(aco_opcode::s_bfm_b32, dst, Operand::c32(size), Operand::c32(start));
return;
}
}
@ -1051,10 +1063,10 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
bld.sop1(aco_opcode::s_mov_b64, dst, op);
} else if (dst.regClass() == v2) {
if (Operand::is_constant_representable(op.constantValue64(), 8, true, false)) {
bld.vop3(aco_opcode::v_lshrrev_b64, dst, Operand(0u), op);
bld.vop3(aco_opcode::v_lshrrev_b64, dst, Operand::zero(), op);
} else {
assert(Operand::is_constant_representable(op.constantValue64(), 8, false, true));
bld.vop3(aco_opcode::v_ashrrev_i64, dst, Operand(0u), op);
bld.vop3(aco_opcode::v_ashrrev_i64, dst, Operand::zero(), op);
}
} else if (dst.regClass() == v1) {
bld.vop1(aco_opcode::v_mov_b32, dst, op);
@ -1063,13 +1075,13 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
if (dst.regClass() == v1b && ctx->program->chip_class >= GFX9) {
uint8_t val = op.constantValue();
Operand op32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u));
Operand op32 = Operand::c32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u));
if (op32.isLiteral()) {
uint32_t a = (uint32_t)int8_mul_table[val * 2];
uint32_t b = (uint32_t)int8_mul_table[val * 2 + 1];
bld.vop2_sdwa(aco_opcode::v_mul_u32_u24, dst,
Operand(a | (a & 0x80u ? 0xffffff00u : 0x0u)),
Operand(b | (b & 0x80u ? 0xffffff00u : 0x0u)));
Operand::c32(a | (a & 0x80u ? 0xffffff00u : 0x0u)),
Operand::c32(b | (b & 0x80u ? 0xffffff00u : 0x0u)));
} else {
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op32);
}
@ -1078,9 +1090,9 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
/* use v_mov_b32 to avoid possible issues with denormal flushing or
* NaN. v_add_f16 is still needed for float constants. */
uint32_t val32 = (int32_t)(int16_t)op.constantValue();
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, Operand(val32));
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, Operand::c32(val32));
} else {
bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand(0u));
bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand::zero());
}
} else if (dst.regClass() == v2b && ctx->program->chip_class >= GFX10 &&
(ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in)) {
@ -1101,9 +1113,9 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
dst = Definition(PhysReg(dst.physReg().reg()), v1);
Operand def_op(dst.physReg(), v1);
if (val != mask)
bld.vop2(aco_opcode::v_and_b32, dst, Operand(~mask), def_op);
bld.vop2(aco_opcode::v_and_b32, dst, Operand::c32(~mask), def_op);
if (val != 0)
bld.vop2(aco_opcode::v_or_b32, dst, Operand(val), def_op);
bld.vop2(aco_opcode::v_or_b32, dst, Operand::c32(val), def_op);
}
}
}
@ -1124,14 +1136,14 @@ do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* pres
split_copy(ctx, offset, &def, &op, copy, false, 8);
if (def.physReg() == scc) {
bld.sopc(aco_opcode::s_cmp_lg_i32, def, op, Operand(0u));
bld.sopc(aco_opcode::s_cmp_lg_i32, def, op, Operand::zero());
*preserve_scc = true;
} else if (op.isConstant()) {
copy_constant(ctx, bld, def, op);
} else if (def.regClass() == v1) {
bld.vop1(aco_opcode::v_mov_b32, def, op);
} else if (def.regClass() == v2) {
bld.vop3(aco_opcode::v_lshrrev_b64, def, Operand(0u), op);
bld.vop3(aco_opcode::v_lshrrev_b64, def, Operand::zero(), op);
} else if (def.regClass() == s1) {
bld.sop1(aco_opcode::s_mov_b32, def, op);
} else if (def.regClass() == s2) {
@ -1139,7 +1151,7 @@ do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* pres
} else if (def.regClass().is_subdword() && ctx->program->chip_class < GFX8) {
if (op.physReg().byte()) {
assert(def.physReg().byte() == 0);
bld.vop2(aco_opcode::v_lshrrev_b32, def, Operand(op.physReg().byte() * 8), op);
bld.vop2(aco_opcode::v_lshrrev_b32, def, Operand::c32(op.physReg().byte() * 8), op);
} else if (def.physReg().byte()) {
assert(op.physReg().byte() == 0);
/* preserve the target's lower half */
@ -1151,24 +1163,24 @@ do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* pres
Definition(lo_reg, RegClass::get(RegType::vgpr, lo_half.bytes() + op.bytes()));
if (def.physReg().reg() == op.physReg().reg()) {
bld.vop2(aco_opcode::v_and_b32, lo_half, Operand((1 << bits) - 1u),
bld.vop2(aco_opcode::v_and_b32, lo_half, Operand::c32((1 << bits) - 1u),
Operand(lo_reg, lo_half.regClass()));
if (def.physReg().byte() == 1) {
bld.vop2(aco_opcode::v_mul_u32_u24, dst, Operand((1 << bits) + 1u), op);
bld.vop2(aco_opcode::v_mul_u32_u24, dst, Operand::c32((1 << bits) + 1u), op);
} else if (def.physReg().byte() == 2) {
bld.vop2(aco_opcode::v_cvt_pk_u16_u32, dst, Operand(lo_reg, v2b), op);
} else if (def.physReg().byte() == 3) {
bld.sop1(aco_opcode::s_mov_b32, Definition(scratch_sgpr, s1),
Operand((1 << bits) + 1u));
Operand::c32((1 << bits) + 1u));
bld.vop3(aco_opcode::v_mul_lo_u32, dst, Operand(scratch_sgpr, s1), op);
}
} else {
lo_half.setFixed(lo_half.physReg().advance(4 - def.physReg().byte()));
bld.vop2(aco_opcode::v_lshlrev_b32, lo_half, Operand(32 - bits),
bld.vop2(aco_opcode::v_lshlrev_b32, lo_half, Operand::c32(32 - bits),
Operand(lo_reg, lo_half.regClass()));
bld.vop3(aco_opcode::v_alignbyte_b32, dst, op,
Operand(lo_half.physReg(), lo_half.regClass()),
Operand(4 - def.physReg().byte()));
Operand::c32(4 - def.physReg().byte()));
}
} else {
bld.vop1(aco_opcode::v_mov_b32, def, op);
@ -1241,7 +1253,8 @@ do_swap(lower_context* ctx, Builder& bld, const copy_operation& copy, bool prese
PhysReg other = op.physReg() == scc ? def.physReg() : op.physReg();
bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), Operand(scc, s1));
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(other, s1), Operand(0u));
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(other, s1),
Operand::zero());
bld.sop1(aco_opcode::s_mov_b32, Definition(other, s1), Operand(pi->scratch_sgpr, s1));
} else if (def.regClass() == s1) {
if (preserve_scc) {
@ -1261,10 +1274,10 @@ do_swap(lower_context* ctx, Builder& bld, const copy_operation& copy, bool prese
bld.sop2(aco_opcode::s_xor_b64, op_as_def, Definition(scc, s1), op, def_as_op);
if (preserve_scc)
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(pi->scratch_sgpr, s1),
Operand(0u));
Operand::zero());
} else if (def.bytes() == 2 && def.physReg().reg() == op.physReg().reg()) {
bld.vop3(aco_opcode::v_alignbyte_b32, Definition(def.physReg(), v1), def_as_op, op,
Operand(2u));
Operand::c32(2u));
} else {
assert(def.regClass().is_subdword());
bld.vop2_sdwa(aco_opcode::v_xor_b32, op_as_def, op, def_as_op);
@ -1289,7 +1302,7 @@ void
do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Operand hi)
{
if (lo.isConstant() && hi.isConstant()) {
copy_constant(ctx, bld, def, Operand(lo.constantValue() | (hi.constantValue() << 16)));
copy_constant(ctx, bld, def, Operand::c32(lo.constantValue() | (hi.constantValue() << 16)));
return;
}
@ -1306,9 +1319,9 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
/* a single alignbyte can be sufficient: hi can be a 32-bit integer constant */
if (lo.physReg().byte() == 2 && hi.physReg().byte() == 0 &&
(!hi.isConstant() || !Operand(hi.constantValue()).isLiteral() ||
(!hi.isConstant() || !Operand::c32(hi.constantValue()).isLiteral() ||
ctx->program->chip_class >= GFX10)) {
bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand(2u));
bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand::c32(2u));
return;
}
@ -1318,19 +1331,20 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
if (lo.isConstant()) {
/* move hi and zero low bits */
if (hi.physReg().byte() == 0)
bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand(16u), hi);
bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand::c32(16u), hi);
else
bld.vop2(aco_opcode::v_and_b32, def_hi, Operand(~0xFFFFu), hi);
bld.vop2(aco_opcode::v_or_b32, def, Operand(lo.constantValue()), Operand(def.physReg(), v1));
bld.vop2(aco_opcode::v_and_b32, def_hi, Operand::c32(~0xFFFFu), hi);
bld.vop2(aco_opcode::v_or_b32, def, Operand::c32(lo.constantValue()),
Operand(def.physReg(), v1));
return;
}
if (hi.isConstant()) {
/* move lo and zero high bits */
if (lo.physReg().byte() == 2)
bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand(16u), lo);
bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand::c32(16u), lo);
else
bld.vop2(aco_opcode::v_and_b32, def_lo, Operand(0xFFFFu), lo);
bld.vop2(aco_opcode::v_or_b32, def, Operand(hi.constantValue() << 16u),
bld.vop2(aco_opcode::v_and_b32, def_lo, Operand::c32(0xFFFFu), lo);
bld.vop2(aco_opcode::v_or_b32, def, Operand::c32(hi.constantValue() << 16u),
Operand(def.physReg(), v1));
return;
}
@ -1338,12 +1352,12 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
if (lo.physReg().reg() == def.physReg().reg()) {
/* lo is in the high bits of def */
assert(lo.physReg().byte() == 2);
bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand(16u), lo);
bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand::c32(16u), lo);
lo.setFixed(def.physReg());
} else if (hi.physReg() == def.physReg()) {
/* hi is in the low bits of def */
assert(hi.physReg().byte() == 0);
bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand(16u), hi);
bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand::c32(16u), hi);
hi.setFixed(def.physReg().advance(2));
} else if (ctx->program->chip_class >= GFX8) {
/* either lo or hi can be placed with just a v_mov */
@ -1368,21 +1382,21 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
if (lo.physReg().byte() != hi.physReg().byte()) {
/* | xx lo | hi xx | => | lo hi | lo hi | */
assert(lo.physReg().byte() == 0 && hi.physReg().byte() == 2);
bld.vop3(aco_opcode::v_alignbyte_b32, def, lo, hi, Operand(2u));
bld.vop3(aco_opcode::v_alignbyte_b32, def, lo, hi, Operand::c32(2u));
lo = Operand(def_hi.physReg(), v2b);
hi = Operand(def_lo.physReg(), v2b);
} else if (lo.physReg().byte() == 0) {
/* | xx hi | xx lo | => | xx hi | lo 00 | */
bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand(16u), lo);
bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand::c32(16u), lo);
lo = Operand(def_hi.physReg(), v2b);
} else {
/* | hi xx | lo xx | => | 00 hi | lo xx | */
assert(hi.physReg().byte() == 2);
bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand(16u), hi);
bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand::c32(16u), hi);
hi = Operand(def_lo.physReg(), v2b);
}
/* perform the alignbyte */
bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand(2u));
bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand::c32(2u));
}
void
@ -1816,7 +1830,7 @@ emit_set_mode(Builder& bld, float_mode new_mode, bool set_round, bool set_denorm
} else if (set_round || set_denorm) {
/* "((size - 1) << 11) | register" (MODE is encoded as register 1) */
Instruction* instr =
bld.sopk(aco_opcode::s_setreg_imm32_b32, Operand(new_mode.val), (7 << 11) | 1).instr;
bld.sopk(aco_opcode::s_setreg_imm32_b32, Operand::c8(new_mode.val), (7 << 11) | 1).instr;
/* has to be a literal */
instr->operands[0].setFixed(PhysReg{255});
}
@ -1991,10 +2005,10 @@ lower_to_hw_instr(Program* program)
for (unsigned i = 0; i < instr->operands[2].size(); i++) {
Operand src =
instr->operands[2].isConstant()
? Operand(uint32_t(instr->operands[2].constantValue64() >> (32 * i)))
? Operand::c32(uint32_t(instr->operands[2].constantValue64() >> (32 * i)))
: Operand(PhysReg{instr->operands[2].physReg() + i}, s1);
bld.writelane(bld.def(v1, instr->operands[0].physReg()), src,
Operand(instr->operands[1].constantValue() + i),
Operand::c32(instr->operands[1].constantValue() + i),
instr->operands[0]);
}
break;
@ -2003,7 +2017,8 @@ lower_to_hw_instr(Program* program)
assert(instr->operands[0].regClass() == v1.as_linear());
for (unsigned i = 0; i < instr->definitions[0].size(); i++)
bld.readlane(bld.def(s1, PhysReg{instr->definitions[0].physReg() + i}),
instr->operands[0], Operand(instr->operands[1].constantValue() + i));
instr->operands[0],
Operand::c32(instr->operands[1].constantValue() + i));
break;
}
case aco_opcode::p_as_uniform: {
@ -2037,11 +2052,11 @@ lower_to_hw_instr(Program* program)
case aco_opcode::p_constaddr: {
unsigned id = instr->definitions[0].tempId();
PhysReg reg = instr->definitions[0].physReg();
bld.sop1(aco_opcode::p_constaddr_getpc, instr->definitions[0], Operand(id));
bld.sop1(aco_opcode::p_constaddr_getpc, instr->definitions[0], Operand::c32(id));
bld.sop2(aco_opcode::p_constaddr_addlo, Definition(reg, s1), bld.def(s1, scc),
Operand(reg, s1), Operand(id));
Operand(reg, s1), Operand::c32(id));
bld.sop2(aco_opcode::s_addc_u32, Definition(reg.advance(4), s1), bld.def(s1, scc),
Operand(reg.advance(4), s1), Operand(0u), Operand(scc, s1));
Operand(reg.advance(4), s1), Operand::zero(), Operand(scc, s1));
break;
}
case aco_opcode::p_extract: {
@ -2060,22 +2075,22 @@ lower_to_hw_instr(Program* program)
if (dst.regClass() == s1) {
if (offset == (32 - bits)) {
bld.sop2(signext ? aco_opcode::s_ashr_i32 : aco_opcode::s_lshr_b32, dst,
bld.def(s1, scc), op, Operand(offset));
bld.def(s1, scc), op, Operand::c32(offset));
} else if (offset == 0 && signext && (bits == 8 || bits == 16)) {
bld.sop1(bits == 8 ? aco_opcode::s_sext_i32_i8 : aco_opcode::s_sext_i32_i16,
dst, op);
} else {
bld.sop2(signext ? aco_opcode::s_bfe_i32 : aco_opcode::s_bfe_u32, dst,
bld.def(s1, scc), op, Operand((bits << 16) | offset));
bld.def(s1, scc), op, Operand::c32((bits << 16) | offset));
}
} else if (dst.regClass() == v1 || ctx.program->chip_class <= GFX7) {
assert(op.physReg().byte() == 0 && dst.physReg().byte() == 0);
if (offset == (32 - bits) && op.regClass() != s1) {
bld.vop2(signext ? aco_opcode::v_ashrrev_i32 : aco_opcode::v_lshrrev_b32, dst,
Operand(offset), op);
Operand::c32(offset), op);
} else {
bld.vop3(signext ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32, dst, op,
Operand(offset), Operand(bits));
Operand::c32(offset), Operand::c32(bits));
}
} else if (dst.regClass() == v2b) {
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
@ -2105,21 +2120,22 @@ lower_to_hw_instr(Program* program)
if (dst.regClass() == s1) {
if (offset == (32 - bits)) {
bld.sop2(aco_opcode::s_lshl_b32, dst, bld.def(s1, scc), op, Operand(offset));
bld.sop2(aco_opcode::s_lshl_b32, dst, bld.def(s1, scc), op,
Operand::c32(offset));
} else if (offset == 0) {
bld.sop2(aco_opcode::s_bfe_u32, dst, bld.def(s1, scc), op,
Operand(bits << 16));
Operand::c32(bits << 16));
} else {
bld.sop2(aco_opcode::s_bfe_u32, dst, bld.def(s1, scc), op,
Operand(bits << 16));
Operand::c32(bits << 16));
bld.sop2(aco_opcode::s_lshl_b32, dst, bld.def(s1, scc),
Operand(dst.physReg(), s1), Operand(offset));
Operand(dst.physReg(), s1), Operand::c32(offset));
}
} else if (dst.regClass() == v1 || ctx.program->chip_class <= GFX7) {
if (offset == (dst.bytes() * 8u - bits)) {
bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand(offset), op);
bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op);
} else if (offset == 0) {
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand(0u), Operand(bits));
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
} else if (program->chip_class >= GFX9 ||
(op.regClass() != s1 && program->chip_class >= GFX8)) {
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
@ -2131,8 +2147,8 @@ lower_to_hw_instr(Program* program)
sdwa->dst_sel = (bits == 8 ? sdwa_ubyte0 : sdwa_uword0) + (offset / bits);
bld.insert(std::move(sdwa));
} else {
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand(0u), Operand(bits));
bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand(offset),
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset),
Operand(dst.physReg(), v1));
}
} else {

View file

@ -189,7 +189,7 @@ struct ssa_info {
void set_constant(chip_class chip, uint64_t constant)
{
Operand op16((uint16_t)constant);
Operand op16 = Operand::c16(constant);
Operand op32 = Operand::get_const(chip, constant, 4);
add_label(label_literal);
val = constant;
@ -204,7 +204,7 @@ struct ssa_info {
add_label(label_constant_64bit);
if (label & label_constant_64bit) {
val = Operand(constant).constantValue();
val = Operand::c64(constant).constantValue();
if (val != constant)
label &= ~(label_literal | label_constant_16bit | label_constant_32bit);
}
@ -831,7 +831,7 @@ Operand
get_constant_op(opt_ctx& ctx, ssa_info info, uint32_t bits)
{
/* Build a constant Operand carrying info.val for a use that is `bits` bits wide. */
/* 64-bit case. The two return lines that follow are the same statement before and
 * after the migration: first the deprecated Operand(value, bool) constructor, then
 * its Operand::c32_or_c64 replacement. */
if (bits == 64)
return Operand(info.val, true);
return Operand::c32_or_c64(info.val, true);
/* Any other width: let Operand::get_const choose the encoding from the chip class
 * and the operand size, passed as bits / 8. */
return Operand::get_const(ctx.program->chip_class, info.val, bits / 8u);
}
@ -1161,7 +1161,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
mubuf.offen = false;
continue;
} else if (i == 2 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
instr->operands[2] = Operand((uint32_t)0);
instr->operands[2] = Operand::c32(0);
mubuf.offset += info.val;
continue;
} else if (mubuf.offen && i == 1 &&
@ -1232,7 +1232,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) ||
(ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) ||
(ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) {
instr->operands[i] = Operand(info.val);
instr->operands[i] = Operand::c32(info.val);
continue;
} else if (i == 1 &&
parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) &&
@ -1243,13 +1243,13 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
continue;
}
if (soe) {
smem.operands[1] = Operand(offset);
smem.operands[1] = Operand::c32(offset);
smem.operands.back() = Operand(base);
} else {
SMEM_instruction* new_instr = create_instruction<SMEM_instruction>(
smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
new_instr->operands[0] = smem.operands[0];
new_instr->operands[1] = Operand(offset);
new_instr->operands[1] = Operand::c32(offset);
if (smem.definitions.empty())
new_instr->operands[2] = smem.operands[2];
new_instr->operands.back() = Operand(base);
@ -2392,14 +2392,14 @@ combine_add_or_then_and_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
(extins->operands[1].constantValue() + 1) * extins->operands[2].constantValue() == 32) {
op = new_op_lshl;
operands[1] =
Operand(extins->operands[1].constantValue() * extins->operands[2].constantValue());
Operand::c32(extins->operands[1].constantValue() * extins->operands[2].constantValue());
} else if (is_or &&
(extins->opcode == aco_opcode::p_insert ||
(extins->opcode == aco_opcode::p_extract &&
extins->operands[3].constantEquals(0))) &&
extins->operands[1].constantEquals(0)) {
op = aco_opcode::v_and_or_b32;
operands[1] = Operand(extins->operands[2].constantEquals(8) ? 0xffu : 0xffffu);
operands[1] = Operand::c32(extins->operands[2].constantEquals(8) ? 0xffu : 0xffffu);
} else {
continue;
}
@ -2611,7 +2611,7 @@ combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode new_op
ctx.uses.push_back(0);
}
new_instr->definitions[1].setHint(vcc);
new_instr->operands[0] = Operand(0u);
new_instr->operands[0] = Operand::zero();
new_instr->operands[1] = instr->operands[!i];
new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
instr = std::move(new_instr);
@ -3045,7 +3045,7 @@ combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (ctx.uses[instr->operands[i].tempId()])
ctx.uses[op_instr->operands[2].tempId()]++;
new_instr->operands[0] = Operand(0u);
new_instr->operands[0] = Operand::zero();
new_instr->operands[1] = instr->operands[!i];
new_instr->operands[2] = Operand(op_instr->operands[2]);
new_instr->definitions[0] = instr->definitions[0];
@ -3092,7 +3092,7 @@ combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
aco_ptr<VOP3_instruction> new_instr{
create_instruction<VOP3_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3, 3, 1)};
new_instr->operands[0] = op_instr->operands[!shift_op_idx];
new_instr->operands[1] = Operand(multiplier);
new_instr->operands[1] = Operand::c32(multiplier);
new_instr->operands[2] = instr->operands[!i];
new_instr->definitions[0] = instr->definitions[0];
instr = std::move(new_instr);
@ -3479,7 +3479,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
aco_ptr<VOP2_instruction> new_instr{
create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
new_instr->operands[0] = Operand(0u);
new_instr->operands[0] = Operand::zero();
new_instr->operands[1] = instr->operands[!i];
new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
new_instr->definitions[0] = instr->definitions[0];
@ -3726,7 +3726,8 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
aco_ptr<Pseudo_instruction> extract{create_instruction<Pseudo_instruction>(
aco_opcode::p_extract_vector, Format::PSEUDO, 2, 1)};
extract->operands[0] = instr->operands[0];
extract->operands[1] = Operand((uint32_t)split_offset / instr->definitions[idx].bytes());
extract->operands[1] =
Operand::c32((uint32_t)split_offset / instr->definitions[idx].bytes());
extract->definitions[0] = instr->definitions[idx];
instr.reset(extract.release());
}
@ -3885,7 +3886,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (ctx.uses[op.tempId()] < literal_uses) {
is_literal_sgpr = op.getTemp().type() == RegType::sgpr;
mask = 0;
literal = Operand(ctx.info[op.tempId()].val);
literal = Operand::c32(ctx.info[op.tempId()].val);
literal_uses = ctx.uses[op.tempId()];
literal_id = op.tempId();
}
@ -3950,7 +3951,8 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
new_mad->operands[0] = instr->operands[1 - info->literal_idx];
new_mad->operands[1] = instr->operands[2];
}
new_mad->operands[2] = Operand(ctx.info[instr->operands[info->literal_idx].tempId()].val);
new_mad->operands[2] =
Operand::c32(ctx.info[instr->operands[info->literal_idx].tempId()].val);
new_mad->definitions[0] = instr->definitions[0];
ctx.instructions.emplace_back(std::move(new_mad));
return;
@ -3963,7 +3965,7 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
Operand op = instr->operands[i];
unsigned bits = get_operand_size(instr, i);
if (op.isTemp() && ctx.info[op.tempId()].is_literal(bits) && ctx.uses[op.tempId()] == 0) {
Operand literal(ctx.info[op.tempId()].val);
Operand literal = Operand::c32(ctx.info[op.tempId()].val);
if (instr->isVALU() && i > 0 && instr->format != Format::VOP3P)
to_VOP3(ctx, instr);
instr->operands[i] = literal;

View file

@ -243,7 +243,7 @@ try_optimize_scc_nocompare(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
ctx.uses[instr->operands[0].tempId()]++;
/* Set the opcode and operand to 32-bit */
instr->operands[1] = Operand(0u);
instr->operands[1] = Operand::zero();
instr->opcode =
(instr->opcode == aco_opcode::s_cmp_eq_u32 || instr->opcode == aco_opcode::s_cmp_eq_i32 ||
instr->opcode == aco_opcode::s_cmp_eq_u64)

View file

@ -325,7 +325,7 @@ do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
} else {
aco_ptr<Pseudo_instruction> reload{
create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
reload->operands[0] = Operand(spill_id);
reload->operands[0] = Operand::c32(spill_id);
reload->definitions[0] = Definition(new_name);
ctx.is_reloaded[spill_id] = true;
return reload;
@ -863,7 +863,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
aco_ptr<Pseudo_instruction> spill{
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
spill->operands[0] = spill_op;
spill->operands[1] = Operand(spill_id);
spill->operands[1] = Operand::c32(spill_id);
Block& pred = ctx.program->blocks[pred_idx];
unsigned idx = pred.instructions.size();
do {
@ -920,7 +920,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
aco_ptr<Pseudo_instruction> spill{
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
spill->operands[0] = Operand(var);
spill->operands[1] = Operand(pair.second);
spill->operands[1] = Operand::c32(pair.second);
Block& pred = ctx.program->blocks[pred_idx];
unsigned idx = pred.instructions.size();
do {
@ -1204,7 +1204,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block,
aco_ptr<Pseudo_instruction> spill{
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
spill->operands[0] = Operand(to_spill);
spill->operands[1] = Operand(spill_id);
spill->operands[1] = Operand::c32(spill_id);
instructions.emplace_back(std::move(spill));
}
}
@ -1353,11 +1353,11 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
Temp private_segment_buffer = ctx.program->private_segment_buffer;
if (ctx.program->stage != compute_cs)
private_segment_buffer =
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand(0u));
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
if (offset)
scratch_offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc),
scratch_offset, Operand(offset));
scratch_offset, Operand::c32(offset));
uint32_t rsrc_conf =
S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx.program->wave_size == 64 ? 3 : 2);
@ -1374,8 +1374,8 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
if (ctx.program->chip_class <= GFX8)
rsrc_conf |= S_008F0C_ELEMENT_SIZE(1);
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer, Operand(-1u),
Operand(rsrc_conf));
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
Operand::c32(-1u), Operand::c32(rsrc_conf));
}
void
@ -1666,7 +1666,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
Pseudo_instruction* spill =
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
spill->operands[1] = Operand(spill_slot % ctx.wave_size);
spill->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
spill->operands[2] = (*it)->operands[0];
instructions.emplace_back(aco_ptr<Instruction>(spill));
}
@ -1750,7 +1750,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
Pseudo_instruction* reload = create_instruction<Pseudo_instruction>(
aco_opcode::p_reload, Format::PSEUDO, 2, 1);
reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
reload->operands[1] = Operand(spill_slot % ctx.wave_size);
reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
reload->definitions[0] = (*it)->definitions[0];
instructions.emplace_back(aco_ptr<Instruction>(reload));
}

View file

@ -238,34 +238,35 @@ void finish_assembler_test()
/* Emit a p_unit_test pseudo-instruction whose first operand is the constant `i`.
 * `tmp` is passed as an additional operand only when tmp.id() is non-zero; otherwise
 * the instruction is emitted with the index alone.
 * Each bld.pseudo call is listed twice: the deprecated Operand(i) spelling followed
 * by its Operand::c32(i) replacement. */
void writeout(unsigned i, Temp tmp)
{
if (tmp.id())
bld.pseudo(aco_opcode::p_unit_test, Operand(i), tmp);
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
else
bld.pseudo(aco_opcode::p_unit_test, Operand(i));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
}
/* Emit a p_unit_test pseudo-instruction with the constant `i` as first operand and
 * the builder result `res` as second.
 * The call is listed twice: deprecated Operand(i), then Operand::c32(i). */
void writeout(unsigned i, aco::Builder::Result res)
{
bld.pseudo(aco_opcode::p_unit_test, Operand(i), res);
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
}
/* Emit a p_unit_test pseudo-instruction with the constant `i` as first operand and
 * the caller-supplied operand `op` as second.
 * The call is listed twice: deprecated Operand(i), then Operand::c32(i). */
void writeout(unsigned i, Operand op)
{
bld.pseudo(aco_opcode::p_unit_test, Operand(i), op);
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
}
/* Emit a p_unit_test pseudo-instruction with the constant `i` as first operand,
 * followed by the two caller-supplied operands `op0` and `op1` in that order.
 * The call is listed twice: deprecated Operand(i), then Operand::c32(i). */
void writeout(unsigned i, Operand op0, Operand op1)
{
bld.pseudo(aco_opcode::p_unit_test, Operand(i), op0, op1);
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
}
/* Emit a v_mul_f32 of `src` by the 32-bit constant 0xbf800000 (the IEEE-754
 * single-precision encoding of -1.0) into a fresh v1 definition, and return the result.
 * The return statement is listed twice: deprecated Operand(...) form first, then the
 * Operand::c32(...) replacement. */
Temp fneg(Temp src)
{
return bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0xbf800000u), src);
return bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), src);
}
/* Emit a VOP3-encoded v_mul_f32 of `src` by the 32-bit constant 0x3f800000 (the
 * IEEE-754 single-precision encoding of 1.0) and set the abs flag at index 1 on the
 * resulting instruction before returning it.
 * The builder call is listed twice: the deprecated Operand(...) form first, then the
 * reformatted Operand::c32(...) replacement. */
Temp fabs(Temp src)
{
Builder::Result res = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x3f800000u), src);
Builder::Result res =
bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), src);
/* NOTE(review): index 1 presumably refers to `src`, the second source passed above — confirm. */
res.instr->vop3().abs[1] = true;
return res;
}

View file

@ -220,9 +220,9 @@ BEGIN_TEST(assembler.long_jump.constaddr)
//>> s_getpc_b64 s[0:1] ; be801f00
//! s_add_u32 s0, s0, 0xe0 ; 8000ff00 000000e0
bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand(0u));
bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero());
bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
Operand(PhysReg(0), s1), Operand(0u));
Operand(PhysReg(0), s1), Operand::zero());
program->blocks[2].linear_preds.push_back(0u);
program->blocks[2].linear_preds.push_back(1u);
@ -238,9 +238,9 @@ BEGIN_TEST(assembler.v_add3)
//~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
//~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
add3->operands[0] = Operand(0u);
add3->operands[1] = Operand(0u);
add3->operands[2] = Operand(0u);
add3->operands[0] = Operand::zero();
add3->operands[1] = Operand::zero();
add3->operands[2] = Operand::zero();
add3->definitions[0] = Definition(PhysReg(0), v1);
bld.insert(std::move(add3));
@ -256,9 +256,9 @@ BEGIN_TEST(assembler.v_add3_clamp)
//~gfx9>> integer addition + clamp ; d1ff8000 02010080
//~gfx10>> integer addition + clamp ; d76d8000 02010080
aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
add3->operands[0] = Operand(0u);
add3->operands[1] = Operand(0u);
add3->operands[2] = Operand(0u);
add3->operands[0] = Operand::zero();
add3->operands[1] = Operand::zero();
add3->operands[2] = Operand::zero();
add3->definitions[0] = Definition(PhysReg(0), v1);
add3->clamp = 1;
bld.insert(std::move(add3));

View file

@ -30,25 +30,27 @@ static void create_mubuf(Temp desc=Temp(0, s8), unsigned vtx_binding=0)
{
Operand desc_op(desc);
desc_op.setFixed(PhysReg(0));
bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1),
desc_op, Operand(PhysReg(256), v1),
Operand(0u), 0, false).instr->mubuf().vtx_binding = vtx_binding;
bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1), desc_op,
Operand(PhysReg(256), v1), Operand::zero(), 0, false)
.instr->mubuf()
.vtx_binding = vtx_binding;
}
/* Emit a buffer_store_dword through the builder using only fixed physical registers:
 * PhysReg(0) as an s4 operand, PhysReg(256) as a v1 operand twice, and a zero constant,
 * followed by the trailing arguments 0 and false.
 * NOTE(review): the trailing arguments are presumably the immediate offset and the
 * offen flag — confirm against the builder signature.
 * The call is listed twice: deprecated Operand(0u) form first, then the reformatted
 * Operand::zero() replacement. */
static void create_mubuf_store()
{
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4),
Operand(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(0u), 0, false);
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(PhysReg(256), v1),
Operand(PhysReg(256), v1), Operand::zero(), 0, false);
}
/* Emit a tbuffer_load_format_x into PhysReg(256) (v1) and tag the created instruction
 * with `vtx_binding`.
 *
 * desc:        temporary used as the descriptor operand; defaults to Temp(0, s8).
 * vtx_binding: value stored in the instruction's mtbuf().vtx_binding field; defaults to 0.
 *
 * The builder call is listed twice: the deprecated Operand(0u) form first, then the
 * reformatted Operand::zero() replacement. */
static void create_mtbuf(Temp desc=Temp(0, s8), unsigned vtx_binding=0)
{
Operand desc_op(desc);
/* Pin the descriptor operand to physical register 0. */
desc_op.setFixed(PhysReg(0));
bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(PhysReg(256), v1),
desc_op, Operand(PhysReg(256), v1), Operand(0u),
V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false)
.instr->mtbuf().vtx_binding = vtx_binding;
bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(PhysReg(256), v1), desc_op,
Operand(PhysReg(256), v1), Operand::zero(), V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false)
.instr->mtbuf()
.vtx_binding = vtx_binding;
}
static void create_flat()
@ -82,16 +84,15 @@ static void create_mimg(bool nsa, Temp desc=Temp(0, s8))
/* Emit an s_load_dword that defines PhysReg(0) as s1, reading through the s2 operand
 * fixed at PhysReg(0) with a zero constant as the last operand.
 * The call is listed twice: deprecated Operand(0u) form first, then the reformatted
 * Operand::zero() replacement. */
static void create_smem()
{
bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1),
Operand(PhysReg(0), s2), Operand(0u));
bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1), Operand(PhysReg(0), s2),
Operand::zero());
}
/* Emit an s_buffer_load_dword that defines PhysReg(0) as s1, using `desc` as the
 * descriptor operand and a zero constant as the last operand.
 *
 * desc: temporary used as the descriptor operand; defaults to Temp(0, s4).
 *
 * The builder call is listed twice: deprecated Operand(0u) form first, then the
 * single-line Operand::zero() replacement. */
static void create_smem_buffer(Temp desc=Temp(0, s4))
{
Operand desc_op(desc);
/* Pin the descriptor operand to physical register 0. */
desc_op.setFixed(PhysReg(0));
bld.smem(aco_opcode::s_buffer_load_dword, Definition(PhysReg(0), s1),
desc_op, Operand(0u));
bld.smem(aco_opcode::s_buffer_load_dword, Definition(PhysReg(0), s1), desc_op, Operand::zero());
}
BEGIN_TEST(form_hard_clauses.type_restrictions)
@ -102,7 +103,7 @@ BEGIN_TEST(form_hard_clauses.type_restrictions)
//! s_clause imm:1
//; search_re('image_sample')
//; search_re('image_sample')
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
create_mimg(false);
create_mimg(false);
@ -110,7 +111,7 @@ BEGIN_TEST(form_hard_clauses.type_restrictions)
//! s_clause imm:1
//; search_re('buffer_load_dword')
//; search_re('buffer_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
create_mubuf();
create_mubuf();
@ -118,7 +119,7 @@ BEGIN_TEST(form_hard_clauses.type_restrictions)
//! s_clause imm:1
//; search_re('global_load_dword')
//; search_re('global_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
create_global();
create_global();
@ -126,7 +127,7 @@ BEGIN_TEST(form_hard_clauses.type_restrictions)
//! s_clause imm:1
//; search_re('flat_load_dword')
//; search_re('flat_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
create_flat();
create_flat();
@ -134,28 +135,28 @@ BEGIN_TEST(form_hard_clauses.type_restrictions)
//! s_clause imm:1
//; search_re('s_load_dword')
//; search_re('s_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
create_smem();
create_smem();
//>> p_unit_test 5
//; search_re('buffer_load_dword')
//; search_re('flat_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
create_mubuf();
create_flat();
//>> p_unit_test 6
//; search_re('buffer_load_dword')
//; search_re('s_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
create_mubuf();
create_smem();
//>> p_unit_test 7
//; search_re('flat_load_dword')
//; search_re('s_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
create_flat();
create_smem();
@ -168,14 +169,14 @@ BEGIN_TEST(form_hard_clauses.size)
//>> p_unit_test 0
//; search_re('s_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
create_smem();
//>> p_unit_test 1
//! s_clause imm:63
//; for i in range(64):
//; search_re('s_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
for (unsigned i = 0; i < 64; i++)
create_smem();
@ -183,7 +184,7 @@ BEGIN_TEST(form_hard_clauses.size)
//! s_clause imm:63
//; for i in range(65):
//; search_re('s_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
for (unsigned i = 0; i < 65; i++)
create_smem();
@ -194,7 +195,7 @@ BEGIN_TEST(form_hard_clauses.size)
//! s_clause imm:1
//; search_re('s_load_dword')
//; search_re('s_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
for (unsigned i = 0; i < 66; i++)
create_smem();
@ -210,7 +211,7 @@ BEGIN_TEST(form_hard_clauses.nsa)
//! s_clause imm:1
//; search_re('image_sample .* %0:v\[0\], %0:v\[1\]')
//; search_re('image_sample .* %0:v\[0\], %0:v\[1\]')
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
create_mimg(false);
create_mimg(false);
@ -218,7 +219,7 @@ BEGIN_TEST(form_hard_clauses.nsa)
//~gfx10_3! s_clause imm:1
//; search_re('image_sample .* %0:v\[0\], %0:v\[1\]')
//; search_re('image_sample .* %0:v\[0\], %0:v\[2\]')
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
create_mimg(false);
create_mimg(true);
@ -226,7 +227,7 @@ BEGIN_TEST(form_hard_clauses.nsa)
//~gfx10_3! s_clause imm:1
//; search_re('image_sample .* %0:v\[0\], %0:v\[2\]')
//; search_re('image_sample .* %0:v\[0\], %0:v\[2\]')
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
create_mimg(true);
create_mimg(true);
@ -248,14 +249,14 @@ BEGIN_TEST(form_hard_clauses.heuristic)
//! s_clause imm:1
//; search_re('image_sample')
//; search_re('image_sample')
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
create_mimg(false, img_desc0);
create_mimg(false, img_desc0);
//>> p_unit_test 1
//; search_re('image_sample')
//; search_re('image_sample')
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
create_mimg(false, img_desc0);
create_mimg(false, img_desc1);
@ -263,14 +264,14 @@ BEGIN_TEST(form_hard_clauses.heuristic)
//! s_clause imm:1
//; search_re('buffer_load_dword')
//; search_re('buffer_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
create_mubuf(buf_desc0);
create_mubuf(buf_desc0);
//>> p_unit_test 3
//; search_re('buffer_load_dword')
//; search_re('buffer_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
create_mubuf(buf_desc0);
create_mubuf(buf_desc1);
@ -278,21 +279,21 @@ BEGIN_TEST(form_hard_clauses.heuristic)
//! s_clause imm:1
//; search_re('s_buffer_load_dword')
//; search_re('s_buffer_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
create_smem_buffer(buf_desc0);
create_smem_buffer(buf_desc0);
//>> p_unit_test 5
//; search_re('s_buffer_load_dword')
//; search_re('s_buffer_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
create_smem_buffer(buf_desc0);
create_smem_buffer(buf_desc1);
//>> p_unit_test 6
//; search_re('s_buffer_load_dword')
//; search_re('s_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
create_smem_buffer(buf_desc0);
create_smem();
@ -302,7 +303,7 @@ BEGIN_TEST(form_hard_clauses.heuristic)
//>> p_unit_test 7
//; search_re('buffer_load_dword')
//; search_re('tbuffer_load_format_x')
bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
create_mubuf(buf_desc0);
create_mtbuf(buf_desc0);
@ -310,7 +311,7 @@ BEGIN_TEST(form_hard_clauses.heuristic)
//! s_clause imm:1
//; search_re('buffer_load_dword')
//; search_re('tbuffer_load_format_x')
bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
create_mubuf(buf_desc0, 1);
create_mtbuf(buf_desc0, 1);
@ -318,7 +319,7 @@ BEGIN_TEST(form_hard_clauses.heuristic)
//! s_clause imm:1
//; search_re('buffer_load_dword')
//; search_re('tbuffer_load_format_x')
bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
create_mubuf(buf_desc0, 1);
create_mtbuf(buf_desc1, 1);
@ -332,7 +333,7 @@ BEGIN_TEST(form_hard_clauses.stores)
//>> p_unit_test 0
//; search_re('buffer_store_dword')
//; search_re('buffer_store_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
create_mubuf_store();
create_mubuf_store();
@ -341,7 +342,7 @@ BEGIN_TEST(form_hard_clauses.stores)
//; search_re('buffer_load_dword')
//; search_re('buffer_load_dword')
//; search_re('buffer_store_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
create_mubuf();
create_mubuf();
create_mubuf_store();
@ -351,7 +352,7 @@ BEGIN_TEST(form_hard_clauses.stores)
//! s_clause imm:1
//; search_re('buffer_load_dword')
//; search_re('buffer_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
create_mubuf_store();
create_mubuf();
create_mubuf();
@ -361,7 +362,7 @@ BEGIN_TEST(form_hard_clauses.stores)
//; search_re('buffer_load_dword')
//; search_re('buffer_store_dword')
//; search_re('buffer_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
create_mubuf();
create_mubuf_store();
create_mubuf();
@ -373,7 +374,7 @@ BEGIN_TEST(form_hard_clauses.stores)
//; for i in range(63):
//; search_re('buffer_load_dword')
//; search_re('buffer_load_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
create_mubuf_store();
for (unsigned i = 0; i < 64; i++)
create_mubuf();
@ -383,7 +384,7 @@ BEGIN_TEST(form_hard_clauses.stores)
//; for i in range(64):
//; search_re('buffer_load_dword')
//; search_re('buffer_store_dword')
bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
for (unsigned i = 0; i < 64; i++)
create_mubuf();
create_mubuf_store();

View file

@ -27,9 +27,8 @@ using namespace aco;
/* Test helper: emits a buffer_load_dword MUBUF instruction through the `bld` builder
 * that is declared outside this function. `offset` is forwarded unchanged as the
 * second-to-last argument of bld.mubuf; the expected-output patterns later in this
 * file print it as "offset:N offen" after the operands s[0-3], v[0] and constant 0.
 *
 * NOTE(review): the bld.mubuf call appears twice below, once with the deprecated
 * Operand(0u) constructor and once with Operand::zero(). This looks like the
 * before/after pair of the Operand-constructor migration rather than two intended
 * loads -- confirm before relying on the number of instructions emitted. */
void create_mubuf(unsigned offset)
{
bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1),
Operand(PhysReg(0), s4), Operand(PhysReg(256), v1),
Operand(0u), offset, true);
bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1), Operand(PhysReg(0), s4),
Operand(PhysReg(256), v1), Operand::zero(), offset, true);
}
void create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
@ -58,7 +57,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
//>> p_unit_test 0
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:8 offen storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
create_mimg(true, 6, 4);
create_mubuf(8);
@ -67,7 +66,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation
//! s_nop
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
create_mimg(true, 6, 4);
create_mubuf(4);
@ -75,7 +74,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
//! p_unit_test 2
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[1], %0:v[2], %0:v[3], %0:v[4], %0:v[5] 2d storage: semantics: scope:invocation
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
create_mimg(false, 6, 2);
create_mubuf(4);
@ -84,7 +83,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation
//! v_nop
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
create_mimg(true, 6, 4);
bld.vop1(aco_opcode::v_nop);
create_mubuf(4);
@ -93,7 +92,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
//! p_unit_test 4
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
create_mimg(true, 2, 3);
create_mubuf(4);
@ -104,7 +103,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
//! /* logical preds: / linear preds: BB0, / kind: uniform, */
//! s_nop
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
create_mimg(true, 6, 4);
bld.reset(program->create_and_insert_block());
create_mubuf(4);
@ -123,16 +122,18 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug)
//! v1: %0:v[255] = v_writelane_b32_e64 0, 0, %0:v[255]
//! s_nop
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.writelane(Definition(PhysReg(511), v1), Operand(0u), Operand(0u), Operand(PhysReg(511), v1));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(),
Operand(PhysReg(511), v1));
create_mimg(true, 2, 3);
/* no nop needed because the MIMG is not NSA */
//! p_unit_test 1
//! v1: %0:v[255] = v_writelane_b32_e64 0, 0, %0:v[255]
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[1] 2d storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.writelane(Definition(PhysReg(511), v1), Operand(0u), Operand(0u), Operand(PhysReg(511), v1));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(),
Operand(PhysReg(511), v1));
create_mimg(false, 2, 2);
/* no nop needed because there's already an instruction in-between */
@ -140,8 +141,9 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug)
//! v1: %0:v[255] = v_writelane_b32_e64 0, 0, %0:v[255]
//! v_nop
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.writelane(Definition(PhysReg(511), v1), Operand(0u), Operand(0u), Operand(PhysReg(511), v1));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(),
Operand(PhysReg(511), v1));
bld.vop1(aco_opcode::v_nop);
create_mimg(true, 2, 3);
@ -152,8 +154,9 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug)
//! /* logical preds: / linear preds: BB0, / kind: uniform, */
//! s_nop
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
bld.writelane(Definition(PhysReg(511), v1), Operand(0u), Operand(0u), Operand(PhysReg(511), v1));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(),
Operand(PhysReg(511), v1));
bld.reset(program->create_and_insert_block());
create_mimg(true, 2, 3);
program->blocks[0].linear_succs.push_back(1);

View file

@ -41,7 +41,7 @@ BEGIN_TEST(optimize.neg)
//~gfx10! v1: %res1 = v_mul_f32 0x123456, -%a
//! p_unit_test 1, %res1
Temp neg_a = fneg(inputs[0]);
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x123456u), neg_a));
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x123456u), neg_a));
//! v1: %res2 = v_mul_f32 %a, %b
//! p_unit_test 2, %res2
@ -97,56 +97,60 @@ BEGIN_TEST(optimize.output_modifiers)
//! v1: %res0 = v_add_f32 %a, %b *0.5
//! p_unit_test 0, %res0
Temp tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x3f000000u), tmp));
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f000000u), tmp));
//! v1: %res1 = v_add_f32 %a, %b *2
//! p_unit_test 1, %res1
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
//! v1: %res2 = v_add_f32 %a, %b *4
//! p_unit_test 2, %res2
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40800000u), tmp));
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40800000u), tmp));
//! v1: %res3 = v_add_f32 %a, %b clamp
//! p_unit_test 3, %res3
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(3, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
writeout(3, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
Operand::c32(0x3f800000u), tmp));
//! v1: %res4 = v_add_f32 %a, %b *2 clamp
//! p_unit_test 4, %res4
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp);
writeout(4, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp);
writeout(4, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
Operand::c32(0x3f800000u), tmp));
/* 16-bit modifiers */
//! v2b: %res5 = v_add_f16 %a, %b *0.5
//! p_unit_test 5, %res5
tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
writeout(5, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x3800u), tmp));
writeout(5, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x3800u), tmp));
//! v2b: %res6 = v_add_f16 %a, %b *2
//! p_unit_test 6, %res6
tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
writeout(6, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000u), tmp));
writeout(6, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp));
//! v2b: %res7 = v_add_f16 %a, %b *4
//! p_unit_test 7, %res7
tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
writeout(7, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4400u), tmp));
writeout(7, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4400u), tmp));
//! v2b: %res8 = v_add_f16 %a, %b clamp
//! p_unit_test 8, %res8
tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
writeout(8, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
writeout(8, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
Operand::c16(0x3c00u), tmp));
//! v2b: %res9 = v_add_f16 %a, %b *2 clamp
//! p_unit_test 9, %res9
tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000), tmp);
writeout(9, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000), tmp);
writeout(9, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
Operand::c16(0x3c00u), tmp));
/* clamping is done after omod */
@ -154,8 +158,9 @@ BEGIN_TEST(optimize.output_modifiers)
//! v1: %res10 = v_mul_f32 2.0, %res10_tmp
//! p_unit_test 10, %res10
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
tmp = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp);
writeout(10, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
tmp = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
tmp);
writeout(10, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
/* unsupported instructions */
@ -163,7 +168,7 @@ BEGIN_TEST(optimize.output_modifiers)
//! v1: %res11 = v_mul_f32 2.0, %res11_tmp
//! p_unit_test 11, %res11
tmp = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], inputs[1]);
writeout(11, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
writeout(11, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
/* several users */
@ -173,12 +178,12 @@ BEGIN_TEST(optimize.output_modifiers)
//! p_unit_test 12, %res12
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
bld.pseudo(aco_opcode::p_unit_test, tmp);
writeout(12, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
writeout(12, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
//! v1: %res13 = v_add_f32 %a, %b
//! p_unit_test 13, %res13
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp);
bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp);
writeout(13, tmp);
/* omod has no effect if denormals are enabled but clamp is fine */
@ -193,12 +198,13 @@ BEGIN_TEST(optimize.output_modifiers)
//! v1: %res14 = v_mul_f32 2.0, %res13_tmp
//! p_unit_test 14, %res14
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(14, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
writeout(14, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
//! v1: %res15 = v_add_f32 %a, %b clamp
//! p_unit_test 15, %res15
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(15, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
writeout(15, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
Operand::c32(0x3f800000u), tmp));
//>> BB2
//! /* logical preds: / linear preds: / kind: uniform, */
@ -210,12 +216,13 @@ BEGIN_TEST(optimize.output_modifiers)
//! v2b: %res16 = v_mul_f16 2.0, %res15_tmp
//! p_unit_test 16, %res16
tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
writeout(16, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000u), tmp));
writeout(16, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp));
//! v2b: %res17 = v_add_f16 %a, %b clamp
//! p_unit_test 17, %res17
tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
writeout(17, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
writeout(17, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
Operand::c16(0x3c00u), tmp));
/* omod flushes -0.0 to +0.0 */
@ -231,11 +238,12 @@ BEGIN_TEST(optimize.output_modifiers)
//! v1: %res18 = v_mul_f32 2.0, %res18_tmp
//! p_unit_test 18, %res18
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(18, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
writeout(18, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
//! v1: %res19 = v_add_f32 %a, %b clamp
//! p_unit_test 19, %res19
tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(19, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
writeout(19, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
Operand::c32(0x3f800000u), tmp));
//>> BB4
//! /* logical preds: / linear preds: / kind: uniform, */
@ -246,11 +254,12 @@ BEGIN_TEST(optimize.output_modifiers)
//! v2b: %res20 = v_mul_f16 2.0, %res20_tmp
//! p_unit_test 20, %res20
tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
writeout(20, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000u), tmp));
writeout(20, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp));
//! v2b: %res21 = v_add_f16 %a, %b clamp
//! p_unit_test 21, %res21
tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
writeout(21, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
writeout(21, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
Operand::c16(0x3c00u), tmp));
finish_opt_test();
END_TEST
@ -270,33 +279,34 @@ BEGIN_TEST(optimize.cndmask)
//! v1: %res0 = v_cndmask_b32 0, %a, %c
//! p_unit_test 0, %res0
subbrev = create_subbrev_co(Operand(0u), Operand(0u), Operand(inputs[2]));
subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2]));
writeout(0, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), inputs[0], subbrev));
//! v1: %res1 = v_cndmask_b32 0, 42, %c
//! p_unit_test 1, %res1
subbrev = create_subbrev_co(Operand(0u), Operand(0u), Operand(inputs[2]));
writeout(1, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(42u), subbrev));
subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2]));
writeout(1, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(42u), subbrev));
//~gfx9! v1: %subbrev, s2: %_ = v_subbrev_co_u32 0, 0, %c
//~gfx9! v1: %res2 = v_and_b32 %b, %subbrev
//~gfx10! v1: %res2 = v_cndmask_b32 0, %b, %c
//! p_unit_test 2, %res2
subbrev = create_subbrev_co(Operand(0u), Operand(0u), Operand(inputs[2]));
subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2]));
writeout(2, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), inputs[1], subbrev));
//! v1: %subbrev1, s2: %_ = v_subbrev_co_u32 0, 0, %c
//! v1: %xor = v_xor_b32 %a, %subbrev1
//! v1: %res3 = v_cndmask_b32 0, %xor, %c
//! p_unit_test 3, %res3
subbrev = create_subbrev_co(Operand(0u), Operand(0u), Operand(inputs[2]));
subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2]));
Temp xor_a = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], subbrev);
writeout(3, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), xor_a, subbrev));
//! v1: %res4 = v_cndmask_b32 0, %a, %c
//! p_unit_test 4, %res4
Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), Operand(inputs[2]));
Temp sub = bld.vsub32(bld.def(v1), Operand(0u), cndmask);
Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
Operand::c32(1u), Operand(inputs[2]));
Temp sub = bld.vsub32(bld.def(v1), Operand::zero(), cndmask);
writeout(4, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(inputs[0]), sub));
finish_opt_test();
@ -315,9 +325,10 @@ BEGIN_TEST(optimize.add_lshl)
//~gfx8! s1: %res0, s1: %_:scc = s_add_u32 %lshl0, 4
//~gfx(9|10)! s1: %res0, s1: %_:scc = s_lshl3_add_u32 %a, 4
//! p_unit_test 0, %res0
shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc),
Operand(inputs[0]), Operand(3u));
writeout(0, bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), shift, Operand(4u)));
shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand(inputs[0]),
Operand::c32(3u));
writeout(0, bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), shift,
Operand::c32(4u)));
//~gfx8! s1: %lshl1, s1: %_:scc = s_lshl_b32 %a, 3
//~gfx8! s1: %add1, s1: %_:scc = s_add_u32 %lshl1, 4
@ -327,9 +338,10 @@ BEGIN_TEST(optimize.add_lshl)
//~gfx(9|10)! v1: %lshl_add = v_lshl_add_u32 %a, 3, %b
//~gfx(9|10)! v1: %res1 = v_add_u32 %lshl1, %lshl_add
//! p_unit_test 1, %res1
shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc),
Operand(inputs[0]), Operand(3u));
Temp sadd = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), shift, Operand(4u));
shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand(inputs[0]),
Operand::c32(3u));
Temp sadd =
bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), shift, Operand::c32(4u));
Temp vadd = bld.vadd32(bld.def(v1), shift, Operand(inputs[1]));
writeout(1, bld.vadd32(bld.def(v1), sadd, vadd));
@ -337,7 +349,8 @@ BEGIN_TEST(optimize.add_lshl)
//~gfx8! v1: %res2, s2: %_ = v_add_co_u32 %lshl2, %b
//~gfx(9|10)! v1: %res2 = v_lshl_add_u32 %a, 3, %b
//! p_unit_test 2, %res2
Temp lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), Operand(inputs[0]), Operand(3u));
Temp lshl =
bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), Operand(inputs[0]), Operand::c32(3u));
writeout(2, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
//~gfx8! s1: %lshl3 = s_lshl_b32 (is24bit)%a, 7
@ -346,14 +359,14 @@ BEGIN_TEST(optimize.add_lshl)
//! p_unit_test 3, %res3
Operand a_24bit = Operand(inputs[0]);
a_24bit.set24bit(true);
lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand(7u));
lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand::c32(7u));
writeout(3, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
//! s1: %lshl4 = s_lshl_b32 (is24bit)%a, 3
//~gfx(8|9)! v1: %res4, s2: %carry = v_add_co_u32 %lshl4, %b
//~gfx10! v1: %res4, s2: %carry = v_add_co_u32_e64 %lshl4, %b
//! p_unit_test 4, %carry
lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand(3u));
lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand::c32(3u));
Temp carry = bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]), true).def(1).getTemp();
writeout(4, carry);
@ -367,7 +380,7 @@ BEGIN_TEST(optimize.add_lshl)
//~gfx8! v1: %res6 = v_mad_u32_u24 (is24bit)%a, 8, %b
//~gfx(9|10)! v1: %res6 = v_lshl_add_u32 (is24bit)%a, 3, %b
//! p_unit_test 6, %res6
lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand(3u));
lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand::c32(3u));
writeout(6, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
//~gfx8! v1: %res7 = v_mad_u32_u24 (is16bit)%a, 16, %b
@ -375,7 +388,7 @@ BEGIN_TEST(optimize.add_lshl)
//! p_unit_test 7, %res7
Operand a_16bit = Operand(inputs[0]);
a_16bit.set16bit(true);
lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_16bit, Operand(4u));
lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_16bit, Operand::c32(4u));
writeout(7, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
finish_opt_test();
@ -398,27 +411,28 @@ BEGIN_TEST(optimize.mad_u32_u16)
//! v1: %res0 = v_mul_u32_u24 (is16bit)%a, (is16bit)%b
//! p_unit_test 0, %res0
writeout(0, create_mad_u32_u16(Operand(inputs[0]), Operand(inputs[1]), Operand(0u)));
writeout(0, create_mad_u32_u16(Operand(inputs[0]), Operand(inputs[1]), Operand::zero()));
//! v1: %res1 = v_mul_u32_u24 42, (is16bit)%a
//! p_unit_test 1, %res1
writeout(1, create_mad_u32_u16(Operand(42u), Operand(inputs[0]), Operand(0u)));
writeout(1, create_mad_u32_u16(Operand::c32(42u), Operand(inputs[0]), Operand::zero()));
//! v1: %res2 = v_mul_u32_u24 42, (is16bit)%a
//! p_unit_test 2, %res2
writeout(2, create_mad_u32_u16(Operand(inputs[0]), Operand(42u), Operand(0u)));
writeout(2, create_mad_u32_u16(Operand(inputs[0]), Operand::c32(42u), Operand::zero()));
//! v1: %res3 = v_mul_u32_u24 (is16bit)%c, (is16bit)%a
//! p_unit_test 3, %res3
writeout(3, create_mad_u32_u16(Operand(inputs[2]), Operand(inputs[0]), Operand(0u)));
writeout(3, create_mad_u32_u16(Operand(inputs[2]), Operand(inputs[0]), Operand::zero()));
//! v1: %res4 = v_mad_u32_u16 42, (is16bit)%c, 0
//! p_unit_test 4, %res4
writeout(4, create_mad_u32_u16(Operand(42u), Operand(inputs[2]), Operand(0u)));
writeout(4, create_mad_u32_u16(Operand::c32(42u), Operand(inputs[2]), Operand::zero()));
//! v1: %res5 = v_mad_u32_u16 42, %a, 0
//! p_unit_test 5, %res5
writeout(5, create_mad_u32_u16(Operand(42u), Operand(inputs[0]), Operand(0u), false));
writeout(5,
create_mad_u32_u16(Operand::c32(42u), Operand(inputs[0]), Operand::zero(), false));
//~gfx9! v1: %mul6 = v_mul_lo_u16 %a, %b
//~gfx9! v1: %res6 = v_add_u32 %mul6, %b
@ -458,31 +472,31 @@ BEGIN_TEST(optimize.bcnt)
//! v1: %res0 = v_bcnt_u32_b32 %a, %a
//! p_unit_test 0, %res0
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand(0u));
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero());
writeout(0, bld.vadd32(bld.def(v1), bcnt, Operand(inputs[0])));
//! v1: %res1 = v_bcnt_u32_b32 %a, %b
//! p_unit_test 1, %res1
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand(0u));
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero());
writeout(1, bld.vadd32(bld.def(v1), bcnt, Operand(inputs[1])));
//! v1: %res2 = v_bcnt_u32_b32 %a, 42
//! p_unit_test 2, %res2
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand(0u));
writeout(2, bld.vadd32(bld.def(v1), bcnt, Operand(42u)));
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero());
writeout(2, bld.vadd32(bld.def(v1), bcnt, Operand::c32(42u)));
//! v1: %bnct3 = v_bcnt_u32_b32 %b, 0
//~gfx8! v1: %res3, s2: %_ = v_add_co_u32 %bcnt3, %a
//~gfx(9|10)! v1: %res3 = v_add_u32 %bcnt3, %a
//! p_unit_test 3, %res3
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[1]), Operand(0u));
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[1]), Operand::zero());
writeout(3, bld.vadd32(bld.def(v1), bcnt, Operand(inputs[0])));
//! v1: %bnct4 = v_bcnt_u32_b32 %a, 0
//~gfx(8|9)! v1: %add4, s2: %carry = v_add_co_u32 %bcnt4, %a
//~gfx10! v1: %add4, s2: %carry = v_add_co_u32_e64 %bcnt4, %a
//! p_unit_test 4, %carry
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand(0u));
bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero());
Temp carry = bld.vadd32(bld.def(v1), bcnt, Operand(inputs[0]), true).def(1).getTemp();
writeout(4, carry);
@ -499,28 +513,28 @@ struct clamp_config {
/* Table of clamp test configurations. Each entry lists a name suffix string, a
 * min opcode, a max opcode and a med3 opcode for one data type, followed by two
 * constant operands. The comment above each group names the two bounds those
 * constants encode (presumably lower bound first, then upper bound -- confirm
 * against the clamp_config struct definition, which is not visible here).
 *
 * NOTE(review): every entry's operand line appears twice below, first with the
 * deprecated Operand(...) constructors and then with the Operand::zero()/c16()/c32()
 * replacements. This looks like the before/after pair of the constructor migration,
 * not two sets of initializers -- confirm before compiling this listing as-is. */
static const clamp_config clamp_configs[] = {
/* 0.0, 4.0 */
{"_0,4f32", aco_opcode::v_min_f32, aco_opcode::v_max_f32, aco_opcode::v_med3_f32,
Operand(0u), Operand(0x40800000u)},
Operand::zero(), Operand::c32(0x40800000u)},
{"_0,4f16", aco_opcode::v_min_f16, aco_opcode::v_max_f16, aco_opcode::v_med3_f16,
Operand((uint16_t)0u), Operand((uint16_t)0x4400)},
Operand::c16(0u), Operand::c16(0x4400)},
/* -1.0, 0.0 */
{"_-1,0f32", aco_opcode::v_min_f32, aco_opcode::v_max_f32, aco_opcode::v_med3_f32,
Operand(0xbf800000u), Operand(0u)},
Operand::c32(0xbf800000u), Operand::zero()},
{"_-1,0f16", aco_opcode::v_min_f16, aco_opcode::v_max_f16, aco_opcode::v_med3_f16,
Operand((uint16_t)0xBC00), Operand((uint16_t)0u)},
Operand::c16(0xBC00), Operand::c16(0u)},
/* 0, 3 */
{"_0,3u32", aco_opcode::v_min_u32, aco_opcode::v_max_u32, aco_opcode::v_med3_u32,
Operand(0u), Operand(3u)},
Operand::zero(), Operand::c32(3u)},
{"_0,3u16", aco_opcode::v_min_u16, aco_opcode::v_max_u16, aco_opcode::v_med3_u16,
Operand((uint16_t)0u), Operand((uint16_t)3u)},
Operand::c16(0u), Operand::c16(3u)},
{"_0,3i32", aco_opcode::v_min_i32, aco_opcode::v_max_i32, aco_opcode::v_med3_i32,
Operand(0u), Operand(3u)},
Operand::zero(), Operand::c32(3u)},
{"_0,3i16", aco_opcode::v_min_i16, aco_opcode::v_max_i16, aco_opcode::v_med3_i16,
Operand((uint16_t)0u), Operand((uint16_t)3u)},
Operand::c16(0u), Operand::c16(3u)},
/* -5, 0 */
{"_-5,0i32", aco_opcode::v_min_i32, aco_opcode::v_max_i32, aco_opcode::v_med3_i32,
Operand(0xfffffffbu), Operand(0u)},
Operand::c32(0xfffffffbu), Operand::zero()},
{"_-5,0i16", aco_opcode::v_min_i16, aco_opcode::v_max_i16, aco_opcode::v_med3_i16,
Operand((uint16_t)0xfffbu), Operand((uint16_t)0u)},
Operand::c16(0xfffbu), Operand::c16(0u)},
};
BEGIN_TEST(optimize.clamp)
@ -613,45 +627,52 @@ BEGIN_TEST(optimize.const_comparison_ordering)
//! p_unit_test 0, %res0
writeout(0, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm),
Operand::c32(0x40800000u), inputs[0])));
//! s2: %res1 = v_cmp_nge_f32 4.0, %a
//! p_unit_test 1, %res1
writeout(1, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm),
Operand::c32(0x40800000u), inputs[0])));
//! s2: %res2 = v_cmp_nge_f32 0x40a00000, %a
//! p_unit_test 2, %res2
writeout(2, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0])));
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm),
bld.copy(bld.def(v1), Operand::c32(0x40a00000u)), inputs[0])));
/* optimize to ordered comparison */
//! s2: %res3 = v_cmp_lt_f32 4.0, %a
//! p_unit_test 3, %res3
writeout(3, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc),
bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm),
Operand::c32(0x40800000u), inputs[0])));
//! s2: %res4 = v_cmp_lt_f32 4.0, %a
//! p_unit_test 4, %res4
writeout(4, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc),
bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm),
Operand::c32(0x40800000u), inputs[0])));
//! s2: %res5 = v_cmp_lt_f32 0x40a00000, %a
//! p_unit_test 5, %res5
writeout(5, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc),
bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0])));
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm),
bld.copy(bld.def(v1), Operand::c32(0x40a00000u)), inputs[0])));
/* similar but unoptimizable expressions */
//! s2: %tmp6_0 = v_cmp_lt_f32 4.0, %a
//! s2: %tmp6_1 = v_cmp_neq_f32 %a, %a
//! s2: %res6, s1: %_:scc = s_and_b64 %tmp6_1, %tmp6_0
//! p_unit_test 6, %res6
Temp src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]);
Temp src1 =
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand::c32(0x40800000u), inputs[0]);
Temp src0 = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]);
writeout(6, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1));
@ -659,7 +680,8 @@ BEGIN_TEST(optimize.const_comparison_ordering)
//! s2: %tmp7_1 = v_cmp_eq_f32 %a, %a
//! s2: %res7, s1: %_:scc = s_or_b64 %tmp7_1, %tmp7_0
//! p_unit_test 7, %res7
src1 = bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]);
src1 =
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand::c32(0x40800000u), inputs[0]);
src0 = bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]);
writeout(7, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1));
@ -667,7 +689,7 @@ BEGIN_TEST(optimize.const_comparison_ordering)
//! s2: %tmp8_1 = v_cmp_neq_f32 %a, %a
//! s2: %res8, s1: %_:scc = s_or_b64 %tmp8_1, %tmp8_0
//! p_unit_test 8, %res8
src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[3]);
src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand::c32(0x40800000u), inputs[3]);
src0 = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]);
writeout(8, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1));
@ -675,23 +697,26 @@ BEGIN_TEST(optimize.const_comparison_ordering)
//! s2: %tmp9_1 = v_cmp_neq_f32 %a, %d
//! s2: %res9, s1: %_:scc = s_or_b64 %tmp9_1, %tmp9_0
//! p_unit_test 9, %res9
src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]);
src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand::c32(0x40800000u), inputs[0]);
src0 = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[3]);
writeout(9, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1));
/* bit sizes */
//! s2: %res10 = v_cmp_nge_f16 4.0, %b
//! p_unit_test 10, %res10
Temp input1_16 = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[1], Operand(0u));
Temp input1_16 =
bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[1], Operand::zero());
writeout(10, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), input1_16, input1_16),
bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand((uint16_t)0x4400u), input1_16)));
bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand::c16(0x4400u),
input1_16)));
//! s2: %res11 = v_cmp_nge_f64 4.0, %c
//! p_unit_test 11, %res11
writeout(11, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
bld.vopc(aco_opcode::v_cmp_neq_f64, bld.def(bld.lm), inputs[2], inputs[2]),
bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm), Operand(0x4010000000000000u), inputs[2])));
bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm),
Operand::c64(0x4010000000000000u), inputs[2])));
/* NaN */
uint16_t nan16 = 0x7e00;
@ -702,7 +727,7 @@ BEGIN_TEST(optimize.const_comparison_ordering)
//! s2: %tmp12_1 = v_cmp_neq_f16 %a, %a
//! s2: %res12, s1: %_:scc = s_or_b64 %tmp12_1, %tmp12_0
//! p_unit_test 12, %res12
src1 = bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand(nan16), inputs[0]);
src1 = bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand::c16(nan16), inputs[0]);
src0 = bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), inputs[0], inputs[0]);
writeout(12, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1));
@ -710,7 +735,7 @@ BEGIN_TEST(optimize.const_comparison_ordering)
//! s2: %tmp13_1 = v_cmp_neq_f32 %a, %a
//! s2: %res13, s1: %_:scc = s_or_b64 %tmp13_1, %tmp13_0
//! p_unit_test 13, %res13
src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(nan32), inputs[0]);
src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand::c32(nan32), inputs[0]);
src0 = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]);
writeout(13, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1));
@ -718,7 +743,7 @@ BEGIN_TEST(optimize.const_comparison_ordering)
//! s2: %tmp14_1 = v_cmp_neq_f64 %a, %a
//! s2: %res14, s1: %_:scc = s_or_b64 %tmp14_1, %tmp14_0
//! p_unit_test 14, %res14
src1 = bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm), Operand(nan64), inputs[0]);
src1 = bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm), Operand::c64(nan64), inputs[0]);
src0 = bld.vopc(aco_opcode::v_cmp_neq_f64, bld.def(bld.lm), inputs[0], inputs[0]);
writeout(14, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1));
@ -762,15 +787,15 @@ BEGIN_TEST(optimize.minmax)
//! v1: %res0 = v_max3_f32 0, -0, %a
//! p_unit_test 0, %res0
Temp xor0 = fneg(inputs[0]);
Temp min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), xor0);
Temp min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), xor0);
Temp xor1 = fneg(min);
writeout(0, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), xor1));
writeout(0, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), xor1));
//! v1: %res1 = v_max3_f32 0, -0, -%a
//! p_unit_test 1, %res1
min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), Operand(inputs[0]));
min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), Operand(inputs[0]));
xor1 = fneg(min);
writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), xor1));
writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), xor1));
finish_opt_test();
}
@ -809,7 +834,7 @@ BEGIN_TEST(optimize.add_lshlrev)
//~gfx8! v1: %res0, s2: %_ = v_add_co_u32 %lshl0, %b
//~gfx(9|10)! v1: %res0 = v_lshl_add_u32 %a, 3, %b
//! p_unit_test 0, %res0
lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(3u), Operand(inputs[0]));
lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), Operand(inputs[0]));
writeout(0, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
//~gfx8! v1: %lshl1 = v_lshlrev_b32 7, (is24bit)%a
@ -818,7 +843,7 @@ BEGIN_TEST(optimize.add_lshlrev)
//! p_unit_test 1, %res1
Operand a_24bit = Operand(inputs[0]);
a_24bit.set24bit(true);
lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(7u), a_24bit);
lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(7u), a_24bit);
writeout(1, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
//~gfx8! v1: %lshl2 = v_lshlrev_b32 (is24bit)%a, (is24bit)%b
@ -833,7 +858,7 @@ BEGIN_TEST(optimize.add_lshlrev)
//~gfx8! v1: %res3 = v_mad_u32_u24 (is24bit)%a, 8, %b
//~gfx(9|10)! v1: %res3 = v_lshl_add_u32 (is24bit)%a, 3, %b
//! p_unit_test 3, %res3
lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(3u), a_24bit);
lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), a_24bit);
writeout(3, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
//~gfx8! v1: %res4 = v_mad_u32_u24 (is16bit)%a, 16, %b
@ -841,7 +866,7 @@ BEGIN_TEST(optimize.add_lshlrev)
//! p_unit_test 4, %res4
Operand a_16bit = Operand(inputs[0]);
a_16bit.set16bit(true);
lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(4u), a_16bit);
lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(4u), a_16bit);
writeout(4, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
//~gfx8! v1: %lshl5 = v_lshlrev_b32 4, (is24bit)%c
@ -850,7 +875,7 @@ BEGIN_TEST(optimize.add_lshlrev)
//! p_unit_test 5, %res5
Operand c_24bit = Operand(inputs[2]);
c_24bit.set24bit(true);
lshl = bld.vop2_e64(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(4u), c_24bit);
lshl = bld.vop2_e64(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(4u), c_24bit);
writeout(5, bld.vadd32(bld.def(v1), lshl, Operand(inputs[2])));
finish_opt_test();
@ -896,9 +921,9 @@ static Temp emit_denorm_srcdest(aco_opcode op, Temp val)
{
switch (op) {
case aco_opcode::v_cndmask_b32:
return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), val, inputs[1]);
return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]);
case aco_opcode::v_min_f32:
return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), val);
return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), val);
case aco_opcode::v_rcp_f32:
return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val);
default:
@ -975,7 +1000,7 @@ BEGIN_TEST(optimize.denorm_propagation)
Temp val = emit_denorm_srcdest(cfg.src, inputs[0]);
switch (cfg.op) {
case denorm_mul1:
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x3f800000u), val);
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), val);
break;
case denorm_fneg:
val = fneg(val);
@ -988,7 +1013,8 @@ BEGIN_TEST(optimize.denorm_propagation)
break;
}
val = emit_denorm_srcdest(cfg.dest, val);
writeout(0, bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), val, inputs[1]));
writeout(
0, bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]));
finish_opt_test();
}

View file

@ -48,7 +48,8 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %e:s[2-3] = p_cbranch_z %b:vcc
//! p_unit_test 0, %e:s[2-3]
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand(0u), Operand(v_in, reg_v0));
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(0, Operand(br, reg_s2));
@ -64,9 +65,10 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! s2: %f:vcc = s_mov_b64 0
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 1, %e:s[2-3], %f:vcc
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand(0u), Operand(v_in, reg_v0));
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand(0u));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
}
@ -80,7 +82,8 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 2, %e:s[2-3]
auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand(0u), Operand(v_in, reg_v0));
auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm));
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2));
@ -95,7 +98,8 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 2, %e:s[2-3]
auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand(1u), Operand(reg_s4, bld.lm));
auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc),
Operand::c32(1u), Operand(reg_s4, bld.lm));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm));
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2));
@ -111,9 +115,10 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! s2: %f:exec = s_mov_b64 42
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 4, %e:s[2-3], %f:exec
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand(0u), Operand(v_in, reg_v0));
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand(42u));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
}
@ -149,8 +154,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_nz %e:scc
//! p_unit_test 0, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u));
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(0, Operand(br, vcc));
}
@ -161,8 +168,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 1, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u));
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(1, Operand(br, vcc));
}
@ -173,8 +182,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 2, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u));
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(2, Operand(br, vcc));
}
@ -185,8 +196,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_nz %e:scc
//! p_unit_test 3, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u));
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(3, Operand(br, vcc));
}
@ -197,8 +210,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 4, %f:vcc
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1, Operand(0x12345u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2), Operand(UINT64_C(0)));
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
Operand::c32(0x12345u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero(8));
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(4, Operand(br, vcc));
}
@ -213,9 +228,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, Operand(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u));
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
}
@ -226,8 +244,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
//! p_unit_test 6, %f:s[4]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u));
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
writeout(6, Operand(br, reg_s4));
}
@ -242,9 +262,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
//! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
//! p_unit_test 7, %f:s[4], %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, Operand(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u));
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
}

View file

@ -70,7 +70,7 @@ BEGIN_TEST(regalloc.32bit_partial_write)
/* This test checks if this instruction uses SDWA. */
//! v2b: %_:v[0][0:16] = v_not_b32 0 dst_preserve
Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand(0u));
Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero());
//! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32]
bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);

View file

@ -79,14 +79,14 @@ BEGIN_TEST(validate.sdwa.operands)
//~gfx8! Wrong source position for constant argument: v1: %_ = v_mul_f32 4, %vgpr1
//~gfx8! Wrong source position for constant argument: v1: %_ = v_mul_f32 %vgpr0, 4
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand(4u), inputs[1]);
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand(4u));
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(4u), inputs[1]);
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand::c32(4u));
//! Literal applied on wrong instruction format: v1: %_ = v_mul_f32 0x1234, %vgpr1
//! Literal applied on wrong instruction format: v1: %_ = v_mul_f32 %vgpr0, 0x1234
//! Wrong source position for Literal argument: v1: %_ = v_mul_f32 %vgpr0, 0x1234
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x1234u), inputs[1]);
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand(0x1234u));
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x1234u), inputs[1]);
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand::c32(0x1234u));
//! Validation failed
@ -172,35 +172,42 @@ BEGIN_TEST(optimize.sdwa.extract)
{
//~gfx[^7].*! @standard_test(0, 0, 8)
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(is_signed));
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
Operand::c32(is_signed));
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b));
//~gfx[^7].*! @standard_test(1, 8, 8)
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(1u), Operand(8u), Operand(is_signed));
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b));
//~gfx[^7].*! @standard_test(2, 16, 8)
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(2u), Operand(8u), Operand(is_signed));
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b));
//~gfx[^7].*! @standard_test(3, 24, 8)
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(3u), Operand(8u), Operand(is_signed));
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b));
//~gfx[^7].*! @standard_test(4, 0, 16)
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(16u), Operand(is_signed));
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
Operand::c32(is_signed));
writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b));
//~gfx[^7].*! @standard_test(5, 16, 16)
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(1u), Operand(16u), Operand(is_signed));
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(16u), Operand::c32(is_signed));
writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b));
//~gfx[^7]_unsigned! @standard_test(6, 0, 8)
Temp bfi_byte0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand(0u), Operand(8u));
Temp bfi_byte0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u));
writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b));
//~gfx[^7]_unsigned! @standard_test(7, 0, 16)
Temp bfi_word0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand(0u), Operand(16u));
Temp bfi_word0_b =
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u));
writeout(7, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_word0_b));
}
@ -211,7 +218,8 @@ BEGIN_TEST(optimize.sdwa.extract)
//! v1: %tmp8 = p_insert %b, 1, 8
//! v1: %res8 = v_mul_f32 %a, %tmp8
//! p_unit_test 8, %res8
Temp bfi_byte1_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand(1u), Operand(8u));
Temp bfi_byte1_b =
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u));
writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte1_b));
/* v_cvt_f32_ubyte[0-3] can be used instead of v_cvt_f32_u32+sdwa */
@ -220,7 +228,8 @@ BEGIN_TEST(optimize.sdwa.extract)
//~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 @b(0:7)
//~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b
//! p_unit_test 9, %res9
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(is_signed));
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
Operand::c32(is_signed));
writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b));
//~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1
@ -228,7 +237,8 @@ BEGIN_TEST(optimize.sdwa.extract)
//~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 @b(8:15)
//~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b
//! p_unit_test 10, %res10
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(1u), Operand(8u), Operand(is_signed));
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b));
//~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1
@ -236,7 +246,8 @@ BEGIN_TEST(optimize.sdwa.extract)
//~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 @b(16:23)
//~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b
//! p_unit_test 11, %res11
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(2u), Operand(8u), Operand(is_signed));
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b));
//~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1
@ -244,18 +255,21 @@ BEGIN_TEST(optimize.sdwa.extract)
//~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 @b(24:31)
//~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b
//! p_unit_test 12, %res12
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(3u), Operand(8u), Operand(is_signed));
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
//! v1: %res13 = v_add_i16 %a, %b
//! p_unit_test 13, %res13
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(16u), Operand(is_signed));
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
Operand::c32(is_signed));
writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
/* VOP3-only instructions can't use SDWA but they can use opsel instead */
//~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
//~gfx(9|10).*! p_unit_test 14, %res14
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(1u), Operand(16u), Operand(is_signed));
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(16u), Operand::c32(is_signed));
writeout(14, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word1_b));
}
@ -274,7 +288,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers)
//! v1: %res0 = v_mul_f32 %a, -%b[0:7]
//! p_unit_test 0, %res0
Temp byte0 = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u));
Temp byte0 = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
Operand::zero());
Temp neg_byte0 = fneg(byte0);
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_byte0));
@ -284,7 +299,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers)
//~gfx(9|10)! v1: %res1 = v_mul_f32 %a, %neg_byte0
//! p_unit_test 1, %res1
Temp neg = fneg(inputs[1]);
Temp byte0_neg = bld.pseudo(ext, bld.def(v1), neg, Operand(0u), Operand(8u), Operand(0u));
Temp byte0_neg =
bld.pseudo(ext, bld.def(v1), neg, Operand::zero(), Operand::c32(8u), Operand::zero());
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_neg));
//! v1: %res2 = v_mul_f32 %a, |%b[0:7]|
@ -296,7 +312,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers)
//! v1: %res3 = v_mul_f32 %a, %abs[0:7]
//! p_unit_test 3, %res3
Temp abs = fabs(inputs[1]);
Temp byte0_abs = bld.pseudo(ext, bld.def(v1), abs, Operand(0u), Operand(8u), Operand(0u));
Temp byte0_abs =
bld.pseudo(ext, bld.def(v1), abs, Operand::zero(), Operand::c32(8u), Operand::zero());
writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_abs));
//! v1: %res4 = v_mul_f32 %1, -|%2[0:7]|
@ -310,7 +327,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers)
//~gfx(9|10)! v1: %res5 = v_mul_f32 %a, %neg_abs_byte0
//! p_unit_test 5, %res5
Temp neg_abs = fneg(abs);
Temp byte0_neg_abs = bld.pseudo(ext, bld.def(v1), neg_abs, Operand(0u), Operand(8u), Operand(0u));
Temp byte0_neg_abs =
bld.pseudo(ext, bld.def(v1), neg_abs, Operand::zero(), Operand::c32(8u), Operand::zero());
writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_neg_abs));
finish_opt_test();
@ -329,28 +347,32 @@ BEGIN_TEST(optimize.sdwa.extract.sgpr)
//~gfx8! v1: %res1 = v_mul_f32 %c, %byte0_b
//~gfx(9|10)! v1: %res1 = v_mul_f32 %c, %b[0:7]
//! p_unit_test 1, %res1
Temp byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u));
Temp byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
Operand::zero());
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[2], byte0_b));
//~gfx8! v1: %byte0_c = p_extract %c, 0, 8, 0
//~gfx8! v1: %res2 = v_mul_f32 %a, %byte0_c
//~gfx(9|10)! v1: %res2 = v_mul_f32 %a, %c[0:7]
//! p_unit_test 2, %res2
Temp byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand(0u), Operand(8u), Operand(0u));
Temp byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand::zero(), Operand::c32(8u),
Operand::zero());
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_c));
//~gfx8! v1: %byte0_c_2 = p_extract %c, 0, 8, 0
//~gfx8! v1: %res3 = v_mul_f32 %c, %byte0_c_2
//~gfx(9|10)! v1: %res3 = v_mul_f32 %c, %c[0:7]
//! p_unit_test 3, %res3
byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand(0u), Operand(8u), Operand(0u));
byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand::zero(), Operand::c32(8u),
Operand::zero());
writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[2], byte0_c));
//~gfx(8|9)! v1: %byte0_c_3 = p_extract %c, 0, 8, 0
//~gfx(8|9)! v1: %res4 = v_mul_f32 %d, %byte0_c_3
//~gfx10! v1: %res4 = v_mul_f32 %d, %c[0:7]
//! p_unit_test 4, %res4
byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand(0u), Operand(8u), Operand(0u));
byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand::zero(), Operand::c32(8u),
Operand::zero());
writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[3], byte0_c));
finish_opt_test();
@ -365,7 +387,8 @@ BEGIN_TEST(optimize.sdwa.from_vop3)
//! v1: %res0 = v_mul_f32 -|%a|, %b[0:7]
//! p_unit_test 0, %res0
Temp byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u));
Temp byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(8u), Operand::zero());
VOP3_instruction *mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b).instr->vop3();
mul->neg[0] = true;
mul->abs[0] = true;
@ -375,7 +398,8 @@ BEGIN_TEST(optimize.sdwa.from_vop3)
//~gfx8! v1: %res1 = v_mul_f32 %a, %byte0_b_0 *4
//~gfx(9|10)! v1: %res1 = v_mul_f32 %a, %b[0:7] *4
//! p_unit_test 1, %res1
byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u));
byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(8u), Operand::zero());
mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b).instr->vop3();
mul->omod = 2;
writeout(1, mul->definitions[0].getTemp());
@ -384,15 +408,18 @@ BEGIN_TEST(optimize.sdwa.from_vop3)
//~gfx8! v1: %res2 = v_mul_f32 %byte0_b_1, %c
//~gfx(9|10)! v1: %res2 = v_mul_f32 %b[0:7], %c
//! p_unit_test 2, %res2
byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u));
byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(8u), Operand::zero());
writeout(2, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), byte0_b, inputs[2]));
if (i >= GFX10) {
//~gfx10! v1: %byte0_b_2 = p_extract %b, 0, 8, 0
//~gfx10! v1: %res3 = v_mul_f32 %byte0_b_2, 0x1234
//~gfx10! p_unit_test 3, %res3
byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u));
writeout(3, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), byte0_b, Operand(0x1234u)));
byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(8u), Operand::zero());
writeout(3,
bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), byte0_b, Operand::c32(0x1234u)));
}
finish_opt_test();
@ -411,54 +438,58 @@ BEGIN_TEST(optimize.sdwa.insert)
//~gfx[^7]! v1: %res0 = v_mul_f32 %a, %b dst_sel:ubyte0
//~gfx[^7]! p_unit_test 0, %res0
Temp val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u)));
writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
//~gfx[^7]! v1: %res1 = v_mul_f32 %a, %b dst_sel:ubyte1
//~gfx[^7]! p_unit_test 1, %res1
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand(1u), Operand(8u)));
writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(8u)));
//~gfx[^7]! v1: %res2 = v_mul_f32 %a, %b dst_sel:ubyte2
//~gfx[^7]! p_unit_test 2, %res2
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(2, bld.pseudo(ins, bld.def(v1), val, Operand(2u), Operand(8u)));
writeout(2, bld.pseudo(ins, bld.def(v1), val, Operand::c32(2u), Operand::c32(8u)));
//~gfx[^7]! v1: %res3 = v_mul_f32 %a, %b dst_sel:ubyte3
//~gfx[^7]! p_unit_test 3, %res3
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(3, bld.pseudo(ins, bld.def(v1), val, Operand(3u), Operand(8u)));
writeout(3, bld.pseudo(ins, bld.def(v1), val, Operand::c32(3u), Operand::c32(8u)));
//~gfx[^7]! v1: %res4 = v_mul_f32 %a, %b dst_sel:uword0
//~gfx[^7]! p_unit_test 4, %res4
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(16u)));
writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u)));
//~gfx[^7]! v1: %res5 = v_mul_f32 %a, %b dst_sel:uword1
//~gfx[^7]! p_unit_test 5, %res5
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(5, bld.pseudo(ins, bld.def(v1), val, Operand(1u), Operand(16u)));
writeout(5, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));
//~gfx[^7]! v1: %res6 = v_mul_f32 %a, %b dst_sel:ubyte0
//~gfx[^7]! p_unit_test 6, %res6
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(6, bld.pseudo(ext, bld.def(v1), val, Operand(0u), Operand(8u), Operand(0u)));
writeout(
6, bld.pseudo(ext, bld.def(v1), val, Operand::zero(), Operand::c32(8u), Operand::zero()));
//~gfx[^7]! v1: %res7 = v_mul_f32 %a, %b dst_sel:uword0
//~gfx[^7]! p_unit_test 7, %res7
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(7, bld.pseudo(ext, bld.def(v1), val, Operand(0u), Operand(16u), Operand(0u)));
writeout(
7, bld.pseudo(ext, bld.def(v1), val, Operand::zero(), Operand::c32(16u), Operand::zero()));
//~gfx[^7]! v1: %tmp8 = v_mul_f32 %a, %b
//~gfx[^7]! v1: %res8 = p_extract %tmp8, 2, 8, 0
//~gfx[^7]! p_unit_test 8, %res8
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(8, bld.pseudo(ext, bld.def(v1), val, Operand(2u), Operand(8u), Operand(0u)));
writeout(
8, bld.pseudo(ext, bld.def(v1), val, Operand::c32(2u), Operand::c32(8u), Operand::zero()));
//~gfx[^7]! v1: %tmp9 = v_mul_f32 %a, %b
//~gfx[^7]! v1: %res9 = p_extract %tmp9, 0, 8, 1
//~gfx[^7]! p_unit_test 9, %res9
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
writeout(9, bld.pseudo(ext, bld.def(v1), val, Operand(0u), Operand(8u), Operand(1u)));
writeout(
9, bld.pseudo(ext, bld.def(v1), val, Operand::zero(), Operand::c32(8u), Operand::c32(1u)));
//>> p_unit_test 63
writeout(63);
@ -466,26 +497,26 @@ BEGIN_TEST(optimize.sdwa.insert)
//! v1: %res10 = v_mul_f32 %a, %b
//! p_unit_test 10, %res10
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
bld.pseudo(ins, bld.def(v1), val, Operand(1u), Operand(16u));
bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u));
writeout(10, val);
//! v1: %res11 = v_sub_i16 %a, %b
//! p_unit_test 11, %res11
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(16u)));
writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u)));
//~gfx[78]! v1: %tmp12 = v_sub_i16 %a, %b
//~gfx[78]! v1: %res12 = p_insert %tmp12, 1, 16
//~gfx(9|10)! v1: %res12 = v_sub_i16 %a, %b opsel_hi
//! p_unit_test 12, %res12
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand(1u), Operand(16u)));
writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));
//! v1: %tmp13 = v_sub_i16 %a, %b
//! v1: %res13 = p_insert %tmp13, 0, 8
//! p_unit_test 13, %res13
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(13, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u)));
writeout(13, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
finish_opt_test();
}
@ -504,29 +535,31 @@ BEGIN_TEST(optimize.sdwa.insert_modifiers)
//~gfx9! v1: %res0 = v_rcp_f32 %a *2 dst_sel:ubyte0
//! p_unit_test 0, %res0
Temp val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand(0x40000000u));
writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u)));
val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand::c32(0x40000000u));
writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
//! v1: %res1 = v_rcp_f32 %a clamp dst_sel:ubyte0
//! p_unit_test 1, %res1
val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand(0u), Operand(0x3f800000u));
writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u)));
val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand::zero(),
Operand::c32(0x3f800000u));
writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
//! v1: %tmp2 = v_rcp_f32 %a dst_sel:ubyte0
//! v1: %res2 = v_mul_f32 %tmp2, 2.0
//! p_unit_test 2, %res2
val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
val = bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u));
val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand(0x40000000u));
val = bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u));
val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand::c32(0x40000000u));
writeout(2, val);
//! v1: %tmp3 = v_rcp_f32 %a dst_sel:ubyte0
//! v1: %res3 = v_med3_f32 %tmp3, 0, 1.0
//! p_unit_test 3, %res3
val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
val = bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u));
val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand(0u), Operand(0x3f800000u));
val = bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u));
val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand::zero(),
Operand::c32(0x3f800000u));
writeout(3, val);
//~gfx8! v1: %tmp4 = v_rcp_f32 %a *2 clamp
@ -534,9 +567,10 @@ BEGIN_TEST(optimize.sdwa.insert_modifiers)
//~gfx9! v1: %res4 = v_rcp_f32 %a *2 clamp dst_sel:ubyte0
//! p_unit_test 4, %res4
val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand(0x40000000u));
val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand(0u), Operand(0x3f800000u));
writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u)));
val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand::c32(0x40000000u));
val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand::zero(),
Operand::c32(0x3f800000u));
writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
finish_opt_test();
}

View file

@ -51,7 +51,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v1_lo, v2b),
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
@ -60,7 +60,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v1),
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
@ -70,7 +70,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v6b), Operand(v1_lo, v2b),
Operand(v0_lo, v2b), Operand(v2_lo, v2b));
@ -81,7 +81,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
//~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v2),
Operand(v1_lo, v2b), Operand(v0_lo, v2b),
@ -95,7 +95,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v2),
Operand(v1_lo, v2b), Operand(v2_lo, v2b),
@ -104,7 +104,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! p_unit_test 5
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Operand(v0_lo, v1));
@ -113,7 +113,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Definition(v2_lo, v2b), Operand(v0_lo, v6b));
@ -123,7 +123,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
//~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Definition(v2_lo, v2b), Definition(v3_lo, v2b),
@ -135,7 +135,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v2b), Definition(v2_lo, v2b),
Definition(v0_lo, v2b), Definition(v3_lo, v2b),
@ -145,7 +145,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1b), Definition(v1_lo, v1b),
Operand(v1_lo, v1b), Operand(v0_lo, v1b));
@ -154,7 +154,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v2b),
Operand(v1_lo, v1b), Operand(v0_lo, v1b));
@ -165,7 +165,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
//~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v3b), Operand(v1_lo, v1b),
Operand(v0_lo, v1b), Operand(v2_lo, v1b));
@ -178,7 +178,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
//~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
bld.pseudo(aco_opcode::p_unit_test, Operand(12u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v1),
Operand(v1_lo, v1b), Operand(v0_lo, v1b),
@ -192,7 +192,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
//~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
//~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
bld.pseudo(aco_opcode::p_unit_test, Operand(13u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v1),
Operand(v0_lo, v1b), Operand(v0_lo, v1b),
@ -202,7 +202,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! p_unit_test 14
//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
//~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
bld.pseudo(aco_opcode::p_unit_test, Operand(14u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v1b), Definition(v0_lo, v1b),
Operand(v0_lo, v2b));
@ -212,7 +212,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
//~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
//~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
bld.pseudo(aco_opcode::p_unit_test, Operand(15u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v1b), Definition(v0_lo, v1b),
Definition(v2_lo, v1b), Definition(v3_lo, v1b),
@ -230,7 +230,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]>> p_unit_test 0
//~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
//~gfx9! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand(v0_hi, v2b), Operand(v0_lo, v2b));
@ -241,7 +241,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_lo, v2b),
Operand(v1_lo, v1), Operand(v0_lo, v2b));
@ -252,7 +252,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
//~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
//~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));
@ -264,7 +264,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
//~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_preserve
//~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_b3, v1b),
Operand(v1_lo, v1), Operand(v0_b3, v1b));
@ -276,7 +276,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
//~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_preserve
//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_lo, v1b),
Operand(v1_lo, v1), Operand(v0_lo, v1b));
@ -288,7 +288,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
//~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_preserve
//~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));
@ -298,7 +298,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));
@ -309,7 +309,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
//~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
//~gfx[89]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));
@ -322,7 +322,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
//~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
//~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v3b), Definition(v1_lo, v3b),
Operand(v1_lo, v3b), Operand(v0_lo, v3b));
@ -333,7 +333,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
//~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));
@ -345,7 +345,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
//~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
//~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_b1, v2b), Definition(v1_b1, v2b),
Operand(v1_b1, v2b), Operand(v0_b1, v2b));
@ -353,10 +353,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! p_unit_test 11
//~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_preserve
//~gfx[89]! v1: %0:v[0] = v_mov_b32 42
bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_lo, v2b),
Operand(42u), Operand(v0_hi, v2b));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
Operand::c32(42u), Operand(v0_hi, v2b));
//~gfx[89]! s_endpgm
@ -380,93 +379,81 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
/* 16-bit pack */
//>> p_unit_test 0
//! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand((uint16_t)0x3800), Operand(v1_hi, v2b));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand::c16(0x3800), Operand(v1_hi, v2b));
//! p_unit_test 1
//~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]
//~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]
//~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand((uint16_t)0x4205), Operand(v1_hi, v2b));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand::c16(0x4205), Operand(v1_hi, v2b));
//! p_unit_test 2
//~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
//~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
//~gfx10! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand((uint16_t)0x4205), Operand(v0_lo, v2b));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand::c16(0x4205), Operand(v0_lo, v2b));
//! p_unit_test 3
//! v1: %_:v[0] = v_mov_b32 0x3c003800
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand((uint16_t)0x3800), Operand((uint16_t)0x3c00));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand::c16(0x3800), Operand::c16(0x3c00));
//! p_unit_test 4
//! v1: %_:v[0] = v_mov_b32 0x43064205
bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand((uint16_t)0x4205), Operand((uint16_t)0x4306));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand::c16(0x4205), Operand::c16(0x4306));
//! p_unit_test 5
//! v1: %_:v[0] = v_mov_b32 0x38004205
bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand((uint16_t)0x4205), Operand((uint16_t)0x3800));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand::c16(0x4205), Operand::c16(0x3800));
/* 16-bit copy */
//! p_unit_test 6
//! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Operand((uint16_t)0x3800));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800));
//! p_unit_test 7
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
//~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
//~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Operand((uint16_t)0x4205));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205));
//! p_unit_test 8
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
//~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
//~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205
bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_hi, v2b), Operand((uint16_t)0x4205));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205));
//! p_unit_test 9
//! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_preserve
//! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_b1, v2b), Operand((uint16_t)0x3800));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800));
//! p_unit_test 10
//! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_preserve
//! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_b1, v2b), Operand((uint16_t)0x4205));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205));
/* 8-bit copy */
//! p_unit_test 11
//! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_preserve
bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1b), Operand((uint8_t)0x42));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42));
//! s_endpgm
@ -488,7 +475,7 @@ BEGIN_TEST(to_hw_instr.self_intersecting_swap)
//! v1: %0:v[2], v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2]
//! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
//! s_endpgm
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
//v[1:2] = v[2:3]
//v3 = v7
//v7 = v1
@ -521,7 +508,7 @@ BEGIN_TEST(to_hw_instr.extract)
//; funcs['sel'] = lambda bits: ('sext(%%_:v[1])[%s]' if variant.endswith('_signed') else '%%_:v[1][%s]') % bits
//>> p_unit_test 0
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
//! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
EXT(0, 8)
//! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
@ -542,7 +529,7 @@ BEGIN_TEST(to_hw_instr.extract)
Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));
//>> p_unit_test 2
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
//~gfx._unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000
//~gfx._signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
EXT(0, 8)
@ -565,7 +552,7 @@ BEGIN_TEST(to_hw_instr.extract)
Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));
//>> p_unit_test 4
bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7)
EXT(0, 0)
@ -603,7 +590,7 @@ BEGIN_TEST(to_hw_instr.insert)
Operand::c32(size));
//>> p_unit_test 0
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
//! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8
INS(0, 8)
//~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
@ -628,7 +615,7 @@ BEGIN_TEST(to_hw_instr.insert)
Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size));
//>> p_unit_test 1
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
INS(0, 8)
//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
@ -651,7 +638,7 @@ BEGIN_TEST(to_hw_instr.insert)
Operand::c32(idx), Operand::c32(8u));
//>> p_unit_test 2
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
//~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8
//~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte0 dst_preserve
INS(0, 0)