r600/sfn: remove early emission of ALU last op

The scheduler sets the flag when scheduling the ALU
instructions into ALU groups, so there is no need to
set these flags early, and it was already done inconsistently
anyway. The only exception is the ALU predicate instructions,
because they are not yet handled directly by the scheduler.

Clean up the use of alu_write too.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36790>
This commit is contained in:
Gert Wollny 2025-08-08 14:56:30 +02:00 committed by Marge Bot
parent 091e2f5ec5
commit c221956b68
19 changed files with 387 additions and 391 deletions

View file

@ -619,8 +619,6 @@ AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
}
m_dest = new_dest;
if (!move_instr->has_alu_flag(alu_last_instr))
reset_alu_flag(alu_last_instr);
if (has_alu_flag(alu_is_cayman_trans)) {
/* Copy propagation puts an instruction into the w channel, but we
@ -1842,17 +1840,15 @@ emit_alu_op1_64bit(const nir_alu_instr& alu,
ir = new AluInstr(opcode,
value_factory.dest(alu.def, 2 * i, pin_chan),
value_factory.src64(alu.src[0], i, swz[0]),
{alu_write});
AluInstr::write);
group->add_instruction(ir);
ir = new AluInstr(opcode,
value_factory.dest(alu.def, 2 * i + 1, pin_chan),
value_factory.src64(alu.src[0], i, swz[1]),
{alu_write});
AluInstr::write);
group->add_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
shader.emit_instruction(group);
return true;
}
@ -1869,12 +1865,10 @@ emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader)
ir = new AluInstr(op1_mov,
value_factory.dest(alu.def, 2 * i + c, pin_free),
value_factory.src64(alu.src[0], i, c),
{alu_write});
AluInstr::write);
shader.emit_instruction(ir);
}
}
if (ir)
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -1890,13 +1884,11 @@ emit_alu_neg(const nir_alu_instr& alu, Shader& shader)
ir = new AluInstr(op1_mov,
value_factory.dest(alu.def, 2 * i + c, pin_chan),
value_factory.src64(alu.src[0], i, c),
{alu_write});
AluInstr::write);
shader.emit_instruction(ir);
}
ir->set_source_mod(0, AluInstr::mod_neg);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -1916,7 +1908,7 @@ emit_alu_abs64(const nir_alu_instr& alu, Shader& shader)
auto ir = new AluInstr(op1_mov,
value_factory.dest(alu.def, 1, pin_chan),
value_factory.src64(alu.src[0], 0, 1),
AluInstr::last_write);
AluInstr::write);
ir->set_source_mod(0, AluInstr::mod_abs);
shader.emit_instruction(ir);
return true;
@ -1967,7 +1959,7 @@ emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader)
shader.emit_instruction(new AluInstr(op1_mov,
value_factory.dest(alu.def, 1, pin_chan),
value_factory.src64(alu.src[0], 0, 1),
AluInstr::last_write));
AluInstr::write));
} else {
/* dest clamp doesn't work on plain 64 bit move, so add a zero
@ -1986,7 +1978,7 @@ emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader)
value_factory.dest(alu.def, 1, pin_chan),
value_factory.src64(alu.src[0], 0, 0),
value_factory.literal(0),
AluInstr::last_write));
AluInstr::write));
shader.emit_instruction(group);
}
@ -2045,8 +2037,6 @@ emit_alu_op2_64bit(const nir_alu_instr& alu,
i == 1 ? AluInstr::write : AluInstr::empty);
group->add_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
shader.emit_instruction(group);
return true;
@ -2080,8 +2070,6 @@ emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu,
shader.emit_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -2104,8 +2092,6 @@ emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader
ir->set_source_mod(0, AluInstr::mod_abs);
group->add_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
shader.emit_instruction(group);
return true;
}
@ -2130,8 +2116,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
i < 2 ? AluInstr::write : AluInstr::empty);
group->add_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
shader.emit_instruction(group);
return true;
}
@ -2157,7 +2141,6 @@ emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
AluInstr::write);
group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
shader.emit_instruction(group);
return true;
}
@ -2172,12 +2155,12 @@ emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader)
ir = new AluInstr(op1v_flt64_to_flt32,
value_factory.dest(alu.def, 0, pin_chan),
value_factory.src64(alu.src[0], 0, 1),
{alu_write});
AluInstr::write);
group->add_instruction(ir);
ir = new AluInstr(op1v_flt64_to_flt32,
value_factory.dummy_dest(1),
value_factory.src64(alu.src[0], 0, 0),
AluInstr::last);
AluInstr::empty);
group->add_instruction(ir);
shader.emit_instruction(group);
return true;
@ -2196,7 +2179,7 @@ emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader)
value_factory.dest(alu.def, 0, pin_free),
src,
value_factory.inline_const(mask, 0),
{alu_write});
AluInstr::write);
shader.emit_instruction(ir);
return true;
}
@ -2215,7 +2198,7 @@ emit_alu_op1(const nir_alu_instr& alu,
ir = new AluInstr(opcode,
value_factory.dest(alu.def, 0, pin_free),
value_factory.src(alu.src[0], 0),
{alu_write});
AluInstr::write);
switch (mod) {
case mod_src0_abs:
ir->set_source_mod(0, AluInstr::mod_abs);
@ -2252,7 +2235,7 @@ emit_alu_op2(const nir_alu_instr& alu,
value_factory.dest(alu.def, 0, pin_free),
value_factory.src(*src0, 0),
value_factory.src(*src1, 0),
{alu_write});
AluInstr::write);
if (src1_negate)
ir->set_source_mod(1, AluInstr::mod_neg);
shader.emit_instruction(ir);
@ -2287,7 +2270,7 @@ emit_alu_op3(const nir_alu_instr& alu,
value_factory.src(*src[0], 0),
value_factory.src(*src[1], 0),
value_factory.src(*src[2], 0),
{alu_write});
AluInstr::write);
ir->set_alu_flag(alu_write);
shader.emit_instruction(ir);
return true;
@ -2309,7 +2292,7 @@ emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
srcs[2 * i + 1] = value_factory.src(src1, i);
}
AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::last_write, n);
AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::write, n);
shader.emit_instruction(ir);
shader.set_flag(Shader::sh_disble_sb);
@ -2338,7 +2321,7 @@ emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader)
srcs[2 * i + 1] = value_factory.zero();
}
AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::write, 4);
shader.emit_instruction(ir);
return true;
@ -2363,7 +2346,7 @@ emit_fdph(const nir_alu_instr& alu, Shader& shader)
srcs[6] = value_factory.one();
srcs[7] = value_factory.src(src1, 3);
AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::write, 4);
shader.emit_instruction(ir);
return true;
}
@ -2372,16 +2355,13 @@ static bool
emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader)
{
auto& value_factory = shader.value_factory();
AluInstr *ir = nullptr;
for (unsigned i = 0; i < nc; ++i) {
auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]);
auto dst = value_factory.dest(instr.def, i, pin_none);
shader.emit_instruction(new AluInstr(op1_mov, dst, src, {alu_write}));
shader.emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::write));
}
if (ir)
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -2397,7 +2377,6 @@ emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader)
AluInstr::write);
shader.emit_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -2413,7 +2392,6 @@ emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader)
AluInstr::write);
shader.emit_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -2429,7 +2407,6 @@ emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader)
AluInstr::write);
shader.emit_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -2452,7 +2429,6 @@ emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader)
AluInstr::write);
shader.emit_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -2465,20 +2441,24 @@ emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader)
auto y = value_factory.temp_register();
auto yy = value_factory.temp_register();
shader.emit_instruction(new AluInstr(
op1_flt32_to_flt16, x, value_factory.src(alu.src[0], 0), AluInstr::last_write));
shader.emit_instruction(new AluInstr(op1_flt32_to_flt16,
x,
value_factory.src(alu.src[0], 0),
AluInstr::write));
shader.emit_instruction(new AluInstr(
op1_flt32_to_flt16, y, value_factory.src(alu.src[1], 0), AluInstr::last_write));
shader.emit_instruction(new AluInstr(op1_flt32_to_flt16,
y,
value_factory.src(alu.src[1], 0),
AluInstr::write));
shader.emit_instruction(
new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write));
new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::write));
shader.emit_instruction(new AluInstr(op2_or_int,
value_factory.dest(alu.def, 0, pin_free),
x,
yy,
AluInstr::last_write));
AluInstr::write));
return true;
}
@ -2489,7 +2469,7 @@ emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader)
shader.emit_instruction(new AluInstr(op1_mov,
value_factory.dest(alu.def, 0, pin_free),
value_factory.src64(alu.src[0], 0, comp),
AluInstr::last_write));
AluInstr::write));
return true;
}
@ -2500,7 +2480,7 @@ emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader)
shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
value_factory.dest(alu.def, 0, pin_free),
value_factory.src(alu.src[0], 0),
AluInstr::last_write));
AluInstr::write));
return true;
}
static bool
@ -2512,12 +2492,12 @@ emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader)
tmp,
value_factory.src(alu.src[0], 0),
value_factory.literal(16),
AluInstr::last_write));
AluInstr::write));
shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
value_factory.dest(alu.def, 0, pin_free),
tmp,
AluInstr::last_write));
AluInstr::write));
return true;
}
@ -2533,7 +2513,7 @@ emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
ir = new AluInstr(opcode,
value_factory.dest(alu.def, 0, pin_free),
value_factory.src(src0, 0),
AluInstr::last_write);
AluInstr::write);
ir->set_alu_flag(alu_is_trans);
shader.emit_instruction(ir);
return true;
@ -2548,10 +2528,7 @@ emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader
assert(alu.def.num_components == 1);
auto temp = value_factory.temp_register();
ir = new AluInstr(op1_trunc,
temp,
value_factory.src(alu.src[0], 0),
AluInstr::last_write);
ir = new AluInstr(op1_trunc, temp, value_factory.src(alu.src[0], 0), AluInstr::write);
shader.emit_instruction(ir);
ir = new AluInstr(opcode,
@ -2560,7 +2537,6 @@ emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader
AluInstr::write);
if (opcode == op1_flt_to_uint) {
ir->set_alu_flag(alu_is_trans);
ir->set_alu_flag(alu_last_instr);
}
shader.emit_instruction(ir);
return true;
@ -2574,7 +2550,7 @@ emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shade
assert(alu.def.num_components == 1);
const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
const std::set<AluModifiers> flags({alu_write, alu_is_cayman_trans});
unsigned ncomp = 3;
@ -2602,7 +2578,7 @@ emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
value_factory.dest(alu.def, 0, pin_free),
value_factory.src(src0, 0),
value_factory.src(src1, 0),
AluInstr::last_write);
AluInstr::write);
ir->set_alu_flag(alu_is_trans);
shader.emit_instruction(ir);
@ -2619,7 +2595,7 @@ emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shade
unsigned last_slot = 4;
const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
const std::set<AluModifiers> flags({alu_write, alu_is_cayman_trans});
for (unsigned k = 0; k < alu.def.num_components; ++k) {
AluInstr::SrcValues srcs(2 * last_slot);
@ -2657,14 +2633,11 @@ emit_alu_cube(const nir_alu_instr& alu, Shader& shader)
AluInstr::write);
group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
shader.emit_instruction(group);
return true;
}
const std::set<AluModifiers> AluInstr::empty;
const std::set<AluModifiers> AluInstr::write({alu_write});
const std::set<AluModifiers> AluInstr::last({alu_last_instr});
const std::set<AluModifiers> AluInstr::last_write({alu_write, alu_last_instr});
} // namespace r600

View file

@ -153,8 +153,6 @@ public:
static const std::set<AluModifiers> empty;
static const std::set<AluModifiers> write;
static const std::set<AluModifiers> last;
static const std::set<AluModifiers> last_write;
std::tuple<PRegister, bool, PRegister> indirect_addr() const;
void update_indirect_addr(PRegister old_reg, PRegister reg) override;

View file

@ -119,7 +119,7 @@ LDSReadInstr::split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr
auto instr = new AluInstr(op1_mov,
dest,
new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
AluInstr::last_write);
AluInstr::write);
instr->add_required_instr(last_lds_instr);
instr->set_blockid(block_id(), index());
instr->set_always_keep();
@ -307,7 +307,7 @@ LDSAtomicInstr::split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_ins
}
}
auto op_instr = new AluInstr(m_opcode, srcs, {});
auto op_instr = new AluInstr(m_opcode, srcs, AluInstr::empty);
op_instr->set_blockid(block_id(), index());
if (last_lds_instr) {
@ -322,7 +322,7 @@ LDSAtomicInstr::split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_ins
auto read_instr = new AluInstr(op1_mov,
m_dest,
new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
AluInstr::last_write);
AluInstr::write);
read_instr->add_required_instr(op_instr);
read_instr->set_blockid(block_id(), index());
read_instr->set_alu_flag(alu_lds_group_end);

View file

@ -193,7 +193,7 @@ GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
if (!src_val->as_register()) {
auto temp_src_val = vf.temp_register();
shader.emit_instruction(
new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::write));
src_as_register = temp_src_val;
} else
src_as_register = src_val->as_register();
@ -219,8 +219,7 @@ GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
else
shader.emit_instruction(
new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
shader.emit_instruction(
new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val, AluInstr::write));
ir = new GDSInstr(op, dest, tmp, 0, nullptr);
}
shader.emit_instruction(ir);
@ -297,7 +296,7 @@ GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
shader.emit_instruction(
new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::write));
ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, tmp, 0, nullptr);
}
shader.emit_instruction(ir);
@ -340,7 +339,7 @@ GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
shader.emit_instruction(
new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::write));
ir = new GDSInstr(opcode, tmp_dest, tmp, 0, nullptr);
}
@ -350,7 +349,7 @@ GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
vf.dest(instr->def, 0, pin_free),
tmp_dest,
vf.one_i(),
AluInstr::last_write));
AluInstr::write));
return true;
}
@ -594,8 +593,8 @@ RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
auto addr_temp = vf.temp_register();
/** Should be lowered in nir */
shader.emit_instruction(new AluInstr(
op2_lshr_int, addr_temp, addr, vf.literal(2), {alu_write, alu_last_instr}));
shader.emit_instruction(
new AluInstr(op2_lshr_int, addr_temp, addr, vf.literal(2), AluInstr::write));
const EVTXDataFormat formats[4] = {fmt_32, fmt_32_32, fmt_32_32_32, fmt_32_32_32_32};
@ -631,8 +630,7 @@ RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader)
auto addr_vec = vf.temp_vec4(pin_chan, {0, 7, 7, 7});
shader.emit_instruction(
new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2),
AluInstr::last_write));
new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2), AluInstr::write));
RegisterVec4::Swizzle value_swz = {0,7,7,7};
auto mask = nir_intrinsic_write_mask(intr);
@ -651,8 +649,6 @@ RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader)
shader.emit_instruction(ir);
}
}
if (ir)
ir->set_alu_flag(alu_last_instr);
auto store = new RatInstr(cf_mem_rat_cacheless,
RatInstr::STORE_RAW,
@ -688,14 +684,17 @@ RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
auto addr_vec = vf.temp_vec4(pin_group, {0, 1, 2, 7});
if (i == 0) {
shader.emit_instruction(
new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write));
new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::write));
} else {
shader.emit_instruction(new AluInstr(
op2_add_int, addr_vec[0], addr_base, vf.literal(i), AluInstr::last_write));
shader.emit_instruction(new AluInstr(op2_add_int,
addr_vec[0],
addr_base,
vf.literal(i),
AluInstr::write));
}
auto value = vf.src(instr->src[0], i);
PRegister v = vf.temp_register(0);
shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write));
shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::write));
auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan);
auto store = new RatInstr(cf_mem_rat,
RatInstr::STORE_TYPED,
@ -730,7 +729,7 @@ RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3});
shader.emit_instruction(
new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));
new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::write));
shader.emit_instruction(
new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));
@ -742,10 +741,10 @@ RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
new AluInstr(op1_mov,
data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
vf.src(intr->src[2], 0),
{alu_last_instr, alu_write}));
AluInstr::write));
} else {
shader.emit_instruction(new AluInstr(
op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
shader.emit_instruction(
new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::write));
}
RegisterVec4 out_vec(coord, coord, coord, coord, pin_chgr);
@ -827,12 +826,12 @@ RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
swizzle = {0, 2, 1, 3};
for (int i = 0; i < 4; ++i) {
auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
auto flags = i != 3 ? AluInstr::write : AluInstr::write;
shader.emit_instruction(
new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
}
for (int i = 0; i < 4; ++i) {
auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
auto flags = i != 3 ? AluInstr::write : AluInstr::write;
shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
}
@ -874,7 +873,7 @@ RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
swizzle = {0, 2, 1, 3};
for (int i = 0; i < 4; ++i) {
auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
auto flags = i != 3 ? AluInstr::write : AluInstr::write;
shader.emit_instruction(
new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
}
@ -889,12 +888,12 @@ RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
new AluInstr(op1_mov,
data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
vf.src(intrin->src[3], 0),
AluInstr::last_write));
AluInstr::write));
} else {
shader.emit_instruction(
new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[3], 0), AluInstr::write));
shader.emit_instruction(
new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::write));
}
auto atomic =
@ -988,7 +987,7 @@ RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
vf.uniform(lookup_resid / 4 + R600_SHADER_BUFFER_INFO_SEL,
lookup_resid % 4,
R600_BUFFER_INFO_CONST_BUFFER),
AluInstr::last_write));
AluInstr::write));
} else {
/* If the addressing is indirect we have to get the z-value by
* using a binary search */
@ -1014,7 +1013,7 @@ RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
high_bit,
vf.src(intrin->src[0], 0),
vf.literal(2),
AluInstr::last_write));
AluInstr::write));
shader.emit_instruction(new LoadFromBuffer(trgt,
{0, 1, 2, 3},
@ -1027,10 +1026,18 @@ RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
// this may be wrong
shader.emit_instruction(new AluInstr(
op3_cnde_int, comp1, high_bit, trgt[0], trgt[2], AluInstr::write));
shader.emit_instruction(new AluInstr(
op3_cnde_int, comp2, high_bit, trgt[1], trgt[3], AluInstr::last_write));
shader.emit_instruction(new AluInstr(
op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
shader.emit_instruction(new AluInstr(op3_cnde_int,
comp2,
high_bit,
trgt[1],
trgt[3],
AluInstr::write));
shader.emit_instruction(new AluInstr(op3_cnde_int,
dest[2],
low_bit,
comp1,
comp2,
AluInstr::write));
}
} else {
auto dest = vf.dest_vec4(intrin->def, pin_group);
@ -1071,7 +1078,7 @@ RatInstr::emit_image_samples(nir_intrinsic_instr *intrin, Shader& shader)
res_id,
dyn_offset));
shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::last_write));
shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::write));
return true;
}

View file

@ -615,13 +615,12 @@ TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
shader.emit_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
shader.emit_instruction(
new AluInstr(op2_or_int,
dst[3],
tmp_w,
vf.uniform(buf_sel + 1, 0, R600_BUFFER_INFO_CONST_BUFFER),
AluInstr::last_write));
AluInstr::write));
}
return true;
@ -661,15 +660,13 @@ TexInstr::emit_tex_txs(nir_tex_instr *tex,
} else {
int id = 2 * tex->texture_index + (512 + R600_BUFFER_INFO_OFFSET / 16) + 1;
auto src = vf.uniform(id, 1, R600_BUFFER_INFO_CONST_BUFFER);
shader.emit_instruction(
new AluInstr(op1_mov, dest[0], src, AluInstr::last_write));
shader.emit_instruction(new AluInstr(op1_mov, dest[0], src, AluInstr::write));
shader.set_flag(Shader::sh_uses_tex_buffer);
}
} else {
auto src_lod = vf.temp_register();
shader.emit_instruction(
new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write));
shader.emit_instruction(new AluInstr(op1_mov, src_lod, src.lod, AluInstr::write));
RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free);
@ -691,7 +688,7 @@ TexInstr::emit_tex_txs(nir_tex_instr *tex,
tex->texture_index & 3,
R600_BUFFER_INFO_CONST_BUFFER);
auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write);
auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::write);
shader.emit_instruction(alu);
shader.set_flag(Shader::sh_txs_cube_array_comp);
}
@ -746,9 +743,6 @@ TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shade
shader.emit_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
return src_coord;
}
@ -887,8 +881,6 @@ TexInstr::emit_tex_lod(nir_tex_instr *tex, Inputs& src, Shader& shader)
ir = new AluInstr(op1_mov, src_coord[i], src.coord[i], AluInstr::write);
shader.emit_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
auto irt = new TexInstr(TexInstr::get_tex_lod,
dst,

View file

@ -124,11 +124,11 @@ InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader)
for (int i = 0; i < literal->def.num_components; ++i) {
auto dest0 = m_value_factory.dest(literal->def, 2 * i, pin_none);
auto src0 = m_value_factory.literal(literal->value[i].u64 & 0xffffffff);
shader.emit_instruction(new AluInstr(op1_mov, dest0, src0, {alu_write}));
shader.emit_instruction(new AluInstr(op1_mov, dest0, src0, AluInstr::write));
auto dest1 = m_value_factory.dest(literal->def, 2 * i + 1, pin_none);
auto src1 = m_value_factory.literal((literal->value[i].u64 >> 32) & 0xffffffff);
shader.emit_instruction(new AluInstr(op1_mov, dest1, src1, AluInstr::last_write));
shader.emit_instruction(new AluInstr(op1_mov, dest1, src1, AluInstr::write));
}
} else {
Pin pin = literal->def.num_components == 1 ? pin_free : pin_none;
@ -156,11 +156,9 @@ InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader)
src = m_value_factory.literal(v);
}
ir = new AluInstr(op1_mov, dest, src, {alu_write});
ir = new AluInstr(op1_mov, dest, src, AluInstr::write);
shader.emit_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
}
return true;
}
@ -196,7 +194,7 @@ InstrFactory::process_undef(nir_undef_instr *undef, Shader& shader)
for (int i = 0; i < undef->def.num_components; ++i) {
auto dest = shader.value_factory().undef(undef->def.index, i);
shader.emit_instruction(
new AluInstr(op1_mov, dest, value_factory().zero(), AluInstr::last_write));
new AluInstr(op1_mov, dest, value_factory().zero(), AluInstr::write));
}
return true;
}

View file

@ -395,10 +395,8 @@ Shader::allocate_reserved_registers()
m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end);
if (!m_atomics.empty()) {
m_atomic_update = value_factory().temp_register();
auto alu = new AluInstr(op1_mov,
m_atomic_update,
value_factory().one_i(),
AluInstr::last_write);
auto alu =
new AluInstr(op1_mov, m_atomic_update, value_factory().one_i(), AluInstr::write);
alu->set_alu_flag(alu_no_schedule_bias);
emit_instruction(alu);
}
@ -410,23 +408,27 @@ Shader::allocate_reserved_registers()
auto temp2 = value_factory().temp_register(2);
auto group = new AluGroup();
group->add_instruction(new AluInstr(
op1_mbcnt_32lo_accum_prev_int, temp0, value_factory().literal(-1), {alu_write}));
group->add_instruction(new AluInstr(
op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write}));
group->add_instruction(new AluInstr(op1_mbcnt_32lo_accum_prev_int,
temp0,
value_factory().literal(-1),
AluInstr::write));
group->add_instruction(new AluInstr(op1_mbcnt_32hi_int,
temp1,
value_factory().literal(-1),
AluInstr::write));
emit_instruction(group);
emit_instruction(new AluInstr(op3_muladd_uint24,
temp2,
value_factory().inline_const(ALU_SRC_SE_ID, 0),
value_factory().literal(256),
value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0),
{alu_write, alu_last_instr}));
AluInstr::write));
emit_instruction(new AluInstr(op3_muladd_uint24,
m_rat_return_address,
temp2,
value_factory().literal(0x40),
temp0,
{alu_write, alu_last_instr}));
AluInstr::write));
}
}
@ -713,13 +715,13 @@ Shader::process_if(nir_if *if_stmt)
EAluOp op = child_block_empty(if_stmt->then_list) ? op2_prede_int :
op2_pred_setne_int;
auto flags = {alu_update_exec, alu_last_instr, alu_update_pred};
AluInstr *pred = new AluInstr(op,
value_factory().temp_register(),
value,
value_factory().zero(),
AluInstr::last);
pred->set_alu_flag(alu_update_exec);
pred->set_alu_flag(alu_update_pred);
flags);
pred->set_cf_type(cf_alu_push_before);
IfInstr *ir = new IfInstr(pred);
@ -850,8 +852,6 @@ Shader::emit_tex_fdd(const nir_intrinsic_instr* intr, int opcode, bool fine)
mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write);
emit_instruction(mv);
}
if (mv)
mv->set_alu_flag(alu_last_instr);
auto dst = value_factory_.dest_vec4(intr->def, pin_group);
RegisterVec4::Swizzle dst_swz = {7, 7, 7, 7};
@ -992,7 +992,7 @@ Shader::emit_load_to_register(PVirtualValue src, int chan)
if (!dest || chan >= 0) {
dest = value_factory().temp_register(chan);
dest->set_pin(pin_free);
emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
}
return dest;
}
@ -1170,7 +1170,7 @@ Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id)
uav_id = uav_id_val->as_register();
} else {
uav_id = vf.temp_register();
emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, AluInstr::last_write));
emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, AluInstr::write));
}
}
return std::make_pair(offset, uav_id);
@ -1200,8 +1200,6 @@ Shader::emit_store_scratch(nir_intrinsic_instr *intr)
if (!ir)
return true;
ir->set_alu_flag(alu_last_instr);
auto address = vf.src(intr->src[1], 0);
int align = nir_intrinsic_align_mul(intr);
@ -1224,7 +1222,7 @@ Shader::emit_store_scratch(nir_intrinsic_instr *intr)
ws_ir = new ScratchIOInstr(value, offset, align, align_offset, writemask);
} else {
auto addr_temp = vf.temp_register(0);
auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write);
auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::write);
load_addr->set_alu_flag(alu_no_schedule_bias);
emit_instruction(load_addr);
@ -1272,7 +1270,7 @@ Shader::emit_load_scratch(nir_intrinsic_instr *intr)
ir = new ScratchIOInstr(dest, offset, align, align_offset, 0xf, true);
} else {
auto addr_temp = value_factory().temp_register(0);
auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::last_write);
auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::write);
load_addr->set_alu_flag(alu_no_schedule_bias);
emit_instruction(load_addr);
@ -1295,7 +1293,7 @@ bool Shader::emit_load_global(nir_intrinsic_instr *intr)
auto src = src_value->as_register();
if (!src) {
src = value_factory().temp_register();
emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::last_write));
emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::write));
}
auto load = new LoadFromBuffer(dest, {0,7,7,7}, src, 0, 1, NULL, fmt_32);
load->set_mfc(4);
@ -1490,8 +1488,7 @@ bool
Shader::emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset)
{
auto src = value_factory().temp_register();
emit_instruction(
new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::last_write));
emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::write));
auto dest = value_factory().dest_vec4(instr->def, pin_group);
auto fetch = new LoadFromBuffer(dest,
@ -1512,8 +1509,7 @@ bool
Shader::emit_get_lds_info_uint(nir_intrinsic_instr *instr, int offset)
{
auto src = value_factory().temp_register();
emit_instruction(
new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::last_write));
emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::write));
auto dest = value_factory().dest_vec4(instr->def, pin_group);
auto fetch = new LoadFromBuffer(dest,
@ -1540,7 +1536,7 @@ Shader::emit_shader_clock(nir_intrinsic_instr *instr)
group->add_instruction(new AluInstr(op1_mov,
vf.dest(instr->def, 1, pin_chan),
vf.inline_const(ALU_SRC_TIME_HI, 0),
AluInstr::last_write));
AluInstr::write));
emit_instruction(group);
return true;
}
@ -1551,7 +1547,6 @@ Shader::emit_group_barrier(nir_intrinsic_instr *intr)
assert(m_control_flow_depth == 0);
(void)intr;
auto op = new AluInstr(op0_group_barrier, 0);
op->set_alu_flag(alu_last_instr);
emit_instruction(op);
return true;
}
@ -1632,11 +1627,9 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
ir = new AluInstr(op1_mov,
value_factory().dest(instr->def, i, pin),
uniform,
{alu_write});
AluInstr::write);
emit_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
return true;
} else {
int buf_cmp = nir_intrinsic_component(instr);
@ -1651,8 +1644,6 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
ir = new AluInstr(op1_mov, dest, u, AluInstr::write);
emit_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
m_indirect_files |= 1 << TGSI_FILE_CONSTANT;
return true;
}
@ -1670,7 +1661,7 @@ bool
Shader::emit_simple_mov(nir_def& def, int chan, PVirtualValue src, Pin pin)
{
auto dst = value_factory().dest(def, chan, pin);
emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write));
emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::write));
return true;
}

View file

@ -81,7 +81,7 @@ ComputeShader::emit_load_from_info_buffer(nir_intrinsic_instr *instr, int offset
emit_instruction(new AluInstr(op1_mov,
m_zero_register,
value_factory().inline_const(ALU_SRC_0, 0),
AluInstr::last_write));
AluInstr::write));
}
auto dest = value_factory().dest_vec4(instr->def, pin_group);
@ -109,8 +109,8 @@ ComputeShader::emit_load_3vec(nir_intrinsic_instr *instr,
for (int i = 0; i < 3; ++i) {
auto dest = vf.dest(instr->def, i, pin_none);
emit_instruction(new AluInstr(
op1_mov, dest, src[i], i == 2 ? AluInstr::last_write : AluInstr::write));
emit_instruction(
new AluInstr(op1_mov, dest, src[i], i == 2 ? AluInstr::write : AluInstr::write));
}
return true;
}

View file

@ -72,7 +72,7 @@ FragmentShader::load_input(nir_intrinsic_instr *intr)
vf.dest(intr->def, 0, pin_none),
m_face_input,
vf.inline_const(ALU_SRC_0, 0),
AluInstr::last_write);
AluInstr::write);
emit_instruction(ir);
return true;
}
@ -143,7 +143,7 @@ FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
nullptr,
value_factory().src(intr->src[0], 0),
value_factory().zero(),
{AluInstr::last}));
AluInstr::empty));
return true;
case nir_intrinsic_terminate:
@ -152,7 +152,7 @@ FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
nullptr,
value_factory().zero(),
value_factory().zero(),
{AluInstr::last}));
AluInstr::empty));
return true;
case nir_intrinsic_load_sample_mask_in:
if (m_apply_sample_mask) {
@ -286,9 +286,9 @@ FragmentShader::emit_load_sample_mask_in(nir_intrinsic_instr *instr)
assert(m_sample_mask_reg);
emit_instruction(
new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::last_write));
new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::write));
emit_instruction(
new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::last_write));
new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::write));
return true;
}
@ -298,7 +298,7 @@ FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr *instr)
assert(m_helper_invocation);
auto& vf = value_factory();
emit_instruction(
new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::last_write));
new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::write));
RegisterVec4 destvec{m_helper_invocation, nullptr, nullptr, nullptr, pin_group};
auto vtx = new LoadFromBuffer(destvec,
@ -312,7 +312,7 @@ FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr *instr)
vtx->set_fetch_flag(FetchInstr::use_tc);
vtx->set_always_keep();
auto dst = value_factory().dest(instr->def, 0, pin_free);
auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::last_write);
auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::write);
ir->add_required_instr(vtx);
emit_instruction(vtx);
emit_instruction(ir);
@ -647,7 +647,6 @@ bool
FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr)
{
auto& vf = value_factory();
AluInstr *ir = nullptr;
for (unsigned i = 0; i < intr->def.num_components; ++i) {
sfn_log << SfnLog::io << "Inject register "
<< *m_interpolated_inputs[nir_intrinsic_base(intr)][i] << "\n";
@ -657,8 +656,6 @@ FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr)
i,
m_interpolated_inputs[nir_intrinsic_base(intr)][index]);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -697,10 +694,10 @@ FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr)
new AluInstr(op1_interp_load_p0,
tmp,
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
AluInstr::last_write);
AluInstr::write);
emit_instruction(ir);
emit_instruction(new AluInstr(
op1_mov, vf.dest(intr->def, i, pin_chan), tmp, AluInstr::last_write));
emit_instruction(
new AluInstr(op1_mov, vf.dest(intr->def, i, pin_chan), tmp, AluInstr::write));
} else {
ir = new AluInstr(op1_interp_load_p0,
@ -710,7 +707,6 @@ FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr)
emit_instruction(ir);
}
}
ir->set_alu_flag(alu_last_instr);
return true;
}
@ -793,7 +789,6 @@ FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr)
emit_instruction(ir);
}
assert(ir);
ir->set_alu_flag(alu_last_instr);
}
return true;
@ -895,22 +890,22 @@ FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr *instr)
auto tmp1 = vf.temp_register();
emit_instruction(
new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
emit_instruction(new AluInstr(
op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, AluInstr::write));
emit_instruction(
new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, AluInstr::write));
emit_instruction(new AluInstr(op3_muladd,
vf.dest(instr->def, 0, pin_none),
grad[3],
slope[3],
tmp1,
{alu_write}));
AluInstr::write));
emit_instruction(new AluInstr(op3_muladd,
vf.dest(instr->def, 1, pin_none),
grad[2],
slope[3],
tmp0,
{alu_write, alu_last_instr}));
AluInstr::write));
return true;
}
@ -947,17 +942,21 @@ FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr *instr)
auto tmp0 = vf.temp_register();
auto tmp1 = vf.temp_register();
emit_instruction(
new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
emit_instruction(new AluInstr(
op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
emit_instruction(new AluInstr(
op3_muladd, vf.dest(instr->def, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, AluInstr::write));
emit_instruction(
new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, AluInstr::write));
emit_instruction(new AluInstr(op3_muladd,
vf.dest(instr->def, 0, pin_none),
help[3],
ofs_y,
tmp1,
AluInstr::write));
emit_instruction(new AluInstr(op3_muladd,
vf.dest(instr->def, 1, pin_none),
help[2],
ofs_y,
tmp0,
{alu_write, alu_last_instr}));
AluInstr::write));
return true;
}
@ -980,12 +979,11 @@ FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest,
dest[chan],
i & 1 ? params.j : params.i,
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
i == 0 ? AluInstr::write : AluInstr::last);
i == 0 ? AluInstr::write : AluInstr::empty);
ir->set_bank_swizzle(alu_vec_210);
success = group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
if (success)
emit_instruction(group);
return success;
@ -1012,7 +1010,6 @@ FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest,
ir->set_bank_swizzle(alu_vec_210);
success = group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
if (success)
emit_instruction(group);
return success;
@ -1037,7 +1034,6 @@ FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest,
ir->set_bank_swizzle(alu_vec_210);
success = group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
if (success)
emit_instruction(group);

View file

@ -149,8 +149,7 @@ GeometryShader::do_allocate_reserved_registers()
for (int i = 0; i < 4; ++i) {
m_export_base[i] = value_factory().temp_register(0, false);
emit_instruction(
new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write));
emit_instruction(new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::write));
}
m_ring_item_sizes[0] = m_next_input_ring_offset;
@ -214,7 +213,7 @@ GeometryShader::emit_vertex(nir_intrinsic_instr *instr, bool cut)
m_export_base[stream],
m_export_base[stream],
value_factory().literal(m_noutputs),
AluInstr::last_write);
AluInstr::write);
emit_instruction(ir);
}
@ -288,7 +287,6 @@ GeometryShader::store_output(nir_intrinsic_instr *instr)
emit_instruction(ir);
}
}
ir->set_alu_flag(alu_last_instr);
m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
MemRingOutInstr::mem_write_ind,
tmp,
@ -396,7 +394,7 @@ GeometryShader::emit_adj_fix()
adjhelp0,
m_primitive_id,
value_factory().one_i(),
AluInstr::last_write));
AluInstr::write));
int reg_indices[R600_GS_VERTEX_INDIRECT_TOTAL];
int rotate_indices[R600_GS_VERTEX_INDIRECT_TOTAL] = {4, 5, 0, 1, 2, 3};
@ -418,7 +416,6 @@ GeometryShader::emit_adj_fix()
emit_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
for (int i = 0; i < R600_GS_VERTEX_INDIRECT_TOTAL; i++)
m_per_vertex_offsets[i] = adjhelp[i];

View file

@ -133,8 +133,8 @@ VertexExportForFs::finalize()
{
if (m_vs_as_gs_a) {
auto primid = m_parent->value_factory().temp_vec4(pin_group, {2, 7, 7, 7});
m_parent->emit_instruction(new AluInstr(
op1_mov, primid[0], m_parent->primitive_id(), AluInstr::last_write));
m_parent->emit_instruction(
new AluInstr(op1_mov, primid[0], m_parent->primitive_id(), AluInstr::write));
int param = m_last_param_export ? m_last_param_export->location() + 1 : 0;
m_last_param_export = new ExportInstr(ExportInstr::param, param, primid);
@ -202,9 +202,8 @@ VertexExportForFs::emit_varying_pos(const store_loc& store_info,
auto src = m_parent->value_factory().src(intr.src[0], 0);
auto clamped = m_parent->value_factory().temp_register();
m_parent->emit_instruction(
new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp, alu_last_instr}));
auto alu =
new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::last_write);
new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp}));
auto alu = new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::write);
if (m_parent->chip_class() < ISA_CC_EVERGREEN)
alu->set_alu_flag(alu_is_trans);
m_parent->emit_instruction(alu);
@ -277,8 +276,6 @@ VertexExportForFs::emit_varying_param(const store_loc& store_info,
m_parent->emit_instruction(alu);
}
}
if (alu)
alu->set_alu_flag(alu_last_instr);
m_last_param_export = new ExportInstr(ExportInstr::param, export_slot, value);
m_output_registers[nir_intrinsic_base(&intr)] = &m_last_param_export->value();
@ -352,8 +349,6 @@ VertexExportForFs::emit_stream(int stream)
alu = new AluInstr(op1_mov, tmp[i][j], (*so_gpr[i])[j + sc], {alu_write});
m_parent->emit_instruction(alu);
}
if (alu)
alu->set_alu_flag(alu_last_instr);
start_comp[i] = 0;
so_gpr[i] = &tmp[i];
@ -657,8 +652,6 @@ VertexExportForGS::do_store_output(const store_loc& store_info,
AluInstr::write);
m_parent->emit_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
m_parent->emit_instruction(new MemRingOutInstr(
cf_mem_ring, MemRingOutInstr::mem_write, value, ring_offset >> 2, 4, nullptr));

View file

@ -185,7 +185,8 @@ auto AddressSplitVisitor::load_index_register_eg(Instr *instr,
const EAluOp idx_op[2] = {op1_set_cf_idx0, op1_set_cf_idx1};
m_last_idx_load[idx_id] = new AluInstr(idx_op[idx_id], idx, m_vf.addr(), {});
m_last_idx_load[idx_id] =
new AluInstr(idx_op[idx_id], idx, m_vf.addr(), AluInstr::empty);
m_current_block->insert(m_block_iterator, m_last_idx_load[idx_id]);
for (auto&& i : m_last_idx_use[idx_id])
m_last_ar_load->add_required_instr(i);
@ -208,7 +209,7 @@ auto AddressSplitVisitor::load_index_register_ca(PRegister index) -> int
if (idx_id < 0) {
idx_id = pick_idx();
auto idx = m_vf.idx_reg(idx_id);
m_last_idx_load[idx_id] = new AluInstr(op1_mova_int, idx, index, {});
m_last_idx_load[idx_id] = new AluInstr(op1_mova_int, idx, index, AluInstr::empty);
m_current_block->insert(m_block_iterator, m_last_idx_load[idx_id]);
for (auto&& i : m_last_idx_use[idx_id])
@ -249,7 +250,7 @@ void AddressSplitVisitor::load_ar(Instr *instr, PRegister addr)
{
auto ar = m_vf.addr();
m_last_ar_load = new AluInstr(op1_mova_int, ar, addr, {});
m_last_ar_load = new AluInstr(op1_mova_int, ar, addr, AluInstr::empty);
m_current_block->insert(m_block_iterator, m_last_ar_load);
ar->add_use(instr);
m_current_addr = addr;

View file

@ -37,7 +37,7 @@ TEST_F(InstrTest, test_alu_uni_op_mov)
AluInstr alu(op1_mov,
new Register(128, 2, pin_none),
new Register(129, 0, pin_chan),
{alu_write});
AluInstr::write);
EXPECT_TRUE(alu.has_alu_flag(alu_write));
@ -187,14 +187,14 @@ TEST_F(InstrTest, test_alu_op1_comp)
auto r129y = new Register(129, 1, pin_none);
auto r130x = new Register(130, 0, pin_none);
AluInstr alu1(op1_mov, r128z, r129x, {alu_write});
EXPECT_NE(alu1, AluInstr(op1_mov, r128y, r129x, {alu_write}));
EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129xc, {alu_write}));
EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129y, {alu_write}));
EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r130x, {alu_write}));
AluInstr alu1(op1_mov, r128z, r129x, AluInstr::write);
EXPECT_NE(alu1, AluInstr(op1_mov, r128y, r129x, AluInstr::write));
EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129xc, AluInstr::write));
EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129y, AluInstr::write));
EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r130x, AluInstr::write));
EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129x, {alu_write, alu_last_instr}));
EXPECT_NE(alu1, AluInstr(op1_flt_to_int, r128z, r129x, {alu_write}));
EXPECT_NE(alu1, AluInstr(op1_mov, r128zc, r129x, {alu_write}));
EXPECT_NE(alu1, AluInstr(op1_flt_to_int, r128z, r129x, AluInstr::write));
EXPECT_NE(alu1, AluInstr(op1_mov, r128zc, r129x, AluInstr::write));
EXPECT_EQ(alu1, alu1);
}
@ -205,14 +205,17 @@ TEST_F(InstrTest, test_alu_op2_comp)
auto r128y = new Register(128, 1, pin_none);
auto r128z = new Register(128, 2, pin_none);
AluInstr alu1(op2_add, r128z, r128x, r128y, {alu_write});
AluInstr alu1(op2_add, r128z, r128x, r128y, AluInstr::write);
EXPECT_NE(
alu1, AluInstr(op2_add, r128z, r128x, new Register(129, 2, pin_none), {alu_write}));
alu1,
AluInstr(op2_add, r128z, r128x, new Register(129, 2, pin_none), AluInstr::write));
EXPECT_NE(
alu1, AluInstr(op2_add, r128z, r128x, new Register(128, 0, pin_none), {alu_write}));
alu1,
AluInstr(op2_add, r128z, r128x, new Register(128, 0, pin_none), AluInstr::write));
EXPECT_NE(
alu1, AluInstr(op2_add, r128z, r128x, new Register(128, 1, pin_chan), {alu_write}));
alu1,
AluInstr(op2_add, r128z, r128x, new Register(128, 1, pin_chan), AluInstr::write));
}
TEST_F(InstrTest, test_alu_op3_comp)
@ -221,20 +224,29 @@ TEST_F(InstrTest, test_alu_op3_comp)
auto r128y = new Register(128, 1, pin_none);
auto r128z = new Register(128, 2, pin_none);
AluInstr alu1(op3_muladd, r128z, r128x, r128y, r128y, {alu_write});
AluInstr alu1(op3_muladd, r128z, r128x, r128y, r128y, AluInstr::write);
EXPECT_NE(
alu1,
AluInstr(
op3_muladd, r128z, r128x, r128y, new Register(129, 2, pin_none), {alu_write}));
EXPECT_NE(
alu1,
AluInstr(
op3_muladd, r128z, r128x, r128y, new Register(128, 0, pin_none), {alu_write}));
EXPECT_NE(
alu1,
AluInstr(
op3_muladd, r128z, r128x, r128y, new Register(128, 1, pin_chan), {alu_write}));
EXPECT_NE(alu1,
AluInstr(op3_muladd,
r128z,
r128x,
r128y,
new Register(129, 2, pin_none),
AluInstr::write));
EXPECT_NE(alu1,
AluInstr(op3_muladd,
r128z,
r128x,
r128y,
new Register(128, 0, pin_none),
AluInstr::write));
EXPECT_NE(alu1,
AluInstr(op3_muladd,
r128z,
r128x,
r128y,
new Register(128, 1, pin_chan),
AluInstr::write));
}
TEST_F(InstrTest, test_alu_op3_ne)
@ -258,7 +270,7 @@ TEST_F(InstrTest, test_alu_op3_ne)
AluInstr(op3_cnde, R130x, R130y, R130z, R131w, {alu_write, alu_last_instr}));
EXPECT_NE(alu,
AluInstr(op3_cnde, R130x, R130y, R131z, R130z, {alu_write, alu_last_instr}));
EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R131w, {alu_write}));
EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R131w, AluInstr::write));
AluInstr alu_cf_changes = alu;
alu_cf_changes.set_cf_type(cf_alu_push_before);
@ -328,15 +340,15 @@ TEST_F(InstrTest, test_alu_dot4_grouped)
auto i = group->begin();
EXPECT_NE(i, group->end());
ASSERT_TRUE(*i);
check(**i, AluInstr(op2_dot4_ieee, R132x, R130x, R130y, {alu_write}));
check(**i, AluInstr(op2_dot4_ieee, R132x, R130x, R130y, AluInstr::write));
++i;
EXPECT_NE(i, group->end());
ASSERT_TRUE(*i);
check(**i, AluInstr(op2_dot4_ieee, R132y, R130z, R130w, {}));
check(**i, AluInstr(op2_dot4_ieee, R132y, R130z, R130w, AluInstr::empty));
++i;
EXPECT_NE(i, group->end());
ASSERT_TRUE(*i);
check(**i, AluInstr(op2_dot4_ieee, R132z, R131x, R131y, {}));
check(**i, AluInstr(op2_dot4_ieee, R132z, R131x, R131y, AluInstr::empty));
++i;
EXPECT_NE(i, group->end());
ASSERT_TRUE(*i);

View file

@ -51,7 +51,7 @@ TEST_F(TestInstrFromString, test_alu_lds_read_ret)
{
add_dest_from_string("R1999.x");
AluInstr expect(DS_OP_READ_RET, {new Register(1999, 0, pin_none)}, {});
AluInstr expect(DS_OP_READ_RET, {new Register(1999, 0, pin_none)}, AluInstr::empty);
check("ALU LDS READ_RET __.x : R1999.x {}", expect);
}
@ -96,7 +96,7 @@ TEST_F(TestInstrFromString, test_alu_mov_neg_abs)
AluInstr expect(op1_mov,
new Register(2000, 1, pin_none),
new Register(1999, 0, pin_none),
{alu_write});
AluInstr::write);
expect.set_source_mod(0, AluInstr::mod_abs);
expect.set_source_mod(0, AluInstr::mod_neg);
@ -351,7 +351,7 @@ TEST_F(TestInstrFromString, test_alu_interp_xy)
new Register(1024, 2, pin_chan),
r0y,
new InlineConstant(ALU_SRC_PARAM_BASE, 2),
{alu_write});
AluInstr::write);
expect.set_bank_swizzle(alu_vec_210);
check(init, expect);
@ -369,7 +369,7 @@ TEST_F(TestInstrFromString, test_alu_interp_xy_no_write)
new Register(1024, 0, pin_chan),
r0x,
new InlineConstant(ALU_SRC_PARAM_BASE, 2),
{});
AluInstr::empty);
expect.set_bank_swizzle(alu_vec_210);
check(init, expect);

View file

@ -79,11 +79,11 @@ TEST_F(LiveRangeTests, SimpleAdd)
RegisterVec4::Swizzle dummy;
ValueFactory vf;
Register *r0x = vf.dest_from_string("S0.x@free");
Register *r1x = vf.dest_from_string("S1.x@free");
Register *r0x = vf.dest_from_string("S0.x@chan");
Register *r1x = vf.dest_from_string("S1.x@chan");
RegisterVec4 r2 = vf.dest_vec4_from_string("S2.xyzw", dummy, pin_none);
Register *r3x = vf.dest_from_string("S3.x@free");
RegisterVec4 r4 = vf.dest_vec4_from_string("S4.xyzw", dummy, pin_group);
Register *r3x = vf.dest_from_string("S3.x@chan");
RegisterVec4 r4 = vf.dest_vec4_from_string("S4.xyzw", dummy, pin_chgr);
LiveRangeMap expect = vf.prepare_live_range_map();
@ -98,7 +98,7 @@ TEST_F(LiveRangeTests, SimpleAdd)
for (int i = 0; i < 4; ++i)
expect.set_life_range(*r4[i], 5, 6);
check(add_add_1_expect_from_nir, expect);
check(add_add_1_expect_from_nir_scheduled, expect);
}
TEST_F(LiveRangeTests, SimpleAInterpolation)

View file

@ -514,30 +514,30 @@ TEST_F(TestShaderFromNir, fs_shed_tex_coord)
TEST_F(TestShaderFromNir, OptimizeAddWChanetoTrans)
{
const char *input =
R"(VS
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU MOV S2.x@free{s} : I[0] {WL}
ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
ALU MOV S2.x@free{s} : I[0] {W}
ALU MOV S3.y@free{s} : L[0x40c00000] {W}
ALU MOV S4.z@free{s} : L[0xc1140000] {W}
ALU MOV S5.w@free{s} : L[0xbfe00000] {W}
ALU MOV S6.x@free{s} : L[0x3fa00000] {W}
ALU MOV S7.x{s} : KC0[0].x {W}
ALU MOV S7.y{s} : KC0[0].y {W}
ALU MOV S7.z{s} : KC0[0].z {W}
ALU MOV S7.w{s} : KC0[0].w {WL}
ALU ADD S8.y@free{s} : S3.y@free{s} -S7.x{s} {WL}
ALU ADD S9.z@free{s} : S4.z@free{s} -S7.y{s} {WL}
ALU ADD S10.w@free{s} : S5.w@free{s} -S7.z{s} {WL}
ALU ADD S11.x@free{s} : S6.x@free{s} -S7.w{s} {WL}
ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU MOV S7.w{s} : KC0[0].w {W}
ALU ADD S8.y@free{s} : S3.y@free{s} -S7.x{s} {W}
ALU ADD S9.z@free{s} : S4.z@free{s} -S7.y{s} {W}
ALU ADD S10.w@free{s} : S5.w@free{s} -S7.z{s} {W}
ALU ADD S11.x@free{s} : S6.x@free{s} -S7.w{s} {W}
ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W}
ALU MOV S17.x{s} : S12.y@free{s} {W}
ALU MOV S17.y{s} : S13.z@free{s} {W}
ALU MOV S17.z{s} : S14.x@free{s} {W}
@ -545,26 +545,26 @@ BLOCK_START
ALU MOV S18.x@group{s} : S17.x{s} {W}
ALU MOV S18.y@group{s} : S17.y{s} {W}
ALU MOV S18.z@group{s} : S17.z{s} {W}
ALU MOV S18.w@group{s} : S17.w{s} {WL}
ALU MOV S18.w@group{s} : S17.w{s} {W}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END)";
const char *expect =
R"(VS
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU ADD S8.y@free{s} : L[0x40c00000] -KC0[0].x {WL}
ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL}
ALU ADD S10.w@free{s} : L[0xbfe00000] -KC0[0].z {WL}
ALU ADD S11.x@free{s} : L[0x3fa00000] -KC0[0].w {WL}
ALU ADD S8.y@free{s} : L[0x40c00000] -KC0[0].x {W}
ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {W}
ALU ADD S10.w@free{s} : L[0xbfe00000] -KC0[0].z {W}
ALU ADD S11.x@free{s} : L[0x3fa00000] -KC0[0].w {W}
ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W}
ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END
)";
@ -576,57 +576,57 @@ BLOCK_END
TEST_F(TestShaderFromNir, PeeholeSoureModsSimple)
{
const char *input =
R"(VS
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU MOV S2.x@free{s} : I[0] {WL}
ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
ALU MOV S2.x@free{s} : I[0] {W}
ALU MOV S3.y@free{s} : L[0x40c00000] {W}
ALU MOV S4.z@free{s} : L[0xc1140000] {W}
ALU MOV S5.w@free{s} : L[0xbfe00000] {W}
ALU MOV S6.x@free{s} : L[0x3fa00000] {W}
ALU MOV S7.x{s} : |KC0[0].x| {W}
ALU MOV S7.y{s} : -KC0[0].y {W}
ALU MOV S7.z{s} : -|KC0[0].z| {W}
ALU MOV S7.w{s} : KC0[0].w {WL}
ALU ADD S8.y@free{s} : S3.y@free{s} S7.x{s} {WL}
ALU ADD S9.z@free{s} : S4.z@free{s} S7.y{s} {WL}
ALU ADD S10.w@free{s} : S5.w@free{s} S7.z{s} {WL}
ALU ADD S11.x@free{s} : S6.x@free{s} S7.w{s} {WL}
ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU MOV S7.w{s} : KC0[0].w {W}
ALU ADD S8.y@free{s} : S3.y@free{s} S7.x{s} {W}
ALU ADD S9.z@free{s} : S4.z@free{s} S7.y{s} {W}
ALU ADD S10.w@free{s} : S5.w@free{s} S7.z{s} {W}
ALU ADD S11.x@free{s} : S6.x@free{s} S7.w{s} {W}
ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W}
ALU MOV S17.x{s} : S12.y@free{s} {W}
ALU MOV S17.y{s} : S13.z@free{s} {W}
ALU MOV S17.z{s} : S14.x@free{s} {W}
ALU MOV S17.w{s} : S15.y@free{s} {WL}
ALU MOV S17.w{s} : S15.y@free{s} {W}
ALU MOV S18.x@group{s} : S17.x{s} {W}
ALU MOV S18.y@group{s} : S17.y{s} {W}
ALU MOV S18.z@group{s} : S17.z{s} {W}
ALU MOV S18.w@group{s} : S17.w{s} {WL}
ALU MOV S18.w@group{s} : S17.w{s} {W}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END)";
const char *expect =
R"(VS
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU ADD S8.y@free{s} : L[0x40c00000] |KC0[0].x| {WL}
ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL}
ALU ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {WL}
ALU ADD S11.x@free{s} : L[0x3fa00000] KC0[0].w {WL}
ALU ADD S8.y@free{s} : L[0x40c00000] |KC0[0].x| {W}
ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {W}
ALU ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {W}
ALU ADD S11.x@free{s} : L[0x3fa00000] KC0[0].w {W}
ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W}
ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END
)";
@ -638,34 +638,34 @@ BLOCK_END
TEST_F(TestShaderFromNir, PeeholeSoureModsAbsNegTwice)
{
const char *input =
R"(VS
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU MOV S2.x@free{s} : I[0] {WL}
ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
ALU MOV S2.x@free{s} : I[0] {W}
ALU MOV S3.y@free{s} : L[0x40c00000] {W}
ALU MOV S4.z@free{s} : L[0xc1140000] {W}
ALU MOV S5.w@free{s} : L[0xbfe00000] {W}
ALU MOV S6.x@free{s} : L[0x3fa00000] {W}
ALU MOV S7.x{s} : |KC0[0].x| {W}
ALU MOV S7.y{s} : -KC0[0].y {W}
ALU MOV S7.z{s} : -|KC0[0].z| {W}
ALU MOV S7.w{s} : KC0[0].w {WL}
ALU MOV S7.w{s} : KC0[0].w {W}
ALU MOV S8.x : |S7.x| {W}
ALU MOV S8.y : -S7.y {W}
ALU MOV S8.z : -|S7.z| {W}
ALU MOV S8.w : -|S7.x| {WL}
ALU ADD S19.y@free{s} : S3.y@free{s} S8.x {WL}
ALU ADD S9.z@free{s} : S4.z@free{s} S8.y {WL}
ALU ADD S10.w@free{s} : S5.w@free{s} S8.z {WL}
ALU ADD S11.x@free{s} : S6.x@free{s} S8.w {WL}
ALU EXP_IEEE S12.y@free{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {WL}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU MOV S8.w : -|S7.x| {W}
ALU ADD S19.y@free{s} : S3.y@free{s} S8.x {W}
ALU ADD S9.z@free{s} : S4.z@free{s} S8.y {W}
ALU ADD S10.w@free{s} : S5.w@free{s} S8.z {W}
ALU ADD S11.x@free{s} : S6.x@free{s} S8.w {W}
ALU EXP_IEEE S12.y@free{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {W}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W}
ALU MOV S17.x{s} : S12.y@free{s} {W}
ALU MOV S17.y{s} : S13.z@free{s} {W}
ALU MOV S17.z{s} : S14.x@free{s} {W}
@ -673,26 +673,26 @@ BLOCK_START
ALU MOV S18.x@group{s} : S17.x{s} {W}
ALU MOV S18.y@group{s} : S17.y{s} {W}
ALU MOV S18.z@group{s} : S17.z{s} {W}
ALU MOV S18.w@group{s} : S17.w{s} {WL}
ALU MOV S18.w@group{s} : S17.w{s} {W}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END)";
const char *expect =
R"(VS
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU ADD S19.y@free{s} : L[0x40c00000] |KC0[0].x| {WL}
ALU ADD S9.z@free{s} : L[0xc1140000] KC0[0].y {WL}
ALU ADD S10.w@free{s} : L[0xbfe00000] |KC0[0].z| {WL}
ALU ADD S11.x@free{s} : L[0x3fa00000] -|KC0[0].x| {WL}
ALU ADD S19.y@free{s} : L[0x40c00000] |KC0[0].x| {W}
ALU ADD S9.z@free{s} : L[0xc1140000] KC0[0].y {W}
ALU ADD S10.w@free{s} : L[0xbfe00000] |KC0[0].z| {W}
ALU ADD S11.x@free{s} : L[0x3fa00000] -|KC0[0].x| {W}
ALU EXP_IEEE S18.x@group{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {W}
ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END
)";

View file

@ -57,13 +57,17 @@ EXPORT_DONE PIXEL 0 R2000.xyzw
expect.push_back(new AluInstr(op1_mov,
new Register(2000, 0, pin_group),
new LiteralConstant(0x38000000),
{alu_write}));
AluInstr::write));
expect.push_back(new AluInstr(
op1_mov, new Register(2000, 1, pin_group), new LiteralConstant(0x0), {alu_write}));
expect.push_back(new AluInstr(op1_mov,
new Register(2000, 1, pin_group),
new LiteralConstant(0x0),
AluInstr::write));
expect.push_back(new AluInstr(
op1_mov, new Register(2000, 2, pin_group), new LiteralConstant(0x0), {alu_write}));
expect.push_back(new AluInstr(op1_mov,
new Register(2000, 2, pin_group),
new LiteralConstant(0x0),
AluInstr::write));
expect.push_back(new AluInstr(op1_mov,
new Register(2000, 3, pin_group),

View file

@ -125,17 +125,50 @@ PROP WRITE_ALL_COLORS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S0.x@free : L[0xbf000000] {WL}
ALU MOV S1.x@free : I[0] {WL}
ALU MOV S0.x@free : L[0xbf000000] {W}
ALU MOV S1.x@free : I[0] {W}
ALU MOV S2.x : KC0[0].x {W}
ALU MOV S2.y : KC0[0].y {W}
ALU MOV S2.z : KC0[0].z {W}
ALU MOV S2.w : KC0[0].w {W}
ALU ADD S3.x@free : S0.x@free S2.x {W}
ALU MOV S4.x@group : S3.x@free {W}
ALU MOV S4.y@group : S2.y {W}
ALU MOV S4.z@group : S2.z {W}
ALU MOV S4.w@group : S2.w {W}
EXPORT_DONE PIXEL 0 S4.xyzw
)";
const char *add_add_1_expect_from_nir_scheduled =
R"(FS
CHIPCLASS EVERGREEN
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP WRITE_ALL_COLORS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU_GROUP_BEGIN
ALU MOV S0.x@chan : L[0xbf000000] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV S1.x@chan : I[0] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV S2.x : KC0[0].x {W}
ALU MOV S2.y : KC0[0].y {W}
ALU MOV S2.z : KC0[0].z {W}
ALU MOV S2.w : KC0[0].w {WL}
ALU ADD S3.x@free : S0.x@free S2.x {WL}
ALU MOV S4.x@group : S3.x@free {W}
ALU MOV S4.y@group : S2.y {W}
ALU MOV S4.z@group : S2.z {W}
ALU MOV S4.w@group : S2.w {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S3.x@chan : S0.x@free S2.x {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV S4.x@chgr : S3.x@free {W}
ALU MOV S4.y@chgr : S2.y {W}
ALU MOV S4.z@chgr : S2.z {W}
ALU MOV S4.w@chgr : S2.w {WL}
ALU_GROUP_END
EXPORT_DONE PIXEL 0 S4.xyzw
)";
@ -149,17 +182,17 @@ PROP WRITE_ALL_COLORS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S0.x@free : L[0xbf000000] {WL}
ALU MOV S1.x@free : I[0] {WL}
ALU MOV S0.x@free : L[0xbf000000] {W}
ALU MOV S1.x@free : I[0] {W}
ALU MOV S2.x : KC0[0].x {W}
ALU MOV S2.y : KC0[0].y {W}
ALU MOV S2.z : KC0[0].z {W}
ALU MOV S2.w : KC0[0].w {WL}
ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {WL}
ALU MOV S2.w : KC0[0].w {W}
ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {W}
ALU MOV S4.x@group : S3.x@free {W}
ALU MOV S4.y@group : KC0[0].y {W}
ALU MOV S4.z@group : KC0[0].z {W}
ALU MOV S4.w@group : KC0[0].w {WL}
ALU MOV S4.w@group : KC0[0].w {W}
EXPORT_DONE PIXEL 0 S4.xyzw
)";
@ -173,11 +206,11 @@ PROP WRITE_ALL_COLORS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {WL}
ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {W}
ALU MOV S4.x@group : S3.x@free {W}
ALU MOV S4.y@group : KC0[0].y {W}
ALU MOV S4.z@group : KC0[0].z {W}
ALU MOV S4.w@group : KC0[0].w {WL}
ALU MOV S4.w@group : KC0[0].w {W}
EXPORT_DONE PIXEL 0 S4.xyzw
)";
@ -195,7 +228,7 @@ SHADER
ALU ADD S4.x@group : L[0xbf000000] KC0[0].x {W}
ALU MOV S4.y@group : KC0[0].y {W}
ALU MOV S4.z@group : KC0[0].z {W}
ALU MOV S4.w@group : KC0[0].w {WL}
ALU MOV S4.w@group : KC0[0].w {W}
EXPORT_DONE PIXEL 0 S4.xyzw
)";
@ -1056,63 +1089,63 @@ OUTPUT LOC:1 VARYING_SLOT:1 MASK:15
SYSVALUES R1.xyzw
REGISTERS R2.x R3.x R4.x R5.x R6.x R7.x R8.x
SHADER
ALU MOV S9.x@free : I[0] {WL}
ALU MOV S10.x@free : I[-1] {WL}
ALU MOV S11.x@free : I[0] {WL}
ALU MOV S12.x@free : I[1] {WL}
ALU MOV S9.x@free : I[0] {W}
ALU MOV S10.x@free : I[-1] {W}
ALU MOV S11.x@free : I[0] {W}
ALU MOV S12.x@free : I[1] {W}
ALU MOV S13.x : I[1.0] {W}
ALU MOV S13.y : I[1.0] {W}
ALU MOV S13.z : I[0] {W}
ALU MOV S13.w : I[1.0] {WL}
ALU MOV S14.x@free : L[0x2] {WL}
ALU MOV S15.x@free : KC0[0].x {WL}
ALU SETE_INT S16.x@free : S15.x@free S12.x@free {WL}
ALU MOV S13.w : I[1.0] {W}
ALU MOV S14.x@free : L[0x2] {W}
ALU MOV S15.x@free : KC0[0].x {W}
ALU SETE_INT S16.x@free : S15.x@free S12.x@free {W}
IF (( ALU PRED_SETNE_INT __.x@free : S16.x@free I[0] {LEP} PUSH_BEFORE ))
ALU MOV S18.x@free : KC0[2].x {WL}
ALU SETNE_INT S19.x@free : S18.x@free S12.x {WL}
ALU MOV S18.x@free : KC0[2].x {W}
ALU SETNE_INT S19.x@free : S18.x@free S12.x {W}
IF (( ALU PRED_SETNE_INT __.y@free : S19.x@free I[0] {LEP} PUSH_BEFORE ))
ALU MOV R3.x : S12.x@free {WL}
ALU MOV R2.x : S9.x@free {WL}
ALU MOV R3.x : S12.x@free {W}
ALU MOV R2.x : S9.x@free {W}
LOOP_BEGIN
ALU INT_TO_FLT R4.x : R2.x {WL}
ALU MOV S21.x@free : KC0[1].x {WL}
ALU SETNE_INT S22.x@free : S21.x@free S14.x@free {WL}
ALU INT_TO_FLT R4.x : R2.x {W}
ALU MOV S21.x@free : KC0[1].x {W}
ALU SETNE_INT S22.x@free : S21.x@free S14.x@free {W}
IF (( ALU PRED_SETNE_INT __.z@free : S22.x@free I[0] {LEP} PUSH_BEFORE ))
BREAK
ENDIF
ALU ADD_INT R5.x@free : R3.x S12.x@free {WL}
ALU MOV R2.x : R3.x {WL}
ALU MOV R3.x : R5.x {WL}
ALU ADD_INT R5.x@free : R3.x S12.x@free {W}
ALU MOV R2.x : R3.x {W}
ALU MOV R3.x : R5.x {W}
LOOP_END
ALU MOV S24.x@free : I[1.0] {WL}
ALU MOV R8.x : S24.x@free {WL}
ALU MOV R7.x : R8.x {WL}
ALU MOV R6.x : S10.x@free {WL}
ALU MOV S24.x@free : I[1.0] {W}
ALU MOV R8.x : S24.x@free {W}
ALU MOV R7.x : R8.x {W}
ALU MOV R6.x : S10.x@free {W}
ELSE
ALU MOV S25.x@free : I[1.0] {WL}
ALU MOV R8.x : S25.x@free {WL}
ALU MOV R7.x : S9.x {WL}
ALU MOV R4.x : R8.x {WL}
ALU MOV R6.x : S11.x@free {WL}
ALU MOV S25.x@free : I[1.0] {W}
ALU MOV R8.x : S25.x@free {W}
ALU MOV R7.x : S9.x {W}
ALU MOV R4.x : R8.x {W}
ALU MOV R6.x : S11.x@free {W}
ENDIF
ELSE
ALU MOV S26.x@free : I[1.0] {WL}
ALU MOV R8.x : S26.x@free {WL}
ALU MOV R7.x : S9.x {WL}
ALU MOV R4.x : R8.x {WL}
ALU MOV R6.x : S10.x@free {WL}
ALU MOV S26.x@free : I[1.0] {W}
ALU MOV R8.x : S26.x@free {W}
ALU MOV R7.x : S9.x {W}
ALU MOV R4.x : R8.x {W}
ALU MOV R6.x : S10.x@free {W}
ENDIF
ALU CNDE_INT S27.x@free : R6.x S13.x R4.x {WL}
ALU CNDE_INT S28.x@free : R6.x S13.y R7.x {WL}
ALU CNDE_INT S29.x@free : R6.x S13.w R8.x {WL}
ALU CNDE_INT S27.x@free : R6.x S13.x R4.x {W}
ALU CNDE_INT S28.x@free : R6.x S13.y R7.x {W}
ALU CNDE_INT S29.x@free : R6.x S13.w R8.x {W}
EXPORT_DONE POS 0 R1.xyzw
ALU MOV CLAMP S31.x@free : S27.x@free {WL}
ALU MOV CLAMP S32.x@free : S28.x@free {WL}
ALU MOV CLAMP S33.x@free : S29.x@free {WL}
ALU MOV CLAMP S31.x@free : S27.x@free {W}
ALU MOV CLAMP S32.x@free : S28.x@free {W}
ALU MOV CLAMP S33.x@free : S29.x@free {W}
ALU MOV S34.x@group : S31.x@free {W}
ALU MOV S34.y@group : S32.x@free {W}
ALU MOV S34.z@group : S9.x@free {W}
ALU MOV S34.w@group : S33.x@free {WL}
ALU MOV S34.w@group : S33.x@free {W}
EXPORT_DONE PARAM 0 S34.xyzw
)";
@ -1128,39 +1161,39 @@ REGISTERS R2.x@free R3.x@free R4.x@free R5.x@free R6.x@free R7.x@free R8.x@free
SHADER
IF (( ALU PREDE_INT __.x@free : KC0[0].x I[1] {LEP} PUSH_BEFORE ))
IF (( ALU PRED_SETNE_INT __.y@free : KC0[2].x I[1] {LEP} PUSH_BEFORE ))
ALU MOV R3.x : I[1] {WL}
ALU MOV R2.x : I[0] {WL}
ALU MOV R3.x : I[1] {W}
ALU MOV R2.x : I[0] {W}
LOOP_BEGIN
ALU INT_TO_FLT R4.x : R2.x {WL}
ALU INT_TO_FLT R4.x : R2.x {W}
IF (( ALU PRED_SETNE_INT __.z@free : KC0[1].x L[0x2] {LEP} PUSH_BEFORE ))
BREAK
ENDIF
ALU ADD_INT R5.x : R3.x I[1] {WL}
ALU MOV R2.x : R3.x {WL}
ALU MOV R3.x : R5.x {WL}
ALU ADD_INT R5.x : R3.x I[1] {W}
ALU MOV R2.x : R3.x {W}
ALU MOV R3.x : R5.x {W}
LOOP_END
ALU MOV R8.x : I[1.0] {WL}
ALU MOV R7.x : I[1.0] {WL}
ALU MOV R6.x : I[-1] {WL}
ALU MOV R8.x : I[1.0] {W}
ALU MOV R7.x : I[1.0] {W}
ALU MOV R6.x : I[-1] {W}
ELSE
ALU MOV R8.x : I[1.0] {WL}
ALU MOV R7.x : I[0] {WL}
ALU MOV R4.x : I[1.0] {WL}
ALU MOV R6.x : I[0] {WL}
ALU MOV R8.x : I[1.0] {W}
ALU MOV R7.x : I[0] {W}
ALU MOV R4.x : I[1.0] {W}
ALU MOV R6.x : I[0] {W}
ENDIF
ELSE
ALU MOV R8.x : I[1.0] {WL}
ALU MOV R7.x : I[0] {WL}
ALU MOV R4.x : I[1.0] {WL}
ALU MOV R6.x : I[-1] {WL}
ALU MOV R8.x : I[1.0] {W}
ALU MOV R7.x : I[0] {W}
ALU MOV R4.x : I[1.0] {W}
ALU MOV R6.x : I[-1] {W}
ENDIF
ALU CNDE_INT S27.x@free : R6.x I[1.0] R4.x {WL}
ALU CNDE_INT S28.x@free : R6.x I[1.0] R7.x {WL}
ALU CNDE_INT S29.x@free : R6.x I[1.0] R8.x {WL}
ALU CNDE_INT S27.x@free : R6.x I[1.0] R4.x {W}
ALU CNDE_INT S28.x@free : R6.x I[1.0] R7.x {W}
ALU CNDE_INT S29.x@free : R6.x I[1.0] R8.x {W}
EXPORT_DONE POS 0 R1.xyzw
ALU MOV CLAMP S34.x@group : S27.x@free {W}
ALU MOV CLAMP S34.y@group : S28.x@free {W}
ALU MOV CLAMP S34.w@group : S29.x@free {WL}
ALU MOV CLAMP S34.w@group : S29.x@free {W}
EXPORT_DONE PARAM 0 S34.xy0w
)";

View file

@ -12,6 +12,7 @@ extern const char *red_triangle_fs_expect_from_nir_dce;
extern const char *add_add_1_nir;
extern const char *add_add_1_expect_from_nir;
extern const char *add_add_1_expect_from_nir_scheduled;
extern const char *add_add_1_expect_from_nir_copy_prop_fwd;
extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce;
extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd;