diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index 4dcd88efe7f..765fa0ed24f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -619,8 +619,6 @@ AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr) } m_dest = new_dest; - if (!move_instr->has_alu_flag(alu_last_instr)) - reset_alu_flag(alu_last_instr); if (has_alu_flag(alu_is_cayman_trans)) { /* Copy propagation puts an instruction into the w channel, but we @@ -1842,17 +1840,15 @@ emit_alu_op1_64bit(const nir_alu_instr& alu, ir = new AluInstr(opcode, value_factory.dest(alu.def, 2 * i, pin_chan), value_factory.src64(alu.src[0], i, swz[0]), - {alu_write}); + AluInstr::write); group->add_instruction(ir); ir = new AluInstr(opcode, value_factory.dest(alu.def, 2 * i + 1, pin_chan), value_factory.src64(alu.src[0], i, swz[1]), - {alu_write}); + AluInstr::write); group->add_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } @@ -1869,12 +1865,10 @@ emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader) ir = new AluInstr(op1_mov, value_factory.dest(alu.def, 2 * i + c, pin_free), value_factory.src64(alu.src[0], i, c), - {alu_write}); + AluInstr::write); shader.emit_instruction(ir); } } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -1890,13 +1884,11 @@ emit_alu_neg(const nir_alu_instr& alu, Shader& shader) ir = new AluInstr(op1_mov, value_factory.dest(alu.def, 2 * i + c, pin_chan), value_factory.src64(alu.src[0], i, c), - {alu_write}); + AluInstr::write); shader.emit_instruction(ir); } ir->set_source_mod(0, AluInstr::mod_neg); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -1916,7 +1908,7 @@ emit_alu_abs64(const nir_alu_instr& alu, Shader& shader) auto ir = new AluInstr(op1_mov, value_factory.dest(alu.def, 1, pin_chan), value_factory.src64(alu.src[0], 0, 1), - AluInstr::last_write); + AluInstr::write); ir->set_source_mod(0, AluInstr::mod_abs); shader.emit_instruction(ir); return true; @@ -1967,7 +1959,7 @@ emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader) shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.def, 1, pin_chan), value_factory.src64(alu.src[0], 0, 1), - AluInstr::last_write)); + AluInstr::write)); } else { /* dest clamp doesn't work on plain 64 bit move, so add a zero @@ -1986,7 +1978,7 @@ emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader) value_factory.dest(alu.def, 1, pin_chan), value_factory.src64(alu.src[0], 0, 0), value_factory.literal(0), - AluInstr::last_write)); + AluInstr::write)); shader.emit_instruction(group); } @@ -2045,8 +2037,6 @@ emit_alu_op2_64bit(const nir_alu_instr& alu, i == 1 ? AluInstr::write : AluInstr::empty); group->add_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; @@ -2080,8 +2070,6 @@ emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, shader.emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -2104,8 +2092,6 @@ emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader ir->set_source_mod(0, AluInstr::mod_abs); group->add_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } @@ -2130,8 +2116,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) i < 2 ? AluInstr::write : AluInstr::empty); group->add_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } @@ -2157,7 +2141,6 @@ emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader) AluInstr::write); group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } @@ -2172,12 +2155,12 @@ emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader) ir = new AluInstr(op1v_flt64_to_flt32, value_factory.dest(alu.def, 0, pin_chan), value_factory.src64(alu.src[0], 0, 1), - {alu_write}); + AluInstr::write); group->add_instruction(ir); ir = new AluInstr(op1v_flt64_to_flt32, value_factory.dummy_dest(1), value_factory.src64(alu.src[0], 0, 0), - AluInstr::last); + AluInstr::empty); group->add_instruction(ir); shader.emit_instruction(group); return true; @@ -2196,7 +2179,7 @@ emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader) value_factory.dest(alu.def, 0, pin_free), src, value_factory.inline_const(mask, 0), - {alu_write}); + AluInstr::write); shader.emit_instruction(ir); return true; } @@ -2215,7 +2198,7 @@ emit_alu_op1(const nir_alu_instr& alu, ir = new AluInstr(opcode, value_factory.dest(alu.def, 0, pin_free), value_factory.src(alu.src[0], 0), - {alu_write}); + AluInstr::write); switch (mod) { case mod_src0_abs: ir->set_source_mod(0, AluInstr::mod_abs); @@ -2252,7 +2235,7 @@ emit_alu_op2(const nir_alu_instr& alu, value_factory.dest(alu.def, 0, pin_free), value_factory.src(*src0, 0), value_factory.src(*src1, 0), - {alu_write}); + AluInstr::write); if (src1_negate) ir->set_source_mod(1, AluInstr::mod_neg); shader.emit_instruction(ir); @@ -2287,7 +2270,7 @@ emit_alu_op3(const nir_alu_instr& alu, value_factory.src(*src[0], 0), value_factory.src(*src[1], 0), value_factory.src(*src[2], 0), - {alu_write}); + AluInstr::write); ir->set_alu_flag(alu_write); shader.emit_instruction(ir); return true; @@ -2309,7 +2292,7 @@ emit_dot(const nir_alu_instr& alu, int n, Shader& shader) srcs[2 * i + 1] = value_factory.src(src1, i); } - AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::last_write, n); + AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::write, n); shader.emit_instruction(ir); shader.set_flag(Shader::sh_disble_sb); @@ -2338,7 +2321,7 @@ emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader) srcs[2 * i + 1] = value_factory.zero(); } - AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4); + AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::write, 4); shader.emit_instruction(ir); return true; @@ -2363,7 +2346,7 @@ emit_fdph(const nir_alu_instr& alu, Shader& shader) srcs[6] = value_factory.one(); srcs[7] = value_factory.src(src1, 3); - AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4); + AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::write, 4); shader.emit_instruction(ir); return true; } @@ -2372,16 +2355,13 @@ static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader) { auto& value_factory = shader.value_factory(); - AluInstr *ir = nullptr; for (unsigned i = 0; i < nc; ++i) { auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]); auto dst = value_factory.dest(instr.def, i, pin_none); - shader.emit_instruction(new AluInstr(op1_mov, dst, src, {alu_write})); + shader.emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::write)); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -2397,7 +2377,6 @@ emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader) AluInstr::write); shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); return true; } @@ -2413,7 +2392,6 @@ emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader) AluInstr::write); shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); return true; } @@ -2429,7 +2407,6 @@ emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader) AluInstr::write); shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); return true; } @@ -2452,7 +2429,6 @@ emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader) AluInstr::write); shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); return true; } @@ -2465,20 +2441,24 @@ emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader) auto y = value_factory.temp_register(); auto yy = value_factory.temp_register(); - shader.emit_instruction(new AluInstr( - op1_flt32_to_flt16, x, value_factory.src(alu.src[0], 0), AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, + x, + value_factory.src(alu.src[0], 0), + AluInstr::write)); - shader.emit_instruction(new AluInstr( - op1_flt32_to_flt16, y, value_factory.src(alu.src[1], 0), AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, + y, + value_factory.src(alu.src[1], 0), + AluInstr::write)); shader.emit_instruction( - new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write)); + new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::write)); shader.emit_instruction(new AluInstr(op2_or_int, value_factory.dest(alu.def, 0, pin_free), x, yy, - AluInstr::last_write)); + AluInstr::write)); return true; } @@ -2489,7 +2469,7 @@ emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader) shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.def, 0, pin_free), value_factory.src64(alu.src[0], 0, comp), - AluInstr::last_write)); + AluInstr::write)); return true; } @@ -2500,7 +2480,7 @@ emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader) shader.emit_instruction(new AluInstr(op1_flt16_to_flt32, value_factory.dest(alu.def, 0, pin_free), value_factory.src(alu.src[0], 0), - AluInstr::last_write)); + AluInstr::write)); return true; } static bool @@ -2512,12 +2492,12 @@ emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader) tmp, value_factory.src(alu.src[0], 0), value_factory.literal(16), - AluInstr::last_write)); + AluInstr::write)); shader.emit_instruction(new AluInstr(op1_flt16_to_flt32, value_factory.dest(alu.def, 0, pin_free), tmp, - AluInstr::last_write)); + AluInstr::write)); return true; } @@ -2533,7 +2513,7 @@ emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) ir = new AluInstr(opcode, value_factory.dest(alu.def, 0, pin_free), value_factory.src(src0, 0), - AluInstr::last_write); + AluInstr::write); ir->set_alu_flag(alu_is_trans); shader.emit_instruction(ir); return true; @@ -2548,10 +2528,7 @@ emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader assert(alu.def.num_components == 1); auto temp = value_factory.temp_register(); - ir = new AluInstr(op1_trunc, - temp, - value_factory.src(alu.src[0], 0), - AluInstr::last_write); + ir = new AluInstr(op1_trunc, temp, value_factory.src(alu.src[0], 0), AluInstr::write); shader.emit_instruction(ir); ir = new AluInstr(opcode, @@ -2560,7 +2537,6 @@ emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader AluInstr::write); if (opcode == op1_flt_to_uint) { ir->set_alu_flag(alu_is_trans); - ir->set_alu_flag(alu_last_instr); } shader.emit_instruction(ir); return true; @@ -2574,7 +2550,7 @@ emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shade assert(alu.def.num_components == 1); - const std::set flags({alu_write, alu_last_instr, alu_is_cayman_trans}); + const std::set flags({alu_write, alu_is_cayman_trans}); unsigned ncomp = 3; @@ -2602,7 +2578,7 @@ emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) value_factory.dest(alu.def, 0, pin_free), value_factory.src(src0, 0), value_factory.src(src1, 0), - AluInstr::last_write); + AluInstr::write); ir->set_alu_flag(alu_is_trans); shader.emit_instruction(ir); @@ -2619,7 +2595,7 @@ emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shade unsigned last_slot = 4; - const std::set flags({alu_write, alu_last_instr, alu_is_cayman_trans}); + const std::set flags({alu_write, alu_is_cayman_trans}); for (unsigned k = 0; k < alu.def.num_components; ++k) { AluInstr::SrcValues srcs(2 * last_slot); @@ -2657,14 +2633,11 @@ emit_alu_cube(const nir_alu_instr& alu, Shader& shader) AluInstr::write); group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } const std::set AluInstr::empty; const std::set AluInstr::write({alu_write}); -const std::set AluInstr::last({alu_last_instr}); -const std::set AluInstr::last_write({alu_write, alu_last_instr}); } // namespace r600 diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h index 2577785c6b0..62f7903e8ab 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h @@ -153,8 +153,6 @@ public: static const std::set empty; static const std::set write; - static const std::set last; - static const std::set last_write; std::tuple indirect_addr() const; void update_indirect_addr(PRegister old_reg, PRegister reg) override; diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp index 7a6cee4fd92..3866286d10c 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp @@ -119,7 +119,7 @@ LDSReadInstr::split(std::vector& out_block, AluInstr *last_lds_instr auto instr = new AluInstr(op1_mov, dest, new InlineConstant(ALU_SRC_LDS_OQ_A_POP), - AluInstr::last_write); + AluInstr::write); instr->add_required_instr(last_lds_instr); instr->set_blockid(block_id(), index()); instr->set_always_keep(); @@ -307,7 +307,7 @@ LDSAtomicInstr::split(std::vector& out_block, AluInstr *last_lds_ins } } - auto op_instr = new AluInstr(m_opcode, srcs, {}); + auto op_instr = new AluInstr(m_opcode, srcs, AluInstr::empty); op_instr->set_blockid(block_id(), index()); if (last_lds_instr) { @@ -322,7 +322,7 @@ LDSAtomicInstr::split(std::vector& out_block, AluInstr *last_lds_ins auto read_instr = new AluInstr(op1_mov, m_dest, new InlineConstant(ALU_SRC_LDS_OQ_A_POP), - AluInstr::last_write); + AluInstr::write); read_instr->add_required_instr(op_instr); read_instr->set_blockid(block_id(), index()); read_instr->set_alu_flag(alu_lds_group_end); diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp index f76923c913b..25191d3f7d5 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp @@ -193,7 +193,7 @@ GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader) if (!src_val->as_register()) { auto temp_src_val = vf.temp_register(); shader.emit_instruction( - new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write)); + new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::write)); src_as_register = temp_src_val; } else src_as_register = src_val->as_register(); @@ -219,8 +219,7 @@ GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader) else shader.emit_instruction( new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); - shader.emit_instruction( - new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val, AluInstr::write)); ir = new GDSInstr(op, dest, tmp, 0, nullptr); } shader.emit_instruction(ir); @@ -297,7 +296,7 @@ GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader) new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); shader.emit_instruction( - new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write)); + new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::write)); ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, tmp, 0, nullptr); } shader.emit_instruction(ir); @@ -340,7 +339,7 @@ GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader) new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); shader.emit_instruction( - new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write)); + new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::write)); ir = new GDSInstr(opcode, tmp_dest, tmp, 0, nullptr); } @@ -350,7 +349,7 @@ GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader) vf.dest(instr->def, 0, pin_free), tmp_dest, vf.one_i(), - AluInstr::last_write)); + AluInstr::write)); return true; } @@ -594,8 +593,8 @@ RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader) auto addr_temp = vf.temp_register(); /** Should be lowered in nir */ - shader.emit_instruction(new AluInstr( - op2_lshr_int, addr_temp, addr, vf.literal(2), {alu_write, alu_last_instr})); + shader.emit_instruction( + new AluInstr(op2_lshr_int, addr_temp, addr, vf.literal(2), AluInstr::write)); const EVTXDataFormat formats[4] = {fmt_32, fmt_32_32, fmt_32_32_32, fmt_32_32_32_32}; @@ -631,8 +630,7 @@ RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader) auto addr_vec = vf.temp_vec4(pin_chan, {0, 7, 7, 7}); shader.emit_instruction( - new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2), - AluInstr::last_write)); + new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2), AluInstr::write)); RegisterVec4::Swizzle value_swz = {0,7,7,7}; auto mask = nir_intrinsic_write_mask(intr); @@ -651,8 +649,6 @@ RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader) shader.emit_instruction(ir); } } - if (ir) - ir->set_alu_flag(alu_last_instr); auto store = new RatInstr(cf_mem_rat_cacheless, RatInstr::STORE_RAW, @@ -688,14 +684,17 @@ RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader) auto addr_vec = vf.temp_vec4(pin_group, {0, 1, 2, 7}); if (i == 0) { shader.emit_instruction( - new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write)); + new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::write)); } else { - shader.emit_instruction(new AluInstr( - op2_add_int, addr_vec[0], addr_base, vf.literal(i), AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op2_add_int, + addr_vec[0], + addr_base, + vf.literal(i), + AluInstr::write)); } auto value = vf.src(instr->src[0], i); PRegister v = vf.temp_register(0); - shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::write)); auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan); auto store = new RatInstr(cf_mem_rat, RatInstr::STORE_TYPED, @@ -730,7 +729,7 @@ RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader) auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3}); shader.emit_instruction( - new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write)); + new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::write)); shader.emit_instruction( new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write)); @@ -742,10 +741,10 @@ RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader) new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3], vf.src(intr->src[2], 0), - {alu_last_instr, alu_write})); + AluInstr::write)); } else { - shader.emit_instruction(new AluInstr( - op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write)); + shader.emit_instruction( + new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::write)); } RegisterVec4 out_vec(coord, coord, coord, coord, pin_chgr); @@ -827,12 +826,12 @@ RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader) swizzle = {0, 2, 1, 3}; for (int i = 0; i < 4; ++i) { - auto flags = i != 3 ? AluInstr::write : AluInstr::last_write; + auto flags = i != 3 ? AluInstr::write : AluInstr::write; shader.emit_instruction( new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags)); } for (int i = 0; i < 4; ++i) { - auto flags = i != 3 ? AluInstr::write : AluInstr::last_write; + auto flags = i != 3 ? AluInstr::write : AluInstr::write; shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags)); } @@ -874,7 +873,7 @@ RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader) swizzle = {0, 2, 1, 3}; for (int i = 0; i < 4; ++i) { - auto flags = i != 3 ? AluInstr::write : AluInstr::last_write; + auto flags = i != 3 ? AluInstr::write : AluInstr::write; shader.emit_instruction( new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags)); } @@ -889,12 +888,12 @@ RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader) new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3], vf.src(intrin->src[3], 0), - AluInstr::last_write)); + AluInstr::write)); } else { shader.emit_instruction( new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[3], 0), AluInstr::write)); shader.emit_instruction( - new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write)); + new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::write)); } auto atomic = @@ -988,7 +987,7 @@ RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader) vf.uniform(lookup_resid / 4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4, R600_BUFFER_INFO_CONST_BUFFER), - AluInstr::last_write)); + AluInstr::write)); } else { /* If the addressing is indirect we have to get the z-value by * using a binary search */ @@ -1014,7 +1013,7 @@ RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader) high_bit, vf.src(intrin->src[0], 0), vf.literal(2), - AluInstr::last_write)); + AluInstr::write)); shader.emit_instruction(new LoadFromBuffer(trgt, {0, 1, 2, 3}, @@ -1027,10 +1026,18 @@ RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader) // this may be wrong shader.emit_instruction(new AluInstr( op3_cnde_int, comp1, high_bit, trgt[0], trgt[2], AluInstr::write)); - shader.emit_instruction(new AluInstr( - op3_cnde_int, comp2, high_bit, trgt[1], trgt[3], AluInstr::last_write)); - shader.emit_instruction(new AluInstr( - op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op3_cnde_int, + comp2, + high_bit, + trgt[1], + trgt[3], + AluInstr::write)); + shader.emit_instruction(new AluInstr(op3_cnde_int, + dest[2], + low_bit, + comp1, + comp2, + AluInstr::write)); } } else { auto dest = vf.dest_vec4(intrin->def, pin_group); @@ -1071,7 +1078,7 @@ RatInstr::emit_image_samples(nir_intrinsic_instr *intrin, Shader& shader) res_id, dyn_offset)); - shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::write)); return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp index 74c662365c1..cf946140140 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp @@ -615,13 +615,12 @@ TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader) shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); shader.emit_instruction( new AluInstr(op2_or_int, dst[3], tmp_w, vf.uniform(buf_sel + 1, 0, R600_BUFFER_INFO_CONST_BUFFER), - AluInstr::last_write)); + AluInstr::write)); } return true; @@ -661,15 +660,13 @@ TexInstr::emit_tex_txs(nir_tex_instr *tex, } else { int id = 2 * tex->texture_index + (512 + R600_BUFFER_INFO_OFFSET / 16) + 1; auto src = vf.uniform(id, 1, R600_BUFFER_INFO_CONST_BUFFER); - shader.emit_instruction( - new AluInstr(op1_mov, dest[0], src, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, dest[0], src, AluInstr::write)); shader.set_flag(Shader::sh_uses_tex_buffer); } } else { auto src_lod = vf.temp_register(); - shader.emit_instruction( - new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, src_lod, src.lod, AluInstr::write)); RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free); @@ -691,7 +688,7 @@ TexInstr::emit_tex_txs(nir_tex_instr *tex, tex->texture_index & 3, R600_BUFFER_INFO_CONST_BUFFER); - auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write); + auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::write); shader.emit_instruction(alu); shader.set_flag(Shader::sh_txs_cube_array_comp); } @@ -746,9 +743,6 @@ TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shade shader.emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); - return src_coord; } @@ -887,8 +881,6 @@ TexInstr::emit_tex_lod(nir_tex_instr *tex, Inputs& src, Shader& shader) ir = new AluInstr(op1_mov, src_coord[i], src.coord[i], AluInstr::write); shader.emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); auto irt = new TexInstr(TexInstr::get_tex_lod, dst, diff --git a/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp b/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp index 25385e1a262..7d62305e0d5 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp @@ -124,11 +124,11 @@ InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader) for (int i = 0; i < literal->def.num_components; ++i) { auto dest0 = m_value_factory.dest(literal->def, 2 * i, pin_none); auto src0 = m_value_factory.literal(literal->value[i].u64 & 0xffffffff); - shader.emit_instruction(new AluInstr(op1_mov, dest0, src0, {alu_write})); + shader.emit_instruction(new AluInstr(op1_mov, dest0, src0, AluInstr::write)); auto dest1 = m_value_factory.dest(literal->def, 2 * i + 1, pin_none); auto src1 = m_value_factory.literal((literal->value[i].u64 >> 32) & 0xffffffff); - shader.emit_instruction(new AluInstr(op1_mov, dest1, src1, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, dest1, src1, AluInstr::write)); } } else { Pin pin = literal->def.num_components == 1 ? pin_free : pin_none; @@ -156,11 +156,9 @@ InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader) src = m_value_factory.literal(v); } - ir = new AluInstr(op1_mov, dest, src, {alu_write}); + ir = new AluInstr(op1_mov, dest, src, AluInstr::write); shader.emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); } return true; } @@ -196,7 +194,7 @@ InstrFactory::process_undef(nir_undef_instr *undef, Shader& shader) for (int i = 0; i < undef->def.num_components; ++i) { auto dest = shader.value_factory().undef(undef->def.index, i); shader.emit_instruction( - new AluInstr(op1_mov, dest, value_factory().zero(), AluInstr::last_write)); + new AluInstr(op1_mov, dest, value_factory().zero(), AluInstr::write)); } return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index ced12b3319b..c87b2138f3f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -395,10 +395,8 @@ Shader::allocate_reserved_registers() m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end); if (!m_atomics.empty()) { m_atomic_update = value_factory().temp_register(); - auto alu = new AluInstr(op1_mov, - m_atomic_update, - value_factory().one_i(), - AluInstr::last_write); + auto alu = + new AluInstr(op1_mov, m_atomic_update, value_factory().one_i(), AluInstr::write); alu->set_alu_flag(alu_no_schedule_bias); emit_instruction(alu); } @@ -410,23 +408,27 @@ Shader::allocate_reserved_registers() auto temp2 = value_factory().temp_register(2); auto group = new AluGroup(); - group->add_instruction(new AluInstr( - op1_mbcnt_32lo_accum_prev_int, temp0, value_factory().literal(-1), {alu_write})); - group->add_instruction(new AluInstr( - op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write})); + group->add_instruction(new AluInstr(op1_mbcnt_32lo_accum_prev_int, + temp0, + value_factory().literal(-1), + AluInstr::write)); + group->add_instruction(new AluInstr(op1_mbcnt_32hi_int, + temp1, + value_factory().literal(-1), + AluInstr::write)); emit_instruction(group); emit_instruction(new AluInstr(op3_muladd_uint24, temp2, value_factory().inline_const(ALU_SRC_SE_ID, 0), value_factory().literal(256), value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0), - {alu_write, alu_last_instr})); + AluInstr::write)); emit_instruction(new AluInstr(op3_muladd_uint24, m_rat_return_address, temp2, value_factory().literal(0x40), temp0, - {alu_write, alu_last_instr})); + AluInstr::write)); } } @@ -713,13 +715,13 @@ Shader::process_if(nir_if *if_stmt) EAluOp op = child_block_empty(if_stmt->then_list) ? op2_prede_int : op2_pred_setne_int; + auto flags = {alu_update_exec, alu_last_instr, alu_update_pred}; + AluInstr *pred = new AluInstr(op, value_factory().temp_register(), value, value_factory().zero(), - AluInstr::last); - pred->set_alu_flag(alu_update_exec); - pred->set_alu_flag(alu_update_pred); + flags); pred->set_cf_type(cf_alu_push_before); IfInstr *ir = new IfInstr(pred); @@ -850,8 +852,6 @@ Shader::emit_tex_fdd(const nir_intrinsic_instr* intr, int opcode, bool fine) mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write); emit_instruction(mv); } - if (mv) - mv->set_alu_flag(alu_last_instr); auto dst = value_factory_.dest_vec4(intr->def, pin_group); RegisterVec4::Swizzle dst_swz = {7, 7, 7, 7}; @@ -992,7 +992,7 @@ Shader::emit_load_to_register(PVirtualValue src, int chan) if (!dest || chan >= 0) { dest = value_factory().temp_register(chan); dest->set_pin(pin_free); - emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write)); } return dest; } @@ -1170,7 +1170,7 @@ Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) uav_id = uav_id_val->as_register(); } else { uav_id = vf.temp_register(); - emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, AluInstr::write)); } } return std::make_pair(offset, uav_id); @@ -1200,8 +1200,6 @@ Shader::emit_store_scratch(nir_intrinsic_instr *intr) if (!ir) return true; - ir->set_alu_flag(alu_last_instr); - auto address = vf.src(intr->src[1], 0); int align = nir_intrinsic_align_mul(intr); @@ -1224,7 +1222,7 @@ Shader::emit_store_scratch(nir_intrinsic_instr *intr) ws_ir = new ScratchIOInstr(value, offset, align, align_offset, writemask); } else { auto addr_temp = vf.temp_register(0); - auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write); + auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::write); load_addr->set_alu_flag(alu_no_schedule_bias); emit_instruction(load_addr); @@ -1272,7 +1270,7 @@ Shader::emit_load_scratch(nir_intrinsic_instr *intr) ir = new ScratchIOInstr(dest, offset, align, align_offset, 0xf, true); } else { auto addr_temp = value_factory().temp_register(0); - auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::last_write); + auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::write); load_addr->set_alu_flag(alu_no_schedule_bias); emit_instruction(load_addr); @@ -1295,7 +1293,7 @@ bool Shader::emit_load_global(nir_intrinsic_instr *intr) auto src = src_value->as_register(); if (!src) { src = value_factory().temp_register(); - emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::write)); } auto load = new LoadFromBuffer(dest, {0,7,7,7}, src, 0, 1, NULL, fmt_32); load->set_mfc(4); @@ -1490,8 +1488,7 @@ bool Shader::emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset) { auto src = value_factory().temp_register(); - emit_instruction( - new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::write)); auto dest = value_factory().dest_vec4(instr->def, pin_group); auto fetch = new LoadFromBuffer(dest, @@ -1512,8 +1509,7 @@ bool Shader::emit_get_lds_info_uint(nir_intrinsic_instr *instr, int offset) { auto src = value_factory().temp_register(); - emit_instruction( - new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::write)); auto dest = value_factory().dest_vec4(instr->def, pin_group); auto fetch = new LoadFromBuffer(dest, @@ -1540,7 +1536,7 @@ Shader::emit_shader_clock(nir_intrinsic_instr *instr) group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->def, 1, pin_chan), vf.inline_const(ALU_SRC_TIME_HI, 0), - AluInstr::last_write)); + AluInstr::write)); emit_instruction(group); return true; } @@ -1551,7 +1547,6 @@ Shader::emit_group_barrier(nir_intrinsic_instr *intr) assert(m_control_flow_depth == 0); (void)intr; auto op = new AluInstr(op0_group_barrier, 0); - op->set_alu_flag(alu_last_instr); emit_instruction(op); return true; } @@ -1632,11 +1627,9 @@ Shader::load_ubo(nir_intrinsic_instr *instr) ir = new AluInstr(op1_mov, value_factory().dest(instr->def, i, pin), uniform, - {alu_write}); + AluInstr::write); emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } else { int buf_cmp = nir_intrinsic_component(instr); @@ -1651,8 +1644,6 @@ Shader::load_ubo(nir_intrinsic_instr *instr) ir = new AluInstr(op1_mov, dest, u, AluInstr::write); emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); m_indirect_files |= 1 << TGSI_FILE_CONSTANT; return true; } @@ -1670,7 +1661,7 @@ bool Shader::emit_simple_mov(nir_def& def, int chan, PVirtualValue src, Pin pin) { auto dst = value_factory().dest(def, chan, pin); - emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::write)); return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp index ac10b3b2324..ea61f6b95c7 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp @@ -81,7 +81,7 @@ ComputeShader::emit_load_from_info_buffer(nir_intrinsic_instr *instr, int offset emit_instruction(new AluInstr(op1_mov, m_zero_register, value_factory().inline_const(ALU_SRC_0, 0), - AluInstr::last_write)); + AluInstr::write)); } auto dest = value_factory().dest_vec4(instr->def, pin_group); @@ -109,8 +109,8 @@ ComputeShader::emit_load_3vec(nir_intrinsic_instr *instr, for (int i = 0; i < 3; ++i) { auto dest = vf.dest(instr->def, i, pin_none); - emit_instruction(new AluInstr( - op1_mov, dest, src[i], i == 2 ? AluInstr::last_write : AluInstr::write)); + emit_instruction( + new AluInstr(op1_mov, dest, src[i], i == 2 ? AluInstr::write : AluInstr::write)); } return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp index cf02c9efe31..094fbb5386f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp @@ -72,7 +72,7 @@ FragmentShader::load_input(nir_intrinsic_instr *intr) vf.dest(intr->def, 0, pin_none), m_face_input, vf.inline_const(ALU_SRC_0, 0), - AluInstr::last_write); + AluInstr::write); emit_instruction(ir); return true; } @@ -143,7 +143,7 @@ FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr) nullptr, value_factory().src(intr->src[0], 0), value_factory().zero(), - {AluInstr::last})); + AluInstr::empty)); return true; case nir_intrinsic_terminate: @@ -152,7 +152,7 @@ FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr) nullptr, value_factory().zero(), value_factory().zero(), - {AluInstr::last})); + AluInstr::empty)); return true; case nir_intrinsic_load_sample_mask_in: if (m_apply_sample_mask) { @@ -286,9 +286,9 @@ FragmentShader::emit_load_sample_mask_in(nir_intrinsic_instr *instr) assert(m_sample_mask_reg); emit_instruction( - new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::last_write)); + new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::write)); emit_instruction( - new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::last_write)); + new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::write)); return true; } @@ -298,7 +298,7 @@ FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr *instr) assert(m_helper_invocation); auto& vf = value_factory(); emit_instruction( - new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::last_write)); + new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::write)); RegisterVec4 destvec{m_helper_invocation, nullptr, nullptr, nullptr, pin_group}; auto vtx = new LoadFromBuffer(destvec, @@ -312,7 +312,7 @@ FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr *instr) vtx->set_fetch_flag(FetchInstr::use_tc); vtx->set_always_keep(); auto dst = value_factory().dest(instr->def, 0, pin_free); - auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::last_write); + auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::write); ir->add_required_instr(vtx); emit_instruction(vtx); emit_instruction(ir); @@ -647,7 +647,6 @@ bool FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr) { auto& vf = value_factory(); - AluInstr *ir = nullptr; for (unsigned i = 0; i < intr->def.num_components; ++i) { sfn_log << SfnLog::io << "Inject register " << *m_interpolated_inputs[nir_intrinsic_base(intr)][i] << "\n"; @@ -657,8 +656,6 @@ FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr) i, m_interpolated_inputs[nir_intrinsic_base(intr)][index]); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -697,10 +694,10 @@ FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr) new AluInstr(op1_interp_load_p0, tmp, new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp), - AluInstr::last_write); + AluInstr::write); emit_instruction(ir); - emit_instruction(new AluInstr( - op1_mov, vf.dest(intr->def, i, pin_chan), tmp, AluInstr::last_write)); + emit_instruction( + new AluInstr(op1_mov, vf.dest(intr->def, i, pin_chan), tmp, AluInstr::write)); } else { ir = new AluInstr(op1_interp_load_p0, @@ -710,7 +707,6 @@ FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr) emit_instruction(ir); } } - ir->set_alu_flag(alu_last_instr); return true; } @@ -793,7 +789,6 @@ FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr) emit_instruction(ir); } assert(ir); - ir->set_alu_flag(alu_last_instr); } return true; @@ -895,22 +890,22 @@ FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr *instr) auto tmp1 = vf.temp_register(); emit_instruction( - new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write})); - emit_instruction(new AluInstr( - op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr})); + new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, AluInstr::write)); + emit_instruction( + new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, AluInstr::write)); emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->def, 0, pin_none), grad[3], slope[3], tmp1, - {alu_write})); + AluInstr::write)); emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->def, 1, pin_none), grad[2], slope[3], tmp0, - {alu_write, alu_last_instr})); + AluInstr::write)); return true; } @@ -947,17 +942,21 @@ FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr *instr) auto tmp0 = vf.temp_register(); auto tmp1 = vf.temp_register(); emit_instruction( - new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write})); - emit_instruction(new AluInstr( - op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr})); - emit_instruction(new AluInstr( - op3_muladd, vf.dest(instr->def, 0, pin_none), help[3], ofs_y, tmp1, {alu_write})); + new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, AluInstr::write)); + emit_instruction( + new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, AluInstr::write)); + emit_instruction(new AluInstr(op3_muladd, + vf.dest(instr->def, 0, pin_none), + help[3], + ofs_y, + tmp1, + AluInstr::write)); emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->def, 1, pin_none), help[2], ofs_y, tmp0, - {alu_write, alu_last_instr})); + AluInstr::write)); return true; } @@ -980,12 +979,11 @@ FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest, dest[chan], i & 1 ? params.j : params.i, new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan), - i == 0 ? AluInstr::write : AluInstr::last); + i == 0 ? AluInstr::write : AluInstr::empty); ir->set_bank_swizzle(alu_vec_210); success = group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); if (success) emit_instruction(group); return success; @@ -1012,7 +1010,6 @@ FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest, ir->set_bank_swizzle(alu_vec_210); success = group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); if (success) emit_instruction(group); return success; @@ -1037,7 +1034,6 @@ FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest, ir->set_bank_swizzle(alu_vec_210); success = group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); if (success) emit_instruction(group); diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp index aeb7c6b4fe3..dc04d6f50fd 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp @@ -149,8 +149,7 @@ GeometryShader::do_allocate_reserved_registers() for (int i = 0; i < 4; ++i) { m_export_base[i] = value_factory().temp_register(0, false); - emit_instruction( - new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::write)); } m_ring_item_sizes[0] = m_next_input_ring_offset; @@ -214,7 +213,7 @@ GeometryShader::emit_vertex(nir_intrinsic_instr *instr, bool cut) m_export_base[stream], m_export_base[stream], value_factory().literal(m_noutputs), - AluInstr::last_write); + AluInstr::write); emit_instruction(ir); } @@ -288,7 +287,6 @@ GeometryShader::store_output(nir_intrinsic_instr *instr) emit_instruction(ir); } } - ir->set_alu_flag(alu_last_instr); m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, tmp, @@ -396,7 +394,7 @@ GeometryShader::emit_adj_fix() adjhelp0, m_primitive_id, value_factory().one_i(), - AluInstr::last_write)); + AluInstr::write)); int reg_indices[R600_GS_VERTEX_INDIRECT_TOTAL]; int rotate_indices[R600_GS_VERTEX_INDIRECT_TOTAL] = {4, 5, 0, 1, 2, 3}; @@ -418,7 +416,6 @@ GeometryShader::emit_adj_fix() emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); for (int i = 0; i < R600_GS_VERTEX_INDIRECT_TOTAL; i++) m_per_vertex_offsets[i] = adjhelp[i]; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp index 91b5f729093..0eb32b79a99 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp @@ -133,8 +133,8 @@ VertexExportForFs::finalize() { if (m_vs_as_gs_a) { auto primid = m_parent->value_factory().temp_vec4(pin_group, {2, 7, 7, 7}); - m_parent->emit_instruction(new AluInstr( - op1_mov, primid[0], m_parent->primitive_id(), AluInstr::last_write)); + m_parent->emit_instruction( + new AluInstr(op1_mov, primid[0], m_parent->primitive_id(), AluInstr::write)); int param = m_last_param_export ? m_last_param_export->location() + 1 : 0; m_last_param_export = new ExportInstr(ExportInstr::param, param, primid); @@ -202,9 +202,8 @@ VertexExportForFs::emit_varying_pos(const store_loc& store_info, auto src = m_parent->value_factory().src(intr.src[0], 0); auto clamped = m_parent->value_factory().temp_register(); m_parent->emit_instruction( - new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp, alu_last_instr})); - auto alu = - new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::last_write); + new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp})); + auto alu = new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::write); if (m_parent->chip_class() < ISA_CC_EVERGREEN) alu->set_alu_flag(alu_is_trans); m_parent->emit_instruction(alu); @@ -277,8 +276,6 @@ VertexExportForFs::emit_varying_param(const store_loc& store_info, m_parent->emit_instruction(alu); } } - if (alu) - alu->set_alu_flag(alu_last_instr); m_last_param_export = new ExportInstr(ExportInstr::param, export_slot, value); m_output_registers[nir_intrinsic_base(&intr)] = &m_last_param_export->value(); @@ -352,8 +349,6 @@ VertexExportForFs::emit_stream(int stream) alu = new AluInstr(op1_mov, tmp[i][j], (*so_gpr[i])[j + sc], {alu_write}); m_parent->emit_instruction(alu); } - if (alu) - alu->set_alu_flag(alu_last_instr); start_comp[i] = 0; so_gpr[i] = &tmp[i]; @@ -657,8 +652,6 @@ VertexExportForGS::do_store_output(const store_loc& store_info, AluInstr::write); m_parent->emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); m_parent->emit_instruction(new MemRingOutInstr( cf_mem_ring, MemRingOutInstr::mem_write, value, ring_offset >> 2, 4, nullptr)); diff --git a/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp b/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp index 71588b387e9..ad6860fd6f8 100644 --- a/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp @@ -185,7 +185,8 @@ auto AddressSplitVisitor::load_index_register_eg(Instr *instr, const EAluOp idx_op[2] = {op1_set_cf_idx0, op1_set_cf_idx1}; - m_last_idx_load[idx_id] = new AluInstr(idx_op[idx_id], idx, m_vf.addr(), {}); + m_last_idx_load[idx_id] = + new AluInstr(idx_op[idx_id], idx, m_vf.addr(), AluInstr::empty); m_current_block->insert(m_block_iterator, m_last_idx_load[idx_id]); for (auto&& i : m_last_idx_use[idx_id]) m_last_ar_load->add_required_instr(i); @@ -208,7 +209,7 @@ auto AddressSplitVisitor::load_index_register_ca(PRegister index) -> int if (idx_id < 0) { idx_id = pick_idx(); auto idx = m_vf.idx_reg(idx_id); - m_last_idx_load[idx_id] = new AluInstr(op1_mova_int, idx, index, {}); + m_last_idx_load[idx_id] = new AluInstr(op1_mova_int, idx, index, AluInstr::empty); m_current_block->insert(m_block_iterator, m_last_idx_load[idx_id]); for (auto&& i : m_last_idx_use[idx_id]) @@ -249,7 +250,7 @@ void AddressSplitVisitor::load_ar(Instr *instr, PRegister addr) { auto ar = m_vf.addr(); - m_last_ar_load = new AluInstr(op1_mova_int, ar, addr, {}); + m_last_ar_load = new AluInstr(op1_mova_int, ar, addr, AluInstr::empty); m_current_block->insert(m_block_iterator, m_last_ar_load); ar->add_use(instr); m_current_addr = addr; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp index d110f50bdb9..c5f7616c5d0 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp @@ -37,7 +37,7 @@ TEST_F(InstrTest, test_alu_uni_op_mov) AluInstr alu(op1_mov, new Register(128, 2, pin_none), new Register(129, 0, pin_chan), - {alu_write}); + AluInstr::write); EXPECT_TRUE(alu.has_alu_flag(alu_write)); @@ -187,14 +187,14 @@ TEST_F(InstrTest, test_alu_op1_comp) auto r129y = new Register(129, 1, pin_none); auto r130x = new Register(130, 0, pin_none); - AluInstr alu1(op1_mov, r128z, r129x, {alu_write}); - EXPECT_NE(alu1, AluInstr(op1_mov, r128y, r129x, {alu_write})); - EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129xc, {alu_write})); - EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129y, {alu_write})); - EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r130x, {alu_write})); + AluInstr alu1(op1_mov, r128z, r129x, AluInstr::write); + EXPECT_NE(alu1, AluInstr(op1_mov, r128y, r129x, AluInstr::write)); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129xc, AluInstr::write)); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129y, AluInstr::write)); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r130x, AluInstr::write)); EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129x, {alu_write, alu_last_instr})); - EXPECT_NE(alu1, AluInstr(op1_flt_to_int, r128z, r129x, {alu_write})); - EXPECT_NE(alu1, AluInstr(op1_mov, r128zc, r129x, {alu_write})); + EXPECT_NE(alu1, AluInstr(op1_flt_to_int, r128z, r129x, AluInstr::write)); + EXPECT_NE(alu1, AluInstr(op1_mov, r128zc, r129x, AluInstr::write)); EXPECT_EQ(alu1, alu1); } @@ -205,14 +205,17 @@ TEST_F(InstrTest, test_alu_op2_comp) auto r128y = new Register(128, 1, pin_none); auto r128z = new Register(128, 2, pin_none); - AluInstr alu1(op2_add, r128z, r128x, r128y, {alu_write}); + AluInstr alu1(op2_add, r128z, r128x, r128y, AluInstr::write); EXPECT_NE( - alu1, AluInstr(op2_add, r128z, r128x, new Register(129, 2, pin_none), {alu_write})); + alu1, + AluInstr(op2_add, r128z, r128x, new Register(129, 2, pin_none), AluInstr::write)); EXPECT_NE( - alu1, AluInstr(op2_add, r128z, r128x, new Register(128, 0, pin_none), {alu_write})); + alu1, + AluInstr(op2_add, r128z, r128x, new Register(128, 0, pin_none), AluInstr::write)); EXPECT_NE( - alu1, AluInstr(op2_add, r128z, r128x, new Register(128, 1, pin_chan), {alu_write})); + alu1, + AluInstr(op2_add, r128z, r128x, new Register(128, 1, pin_chan), AluInstr::write)); } TEST_F(InstrTest, test_alu_op3_comp) @@ -221,20 +224,29 @@ TEST_F(InstrTest, test_alu_op3_comp) auto r128y = new Register(128, 1, pin_none); auto r128z = new Register(128, 2, pin_none); - AluInstr alu1(op3_muladd, r128z, r128x, r128y, r128y, {alu_write}); + AluInstr alu1(op3_muladd, r128z, r128x, r128y, r128y, AluInstr::write); - EXPECT_NE( - alu1, - AluInstr( - op3_muladd, r128z, r128x, r128y, new Register(129, 2, pin_none), {alu_write})); - EXPECT_NE( - alu1, - AluInstr( - op3_muladd, r128z, r128x, r128y, new Register(128, 0, pin_none), {alu_write})); - EXPECT_NE( - alu1, - AluInstr( - op3_muladd, r128z, r128x, r128y, new Register(128, 1, pin_chan), {alu_write})); + EXPECT_NE(alu1, + AluInstr(op3_muladd, + r128z, + r128x, + r128y, + new Register(129, 2, pin_none), + AluInstr::write)); + EXPECT_NE(alu1, + AluInstr(op3_muladd, + r128z, + r128x, + r128y, + new Register(128, 0, pin_none), + AluInstr::write)); + EXPECT_NE(alu1, + AluInstr(op3_muladd, + r128z, + r128x, + r128y, + new Register(128, 1, pin_chan), + AluInstr::write)); } TEST_F(InstrTest, test_alu_op3_ne) @@ -258,7 +270,7 @@ TEST_F(InstrTest, test_alu_op3_ne) AluInstr(op3_cnde, R130x, R130y, R130z, R131w, {alu_write, alu_last_instr})); EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R130z, {alu_write, alu_last_instr})); - EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R131w, {alu_write})); + EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R131w, AluInstr::write)); AluInstr alu_cf_changes = alu; alu_cf_changes.set_cf_type(cf_alu_push_before); @@ -328,15 +340,15 @@ TEST_F(InstrTest, test_alu_dot4_grouped) auto i = group->begin(); EXPECT_NE(i, group->end()); ASSERT_TRUE(*i); - check(**i, AluInstr(op2_dot4_ieee, R132x, R130x, R130y, {alu_write})); + check(**i, AluInstr(op2_dot4_ieee, R132x, R130x, R130y, AluInstr::write)); ++i; EXPECT_NE(i, group->end()); ASSERT_TRUE(*i); - check(**i, AluInstr(op2_dot4_ieee, R132y, R130z, R130w, {})); + check(**i, AluInstr(op2_dot4_ieee, R132y, R130z, R130w, AluInstr::empty)); ++i; EXPECT_NE(i, group->end()); ASSERT_TRUE(*i); - check(**i, AluInstr(op2_dot4_ieee, R132z, R131x, R131y, {})); + check(**i, AluInstr(op2_dot4_ieee, R132z, R131x, R131y, AluInstr::empty)); ++i; EXPECT_NE(i, group->end()); ASSERT_TRUE(*i); diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp index 781390c32d6..f3db522496f 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp @@ -51,7 +51,7 @@ TEST_F(TestInstrFromString, test_alu_lds_read_ret) { add_dest_from_string("R1999.x"); - AluInstr expect(DS_OP_READ_RET, {new Register(1999, 0, pin_none)}, {}); + AluInstr expect(DS_OP_READ_RET, {new Register(1999, 0, pin_none)}, AluInstr::empty); check("ALU LDS READ_RET __.x : R1999.x {}", expect); } @@ -96,7 +96,7 @@ TEST_F(TestInstrFromString, test_alu_mov_neg_abs) AluInstr expect(op1_mov, new Register(2000, 1, pin_none), new Register(1999, 0, pin_none), - {alu_write}); + AluInstr::write); expect.set_source_mod(0, AluInstr::mod_abs); expect.set_source_mod(0, AluInstr::mod_neg); @@ -351,7 +351,7 @@ TEST_F(TestInstrFromString, test_alu_interp_xy) new Register(1024, 2, pin_chan), r0y, new InlineConstant(ALU_SRC_PARAM_BASE, 2), - {alu_write}); + AluInstr::write); expect.set_bank_swizzle(alu_vec_210); check(init, expect); @@ -369,7 +369,7 @@ TEST_F(TestInstrFromString, test_alu_interp_xy_no_write) new Register(1024, 0, pin_chan), r0x, new InlineConstant(ALU_SRC_PARAM_BASE, 2), - {}); + AluInstr::empty); expect.set_bank_swizzle(alu_vec_210); check(init, expect); diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp index d3c10b51373..407b3ec0804 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp @@ -79,11 +79,11 @@ TEST_F(LiveRangeTests, SimpleAdd) RegisterVec4::Swizzle dummy; ValueFactory vf; - Register *r0x = vf.dest_from_string("S0.x@free"); - Register *r1x = vf.dest_from_string("S1.x@free"); + Register *r0x = vf.dest_from_string("S0.x@chan"); + Register *r1x = vf.dest_from_string("S1.x@chan"); RegisterVec4 r2 = vf.dest_vec4_from_string("S2.xyzw", dummy, pin_none); - Register *r3x = vf.dest_from_string("S3.x@free"); - RegisterVec4 r4 = vf.dest_vec4_from_string("S4.xyzw", dummy, pin_group); + Register *r3x = vf.dest_from_string("S3.x@chan"); + RegisterVec4 r4 = vf.dest_vec4_from_string("S4.xyzw", dummy, pin_chgr); LiveRangeMap expect = vf.prepare_live_range_map(); @@ -98,7 +98,7 @@ TEST_F(LiveRangeTests, SimpleAdd) for (int i = 0; i < 4; ++i) expect.set_life_range(*r4[i], 5, 6); - check(add_add_1_expect_from_nir, expect); + check(add_add_1_expect_from_nir_scheduled, expect); } TEST_F(LiveRangeTests, SimpleAInterpolation) diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp index 47d6a1b2bc1..bc14638affe 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp @@ -514,30 +514,30 @@ TEST_F(TestShaderFromNir, fs_shed_tex_coord) TEST_F(TestShaderFromNir, OptimizeAddWChanetoTrans) { const char *input = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU MOV S2.x@free{s} : I[0] {WL} - ALU MOV S3.y@free{s} : L[0x40c00000] {WL} - ALU MOV S4.z@free{s} : L[0xc1140000] {WL} - ALU MOV S5.w@free{s} : L[0xbfe00000] {WL} - ALU MOV S6.x@free{s} : L[0x3fa00000] {WL} + ALU MOV S2.x@free{s} : I[0] {W} + ALU MOV S3.y@free{s} : L[0x40c00000] {W} + ALU MOV S4.z@free{s} : L[0xc1140000] {W} + ALU MOV S5.w@free{s} : L[0xbfe00000] {W} + ALU MOV S6.x@free{s} : L[0x3fa00000] {W} ALU MOV S7.x{s} : KC0[0].x {W} ALU MOV S7.y{s} : KC0[0].y {W} ALU MOV S7.z{s} : KC0[0].z {W} - ALU MOV S7.w{s} : KC0[0].w {WL} - ALU ADD S8.y@free{s} : S3.y@free{s} -S7.x{s} {WL} - ALU ADD S9.z@free{s} : S4.z@free{s} -S7.y{s} {WL} - ALU ADD S10.w@free{s} : S5.w@free{s} -S7.z{s} {WL} - ALU ADD S11.x@free{s} : S6.x@free{s} -S7.w{s} {WL} - ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL} - ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL} - ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL} - ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU MOV S7.w{s} : KC0[0].w {W} + ALU ADD S8.y@free{s} : S3.y@free{s} -S7.x{s} {W} + ALU ADD S9.z@free{s} : S4.z@free{s} -S7.y{s} {W} + ALU ADD S10.w@free{s} : S5.w@free{s} -S7.z{s} {W} + ALU ADD S11.x@free{s} : S6.x@free{s} -S7.w{s} {W} + ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W} + ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} + ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} + ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} ALU MOV S17.x{s} : S12.y@free{s} {W} ALU MOV S17.y{s} : S13.z@free{s} {W} ALU MOV S17.z{s} : S14.x@free{s} {W} @@ -545,26 +545,26 @@ BLOCK_START ALU MOV S18.x@group{s} : S17.x{s} {W} ALU MOV S18.y@group{s} : S17.y{s} {W} ALU MOV S18.z@group{s} : S17.z{s} {W} - ALU MOV S18.w@group{s} : S17.w{s} {WL} + ALU MOV S18.w@group{s} : S17.w{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END)"; const char *expect = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU ADD S8.y@free{s} : L[0x40c00000] -KC0[0].x {WL} - ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL} - ALU ADD S10.w@free{s} : L[0xbfe00000] -KC0[0].z {WL} - ALU ADD S11.x@free{s} : L[0x3fa00000] -KC0[0].w {WL} + ALU ADD S8.y@free{s} : L[0x40c00000] -KC0[0].x {W} + ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {W} + ALU ADD S10.w@free{s} : L[0xbfe00000] -KC0[0].z {W} + ALU ADD S11.x@free{s} : L[0x3fa00000] -KC0[0].w {W} ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W} ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} - ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END )"; @@ -576,57 +576,57 @@ BLOCK_END TEST_F(TestShaderFromNir, PeeholeSoureModsSimple) { const char *input = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU MOV S2.x@free{s} : I[0] {WL} - ALU MOV S3.y@free{s} : L[0x40c00000] {WL} - ALU MOV S4.z@free{s} : L[0xc1140000] {WL} - ALU MOV S5.w@free{s} : L[0xbfe00000] {WL} - ALU MOV S6.x@free{s} : L[0x3fa00000] {WL} + ALU MOV S2.x@free{s} : I[0] {W} + ALU MOV S3.y@free{s} : L[0x40c00000] {W} + ALU MOV S4.z@free{s} : L[0xc1140000] {W} + ALU MOV S5.w@free{s} : L[0xbfe00000] {W} + ALU MOV S6.x@free{s} : L[0x3fa00000] {W} ALU MOV S7.x{s} : |KC0[0].x| {W} ALU MOV S7.y{s} : -KC0[0].y {W} ALU MOV S7.z{s} : -|KC0[0].z| {W} - ALU MOV S7.w{s} : KC0[0].w {WL} - ALU ADD S8.y@free{s} : S3.y@free{s} S7.x{s} {WL} - ALU ADD S9.z@free{s} : S4.z@free{s} S7.y{s} {WL} - ALU ADD S10.w@free{s} : S5.w@free{s} S7.z{s} {WL} - ALU ADD S11.x@free{s} : S6.x@free{s} S7.w{s} {WL} - ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL} - ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL} - ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL} - ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU MOV S7.w{s} : KC0[0].w {W} + ALU ADD S8.y@free{s} : S3.y@free{s} S7.x{s} {W} + ALU ADD S9.z@free{s} : S4.z@free{s} S7.y{s} {W} + ALU ADD S10.w@free{s} : S5.w@free{s} S7.z{s} {W} + ALU ADD S11.x@free{s} : S6.x@free{s} S7.w{s} {W} + ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W} + ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} + ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} + ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} ALU MOV S17.x{s} : S12.y@free{s} {W} ALU MOV S17.y{s} : S13.z@free{s} {W} ALU MOV S17.z{s} : S14.x@free{s} {W} - ALU MOV S17.w{s} : S15.y@free{s} {WL} + ALU MOV S17.w{s} : S15.y@free{s} {W} ALU MOV S18.x@group{s} : S17.x{s} {W} ALU MOV S18.y@group{s} : S17.y{s} {W} ALU MOV S18.z@group{s} : S17.z{s} {W} - ALU MOV S18.w@group{s} : S17.w{s} {WL} + ALU MOV S18.w@group{s} : S17.w{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END)"; const char *expect = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU ADD S8.y@free{s} : L[0x40c00000] |KC0[0].x| {WL} - ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL} - ALU ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {WL} - ALU ADD S11.x@free{s} : L[0x3fa00000] KC0[0].w {WL} + ALU ADD S8.y@free{s} : L[0x40c00000] |KC0[0].x| {W} + ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {W} + ALU ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {W} + ALU ADD S11.x@free{s} : L[0x3fa00000] KC0[0].w {W} ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W} ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} - ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END )"; @@ -638,34 +638,34 @@ BLOCK_END TEST_F(TestShaderFromNir, PeeholeSoureModsAbsNegTwice) { const char *input = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU MOV S2.x@free{s} : I[0] {WL} - ALU MOV S3.y@free{s} : L[0x40c00000] {WL} - ALU MOV S4.z@free{s} : L[0xc1140000] {WL} - ALU MOV S5.w@free{s} : L[0xbfe00000] {WL} - ALU MOV S6.x@free{s} : L[0x3fa00000] {WL} + ALU MOV S2.x@free{s} : I[0] {W} + ALU MOV S3.y@free{s} : L[0x40c00000] {W} + ALU MOV S4.z@free{s} : L[0xc1140000] {W} + ALU MOV S5.w@free{s} : L[0xbfe00000] {W} + ALU MOV S6.x@free{s} : L[0x3fa00000] {W} ALU MOV S7.x{s} : |KC0[0].x| {W} ALU MOV S7.y{s} : -KC0[0].y {W} ALU MOV S7.z{s} : -|KC0[0].z| {W} - ALU MOV S7.w{s} : KC0[0].w {WL} + ALU MOV S7.w{s} : KC0[0].w {W} ALU MOV S8.x : |S7.x| {W} ALU MOV S8.y : -S7.y {W} ALU MOV S8.z : -|S7.z| {W} - ALU MOV S8.w : -|S7.x| {WL} - ALU ADD S19.y@free{s} : S3.y@free{s} S8.x {WL} - ALU ADD S9.z@free{s} : S4.z@free{s} S8.y {WL} - ALU ADD S10.w@free{s} : S5.w@free{s} S8.z {WL} - ALU ADD S11.x@free{s} : S6.x@free{s} S8.w {WL} - ALU EXP_IEEE S12.y@free{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {WL} - ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL} - ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL} - ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU MOV S8.w : -|S7.x| {W} + ALU ADD S19.y@free{s} : S3.y@free{s} S8.x {W} + ALU ADD S9.z@free{s} : S4.z@free{s} S8.y {W} + ALU ADD S10.w@free{s} : S5.w@free{s} S8.z {W} + ALU ADD S11.x@free{s} : S6.x@free{s} S8.w {W} + ALU EXP_IEEE S12.y@free{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {W} + ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} + ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} + ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} ALU MOV S17.x{s} : S12.y@free{s} {W} ALU MOV S17.y{s} : S13.z@free{s} {W} ALU MOV S17.z{s} : S14.x@free{s} {W} @@ -673,26 +673,26 @@ BLOCK_START ALU MOV S18.x@group{s} : S17.x{s} {W} ALU MOV S18.y@group{s} : S17.y{s} {W} ALU MOV S18.z@group{s} : S17.z{s} {W} - ALU MOV S18.w@group{s} : S17.w{s} {WL} + ALU MOV S18.w@group{s} : S17.w{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END)"; const char *expect = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU ADD S19.y@free{s} : L[0x40c00000] |KC0[0].x| {WL} - ALU ADD S9.z@free{s} : L[0xc1140000] KC0[0].y {WL} - ALU ADD S10.w@free{s} : L[0xbfe00000] |KC0[0].z| {WL} - ALU ADD S11.x@free{s} : L[0x3fa00000] -|KC0[0].x| {WL} + ALU ADD S19.y@free{s} : L[0x40c00000] |KC0[0].x| {W} + ALU ADD S9.z@free{s} : L[0xc1140000] KC0[0].y {W} + ALU ADD S10.w@free{s} : L[0xbfe00000] |KC0[0].z| {W} + ALU ADD S11.x@free{s} : L[0x3fa00000] -|KC0[0].x| {W} ALU EXP_IEEE S18.x@group{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {W} ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} - ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END )"; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp index b7ba53ba165..77881abde7d 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp @@ -57,13 +57,17 @@ EXPORT_DONE PIXEL 0 R2000.xyzw expect.push_back(new AluInstr(op1_mov, new Register(2000, 0, pin_group), new LiteralConstant(0x38000000), - {alu_write})); + AluInstr::write)); - expect.push_back(new AluInstr( - op1_mov, new Register(2000, 1, pin_group), new LiteralConstant(0x0), {alu_write})); + expect.push_back(new AluInstr(op1_mov, + new Register(2000, 1, pin_group), + new LiteralConstant(0x0), + AluInstr::write)); - expect.push_back(new AluInstr( - op1_mov, new Register(2000, 2, pin_group), new LiteralConstant(0x0), {alu_write})); + expect.push_back(new AluInstr(op1_mov, + new Register(2000, 2, pin_group), + new LiteralConstant(0x0), + AluInstr::write)); expect.push_back(new AluInstr(op1_mov, new Register(2000, 3, pin_group), diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp index 87b8367a7f1..237ae775158 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp @@ -125,17 +125,50 @@ PROP WRITE_ALL_COLORS:1 PROP COLOR_EXPORT_MASK:15 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -ALU MOV S0.x@free : L[0xbf000000] {WL} -ALU MOV S1.x@free : I[0] {WL} +ALU MOV S0.x@free : L[0xbf000000] {W} +ALU MOV S1.x@free : I[0] {W} +ALU MOV S2.x : KC0[0].x {W} +ALU MOV S2.y : KC0[0].y {W} +ALU MOV S2.z : KC0[0].z {W} +ALU MOV S2.w : KC0[0].w {W} +ALU ADD S3.x@free : S0.x@free S2.x {W} +ALU MOV S4.x@group : S3.x@free {W} +ALU MOV S4.y@group : S2.y {W} +ALU MOV S4.z@group : S2.z {W} +ALU MOV S4.w@group : S2.w {W} +EXPORT_DONE PIXEL 0 S4.xyzw +)"; + +const char *add_add_1_expect_from_nir_scheduled = + R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP WRITE_ALL_COLORS:1 +PROP COLOR_EXPORT_MASK:15 +OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 +SHADER +ALU_GROUP_BEGIN +ALU MOV S0.x@chan : L[0xbf000000] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU MOV S1.x@chan : I[0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN ALU MOV S2.x : KC0[0].x {W} ALU MOV S2.y : KC0[0].y {W} ALU MOV S2.z : KC0[0].z {W} ALU MOV S2.w : KC0[0].w {WL} -ALU ADD S3.x@free : S0.x@free S2.x {WL} -ALU MOV S4.x@group : S3.x@free {W} -ALU MOV S4.y@group : S2.y {W} -ALU MOV S4.z@group : S2.z {W} -ALU MOV S4.w@group : S2.w {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU ADD S3.x@chan : S0.x@free S2.x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU MOV S4.x@chgr : S3.x@free {W} +ALU MOV S4.y@chgr : S2.y {W} +ALU MOV S4.z@chgr : S2.z {W} +ALU MOV S4.w@chgr : S2.w {WL} +ALU_GROUP_END EXPORT_DONE PIXEL 0 S4.xyzw )"; @@ -149,17 +182,17 @@ PROP WRITE_ALL_COLORS:1 PROP COLOR_EXPORT_MASK:15 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -ALU MOV S0.x@free : L[0xbf000000] {WL} -ALU MOV S1.x@free : I[0] {WL} +ALU MOV S0.x@free : L[0xbf000000] {W} +ALU MOV S1.x@free : I[0] {W} ALU MOV S2.x : KC0[0].x {W} ALU MOV S2.y : KC0[0].y {W} ALU MOV S2.z : KC0[0].z {W} -ALU MOV S2.w : KC0[0].w {WL} -ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {WL} +ALU MOV S2.w : KC0[0].w {W} +ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {W} ALU MOV S4.x@group : S3.x@free {W} ALU MOV S4.y@group : KC0[0].y {W} ALU MOV S4.z@group : KC0[0].z {W} -ALU MOV S4.w@group : KC0[0].w {WL} +ALU MOV S4.w@group : KC0[0].w {W} EXPORT_DONE PIXEL 0 S4.xyzw )"; @@ -173,11 +206,11 @@ PROP WRITE_ALL_COLORS:1 PROP COLOR_EXPORT_MASK:15 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {WL} +ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {W} ALU MOV S4.x@group : S3.x@free {W} ALU MOV S4.y@group : KC0[0].y {W} ALU MOV S4.z@group : KC0[0].z {W} -ALU MOV S4.w@group : KC0[0].w {WL} +ALU MOV S4.w@group : KC0[0].w {W} EXPORT_DONE PIXEL 0 S4.xyzw )"; @@ -195,7 +228,7 @@ SHADER ALU ADD S4.x@group : L[0xbf000000] KC0[0].x {W} ALU MOV S4.y@group : KC0[0].y {W} ALU MOV S4.z@group : KC0[0].z {W} -ALU MOV S4.w@group : KC0[0].w {WL} +ALU MOV S4.w@group : KC0[0].w {W} EXPORT_DONE PIXEL 0 S4.xyzw )"; @@ -1056,63 +1089,63 @@ OUTPUT LOC:1 VARYING_SLOT:1 MASK:15 SYSVALUES R1.xyzw REGISTERS R2.x R3.x R4.x R5.x R6.x R7.x R8.x SHADER -ALU MOV S9.x@free : I[0] {WL} -ALU MOV S10.x@free : I[-1] {WL} -ALU MOV S11.x@free : I[0] {WL} -ALU MOV S12.x@free : I[1] {WL} +ALU MOV S9.x@free : I[0] {W} +ALU MOV S10.x@free : I[-1] {W} +ALU MOV S11.x@free : I[0] {W} +ALU MOV S12.x@free : I[1] {W} ALU MOV S13.x : I[1.0] {W} ALU MOV S13.y : I[1.0] {W} ALU MOV S13.z : I[0] {W} -ALU MOV S13.w : I[1.0] {WL} -ALU MOV S14.x@free : L[0x2] {WL} -ALU MOV S15.x@free : KC0[0].x {WL} -ALU SETE_INT S16.x@free : S15.x@free S12.x@free {WL} +ALU MOV S13.w : I[1.0] {W} +ALU MOV S14.x@free : L[0x2] {W} +ALU MOV S15.x@free : KC0[0].x {W} +ALU SETE_INT S16.x@free : S15.x@free S12.x@free {W} IF (( ALU PRED_SETNE_INT __.x@free : S16.x@free I[0] {LEP} PUSH_BEFORE )) - ALU MOV S18.x@free : KC0[2].x {WL} - ALU SETNE_INT S19.x@free : S18.x@free S12.x {WL} + ALU MOV S18.x@free : KC0[2].x {W} + ALU SETNE_INT S19.x@free : S18.x@free S12.x {W} IF (( ALU PRED_SETNE_INT __.y@free : S19.x@free I[0] {LEP} PUSH_BEFORE )) - ALU MOV R3.x : S12.x@free {WL} - ALU MOV R2.x : S9.x@free {WL} + ALU MOV R3.x : S12.x@free {W} + ALU MOV R2.x : S9.x@free {W} LOOP_BEGIN - ALU INT_TO_FLT R4.x : R2.x {WL} - ALU MOV S21.x@free : KC0[1].x {WL} - ALU SETNE_INT S22.x@free : S21.x@free S14.x@free {WL} + ALU INT_TO_FLT R4.x : R2.x {W} + ALU MOV S21.x@free : KC0[1].x {W} + ALU SETNE_INT S22.x@free : S21.x@free S14.x@free {W} IF (( ALU PRED_SETNE_INT __.z@free : S22.x@free I[0] {LEP} PUSH_BEFORE )) BREAK ENDIF - ALU ADD_INT R5.x@free : R3.x S12.x@free {WL} - ALU MOV R2.x : R3.x {WL} - ALU MOV R3.x : R5.x {WL} + ALU ADD_INT R5.x@free : R3.x S12.x@free {W} + ALU MOV R2.x : R3.x {W} + ALU MOV R3.x : R5.x {W} LOOP_END - ALU MOV S24.x@free : I[1.0] {WL} - ALU MOV R8.x : S24.x@free {WL} - ALU MOV R7.x : R8.x {WL} - ALU MOV R6.x : S10.x@free {WL} + ALU MOV S24.x@free : I[1.0] {W} + ALU MOV R8.x : S24.x@free {W} + ALU MOV R7.x : R8.x {W} + ALU MOV R6.x : S10.x@free {W} ELSE - ALU MOV S25.x@free : I[1.0] {WL} - ALU MOV R8.x : S25.x@free {WL} - ALU MOV R7.x : S9.x {WL} - ALU MOV R4.x : R8.x {WL} - ALU MOV R6.x : S11.x@free {WL} + ALU MOV S25.x@free : I[1.0] {W} + ALU MOV R8.x : S25.x@free {W} + ALU MOV R7.x : S9.x {W} + ALU MOV R4.x : R8.x {W} + ALU MOV R6.x : S11.x@free {W} ENDIF ELSE - ALU MOV S26.x@free : I[1.0] {WL} - ALU MOV R8.x : S26.x@free {WL} - ALU MOV R7.x : S9.x {WL} - ALU MOV R4.x : R8.x {WL} - ALU MOV R6.x : S10.x@free {WL} + ALU MOV S26.x@free : I[1.0] {W} + ALU MOV R8.x : S26.x@free {W} + ALU MOV R7.x : S9.x {W} + ALU MOV R4.x : R8.x {W} + ALU MOV R6.x : S10.x@free {W} ENDIF -ALU CNDE_INT S27.x@free : R6.x S13.x R4.x {WL} -ALU CNDE_INT S28.x@free : R6.x S13.y R7.x {WL} -ALU CNDE_INT S29.x@free : R6.x S13.w R8.x {WL} +ALU CNDE_INT S27.x@free : R6.x S13.x R4.x {W} +ALU CNDE_INT S28.x@free : R6.x S13.y R7.x {W} +ALU CNDE_INT S29.x@free : R6.x S13.w R8.x {W} EXPORT_DONE POS 0 R1.xyzw -ALU MOV CLAMP S31.x@free : S27.x@free {WL} -ALU MOV CLAMP S32.x@free : S28.x@free {WL} -ALU MOV CLAMP S33.x@free : S29.x@free {WL} +ALU MOV CLAMP S31.x@free : S27.x@free {W} +ALU MOV CLAMP S32.x@free : S28.x@free {W} +ALU MOV CLAMP S33.x@free : S29.x@free {W} ALU MOV S34.x@group : S31.x@free {W} ALU MOV S34.y@group : S32.x@free {W} ALU MOV S34.z@group : S9.x@free {W} -ALU MOV S34.w@group : S33.x@free {WL} +ALU MOV S34.w@group : S33.x@free {W} EXPORT_DONE PARAM 0 S34.xyzw )"; @@ -1128,39 +1161,39 @@ REGISTERS R2.x@free R3.x@free R4.x@free R5.x@free R6.x@free R7.x@free R8.x@free SHADER IF (( ALU PREDE_INT __.x@free : KC0[0].x I[1] {LEP} PUSH_BEFORE )) IF (( ALU PRED_SETNE_INT __.y@free : KC0[2].x I[1] {LEP} PUSH_BEFORE )) - ALU MOV R3.x : I[1] {WL} - ALU MOV R2.x : I[0] {WL} + ALU MOV R3.x : I[1] {W} + ALU MOV R2.x : I[0] {W} LOOP_BEGIN - ALU INT_TO_FLT R4.x : R2.x {WL} + ALU INT_TO_FLT R4.x : R2.x {W} IF (( ALU PRED_SETNE_INT __.z@free : KC0[1].x L[0x2] {LEP} PUSH_BEFORE )) BREAK ENDIF - ALU ADD_INT R5.x : R3.x I[1] {WL} - ALU MOV R2.x : R3.x {WL} - ALU MOV R3.x : R5.x {WL} + ALU ADD_INT R5.x : R3.x I[1] {W} + ALU MOV R2.x : R3.x {W} + ALU MOV R3.x : R5.x {W} LOOP_END - ALU MOV R8.x : I[1.0] {WL} - ALU MOV R7.x : I[1.0] {WL} - ALU MOV R6.x : I[-1] {WL} + ALU MOV R8.x : I[1.0] {W} + ALU MOV R7.x : I[1.0] {W} + ALU MOV R6.x : I[-1] {W} ELSE - ALU MOV R8.x : I[1.0] {WL} - ALU MOV R7.x : I[0] {WL} - ALU MOV R4.x : I[1.0] {WL} - ALU MOV R6.x : I[0] {WL} + ALU MOV R8.x : I[1.0] {W} + ALU MOV R7.x : I[0] {W} + ALU MOV R4.x : I[1.0] {W} + ALU MOV R6.x : I[0] {W} ENDIF ELSE - ALU MOV R8.x : I[1.0] {WL} - ALU MOV R7.x : I[0] {WL} - ALU MOV R4.x : I[1.0] {WL} - ALU MOV R6.x : I[-1] {WL} + ALU MOV R8.x : I[1.0] {W} + ALU MOV R7.x : I[0] {W} + ALU MOV R4.x : I[1.0] {W} + ALU MOV R6.x : I[-1] {W} ENDIF -ALU CNDE_INT S27.x@free : R6.x I[1.0] R4.x {WL} -ALU CNDE_INT S28.x@free : R6.x I[1.0] R7.x {WL} -ALU CNDE_INT S29.x@free : R6.x I[1.0] R8.x {WL} +ALU CNDE_INT S27.x@free : R6.x I[1.0] R4.x {W} +ALU CNDE_INT S28.x@free : R6.x I[1.0] R7.x {W} +ALU CNDE_INT S29.x@free : R6.x I[1.0] R8.x {W} EXPORT_DONE POS 0 R1.xyzw ALU MOV CLAMP S34.x@group : S27.x@free {W} ALU MOV CLAMP S34.y@group : S28.x@free {W} -ALU MOV CLAMP S34.w@group : S29.x@free {WL} +ALU MOV CLAMP S34.w@group : S29.x@free {W} EXPORT_DONE PARAM 0 S34.xy0w )"; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h index 5fdeef65964..1bccae33c88 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h @@ -12,6 +12,7 @@ extern const char *red_triangle_fs_expect_from_nir_dce; extern const char *add_add_1_nir; extern const char *add_add_1_expect_from_nir; +extern const char *add_add_1_expect_from_nir_scheduled; extern const char *add_add_1_expect_from_nir_copy_prop_fwd; extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce; extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd;