From c221956b68bb1d23648bd7adf1301c55279b9d58 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Fri, 8 Aug 2025 14:56:30 +0200 Subject: [PATCH] r600/sfn: remove early emission of ALU last op The scheduler sets the flag when scheduling the ALU instructions into ALU groups, so there is no need to set these flags early and it was already done inconsistently anyway. The only exceptions are the ALU predicate instructions, because they are not yet handled directly by the scheduler. Clean up the use of alu_write too. Signed-off-by: Gert Wollny Part-of: --- .../drivers/r600/sfn/sfn_instr_alu.cpp | 99 ++++------ src/gallium/drivers/r600/sfn/sfn_instr_alu.h | 2 - .../drivers/r600/sfn/sfn_instr_lds.cpp | 6 +- .../drivers/r600/sfn/sfn_instr_mem.cpp | 71 ++++--- .../drivers/r600/sfn/sfn_instr_tex.cpp | 16 +- .../drivers/r600/sfn/sfn_instrfactory.cpp | 10 +- src/gallium/drivers/r600/sfn/sfn_shader.cpp | 59 +++--- .../drivers/r600/sfn/sfn_shader_cs.cpp | 6 +- .../drivers/r600/sfn/sfn_shader_fs.cpp | 56 +++--- .../drivers/r600/sfn/sfn_shader_gs.cpp | 9 +- .../drivers/r600/sfn/sfn_shader_vs.cpp | 15 +- .../r600/sfn/sfn_split_address_loads.cpp | 7 +- .../drivers/r600/sfn/tests/sfn_instr_test.cpp | 70 ++++--- .../sfn/tests/sfn_instrfromstring_test.cpp | 8 +- .../r600/sfn/tests/sfn_liverange_test.cpp | 10 +- .../r600/sfn/tests/sfn_optimizer_test.cpp | 136 ++++++------- .../sfn/tests/sfn_shaderfromstring_test.cpp | 14 +- .../r600/sfn/tests/sfn_test_shaders.cpp | 183 +++++++++++------- .../drivers/r600/sfn/tests/sfn_test_shaders.h | 1 + 19 files changed, 387 insertions(+), 391 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index 4dcd88efe7f..765fa0ed24f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -619,8 +619,6 @@ AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr) } m_dest = new_dest; - if (!move_instr->has_alu_flag(alu_last_instr)) - 
reset_alu_flag(alu_last_instr); if (has_alu_flag(alu_is_cayman_trans)) { /* Copy propagation puts an instruction into the w channel, but we @@ -1842,17 +1840,15 @@ emit_alu_op1_64bit(const nir_alu_instr& alu, ir = new AluInstr(opcode, value_factory.dest(alu.def, 2 * i, pin_chan), value_factory.src64(alu.src[0], i, swz[0]), - {alu_write}); + AluInstr::write); group->add_instruction(ir); ir = new AluInstr(opcode, value_factory.dest(alu.def, 2 * i + 1, pin_chan), value_factory.src64(alu.src[0], i, swz[1]), - {alu_write}); + AluInstr::write); group->add_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } @@ -1869,12 +1865,10 @@ emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader) ir = new AluInstr(op1_mov, value_factory.dest(alu.def, 2 * i + c, pin_free), value_factory.src64(alu.src[0], i, c), - {alu_write}); + AluInstr::write); shader.emit_instruction(ir); } } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -1890,13 +1884,11 @@ emit_alu_neg(const nir_alu_instr& alu, Shader& shader) ir = new AluInstr(op1_mov, value_factory.dest(alu.def, 2 * i + c, pin_chan), value_factory.src64(alu.src[0], i, c), - {alu_write}); + AluInstr::write); shader.emit_instruction(ir); } ir->set_source_mod(0, AluInstr::mod_neg); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -1916,7 +1908,7 @@ emit_alu_abs64(const nir_alu_instr& alu, Shader& shader) auto ir = new AluInstr(op1_mov, value_factory.dest(alu.def, 1, pin_chan), value_factory.src64(alu.src[0], 0, 1), - AluInstr::last_write); + AluInstr::write); ir->set_source_mod(0, AluInstr::mod_abs); shader.emit_instruction(ir); return true; @@ -1967,7 +1959,7 @@ emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader) shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.def, 1, pin_chan), value_factory.src64(alu.src[0], 0, 1), - AluInstr::last_write)); + AluInstr::write)); } else { /* dest clamp doesn't work on plain 64 bit move, 
so add a zero @@ -1986,7 +1978,7 @@ emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader) value_factory.dest(alu.def, 1, pin_chan), value_factory.src64(alu.src[0], 0, 0), value_factory.literal(0), - AluInstr::last_write)); + AluInstr::write)); shader.emit_instruction(group); } @@ -2045,8 +2037,6 @@ emit_alu_op2_64bit(const nir_alu_instr& alu, i == 1 ? AluInstr::write : AluInstr::empty); group->add_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; @@ -2080,8 +2070,6 @@ emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, shader.emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -2104,8 +2092,6 @@ emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader ir->set_source_mod(0, AluInstr::mod_abs); group->add_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } @@ -2130,8 +2116,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) i < 2 ? 
AluInstr::write : AluInstr::empty); group->add_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } @@ -2157,7 +2141,6 @@ emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader) AluInstr::write); group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } @@ -2172,12 +2155,12 @@ emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader) ir = new AluInstr(op1v_flt64_to_flt32, value_factory.dest(alu.def, 0, pin_chan), value_factory.src64(alu.src[0], 0, 1), - {alu_write}); + AluInstr::write); group->add_instruction(ir); ir = new AluInstr(op1v_flt64_to_flt32, value_factory.dummy_dest(1), value_factory.src64(alu.src[0], 0, 0), - AluInstr::last); + AluInstr::empty); group->add_instruction(ir); shader.emit_instruction(group); return true; @@ -2196,7 +2179,7 @@ emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader) value_factory.dest(alu.def, 0, pin_free), src, value_factory.inline_const(mask, 0), - {alu_write}); + AluInstr::write); shader.emit_instruction(ir); return true; } @@ -2215,7 +2198,7 @@ emit_alu_op1(const nir_alu_instr& alu, ir = new AluInstr(opcode, value_factory.dest(alu.def, 0, pin_free), value_factory.src(alu.src[0], 0), - {alu_write}); + AluInstr::write); switch (mod) { case mod_src0_abs: ir->set_source_mod(0, AluInstr::mod_abs); @@ -2252,7 +2235,7 @@ emit_alu_op2(const nir_alu_instr& alu, value_factory.dest(alu.def, 0, pin_free), value_factory.src(*src0, 0), value_factory.src(*src1, 0), - {alu_write}); + AluInstr::write); if (src1_negate) ir->set_source_mod(1, AluInstr::mod_neg); shader.emit_instruction(ir); @@ -2287,7 +2270,7 @@ emit_alu_op3(const nir_alu_instr& alu, value_factory.src(*src[0], 0), value_factory.src(*src[1], 0), value_factory.src(*src[2], 0), - {alu_write}); + AluInstr::write); ir->set_alu_flag(alu_write); shader.emit_instruction(ir); return true; @@ -2309,7 +2292,7 @@ emit_dot(const nir_alu_instr& alu, 
int n, Shader& shader) srcs[2 * i + 1] = value_factory.src(src1, i); } - AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::last_write, n); + AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::write, n); shader.emit_instruction(ir); shader.set_flag(Shader::sh_disble_sb); @@ -2338,7 +2321,7 @@ emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader) srcs[2 * i + 1] = value_factory.zero(); } - AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4); + AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::write, 4); shader.emit_instruction(ir); return true; @@ -2363,7 +2346,7 @@ emit_fdph(const nir_alu_instr& alu, Shader& shader) srcs[6] = value_factory.one(); srcs[7] = value_factory.src(src1, 3); - AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4); + AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::write, 4); shader.emit_instruction(ir); return true; } @@ -2372,16 +2355,13 @@ static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader) { auto& value_factory = shader.value_factory(); - AluInstr *ir = nullptr; for (unsigned i = 0; i < nc; ++i) { auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]); auto dst = value_factory.dest(instr.def, i, pin_none); - shader.emit_instruction(new AluInstr(op1_mov, dst, src, {alu_write})); + shader.emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::write)); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -2397,7 +2377,6 @@ emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader) AluInstr::write); shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); return true; } @@ -2413,7 +2392,6 @@ emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader) AluInstr::write); shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); return true; } @@ -2429,7 +2407,6 @@ emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader) 
AluInstr::write); shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); return true; } @@ -2452,7 +2429,6 @@ emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader) AluInstr::write); shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); return true; } @@ -2465,20 +2441,24 @@ emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader) auto y = value_factory.temp_register(); auto yy = value_factory.temp_register(); - shader.emit_instruction(new AluInstr( - op1_flt32_to_flt16, x, value_factory.src(alu.src[0], 0), AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, + x, + value_factory.src(alu.src[0], 0), + AluInstr::write)); - shader.emit_instruction(new AluInstr( - op1_flt32_to_flt16, y, value_factory.src(alu.src[1], 0), AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, + y, + value_factory.src(alu.src[1], 0), + AluInstr::write)); shader.emit_instruction( - new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write)); + new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::write)); shader.emit_instruction(new AluInstr(op2_or_int, value_factory.dest(alu.def, 0, pin_free), x, yy, - AluInstr::last_write)); + AluInstr::write)); return true; } @@ -2489,7 +2469,7 @@ emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader) shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.def, 0, pin_free), value_factory.src64(alu.src[0], 0, comp), - AluInstr::last_write)); + AluInstr::write)); return true; } @@ -2500,7 +2480,7 @@ emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader) shader.emit_instruction(new AluInstr(op1_flt16_to_flt32, value_factory.dest(alu.def, 0, pin_free), value_factory.src(alu.src[0], 0), - AluInstr::last_write)); + AluInstr::write)); return true; } static bool @@ -2512,12 +2492,12 @@ emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader) tmp, 
value_factory.src(alu.src[0], 0), value_factory.literal(16), - AluInstr::last_write)); + AluInstr::write)); shader.emit_instruction(new AluInstr(op1_flt16_to_flt32, value_factory.dest(alu.def, 0, pin_free), tmp, - AluInstr::last_write)); + AluInstr::write)); return true; } @@ -2533,7 +2513,7 @@ emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) ir = new AluInstr(opcode, value_factory.dest(alu.def, 0, pin_free), value_factory.src(src0, 0), - AluInstr::last_write); + AluInstr::write); ir->set_alu_flag(alu_is_trans); shader.emit_instruction(ir); return true; @@ -2548,10 +2528,7 @@ emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader assert(alu.def.num_components == 1); auto temp = value_factory.temp_register(); - ir = new AluInstr(op1_trunc, - temp, - value_factory.src(alu.src[0], 0), - AluInstr::last_write); + ir = new AluInstr(op1_trunc, temp, value_factory.src(alu.src[0], 0), AluInstr::write); shader.emit_instruction(ir); ir = new AluInstr(opcode, @@ -2560,7 +2537,6 @@ emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader AluInstr::write); if (opcode == op1_flt_to_uint) { ir->set_alu_flag(alu_is_trans); - ir->set_alu_flag(alu_last_instr); } shader.emit_instruction(ir); return true; @@ -2574,7 +2550,7 @@ emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shade assert(alu.def.num_components == 1); - const std::set flags({alu_write, alu_last_instr, alu_is_cayman_trans}); + const std::set flags({alu_write, alu_is_cayman_trans}); unsigned ncomp = 3; @@ -2602,7 +2578,7 @@ emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) value_factory.dest(alu.def, 0, pin_free), value_factory.src(src0, 0), value_factory.src(src1, 0), - AluInstr::last_write); + AluInstr::write); ir->set_alu_flag(alu_is_trans); shader.emit_instruction(ir); @@ -2619,7 +2595,7 @@ emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shade unsigned 
last_slot = 4; - const std::set flags({alu_write, alu_last_instr, alu_is_cayman_trans}); + const std::set flags({alu_write, alu_is_cayman_trans}); for (unsigned k = 0; k < alu.def.num_components; ++k) { AluInstr::SrcValues srcs(2 * last_slot); @@ -2657,14 +2633,11 @@ emit_alu_cube(const nir_alu_instr& alu, Shader& shader) AluInstr::write); group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); shader.emit_instruction(group); return true; } const std::set AluInstr::empty; const std::set AluInstr::write({alu_write}); -const std::set AluInstr::last({alu_last_instr}); -const std::set AluInstr::last_write({alu_write, alu_last_instr}); } // namespace r600 diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h index 2577785c6b0..62f7903e8ab 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h @@ -153,8 +153,6 @@ public: static const std::set empty; static const std::set write; - static const std::set last; - static const std::set last_write; std::tuple indirect_addr() const; void update_indirect_addr(PRegister old_reg, PRegister reg) override; diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp index 7a6cee4fd92..3866286d10c 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp @@ -119,7 +119,7 @@ LDSReadInstr::split(std::vector& out_block, AluInstr *last_lds_instr auto instr = new AluInstr(op1_mov, dest, new InlineConstant(ALU_SRC_LDS_OQ_A_POP), - AluInstr::last_write); + AluInstr::write); instr->add_required_instr(last_lds_instr); instr->set_blockid(block_id(), index()); instr->set_always_keep(); @@ -307,7 +307,7 @@ LDSAtomicInstr::split(std::vector& out_block, AluInstr *last_lds_ins } } - auto op_instr = new AluInstr(m_opcode, srcs, {}); + auto op_instr = new AluInstr(m_opcode, srcs, AluInstr::empty); op_instr->set_blockid(block_id(), 
index()); if (last_lds_instr) { @@ -322,7 +322,7 @@ LDSAtomicInstr::split(std::vector& out_block, AluInstr *last_lds_ins auto read_instr = new AluInstr(op1_mov, m_dest, new InlineConstant(ALU_SRC_LDS_OQ_A_POP), - AluInstr::last_write); + AluInstr::write); read_instr->add_required_instr(op_instr); read_instr->set_blockid(block_id(), index()); read_instr->set_alu_flag(alu_lds_group_end); diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp index f76923c913b..25191d3f7d5 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp @@ -193,7 +193,7 @@ GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader) if (!src_val->as_register()) { auto temp_src_val = vf.temp_register(); shader.emit_instruction( - new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write)); + new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::write)); src_as_register = temp_src_val; } else src_as_register = src_val->as_register(); @@ -219,8 +219,7 @@ GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader) else shader.emit_instruction( new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); - shader.emit_instruction( - new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val, AluInstr::write)); ir = new GDSInstr(op, dest, tmp, 0, nullptr); } shader.emit_instruction(ir); @@ -297,7 +296,7 @@ GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader) new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); shader.emit_instruction( - new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write)); + new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::write)); ir = new GDSInstr(read_result ? 
DS_OP_ADD_RET : DS_OP_ADD, dest, tmp, 0, nullptr); } shader.emit_instruction(ir); @@ -340,7 +339,7 @@ GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader) new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); shader.emit_instruction( - new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write)); + new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::write)); ir = new GDSInstr(opcode, tmp_dest, tmp, 0, nullptr); } @@ -350,7 +349,7 @@ GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader) vf.dest(instr->def, 0, pin_free), tmp_dest, vf.one_i(), - AluInstr::last_write)); + AluInstr::write)); return true; } @@ -594,8 +593,8 @@ RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader) auto addr_temp = vf.temp_register(); /** Should be lowered in nir */ - shader.emit_instruction(new AluInstr( - op2_lshr_int, addr_temp, addr, vf.literal(2), {alu_write, alu_last_instr})); + shader.emit_instruction( + new AluInstr(op2_lshr_int, addr_temp, addr, vf.literal(2), AluInstr::write)); const EVTXDataFormat formats[4] = {fmt_32, fmt_32_32, fmt_32_32_32, fmt_32_32_32_32}; @@ -631,8 +630,7 @@ RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader) auto addr_vec = vf.temp_vec4(pin_chan, {0, 7, 7, 7}); shader.emit_instruction( - new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2), - AluInstr::last_write)); + new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2), AluInstr::write)); RegisterVec4::Swizzle value_swz = {0,7,7,7}; auto mask = nir_intrinsic_write_mask(intr); @@ -651,8 +649,6 @@ RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader) shader.emit_instruction(ir); } } - if (ir) - ir->set_alu_flag(alu_last_instr); auto store = new RatInstr(cf_mem_rat_cacheless, RatInstr::STORE_RAW, @@ -688,14 +684,17 @@ RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader) auto addr_vec = vf.temp_vec4(pin_group, {0, 1, 2, 7}); if (i 
== 0) { shader.emit_instruction( - new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write)); + new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::write)); } else { - shader.emit_instruction(new AluInstr( - op2_add_int, addr_vec[0], addr_base, vf.literal(i), AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op2_add_int, + addr_vec[0], + addr_base, + vf.literal(i), + AluInstr::write)); } auto value = vf.src(instr->src[0], i); PRegister v = vf.temp_register(0); - shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::write)); auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan); auto store = new RatInstr(cf_mem_rat, RatInstr::STORE_TYPED, @@ -730,7 +729,7 @@ RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader) auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3}); shader.emit_instruction( - new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write)); + new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::write)); shader.emit_instruction( new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write)); @@ -742,10 +741,10 @@ RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader) new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3], vf.src(intr->src[2], 0), - {alu_last_instr, alu_write})); + AluInstr::write)); } else { - shader.emit_instruction(new AluInstr( - op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write)); + shader.emit_instruction( + new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::write)); } RegisterVec4 out_vec(coord, coord, coord, coord, pin_chgr); @@ -827,12 +826,12 @@ RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader) swizzle = {0, 2, 1, 3}; for (int i = 0; i < 4; ++i) { - auto flags = i != 3 ? 
AluInstr::write : AluInstr::last_write; + auto flags = i != 3 ? AluInstr::write : AluInstr::write; shader.emit_instruction( new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags)); } for (int i = 0; i < 4; ++i) { - auto flags = i != 3 ? AluInstr::write : AluInstr::last_write; + auto flags = i != 3 ? AluInstr::write : AluInstr::write; shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags)); } @@ -874,7 +873,7 @@ RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader) swizzle = {0, 2, 1, 3}; for (int i = 0; i < 4; ++i) { - auto flags = i != 3 ? AluInstr::write : AluInstr::last_write; + auto flags = i != 3 ? AluInstr::write : AluInstr::write; shader.emit_instruction( new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags)); } @@ -889,12 +888,12 @@ RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader) new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3], vf.src(intrin->src[3], 0), - AluInstr::last_write)); + AluInstr::write)); } else { shader.emit_instruction( new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[3], 0), AluInstr::write)); shader.emit_instruction( - new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write)); + new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::write)); } auto atomic = @@ -988,7 +987,7 @@ RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader) vf.uniform(lookup_resid / 4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4, R600_BUFFER_INFO_CONST_BUFFER), - AluInstr::last_write)); + AluInstr::write)); } else { /* If the addressing is indirect we have to get the z-value by * using a binary search */ @@ -1014,7 +1013,7 @@ RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader) high_bit, vf.src(intrin->src[0], 0), vf.literal(2), - AluInstr::last_write)); + AluInstr::write)); shader.emit_instruction(new LoadFromBuffer(trgt, {0, 1, 2, 3}, @@ -1027,10 +1026,18 @@ 
RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader) // this may be wrong shader.emit_instruction(new AluInstr( op3_cnde_int, comp1, high_bit, trgt[0], trgt[2], AluInstr::write)); - shader.emit_instruction(new AluInstr( - op3_cnde_int, comp2, high_bit, trgt[1], trgt[3], AluInstr::last_write)); - shader.emit_instruction(new AluInstr( - op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op3_cnde_int, + comp2, + high_bit, + trgt[1], + trgt[3], + AluInstr::write)); + shader.emit_instruction(new AluInstr(op3_cnde_int, + dest[2], + low_bit, + comp1, + comp2, + AluInstr::write)); } } else { auto dest = vf.dest_vec4(intrin->def, pin_group); @@ -1071,7 +1078,7 @@ RatInstr::emit_image_samples(nir_intrinsic_instr *intrin, Shader& shader) res_id, dyn_offset)); - shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::write)); return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp index 74c662365c1..cf946140140 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp @@ -615,13 +615,12 @@ TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader) shader.emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); shader.emit_instruction( new AluInstr(op2_or_int, dst[3], tmp_w, vf.uniform(buf_sel + 1, 0, R600_BUFFER_INFO_CONST_BUFFER), - AluInstr::last_write)); + AluInstr::write)); } return true; @@ -661,15 +660,13 @@ TexInstr::emit_tex_txs(nir_tex_instr *tex, } else { int id = 2 * tex->texture_index + (512 + R600_BUFFER_INFO_OFFSET / 16) + 1; auto src = vf.uniform(id, 1, R600_BUFFER_INFO_CONST_BUFFER); - shader.emit_instruction( - new AluInstr(op1_mov, dest[0], src, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, dest[0], src, AluInstr::write)); 
shader.set_flag(Shader::sh_uses_tex_buffer); } } else { auto src_lod = vf.temp_register(); - shader.emit_instruction( - new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, src_lod, src.lod, AluInstr::write)); RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free); @@ -691,7 +688,7 @@ TexInstr::emit_tex_txs(nir_tex_instr *tex, tex->texture_index & 3, R600_BUFFER_INFO_CONST_BUFFER); - auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write); + auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::write); shader.emit_instruction(alu); shader.set_flag(Shader::sh_txs_cube_array_comp); } @@ -746,9 +743,6 @@ TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shade shader.emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); - return src_coord; } @@ -887,8 +881,6 @@ TexInstr::emit_tex_lod(nir_tex_instr *tex, Inputs& src, Shader& shader) ir = new AluInstr(op1_mov, src_coord[i], src.coord[i], AluInstr::write); shader.emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); auto irt = new TexInstr(TexInstr::get_tex_lod, dst, diff --git a/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp b/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp index 25385e1a262..7d62305e0d5 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp @@ -124,11 +124,11 @@ InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader) for (int i = 0; i < literal->def.num_components; ++i) { auto dest0 = m_value_factory.dest(literal->def, 2 * i, pin_none); auto src0 = m_value_factory.literal(literal->value[i].u64 & 0xffffffff); - shader.emit_instruction(new AluInstr(op1_mov, dest0, src0, {alu_write})); + shader.emit_instruction(new AluInstr(op1_mov, dest0, src0, AluInstr::write)); auto dest1 = m_value_factory.dest(literal->def, 2 * i + 1, pin_none); auto src1 = 
m_value_factory.literal((literal->value[i].u64 >> 32) & 0xffffffff); - shader.emit_instruction(new AluInstr(op1_mov, dest1, src1, AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op1_mov, dest1, src1, AluInstr::write)); } } else { Pin pin = literal->def.num_components == 1 ? pin_free : pin_none; @@ -156,11 +156,9 @@ InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader) src = m_value_factory.literal(v); } - ir = new AluInstr(op1_mov, dest, src, {alu_write}); + ir = new AluInstr(op1_mov, dest, src, AluInstr::write); shader.emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); } return true; } @@ -196,7 +194,7 @@ InstrFactory::process_undef(nir_undef_instr *undef, Shader& shader) for (int i = 0; i < undef->def.num_components; ++i) { auto dest = shader.value_factory().undef(undef->def.index, i); shader.emit_instruction( - new AluInstr(op1_mov, dest, value_factory().zero(), AluInstr::last_write)); + new AluInstr(op1_mov, dest, value_factory().zero(), AluInstr::write)); } return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index ced12b3319b..c87b2138f3f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -395,10 +395,8 @@ Shader::allocate_reserved_registers() m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end); if (!m_atomics.empty()) { m_atomic_update = value_factory().temp_register(); - auto alu = new AluInstr(op1_mov, - m_atomic_update, - value_factory().one_i(), - AluInstr::last_write); + auto alu = + new AluInstr(op1_mov, m_atomic_update, value_factory().one_i(), AluInstr::write); alu->set_alu_flag(alu_no_schedule_bias); emit_instruction(alu); } @@ -410,23 +408,27 @@ Shader::allocate_reserved_registers() auto temp2 = value_factory().temp_register(2); auto group = new AluGroup(); - group->add_instruction(new AluInstr( - op1_mbcnt_32lo_accum_prev_int, temp0, 
value_factory().literal(-1), {alu_write})); - group->add_instruction(new AluInstr( - op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write})); + group->add_instruction(new AluInstr(op1_mbcnt_32lo_accum_prev_int, + temp0, + value_factory().literal(-1), + AluInstr::write)); + group->add_instruction(new AluInstr(op1_mbcnt_32hi_int, + temp1, + value_factory().literal(-1), + AluInstr::write)); emit_instruction(group); emit_instruction(new AluInstr(op3_muladd_uint24, temp2, value_factory().inline_const(ALU_SRC_SE_ID, 0), value_factory().literal(256), value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0), - {alu_write, alu_last_instr})); + AluInstr::write)); emit_instruction(new AluInstr(op3_muladd_uint24, m_rat_return_address, temp2, value_factory().literal(0x40), temp0, - {alu_write, alu_last_instr})); + AluInstr::write)); } } @@ -713,13 +715,13 @@ Shader::process_if(nir_if *if_stmt) EAluOp op = child_block_empty(if_stmt->then_list) ? op2_prede_int : op2_pred_setne_int; + auto flags = {alu_update_exec, alu_last_instr, alu_update_pred}; + AluInstr *pred = new AluInstr(op, value_factory().temp_register(), value, value_factory().zero(), - AluInstr::last); - pred->set_alu_flag(alu_update_exec); - pred->set_alu_flag(alu_update_pred); + flags); pred->set_cf_type(cf_alu_push_before); IfInstr *ir = new IfInstr(pred); @@ -850,8 +852,6 @@ Shader::emit_tex_fdd(const nir_intrinsic_instr* intr, int opcode, bool fine) mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write); emit_instruction(mv); } - if (mv) - mv->set_alu_flag(alu_last_instr); auto dst = value_factory_.dest_vec4(intr->def, pin_group); RegisterVec4::Swizzle dst_swz = {7, 7, 7, 7}; @@ -992,7 +992,7 @@ Shader::emit_load_to_register(PVirtualValue src, int chan) if (!dest || chan >= 0) { dest = value_factory().temp_register(chan); dest->set_pin(pin_free); - emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write)); } return 
dest; } @@ -1170,7 +1170,7 @@ Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) uav_id = uav_id_val->as_register(); } else { uav_id = vf.temp_register(); - emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, AluInstr::write)); } } return std::make_pair(offset, uav_id); @@ -1200,8 +1200,6 @@ Shader::emit_store_scratch(nir_intrinsic_instr *intr) if (!ir) return true; - ir->set_alu_flag(alu_last_instr); - auto address = vf.src(intr->src[1], 0); int align = nir_intrinsic_align_mul(intr); @@ -1224,7 +1222,7 @@ Shader::emit_store_scratch(nir_intrinsic_instr *intr) ws_ir = new ScratchIOInstr(value, offset, align, align_offset, writemask); } else { auto addr_temp = vf.temp_register(0); - auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write); + auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::write); load_addr->set_alu_flag(alu_no_schedule_bias); emit_instruction(load_addr); @@ -1272,7 +1270,7 @@ Shader::emit_load_scratch(nir_intrinsic_instr *intr) ir = new ScratchIOInstr(dest, offset, align, align_offset, 0xf, true); } else { auto addr_temp = value_factory().temp_register(0); - auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::last_write); + auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::write); load_addr->set_alu_flag(alu_no_schedule_bias); emit_instruction(load_addr); @@ -1295,7 +1293,7 @@ bool Shader::emit_load_global(nir_intrinsic_instr *intr) auto src = src_value->as_register(); if (!src) { src = value_factory().temp_register(); - emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::write)); } auto load = new LoadFromBuffer(dest, {0,7,7,7}, src, 0, 1, NULL, fmt_32); load->set_mfc(4); @@ -1490,8 +1488,7 @@ bool Shader::emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset) { 
auto src = value_factory().temp_register(); - emit_instruction( - new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::write)); auto dest = value_factory().dest_vec4(instr->def, pin_group); auto fetch = new LoadFromBuffer(dest, @@ -1512,8 +1509,7 @@ bool Shader::emit_get_lds_info_uint(nir_intrinsic_instr *instr, int offset) { auto src = value_factory().temp_register(); - emit_instruction( - new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::write)); auto dest = value_factory().dest_vec4(instr->def, pin_group); auto fetch = new LoadFromBuffer(dest, @@ -1540,7 +1536,7 @@ Shader::emit_shader_clock(nir_intrinsic_instr *instr) group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->def, 1, pin_chan), vf.inline_const(ALU_SRC_TIME_HI, 0), - AluInstr::last_write)); + AluInstr::write)); emit_instruction(group); return true; } @@ -1551,7 +1547,6 @@ Shader::emit_group_barrier(nir_intrinsic_instr *intr) assert(m_control_flow_depth == 0); (void)intr; auto op = new AluInstr(op0_group_barrier, 0); - op->set_alu_flag(alu_last_instr); emit_instruction(op); return true; } @@ -1632,11 +1627,9 @@ Shader::load_ubo(nir_intrinsic_instr *instr) ir = new AluInstr(op1_mov, value_factory().dest(instr->def, i, pin), uniform, - {alu_write}); + AluInstr::write); emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } else { int buf_cmp = nir_intrinsic_component(instr); @@ -1651,8 +1644,6 @@ Shader::load_ubo(nir_intrinsic_instr *instr) ir = new AluInstr(op1_mov, dest, u, AluInstr::write); emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); m_indirect_files |= 1 << TGSI_FILE_CONSTANT; return true; } @@ -1670,7 +1661,7 @@ bool Shader::emit_simple_mov(nir_def& def, int chan, PVirtualValue src, Pin pin) { auto dst = value_factory().dest(def, chan, 
pin); - emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::write)); return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp index ac10b3b2324..ea61f6b95c7 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp @@ -81,7 +81,7 @@ ComputeShader::emit_load_from_info_buffer(nir_intrinsic_instr *instr, int offset emit_instruction(new AluInstr(op1_mov, m_zero_register, value_factory().inline_const(ALU_SRC_0, 0), - AluInstr::last_write)); + AluInstr::write)); } auto dest = value_factory().dest_vec4(instr->def, pin_group); @@ -109,8 +109,8 @@ ComputeShader::emit_load_3vec(nir_intrinsic_instr *instr, for (int i = 0; i < 3; ++i) { auto dest = vf.dest(instr->def, i, pin_none); - emit_instruction(new AluInstr( - op1_mov, dest, src[i], i == 2 ? AluInstr::last_write : AluInstr::write)); + emit_instruction( + new AluInstr(op1_mov, dest, src[i], i == 2 ? 
AluInstr::write : AluInstr::write)); } return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp index cf02c9efe31..094fbb5386f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp @@ -72,7 +72,7 @@ FragmentShader::load_input(nir_intrinsic_instr *intr) vf.dest(intr->def, 0, pin_none), m_face_input, vf.inline_const(ALU_SRC_0, 0), - AluInstr::last_write); + AluInstr::write); emit_instruction(ir); return true; } @@ -143,7 +143,7 @@ FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr) nullptr, value_factory().src(intr->src[0], 0), value_factory().zero(), - {AluInstr::last})); + AluInstr::empty)); return true; case nir_intrinsic_terminate: @@ -152,7 +152,7 @@ FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr) nullptr, value_factory().zero(), value_factory().zero(), - {AluInstr::last})); + AluInstr::empty)); return true; case nir_intrinsic_load_sample_mask_in: if (m_apply_sample_mask) { @@ -286,9 +286,9 @@ FragmentShader::emit_load_sample_mask_in(nir_intrinsic_instr *instr) assert(m_sample_mask_reg); emit_instruction( - new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::last_write)); + new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::write)); emit_instruction( - new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::last_write)); + new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::write)); return true; } @@ -298,7 +298,7 @@ FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr *instr) assert(m_helper_invocation); auto& vf = value_factory(); emit_instruction( - new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::last_write)); + new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::write)); RegisterVec4 destvec{m_helper_invocation, nullptr, nullptr, nullptr, pin_group}; auto vtx = new LoadFromBuffer(destvec, @@ 
-312,7 +312,7 @@ FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr *instr) vtx->set_fetch_flag(FetchInstr::use_tc); vtx->set_always_keep(); auto dst = value_factory().dest(instr->def, 0, pin_free); - auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::last_write); + auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::write); ir->add_required_instr(vtx); emit_instruction(vtx); emit_instruction(ir); @@ -647,7 +647,6 @@ bool FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr) { auto& vf = value_factory(); - AluInstr *ir = nullptr; for (unsigned i = 0; i < intr->def.num_components; ++i) { sfn_log << SfnLog::io << "Inject register " << *m_interpolated_inputs[nir_intrinsic_base(intr)][i] << "\n"; @@ -657,8 +656,6 @@ FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr) i, m_interpolated_inputs[nir_intrinsic_base(intr)][index]); } - if (ir) - ir->set_alu_flag(alu_last_instr); return true; } @@ -697,10 +694,10 @@ FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr) new AluInstr(op1_interp_load_p0, tmp, new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp), - AluInstr::last_write); + AluInstr::write); emit_instruction(ir); - emit_instruction(new AluInstr( - op1_mov, vf.dest(intr->def, i, pin_chan), tmp, AluInstr::last_write)); + emit_instruction( + new AluInstr(op1_mov, vf.dest(intr->def, i, pin_chan), tmp, AluInstr::write)); } else { ir = new AluInstr(op1_interp_load_p0, @@ -710,7 +707,6 @@ FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr) emit_instruction(ir); } } - ir->set_alu_flag(alu_last_instr); return true; } @@ -793,7 +789,6 @@ FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr) emit_instruction(ir); } assert(ir); - ir->set_alu_flag(alu_last_instr); } return true; @@ -895,22 +890,22 @@ FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr *instr) auto tmp1 = vf.temp_register(); emit_instruction( - new AluInstr(op3_muladd, tmp0, grad[0], slope[2], 
interpolator.j, {alu_write})); - emit_instruction(new AluInstr( - op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr})); + new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, AluInstr::write)); + emit_instruction( + new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, AluInstr::write)); emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->def, 0, pin_none), grad[3], slope[3], tmp1, - {alu_write})); + AluInstr::write)); emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->def, 1, pin_none), grad[2], slope[3], tmp0, - {alu_write, alu_last_instr})); + AluInstr::write)); return true; } @@ -947,17 +942,21 @@ FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr *instr) auto tmp0 = vf.temp_register(); auto tmp1 = vf.temp_register(); emit_instruction( - new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write})); - emit_instruction(new AluInstr( - op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr})); - emit_instruction(new AluInstr( - op3_muladd, vf.dest(instr->def, 0, pin_none), help[3], ofs_y, tmp1, {alu_write})); + new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, AluInstr::write)); + emit_instruction( + new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, AluInstr::write)); + emit_instruction(new AluInstr(op3_muladd, + vf.dest(instr->def, 0, pin_none), + help[3], + ofs_y, + tmp1, + AluInstr::write)); emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->def, 1, pin_none), help[2], ofs_y, tmp0, - {alu_write, alu_last_instr})); + AluInstr::write)); return true; } @@ -980,12 +979,11 @@ FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest, dest[chan], i & 1 ? params.j : params.i, new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan), - i == 0 ? AluInstr::write : AluInstr::last); + i == 0 ? 
AluInstr::write : AluInstr::empty); ir->set_bank_swizzle(alu_vec_210); success = group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); if (success) emit_instruction(group); return success; @@ -1012,7 +1010,6 @@ FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest, ir->set_bank_swizzle(alu_vec_210); success = group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); if (success) emit_instruction(group); return success; @@ -1037,7 +1034,6 @@ FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest, ir->set_bank_swizzle(alu_vec_210); success = group->add_instruction(ir); } - ir->set_alu_flag(alu_last_instr); if (success) emit_instruction(group); diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp index aeb7c6b4fe3..dc04d6f50fd 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp @@ -149,8 +149,7 @@ GeometryShader::do_allocate_reserved_registers() for (int i = 0; i < 4; ++i) { m_export_base[i] = value_factory().temp_register(0, false); - emit_instruction( - new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write)); + emit_instruction(new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::write)); } m_ring_item_sizes[0] = m_next_input_ring_offset; @@ -214,7 +213,7 @@ GeometryShader::emit_vertex(nir_intrinsic_instr *instr, bool cut) m_export_base[stream], m_export_base[stream], value_factory().literal(m_noutputs), - AluInstr::last_write); + AluInstr::write); emit_instruction(ir); } @@ -288,7 +287,6 @@ GeometryShader::store_output(nir_intrinsic_instr *instr) emit_instruction(ir); } } - ir->set_alu_flag(alu_last_instr); m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, tmp, @@ -396,7 +394,7 @@ GeometryShader::emit_adj_fix() adjhelp0, m_primitive_id, value_factory().one_i(), - AluInstr::last_write)); + AluInstr::write)); int 
reg_indices[R600_GS_VERTEX_INDIRECT_TOTAL]; int rotate_indices[R600_GS_VERTEX_INDIRECT_TOTAL] = {4, 5, 0, 1, 2, 3}; @@ -418,7 +416,6 @@ GeometryShader::emit_adj_fix() emit_instruction(ir); } - ir->set_alu_flag(alu_last_instr); for (int i = 0; i < R600_GS_VERTEX_INDIRECT_TOTAL; i++) m_per_vertex_offsets[i] = adjhelp[i]; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp index 91b5f729093..0eb32b79a99 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp @@ -133,8 +133,8 @@ VertexExportForFs::finalize() { if (m_vs_as_gs_a) { auto primid = m_parent->value_factory().temp_vec4(pin_group, {2, 7, 7, 7}); - m_parent->emit_instruction(new AluInstr( - op1_mov, primid[0], m_parent->primitive_id(), AluInstr::last_write)); + m_parent->emit_instruction( + new AluInstr(op1_mov, primid[0], m_parent->primitive_id(), AluInstr::write)); int param = m_last_param_export ? m_last_param_export->location() + 1 : 0; m_last_param_export = new ExportInstr(ExportInstr::param, param, primid); @@ -202,9 +202,8 @@ VertexExportForFs::emit_varying_pos(const store_loc& store_info, auto src = m_parent->value_factory().src(intr.src[0], 0); auto clamped = m_parent->value_factory().temp_register(); m_parent->emit_instruction( - new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp, alu_last_instr})); - auto alu = - new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::last_write); + new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp})); + auto alu = new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::write); if (m_parent->chip_class() < ISA_CC_EVERGREEN) alu->set_alu_flag(alu_is_trans); m_parent->emit_instruction(alu); @@ -277,8 +276,6 @@ VertexExportForFs::emit_varying_param(const store_loc& store_info, m_parent->emit_instruction(alu); } } - if (alu) - alu->set_alu_flag(alu_last_instr); m_last_param_export = new ExportInstr(ExportInstr::param, 
export_slot, value); m_output_registers[nir_intrinsic_base(&intr)] = &m_last_param_export->value(); @@ -352,8 +349,6 @@ VertexExportForFs::emit_stream(int stream) alu = new AluInstr(op1_mov, tmp[i][j], (*so_gpr[i])[j + sc], {alu_write}); m_parent->emit_instruction(alu); } - if (alu) - alu->set_alu_flag(alu_last_instr); start_comp[i] = 0; so_gpr[i] = &tmp[i]; @@ -657,8 +652,6 @@ VertexExportForGS::do_store_output(const store_loc& store_info, AluInstr::write); m_parent->emit_instruction(ir); } - if (ir) - ir->set_alu_flag(alu_last_instr); m_parent->emit_instruction(new MemRingOutInstr( cf_mem_ring, MemRingOutInstr::mem_write, value, ring_offset >> 2, 4, nullptr)); diff --git a/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp b/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp index 71588b387e9..ad6860fd6f8 100644 --- a/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp @@ -185,7 +185,8 @@ auto AddressSplitVisitor::load_index_register_eg(Instr *instr, const EAluOp idx_op[2] = {op1_set_cf_idx0, op1_set_cf_idx1}; - m_last_idx_load[idx_id] = new AluInstr(idx_op[idx_id], idx, m_vf.addr(), {}); + m_last_idx_load[idx_id] = + new AluInstr(idx_op[idx_id], idx, m_vf.addr(), AluInstr::empty); m_current_block->insert(m_block_iterator, m_last_idx_load[idx_id]); for (auto&& i : m_last_idx_use[idx_id]) m_last_ar_load->add_required_instr(i); @@ -208,7 +209,7 @@ auto AddressSplitVisitor::load_index_register_ca(PRegister index) -> int if (idx_id < 0) { idx_id = pick_idx(); auto idx = m_vf.idx_reg(idx_id); - m_last_idx_load[idx_id] = new AluInstr(op1_mova_int, idx, index, {}); + m_last_idx_load[idx_id] = new AluInstr(op1_mova_int, idx, index, AluInstr::empty); m_current_block->insert(m_block_iterator, m_last_idx_load[idx_id]); for (auto&& i : m_last_idx_use[idx_id]) @@ -249,7 +250,7 @@ void AddressSplitVisitor::load_ar(Instr *instr, PRegister addr) { auto ar = m_vf.addr(); - m_last_ar_load = new 
AluInstr(op1_mova_int, ar, addr, {}); + m_last_ar_load = new AluInstr(op1_mova_int, ar, addr, AluInstr::empty); m_current_block->insert(m_block_iterator, m_last_ar_load); ar->add_use(instr); m_current_addr = addr; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp index d110f50bdb9..c5f7616c5d0 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp @@ -37,7 +37,7 @@ TEST_F(InstrTest, test_alu_uni_op_mov) AluInstr alu(op1_mov, new Register(128, 2, pin_none), new Register(129, 0, pin_chan), - {alu_write}); + AluInstr::write); EXPECT_TRUE(alu.has_alu_flag(alu_write)); @@ -187,14 +187,14 @@ TEST_F(InstrTest, test_alu_op1_comp) auto r129y = new Register(129, 1, pin_none); auto r130x = new Register(130, 0, pin_none); - AluInstr alu1(op1_mov, r128z, r129x, {alu_write}); - EXPECT_NE(alu1, AluInstr(op1_mov, r128y, r129x, {alu_write})); - EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129xc, {alu_write})); - EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129y, {alu_write})); - EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r130x, {alu_write})); + AluInstr alu1(op1_mov, r128z, r129x, AluInstr::write); + EXPECT_NE(alu1, AluInstr(op1_mov, r128y, r129x, AluInstr::write)); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129xc, AluInstr::write)); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129y, AluInstr::write)); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r130x, AluInstr::write)); EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129x, {alu_write, alu_last_instr})); - EXPECT_NE(alu1, AluInstr(op1_flt_to_int, r128z, r129x, {alu_write})); - EXPECT_NE(alu1, AluInstr(op1_mov, r128zc, r129x, {alu_write})); + EXPECT_NE(alu1, AluInstr(op1_flt_to_int, r128z, r129x, AluInstr::write)); + EXPECT_NE(alu1, AluInstr(op1_mov, r128zc, r129x, AluInstr::write)); EXPECT_EQ(alu1, alu1); } @@ -205,14 +205,17 @@ TEST_F(InstrTest, test_alu_op2_comp) auto r128y = new Register(128, 1, 
pin_none); auto r128z = new Register(128, 2, pin_none); - AluInstr alu1(op2_add, r128z, r128x, r128y, {alu_write}); + AluInstr alu1(op2_add, r128z, r128x, r128y, AluInstr::write); EXPECT_NE( - alu1, AluInstr(op2_add, r128z, r128x, new Register(129, 2, pin_none), {alu_write})); + alu1, + AluInstr(op2_add, r128z, r128x, new Register(129, 2, pin_none), AluInstr::write)); EXPECT_NE( - alu1, AluInstr(op2_add, r128z, r128x, new Register(128, 0, pin_none), {alu_write})); + alu1, + AluInstr(op2_add, r128z, r128x, new Register(128, 0, pin_none), AluInstr::write)); EXPECT_NE( - alu1, AluInstr(op2_add, r128z, r128x, new Register(128, 1, pin_chan), {alu_write})); + alu1, + AluInstr(op2_add, r128z, r128x, new Register(128, 1, pin_chan), AluInstr::write)); } TEST_F(InstrTest, test_alu_op3_comp) @@ -221,20 +224,29 @@ TEST_F(InstrTest, test_alu_op3_comp) auto r128y = new Register(128, 1, pin_none); auto r128z = new Register(128, 2, pin_none); - AluInstr alu1(op3_muladd, r128z, r128x, r128y, r128y, {alu_write}); + AluInstr alu1(op3_muladd, r128z, r128x, r128y, r128y, AluInstr::write); - EXPECT_NE( - alu1, - AluInstr( - op3_muladd, r128z, r128x, r128y, new Register(129, 2, pin_none), {alu_write})); - EXPECT_NE( - alu1, - AluInstr( - op3_muladd, r128z, r128x, r128y, new Register(128, 0, pin_none), {alu_write})); - EXPECT_NE( - alu1, - AluInstr( - op3_muladd, r128z, r128x, r128y, new Register(128, 1, pin_chan), {alu_write})); + EXPECT_NE(alu1, + AluInstr(op3_muladd, + r128z, + r128x, + r128y, + new Register(129, 2, pin_none), + AluInstr::write)); + EXPECT_NE(alu1, + AluInstr(op3_muladd, + r128z, + r128x, + r128y, + new Register(128, 0, pin_none), + AluInstr::write)); + EXPECT_NE(alu1, + AluInstr(op3_muladd, + r128z, + r128x, + r128y, + new Register(128, 1, pin_chan), + AluInstr::write)); } TEST_F(InstrTest, test_alu_op3_ne) @@ -258,7 +270,7 @@ TEST_F(InstrTest, test_alu_op3_ne) AluInstr(op3_cnde, R130x, R130y, R130z, R131w, {alu_write, alu_last_instr})); EXPECT_NE(alu, 
AluInstr(op3_cnde, R130x, R130y, R131z, R130z, {alu_write, alu_last_instr})); - EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R131w, {alu_write})); + EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R131w, AluInstr::write)); AluInstr alu_cf_changes = alu; alu_cf_changes.set_cf_type(cf_alu_push_before); @@ -328,15 +340,15 @@ TEST_F(InstrTest, test_alu_dot4_grouped) auto i = group->begin(); EXPECT_NE(i, group->end()); ASSERT_TRUE(*i); - check(**i, AluInstr(op2_dot4_ieee, R132x, R130x, R130y, {alu_write})); + check(**i, AluInstr(op2_dot4_ieee, R132x, R130x, R130y, AluInstr::write)); ++i; EXPECT_NE(i, group->end()); ASSERT_TRUE(*i); - check(**i, AluInstr(op2_dot4_ieee, R132y, R130z, R130w, {})); + check(**i, AluInstr(op2_dot4_ieee, R132y, R130z, R130w, AluInstr::empty)); ++i; EXPECT_NE(i, group->end()); ASSERT_TRUE(*i); - check(**i, AluInstr(op2_dot4_ieee, R132z, R131x, R131y, {})); + check(**i, AluInstr(op2_dot4_ieee, R132z, R131x, R131y, AluInstr::empty)); ++i; EXPECT_NE(i, group->end()); ASSERT_TRUE(*i); diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp index 781390c32d6..f3db522496f 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp @@ -51,7 +51,7 @@ TEST_F(TestInstrFromString, test_alu_lds_read_ret) { add_dest_from_string("R1999.x"); - AluInstr expect(DS_OP_READ_RET, {new Register(1999, 0, pin_none)}, {}); + AluInstr expect(DS_OP_READ_RET, {new Register(1999, 0, pin_none)}, AluInstr::empty); check("ALU LDS READ_RET __.x : R1999.x {}", expect); } @@ -96,7 +96,7 @@ TEST_F(TestInstrFromString, test_alu_mov_neg_abs) AluInstr expect(op1_mov, new Register(2000, 1, pin_none), new Register(1999, 0, pin_none), - {alu_write}); + AluInstr::write); expect.set_source_mod(0, AluInstr::mod_abs); expect.set_source_mod(0, AluInstr::mod_neg); @@ -351,7 +351,7 @@ 
TEST_F(TestInstrFromString, test_alu_interp_xy) new Register(1024, 2, pin_chan), r0y, new InlineConstant(ALU_SRC_PARAM_BASE, 2), - {alu_write}); + AluInstr::write); expect.set_bank_swizzle(alu_vec_210); check(init, expect); @@ -369,7 +369,7 @@ TEST_F(TestInstrFromString, test_alu_interp_xy_no_write) new Register(1024, 0, pin_chan), r0x, new InlineConstant(ALU_SRC_PARAM_BASE, 2), - {}); + AluInstr::empty); expect.set_bank_swizzle(alu_vec_210); check(init, expect); diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp index d3c10b51373..407b3ec0804 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp @@ -79,11 +79,11 @@ TEST_F(LiveRangeTests, SimpleAdd) RegisterVec4::Swizzle dummy; ValueFactory vf; - Register *r0x = vf.dest_from_string("S0.x@free"); - Register *r1x = vf.dest_from_string("S1.x@free"); + Register *r0x = vf.dest_from_string("S0.x@chan"); + Register *r1x = vf.dest_from_string("S1.x@chan"); RegisterVec4 r2 = vf.dest_vec4_from_string("S2.xyzw", dummy, pin_none); - Register *r3x = vf.dest_from_string("S3.x@free"); - RegisterVec4 r4 = vf.dest_vec4_from_string("S4.xyzw", dummy, pin_group); + Register *r3x = vf.dest_from_string("S3.x@chan"); + RegisterVec4 r4 = vf.dest_vec4_from_string("S4.xyzw", dummy, pin_chgr); LiveRangeMap expect = vf.prepare_live_range_map(); @@ -98,7 +98,7 @@ TEST_F(LiveRangeTests, SimpleAdd) for (int i = 0; i < 4; ++i) expect.set_life_range(*r4[i], 5, 6); - check(add_add_1_expect_from_nir, expect); + check(add_add_1_expect_from_nir_scheduled, expect); } TEST_F(LiveRangeTests, SimpleAInterpolation) diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp index 47d6a1b2bc1..bc14638affe 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp +++ 
b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp @@ -514,30 +514,30 @@ TEST_F(TestShaderFromNir, fs_shed_tex_coord) TEST_F(TestShaderFromNir, OptimizeAddWChanetoTrans) { const char *input = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU MOV S2.x@free{s} : I[0] {WL} - ALU MOV S3.y@free{s} : L[0x40c00000] {WL} - ALU MOV S4.z@free{s} : L[0xc1140000] {WL} - ALU MOV S5.w@free{s} : L[0xbfe00000] {WL} - ALU MOV S6.x@free{s} : L[0x3fa00000] {WL} + ALU MOV S2.x@free{s} : I[0] {W} + ALU MOV S3.y@free{s} : L[0x40c00000] {W} + ALU MOV S4.z@free{s} : L[0xc1140000] {W} + ALU MOV S5.w@free{s} : L[0xbfe00000] {W} + ALU MOV S6.x@free{s} : L[0x3fa00000] {W} ALU MOV S7.x{s} : KC0[0].x {W} ALU MOV S7.y{s} : KC0[0].y {W} ALU MOV S7.z{s} : KC0[0].z {W} - ALU MOV S7.w{s} : KC0[0].w {WL} - ALU ADD S8.y@free{s} : S3.y@free{s} -S7.x{s} {WL} - ALU ADD S9.z@free{s} : S4.z@free{s} -S7.y{s} {WL} - ALU ADD S10.w@free{s} : S5.w@free{s} -S7.z{s} {WL} - ALU ADD S11.x@free{s} : S6.x@free{s} -S7.w{s} {WL} - ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL} - ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL} - ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL} - ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU MOV S7.w{s} : KC0[0].w {W} + ALU ADD S8.y@free{s} : S3.y@free{s} -S7.x{s} {W} + ALU ADD S9.z@free{s} : S4.z@free{s} -S7.y{s} {W} + ALU ADD S10.w@free{s} : S5.w@free{s} -S7.z{s} {W} + ALU ADD S11.x@free{s} : S6.x@free{s} -S7.w{s} {W} + ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W} + ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} + ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} + ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} ALU MOV S17.x{s} : 
S12.y@free{s} {W} ALU MOV S17.y{s} : S13.z@free{s} {W} ALU MOV S17.z{s} : S14.x@free{s} {W} @@ -545,26 +545,26 @@ BLOCK_START ALU MOV S18.x@group{s} : S17.x{s} {W} ALU MOV S18.y@group{s} : S17.y{s} {W} ALU MOV S18.z@group{s} : S17.z{s} {W} - ALU MOV S18.w@group{s} : S17.w{s} {WL} + ALU MOV S18.w@group{s} : S17.w{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END)"; const char *expect = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU ADD S8.y@free{s} : L[0x40c00000] -KC0[0].x {WL} - ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL} - ALU ADD S10.w@free{s} : L[0xbfe00000] -KC0[0].z {WL} - ALU ADD S11.x@free{s} : L[0x3fa00000] -KC0[0].w {WL} + ALU ADD S8.y@free{s} : L[0x40c00000] -KC0[0].x {W} + ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {W} + ALU ADD S10.w@free{s} : L[0xbfe00000] -KC0[0].z {W} + ALU ADD S11.x@free{s} : L[0x3fa00000] -KC0[0].w {W} ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W} ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} - ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END )"; @@ -576,57 +576,57 @@ BLOCK_END TEST_F(TestShaderFromNir, PeeholeSoureModsSimple) { const char *input = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU MOV S2.x@free{s} : I[0] {WL} - ALU MOV S3.y@free{s} : L[0x40c00000] {WL} - ALU MOV S4.z@free{s} : L[0xc1140000] {WL} - ALU MOV S5.w@free{s} : L[0xbfe00000] {WL} - ALU MOV S6.x@free{s} : L[0x3fa00000] {WL} + ALU MOV S2.x@free{s} : I[0] {W} + ALU MOV S3.y@free{s} : L[0x40c00000] {W} + ALU MOV S4.z@free{s} : L[0xc1140000] {W} + 
ALU MOV S5.w@free{s} : L[0xbfe00000] {W} + ALU MOV S6.x@free{s} : L[0x3fa00000] {W} ALU MOV S7.x{s} : |KC0[0].x| {W} ALU MOV S7.y{s} : -KC0[0].y {W} ALU MOV S7.z{s} : -|KC0[0].z| {W} - ALU MOV S7.w{s} : KC0[0].w {WL} - ALU ADD S8.y@free{s} : S3.y@free{s} S7.x{s} {WL} - ALU ADD S9.z@free{s} : S4.z@free{s} S7.y{s} {WL} - ALU ADD S10.w@free{s} : S5.w@free{s} S7.z{s} {WL} - ALU ADD S11.x@free{s} : S6.x@free{s} S7.w{s} {WL} - ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL} - ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL} - ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL} - ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU MOV S7.w{s} : KC0[0].w {W} + ALU ADD S8.y@free{s} : S3.y@free{s} S7.x{s} {W} + ALU ADD S9.z@free{s} : S4.z@free{s} S7.y{s} {W} + ALU ADD S10.w@free{s} : S5.w@free{s} S7.z{s} {W} + ALU ADD S11.x@free{s} : S6.x@free{s} S7.w{s} {W} + ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W} + ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} + ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} + ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} ALU MOV S17.x{s} : S12.y@free{s} {W} ALU MOV S17.y{s} : S13.z@free{s} {W} ALU MOV S17.z{s} : S14.x@free{s} {W} - ALU MOV S17.w{s} : S15.y@free{s} {WL} + ALU MOV S17.w{s} : S15.y@free{s} {W} ALU MOV S18.x@group{s} : S17.x{s} {W} ALU MOV S18.y@group{s} : S17.y{s} {W} ALU MOV S18.z@group{s} : S17.z{s} {W} - ALU MOV S18.w@group{s} : S17.w{s} {WL} + ALU MOV S18.w@group{s} : S17.w{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END)"; const char *expect = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU ADD S8.y@free{s} : L[0x40c00000] |KC0[0].x| {WL} - ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL} - ALU 
ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {WL} - ALU ADD S11.x@free{s} : L[0x3fa00000] KC0[0].w {WL} + ALU ADD S8.y@free{s} : L[0x40c00000] |KC0[0].x| {W} + ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {W} + ALU ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {W} + ALU ADD S11.x@free{s} : L[0x3fa00000] KC0[0].w {W} ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W} ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} - ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END )"; @@ -638,34 +638,34 @@ BLOCK_END TEST_F(TestShaderFromNir, PeeholeSoureModsAbsNegTwice) { const char *input = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU MOV S2.x@free{s} : I[0] {WL} - ALU MOV S3.y@free{s} : L[0x40c00000] {WL} - ALU MOV S4.z@free{s} : L[0xc1140000] {WL} - ALU MOV S5.w@free{s} : L[0xbfe00000] {WL} - ALU MOV S6.x@free{s} : L[0x3fa00000] {WL} + ALU MOV S2.x@free{s} : I[0] {W} + ALU MOV S3.y@free{s} : L[0x40c00000] {W} + ALU MOV S4.z@free{s} : L[0xc1140000] {W} + ALU MOV S5.w@free{s} : L[0xbfe00000] {W} + ALU MOV S6.x@free{s} : L[0x3fa00000] {W} ALU MOV S7.x{s} : |KC0[0].x| {W} ALU MOV S7.y{s} : -KC0[0].y {W} ALU MOV S7.z{s} : -|KC0[0].z| {W} - ALU MOV S7.w{s} : KC0[0].w {WL} + ALU MOV S7.w{s} : KC0[0].w {W} ALU MOV S8.x : |S7.x| {W} ALU MOV S8.y : -S7.y {W} ALU MOV S8.z : -|S7.z| {W} - ALU MOV S8.w : -|S7.x| {WL} - ALU ADD S19.y@free{s} : S3.y@free{s} S8.x {WL} - ALU ADD S9.z@free{s} : S4.z@free{s} S8.y {WL} - ALU ADD S10.w@free{s} : S5.w@free{s} S8.z {WL} - ALU ADD S11.x@free{s} : S6.x@free{s} S8.w {WL} - ALU EXP_IEEE S12.y@free{s} : S19.y@free{s} + S19.y@free{s} + 
S19.y@free{s} {WL} - ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL} - ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL} - ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU MOV S8.w : -|S7.x| {W} + ALU ADD S19.y@free{s} : S3.y@free{s} S8.x {W} + ALU ADD S9.z@free{s} : S4.z@free{s} S8.y {W} + ALU ADD S10.w@free{s} : S5.w@free{s} S8.z {W} + ALU ADD S11.x@free{s} : S6.x@free{s} S8.w {W} + ALU EXP_IEEE S12.y@free{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {W} + ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} + ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W} + ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} ALU MOV S17.x{s} : S12.y@free{s} {W} ALU MOV S17.y{s} : S13.z@free{s} {W} ALU MOV S17.z{s} : S14.x@free{s} {W} @@ -673,26 +673,26 @@ BLOCK_START ALU MOV S18.x@group{s} : S17.x{s} {W} ALU MOV S18.y@group{s} : S17.y{s} {W} ALU MOV S18.z@group{s} : S17.z{s} {W} - ALU MOV S18.w@group{s} : S17.w{s} {WL} + ALU MOV S18.w@group{s} : S17.w{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END)"; const char *expect = -R"(VS + R"(VS CHIPCLASS CAYMAN INPUT LOC:0 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15 SHADER BLOCK_START - ALU ADD S19.y@free{s} : L[0x40c00000] |KC0[0].x| {WL} - ALU ADD S9.z@free{s} : L[0xc1140000] KC0[0].y {WL} - ALU ADD S10.w@free{s} : L[0xbfe00000] |KC0[0].z| {WL} - ALU ADD S11.x@free{s} : L[0x3fa00000] -|KC0[0].x| {WL} + ALU ADD S19.y@free{s} : L[0x40c00000] |KC0[0].x| {W} + ALU ADD S9.z@free{s} : L[0xc1140000] KC0[0].y {W} + ALU ADD S10.w@free{s} : L[0xbfe00000] |KC0[0].z| {W} + ALU ADD S11.x@free{s} : L[0x3fa00000] -|KC0[0].x| {W} ALU EXP_IEEE S18.x@group{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {W} ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W} ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} 
+ S10.w@free{s} {W} - ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL} + ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {W} EXPORT_DONE PARAM 0 S18.xyzw BLOCK_END )"; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp index b7ba53ba165..77881abde7d 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp @@ -57,13 +57,17 @@ EXPORT_DONE PIXEL 0 R2000.xyzw expect.push_back(new AluInstr(op1_mov, new Register(2000, 0, pin_group), new LiteralConstant(0x38000000), - {alu_write})); + AluInstr::write)); - expect.push_back(new AluInstr( - op1_mov, new Register(2000, 1, pin_group), new LiteralConstant(0x0), {alu_write})); + expect.push_back(new AluInstr(op1_mov, + new Register(2000, 1, pin_group), + new LiteralConstant(0x0), + AluInstr::write)); - expect.push_back(new AluInstr( - op1_mov, new Register(2000, 2, pin_group), new LiteralConstant(0x0), {alu_write})); + expect.push_back(new AluInstr(op1_mov, + new Register(2000, 2, pin_group), + new LiteralConstant(0x0), + AluInstr::write)); expect.push_back(new AluInstr(op1_mov, new Register(2000, 3, pin_group), diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp index 87b8367a7f1..237ae775158 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp @@ -125,17 +125,50 @@ PROP WRITE_ALL_COLORS:1 PROP COLOR_EXPORT_MASK:15 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -ALU MOV S0.x@free : L[0xbf000000] {WL} -ALU MOV S1.x@free : I[0] {WL} +ALU MOV S0.x@free : L[0xbf000000] {W} +ALU MOV S1.x@free : I[0] {W} +ALU MOV S2.x : KC0[0].x {W} +ALU MOV S2.y : KC0[0].y {W} +ALU MOV S2.z : KC0[0].z {W} +ALU MOV S2.w : KC0[0].w 
{W} +ALU ADD S3.x@free : S0.x@free S2.x {W} +ALU MOV S4.x@group : S3.x@free {W} +ALU MOV S4.y@group : S2.y {W} +ALU MOV S4.z@group : S2.z {W} +ALU MOV S4.w@group : S2.w {W} +EXPORT_DONE PIXEL 0 S4.xyzw +)"; + +const char *add_add_1_expect_from_nir_scheduled = + R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP WRITE_ALL_COLORS:1 +PROP COLOR_EXPORT_MASK:15 +OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 +SHADER +ALU_GROUP_BEGIN +ALU MOV S0.x@chan : L[0xbf000000] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU MOV S1.x@chan : I[0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN ALU MOV S2.x : KC0[0].x {W} ALU MOV S2.y : KC0[0].y {W} ALU MOV S2.z : KC0[0].z {W} ALU MOV S2.w : KC0[0].w {WL} -ALU ADD S3.x@free : S0.x@free S2.x {WL} -ALU MOV S4.x@group : S3.x@free {W} -ALU MOV S4.y@group : S2.y {W} -ALU MOV S4.z@group : S2.z {W} -ALU MOV S4.w@group : S2.w {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU ADD S3.x@chan : S0.x@free S2.x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU MOV S4.x@chgr : S3.x@free {W} +ALU MOV S4.y@chgr : S2.y {W} +ALU MOV S4.z@chgr : S2.z {W} +ALU MOV S4.w@chgr : S2.w {WL} +ALU_GROUP_END EXPORT_DONE PIXEL 0 S4.xyzw )"; @@ -149,17 +182,17 @@ PROP WRITE_ALL_COLORS:1 PROP COLOR_EXPORT_MASK:15 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -ALU MOV S0.x@free : L[0xbf000000] {WL} -ALU MOV S1.x@free : I[0] {WL} +ALU MOV S0.x@free : L[0xbf000000] {W} +ALU MOV S1.x@free : I[0] {W} ALU MOV S2.x : KC0[0].x {W} ALU MOV S2.y : KC0[0].y {W} ALU MOV S2.z : KC0[0].z {W} -ALU MOV S2.w : KC0[0].w {WL} -ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {WL} +ALU MOV S2.w : KC0[0].w {W} +ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {W} ALU MOV S4.x@group : S3.x@free {W} ALU MOV S4.y@group : KC0[0].y {W} ALU MOV S4.z@group : KC0[0].z {W} -ALU MOV S4.w@group : KC0[0].w {WL} +ALU MOV S4.w@group : KC0[0].w {W} EXPORT_DONE PIXEL 0 S4.xyzw )"; @@ -173,11 +206,11 @@ PROP WRITE_ALL_COLORS:1 PROP COLOR_EXPORT_MASK:15 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -ALU ADD S3.x@free : L[0xbf000000] 
KC0[0].x {WL} +ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {W} ALU MOV S4.x@group : S3.x@free {W} ALU MOV S4.y@group : KC0[0].y {W} ALU MOV S4.z@group : KC0[0].z {W} -ALU MOV S4.w@group : KC0[0].w {WL} +ALU MOV S4.w@group : KC0[0].w {W} EXPORT_DONE PIXEL 0 S4.xyzw )"; @@ -195,7 +228,7 @@ SHADER ALU ADD S4.x@group : L[0xbf000000] KC0[0].x {W} ALU MOV S4.y@group : KC0[0].y {W} ALU MOV S4.z@group : KC0[0].z {W} -ALU MOV S4.w@group : KC0[0].w {WL} +ALU MOV S4.w@group : KC0[0].w {W} EXPORT_DONE PIXEL 0 S4.xyzw )"; @@ -1056,63 +1089,63 @@ OUTPUT LOC:1 VARYING_SLOT:1 MASK:15 SYSVALUES R1.xyzw REGISTERS R2.x R3.x R4.x R5.x R6.x R7.x R8.x SHADER -ALU MOV S9.x@free : I[0] {WL} -ALU MOV S10.x@free : I[-1] {WL} -ALU MOV S11.x@free : I[0] {WL} -ALU MOV S12.x@free : I[1] {WL} +ALU MOV S9.x@free : I[0] {W} +ALU MOV S10.x@free : I[-1] {W} +ALU MOV S11.x@free : I[0] {W} +ALU MOV S12.x@free : I[1] {W} ALU MOV S13.x : I[1.0] {W} ALU MOV S13.y : I[1.0] {W} ALU MOV S13.z : I[0] {W} -ALU MOV S13.w : I[1.0] {WL} -ALU MOV S14.x@free : L[0x2] {WL} -ALU MOV S15.x@free : KC0[0].x {WL} -ALU SETE_INT S16.x@free : S15.x@free S12.x@free {WL} +ALU MOV S13.w : I[1.0] {W} +ALU MOV S14.x@free : L[0x2] {W} +ALU MOV S15.x@free : KC0[0].x {W} +ALU SETE_INT S16.x@free : S15.x@free S12.x@free {W} IF (( ALU PRED_SETNE_INT __.x@free : S16.x@free I[0] {LEP} PUSH_BEFORE )) - ALU MOV S18.x@free : KC0[2].x {WL} - ALU SETNE_INT S19.x@free : S18.x@free S12.x {WL} + ALU MOV S18.x@free : KC0[2].x {W} + ALU SETNE_INT S19.x@free : S18.x@free S12.x {W} IF (( ALU PRED_SETNE_INT __.y@free : S19.x@free I[0] {LEP} PUSH_BEFORE )) - ALU MOV R3.x : S12.x@free {WL} - ALU MOV R2.x : S9.x@free {WL} + ALU MOV R3.x : S12.x@free {W} + ALU MOV R2.x : S9.x@free {W} LOOP_BEGIN - ALU INT_TO_FLT R4.x : R2.x {WL} - ALU MOV S21.x@free : KC0[1].x {WL} - ALU SETNE_INT S22.x@free : S21.x@free S14.x@free {WL} + ALU INT_TO_FLT R4.x : R2.x {W} + ALU MOV S21.x@free : KC0[1].x {W} + ALU SETNE_INT S22.x@free : S21.x@free S14.x@free {W} IF (( ALU 
PRED_SETNE_INT __.z@free : S22.x@free I[0] {LEP} PUSH_BEFORE )) BREAK ENDIF - ALU ADD_INT R5.x@free : R3.x S12.x@free {WL} - ALU MOV R2.x : R3.x {WL} - ALU MOV R3.x : R5.x {WL} + ALU ADD_INT R5.x@free : R3.x S12.x@free {W} + ALU MOV R2.x : R3.x {W} + ALU MOV R3.x : R5.x {W} LOOP_END - ALU MOV S24.x@free : I[1.0] {WL} - ALU MOV R8.x : S24.x@free {WL} - ALU MOV R7.x : R8.x {WL} - ALU MOV R6.x : S10.x@free {WL} + ALU MOV S24.x@free : I[1.0] {W} + ALU MOV R8.x : S24.x@free {W} + ALU MOV R7.x : R8.x {W} + ALU MOV R6.x : S10.x@free {W} ELSE - ALU MOV S25.x@free : I[1.0] {WL} - ALU MOV R8.x : S25.x@free {WL} - ALU MOV R7.x : S9.x {WL} - ALU MOV R4.x : R8.x {WL} - ALU MOV R6.x : S11.x@free {WL} + ALU MOV S25.x@free : I[1.0] {W} + ALU MOV R8.x : S25.x@free {W} + ALU MOV R7.x : S9.x {W} + ALU MOV R4.x : R8.x {W} + ALU MOV R6.x : S11.x@free {W} ENDIF ELSE - ALU MOV S26.x@free : I[1.0] {WL} - ALU MOV R8.x : S26.x@free {WL} - ALU MOV R7.x : S9.x {WL} - ALU MOV R4.x : R8.x {WL} - ALU MOV R6.x : S10.x@free {WL} + ALU MOV S26.x@free : I[1.0] {W} + ALU MOV R8.x : S26.x@free {W} + ALU MOV R7.x : S9.x {W} + ALU MOV R4.x : R8.x {W} + ALU MOV R6.x : S10.x@free {W} ENDIF -ALU CNDE_INT S27.x@free : R6.x S13.x R4.x {WL} -ALU CNDE_INT S28.x@free : R6.x S13.y R7.x {WL} -ALU CNDE_INT S29.x@free : R6.x S13.w R8.x {WL} +ALU CNDE_INT S27.x@free : R6.x S13.x R4.x {W} +ALU CNDE_INT S28.x@free : R6.x S13.y R7.x {W} +ALU CNDE_INT S29.x@free : R6.x S13.w R8.x {W} EXPORT_DONE POS 0 R1.xyzw -ALU MOV CLAMP S31.x@free : S27.x@free {WL} -ALU MOV CLAMP S32.x@free : S28.x@free {WL} -ALU MOV CLAMP S33.x@free : S29.x@free {WL} +ALU MOV CLAMP S31.x@free : S27.x@free {W} +ALU MOV CLAMP S32.x@free : S28.x@free {W} +ALU MOV CLAMP S33.x@free : S29.x@free {W} ALU MOV S34.x@group : S31.x@free {W} ALU MOV S34.y@group : S32.x@free {W} ALU MOV S34.z@group : S9.x@free {W} -ALU MOV S34.w@group : S33.x@free {WL} +ALU MOV S34.w@group : S33.x@free {W} EXPORT_DONE PARAM 0 S34.xyzw )"; @@ -1128,39 +1161,39 @@ REGISTERS 
R2.x@free R3.x@free R4.x@free R5.x@free R6.x@free R7.x@free R8.x@free SHADER IF (( ALU PREDE_INT __.x@free : KC0[0].x I[1] {LEP} PUSH_BEFORE )) IF (( ALU PRED_SETNE_INT __.y@free : KC0[2].x I[1] {LEP} PUSH_BEFORE )) - ALU MOV R3.x : I[1] {WL} - ALU MOV R2.x : I[0] {WL} + ALU MOV R3.x : I[1] {W} + ALU MOV R2.x : I[0] {W} LOOP_BEGIN - ALU INT_TO_FLT R4.x : R2.x {WL} + ALU INT_TO_FLT R4.x : R2.x {W} IF (( ALU PRED_SETNE_INT __.z@free : KC0[1].x L[0x2] {LEP} PUSH_BEFORE )) BREAK ENDIF - ALU ADD_INT R5.x : R3.x I[1] {WL} - ALU MOV R2.x : R3.x {WL} - ALU MOV R3.x : R5.x {WL} + ALU ADD_INT R5.x : R3.x I[1] {W} + ALU MOV R2.x : R3.x {W} + ALU MOV R3.x : R5.x {W} LOOP_END - ALU MOV R8.x : I[1.0] {WL} - ALU MOV R7.x : I[1.0] {WL} - ALU MOV R6.x : I[-1] {WL} + ALU MOV R8.x : I[1.0] {W} + ALU MOV R7.x : I[1.0] {W} + ALU MOV R6.x : I[-1] {W} ELSE - ALU MOV R8.x : I[1.0] {WL} - ALU MOV R7.x : I[0] {WL} - ALU MOV R4.x : I[1.0] {WL} - ALU MOV R6.x : I[0] {WL} + ALU MOV R8.x : I[1.0] {W} + ALU MOV R7.x : I[0] {W} + ALU MOV R4.x : I[1.0] {W} + ALU MOV R6.x : I[0] {W} ENDIF ELSE - ALU MOV R8.x : I[1.0] {WL} - ALU MOV R7.x : I[0] {WL} - ALU MOV R4.x : I[1.0] {WL} - ALU MOV R6.x : I[-1] {WL} + ALU MOV R8.x : I[1.0] {W} + ALU MOV R7.x : I[0] {W} + ALU MOV R4.x : I[1.0] {W} + ALU MOV R6.x : I[-1] {W} ENDIF -ALU CNDE_INT S27.x@free : R6.x I[1.0] R4.x {WL} -ALU CNDE_INT S28.x@free : R6.x I[1.0] R7.x {WL} -ALU CNDE_INT S29.x@free : R6.x I[1.0] R8.x {WL} +ALU CNDE_INT S27.x@free : R6.x I[1.0] R4.x {W} +ALU CNDE_INT S28.x@free : R6.x I[1.0] R7.x {W} +ALU CNDE_INT S29.x@free : R6.x I[1.0] R8.x {W} EXPORT_DONE POS 0 R1.xyzw ALU MOV CLAMP S34.x@group : S27.x@free {W} ALU MOV CLAMP S34.y@group : S28.x@free {W} -ALU MOV CLAMP S34.w@group : S29.x@free {WL} +ALU MOV CLAMP S34.w@group : S29.x@free {W} EXPORT_DONE PARAM 0 S34.xy0w )"; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h index 5fdeef65964..1bccae33c88 100644 --- 
a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h @@ -12,6 +12,7 @@ extern const char *red_triangle_fs_expect_from_nir_dce; extern const char *add_add_1_nir; extern const char *add_add_1_expect_from_nir; +extern const char *add_add_1_expect_from_nir_scheduled; extern const char *add_add_1_expect_from_nir_copy_prop_fwd; extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce; extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd;