From 206ec1ff779e139e3cc7604c6cac6d73bf6d3472 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 7 Aug 2025 22:59:04 +0200 Subject: [PATCH] r600/sfn: lower u2f64 and i2f64 in nir Signed-off-by: Gert Wollny Part-of: --- .../drivers/r600/sfn/sfn_instr_alu.cpp | 69 ------------------- .../drivers/r600/sfn/sfn_nir_lower_64bit.cpp | 44 ++++++++---- 2 files changed, 31 insertions(+), 82 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index d6d54c2f9e1..957561f5d2f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -1358,8 +1358,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader); static bool -emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader); -static bool emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader); static bool emit_alu_abs64(const nir_alu_instr& alu, Shader& shader); @@ -1491,10 +1489,6 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader) return emit_alu_op2_64bit(*alu, op2_min_64, shader, false); case nir_op_f2f64: return emit_alu_f2f64(*alu, shader); - case nir_op_i2f64: - return emit_alu_i2f64(*alu, op1_int_to_flt, shader); - case nir_op_u2f64: - return emit_alu_i2f64(*alu, op1_uint_to_flt, shader); case nir_op_f2f32: return emit_alu_f2f32(*alu, shader); case nir_op_fabs: @@ -2145,69 +2139,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) return true; } -static bool -emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader) -{ - /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */ - auto& value_factory = shader.value_factory(); - auto group = new AluGroup(); - AluInstr *ir = nullptr; - - assert(alu.def.num_components == 1); - - auto tmpx = value_factory.temp_register(); - shader.emit_instruction(new AluInstr(op2_and_int, - tmpx, - 
value_factory.src(alu.src[0], 0), - value_factory.literal(0xffffff00), - AluInstr::write)); - auto tmpy = value_factory.temp_register(); - shader.emit_instruction(new AluInstr(op2_and_int, - tmpy, - value_factory.src(alu.src[0], 0), - value_factory.literal(0xff), - AluInstr::last_write)); - - auto tmpx2 = value_factory.temp_register(); - auto tmpy2 = value_factory.temp_register(); - shader.emit_instruction(new AluInstr(op, tmpx2, tmpx, AluInstr::last_write)); - shader.emit_instruction(new AluInstr(op, tmpy2, tmpy, AluInstr::last_write)); - - auto tmpx3 = value_factory.temp_register(0); - auto tmpy3 = value_factory.temp_register(1); - auto tmpz3 = value_factory.temp_register(2); - auto tmpw3 = value_factory.temp_register(3); - - ir = new AluInstr(op1_flt32_to_flt64, tmpx3, tmpx2, AluInstr::write); - group->add_instruction(ir); - ir = new AluInstr(op1_flt32_to_flt64, tmpy3, value_factory.zero(), AluInstr::write); - group->add_instruction(ir); - ir = new AluInstr(op1_flt32_to_flt64, tmpz3, tmpy2, AluInstr::write); - group->add_instruction(ir); - ir = - new AluInstr(op1_flt32_to_flt64, tmpw3, value_factory.zero(), AluInstr::last_write); - group->add_instruction(ir); - shader.emit_instruction(group); - - group = new AluGroup(); - - ir = new AluInstr(op2_add_64, - value_factory.dest(alu.def, 0, pin_chan), - tmpy3, - tmpw3, - AluInstr::write); - group->add_instruction(ir); - ir = new AluInstr(op2_add_64, - value_factory.dest(alu.def, 1, pin_chan), - tmpx3, - tmpz3, - AluInstr::write); - group->add_instruction(ir); - shader.emit_instruction(group); - - return true; -} - static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader) { diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp index 10032e59991..5189b30bdbf 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp @@ -136,15 +136,17 @@ class LowerSplit64op : public 
NirLowerInstruction { auto alu = nir_instr_as_alu(instr); switch (alu->op) { case nir_op_bcsel: - case nir_op_b2f64: return alu->def.bit_size == 64; case nir_op_f2i32: case nir_op_f2u32: case nir_op_f2i64: case nir_op_f2u64: - case nir_op_u2f64: - case nir_op_i2f64: return nir_src_bit_size(alu->src[0].src) == 64; + case nir_op_i2f64: + case nir_op_u2f64: + return nir_src_bit_size(alu->src[0].src) >= 32; + case nir_op_b2f64: + return true; default: return false; } @@ -206,19 +208,19 @@ class LowerSplit64op : public NirLowerInstruction { } case nir_op_u2f64: { auto src = nir_ssa_for_alu_src(b, alu, 0); - auto low = nir_unpack_64_2x32_split_x(b, src); - auto high = nir_unpack_64_2x32_split_y(b, src); - auto flow = nir_u2f64(b, low); - auto fhigh = nir_u2f64(b, high); - return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow); + if (src->bit_size == 64) { + return lower_i64_to_f64(src, nir_op_u2f64); + } else { + return lower_i32_to_f64(src, nir_op_u2f32); + } } case nir_op_i2f64: { auto src = nir_ssa_for_alu_src(b, alu, 0); - auto low = nir_unpack_64_2x32_split_x(b, src); - auto high = nir_unpack_64_2x32_split_y(b, src); - auto flow = nir_u2f64(b, low); - auto fhigh = nir_i2f64(b, high); - return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow); + if (src->bit_size == 64) { + return lower_i64_to_f64(src, nir_op_i2f64); + } else { + return lower_i32_to_f64(src, nir_op_i2f32); + } } case nir_op_b2f64: { auto src = nir_b2b32(b, nir_ssa_for_alu_src(b, alu, 0)); @@ -251,6 +253,22 @@ class LowerSplit64op : public NirLowerInstruction { UNREACHABLE("Trying to lower instruction that was not in filter"); } } + + nir_def *lower_i64_to_f64(nir_def *src, nir_op op) + { + auto flow = nir_u2f64(b, nir_unpack_64_2x32_split_x(b, src)); /* low dword is always unsigned; only the high dword carries the sign */ + auto fhigh = nir_build_alu1(b, op, nir_unpack_64_2x32_split_y(b, src)); + return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow); + } + + nir_def *lower_i32_to_f64(nir_def *src, nir_op op) + { + auto tmplo = 
nir_u2f32(b, nir_iand(b, src, nir_imm_int(b, 0x000000ff))); + auto tmphi = + nir_build_alu1(b, op, nir_iand(b, src, nir_imm_int(b, 0xffffff00))); + auto f64 = nir_f2f64(b, nir_vec2(b, tmplo, tmphi)); + return nir_fadd(b, nir_channel(b, f64, 0), nir_channel(b, f64, 1)); + } }; bool