r600/sfn: lower u2f64 and i2f64 in nir

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36587>
This commit is contained in:
Gert Wollny 2025-08-07 22:59:04 +02:00 committed by Marge Bot
parent f7552429ef
commit 206ec1ff77
2 changed files with 31 additions and 82 deletions

View file

@ -1358,8 +1358,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
static bool
emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
@ -1491,10 +1489,6 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
case nir_op_f2f64:
return emit_alu_f2f64(*alu, shader);
case nir_op_i2f64:
return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
case nir_op_u2f64:
return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
case nir_op_f2f32:
return emit_alu_f2f32(*alu, shader);
case nir_op_fabs:
@ -2145,69 +2139,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
return true;
}
/* Emit the instruction sequence that converts a 32-bit integer source to a
 * 64-bit float destination.
 *
 * alu:    the NIR ALU instruction being translated; only src[0] component 0
 *         is read and the destination must have exactly one component.
 * op:     the 32-bit integer-to-float opcode applied to both halves of the
 *         split source (the visible callers pass op1_int_to_flt for i2f64
 *         and op1_uint_to_flt for u2f64).
 * shader: receives the emitted instructions and provides the value factory.
 *
 * Always returns true.
 */
static bool
emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
{
/* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
auto& value_factory = shader.value_factory();
auto group = new AluGroup();
AluInstr *ir = nullptr;
assert(alu.def.num_components == 1);
/* Split the source into its upper 24 bits (tmpx) and its lower 8 bits
 * (tmpy) so that each part can be converted on its own.
 * NOTE(review): presumably done so each part fits a 32-bit float mantissa
 * exactly - confirm. */
auto tmpx = value_factory.temp_register();
shader.emit_instruction(new AluInstr(op2_and_int,
tmpx,
value_factory.src(alu.src[0], 0),
value_factory.literal(0xffffff00),
AluInstr::write));
auto tmpy = value_factory.temp_register();
shader.emit_instruction(new AluInstr(op2_and_int,
tmpy,
value_factory.src(alu.src[0], 0),
value_factory.literal(0xff),
AluInstr::last_write));
/* Convert both parts to 32-bit float with the caller-selected opcode. */
auto tmpx2 = value_factory.temp_register();
auto tmpy2 = value_factory.temp_register();
shader.emit_instruction(new AluInstr(op, tmpx2, tmpx, AluInstr::last_write));
shader.emit_instruction(new AluInstr(op, tmpy2, tmpy, AluInstr::last_write));
/* Four temporaries requested with the arguments 0..3.
 * NOTE(review): the argument looks like a channel selector (x, y, z, w) -
 * confirm against ValueFactory::temp_register. */
auto tmpx3 = value_factory.temp_register(0);
auto tmpy3 = value_factory.temp_register(1);
auto tmpz3 = value_factory.temp_register(2);
auto tmpw3 = value_factory.temp_register(3);
/* First group: four op1_flt32_to_flt64 instructions. The two converted
 * parts are paired with a zero source each, giving the source order
 * tmpx2, zero, tmpy2, zero. */
ir = new AluInstr(op1_flt32_to_flt64, tmpx3, tmpx2, AluInstr::write);
group->add_instruction(ir);
ir = new AluInstr(op1_flt32_to_flt64, tmpy3, value_factory.zero(), AluInstr::write);
group->add_instruction(ir);
ir = new AluInstr(op1_flt32_to_flt64, tmpz3, tmpy2, AluInstr::write);
group->add_instruction(ir);
ir =
new AluInstr(op1_flt32_to_flt64, tmpw3, value_factory.zero(), AluInstr::last_write);
group->add_instruction(ir);
shader.emit_instruction(group);
/* Second group: two op2_add_64 instructions that sum the widened parts and
 * write destination components 0 and 1, both pinned with pin_chan. */
group = new AluGroup();
ir = new AluInstr(op2_add_64,
value_factory.dest(alu.def, 0, pin_chan),
tmpy3,
tmpw3,
AluInstr::write);
group->add_instruction(ir);
ir = new AluInstr(op2_add_64,
value_factory.dest(alu.def, 1, pin_chan),
tmpx3,
tmpz3,
AluInstr::write);
group->add_instruction(ir);
shader.emit_instruction(group);
return true;
}
static bool
emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
{

View file

@ -136,15 +136,17 @@ class LowerSplit64op : public NirLowerInstruction {
auto alu = nir_instr_as_alu(instr);
switch (alu->op) {
case nir_op_bcsel:
case nir_op_b2f64:
return alu->def.bit_size == 64;
case nir_op_f2i32:
case nir_op_f2u32:
case nir_op_f2i64:
case nir_op_f2u64:
case nir_op_u2f64:
case nir_op_i2f64:
return nir_src_bit_size(alu->src[0].src) == 64;
case nir_op_i2f64:
case nir_op_u2f64:
return nir_src_bit_size(alu->src[0].src) >= 32;
case nir_op_b2f64:
return true;
default:
return false;
}
@ -206,19 +208,19 @@ class LowerSplit64op : public NirLowerInstruction {
}
case nir_op_u2f64: {
auto src = nir_ssa_for_alu_src(b, alu, 0);
auto low = nir_unpack_64_2x32_split_x(b, src);
auto high = nir_unpack_64_2x32_split_y(b, src);
auto flow = nir_u2f64(b, low);
auto fhigh = nir_u2f64(b, high);
return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
if (src->bit_size == 64) {
return lower_i64_to_f64(src, nir_op_u2f64);
} else {
return lower_i32_to_f64(src, nir_op_u2f32);
}
}
case nir_op_i2f64: {
auto src = nir_ssa_for_alu_src(b, alu, 0);
auto low = nir_unpack_64_2x32_split_x(b, src);
auto high = nir_unpack_64_2x32_split_y(b, src);
auto flow = nir_u2f64(b, low);
auto fhigh = nir_i2f64(b, high);
return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
if (src->bit_size == 64) {
return lower_i64_to_f64(src, nir_op_i2f64);
} else {
return lower_i32_to_f64(src, nir_op_i2f32);
}
}
case nir_op_b2f64: {
auto src = nir_b2b32(b, nir_ssa_for_alu_src(b, alu, 0));
@ -251,6 +253,22 @@ class LowerSplit64op : public NirLowerInstruction {
UNREACHABLE("Trying to lower instruction that was not in filter");
}
}
/* Convert a 64-bit integer to f64 by converting its two 32-bit halves
 * separately and recombining them as high * 2^32 + low.
 *
 * src: the 64-bit integer value to convert.
 * op:  the conversion applied to the high dword; the callers in this class
 *      pass nir_op_u2f64 for unsigned and nir_op_i2f64 for signed sources.
 *
 * Returns the f64 sum of the scaled high part and the low part.
 */
nir_def *lower_i64_to_f64(nir_def *src, nir_op op)
{
   /* Only the high dword carries the sign of a two's complement value, so
    * the low dword must always be converted as unsigned. Converting it with
    * i2f64 would turn every low dword that has bit 31 set into a negative
    * addend and yield a result that is off by 2^32. */
   auto flow = nir_u2f64(b, nir_unpack_64_2x32_split_x(b, src));
   auto fhigh = nir_build_alu1(b, op, nir_unpack_64_2x32_split_y(b, src));
   return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
}
/* Convert a 32-bit integer to f64 in two pieces.
 *
 * The source is masked into its low 8 bits and its remaining upper 24 bits.
 * The low piece is always converted with u2f32, the upper piece with the
 * caller-selected 32-bit conversion `op` (nir_op_u2f32 or nir_op_i2f32 at the
 * call sites in this class). Both f32 values are widened by a single
 * two-component f2f64 and the two resulting channels are added.
 *
 * src: the 32-bit integer value to convert.
 * op:  conversion opcode applied to the upper 24 bits.
 *
 * Returns the f64 sum of the two widened pieces.
 */
nir_def *lower_i32_to_f64(nir_def *src, nir_op op)
{
   /* Low byte: never carries the sign, hence always unsigned. */
   auto low_bits = nir_iand(b, src, nir_imm_int(b, 0x000000ff));
   auto low_f32 = nir_u2f32(b, low_bits);

   /* Upper 24 bits: signedness is decided by the caller via op. */
   auto high_bits = nir_iand(b, src, nir_imm_int(b, 0xffffff00));
   auto high_f32 = nir_build_alu1(b, op, high_bits);

   /* Widen both pieces at once, then sum channel 0 (low) and 1 (high). */
   auto widened = nir_f2f64(b, nir_vec2(b, low_f32, high_f32));
   return nir_fadd(b, nir_channel(b, widened, 0), nir_channel(b, widened, 1));
}
};
bool