mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 15:48:36 +02:00
r600/sfn: lower u2f64 and i2f64 in nir
Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36587>
This commit is contained in:
parent
f7552429ef
commit
206ec1ff77
2 changed files with 31 additions and 82 deletions
|
|
@ -1358,8 +1358,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
|
|||
static bool
|
||||
emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
|
||||
static bool
|
||||
emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
|
||||
static bool
|
||||
emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
|
||||
static bool
|
||||
emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
|
||||
|
|
@ -1491,10 +1489,6 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
|
|||
return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
|
||||
case nir_op_f2f64:
|
||||
return emit_alu_f2f64(*alu, shader);
|
||||
case nir_op_i2f64:
|
||||
return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
|
||||
case nir_op_u2f64:
|
||||
return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
|
||||
case nir_op_f2f32:
|
||||
return emit_alu_f2f32(*alu, shader);
|
||||
case nir_op_fabs:
|
||||
|
|
@ -2145,69 +2139,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
|
||||
{
|
||||
/* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
|
||||
auto& value_factory = shader.value_factory();
|
||||
auto group = new AluGroup();
|
||||
AluInstr *ir = nullptr;
|
||||
|
||||
assert(alu.def.num_components == 1);
|
||||
|
||||
auto tmpx = value_factory.temp_register();
|
||||
shader.emit_instruction(new AluInstr(op2_and_int,
|
||||
tmpx,
|
||||
value_factory.src(alu.src[0], 0),
|
||||
value_factory.literal(0xffffff00),
|
||||
AluInstr::write));
|
||||
auto tmpy = value_factory.temp_register();
|
||||
shader.emit_instruction(new AluInstr(op2_and_int,
|
||||
tmpy,
|
||||
value_factory.src(alu.src[0], 0),
|
||||
value_factory.literal(0xff),
|
||||
AluInstr::last_write));
|
||||
|
||||
auto tmpx2 = value_factory.temp_register();
|
||||
auto tmpy2 = value_factory.temp_register();
|
||||
shader.emit_instruction(new AluInstr(op, tmpx2, tmpx, AluInstr::last_write));
|
||||
shader.emit_instruction(new AluInstr(op, tmpy2, tmpy, AluInstr::last_write));
|
||||
|
||||
auto tmpx3 = value_factory.temp_register(0);
|
||||
auto tmpy3 = value_factory.temp_register(1);
|
||||
auto tmpz3 = value_factory.temp_register(2);
|
||||
auto tmpw3 = value_factory.temp_register(3);
|
||||
|
||||
ir = new AluInstr(op1_flt32_to_flt64, tmpx3, tmpx2, AluInstr::write);
|
||||
group->add_instruction(ir);
|
||||
ir = new AluInstr(op1_flt32_to_flt64, tmpy3, value_factory.zero(), AluInstr::write);
|
||||
group->add_instruction(ir);
|
||||
ir = new AluInstr(op1_flt32_to_flt64, tmpz3, tmpy2, AluInstr::write);
|
||||
group->add_instruction(ir);
|
||||
ir =
|
||||
new AluInstr(op1_flt32_to_flt64, tmpw3, value_factory.zero(), AluInstr::last_write);
|
||||
group->add_instruction(ir);
|
||||
shader.emit_instruction(group);
|
||||
|
||||
group = new AluGroup();
|
||||
|
||||
ir = new AluInstr(op2_add_64,
|
||||
value_factory.dest(alu.def, 0, pin_chan),
|
||||
tmpy3,
|
||||
tmpw3,
|
||||
AluInstr::write);
|
||||
group->add_instruction(ir);
|
||||
ir = new AluInstr(op2_add_64,
|
||||
value_factory.dest(alu.def, 1, pin_chan),
|
||||
tmpx3,
|
||||
tmpz3,
|
||||
AluInstr::write);
|
||||
group->add_instruction(ir);
|
||||
shader.emit_instruction(group);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -136,15 +136,17 @@ class LowerSplit64op : public NirLowerInstruction {
|
|||
auto alu = nir_instr_as_alu(instr);
|
||||
switch (alu->op) {
|
||||
case nir_op_bcsel:
|
||||
case nir_op_b2f64:
|
||||
return alu->def.bit_size == 64;
|
||||
case nir_op_f2i32:
|
||||
case nir_op_f2u32:
|
||||
case nir_op_f2i64:
|
||||
case nir_op_f2u64:
|
||||
case nir_op_u2f64:
|
||||
case nir_op_i2f64:
|
||||
return nir_src_bit_size(alu->src[0].src) == 64;
|
||||
case nir_op_i2f64:
|
||||
case nir_op_u2f64:
|
||||
return nir_src_bit_size(alu->src[0].src) >= 32;
|
||||
case nir_op_b2f64:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
@ -206,19 +208,19 @@ class LowerSplit64op : public NirLowerInstruction {
|
|||
}
|
||||
case nir_op_u2f64: {
|
||||
auto src = nir_ssa_for_alu_src(b, alu, 0);
|
||||
auto low = nir_unpack_64_2x32_split_x(b, src);
|
||||
auto high = nir_unpack_64_2x32_split_y(b, src);
|
||||
auto flow = nir_u2f64(b, low);
|
||||
auto fhigh = nir_u2f64(b, high);
|
||||
return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
|
||||
if (src->bit_size == 64) {
|
||||
return lower_i64_to_f64(src, nir_op_u2f64);
|
||||
} else {
|
||||
return lower_i32_to_f64(src, nir_op_u2f32);
|
||||
}
|
||||
}
|
||||
case nir_op_i2f64: {
|
||||
auto src = nir_ssa_for_alu_src(b, alu, 0);
|
||||
auto low = nir_unpack_64_2x32_split_x(b, src);
|
||||
auto high = nir_unpack_64_2x32_split_y(b, src);
|
||||
auto flow = nir_u2f64(b, low);
|
||||
auto fhigh = nir_i2f64(b, high);
|
||||
return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
|
||||
if (src->bit_size == 64) {
|
||||
return lower_i64_to_f64(src, nir_op_i2f64);
|
||||
} else {
|
||||
return lower_i32_to_f64(src, nir_op_i2f32);
|
||||
}
|
||||
}
|
||||
case nir_op_b2f64: {
|
||||
auto src = nir_b2b32(b, nir_ssa_for_alu_src(b, alu, 0));
|
||||
|
|
@ -251,6 +253,22 @@ class LowerSplit64op : public NirLowerInstruction {
|
|||
UNREACHABLE("Trying to lower instruction that was not in filter");
|
||||
}
|
||||
}
|
||||
|
||||
/* Convert a 64-bit integer to f64 by converting its two 32-bit halves
 * separately and recombining them as high * 2^32 + low.
 *
 * src: the 64-bit integer value to convert.
 * op:  the conversion applied to the high word; the callers in this class
 *      pass nir_op_i2f64 for signed and nir_op_u2f64 for unsigned sources.
 *
 * Returns the f64 result.
 *
 * The low word carries no sign information, so it is always converted as
 * unsigned. A signed conversion would turn every low word with bit 31 set
 * into a negative contribution and make the result off by 2^32.
 */
nir_def *lower_i64_to_f64(nir_def *src, nir_op op)
{
   auto flow = nir_u2f64(b, nir_unpack_64_2x32_split_x(b, src));
   auto fhigh = nir_build_alu1(b, op, nir_unpack_64_2x32_split_y(b, src));

   /* 65536.0 * 65536.0 == 2^32, the weight of the high word */
   return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
}
|
||||
|
||||
/* Convert a 32-bit integer to f64 by way of two f32 conversions.
 *
 * src: the 32-bit integer value to convert.
 * op:  the conversion applied to the upper 24 bits; the callers in this
 *      class pass nir_op_i2f32 or nir_op_u2f32.
 *
 * The source is masked into its lower 8 bits (always converted with u2f32)
 * and its upper 24 bits (converted with `op`). Both f32 values are widened
 * to f64 as one two-component vector, and the two channels are then added.
 * NOTE(review): the 8/24 split presumably keeps each part exactly
 * representable in f32 - confirm.
 */
nir_def *lower_i32_to_f64(nir_def *src, nir_op op)
{
   nir_def *low_bits = nir_iand(b, src, nir_imm_int(b, 0x000000ff));
   nir_def *low_f32 = nir_u2f32(b, low_bits);

   nir_def *high_bits = nir_iand(b, src, nir_imm_int(b, 0xffffff00));
   nir_def *high_f32 = nir_build_alu1(b, op, high_bits);

   nir_def *widened = nir_f2f64(b, nir_vec2(b, low_f32, high_f32));
   nir_def *low_f64 = nir_channel(b, widened, 0);
   nir_def *high_f64 = nir_channel(b, widened, 1);
   return nir_fadd(b, low_f64, high_f64);
}
|
||||
};
|
||||
|
||||
bool
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue