From 3a2b0557265f1aea58535b1df933f1124cce6544 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 15 Mar 2021 12:26:51 +0100 Subject: [PATCH] aco/isel: Fix i64/u64->float32 conversion for large inputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, inputs such as 0x100000000 would have their upper 32-bits ignored despite being representable by 32-bit floats. Reviewed-by: Daniel Schürmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 7684b964c60..5f127b61d35 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2444,9 +2444,25 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_i2f32: { assert(dst.size() == 1); Temp src = get_alu_src(ctx, instr->src[0]); - if (instr->src[0].src.ssa->bit_size <= 16) - src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, true); - bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src); + const unsigned input_size = instr->src[0].src.ssa->bit_size; + if (input_size <= 32) { + if (input_size <= 16) { + /* Sign-extend to 32-bits */ + src = convert_int(ctx, bld, src, input_size, 32, true); + } + bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src); + } else { + assert(input_size == 64); + RegClass rc = RegClass(src.type(), 1); + Temp lower = bld.tmp(rc), upper = bld.tmp(rc); + bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); + lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower); + upper = bld.vop1(aco_opcode::v_cvt_f64_i32, bld.def(v2), upper); + upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u)); + upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper); + bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper); + } + break; } case nir_op_i2f64: { @@ -2496,12 +2512,23 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_u2f32: { assert(dst.size() == 1); Temp src = get_alu_src(ctx, instr->src[0]); - if (instr->src[0].src.ssa->bit_size == 8) { + const unsigned input_size = instr->src[0].src.ssa->bit_size; + if (input_size == 8) { bld.vop1(aco_opcode::v_cvt_f32_ubyte0, Definition(dst), src); - } else { - if (instr->src[0].src.ssa->bit_size == 16) + } else if (input_size <= 32) { + if (input_size == 16) src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, true); bld.vop1(aco_opcode::v_cvt_f32_u32, Definition(dst), src); + } else { + assert(input_size == 64); + RegClass rc = RegClass(src.type(), 1); + Temp lower = bld.tmp(rc), upper = bld.tmp(rc); + bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); + lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower); + upper = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), upper); + upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u)); + upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper); + bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper); } break; }