diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 7684b964c60..5f127b61d35 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2444,9 +2444,25 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_i2f32: { assert(dst.size() == 1); Temp src = get_alu_src(ctx, instr->src[0]); - if (instr->src[0].src.ssa->bit_size <= 16) - src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, true); - bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src); + const unsigned input_size = instr->src[0].src.ssa->bit_size; + if (input_size <= 32) { + if (input_size <= 16) { + /* Sign-extend to 32-bits */ + src = convert_int(ctx, bld, src, input_size, 32, true); + } + bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src); + } else { + assert(input_size == 64); + RegClass rc = RegClass(src.type(), 1); + Temp lower = bld.tmp(rc), upper = bld.tmp(rc); + bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); + lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower); + upper = bld.vop1(aco_opcode::v_cvt_f64_i32, bld.def(v2), upper); + upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u)); + upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper); + bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper); + } + break; } case nir_op_i2f64: { @@ -2496,12 +2512,23 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_u2f32: { assert(dst.size() == 1); Temp src = get_alu_src(ctx, instr->src[0]); - if (instr->src[0].src.ssa->bit_size == 8) { + const unsigned input_size = instr->src[0].src.ssa->bit_size; + if (input_size == 8) { bld.vop1(aco_opcode::v_cvt_f32_ubyte0, Definition(dst), src); - } else { - if (instr->src[0].src.ssa->bit_size == 16) + } else if (input_size <= 32) { + if (input_size == 16) src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, true); bld.vop1(aco_opcode::v_cvt_f32_u32, Definition(dst), src); + } else { + assert(input_size == 64); + RegClass rc = RegClass(src.type(), 1); + Temp lower = bld.tmp(rc), upper = bld.tmp(rc); + bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); + lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower); + upper = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), upper); + upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u)); + upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper); + bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper); } break; }