diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 1d543073f1b..2a2dc0687df 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2127,17 +2127,15 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_fsign: { Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0])); if (dst.regClass() == v2b) { - Temp one = bld.copy(bld.def(v1), Operand(0x3c00u)); - Temp minus_one = bld.copy(bld.def(v1), Operand(0xbc00u)); - Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), one, src, cond); - cond = bld.vopc(aco_opcode::v_cmp_le_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), minus_one, src, cond); + assert(ctx->program->chip_class >= GFX9); + /* replace negative zero with positive zero */ + src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand(0u), src); + src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand((uint16_t)-1), src, Operand((uint16_t)1u)); + bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src); } else if (dst.regClass() == v1) { - Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0x3f800000u), src, cond); - cond = bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0xbf800000u), src, cond); + src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand(0u), src); + src = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand((uint32_t)-1), src, Operand(1u)); + bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src); } else if (dst.regClass() == v2) { Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); Temp tmp = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0x3FF00000u));