diff --git a/.pick_status.json b/.pick_status.json index 87ffd9468ce..f611003f385 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3262,7 +3262,7 @@ "description": "ac/llvm: add option to clamp division by zero", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 1b0bdffdb45..3dc57873e94 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -713,6 +713,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp", ac_to_float_type(&ctx->ac, def_type), src[0]); } + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_iand: result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); @@ -859,6 +862,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_frsq: result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq", ac_to_float_type(&ctx->ac, def_type), src[0]); + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_frexp_exp: src[0] = ac_to_float(&ctx->ac, src[0]); @@ -900,7 +906,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_ffma: /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); + ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); break; case nir_op_ldexp: src[0] = ac_to_float(&ctx->ac, src[0]); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index aa31ff9c52d..80b1554ea3e 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -192,6 +192,9 @@ struct ac_shader_abi { /* Whether undef values must be converted to zero */ bool convert_undef_to_zero; + + /* Clamp div by 0 (so it won't produce NaN) */ + bool clamp_div_by_zero; }; #endif /* AC_SHADER_ABI_H */