From 9e13702af78f1e077d66c4e957355d4185dc3373 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 11 Aug 2020 18:52:24 +0200 Subject: [PATCH] ac/llvm: add option to clamp division by zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace div(x) by min(div(x), FLT_MAX)) to avoid getting a NaN result when x is 0. A cheaper alternative would be to use legacy mult instructions but they're not exposed by LLVM. Cc: mesa-stable Reviewed-by: Marek Olšák Part-of: (cherry picked from commit 32f46a55c8229b2a8d67d895be18651a81f8e6ff) --- .pick_status.json | 2 +- src/amd/llvm/ac_nir_to_llvm.c | 8 +++++++- src/amd/llvm/ac_shader_abi.h | 3 +++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 87ffd9468ce..f611003f385 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3262,7 +3262,7 @@ "description": "ac/llvm: add option to clamp division by zero", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 1b0bdffdb45..3dc57873e94 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -713,6 +713,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp", ac_to_float_type(&ctx->ac, def_type), src[0]); } + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_iand: result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); @@ -859,6 +862,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_frsq: result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq", ac_to_float_type(&ctx->ac, def_type), src[0]); + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_frexp_exp: src[0] = ac_to_float(&ctx->ac, src[0]); @@ -900,7 +906,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_ffma: /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); + ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); break; case nir_op_ldexp: src[0] = ac_to_float(&ctx->ac, src[0]); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index aa31ff9c52d..80b1554ea3e 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -192,6 +192,9 @@ struct ac_shader_abi { /* Whether undef values must be converted to zero */ bool convert_undef_to_zero; + + /* Clamp div by 0 (so it won't produce NaN) */ + bool clamp_div_by_zero; }; #endif /* AC_SHADER_ABI_H */