From 9e13702af78f1e077d66c4e957355d4185dc3373 Mon Sep 17 00:00:00 2001
From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Date: Tue, 11 Aug 2020 18:52:24 +0200
Subject: [PATCH] ac/llvm: add option to clamp division by zero
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace div(x) by min(div(x), FLT_MAX) to avoid getting a NaN result
when x is 0.

A cheaper alternative would be to use legacy mult instructions but they're
not exposed by LLVM.

Cc: mesa-stable
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6259>
(cherry picked from commit 32f46a55c8229b2a8d67d895be18651a81f8e6ff)
---
 .pick_status.json             | 2 +-
 src/amd/llvm/ac_nir_to_llvm.c | 8 +++++++-
 src/amd/llvm/ac_shader_abi.h  | 3 +++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 87ffd9468ce..f611003f385 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -3262,7 +3262,7 @@
         "description": "ac/llvm: add option to clamp division by zero",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 1b0bdffdb45..3dc57873e94 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -713,6 +713,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 			result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
 						      ac_to_float_type(&ctx->ac, def_type), src[0]);
 		}
+		if (ctx->abi->clamp_div_by_zero)
+			result = ac_build_fmin(&ctx->ac, result,
+					       LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
 		break;
 	case nir_op_iand:
 		result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
@@ -859,6 +862,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 	case nir_op_frsq:
 		result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
 					      ac_to_float_type(&ctx->ac, def_type), src[0]);
+		if (ctx->abi->clamp_div_by_zero)
+			result = ac_build_fmin(&ctx->ac, result,
+					       LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
 		break;
 	case nir_op_frexp_exp:
 		src[0] = ac_to_float(&ctx->ac, src[0]);
@@ -900,7 +906,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 	case nir_op_ffma:
 		/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
 		result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
-		                              ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
+					      ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
 		break;
 	case nir_op_ldexp:
 		src[0] = ac_to_float(&ctx->ac, src[0]);
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index aa31ff9c52d..80b1554ea3e 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -192,6 +192,9 @@ struct ac_shader_abi {
 
 	/* Whether undef values must be converted to zero */
 	bool convert_undef_to_zero;
+
+	/* Clamp div by 0 (so it won't produce NaN) */
+	bool clamp_div_by_zero;
 };
 
 #endif /* AC_SHADER_ABI_H */