From 2d3fbb44f4bdb1cf94ca1c60ad8146d6b8a2b11f Mon Sep 17 00:00:00 2001
From: Jesse Natalie <jenatali@microsoft.com>
Date: Wed, 31 May 2023 13:10:47 -0700
Subject: [PATCH] nir: Add preserve_mediump as a shader compiler option

The DXIL backend would like to distinguish between casts to 16-bit
that must cast, vs those that may. If a shader only ever produces
16-bit types from mediump casts and ALU ops on those values, then
the resulting shader can be annotated with DXIL's min-precision
qualifier, basically telling the driver to use 16-bit precision if
it's faster for them. If it uses concrete 16-bit casts, or loads/
stores to externally-visible memory, then it must use the "native"
16-bit flag, which is not supported on all hardware.

Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23344>
---
 src/compiler/nir/nir.h                |  5 +++++
 src/compiler/nir/nir_opt_algebraic.py | 14 +++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 16e5e6b0144..207a0a59ebe 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3932,6 +3932,11 @@ typedef struct nir_shader_compiler_options {
     *  of adding it to the atomic source
     */
    bool lower_atomic_offset_to_range_base;
+
+   /** Don't convert medium-precision casts (e.g. f2fmp) into concrete
+    *  type casts (e.g. f2f16).
+    */
+   bool preserve_mediump;
 } nir_shader_compiler_options;
 
 typedef struct nir_shader {
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 444542076a7..3056ee1f374 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -3124,13 +3124,13 @@ late_optimizations += [
   # Convert *2*mp instructions to concrete *2*16 instructions. At this point
   # any conversions that could have been removed will have been removed in
   # nir_opt_algebraic so any remaining ones are required.
-  (('f2fmp', a), ('f2f16', a)),
-  (('f2imp', a), ('f2i16', a)),
-  (('f2ump', a), ('f2u16', a)),
-  (('i2imp', a), ('i2i16', a)),
-  (('i2fmp', a), ('i2f16', a)),
-  (('i2imp', a), ('u2u16', a)),
-  (('u2fmp', a), ('u2f16', a)),
+  (('f2fmp', a), ('f2f16', a), "!options->preserve_mediump"),
+  (('f2imp', a), ('f2i16', a), "!options->preserve_mediump"),
+  (('f2ump', a), ('f2u16', a), "!options->preserve_mediump"),
+  (('i2imp', a), ('i2i16', a), "!options->preserve_mediump"),
+  (('i2fmp', a), ('i2f16', a), "!options->preserve_mediump"),
+  (('i2imp', a), ('u2u16', a), "!options->preserve_mediump"),
+  (('u2fmp', a), ('u2f16', a), "!options->preserve_mediump"),
   (('fisfinite', a), ('flt', ('fabs', a), float("inf"))),
 
   (('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),