From 80b35fbefe5ecda3a6becd6df2a00761b9bfdaaa Mon Sep 17 00:00:00 2001
From: Emma Anholt <emma@anholt.net>
Date: Tue, 9 Aug 2022 13:56:50 -0700
Subject: [PATCH] nir/lower_mediump: Lower FS outputs to 16-bit when the value
 was upconverted.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take this real-world (trimmed) shader:

precision highp float;
in lowp vec4 var_varVertexColor;
layout(location = 0) out vec4 out_FragColor0;
void main() {
    vec4 textureColor0 = vec4(1.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00);
    vec3 color = vec3(1.000000e+00, 1.000000e+00, 1.000000e+00);
    vec4 outColor = vec4(vec3((color).rgb), 1.000000e+00);
    (outColor *= vec4(var_varVertexColor));
    (out_FragColor0 = outColor);
}

After optimization, the shader is just a store from the input to the
output.  If we decide to lower the input to 16-bit, then as long as the
driver can handle 16-bit outputs, it would be a good idea to demote the
output as well and save the conversions between 16-bit and 32-bit.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18003>
---
 src/compiler/nir/nir_lower_mediump.c | 30 +++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir_lower_mediump.c b/src/compiler/nir/nir_lower_mediump.c
index 4b849c09846..bdbb934fcb0 100644
--- a/src/compiler/nir/nir_lower_mediump.c
+++ b/src/compiler/nir/nir_lower_mediump.c
@@ -166,33 +166,57 @@ nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
                            !(nir->info.stage == MESA_SHADER_FRAGMENT &&
                              mode == nir_var_shader_out);
 
-         if (!sem.medium_precision ||
-             (is_varying && sem.location <= VARYING_SLOT_VAR31 &&
-              !(varying_mask & BITFIELD64_BIT(sem.location))))
+         if (is_varying && sem.location <= VARYING_SLOT_VAR31 &&
+            !(varying_mask & BITFIELD64_BIT(sem.location))) {
             continue; /* can't lower */
+         }
 
          if (nir_intrinsic_has_src_type(intr)) {
             /* Stores. */
             nir_alu_type type = nir_intrinsic_src_type(intr);
 
+            nir_op upconvert_op;
             switch (type) {
             case nir_type_float32:
                convert = nir_f2fmp;
+               upconvert_op = nir_op_f2f32;
                break;
             case nir_type_int32:
+               convert = nir_i2imp;
+               upconvert_op = nir_op_i2i32;
+               break;
             case nir_type_uint32:
                convert = nir_i2imp;
+               upconvert_op = nir_op_u2u32;
                break;
             default:
                continue; /* already lowered? */
             }
 
+            /* Check that the output is mediump, or (for fragment shader
+             * outputs) is a conversion from a mediump value, and lower it to
+             * mediump.  Note that we don't automatically apply it to
+             * gl_FragDepth, as GLSL ES declares it highp and so hardware such
+             * as Adreno a6xx doesn't expect a half-float output for it.
+             */
+            nir_ssa_def *val = intr->src[0].ssa;
+            bool is_fragdepth = (nir->info.stage == MESA_SHADER_FRAGMENT &&
+                                 sem.location == FRAG_RESULT_DEPTH);
+            if (!sem.medium_precision &&
+                (is_varying || is_fragdepth || val->parent_instr->type != nir_instr_type_alu ||
+                 nir_instr_as_alu(val->parent_instr)->op != upconvert_op)) {
+               continue;
+            }
+
             /* Convert the 32-bit store into a 16-bit store. */
             b.cursor = nir_before_instr(&intr->instr);
             nir_instr_rewrite_src_ssa(&intr->instr, &intr->src[0],
                                       convert(&b, intr->src[0].ssa));
             nir_intrinsic_set_src_type(intr, (type & ~32) | 16);
          } else {
+            if (!sem.medium_precision)
+               continue;
+
             /* Loads. */
             nir_alu_type type = nir_intrinsic_dest_type(intr);