From 17589be72b4dd86b04a548a7ddd672ffc24d79da Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa@collabora.com>
Date: Sat, 15 Oct 2022 15:08:05 -0400
Subject: [PATCH] pan/mdg: Use .u32 for flat shading

This is simple and matches what we do on Bifrost.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19363>
---
 src/panfrost/lib/pan_shader.c          |  2 +-
 src/panfrost/midgard/midgard_compile.c | 69 ++++++++++++--------------
 2 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/src/panfrost/lib/pan_shader.c b/src/panfrost/lib/pan_shader.c
index 9b8fd3df100..0158291cdb3 100644
--- a/src/panfrost/lib/pan_shader.c
+++ b/src/panfrost/lib/pan_shader.c
@@ -122,7 +122,7 @@ collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
                 type = nir_alu_type_get_base_type(type);
 
                 /* Can't do type conversion since GLSL IR packs in funny ways */
-                if (PAN_ARCH >= 6 && var->data.interpolation == INTERP_MODE_FLAT)
+                if (var->data.interpolation == INTERP_MODE_FLAT)
                         type = nir_type_uint;
 
                 /* Point size is handled specially on Valhall (with malloc
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 8ce6d905c5c..08a46f77a8f 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1437,9 +1437,6 @@ emit_varying_read(
         unsigned nr_comp, unsigned component,
         nir_src *indirect_offset, nir_alu_type type, bool flat)
 {
-        /* XXX: Half-floats? */
-        /* TODO: swizzle, mask */
-
         midgard_instruction ins = m_ld_vary_32(dest, PACK_LDST_ATTRIB_OFS(offset));
         ins.mask = mask_of(nr_comp);
         ins.dest_type = type;
@@ -1452,7 +1449,6 @@ emit_varying_read(
         for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i)
                 ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W);
 
-
         midgard_varying_params p = {
                 .flat_shading = flat,
                 .perspective_correction = 1,
@@ -1469,24 +1465,20 @@ emit_varying_read(
         ins.load_store.arg_reg = REGISTER_LDST_ZERO;
         ins.load_store.index_format = midgard_index_address_u32;
 
-        /* Use the type appropriate load */
-        switch (type) {
-        case nir_type_uint32:
-        case nir_type_bool32:
+        /* For flat shading, we always use .u32 and require 32-bit mode. For
+         * smooth shading, we use the appropriate floating-point type.
+         *
+         * This could be optimized, but it makes it easy to check correctness.
+         */
+        if (flat) {
+                assert(nir_alu_type_get_type_size(type) == 32);
                 ins.op = midgard_op_ld_vary_32u;
-                break;
-        case nir_type_int32:
-                ins.op = midgard_op_ld_vary_32i;
-                break;
-        case nir_type_float32:
-                ins.op = midgard_op_ld_vary_32;
-                break;
-        case nir_type_float16:
-                ins.op = midgard_op_ld_vary_16;
-                break;
-        default:
-                unreachable("Attempted to load unknown type");
-                break;
+        } else {
+                assert(nir_alu_type_get_base_type(type) == nir_type_float);
+
+                ins.op = (nir_alu_type_get_type_size(type) == 32) ?
+                         midgard_op_ld_vary_32 :
+                         midgard_op_ld_vary_16;
         }
 
         emit_mir_instruction(ctx, ins);
@@ -2028,26 +2020,29 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                         unsigned dst_component = nir_intrinsic_component(instr);
                         unsigned nr_comp = nir_src_num_components(instr->src[0]);
 
+                        /* ABI: Format controlled by the attribute descriptor.
+                         * This simplifies flat shading, although it prevents
+                         * certain (unimplemented) 16-bit optimizations.
+                         *
+                         * In particular, it lets the driver handle internal
+                         * TGSI shaders that set flat in the VS but smooth in
+                         * the FS. This matches our handling on Bifrost.
+                         */
+                        bool auto32 = true;
+                        assert(nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)) == 32);
+
+                        /* ABI: varyings in the secondary attribute table */
+                        bool secondary_table = true;
+
                         midgard_instruction st = m_st_vary_32(reg, PACK_LDST_ATTRIB_OFS(offset));
                         st.load_store.arg_reg = REGISTER_LDST_ZERO;
-                        st.load_store.index_format = midgard_index_address_u32;
                         st.load_store.index_reg = REGISTER_LDST_ZERO;
 
-                        switch (nir_alu_type_get_base_type(nir_intrinsic_src_type(instr))) {
-                        case nir_type_uint:
-                        case nir_type_bool:
-                                st.op = midgard_op_st_vary_32u;
-                                break;
-                        case nir_type_int:
-                                st.op = midgard_op_st_vary_32i;
-                                break;
-                        case nir_type_float:
-                                st.op = midgard_op_st_vary_32;
-                                break;
-                        default:
-                                unreachable("Attempted to store unknown type");
-                                break;
-                        }
+                        /* Attribute instruction uses these 2-bits for the
+                         * a32 and table bits, pack this specially.
+                         */
+                        st.load_store.index_format = (auto32 ? (1 << 0) : 0) |
+                                                     (secondary_table ? (1 << 1) : 0);
 
                         /* nir_intrinsic_component(store_intr) encodes the
                          * destination component start. Source component offset