From 6ab7a03aef58a8f5a0b77648392a51e6668cbdda Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Wed, 13 Aug 2025 14:14:48 +0000 Subject: [PATCH] panfrost: Split texture lowering passes We now have lower_texture_early and lower_texture. lower_texture_early handles nir_lower_tex and (in the future) could handle anything that is backend specific that needs to happen before nir_lower_io. lower_texture handles actual lowering of backend specific things that must happen after nir_lower_tex and nir_lower_io. This allows us to finally not run nir_lower_tex twice in panvk. Signed-off-by: Mary Guillemard Reviewed-by: Olivia Lee Part-of: --- src/gallium/drivers/panfrost/pan_fb_preload.c | 2 + src/gallium/drivers/panfrost/pan_shader.c | 4 ++ src/panfrost/clc/pan_compile.c | 2 + src/panfrost/compiler/bifrost_compile.c | 37 ++++++++----------- src/panfrost/compiler/bifrost_compile.h | 1 + src/panfrost/lib/pan_shader.h | 28 ++++++++++++++ src/panfrost/midgard/midgard_compile.c | 27 +++++--------- src/panfrost/midgard/midgard_compile.h | 1 + src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c | 2 + src/panfrost/vulkan/panvk_vX_shader.c | 21 ++++------- 10 files changed, 72 insertions(+), 53 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c index 09c9a666960..dea2b320103 100644 --- a/src/gallium/drivers/panfrost/pan_fb_preload.c +++ b/src/gallium/drivers/panfrost/pan_fb_preload.c @@ -562,6 +562,8 @@ pan_preload_get_shader(struct pan_fb_preload_cache *cache, BITSET_SET(b.shader->info.textures_used, i); pan_shader_preprocess(b.shader, inputs.gpu_id); + pan_shader_lower_texture_early(b.shader, inputs.gpu_id); + pan_shader_lower_texture(b.shader, inputs.gpu_id); pan_shader_postprocess(b.shader, inputs.gpu_id); if (PAN_ARCH == 4) { diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 6cb8ae62f4b..ea0d734d0e7 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ 
b/src/gallium/drivers/panfrost/pan_shader.c @@ -138,6 +138,8 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, */ if (mesa_shader_stage_is_compute(s->info.stage)) { pan_shader_preprocess(s, panfrost_device_gpu_id(dev)); + pan_shader_lower_texture_early(s, panfrost_device_gpu_id(dev)); + pan_shader_lower_texture(s, panfrost_device_gpu_id(dev)); pan_shader_postprocess(s, panfrost_device_gpu_id(dev)); } @@ -505,6 +507,7 @@ panfrost_create_shader_state(struct pipe_context *pctx, /* Then run the suite of lowering and optimization, including I/O lowering */ struct panfrost_device *dev = pan_device(pctx->screen); pan_shader_preprocess(nir, panfrost_device_gpu_id(dev)); + pan_shader_lower_texture_early(nir, panfrost_device_gpu_id(dev)); NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics); @@ -520,6 +523,7 @@ panfrost_create_shader_state(struct pipe_context *pctx, */ NIR_PASS(_, nir, nir_opt_constant_folding); + pan_shader_lower_texture(nir, panfrost_device_gpu_id(dev)); pan_shader_postprocess(nir, panfrost_device_gpu_id(dev)); if (nir->info.stage == MESA_SHADER_FRAGMENT) diff --git a/src/panfrost/clc/pan_compile.c b/src/panfrost/clc/pan_compile.c index a9b1010b860..b80546aaefe 100644 --- a/src/panfrost/clc/pan_compile.c +++ b/src/panfrost/clc/pan_compile.c @@ -410,6 +410,8 @@ main(int argc, const char **argv) } while (progress); pan_shader_preprocess(s, inputs.gpu_id); + pan_shader_lower_texture_early(s, inputs.gpu_id); + pan_shader_lower_texture(s, inputs.gpu_id); pan_shader_postprocess(s, inputs.gpu_id); NIR_PASS(_, s, nir_opt_deref); diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 7d4e6a562ea..fd386d8460d 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -5982,12 +5982,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id) }; NIR_PASS(_, nir, 
nir_lower_ssbo, &ssbo_opts); - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - NIR_PASS(_, nir, nir_lower_is_helper_invocation); - NIR_PASS(_, nir, pan_lower_helper_invocation); - NIR_PASS(_, nir, pan_lower_sample_pos); - } - /* * Lower subgroups ops before lowering int64: nir_lower_int64 doesn't know * how to lower imul reductions and scans. @@ -6038,22 +6032,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, nir_lower_idiv, &(nir_lower_idiv_options){.allow_fp16 = true}); - NIR_PASS(_, nir, nir_lower_tex, - &(nir_lower_tex_options){ - .lower_txs_lod = true, - .lower_txp = ~0, - .lower_tg4_broadcom_swizzle = true, - .lower_txd_cube_map = true, - .lower_invalid_implicit_lod = true, - .lower_index_to_offset = true, - }); - - NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); - - /* on bifrost, lower MSAA load/stores to 3D load/stores */ - if (pan_arch(gpu_id) < 9) - NIR_PASS(_, nir, pan_nir_lower_image_ms); - NIR_PASS(_, nir, nir_shader_alu_pass, bi_lower_ldexp16, nir_metadata_control_flow, NULL); @@ -6067,6 +6045,21 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, pan_nir_lower_frag_coord_zw); } +void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id) +{ + NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); + + /* on Bifrost, lower MSAA load/stores to 3D load/stores */ + if (pan_arch(gpu_id) < 9) + NIR_PASS(_, nir, pan_nir_lower_image_ms); + + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS(_, nir, nir_lower_is_helper_invocation); + NIR_PASS(_, nir, pan_lower_helper_invocation); + NIR_PASS(_, nir, pan_lower_sample_pos); + } +} + static bi_context * bi_compile_variant_nir(nir_shader *nir, const struct pan_compile_inputs *inputs, diff --git a/src/panfrost/compiler/bifrost_compile.h b/src/panfrost/compiler/bifrost_compile.h index 5ed6c586be0..7b87d30669f 100644 --- a/src/panfrost/compiler/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost_compile.h @@ -83,6 +83,7 
@@ bifrost_precompiled_kernel_prepare_push_uniforms( void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id); void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id); +void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id); void bifrost_compile_shader_nir(nir_shader *nir, const struct pan_compile_inputs *inputs, diff --git a/src/panfrost/lib/pan_shader.h b/src/panfrost/lib/pan_shader.h index 03b0c876661..1c55e4f1d1a 100644 --- a/src/panfrost/lib/pan_shader.h +++ b/src/panfrost/lib/pan_shader.h @@ -36,8 +36,10 @@ void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id); void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id); +void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id); void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id); void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id); +void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id); static unsigned pan_get_fixed_varying_mask(unsigned varyings_used) @@ -64,6 +66,32 @@ pan_shader_postprocess(nir_shader *nir, unsigned gpu_id) midgard_postprocess_nir(nir, gpu_id); } +static inline void +pan_shader_lower_texture_early(nir_shader *nir, unsigned gpu_id) +{ + nir_lower_tex_options lower_tex_options = { + .lower_txs_lod = true, + .lower_txp = ~0, + .lower_tg4_offsets = true, + .lower_tg4_broadcom_swizzle = true, + .lower_txd = pan_arch(gpu_id) < 6, + .lower_txd_cube_map = true, + .lower_invalid_implicit_lod = true, + .lower_index_to_offset = pan_arch(gpu_id) >= 6, + }; + + NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options); +} + +static inline void +pan_shader_lower_texture(nir_shader *nir, unsigned gpu_id) +{ + if (pan_arch(gpu_id) >= 6) + bifrost_lower_texture_nir(nir, gpu_id); + else + midgard_lower_texture_nir(nir, gpu_id); +} + static inline void pan_shader_disassemble(FILE *fp, const void *code, size_t size, unsigned gpu_id, bool verbose) diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c 
index de4aac1f3fa..aa0369be91c 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -393,10 +393,8 @@ midgard_preprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id) } void -midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id) +midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id) { - unsigned quirks = midgard_get_quirks(gpu_id); - if (nir->info.stage == MESA_SHADER_VERTEX) { /* nir_lower[_explicit]_io is lazy and emits mul+add chains even * for offsets it could figure out are constant. Do some @@ -434,21 +432,20 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, nir_lower_idiv, &idiv_options); - nir_lower_tex_options lower_tex_options = { - .lower_txs_lod = true, - .lower_txp = ~0, - .lower_tg4_broadcom_swizzle = true, - .lower_txd = true, - .lower_invalid_implicit_lod = true, - }; + NIR_PASS(_, nir, midgard_nir_lower_algebraic_early); + NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL); + NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */); + NIR_PASS(_, nir, nir_lower_var_copies); +} - NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options); +void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id) +{ NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); /* TEX_GRAD fails to apply sampler descriptor settings on some * implementations, requiring a lowering. 
*/ - if (quirks & MIDGARD_BROKEN_LOD) + if (midgard_get_quirks(gpu_id) & MIDGARD_BROKEN_LOD) NIR_PASS(_, nir, midgard_nir_lod_errata); /* lower MSAA image operations to 3D load before coordinate lowering */ @@ -463,13 +460,9 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, pan_lower_helper_invocation); NIR_PASS(_, nir, pan_lower_sample_pos); } - - NIR_PASS(_, nir, midgard_nir_lower_algebraic_early); - NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL); - NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */); - NIR_PASS(_, nir, nir_lower_var_copies); } + static void optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend) { diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index d2f0e0f49ee..825665d6c2e 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -31,6 +31,7 @@ void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id); void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id); +void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id); void midgard_compile_shader_nir(nir_shader *nir, const struct pan_compile_inputs *inputs, diff --git a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c index de9be178a54..2ddbc6dd361 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c @@ -153,6 +153,8 @@ get_preload_shader(struct panvk_device *dev, }; pan_shader_preprocess(nir, inputs.gpu_id); + pan_shader_lower_texture_early(nir, inputs.gpu_id); + pan_shader_lower_texture(nir, inputs.gpu_id); pan_shader_postprocess(nir, inputs.gpu_id); VkResult result = panvk_per_arch(create_internal_shader)( diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 6fdf6def48a..d3a478bf5da 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ 
b/src/panfrost/vulkan/panvk_vX_shader.c @@ -414,37 +414,29 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev, NIR_PASS(_, nir, nir_opt_barrier_modes); NIR_PASS(_, nir, nir_opt_acquire_release_barriers, SCOPE_DEVICE); - /* Do texture lowering here. Yes, it's a duplication of the texture - * lowering in bifrost_compile. However, we need to lower texture stuff + /* Do texture lowering here. We need to lower texture stuff * now, before we call panvk_per_arch(nir_lower_descriptors)() because some * of the texture lowering generates nir_texop_txs which we handle as part * of descriptor lowering. * - * TODO: We really should be doing this in common code, not dpulicated in - * panvk. In order to do that, we need to rework the panfrost compile + * TODO: We really should be doing this in common code, not duplicated in + * panvk. In order to do that, we need to rework the panfrost compile * flow to look more like the Intel flow: * * 1. Compile SPIR-V to NIR and maybe do a tiny bit of lowering that needs * to be done really early. * - * 2. pan_preprocess_nir: Does common lowering and runs the optimization + * 2. pan_shader_preprocess: Does common lowering and runs the optimization * loop. Nothing here should be API-specific. * * 3. Do additional lowering in panvk * - * 4. pan_postprocess_nir: Does final lowering and runs the optimization + * 4. pan_shader_postprocess: Does final lowering and runs the optimization * loop again. This can happen as part of the final compile. * * This would give us a better place to do panvk-specific lowering. 
*/ - nir_lower_tex_options lower_tex_options = { - .lower_txs_lod = true, - .lower_txp = ~0, - .lower_tg4_offsets = true, - .lower_txd_cube_map = true, - .lower_invalid_implicit_lod = true, - }; - NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options); + pan_shader_lower_texture_early(nir, pdev->kmod.props.gpu_id); NIR_PASS(_, nir, nir_lower_system_values); nir_lower_compute_system_values_options options = { @@ -939,6 +931,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir, */ NIR_PASS(_, nir, nir_opt_constant_folding); + pan_shader_lower_texture(nir, compile_input->gpu_id); pan_shader_postprocess(nir, compile_input->gpu_id); if (stage == MESA_SHADER_VERTEX)