panfrost: Split texture lowering passes

We now have lower_texture_early and lower_texture.

lower_texture_early handles nir_lower_tex and (in the future) could handle
anything backend-specific that needs to happen before nir_lower_io.

lower_texture handles the actual lowering of backend-specific things that
must happen after nir_lower_tex and nir_lower_io.

This finally allows us to stop running nir_lower_tex twice in panvk.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Olivia Lee <olivia.lee@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36776>
Mary Guillemard authored 2025-08-13 14:14:48 +00:00, committed by Marge Bot
parent 310eabacc0
commit 6ab7a03aef
10 changed files with 72 additions and 53 deletions
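
For reference, a minimal sketch of the call order this split enables, modeled on the panfrost_create_shader_state hunk below; glsl_type_size is the driver's GLSL type-size callback and is assumed to be in scope, and the surrounding driver state and error handling are omitted:

/* Sketch only: mirrors the ordering used in panfrost_create_shader_state. */
static void
example_compile_flow(nir_shader *nir, unsigned gpu_id)
{
   /* Common lowering and the shared optimization loop. */
   pan_shader_preprocess(nir, gpu_id);

   /* nir_lower_tex plus anything backend-specific that must run before
    * nir_lower_io (new in this commit). */
   pan_shader_lower_texture_early(nir, gpu_id);

   /* I/O lowering (in panvk, descriptor lowering happens around here). */
   NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
            glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);

   /* Backend-specific texture lowering that must run after nir_lower_tex
    * and nir_lower_io (new in this commit). */
   pan_shader_lower_texture(nir, gpu_id);

   /* Final lowering and another optimization pass. */
   pan_shader_postprocess(nir, gpu_id);
}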

@@ -562,6 +562,8 @@ pan_preload_get_shader(struct pan_fb_preload_cache *cache,
BITSET_SET(b.shader->info.textures_used, i);
pan_shader_preprocess(b.shader, inputs.gpu_id);
pan_shader_lower_texture_early(b.shader, inputs.gpu_id);
pan_shader_lower_texture(b.shader, inputs.gpu_id);
pan_shader_postprocess(b.shader, inputs.gpu_id);
if (PAN_ARCH == 4) {

@@ -138,6 +138,8 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
*/
if (mesa_shader_stage_is_compute(s->info.stage)) {
pan_shader_preprocess(s, panfrost_device_gpu_id(dev));
pan_shader_lower_texture_early(s, panfrost_device_gpu_id(dev));
pan_shader_lower_texture(s, panfrost_device_gpu_id(dev));
pan_shader_postprocess(s, panfrost_device_gpu_id(dev));
}
@@ -505,6 +507,7 @@ panfrost_create_shader_state(struct pipe_context *pctx,
/* Then run the suite of lowering and optimization, including I/O lowering */
struct panfrost_device *dev = pan_device(pctx->screen);
pan_shader_preprocess(nir, panfrost_device_gpu_id(dev));
pan_shader_lower_texture_early(nir, panfrost_device_gpu_id(dev));
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);
@@ -520,6 +523,7 @@ panfrost_create_shader_state(struct pipe_context *pctx,
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
pan_shader_lower_texture(nir, panfrost_device_gpu_id(dev));
pan_shader_postprocess(nir, panfrost_device_gpu_id(dev));
if (nir->info.stage == MESA_SHADER_FRAGMENT)

@@ -410,6 +410,8 @@ main(int argc, const char **argv)
} while (progress);
pan_shader_preprocess(s, inputs.gpu_id);
pan_shader_lower_texture_early(s, inputs.gpu_id);
pan_shader_lower_texture(s, inputs.gpu_id);
pan_shader_postprocess(s, inputs.gpu_id);
NIR_PASS(_, s, nir_opt_deref);

@@ -5982,12 +5982,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
};
NIR_PASS(_, nir, nir_lower_ssbo, &ssbo_opts);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, nir, nir_lower_is_helper_invocation);
NIR_PASS(_, nir, pan_lower_helper_invocation);
NIR_PASS(_, nir, pan_lower_sample_pos);
}
/*
* Lower subgroups ops before lowering int64: nir_lower_int64 doesn't know
* how to lower imul reductions and scans.
@@ -6038,22 +6032,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, nir_lower_idiv,
&(nir_lower_idiv_options){.allow_fp16 = true});
NIR_PASS(_, nir, nir_lower_tex,
&(nir_lower_tex_options){
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_broadcom_swizzle = true,
.lower_txd_cube_map = true,
.lower_invalid_implicit_lod = true,
.lower_index_to_offset = true,
});
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
/* on bifrost, lower MSAA load/stores to 3D load/stores */
if (pan_arch(gpu_id) < 9)
NIR_PASS(_, nir, pan_nir_lower_image_ms);
NIR_PASS(_, nir, nir_shader_alu_pass, bi_lower_ldexp16,
nir_metadata_control_flow, NULL);
@@ -6067,6 +6045,21 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, pan_nir_lower_frag_coord_zw);
}
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id)
{
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
/* on Bifrost, lower MSAA load/stores to 3D load/stores */
if (pan_arch(gpu_id) < 9)
NIR_PASS(_, nir, pan_nir_lower_image_ms);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, nir, nir_lower_is_helper_invocation);
NIR_PASS(_, nir, pan_lower_helper_invocation);
NIR_PASS(_, nir, pan_lower_sample_pos);
}
}
static bi_context *
bi_compile_variant_nir(nir_shader *nir,
const struct pan_compile_inputs *inputs,

@@ -83,6 +83,7 @@ bifrost_precompiled_kernel_prepare_push_uniforms(
void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_compile_shader_nir(nir_shader *nir,
const struct pan_compile_inputs *inputs,

@@ -36,8 +36,10 @@
void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id);
void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
static unsigned
pan_get_fixed_varying_mask(unsigned varyings_used)
@@ -64,6 +66,32 @@ pan_shader_postprocess(nir_shader *nir, unsigned gpu_id)
midgard_postprocess_nir(nir, gpu_id);
}
static inline void
pan_shader_lower_texture_early(nir_shader *nir, unsigned gpu_id)
{
nir_lower_tex_options lower_tex_options = {
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_offsets = true,
.lower_tg4_broadcom_swizzle = true,
.lower_txd = pan_arch(gpu_id) < 6,
.lower_txd_cube_map = true,
.lower_invalid_implicit_lod = true,
.lower_index_to_offset = pan_arch(gpu_id) >= 6,
};
NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
}
static inline void
pan_shader_lower_texture(nir_shader *nir, unsigned gpu_id)
{
if (pan_arch(gpu_id) >= 6)
bifrost_lower_texture_nir(nir, gpu_id);
else
midgard_lower_texture_nir(nir, gpu_id);
}
static inline void
pan_shader_disassemble(FILE *fp, const void *code, size_t size, unsigned gpu_id,
bool verbose)

@@ -393,10 +393,8 @@ midgard_preprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
}
void
midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id)
midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
{
unsigned quirks = midgard_get_quirks(gpu_id);
if (nir->info.stage == MESA_SHADER_VERTEX) {
/* nir_lower[_explicit]_io is lazy and emits mul+add chains even
* for offsets it could figure out are constant. Do some
@@ -434,21 +432,20 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, nir_lower_idiv, &idiv_options);
nir_lower_tex_options lower_tex_options = {
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_broadcom_swizzle = true,
.lower_txd = true,
.lower_invalid_implicit_lod = true,
};
NIR_PASS(_, nir, midgard_nir_lower_algebraic_early);
NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
NIR_PASS(_, nir, nir_lower_var_copies);
}
NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id)
{
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
/* TEX_GRAD fails to apply sampler descriptor settings on some
* implementations, requiring a lowering.
*/
if (quirks & MIDGARD_BROKEN_LOD)
if (midgard_get_quirks(gpu_id) & MIDGARD_BROKEN_LOD)
NIR_PASS(_, nir, midgard_nir_lod_errata);
/* lower MSAA image operations to 3D load before coordinate lowering */
@@ -463,13 +460,9 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, pan_lower_helper_invocation);
NIR_PASS(_, nir, pan_lower_sample_pos);
}
NIR_PASS(_, nir, midgard_nir_lower_algebraic_early);
NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
NIR_PASS(_, nir, nir_lower_var_copies);
}
static void
optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
{

@@ -31,6 +31,7 @@
void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id);
void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
void midgard_compile_shader_nir(nir_shader *nir,
const struct pan_compile_inputs *inputs,

@@ -153,6 +153,8 @@ get_preload_shader(struct panvk_device *dev,
};
pan_shader_preprocess(nir, inputs.gpu_id);
pan_shader_lower_texture_early(nir, inputs.gpu_id);
pan_shader_lower_texture(nir, inputs.gpu_id);
pan_shader_postprocess(nir, inputs.gpu_id);
VkResult result = panvk_per_arch(create_internal_shader)(

@@ -414,37 +414,29 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev,
NIR_PASS(_, nir, nir_opt_barrier_modes);
NIR_PASS(_, nir, nir_opt_acquire_release_barriers, SCOPE_DEVICE);
/* Do texture lowering here. Yes, it's a duplication of the texture
* lowering in bifrost_compile. However, we need to lower texture stuff
/* Do texture lowering here. We need to lower texture stuff
* now, before we call panvk_per_arch(nir_lower_descriptors)() because some
* of the texture lowering generates nir_texop_txs which we handle as part
* of descriptor lowering.
*
* TODO: We really should be doing this in common code, not dpulicated in
* panvk. In order to do that, we need to rework the panfrost compile
* TODO: We really should be doing this in common code, not duplicated in
* panvk. In order to do that, we need to rework the panfrost compile
* flow to look more like the Intel flow:
*
* 1. Compile SPIR-V to NIR and maybe do a tiny bit of lowering that needs
* to be done really early.
*
* 2. pan_preprocess_nir: Does common lowering and runs the optimization
* 2. pan_shader_preprocess: Does common lowering and runs the optimization
* loop. Nothing here should be API-specific.
*
* 3. Do additional lowering in panvk
*
* 4. pan_postprocess_nir: Does final lowering and runs the optimization
* 4. pan_shader_postprocess: Does final lowering and runs the optimization
* loop again. This can happen as part of the final compile.
*
* This would give us a better place to do panvk-specific lowering.
*/
nir_lower_tex_options lower_tex_options = {
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_offsets = true,
.lower_txd_cube_map = true,
.lower_invalid_implicit_lod = true,
};
NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
pan_shader_lower_texture_early(nir, pdev->kmod.props.gpu_id);
NIR_PASS(_, nir, nir_lower_system_values);
nir_lower_compute_system_values_options options = {
@@ -939,6 +931,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
pan_shader_lower_texture(nir, compile_input->gpu_id);
pan_shader_postprocess(nir, compile_input->gpu_id);
if (stage == MESA_SHADER_VERTEX)