pan/bi: lower subgroups before lowering int64

nir_lower_int64 doesn't know how to lower 64-bit imul reductions and scans.
Lowering subgroup operations first leaves us with just 64-bit ballot and
read_invocation, which are easily lowered.

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33365>
2026-05-08 17:48:10 +02:00 · 2025-03-04 22:31:11 +00:00 · 2025-03-04 22:31:11 +00:00 · c4941376a9
commit c4941376a9
parent 0f520e3d5a
1 changed files with 27 additions and 23 deletions
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@@ -5664,32 +5664,12 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
   NIR_PASS(_, nir, nir_lower_ssbo, &ssbo_opts);

   NIR_PASS(_, nir, pan_lower_sample_pos);
-   NIR_PASS(_, nir, nir_lower_64bit_phis);
   NIR_PASS(_, nir, pan_lower_helper_invocation);
-   NIR_PASS(_, nir, nir_lower_int64);
-   NIR_PASS(_, nir, nir_lower_bit_size, bi_lower_bit_size, &gpu_id);
-
-   NIR_PASS(_, nir, nir_opt_idiv_const, 8);
-   NIR_PASS(_, nir, nir_lower_idiv,
-            &(nir_lower_idiv_options){.allow_fp16 = true});
-
-   NIR_PASS(_, nir, nir_lower_tex,
-            &(nir_lower_tex_options){
-               .lower_txs_lod = true,
-               .lower_txp = ~0,
-               .lower_tg4_broadcom_swizzle = true,
-               .lower_txd_cube_map = true,
-               .lower_invalid_implicit_lod = true,
-               .lower_index_to_offset = true,
-            });
-
-   NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
-
-   /* on bifrost, lower MSAA load/stores to 3D load/stores */
-   if (pan_arch(gpu_id) < 9)
-      NIR_PASS(_, nir, pan_nir_lower_image_ms);

   /*
+    * Lower subgroups ops before lowering int64: nir_lower_int64 doesn't know
+    * how to lower imul reductions and scans.
+    *
    * TODO: we can implement certain operations (notably reductions, scans,
    * certain shuffles, etc) more efficiently than nir_lower_subgroups. Moreover
    * we can implement reductions and scans on f16vec2 values without splitting
@@ -5727,6 +5707,30 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
   NIR_PASS(_, nir, nir_shader_intrinsics_pass, bi_lower_subgroups,
      nir_metadata_control_flow, &gpu_id);

+   NIR_PASS(_, nir, nir_lower_64bit_phis);
+   NIR_PASS(_, nir, nir_lower_int64);
+   NIR_PASS(_, nir, nir_lower_bit_size, bi_lower_bit_size, &gpu_id);
+
+   NIR_PASS(_, nir, nir_opt_idiv_const, 8);
+   NIR_PASS(_, nir, nir_lower_idiv,
+            &(nir_lower_idiv_options){.allow_fp16 = true});
+
+   NIR_PASS(_, nir, nir_lower_tex,
+            &(nir_lower_tex_options){
+               .lower_txs_lod = true,
+               .lower_txp = ~0,
+               .lower_tg4_broadcom_swizzle = true,
+               .lower_txd_cube_map = true,
+               .lower_invalid_implicit_lod = true,
+               .lower_index_to_offset = true,
+            });
+
+   NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
+
+   /* on bifrost, lower MSAA load/stores to 3D load/stores */
+   if (pan_arch(gpu_id) < 9)
+      NIR_PASS(_, nir, pan_nir_lower_image_ms);
+
   NIR_PASS(_, nir, nir_shader_alu_pass, bi_lower_ldexp16,
            nir_metadata_control_flow, NULL);