hk: use new path for !soft fault UBO

this is more efficient for !soft fault and lets us drop a code path. i've been
running fossils without soft fault on my dev machine, so that's what these
stats are based on.

Totals from 11799 (22.22% of 53090) affected shaders:
MaxWaves: 11472896 -> 11467200 (-0.05%); split: +0.09%, -0.14%
Instrs: 7032360 -> 6603558 (-6.10%); split: -6.11%, +0.02%
CodeSize: 49902322 -> 48693876 (-2.42%); split: -2.95%, +0.52%
Spills: 2817 -> 2846 (+1.03%); split: -5.75%, +6.78%
Fills: 2911 -> 3143 (+7.97%); split: -2.37%, +10.34%
Scratch: 10896 -> 11020 (+1.14%); split: -1.95%, +3.08%
ALU: 5092487 -> 5265337 (+3.39%); split: -0.58%, +3.97%
FSCIB: 5092487 -> 5265333 (+3.39%); split: -0.58%, +3.97%
IC: 1461194 -> 1387408 (-5.05%); split: -5.22%, +0.17%
GPRs: 803121 -> 805414 (+0.29%); split: -0.80%, +1.08%
Uniforms: 2942007 -> 2247619 (-23.60%); split: -24.35%, +0.75%

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35720>
commit 22272c71f2 (parent 63ce73a601)
Author: Alyssa Rosenzweig
Date: 2025-06-24 12:48:02 -04:00, committed by Marge Bot

@@ -364,12 +364,17 @@ lower_load_global_bounded(nir_builder *b, nir_intrinsic_instr *intr)
    base = nir_bcsel(b, valid, base, nir_imm_int64(b, AGX_ZERO_PAGE_ADDRESS));
    offset = bound_offset(b, valid, nir_scalar_resolved(offset, 0));
 
-   nir_def *val =
-      nir_build_load_global(b, intr->def.num_components, intr->def.bit_size,
-                            nir_iadd(b, base, nir_u2u64(b, offset)),
-                            .align_mul = nir_intrinsic_align_mul(intr),
-                            .align_offset = nir_intrinsic_align_offset(intr),
-                            .access = nir_intrinsic_access(intr));
+   enum gl_access_qualifier access = nir_intrinsic_access(intr);
+   if (intr->intrinsic == nir_intrinsic_load_global_constant_bounded) {
+      access |= ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER;
+   }
+
+   nir_def *val = nir_build_load_global(
+      b, intr->def.num_components, intr->def.bit_size,
+      nir_iadd(b, base, nir_u2u64(b, offset)),
+      .align_mul = nir_intrinsic_align_mul(intr),
+      .align_offset = nir_intrinsic_align_offset(intr), .access = access);
 
    nir_def_replace(&intr->def, val);
 }
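
The hunk above widens the access qualifier for constant bounded loads: ACCESS_NON_WRITEABLE and ACCESS_CAN_REORDER tell NIR the memory behind the load is immutable and the load may be freely moved and combined, which is what keeps constant UBO loads optimizable on the new path. A minimal stand-alone sketch of the widening pattern; the enum and bit values here are stand-ins, not Mesa's real definitions from shader_enums.h:

/* Stand-in bit values for illustration only; Mesa's real
 * enum gl_access_qualifier lives in shader_enums.h. */
enum {
   SKETCH_ACCESS_NON_WRITEABLE = 1u << 0,
   SKETCH_ACCESS_CAN_REORDER   = 1u << 1,
};

/* Mirrors the hunk: both bounded-load intrinsics share one lowering, and
 * only the _constant_ variant promises immutable, reorderable data. */
static unsigned
sketch_access_for(unsigned base_access, int is_constant_bounded)
{
   if (is_constant_bounded)
      base_access |= SKETCH_ACCESS_NON_WRITEABLE | SKETCH_ACCESS_CAN_REORDER;

   return base_access;
}
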
@@ -378,7 +383,12 @@ static bool
 lower_load_global_constant_offset_instr(nir_builder *b,
                                         nir_intrinsic_instr *intrin, void *data)
 {
-   if (intrin->intrinsic == nir_intrinsic_load_global_bounded) {
+   bool *has_soft_fault = data;
+
+   if (intrin->intrinsic == nir_intrinsic_load_global_bounded ||
+       (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded &&
+        !(*has_soft_fault))) {
       lower_load_global_bounded(b, intrin);
       return true;
    }
@@ -388,12 +398,10 @@ lower_load_global_constant_offset_instr(nir_builder *b,
       return false;
 
    b->cursor = nir_before_instr(&intrin->instr);
-   bool *has_soft_fault = data;
    nir_def *base_addr = intrin->src[0].ssa;
    nir_def *offset = intrin->src[1].ssa;
    nir_def *bound = NULL;
-   nir_def *zero = NULL;
    unsigned bit_size = intrin->def.bit_size;
    assert(bit_size >= 8 && bit_size % 8 == 0);
@@ -402,15 +410,6 @@ lower_load_global_constant_offset_instr(nir_builder *b,
    if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
       bound = intrin->src[2].ssa;
-      zero = nir_imm_zero(b, intrin->num_components, bit_size);
-
-      /* If we do not have soft fault, we branch to bounds check. This is slow,
-       * fortunately we always have soft fault for release drivers.
-       *
-       * With soft fault, we speculatively load and smash to zero at the end.
-       */
-      if (!(*has_soft_fault))
-         nir_push_if(b, check_in_bounds(b, intrin));
    }
 
    unsigned align_mul = nir_intrinsic_align_mul(intrin);
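
The comment deleted in this hunk names the two old strategies: branch around the load when soft fault is unavailable, or load speculatively and smash to zero afterwards. The commit replaces the branchy fallback with the zero-page redirect done by lower_load_global_bounded (visible in the first hunk's context). A plain-C scalar model of the three strategies; the function names are illustrative, and the clamping behavior of bound_offset is inferred, not quoted:

#include <stdint.h>
#include <string.h>

/* Old !soft-fault path: branch so out-of-bounds memory is never touched. */
static uint32_t
load_branchy(const uint8_t *base, uint32_t offset, uint32_t bound)
{
   if (offset + sizeof(uint32_t) > bound)
      return 0;

   uint32_t v;
   memcpy(&v, base + offset, sizeof(v));
   return v;
}

/* Soft-fault path: load unconditionally, then select zero at the end.
 * Safe on hardware with soft fault, where a wild read returns garbage
 * instead of faulting. Purely illustrative on a CPU. */
static uint32_t
load_speculative(const uint8_t *base, uint32_t offset, uint32_t bound)
{
   uint32_t v;
   memcpy(&v, base + offset, sizeof(v)); /* may read past `bound` */
   return offset + sizeof(uint32_t) > bound ? 0 : v;
}

/* New !soft-fault path, modeling lower_load_global_bounded: branch-free,
 * redirecting invalid loads to a dedicated zero page so the load always
 * executes but out-of-bounds accesses read zeroes. Assumes bound_offset
 * clamps the offset for invalid accesses. */
static uint32_t
load_zero_page(const uint8_t *base, const uint8_t *zero_page,
               uint32_t offset, uint32_t bound)
{
   int valid = offset + sizeof(uint32_t) <= bound;

   const uint8_t *src = valid ? base : zero_page;
   offset = valid ? offset : 0;

   uint32_t v;
   memcpy(&v, src + offset, sizeof(v));
   return v;
}
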
@@ -422,47 +421,41 @@ lower_load_global_constant_offset_instr(nir_builder *b,
       .align_offset = align_offset, .access = nir_intrinsic_access(intrin));
 
    if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
-      if (*has_soft_fault) {
-         nir_scalar offs = nir_scalar_resolved(offset, 0);
-         if (nir_scalar_is_const(offs)) {
-            /* Calculate last byte loaded */
-            unsigned offs_imm = nir_scalar_as_uint(offs) + load_size;
+      nir_scalar offs = nir_scalar_resolved(offset, 0);
+      if (nir_scalar_is_const(offs)) {
+         /* Calculate last byte loaded */
+         unsigned offs_imm = nir_scalar_as_uint(offs) + load_size;
 
-            /* Simplify the bounds check. Uniform buffers are bounds checked at
-             * 64B granularity, so `bound` is a multiple of K = 64. Then
-             *
-             * offs_imm < bound <==> round_down(offs_imm, K) < bound. Proof:
-             *
-             * "=>" round_down(offs_imm, K) <= offs_imm < bound.
-             *
-             * "<=" Let a, b be integer s.t. offs_imm = K a + b with b < K.
-             *      Note round_down(offs_imm, K) = Ka.
-             *
-             *      Let c be integer s.t. bound = Kc.
-             *      We have Ka < Kc => a < c.
-             *      b < K => Ka + b < K(a + 1).
-             *
-             *      a < c with integers => a + 1 <= c.
-             *      offs_imm < K(a + 1) <= Kc = bound.
-             *      Hence offs_imm < bound.
-             */
-            assert(align_mul == 64);
-            offs_imm &= ~(align_mul - 1);
+         /* Simplify the bounds check. Uniform buffers are bounds checked at
+          * 64B granularity, so `bound` is a multiple of K = 64. Then
+          *
+          * offs_imm < bound <==> round_down(offs_imm, K) < bound. Proof:
+          *
+          * "=>" round_down(offs_imm, K) <= offs_imm < bound.
+          *
+          * "<=" Let a, b be integer s.t. offs_imm = K a + b with b < K.
+          *      Note round_down(offs_imm, K) = Ka.
+          *
+          *      Let c be integer s.t. bound = Kc.
+          *      We have Ka < Kc => a < c.
+          *      b < K => Ka + b < K(a + 1).
+          *
+          *      a < c with integers => a + 1 <= c.
+          *      offs_imm < K(a + 1) <= Kc = bound.
+          *      Hence offs_imm < bound.
+          */
+         assert(align_mul == 64);
+         offs_imm &= ~(align_mul - 1);
 
-            /* Bounds checks are `offset > bound ? 0 : val` so if offset = 0,
-             * the bounds check is useless.
-             */
-            if (offs_imm) {
-               val = bounds_check(b, val, nir_imm_int(b, offs_imm), bound);
-            }
-         } else {
-            offset = nir_iadd_imm(b, offset, load_size);
-            val = bounds_check(b, val, offset, bound);
+         /* Bounds checks are `offset > bound ? 0 : val` so if offset = 0,
+          * the bounds check is useless.
+          */
+         if (offs_imm) {
+            val = bounds_check(b, val, nir_imm_int(b, offs_imm), bound);
          }
       } else {
-         nir_pop_if(b, NULL);
-         val = nir_if_phi(b, val, zero);
+         offset = nir_iadd_imm(b, offset, load_size);
+         val = bounds_check(b, val, offset, bound);
       }
    }
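
The round_down argument in the final hunk's comment can also be checked mechanically. A throwaway brute-force test, not part of the commit, exhausting small values under the comment's assumption that `bound` is a multiple of K = 64:

#include <assert.h>
#include <stdio.h>

int main(void)
{
   const unsigned K = 64;

   /* For every bound that is a multiple of K, rounding the offset down to
    * a multiple of K must not change the result of the bounds check. */
   for (unsigned bound = 0; bound <= 4096; bound += K) {
      for (unsigned offs_imm = 0; offs_imm < 8192; offs_imm++) {
         int exact   = offs_imm < bound;
         int rounded = (offs_imm & ~(K - 1)) < bound;
         assert(exact == rounded);
      }
   }

   printf("offs_imm < bound <==> round_down(offs_imm, 64) < bound: verified\n");
   return 0;
}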