ac/llvm: correctly split vector 8/16-bit stores

The previous code assumed that the start of the store is 32-bit aligned. For example, a vec3 16-bit store with align_offset=2 should split off the first component, not the last. This probably also fixes splitting of 8-bit stores.

Fixes arb_copy_buffer-overlap

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Michel Dänzer <mdaenzer@redhat.com>
Backport-to: 25.0
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34903>
(cherry picked from commit c1ecad2b11)
2026-01-31 13:30:42 +01:00 · 2025-05-09 16:55:44 +01:00 · 2025-05-09 16:55:44 +01:00 · 98f96feda8
commit 98f96feda8
parent e11a494e27
2 changed files with 8 additions and 5 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -3574,7 +3574,7 @@
        "description": "ac/llvm: correctly split vector 8/16-bit stores",
        "nominated": true,
        "nomination_type": 4,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": null,
        "notes": null
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -1571,11 +1571,14 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
         num_bytes = 16;
      }

-      /* check alignment of 16 Bit stores */
-      if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
-         writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
+      /* check alignment of 8/16 Bit stores */
+      uint32_t align_mul = nir_intrinsic_align_mul(instr);
+      uint32_t align_offset = nir_intrinsic_align_offset(instr) + start * elem_size_bytes;
+      uint32_t align = nir_combined_align(align_mul, align_offset & (align_mul - 1));
+      if (align < MIN2(num_bytes, 4) || (ctx->ac.gfx_level == GFX6 && elem_size_bytes < 4)) {
+         writemask |= BITFIELD_RANGE(start + 1, count - 1);
         count = 1;
-         num_bytes = 2;
+         num_bytes = elem_size_bytes;
      }

      /* Due to alignment issues, split stores of 8-bit/16-bit