From 98f96feda8ea1b4c48e234476d24c4ef73be5e42 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 9 May 2025 16:55:44 +0100 Subject: [PATCH] ac/llvm: correctly split vector 8/16-bit stores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous code assumed that the start of the store is 32-bit aligned. For example, a vec3 16-bit store with align_offset=2 should split off the first component, not the last. This probably also fixes splitting of 8-bit stores. Fixes arb_copy_buffer-overlap Signed-off-by: Rhys Perry Reviewed-by: Marek Olšák Tested-by: Michel Dänzer Backport-to: 25.0 Part-of: (cherry picked from commit c1ecad2b1156625714a32c00f55597921ab8f642) --- .pick_status.json | 2 +- src/amd/llvm/ac_nir_to_llvm.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 882410d5903..273774ad211 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3574,7 +3574,7 @@ "description": "ac/llvm: correctly split vector 8/16-bit stores", "nominated": true, "nomination_type": 4, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 714dfe39e4d..2d95f06c74a 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1571,11 +1571,14 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in num_bytes = 16; } - /* check alignment of 16 Bit stores */ - if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) { - writemask |= ((1u << (count - 1)) - 1u) << (start + 1); + /* check alignment of 8/16 Bit stores */ + uint32_t align_mul = nir_intrinsic_align_mul(instr); + uint32_t align_offset = nir_intrinsic_align_offset(instr) + start * elem_size_bytes; + uint32_t align = nir_combined_align(align_mul, align_offset & (align_mul - 1)); + if (align < MIN2(num_bytes, 4) || (ctx->ac.gfx_level == GFX6 && 
elem_size_bytes < 4)) { + writemask |= BITFIELD_RANGE(start + 1, count - 1); count = 1; - num_bytes = 2; + num_bytes = elem_size_bytes; } /* Due to alignment issues, split stores of 8-bit/16-bit