From f56bb3ec4b9cac2d12e889c38748320bb0b0db72 Mon Sep 17 00:00:00 2001 From: Jesse Natalie Date: Fri, 3 Nov 2023 09:43:45 -0700 Subject: [PATCH] nir_lower_mem_access_bit_sizes: Fix write-mask-constrained 3-byte stores as atomics The code here handled stores of actual 3-byte values (8-bit, 3-component), but didn't correctly handle stores of larger 8-bit vectors that were constrained by write mask to just 3 bytes. In that case, the pad-to-vec4 step was unnecessary and problematic. Seen in CL CTS test_basic vector_swizzle test group for char3 with CLOn12. Fixes: c70d94a8 ("nir_lower_mem_access_bit_sizes: Support unaligned stores via a pair of atomics") Reviewed-by: Faith Ekstrand Part-of: (cherry picked from commit cd0cff951a5b7c74d704198b5abfdb40b267cbdc) --- .pick_status.json | 2 +- .../nir/nir_lower_mem_access_bit_sizes.c | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index ac610ebbee4..86e4306d5b9 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -434,7 +434,7 @@ "description": "nir_lower_mem_access_bit_sizes: Fix write-mask-constrained 3-byte stores as atomics", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "c70d94a889709d181e9569d4089f2d017b5684fc", "notes": null diff --git a/src/compiler/nir/nir_lower_mem_access_bit_sizes.c b/src/compiler/nir/nir_lower_mem_access_bit_sizes.c index 6d6d38f329b..c0b693a8b56 100644 --- a/src/compiler/nir/nir_lower_mem_access_bit_sizes.c +++ b/src/compiler/nir/nir_lower_mem_access_bit_sizes.c @@ -317,26 +317,26 @@ lower_mem_store(nir_builder *b, nir_intrinsic_instr *intrin, chunk_bytes = MIN2(max_chunk_bytes, requested_bytes - max_pad); unsigned chunk_bits = chunk_bytes * 8; - nir_def *chunk_value = value; - /* The one special case where nir_extract_bits cannot get a scalar by asking for - * 1 component of chunk_bits. - */ + nir_def *data; if (chunk_bits == 24) { - chunk_value = nir_pad_vec4(b, chunk_value); - chunk_bits = 32; + /* This is a bit of a special case because we don't have 24-bit integers */ + data = nir_extract_bits(b, &value, 1, chunk_start * 8, 3, 8); + data = nir_pack_bits(b, nir_pad_vector_imm_int(b, data, 0, 4), 32); + } else { + data = nir_extract_bits(b, &value, 1, chunk_start * 8, 1, chunk_bits); + data = nir_u2u32(b, data); } - nir_def *data = nir_u2u32(b, - nir_extract_bits(b, &chunk_value, 1, chunk_start * 8, - 1, chunk_bits)); nir_def *iand_mask = nir_imm_int(b, (1 << chunk_bits) - 1); if (chunk_align < requested.align) { nir_def *shift = nir_u2u32(b, nir_imul_imm(b, pad, 8)); data = nir_ishl(b, data, shift); - iand_mask = nir_inot(b, nir_ishl(b, iand_mask, shift)); + iand_mask = nir_ishl(b, iand_mask, shift); } + iand_mask = nir_inot(b, iand_mask); + switch (intrin->intrinsic) { case nir_intrinsic_store_ssbo: nir_ssbo_atomic(b, 32, intrin->src[1].ssa, chunk_offset, iand_mask,