mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
nir_lower_mem_access_bit_sizes: Fix write-mask-constrained 3-byte stores as atomics
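The code here handled stores of actual 3-byte values (8-bit, 3-component), but did not
correctly handle stores of larger 8-bit vectors that were constrained by the write mask to
just 3 bytes. In that case, padding the whole value to a vec4 before extracting a 32-bit scalar was unnecessary and problematic; the fix instead extracts exactly the 3 bytes at the chunk offset, zero-pads them to 4 components, and packs them into a 32-bit value.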
Seen in CL CTS test_basic vector_swizzle test group for char3 with CLOn12.
Fixes: c70d94a8 ("nir_lower_mem_access_bit_sizes: Support unaligned stores via a pair of atomics")
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26034>
This commit is contained in:
parent
bff7e4b69d
commit
cd0cff951a
1 changed file with 10 additions and 10 deletions
|
|
@ -317,26 +317,26 @@ lower_mem_store(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||
chunk_bytes = MIN2(max_chunk_bytes, requested_bytes - max_pad);
|
||||
unsigned chunk_bits = chunk_bytes * 8;
|
||||
|
||||
nir_def *chunk_value = value;
|
||||
/* The one special case where nir_extract_bits cannot get a scalar by asking for
|
||||
* 1 component of chunk_bits.
|
||||
*/
|
||||
nir_def *data;
|
||||
if (chunk_bits == 24) {
|
||||
chunk_value = nir_pad_vec4(b, chunk_value);
|
||||
chunk_bits = 32;
|
||||
/* This is a bit of a special case because we don't have 24-bit integers */
|
||||
data = nir_extract_bits(b, &value, 1, chunk_start * 8, 3, 8);
|
||||
data = nir_pack_bits(b, nir_pad_vector_imm_int(b, data, 0, 4), 32);
|
||||
} else {
|
||||
data = nir_extract_bits(b, &value, 1, chunk_start * 8, 1, chunk_bits);
|
||||
data = nir_u2u32(b, data);
|
||||
}
|
||||
|
||||
nir_def *data = nir_u2u32(b,
|
||||
nir_extract_bits(b, &chunk_value, 1, chunk_start * 8,
|
||||
1, chunk_bits));
|
||||
nir_def *iand_mask = nir_imm_int(b, (1 << chunk_bits) - 1);
|
||||
|
||||
if (chunk_align < requested.align) {
|
||||
nir_def *shift = nir_u2u32(b, nir_imul_imm(b, pad, 8));
|
||||
data = nir_ishl(b, data, shift);
|
||||
iand_mask = nir_inot(b, nir_ishl(b, iand_mask, shift));
|
||||
iand_mask = nir_ishl(b, iand_mask, shift);
|
||||
}
|
||||
|
||||
iand_mask = nir_inot(b, iand_mask);
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_store_ssbo:
|
||||
nir_ssbo_atomic(b, 32, intrin->src[1].ssa, chunk_offset, iand_mask,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue