From 67e6ca1924e2cba6f8b72483be3854cd40fa13d7 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Mon, 27 Nov 2023 21:13:48 -0600
Subject: [PATCH] nak/nir: Handle CBuf alignment rules

The NIR lowering is more complete and lets us properly handle 16-bit
loads which, weirdly, require a 4B alignment.

Fixes: bda208665f99 ("nak: Handle non-DW-aligned UBO loads")
Part-of:
---
 src/nouveau/compiler/nak_nir.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index 034c2f92916..d3551571e84 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -989,7 +989,28 @@ nak_mem_access_size_align(nir_intrinsic_op intrin,
    if (intrin == nir_intrinsic_load_ubo)
       chunk_bytes = MIN2(chunk_bytes, 8);
 
-   if (chunk_bytes < 4) {
+   if (intrin == nir_intrinsic_load_ubo && align < 4) {
+      /* CBufs require 4B alignment unless we're doing a ldc.u8 or ldc.i8.
+       * In particular, this applies to ldc.u16 which means we either have to
+       * fall back to two ldc.u8 or use ldc.u32 and shift stuff around to get
+       * the 16bit value out. Fortunately, nir_lower_mem_access_bit_sizes()
+       * can handle over-alignment for reads.
+       */
+      if (align == 2 || offset_is_const) {
+         return (nir_mem_access_size_align) {
+            .bit_size = 32,
+            .num_components = 1,
+            .align = 4,
+         };
+      } else {
+         assert(align == 1);
+         return (nir_mem_access_size_align) {
+            .bit_size = 8,
+            .num_components = 1,
+            .align = 1,
+         };
+      }
+   } else if (chunk_bytes < 4) {
       return (nir_mem_access_size_align) {
          .bit_size = chunk_bytes * 8,
          .num_components = 1,