From 7d706af76b2c41ee2d496efabcf96ac5c4a61de2 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 14 Jun 2022 20:47:29 +0200
Subject: [PATCH] ir3: Fix vectorizer condition for SSBOs

SSBO access works very differently from UBO access. Straddling
loads/stores isn't an issue, loads/stores instead must be aligned to the
element size and can have up to 4 components. We support 16-bit access
with SSBOs on a650+, and sometimes the vectorizer tries to create a
misaligned 32-bit access when combining 32-bit and 16-bit accesses. The
UBO-focused logic didn't reject this, which is now fixed.

This fixes a number of VK-CTS regressions on a650+.

Fixes: bf49d4a084b ("freedreno/ir3: Enable load/store vectorization for SSBO access, too.")
Part-of:
---
 src/freedreno/ir3/ir3_nir.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 519a02c0dac..009d5a884d1 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -37,10 +37,17 @@ ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                              nir_intrinsic_instr *low,
                              nir_intrinsic_instr *high, void *data)
 {
+   unsigned byte_size = bit_size / 8;
+
+   if (low->intrinsic != nir_intrinsic_load_ubo) {
+      return bit_size <= 32 && align_mul >= byte_size &&
+         align_offset % byte_size == 0 &&
+         num_components <= 4;
+   }
+
    assert(bit_size >= 8);
    if (bit_size != 32)
       return false;
-   unsigned byte_size = bit_size / 8;
 
    int size = num_components * byte_size;
 