From 1a882ecdab06d3cf3786698d23eb015c2baa332a Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Thu, 16 Jun 2022 19:14:58 -0400
Subject: [PATCH] pan/bi: Align accesses with packed TLS

When lowering vars to scratch, we need to be careful with alignment on
Valhall, where packed TLS access must not straddle a 16-byte boundary.

Fixes regressions when enabling indirect access to temps on Valhall.

Fixes: 6761dbf8915 ("panfrost: Use packed TLS on Valhall")
Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/panfrost/bifrost/bifrost_compile.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 4eea93f529a..15f5924cf31 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -4835,9 +4835,19 @@ bi_finalize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
         /* Get rid of any global vars before we lower to scratch. */
         NIR_PASS_V(nir, nir_lower_global_vars_to_local);
 
+        /* Valhall introduces packed thread local storage, which improves cache
+         * locality of TLS access. However, access to packed TLS cannot
+         * straddle 16-byte boundaries. As such, when packed TLS is in use
+         * (currently unconditional for Valhall), we force vec4 alignment for
+         * scratch access.
+         */
+        bool packed_tls = (gpu_id >= 0x9000);
+
         /* Lower large arrays to scratch and small arrays to bcsel (TODO: tune
          * threshold, but not until addresses / csel is optimized better) */
         NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 16,
+                        packed_tls ?
+                        glsl_get_vec4_size_align_bytes :
                         glsl_get_natural_size_align_bytes);
         NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
 