From 746f2986ece6d8a4eb40486e057a83c14547ba36 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 28 Apr 2025 19:34:05 -0400 Subject: [PATCH] ir3: Take LB restriction on constlen into account on a7xx On a7xx, the max constlen for compute is increased to 512 vec4s or 8KB, however the size of the LB was not increased beyond 40KB. A quick calculation shows that 8KB of consts multiplied by 2 banks plus the API maximum of 32KB shared memory would exceed 40KB. This means that we can't always use a constlen of 512, and sometimes have to fall back to 256 when a lot of shared memory is in use. In the future, we can use similar calculations to figure out how much "extra" shared memory is available for the backend to spill to, but we currently don't support spilling to shared memory. Fixes: 5879eaac185 ("ir3: Increase compute const size on a7xx") Part-of: (cherry picked from commit ea9d694a7b363d66dd9e57bc0f55c5fd903632b2) --- .pick_status.json | 2 +- src/freedreno/ir3/ir3_shader.h | 37 +++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index a1d38001c78..b4f4536026d 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -54,7 +54,7 @@ "description": "ir3: Take LB restriction on constlen into account on a7xx", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "5879eaac185ed1c167fd01aff9b91c7cbe43ab0a", "notes": null diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 6bffc65a8a0..2e28ffbd5fb 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -1051,6 +1051,41 @@ ir3_const_state_mut(const struct ir3_shader_variant *v) return v->const_state; } +static inline unsigned +ir3_max_const_compute(const struct ir3_shader_variant *v, + const struct ir3_compiler *compiler) +{ + unsigned lm_size = v->local_size_variable ? compiler->local_mem_size : + v->cs.req_local_mem; + + /* The LB is divided between consts and local memory. LB is split into + * wave_granularity banks, to make it possible for different ALUs to access + * it at the same time, and consts are duplicated into each bank so that they + * always take constant time to access while LM is spread across the banks. + * + * We cannot arbitrarily divide LB. Instead only certain configurations, as + * defined by the CONSTANTRAMMODE register field, are allowed. Not sticking + * with the right configuration can result in hangs when multiple compute + * shaders are in flight. We have to limit the constlen so that we can pick a + * configuration where there is enough space for LM. + */ + unsigned lb_const_size = + ((compiler->compute_lb_size - lm_size) / compiler->wave_granularity) / + 16 /* bytes per vec4 */; + if (lb_const_size < compiler->max_const_compute) { + const uint32_t lb_const_sizes[] = { 128, 192, 256, 512 }; + + assert(lb_const_size >= lb_const_sizes[0]); + for (unsigned i = 0; i < ARRAY_SIZE(lb_const_sizes) - 1; i++) { + if (lb_const_size < lb_const_sizes[i + 1]) + return lb_const_sizes[i]; + } + return lb_const_sizes[ARRAY_SIZE(lb_const_sizes) - 1]; + } else { + return compiler->max_const_compute; + } +} + static inline unsigned _ir3_max_const(const struct ir3_shader_variant *v, bool safe_constlen) { @@ -1078,7 +1113,7 @@ _ir3_max_const(const struct ir3_shader_variant *v, bool safe_constlen) if ((v->type == MESA_SHADER_COMPUTE) || (v->type == MESA_SHADER_KERNEL)) { - return compiler->max_const_compute - shared_consts_size; + return ir3_max_const_compute(v, compiler) - shared_consts_size; } else if (safe_constlen) { return compiler->max_const_safe - safe_shared_consts_size; } else if (v->type == MESA_SHADER_FRAGMENT) {