diff --git a/.pick_status.json b/.pick_status.json
index a1d38001c78..b4f4536026d 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -54,7 +54,7 @@
         "description": "ir3: Take LB restriction on constlen into account on a7xx",
         "nominated": true,
         "nomination_type": 2,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "5879eaac185ed1c167fd01aff9b91c7cbe43ab0a",
         "notes": null
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 6bffc65a8a0..2e28ffbd5fb 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -1051,6 +1051,41 @@ ir3_const_state_mut(const struct ir3_shader_variant *v)
    return v->const_state;
 }
 
+/* Max constlen, in vec4s, for a compute variant given its local memory use. */
+static inline unsigned
+ir3_max_const_compute(const struct ir3_shader_variant *v,
+                      const struct ir3_compiler *compiler)
+{
+   unsigned lm_size = v->local_size_variable ? compiler->local_mem_size :
+      v->cs.req_local_mem;
+
+   /* The LB is divided between consts and local memory (LM). LB is split into
+    * wave_granularity banks so that different ALUs can access it at the same
+    * time; consts are duplicated into each bank so that they always take
+    * constant time to access, while LM is spread across the banks.
+    *
+    * We cannot arbitrarily divide LB: only certain configurations, as defined
+    * by the CONSTANTRAMMODE register field, are allowed, and not sticking with
+    * the right one can result in hangs when multiple compute shaders are in
+    * flight. So limit the constlen to a configuration that leaves room for LM.
+    */
+   unsigned lb_const_size =
+      ((compiler->compute_lb_size - lm_size) / compiler->wave_granularity) /
+      16 /* bytes per vec4 */;
+   if (lb_const_size < compiler->max_const_compute) {
+      const uint32_t lb_const_sizes[] = { 128, 192, 256, 512 };
+      /* Pick the largest listed size that does not exceed lb_const_size. */
+      assert(lb_const_size >= lb_const_sizes[0]);
+      for (unsigned i = 0; i < ARRAY_SIZE(lb_const_sizes) - 1; i++) {
+         if (lb_const_size < lb_const_sizes[i + 1])
+            return lb_const_sizes[i];
+      }
+      return lb_const_sizes[ARRAY_SIZE(lb_const_sizes) - 1];
+   } else {
+      return compiler->max_const_compute;
+   }
+}
+
 static inline unsigned
 _ir3_max_const(const struct ir3_shader_variant *v, bool safe_constlen)
 {
@@ -1078,7 +1113,7 @@ _ir3_max_const(const struct ir3_shader_variant *v, bool safe_constlen)
 
    if ((v->type == MESA_SHADER_COMPUTE) ||
        (v->type == MESA_SHADER_KERNEL)) {
-      return compiler->max_const_compute - shared_consts_size;
+      return ir3_max_const_compute(v, compiler) - shared_consts_size;
    } else if (safe_constlen) {
       return compiler->max_const_safe - safe_shared_consts_size;
    } else if (v->type == MESA_SHADER_FRAGMENT) {