From e7eed458697cf247b211172547fcbb08294b0bff Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Mon, 29 Mar 2021 13:55:46 +0300 Subject: [PATCH] ir3: do not double threadsize when exceeding branchstack limit We can't support more than compiler->branchstack_size diverging threads in a wave. Thus, doubling the threadsize is only possible if we don't exceed the branchstack size limit. As of blob version 512.490.0, it doesn't have this heuristic. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/ir3/ir3.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 804acc761b5..9dca30d54f6 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -112,6 +112,16 @@ should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count) { const struct ir3_compiler *compiler = v->shader->compiler; + + /* We can't support more than compiler->branchstack_size diverging threads + * in a wave. Thus, doubling the threadsize is only possible if we don't + * exceed the branchstack size limit. + */ + if (MIN2(v->branchstack, compiler->threadsize_base * 2) > + compiler->branchstack_size) { + return false; + } + switch (v->type) { case MESA_SHADER_COMPUTE: { unsigned threads_per_wg = v->local_size[0] * v->local_size[1] * v->local_size[2];