From f5ea8b9a0bfd6ac28da94e46f17f12cdedbc88d1 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 22 Jan 2026 18:00:24 -0500 Subject: [PATCH] ir3: Split out max_branchstack and branchstack_size max_branchstack is the maximum branch stack size per wave, while branchstack_size is the branch stack size per uSP (i.e. "core"). They happened to have the same value before, but they are actually different limits in the HW. Part-of: --- src/freedreno/ir3/ir3.c | 4 ++-- src/freedreno/ir3/ir3_compiler.c | 1 + src/freedreno/ir3/ir3_compiler.h | 3 +++ src/freedreno/ir3/ir3_shader.h | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 43c16c4bea5..81d595f9ab5 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -209,12 +209,12 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count) if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) return true; - /* We can't support more than compiler->branchstack_size diverging threads + /* We can't support more than compiler->max_branchstack diverging threads * in a wave. Thus, doubling the threadsize is only possible if we don't * exceed the branchstack size limit. 
*/ if (MIN2(v->branchstack, compiler->info->threadsize_base * 2) > - compiler->branchstack_size) { + compiler->max_branchstack) { return false; } diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index d6d3ff07565..b13a1557238 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -167,6 +167,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, /* TODO see if older GPU's were different here */ compiler->branchstack_size = 64; + compiler->max_branchstack = 64; compiler->max_variable_workgroup_size = 1024; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 601f0e5822f..6bdb00ff519 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -164,6 +164,9 @@ struct ir3_compiler { /* The number of total branch stack entries, divided by wave_granularity. */ uint32_t branchstack_size; + /* The maximum number of branch stack entries per wave. */ + uint32_t max_branchstack; + /* The byte increment of MEMSIZEPERITEM, the private memory per-fiber allocation. */ uint32_t pvtmem_per_fiber_align; diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index f1c3c61b6da..845a26a02cc 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -1458,7 +1458,7 @@ ir3_shader_branchstack_hw(const struct ir3_shader_variant *v) if (v->compiler->gen < 5) return v->branchstack; - return align(MIN2(v->branchstack, v->compiler->branchstack_size), 2); + return align(MIN2(v->branchstack, v->compiler->max_branchstack), 2); } ENDC;