diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 43c16c4bea5..81d595f9ab5 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -209,12 +209,12 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count) if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) return true; - /* We can't support more than compiler->branchstack_size diverging threads + /* We can't support more than compiler->max_branchstack diverging threads * in a wave. Thus, doubling the threadsize is only possible if we don't * exceed the branchstack size limit. */ if (MIN2(v->branchstack, compiler->info->threadsize_base * 2) > - compiler->branchstack_size) { + compiler->max_branchstack) { return false; } diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index d6d3ff07565..b13a1557238 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -167,6 +167,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, /* TODO see if older GPU's were different here */ compiler->branchstack_size = 64; + compiler->max_branchstack = 64; compiler->max_variable_workgroup_size = 1024; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 601f0e5822f..6bdb00ff519 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -164,6 +164,9 @@ struct ir3_compiler { /* The number of total branch stack entries, divided by wave_granularity. */ uint32_t branchstack_size; + /* The maximum number of branch stack entries per wave. */ + uint32_t max_branchstack; + /* The byte increment of MEMSIZEPERITEM, the private memory per-fiber allocation. */ uint32_t pvtmem_per_fiber_align; diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index f1c3c61b6da..845a26a02cc 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -1458,7 +1458,7 @@ ir3_shader_branchstack_hw(const struct ir3_shader_variant *v) if (v->compiler->gen < 5) return v->branchstack; - return align(MIN2(v->branchstack, v->compiler->branchstack_size), 2); + return align(MIN2(v->branchstack, v->compiler->max_branchstack), 2); } ENDC;