diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 751f4dccf7a..2b80a3e82e6 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -234,7 +234,7 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count) * correlated with dynamic branching). For fp16 apps, the increased ALU * rate made it worth it regardless. */ - if (uses_significant_16bit_alu(v)) { + if (!v->ir || uses_significant_16bit_alu(v)) { /* Check that doubling the threadsize wouldn't exceed the regfile size */ return regs_count * 2 <= compiler->reg_size_vec4; } else { diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 7f9c2d98753..019a3bb1c7a 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -5663,13 +5663,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, ir = so->ir = ctx->ir; - if (mesa_shader_stage_is_compute(so->type)) { - so->local_size[0] = ctx->s->info.workgroup_size[0]; - so->local_size[1] = ctx->s->info.workgroup_size[1]; - so->local_size[2] = ctx->s->info.workgroup_size[2]; - so->local_size_variable = ctx->s->info.workgroup_size_variable; - } - if (so->type == MESA_SHADER_FRAGMENT && so->reads_shading_rate && !so->reads_smask && compiler->reading_shading_rate_requires_smask_quirk) { diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index fbcfd2bde6b..ff1eabb2111 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -11,6 +11,7 @@ #include "ir3_compiler.h" #include "ir3_nir.h" +#include "ir3_ra.h" #include "ir3_shader.h" #include "nir_builtin_builder.h" diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 059c78e3eec..25286ca82a4 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -2746,6 +2746,33 @@ calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v, } } +struct ir3_pressure +ir3_ra_get_reg_file_limits(struct ir3_shader_variant *v) +{ + struct ir3_pressure limit_pressure = { + .full = RA_FULL_SIZE, + .half = RA_HALF_SIZE, + .shared = RA_SHARED_SIZE, + .shared_half = RA_SHARED_HALF_SIZE, + }; + + if (mesa_shader_stage_is_compute(v->type) && + v->shader->nir->info.uses_control_barrier) { + calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure); + } + + /* If the user forces a doubled threadsize, we may have to lower the limit + * because on some gens the register file is not big enough to hold a + * double-size wave with all 48 registers in use. + */ + if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) { + limit_pressure.full = + MAX2(limit_pressure.full, v->compiler->reg_size_vec4 / 2 * 16); + } + + return limit_pressure; +} + int ir3_ra(struct ir3_shader_variant *v) { @@ -2788,24 +2815,7 @@ ir3_ra(struct ir3_shader_variant *v) d("\thalf: %u", max_pressure.half); d("\tshared: %u", max_pressure.shared); - struct ir3_pressure limit_pressure; - limit_pressure.full = RA_FULL_SIZE; - limit_pressure.half = RA_HALF_SIZE; - limit_pressure.shared = RA_SHARED_SIZE; - limit_pressure.shared_half = RA_SHARED_HALF_SIZE; - - if (mesa_shader_stage_is_compute(v->type) && v->has_barrier) { - calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure); - } - - /* If the user forces a doubled threadsize, we may have to lower the limit - * because on some gens the register file is not big enough to hold a - * double-size wave with all 48 registers in use. - */ - if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) { - limit_pressure.full = - MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16); - } + struct ir3_pressure limit_pressure = ir3_ra_get_reg_file_limits(v); /* If requested, lower the limit so that spilling happens more often. */ if (ir3_shader_debug & IR3_DBG_SPILLALL) diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h index d87d1d4bb3e..0c9449bce02 100644 --- a/src/freedreno/ir3/ir3_ra.h +++ b/src/freedreno/ir3/ir3_ra.h @@ -164,9 +164,12 @@ void ir3_update_merge_sets_index(struct ir3_liveness *live, struct ir3 *ir); void ir3_index_instrs_for_merge_sets(struct ir3 *ir); struct ir3_pressure { + /* Register number limits for RA, in units of half regs. */ unsigned full, half, shared, shared_half; }; +struct ir3_pressure ir3_ra_get_reg_file_limits(struct ir3_shader_variant *so); + void ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live, struct ir3_pressure *max_pressure); diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 21792dba332..5376f2b0d8a 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -621,6 +621,11 @@ create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key, if (v->type == MESA_SHADER_COMPUTE || v->type == MESA_SHADER_KERNEL) { v->cs.force_linear_dispatch = shader->cs.force_linear_dispatch; + + v->local_size[0] = shader->nir->info.workgroup_size[0]; + v->local_size[1] = shader->nir->info.workgroup_size[1]; + v->local_size[2] = shader->nir->info.workgroup_size[2]; + v->local_size_variable = shader->nir->info.workgroup_size_variable; } struct ir3_const_state *const_state = ir3_const_state_mut(v);