ir3: simplify constlen calculation

Instead of inferring constlen from the usage of const registers by
various instructions, we can calculate it directly from the const file
allocations. This greatly simplifies the calculation of constlen.

Note that the increase in constlen comes from a few binning variants.
This doesn't matter as the constlen of the corresponding non-binning
variant is used for those anyway.

Totals from 73 (0.04% of 176258) affected shaders:
Constlen: 3428 -> 3720 (+8.52%)

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40929>
This commit is contained in:
Job Noorman 2026-04-13 16:47:01 +02:00 committed by Marge Bot
parent 59d9bc7bee
commit 86f3c0c4c2
4 changed files with 8 additions and 60 deletions

View file

@ -1331,10 +1331,6 @@ ir3_store_const(struct ir3_shader_variant *so, struct ir3_builder *build,
stc->flags |= IR3_INSTR_A1EN; stc->flags |= IR3_INSTR_A1EN;
} }
/* The assembler isn't aware of what value a1.x has, so make sure that
* constlen includes the stc here.
*/
so->constlen = MAX2(so->constlen, DIV_ROUND_UP(dst + components, 4));
struct ir3_block *block = ir3_cursor_current_block(build->cursor); struct ir3_block *block = ir3_cursor_current_block(build->cursor);
array_insert(block, block->keeps, stc); array_insert(block, block->keeps, stc);
return stc; return stc;

View file

@ -1231,12 +1231,6 @@ emit_intrinsic_copy_ubo_to_uniform(struct ir3_context *ctx,
ir3_instr_set_address(ldc, addr1); ir3_instr_set_address(ldc, addr1);
/* The assembler isn't aware of what value a1.x has, so make sure that
* constlen includes the ldc.k here.
*/
ctx->so->constlen =
MAX2(ctx->so->constlen, DIV_ROUND_UP(base + size * 4, 4));
array_insert(ctx->block, ctx->block->keeps, ldc); array_insert(ctx->block, ctx->block->keeps, ldc);
} }
@ -1269,12 +1263,6 @@ emit_intrinsic_copy_global_to_uniform(struct ir3_context *ctx,
ldg->flags |= IR3_INSTR_A1EN; ldg->flags |= IR3_INSTR_A1EN;
} }
/* The assembler isn't aware of what value a1.x has, so make sure that
* constlen includes the ldg.k here.
*/
ctx->so->constlen =
MAX2(ctx->so->constlen, DIV_ROUND_UP(dst + size * 4, 4));
array_insert(ctx->block, ctx->block->keeps, ldg); array_insert(ctx->block, ctx->block->keeps, ldg);
} }
@ -1302,15 +1290,6 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ir3_get_addr0(ctx, src0, ptrsz)); ir3_get_addr0(ctx, src0, ptrsz));
base_hi = create_uniform_indirect(b, ubo + 1, TYPE_U32, base_hi = create_uniform_indirect(b, ubo + 1, TYPE_U32,
ir3_get_addr0(ctx, src0, ptrsz)); ir3_get_addr0(ctx, src0, ptrsz));
/* NOTE: since relative addressing is used, make sure constlen is
* at least big enough to cover all the UBO addresses, since the
* assembler won't know what the max address reg is.
*/
ctx->so->constlen = MAX2(
ctx->so->constlen,
const_state->allocs.consts[IR3_CONST_ALLOC_UBO_PTRS].offset_vec4 +
(ctx->s->info.num_ubos * ptrsz));
} }
/* note: on 32bit gpu's base_hi is ignored and DCE'd */ /* note: on 32bit gpu's base_hi is ignored and DCE'd */
@ -3253,10 +3232,6 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
dst[i] = create_driver_param_indirect(ctx, param + i, dst[i] = create_driver_param_indirect(ctx, param + i,
ir3_get_addr0(ctx, view, 8)); ir3_get_addr0(ctx, view, 8));
} }
ctx->so->constlen =
MAX2(ctx->so->constlen,
const_state->allocs.consts[IR3_CONST_ALLOC_DRIVER_PARAMS].offset_vec4 +
param / 4 + nir_intrinsic_range(intr) * 2);
} }
break; break;
} }
@ -3502,11 +3477,6 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
load->push_consts.dst_base = nir_src_as_uint(intr->src[0]); load->push_consts.dst_base = nir_src_as_uint(intr->src[0]);
load->push_consts.src_base = nir_intrinsic_base(intr); load->push_consts.src_base = nir_intrinsic_base(intr);
load->push_consts.src_size = nir_intrinsic_range(intr); load->push_consts.src_size = nir_intrinsic_range(intr);
ctx->so->constlen =
MAX2(ctx->so->constlen,
DIV_ROUND_UP(
load->push_consts.dst_base + load->push_consts.src_size, 4));
break; break;
} }
case nir_intrinsic_prefetch_sam_ir3: { case nir_intrinsic_prefetch_sam_ir3: {
@ -6252,26 +6222,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
ctx->so->sample_shading = ctx->s->info.fs.uses_sample_shading; ctx->so->sample_shading = ctx->s->info.fs.uses_sample_shading;
if (ctx->has_relative_load_const_ir3) { so->constlen = ir3_constlen(so);
/* NOTE: if relative addressing is used, we set
* constlen in the compiler (to worst-case value)
* since we don't know in the assembler what the max
* addr reg value can be:
*/
const struct ir3_const_state *const_state = ir3_const_state(ctx->so);
const enum ir3_const_alloc_type rel_const_srcs[] = {
IR3_CONST_ALLOC_INLINE_UNIFORM_ADDRS, IR3_CONST_ALLOC_UBO_RANGES,
IR3_CONST_ALLOC_PREAMBLE, IR3_CONST_ALLOC_GLOBAL};
for (int i = 0; i < ARRAY_SIZE(rel_const_srcs); i++) {
const struct ir3_const_allocation *const_alloc =
&const_state->allocs.consts[rel_const_srcs[i]];
if (const_alloc->size_vec4 > 0) {
ctx->so->constlen =
MAX2(ctx->so->constlen,
const_alloc->offset_vec4 + const_alloc->size_vec4);
}
}
}
if (ctx->so->type == MESA_SHADER_FRAGMENT && if (ctx->so->type == MESA_SHADER_FRAGMENT &&
compiler->info->props.fs_must_have_non_zero_constlen_quirk) { compiler->info->props.fs_must_have_non_zero_constlen_quirk) {

View file

@ -156,12 +156,6 @@ ir3_shader_assemble(struct ir3_shader_variant *v)
memcpy(&bin[info->constant_data_offset / 4], v->constant_data, memcpy(&bin[info->constant_data_offset / 4], v->constant_data,
v->constant_data_size); v->constant_data_size);
/* NOTE: if relative addressing is used, we set constlen in
* the compiler (to worst-case value) since we don't know in
* the assembler what the max addr reg value can be:
*/
v->constlen = MAX2(v->constlen, info->max_const + 1);
const struct ir3_const_state *const_state = ir3_const_state(v); const struct ir3_const_state *const_state = ir3_const_state(v);
if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_DRIVER_PARAMS, if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_DRIVER_PARAMS,
v->constlen) || v->constlen) ||

View file

@ -1081,6 +1081,13 @@ ir3_const_state_mut(const struct ir3_shader_variant *v)
return v->const_state; return v->const_state;
} }
/**
 * Compute the constlen of a shader variant directly from its const file
 * layout instead of inferring it from the const registers that individual
 * instructions reference.
 *
 * The result is the variant's const-state max_const_offset_vec4 plus
 * imm_state.size / 4 (integer division, so any remainder is dropped).
 *
 * NOTE(review): presumably imm_state.size counts scalar components and is
 * always a multiple of 4, which would make the division exact -- confirm
 * against the code that grows imm_state.
 */
static inline unsigned
ir3_constlen(const struct ir3_shader_variant *v)
{
   /* Start from the end of the const allocations... */
   unsigned constlen = ir3_const_state(v)->allocs.max_const_offset_vec4;

   /* ...and append the space taken by the immediates. */
   constlen += v->imm_state.size / 4;

   return constlen;
}
static inline unsigned static inline unsigned
ir3_max_const_compute(const struct ir3_shader_variant *v, ir3_max_const_compute(const struct ir3_shader_variant *v,
const struct ir3_compiler *compiler) const struct ir3_compiler *compiler)