From 12fae29ec2d0760ecbe6314e09e2c3db8e893ad8 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Thu, 11 Sep 2025 12:27:21 -0700 Subject: [PATCH] ir3: Use a linear allocation context for ir3_registers. Since we don't free registers as we go, we can just allocate them in a linear gc context that gets freed at ralloc destroy. Saves 5 pointers of memory per register for the ralloc overhead. Fossil replay time for deadspace3 on a debugoptimized build -4.30353% +/- 1.80078% (n=10). Part-of: --- src/freedreno/ir3/ir3.c | 3 ++- src/freedreno/ir3/ir3.h | 6 ++++++ src/freedreno/ir3/ir3_alias.c | 4 ++-- src/freedreno/ir3/ir3_merge_regs.c | 9 +++++---- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 144e82dc5c5..f15ce67ce9f 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -36,6 +36,7 @@ ir3_create(struct ir3_compiler *compiler, struct ir3_shader_variant *v) shader->compiler = compiler; shader->type = v->type; + shader->lin_ctx = linear_context(shader); list_inithead(&shader->block_list); list_inithead(&shader->array_list); @@ -605,7 +606,7 @@ ir3_collect_info(struct ir3_shader_variant *v) static struct ir3_register * reg_create(struct ir3 *shader, int num, int flags) { - struct ir3_register *reg = ir3_alloc(shader, sizeof(struct ir3_register)); + struct ir3_register *reg = linear_zalloc(shader->lin_ctx, struct ir3_register); reg->wrmask = 1; reg->flags = flags; reg->num = num; diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 8c2a9289ec6..e14838cfce1 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -26,6 +26,7 @@ struct ir3_compiler; struct ir3; struct ir3_instruction; struct ir3_block; +struct linear_context; struct ir3_info { /* Size in bytes of the shader binary, including NIR constants and @@ -641,6 +642,11 @@ struct ir3 { struct ir3_compiler *compiler; mesa_shader_stage type; + /* Ralloc linear context we use for instructions and regs, to reduce + * allocation overhead and pack better than using ralloc directly. + */ + struct linear_ctx *lin_ctx; + DECLARE_ARRAY(struct ir3_instruction *, inputs); /* Track bary.f (and ldlv) instructions.. this is needed in diff --git a/src/freedreno/ir3/ir3_alias.c b/src/freedreno/ir3/ir3_alias.c index 31299eca97e..dc07ba16f8f 100644 --- a/src/freedreno/ir3/ir3_alias.c +++ b/src/freedreno/ir3/ir3_alias.c @@ -76,8 +76,8 @@ alias_srcs(struct ir3_instruction *instr) struct ir3_register **old_srcs = instr->srcs; unsigned old_srcs_count = instr->srcs_count; - instr->srcs = - ir3_alloc(instr->block->shader, new_srcs_count * sizeof(instr->srcs[0])); + instr->srcs = linear_alloc_array(instr->block->shader->lin_ctx, + struct ir3_register *, new_srcs_count); instr->srcs_count = 0; unsigned num_aliases = 0; diff --git a/src/freedreno/ir3/ir3_merge_regs.c b/src/freedreno/ir3/ir3_merge_regs.c index dba68af0d37..593790a4a60 100644 --- a/src/freedreno/ir3/ir3_merge_regs.c +++ b/src/freedreno/ir3/ir3_merge_regs.c @@ -183,14 +183,15 @@ get_merge_set(struct ir3_register *def) if (def->merge_set) return def->merge_set; - struct ir3_merge_set *set = ralloc(def, struct ir3_merge_set); + struct ir3_merge_set *set = + linear_alloc(def->instr->block->shader->lin_ctx, struct ir3_merge_set); set->preferred_reg = ~0; set->interval_start = ~0; set->spill_slot = ~0; set->size = reg_size(def); set->alignment = (def->flags & IR3_REG_HALF) ? 1 : 2; set->regs_count = 1; - set->regs = ralloc(set, struct ir3_register *); + set->regs = linear_alloc(def->instr->block->shader->lin_ctx, struct ir3_register *); set->regs[0] = def; return set; @@ -204,7 +205,8 @@ merge_merge_sets(struct ir3_merge_set *a, struct ir3_merge_set *b, int b_offset) return merge_merge_sets(b, a, -b_offset); struct ir3_register **new_regs = - rzalloc_array(a, struct ir3_register *, a->regs_count + b->regs_count); + linear_zalloc_array(a->regs[0]->instr->block->shader->lin_ctx, + struct ir3_register *, a->regs_count + b->regs_count); unsigned a_index = 0, b_index = 0, new_index = 0; for (; a_index < a->regs_count || b_index < b->regs_count; new_index++) { @@ -226,7 +228,6 @@ merge_merge_sets(struct ir3_merge_set *a, struct ir3_merge_set *b, int b_offset) */ a->alignment = MAX2(a->alignment, b->alignment); a->regs_count += b->regs_count; - ralloc_free(a->regs); a->regs = new_regs; a->size = MAX2(a->size, b->size + b_offset);