ir3: Use a linear allocation context for ir3_registers.

Since we don't free registers as we go, we can just allocate them from a
ralloc linear context that gets freed when the shader's ralloc context is
destroyed.  Saves 5 pointers of memory per register in ralloc overhead.

Fossil replay time for deadspace3 on a debugoptimized build: -4.30353% +/-
1.80078% (n=10).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37316>
This commit is contained in:
Emma Anholt 2025-09-11 12:27:21 -07:00
parent 1b4c2c1566
commit 12fae29ec2
4 changed files with 15 additions and 7 deletions

View file

@ -36,6 +36,7 @@ ir3_create(struct ir3_compiler *compiler, struct ir3_shader_variant *v)
shader->compiler = compiler;
shader->type = v->type;
shader->lin_ctx = linear_context(shader);
list_inithead(&shader->block_list);
list_inithead(&shader->array_list);
@ -605,7 +606,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
static struct ir3_register *
reg_create(struct ir3 *shader, int num, int flags)
{
struct ir3_register *reg = ir3_alloc(shader, sizeof(struct ir3_register));
struct ir3_register *reg = linear_zalloc(shader->lin_ctx, struct ir3_register);
reg->wrmask = 1;
reg->flags = flags;
reg->num = num;

View file

@ -26,6 +26,7 @@ struct ir3_compiler;
struct ir3;
struct ir3_instruction;
struct ir3_block;
/* Opaque ralloc linear-allocator context.  The tag is `linear_ctx`, matching
 * the `struct linear_ctx *lin_ctx` member of struct ir3; `linear_context` is
 * the function that creates one, not a struct tag.
 */
struct linear_ctx;
struct ir3_info {
/* Size in bytes of the shader binary, including NIR constants and
@ -641,6 +642,11 @@ struct ir3 {
struct ir3_compiler *compiler;
mesa_shader_stage type;
/* Ralloc linear context we use for instructions and regs, to reduce
* allocation overhead and pack better than using ralloc directly.
*/
struct linear_ctx *lin_ctx;
DECLARE_ARRAY(struct ir3_instruction *, inputs);
/* Track bary.f (and ldlv) instructions.. this is needed in

View file

@ -76,8 +76,8 @@ alias_srcs(struct ir3_instruction *instr)
struct ir3_register **old_srcs = instr->srcs;
unsigned old_srcs_count = instr->srcs_count;
instr->srcs =
ir3_alloc(instr->block->shader, new_srcs_count * sizeof(instr->srcs[0]));
instr->srcs = linear_alloc_array(instr->block->shader->lin_ctx,
struct ir3_register *, new_srcs_count);
instr->srcs_count = 0;
unsigned num_aliases = 0;

View file

@ -183,14 +183,15 @@ get_merge_set(struct ir3_register *def)
if (def->merge_set)
return def->merge_set;
struct ir3_merge_set *set = ralloc(def, struct ir3_merge_set);
struct ir3_merge_set *set =
linear_alloc(def->instr->block->shader->lin_ctx, struct ir3_merge_set);
set->preferred_reg = ~0;
set->interval_start = ~0;
set->spill_slot = ~0;
set->size = reg_size(def);
set->alignment = (def->flags & IR3_REG_HALF) ? 1 : 2;
set->regs_count = 1;
set->regs = ralloc(set, struct ir3_register *);
set->regs = linear_alloc(def->instr->block->shader->lin_ctx, struct ir3_register *);
set->regs[0] = def;
return set;
@ -204,7 +205,8 @@ merge_merge_sets(struct ir3_merge_set *a, struct ir3_merge_set *b, int b_offset)
return merge_merge_sets(b, a, -b_offset);
struct ir3_register **new_regs =
rzalloc_array(a, struct ir3_register *, a->regs_count + b->regs_count);
linear_zalloc_array(a->regs[0]->instr->block->shader->lin_ctx,
struct ir3_register *, a->regs_count + b->regs_count);
unsigned a_index = 0, b_index = 0, new_index = 0;
for (; a_index < a->regs_count || b_index < b->regs_count; new_index++) {
@ -226,7 +228,6 @@ merge_merge_sets(struct ir3_merge_set *a, struct ir3_merge_set *b, int b_offset)
*/
a->alignment = MAX2(a->alignment, b->alignment);
a->regs_count += b->regs_count;
ralloc_free(a->regs);
a->regs = new_regs;
a->size = MAX2(a->size, b->size + b_offset);