brw: Don't use individual rallocs for each instruction

Move from a single ralloc allocation per instruction to contiguous
blocks of allocations.  Still use ralloc for those large blocks.

Each ralloc allocation has at least 5 pointers of overhead, which would
be about a third of the current brw_inst, and get worse as we try to
pack brw_inst better.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36730>
This commit is contained in:
Caio Oliveira 2025-08-09 18:25:47 -07:00 committed by Marge Bot
parent 2506540566
commit c358842c1d
3 changed files with 69 additions and 3 deletions

View file

@ -29,14 +29,51 @@ brw_inst_kind_size(brw_inst_kind kind)
: sizeof(brw_send_inst);
}
/**
 * Return the alignment, in bytes, required to store an instruction of the
 * given kind.
 *
 * Only two answers are possible: the alignment of brw_inst for
 * BRW_KIND_BASE, and the alignment of brw_send_inst for everything else.
 */
static inline unsigned
brw_inst_kind_align(brw_inst_kind kind)
{
   /* brw_send_inst stands in for every non-base kind, so it has to be at
    * least as strictly aligned as each of the types checked here.
    */
   STATIC_ASSERT(alignof(brw_send_inst) >= alignof(brw_tex_inst));
   STATIC_ASSERT(alignof(brw_send_inst) >= alignof(brw_mem_inst));
   STATIC_ASSERT(alignof(brw_send_inst) >= alignof(brw_dpas_inst));
   STATIC_ASSERT(alignof(brw_send_inst) >= alignof(brw_load_payload_inst));
   STATIC_ASSERT(alignof(brw_send_inst) >= alignof(brw_urb_inst));
   STATIC_ASSERT(alignof(brw_send_inst) >= alignof(brw_fb_write_inst));

   /* See brw_inst_kind_size(). */
   if (kind == BRW_KIND_BASE)
      return alignof(brw_inst);

   return alignof(brw_send_inst);
}
/**
 * Carve `size` bytes, aligned to `align`, out of the shader's instruction
 * arena.
 *
 * The arena hands out memory by advancing a cursor (inst_arena.beg) through
 * the current block.  When the request does not fit in what is left, a new
 * block is allocated from inst_arena.mem_ctx and becomes the current block;
 * whatever was left in the previous block is simply not used any more.
 *
 * \param s      Shader whose arena is used.
 * \param size   Number of bytes requested.
 * \param align  Required alignment in bytes; must be a power of two.
 * \return       Pointer to the start of the reserved bytes.  The memory is
 *               not cleared here.
 */
static void *
brw_alloc_size(brw_shader &s, unsigned size, unsigned align)
{
   /* The padding computation is mask arithmetic, which is only correct for
    * power-of-two alignments.
    */
   assert(align != 0 && (align & (align - 1)) == 0);

   /* Bytes needed to round the cursor up to the next multiple of align. */
   unsigned padding = -(uintptr_t)s.inst_arena.beg & (align - 1);

   /* Bytes still unused in the current block. */
   const size_t avail = s.inst_arena.end - s.inst_arena.beg;

   /* If doesn't fit, create a larger one.
    *
    * The check is split so that no subtraction can wrap.  Subtracting
    * padding straight from the pointer difference is evaluated as unsigned
    * wherever ptrdiff_t is no wider than unsigned (e.g. 32-bit builds), and
    * wraps to a huge value when fewer than padding bytes remain, which
    * would make an exhausted block look like it still has room.
    */
   if (avail < padding || avail - padding < size) {
      /* The new block is half of everything allocated so far, or the
       * request itself if that is larger.
       */
      unsigned new_cap = MAX2(s.inst_arena.total_cap / 2, size);

      s.inst_arena.beg = (char *)ralloc_size(s.inst_arena.mem_ctx, new_cap);
      s.inst_arena.end = s.inst_arena.beg + new_cap;
      s.inst_arena.cap = new_cap;
      s.inst_arena.total_cap += new_cap;

      /* NOTE(review): assumes the pointer returned by ralloc_size() is
       * already aligned strictly enough for every caller -- confirm.
       */
      padding = 0;
   }

   void *mem = s.inst_arena.beg + padding;
   s.inst_arena.beg += padding + size;
   return mem;
}
static brw_inst *
brw_alloc_inst(brw_shader &s, brw_inst_kind kind, unsigned num_srcs)
{
const unsigned inst_size = brw_inst_kind_size(kind);
const unsigned inst_align = brw_inst_kind_align(kind);
assert((inst_size % alignof(brw_reg)) == 0);
void *mem = ralloc_size(s.mem_ctx, inst_size + num_srcs * sizeof(brw_reg));
void *mem = brw_alloc_size(s, inst_size + num_srcs * sizeof(brw_reg), inst_align);
memset(mem, 0, inst_size);
brw_inst *inst = (brw_inst *)mem;
@ -131,7 +168,8 @@ brw_transform_inst(brw_shader &s, brw_inst *inst, enum opcode new_opcode,
assert(new_num_sources != UINT_MAX);
if (new_num_sources > inst->sources) {
brw_reg *new_src = ralloc_array(inst, brw_reg, new_num_sources);
brw_reg *new_src = (brw_reg *)
brw_alloc_size(s, sizeof(brw_reg) * new_num_sources, alignof(brw_reg));
for (unsigned i = 0; i < inst->sources; i++)
new_src[i] = inst->src[i];
inst->src = new_src;

View file

@ -443,17 +443,35 @@ brw_shader::brw_shader(const brw_shader_params *params)
this->gs.control_data_bits_per_vertex = 0;
this->gs.control_data_header_size_bits = 0;
if (params->per_primitive_offsets) {
assert(stage == MESA_SHADER_FRAGMENT);
memcpy(this->fs.per_primitive_offsets, params->per_primitive_offsets,
sizeof(this->fs.per_primitive_offsets));
}
{
   /* Set up the instruction arena.  The first block is sized from the
    * number of NIR instructions in the entrypoint, so that small shaders
    * ideally never need a second block.
    */
   unsigned inst_count = 0;
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   if (impl) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block)
            inst_count++;
      }
   }

   /* Rough guess: one base brw_inst with two sources per NIR instruction. */
   const unsigned estimate =
      inst_count * (sizeof(brw_inst) + 2 * sizeof(brw_reg));

   inst_arena.mem_ctx = ralloc_context(NULL);
   inst_arena.cap = estimate;

   /* Allocate the first block from the arena's own context, like every
    * later block, so that the destructor's ralloc_free() of that context
    * releases all of the arena's memory at once.
    */
   inst_arena.beg = (char *) ralloc_size(inst_arena.mem_ctx, inst_arena.cap);
   inst_arena.end = inst_arena.beg + inst_arena.cap;
   inst_arena.total_cap = inst_arena.cap;
}
}
/**
 * Destroy the shader: delete the payload object, then free the ralloc
 * context of the instruction arena together with everything allocated
 * from it.
 */
brw_shader::~brw_shader()
{
   delete payload_;

   /* Releases the context created for the arena in the constructor. */
   ralloc_free(inst_arena.mem_ctx);
}
void

View file

@ -226,6 +226,16 @@ public:
void debug_optimizer(const nir_shader *nir,
const char *pass_name,
int iteration, int pass_num) const;
/* Used to allocate instructions, see brw_new_inst() and brw_clone_inst(). */
/* Memory is handed out by advancing `beg` through the current block; when a
 * request does not fit, brw_alloc_size() allocates a new block and makes it
 * the current one.
 */
struct {
/* ralloc context that new blocks are allocated from; freed in the
 * brw_shader destructor.
 */
void *mem_ctx;
/* Size in bytes of the current block. */
unsigned cap;
/* Next unused byte of the current block. */
char *beg;
/* One past the last byte of the current block. */
char *end;
/* Sum of the sizes of all blocks allocated so far; the next block is sized
 * from half of this value.
 */
unsigned total_cap;
} inst_arena;
};
void brw_print_instructions(const brw_shader &s, FILE *file = stderr);