From 879a569884b153be4e90e4f47ef7d66d860a6a5d Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Wed, 7 Jul 2021 10:30:05 -0700 Subject: [PATCH] nir: Switch from ralloc to malloc for NIR instructions. By replacing the 48-byte ralloc header with our exec_node gc_node (16 bytes), runtime of shader-db on my system across this series drops -4.21738% +/- 1.47757% (n=5). Inspired by discussion on #5034. Reviewed-by: Matt Turner Part-of: --- src/compiler/nir/nir.c | 95 +++++++++++++++------ src/compiler/nir/nir_clone.c | 8 +- src/compiler/nir/nir_control_flow.c | 1 + src/compiler/nir/nir_lower_image.c | 2 +- src/compiler/nir/nir_lower_locals_to_regs.c | 2 +- src/compiler/nir/nir_serialize.c | 4 +- src/compiler/nir/nir_sweep.c | 8 +- src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +- 8 files changed, 84 insertions(+), 38 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index c6bc24b0dd4..43466ceb64a 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -100,6 +100,17 @@ nir_component_mask_reinterpret(nir_component_mask_t mask, return new_mask; } +static void +nir_shader_destructor(void *ptr) +{ + nir_shader *shader = ptr; + + /* Free all instrs from the shader, since they're not ralloced. */ + list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) { + nir_instr_free(instr); + } +} + nir_shader * nir_shader_create(void *mem_ctx, gl_shader_stage stage, @@ -107,6 +118,7 @@ nir_shader_create(void *mem_ctx, shader_info *si) { nir_shader *shader = rzalloc(mem_ctx, nir_shader); + ralloc_set_destructor(shader, nir_shader_destructor); exec_list_make_empty(&shader->variables); @@ -339,7 +351,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx) dest->reg.base_offset = src->reg.base_offset; dest->reg.reg = src->reg.reg; if (src->reg.indirect) { - dest->reg.indirect = ralloc(mem_ctx, nir_src); + dest->reg.indirect = malloc(sizeof(nir_src)); nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx); } else { dest->reg.indirect = NULL; @@ -357,7 +369,7 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr) dest->reg.base_offset = src->reg.base_offset; dest->reg.reg = src->reg.reg; if (src->reg.indirect) { - dest->reg.indirect = ralloc(instr, nir_src); + dest->reg.indirect = malloc(sizeof(nir_src)); nir_src_copy(dest->reg.indirect, src->reg.indirect, instr); } else { dest->reg.indirect = NULL; @@ -567,10 +579,8 @@ nir_alu_instr * nir_alu_instr_create(nir_shader *shader, nir_op op) { unsigned num_srcs = nir_op_infos[op].num_inputs; - /* TODO: don't use rzalloc */ - nir_alu_instr *instr = - rzalloc_size(shader, - sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); + /* TODO: don't use calloc */ + nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); instr_init(&instr->instr, nir_instr_type_alu); instr->op = op; @@ -586,8 +596,7 @@ nir_alu_instr_create(nir_shader *shader, nir_op op) nir_deref_instr * nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type) { - nir_deref_instr *instr = - rzalloc_size(shader, sizeof(nir_deref_instr)); + nir_deref_instr *instr = calloc(1, sizeof(*instr)); instr_init(&instr->instr, nir_instr_type_deref); @@ -609,7 +618,7 @@ nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type) nir_jump_instr * nir_jump_instr_create(nir_shader *shader, nir_jump_type type) { - nir_jump_instr *instr = ralloc(shader, nir_jump_instr); + nir_jump_instr *instr = malloc(sizeof(*instr)); instr_init(&instr->instr, nir_instr_type_jump); src_init(&instr->condition); instr->type = type; @@ -626,7 +635,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components, unsigned bit_size) { nir_load_const_instr *instr = - rzalloc_size(shader, sizeof(*instr) + num_components * sizeof(*instr->value)); + calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value)); instr_init(&instr->instr, nir_instr_type_load_const); nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size); @@ -640,10 +649,9 @@ nir_intrinsic_instr * nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) { unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; - /* TODO: don't use rzalloc */ + /* TODO: don't use calloc */ nir_intrinsic_instr *instr = - rzalloc_size(shader, - sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); + calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); instr_init(&instr->instr, nir_instr_type_intrinsic); instr->intrinsic = op; @@ -664,8 +672,7 @@ nir_call_instr_create(nir_shader *shader, nir_function *callee) { const unsigned num_params = callee->num_params; nir_call_instr *instr = - rzalloc_size(shader, sizeof(*instr) + - num_params * sizeof(instr->params[0])); + calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0])); instr_init(&instr->instr, nir_instr_type_call); instr->callee = callee; @@ -689,13 +696,13 @@ static int8_t default_tg4_offsets[4][2] = nir_tex_instr * nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) { - nir_tex_instr *instr = rzalloc(shader, nir_tex_instr); + nir_tex_instr *instr = calloc(1, sizeof(*instr)); instr_init(&instr->instr, nir_instr_type_tex); dest_init(&instr->dest); instr->num_srcs = num_srcs; - instr->src = ralloc_array(instr, nir_tex_src, num_srcs); + instr->src = malloc(sizeof(nir_tex_src) * num_srcs); for (unsigned i = 0; i < num_srcs; i++) src_init(&instr->src[i].src); @@ -713,7 +720,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex, nir_tex_src_type src_type, nir_src src) { - nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + nir_tex_src *new_srcs = calloc(sizeof(*new_srcs), tex->num_srcs + 1); for (unsigned i = 0; i < tex->num_srcs; i++) { @@ -722,7 +729,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex, &tex->src[i].src); } - ralloc_free(tex->src); + free(tex->src); tex->src = new_srcs; tex->src[tex->num_srcs].src_type = src_type; @@ -758,7 +765,7 @@ nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex) nir_phi_instr * nir_phi_instr_create(nir_shader *shader) { - nir_phi_instr *instr = ralloc(shader, nir_phi_instr); + nir_phi_instr *instr = malloc(sizeof(*instr)); instr_init(&instr->instr, nir_instr_type_phi); dest_init(&instr->dest); @@ -782,7 +789,7 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src) { nir_phi_src *phi_src; - phi_src = rzalloc(instr, nir_phi_src); + phi_src = calloc(1, sizeof(nir_phi_src)); phi_src->pred = pred; phi_src->src = src; phi_src->src.parent_instr = &instr->instr; @@ -794,7 +801,7 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src) nir_parallel_copy_instr * nir_parallel_copy_instr_create(nir_shader *shader) { - nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr); + nir_parallel_copy_instr *instr = malloc(sizeof(*instr)); instr_init(&instr->instr, nir_instr_type_parallel_copy); exec_list_make_empty(&instr->entries); @@ -809,7 +816,7 @@ nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components, unsigned bit_size) { - nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr); + nir_ssa_undef_instr *instr = malloc(sizeof(*instr)); instr_init(&instr->instr, nir_instr_type_ssa_undef); nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size); @@ -1115,10 +1122,50 @@ void nir_instr_remove_v(nir_instr *instr) } } +static bool nir_instr_free_src_indirects(nir_src *src, void *state) +{ + if (!src->is_ssa && src->reg.indirect) { + assert(src->reg.indirect->is_ssa || !src->reg.indirect->reg.indirect); + free(src->reg.indirect); + src->reg.indirect = NULL; + } + return true; +} + +static bool nir_instr_free_dest_indirects(nir_dest *dest, void *state) +{ + if (!dest->is_ssa && dest->reg.indirect) { + assert(dest->reg.indirect->is_ssa || !dest->reg.indirect->reg.indirect); + free(dest->reg.indirect); + dest->reg.indirect = NULL; + } + return true; +} + void nir_instr_free(nir_instr *instr) { + nir_foreach_src(instr, nir_instr_free_src_indirects, NULL); + nir_foreach_dest(instr, nir_instr_free_dest_indirects, NULL); + + switch (instr->type) { + case nir_instr_type_tex: + free(nir_instr_as_tex(instr)->src); + break; + + case nir_instr_type_phi: { + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src_safe(phi_src, phi) { + free(phi_src); + } + break; + } + + default: + break; + } + list_del(&instr->gc_node); - ralloc_free(instr); + free(instr); } void diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index d1bd0e5fa3d..fd7184fddce 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -243,7 +243,7 @@ __clone_src(clone_state *state, void *ninstr_or_if, } else { nsrc->reg.reg = remap_reg(state, src->reg.reg); if (src->reg.indirect) { - nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); + nsrc->reg.indirect = malloc(sizeof(nir_src)); __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); } nsrc->reg.base_offset = src->reg.base_offset; @@ -263,7 +263,7 @@ __clone_dst(clone_state *state, nir_instr *ninstr, } else { ndst->reg.reg = remap_reg(state, dst->reg.reg); if (dst->reg.indirect) { - ndst->reg.indirect = ralloc(ninstr, nir_src); + ndst->reg.indirect = malloc(sizeof(nir_src)); __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); } ndst->reg.base_offset = dst->reg.base_offset; @@ -790,6 +790,10 @@ nir_shader_replace(nir_shader *dst, nir_shader *src) ralloc_adopt(dead_ctx, dst); ralloc_free(dead_ctx); + list_for_each_entry_safe(nir_instr, instr, &dst->gc_list, gc_node) { + nir_instr_free(instr); + } + /* Re-parent all of src's ralloc children to dst */ ralloc_adopt(dst, src); diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c index 433aa18a39a..dd5e7982a20 100644 --- a/src/compiler/nir/nir_control_flow.c +++ b/src/compiler/nir/nir_control_flow.c @@ -440,6 +440,7 @@ remove_phi_src(nir_block *block, nir_block *pred) if (src->pred == pred) { list_del(&src->src.use_link); exec_node_remove(&src->node); + free(src); } } } diff --git a/src/compiler/nir/nir_lower_image.c b/src/compiler/nir/nir_lower_image.c index 2a53c1972b5..946ddc6cd92 100644 --- a/src/compiler/nir/nir_lower_image.c +++ b/src/compiler/nir/nir_lower_image.c @@ -58,7 +58,7 @@ lower_cube_size(nir_builder *b, nir_intrinsic_instr *intrin) nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec); nir_instr_remove(&intrin->instr); - ralloc_free(&intrin->instr); + nir_instr_free(&intrin->instr); } static bool diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c index cddc49bc37d..4aa00156545 100644 --- a/src/compiler/nir/nir_lower_locals_to_regs.c +++ b/src/compiler/nir/nir_lower_locals_to_regs.c @@ -159,7 +159,7 @@ get_deref_reg_src(nir_deref_instr *deref, struct locals_to_regs_state *state) if (src.reg.indirect) { assert(src.reg.base_offset == 0); } else { - src.reg.indirect = ralloc(b->shader, nir_src); + src.reg.indirect = malloc(sizeof(nir_src)); *src.reg.indirect = nir_src_for_ssa(nir_imm_int(b, src.reg.base_offset)); src.reg.base_offset = 0; diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index 8de119a18dc..7b8cba6ce2a 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -550,7 +550,7 @@ read_src(read_ctx *ctx, nir_src *src, void *mem_ctx) src->reg.reg = read_lookup_object(ctx, header.any.object_idx); src->reg.base_offset = blob_read_uint32(ctx->blob); if (header.any.is_indirect) { - src->reg.indirect = ralloc(mem_ctx, nir_src); + src->reg.indirect = malloc(sizeof(nir_src)); read_src(ctx, src->reg.indirect, mem_ctx); } else { src->reg.indirect = NULL; @@ -770,7 +770,7 @@ read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr, dst->reg.reg = read_object(ctx); dst->reg.base_offset = blob_read_uint32(ctx->blob); if (dest.reg.is_indirect) { - dst->reg.indirect = ralloc(instr, nir_src); + dst->reg.indirect = malloc(sizeof(nir_src)); read_src(ctx, dst->reg.indirect, instr); } } diff --git a/src/compiler/nir/nir_sweep.c b/src/compiler/nir/nir_sweep.c index 09fbddb5cd0..812a450fdb8 100644 --- a/src/compiler/nir/nir_sweep.c +++ b/src/compiler/nir/nir_sweep.c @@ -76,8 +76,6 @@ sweep_block(nir_shader *nir, nir_block *block) list_del(&instr->gc_node); list_add(&instr->gc_node, &nir->gc_list); - ralloc_steal(nir, instr); - nir_foreach_src(instr, sweep_src_indirect, nir); nir_foreach_dest(instr, sweep_dest_indirect, nir); } @@ -179,11 +177,7 @@ nir_sweep(nir_shader *nir) sweep_function(nir, func); } - /* Manually GCed instrs now before ralloc_free()ing the other rubbish, to - * ensure that the shader's GC list was maintained without ralloc_freeing any - * instrs behind our back. Note that the instr free routine will remove it - * from the list. - */ + /* Sweep instrs not found while walking the shader. */ list_for_each_entry_safe(nir_instr, instr, &instr_gc_list, gc_node) { nir_instr_free(instr); } diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index df3d5b84f2f..58841029c18 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -823,7 +823,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) dest.saturate = false; if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) { - nir_src *indirect = ralloc(c->build.shader, nir_src); + nir_src *indirect = malloc(sizeof(nir_src)); *indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect)); dest.dest.reg.indirect = indirect; }