nir: use a GC context for instructions

Gives a roughly -15% change in compile-time for RADV/ACO.

Memory usage increase seems to be 5-6%.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5034
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12910>
This commit is contained in:
Rhys Perry 2021-09-08 15:24:10 +01:00 committed by Marge Bot
parent 69ba1c4d59
commit d09b658dbd
8 changed files with 76 additions and 106 deletions

View file

@ -51,8 +51,6 @@ static const struct debug_named_value nir_debug_control[] = {
"Disable shader validation at each successful lowering/optimization call" },
{ "validate_ssa_dominance", NIR_DEBUG_VALIDATE_SSA_DOMINANCE,
"Validate SSA dominance in shader at each successful lowering/optimization call" },
{ "validate_gc_list", NIR_DEBUG_VALIDATE_GC_LIST,
"Validate the instruction GC list at each successful lowering/optimization call" },
{ "tgsi", NIR_DEBUG_TGSI,
"Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
{ "print", NIR_DEBUG_PRINT,
@ -184,17 +182,6 @@ nir_component_mask_reinterpret(nir_component_mask_t mask,
return new_mask;
}
static void
nir_shader_destructor(void *ptr)
{
nir_shader *shader = ptr;
/* Free all instrs from the shader, since they're not ralloced. */
list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) {
nir_instr_free(instr);
}
}
nir_shader *
nir_shader_create(void *mem_ctx,
gl_shader_stage stage,
@ -202,7 +189,8 @@ nir_shader_create(void *mem_ctx,
shader_info *si)
{
nir_shader *shader = rzalloc(mem_ctx, nir_shader);
ralloc_set_destructor(shader, nir_shader_destructor);
shader->gctx = gc_context(shader);
#ifndef NDEBUG
nir_process_debug_variable();
@ -221,8 +209,6 @@ nir_shader_create(void *mem_ctx,
exec_list_make_empty(&shader->functions);
list_inithead(&shader->gc_list);
shader->num_inputs = 0;
shader->num_outputs = 0;
shader->num_uniforms = 0;
@ -437,7 +423,7 @@ static void src_free_indirects(nir_src *src)
{
if (src_has_indirect(src)) {
assert(src->reg.indirect->is_ssa || !src->reg.indirect->reg.indirect);
free(src->reg.indirect);
gc_free(src->reg.indirect);
src->reg.indirect = NULL;
}
}
@ -446,13 +432,13 @@ static void dest_free_indirects(nir_dest *dest)
{
if (!dest->is_ssa && dest->reg.indirect) {
assert(dest->reg.indirect->is_ssa || !dest->reg.indirect->reg.indirect);
free(dest->reg.indirect);
gc_free(dest->reg.indirect);
dest->reg.indirect = NULL;
}
}
static void
src_copy(nir_src *dest, const nir_src *src)
src_copy(nir_src *dest, const nir_src *src, gc_ctx *ctx)
{
src_free_indirects(dest);
@ -463,8 +449,8 @@ src_copy(nir_src *dest, const nir_src *src)
dest->reg.base_offset = src->reg.base_offset;
dest->reg.reg = src->reg.reg;
if (src->reg.indirect) {
dest->reg.indirect = calloc(1, sizeof(nir_src));
src_copy(dest->reg.indirect, src->reg.indirect);
dest->reg.indirect = gc_zalloc(ctx, nir_src, 1);
src_copy(dest->reg.indirect, src->reg.indirect, ctx);
} else {
dest->reg.indirect = NULL;
}
@ -476,7 +462,7 @@ src_copy(nir_src *dest, const nir_src *src)
*/
void nir_src_copy(nir_src *dest, const nir_src *src, nir_instr *instr)
{
src_copy(dest, src);
src_copy(dest, src, instr ? gc_get_context(instr) : NULL);
}
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
@ -491,7 +477,7 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
dest->reg.base_offset = src->reg.base_offset;
dest->reg.reg = src->reg.reg;
if (src->reg.indirect) {
dest->reg.indirect = calloc(1, sizeof(nir_src));
dest->reg.indirect = gc_zalloc(gc_get_context(instr), nir_src, 1);
nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
} else {
dest->reg.indirect = NULL;
@ -703,7 +689,7 @@ nir_alu_instr_create(nir_shader *shader, nir_op op)
{
unsigned num_srcs = nir_op_infos[op].num_inputs;
/* TODO: don't use calloc */
nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
nir_alu_instr *instr = gc_zalloc_zla(shader->gctx, nir_alu_instr, nir_alu_src, num_srcs);
instr_init(&instr->instr, nir_instr_type_alu);
instr->op = op;
@ -711,15 +697,13 @@ nir_alu_instr_create(nir_shader *shader, nir_op op)
for (unsigned i = 0; i < num_srcs; i++)
alu_src_init(&instr->src[i]);
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
nir_deref_instr *instr = calloc(1, sizeof(*instr));
nir_deref_instr *instr = gc_zalloc(shader->gctx, nir_deref_instr, 1);
instr_init(&instr->instr, nir_instr_type_deref);
@ -733,23 +717,19 @@ nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
dest_init(&instr->dest);
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
nir_jump_instr *instr = malloc(sizeof(*instr));
nir_jump_instr *instr = gc_alloc(shader->gctx, nir_jump_instr, 1);
instr_init(&instr->instr, nir_instr_type_jump);
src_init(&instr->condition);
instr->type = type;
instr->target = NULL;
instr->else_target = NULL;
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
@ -758,13 +738,11 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
unsigned bit_size)
{
nir_load_const_instr *instr =
calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value));
gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
instr_init(&instr->instr, nir_instr_type_load_const);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
@ -774,7 +752,7 @@ nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
/* TODO: don't use calloc */
nir_intrinsic_instr *instr =
calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
gc_zalloc_zla(shader->gctx, nir_intrinsic_instr, nir_src, num_srcs);
instr_init(&instr->instr, nir_instr_type_intrinsic);
instr->intrinsic = op;
@ -785,8 +763,6 @@ nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
for (unsigned i = 0; i < num_srcs; i++)
src_init(&instr->src[i]);
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
@ -795,7 +771,7 @@ nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
const unsigned num_params = callee->num_params;
nir_call_instr *instr =
calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0]));
gc_zalloc_zla(shader->gctx, nir_call_instr, nir_src, num_params);
instr_init(&instr->instr, nir_instr_type_call);
instr->callee = callee;
@ -803,8 +779,6 @@ nir_call_instr_create(nir_shader *shader, nir_function *callee)
for (unsigned i = 0; i < num_params; i++)
src_init(&instr->params[i]);
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
@ -819,13 +793,13 @@ static int8_t default_tg4_offsets[4][2] =
nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
nir_tex_instr *instr = calloc(1, sizeof(*instr));
nir_tex_instr *instr = gc_zalloc(shader->gctx, nir_tex_instr, 1);
instr_init(&instr->instr, nir_instr_type_tex);
dest_init(&instr->dest);
instr->num_srcs = num_srcs;
instr->src = malloc(sizeof(nir_tex_src) * num_srcs);
instr->src = gc_alloc(shader->gctx, nir_tex_src, num_srcs);
for (unsigned i = 0; i < num_srcs; i++)
src_init(&instr->src[i].src);
@ -833,8 +807,6 @@ nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
instr->sampler_index = 0;
memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
@ -843,8 +815,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex,
nir_tex_src_type src_type,
nir_src src)
{
nir_tex_src *new_srcs = calloc(sizeof(*new_srcs),
tex->num_srcs + 1);
nir_tex_src *new_srcs = gc_zalloc(gc_get_context(tex), nir_tex_src, tex->num_srcs + 1);
for (unsigned i = 0; i < tex->num_srcs; i++) {
new_srcs[i].src_type = tex->src[i].src_type;
@ -852,7 +823,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex,
&tex->src[i].src);
}
free(tex->src);
gc_free(tex->src);
tex->src = new_srcs;
tex->src[tex->num_srcs].src_type = src_type;
@ -888,14 +859,12 @@ nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
nir_phi_instr *instr = malloc(sizeof(*instr));
nir_phi_instr *instr = gc_alloc(shader->gctx, nir_phi_instr, 1);
instr_init(&instr->instr, nir_instr_type_phi);
dest_init(&instr->dest);
exec_list_make_empty(&instr->srcs);
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
@ -912,7 +881,7 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
{
nir_phi_src *phi_src;
phi_src = calloc(1, sizeof(nir_phi_src));
phi_src = gc_zalloc(gc_get_context(instr), nir_phi_src, 1);
phi_src->pred = pred;
phi_src->src = src;
phi_src->src.parent_instr = &instr->instr;
@ -924,13 +893,11 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
nir_parallel_copy_instr *instr = malloc(sizeof(*instr));
nir_parallel_copy_instr *instr = gc_alloc(shader->gctx, nir_parallel_copy_instr, 1);
instr_init(&instr->instr, nir_instr_type_parallel_copy);
exec_list_make_empty(&instr->entries);
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
@ -939,13 +906,11 @@ nir_ssa_undef_instr_create(nir_shader *shader,
unsigned num_components,
unsigned bit_size)
{
nir_ssa_undef_instr *instr = malloc(sizeof(*instr));
nir_ssa_undef_instr *instr = gc_alloc(shader->gctx, nir_ssa_undef_instr, 1);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
list_add(&instr->instr.gc_node, &shader->gc_list);
return instr;
}
@ -1264,14 +1229,13 @@ void nir_instr_free(nir_instr *instr)
switch (instr->type) {
case nir_instr_type_tex:
free(nir_instr_as_tex(instr)->src);
gc_free(nir_instr_as_tex(instr)->src);
break;
case nir_instr_type_phi: {
nir_phi_instr *phi = nir_instr_as_phi(instr);
nir_foreach_phi_src_safe(phi_src, phi) {
free(phi_src);
}
nir_foreach_phi_src_safe(phi_src, phi)
gc_free(phi_src);
break;
}
@ -1279,8 +1243,7 @@ void nir_instr_free(nir_instr *instr)
break;
}
list_del(&instr->gc_node);
free(instr);
gc_free(instr);
}
void
@ -1708,11 +1671,12 @@ nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
nir_shader *shader = ralloc_parent(if_stmt);
nir_src *src = &if_stmt->condition;
assert(!src_is_valid(src) || src->parent_if == if_stmt);
src_remove_all_uses(src);
src_copy(src, &new_src);
src_copy(src, &new_src, shader->gctx);
src_add_all_uses(src, NULL, if_stmt);
}

View file

@ -92,7 +92,6 @@ extern bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1];
#define NIR_DEBUG_PRINT_CBS (1u << 18)
#define NIR_DEBUG_PRINT_KS (1u << 19)
#define NIR_DEBUG_PRINT_CONSTS (1u << 20)
#define NIR_DEBUG_VALIDATE_GC_LIST (1u << 21)
#define NIR_DEBUG_PRINT (NIR_DEBUG_PRINT_VS | \
NIR_DEBUG_PRINT_TCS | \
@ -878,7 +877,6 @@ typedef enum PACKED {
typedef struct nir_instr {
struct exec_node node;
struct list_head gc_node;
struct nir_block *block;
nir_instr_type type;
@ -3689,6 +3687,8 @@ typedef struct nir_shader_compiler_options {
} nir_shader_compiler_options;
typedef struct nir_shader {
gc_ctx *gctx;
/** list of uniforms (nir_variable) */
struct exec_list variables;
@ -3704,8 +3704,6 @@ typedef struct nir_shader {
struct exec_list functions; /** < list of nir_function */
struct list_head gc_list; /** < list of all nir_instrs allocated on the shader but not yet freed. */
/**
* The size of the variable space for load_input_*, load_uniform_*, etc.
* intrinsics. This is in back-end specific units which is likely one of

View file

@ -244,7 +244,7 @@ __clone_src(clone_state *state, void *ninstr_or_if,
} else {
nsrc->reg.reg = remap_reg(state, src->reg.reg);
if (src->reg.indirect) {
nsrc->reg.indirect = malloc(sizeof(nir_src));
nsrc->reg.indirect = gc_alloc(state->ns->gctx, nir_src, 1);
__clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
}
nsrc->reg.base_offset = src->reg.base_offset;
@ -264,7 +264,7 @@ __clone_dst(clone_state *state, nir_instr *ninstr,
} else {
ndst->reg.reg = remap_reg(state, dst->reg.reg);
if (dst->reg.indirect) {
ndst->reg.indirect = malloc(sizeof(nir_src));
ndst->reg.indirect = gc_alloc(state->ns->gctx, nir_src, 1);
__clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
}
ndst->reg.base_offset = dst->reg.base_offset;
@ -814,10 +814,6 @@ nir_shader_replace(nir_shader *dst, nir_shader *src)
ralloc_adopt(dead_ctx, dst);
ralloc_free(dead_ctx);
list_for_each_entry_safe(nir_instr, instr, &dst->gc_list, gc_node) {
nir_instr_free(instr);
}
/* Re-parent all of src's ralloc children to dst */
ralloc_adopt(dst, src);
@ -826,8 +822,6 @@ nir_shader_replace(nir_shader *dst, nir_shader *src)
/* We have to move all the linked lists over separately because we need the
* pointers in the list elements to point to the lists in dst and not src.
*/
list_replace(&src->gc_list, &dst->gc_list);
list_inithead(&src->gc_list);
exec_list_move_nodes_to(&src->variables, &dst->variables);
/* Now move the functions over. This takes a tiny bit more work */

View file

@ -441,7 +441,7 @@ remove_phi_src(nir_block *block, nir_block *pred)
if (src->pred == pred) {
list_del(&src->src.use_link);
exec_node_remove(&src->node);
free(src);
gc_free(src);
}
}
}

View file

@ -159,7 +159,7 @@ get_deref_reg_src(nir_deref_instr *deref, struct locals_to_regs_state *state)
if (src.reg.indirect) {
assert(src.reg.base_offset == 0);
} else {
src.reg.indirect = malloc(sizeof(nir_src));
src.reg.indirect = gc_alloc(gc_get_context(deref), nir_src, 1);
*src.reg.indirect =
nir_src_for_ssa(nir_imm_int(b, src.reg.base_offset));
src.reg.base_offset = 0;

View file

@ -558,7 +558,7 @@ read_src(read_ctx *ctx, nir_src *src)
src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
src->reg.base_offset = blob_read_uint32(ctx->blob);
if (header.any.is_indirect) {
src->reg.indirect = malloc(sizeof(nir_src));
src->reg.indirect = gc_alloc(ctx->nir->gctx, nir_src, 1);
read_src(ctx, src->reg.indirect);
} else {
src->reg.indirect = NULL;
@ -779,7 +779,7 @@ read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
dst->reg.reg = read_object(ctx);
dst->reg.base_offset = blob_read_uint32(ctx->blob);
if (dest.reg.is_indirect) {
dst->reg.indirect = malloc(sizeof(nir_src));
dst->reg.indirect = gc_alloc(ctx->nir->gctx, nir_src, 1);
read_src(ctx, dst->reg.indirect);
}
}

View file

@ -40,6 +40,24 @@
static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
static bool
sweep_src_indirect(nir_src *src, void *nir)
{
if (!src->is_ssa && src->reg.indirect)
gc_mark_live(((nir_shader*)nir)->gctx, src->reg.indirect);
return true;
}
static bool
sweep_dest_indirect(nir_dest *dest, void *nir)
{
if (!dest->is_ssa && dest->reg.indirect)
gc_mark_live(((nir_shader*)nir)->gctx, dest->reg.indirect);
return true;
}
static void
sweep_block(nir_shader *nir, nir_block *block)
{
@ -55,8 +73,22 @@ sweep_block(nir_shader *nir, nir_block *block)
block->live_out = NULL;
nir_foreach_instr(instr, block) {
list_del(&instr->gc_node);
list_add(&instr->gc_node, &nir->gc_list);
gc_mark_live(nir->gctx, instr);
switch (instr->type) {
case nir_instr_type_tex:
gc_mark_live(nir->gctx, nir_instr_as_tex(instr)->src);
break;
case nir_instr_type_phi:
nir_foreach_phi_src(src, nir_instr_as_phi(instr))
gc_mark_live(nir->gctx, src);
break;
default:
break;
}
nir_foreach_src(instr, sweep_src_indirect, nir);
nir_foreach_dest(instr, sweep_dest_indirect, nir);
}
}
@ -138,12 +170,13 @@ nir_sweep(nir_shader *nir)
struct list_head instr_gc_list;
list_inithead(&instr_gc_list);
list_replace(&nir->gc_list, &instr_gc_list);
list_inithead(&nir->gc_list);
/* First, move ownership of all the memory to a temporary context; assume dead. */
ralloc_adopt(rubbish, nir);
/* Start sweeping */
gc_sweep_start(nir->gctx);
ralloc_steal(nir, nir->gctx);
ralloc_steal(nir, (char *)nir->info.name);
if (nir->info.label)
ralloc_steal(nir, (char *)nir->info.label);
@ -156,12 +189,6 @@ nir_sweep(nir_shader *nir)
sweep_function(nir, func);
}
/* Sweep instrs not found while walking the shader. */
list_for_each_entry_safe(nir_instr, instr, &instr_gc_list, gc_node) {
nir_instr_free(instr);
}
assert(list_is_empty(&instr_gc_list));
ralloc_steal(nir, nir->constant_data);
ralloc_steal(nir, nir->xfb_info);
ralloc_steal(nir, nir->printf_info);
@ -171,5 +198,6 @@ nir_sweep(nir_shader *nir)
}
/* Free everything we didn't steal back. */
gc_sweep_end(nir->gctx);
ralloc_free(rubbish);
}

View file

@ -101,8 +101,6 @@ typedef struct {
/* map of instruction/var/etc to failed assert string */
struct hash_table *errors;
struct set *shader_gc_list;
} validate_state;
static void
@ -1112,9 +1110,6 @@ validate_instr(nir_instr *instr, validate_state *state)
state->instr = instr;
if (state->shader_gc_list)
validate_assert(state, _mesa_set_search(state->shader_gc_list, instr));
switch (instr->type) {
case nir_instr_type_alu:
validate_alu_instr(nir_instr_as_alu(instr), state);
@ -1723,8 +1718,6 @@ init_validate_state(validate_state *state)
state->blocks = _mesa_pointer_set_create(state->mem_ctx);
state->var_defs = _mesa_pointer_hash_table_create(state->mem_ctx);
state->errors = _mesa_pointer_hash_table_create(state->mem_ctx);
state->shader_gc_list = NIR_DEBUG(VALIDATE_GC_LIST) ?
_mesa_pointer_set_create(state->mem_ctx) : NULL;
state->loop = NULL;
state->instr = NULL;
@ -1781,13 +1774,6 @@ nir_validate_shader(nir_shader *shader, const char *when)
validate_state state;
init_validate_state(&state);
if (state.shader_gc_list) {
list_for_each_entry(nir_instr, instr, &shader->gc_list, gc_node) {
if (instr->node.prev || instr->node.next)
_mesa_set_add(state.shader_gc_list, instr);
}
}
state.shader = shader;
nir_variable_mode valid_modes =