mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-22 00:50:45 +01:00
nir: Use sparse bitset for liveness information
Some shaders, especially RTPSO shaders that have parts of the PSO
inlined, can become absolutely huge. Using a sparse bitset avoids
quadratic complexity in memory consumption for the liveness information.
This reduces peak memory usage in worst-case tests (hammering
compilation of many huge RTPSOs on 32 threads concurrently) by ~60%,
from 43GB to 18GB.
CPU time (seconds) differences for a workload with mostly small shaders:
Difference at 95.0% confidence
-5.27 +/- 1.08963
-0.88811% +/- 0.183626%
(Student's t, pooled s = 0.629735)
Peak resident set usage for the mostly-small workload:
Difference at 95.0% confidence
30809 +/- 13394.3
1.59276% +/- 0.69246%
(Student's t, pooled s = 7741.09)
CPU time for the heavy workload did not show any difference.
Co-authored-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37908>
This commit is contained in:
parent
a8b75dd0f4
commit
0cb1fca8fa
8 changed files with 151 additions and 222 deletions
|
|
@ -43,6 +43,7 @@
|
|||
#include "util/ralloc.h"
|
||||
#include "util/range_minimum_query.h"
|
||||
#include "util/set.h"
|
||||
#include "util/sparse_bitset.h"
|
||||
#include "util/u_math.h"
|
||||
#include "nir_defines.h"
|
||||
#include "nir_shader_compiler_options.h"
|
||||
|
|
@ -3106,8 +3107,8 @@ typedef struct nir_block {
|
|||
/* SSA def live in and out for this block; used for liveness analysis.
|
||||
* Indexed by ssa_def->index
|
||||
*/
|
||||
BITSET_WORD *live_in;
|
||||
BITSET_WORD *live_out;
|
||||
struct u_sparse_bitset live_in;
|
||||
struct u_sparse_bitset live_out;
|
||||
} nir_block;
|
||||
|
||||
static inline bool
|
||||
|
|
@ -6131,7 +6132,7 @@ bool nir_shader_supports_implicit_lod(nir_shader *shader);
|
|||
|
||||
void nir_live_defs_impl(nir_function_impl *impl);
|
||||
|
||||
const BITSET_WORD *nir_get_live_defs(nir_cursor cursor, void *mem_ctx);
|
||||
struct u_sparse_bitset *nir_get_live_defs(nir_cursor cursor, void *mem_ctx);
|
||||
|
||||
void nir_loop_analyze_impl(nir_function_impl *impl,
|
||||
nir_variable_mode indirect_mask,
|
||||
|
|
|
|||
|
|
@ -40,10 +40,11 @@
|
|||
*/
|
||||
|
||||
struct live_defs_state {
|
||||
unsigned bitset_words;
|
||||
unsigned num_bits;
|
||||
void *mem_ctx;
|
||||
|
||||
/* Used in propagate_across_edge() */
|
||||
BITSET_WORD *tmp_live;
|
||||
struct u_sparse_bitset tmp_live;
|
||||
|
||||
nir_block_worklist worklist;
|
||||
};
|
||||
|
|
@ -55,26 +56,20 @@ static void
|
|||
init_liveness_block(nir_block *block,
|
||||
struct live_defs_state *state)
|
||||
{
|
||||
block->live_in = reralloc(block, block->live_in, BITSET_WORD,
|
||||
state->bitset_words);
|
||||
memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD));
|
||||
|
||||
block->live_out = reralloc(block, block->live_out, BITSET_WORD,
|
||||
state->bitset_words);
|
||||
memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD));
|
||||
|
||||
u_sparse_bitset_init(&block->live_in, state->num_bits, state->mem_ctx);
|
||||
u_sparse_bitset_init(&block->live_out, state->num_bits, state->mem_ctx);
|
||||
nir_block_worklist_push_head(&state->worklist, block);
|
||||
}
|
||||
|
||||
static bool
|
||||
set_src_live(nir_src *src, void *void_live)
|
||||
{
|
||||
BITSET_WORD *live = void_live;
|
||||
struct u_sparse_bitset *live = void_live;
|
||||
|
||||
if (nir_src_is_undef(*src))
|
||||
return true; /* undefined variables are never live */
|
||||
|
||||
BITSET_SET(live, src->ssa->index);
|
||||
u_sparse_bitset_set(live, src->ssa->index);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -82,9 +77,9 @@ set_src_live(nir_src *src, void *void_live)
|
|||
static bool
|
||||
set_ssa_def_dead(nir_def *def, void *void_live)
|
||||
{
|
||||
BITSET_WORD *live = void_live;
|
||||
struct u_sparse_bitset *live = void_live;
|
||||
|
||||
BITSET_CLEAR(live, def->index);
|
||||
u_sparse_bitset_clear(live, def->index);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -102,8 +97,8 @@ static bool
|
|||
propagate_across_edge(nir_block *pred, nir_block *succ,
|
||||
struct live_defs_state *state)
|
||||
{
|
||||
BITSET_WORD *live = state->tmp_live;
|
||||
memcpy(live, succ->live_in, state->bitset_words * sizeof *live);
|
||||
struct u_sparse_bitset *live = &state->tmp_live;
|
||||
u_sparse_bitset_dup(live, &succ->live_in);
|
||||
|
||||
nir_foreach_phi(phi, succ) {
|
||||
set_ssa_def_dead(&phi->def, live);
|
||||
|
|
@ -118,21 +113,18 @@ propagate_across_edge(nir_block *pred, nir_block *succ,
|
|||
}
|
||||
}
|
||||
|
||||
BITSET_WORD progress = 0;
|
||||
for (unsigned i = 0; i < state->bitset_words; ++i) {
|
||||
progress |= live[i] & ~pred->live_out[i];
|
||||
pred->live_out[i] |= live[i];
|
||||
}
|
||||
return progress != 0;
|
||||
bool progress = u_sparse_bitset_merge(&pred->live_out, live);
|
||||
u_sparse_bitset_free(live);
|
||||
return progress;
|
||||
}
|
||||
|
||||
void
|
||||
nir_live_defs_impl(nir_function_impl *impl)
|
||||
{
|
||||
struct live_defs_state state = {
|
||||
.bitset_words = BITSET_WORDS(impl->ssa_alloc),
|
||||
.num_bits = impl->ssa_alloc,
|
||||
.mem_ctx = impl,
|
||||
};
|
||||
state.tmp_live = rzalloc_array(impl, BITSET_WORD, state.bitset_words),
|
||||
|
||||
nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL);
|
||||
|
||||
|
|
@ -156,12 +148,11 @@ nir_live_defs_impl(nir_function_impl *impl)
|
|||
*/
|
||||
nir_block *block = nir_block_worklist_pop_head(&state.worklist);
|
||||
|
||||
memcpy(block->live_in, block->live_out,
|
||||
state.bitset_words * sizeof(BITSET_WORD));
|
||||
u_sparse_bitset_dup(&block->live_in, &block->live_out);
|
||||
|
||||
nir_if *following_if = nir_block_get_following_if(block);
|
||||
if (following_if)
|
||||
set_src_live(&following_if->condition, block->live_in);
|
||||
set_src_live(&following_if->condition, &block->live_in);
|
||||
|
||||
nir_foreach_instr_reverse(instr, block) {
|
||||
/* Phi nodes are handled seperately so we want to skip them. Since
|
||||
|
|
@ -171,8 +162,8 @@ nir_live_defs_impl(nir_function_impl *impl)
|
|||
if (instr->type == nir_instr_type_phi)
|
||||
break;
|
||||
|
||||
nir_foreach_def(instr, set_ssa_def_dead, block->live_in);
|
||||
nir_foreach_src(instr, set_src_live, block->live_in);
|
||||
nir_foreach_def(instr, set_ssa_def_dead, &block->live_in);
|
||||
nir_foreach_src(instr, set_src_live, &block->live_in);
|
||||
}
|
||||
|
||||
/* Walk over all of the predecessors of the current block updating
|
||||
|
|
@ -187,7 +178,6 @@ nir_live_defs_impl(nir_function_impl *impl)
|
|||
}
|
||||
}
|
||||
|
||||
ralloc_free(state.tmp_live);
|
||||
nir_block_worklist_fini(&state.worklist);
|
||||
}
|
||||
|
||||
|
|
@ -197,7 +187,7 @@ nir_live_defs_impl(nir_function_impl *impl)
|
|||
* which the instruction lives. Do not ralloc_free() it directly;
|
||||
* instead, provide a mem_ctx and free that.
|
||||
*/
|
||||
const BITSET_WORD *
|
||||
struct u_sparse_bitset *
|
||||
nir_get_live_defs(nir_cursor cursor, void *mem_ctx)
|
||||
{
|
||||
nir_block *block = nir_cursor_current_block(cursor);
|
||||
|
|
@ -206,26 +196,25 @@ nir_get_live_defs(nir_cursor cursor, void *mem_ctx)
|
|||
|
||||
switch (cursor.option) {
|
||||
case nir_cursor_before_block:
|
||||
return cursor.block->live_in;
|
||||
return &cursor.block->live_in;
|
||||
|
||||
case nir_cursor_after_block:
|
||||
return cursor.block->live_out;
|
||||
return &cursor.block->live_out;
|
||||
|
||||
case nir_cursor_before_instr:
|
||||
if (cursor.instr == nir_block_first_instr(cursor.instr->block))
|
||||
return cursor.instr->block->live_in;
|
||||
return &cursor.instr->block->live_in;
|
||||
break;
|
||||
|
||||
case nir_cursor_after_instr:
|
||||
if (cursor.instr == nir_block_last_instr(cursor.instr->block))
|
||||
return cursor.instr->block->live_out;
|
||||
return &cursor.instr->block->live_out;
|
||||
break;
|
||||
}
|
||||
|
||||
/* If we got here, we're an instruction cursor mid-block */
|
||||
const unsigned bitset_words = BITSET_WORDS(impl->ssa_alloc);
|
||||
BITSET_WORD *live = ralloc_array(mem_ctx, BITSET_WORD, bitset_words);
|
||||
memcpy(live, block->live_out, bitset_words * sizeof(BITSET_WORD));
|
||||
struct u_sparse_bitset *live = rzalloc_size(mem_ctx, sizeof(struct u_sparse_bitset));
|
||||
u_sparse_bitset_dup_with_ctx(live, &block->live_out, mem_ctx);
|
||||
|
||||
nir_foreach_instr_reverse(instr, block) {
|
||||
if (cursor.option == nir_cursor_after_instr && instr == cursor.instr)
|
||||
|
|
@ -283,13 +272,13 @@ search_for_use_after_instr(nir_instr *start, nir_def *def)
|
|||
static bool
|
||||
nir_def_is_live_at(nir_def *def, nir_instr *instr)
|
||||
{
|
||||
if (BITSET_TEST(instr->block->live_out, def->index)) {
|
||||
if (u_sparse_bitset_test(&instr->block->live_out, def->index)) {
|
||||
/* Since def dominates instr, if def is in the liveout of the block,
|
||||
* it's live at instr
|
||||
*/
|
||||
return true;
|
||||
} else {
|
||||
if (BITSET_TEST(instr->block->live_in, def->index) ||
|
||||
if (u_sparse_bitset_test(&instr->block->live_in, def->index) ||
|
||||
nir_def_block(def) == instr->block) {
|
||||
/* In this case it is either live coming into instr's block or it
|
||||
* is defined in the same block. In this case, we simply need to
|
||||
|
|
|
|||
|
|
@ -72,51 +72,16 @@ instr_is_shader_call(nir_instr *instr)
|
|||
intrin->intrinsic == nir_intrinsic_execute_callable;
|
||||
}
|
||||
|
||||
/* Previously named bitset, it had to be renamed as FreeBSD defines a struct
|
||||
* named bitset in sys/_bitset.h required by pthread_np.h which is included
|
||||
* from src/util/u_thread.h that is indirectly included by this file.
|
||||
*/
|
||||
struct sized_bitset {
|
||||
BITSET_WORD *set;
|
||||
unsigned size;
|
||||
};
|
||||
|
||||
static struct sized_bitset
|
||||
bitset_create(void *mem_ctx, unsigned size)
|
||||
{
|
||||
return (struct sized_bitset){
|
||||
.set = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(size)),
|
||||
.size = size,
|
||||
};
|
||||
}
|
||||
|
||||
static bool
|
||||
src_is_in_bitset(nir_src *src, void *_set)
|
||||
{
|
||||
struct sized_bitset *set = _set;
|
||||
struct u_sparse_bitset *set = _set;
|
||||
|
||||
/* Any SSA values which were added after we generated liveness information
|
||||
* are things generated by this pass and, while most of it is arithmetic
|
||||
* which we could re-materialize, we don't need to because it's only used
|
||||
* for a single load/store and so shouldn't cross any shader calls.
|
||||
*/
|
||||
if (src->ssa->index >= set->size)
|
||||
return false;
|
||||
|
||||
return BITSET_TEST(set->set, src->ssa->index);
|
||||
}
|
||||
|
||||
static void
|
||||
add_ssa_def_to_bitset(nir_def *def, struct sized_bitset *set)
|
||||
{
|
||||
if (def->index >= set->size)
|
||||
return;
|
||||
|
||||
BITSET_SET(set->set, def->index);
|
||||
return u_sparse_bitset_test(set, src->ssa->index);
|
||||
}
|
||||
|
||||
static bool
|
||||
can_remat_instr(nir_instr *instr, struct sized_bitset *remat)
|
||||
can_remat_instr(nir_instr *instr, struct u_sparse_bitset *remat)
|
||||
{
|
||||
/* Set of all values which are trivially re-materializable and we shouldn't
|
||||
* ever spill them. This includes:
|
||||
|
|
@ -203,21 +168,21 @@ can_remat_instr(nir_instr *instr, struct sized_bitset *remat)
|
|||
}
|
||||
|
||||
static bool
|
||||
can_remat_ssa_def(nir_def *def, struct sized_bitset *remat)
|
||||
can_remat_ssa_def(nir_def *def, struct u_sparse_bitset *remat)
|
||||
{
|
||||
return can_remat_instr(def->parent_instr, remat);
|
||||
}
|
||||
|
||||
struct add_instr_data {
|
||||
struct util_dynarray *buf;
|
||||
struct sized_bitset *remat;
|
||||
struct u_sparse_bitset *remat;
|
||||
};
|
||||
|
||||
static bool
|
||||
add_src_instr(nir_src *src, void *state)
|
||||
{
|
||||
struct add_instr_data *data = state;
|
||||
if (BITSET_TEST(data->remat->set, src->ssa->index))
|
||||
if (u_sparse_bitset_test(data->remat, src->ssa->index))
|
||||
return true;
|
||||
|
||||
util_dynarray_foreach(data->buf, nir_instr *, instr_ptr) {
|
||||
|
|
@ -243,7 +208,7 @@ compare_instr_indexes(const void *_inst1, const void *_inst2)
|
|||
}
|
||||
|
||||
static bool
|
||||
can_remat_chain_ssa_def(nir_def *def, struct sized_bitset *remat, struct util_dynarray *buf)
|
||||
can_remat_chain_ssa_def(nir_def *def, struct u_sparse_bitset *remat, struct util_dynarray *buf)
|
||||
{
|
||||
assert(util_dynarray_num_elements(buf, nir_instr *) == 0);
|
||||
|
||||
|
|
@ -274,14 +239,14 @@ can_remat_chain_ssa_def(nir_def *def, struct sized_bitset *remat, struct util_dy
|
|||
* through values that might not be in that set but that we can
|
||||
* rematerialize.
|
||||
*/
|
||||
struct sized_bitset potential_remat = bitset_create(mem_ctx, remat->size);
|
||||
memcpy(potential_remat.set, remat->set, BITSET_WORDS(remat->size) * sizeof(BITSET_WORD));
|
||||
struct u_sparse_bitset potential_remat;
|
||||
u_sparse_bitset_dup(&potential_remat, remat);
|
||||
|
||||
util_dynarray_foreach(buf, nir_instr *, instr_ptr) {
|
||||
nir_def *instr_ssa_def = nir_instr_def(*instr_ptr);
|
||||
|
||||
/* If already in the potential rematerializable, nothing to do. */
|
||||
if (BITSET_TEST(potential_remat.set, instr_ssa_def->index))
|
||||
if (u_sparse_bitset_test(&potential_remat, instr_ssa_def->index))
|
||||
continue;
|
||||
|
||||
if (!can_remat_instr(*instr_ptr, &potential_remat))
|
||||
|
|
@ -290,7 +255,7 @@ can_remat_chain_ssa_def(nir_def *def, struct sized_bitset *remat, struct util_dy
|
|||
/* All the sources are rematerializable and the instruction is also
|
||||
* rematerializable, mark it as rematerializable too.
|
||||
*/
|
||||
BITSET_SET(potential_remat.set, instr_ssa_def->index);
|
||||
u_sparse_bitset_set(&potential_remat, instr_ssa_def->index);
|
||||
}
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
|
|
@ -313,8 +278,9 @@ remat_ssa_def(nir_builder *b, nir_def *def, struct hash_table *remap_table)
|
|||
|
||||
static nir_def *
|
||||
remat_chain_ssa_def(nir_builder *b, struct util_dynarray *buf,
|
||||
struct sized_bitset *remat, nir_def ***fill_defs,
|
||||
unsigned call_idx, struct hash_table *remap_table)
|
||||
struct u_sparse_bitset *remat, nir_def ***fill_defs,
|
||||
unsigned call_idx, unsigned num_ssa_defs,
|
||||
struct hash_table *remap_table)
|
||||
{
|
||||
nir_def *last_def = NULL;
|
||||
|
||||
|
|
@ -331,14 +297,14 @@ remat_chain_ssa_def(nir_builder *b, struct util_dynarray *buf,
|
|||
|
||||
if (fill_defs[ssa_index] == NULL) {
|
||||
fill_defs[ssa_index] =
|
||||
rzalloc_array(fill_defs, nir_def *, remat->size);
|
||||
rzalloc_array(fill_defs, nir_def *, num_ssa_defs);
|
||||
}
|
||||
|
||||
/* Add the new ssa_def to the list fill_defs and flag it as
|
||||
* rematerialized
|
||||
*/
|
||||
fill_defs[ssa_index][call_idx] = last_def = clone_ssa_def;
|
||||
BITSET_SET(remat->set, ssa_index);
|
||||
u_sparse_bitset_set(remat, ssa_index);
|
||||
|
||||
_mesa_hash_table_insert(remap_table, instr_ssa_def, last_def);
|
||||
}
|
||||
|
|
@ -414,9 +380,9 @@ spill_fill(nir_builder *before, nir_builder *after, nir_def *def,
|
|||
static bool
|
||||
add_src_to_call_live_bitset(nir_src *src, void *state)
|
||||
{
|
||||
BITSET_WORD *call_live = state;
|
||||
struct u_sparse_bitset *call_live = state;
|
||||
|
||||
BITSET_SET(call_live, src->ssa->index);
|
||||
u_sparse_bitset_set(call_live, src->ssa->index);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -453,8 +419,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
|
|||
void *mem_ctx = ralloc_context(shader);
|
||||
|
||||
const unsigned num_ssa_defs = impl->ssa_alloc;
|
||||
const unsigned live_words = BITSET_WORDS(num_ssa_defs);
|
||||
struct sized_bitset trivial_remat = bitset_create(mem_ctx, num_ssa_defs);
|
||||
struct u_sparse_bitset trivial_remat;
|
||||
u_sparse_bitset_init(&trivial_remat, num_ssa_defs, mem_ctx);
|
||||
|
||||
/* Array of all live SSA defs which are spill candidates */
|
||||
nir_def **spill_defs =
|
||||
|
|
@ -467,8 +433,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
|
|||
rzalloc_array(mem_ctx, nir_def **, num_ssa_defs);
|
||||
|
||||
/* For each call instruction, the liveness set at the call */
|
||||
const BITSET_WORD **call_live =
|
||||
rzalloc_array(mem_ctx, const BITSET_WORD *, num_calls);
|
||||
struct u_sparse_bitset **call_live =
|
||||
rzalloc_array(mem_ctx, struct u_sparse_bitset *, num_calls);
|
||||
|
||||
/* For each call instruction, the block index of the block it lives in */
|
||||
uint32_t *call_block_indices = rzalloc_array(mem_ctx, uint32_t, num_calls);
|
||||
|
|
@ -512,8 +478,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
|
|||
* spilled/filled sources.
|
||||
*/
|
||||
if (options->should_remat_callback) {
|
||||
BITSET_WORD **updated_call_live =
|
||||
rzalloc_array(mem_ctx, BITSET_WORD *, num_calls);
|
||||
struct u_sparse_bitset **updated_call_live =
|
||||
rzalloc_array(mem_ctx, struct u_sparse_bitset *, num_calls);
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
|
|
@ -522,7 +488,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
|
|||
continue;
|
||||
|
||||
for (unsigned c = 0; c < num_calls; c++) {
|
||||
if (!BITSET_TEST(call_live[c], def->index))
|
||||
if (!u_sparse_bitset_test(call_live[c], def->index))
|
||||
continue;
|
||||
|
||||
if (!options->should_remat_callback(def->parent_instr,
|
||||
|
|
@ -530,9 +496,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
|
|||
continue;
|
||||
|
||||
if (updated_call_live[c] == NULL) {
|
||||
const unsigned bitset_words = BITSET_WORDS(impl->ssa_alloc);
|
||||
updated_call_live[c] = ralloc_array(mem_ctx, BITSET_WORD, bitset_words);
|
||||
memcpy(updated_call_live[c], call_live[c], bitset_words * sizeof(BITSET_WORD));
|
||||
updated_call_live[c] = ralloc_size(mem_ctx, sizeof(struct u_sparse_bitset));
|
||||
u_sparse_bitset_dup(updated_call_live[c], call_live[c]);
|
||||
}
|
||||
|
||||
nir_foreach_src(instr, add_src_to_call_live_bitset, updated_call_live[c]);
|
||||
|
|
@ -557,7 +522,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
|
|||
nir_def *def = nir_instr_def(instr);
|
||||
if (def != NULL) {
|
||||
if (can_remat_ssa_def(def, &trivial_remat)) {
|
||||
add_ssa_def_to_bitset(def, &trivial_remat);
|
||||
u_sparse_bitset_set(&trivial_remat, def->index);
|
||||
_mesa_hash_table_insert(trivial_remap_table, def, def);
|
||||
} else {
|
||||
spill_defs[def->index] = def;
|
||||
|
|
@ -567,7 +532,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
|
|||
if (!instr_is_shader_call(instr))
|
||||
continue;
|
||||
|
||||
const BITSET_WORD *live = call_live[call_idx];
|
||||
struct u_sparse_bitset *live = call_live[call_idx];
|
||||
|
||||
struct hash_table *remap_table =
|
||||
_mesa_hash_table_clone(trivial_remap_table, mem_ctx);
|
||||
|
|
@ -575,8 +540,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
|
|||
/* Make a copy of trivial_remat that we'll update as we crawl through
|
||||
* the live SSA defs and unspill them.
|
||||
*/
|
||||
struct sized_bitset remat = bitset_create(mem_ctx, num_ssa_defs);
|
||||
memcpy(remat.set, trivial_remat.set, live_words * sizeof(BITSET_WORD));
|
||||
struct u_sparse_bitset remat;
|
||||
u_sparse_bitset_dup(&remat, &trivial_remat);
|
||||
|
||||
/* Before the two builders are always separated by the call
|
||||
* instruction, it won't break anything to have two of them.
|
||||
|
|
@ -596,63 +561,58 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
|
|||
util_dynarray_init_from_stack(&remat_chain, remat_chain_instrs, sizeof(remat_chain_instrs));
|
||||
|
||||
unsigned offset = shader->scratch_size;
|
||||
for (unsigned w = 0; w < live_words; w++) {
|
||||
BITSET_WORD spill_mask = live[w] & ~trivial_remat.set[w];
|
||||
while (spill_mask) {
|
||||
int i = u_bit_scan(&spill_mask);
|
||||
assert(i >= 0);
|
||||
unsigned index = w * BITSET_WORDBITS + i;
|
||||
assert(index < num_ssa_defs);
|
||||
U_SPARSE_BITSET_FOREACH_SET(live, index) {
|
||||
if (u_sparse_bitset_test(&trivial_remat, index))
|
||||
continue;
|
||||
|
||||
def = spill_defs[index];
|
||||
nir_def *original_def = def, *new_def;
|
||||
if (can_remat_ssa_def(def, &remat)) {
|
||||
/* If this SSA def is re-materializable or based on other
|
||||
* things we've already spilled, re-materialize it rather
|
||||
* than spilling and filling. Anything which is trivially
|
||||
* re-materializable won't even get here because we take
|
||||
* those into account in spill_mask above.
|
||||
*/
|
||||
new_def = remat_ssa_def(&after, def, remap_table);
|
||||
} else if (can_remat_chain_ssa_def(def, &remat, &remat_chain)) {
|
||||
new_def = remat_chain_ssa_def(&after, &remat_chain, &remat,
|
||||
fill_defs, call_idx,
|
||||
remap_table);
|
||||
util_dynarray_clear(&remat_chain);
|
||||
} else {
|
||||
bool is_bool = def->bit_size == 1;
|
||||
if (is_bool)
|
||||
def = nir_b2b32(&before, def);
|
||||
|
||||
const unsigned comp_size = def->bit_size / 8;
|
||||
offset = ALIGN(offset, comp_size);
|
||||
|
||||
new_def = spill_fill(&before, &after, def,
|
||||
index, call_idx,
|
||||
offset, options->stack_alignment);
|
||||
|
||||
if (is_bool)
|
||||
new_def = nir_b2b1(&after, new_def);
|
||||
|
||||
offset += def->num_components * comp_size;
|
||||
}
|
||||
|
||||
/* Mark this SSA def as available in the remat set so that, if
|
||||
* some other SSA def we need is computed based on it, we can
|
||||
* just re-compute instead of fetching from memory.
|
||||
def = spill_defs[index];
|
||||
nir_def *original_def = def, *new_def;
|
||||
if (can_remat_ssa_def(def, &remat)) {
|
||||
/* If this SSA def is re-materializable or based on other
|
||||
* things we've already spilled, re-materialize it rather
|
||||
* than spilling and filling. Anything which is trivially
|
||||
* re-materializable won't even get here because we take
|
||||
* those into account in spill_mask above.
|
||||
*/
|
||||
BITSET_SET(remat.set, index);
|
||||
new_def = remat_ssa_def(&after, def, remap_table);
|
||||
} else if (can_remat_chain_ssa_def(def, &remat, &remat_chain)) {
|
||||
new_def = remat_chain_ssa_def(&after, &remat_chain, &remat,
|
||||
fill_defs, call_idx, num_ssa_defs,
|
||||
remap_table);
|
||||
util_dynarray_clear(&remat_chain);
|
||||
} else {
|
||||
bool is_bool = def->bit_size == 1;
|
||||
if (is_bool)
|
||||
def = nir_b2b32(&before, def);
|
||||
|
||||
/* For now, we just make a note of this new SSA def. We'll
|
||||
* fix things up with the phi builder as a second pass.
|
||||
*/
|
||||
if (fill_defs[index] == NULL) {
|
||||
fill_defs[index] =
|
||||
rzalloc_array(fill_defs, nir_def *, num_calls);
|
||||
}
|
||||
fill_defs[index][call_idx] = new_def;
|
||||
_mesa_hash_table_insert(remap_table, original_def, new_def);
|
||||
const unsigned comp_size = def->bit_size / 8;
|
||||
offset = ALIGN(offset, comp_size);
|
||||
|
||||
new_def = spill_fill(&before, &after, def,
|
||||
index, call_idx,
|
||||
offset, options->stack_alignment);
|
||||
|
||||
if (is_bool)
|
||||
new_def = nir_b2b1(&after, new_def);
|
||||
|
||||
offset += def->num_components * comp_size;
|
||||
}
|
||||
|
||||
/* Mark this SSA def as available in the remat set so that, if
|
||||
* some other SSA def we need is computed based on it, we can
|
||||
* just re-compute instead of fetching from memory.
|
||||
*/
|
||||
u_sparse_bitset_set(&remat, index);
|
||||
|
||||
/* For now, we just make a note of this new SSA def. We'll
|
||||
* fix things up with the phi builder as a second pass.
|
||||
*/
|
||||
if (fill_defs[index] == NULL) {
|
||||
fill_defs[index] =
|
||||
rzalloc_array(fill_defs, nir_def *, num_calls);
|
||||
}
|
||||
fill_defs[index][call_idx] = new_def;
|
||||
_mesa_hash_table_insert(remap_table, original_def, new_def);
|
||||
}
|
||||
|
||||
nir_builder *b = &before;
|
||||
|
|
@ -1005,7 +965,7 @@ flatten_resume_if_ladder(nir_builder *b,
|
|||
struct exec_list *child_list,
|
||||
bool child_list_contains_cursor,
|
||||
nir_instr *resume_instr,
|
||||
struct sized_bitset *remat)
|
||||
struct u_sparse_bitset *remat)
|
||||
{
|
||||
nir_cf_list cf_list;
|
||||
|
||||
|
|
@ -1044,7 +1004,7 @@ flatten_resume_if_ladder(nir_builder *b,
|
|||
b->cursor = nir_after_instr(instr);
|
||||
|
||||
nir_def *def = nir_instr_def(instr);
|
||||
BITSET_SET(remat->set, def->index);
|
||||
u_sparse_bitset_set(remat, def->index);
|
||||
}
|
||||
}
|
||||
if (b->cursor.option == nir_cursor_after_block &&
|
||||
|
|
@ -1276,7 +1236,8 @@ lower_resume(nir_shader *shader, int call_idx)
|
|||
/* Used to track which things may have been assumed to be re-materialized
|
||||
* by the spilling pass and which we shouldn't delete.
|
||||
*/
|
||||
struct sized_bitset remat = bitset_create(mem_ctx, impl->ssa_alloc);
|
||||
struct u_sparse_bitset remat;
|
||||
u_sparse_bitset_init(&remat, impl->ssa_alloc, mem_ctx);
|
||||
|
||||
/* Create a nop instruction to use as a cursor as we extract and re-insert
|
||||
* stuff into the CFG.
|
||||
|
|
|
|||
|
|
@ -85,9 +85,8 @@ nir_progress(bool progress, nir_function_impl *impl, nir_metadata preserved)
|
|||
*/
|
||||
if ((impl->valid_metadata & ~preserved) & nir_metadata_live_defs) {
|
||||
nir_foreach_block(block, impl) {
|
||||
ralloc_free(block->live_in);
|
||||
ralloc_free(block->live_out);
|
||||
block->live_in = block->live_out = NULL;
|
||||
u_sparse_bitset_free(&block->live_in);
|
||||
u_sparse_bitset_free(&block->live_out);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -115,10 +114,11 @@ nir_metadata_invalidate(nir_shader *shader)
|
|||
block->index = (block_idx-- & 0xf) + 0xfffffff0;
|
||||
|
||||
if (impl->valid_metadata & nir_metadata_live_defs) {
|
||||
ralloc_free(block->live_in);
|
||||
ralloc_free(block->live_out);
|
||||
u_sparse_bitset_free(&block->live_in);
|
||||
u_sparse_bitset_free(&block->live_out);
|
||||
}
|
||||
block->live_in = block->live_out = NULL;
|
||||
u_sparse_bitset_init(&block->live_in, impl->ssa_alloc, NULL);
|
||||
u_sparse_bitset_init(&block->live_out, impl->ssa_alloc, NULL);
|
||||
|
||||
if (impl->valid_metadata & nir_metadata_dominance &&
|
||||
block->dom_children != block->_dom_children_storage)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
struct call_liveness_entry {
|
||||
struct list_head list;
|
||||
nir_call_instr *instr;
|
||||
const BITSET_WORD *live_set;
|
||||
struct u_sparse_bitset *live_set;
|
||||
};
|
||||
|
||||
static bool
|
||||
|
|
@ -188,8 +188,6 @@ nir_minimize_call_live_states_impl(nir_function_impl *impl)
|
|||
BITSET_WORD *def_blocks = ralloc_array(mem_ctx, BITSET_WORD, block_words);
|
||||
|
||||
list_for_each_entry(struct call_liveness_entry, entry, &call_list, list) {
|
||||
unsigned i;
|
||||
|
||||
nir_builder b = nir_builder_at(nir_after_instr(&entry->instr->instr));
|
||||
|
||||
struct nir_phi_builder *builder = nir_phi_builder_create(impl);
|
||||
|
|
@ -198,7 +196,7 @@ nir_minimize_call_live_states_impl(nir_function_impl *impl)
|
|||
struct hash_table *remap_table =
|
||||
_mesa_pointer_hash_table_create(mem_ctx);
|
||||
|
||||
BITSET_FOREACH_SET(i, entry->live_set, num_defs) {
|
||||
U_SPARSE_BITSET_FOREACH_SET(entry->live_set, i) {
|
||||
if (!rematerializable[i] ||
|
||||
_mesa_hash_table_search(remap_table, rematerializable[i]))
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -1853,8 +1853,8 @@ validate_dominance(nir_function_impl *impl, validate_state *state)
|
|||
}
|
||||
|
||||
typedef struct {
|
||||
BITSET_WORD *live_in;
|
||||
BITSET_WORD *live_out;
|
||||
struct u_sparse_bitset live_in;
|
||||
struct u_sparse_bitset live_out;
|
||||
} block_liveness_metadata;
|
||||
|
||||
static void
|
||||
|
|
@ -1871,8 +1871,8 @@ validate_live_defs(nir_function_impl *impl, validate_state *state)
|
|||
md->live_in = block->live_in;
|
||||
md->live_out = block->live_out;
|
||||
|
||||
block->live_in = NULL;
|
||||
block->live_out = NULL;
|
||||
u_sparse_bitset_init(&block->live_in, impl->ssa_alloc, NULL);
|
||||
u_sparse_bitset_init(&block->live_out, impl->ssa_alloc, NULL);
|
||||
}
|
||||
|
||||
/* Call metadata passes and compare it against the preserved metadata */
|
||||
|
|
@ -1890,10 +1890,8 @@ validate_live_defs(nir_function_impl *impl, validate_state *state)
|
|||
|
||||
size_t bitset_words = BITSET_WORDS(impl->ssa_alloc);
|
||||
if (bitset_words) {
|
||||
validate_assert(state, !memcmp(md->live_in, block->live_in,
|
||||
sizeof(BITSET_WORD) * bitset_words));
|
||||
validate_assert(state, !memcmp(md->live_out, block->live_out,
|
||||
sizeof(BITSET_WORD) * bitset_words));
|
||||
validate_assert(state, !u_sparse_bitset_cmp(&md->live_in, &block->live_in));
|
||||
validate_assert(state, !u_sparse_bitset_cmp(&md->live_out, &block->live_out));
|
||||
}
|
||||
}
|
||||
state->block = NULL;
|
||||
|
|
@ -1903,8 +1901,8 @@ validate_live_defs(nir_function_impl *impl, validate_state *state)
|
|||
nir_block *block = (nir_block *)entry->key;
|
||||
block_liveness_metadata *md = &blocks[entry - state->blocks->table];
|
||||
|
||||
ralloc_free(block->live_in);
|
||||
ralloc_free(block->live_out);
|
||||
u_sparse_bitset_free(&block->live_in);
|
||||
u_sparse_bitset_free(&block->live_out);
|
||||
|
||||
block->live_in = md->live_in;
|
||||
block->live_out = md->live_out;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
|
||||
#include "etnaviv_compiler_nir.h"
|
||||
#include "compiler/nir/nir_worklist.h"
|
||||
#include "util/sparse_bitset.h"
|
||||
|
||||
static void
|
||||
range_include(struct live_def *def, unsigned index)
|
||||
|
|
@ -38,7 +39,6 @@ range_include(struct live_def *def, unsigned index)
|
|||
|
||||
struct live_defs_state {
|
||||
unsigned num_defs;
|
||||
unsigned bitset_words;
|
||||
|
||||
nir_function_impl *impl;
|
||||
nir_block *block; /* current block pointer */
|
||||
|
|
@ -54,13 +54,8 @@ static bool
|
|||
init_liveness_block(nir_block *block,
|
||||
struct live_defs_state *state)
|
||||
{
|
||||
block->live_in = reralloc(block, block->live_in, BITSET_WORD,
|
||||
state->bitset_words);
|
||||
memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD));
|
||||
|
||||
block->live_out = reralloc(block, block->live_out, BITSET_WORD,
|
||||
state->bitset_words);
|
||||
memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD));
|
||||
u_sparse_bitset_init(&block->live_in, state->num_defs, block);
|
||||
u_sparse_bitset_init(&block->live_out, state->num_defs, block);
|
||||
|
||||
nir_block_worklist_push_head(&state->worklist, block);
|
||||
|
||||
|
|
@ -98,7 +93,7 @@ set_src_live(nir_src *src, void *void_state)
|
|||
unsigned i = state->live_map[src_index(state->impl, src)];
|
||||
assert(i != ~0u);
|
||||
|
||||
BITSET_SET(state->block->live_in, i);
|
||||
u_sparse_bitset_set(&state->block->live_in, i);
|
||||
range_include(&state->defs[i], state->index);
|
||||
|
||||
return true;
|
||||
|
|
@ -108,12 +103,7 @@ static bool
|
|||
propagate_across_edge(nir_block *pred, nir_block *succ,
|
||||
struct live_defs_state *state)
|
||||
{
|
||||
BITSET_WORD progress = 0;
|
||||
for (unsigned i = 0; i < state->bitset_words; ++i) {
|
||||
progress |= succ->live_in[i] & ~pred->live_out[i];
|
||||
pred->live_out[i] |= succ->live_in[i];
|
||||
}
|
||||
return progress != 0;
|
||||
return u_sparse_bitset_merge(&pred->live_out, &succ->live_in);
|
||||
}
|
||||
|
||||
unsigned
|
||||
|
|
@ -162,7 +152,6 @@ etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_ma
|
|||
* ahead and allocate live_in and live_out sets and add all of the
|
||||
* blocks to the worklist.
|
||||
*/
|
||||
state.bitset_words = BITSET_WORDS(state.num_defs);
|
||||
nir_foreach_block(block, impl) {
|
||||
init_liveness_block(block, &state);
|
||||
}
|
||||
|
|
@ -181,8 +170,7 @@ etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_ma
|
|||
nir_block *block = nir_block_worklist_pop_head(&state.worklist);
|
||||
state.block = block;
|
||||
|
||||
memcpy(block->live_in, block->live_out,
|
||||
state.bitset_words * sizeof(BITSET_WORD));
|
||||
u_sparse_bitset_dup(&block->live_in, &block->live_out);
|
||||
|
||||
state.index = block_live_index[block->index + 1];
|
||||
|
||||
|
|
@ -198,7 +186,7 @@ etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_ma
|
|||
* we don't expect any partial write_mask alus
|
||||
* so clearing live_in here is OK
|
||||
*/
|
||||
BITSET_CLEAR(block->live_in, state.index);
|
||||
u_sparse_bitset_clear(&block->live_in, state.index);
|
||||
}
|
||||
|
||||
/* don't set_src_live for not-emitted instructions */
|
||||
|
|
@ -247,12 +235,10 @@ etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_ma
|
|||
/* apply live_in/live_out to ranges */
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
int i;
|
||||
|
||||
BITSET_FOREACH_SET(i, block->live_in, state.num_defs)
|
||||
U_SPARSE_BITSET_FOREACH_SET(&block->live_in, i)
|
||||
range_include(&state.defs[i], block_live_index[block->index]);
|
||||
|
||||
BITSET_FOREACH_SET(i, block->live_out, state.num_defs)
|
||||
U_SPARSE_BITSET_FOREACH_SET(&block->live_out, i)
|
||||
range_include(&state.defs[i], block_live_index[block->index + 1]);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
#include "compiler/glsl_types.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "util/sparse_bitset.h"
|
||||
|
||||
/**
|
||||
* Returns the minimum number of vec4 elements needed to pack a type.
|
||||
|
|
@ -3036,7 +3037,7 @@ brw_nir_find_complete_variable_with_location(nir_shader *shader,
|
|||
struct brw_quick_pressure_state {
|
||||
uint8_t *convergent_size;
|
||||
uint8_t *divergent_size;
|
||||
BITSET_WORD *live;
|
||||
struct u_sparse_bitset live;
|
||||
unsigned curr_convergent_size;
|
||||
unsigned curr_divergent_size;
|
||||
};
|
||||
|
|
@ -3086,8 +3087,8 @@ set_src_live(nir_src *src, void *v_state)
|
|||
if (nir_src_is_undef(*src))
|
||||
return true;
|
||||
|
||||
if (!BITSET_TEST(state->live, src->ssa->index)) {
|
||||
BITSET_SET(state->live, src->ssa->index);
|
||||
if (!u_sparse_bitset_test(&state->live, src->ssa->index)) {
|
||||
u_sparse_bitset_set(&state->live, src->ssa->index);
|
||||
|
||||
/* This value just became live, add its size */
|
||||
state->curr_convergent_size += state->convergent_size[src->ssa->index];
|
||||
|
|
@ -3101,8 +3102,8 @@ static bool
|
|||
set_def_dead(nir_def *def, void *v_state)
|
||||
{
|
||||
struct brw_quick_pressure_state *state = v_state;
|
||||
if (BITSET_TEST(state->live, def->index)) {
|
||||
BITSET_CLEAR(state->live, def->index);
|
||||
if (u_sparse_bitset_test(&state->live, def->index)) {
|
||||
u_sparse_bitset_clear(&state->live, def->index);
|
||||
|
||||
/* This value just became dead, subtract its size */
|
||||
state->curr_convergent_size -= state->convergent_size[def->index];
|
||||
|
|
@ -3121,14 +3122,12 @@ quick_pressure_estimate(nir_shader *nir,
|
|||
nir_metadata_require(impl, nir_metadata_divergence |
|
||||
nir_metadata_live_defs);
|
||||
|
||||
const unsigned bitset_words = BITSET_WORDS(impl->ssa_alloc);
|
||||
|
||||
struct brw_quick_pressure_state state = {
|
||||
.convergent_size = calloc(impl->ssa_alloc, sizeof(uint8_t)),
|
||||
.divergent_size = calloc(impl->ssa_alloc, sizeof(uint8_t)),
|
||||
.live = calloc(bitset_words, sizeof(BITSET_WORD)),
|
||||
};
|
||||
|
||||
u_sparse_bitset_init(&state.live, impl->ssa_alloc, NULL);
|
||||
unsigned max_convergent_size = 0, max_divergent_size = 0;
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
|
|
@ -3140,16 +3139,13 @@ quick_pressure_estimate(nir_shader *nir,
|
|||
state.curr_divergent_size = 0;
|
||||
|
||||
/* Start with sizes for anything live-out from the block */
|
||||
|
||||
unsigned i;
|
||||
BITSET_FOREACH_SET(i, block->live_out, impl->ssa_alloc) {
|
||||
U_SPARSE_BITSET_FOREACH_SET(&block->live_out, i) {
|
||||
state.curr_convergent_size += state.convergent_size[i];
|
||||
state.curr_divergent_size += state.divergent_size[i];
|
||||
}
|
||||
|
||||
/* Walk backwards, add source sizes on first sight, subtract on def */
|
||||
for (i = 0; i < bitset_words; i++)
|
||||
state.live[i] = block->live_out[i];
|
||||
u_sparse_bitset_dup(&state.live, &block->live_out);
|
||||
|
||||
nir_foreach_instr_reverse(instr, block) {
|
||||
if (instr->type == nir_instr_type_phi)
|
||||
|
|
@ -3170,7 +3166,7 @@ quick_pressure_estimate(nir_shader *nir,
|
|||
|
||||
free(state.convergent_size);
|
||||
free(state.divergent_size);
|
||||
free(state.live);
|
||||
u_sparse_bitset_free(&state.live);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue