nir/opt_copy_prop_vars: skip cloning of copies arrays until needed

Most of the variables in the hash table will never actually be looked up
for any given block so cloning every possible value just creates a bunch
of unrequired memcpy calls.

Here we change the code to only clone the copies array once it is
actually looked up for the first time.

The Blender shader compile time in issue #9326 improves as folows:
151.09 seconds -> 21.11 seconds

The CTS test dEQP-GLES31.functional.ubo.random.all_per_block_buffers.20
improves as follows:

1.67 seconds -> 0.92 seconds

Reviewed-by: Emma Anholt <emma@anholt.net>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24227>
This commit is contained in:
Timothy Arceri 2023-07-13 21:12:32 +10:00 committed by Marge Bot
parent 869b5a562e
commit d56e739417

View file

@ -66,6 +66,9 @@ struct copies {
struct copies_dynarray {
struct list_head node;
struct util_dynarray arr;
/* The copies structure this dynarray was cloned or created for */
struct copies *owner;
};
struct vars_written {
@ -310,6 +313,7 @@ gather_vars_written(struct copy_prop_var_state *state,
}
}
/* Creates a fresh dynarray or grabs one from the pool for reuse */
static struct copies_dynarray *
get_copies_dynarray(struct copy_prop_var_state *state)
{
@ -327,17 +331,61 @@ get_copies_dynarray(struct copy_prop_var_state *state)
return cp_arr;
}
/* Checks if the pointer leads to a cloned copy of the array for this hash
* table or if the pointer was inherited from when the hash table was cloned.
*/
static bool
copies_owns_ht_entry(struct copies *copies,
struct hash_entry *ht_entry)
{
assert(copies && ht_entry && ht_entry->data);
struct copies_dynarray *copies_array = ht_entry->data;
return copies_array->owner == copies;
}
static void
clone_copies_dynarray_from_src(struct copies_dynarray *dst,
struct copies_dynarray *src)
{
util_dynarray_append_dynarray(&dst->arr, &src->arr);
}
/* Gets copies array from the hash table entry or clones the source array if
* the hash entry contains NULL. The values are not cloned when the hash table
* is created because its expensive to clone everything and most value will
* never actually be accessed.
*/
static struct copies_dynarray *
get_copies_array_from_ht_entry(struct copy_prop_var_state *state,
struct copies *copies,
struct hash_entry *ht_entry)
{
struct copies_dynarray *copies_array;
if (copies_owns_ht_entry(copies, ht_entry)) {
/* The array already exists so just return it */
copies_array = (struct copies_dynarray *) ht_entry->data;
} else {
/* Clone the array and set the data value for future access */
copies_array = get_copies_dynarray(state);
copies_array->owner = copies;
clone_copies_dynarray_from_src(copies_array, ht_entry->data);
ht_entry->data = copies_array;
}
return copies_array;
}
static struct copies_dynarray *
copies_array_for_var(struct copy_prop_var_state *state,
struct hash_table *copies, nir_variable *var)
struct copies *copies, nir_variable *var)
{
struct hash_entry *entry = _mesa_hash_table_search(copies, var);
struct hash_entry *entry = _mesa_hash_table_search(copies->ht, var);
if (entry != NULL)
return (struct copies_dynarray *) entry->data;
return get_copies_array_from_ht_entry(state, copies, entry);
struct copies_dynarray *copies_array = get_copies_dynarray(state);
_mesa_hash_table_insert(copies, var, copies_array);
copies_array->owner = copies;
_mesa_hash_table_insert(copies->ht, var, copies_array);
return copies_array;
}
@ -353,7 +401,7 @@ copies_array_for_deref(struct copy_prop_var_state *state,
copies_array = &copies->arr;
} else {
struct copies_dynarray *cpda =
copies_array_for_var(state, copies->ht, deref->_path->path[0]->var);
copies_array_for_var(state, copies, deref->_path->path[0]->var);
copies_array = &cpda->arr;
}
@ -498,21 +546,22 @@ lookup_entry_and_kill_aliases(struct copy_prop_var_state *state,
deref->_path->path[0]->var->data.mode == nir_var_mem_shared) {
hash_table_foreach(copies->ht, ht_entry) {
struct copies_dynarray *cp_arr = ht_entry->data;
struct util_dynarray *copies_array = &cp_arr->arr;
nir_variable *var = (nir_variable *) ht_entry->key;
if (deref->_path->path[0]->deref_type == nir_deref_type_var &&
var->data.mode != deref->_path->path[0]->var->data.mode)
continue;
lookup_entry_and_kill_aliases_copy_array(state, copies_array, deref,
write_mask, remove_entry,
&entry, &entry_removed);
struct copies_dynarray *copies_array =
get_copies_array_from_ht_entry(state, copies, ht_entry);
if (copies_array->size == 0) {
lookup_entry_and_kill_aliases_copy_array(state, &copies_array->arr,
deref, write_mask,
remove_entry, &entry,
&entry_removed);
if (copies_array->arr.size == 0) {
_mesa_hash_table_remove(copies->ht, ht_entry);
list_add(&cp_arr->node, &state->unused_copy_dynarray_list);
list_add(&copies_array->node, &state->unused_copy_dynarray_list);
}
}
@ -521,7 +570,7 @@ lookup_entry_and_kill_aliases(struct copy_prop_var_state *state,
&entry, &entry_removed);
} else {
struct copies_dynarray *cpda =
copies_array_for_var(state, copies->ht, deref->_path->path[0]->var);
copies_array_for_var(state, copies, deref->_path->path[0]->var);
struct util_dynarray *copies_array = &cpda->arr;
lookup_entry_and_kill_aliases_copy_array(state, copies_array, deref,
@ -576,13 +625,14 @@ apply_barrier_for_modes_to_dynarr(struct util_dynarray *copies_array,
}
static void
apply_barrier_for_modes(struct copies *copies,
nir_variable_mode modes)
apply_barrier_for_modes(struct copy_prop_var_state *state,
struct copies *copies, nir_variable_mode modes)
{
hash_table_foreach(copies->ht, ht_entry) {
struct util_dynarray *copies_array =
&((struct copies_dynarray *) ht_entry->data)->arr;
apply_barrier_for_modes_to_dynarr(copies_array, modes);
struct copies_dynarray *copies_array =
get_copies_array_from_ht_entry(state, copies, ht_entry);
apply_barrier_for_modes_to_dynarr(&copies_array->arr, modes);
}
apply_barrier_for_modes_to_dynarr(&copies->arr, modes);
@ -883,16 +933,17 @@ invalidate_copies_for_cf_node(struct copy_prop_var_state *state,
struct vars_written *written = ht_entry->data;
if (written->modes) {
hash_table_foreach(copies->ht, ht_entry) {
struct copies_dynarray *cp_arr = ht_entry->data;
struct util_dynarray *copies_array = &cp_arr->arr;
util_dynarray_foreach_reverse(copies_array, struct copy_entry, entry) {
struct copies_dynarray *copies_array =
get_copies_array_from_ht_entry(state, copies, ht_entry);
util_dynarray_foreach_reverse(&copies_array->arr, struct copy_entry, entry) {
if (nir_deref_mode_may_be(entry->dst.instr, written->modes))
copy_entry_remove(copies_array, entry, NULL);
copy_entry_remove(&copies_array->arr, entry, NULL);
}
if (copies_array->size == 0) {
if (copies_array->arr.size == 0) {
_mesa_hash_table_remove(copies->ht, ht_entry);
list_add(&cp_arr->node, &state->unused_copy_dynarray_list);
list_add(&copies_array->node, &state->unused_copy_dynarray_list);
}
}
@ -991,12 +1042,12 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
if (instr->type == nir_instr_type_call) {
if (debug) dump_instr(instr);
apply_barrier_for_modes(copies, nir_var_shader_out |
nir_var_shader_temp |
nir_var_function_temp |
nir_var_mem_ssbo |
nir_var_mem_shared |
nir_var_mem_global);
apply_barrier_for_modes(state, copies, nir_var_shader_out |
nir_var_shader_temp |
nir_var_function_temp |
nir_var_mem_ssbo |
nir_var_mem_shared |
nir_var_mem_global);
if (debug) dump_copy_entries(copies);
continue;
}
@ -1010,28 +1061,28 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
if (debug) dump_instr(instr);
if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
apply_barrier_for_modes(copies, nir_intrinsic_memory_modes(intrin));
apply_barrier_for_modes(state, copies, nir_intrinsic_memory_modes(intrin));
break;
case nir_intrinsic_emit_vertex:
case nir_intrinsic_emit_vertex_with_counter:
if (debug) dump_instr(instr);
apply_barrier_for_modes(copies, nir_var_shader_out);
apply_barrier_for_modes(state, copies, nir_var_shader_out);
break;
case nir_intrinsic_report_ray_intersection:
apply_barrier_for_modes(copies, nir_var_mem_ssbo |
nir_var_mem_global |
nir_var_shader_call_data |
nir_var_ray_hit_attrib);
apply_barrier_for_modes(state, copies, nir_var_mem_ssbo |
nir_var_mem_global |
nir_var_shader_call_data |
nir_var_ray_hit_attrib);
break;
case nir_intrinsic_ignore_ray_intersection:
case nir_intrinsic_terminate_ray:
apply_barrier_for_modes(copies, nir_var_mem_ssbo |
nir_var_mem_global |
nir_var_shader_call_data);
apply_barrier_for_modes(state, copies, nir_var_mem_ssbo |
nir_var_mem_global |
nir_var_shader_call_data);
break;
case nir_intrinsic_load_deref: {
@ -1321,18 +1372,11 @@ clone_copies(struct copy_prop_var_state *state, struct copies *clones,
{
/* Simply clone the entire hash table. This is much faster than trying to
* rebuild it and is needed to avoid slow compilation of very large shaders.
* If needed we will clone the data later if it is ever looked up.
*/
assert(clones->ht == NULL);
clones->ht = _mesa_hash_table_clone(copies->ht, state->mem_ctx);
hash_table_foreach(clones->ht, entry) {
struct copies_dynarray *cloned_copies = get_copies_dynarray(state);
util_dynarray_append_dynarray(&cloned_copies->arr,
&((struct copies_dynarray *) entry->data)->arr);
entry->data = cloned_copies;
}
util_dynarray_clone(&clones->arr, state->mem_ctx, &copies->arr);
}
@ -1362,9 +1406,11 @@ clear_copies_structure(struct copy_prop_var_state *state,
struct copies *copies)
{
hash_table_foreach(copies->ht, entry) {
struct copies_dynarray *cp_arr =
(struct copies_dynarray *) entry->data;
list_add(&cp_arr->node, &state->unused_copy_dynarray_list);
if (copies_owns_ht_entry(copies, entry)) {
struct copies_dynarray *cp_arr =
(struct copies_dynarray *) entry->data;
list_add(&cp_arr->node, &state->unused_copy_dynarray_list);
}
}
ralloc_free(copies->ht);
copies->ht = NULL;