mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-16 06:10:45 +01:00
nir/lower_non_uniform_access: Group accesses using the same resource
Avoids emitting the waterfall loop for every access if they use the same
resource:
waterfall_loop {
access
}
waterfall_loop {
access
}
->
waterfall_loop {
access
access
}
Totals from 276 (0.33% of 84770) affected shaders:
MaxWaves: 3360 -> 3356 (-0.12%)
Instrs: 3759927 -> 3730650 (-0.78%)
CodeSize: 21125784 -> 20899580 (-1.07%)
VGPRs: 23096 -> 23104 (+0.03%)
Latency: 35593716 -> 35315455 (-0.78%); split: -0.78%, +0.00%
InvThroughput: 7353071 -> 7297309 (-0.76%); split: -0.76%, +0.00%
VClause: 120983 -> 118579 (-1.99%)
SClause: 113073 -> 110671 (-2.12%)
Copies: 358272 -> 348686 (-2.68%)
Branches: 166706 -> 159500 (-4.32%)
PreSGPRs: 18598 -> 18596 (-0.01%)
PreVGPRs: 21417 -> 21424 (+0.03%); split: -0.01%, +0.04%
VALU: 2354862 -> 2350053 (-0.20%)
SALU: 582291 -> 567638 (-2.52%)
SMEM: 139875 -> 137473 (-1.72%)
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30509>
This commit is contained in:
parent
c5e40a60f8
commit
4d09cd7fa5
2 changed files with 208 additions and 56 deletions
|
|
@ -6475,6 +6475,7 @@ enum nir_lower_non_uniform_access_type {
|
|||
nir_lower_non_uniform_texture_access = (1 << 2),
|
||||
nir_lower_non_uniform_image_access = (1 << 3),
|
||||
nir_lower_non_uniform_get_ssbo_size = (1 << 4),
|
||||
nir_lower_non_uniform_access_type_count = 5,
|
||||
};
|
||||
|
||||
/* Given the nir_src used for the resource, return the channels which might be non-uniform. */
|
||||
|
|
|
|||
|
|
@ -24,18 +24,50 @@
|
|||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
/* Per-access bookkeeping for one (possibly non-uniform) resource handle. */
struct nu_handle {
   /* The instruction source that carries the handle (or its deref). */
   nir_src *src;
   /* The SSA value of the handle itself. */
   nir_def *handle;
   /* Non-NULL when the handle was reached through an array deref; used to
    * replicate the deref chain when rewriting (see nu_handle_rewrite). */
   nir_deref_instr *parent_deref;
   /* Uniform per-iteration value the access is rewritten to use inside the
    * waterfall loop (rewritten in nu_handle_rewrite; produced in
    * nu_handle_compare, which is only partially visible in this diff). */
   nir_def *first;
};
|
||||
|
||||
/* Hash-table key: accesses with identical keys (same block, same access
 * group, same handle defs, same access type) are lowered inside a single
 * shared waterfall loop. */
struct nu_handle_key {
   /* Index of the basic block containing the access. */
   uint32_t block_index;
   /* Id of the open access group for this access type (see nu_state). */
   uint32_t access_group;
   uint32_t handle_count;
   /* We can have at most one texture and one sampler handle */
   /* sic: "indixes" (kept as-is; referenced elsewhere in the file) —
    * instruction indices of the defs producing each handle. */
   uint32_t handle_indixes[2];
   uint32_t access_type;
   /* Optional instruction index for emitting separate loops for non-reorderable instructions. */
   uint32_t instr_index;
};
|
||||
|
||||
DERIVE_HASH_TABLE(nu_handle_key)
|
||||
|
||||
/* Hash-table value: the representative handles of a group together with
 * every access (its handle-carrying sources) that shares them. */
struct nu_handle_data {
   /* At most one texture and one sampler handle. */
   struct nu_handle handles[2];
   /* Dynarray of struct nu_handle_src, one entry per grouped access. */
   struct util_dynarray srcs;
};
|
||||
|
||||
/* The handle-carrying sources of a single access (texture + sampler at
 * most, matching nu_handle_data::handles). */
struct nu_handle_src {
   nir_src *srcs[2];
};
|
||||
|
||||
/* Tracks the currently open access group for one access type. */
struct nu_access_group_state {
   /* Smallest instruction index among the first uses of the accesses already
    * in the group; grouped accesses must not be moved below this point
    * (see add_non_uniform_instr). */
   uint32_t last_first_use;
   /* Current group id; incrementing it opens a new group. */
   uint32_t index;
};
|
||||
|
||||
/* Pass-wide state, filled while walking the shader and drained afterwards
 * when the waterfall loops are emitted. */
struct nu_state {
   /* nu_handle_key -> nu_handle_data (see DERIVE_HASH_TABLE above). */
   struct hash_table *accesses;
   /* One open group per access type, indexed by ffs(type) - 1. */
   struct nu_access_group_state access_groups[nir_lower_non_uniform_access_type_count];
};
|
||||
|
||||
static bool
|
||||
nu_handle_init(struct nu_handle *h, nir_src *src)
|
||||
{
|
||||
h->src = src;
|
||||
|
||||
nir_deref_instr *deref = nir_src_as_deref(*src);
|
||||
if (deref) {
|
||||
if (deref->deref_type == nir_deref_type_var)
|
||||
|
|
@ -65,11 +97,11 @@ nu_handle_init(struct nu_handle *h, nir_src *src)
|
|||
|
||||
static nir_def *
|
||||
nu_handle_compare(const nir_lower_non_uniform_access_options *options,
|
||||
nir_builder *b, struct nu_handle *handle)
|
||||
nir_builder *b, struct nu_handle *handle, nir_src *src)
|
||||
{
|
||||
nir_component_mask_t channel_mask = ~0;
|
||||
if (options->callback)
|
||||
channel_mask = options->callback(handle->src, options->callback_data);
|
||||
channel_mask = options->callback(src, options->callback_data);
|
||||
channel_mask &= nir_component_mask(handle->handle->num_components);
|
||||
|
||||
nir_def *channels[NIR_MAX_VEC_COMPONENTS];
|
||||
|
|
@ -89,21 +121,90 @@ nu_handle_compare(const nir_lower_non_uniform_access_options *options,
|
|||
}
|
||||
|
||||
static void
|
||||
nu_handle_rewrite(nir_builder *b, struct nu_handle *h)
|
||||
nu_handle_rewrite(nir_builder *b, struct nu_handle *h, nir_src *src)
|
||||
{
|
||||
if (h->parent_deref) {
|
||||
/* Replicate the deref. */
|
||||
nir_deref_instr *deref =
|
||||
nir_build_deref_array(b, h->parent_deref, h->first);
|
||||
*(h->src) = nir_src_for_ssa(&deref->def);
|
||||
nir_src_rewrite(src, &deref->def);
|
||||
} else {
|
||||
*(h->src) = nir_src_for_ssa(h->first);
|
||||
nir_src_rewrite(src, h->first);
|
||||
}
|
||||
}
|
||||
|
||||
/* nir_foreach_def callback: folds the smallest instruction index among all
 * uses of `def` into *state.  The caller seeds *state with UINT32_MAX, so
 * despite the name, *last_first_use ends up as the index of the earliest
 * (first) use, or UINT32_MAX when the def is unused.
 * (Diff note: the removed lines here were the old signature of
 * lower_non_uniform_tex_access; get_first_use is the added function.) */
static bool
get_first_use(nir_def *def, void *state)
{
   uint32_t *last_first_use = state;
   nir_foreach_use(use, def)
      *last_first_use = MIN2(*last_first_use, nir_src_parent_instr(use)->index);

   return true;
}
|
||||
|
||||
/* Records one non-uniform access so that it can later be emitted inside a
 * shared waterfall loop.  Accesses hashing to the same nu_handle_key are
 * appended to the same nu_handle_data and lowered together.
 *
 * handles/srcs:  the access's resource handles and the sources carrying them
 * handle_count:  number of valid entries in handles/srcs (1 or 2)
 * group:         whether this access may be grouped (reorderable); when
 *                false the key gets a per-instruction index, i.e. the
 *                access receives its own loop
 * access_type:   single bit from nir_lower_non_uniform_access_type */
static void
add_non_uniform_instr(struct nu_state *state, struct nu_handle *handles,
                      nir_src **srcs, uint32_t handle_count, bool group,
                      enum nir_lower_non_uniform_access_type access_type)
{
   nir_instr *instr = nir_src_parent_instr(srcs[0]);

   /* access_type is a single bit, so ffs() - 1 maps it to an array slot. */
   struct nu_access_group_state *access_group = &state->access_groups[ffs(access_type) - 1];

   if (group) {
      /* Earliest use of any def of this instruction; stays UINT32_MAX if
       * the defs are unused. */
      uint32_t first_use = UINT32_MAX;
      nir_foreach_def(instr, get_first_use, &first_use);

      /* Avoid moving accesses below their first use. */
      if (instr->index >= access_group->last_first_use) {
         /* This access sits at or past the group's limit: open a new group
          * starting at this access's own first use. */
         access_group->last_first_use = first_use;
         access_group->index++;
      } else {
         /* Adjust the access group scope so that every access dominates its first use. */
         access_group->last_first_use = MIN2(access_group->last_first_use, first_use);
      }
   }

   /* memset so that padding and unused key fields hash deterministically. */
   struct nu_handle_key key;
   memset(&key, 0, sizeof(key));
   key.block_index = instr->block->index;
   key.access_group = access_group->index;
   key.access_type = access_type;
   key.handle_count = handle_count;

   /* Non-reorderable accesses are keyed per instruction: never grouped. */
   if (!group)
      key.instr_index = instr->index;

   for (uint32_t i = 0; i < handle_count; i++)
      key.handle_indixes[i] = handles[i].handle->parent_instr->index;

   struct hash_entry *entry = _mesa_hash_table_search(state->accesses, &key);
   if (!entry) {
      /* First access with this key: allocate the group's data (parented to
       * the hash table so it is freed with it) and copy the handles. */
      struct nu_handle_data *data = ralloc(state->accesses, struct nu_handle_data);

      for (uint32_t i = 0; i < handle_count; i++)
         data->handles[i] = handles[i];

      util_dynarray_init(&data->srcs, state->accesses);

      /* The table stores the key by pointer, so it needs its own copy. */
      struct nu_handle_key *key_copy = ralloc(state->accesses, struct nu_handle_key);
      memcpy(key_copy, &key, sizeof(key));

      entry = _mesa_hash_table_insert(state->accesses, key_copy, data);
   }

   struct nu_handle_data *data = entry->data;

   /* Append this access's handle-carrying sources to the group. */
   struct nu_handle_src src = { 0 };
   for (uint32_t i = 0; i < handle_count; i++)
      src.srcs[i] = srcs[i];

   util_dynarray_append(&data->srcs, struct nu_handle_src, src);
}
|
||||
|
||||
static bool
|
||||
lower_non_uniform_tex_access(struct nu_state *state, nir_tex_instr *tex)
|
||||
{
|
||||
if (!tex->texture_non_uniform && !tex->sampler_non_uniform)
|
||||
return false;
|
||||
|
|
@ -111,6 +212,7 @@ lower_non_uniform_tex_access(const nir_lower_non_uniform_access_options *options
|
|||
/* We can have at most one texture and one sampler handle */
|
||||
unsigned num_handles = 0;
|
||||
struct nu_handle handles[2];
|
||||
nir_src *srcs[2];
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
switch (tex->src[i].src_type) {
|
||||
case nir_tex_src_texture_offset:
|
||||
|
|
@ -131,7 +233,8 @@ lower_non_uniform_tex_access(const nir_lower_non_uniform_access_options *options
|
|||
continue;
|
||||
}
|
||||
|
||||
assert(num_handles <= ARRAY_SIZE(handles));
|
||||
assert(num_handles < ARRAY_SIZE(handles));
|
||||
srcs[num_handles] = &tex->src[i].src;
|
||||
if (nu_handle_init(&handles[num_handles], &tex->src[i].src))
|
||||
num_handles++;
|
||||
}
|
||||
|
|
@ -143,72 +246,63 @@ lower_non_uniform_tex_access(const nir_lower_non_uniform_access_options *options
|
|||
return false;
|
||||
}
|
||||
|
||||
b->cursor = nir_instr_remove(&tex->instr);
|
||||
|
||||
nir_push_loop(b);
|
||||
|
||||
nir_def *all_equal_first = nir_imm_true(b);
|
||||
for (unsigned i = 0; i < num_handles; i++) {
|
||||
if (i && handles[i].handle == handles[0].handle) {
|
||||
handles[i].first = handles[0].first;
|
||||
continue;
|
||||
}
|
||||
|
||||
nir_def *equal_first = nu_handle_compare(options, b, &handles[i]);
|
||||
all_equal_first = nir_iand(b, all_equal_first, equal_first);
|
||||
}
|
||||
|
||||
nir_push_if(b, all_equal_first);
|
||||
|
||||
for (unsigned i = 0; i < num_handles; i++)
|
||||
nu_handle_rewrite(b, &handles[i]);
|
||||
|
||||
nir_builder_instr_insert(b, &tex->instr);
|
||||
nir_jump(b, nir_jump_break);
|
||||
|
||||
tex->texture_non_uniform = false;
|
||||
tex->sampler_non_uniform = false;
|
||||
|
||||
add_non_uniform_instr(state, handles, srcs, num_handles, true,
|
||||
nir_lower_non_uniform_texture_access);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_non_uniform_access_intrin(const nir_lower_non_uniform_access_options *options,
|
||||
nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
unsigned handle_src)
|
||||
lower_non_uniform_access_intrin(struct nu_state *state, nir_intrinsic_instr *intrin,
|
||||
unsigned handle_src, enum nir_lower_non_uniform_access_type access_type)
|
||||
{
|
||||
if (!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM))
|
||||
return false;
|
||||
|
||||
nir_src *src = &intrin->src[handle_src];
|
||||
|
||||
struct nu_handle handle;
|
||||
if (!nu_handle_init(&handle, &intrin->src[handle_src])) {
|
||||
if (!nu_handle_init(&handle, src)) {
|
||||
nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);
|
||||
return false;
|
||||
}
|
||||
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_push_loop(b);
|
||||
|
||||
nir_push_if(b, nu_handle_compare(options, b, &handle));
|
||||
|
||||
nu_handle_rewrite(b, &handle);
|
||||
|
||||
nir_builder_instr_insert(b, &intrin->instr);
|
||||
nir_jump(b, nir_jump_break);
|
||||
|
||||
nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);
|
||||
|
||||
add_non_uniform_instr(state, &handle, &src, 1, nir_intrinsic_can_reorder(intrin),
|
||||
access_type);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Closes the open access groups that must not be extended across a barrier
 * (or across a call/terminate — callers pass affects_derivatives = true for
 * terminate/terminate_if and calls, which additionally closes texture
 * groups).  Setting last_first_use to 0 forces the next grouped access to
 * take the "instr->index >= last_first_use" path in add_non_uniform_instr
 * and therefore start a fresh group. */
static void
handle_barrier(struct nu_state *state, bool affects_derivatives)
{
   /* SSBO and image accesses can never be reordered across a barrier. */
   enum nir_lower_non_uniform_access_type access_type =
      nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_image_access;

   if (affects_derivatives)
      access_type |= nir_lower_non_uniform_texture_access;

   u_foreach_bit(i, access_type) {
      state->access_groups[i].last_first_use = 0;
   }
}
|
||||
|
||||
static bool
|
||||
nir_lower_non_uniform_access_impl(nir_function_impl *impl,
|
||||
const nir_lower_non_uniform_access_options *options)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_builder b = nir_builder_create(impl);
|
||||
struct nu_state state = {
|
||||
.accesses = nu_handle_key_table_create(NULL),
|
||||
};
|
||||
|
||||
nir_metadata_require(impl, nir_metadata_instr_index | nir_metadata_block_index);
|
||||
|
||||
nir_foreach_block_safe(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
|
|
@ -216,7 +310,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
|
|||
case nir_instr_type_tex: {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
if ((options->types & nir_lower_non_uniform_texture_access) &&
|
||||
lower_non_uniform_tex_access(options, &b, tex))
|
||||
lower_non_uniform_tex_access(&state, tex))
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -224,9 +318,18 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
|
|||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_terminate_if:
|
||||
case nir_intrinsic_terminate:
|
||||
case nir_intrinsic_demote_if:
|
||||
case nir_intrinsic_demote:
|
||||
case nir_intrinsic_barrier:
|
||||
handle_barrier(&state, intrin->intrinsic == nir_intrinsic_terminate_if ||
|
||||
intrin->intrinsic == nir_intrinsic_terminate);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ubo:
|
||||
if ((options->types & nir_lower_non_uniform_ubo_access) &&
|
||||
lower_non_uniform_access_intrin(options, &b, intrin, 0))
|
||||
lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_ubo_access))
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
|
|
@ -234,20 +337,20 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
|
|||
case nir_intrinsic_ssbo_atomic:
|
||||
case nir_intrinsic_ssbo_atomic_swap:
|
||||
if ((options->types & nir_lower_non_uniform_ssbo_access) &&
|
||||
lower_non_uniform_access_intrin(options, &b, intrin, 0))
|
||||
lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_ssbo_access))
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_ssbo:
|
||||
/* SSBO Stores put the index in the second source */
|
||||
if ((options->types & nir_lower_non_uniform_ssbo_access) &&
|
||||
lower_non_uniform_access_intrin(options, &b, intrin, 1))
|
||||
lower_non_uniform_access_intrin(&state, intrin, 1, nir_lower_non_uniform_ssbo_access))
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_get_ssbo_size:
|
||||
if ((options->types & nir_lower_non_uniform_get_ssbo_size) &&
|
||||
lower_non_uniform_access_intrin(options, &b, intrin, 0))
|
||||
lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_get_ssbo_size))
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
|
|
@ -282,7 +385,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
|
|||
case nir_intrinsic_image_deref_samples_identical:
|
||||
case nir_intrinsic_image_deref_fragment_mask_load_amd:
|
||||
if ((options->types & nir_lower_non_uniform_image_access) &&
|
||||
lower_non_uniform_access_intrin(options, &b, intrin, 0))
|
||||
lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_image_access))
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
|
|
@ -293,6 +396,10 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_instr_type_call:
|
||||
handle_barrier(&state, true);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Nothing to do */
|
||||
break;
|
||||
|
|
@ -300,6 +407,50 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
|
|||
}
|
||||
}
|
||||
|
||||
nir_builder b = nir_builder_create(impl);
|
||||
|
||||
hash_table_foreach(state.accesses, entry) {
|
||||
const struct nu_handle_key *key = entry->key;
|
||||
struct nu_handle_data data = *(struct nu_handle_data *)entry->data;
|
||||
|
||||
nir_src *first_src = util_dynarray_top_ptr(&data.srcs, struct nu_handle_src)->srcs[0];
|
||||
b.cursor = nir_after_instr(nir_src_parent_instr(first_src));
|
||||
|
||||
nir_push_loop(&b);
|
||||
|
||||
nir_def *all_equal_first = NULL;
|
||||
for (uint32_t i = 0; i < key->handle_count; i++) {
|
||||
if (i && data.handles[i].handle == data.handles[0].handle) {
|
||||
data.handles[i].first = data.handles[0].first;
|
||||
continue;
|
||||
}
|
||||
|
||||
nir_def *equal_first = nu_handle_compare(options, &b, &data.handles[i], first_src);
|
||||
if (i == 0)
|
||||
all_equal_first = equal_first;
|
||||
else
|
||||
all_equal_first = nir_iand(&b, all_equal_first, equal_first);
|
||||
}
|
||||
|
||||
nir_push_if(&b, all_equal_first);
|
||||
|
||||
util_dynarray_foreach(&data.srcs, struct nu_handle_src, src) {
|
||||
for (uint32_t i = 0; i < key->handle_count; i++)
|
||||
nu_handle_rewrite(&b, &data.handles[i], src->srcs[i]);
|
||||
|
||||
nir_instr *instr = nir_src_parent_instr(src->srcs[0]);
|
||||
nir_instr_remove(instr);
|
||||
nir_builder_instr_insert(&b, instr);
|
||||
}
|
||||
|
||||
nir_jump(&b, nir_jump_break);
|
||||
|
||||
nir_pop_if(&b, NULL);
|
||||
nir_pop_loop(&b, NULL);
|
||||
}
|
||||
|
||||
_mesa_hash_table_destroy(state.accesses, NULL);
|
||||
|
||||
if (progress)
|
||||
nir_metadata_preserve(impl, nir_metadata_none);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue