mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 12:40:09 +01:00
nir/opt_offsets: don't check NUW for unswizzled buffer_amd
The no-unsigned-wrap (NUW) check isn't necessary for unswizzled buffer_amd loads/stores.

fossil-db (navi21):
Totals from 13 (0.02% of 79377) affected shaders:
Instrs: 18070 -> 18042 (-0.15%); split: -0.17%, +0.01%
CodeSize: 98336 -> 98012 (-0.33%)
Latency: 72735 -> 72992 (+0.35%); split: -0.02%, +0.38%
InvThroughput: 13157 -> 13105 (-0.40%)
VClause: 334 -> 324 (-2.99%)
SClause: 563 -> 564 (+0.18%)
Copies: 1194 -> 1197 (+0.25%)
VALU: 12330 -> 12297 (-0.27%)

fossil-db (polaris10):
Totals from 10 (0.02% of 61794) affected shaders:
Instrs: 4543 -> 4441 (-2.25%)
CodeSize: 30196 -> 29388 (-2.68%)
Latency: 64290 -> 64272 (-0.03%); split: -0.05%, +0.02%
InvThroughput: 20371 -> 20362 (-0.04%); split: -0.08%, +0.04%
VClause: 195 -> 135 (-30.77%)
Copies: 97 -> 100 (+3.09%)
PreSGPRs: 178 -> 177 (-0.56%)
VALU: 1765 -> 1666 (-5.61%)
VMEM: 2448 -> 2445 (-0.12%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29242>
This commit is contained in:
parent
539f9b4ba6
commit
cfa217ee04
1 changed file with 21 additions and 15 deletions
|
|
@@ -35,7 +35,8 @@ typedef struct
|
|||
} opt_offsets_state;
|
||||
|
||||
static nir_scalar
|
||||
try_extract_const_addition(nir_builder *b, nir_scalar val, opt_offsets_state *state, unsigned *out_const, uint32_t max)
|
||||
try_extract_const_addition(nir_builder *b, nir_scalar val, opt_offsets_state *state, unsigned *out_const,
|
||||
uint32_t max, bool need_nuw)
|
||||
{
|
||||
val = nir_scalar_chase_movs(val);
|
||||
|
||||
|
|
@@ -56,7 +57,8 @@ try_extract_const_addition(nir_builder *b, nir_scalar val, opt_offsets_state *st
|
|||
* Ignored for ints-as-floats (lower_bitops is a proxy for that), where
|
||||
* unsigned wrapping doesn't make sense.
|
||||
*/
|
||||
if (!state->options->allow_offset_wrap && !alu->no_unsigned_wrap && !b->shader->options->lower_bitops) {
|
||||
if (!state->options->allow_offset_wrap && need_nuw && !alu->no_unsigned_wrap &&
|
||||
!b->shader->options->lower_bitops) {
|
||||
if (!state->range_ht) {
|
||||
/* Cache for nir_unsigned_upper_bound */
|
||||
state->range_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
|
|
@@ -79,14 +81,14 @@ try_extract_const_addition(nir_builder *b, nir_scalar val, opt_offsets_state *st
|
|||
uint32_t offset = nir_scalar_as_uint(src[i]);
|
||||
if (offset + *out_const <= max) {
|
||||
*out_const += offset;
|
||||
return try_extract_const_addition(b, src[1 - i], state, out_const, max);
|
||||
return try_extract_const_addition(b, src[1 - i], state, out_const, max, need_nuw);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t orig_offset = *out_const;
|
||||
src[0] = try_extract_const_addition(b, src[0], state, out_const, max);
|
||||
src[1] = try_extract_const_addition(b, src[1], state, out_const, max);
|
||||
src[0] = try_extract_const_addition(b, src[0], state, out_const, max, need_nuw);
|
||||
src[1] = try_extract_const_addition(b, src[1], state, out_const, max, need_nuw);
|
||||
if (*out_const == orig_offset)
|
||||
return val;
|
||||
|
||||
|
|
@@ -102,7 +104,8 @@ try_fold_load_store(nir_builder *b,
|
|||
nir_intrinsic_instr *intrin,
|
||||
opt_offsets_state *state,
|
||||
unsigned offset_src_idx,
|
||||
uint32_t max)
|
||||
uint32_t max,
|
||||
bool need_nuw)
|
||||
{
|
||||
/* Assume that BASE is the constant offset of a load/store.
|
||||
* Try to constant-fold additions to the offset source
|
||||
|
|
@@ -122,7 +125,7 @@ try_fold_load_store(nir_builder *b,
|
|||
if (!nir_src_is_const(*off_src)) {
|
||||
uint32_t add_offset = 0;
|
||||
nir_scalar val = { .def = off_src->ssa, .comp = 0 };
|
||||
val = try_extract_const_addition(b, val, state, &add_offset, max - off_const);
|
||||
val = try_extract_const_addition(b, val, state, &add_offset, max - off_const, need_nuw);
|
||||
if (add_offset == 0)
|
||||
return false;
|
||||
off_const += add_offset;
|
||||
|
|
@@ -198,29 +201,32 @@ process_instr(nir_builder *b, nir_instr *instr, void *s)
|
|||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_uniform:
|
||||
case nir_intrinsic_load_const_ir3:
|
||||
return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->uniform_max));
|
||||
return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->uniform_max), true);
|
||||
case nir_intrinsic_load_ubo_vec4:
|
||||
return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->ubo_vec4_max));
|
||||
return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->ubo_vec4_max), true);
|
||||
case nir_intrinsic_shared_atomic:
|
||||
case nir_intrinsic_shared_atomic_swap:
|
||||
return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_atomic_max));
|
||||
return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_atomic_max), true);
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_load_shared_ir3:
|
||||
return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_max));
|
||||
return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_max), true);
|
||||
case nir_intrinsic_store_shared:
|
||||
case nir_intrinsic_store_shared_ir3:
|
||||
return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->shared_max));
|
||||
return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->shared_max), true);
|
||||
case nir_intrinsic_load_shared2_amd:
|
||||
return try_fold_shared2(b, intrin, state, 0);
|
||||
case nir_intrinsic_store_shared2_amd:
|
||||
return try_fold_shared2(b, intrin, state, 1);
|
||||
case nir_intrinsic_load_buffer_amd:
|
||||
return try_fold_load_store(b, intrin, state, 1, state->options->buffer_max);
|
||||
return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max),
|
||||
nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
|
||||
case nir_intrinsic_store_buffer_amd:
|
||||
return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max),
|
||||
nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
|
||||
case nir_intrinsic_load_ssbo_ir3:
|
||||
return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max));
|
||||
return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), true);
|
||||
case nir_intrinsic_store_ssbo_ir3:
|
||||
return try_fold_load_store(b, intrin, state, 3, get_max(state, intrin, state->options->buffer_max));
|
||||
return try_fold_load_store(b, intrin, state, 3, get_max(state, intrin, state->options->buffer_max), true);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue