mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
ac/nir_lower_global_access: perform range analysis if useful
fossil-db (navi31):
Totals from 1197 (0.59% of 202426) affected shaders:
Instrs: 2117283 -> 2108380 (-0.42%); split: -0.47%, +0.05%
CodeSize: 11183776 -> 11140060 (-0.39%); split: -0.42%, +0.03%
Latency: 13568247 -> 13648044 (+0.59%); split: -0.13%, +0.72%
InvThroughput: 2389746 -> 2376716 (-0.55%); split: -0.63%, +0.09%
VClause: 43337 -> 43138 (-0.46%); split: -0.51%, +0.05%
SClause: 31035 -> 31027 (-0.03%); split: -0.22%, +0.20%
Copies: 227528 -> 227002 (-0.23%); split: -0.59%, +0.36%
Branches: 29393 -> 29392 (-0.00%); split: -0.01%, +0.00%
PreSGPRs: 64238 -> 64336 (+0.15%)
PreVGPRs: 70480 -> 70468 (-0.02%)
VALU: 1387439 -> 1379274 (-0.59%); split: -0.59%, +0.00%
SALU: 185514 -> 185382 (-0.07%); split: -0.56%, +0.49%
VOPD: 4425 -> 4400 (-0.56%); split: +0.66%, -1.22%
fossil-db (navi21):
Totals from 1197 (0.59% of 202427) affected shaders:
Instrs: 1987004 -> 1974920 (-0.61%); split: -0.64%, +0.03%
CodeSize: 10803928 -> 10745204 (-0.54%); split: -0.56%, +0.01%
VGPRs: 83848 -> 83856 (+0.01%); split: -0.01%, +0.02%
SpillSGPRs: 9843 -> 9861 (+0.18%)
Latency: 14518481 -> 14534898 (+0.11%); split: -0.17%, +0.29%
InvThroughput: 3712336 -> 3698081 (-0.38%); split: -0.52%, +0.13%
VClause: 52677 -> 52546 (-0.25%); split: -0.51%, +0.27%
SClause: 31113 -> 31050 (-0.20%); split: -0.37%, +0.17%
Copies: 219723 -> 218017 (-0.78%); split: -1.05%, +0.27%
Branches: 33717 -> 33716 (-0.00%); split: -0.01%, +0.00%
PreSGPRs: 66364 -> 66480 (+0.17%)
PreVGPRs: 71048 -> 71036 (-0.02%)
VALU: 1442585 -> 1431331 (-0.78%); split: -0.78%, +0.00%
SALU: 209617 -> 208517 (-0.52%); split: -0.88%, +0.35%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40945>
This commit is contained in:
parent
1e03ef5c38
commit
7f3900ed20
1 changed files with 42 additions and 10 deletions
|
|
@ -14,15 +14,42 @@ is_u2u64(nir_scalar scalar)
|
|||
return nir_scalar_is_alu(scalar) && nir_scalar_alu_op(scalar) == nir_op_u2u64;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
nir_shader *shader;
|
||||
struct hash_table *range_ht;
|
||||
} lower_state;
|
||||
|
||||
static bool
|
||||
is_nuw(lower_state *state, nir_scalar scalar)
|
||||
{
|
||||
assert(scalar.def->bit_size == 32);
|
||||
nir_alu_instr *alu = nir_def_as_alu(scalar.def);
|
||||
if (alu->no_unsigned_wrap)
|
||||
return true;
|
||||
|
||||
if (!state->range_ht)
|
||||
state->range_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
|
||||
nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0);
|
||||
nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1);
|
||||
uint32_t ub0 = nir_unsigned_upper_bound(state->shader, state->range_ht, src0);
|
||||
uint32_t ub1 = nir_unsigned_upper_bound(state->shader, state->range_ht, src1);
|
||||
if ((UINT32_MAX - ub0) < ub1)
|
||||
return false;
|
||||
|
||||
alu->no_unsigned_wrap = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
|
||||
try_extract_additions(lower_state *state, nir_builder *b, nir_scalar scalar, uint64_t *out_const,
|
||||
nir_def **out_offset, bool require_nuw)
|
||||
{
|
||||
if (!nir_scalar_is_alu(scalar) || nir_scalar_alu_op(scalar) != nir_op_iadd)
|
||||
return NULL;
|
||||
|
||||
nir_alu_instr *alu = nir_def_as_alu(scalar.def);
|
||||
if (require_nuw && !alu->no_unsigned_wrap)
|
||||
if (require_nuw && !is_nuw(state, scalar))
|
||||
return NULL;
|
||||
|
||||
nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0);
|
||||
|
|
@ -38,19 +65,19 @@ try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
|
|||
continue;
|
||||
|
||||
*out_offset = nir_mov_scalar(b, offset_scalar);
|
||||
nir_def *replace_offset = try_extract_additions(b, offset_scalar, out_const, out_offset, true);
|
||||
nir_def *replace_offset = try_extract_additions(state, b, offset_scalar, out_const, out_offset, true);
|
||||
*out_offset = replace_offset ? replace_offset : *out_offset;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
nir_def *replace_src =
|
||||
try_extract_additions(b, i == 1 ? src0 : src1, out_const, out_offset, require_nuw);
|
||||
try_extract_additions(state, b, i == 1 ? src0 : src1, out_const, out_offset, require_nuw);
|
||||
return replace_src ? replace_src : nir_ssa_for_alu_src(b, alu, 1 - i);
|
||||
}
|
||||
|
||||
nir_def *replace_src0 = try_extract_additions(b, src0, out_const, out_offset, require_nuw);
|
||||
nir_def *replace_src1 = try_extract_additions(b, src1, out_const, out_offset, require_nuw);
|
||||
nir_def *replace_src0 = try_extract_additions(state, b, src0, out_const, out_offset, require_nuw);
|
||||
nir_def *replace_src1 = try_extract_additions(state, b, src1, out_const, out_offset, require_nuw);
|
||||
if (!replace_src0 && !replace_src1)
|
||||
return NULL;
|
||||
|
||||
|
|
@ -60,7 +87,7 @@ try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
|
|||
}
|
||||
|
||||
static bool
|
||||
process_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *_)
|
||||
process_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
||||
{
|
||||
nir_intrinsic_op op;
|
||||
unsigned access = 0;
|
||||
|
|
@ -92,7 +119,7 @@ process_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *_)
|
|||
nir_def *offset = NULL;
|
||||
nir_scalar src = {addr_src->ssa, 0};
|
||||
b->cursor = nir_after_def(addr_src->ssa);
|
||||
nir_def *addr = try_extract_additions(b, src, &off_const, &offset, false);
|
||||
nir_def *addr = try_extract_additions(state, b, src, &off_const, &offset, false);
|
||||
addr = addr ? addr : addr_src->ssa;
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
|
@ -139,6 +166,11 @@ process_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *_)
|
|||
bool
|
||||
ac_nir_lower_global_access(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(shader, process_instr,
|
||||
nir_metadata_control_flow, NULL);
|
||||
lower_state state;
|
||||
state.shader = shader;
|
||||
state.range_ht = NULL;
|
||||
bool progress = nir_shader_intrinsics_pass(shader, process_instr,
|
||||
nir_metadata_control_flow, &state);
|
||||
_mesa_hash_table_destroy(state.range_ht, NULL);
|
||||
return progress;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue