mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 21:10:12 +01:00
radv: lower push constants in NIR
fossil-db (navi21):
Totals from 879 (1.11% of 79395) affected shaders:
Instrs: 1359371 -> 1360237 (+0.06%); split: -0.02%, +0.08%
CodeSize: 7290856 -> 7294308 (+0.05%); split: -0.01%, +0.06%
SpillSGPRs: 751 -> 800 (+6.52%)
Latency: 21923904 -> 21923983 (+0.00%); split: -0.03%, +0.03%
InvThroughput: 7029748 -> 7029528 (-0.00%); split: -0.03%, +0.03%
VClause: 23595 -> 23610 (+0.06%)
SClause: 31819 -> 32256 (+1.37%); split: -0.07%, +1.44%
Copies: 109175 -> 110089 (+0.84%); split: -0.13%, +0.97%
Branches: 32068 -> 32072 (+0.01%); split: -0.02%, +0.03%
PreSGPRs: 41831 -> 41774 (-0.14%); split: -0.15%, +0.01%
PreVGPRs: 53605 -> 53604 (-0.00%)
VALU: 1020426 -> 1020521 (+0.01%); split: -0.00%, +0.01%
SALU: 135931 -> 136850 (+0.68%); split: -0.08%, +0.76%
SMEM: 51688 -> 51686 (-0.00%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29675>
This commit is contained in:
parent
1ca97f019e
commit
edbb75ce3a
2 changed files with 59 additions and 0 deletions
|
|
@ -342,6 +342,55 @@ update_image_intrinsic(nir_builder *b, apply_layout_state *state, nir_intrinsic_
|
|||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
can_increase_load_size(nir_intrinsic_instr *intrin, unsigned offset, unsigned old, unsigned new)
|
||||
{
|
||||
/* Only increase the size of loads if doing so won't extend into a new page/cache-line. */
|
||||
unsigned align_mul = MIN2(nir_intrinsic_align_mul(intrin), 64u);
|
||||
unsigned end = (nir_intrinsic_align_offset(intrin) + offset + old) & (align_mul - 1);
|
||||
return (new - old) <= (align_mul - end);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
load_push_constant(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
|
||||
{
|
||||
unsigned base = nir_intrinsic_base(intrin);
|
||||
unsigned bit_size = intrin->def.bit_size;
|
||||
unsigned count = intrin->def.num_components * (bit_size / 32u);
|
||||
assert(bit_size >= 32);
|
||||
|
||||
/* Try to use inline push constants when possible. */
|
||||
if (nir_src_is_const(intrin->src[0])) {
|
||||
unsigned start = (base + nir_src_as_uint(intrin->src[0])) / 4u;
|
||||
uint64_t mask = BITFIELD64_MASK(count) << start;
|
||||
if ((state->args->ac.inline_push_const_mask & mask) == mask &&
|
||||
start + count <= (sizeof(state->args->ac.inline_push_const_mask) * 8u)) {
|
||||
start = util_bitcount64(state->args->ac.inline_push_const_mask & BITFIELD64_MASK(start));
|
||||
nir_def *res[NIR_MAX_VEC_COMPONENTS * 2];
|
||||
for (unsigned i = 0; i < count; i++)
|
||||
res[i] = get_scalar_arg(b, 1, state->args->ac.inline_push_consts[start + i]);
|
||||
return nir_extract_bits(b, res, count, 0, intrin->def.num_components, bit_size);
|
||||
}
|
||||
}
|
||||
|
||||
nir_def *addr = get_scalar_arg(b, 1, state->args->ac.push_constants);
|
||||
addr = convert_pointer_to_64_bit(b, state, addr);
|
||||
|
||||
nir_def *offset = nir_iadd_imm_nuw(b, intrin->src[0].ssa, base);
|
||||
nir_def *data[NIR_MAX_VEC_COMPONENTS];
|
||||
unsigned num_loads = 0;
|
||||
for (unsigned start = 0; start < count;) {
|
||||
unsigned size = 1 << (util_last_bit(count - start) - 1); /* Round down to power of two. */
|
||||
/* Try to round up to power of two instead. */
|
||||
if (size < (count - start) && can_increase_load_size(intrin, start * 4, size, size * 2))
|
||||
size *= 2;
|
||||
|
||||
data[num_loads++] = nir_load_smem_amd(b, size, addr, nir_iadd_imm_nuw(b, offset, start * 4));
|
||||
start += size;
|
||||
}
|
||||
return nir_extract_bits(b, data, num_loads, 0, intrin->def.num_components, bit_size);
|
||||
}
|
||||
|
||||
static void
|
||||
apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
|
||||
{
|
||||
|
|
@ -382,6 +431,11 @@ apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_
|
|||
case nir_intrinsic_image_deref_descriptor_amd:
|
||||
update_image_intrinsic(b, state, intrin);
|
||||
break;
|
||||
case nir_intrinsic_load_push_constant: {
|
||||
nir_def_rewrite_uses(&intrin->def, load_push_constant(b, state, intrin));
|
||||
nir_instr_remove(&intrin->instr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -434,6 +434,11 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
NIR_PASS(_, stage->nir, nir_copy_prop);
|
||||
NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !instance->drirc.disable_shrink_image_store);
|
||||
|
||||
/* Ensure vectorized load_push_constant still have constant offsets, for
|
||||
* radv_nir_apply_pipeline_layout. */
|
||||
if (stage->args.ac.inline_push_const_mask)
|
||||
NIR_PASS(_, stage->nir, nir_opt_constant_folding);
|
||||
|
||||
/* Gather info again, to update whether 8/16-bit are used. */
|
||||
nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue