mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
nir/load_store_vectorize: hoist base addr instead of subtracting
Totals from 3130 (3.92% of 79839) affected shaders: (Navi48)

Instrs: 2634316 -> 2633652 (-0.03%); split: -0.06%, +0.04%
CodeSize: 13999784 -> 13996888 (-0.02%); split: -0.05%, +0.03%
SpillSGPRs: 1771 -> 1778 (+0.40%)
Latency: 27233464 -> 27230934 (-0.01%); split: -0.02%, +0.01%
InvThroughput: 4234587 -> 4234550 (-0.00%); split: -0.00%, +0.00%
VClause: 54684 -> 54689 (+0.01%)
SClause: 62743 -> 62912 (+0.27%); split: -0.08%, +0.35%
Copies: 162594 -> 163729 (+0.70%); split: -0.22%, +0.91%
PreSGPRs: 146909 -> 146914 (+0.00%); split: -0.01%, +0.01%
VALU: 1558771 -> 1558778 (+0.00%)
SALU: 337715 -> 338168 (+0.13%); split: -0.30%, +0.44%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37163>
This commit is contained in:
parent
cfba417316
commit
a53190a426
1 changed files with 36 additions and 14 deletions
|
|
@ -892,6 +892,31 @@ subtract_deref(nir_builder *b, nir_deref_instr *deref, int64_t offset)
|
|||
b, deref, nir_imm_intN_t(b, -offset, deref->def.bit_size));
|
||||
}
|
||||
|
||||
static void
|
||||
hoist_base_addr(nir_instr *instr, nir_instr *to_hoist)
|
||||
{
|
||||
/* Return if this instruction already dominates the first load. */
|
||||
if (to_hoist->block != instr->block || to_hoist->index <= instr->index)
|
||||
return;
|
||||
|
||||
/* Only the offset calculation (consisting of ALU and load_const)
|
||||
* differs between the vectorized loads.
|
||||
*/
|
||||
assert(to_hoist->type == nir_instr_type_load_const ||
|
||||
to_hoist->type == nir_instr_type_alu);
|
||||
|
||||
if (to_hoist->type == nir_instr_type_alu) {
|
||||
/* For ALU, recursively hoist the sources. */
|
||||
nir_alu_instr *alu = nir_instr_as_alu(to_hoist);
|
||||
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
|
||||
hoist_base_addr(instr, alu->src[i].src.ssa->parent_instr);
|
||||
}
|
||||
|
||||
nir_instr_move(nir_before_instr(instr), to_hoist);
|
||||
to_hoist->index = instr->index;
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
|
||||
struct entry *low, struct entry *high,
|
||||
|
|
@ -958,21 +983,16 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
|
|||
|
||||
/* update the offset */
|
||||
if (first != low && info->base_src >= 0) {
|
||||
/* let nir_opt_algebraic() remove this addition. this doesn't have much
|
||||
* issues with subtracting 16 from expressions like "(i + 1) * 16" because
|
||||
* nir_opt_algebraic() turns them into "i * 16 + 16" */
|
||||
b->cursor = nir_before_instr(first->instr);
|
||||
|
||||
/* Hoist low base addr before first intrinsic. */
|
||||
nir_def *base = low->intrin->src[info->base_src].ssa;
|
||||
hoist_base_addr(first->instr, base->parent_instr);
|
||||
nir_src_rewrite(&first->intrin->src[info->base_src], base);
|
||||
|
||||
if (nir_intrinsic_has_offset_shift(first->intrin)) {
|
||||
nir_add_io_offset(b, first->intrin, -(int)(high_start / 8u));
|
||||
} else {
|
||||
/* TODO once all intrinsics that need a scaled offset use
|
||||
* offset_shift, this old path can be removed.
|
||||
*/
|
||||
nir_def *new_base = first->intrin->src[info->base_src].ssa;
|
||||
new_base = nir_iadd_imm(
|
||||
b, new_base, -(int)(high_start / 8u / get_offset_scale(first)));
|
||||
nir_src_rewrite(&first->intrin->src[info->base_src], new_base);
|
||||
nir_intrinsic_set_offset_shift(first->intrin,
|
||||
nir_intrinsic_offset_shift(low->intrin));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1013,7 +1033,7 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
|
|||
|
||||
nir_intrinsic_set_range_base(first->intrin, old_low_range_base);
|
||||
nir_intrinsic_set_range(first->intrin, range);
|
||||
} else if (nir_intrinsic_has_base(first->intrin) && info->base_src == -1 && info->deref_src == -1) {
|
||||
} else if (nir_intrinsic_has_base(first->intrin) && info->deref_src == -1) {
|
||||
nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin));
|
||||
}
|
||||
|
||||
|
|
@ -1737,6 +1757,8 @@ process_block(nir_function_impl *impl, struct vectorize_ctx *ctx, nir_block *blo
|
|||
unsigned next_index = 0;
|
||||
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
instr->index = next_index++;
|
||||
|
||||
if (handle_barrier(ctx, &progress, impl, instr))
|
||||
continue;
|
||||
|
||||
|
|
@ -1760,7 +1782,7 @@ process_block(nir_function_impl *impl, struct vectorize_ctx *ctx, nir_block *blo
|
|||
|
||||
/* create entry */
|
||||
struct entry *entry = create_entry(ctx, ctx, info, intrin);
|
||||
entry->index = next_index++;
|
||||
entry->index = next_index;
|
||||
|
||||
list_addtail(&entry->head, &ctx->entries[mode_index]);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue