2019-05-22 20:23:03 +01:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2018 Red Hat
|
|
|
|
|
* Copyright © 2019 Valve Corporation
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
|
* Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
|
*
|
|
|
|
|
* Authors:
|
|
|
|
|
* Rob Clark (robdclark@gmail.com)
|
|
|
|
|
* Daniel Schürmann (daniel.schuermann@campus.tu-berlin.de)
|
|
|
|
|
* Rhys Perry (pendingchaos02@gmail.com)
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "nir.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* A simple pass that moves some instructions into the least common
|
|
|
|
|
* ancestor of consuming instructions.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
nir_can_move_instr(nir_instr *instr, nir_move_options options)
|
|
|
|
|
{
|
2020-06-24 11:23:05 +01:00
|
|
|
switch (instr->type) {
|
|
|
|
|
case nir_instr_type_load_const:
|
|
|
|
|
case nir_instr_type_ssa_undef: {
|
|
|
|
|
return options & nir_move_const_undef;
|
2019-05-22 20:23:03 +01:00
|
|
|
}
|
2020-06-24 11:23:05 +01:00
|
|
|
case nir_instr_type_alu: {
|
2020-04-29 15:36:41 +01:00
|
|
|
if (nir_op_is_vec(nir_instr_as_alu(instr)->op) ||
|
|
|
|
|
nir_instr_as_alu(instr)->op == nir_op_b2i32)
|
2020-06-24 11:23:05 +01:00
|
|
|
return options & nir_move_copies;
|
|
|
|
|
if (nir_alu_instr_is_comparison(nir_instr_as_alu(instr)))
|
|
|
|
|
return options & nir_move_comparisons;
|
|
|
|
|
return false;
|
2019-05-22 20:23:03 +01:00
|
|
|
}
|
2020-06-24 11:23:05 +01:00
|
|
|
case nir_instr_type_intrinsic: {
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
|
switch (intrin->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_ubo:
|
|
|
|
|
return options & nir_move_load_ubo;
|
|
|
|
|
case nir_intrinsic_load_input:
|
|
|
|
|
case nir_intrinsic_load_interpolated_input:
|
|
|
|
|
case nir_intrinsic_load_per_vertex_input:
|
|
|
|
|
return options & nir_move_load_input;
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2019-05-22 20:23:03 +01:00
|
|
|
}
|
2020-06-24 11:23:05 +01:00
|
|
|
default:
|
|
|
|
|
return false;
|
2019-10-14 17:15:04 +01:00
|
|
|
}
|
2019-05-22 20:23:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_loop *
|
|
|
|
|
get_innermost_loop(nir_cf_node *node)
|
|
|
|
|
{
|
|
|
|
|
for (; node != NULL; node = node->parent) {
|
|
|
|
|
if (node->type == nir_cf_node_loop)
|
|
|
|
|
return (nir_loop*)node;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-25 14:02:48 +02:00
|
|
|
static bool
|
|
|
|
|
loop_contains_block(nir_loop *loop, nir_block *block)
|
2019-05-22 20:23:03 +01:00
|
|
|
{
|
2019-09-25 14:02:48 +02:00
|
|
|
nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
|
|
|
|
|
nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
|
2019-05-22 20:23:03 +01:00
|
|
|
|
2019-09-25 14:02:48 +02:00
|
|
|
return block->index > before->index && block->index < after->index;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Given the LCA of all uses and the definition, find a block on the path
|
|
|
|
|
* between them in the dominance tree that is outside of as many loops as
|
2019-09-25 14:17:23 +02:00
|
|
|
* possible. If "sink_out_of_loops" is false, then we disallow sinking the
|
|
|
|
|
* definition outside of the loop it's defined in (if any).
|
2019-09-25 14:02:48 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
static nir_block *
|
2019-09-25 14:17:23 +02:00
|
|
|
adjust_block_for_loops(nir_block *use_block, nir_block *def_block,
|
|
|
|
|
bool sink_out_of_loops)
|
2019-09-25 14:02:48 +02:00
|
|
|
{
|
2019-09-25 14:17:23 +02:00
|
|
|
nir_loop *def_loop = NULL;
|
|
|
|
|
if (!sink_out_of_loops)
|
|
|
|
|
def_loop = get_innermost_loop(&def_block->cf_node);
|
|
|
|
|
|
2019-09-25 14:02:48 +02:00
|
|
|
for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom;
|
|
|
|
|
cur_block = cur_block->imm_dom) {
|
2019-09-25 14:17:23 +02:00
|
|
|
if (!sink_out_of_loops && def_loop &&
|
|
|
|
|
!loop_contains_block(def_loop, use_block)) {
|
|
|
|
|
use_block = cur_block;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-25 14:02:48 +02:00
|
|
|
nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node);
|
|
|
|
|
if (next && next->type == nir_cf_node_loop) {
|
|
|
|
|
nir_loop *following_loop = nir_cf_node_as_loop(next);
|
|
|
|
|
if (loop_contains_block(following_loop, use_block)) {
|
|
|
|
|
use_block = cur_block;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-05-22 20:23:03 +01:00
|
|
|
}
|
2019-09-25 14:02:48 +02:00
|
|
|
|
|
|
|
|
return use_block;
|
2019-05-22 20:23:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* iterate a ssa def's use's and try to find a more optimal block to
|
|
|
|
|
* move it to, using the dominance tree. In short, if all of the uses
|
|
|
|
|
* are contained in a single block, the load will be moved there,
|
|
|
|
|
* otherwise it will be move to the least common ancestor block of all
|
|
|
|
|
* the uses
|
|
|
|
|
*/
|
|
|
|
|
static nir_block *
|
nir: don't sink instructions into loops
Repeatedly loading constants or evaluating ALU operations
in loops doesn't seem beneficial. This might increase the register
pressure, but the tradeoff seems worth it.
Totals from 13629 (9.77% of 139517) affected shaders (RAVEN):
SGPRs: 1179481 -> 1184697 (+0.44%); split: -0.03%, +0.47%
VGPRs: 978776 -> 978732 (-0.00%); split: -0.02%, +0.02%
SpillSGPRs: 51036 -> 50943 (-0.18%); split: -1.35%, +1.17%
CodeSize: 113775020 -> 113428812 (-0.30%); split: -0.34%, +0.04%
MaxWaves: 49877 -> 49881 (+0.01%); split: +0.02%, -0.01%
Instrs: 22295979 -> 22204936 (-0.41%); split: -0.42%, +0.02%
Cycles: 1637198832 -> 1626916048 (-0.63%); split: -0.64%, +0.01%
VMEM: 2403434 -> 2507645 (+4.34%); split: +4.76%, -0.42%
SMEM: 849676 -> 834576 (-1.78%); split: +0.60%, -2.38%
VClause: 412396 -> 398139 (-3.46%); split: -3.46%, +0.01%
SClause: 810480 -> 817349 (+0.85%); split: -0.19%, +1.04%
Copies: 2188260 -> 2166716 (-0.98%); split: -1.18%, +0.19%
Branches: 761204 -> 760475 (-0.10%); split: -0.15%, +0.05%
PreSGPRs: 972892 -> 981054 (+0.84%); split: -0.05%, +0.89%
PreVGPRs: 925390 -> 925420 (+0.00%); split: -0.02%, +0.02%
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7694>
2020-11-19 12:21:17 +01:00
|
|
|
get_preferred_block(nir_ssa_def *def, bool sink_out_of_loops)
|
2019-05-22 20:23:03 +01:00
|
|
|
{
|
|
|
|
|
nir_block *lca = NULL;
|
|
|
|
|
|
|
|
|
|
nir_foreach_use(use, def) {
|
|
|
|
|
nir_instr *instr = use->parent_instr;
|
|
|
|
|
nir_block *use_block = instr->block;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Kind of an ugly special-case, but phi instructions
|
|
|
|
|
* need to appear first in the block, so by definition
|
|
|
|
|
* we can't move an instruction into a block where it is
|
|
|
|
|
* consumed by a phi instruction. We could conceivably
|
|
|
|
|
* move it into a dominator block.
|
|
|
|
|
*/
|
|
|
|
|
if (instr->type == nir_instr_type_phi) {
|
|
|
|
|
nir_phi_instr *phi = nir_instr_as_phi(instr);
|
|
|
|
|
nir_block *phi_lca = NULL;
|
|
|
|
|
nir_foreach_phi_src(src, phi) {
|
|
|
|
|
if (&src->src == use)
|
|
|
|
|
phi_lca = nir_dominance_lca(phi_lca, src->pred);
|
|
|
|
|
}
|
|
|
|
|
use_block = phi_lca;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lca = nir_dominance_lca(lca, use_block);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_foreach_if_use(use, def) {
|
|
|
|
|
nir_block *use_block =
|
|
|
|
|
nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node));
|
|
|
|
|
|
|
|
|
|
lca = nir_dominance_lca(lca, use_block);
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-02 15:34:26 +01:00
|
|
|
/* return in case, we didn't find a reachable user */
|
|
|
|
|
if (!lca)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
nir: don't sink instructions into loops
Repeatedly loading constants or evaluating ALU operations
in loops doesn't seem beneficial. This might increase the register
pressure, but the tradeoff seems worth it.
Totals from 13629 (9.77% of 139517) affected shaders (RAVEN):
SGPRs: 1179481 -> 1184697 (+0.44%); split: -0.03%, +0.47%
VGPRs: 978776 -> 978732 (-0.00%); split: -0.02%, +0.02%
SpillSGPRs: 51036 -> 50943 (-0.18%); split: -1.35%, +1.17%
CodeSize: 113775020 -> 113428812 (-0.30%); split: -0.34%, +0.04%
MaxWaves: 49877 -> 49881 (+0.01%); split: +0.02%, -0.01%
Instrs: 22295979 -> 22204936 (-0.41%); split: -0.42%, +0.02%
Cycles: 1637198832 -> 1626916048 (-0.63%); split: -0.64%, +0.01%
VMEM: 2403434 -> 2507645 (+4.34%); split: +4.76%, -0.42%
SMEM: 849676 -> 834576 (-1.78%); split: +0.60%, -2.38%
VClause: 412396 -> 398139 (-3.46%); split: -3.46%, +0.01%
SClause: 810480 -> 817349 (+0.85%); split: -0.19%, +1.04%
Copies: 2188260 -> 2166716 (-0.98%); split: -1.18%, +0.19%
Branches: 761204 -> 760475 (-0.10%); split: -0.15%, +0.05%
PreSGPRs: 972892 -> 981054 (+0.84%); split: -0.05%, +0.89%
PreVGPRs: 925390 -> 925420 (+0.00%); split: -0.02%, +0.02%
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7694>
2020-11-19 12:21:17 +01:00
|
|
|
/* We don't sink any instructions into loops to avoid repeated executions
|
|
|
|
|
* This might occasionally increase register pressure, but seems overall
|
|
|
|
|
* the better choice.
|
2019-09-25 14:02:48 +02:00
|
|
|
*/
|
nir: don't sink instructions into loops
Repeatedly loading constants or evaluating ALU operations
in loops doesn't seem beneficial. This might increase the register
pressure, but the tradeoff seems worth it.
Totals from 13629 (9.77% of 139517) affected shaders (RAVEN):
SGPRs: 1179481 -> 1184697 (+0.44%); split: -0.03%, +0.47%
VGPRs: 978776 -> 978732 (-0.00%); split: -0.02%, +0.02%
SpillSGPRs: 51036 -> 50943 (-0.18%); split: -1.35%, +1.17%
CodeSize: 113775020 -> 113428812 (-0.30%); split: -0.34%, +0.04%
MaxWaves: 49877 -> 49881 (+0.01%); split: +0.02%, -0.01%
Instrs: 22295979 -> 22204936 (-0.41%); split: -0.42%, +0.02%
Cycles: 1637198832 -> 1626916048 (-0.63%); split: -0.64%, +0.01%
VMEM: 2403434 -> 2507645 (+4.34%); split: +4.76%, -0.42%
SMEM: 849676 -> 834576 (-1.78%); split: +0.60%, -2.38%
VClause: 412396 -> 398139 (-3.46%); split: -3.46%, +0.01%
SClause: 810480 -> 817349 (+0.85%); split: -0.19%, +1.04%
Copies: 2188260 -> 2166716 (-0.98%); split: -1.18%, +0.19%
Branches: 761204 -> 760475 (-0.10%); split: -0.15%, +0.05%
PreSGPRs: 972892 -> 981054 (+0.84%); split: -0.05%, +0.89%
PreVGPRs: 925390 -> 925420 (+0.00%); split: -0.02%, +0.02%
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7694>
2020-11-19 12:21:17 +01:00
|
|
|
lca = adjust_block_for_loops(lca, def->parent_instr->block,
|
|
|
|
|
sink_out_of_loops);
|
|
|
|
|
assert(nir_block_dominates(def->parent_instr->block, lca));
|
2019-09-25 14:02:48 +02:00
|
|
|
|
2019-05-22 20:23:03 +01:00
|
|
|
return lca;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
nir_opt_sink(nir_shader *shader, nir_move_options options)
|
|
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
|
|
|
|
nir_foreach_function(function, shader) {
|
|
|
|
|
if (!function->impl)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_metadata_require(function->impl,
|
|
|
|
|
nir_metadata_block_index | nir_metadata_dominance);
|
|
|
|
|
|
|
|
|
|
nir_foreach_block_reverse(block, function->impl) {
|
|
|
|
|
nir_foreach_instr_reverse_safe(instr, block) {
|
|
|
|
|
if (!nir_can_move_instr(instr, options))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_ssa_def *def = nir_instr_ssa_def(instr);
|
2019-09-25 14:17:23 +02:00
|
|
|
|
|
|
|
|
/* Don't sink load_ubo out of loops because that can make its
|
|
|
|
|
* resource divergent and break code like that which is generated
|
|
|
|
|
* by nir_lower_non_uniform_access.
|
|
|
|
|
*/
|
|
|
|
|
bool sink_out_of_loops =
|
|
|
|
|
instr->type != nir_instr_type_intrinsic ||
|
|
|
|
|
nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_load_ubo;
|
2019-05-22 20:23:03 +01:00
|
|
|
nir_block *use_block =
|
nir: don't sink instructions into loops
Repeatedly loading constants or evaluating ALU operations
in loops doesn't seem beneficial. This might increase the register
pressure, but the tradeoff seems worth it.
Totals from 13629 (9.77% of 139517) affected shaders (RAVEN):
SGPRs: 1179481 -> 1184697 (+0.44%); split: -0.03%, +0.47%
VGPRs: 978776 -> 978732 (-0.00%); split: -0.02%, +0.02%
SpillSGPRs: 51036 -> 50943 (-0.18%); split: -1.35%, +1.17%
CodeSize: 113775020 -> 113428812 (-0.30%); split: -0.34%, +0.04%
MaxWaves: 49877 -> 49881 (+0.01%); split: +0.02%, -0.01%
Instrs: 22295979 -> 22204936 (-0.41%); split: -0.42%, +0.02%
Cycles: 1637198832 -> 1626916048 (-0.63%); split: -0.64%, +0.01%
VMEM: 2403434 -> 2507645 (+4.34%); split: +4.76%, -0.42%
SMEM: 849676 -> 834576 (-1.78%); split: +0.60%, -2.38%
VClause: 412396 -> 398139 (-3.46%); split: -3.46%, +0.01%
SClause: 810480 -> 817349 (+0.85%); split: -0.19%, +1.04%
Copies: 2188260 -> 2166716 (-0.98%); split: -1.18%, +0.19%
Branches: 761204 -> 760475 (-0.10%); split: -0.15%, +0.05%
PreSGPRs: 972892 -> 981054 (+0.84%); split: -0.05%, +0.89%
PreVGPRs: 925390 -> 925420 (+0.00%); split: -0.02%, +0.02%
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7694>
2020-11-19 12:21:17 +01:00
|
|
|
get_preferred_block(def, sink_out_of_loops);
|
2019-05-22 20:23:03 +01:00
|
|
|
|
|
|
|
|
if (!use_block || use_block == instr->block)
|
|
|
|
|
continue;
|
|
|
|
|
|
2020-12-02 16:03:32 +00:00
|
|
|
nir_instr_remove(instr);
|
|
|
|
|
nir_instr_insert(nir_after_phis(use_block), instr);
|
2019-05-22 20:23:03 +01:00
|
|
|
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_metadata_preserve(function->impl,
|
|
|
|
|
nir_metadata_block_index | nir_metadata_dominance);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|