mesa/src/compiler/nir/nir_opt_peephole_select.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

587 lines
19 KiB
C
Raw Normal View History

/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
#include "nir/nir_builder.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_control_flow.h"
/*
* Implements a small peephole optimization that looks for
*
* if (cond) {
* <then SSA defs>
* } else {
* <else SSA defs>
* }
* phi
* ...
* phi
*
* and replaces it with:
*
* <then SSA defs>
* <else SSA defs>
* bcsel
* ...
* bcsel
*
* where the SSA defs are ALU operations or other cheap instructions (not
* texturing, for example).
*
* If the number of ALU operations in the branches is greater than the limit
* parameter, then the optimization is skipped. In limit=0 mode, the SSA defs
* must only be MOVs which we expect to get copy-propagated away once they're
* out of the inner blocks.
*/
/**
 * Check whether every instruction in a block is acceptable for this pass.
 *
 * \param block    Block whose instructions are inspected.
 * \param count    Incremented once for every ALU instruction that is neither
 *                 an fsat nor a move-like operation (only when
 *                 options->limit is non-zero).  The caller compares the
 *                 total against the limit.
 * \param options  Pass options.  Three modes are distinguished by
 *                 options->limit:
 *                   - ~0: "flatten everything" mode.  Any ALU, deref,
 *                     load_const, phi, undef or tex instruction is accepted,
 *                     as are terminate/demote and any intrinsic for which
 *                     nir_intrinsic_can_reorder() is true.
 *                   - 0: only move-like ALU instructions are accepted, and
 *                     each must be used exclusively by phis in the block's
 *                     first successor.
 *                   - otherwise: ALU instructions and the intrinsics listed
 *                     below are accepted, subject to the indirect_load_ok,
 *                     expensive_alu_ok and discard_ok options.
 *
 * \return true if all instructions are allowed, false as soon as one is not.
 */
static bool
block_check_for_allowed_instrs(nir_block *block, unsigned *count,
                               const nir_opt_peephole_select_options *options)
{
   bool alu_ok = options->limit != 0;

   /* Used on non-control-flow HW to flatten all IFs. */
   if (options->limit == ~0) {
      nir_foreach_instr(instr, block) {
         switch (instr->type) {
         case nir_instr_type_alu:
         case nir_instr_type_deref:
         case nir_instr_type_load_const:
         case nir_instr_type_phi:
         case nir_instr_type_undef:
         case nir_instr_type_tex:
            break;

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            switch (intr->intrinsic) {
            case nir_intrinsic_terminate:
            case nir_intrinsic_terminate_if:
            case nir_intrinsic_demote:
            case nir_intrinsic_demote_if:
               /* For non-CF hardware, we need to be able to move discards up
                * and flatten, so let them pass.
                */
               break;
            default:
               if (!nir_intrinsic_can_reorder(intr))
                  return false;
               break;
            }
            break;
         }

         case nir_instr_type_call:
         case nir_instr_type_jump:
         case nir_instr_type_parallel_copy:
            return false;
         }
      }
      return true;
   }

   nir_foreach_instr(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref: {
            nir_deref_instr *const deref = nir_src_as_deref(intrin->src[0]);

            switch (deref->modes) {
            case nir_var_shader_in:
            case nir_var_uniform:
            case nir_var_image:
               /* Don't try to remove flow control around an indirect load
                * because that flow control may be trying to avoid invalid
                * loads.
                */
               if (!options->indirect_load_ok && nir_deref_instr_has_indirect(deref))
                  return false;
               break;

            default:
               return false;
            }
            break;
         }

         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
            /* src[1] must be constant unless indirect loads are allowed. */
            if (!options->indirect_load_ok && !nir_src_is_const(intrin->src[1]))
               return false;
            if (!(nir_intrinsic_access(intrin) & ACCESS_CAN_SPECULATE))
               return false;
            break;

         case nir_intrinsic_load_global_constant:
         case nir_intrinsic_load_constant_agx:
            /* src[0] must be constant unless indirect loads are allowed. */
            if (!options->indirect_load_ok && !nir_src_is_const(intrin->src[0]))
               return false;
            if (!(nir_intrinsic_access(intrin) & ACCESS_CAN_SPECULATE))
               return false;
            break;

         case nir_intrinsic_masked_swizzle_amd:
         case nir_intrinsic_quad_swizzle_amd:
            /* Only accepted when the fetch_inactive index is set; after
             * that they are treated like the intrinsics below.
             */
            if (!nir_intrinsic_fetch_inactive(intrin))
               return false;
            FALLTHROUGH;
         case nir_intrinsic_load_uniform:
         case nir_intrinsic_load_preamble:
         case nir_intrinsic_load_scalar_arg_amd:
         case nir_intrinsic_load_vector_arg_amd:
         case nir_intrinsic_load_helper_invocation:
         case nir_intrinsic_is_helper_invocation:
         case nir_intrinsic_load_front_face:
         case nir_intrinsic_load_view_index:
         case nir_intrinsic_load_layer_id:
         case nir_intrinsic_load_frag_coord:
         case nir_intrinsic_load_pixel_coord:
         case nir_intrinsic_load_frag_coord_z:
         case nir_intrinsic_load_frag_coord_w:
         case nir_intrinsic_load_sample_pos:
         case nir_intrinsic_load_sample_pos_or_center:
         case nir_intrinsic_load_sample_id:
         case nir_intrinsic_load_sample_mask_in:
         case nir_intrinsic_load_vertex_id_zero_base:
         case nir_intrinsic_load_first_vertex:
         case nir_intrinsic_load_base_instance:
         case nir_intrinsic_load_instance_id:
         case nir_intrinsic_load_draw_id:
         case nir_intrinsic_load_num_workgroups:
         case nir_intrinsic_load_workgroup_id:
         case nir_intrinsic_load_local_invocation_id:
         case nir_intrinsic_load_local_invocation_index:
         case nir_intrinsic_load_subgroup_id:
         case nir_intrinsic_load_subgroup_invocation:
         case nir_intrinsic_load_num_subgroups:
         case nir_intrinsic_load_frag_shading_rate:
         case nir_intrinsic_is_sparse_texels_resident:
         case nir_intrinsic_sparse_residency_code_and:
         case nir_intrinsic_read_invocation:
         case nir_intrinsic_quad_broadcast:
         case nir_intrinsic_quad_swap_horizontal:
         case nir_intrinsic_quad_swap_vertical:
         case nir_intrinsic_quad_swap_diagonal:
         case nir_intrinsic_lane_permute_16_amd:
         case nir_intrinsic_ddx:
         case nir_intrinsic_ddx_fine:
         case nir_intrinsic_ddx_coarse:
         case nir_intrinsic_ddy:
         case nir_intrinsic_ddy_fine:
         case nir_intrinsic_ddy_coarse:
         case nir_intrinsic_load_const_ir3:
            /* These are rejected in limit == 0 mode. */
            if (!alu_ok)
               return false;
            break;

         case nir_intrinsic_terminate:
         case nir_intrinsic_terminate_if:
         case nir_intrinsic_demote:
         case nir_intrinsic_demote_if:
            if (!options->discard_ok)
               return false;
            break;

         default:
            return false;
         }

         break;
      }

      case nir_instr_type_deref:
      case nir_instr_type_load_const:
      case nir_instr_type_undef:
         break;

      case nir_instr_type_alu: {
         nir_alu_instr *mov = nir_instr_as_alu(instr);
         bool movelike = false;

         switch (mov->op) {
         case nir_op_mov:
         case nir_op_fneg:
         case nir_op_ineg:
         case nir_op_fabs:
         case nir_op_iabs:
         case nir_op_vec2:
         case nir_op_vec3:
         case nir_op_vec4:
         case nir_op_vec5:
         case nir_op_vec8:
         case nir_op_vec16:
            movelike = true;
            break;

         case nir_op_fcos:
         case nir_op_fdiv:
         case nir_op_fexp2:
         case nir_op_flog2:
         case nir_op_fmod:
         case nir_op_fpow:
         case nir_op_frcp:
         case nir_op_frem:
         case nir_op_frsq:
         case nir_op_fsin:
         case nir_op_idiv:
         case nir_op_irem:
         case nir_op_udiv:
            if (!alu_ok || !options->expensive_alu_ok)
               return false;
            break;

         default:
            if (!alu_ok) {
               /* It must be a move-like operation. */
               return false;
            }
            break;
         }

         if (alu_ok) {
            /* If the ALU operation is an fsat or a move-like operation, do
             * not count it.  The expectation is that it will eventually be
             * merged as a destination modifier or source modifier on some
             * other instruction.
             */
            if (mov->op != nir_op_fsat && !movelike)
               (*count)++;
         } else {
            /* The only uses of this definition must be phis in the successor */
            nir_foreach_use_including_if(use, &mov->def) {
               if (nir_src_is_if(use) ||
                   nir_src_parent_instr(use)->type != nir_instr_type_phi ||
                   nir_src_parent_instr(use)->block != block->successors[0])
                  return false;
            }
         }
         break;
      }

      default:
         return false;
      }
   }

   return true;
}
/*
 * Return a copy of the pass options adjusted for one particular if-statement.
 *
 * The if's selection-control value can override the driver-provided settings:
 *   - flatten:      raise the limit to UINT_MAX - 1 and permit indirect loads
 *                   and expensive ALU operations.
 *   - dont_flatten: drop the limit to 0 and forbid indirect loads.
 * Any other control value leaves the options as supplied by the caller.
 */
static nir_opt_peephole_select_options
get_options_for_if(nir_if *if_stmt,
                   const nir_opt_peephole_select_options *options)
{
   nir_opt_peephole_select_options result = *options;

   switch (if_stmt->control) {
   case nir_selection_control_flatten:
      /* Override driver defaults */
      result.indirect_load_ok = true;
      result.expensive_alu_ok = true;
      /* Maximum without unsafe flattening: a limit of ~0 is what selects the
       * flatten-everything mode in block_check_for_allowed_instrs().
       */
      result.limit = UINT_MAX - 1;
      break;

   case nir_selection_control_dont_flatten:
      result.indirect_load_ok = false;
      result.limit = 0;
      break;

   default:
      break;
   }

   return result;
}
/*
 * When a discard is hoisted out of an if, it must keep firing only on the
 * path it originally lived on, so the if's condition is folded into it.
 *
 * instr    Instruction to patch; anything other than a terminate/demote
 *          (conditional or not) is left untouched.
 * if_cond  Condition of the if-statement the instruction is leaving.
 * is_else  True when the instruction came from the else branch, in which
 *          case the negated condition is used.
 */
static void
rewrite_discard_conds(nir_instr *instr, nir_def *if_cond, bool is_else)
{
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *discard = nir_instr_as_intrinsic(instr);

   /* Classify the intrinsic once; bail out for anything that isn't a discard. */
   bool already_conditional;
   switch (discard->intrinsic) {
   case nir_intrinsic_terminate_if:
   case nir_intrinsic_demote_if:
      already_conditional = true;
      break;
   case nir_intrinsic_terminate:
   case nir_intrinsic_demote:
      already_conditional = false;
      break;
   default:
      return;
   }

   nir_builder b = nir_builder_at(nir_before_instr(instr));
   nir_def *guard = is_else ? nir_inot(&b, if_cond) : if_cond;

   if (already_conditional) {
      /* Conditional discard: AND its existing condition with the guard. */
      nir_def *combined = nir_iand(&b, discard->src[0].ssa, guard);
      nir_src_rewrite(&discard->src[0], combined);
      return;
   }

   /* Unconditional discard: replace it with the matching *_if variant. */
   if (discard->intrinsic == nir_intrinsic_terminate)
      nir_terminate_if(&b, guard);
   else
      nir_demote_if(&b, guard);

   nir_instr_remove(instr);
}
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
/**
* Try to collapse nested ifs:
* This optimization turns
*
* if (cond1) {
* <allowed instruction>
* if (cond2) {
* <any code>
* } else {
* }
* } else {
* }
*
* into
*
* <allowed instruction>
* if (cond1 && cond2) {
* <any code>
* } else {
* }
*
*/
static bool
nir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader,
const nir_opt_peephole_select_options *options)
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
{
/* the if has to be nested */
if (if_stmt->cf_node.parent->type != nir_cf_node_if)
return false;
nir_if *parent_if = nir_cf_node_as_if(if_stmt->cf_node.parent);
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
/* check if the else block is empty */
if (!nir_cf_list_is_empty_block(&if_stmt->else_list))
return false;
/* this opt doesn't make much sense if the branch is empty */
if (nir_cf_list_is_empty_block(&if_stmt->then_list))
return false;
/* the nested if has to be the only cf_node:
* i.e. <block> <if_stmt> <block> */
if (exec_list_length(&parent_if->then_list) != 3)
return false;
/* check if the else block of the parent if is empty */
if (!nir_cf_list_is_empty_block(&parent_if->else_list))
return false;
/* check if the block after the nested if is empty except for phis */
nir_block *last = nir_if_last_then_block(parent_if);
nir_instr *last_instr = nir_block_last_instr(last);
if (last_instr && last_instr->type != nir_instr_type_phi)
return false;
/* check if all outer phis become trivial after merging the ifs */
nir_foreach_instr(instr, last) {
if (parent_if->control == nir_selection_control_flatten)
break;
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
nir_phi_instr *phi = nir_instr_as_phi(instr);
nir_phi_src *else_src =
nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt));
nir_foreach_use(src, &phi->def) {
assert(nir_src_parent_instr(src)->type == nir_instr_type_phi);
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
nir_phi_src *phi_src =
nir_phi_get_src_from_block(nir_instr_as_phi(nir_src_parent_instr(src)),
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
nir_if_first_else_block(parent_if));
if (phi_src->src.ssa != else_src->src.ssa)
return false;
}
}
/* check if the block before the nested if matches the requirements */
nir_block *first = nir_if_first_then_block(parent_if);
nir_opt_peephole_select_options if_options = get_options_for_if(parent_if, options);
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
unsigned count = 0;
if (!block_check_for_allowed_instrs(first, &count, &if_options))
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
return false;
if (count > if_options.limit)
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
return false;
/* trivialize succeeding phis */
nir_foreach_instr(instr, last) {
nir_phi_instr *phi = nir_instr_as_phi(instr);
nir_phi_src *else_src =
nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt));
nir_foreach_use_safe(src, &phi->def) {
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
nir_phi_src *phi_src =
nir_phi_get_src_from_block(nir_instr_as_phi(nir_src_parent_instr(src)),
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
nir_if_first_else_block(parent_if));
if (phi_src->src.ssa == else_src->src.ssa)
nir_src_rewrite(&phi_src->src, &phi->def);
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
}
}
/* combine condition with potential demote/terminate */
nir_foreach_instr_safe(instr, first)
rewrite_discard_conds(instr, parent_if->condition.ssa, false);
/* combine the if conditions */
struct nir_builder b = nir_builder_at(nir_before_cf_node(&if_stmt->cf_node));
nir_def *cond = nir_iand(&b, if_stmt->condition.ssa,
parent_if->condition.ssa);
nir_src_rewrite(&if_stmt->condition, cond);
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
/* move the whole inner if before the parent if */
nir_cf_list tmp;
nir_cf_extract(&tmp, nir_before_block(first),
nir_after_block(last));
nir/opt_peephole_select: collapse nested IFs if applicable Single-sided nested IFs can sometimes be collapsed even if they cannot be flattened. This optimization re-uses block_check_for_allowed_instrs() to determine if it is beneficial to collapse the IFs. Additionally, it is required that the phis of the outer IF become trivial after this optimization, so that no additional bcsel instructions are added. This optimization turns if (cond1) { <allowed instruction> if (cond2) { <any code> } else { } } else { } into <allowed instruction> if (cond1 && cond2) { <any code> } else { } Totals from 17044 (12.35% of 138013) affected shaders (RAVEN): SGPRs: 1246416 -> 1246256 (-0.01%); split: -0.01%, +0.00% VGPRs: 802752 -> 802736 (-0.00%); split: -0.01%, +0.01% SpillSGPRs: 45857 -> 45850 (-0.02%); split: -0.07%, +0.05% CodeSize: 85318240 -> 85208592 (-0.13%); split: -0.15%, +0.02% Instrs: 16769049 -> 16738195 (-0.18%); split: -0.20%, +0.02% Cycles: 947328732 -> 947145796 (-0.02%); split: -0.03%, +0.01% VMEM: 7271539 -> 7274090 (+0.04%); split: +0.05%, -0.01% SMEM: 925983 -> 927374 (+0.15%); split: +0.19%, -0.04% VClause: 294334 -> 294340 (+0.00%); split: -0.00%, +0.00% SClause: 633600 -> 634048 (+0.07%); split: -0.01%, +0.08% Copies: 1589650 -> 1580573 (-0.57%); split: -0.66%, +0.09% Branches: 540830 -> 525767 (-2.79%); split: -2.79%, +0.00% PreSGPRs: 902500 -> 902415 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 759992 -> 760019 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7478>
2020-11-04 18:20:08 +01:00
nir_cf_reinsert(&tmp, nir_before_cf_node(&parent_if->cf_node));
/* The now empty parent if will be cleaned up by other passes */
return true;
}
/*
 * Try to remove the if-statement that directly precedes `block`.
 *
 * Two strategies are attempted, in this order:
 *
 *  1. nir_opt_collapse_if(): merge the if with its parent if.
 *  2. Flattening: when each side of the if is a single block that only holds
 *     instructions accepted by block_check_for_allowed_instrs(), and their
 *     combined count does not exceed the per-if limit, the instructions are
 *     moved into the block before the if, every phi in `block` is replaced
 *     by a bcsel on the if condition, and the if is removed.
 *
 * block   - the block following the candidate if-statement
 * shader  - shader used to allocate the new bcsel instructions
 * options - pass options; get_options_for_if() derives the per-if options
 *
 * Returns true if the control flow was changed, false otherwise.
 */
static bool
nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
                              const nir_opt_peephole_select_options *options)
{
   if (nir_cf_node_is_first(&block->cf_node))
      return false;

   nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
   if (prev_node->type != nir_cf_node_if)
      return false;

   nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node));

   /* If the last instruction before this if/else block is a jump, we can't
    * append stuff after it because it would break a bunch of assumptions
    * about control flow (nir_validate expects the successor of a return/halt
    * jump to be the end of the function, which might not match the successor
    * of the if/else blocks).
    */
   if (nir_block_ends_in_return_or_halt(prev_block))
      return false;

   nir_if *if_stmt = nir_cf_node_as_if(prev_node);

   /* first, try to collapse the if */
   if (nir_opt_collapse_if(if_stmt, shader, options))
      return true;

   nir_block *then_block = nir_if_first_then_block(if_stmt);
   nir_block *else_block = nir_if_first_else_block(if_stmt);

   /* We can only have one block in each side ... */
   if (nir_if_last_then_block(if_stmt) != then_block ||
       nir_if_last_else_block(if_stmt) != else_block)
      return false;

   nir_opt_peephole_select_options if_options = get_options_for_if(if_stmt, options);

   /* ... and those blocks must only contain "allowed" instructions.
    * The same counter is passed for both sides, so the limit below applies
    * to the sum of both blocks.
    */
   unsigned count = 0;
   if (!block_check_for_allowed_instrs(then_block, &count, &if_options) ||
       !block_check_for_allowed_instrs(else_block, &count, &if_options))
      return false;

   if (count > if_options.limit)
      return false;

   /* At this point, we know that the previous CFG node is an if-then
    * statement containing only moves to phi nodes in this block.  We can
    * just remove that entire CF node and replace all of the phi nodes with
    * selects.
    */

   /* First, we move the remaining instructions from the blocks to the
    * block before.  We have already guaranteed that this is safe by
    * calling block_check_for_allowed_instrs().
    *
    * Each moved instruction is also handed to rewrite_discard_conds()
    * together with the if condition; the last argument is false for the
    * then side and true for the else side.
    */
   nir_foreach_instr_safe(instr, then_block) {
      exec_node_remove(&instr->node);
      instr->block = prev_block;
      exec_list_push_tail(&prev_block->instr_list, &instr->node);
      rewrite_discard_conds(instr, if_stmt->condition.ssa, false);
   }

   nir_foreach_instr_safe(instr, else_block) {
      exec_node_remove(&instr->node);
      instr->block = prev_block;
      exec_list_push_tail(&prev_block->instr_list, &instr->node);
      rewrite_discard_conds(instr, if_stmt->condition.ssa, true);
   }

   /* Then replace every phi in the following block with
    * bcsel(condition, then-value, else-value).
    */
   nir_foreach_phi_safe(phi, block) {
      nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel);
      sel->src[0].src = nir_src_for_ssa(if_stmt->condition.ssa);
      /* Splat the condition to all channels */
      memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);

      assert(exec_list_length(&phi->srcs) == 2);
      nir_foreach_phi_src(src, phi) {
         assert(src->pred == then_block || src->pred == else_block);

         /* bcsel source 1 is the then value, source 2 the else value */
         unsigned idx = src->pred == then_block ? 1 : 2;
         sel->src[idx].src = nir_src_for_ssa(src->src.ssa);
      }

      nir_def_init(&sel->instr, &sel->def,
                   phi->def.num_components, phi->def.bit_size);

      nir_def_rewrite_uses(&phi->def,
                           &sel->def);

      nir_instr_insert_before(&phi->instr, &sel->instr);
      nir_instr_remove(&phi->instr);
   }

   nir_cf_node_remove(&if_stmt->cf_node);
   return true;
}
/*
 * Run nir_opt_peephole_select_block() on every block of `impl`.
 *
 * The blocks are walked with the _safe iterator because a successful call
 * may remove or move control flow nodes.
 *
 * impl    - function implementation to optimize
 * options - pass options forwarded to every block
 *
 * Returns the value of nir_progress(), which is called with whether any
 * block reported a change and with nir_metadata_none.
 */
static bool
nir_opt_peephole_select_impl(nir_function_impl *impl,
                             const nir_opt_peephole_select_options *options)
{
   nir_shader *shader = impl->function->shader;
   bool progress = false;

   nir_foreach_block_safe(block, impl) {
      progress |= nir_opt_peephole_select_block(block, shader, options);
   }

   return nir_progress(progress, impl, nir_metadata_none);
}
/*
 * Entry point of the pass: apply the peephole-select optimization to every
 * function implementation in `shader`.
 *
 * Returns true if at least one implementation reported progress.
 */
bool
nir_opt_peephole_select(nir_shader *shader,
                        const nir_opt_peephole_select_options *options)
{
   bool any_changed = false;

   nir_foreach_function_impl(impl, shader) {
      /* Always run the pass on the impl; only the result is accumulated. */
      const bool impl_changed = nir_opt_peephole_select_impl(impl, options);
      if (impl_changed)
         any_changed = true;
   }

   return any_changed;
}