2015-01-21 15:23:32 -08:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2015 Intel Corporation
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
|
* Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "nir.h"
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Implements a pass that lowers vector phi nodes to scalar phi nodes when
|
|
|
|
|
* we don't think it will hurt anything.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
struct lower_phis_to_scalar_state {
|
2021-07-07 10:49:32 -07:00
|
|
|
nir_shader *shader;
|
2015-01-21 15:23:32 -08:00
|
|
|
void *mem_ctx;
|
2021-07-07 10:43:12 -07:00
|
|
|
struct exec_list dead_instrs;
|
2015-01-21 15:23:32 -08:00
|
|
|
|
2021-02-23 11:31:41 +01:00
|
|
|
bool lower_all;
|
|
|
|
|
|
2015-01-21 15:23:32 -08:00
|
|
|
/* Hash table marking which phi nodes are scalarizable. The key is
|
|
|
|
|
* pointers to phi instructions and the entry is either NULL for not
|
|
|
|
|
* scalarizable or non-null for scalarizable.
|
|
|
|
|
*/
|
|
|
|
|
struct hash_table *phi_table;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state);
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
is_phi_src_scalarizable(nir_phi_src *src,
|
|
|
|
|
struct lower_phis_to_scalar_state *state)
|
|
|
|
|
{
|
|
|
|
|
/* Don't know what to do with non-ssa sources */
|
|
|
|
|
if (!src->src.is_ssa)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
nir_instr *src_instr = src->src.ssa->parent_instr;
|
|
|
|
|
switch (src_instr->type) {
|
|
|
|
|
case nir_instr_type_alu: {
|
|
|
|
|
nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
|
|
|
|
|
|
|
|
|
|
/* ALU operations with output_size == 0 should be scalarized. We
|
|
|
|
|
* will also see a bunch of vecN operations from scalarizing ALU
|
|
|
|
|
* operations and, since they can easily be copy-propagated, they
|
|
|
|
|
* are ok too.
|
|
|
|
|
*/
|
|
|
|
|
return nir_op_infos[src_alu->op].output_size == 0 ||
|
2020-03-30 12:06:52 -05:00
|
|
|
nir_op_is_vec(src_alu->op);
|
2015-01-21 15:23:32 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_instr_type_phi:
|
|
|
|
|
/* A phi is scalarizable if we're going to lower it */
|
|
|
|
|
return should_lower_phi(nir_instr_as_phi(src_instr), state);
|
|
|
|
|
|
|
|
|
|
case nir_instr_type_load_const:
|
|
|
|
|
/* These are trivially scalarizable */
|
|
|
|
|
return true;
|
|
|
|
|
|
2019-02-22 16:59:13 +11:00
|
|
|
case nir_instr_type_ssa_undef:
|
|
|
|
|
/* The caller of this function is going to OR the results and we don't
|
|
|
|
|
* want undefs to count so we return false.
|
|
|
|
|
*/
|
|
|
|
|
return false;
|
|
|
|
|
|
2015-01-21 15:23:32 -08:00
|
|
|
case nir_instr_type_intrinsic: {
|
|
|
|
|
nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr);
|
|
|
|
|
|
|
|
|
|
switch (src_intrin->intrinsic) {
|
2018-03-26 18:13:59 -07:00
|
|
|
case nir_intrinsic_load_deref: {
|
2020-08-17 12:06:56 -05:00
|
|
|
/* Don't scalarize if we see a load of a local variable because it
|
|
|
|
|
* might turn into one of the things we can't scalarize.
|
|
|
|
|
*/
|
2018-03-26 18:13:59 -07:00
|
|
|
nir_deref_instr *deref = nir_src_as_deref(src_intrin->src[0]);
|
2020-11-01 17:15:28 -06:00
|
|
|
return !nir_deref_mode_may_be(deref, nir_var_function_temp |
|
|
|
|
|
nir_var_shader_temp);
|
2018-03-26 18:13:59 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_intrinsic_interp_deref_at_centroid:
|
|
|
|
|
case nir_intrinsic_interp_deref_at_sample:
|
|
|
|
|
case nir_intrinsic_interp_deref_at_offset:
|
2020-01-24 16:01:04 +01:00
|
|
|
case nir_intrinsic_interp_deref_at_vertex:
|
2015-01-21 15:23:32 -08:00
|
|
|
case nir_intrinsic_load_uniform:
|
|
|
|
|
case nir_intrinsic_load_ubo:
|
2015-07-09 10:29:18 +02:00
|
|
|
case nir_intrinsic_load_ssbo:
|
2018-11-19 13:40:35 -06:00
|
|
|
case nir_intrinsic_load_global:
|
2020-08-29 00:59:22 -05:00
|
|
|
case nir_intrinsic_load_global_constant:
|
2015-01-21 15:23:32 -08:00
|
|
|
case nir_intrinsic_load_input:
|
|
|
|
|
return true;
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-11-24 11:02:00 +01:00
|
|
|
FALLTHROUGH;
|
2015-01-21 15:23:32 -08:00
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
/* We can't scalarize this type of instruction */
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Determines if the given phi node should be lowered. The only phi nodes
|
|
|
|
|
* we will scalarize at the moment are those where all of the sources are
|
2021-02-23 11:31:41 +01:00
|
|
|
* scalarizable, unless lower_all is set.
|
2015-01-21 15:23:32 -08:00
|
|
|
*
|
|
|
|
|
* The reason for this comes down to coalescing. Since phi sources can't
|
|
|
|
|
* swizzle, swizzles on phis have to be resolved by inserting a mov right
|
|
|
|
|
* before the phi. The choice then becomes between movs to pick off
|
|
|
|
|
* components for a scalar phi or potentially movs to recombine components
|
|
|
|
|
* for a vector phi. The problem is that the movs generated to pick off
|
|
|
|
|
* the components are almost uncoalescable. We can't coalesce them in NIR
|
|
|
|
|
* because we need them to pick off components and we can't coalesce them
|
|
|
|
|
* in the backend because the source register is a vector and the
|
|
|
|
|
* destination is a scalar that may be used at other places in the program.
|
|
|
|
|
* On the other hand, if we have a bunch of scalars going into a vector
|
|
|
|
|
* phi, the situation is much better. In this case, if the SSA def is
|
|
|
|
|
* generated in the predecessor block to the corresponding phi source, the
|
|
|
|
|
* backend code will be an ALU op into a temporary and then a mov into the
|
|
|
|
|
* given vector component; this move can almost certainly be coalesced
|
|
|
|
|
* away.
|
|
|
|
|
*/
|
|
|
|
|
static bool
|
|
|
|
|
should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state)
|
|
|
|
|
{
|
|
|
|
|
/* Already scalar */
|
|
|
|
|
if (phi->dest.ssa.num_components == 1)
|
|
|
|
|
return false;
|
|
|
|
|
|
2021-02-23 11:31:41 +01:00
|
|
|
if (state->lower_all)
|
|
|
|
|
return true;
|
|
|
|
|
|
2015-01-21 15:23:32 -08:00
|
|
|
struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi);
|
|
|
|
|
if (entry)
|
|
|
|
|
return entry->data != NULL;
|
|
|
|
|
|
|
|
|
|
/* Insert an entry and mark it as scalarizable for now. That way
|
|
|
|
|
* we don't recurse forever and a cycle in the dependence graph
|
|
|
|
|
* won't automatically make us fail to scalarize.
|
|
|
|
|
*/
|
|
|
|
|
entry = _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1);
|
|
|
|
|
|
2019-02-22 16:59:13 +11:00
|
|
|
bool scalarizable = false;
|
2015-01-21 15:23:32 -08:00
|
|
|
|
2016-04-26 20:16:21 -07:00
|
|
|
nir_foreach_phi_src(src, phi) {
|
2019-02-22 16:59:13 +11:00
|
|
|
/* This loop ignores srcs that are not scalarizable because its likely
|
|
|
|
|
* still worth copying to temps if another phi source is scalarizable.
|
|
|
|
|
* This reduces register spilling by a huge amount in the i965 driver for
|
|
|
|
|
* Deus Ex: MD.
|
|
|
|
|
*/
|
2015-01-21 15:23:32 -08:00
|
|
|
scalarizable = is_phi_src_scalarizable(src, state);
|
2019-02-22 16:59:13 +11:00
|
|
|
if (scalarizable)
|
2015-01-21 15:23:32 -08:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2015-06-02 13:42:46 +02:00
|
|
|
/* The hash table entry for 'phi' may have changed while recursing the
|
|
|
|
|
* dependence graph, so we need to reset it */
|
|
|
|
|
entry = _mesa_hash_table_search(state->phi_table, phi);
|
|
|
|
|
assert(entry);
|
|
|
|
|
|
2015-01-21 15:23:32 -08:00
|
|
|
entry->data = (void *)(intptr_t)scalarizable;
|
|
|
|
|
|
|
|
|
|
return scalarizable;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
2016-04-08 16:12:30 -04:00
|
|
|
lower_phis_to_scalar_block(nir_block *block,
|
|
|
|
|
struct lower_phis_to_scalar_state *state)
|
2015-01-21 15:23:32 -08:00
|
|
|
{
|
2016-09-13 15:14:28 -07:00
|
|
|
bool progress = false;
|
2023-05-11 13:53:59 -04:00
|
|
|
nir_phi_instr *last_phi = nir_block_last_phi_instr(block);
|
2015-01-21 15:23:32 -08:00
|
|
|
|
|
|
|
|
/* We have to handle the phi nodes in their own pass due to the way
|
|
|
|
|
* we're modifying the linked list of instructions.
|
|
|
|
|
*/
|
2023-05-11 13:20:43 -04:00
|
|
|
nir_foreach_phi_safe(phi, block) {
|
2015-01-21 15:23:32 -08:00
|
|
|
if (!should_lower_phi(phi, state))
|
|
|
|
|
continue;
|
|
|
|
|
|
2015-11-17 13:57:54 +01:00
|
|
|
unsigned bit_size = phi->dest.ssa.bit_size;
|
|
|
|
|
|
2015-01-21 15:23:32 -08:00
|
|
|
/* Create a vecN operation to combine the results. Most of these
|
|
|
|
|
* will be redundant, but copy propagation should clean them up for
|
|
|
|
|
* us. No need to add the complexity here.
|
|
|
|
|
*/
|
2020-03-30 12:06:52 -05:00
|
|
|
nir_op vec_op = nir_op_vec(phi->dest.ssa.num_components);
|
2015-01-21 15:23:32 -08:00
|
|
|
|
2021-07-07 10:49:32 -07:00
|
|
|
nir_alu_instr *vec = nir_alu_instr_create(state->shader, vec_op);
|
2015-01-21 15:23:32 -08:00
|
|
|
nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
|
nir: Drop unused name from nir_ssa_dest_init
Since 624e799cc34 ("nir: Drop nir_ssa_def::name and nir_register::name"), SSA
defs don't have names, making the name argument unused. Drop it from the
signature and fix the call sites. This was done with the help of the following
Coccinelle semantic patch:
@@
expression A, B, C, D, E;
@@
-nir_ssa_dest_init(A, B, C, D, E);
+nir_ssa_dest_init(A, B, C, D);
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23078>
2023-05-17 09:08:22 -04:00
|
|
|
phi->dest.ssa.num_components, bit_size);
|
2015-01-21 15:23:32 -08:00
|
|
|
vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
|
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) {
|
2021-07-07 10:49:32 -07:00
|
|
|
nir_phi_instr *new_phi = nir_phi_instr_create(state->shader);
|
2015-11-17 13:57:54 +01:00
|
|
|
nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1,
|
nir: Drop unused name from nir_ssa_dest_init
Since 624e799cc34 ("nir: Drop nir_ssa_def::name and nir_register::name"), SSA
defs don't have names, making the name argument unused. Drop it from the
signature and fix the call sites. This was done with the help of the following
Coccinelle semantic patch:
@@
expression A, B, C, D, E;
@@
-nir_ssa_dest_init(A, B, C, D, E);
+nir_ssa_dest_init(A, B, C, D);
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23078>
2023-05-17 09:08:22 -04:00
|
|
|
phi->dest.ssa.bit_size);
|
2015-01-21 15:23:32 -08:00
|
|
|
|
|
|
|
|
vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa);
|
|
|
|
|
|
2016-04-26 20:16:21 -07:00
|
|
|
nir_foreach_phi_src(src, phi) {
|
2015-01-21 15:23:32 -08:00
|
|
|
/* We need to insert a mov to grab the i'th component of src */
|
2021-07-07 10:49:32 -07:00
|
|
|
nir_alu_instr *mov = nir_alu_instr_create(state->shader,
|
2019-05-06 11:45:46 -05:00
|
|
|
nir_op_mov);
|
nir: Drop unused name from nir_ssa_dest_init
Since 624e799cc34 ("nir: Drop nir_ssa_def::name and nir_register::name"), SSA
defs don't have names, making the name argument unused. Drop it from the
signature and fix the call sites. This was done with the help of the following
Coccinelle semantic patch:
@@
expression A, B, C, D, E;
@@
-nir_ssa_dest_init(A, B, C, D, E);
+nir_ssa_dest_init(A, B, C, D);
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23078>
2023-05-17 09:08:22 -04:00
|
|
|
nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, bit_size);
|
2015-01-21 15:23:32 -08:00
|
|
|
mov->dest.write_mask = 1;
|
2021-09-08 11:37:07 +01:00
|
|
|
nir_src_copy(&mov->src[0].src, &src->src, &mov->instr);
|
2015-01-21 15:23:32 -08:00
|
|
|
mov->src[0].swizzle[0] = i;
|
|
|
|
|
|
|
|
|
|
/* Insert at the end of the predecessor but before the jump */
|
|
|
|
|
nir_instr *pred_last_instr = nir_block_last_instr(src->pred);
|
|
|
|
|
if (pred_last_instr && pred_last_instr->type == nir_instr_type_jump)
|
|
|
|
|
nir_instr_insert_before(pred_last_instr, &mov->instr);
|
|
|
|
|
else
|
|
|
|
|
nir_instr_insert_after_block(src->pred, &mov->instr);
|
|
|
|
|
|
2021-07-07 13:24:45 -07:00
|
|
|
nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(&mov->dest.dest.ssa));
|
2015-01-21 15:23:32 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_instr_insert_before(&phi->instr, &new_phi->instr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_instr_insert_after(&last_phi->instr, &vec->instr);
|
|
|
|
|
|
|
|
|
|
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
|
2021-03-03 00:13:38 -06:00
|
|
|
&vec->dest.dest.ssa);
|
2015-01-21 15:23:32 -08:00
|
|
|
|
|
|
|
|
nir_instr_remove(&phi->instr);
|
2021-07-07 10:43:12 -07:00
|
|
|
exec_list_push_tail(&state->dead_instrs, &phi->instr.node);
|
2015-01-21 15:23:32 -08:00
|
|
|
|
2016-09-13 15:14:28 -07:00
|
|
|
progress = true;
|
|
|
|
|
|
2015-01-21 15:23:32 -08:00
|
|
|
/* We're using the safe iterator and inserting all the newly
|
|
|
|
|
* scalarized phi nodes before their non-scalarized version so that's
|
|
|
|
|
* ok. However, we are also inserting vec operations after all of
|
|
|
|
|
* the last phi node so once we get here, we can't trust even the
|
|
|
|
|
* safe iterator to stop properly. We have to break manually.
|
|
|
|
|
*/
|
2023-05-11 13:20:43 -04:00
|
|
|
if (phi == last_phi)
|
2015-01-21 15:23:32 -08:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-13 15:14:28 -07:00
|
|
|
return progress;
|
2015-01-21 15:23:32 -08:00
|
|
|
}
|
|
|
|
|
|
2016-09-13 15:14:28 -07:00
|
|
|
static bool
|
2021-02-23 11:31:41 +01:00
|
|
|
lower_phis_to_scalar_impl(nir_function_impl *impl, bool lower_all)
|
2015-01-21 15:23:32 -08:00
|
|
|
{
|
|
|
|
|
struct lower_phis_to_scalar_state state;
|
2016-09-13 15:14:28 -07:00
|
|
|
bool progress = false;
|
2015-01-21 15:23:32 -08:00
|
|
|
|
2021-07-07 10:49:32 -07:00
|
|
|
state.shader = impl->function->shader;
|
2015-01-21 15:23:32 -08:00
|
|
|
state.mem_ctx = ralloc_parent(impl);
|
2021-07-07 10:43:12 -07:00
|
|
|
exec_list_make_empty(&state.dead_instrs);
|
|
|
|
|
state.phi_table = _mesa_pointer_hash_table_create(NULL);
|
2021-02-23 11:31:41 +01:00
|
|
|
state.lower_all = lower_all;
|
2015-01-21 15:23:32 -08:00
|
|
|
|
2016-04-08 16:12:30 -04:00
|
|
|
nir_foreach_block(block, impl) {
|
2016-09-13 15:14:28 -07:00
|
|
|
progress = lower_phis_to_scalar_block(block, &state) || progress;
|
2016-04-08 16:12:30 -04:00
|
|
|
}
|
2015-01-21 15:23:32 -08:00
|
|
|
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_block_index |
|
|
|
|
|
nir_metadata_dominance);
|
|
|
|
|
|
2021-07-07 10:43:12 -07:00
|
|
|
nir_instr_free_list(&state.dead_instrs);
|
|
|
|
|
|
|
|
|
|
ralloc_free(state.phi_table);
|
|
|
|
|
|
2016-09-13 15:14:28 -07:00
|
|
|
return progress;
|
2015-01-21 15:23:32 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** A pass that lowers vector phi nodes to scalar
|
|
|
|
|
*
|
|
|
|
|
* This pass loops through the blocks and lowers looks for vector phi nodes
|
|
|
|
|
* it can lower to scalar phi nodes. Not all phi nodes are lowered. For
|
|
|
|
|
* instance, if one of the sources is a non-scalarizable vector, then we
|
|
|
|
|
* don't bother lowering because that would generate hard-to-coalesce movs.
|
|
|
|
|
*/
|
2016-09-13 15:14:28 -07:00
|
|
|
bool
|
2021-02-23 11:31:41 +01:00
|
|
|
nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all)
|
2015-01-21 15:23:32 -08:00
|
|
|
{
|
2016-09-13 15:14:28 -07:00
|
|
|
bool progress = false;
|
|
|
|
|
|
2016-04-26 20:26:42 -07:00
|
|
|
nir_foreach_function(function, shader) {
|
2015-12-26 10:00:47 -08:00
|
|
|
if (function->impl)
|
2021-02-23 11:31:41 +01:00
|
|
|
progress = lower_phis_to_scalar_impl(function->impl, lower_all) || progress;
|
2015-01-21 15:23:32 -08:00
|
|
|
}
|
2016-09-13 15:14:28 -07:00
|
|
|
|
|
|
|
|
return progress;
|
2015-01-21 15:23:32 -08:00
|
|
|
}
|