/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_phi_builder.h"

#include "util/u_dynarray.h"
#include "util/u_math.h"

static bool
move_system_values_to_top(nir_shader *shader)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         /* These intrinsics not only can't be re-materialized but aren't
          * preserved when moving to the continuation shader.  We have to move
          * them to the top to ensure they get spilled as needed.
          */
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_shader_record_ptr:
         case nir_intrinsic_load_btd_local_arg_addr_intel:
            nir_instr_remove(instr);
            nir_instr_insert(nir_before_cf_list(&impl->body), instr);
            progress = true;
            break;

         default:
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

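/* Returns true if the instruction is one of the shader call intrinsics this
 * pass lowers (trace_ray, report_ray_intersection or execute_callable).
 */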
static bool
instr_is_shader_call(nir_instr *instr)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   return intrin->intrinsic == nir_intrinsic_trace_ray ||
          intrin->intrinsic == nir_intrinsic_report_ray_intersection ||
          intrin->intrinsic == nir_intrinsic_execute_callable;
}

/* Previously named bitset, it had to be renamed as FreeBSD defines a struct
 * named bitset in sys/_bitset.h required by pthread_np.h which is included
 * from src/util/u_thread.h that is indirectly included by this file.
 */
struct brw_bitset {
   BITSET_WORD *set;
   unsigned size;
};

static struct brw_bitset
bitset_create(void *mem_ctx, unsigned size)
{
   return (struct brw_bitset) {
      .set = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(size)),
      .size = size,
   };
}

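/* nir_foreach_src callback: returns whether the source's SSA index is marked
 * in the given brw_bitset (indices beyond the set's size are treated as not
 * set).
 */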
static bool
src_is_in_bitset(nir_src *src, void *_set)
{
   struct brw_bitset *set = _set;
   assert(src->is_ssa);

   /* Any SSA values which were added after we generated liveness information
    * are things generated by this pass and, while most of it is arithmetic
    * which we could re-materialize, we don't need to because it's only used
    * for a single load/store and so shouldn't cross any shader calls.
    */
   if (src->ssa->index >= set->size)
      return false;

   return BITSET_TEST(set->set, src->ssa->index);
}

static void
add_ssa_def_to_bitset(nir_ssa_def *def, struct brw_bitset *set)
{
   if (def->index >= set->size)
      return;

   BITSET_SET(set->set, def->index);
}

static bool
can_remat_instr(nir_instr *instr, struct brw_bitset *remat)
{
   /* Set of all values which are trivially re-materializable and we shouldn't
    * ever spill them.  This includes:
    *
    *  - Undef values
    *  - Constants
    *  - Uniforms (UBO or push constant)
    *  - ALU combinations of any of the above
    *  - Derefs which are either complete or casts of any of the above
    *
    * Because this pass rewrites things in-order and phis are always turned
    * into register writes, we can use "is it SSA?" to answer the question
    * "can my source be re-materialized?".
    */
   switch (instr->type) {
   case nir_instr_type_alu:
      if (!nir_instr_as_alu(instr)->dest.dest.is_ssa)
         return false;

      return nir_foreach_src(instr, src_is_in_bitset, remat);

   case nir_instr_type_deref:
      return nir_foreach_src(instr, src_is_in_bitset, remat);

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_uniform:
      case nir_intrinsic_load_ubo:
      case nir_intrinsic_vulkan_resource_index:
      case nir_intrinsic_vulkan_resource_reindex:
      case nir_intrinsic_load_vulkan_descriptor:
      case nir_intrinsic_load_push_constant:
      case nir_intrinsic_load_global_constant:
      case nir_intrinsic_load_global_const_block_intel:
      case nir_intrinsic_load_desc_set_address_intel:
         /* These intrinsics don't need to be spilled as long as they don't
          * depend on any spilled values.
          */
         return nir_foreach_src(instr, src_is_in_bitset, remat);

      case nir_intrinsic_load_scratch_base_ptr:
      case nir_intrinsic_load_ray_launch_id:
      case nir_intrinsic_load_topology_id_intel:
      case nir_intrinsic_load_btd_global_arg_addr_intel:
      case nir_intrinsic_load_btd_resume_sbt_addr_intel:
      case nir_intrinsic_load_ray_base_mem_addr_intel:
      case nir_intrinsic_load_ray_hw_stack_size_intel:
      case nir_intrinsic_load_ray_sw_stack_size_intel:
      case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
      case nir_intrinsic_load_ray_hit_sbt_addr_intel:
      case nir_intrinsic_load_ray_hit_sbt_stride_intel:
      case nir_intrinsic_load_ray_miss_sbt_addr_intel:
      case nir_intrinsic_load_ray_miss_sbt_stride_intel:
      case nir_intrinsic_load_callable_sbt_addr_intel:
      case nir_intrinsic_load_callable_sbt_stride_intel:
      case nir_intrinsic_load_reloc_const_intel:
      case nir_intrinsic_load_ray_query_global_intel:
      case nir_intrinsic_load_ray_launch_size:
         /* Notably missing from the above list is btd_local_arg_addr_intel.
          * This is because the resume shader will have a different local
          * argument pointer because it has a different BSR.  Any access of
          * the original shader's local arguments needs to be preserved so
          * that pointer has to be saved on the stack.
          *
          * TODO: There may be some system values we want to avoid
          *       re-materializing as well but we have to be very careful
          *       to ensure that it's a system value which cannot change
          *       across a shader call.
          */
         return true;

      default:
         return false;
      }
   }

   case nir_instr_type_ssa_undef:
   case nir_instr_type_load_const:
      return true;

   default:
      return false;
   }
}

static bool
can_remat_ssa_def(nir_ssa_def *def, struct brw_bitset *remat)
{
   return can_remat_instr(def->parent_instr, remat);
}

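/* State passed to add_src_instr() while gathering the chain of instructions
 * needed to rebuild a value.
 */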
struct add_instr_data {
   struct util_dynarray *buf;
   struct brw_bitset *remat;
};

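/* nir_foreach_src callback: queues the instruction producing each source that
 * is not already rematerializable and not already in the work list.  Returns
 * false (aborting the walk) for non-SSA sources.
 */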
static bool
add_src_instr(nir_src *src, void *state)
{
   if (!src->is_ssa)
      return false;

   struct add_instr_data *data = state;
   if (BITSET_TEST(data->remat->set, src->ssa->index))
      return true;

   util_dynarray_foreach(data->buf, nir_instr *, instr_ptr) {
      if (*instr_ptr == src->ssa->parent_instr)
         return true;
   }

   util_dynarray_append(data->buf, nir_instr *, src->ssa->parent_instr);
   return true;
}

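/* qsort comparator ordering instructions by their index so that clones are
 * emitted in the original program order.
 */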
static int
compare_instr_indexes(const void *_inst1, const void *_inst2)
{
   const nir_instr * const *inst1 = _inst1;
   const nir_instr * const *inst2 = _inst2;

   return (*inst1)->index - (*inst2)->index;
}

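/* Checks whether def can be rebuilt by rematerializing an entire chain of
 * instructions.  On success, buf holds the instructions to clone sorted by
 * index; on failure, buf is cleared.
 */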
static bool
can_remat_chain_ssa_def(nir_ssa_def *def, struct brw_bitset *remat, struct util_dynarray *buf)
{
   assert(util_dynarray_num_elements(buf, nir_instr *) == 0);

   void *mem_ctx = ralloc_context(NULL);

   /* Add all the instructions involved in building this ssa_def */
   util_dynarray_append(buf, nir_instr *, def->parent_instr);

   unsigned idx = 0;
   struct add_instr_data data = {
      .buf = buf,
      .remat = remat,
   };
   while (idx < util_dynarray_num_elements(buf, nir_instr *)) {
      nir_instr *instr = *util_dynarray_element(buf, nir_instr *, idx++);
      if (!nir_foreach_src(instr, add_src_instr, &data))
         goto fail;
   }

   /* Sort instructions by index */
   qsort(util_dynarray_begin(buf),
         util_dynarray_num_elements(buf, nir_instr *),
         sizeof(nir_instr *),
         compare_instr_indexes);

   /* Create a temporary bitset with all values already
    * rematerialized/rematerializable.  We'll add to this bitset as we go
    * through values that might not be in that set but that we can
    * rematerialize.
    */
   struct brw_bitset potential_remat = bitset_create(mem_ctx, remat->size);
   memcpy(potential_remat.set, remat->set, BITSET_WORDS(remat->size) * sizeof(BITSET_WORD));

   util_dynarray_foreach(buf, nir_instr *, instr_ptr) {
      nir_ssa_def *instr_ssa_def = nir_instr_ssa_def(*instr_ptr);

      /* If it's already in the potential rematerializable set, nothing to do. */
      if (BITSET_TEST(potential_remat.set, instr_ssa_def->index))
         continue;

      if (!can_remat_instr(*instr_ptr, &potential_remat))
         goto fail;

      /* All the sources are rematerializable and the instruction is also
       * rematerializable, mark it as rematerializable too.
       */
      BITSET_SET(potential_remat.set, instr_ssa_def->index);
   }

   ralloc_free(mem_ctx);

   return true;

fail:
   util_dynarray_clear(buf);
   ralloc_free(mem_ctx);
   return false;
}

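/* Clones the instruction producing def at the builder's cursor, remapping its
 * sources through remap_table, and returns the clone's SSA def.
 */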
static nir_ssa_def *
remat_ssa_def(nir_builder *b, nir_ssa_def *def, struct hash_table *remap_table)
{
   nir_instr *clone = nir_instr_clone_deep(b->shader, def->parent_instr, remap_table);
   nir_builder_instr_insert(b, clone);
   return nir_instr_ssa_def(clone);
}

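/* Rematerializes every instruction in buf (as gathered by
 * can_remat_chain_ssa_def) that doesn't already have a fill for this call,
 * recording each clone in fill_defs and remap_table and marking it in remat.
 * Returns the last clone, i.e. the rebuilt value.
 */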
static nir_ssa_def *
remat_chain_ssa_def(nir_builder *b, struct util_dynarray *buf,
                    struct brw_bitset *remat, nir_ssa_def ***fill_defs,
                    unsigned call_idx, struct hash_table *remap_table)
{
   nir_ssa_def *last_def = NULL;

   util_dynarray_foreach(buf, nir_instr *, instr_ptr) {
      nir_ssa_def *instr_ssa_def = nir_instr_ssa_def(*instr_ptr);
      unsigned ssa_index = instr_ssa_def->index;

      if (fill_defs[ssa_index] != NULL &&
          fill_defs[ssa_index][call_idx] != NULL)
         continue;

      /* Clone the instruction we want to rematerialize */
      nir_ssa_def *clone_ssa_def = remat_ssa_def(b, instr_ssa_def, remap_table);

      if (fill_defs[ssa_index] == NULL) {
         fill_defs[ssa_index] =
            rzalloc_array(fill_defs, nir_ssa_def *, remat->size);
      }

      /* Add the new ssa_def to the list fill_defs and flag it as
       * rematerialized
       */
      fill_defs[ssa_index][call_idx] = last_def = clone_ssa_def;
      BITSET_SET(remat->set, ssa_index);

      _mesa_hash_table_insert(remap_table, instr_ssa_def, last_def);
   }

   return last_def;
}

struct pbv_array {
   struct nir_phi_builder_value **arr;
   unsigned len;
};

static struct nir_phi_builder_value *
get_phi_builder_value_for_def(nir_ssa_def *def,
                              struct pbv_array *pbv_arr)
{
   if (def->index >= pbv_arr->len)
      return NULL;

   return pbv_arr->arr[def->index];
}

static nir_ssa_def *
get_phi_builder_def_for_src(nir_src *src, struct pbv_array *pbv_arr,
                            nir_block *block)
{
   assert(src->is_ssa);

   struct nir_phi_builder_value *pbv =
      get_phi_builder_value_for_def(src->ssa, pbv_arr);
   if (pbv == NULL)
      return NULL;

   return nir_phi_builder_value_get_block_def(pbv, block);
}

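/* nir_foreach_src callback: rewrites a source to the phi-builder definition
 * valid in the relevant block.  For phi sources, that is the predecessor
 * block rather than the block containing the phi.
 */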
static bool
rewrite_instr_src_from_phi_builder(nir_src *src, void *_pbv_arr)
{
   nir_block *block;
   if (src->parent_instr->type == nir_instr_type_phi) {
      nir_phi_src *phi_src = exec_node_data(nir_phi_src, src, src);
      block = phi_src->pred;
   } else {
      block = src->parent_instr->block;
   }

   nir_ssa_def *new_def = get_phi_builder_def_for_src(src, _pbv_arr, block);
   if (new_def != NULL)
      nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new_def));
   return true;
}

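/* Emits a stack store of def before the call and a matching stack load after
 * it, returning the reloaded value.
 */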
static nir_ssa_def *
spill_fill(nir_builder *before, nir_builder *after, nir_ssa_def *def,
           unsigned value_id, unsigned call_idx,
           unsigned offset, unsigned stack_alignment)
{
   const unsigned comp_size = def->bit_size / 8;

   nir_store_stack(before, def,
                   .base = offset,
                   .call_idx = call_idx,
                   .align_mul = MIN2(comp_size, stack_alignment),
                   .value_id = value_id,
                   .write_mask = BITFIELD_MASK(def->num_components));
   return nir_load_stack(after, def->num_components, def->bit_size,
                         .base = offset,
                         .call_idx = call_idx,
                         .value_id = value_id,
                         .align_mul = MIN2(comp_size, stack_alignment));
}

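/* Spills, fills or rematerializes every SSA def that is live across a shader
 * call and lowers each call to the corresponding rt_* intrinsic followed by
 * an rt_resume carrying the call index and stack size.
 */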
static void
spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
                                      unsigned stack_alignment)
{
   /* TODO: If a SSA def is filled more than once, we probably want to just
    *       spill it at the LCM of the fill sites so we avoid unnecessary
    *       extra spills
    *
    * TODO: If a SSA def is defined outside a loop but live through some call
    *       inside the loop, we probably want to spill outside the loop.  We
    *       may also want to fill outside the loop if it's not used in the
    *       loop.
    *
    * TODO: Right now, we only re-materialize things if their immediate
    *       sources are things which we filled.  We probably want to expand
    *       that to re-materialize things whose sources are things we can
    *       re-materialize from things we filled.  We may want some DAG depth
    *       heuristic on this.
    */

   /* This happens per-shader rather than per-impl because we mess with
    * nir_shader::scratch_size.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_metadata_require(impl, nir_metadata_live_ssa_defs |
                              nir_metadata_dominance |
                              nir_metadata_block_index |
                              nir_metadata_instr_index);

   void *mem_ctx = ralloc_context(shader);

   const unsigned num_ssa_defs = impl->ssa_alloc;
   const unsigned live_words = BITSET_WORDS(num_ssa_defs);
   struct brw_bitset trivial_remat = bitset_create(mem_ctx, num_ssa_defs);

   /* Array of all live SSA defs which are spill candidates */
   nir_ssa_def **spill_defs =
      rzalloc_array(mem_ctx, nir_ssa_def *, num_ssa_defs);

   /* For each spill candidate, an array of every time it's defined by a fill,
    * indexed by call instruction index.
    */
   nir_ssa_def ***fill_defs =
      rzalloc_array(mem_ctx, nir_ssa_def **, num_ssa_defs);

   /* For each call instruction, the liveness set at the call */
   const BITSET_WORD **call_live =
      rzalloc_array(mem_ctx, const BITSET_WORD *, num_calls);

   /* For each call instruction, the block index of the block it lives in */
   uint32_t *call_block_indices = rzalloc_array(mem_ctx, uint32_t, num_calls);

   /* Remap table when rebuilding instructions out of fill operations */
   struct hash_table *trivial_remap_table =
      _mesa_pointer_hash_table_create(mem_ctx);

   /* Walk the call instructions and fetch the liveness set and block index
    * for each one.  We need to do this before we start modifying the shader
    * so that liveness doesn't complain that it's been invalidated.  Don't
    * worry, we'll be very careful with our live sets. :-)
    */
   unsigned call_idx = 0;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (!instr_is_shader_call(instr))
            continue;

         call_block_indices[call_idx] = block->index;

         /* The objective here is to preserve values around shader call
          * instructions.  Therefore, we use the live set after the
          * instruction as the set of things we want to preserve.  Because
          * none of our shader call intrinsics return anything, we don't have
          * to worry about spilling over a return value.
          *
          * TODO: This isn't quite true for report_intersection.
          */
         call_live[call_idx] =
            nir_get_live_ssa_defs(nir_after_instr(instr), mem_ctx);

         call_idx++;
      }
   }

   nir_builder before, after;
   nir_builder_init(&before, impl);
   nir_builder_init(&after, impl);

   call_idx = 0;
   unsigned max_scratch_size = shader->scratch_size;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         nir_ssa_def *def = nir_instr_ssa_def(instr);
         if (def != NULL) {
            if (can_remat_ssa_def(def, &trivial_remat)) {
               add_ssa_def_to_bitset(def, &trivial_remat);
               _mesa_hash_table_insert(trivial_remap_table, def, def);
            } else {
               spill_defs[def->index] = def;
            }
         }

         if (!instr_is_shader_call(instr))
            continue;

         const BITSET_WORD *live = call_live[call_idx];

         struct hash_table *remap_table =
            _mesa_hash_table_clone(trivial_remap_table, mem_ctx);

         /* Make a copy of trivial_remat that we'll update as we crawl through
          * the live SSA defs and unspill them.
          */
         struct brw_bitset remat = bitset_create(mem_ctx, num_ssa_defs);
         memcpy(remat.set, trivial_remat.set, live_words * sizeof(BITSET_WORD));

         /* Because the two builders are always separated by the call
          * instruction, it won't break anything to have two of them.
          */
         before.cursor = nir_before_instr(instr);
         after.cursor = nir_after_instr(instr);

         /* Array used to hold all the values needed to rematerialize a live
          * value.
          */
         struct util_dynarray remat_chain;
         util_dynarray_init(&remat_chain, mem_ctx);

         unsigned offset = shader->scratch_size;
         for (unsigned w = 0; w < live_words; w++) {
            BITSET_WORD spill_mask = live[w] & ~trivial_remat.set[w];
            while (spill_mask) {
               int i = u_bit_scan(&spill_mask);
               assert(i >= 0);
               unsigned index = w * BITSET_WORDBITS + i;
               assert(index < num_ssa_defs);

               def = spill_defs[index];
               nir_ssa_def *original_def = def, *new_def;
               if (can_remat_ssa_def(def, &remat)) {
                  /* If this SSA def is re-materializable or based on other
                   * things we've already spilled, re-materialize it rather
                   * than spilling and filling.  Anything which is trivially
                   * re-materializable won't even get here because we take
                   * those into account in spill_mask above.
                   */
                  new_def = remat_ssa_def(&after, def, remap_table);
               } else if (can_remat_chain_ssa_def(def, &remat, &remat_chain)) {
                  new_def = remat_chain_ssa_def(&after, &remat_chain, &remat,
                                                fill_defs, call_idx,
                                                remap_table);
                  util_dynarray_clear(&remat_chain);
               } else {
                  bool is_bool = def->bit_size == 1;
                  if (is_bool)
                     def = nir_b2b32(&before, def);

                  const unsigned comp_size = def->bit_size / 8;
                  offset = ALIGN(offset, comp_size);

                  new_def = spill_fill(&before, &after, def,
                                       index, call_idx,
                                       offset, stack_alignment);

                  if (is_bool)
                     new_def = nir_b2b1(&after, new_def);

                  offset += def->num_components * comp_size;
               }

               /* Mark this SSA def as available in the remat set so that, if
                * some other SSA def we need is computed based on it, we can
                * just re-compute instead of fetching from memory.
                */
               BITSET_SET(remat.set, index);

               /* For now, we just make a note of this new SSA def.  We'll
                * fix things up with the phi builder as a second pass.
                */
               if (fill_defs[index] == NULL) {
                  fill_defs[index] =
                     rzalloc_array(fill_defs, nir_ssa_def *, num_calls);
               }
               fill_defs[index][call_idx] = new_def;
               _mesa_hash_table_insert(remap_table, original_def, new_def);
            }
         }

         nir_builder *b = &before;

         offset = ALIGN(offset, stack_alignment);
         max_scratch_size = MAX2(max_scratch_size, offset);

         /* First thing on the called shader's stack is the resume address
          * followed by a pointer to the payload.
          */
         nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);

         /* Lower to generic intrinsics with information about the stack & resume shader. */
         switch (call->intrinsic) {
         case nir_intrinsic_trace_ray: {
            nir_rt_trace_ray(b, call->src[0].ssa, call->src[1].ssa,
                             call->src[2].ssa, call->src[3].ssa,
                             call->src[4].ssa, call->src[5].ssa,
                             call->src[6].ssa, call->src[7].ssa,
                             call->src[8].ssa, call->src[9].ssa,
                             call->src[10].ssa,
                             .call_idx = call_idx, .stack_size = offset);
            break;
         }

         case nir_intrinsic_report_ray_intersection:
            unreachable("Any-hit shaders must be inlined");

         case nir_intrinsic_execute_callable: {
            nir_rt_execute_callable(b, call->src[0].ssa, call->src[1].ssa, .call_idx = call_idx, .stack_size = offset);
            break;
         }

         default:
            unreachable("Invalid shader call instruction");
         }

         nir_rt_resume(b, .call_idx = call_idx, .stack_size = offset);

         nir_instr_remove(&call->instr);

         call_idx++;
      }
   }
   assert(call_idx == num_calls);
   shader->scratch_size = max_scratch_size;

   struct nir_phi_builder *pb = nir_phi_builder_create(impl);
   struct pbv_array pbv_arr = {
      .arr = rzalloc_array(mem_ctx, struct nir_phi_builder_value *,
                           num_ssa_defs),
      .len = num_ssa_defs,
   };

   const unsigned block_words = BITSET_WORDS(impl->num_blocks);
   BITSET_WORD *def_blocks = ralloc_array(mem_ctx, BITSET_WORD, block_words);

   /* Go through and set up phi builder values for each spillable value which
    * we ever needed to spill at any point.
    */
   for (unsigned index = 0; index < num_ssa_defs; index++) {
      if (fill_defs[index] == NULL)
         continue;

      nir_ssa_def *def = spill_defs[index];

      memset(def_blocks, 0, block_words * sizeof(BITSET_WORD));
      BITSET_SET(def_blocks, def->parent_instr->block->index);
      for (unsigned call_idx = 0; call_idx < num_calls; call_idx++) {
         if (fill_defs[index][call_idx] != NULL)
            BITSET_SET(def_blocks, call_block_indices[call_idx]);
      }

      pbv_arr.arr[index] = nir_phi_builder_add_value(pb, def->num_components,
                                                     def->bit_size, def_blocks);
   }

   /* Walk the shader one more time and rewrite SSA defs as needed using the
    * phi builder.
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         nir_ssa_def *def = nir_instr_ssa_def(instr);
         if (def != NULL) {
            struct nir_phi_builder_value *pbv =
               get_phi_builder_value_for_def(def, &pbv_arr);
            if (pbv != NULL)
               nir_phi_builder_value_set_block_def(pbv, block, def);
         }

         if (instr->type == nir_instr_type_phi)
            continue;

         nir_foreach_src(instr, rewrite_instr_src_from_phi_builder, &pbv_arr);

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *resume = nir_instr_as_intrinsic(instr);
         if (resume->intrinsic != nir_intrinsic_rt_resume)
            continue;

         call_idx = nir_intrinsic_call_idx(resume);

         /* Technically, this is the wrong place to add the fill defs to the
          * phi builder values because we haven't seen any of the load_scratch
          * instructions for this call yet.  However, we know based on how we
          * emitted them that no value ever gets used until after the load
          * instruction has been emitted so this should be safe.  If we ever
          * fail validation due to this, it likely means a bug in our spilling
          * code and not the phi re-construction code here.
          */
         for (unsigned index = 0; index < num_ssa_defs; index++) {
            if (fill_defs[index] && fill_defs[index][call_idx]) {
               nir_phi_builder_value_set_block_def(pbv_arr.arr[index], block,
                                                   fill_defs[index][call_idx]);
            }
         }
      }
nir_if *following_if = nir_block_get_following_if(block);
|
|
|
|
|
if (following_if) {
|
|
|
|
|
nir_ssa_def *new_def =
|
|
|
|
|
get_phi_builder_def_for_src(&following_if->condition,
|
|
|
|
|
&pbv_arr, block);
|
|
|
|
|
if (new_def != NULL)
|
|
|
|
|
nir_if_rewrite_condition(following_if, nir_src_for_ssa(new_def));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Handle phi sources that source from this block. We have to do this
|
|
|
|
|
* as a separate pass because the phi builder assumes that uses and
|
|
|
|
|
* defs are processed in an order that respects dominance. When we have
|
|
|
|
|
* loops, a phi source may be a back-edge so we have to handle it as if
|
|
|
|
|
* it were one of the last instructions in the predecessor block.
|
|
|
|
|
*/
|
|
|
|
|
nir_foreach_phi_src_leaving_block(block,
|
|
|
|
|
rewrite_instr_src_from_phi_builder,
|
|
|
|
|
&pbv_arr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_phi_builder_finish(pb);
|
|
|
|
|
|
|
|
|
|
ralloc_free(mem_ctx);
|
|
|
|
|
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_block_index |
|
|
|
|
|
nir_metadata_dominance);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
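/* Find the nir_intrinsic_rt_resume instruction whose call_idx matches the
 * given call index.
 */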
static nir_instr *
find_resume_instr(nir_function_impl *impl, unsigned call_idx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *resume = nir_instr_as_intrinsic(instr);
         if (resume->intrinsic != nir_intrinsic_rt_resume)
            continue;

         if (nir_intrinsic_call_idx(resume) == call_idx)
            return &resume->instr;
      }
   }
   unreachable("Couldn't find resume instruction");
}

/* Walk the CF tree and duplicate the contents of every loop, one half runs on
 * resume and the other half is for any post-resume loop iterations.  We are
 * careful in our duplication to ensure that resume_instr is in the resume
 * half of the loop though a copy of resume_instr will remain in the other
 * half as well in case the same shader call happens twice.
 */
static bool
duplicate_loop_bodies(nir_function_impl *impl, nir_instr *resume_instr)
{
   nir_register *resume_reg = NULL;
   for (nir_cf_node *node = resume_instr->block->cf_node.parent;
        node->type != nir_cf_node_function; node = node->parent) {
      if (node->type != nir_cf_node_loop)
         continue;

      nir_loop *loop = nir_cf_node_as_loop(node);

      if (resume_reg == NULL) {
         /* We only create resume_reg if we encounter a loop.  This way we can
          * avoid re-validating the shader and calling ssa_to_regs in the case
          * where it's just if-ladders.
          */
         resume_reg = nir_local_reg_create(impl);
         resume_reg->num_components = 1;
         resume_reg->bit_size = 1;

         nir_builder b;
         nir_builder_init(&b, impl);

         /* Initialize resume to true */
         b.cursor = nir_before_cf_list(&impl->body);
         nir_store_reg(&b, resume_reg, nir_imm_true(&b), 1);

         /* Set resume to false right after the resume instruction */
         b.cursor = nir_after_instr(resume_instr);
         nir_store_reg(&b, resume_reg, nir_imm_false(&b), 1);
      }

      /* Before we go any further, make sure that everything which exits the
       * loop or continues around to the top of the loop does so through
       * registers.  We're about to duplicate the loop body and we'll have
       * serious trouble if we don't do this.
       */
      nir_convert_loop_to_lcssa(loop);
      nir_lower_phis_to_regs_block(nir_loop_first_block(loop));
      nir_lower_phis_to_regs_block(
         nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));

      nir_cf_list cf_list;
      nir_cf_list_extract(&cf_list, &loop->body);

      nir_if *_if = nir_if_create(impl->function->shader);
      _if->condition = nir_src_for_reg(resume_reg);
      nir_cf_node_insert(nir_after_cf_list(&loop->body), &_if->cf_node);

      nir_cf_list clone;
      nir_cf_list_clone(&clone, &cf_list, &loop->cf_node, NULL);

      /* Insert the clone in the else and the original in the then so that
       * the resume_instr remains valid even after the duplication.
       */
      nir_cf_reinsert(&cf_list, nir_before_cf_list(&_if->then_list));
      nir_cf_reinsert(&clone, nir_before_cf_list(&_if->else_list));
   }

   if (resume_reg != NULL)
      nir_metadata_preserve(impl, nir_metadata_none);

   return resume_reg != NULL;
}

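/* Returns true if the given block is the given CF node or is nested anywhere
 * inside it.
 */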
static bool
cf_node_contains_block(nir_cf_node *node, nir_block *block)
{
   for (nir_cf_node *n = &block->cf_node; n != NULL; n = n->parent) {
      if (n == node)
         return true;
   }

   return false;
}

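/* Rewrite every phi at the top of the given block to the value it takes when
 * entered from the given predecessor.
 */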
static void
rewrite_phis_to_pred(nir_block *block, nir_block *pred)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);

      ASSERTED bool found = false;
      nir_foreach_phi_src(phi_src, phi) {
         if (phi_src->pred == pred) {
            found = true;
            assert(phi_src->src.is_ssa);
            nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src.ssa);
            break;
         }
      }
      assert(found);
   }
}

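/* Returns true if the builder cursor sits immediately after a jump
 * instruction, i.e. at a point where nothing further can be inserted.
 */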
static bool
cursor_is_after_jump(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_instr:
   case nir_cursor_before_block:
      return false;
   case nir_cursor_after_instr:
      return cursor.instr->type == nir_instr_type_jump;
   case nir_cursor_after_block:
      return nir_block_ends_in_jump(cursor.block);
   }
   unreachable("Invalid cursor option");
}

/** Flattens if ladders leading up to a resume
 *
 * Given a resume_instr, this function flattens any if ladders leading to the
 * resume instruction and deletes any code that cannot be encountered on a
 * direct path to the resume instruction.  This way we get, for the most part,
 * straight-line control-flow up to the resume instruction.
 *
 * While we do this flattening, we also move any code which is in the remat
 * set up to the top of the function or to the top of the resume portion of
 * the current loop.  We don't worry about control-flow as we do this because
 * phis will never be in the remat set (see can_remat_instr) and so nothing
 * control-dependent will ever need to be re-materialized.  It is possible
 * that this algorithm will preserve too many instructions by moving them to
 * the top but we leave that for DCE to clean up.  Any code not in the remat
 * set is deleted because it's either unused in the continuation or else
 * unspilled from a previous continuation and the unspill code is after the
 * resume instruction.
 *
 * If, for instance, we have something like this:
 *
 *    // block 0
 *    if (cond1) {
 *       // block 1
 *    } else {
 *       // block 2
 *       if (cond2) {
 *          // block 3
 *          resume;
 *          if (cond3) {
 *             // block 4
 *          }
 *       } else {
 *          // block 5
 *       }
 *    }
 *
 * then we know, because we know the resume instruction had to be encountered,
 * that cond1 = false and cond2 = true and we lower as follows:
 *
 *    // block 0
 *    // block 2
 *    // block 3
 *    resume;
 *    if (cond3) {
 *       // block 4
 *    }
 *
 * As you can see, the code in blocks 1 and 5 was removed because there is no
 * path from the start of the shader to the resume instruction which executes
 * blocks 1 or 5.  Any remat code from blocks 0, 2, and 3 is preserved and
 * moved to the top.  If the resume instruction is inside a loop then we know
 * a priori that it is of the form
 *
 *    loop {
 *       if (resume) {
 *          // Contents containing resume_instr
 *       } else {
 *          // Second copy of contents
 *       }
 *    }
 *
 * In this case, we only descend into the first half of the loop.  The second
 * half is left alone as that portion is only ever executed after the resume
 * instruction.
 */
static bool
flatten_resume_if_ladder(nir_builder *b,
                         nir_cf_node *parent_node,
                         struct exec_list *child_list,
                         bool child_list_contains_cursor,
                         nir_instr *resume_instr,
                         struct brw_bitset *remat)
{
   nir_cf_list cf_list;

   /* If our child list contains the cursor instruction then we start out
    * before the cursor instruction.  We need to know this so that we can skip
    * moving instructions which are already before the cursor.
    */
   bool before_cursor = child_list_contains_cursor;

   nir_cf_node *resume_node = NULL;
   foreach_list_typed_safe(nir_cf_node, child, node, child_list) {
      switch (child->type) {
      case nir_cf_node_block: {
         nir_block *block = nir_cf_node_as_block(child);

         if (b->cursor.option == nir_cursor_before_block &&
             b->cursor.block == block) {
            assert(before_cursor);
            before_cursor = false;
         }

         nir_foreach_instr_safe(instr, block) {
            if ((b->cursor.option == nir_cursor_before_instr ||
                 b->cursor.option == nir_cursor_after_instr) &&
                b->cursor.instr == instr) {
               assert(nir_cf_node_is_first(&block->cf_node));
               assert(before_cursor);
               before_cursor = false;
               continue;
            }

            if (instr == resume_instr)
               goto found_resume;

            if (!before_cursor && can_remat_instr(instr, remat)) {
               nir_instr_remove(instr);
               nir_instr_insert(b->cursor, instr);
               b->cursor = nir_after_instr(instr);

               nir_ssa_def *def = nir_instr_ssa_def(instr);
               BITSET_SET(remat->set, def->index);
            }
         }

         if (b->cursor.option == nir_cursor_after_block &&
             b->cursor.block == block) {
            assert(before_cursor);
            before_cursor = false;
         }

         break;
      }

      case nir_cf_node_if: {
         nir_if *_if = nir_cf_node_as_if(child);

         /* Because of the dummy blocks inserted in the first if block of the
          * loops, it's possible we find an empty if block that contains our
          * cursor.  At this point, the block should still be empty and we can
          * just skip it and consider we're after the cursor.
          */
         if (cf_node_contains_block(&_if->cf_node,
                                    nir_cursor_current_block(b->cursor))) {
            /* Some sanity checks to verify this is actually a dummy block */
            assert(nir_src_as_bool(_if->condition) == true);
            assert(nir_cf_list_is_empty_block(&_if->then_list));
            assert(nir_cf_list_is_empty_block(&_if->else_list));
            before_cursor = false;
            break;
         }
         assert(!before_cursor);

         if (flatten_resume_if_ladder(b, &_if->cf_node, &_if->then_list,
                                      false, resume_instr, remat)) {
            resume_node = child;
            rewrite_phis_to_pred(nir_cf_node_as_block(nir_cf_node_next(child)),
                                 nir_if_last_then_block(_if));
            goto found_resume;
         }

         if (flatten_resume_if_ladder(b, &_if->cf_node, &_if->else_list,
                                      false, resume_instr, remat)) {
            resume_node = child;
            rewrite_phis_to_pred(nir_cf_node_as_block(nir_cf_node_next(child)),
                                 nir_if_last_else_block(_if));
            goto found_resume;
         }
         break;
      }

      case nir_cf_node_loop: {
         assert(!before_cursor);
         nir_loop *loop = nir_cf_node_as_loop(child);

         if (cf_node_contains_block(&loop->cf_node, resume_instr->block)) {
            /* Thanks to our loop body duplication pass, every level of loop
             * containing the resume instruction contains exactly three nodes:
             * two blocks and an if.  We don't want to lower away this if
             * because it's the resume selection if.  The resume half is
             * always the then_list so that's what we want to flatten.
             */
            nir_block *header = nir_loop_first_block(loop);
            nir_if *_if = nir_cf_node_as_if(nir_cf_node_next(&header->cf_node));

            nir_builder bl;
            nir_builder_init(&bl, b->impl);
            bl.cursor = nir_before_cf_list(&_if->then_list);

            /* We want to place anything re-materialized from inside the loop
             * at the top of the resume half of the loop.
             *
             * Because we're inside a loop, we might run into break/continue
             * instructions.  We can't place those within a block of
             * instructions, they need to be at the end of a block.  So we
             * build our own dummy block to place them.
             */
            nir_push_if(&bl, nir_imm_true(&bl));
            {
               ASSERTED bool found =
                  flatten_resume_if_ladder(&bl, &_if->cf_node, &_if->then_list,
                                           true, resume_instr, remat);
               assert(found);
            }
            nir_pop_if(&bl, NULL);

            resume_node = child;
            goto found_resume;
         } else {
            ASSERTED bool found =
               flatten_resume_if_ladder(b, &loop->cf_node, &loop->body,
                                        false, resume_instr, remat);
            assert(!found);
         }
         break;
      }

      case nir_cf_node_function:
         unreachable("Unsupported CF node type");
      }
   }
   assert(!before_cursor);

   /* If we got here, we didn't find the resume node or instruction. */
   return false;

found_resume:
   /* If we got here then we found either the resume node or the resume
    * instruction in this CF list.
    */
   if (resume_node) {
      /* If the resume instruction is buried inside one of our child CF
       * nodes, resume_node now points to that child.
       */
      if (resume_node->type == nir_cf_node_if) {
         /* Thanks to the recursive call, all of the interesting contents of
          * resume_node have been copied before the cursor.  We just need to
          * copy the stuff after resume_node.
          */
         nir_cf_extract(&cf_list, nir_after_cf_node(resume_node),
                        nir_after_cf_list(child_list));
      } else {
         /* The loop contains its own cursor and still has useful stuff in it.
          * We want to move everything after and including the loop to before
          * the cursor.
          */
         assert(resume_node->type == nir_cf_node_loop);
         nir_cf_extract(&cf_list, nir_before_cf_node(resume_node),
                        nir_after_cf_list(child_list));
      }
   } else {
      /* If we found the resume instruction in one of our blocks, grab
       * everything after it in the entire list (not just the one block), and
       * place it before the cursor instr.
       */
      nir_cf_extract(&cf_list, nir_after_instr(resume_instr),
                     nir_after_cf_list(child_list));
   }

   if (cursor_is_after_jump(b->cursor)) {
      /* If the resume instruction is in a loop, it's possible cf_list ends
       * in a break or continue instruction, in which case we don't want to
       * insert anything.  It's also possible we have an early return if
       * someone hasn't lowered those yet.  In either case, nothing after that
       * point executes in this context so we can delete it.
       */
      nir_cf_delete(&cf_list);
   } else {
      b->cursor = nir_cf_reinsert(&cf_list, b->cursor);
   }

   if (!resume_node) {
      /* We want the resume to be the first "interesting" instruction */
      nir_instr_remove(resume_instr);
      nir_instr_insert(nir_before_cf_list(&b->impl->body), resume_instr);
   }

   /* We've copied everything interesting out of this CF list to before the
    * cursor.  Delete everything else.
    */
   if (child_list_contains_cursor) {
      /* If the cursor is in child_list, then we're either a loop or function
       * that contains the cursor.  Cursors are always placed in a wrapper if
       * (true) to deal with break/continue and early returns.  We've already
       * moved everything interesting inside the wrapper if and we want to
       * remove whatever is left after it.
       */
      nir_block *cursor_block = nir_cursor_current_block(b->cursor);
      nir_if *wrapper_if = nir_cf_node_as_if(cursor_block->cf_node.parent);
      assert(wrapper_if->cf_node.parent == parent_node);
      /* The wrapper if blocks are either put into the body of the main
       * function, or within the resume if block of the loops.
       */
      assert(parent_node->type == nir_cf_node_function ||
             (parent_node->type == nir_cf_node_if &&
              parent_node->parent->type == nir_cf_node_loop));
      nir_cf_extract(&cf_list, nir_after_cf_node(&wrapper_if->cf_node),
                     nir_after_cf_list(child_list));
   } else {
      nir_cf_list_extract(&cf_list, child_list);
   }
   nir_cf_delete(&cf_list);

   return true;
}

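/* Turn one copy of the shader into the resume shader for the given call
 * index: find the matching rt_resume instruction, duplicate loop bodies as
 * needed, and flatten the if ladders leading to the resume point so it
 * becomes the start of the shader.  Returns the resume instruction.
 */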
static nir_instr *
lower_resume(nir_shader *shader, int call_idx)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_instr *resume_instr = find_resume_instr(impl, call_idx);

   if (duplicate_loop_bodies(impl, resume_instr)) {
      nir_validate_shader(shader, "after duplicate_loop_bodies in "
                                  "brw_nir_lower_shader_calls");
      /* If we duplicated the bodies of any loops, run regs_to_ssa to get rid
       * of all those pesky registers we just added.
       */
      NIR_PASS_V(shader, nir_lower_regs_to_ssa);
   }

   /* Re-index nir_ssa_def::index.  We don't care about actual liveness in
    * this pass but, so we can use the same helpers as the spilling pass, we
    * need to make sure that live_index is something sane.  It's used
    * constantly for determining if an SSA value has been added since the
    * start of the pass.
    */
   nir_index_ssa_defs(impl);

   void *mem_ctx = ralloc_context(shader);

   /* Used to track which things may have been assumed to be re-materialized
    * by the spilling pass and which we shouldn't delete.
    */
   struct brw_bitset remat = bitset_create(mem_ctx, impl->ssa_alloc);

   /* Create a nop instruction to use as a cursor as we extract and re-insert
    * stuff into the CFG.
    */
   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   nir_push_if(&b, nir_imm_true(&b));
   {
      ASSERTED bool found =
         flatten_resume_if_ladder(&b, &impl->cf_node, &impl->body,
                                  true, resume_instr, &remat);
      assert(found);
   }
   nir_pop_if(&b, NULL);

   ralloc_free(mem_ctx);

   nir_validate_shader(shader, "after flatten_resume_if_ladder in "
                               "brw_nir_lower_shader_calls");

   nir_metadata_preserve(impl, nir_metadata_none);

   return resume_instr;
}

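/* Replace every rt_resume intrinsic other than the one we want to keep with a
 * halt and delete the rest of its block, since nothing after such a resume
 * can execute in this shader.
 */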
static void
replace_resume_with_halt(nir_shader *shader, nir_instr *keep)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr == keep)
            continue;

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *resume = nir_instr_as_intrinsic(instr);
         if (resume->intrinsic != nir_intrinsic_rt_resume)
            continue;

         /* If this is some other resume, then we've kicked off a ray or
          * bindless thread and we don't want to go any further in this
          * shader.  Insert a halt so that NIR will delete any instructions
          * dominated by this call instruction including the scratch_load
          * instructions we inserted.
          */
         nir_cf_list cf_list;
         nir_cf_extract(&cf_list, nir_after_instr(&resume->instr),
                        nir_after_block(block));
         nir_cf_delete(&cf_list);
         b.cursor = nir_instr_remove(&resume->instr);
         nir_jump(&b, nir_jump_halt);
         break;
      }
   }
}

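/* Lower load_stack/store_stack intrinsics to real memory accesses: either
 * global loads/stores relative to the scratch base pointer (for the 64-bit
 * global address format) or load/store_scratch (for the 32-bit offset
 * format).
 */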
struct lower_scratch_state {
   nir_address_format address_format;
};

static bool
lower_stack_instr_to_scratch(struct nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_scratch_state *state = data;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *stack = nir_instr_as_intrinsic(instr);
   switch (stack->intrinsic) {
   case nir_intrinsic_load_stack: {
      b->cursor = nir_instr_remove(instr);
      nir_ssa_def *data, *old_data = nir_instr_ssa_def(instr);

      if (state->address_format == nir_address_format_64bit_global) {
         nir_ssa_def *addr = nir_iadd_imm(b,
                                          nir_load_scratch_base_ptr(b, 1, 64, 1),
                                          nir_intrinsic_base(stack));
         data = nir_load_global(b, addr,
                                nir_intrinsic_align_mul(stack),
                                stack->dest.ssa.num_components,
                                stack->dest.ssa.bit_size);
      } else {
         assert(state->address_format == nir_address_format_32bit_offset);
         data = nir_load_scratch(b,
                                 old_data->num_components,
                                 old_data->bit_size,
                                 nir_imm_int(b, nir_intrinsic_base(stack)),
                                 .align_mul = nir_intrinsic_align_mul(stack));
      }
      nir_ssa_def_rewrite_uses(old_data, data);
      break;
   }

   case nir_intrinsic_store_stack: {
      b->cursor = nir_instr_remove(instr);
      nir_ssa_def *data = stack->src[0].ssa;

      if (state->address_format == nir_address_format_64bit_global) {
         nir_ssa_def *addr = nir_iadd_imm(b,
                                          nir_load_scratch_base_ptr(b, 1, 64, 1),
                                          nir_intrinsic_base(stack));
         nir_store_global(b, addr,
                          nir_intrinsic_align_mul(stack),
                          data,
                          BITFIELD_MASK(data->num_components));
      } else {
         assert(state->address_format == nir_address_format_32bit_offset);
         nir_store_scratch(b, data,
                           nir_imm_int(b, nir_intrinsic_base(stack)),
                           .align_mul = nir_intrinsic_align_mul(stack),
                           .write_mask = BITFIELD_MASK(data->num_components));
      }
      break;
   }

   default:
      return false;
   }

   return true;
}

static bool
nir_lower_stack_to_scratch(nir_shader *shader,
                           nir_address_format address_format)
{
   struct lower_scratch_state state = {
      .address_format = address_format,
   };

   return nir_shader_instructions_pass(shader,
                                       lower_stack_instr_to_scratch,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       &state);
}

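/* Per-instruction callback: drop a store_stack whose stored value comes
 * straight from a load_stack at the same base offset, since the value is
 * already on the stack at that location.
 */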
static bool
opt_remove_respills_instr(struct nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *store_intrin = nir_instr_as_intrinsic(instr);
   if (store_intrin->intrinsic != nir_intrinsic_store_stack)
      return false;

   nir_instr *value_instr = store_intrin->src[0].ssa->parent_instr;
   if (value_instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *load_intrin = nir_instr_as_intrinsic(value_instr);
   if (load_intrin->intrinsic != nir_intrinsic_load_stack)
      return false;

   if (nir_intrinsic_base(load_intrin) != nir_intrinsic_base(store_intrin))
      return false;

   nir_instr_remove(&store_intrin->instr);
   return true;
}

/* After shader split, look at stack load/store operations.  If we're loading
 * and storing the same value at the same location, we can drop the store
 * instruction.
 */
static bool
nir_opt_remove_respills(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader,
                                       opt_remove_respills_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       NULL);
}

/** Lower shader call instructions to split shaders.
 *
 * Shader calls can be split into an initial shader and a series of "resume"
 * shaders.  When the shader is first invoked, it is the initial shader which
 * is executed.  At any point in the initial shader or any one of the resume
 * shaders, a shader call operation may be performed.  The possible shader call
 * operations are:
 *
 *  - trace_ray
 *  - report_ray_intersection
 *  - execute_callable
 *
 * When a shader call operation is performed, we push all live values to the
 * stack, call rt_trace_ray/rt_execute_callable and then kill the shader.  Once
 * the operation we invoked is complete, a callee shader will return execution
 * to the respective resume shader.  The resume shader pops the contents off
 * the stack and picks up where the calling shader left off.
 *
 * Stack management is assumed to be done after this pass.  Call
 * instructions and their resumes get annotated with stack information that
 * should be enough for the backend to implement proper stack management.
 */
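/* A minimal usage sketch (illustrative only; the address format, stack
 * alignment, and memory context are placeholder choices a driver would make):
 *
 *    nir_shader **resume_shaders = NULL;
 *    uint32_t num_resume_shaders = 0;
 *    nir_lower_shader_calls(shader, nir_address_format_32bit_offset, 64,
 *                           &resume_shaders, &num_resume_shaders, mem_ctx);
 *
 * The original shader becomes the initial shader and each entry of
 * resume_shaders continues execution after the corresponding call.
 */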
bool
nir_lower_shader_calls(nir_shader *shader,
                       nir_address_format address_format,
                       unsigned stack_alignment,
                       nir_shader ***resume_shaders_out,
                       uint32_t *num_resume_shaders_out,
                       void *mem_ctx)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   int num_calls = 0;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr_is_shader_call(instr))
            num_calls++;
      }
   }

   if (num_calls == 0) {
      nir_shader_preserve_all_metadata(shader);
      *num_resume_shaders_out = 0;
      return false;
   }

   /* Some intrinsics not only can't be re-materialized but aren't preserved
    * when moving to the continuation shader.  We have to move them to the top
    * to ensure they get spilled as needed.
    */
   {
      bool progress = false;
      NIR_PASS(progress, shader, move_system_values_to_top);
      if (progress)
         NIR_PASS(progress, shader, nir_opt_cse);
   }

   NIR_PASS_V(shader, spill_ssa_defs_and_lower_shader_calls,
              num_calls, stack_alignment);

   NIR_PASS_V(shader, nir_opt_remove_phis);

   /* Make N copies of our shader */
   nir_shader **resume_shaders = ralloc_array(mem_ctx, nir_shader *, num_calls);
   for (unsigned i = 0; i < num_calls; i++) {
      resume_shaders[i] = nir_shader_clone(mem_ctx, shader);

      /* Give them a recognizable name */
      resume_shaders[i]->info.name =
         ralloc_asprintf(mem_ctx, "%s%sresume_%u",
                         shader->info.name ? shader->info.name : "",
                         shader->info.name ? "-" : "",
                         i);
   }

   replace_resume_with_halt(shader, NULL);
   for (unsigned i = 0; i < num_calls; i++) {
      nir_instr *resume_instr = lower_resume(resume_shaders[i], i);
      replace_resume_with_halt(resume_shaders[i], resume_instr);
      nir_opt_remove_phis(resume_shaders[i]);
      /* Remove the dummy blocks added by flatten_resume_if_ladder() */
      nir_opt_if(resume_shaders[i], nir_opt_if_optimize_phi_true_false);
   }

   for (unsigned i = 0; i < num_calls; i++)
      NIR_PASS_V(resume_shaders[i], nir_opt_remove_respills);

   NIR_PASS_V(shader, nir_lower_stack_to_scratch, address_format);
   for (unsigned i = 0; i < num_calls; i++)
      NIR_PASS_V(resume_shaders[i], nir_lower_stack_to_scratch, address_format);

   *resume_shaders_out = resume_shaders;
   *num_resume_shaders_out = num_calls;

   return true;
}