mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
Merge branch 'nir-licm2' into 'main'
nir/opt_licm: use nir_instr_can_speculate, hoist from multiple levels of nested loops, add filter_block & filter_instr callbacks, add tests. See merge request mesa/mesa!38823
This commit is contained in:
commit
40e90e227c
8 changed files with 383 additions and 39 deletions
|
|
@ -1050,3 +1050,17 @@ ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data)
|
||||||
|
|
||||||
return target_width;
|
return target_width;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ac_nir_opt_licm_filter_instr_cb(nir_instr *instr, bool instr_dominates_exit, unsigned num_dst_bits,
|
||||||
|
unsigned num_all_src_bits, nir_loop *loop)
|
||||||
|
{
|
||||||
|
/* This heuristic reduces spilling. Note that while this only seems to apply to ALU, any ALU
|
||||||
|
* that's hoisted potentially enables hoisting intrinsics using it, so this really affects
|
||||||
|
* all instructions.
|
||||||
|
*/
|
||||||
|
if (!instr_dominates_exit && instr->type == nir_instr_type_alu)
|
||||||
|
return num_dst_bits + 64 < num_all_src_bits;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -447,6 +447,10 @@ ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data);
|
||||||
bool
|
bool
|
||||||
ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu);
|
ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu);
|
||||||
|
|
||||||
|
bool
|
||||||
|
ac_nir_opt_licm_filter_instr_cb(nir_instr *instr, bool instr_dominates_exit, unsigned num_dst_bits,
|
||||||
|
unsigned num_all_src_bits, nir_loop *loop);
|
||||||
|
|
||||||
uint8_t
|
uint8_t
|
||||||
ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data);
|
ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -368,7 +368,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
||||||
nir_move_options sink_opts = nir_move_const_undef | nir_move_copies | nir_dont_move_byte_word_vecs;
|
nir_move_options sink_opts = nir_move_const_undef | nir_move_copies | nir_dont_move_byte_word_vecs;
|
||||||
|
|
||||||
if (!stage->key.optimisations_disabled) {
|
if (!stage->key.optimisations_disabled) {
|
||||||
NIR_PASS(_, stage->nir, nir_opt_licm);
|
NIR_PASS(_, stage->nir, nir_opt_licm, ac_nir_opt_licm_filter_instr_cb);
|
||||||
|
|
||||||
if (stage->stage == MESA_SHADER_VERTEX) {
|
if (stage->stage == MESA_SHADER_VERTEX) {
|
||||||
/* Always load all VS inputs at the top to eliminate needless VMEM->s_wait->VMEM sequences.
|
/* Always load all VS inputs at the top to eliminate needless VMEM->s_wait->VMEM sequences.
|
||||||
|
|
|
||||||
|
|
@ -425,6 +425,7 @@ if with_tests
|
||||||
'tests/mod_analysis_tests.cpp',
|
'tests/mod_analysis_tests.cpp',
|
||||||
'tests/negative_equal_tests.cpp',
|
'tests/negative_equal_tests.cpp',
|
||||||
'tests/opt_if_tests.cpp',
|
'tests/opt_if_tests.cpp',
|
||||||
|
'tests/opt_licm_tests.cpp',
|
||||||
'tests/opt_loop_tests.cpp',
|
'tests/opt_loop_tests.cpp',
|
||||||
'tests/opt_peephole_select.cpp',
|
'tests/opt_peephole_select.cpp',
|
||||||
'tests/opt_shrink_vectors_tests.cpp',
|
'tests/opt_shrink_vectors_tests.cpp',
|
||||||
|
|
|
||||||
|
|
@ -4746,6 +4746,18 @@ nir_block *nir_cf_node_cf_tree_prev(nir_cf_node *node);
|
||||||
block != nir_cf_node_cf_tree_prev(node); \
|
block != nir_cf_node_cf_tree_prev(node); \
|
||||||
block = prev, prev = nir_block_cf_tree_prev(block))
|
block = prev, prev = nir_block_cf_tree_prev(block))
|
||||||
|
|
||||||
|
static inline nir_block *
|
||||||
|
nir_loop_predecessor_block(nir_loop *loop)
|
||||||
|
{
|
||||||
|
return nir_cf_node_cf_tree_prev(&loop->cf_node);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline nir_block *
|
||||||
|
nir_loop_successor_block(nir_loop *loop)
|
||||||
|
{
|
||||||
|
return nir_cf_node_cf_tree_next(&loop->cf_node);
|
||||||
|
}
|
||||||
|
|
||||||
/* If the following CF node is an if, this function returns that if.
|
/* If the following CF node is an if, this function returns that if.
|
||||||
* Otherwise, it returns NULL.
|
* Otherwise, it returns NULL.
|
||||||
*/
|
*/
|
||||||
|
|
@ -6458,7 +6470,14 @@ bool nir_opt_large_constants(nir_shader *shader,
|
||||||
glsl_type_size_align_func size_align,
|
glsl_type_size_align_func size_align,
|
||||||
unsigned threshold);
|
unsigned threshold);
|
||||||
|
|
||||||
bool nir_opt_licm(nir_shader *shader);
|
typedef bool (*nir_opt_licm_filter_instr_cb)(nir_instr *instr,
|
||||||
|
bool instr_dominates_exit,
|
||||||
|
unsigned num_dst_bits,
|
||||||
|
unsigned num_all_src_bits,
|
||||||
|
nir_loop *loop);
|
||||||
|
|
||||||
|
bool nir_opt_licm(nir_shader *shader,
|
||||||
|
nir_opt_licm_filter_instr_cb filter_instr);
|
||||||
bool nir_opt_loop(nir_shader *shader);
|
bool nir_opt_loop(nir_shader *shader);
|
||||||
|
|
||||||
bool nir_opt_loop_unroll(nir_shader *shader);
|
bool nir_opt_loop_unroll(nir_shader *shader);
|
||||||
|
|
|
||||||
|
|
@ -5,30 +5,69 @@
|
||||||
|
|
||||||
#include "nir.h"
|
#include "nir.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
nir_opt_licm_filter_instr_cb filter_instr;
|
||||||
|
|
||||||
|
nir_loop *loop;
|
||||||
|
bool current_block_dominates_exit;
|
||||||
|
unsigned num_all_src_bits;
|
||||||
|
} licm_state;
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
defined_before_loop(nir_src *src, void *state)
|
defined_before_loop(nir_src *src, void *_state)
|
||||||
{
|
{
|
||||||
unsigned *loop_preheader_idx = state;
|
licm_state *state = (licm_state *)_state;
|
||||||
return nir_def_block(src->ssa)->index <= *loop_preheader_idx;
|
|
||||||
|
state->num_all_src_bits += src->ssa->bit_size * src->ssa->num_components;
|
||||||
|
|
||||||
|
/* The current instruction is loop-invariant only if its sources are before
|
||||||
|
* the loop.
|
||||||
|
*/
|
||||||
|
return nir_def_block(src->ssa)->index <=
|
||||||
|
nir_loop_predecessor_block(state->loop)->index;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
is_instr_loop_invariant(nir_instr *instr, unsigned loop_preheader_idx)
|
is_instr_loop_invariant(nir_instr *instr, licm_state *state)
|
||||||
{
|
{
|
||||||
switch (instr->type) {
|
switch (instr->type) {
|
||||||
case nir_instr_type_load_const:
|
case nir_instr_type_load_const:
|
||||||
case nir_instr_type_undef:
|
case nir_instr_type_undef:
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
case nir_instr_type_intrinsic:
|
|
||||||
if (!nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr)))
|
|
||||||
return false;
|
|
||||||
FALLTHROUGH;
|
|
||||||
|
|
||||||
case nir_instr_type_alu:
|
case nir_instr_type_alu:
|
||||||
case nir_instr_type_tex:
|
case nir_instr_type_tex:
|
||||||
case nir_instr_type_deref:
|
case nir_instr_type_deref:
|
||||||
return nir_foreach_src(instr, defined_before_loop, &loop_preheader_idx);
|
case nir_instr_type_intrinsic: {
|
||||||
|
/* An instruction can be hoisted if it either dominates the exit (i.e.
|
||||||
|
* it always executes) and is reorderable, or is speculatable.
|
||||||
|
*/
|
||||||
|
if (state->current_block_dominates_exit) {
|
||||||
|
if (instr->type == nir_instr_type_intrinsic &&
|
||||||
|
!nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr)))
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
if (!nir_instr_can_speculate(instr))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
state->num_all_src_bits = 0;
|
||||||
|
|
||||||
|
bool invariant = nir_foreach_src(instr, defined_before_loop, state);
|
||||||
|
if (!invariant)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (state->filter_instr) {
|
||||||
|
nir_def *def = nir_instr_def(instr);
|
||||||
|
|
||||||
|
if (!state->filter_instr(instr, state->current_block_dominates_exit,
|
||||||
|
def->bit_size * def->num_components,
|
||||||
|
state->num_all_src_bits, state->loop))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
case nir_instr_type_phi:
|
case nir_instr_type_phi:
|
||||||
case nir_instr_type_call:
|
case nir_instr_type_call:
|
||||||
|
|
@ -39,13 +78,17 @@ is_instr_loop_invariant(nir_instr *instr, unsigned loop_preheader_idx)
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
visit_block(nir_block *block, nir_block *preheader)
|
visit_block(nir_block *block, licm_state *state)
|
||||||
{
|
{
|
||||||
|
state->current_block_dominates_exit =
|
||||||
|
nir_block_dominates(block, nir_loop_successor_block(state->loop));
|
||||||
|
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
nir_foreach_instr_safe(instr, block) {
|
nir_foreach_instr_safe(instr, block) {
|
||||||
if (is_instr_loop_invariant(instr, preheader->index)) {
|
if (is_instr_loop_invariant(instr, state)) {
|
||||||
nir_instr_remove(instr);
|
nir_instr_remove(instr);
|
||||||
nir_instr_insert_after_block(preheader, instr);
|
nir_instr_insert_after_block(nir_loop_predecessor_block(state->loop),
|
||||||
|
instr);
|
||||||
progress = true;
|
progress = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -80,40 +123,62 @@ should_optimize_loop(nir_loop *loop)
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
visit_cf_list(struct exec_list *list, nir_block *preheader, nir_block *exit)
|
visit_cf_list(struct exec_list *list, licm_state *state)
|
||||||
{
|
{
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
foreach_list_typed(nir_cf_node, node, node, list) {
|
foreach_list_typed(nir_cf_node, node, node, list) {
|
||||||
switch (node->type) {
|
switch (node->type) {
|
||||||
case nir_cf_node_block: {
|
case nir_cf_node_block: {
|
||||||
/* By only visiting blocks which dominate the loop exit, we
|
nir_cf_node *next = nir_cf_node_next(node);
|
||||||
* ensure that we don't speculatively hoist any instructions
|
bool optimize_loop = false;
|
||||||
* which otherwise might not be executed.
|
|
||||||
*
|
/* If the next CF node is a loop that we optimize, visit it first
|
||||||
* Note, that the proper check would be whether this block
|
* before visiting its predecessor block, so that any instructions
|
||||||
* postdominates the loop preheader.
|
* hoisted from this (potentially nested) loop are then considered
|
||||||
|
* for hoisting from the outer loop as well. The goal is to hoist
|
||||||
|
* instructions across all levels of nested loops.
|
||||||
*/
|
*/
|
||||||
|
if (next && next->type == nir_cf_node_loop) {
|
||||||
|
nir_loop *inner_loop = nir_cf_node_as_loop(next);
|
||||||
|
optimize_loop = should_optimize_loop(inner_loop);
|
||||||
|
|
||||||
|
if (optimize_loop) {
|
||||||
|
nir_loop *outer_loop = state->loop;
|
||||||
|
|
||||||
|
state->loop = inner_loop;
|
||||||
|
progress |= visit_cf_list(&inner_loop->body, state);
|
||||||
|
progress |= visit_cf_list(&inner_loop->continue_list, state);
|
||||||
|
state->loop = outer_loop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Visit the block. */
|
||||||
nir_block *block = nir_cf_node_as_block(node);
|
nir_block *block = nir_cf_node_as_block(node);
|
||||||
if (exit && nir_block_dominates(block, exit))
|
if (state->loop)
|
||||||
progress |= visit_block(block, preheader);
|
progress |= visit_block(block, state);
|
||||||
|
|
||||||
|
if (next && next->type == nir_cf_node_loop && !optimize_loop) {
|
||||||
|
nir_loop *loop = nir_cf_node_as_loop(next);
|
||||||
|
|
||||||
|
/* We treat this loop like any other block, so we don't do LICM
|
||||||
|
* from it per se, but if this loop is nested inside another
|
||||||
|
* loop, we still do LICM for the outer loop.
|
||||||
|
*/
|
||||||
|
progress |= visit_cf_list(&loop->body, state);
|
||||||
|
progress |= visit_cf_list(&loop->continue_list, state);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_cf_node_if: {
|
case nir_cf_node_if: {
|
||||||
nir_if *nif = nir_cf_node_as_if(node);
|
nir_if *nif = nir_cf_node_as_if(node);
|
||||||
progress |= visit_cf_list(&nif->then_list, preheader, exit);
|
progress |= visit_cf_list(&nif->then_list, state);
|
||||||
progress |= visit_cf_list(&nif->else_list, preheader, exit);
|
progress |= visit_cf_list(&nif->else_list, state);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_cf_node_loop: {
|
case nir_cf_node_loop:
|
||||||
nir_loop *loop = nir_cf_node_as_loop(node);
|
/* All loops are handled when handling their predecessor block. */
|
||||||
bool opt = should_optimize_loop(loop);
|
|
||||||
nir_block *inner_preheader = opt ? nir_cf_node_cf_tree_prev(node) : preheader;
|
|
||||||
nir_block *inner_exit = opt ? nir_cf_node_cf_tree_next(node) : exit;
|
|
||||||
progress |= visit_cf_list(&loop->body, inner_preheader, inner_exit);
|
|
||||||
progress |= visit_cf_list(&loop->continue_list, inner_preheader, inner_exit);
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
case nir_cf_node_function:
|
case nir_cf_node_function:
|
||||||
UNREACHABLE("NIR LICM: Unsupported cf_node type.");
|
UNREACHABLE("NIR LICM: Unsupported cf_node type.");
|
||||||
}
|
}
|
||||||
|
|
@ -123,17 +188,19 @@ visit_cf_list(struct exec_list *list, nir_block *preheader, nir_block *exit)
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
nir_opt_licm(nir_shader *shader)
|
nir_opt_licm(nir_shader *shader, nir_opt_licm_filter_instr_cb filter_instr)
|
||||||
{
|
{
|
||||||
|
licm_state state = {filter_instr};
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
nir_foreach_function_impl(impl, shader) {
|
nir_foreach_function_impl(impl, shader) {
|
||||||
nir_metadata_require(impl, nir_metadata_block_index |
|
nir_metadata_require(impl, nir_metadata_block_index |
|
||||||
nir_metadata_dominance);
|
nir_metadata_dominance);
|
||||||
|
|
||||||
bool impl_progress = visit_cf_list(&impl->body, NULL, NULL);
|
state.loop = NULL;
|
||||||
progress |= nir_progress(impl_progress, impl,
|
|
||||||
nir_metadata_block_index | nir_metadata_dominance);
|
progress |= nir_progress(visit_cf_list(&impl->body, &state), impl,
|
||||||
|
nir_metadata_control_flow);
|
||||||
}
|
}
|
||||||
|
|
||||||
return progress;
|
return progress;
|
||||||
|
|
|
||||||
231
src/compiler/nir/tests/opt_licm_tests.cpp
Normal file
231
src/compiler/nir/tests/opt_licm_tests.cpp
Normal file
|
|
@ -0,0 +1,231 @@
|
||||||
|
/* Copyright 2025 Advanced Micro Devices, Inc.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "nir_test.h"
|
||||||
|
|
||||||
|
class nir_opt_licm_test : public nir_test {
|
||||||
|
protected:
|
||||||
|
nir_opt_licm_test()
|
||||||
|
: nir_test::nir_test("nir_opt_licm_test", MESA_SHADER_COMPUTE)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_loop *loop;
|
||||||
|
nir_block *original_block;
|
||||||
|
nir_def *x, *y, *z, *r;
|
||||||
|
bool expect_progress;
|
||||||
|
bool insert_after_break;
|
||||||
|
|
||||||
|
void test_init();
|
||||||
|
void test_finish(nir_opt_licm_filter_instr_cb filter_instr);
|
||||||
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
nir_opt_licm_test::test_init()
|
||||||
|
{
|
||||||
|
x = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
|
||||||
|
y = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
|
||||||
|
z = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
|
||||||
|
|
||||||
|
loop = nir_push_loop(b);
|
||||||
|
if (insert_after_break)
|
||||||
|
nir_break_if(b, nir_undef(b, 1, 1));
|
||||||
|
original_block = nir_loop_last_block(loop);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
filter_using_dst_src_bits(nir_instr *instr, bool instr_dominates_exit,
|
||||||
|
unsigned num_dst_bits, unsigned num_all_src_bits,
|
||||||
|
nir_loop *loop)
|
||||||
|
{
|
||||||
|
return num_dst_bits <= num_all_src_bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
nir_opt_licm_test::test_finish(nir_opt_licm_filter_instr_cb filter_instr)
|
||||||
|
{
|
||||||
|
if (!insert_after_break)
|
||||||
|
nir_break_if(b, nir_undef(b, 1, 1));
|
||||||
|
nir_pop_loop(b, loop);
|
||||||
|
nir_validate_shader(b->shader, NULL);
|
||||||
|
|
||||||
|
bool progress = false;
|
||||||
|
NIR_PASS(progress, b->shader, nir_opt_licm, filter_instr);
|
||||||
|
|
||||||
|
if (expect_progress) {
|
||||||
|
ASSERT_TRUE(progress);
|
||||||
|
ASSERT_EQ(nir_def_instr(r)->block, nir_loop_predecessor_block(loop));
|
||||||
|
} else {
|
||||||
|
ASSERT_FALSE(progress);
|
||||||
|
ASSERT_EQ(nir_def_instr(r)->block, original_block);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A unary ALU op emitted after the break, with dst bits == src bits, is hoisted. */
TEST_F(nir_opt_licm_test, hoist_alu_unary)
{
   expect_progress = true;
   insert_after_break = true;
   test_init();

   r = nir_ineg(b, x);

   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* A binary ALU op emitted after the break, with dst bits < src bits, is hoisted. */
TEST_F(nir_opt_licm_test, hoist_alu_binary)
{
   expect_progress = true;
   insert_after_break = true;
   test_init();

   r = nir_iadd(b, x, y);

   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* A widening conversion is rejected by the test filter and stays in the loop. */
TEST_F(nir_opt_licm_test, skip_alu_u2u64)
{
   expect_progress = false;
   insert_after_break = true;
   test_init();

   r = nir_u2u64(b, x);

   /* Here sizeof(dst) > sizeof(all srcs), so filter_using_dst_src_bits refuses
    * to hoist: doing so would increase register usage of the whole loop.
    */
   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* An SSBO load emitted before the break but without any access flags must not
 * be hoisted.
 */
TEST_F(nir_opt_licm_test, skip_load_ssbo_no_flags_before_break)
{
   expect_progress = false;
   insert_after_break = false;
   test_init();

   r = nir_load_ssbo(b, 1, 32, x, y);

   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* A reorderable SSBO load emitted before the break is hoisted. */
TEST_F(nir_opt_licm_test, hoist_load_ssbo_reorderable_before_break)
{
   expect_progress = true;
   insert_after_break = false;
   test_init();

   r = nir_load_ssbo(b, 1, 32, x, y);

   nir_intrinsic_instr *load = nir_def_as_intrinsic(r);
   nir_intrinsic_set_access(load, (gl_access_qualifier)(ACCESS_CAN_REORDER));

   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* A reorderable but non-speculatable SSBO load emitted after the break must
 * stay inside the loop.
 */
TEST_F(nir_opt_licm_test, skip_load_ssbo_reorderable)
{
   expect_progress = false;
   insert_after_break = true;
   test_init();

   r = nir_load_ssbo(b, 1, 32, x, y);

   nir_intrinsic_instr *load = nir_def_as_intrinsic(r);
   nir_intrinsic_set_access(load, (gl_access_qualifier)(ACCESS_CAN_REORDER));

   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* An SSBO load emitted after the break that only carries ACCESS_CAN_SPECULATE
 * (and not ACCESS_CAN_REORDER) is expected to stay inside the loop.
 */
TEST_F(nir_opt_licm_test, skip_load_ssbo_speculatable)
{
   expect_progress = false;
   insert_after_break = true;
   test_init();

   r = nir_load_ssbo(b, 1, 32, x, y);

   nir_intrinsic_instr *load = nir_def_as_intrinsic(r);
   nir_intrinsic_set_access(load, (gl_access_qualifier)(ACCESS_CAN_SPECULATE));

   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* An SSBO load emitted after the break that is both reorderable and
 * speculatable is hoisted.
 */
TEST_F(nir_opt_licm_test, hoist_load_ssbo_reorderable_speculatable)
{
   expect_progress = true;
   insert_after_break = true;
   test_init();

   r = nir_load_ssbo(b, 1, 32, x, y);

   nir_intrinsic_instr *load = nir_def_as_intrinsic(r);
   nir_intrinsic_set_access(load, (gl_access_qualifier)(ACCESS_CAN_REORDER |
                                                        ACCESS_CAN_SPECULATE));

   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* An ALU op inside a loop nested in the fixture's loop is hoisted all the way
 * in front of the outer loop.
 */
TEST_F(nir_opt_licm_test, hoist_alu_2_nested_loops)
{
   expect_progress = true;
   insert_after_break = true;
   test_init();

   nir_loop *inner = nir_push_loop(b);
   {
      nir_break_if(b, nir_undef(b, 1, 1));
      r = nir_ineg(b, x);
   }
   nir_pop_loop(b, inner);

   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* An ALU op inside five loops nested in the fixture's loop (six levels in
 * total) is hoisted in front of the outermost loop.
 */
TEST_F(nir_opt_licm_test, hoist_alu_6_nested_loops)
{
   expect_progress = true;
   insert_after_break = true;
   test_init();

   nir_loop *nested_loops[5];

   /* Open the nested loops outermost first, each starting with a break. */
   for (unsigned depth = 0; depth < ARRAY_SIZE(nested_loops); depth++) {
      nested_loops[depth] = nir_push_loop(b);
      nir_break_if(b, nir_undef(b, 1, 1));
   }

   r = nir_ineg(b, x);

   /* Close them again, innermost first. */
   for (unsigned depth = ARRAY_SIZE(nested_loops); depth-- > 0;)
      nir_pop_loop(b, nested_loops[depth]);

   test_finish(filter_using_dst_src_bits);
}
|
||||||
|
|
||||||
|
/* A texture op emitted after the break without can_speculate stays in the
 * loop (no filter callback is used).
 */
TEST_F(nir_opt_licm_test, skip_tex)
{
   expect_progress = false;
   insert_after_break = true;
   test_init();

   nir_tex_builder tex = {0};
   tex.dest_type = nir_type_uint32;
   tex.texture_handle = y;
   tex.coord = x;

   r = nir_build_tex_struct(b, nir_texop_tex, tex);

   test_finish(NULL);
}
|
||||||
|
|
||||||
|
/* A texture op emitted before the break is hoisted even without
 * can_speculate (no filter callback is used).
 */
TEST_F(nir_opt_licm_test, hoist_tex_before_break)
{
   expect_progress = true;
   insert_after_break = false;
   test_init();

   nir_tex_builder tex = {0};
   tex.dest_type = nir_type_uint32;
   tex.texture_handle = y;
   tex.coord = x;

   r = nir_build_tex_struct(b, nir_texop_tex, tex);

   test_finish(NULL);
}
|
||||||
|
|
||||||
|
/* A texture op emitted after the break with can_speculate set is hoisted
 * (no filter callback is used).
 */
TEST_F(nir_opt_licm_test, hoist_tex_speculatable)
{
   expect_progress = true;
   insert_after_break = true;
   test_init();

   nir_tex_builder tex = {0};
   tex.dest_type = nir_type_uint32;
   tex.can_speculate = true;
   tex.texture_handle = y;
   tex.coord = x;

   r = nir_build_tex_struct(b, nir_texop_tex, tex);

   test_finish(NULL);
}
|
||||||
|
|
@ -786,6 +786,14 @@ static bool robustness_filter(const nir_intrinsic_instr *intr,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
opt_licm_filter_instr_cb(nir_instr *instr, bool instr_dominates_exit,
|
||||||
|
unsigned num_dst_bits, unsigned num_all_src_bits,
|
||||||
|
nir_loop *loop)
|
||||||
|
{
|
||||||
|
return instr_dominates_exit;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Lowers a NIR shader.
|
* \brief Lowers a NIR shader.
|
||||||
*
|
*
|
||||||
|
|
@ -806,7 +814,7 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
|
||||||
|
|
||||||
NIR_PASS(_, nir, nir_lower_memory_model);
|
NIR_PASS(_, nir, nir_lower_memory_model);
|
||||||
|
|
||||||
NIR_PASS(_, nir, nir_opt_licm);
|
NIR_PASS(_, nir, nir_opt_licm, opt_licm_filter_instr_cb);
|
||||||
|
|
||||||
NIR_PASS(_, nir, nir_lower_memcpy);
|
NIR_PASS(_, nir, nir_lower_memcpy);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue