mesa/src/compiler/nir/tests/vars_tests.cpp

727 lines
23 KiB
C++
Raw Normal View History

/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <gtest/gtest.h>
#include "nir.h"
#include "nir_builder.h"
namespace {
/* Common fixture for the variable-related NIR pass tests.  It holds the
 * builder used to assemble each test shader plus the allocation contexts
 * the helpers draw from.
 */
class nir_vars_test : public ::testing::Test {
protected:
   nir_vars_test();
   ~nir_vars_test();

   /* Create an int variable called `name`.  Function temporaries are
    * created on the builder's function implementation; every other mode is
    * created on the shader.
    */
   nir_variable *create_int(nir_variable_mode mode, const char *name) {
      const glsl_type *int_type = glsl_int_type();
      return mode == nir_var_function_temp
         ? nir_local_variable_create(b->impl, int_type, name)
         : nir_variable_create(b->shader, mode, int_type, name);
   }

   /* Same as create_int(), but the variable is a two-component int vector. */
   nir_variable *create_ivec2(nir_variable_mode mode, const char *name) {
      const glsl_type *ivec2_type = glsl_vector_type(GLSL_TYPE_INT, 2);
      return mode == nir_var_function_temp
         ? nir_local_variable_create(b->impl, ivec2_type, name)
         : nir_variable_create(b->shader, mode, ivec2_type, name);
   }

   /* Create `count` int variables named "<prefix>0", "<prefix>1", ...  Both
    * the returned array and the generated names are allocated from lin_ctx.
    */
   nir_variable **create_many_int(nir_variable_mode mode, const char *prefix, unsigned count) {
      nir_variable **vars = static_cast<nir_variable **>(
         linear_alloc_child(lin_ctx, sizeof(nir_variable *) * count));
      for (unsigned idx = 0; idx != count; ++idx) {
         const char *var_name = linear_asprintf(lin_ctx, "%s%u", prefix, idx);
         vars[idx] = create_int(mode, var_name);
      }
      return vars;
   }

   /* Create `count` ivec2 variables named "<prefix>0", "<prefix>1", ...
    * Both the returned array and the generated names are allocated from
    * lin_ctx.
    */
   nir_variable **create_many_ivec2(nir_variable_mode mode, const char *prefix, unsigned count) {
      nir_variable **vars = static_cast<nir_variable **>(
         linear_alloc_child(lin_ctx, sizeof(nir_variable *) * count));
      for (unsigned idx = 0; idx != count; ++idx) {
         const char *var_name = linear_asprintf(lin_ctx, "%s%u", prefix, idx);
         vars[idx] = create_ivec2(mode, var_name);
      }
      return vars;
   }

   /* Number of intrinsic instructions of kind `intrinsic` in the function
    * being built.
    */
   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   /* First intrinsic of kind `intrinsic` located after `after`; pass NULL
    * to start from the beginning.  Returns NULL when there is none.
    */
   nir_intrinsic_instr *find_next_intrinsic(nir_intrinsic_op intrinsic,
                                            nir_intrinsic_instr *after);

   void *mem_ctx;   /* ralloc context used by the fixture */
   void *lin_ctx;   /* linear allocator used for names and variable arrays */
   nir_builder *b;  /* builder for the shader under test */
};
/* Set up the allocation contexts and a builder holding an empty fragment
 * shader.  The initializers run in member declaration order (mem_ctx,
 * lin_ctx, b), which matters because the later ones use mem_ctx.
 */
nir_vars_test::nir_vars_test()
   : mem_ctx(ralloc_context(NULL)),
     lin_ctx(linear_alloc_parent(mem_ctx, 0)),
     b(rzalloc(mem_ctx, nir_builder))
{
   /* Static so the options have static storage duration and are not tied
    * to the lifetime of this constructor call.
    */
   static const nir_shader_compiler_options options = { };

   nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_FRAGMENT, &options);
}
/* Dump the shader when the test recorded a failure, to help debugging, then
 * free the fixture's ralloc context.
 */
nir_vars_test::~nir_vars_test()
{
   const bool test_failed = HasFailure();

   if (test_failed) {
      printf("\nShader from the failed test:\n\n");
      nir_print_shader(b->shader, stdout);
   }

   ralloc_free(mem_ctx);
}
/* Walk every instruction of the function being built and count the
 * intrinsics whose opcode equals `intrinsic`.
 */
unsigned
nir_vars_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned matches = 0;

   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_intrinsic &&
             nir_instr_as_intrinsic(instr)->intrinsic == intrinsic)
            matches++;
      }
   }

   return matches;
}
/* Return the first intrinsic with opcode `intrinsic` that comes after the
 * instruction `after`, or the very first one when `after` is NULL.  Returns
 * NULL when no such intrinsic exists.
 */
nir_intrinsic_instr *
nir_vars_test::find_next_intrinsic(nir_intrinsic_op intrinsic,
                                   nir_intrinsic_instr *after)
{
   /* With no starting point, every instruction is a candidate. */
   bool past_after = (after == NULL);

   nir_foreach_block(block, b->impl) {
      /* Only scan a block once 'after' has been passed, or when the block
       * is the one that contains 'after'.
       */
      if (past_after || block == after->instr.block) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *candidate = nir_instr_as_intrinsic(instr);
            if (past_after) {
               if (candidate->intrinsic == intrinsic)
                  return candidate;
            } else if (candidate == after) {
               /* Matching starts at the instruction following 'after'. */
               past_after = true;
            }
         }
      }
   }

   return NULL;
}
/* Empty fixture subclasses: each one gives a group of tests its own name
 * while reusing the helpers from nir_vars_test.
 */
class nir_redundant_load_vars_test : public nir_vars_test {
};

class nir_copy_prop_vars_test : public nir_vars_test {
};

class nir_dead_write_vars_test : public nir_vars_test {
};
} // namespace
TEST_F(nir_redundant_load_vars_test, duplicated_load)
{
   /* The same input is loaded twice within one block; the pass is expected
    * to remove one of the two loads.
    */
   nir_variable *in = create_int(nir_var_shader_in, "in");
   nir_variable **out = create_many_int(nir_var_shader_out, "out", 2);

   for (unsigned i = 0; i < 2; i++) {
      nir_ssa_def *loaded = nir_load_var(b, in);
      nir_store_var(b, out[i], loaded, 1);
   }

   nir_validate_shader(b->shader, NULL);

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   EXPECT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
}
nir: Copy propagation between blocks Extend the pass to propagate the copies information along the control flow graph. It performs two walks: first it collects the vars that were written inside each node. Then it walks applying the copy propagation using a list of copies previously available. At each node the list is invalidated according to results from the first walk. This approach is simpler than a full data-flow analysis, but covers various cases. If derefs are used for operating on more memory resources (e.g. SSBOs), the difference from a regular pass is expected to be more visible -- as the SSA copy propagation pass won't apply to those. A full data-flow analysis would handle more scenarios: conditional breaks in the control flow and merge equivalent effects from multiple branches (e.g. using a phi node to merge the source for writes to the same deref). However, as previous commentary in the code stated, its complexity 'rapidly get out of hand'. The current patch is a good intermediate step towards more complex analysis. The 'copies' linked list was modified to use util_dynarray to make it more convenient to clone it (to handle ifs/loops). Annotated shader-db results for Skylake: total instructions in shared programs: 15105796 -> 15105451 (<.01%) instructions in affected programs: 152293 -> 151948 (-0.23%) helped: 96 HURT: 17 All the HURTs and many HELPs are one instruction. Looking at pass by pass outputs, the copy prop kicks in removing a bunch of loads correctly, which ends up altering which other optimizations kick in. In those cases the copies would be propagated after lowering to SSA. In a few HELPs we are actually doing more than was possible previously, e.g. consolidating load_uniforms from different blocks. Most of those are from shaders/dolphin/ubershaders/. total cycles in shared programs: 566048861 -> 565954876 (-0.02%) cycles in affected programs: 151461830 -> 151367845 (-0.06%) helped: 2933 HURT: 2950 A lot of noise on both sides.
total loops in shared programs: 4603 -> 4603 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 11085 -> 11073 (-0.11%) spills in affected programs: 23 -> 11 (-52.17%) helped: 1 HURT: 0 The shaders/dolphin/ubershaders/12.shader_test was able to pull a couple of loads from inside if statements and reuse them. total fills in shared programs: 23143 -> 23089 (-0.23%) fills in affected programs: 2718 -> 2664 (-1.99%) helped: 27 HURT: 0 All from shaders/dolphin/ubershaders/. LOST: 0 GAINED: 0 The other generations follow the same overall shape. The spills and fills HURTs are all from the same game. shader-db results for Broadwell. total instructions in shared programs: 15402037 -> 15401841 (<.01%) instructions in affected programs: 144386 -> 144190 (-0.14%) helped: 86 HURT: 9 total cycles in shared programs: 600912755 -> 600902486 (<.01%) cycles in affected programs: 185662820 -> 185652551 (<.01%) helped: 2598 HURT: 3053 total loops in shared programs: 4579 -> 4579 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 80929 -> 80924 (<.01%) spills in affected programs: 720 -> 715 (-0.69%) helped: 1 HURT: 5 total fills in shared programs: 93057 -> 93013 (-0.05%) fills in affected programs: 3398 -> 3354 (-1.29%) helped: 27 HURT: 5 LOST: 0 GAINED: 2 shader-db results for Haswell: total instructions in shared programs: 9231975 -> 9230357 (-0.02%) instructions in affected programs: 44992 -> 43374 (-3.60%) helped: 27 HURT: 69 total cycles in shared programs: 87760587 -> 87727502 (-0.04%) cycles in affected programs: 7720673 -> 7687588 (-0.43%) helped: 1609 HURT: 1416 total loops in shared programs: 1830 -> 1830 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 1988 -> 1692 (-14.89%) spills in affected programs: 296 -> 0 helped: 1 HURT: 0 total fills in shared programs: 2103 -> 1668 (-20.68%) fills in affected programs: 438 -> 3 (-99.32%) helped: 4 
HURT: 0 LOST: 0 GAINED: 1 v2: Remove the DISABLE prefix from tests we now pass. v3: Add comments about missing write_mask handling. (Caio) Add unreachable when switching on cf_node type. (Jason) Properly merge the component information in written map instead of replacing. (Jason) Explain how removal from written arrays works. (Jason) Use mode directly from deref instead of getting the var. (Jason) v4: Register the local written mode for calls. (Jason) Prefer cf_node instead of node. (Jason) Clarify that remove inside iteration only works in backward iterations. (Jason) Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2018-09-14 11:41:39 -07:00
TEST_F(nir_redundant_load_vars_test, duplicated_load_in_two_blocks)
{
   /* The same input is loaded in two different blocks; the pass is expected
    * to remove one of the two loads.
    */
   nir_variable *in = create_int(nir_var_shader_in, "in");
   nir_variable **out = create_many_int(nir_var_shader_out, "out", 2);

   nir_ssa_def *first_load = nir_load_var(b, in);
   nir_store_var(b, out[0], first_load, 1);

   /* An empty if statement in between forces the two stores into different
    * blocks.
    */
   nir_if *splitter = nir_push_if(b, nir_imm_int(b, 0));
   nir_pop_if(b, splitter);

   nir_ssa_def *second_load = nir_load_var(b, in);
   nir_store_var(b, out[1], second_load, 1);

   nir_validate_shader(b->shader, NULL);

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   EXPECT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
}
nir: Copy propagation between blocks Extend the pass to propagate the copies information along the control flow graph. It performs two walks: first it collects the vars that were written inside each node. Then it walks applying the copy propagation using a list of copies previously available. At each node the list is invalidated according to results from the first walk. This approach is simpler than a full data-flow analysis, but covers various cases. If derefs are used for operating on more memory resources (e.g. SSBOs), the difference from a regular pass is expected to be more visible -- as the SSA copy propagation pass won't apply to those. A full data-flow analysis would handle more scenarios: conditional breaks in the control flow and merge equivalent effects from multiple branches (e.g. using a phi node to merge the source for writes to the same deref). However, as previous commentary in the code stated, its complexity 'rapidly get out of hand'. The current patch is a good intermediate step towards more complex analysis. The 'copies' linked list was modified to use util_dynarray to make it more convenient to clone it (to handle ifs/loops). Annotated shader-db results for Skylake: total instructions in shared programs: 15105796 -> 15105451 (<.01%) instructions in affected programs: 152293 -> 151948 (-0.23%) helped: 96 HURT: 17 All the HURTs and many HELPs are one instruction. Looking at pass by pass outputs, the copy prop kicks in removing a bunch of loads correctly, which ends up altering which other optimizations kick in. In those cases the copies would be propagated after lowering to SSA. In a few HELPs we are actually doing more than was possible previously, e.g. consolidating load_uniforms from different blocks. Most of those are from shaders/dolphin/ubershaders/. total cycles in shared programs: 566048861 -> 565954876 (-0.02%) cycles in affected programs: 151461830 -> 151367845 (-0.06%) helped: 2933 HURT: 2950 A lot of noise on both sides.
total loops in shared programs: 4603 -> 4603 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 11085 -> 11073 (-0.11%) spills in affected programs: 23 -> 11 (-52.17%) helped: 1 HURT: 0 The shaders/dolphin/ubershaders/12.shader_test was able to pull a couple of loads from inside if statements and reuse them. total fills in shared programs: 23143 -> 23089 (-0.23%) fills in affected programs: 2718 -> 2664 (-1.99%) helped: 27 HURT: 0 All from shaders/dolphin/ubershaders/. LOST: 0 GAINED: 0 The other generations follow the same overall shape. The spills and fills HURTs are all from the same game. shader-db results for Broadwell. total instructions in shared programs: 15402037 -> 15401841 (<.01%) instructions in affected programs: 144386 -> 144190 (-0.14%) helped: 86 HURT: 9 total cycles in shared programs: 600912755 -> 600902486 (<.01%) cycles in affected programs: 185662820 -> 185652551 (<.01%) helped: 2598 HURT: 3053 total loops in shared programs: 4579 -> 4579 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 80929 -> 80924 (<.01%) spills in affected programs: 720 -> 715 (-0.69%) helped: 1 HURT: 5 total fills in shared programs: 93057 -> 93013 (-0.05%) fills in affected programs: 3398 -> 3354 (-1.29%) helped: 27 HURT: 5 LOST: 0 GAINED: 2 shader-db results for Haswell: total instructions in shared programs: 9231975 -> 9230357 (-0.02%) instructions in affected programs: 44992 -> 43374 (-3.60%) helped: 27 HURT: 69 total cycles in shared programs: 87760587 -> 87727502 (-0.04%) cycles in affected programs: 7720673 -> 7687588 (-0.43%) helped: 1609 HURT: 1416 total loops in shared programs: 1830 -> 1830 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 1988 -> 1692 (-14.89%) spills in affected programs: 296 -> 0 helped: 1 HURT: 0 total fills in shared programs: 2103 -> 1668 (-20.68%) fills in affected programs: 438 -> 3 (-99.32%) helped: 4 
HURT: 0 LOST: 0 GAINED: 1 v2: Remove the DISABLE prefix from tests we now pass. v3: Add comments about missing write_mask handling. (Caio) Add unreachable when switching on cf_node type. (Jason) Properly merge the component information in written map instead of replacing. (Jason) Explain how removal from written arrays works. (Jason) Use mode directly from deref instead of getting the var. (Jason) v4: Register the local written mode for calls. (Jason) Prefer cf_node instead of node. (Jason) Clarify that remove inside iteration only works in backward iterations. (Jason) Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2018-09-14 11:41:39 -07:00
TEST_F(nir_redundant_load_vars_test, invalidate_inside_if_block)
{
   /* Load three variables, then write to some of them in different branches
    * of an if statement.  Only the written ones should be invalidated.
    */
   nir_variable **g = create_many_int(nir_var_shader_temp, "g", 3);
   nir_variable **out = create_many_int(nir_var_shader_out, "out", 3);

   /* Initial loads of g[0], g[1] and g[2]. */
   for (unsigned i = 0; i < 3; i++)
      nir_load_var(b, g[i]);

   /* g[0] is written in the then-branch, g[1] in the else-branch and g[2]
    * is left untouched.
    */
   nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0));
   {
      nir_ssa_def *ten = nir_imm_int(b, 10);
      nir_store_var(b, g[0], ten, 1);
   }
   nir_push_else(b, if_stmt);
   {
      nir_ssa_def *twenty = nir_imm_int(b, 20);
      nir_store_var(b, g[1], twenty, 1);
   }
   nir_pop_if(b, if_stmt);

   /* Load every variable again after the if and store it to an output. */
   for (unsigned i = 0; i < 3; i++) {
      nir_ssa_def *reloaded = nir_load_var(b, g[i]);
      nir_store_var(b, out[i], reloaded, 1);
   }

   nir_validate_shader(b->shader, NULL);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   EXPECT_TRUE(progress);

   /* There are 3 initial loads, plus 2 loads for the values invalidated
    * inside the if statement.
    */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 5);

   /* We only load g[2] once. */
   unsigned g2_load_count = 0;
   nir_intrinsic_instr *load = NULL;
   for (int remaining = 5; remaining > 0; remaining--) {
      load = find_next_intrinsic(nir_intrinsic_load_deref, load);
      if (nir_intrinsic_get_var(load, 0) == g[2])
         g2_load_count++;
   }
   EXPECT_EQ(g2_load_count, 1);
}
TEST_F(nir_redundant_load_vars_test, invalidate_live_load_in_the_end_of_loop)
{
   /* A write at the end of a loop body invalidates the variable for the
    * whole loop body, so the load inside the loop must be kept and the pass
    * is expected to report no progress.
    */
   nir_variable *v = create_int(nir_var_mem_ssbo, "v");

   /* Load before entering the loop. */
   nir_load_var(b, v);

   nir_loop *loop = nir_push_loop(b);

   /* Conditional break inside the loop. */
   nir_ssa_def *break_cond = nir_imm_int(b, 0);
   nir_if *if_stmt = nir_push_if(b, break_cond);
   nir_jump(b, nir_jump_break);
   nir_pop_if(b, if_stmt);

   /* Load inside the loop, followed by the invalidating store. */
   nir_load_var(b, v);
   nir_ssa_def *ten = nir_imm_int(b, 10);
   nir_store_var(b, v, ten, 1);

   nir_pop_loop(b, loop);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   ASSERT_FALSE(progress);
}
TEST_F(nir_copy_prop_vars_test, simple_copies)
{
   /* Copy in -> temp -> out.  After the pass, both copies are expected to
    * read from the same source.
    */
   nir_variable *in = create_int(nir_var_shader_in, "in");
   nir_variable *temp = create_int(nir_var_function_temp, "temp");
   nir_variable *out = create_int(nir_var_shader_out, "out");

   nir_copy_var(b, temp, in);
   nir_copy_var(b, out, temp);

   nir_validate_shader(b->shader, NULL);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   EXPECT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   /* find_next_intrinsic() returns NULL when no further copy exists.  Fail
    * the test cleanly in that case instead of dereferencing a NULL pointer
    * and crashing the whole test binary.
    */
   nir_intrinsic_instr *copy = NULL;
   copy = find_next_intrinsic(nir_intrinsic_copy_deref, copy);
   ASSERT_TRUE(copy != NULL);

   ASSERT_TRUE(copy->src[1].is_ssa);
   nir_ssa_def *first_src = copy->src[1].ssa;

   copy = find_next_intrinsic(nir_intrinsic_copy_deref, copy);
   ASSERT_TRUE(copy != NULL);

   ASSERT_TRUE(copy->src[1].is_ssa);
   EXPECT_EQ(copy->src[1].ssa, first_src);
}
TEST_F(nir_copy_prop_vars_test, simple_store_load)
{
   /* Store a value to v[0], load it back and store the result to v[1].
    * After the pass, both stores are expected to use the original value.
    */
   nir_variable **v = create_many_ivec2(nir_var_function_temp, "v", 2);

   /* Write mask covering both components of the ivec2. */
   const unsigned xy_mask = 1 | 2;

   nir_ssa_def *stored_value = nir_imm_ivec2(b, 10, 20);
   nir_store_var(b, v[0], stored_value, xy_mask);

   nir_store_var(b, v[1], nir_load_var(b, v[0]), xy_mask);

   nir_validate_shader(b->shader, NULL);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   EXPECT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);

   /* Check the source of each of the two stores in turn. */
   nir_intrinsic_instr *store = NULL;
   for (unsigned remaining = 2; remaining > 0; remaining--) {
      store = find_next_intrinsic(nir_intrinsic_store_deref, store);
      ASSERT_TRUE(store->src[1].is_ssa);
      EXPECT_EQ(store->src[1].ssa, stored_value);
   }
}
TEST_F(nir_copy_prop_vars_test, store_store_load)
{
   /* Write v[0] twice, then load it and store the result to v[1].  The
    * store to v[1] is expected to pick up the value of the second write.
    */
   nir_variable **v = create_many_ivec2(nir_var_function_temp, "v", 2);

   /* Write mask covering both components of the ivec2. */
   const unsigned xy_mask = 1 | 2;

   nir_ssa_def *first_value = nir_imm_ivec2(b, 10, 20);
   nir_store_var(b, v[0], first_value, xy_mask);

   nir_ssa_def *second_value = nir_imm_ivec2(b, 30, 40);
   nir_store_var(b, v[0], second_value, xy_mask);

   nir_store_var(b, v[1], nir_load_var(b, v[0]), xy_mask);

   nir_validate_shader(b->shader, NULL);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   EXPECT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   /* Find the store to v[1]; the pointer stays NULL when there is none. */
   nir_intrinsic_instr *store_to_v1 = NULL;
   for (store_to_v1 = find_next_intrinsic(nir_intrinsic_store_deref, NULL);
        store_to_v1 != NULL;
        store_to_v1 = find_next_intrinsic(nir_intrinsic_store_deref, store_to_v1)) {
      if (nir_intrinsic_get_var(store_to_v1, 0) == v[1])
         break;
   }

   /* Store to v[1] should use second_value directly. */
   if (store_to_v1 != NULL) {
      ASSERT_TRUE(store_to_v1->src[1].is_ssa);
      EXPECT_EQ(store_to_v1->src[1].ssa, second_value);
   }
   EXPECT_TRUE(store_to_v1);
}
TEST_F(nir_copy_prop_vars_test, store_store_load_different_components)
{
   /* Write component 1 of v[0], then component 0 with a different value,
    * and finally copy component 1 of v[0] into v[1] through a load.
    */
   nir_variable **v = create_many_ivec2(nir_var_function_temp, "v", 2);

   const unsigned comp0_mask = 1 << 0;
   const unsigned comp1_mask = 1 << 1;

   nir_ssa_def *first_value = nir_imm_ivec2(b, 10, 20);
   nir_store_var(b, v[0], first_value, comp1_mask);

   nir_ssa_def *second_value = nir_imm_ivec2(b, 30, 40);
   nir_store_var(b, v[0], second_value, comp0_mask);

   nir_store_var(b, v[1], nir_load_var(b, v[0]), comp1_mask);

   nir_validate_shader(b->shader, NULL);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   EXPECT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   /* Constant folding is run so the stored source can be checked as a
    * constant below.
    */
   nir_opt_constant_folding(b->shader);
   nir_validate_shader(b->shader, NULL);

   /* Find the store to v[1]; the pointer stays NULL when there is none. */
   nir_intrinsic_instr *store_to_v1 = NULL;
   for (store_to_v1 = find_next_intrinsic(nir_intrinsic_store_deref, NULL);
        store_to_v1 != NULL;
        store_to_v1 = find_next_intrinsic(nir_intrinsic_store_deref, store_to_v1)) {
      if (nir_intrinsic_get_var(store_to_v1, 0) == v[1])
         break;
   }

   /* Store to v[1] should use first_value directly.  The write of
    * second_value did not overwrite the component it uses.
    */
   if (store_to_v1 != NULL) {
      ASSERT_TRUE(store_to_v1->src[1].is_ssa);
      ASSERT_TRUE(nir_src_is_const(store_to_v1->src[1]));
      ASSERT_EQ(nir_src_comp_as_uint(store_to_v1->src[1], 1), 20);
   }
   EXPECT_TRUE(store_to_v1);
}
nir: Copy propagation between blocks Extend the pass to propagate the copies information along the control flow graph. It performs two walks: first it collects the vars that were written inside each node. Then it walks applying the copy propagation using a list of copies previously available. At each node the list is invalidated according to results from the first walk. This approach is simpler than a full data-flow analysis, but covers various cases. If derefs are used for operating on more memory resources (e.g. SSBOs), the difference from a regular pass is expected to be more visible -- as the SSA copy propagation pass won't apply to those. A full data-flow analysis would handle more scenarios: conditional breaks in the control flow and merge equivalent effects from multiple branches (e.g. using a phi node to merge the source for writes to the same deref). However, as previous commentary in the code stated, its complexity 'rapidly get out of hand'. The current patch is a good intermediate step towards more complex analysis. The 'copies' linked list was modified to use util_dynarray to make it more convenient to clone it (to handle ifs/loops). Annotated shader-db results for Skylake: total instructions in shared programs: 15105796 -> 15105451 (<.01%) instructions in affected programs: 152293 -> 151948 (-0.23%) helped: 96 HURT: 17 All the HURTs and many HELPs are one instruction. Looking at pass by pass outputs, the copy prop kicks in removing a bunch of loads correctly, which ends up altering which other optimizations kick in. In those cases the copies would be propagated after lowering to SSA. In a few HELPs we are actually doing more than was possible previously, e.g. consolidating load_uniforms from different blocks. Most of those are from shaders/dolphin/ubershaders/. total cycles in shared programs: 566048861 -> 565954876 (-0.02%) cycles in affected programs: 151461830 -> 151367845 (-0.06%) helped: 2933 HURT: 2950 A lot of noise on both sides.
total loops in shared programs: 4603 -> 4603 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 11085 -> 11073 (-0.11%) spills in affected programs: 23 -> 11 (-52.17%) helped: 1 HURT: 0 The shaders/dolphin/ubershaders/12.shader_test was able to pull a couple of loads from inside if statements and reuse them. total fills in shared programs: 23143 -> 23089 (-0.23%) fills in affected programs: 2718 -> 2664 (-1.99%) helped: 27 HURT: 0 All from shaders/dolphin/ubershaders/. LOST: 0 GAINED: 0 The other generations follow the same overall shape. The spills and fills HURTs are all from the same game. shader-db results for Broadwell. total instructions in shared programs: 15402037 -> 15401841 (<.01%) instructions in affected programs: 144386 -> 144190 (-0.14%) helped: 86 HURT: 9 total cycles in shared programs: 600912755 -> 600902486 (<.01%) cycles in affected programs: 185662820 -> 185652551 (<.01%) helped: 2598 HURT: 3053 total loops in shared programs: 4579 -> 4579 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 80929 -> 80924 (<.01%) spills in affected programs: 720 -> 715 (-0.69%) helped: 1 HURT: 5 total fills in shared programs: 93057 -> 93013 (-0.05%) fills in affected programs: 3398 -> 3354 (-1.29%) helped: 27 HURT: 5 LOST: 0 GAINED: 2 shader-db results for Haswell: total instructions in shared programs: 9231975 -> 9230357 (-0.02%) instructions in affected programs: 44992 -> 43374 (-3.60%) helped: 27 HURT: 69 total cycles in shared programs: 87760587 -> 87727502 (-0.04%) cycles in affected programs: 7720673 -> 7687588 (-0.43%) helped: 1609 HURT: 1416 total loops in shared programs: 1830 -> 1830 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 1988 -> 1692 (-14.89%) spills in affected programs: 296 -> 0 helped: 1 HURT: 0 total fills in shared programs: 2103 -> 1668 (-20.68%) fills in affected programs: 438 -> 3 (-99.32%) helped: 4 
HURT: 0 LOST: 0 GAINED: 1 v2: Remove the DISABLE prefix from tests we now pass. v3: Add comments about missing write_mask handling. (Caio) Add unreachable when switching on cf_node type. (Jason) Properly merge the component information in written map instead of replacing. (Jason) Explain how removal from written arrays works. (Jason) Use mode directly from deref instead of getting the var. (Jason) v4: Register the local written mode for calls. (Jason) Prefer cf_node instead of node. (Jason) Clarify that remove inside iteration only works in backward iterations. (Jason) Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2018-09-14 11:41:39 -07:00
TEST_F(nir_copy_prop_vars_test, store_store_load_different_components_in_many_blocks)
{
   /* Write component 1 of v[0], then component 0 with a different value,
    * and finally copy component 1 of v[0] into v[1] through a load, with
    * each step placed in a different block.
    */
   nir_variable **v = create_many_ivec2(nir_var_function_temp, "v", 2);

   nir_ssa_def *first_value = nir_imm_ivec2(b, 10, 20);
   nir_store_var(b, v[0], first_value, 1 << 1);

   /* Adding an if statement will cause blocks to be created. */
   nir_pop_if(b, nir_push_if(b, nir_imm_int(b, 0)));

   nir_ssa_def *second_value = nir_imm_ivec2(b, 30, 40);
   nir_store_var(b, v[0], second_value, 1 << 0);

   /* Adding an if statement will cause blocks to be created. */
   nir_pop_if(b, nir_push_if(b, nir_imm_int(b, 0)));

   nir_ssa_def *read_value = nir_load_var(b, v[0]);
   nir_store_var(b, v[1], read_value, 1 << 1);

   nir_validate_shader(b->shader, NULL);

   /* No unconditional shader dumps here: the fixture destructor already
    * prints the shader when the test fails, so passing runs stay quiet.
    */
   bool progress = nir_opt_copy_prop_vars(b->shader);
   EXPECT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   /* Constant folding is run so the stored source can be checked as a
    * constant below.
    */
   nir_opt_constant_folding(b->shader);
   nir_validate_shader(b->shader, NULL);

   /* Store to v[1] should use first_value directly.  The write of
    * second_value did not overwrite the component it uses.
    */
   nir_intrinsic_instr *store_to_v1 = NULL;
   while ((store_to_v1 = find_next_intrinsic(nir_intrinsic_store_deref, store_to_v1)) != NULL) {
      if (nir_intrinsic_get_var(store_to_v1, 0) == v[1]) {
         ASSERT_TRUE(store_to_v1->src[1].is_ssa);
         ASSERT_TRUE(nir_src_is_const(store_to_v1->src[1]));
         ASSERT_EQ(nir_src_comp_as_uint(store_to_v1->src[1], 1), 20);
         break;
      }
   }

   /* Fails when no store to v[1] was found at all. */
   EXPECT_TRUE(store_to_v1);
}
nir: Copy propagation between blocks Extend the pass to propagate the copies information along the control flow graph. It performs two walks: first it collects the vars that were written inside each node. Then it walks applying the copy propagation using a list of copies previously available. At each node the list is invalidated according to results from the first walk. This approach is simpler than a full data-flow analysis, but covers various cases. If derefs are used for operating on more memory resources (e.g. SSBOs), the difference from a regular pass is expected to be more visible -- as the SSA copy propagation pass won't apply to those. A full data-flow analysis would handle more scenarios: conditional breaks in the control flow and merge equivalent effects from multiple branches (e.g. using a phi node to merge the source for writes to the same deref). However, as previous commentary in the code stated, its complexity 'rapidly get out of hand'. The current patch is a good intermediate step towards more complex analysis. The 'copies' linked list was modified to use util_dynarray to make it more convenient to clone it (to handle ifs/loops). Annotated shader-db results for Skylake: total instructions in shared programs: 15105796 -> 15105451 (<.01%) instructions in affected programs: 152293 -> 151948 (-0.23%) helped: 96 HURT: 17 All the HURTs and many HELPs are one instruction. Looking at pass by pass outputs, the copy prop kicks in removing a bunch of loads correctly, which ends up altering which other optimizations kick in. In those cases the copies would be propagated after lowering to SSA. In a few HELPs we are actually doing more than was possible previously, e.g. consolidating load_uniforms from different blocks. Most of those are from shaders/dolphin/ubershaders/. total cycles in shared programs: 566048861 -> 565954876 (-0.02%) cycles in affected programs: 151461830 -> 151367845 (-0.06%) helped: 2933 HURT: 2950 A lot of noise on both sides.
total loops in shared programs: 4603 -> 4603 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 11085 -> 11073 (-0.11%) spills in affected programs: 23 -> 11 (-52.17%) helped: 1 HURT: 0 The shaders/dolphin/ubershaders/12.shader_test was able to pull a couple of loads from inside if statements and reuse them. total fills in shared programs: 23143 -> 23089 (-0.23%) fills in affected programs: 2718 -> 2664 (-1.99%) helped: 27 HURT: 0 All from shaders/dolphin/ubershaders/. LOST: 0 GAINED: 0 The other generations follow the same overall shape. The spills and fills HURTs are all from the same game. shader-db results for Broadwell. total instructions in shared programs: 15402037 -> 15401841 (<.01%) instructions in affected programs: 144386 -> 144190 (-0.14%) helped: 86 HURT: 9 total cycles in shared programs: 600912755 -> 600902486 (<.01%) cycles in affected programs: 185662820 -> 185652551 (<.01%) helped: 2598 HURT: 3053 total loops in shared programs: 4579 -> 4579 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 80929 -> 80924 (<.01%) spills in affected programs: 720 -> 715 (-0.69%) helped: 1 HURT: 5 total fills in shared programs: 93057 -> 93013 (-0.05%) fills in affected programs: 3398 -> 3354 (-1.29%) helped: 27 HURT: 5 LOST: 0 GAINED: 2 shader-db results for Haswell: total instructions in shared programs: 9231975 -> 9230357 (-0.02%) instructions in affected programs: 44992 -> 43374 (-3.60%) helped: 27 HURT: 69 total cycles in shared programs: 87760587 -> 87727502 (-0.04%) cycles in affected programs: 7720673 -> 7687588 (-0.43%) helped: 1609 HURT: 1416 total loops in shared programs: 1830 -> 1830 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 1988 -> 1692 (-14.89%) spills in affected programs: 296 -> 0 helped: 1 HURT: 0 total fills in shared programs: 2103 -> 1668 (-20.68%) fills in affected programs: 438 -> 3 (-99.32%) helped: 4 
HURT: 0 LOST: 0 GAINED: 1 v2: Remove the DISABLE prefix from tests we now pass. v3: Add comments about missing write_mask handling. (Caio) Add unreachable when switching on cf_node type. (Jason) Properly merge the component information in written map instead of replacing. (Jason) Explain how removal from written arrays works. (Jason) Use mode directly from deref instead of getting the var. (Jason) v4: Register the local written mode for calls. (Jason) Prefer cf_node instead of node. (Jason) Clarify that remove inside iteration only works in backward iterations. (Jason) Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2018-09-14 11:41:39 -07:00
TEST_F(nir_copy_prop_vars_test, memory_barrier_in_two_blocks)
{
   /* Store to v[0] and v[1], then read both back in a later block with a
    * memory barrier between the two reads.
    */
   nir_variable **v = create_many_int(nir_var_mem_ssbo, "v", 4);

   nir_ssa_def *one = nir_imm_int(b, 1);
   nir_store_var(b, v[0], one, 1);
   nir_ssa_def *two = nir_imm_int(b, 2);
   nir_store_var(b, v[1], two, 1);

   /* Split into many blocks. */
   nir_if *splitter = nir_push_if(b, nir_imm_int(b, 0));
   nir_pop_if(b, splitter);

   /* Read of v[0] before the barrier. */
   nir_ssa_def *before_barrier = nir_load_var(b, v[0]);
   nir_store_var(b, v[2], before_barrier, 1);

   nir_intrinsic_instr *barrier =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier);
   nir_builder_instr_insert(b, &barrier->instr);

   /* Read of v[1] after the barrier. */
   nir_ssa_def *after_barrier = nir_load_var(b, v[1]);
   nir_store_var(b, v[3], after_barrier, 1);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   ASSERT_TRUE(progress);

   /* Only the second load will remain after the optimization. */
   ASSERT_EQ(1, count_intrinsics(nir_intrinsic_load_deref));

   nir_intrinsic_instr *load = find_next_intrinsic(nir_intrinsic_load_deref, NULL);
   ASSERT_EQ(nir_intrinsic_get_var(load, 0), v[1]);
}
nir: Copy propagation between blocks Extend the pass to propagate the copies information along the control flow graph. It performs two walks: first it collects the vars that were written inside each node. Then it walks applying the copy propagation using a list of copies previously available. At each node the list is invalidated according to results from the first walk. This approach is simpler than a full data-flow analysis, but covers various cases. If derefs are used for operating on more memory resources (e.g. SSBOs), the difference from a regular pass is expected to be more visible -- as the SSA copy propagation pass won't apply to those. A full data-flow analysis would handle more scenarios: conditional breaks in the control flow and merge equivalent effects from multiple branches (e.g. using a phi node to merge the source for writes to the same deref). However, as previous commentary in the code stated, its complexity 'rapidly get out of hand'. The current patch is a good intermediate step towards more complex analysis. The 'copies' linked list was modified to use util_dynarray to make it more convenient to clone it (to handle ifs/loops). Annotated shader-db results for Skylake: total instructions in shared programs: 15105796 -> 15105451 (<.01%) instructions in affected programs: 152293 -> 151948 (-0.23%) helped: 96 HURT: 17 All the HURTs and many HELPs are one instruction. Looking at pass by pass outputs, the copy prop kicks in removing a bunch of loads correctly, which ends up altering which other optimizations kick in. In those cases the copies would be propagated after lowering to SSA. In a few HELPs we are actually doing more than was possible previously, e.g. consolidating load_uniforms from different blocks. Most of those are from shaders/dolphin/ubershaders/. total cycles in shared programs: 566048861 -> 565954876 (-0.02%) cycles in affected programs: 151461830 -> 151367845 (-0.06%) helped: 2933 HURT: 2950 A lot of noise on both sides. 
total loops in shared programs: 4603 -> 4603 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 11085 -> 11073 (-0.11%) spills in affected programs: 23 -> 11 (-52.17%) helped: 1 HURT: 0 The shaders/dolphin/ubershaders/12.shader_test was able to pull a couple of loads from inside if statements and reuse them. total fills in shared programs: 23143 -> 23089 (-0.23%) fills in affected programs: 2718 -> 2664 (-1.99%) helped: 27 HURT: 0 All from shaders/dolphin/ubershaders/. LOST: 0 GAINED: 0 The other generations follow the same overall shape. The spills and fills HURTs are all from the same game. shader-db results for Broadwell. total instructions in shared programs: 15402037 -> 15401841 (<.01%) instructions in affected programs: 144386 -> 144190 (-0.14%) helped: 86 HURT: 9 total cycles in shared programs: 600912755 -> 600902486 (<.01%) cycles in affected programs: 185662820 -> 185652551 (<.01%) helped: 2598 HURT: 3053 total loops in shared programs: 4579 -> 4579 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 80929 -> 80924 (<.01%) spills in affected programs: 720 -> 715 (-0.69%) helped: 1 HURT: 5 total fills in shared programs: 93057 -> 93013 (-0.05%) fills in affected programs: 3398 -> 3354 (-1.29%) helped: 27 HURT: 5 LOST: 0 GAINED: 2 shader-db results for Haswell: total instructions in shared programs: 9231975 -> 9230357 (-0.02%) instructions in affected programs: 44992 -> 43374 (-3.60%) helped: 27 HURT: 69 total cycles in shared programs: 87760587 -> 87727502 (-0.04%) cycles in affected programs: 7720673 -> 7687588 (-0.43%) helped: 1609 HURT: 1416 total loops in shared programs: 1830 -> 1830 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 1988 -> 1692 (-14.89%) spills in affected programs: 296 -> 0 helped: 1 HURT: 0 total fills in shared programs: 2103 -> 1668 (-20.68%) fills in affected programs: 438 -> 3 (-99.32%) helped: 4 
HURT: 0 LOST: 0 GAINED: 1 v2: Remove the DISABLE prefix from tests we now pass. v3: Add comments about missing write_mask handling. (Caio) Add unreachable when switching on cf_node type. (Jason) Properly merge the component information in written map instead of replacing. (Jason) Explain how removal from written arrays works. (Jason) Use mode directly from deref instead of getting the var. (Jason) v4: Register the local written mode for calls. (Jason) Prefer cf_node instead of node. (Jason) Clarify that remove inside iteration only works in backward iterations. (Jason) Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2018-09-14 11:41:39 -07:00
TEST_F(nir_copy_prop_vars_test, simple_store_load_in_two_blocks)
{
   nir_variable **v = create_many_ivec2(nir_var_function_temp, "v", 2);

   /* Write both components of the ivec2. */
   const unsigned both_components = (1 << 0) | (1 << 1);

   nir_ssa_def *stored_value = nir_imm_ivec2(b, 10, 20);
   nir_store_var(b, v[0], stored_value, both_components);

   /* Adding an if statement will cause blocks to be created. */
   nir_if *splitter = nir_push_if(b, nir_imm_int(b, 0));
   nir_pop_if(b, splitter);

   /* Copy v[0] into v[1] through a load placed after the if statement. */
   nir_ssa_def *reloaded = nir_load_var(b, v[0]);
   nir_store_var(b, v[1], reloaded, both_components);

   nir_validate_shader(b->shader, NULL);

   bool progress = nir_opt_copy_prop_vars(b->shader);
   EXPECT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);

   /* Both stores are expected to use the original immediate as their
    * source value.
    */
   nir_intrinsic_instr *store = NULL;
   for (unsigned remaining = 2; remaining > 0; remaining--) {
      store = find_next_intrinsic(nir_intrinsic_store_deref, store);
      ASSERT_TRUE(store->src[1].is_ssa);
      EXPECT_EQ(store->src[1].ssa, stored_value);
   }
}
TEST_F(nir_dead_write_vars_test, no_dead_writes_in_block)
{
   nir_variable **v = create_many_int(nir_var_mem_ssbo, "v", 2);

   /* A single store, fed by a load of another variable: the pass is
    * expected to report no progress.
    */
   nir_ssa_def *source = nir_load_var(b, v[1]);
   nir_store_var(b, v[0], source, 1);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_FALSE(progress);
}
TEST_F(nir_dead_write_vars_test, no_dead_writes_different_components_in_block)
{
   nir_variable **v = create_many_ivec2(nir_var_mem_ssbo, "v", 3);

   /* The two stores to v[0] use disjoint write masks (component 0, then
    * component 1), so the test expects neither to be removed.
    */
   const unsigned first_component = 1 << 0;
   const unsigned second_component = 1 << 1;

   nir_ssa_def *from_v1 = nir_load_var(b, v[1]);
   nir_store_var(b, v[0], from_v1, first_component);

   nir_ssa_def *from_v2 = nir_load_var(b, v[2]);
   nir_store_var(b, v[0], from_v2, second_component);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_FALSE(progress);
}
TEST_F(nir_dead_write_vars_test, no_dead_writes_in_if_statement)
{
   nir_variable **v = create_many_int(nir_var_mem_ssbo, "v", 6);

   /* Unconditional stores to v[2] and v[3] ahead of the branch. */
   nir_ssa_def *from_v0 = nir_load_var(b, v[0]);
   nir_store_var(b, v[2], from_v0, 1);

   nir_ssa_def *from_v1 = nir_load_var(b, v[1]);
   nir_store_var(b, v[3], from_v1, 1);

   /* Each arm of the if statement will overwrite one store: the then-arm
    * writes v[2], the else-arm writes v[3].
    */
   nir_if *branch = nir_push_if(b, nir_imm_int(b, 0));

   nir_ssa_def *from_v4 = nir_load_var(b, v[4]);
   nir_store_var(b, v[2], from_v4, 1);

   nir_push_else(b, branch);

   nir_ssa_def *from_v5 = nir_load_var(b, v[5]);
   nir_store_var(b, v[3], from_v5, 1);

   nir_pop_if(b, branch);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_FALSE(progress);
}
TEST_F(nir_dead_write_vars_test, no_dead_writes_in_loop_statement)
{
   nir_variable **v = create_many_int(nir_var_mem_ssbo, "v", 3);

   /* Store that precedes the loop. */
   nir_ssa_def *from_v1 = nir_load_var(b, v[1]);
   nir_store_var(b, v[0], from_v1, 1);

   /* Loop will write other value.  Since it might not be executed, it
    * doesn't kill the first write.
    */
   nir_loop *loop = nir_push_loop(b);

   /* Conditional break at the top of the loop body. */
   nir_if *exit_check = nir_push_if(b, nir_imm_int(b, 0));
   nir_jump(b, nir_jump_break);
   nir_pop_if(b, exit_check);

   nir_ssa_def *from_v2 = nir_load_var(b, v[2]);
   nir_store_var(b, v[0], from_v2, 1);

   nir_pop_loop(b, loop);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_FALSE(progress);
}
TEST_F(nir_dead_write_vars_test, dead_write_in_block)
{
   nir_variable **v = create_many_int(nir_var_mem_ssbo, "v", 3);

   /* Two stores to v[0] in the same block; the test expects only the
    * second one (whose value is load_v2) to survive the pass.
    */
   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1);
   nir_ssa_def *load_v2 = nir_load_var(b, v[2]);
   nir_store_var(b, v[0], load_v2, 1);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_TRUE(progress);

   EXPECT_EQ(1, count_intrinsics(nir_intrinsic_store_deref));

   nir_intrinsic_instr *store = find_next_intrinsic(nir_intrinsic_store_deref, NULL);
   /* The count check above is non-fatal, so stop here instead of
    * dereferencing a missing store (find_next_intrinsic presumably yields
    * NULL when nothing matches -- confirm against its definition).
    */
   ASSERT_TRUE(store != NULL);
   ASSERT_TRUE(store->src[1].is_ssa);
   EXPECT_EQ(store->src[1].ssa, load_v2);
}
TEST_F(nir_dead_write_vars_test, dead_write_components_in_block)
{
   nir_variable **v = create_many_ivec2(nir_var_mem_ssbo, "v", 3);

   /* Both stores write component 0 of v[0]; the test expects only the
    * second one (whose value is load_v2) to survive the pass.
    */
   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1 << 0);
   nir_ssa_def *load_v2 = nir_load_var(b, v[2]);
   nir_store_var(b, v[0], load_v2, 1 << 0);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_TRUE(progress);

   EXPECT_EQ(1, count_intrinsics(nir_intrinsic_store_deref));

   nir_intrinsic_instr *store = find_next_intrinsic(nir_intrinsic_store_deref, NULL);
   /* The count check above is non-fatal, so stop here instead of
    * dereferencing a missing store (find_next_intrinsic presumably yields
    * NULL when nothing matches -- confirm against its definition).
    */
   ASSERT_TRUE(store != NULL);
   ASSERT_TRUE(store->src[1].is_ssa);
   EXPECT_EQ(store->src[1].ssa, load_v2);
}
/* TODO: The DISABLED tests below depend on the dead write removal being able
 * to identify dead writes across multiple blocks. This is not implemented
 * yet.
 */
TEST_F(nir_dead_write_vars_test, DISABLED_dead_write_in_two_blocks)
{
   nir_variable **v = create_many_int(nir_var_mem_ssbo, "v", 3);

   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1);
   nir_ssa_def *load_v2 = nir_load_var(b, v[2]);

   /* Causes the stores to be in different blocks. */
   nir_pop_if(b, nir_push_if(b, nir_imm_int(b, 0)));

   nir_store_var(b, v[0], load_v2, 1);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_TRUE(progress);

   /* Only the store placed after the if statement is expected to remain. */
   EXPECT_EQ(1, count_intrinsics(nir_intrinsic_store_deref));

   nir_intrinsic_instr *store = find_next_intrinsic(nir_intrinsic_store_deref, NULL);
   /* The count check above is non-fatal, so stop here instead of
    * dereferencing a missing store (find_next_intrinsic presumably yields
    * NULL when nothing matches -- confirm against its definition).
    */
   ASSERT_TRUE(store != NULL);
   ASSERT_TRUE(store->src[1].is_ssa);
   EXPECT_EQ(store->src[1].ssa, load_v2);
}
TEST_F(nir_dead_write_vars_test, DISABLED_dead_write_components_in_two_blocks)
{
   nir_variable **v = create_many_ivec2(nir_var_mem_ssbo, "v", 3);

   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1 << 0);

   /* Causes the stores to be in different blocks. */
   nir_pop_if(b, nir_push_if(b, nir_imm_int(b, 0)));

   /* Second store to the same component (0) of v[0]. */
   nir_ssa_def *load_v2 = nir_load_var(b, v[2]);
   nir_store_var(b, v[0], load_v2, 1 << 0);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_TRUE(progress);

   /* Only the store placed after the if statement is expected to remain. */
   EXPECT_EQ(1, count_intrinsics(nir_intrinsic_store_deref));

   nir_intrinsic_instr *store = find_next_intrinsic(nir_intrinsic_store_deref, NULL);
   /* The count check above is non-fatal, so stop here instead of
    * dereferencing a missing store (find_next_intrinsic presumably yields
    * NULL when nothing matches -- confirm against its definition).
    */
   ASSERT_TRUE(store != NULL);
   ASSERT_TRUE(store->src[1].is_ssa);
   EXPECT_EQ(store->src[1].ssa, load_v2);
}
TEST_F(nir_dead_write_vars_test, DISABLED_dead_writes_in_if_statement)
{
   nir_variable **v = create_many_int(nir_var_mem_ssbo, "v", 4);

   /* Both branches will overwrite, making the previous store dead. */
   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1);

   nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0));
   nir_ssa_def *load_v2 = nir_load_var(b, v[2]);
   nir_store_var(b, v[0], load_v2, 1);

   nir_push_else(b, if_stmt);
   nir_ssa_def *load_v3 = nir_load_var(b, v[3]);
   nir_store_var(b, v[0], load_v3, 1);

   nir_pop_if(b, if_stmt);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_TRUE(progress);

   /* One store per arm is expected to remain. */
   EXPECT_EQ(2, count_intrinsics(nir_intrinsic_store_deref));

   /* The count check above is non-fatal, so each lookup is verified before
    * it is dereferenced (find_next_intrinsic presumably yields NULL when
    * nothing matches -- confirm against its definition).
    */
   nir_intrinsic_instr *store = NULL;

   store = find_next_intrinsic(nir_intrinsic_store_deref, store);
   ASSERT_TRUE(store != NULL);
   ASSERT_TRUE(store->src[1].is_ssa);
   EXPECT_EQ(store->src[1].ssa, load_v2);

   store = find_next_intrinsic(nir_intrinsic_store_deref, store);
   ASSERT_TRUE(store != NULL);
   ASSERT_TRUE(store->src[1].is_ssa);
   EXPECT_EQ(store->src[1].ssa, load_v3);
}
TEST_F(nir_dead_write_vars_test, DISABLED_memory_barrier_in_two_blocks)
{
   nir_variable **v = create_many_int(nir_var_mem_ssbo, "v", 2);

   /* Initial stores to both variables. */
   nir_store_var(b, v[0], nir_imm_int(b, 1), 1);
   nir_store_var(b, v[1], nir_imm_int(b, 2), 1);

   /* Split into many blocks. */
   nir_if *splitter = nir_push_if(b, nir_imm_int(b, 0));
   nir_pop_if(b, splitter);

   /* Because it is before the barrier, this will kill the previous store to
    * that target.
    */
   nir_store_var(b, v[0], nir_imm_int(b, 3), 1);

   nir_intrinsic_instr *barrier =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier);
   nir_builder_instr_insert(b, &barrier->instr);

   /* This store to v[1] comes after the barrier. */
   nir_store_var(b, v[1], nir_imm_int(b, 4), 1);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_TRUE(progress);

   /* Three of the four stores are expected to remain. */
   EXPECT_EQ(3, count_intrinsics(nir_intrinsic_store_deref));
}
TEST_F(nir_dead_write_vars_test, DISABLED_unrelated_barrier_in_two_blocks)
{
   nir_variable **v = create_many_int(nir_var_mem_ssbo, "v", 3);
   nir_variable *out = create_int(nir_var_shader_out, "out");

   nir_store_var(b, out, nir_load_var(b, v[1]), 1);
   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1);

   /* Split into many blocks. */
   nir_pop_if(b, nir_push_if(b, nir_imm_int(b, 0)));

   /* Emit vertex will ensure writes to output variables are considered used,
    * but should not affect other types of variables. */
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_emit_vertex)->instr);

   nir_store_var(b, out, nir_load_var(b, v[2]), 1);
   nir_store_var(b, v[0], nir_load_var(b, v[2]), 1);

   bool progress = nir_opt_dead_write_vars(b->shader);
   ASSERT_TRUE(progress);

   /* Verify the first write to v[0] was removed. */
   EXPECT_EQ(3, count_intrinsics(nir_intrinsic_store_deref));

   /* The count check above is non-fatal, so each lookup is verified before
    * it is handed to nir_intrinsic_get_var (find_next_intrinsic presumably
    * yields NULL when nothing matches -- confirm against its definition).
    */
   nir_intrinsic_instr *store = NULL;

   store = find_next_intrinsic(nir_intrinsic_store_deref, store);
   ASSERT_TRUE(store != NULL);
   EXPECT_EQ(nir_intrinsic_get_var(store, 0), out);

   store = find_next_intrinsic(nir_intrinsic_store_deref, store);
   ASSERT_TRUE(store != NULL);
   EXPECT_EQ(nir_intrinsic_get_var(store, 0), out);

   store = find_next_intrinsic(nir_intrinsic_store_deref, store);
   ASSERT_TRUE(store != NULL);
   EXPECT_EQ(nir_intrinsic_get_var(store, 0), v[0]);
}