nir/opt_load_store_vectorize: fix check_for_robustness() with deref access

We could do better if we knew the nir_address_format to obtain
addition_bits, but the only affected driver (Turnip) probably won't
benefit because it doesn't vectorize across vec4.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: 2e7bceb220 ("nir/load_store_vectorizer: fix check_for_robustness() with indirect loads")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4922
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11382>
This commit is contained in:
Rhys Perry 2021-06-15 17:50:57 +01:00 committed by Marge Bot
parent 86fe8db4aa
commit 502b06c4f5
2 changed files with 24 additions and 0 deletions

View file

@ -1059,6 +1059,10 @@ check_for_robustness(struct vectorize_ctx *ctx, struct entry *low, uint64_t high
if (!addition_wraps(max_low, high_offset, 64))
return false;
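/* Without a base source (base_src < 0, e.g. a deref-based access) we can't
 * obtain addition_bits, so conservatively report that the offset may wrap.
 */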
if (low->info->base_src < 0)
return true;
/* Second, use information about the factors from address calculation (offset_defs_mul). These
* are not guaranteed to be power-of-2.
*/

View file

@ -1935,6 +1935,26 @@ TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_strid
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}
TEST_F(nir_load_store_vectorize_test, shared_offset_overflow_robust_indirect_stride12)
{
   /* A shared uint[4] array, read at 3 * local_invocation_index + {0, 1, 2}
    * through deref-based loads.
    */
   const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), 4, 0);
   nir_variable *shared_var =
      nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
   nir_deref_instr *var_deref = nir_build_deref_var(b, shared_var);

   nir_ssa_def *invocation = nir_load_local_invocation_index(b);
   nir_ssa_def *base = nir_imul_imm(b, invocation, 3);

   /* Each index is built immediately before the load that uses it so the
    * instructions are emitted in the same order as the loads.
    */
   create_shared_load(nir_build_deref_array(b, var_deref, base), 0x1);

   nir_ssa_def *base_plus_1 = nir_iadd_imm(b, base, 1);
   create_shared_load(nir_build_deref_array(b, var_deref, base_plus_1), 0x2);

   nir_ssa_def *base_plus_2 = nir_iadd_imm(b, base, 2);
   create_shared_load(nir_build_deref_array(b, var_deref, base_plus_2), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);

   /* The vectorizer must report no progress and leave all three loads
    * untouched.
    */
   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared, false, nir_var_mem_shared));
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
}
TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4)
{
nir_ssa_def *offset = nir_load_local_invocation_index(b);