From dad36b5f12414ebd3e83af16aa67dbf280cd9551 Mon Sep 17 00:00:00 2001
From: Pierre-Eric Pelloux-Prayer
Date: Wed, 25 May 2022 16:05:39 +0200
Subject: [PATCH] radeonsi: enable use_waterfall_for_divergent_tex_samplers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

And run the nir_divergence_analysis pass in si_get_nir_shader to make
sure it's up to date.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2253
Reviewed-by: Marek Olšák
Part-of:
---
 src/gallium/drivers/radeonsi/si_shader.c      | 2 ++
 src/gallium/drivers/radeonsi/si_shader_llvm.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 4b1c3f7f7ce..cde217ffd25 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1608,6 +1608,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
    if (progress || progress2)
       si_nir_late_opts(nir);
 
+   NIR_PASS_V(nir, nir_divergence_analysis);
+
    /* This helps LLVM form VMEM clauses and thus get more GPU cache hits.
     * 200 is tuned for Viewperf. It should be done last.
     */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index fed85ad7b5a..9f94403d33b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -1063,6 +1063,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
    ctx->abi.load_grid_size_from_user_sgpr = true;
    ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero ||
                                 info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
+   ctx->abi.use_waterfall_for_divergent_tex_samplers = true;
 
    for (unsigned i = 0; i < info->num_outputs; i++) {
       LLVMTypeRef type = ctx->ac.f32;