diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 4b1c3f7f7ce..cde217ffd25 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1608,6 +1608,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel, if (progress || progress2) si_nir_late_opts(nir); + NIR_PASS_V(nir, nir_divergence_analysis); + /* This helps LLVM form VMEM clauses and thus get more GPU cache hits. * 200 is tuned for Viewperf. It should be done last. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index fed85ad7b5a..9f94403d33b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -1063,6 +1063,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad ctx->abi.load_grid_size_from_user_sgpr = true; ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero || info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO; + ctx->abi.use_waterfall_for_divergent_tex_samplers = true; for (unsigned i = 0; i < info->num_outputs; i++) { LLVMTypeRef type = ctx->ac.f32;