ac/llvm,radeonsi: use texture non-uniform flag as waterfall switch

The flag is also used for calling nir_lower_non_uniform_access() when ACO is used.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22523>
This commit is contained in:
Qiang Yu 2023-04-13 21:00:34 +08:00 committed by Marge Bot
parent ba5eb2f5c1
commit 75b75c6c0a
5 changed files with 13 additions and 29 deletions

View file

@ -4399,8 +4399,6 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
struct waterfall_context *wctx, LLVMValueRef *res_ptr,
LLVMValueRef *samp_ptr)
{
bool texture_handle_divergent = false;
bool sampler_handle_divergent = false;
LLVMValueRef texture_dynamic_handle = NULL;
LLVMValueRef sampler_dynamic_handle = NULL;
int plane = -1;
@ -4418,14 +4416,10 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
else
*samp_ptr = val;
} else {
bool divergent = instr->src[i].src.ssa->divergent;
if (instr->src[i].src_type == nir_tex_src_texture_handle) {
if (instr->src[i].src_type == nir_tex_src_texture_handle)
texture_dynamic_handle = val;
texture_handle_divergent = divergent;
} else {
else
sampler_dynamic_handle = val;
sampler_handle_divergent = divergent;
}
}
break;
}
@ -4455,23 +4449,11 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
main_descriptor = AC_DESC_FMASK;
}
/* instr->sampler_non_uniform and texture_non_uniform are always false in GLSL,
* but this can lead to unexpected behavior if the texture/sampler index comes from
* a vertex attribute.
* For instance, 2 consecutive draws using 2 different index values,
* could be squashed together by the hw - producing a single draw with
* non-dynamically uniform index.
* To avoid this, detect divergent indexing, and use enter_waterfall.
* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/2253.
*/
/* descriptor handles given through nir_tex_src_{texture,sampler}_handle */
if (instr->texture_non_uniform ||
(ctx->abi->use_waterfall_for_divergent_tex_samplers && texture_handle_divergent))
if (instr->texture_non_uniform)
texture_dynamic_handle = enter_waterfall(ctx, &wctx[0], texture_dynamic_handle, true);
if (instr->sampler_non_uniform ||
(ctx->abi->use_waterfall_for_divergent_tex_samplers && sampler_handle_divergent))
if (instr->sampler_non_uniform)
sampler_dynamic_handle = enter_waterfall(ctx, &wctx[1], sampler_dynamic_handle, true);
if (texture_dynamic_handle)

View file

@ -499,8 +499,6 @@ static nir_ssa_def *load_bindless_sampler_desc(nir_builder *b, nir_ssa_def *inde
static bool lower_resource_tex(nir_builder *b, nir_tex_instr *tex,
struct lower_resource_state *s)
{
assert(!tex->texture_non_uniform && !tex->sampler_non_uniform);
nir_deref_instr *texture_deref = NULL;
nir_deref_instr *sampler_deref = NULL;
nir_ssa_def *texture_handle = NULL;
@ -554,12 +552,12 @@ static bool lower_resource_tex(nir_builder *b, nir_tex_instr *tex,
}
nir_ssa_def *image = texture_deref ?
load_deref_sampler_desc(b, texture_deref, desc_type, s, false) :
load_deref_sampler_desc(b, texture_deref, desc_type, s, !tex->texture_non_uniform) :
load_bindless_sampler_desc(b, texture_handle, desc_type, s);
nir_ssa_def *sampler = NULL;
if (sampler_deref)
sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, false);
sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, !tex->sampler_non_uniform);
else if (sampler_handle)
sampler = load_bindless_sampler_desc(b, sampler_handle, AC_DESC_SAMPLER, s);

View file

@ -2200,8 +2200,6 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
if (progress || progress2 || opt_offsets)
si_nir_late_opts(nir);
NIR_PASS_V(nir, nir_divergence_analysis);
/* This helps LLVM form VMEM clauses and thus get more GPU cache hits.
* 200 is tuned for Viewperf. It should be done last.
*/

View file

@ -975,7 +975,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
ctx->abi.load_grid_size_from_user_sgpr = true;
ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero ||
info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
ctx->abi.use_waterfall_for_divergent_tex_samplers = true;
ctx->abi.disable_aniso_single_level = true;
ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord;

View file

@ -460,5 +460,12 @@ char *si_finalize_nir(struct pipe_screen *screen, void *nirptr)
NIR_PASS_V(nir, nir_convert_to_lcssa, true, true); /* required by divergence analysis */
NIR_PASS_V(nir, nir_divergence_analysis); /* to find divergent loops */
/* Must be after divergence analysis. */
bool divergence_changed = false;
NIR_PASS(divergence_changed, nir, si_mark_divergent_texture_non_uniform);
/* Re-analyze the whole shader if the divergence of a texture instruction changed. */
if (divergence_changed)
NIR_PASS_V(nir, nir_divergence_analysis);
return NULL;
}