From cd269ebfa2e962fba53c9a67d7e085e0597c416e Mon Sep 17 00:00:00 2001 From: antonino Date: Wed, 2 Aug 2023 17:44:39 +0200 Subject: [PATCH] nir: allow using sysvals in `nir_lower_clip` By passing a `NULL` pointer for the `ucp_enables` argument, the value will be loaded from a sysval. --- src/broadcom/compiler/vir.c | 5 +- src/compiler/nir/nir.h | 6 +- src/compiler/nir/nir_lower_clip.c | 104 ++++++++++++-------- src/freedreno/ir3/ir3_nir.c | 5 +- src/gallium/drivers/crocus/crocus_program.c | 9 +- src/gallium/drivers/iris/iris_program.c | 9 +- src/gallium/drivers/panfrost/pan_shader.c | 3 +- src/gallium/drivers/vc4/vc4_program.c | 3 +- src/mesa/state_tracker/st_program.c | 4 +- 9 files changed, 88 insertions(+), 60 deletions(-) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 8d6f1d99d30..256967602d5 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -1255,8 +1255,9 @@ v3d_nir_lower_fs_late(struct v3d_compile *c) * array variable, so we have GL's clip lowering follow suit * (compact_arrays option at nir_shader_compiler_options) */ - if (c->fs_key->ucp_enables) - NIR_PASS(_, c->s, nir_lower_clip_fs, c->fs_key->ucp_enables, true, false); + unsigned ucp_enables = c->fs_key->ucp_enables; + if (ucp_enables) + NIR_PASS(_, c->s, nir_lower_clip_fs, &ucp_enables, true, false); NIR_PASS(_, c->s, nir_lower_io_to_scalar, nir_var_shader_in, NULL, NULL); } diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 068725142fb..8c863988dfe 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -6051,14 +6051,14 @@ typedef struct nir_input_attachment_options { bool nir_lower_input_attachments(nir_shader *shader, const nir_input_attachment_options *options); -bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, +bool nir_lower_clip_vs(nir_shader *shader, unsigned *ucp_enables, bool use_vars, bool use_clipdist_array, const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); -bool 
nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, +bool nir_lower_clip_gs(nir_shader *shader, unsigned *ucp_enables, bool use_clipdist_array, const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); -bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, +bool nir_lower_clip_fs(nir_shader *shader, unsigned *ucp_enables, bool use_clipdist_array, bool use_load_interp); bool nir_lower_clip_cull_distance_to_vec4s(nir_shader *shader); diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c index 3590317abe9..56bc925e9d0 100644 --- a/src/compiler/nir/nir_lower_clip.c +++ b/src/compiler/nir/nir_lower_clip.c @@ -275,6 +275,7 @@ struct lower_clip_state { nir_variable *out[2]; unsigned ucp_enables; bool use_clipdist_array; + bool use_enables_sysval; const gl_state_index16 (*clipplane_state_tokens)[STATE_LENGTH]; /* This holds the current CLIP_VERTEX value for GS. */ @@ -294,17 +295,16 @@ lower_clip_vertex_var(nir_builder *b, const struct lower_clip_state *state) } for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { - if (state->ucp_enables & (1 << plane)) { - nir_def *ucp = get_ucp(b, plane, state->clipplane_state_tokens); - - /* calculate clipdist[plane] - dot(ucp, cv): */ - clipdist[plane] = nir_fdot(b, ucp, cv); - } else { - /* 0.0 == don't-clip == disabled: */ - clipdist[plane] = nir_imm_float(b, 0.0); - } + nir_def *ucp = get_ucp(b, plane, state->clipplane_state_tokens); + nir_def *ucp_enable_def; + if (state->use_enables_sysval) + ucp_enable_def = nir_load_clip_plane_enable(b); + else + ucp_enable_def = nir_imm_int(b, state->ucp_enables); + clipdist[plane] = nir_bcsel(b, nir_b2b1(b, nir_iand_imm(b, ucp_enable_def, 1 << plane)), + nir_fdot(b, ucp, cv), nir_imm_float(b, 0.0)); if (state->use_clipdist_array && - plane < util_last_bit(state->ucp_enables)) { + (state->use_enables_sysval || plane < util_last_bit(state->ucp_enables))) { nir_deref_instr *deref; deref = nir_build_deref_array_imm(b, nir_build_deref_var(b, 
state->out[0]), @@ -338,15 +338,14 @@ lower_clip_vertex_intrin(nir_builder *b, const struct lower_clip_state *state) } for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { - if (state->ucp_enables & (1 << plane)) { - nir_def *ucp = get_ucp(b, plane, state->clipplane_state_tokens); - - /* calculate clipdist[plane] - dot(ucp, cv): */ - clipdist[plane] = nir_fdot(b, ucp, cv); - } else { - /* 0.0 == don't-clip == disabled: */ - clipdist[plane] = nir_imm_float(b, 0.0); - } + nir_def *ucp = get_ucp(b, plane, state->clipplane_state_tokens); + nir_def *ucp_enable_def; + if (state->use_enables_sysval) + ucp_enable_def = nir_load_clip_plane_enable(b); + else + ucp_enable_def = nir_imm_int(b, state->ucp_enables); + clipdist[plane] = nir_bcsel(b, nir_b2b1(b, nir_iand_imm(b, ucp_enable_def, 1 << plane)), + nir_fdot(b, ucp, cv), nir_imm_float(b, 0.0)); } if (state->use_clipdist_array) { @@ -383,11 +382,13 @@ lower_clip_vertex_intrin(nir_builder *b, const struct lower_clip_state *state) * clipdist output instead of two vec4s. */ bool -nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars, +nir_lower_clip_vs(nir_shader *shader, unsigned *ucp_enables, bool use_vars, bool use_clipdist_array, const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]) { - if (!ucp_enables) + unsigned ucp_enables_val = ucp_enables ? 
*ucp_enables : 0xff; + + if (!ucp_enables_val) return false; nir_function_impl *impl = nir_shader_get_entrypoint(shader); @@ -406,8 +407,9 @@ nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars, b.cursor = nir_after_impl(impl); struct lower_clip_state state = { NULL }; - state.ucp_enables = ucp_enables; + state.ucp_enables = ucp_enables_val; state.use_clipdist_array = use_clipdist_array; + state.use_enables_sysval = !ucp_enables; state.clipplane_state_tokens = clipplane_state_tokens; /* find clipvertex/position outputs */ @@ -415,20 +417,20 @@ nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars, &state.position)) return false; - shader->info.clip_distance_array_size = util_last_bit(ucp_enables); + shader->info.clip_distance_array_size = util_last_bit(ucp_enables_val); if (!use_vars || shader->info.io_lowered) { /* If the driver has lowered IO instead of st/mesa, the driver expects * that variables are present even with lowered IO, so create them. */ if (!shader->info.io_lowered) { - create_clipdist_vars(shader, state.out, ucp_enables, true, + create_clipdist_vars(shader, state.out, ucp_enables_val, true, use_clipdist_array); } lower_clip_vertex_intrin(&b, &state); } else { - create_clipdist_vars(shader, state.out, ucp_enables, true, + create_clipdist_vars(shader, state.out, ucp_enables_val, true, use_clipdist_array); lower_clip_vertex_var(&b, &state); } @@ -504,16 +506,19 @@ save_clipvertex_to_temp_gs(nir_builder *b, nir_intrinsic_instr *intr, } bool -nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, +nir_lower_clip_gs(nir_shader *shader, unsigned *ucp_enables, bool use_clipdist_array, const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]) { - if (!ucp_enables) + unsigned ucp_enables_val = ucp_enables ? *ucp_enables : 0xff; + + if (!ucp_enables_val) return false; struct lower_clip_state state = { NULL }; - state.ucp_enables = ucp_enables; + state.ucp_enables = ucp_enables ? 
*ucp_enables : 0xff; state.use_clipdist_array = use_clipdist_array; + state.use_enables_sysval = !ucp_enables; state.clipplane_state_tokens = clipplane_state_tokens; /* find clipvertex/position outputs */ @@ -521,7 +526,7 @@ nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, &state.position)) return false; - shader->info.clip_distance_array_size = util_last_bit(ucp_enables); + shader->info.clip_distance_array_size = util_last_bit(ucp_enables_val); if (shader->info.io_lowered) { /* Track the current value of CLIP_VERTEX or POS in a local variable. */ @@ -533,7 +538,7 @@ nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, return false; } else { /* insert CLIPDIST outputs */ - create_clipdist_vars(shader, state.out, ucp_enables, true, + create_clipdist_vars(shader, state.out, ucp_enables_val, true, use_clipdist_array); } @@ -548,7 +553,8 @@ nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, static void lower_clip_fs(nir_function_impl *impl, unsigned ucp_enables, - nir_variable **in, bool use_clipdist_array, bool use_load_interp) + nir_variable **in, bool use_clipdist_array, bool use_load_interp, + bool use_enables_sysval) { nir_def *clipdist[MAX_CLIP_PLANES]; nir_builder b = nir_builder_at(nir_before_impl(impl)); @@ -568,12 +574,23 @@ lower_clip_fs(nir_function_impl *impl, unsigned ucp_enables, nir_def *cond = NULL; - for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { - if (ucp_enables & (1 << plane)) { - nir_def *this_cond = - nir_flt_imm(&b, clipdist[plane], 0.0); + if (use_enables_sysval) + cond = nir_imm_int(&b, 0); - cond = cond ? 
nir_ior(&b, cond, this_cond) : this_cond; + for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { + if (use_enables_sysval) { + nir_def *this_cond = + nir_iand(&b, nir_flt(&b, clipdist[plane], nir_imm_float(&b, 0.0)), + nir_iand_imm(&b, nir_load_user_clip_plane(&b), (1 << plane))); + + cond = nir_ior(&b, cond, this_cond); + } else { + if (ucp_enables & (1 << plane)) { + nir_def *this_cond = + nir_flt_imm(&b, clipdist[plane], 0.0); + + cond = cond ? nir_ior(&b, cond, this_cond) : this_cond; + } } } @@ -607,31 +624,32 @@ fs_has_clip_dist_input_var(nir_shader *shader, nir_variable **io_vars, /* insert conditional kill based on interpolated CLIPDIST */ bool -nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, +nir_lower_clip_fs(nir_shader *shader, unsigned *ucp_enables, bool use_clipdist_array, bool use_load_interp) { nir_variable *in[2] = { 0 }; + unsigned ucp_enables_val = ucp_enables ? *ucp_enables : 0xff; - if (!ucp_enables) + if (!ucp_enables_val) return false; /* this is probably broken until https://gitlab.freedesktop.org/mesa/mesa/-/issues/10826 is fixed */ assert(!shader->info.io_lowered); - shader->info.clip_distance_array_size = util_last_bit(ucp_enables); + shader->info.clip_distance_array_size = util_last_bit(ucp_enables_val); /* No hard reason to require use_clipdist_arr to work with * frag-shader-based gl_ClipDistance, except that the only user that does * not enable this does not support GL 3.0 (or EXT_clip_cull_distance). 
*/ - if (!fs_has_clip_dist_input_var(shader, in, &ucp_enables)) - create_clipdist_vars(shader, in, ucp_enables, false, use_clipdist_array); + if (!fs_has_clip_dist_input_var(shader, in, &ucp_enables_val)) + create_clipdist_vars(shader, in, ucp_enables_val, false, use_clipdist_array); else assert(use_clipdist_array); nir_foreach_function_with_impl(function, impl, shader) { if (!strcmp(function->name, "main")) { - lower_clip_fs(impl, ucp_enables, in, use_clipdist_array, - use_load_interp); + lower_clip_fs(impl, ucp_enables_val, in, use_clipdist_array, + use_load_interp, !ucp_enables); } } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 92bcc45dd51..9c2645cf20c 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -1164,11 +1164,12 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, /* Note that it is intentional to use the VS lowering pass for GS, since we * lower GS into something that looks more like a VS in ir3_nir_lower_gs(): */ + unsigned ucp_enables = so->key.ucp_enables; if (lower_ucp_vs(so)) { - progress |= OPT(s, nir_lower_clip_vs, so->key.ucp_enables, false, true, NULL); + progress |= OPT(s, nir_lower_clip_vs, &ucp_enables, false, true, NULL); } else if (s->info.stage == MESA_SHADER_FRAGMENT) { if (so->key.ucp_enables && !so->compiler->has_clip_cull) - progress |= OPT(s, nir_lower_clip_fs, so->key.ucp_enables, true, true); + progress |= OPT(s, nir_lower_clip_fs, &ucp_enables, true, true); } if (so->binning_pass) { diff --git a/src/gallium/drivers/crocus/crocus_program.c b/src/gallium/drivers/crocus/crocus_program.c index 5be1a0d6f05..12d44980153 100644 --- a/src/gallium/drivers/crocus/crocus_program.c +++ b/src/gallium/drivers/crocus/crocus_program.c @@ -1179,7 +1179,8 @@ crocus_compile_vs(struct crocus_context *ice, if (key->nr_userclip_plane_consts) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); /* Check if variables were found. 
*/ - if (nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, + unsigned ucp_enables = (1 << key->nr_userclip_plane_consts) - 1; + if (nir_lower_clip_vs(nir, &ucp_enables, true, false, NULL)) { nir_lower_io_vars_to_temporaries(nir, impl, nir_var_shader_out); nir_lower_global_vars_to_local(nir); @@ -1539,7 +1540,8 @@ crocus_compile_tes(struct crocus_context *ice, if (key->nr_userclip_plane_consts) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); - nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true, + unsigned ucp_enables = (1 << key->nr_userclip_plane_consts) - 1; + nir_lower_clip_vs(nir, &ucp_enables, true, false, NULL); nir_lower_io_vars_to_temporaries(nir, impl, nir_var_shader_out); nir_lower_global_vars_to_local(nir); @@ -1682,7 +1684,8 @@ crocus_compile_gs(struct crocus_context *ice, if (key->nr_userclip_plane_consts) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); - nir_lower_clip_gs(nir, (1 << key->nr_userclip_plane_consts) - 1, false, + unsigned ucp_enables = (1 << key->nr_userclip_plane_consts) - 1; + nir_lower_clip_gs(nir, &ucp_enables, false, NULL); nir_lower_io_vars_to_temporaries(nir, impl, nir_var_shader_out); nir_lower_global_vars_to_local(nir); diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 5879f4d5af7..f3477e09d25 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -1908,7 +1908,8 @@ iris_compile_vs(struct iris_screen *screen, if (key->vue.nr_userclip_plane_consts) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); /* Check if variables were found. 
*/ - if (nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1, + unsigned ucp_enables = (1 << key->vue.nr_userclip_plane_consts) - 1; + if (nir_lower_clip_vs(nir, &ucp_enables, true, false, NULL)) { nir_lower_io_vars_to_temporaries(nir, impl, nir_var_shader_out); nir_lower_global_vars_to_local(nir); @@ -2355,7 +2356,8 @@ iris_compile_tes(struct iris_screen *screen, if (key->vue.nr_userclip_plane_consts) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); - nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1, + unsigned ucp_enables = (1 << key->vue.nr_userclip_plane_consts) - 1; + nir_lower_clip_vs(nir, &ucp_enables, true, false, NULL); nir_lower_io_vars_to_temporaries(nir, impl, nir_var_shader_out); nir_lower_global_vars_to_local(nir); @@ -2550,7 +2552,8 @@ iris_compile_gs(struct iris_screen *screen, if (key->vue.nr_userclip_plane_consts) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); - nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1, + unsigned ucp_enables = (1 << key->vue.nr_userclip_plane_consts) - 1; + nir_lower_clip_gs(nir, &ucp_enables, false, NULL); nir_lower_io_vars_to_temporaries(nir, impl, nir_var_shader_out); nir_lower_global_vars_to_local(nir); diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 8e829c3e099..66a97c7d0e7 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -182,7 +182,8 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, } if (key->fs.clip_plane_enable) { - NIR_PASS(_, s, nir_lower_clip_fs, key->fs.clip_plane_enable, + unsigned ucp_enables = key->fs.clip_plane_enable; + NIR_PASS(_, s, nir_lower_clip_fs, &ucp_enables, false, true); inputs.fixed_varying_mask = pan_get_fixed_varying_mask(s->info.inputs_read); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index f48240ea551..211d281321d 100644 
--- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2263,7 +2263,8 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, NIR_PASS(_, c->s, nir_lower_tex, &tex_options); if (c->fs_key && c->fs_key->ucp_enables) { - NIR_PASS(_, c->s, nir_lower_clip_fs, c->fs_key->ucp_enables, false, false); + unsigned ucp_enables = c->fs_key->ucp_enables; + NIR_PASS(_, c->s, nir_lower_clip_fs, &ucp_enables, false, false); } if (c->stage == QSTAGE_FRAG) diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 8332f3fb049..b288d270d7f 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -719,10 +719,10 @@ lower_ucp(struct st_context *st, if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) { - NIR_PASS(_, nir, nir_lower_clip_vs, ucp_enables, + NIR_PASS(_, nir, nir_lower_clip_vs, &ucp_enables, true, can_compact, clipplane_state); } else if (nir->info.stage == MESA_SHADER_GEOMETRY) { - NIR_PASS(_, nir, nir_lower_clip_gs, ucp_enables, + NIR_PASS(_, nir, nir_lower_clip_gs, &ucp_enables, can_compact, clipplane_state); }