nir/lower_bool: ntt: Generate a good opcode for bcsel

This is heavily copy-pasted from a patch of Ian Romanick, including the commit message. Previously, this pass always generated fcsel for bcsel. This was the only place that generate fcsel, so various drivers assumed (and needed!) that src0 was a Boolean with 0.0 or 1.0 as the only values. Specifically, many DX9 / GL_ARB_vertex_program platforms lack a CMP instruction in vertex shaders. In those cases, they would use LRP to implement fcsel. The bummer is that many plaforms have a real fcsel instruction, and those platforms would benefit from other places generating that opcode. Instead of leaving assumptions in drivers about the sources of an opcode that they can't really support, allow them to control the way the lowering pass translates bcsel. Two flags are used to control this: - If the driver sets has_fused_comp_and_csel in nir_options, fcsel_gt will be used. Since the Boolean value is 0.0 or 1.0, this is equivalent to fcsel. - If the parameter has_fcsel_ne is set, fcsel will be used. This is the old path. - Otherwise, the lowering pass assumes we're on a crufty, old DX9 vertex program, and it emits flrp. With this, the assumptions about src0 of fcsel in NTT can be removed. If a platform can't handle fcsel, it should ensure that the lowering pass won't generate it. No change in shader-db. Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20162>
2026-01-04 07:00:11 +01:00 · 2022-11-24 09:24:35 +01:00 · 2022-11-24 09:24:35 +01:00 · 3fcdd9e4a7
commit 3fcdd9e4a7
parent 36e842839f
6 changed files with 48 additions and 31 deletions
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@ -4972,7 +4972,7 @@ bool nir_scale_fdiv(nir_shader *shader);
 bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
 bool nir_lower_alu_width(nir_shader *shader, nir_vectorize_cb cb, const void *data);
 bool nir_lower_bool_to_bitsize(nir_shader *shader);
-bool nir_lower_bool_to_float(nir_shader *shader);
+bool nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne);
 bool nir_lower_bool_to_int32(nir_shader *shader);
 bool nir_opt_simplify_convert_alu_types(nir_shader *shader);
 bool nir_lower_const_arrays_to_uniforms(nir_shader *shader,
--- a/src/compiler/nir/nir_lower_bool_to_float.c
+++ b/src/compiler/nir/nir_lower_bool_to_float.c
@ -43,7 +43,8 @@ rewrite_1bit_ssa_def_to_32bit(nir_ssa_def *def, void *_progress)
 }

 static bool
-lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
+lower_alu_instr(nir_builder *b, nir_alu_instr *alu, bool has_fcsel_ne,
+                bool has_fcsel_gt)
 {
   const nir_op_info *op_info = &nir_op_infos[alu->op];

@ -96,7 +97,22 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
   case nir_op_bany_inequal3: alu->op = nir_op_fany_nequal3; break;
   case nir_op_bany_inequal4: alu->op = nir_op_fany_nequal4; break;

-   case nir_op_bcsel: alu->op = nir_op_fcsel; break;
+   case nir_op_bcsel:
+      if (has_fcsel_gt)
+         alu->op = nir_op_fcsel_gt;
+      else if (has_fcsel_ne)
+         alu->op = nir_op_fcsel;
+      else {
+         /* Only a few pre-VS 4.0 platforms (e.g., r300 vertex shaders) should
+          * hit this path.
+          */
+         rep = nir_flrp(b,
+                        nir_ssa_for_alu_src(b, alu, 2),
+                        nir_ssa_for_alu_src(b, alu, 1),
+                        nir_ssa_for_alu_src(b, alu, 0));
+      }
+
+      break;

   case nir_op_iand: alu->op = nir_op_fmul; break;
   case nir_op_ixor: alu->op = nir_op_sne; break;
@ -138,14 +154,22 @@ lower_tex_instr(nir_tex_instr *tex)
   return progress;
 }

+struct lower_bool_to_float_data {
+   bool has_fcsel_ne;
+   bool has_fcsel_gt;
+};
+
 static bool
 nir_lower_bool_to_float_instr(nir_builder *b,
                              nir_instr *instr,
-                              UNUSED void *cb_data)
+                              void *cb_data)
 {
+   struct lower_bool_to_float_data *data = cb_data;
+
   switch (instr->type) {
   case nir_instr_type_alu:
-      return lower_alu_instr(b, nir_instr_as_alu(instr));
+      return lower_alu_instr(b, nir_instr_as_alu(instr),
+                             data->has_fcsel_ne, data->has_fcsel_gt);

   case nir_instr_type_load_const: {
      nir_load_const_instr *load = nir_instr_as_load_const(instr);
@ -177,10 +201,15 @@ nir_lower_bool_to_float_instr(nir_builder *b,
 }

 bool
-nir_lower_bool_to_float(nir_shader *shader)
+nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne)
 {
+   struct lower_bool_to_float_data data = {
+      .has_fcsel_ne = has_fcsel_ne,
+      .has_fcsel_gt = shader->options->has_fused_comp_and_csel
+   };
+
   return nir_shader_instructions_pass(shader, nir_lower_bool_to_float_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
-                                       NULL);
+                                       &data);
 }
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@ -1645,26 +1645,13 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
         break;

      case nir_op_fcsel:
-         /* NIR fcsel is src0 != 0 ? src1 : src2.
-          * TGSI CMP is src0 < 0 ? src1 : src2.
-          *
-          * However, fcsel so far as I can find only appears on bools-as-floats
-          * (1.0 or 0.0), so we can just negate it for the TGSI op.  It's
-          * important to not have an abs here, as i915g has to make extra
-          * instructions to do the abs.
+         /* If CMP isn't supported, then the flags that enable NIR to generate
+          * this opcode should also not be set.
          */
-         if (c->options->lower_cmp) {
-            /* If the HW doesn't support TGSI CMP (r300 VS), then lower it to a
-             * LRP on the boolean 1.0/0.0 value, instead of requiring the
-             * backend to turn the src0 into 1.0/0.0 first.
-             *
-             * We don't use this in general because some hardware (i915 FS) the
-             * LRP gets expanded to MUL/MAD.
-             */
-            ntt_LRP(c, dst, src[0], src[1], src[2]);
-         } else {
-            ntt_CMP(c, dst, ureg_negate(src[0]), src[1], src[2]);
-         }
+         assert(!c->options->lower_cmp);
+
+         /* Implement this as CMP(-abs(src0), src1, src2). */
+         ntt_CMP(c, dst, ureg_negate(ureg_abs(src[0])), src[1], src[2]);
         break;

      case nir_op_fcsel_gt:
@ -3942,7 +3929,8 @@ const void *nir_to_tgsi_options(struct nir_shader *s,
      NIR_PASS_V(s, nir_lower_bool_to_int32);
   } else {
      NIR_PASS_V(s, nir_lower_int_to_float);
-      NIR_PASS_V(s, nir_lower_bool_to_float);
+      NIR_PASS_V(s, nir_lower_bool_to_float,
+                 !options->lower_cmp && !options->lower_fabs);
      /* bool_to_float generates MOVs for b2f32 that we want to clean up. */
      NIR_PASS_V(s, nir_copy_prop);
      NIR_PASS_V(s, nir_opt_dce);
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
@ -1158,7 +1158,7 @@ etna_compile_shader(struct etna_shader_variant *v)
       */
      NIR_PASS_V(s, nir_lower_int_to_float);
      NIR_PASS_V(s, nir_opt_algebraic);
-      NIR_PASS_V(s, nir_lower_bool_to_float);
+      NIR_PASS_V(s, nir_lower_bool_to_float, true);
   } else {
      NIR_PASS_V(s, nir_lower_bool_to_int32);
   }
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@ -1111,7 +1111,7 @@ ir2_nir_compile(struct ir2_context *ctx, bool binning)
   OPT_V(ctx->nir, nir_opt_move, nir_move_comparisons);

   OPT_V(ctx->nir, nir_lower_int_to_float);
-   OPT_V(ctx->nir, nir_lower_bool_to_float);
+   OPT_V(ctx->nir, nir_lower_bool_to_float, true);
   while (OPT(ctx->nir, nir_opt_algebraic))
      ;
   OPT_V(ctx->nir, nir_opt_algebraic_late);
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@ -147,7 +147,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s)
   NIR_PASS_V(s, nir_lower_int_to_float);
   /* int_to_float pass generates ftrunc, so lower it */
   NIR_PASS(progress, s, lima_nir_lower_ftrunc);
-   NIR_PASS_V(s, nir_lower_bool_to_float);
+   NIR_PASS_V(s, nir_lower_bool_to_float, true);

   NIR_PASS_V(s, nir_copy_prop);
   NIR_PASS_V(s, nir_opt_dce);
@ -255,7 +255,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
   } while (progress);

   NIR_PASS_V(s, nir_lower_int_to_float);
-   NIR_PASS_V(s, nir_lower_bool_to_float);
+   NIR_PASS_V(s, nir_lower_bool_to_float, true);

   /* Some ops must be lowered after being converted from int ops,
    * so re-run nir_opt_algebraic after int lowering. */