nir/lower_bool: ntt: Generate a good opcode for bcsel

This is heavily copy-pasted from a patch of Ian Romanick, including the
commit message.

Previously, this pass always generated fcsel for bcsel.  This was the
only place that generate fcsel, so various drivers assumed (and needed!)
that src0 was a Boolean with 0.0 or 1.0 as the only values.

Specifically, many DX9 / GL_ARB_vertex_program platforms lack a CMP
instruction in vertex shaders.  In those cases, they would use LRP to
implement fcsel.  The bummer is that many plaforms have a real fcsel
instruction, and those platforms would benefit from other places
generating that opcode.

Instead of leaving assumptions in drivers about the sources of an opcode
that they can't really support, allow them to control the way the
lowering pass translates bcsel.  Two flags are used to control this:

- If the driver sets has_fused_comp_and_csel in nir_options, fcsel_gt
  will be used.  Since the Boolean value is 0.0 or 1.0, this is
  equivalent to fcsel.

- If the parameter has_fcsel_ne is set, fcsel will be used.  This is the
  old path.

- Otherwise, the lowering pass assumes we're on a crufty, old DX9 vertex
  program, and it emits flrp.

With this, the assumptions about src0 of fcsel in NTT can be removed.
If a platform can't handle fcsel, it should ensure that the lowering
pass won't generate it.

No change in shader-db.

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20162>
This commit is contained in:
Pavel Ondračka 2022-11-24 09:24:35 +01:00 committed by Marge Bot
parent 36e842839f
commit 3fcdd9e4a7
6 changed files with 48 additions and 31 deletions

View file

@ -4972,7 +4972,7 @@ bool nir_scale_fdiv(nir_shader *shader);
bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
bool nir_lower_alu_width(nir_shader *shader, nir_vectorize_cb cb, const void *data);
bool nir_lower_bool_to_bitsize(nir_shader *shader);
bool nir_lower_bool_to_float(nir_shader *shader);
bool nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne);
bool nir_lower_bool_to_int32(nir_shader *shader);
bool nir_opt_simplify_convert_alu_types(nir_shader *shader);
bool nir_lower_const_arrays_to_uniforms(nir_shader *shader,

View file

@ -43,7 +43,8 @@ rewrite_1bit_ssa_def_to_32bit(nir_ssa_def *def, void *_progress)
}
static bool
lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
lower_alu_instr(nir_builder *b, nir_alu_instr *alu, bool has_fcsel_ne,
bool has_fcsel_gt)
{
const nir_op_info *op_info = &nir_op_infos[alu->op];
@ -96,7 +97,22 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
case nir_op_bany_inequal3: alu->op = nir_op_fany_nequal3; break;
case nir_op_bany_inequal4: alu->op = nir_op_fany_nequal4; break;
case nir_op_bcsel: alu->op = nir_op_fcsel; break;
case nir_op_bcsel:
if (has_fcsel_gt)
alu->op = nir_op_fcsel_gt;
else if (has_fcsel_ne)
alu->op = nir_op_fcsel;
else {
/* Only a few pre-VS 4.0 platforms (e.g., r300 vertex shaders) should
* hit this path.
*/
rep = nir_flrp(b,
nir_ssa_for_alu_src(b, alu, 2),
nir_ssa_for_alu_src(b, alu, 1),
nir_ssa_for_alu_src(b, alu, 0));
}
break;
case nir_op_iand: alu->op = nir_op_fmul; break;
case nir_op_ixor: alu->op = nir_op_sne; break;
@ -138,14 +154,22 @@ lower_tex_instr(nir_tex_instr *tex)
return progress;
}
struct lower_bool_to_float_data {
bool has_fcsel_ne;
bool has_fcsel_gt;
};
static bool
nir_lower_bool_to_float_instr(nir_builder *b,
nir_instr *instr,
UNUSED void *cb_data)
void *cb_data)
{
struct lower_bool_to_float_data *data = cb_data;
switch (instr->type) {
case nir_instr_type_alu:
return lower_alu_instr(b, nir_instr_as_alu(instr));
return lower_alu_instr(b, nir_instr_as_alu(instr),
data->has_fcsel_ne, data->has_fcsel_gt);
case nir_instr_type_load_const: {
nir_load_const_instr *load = nir_instr_as_load_const(instr);
@ -177,10 +201,15 @@ nir_lower_bool_to_float_instr(nir_builder *b,
}
bool
nir_lower_bool_to_float(nir_shader *shader)
nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne)
{
struct lower_bool_to_float_data data = {
.has_fcsel_ne = has_fcsel_ne,
.has_fcsel_gt = shader->options->has_fused_comp_and_csel
};
return nir_shader_instructions_pass(shader, nir_lower_bool_to_float_instr,
nir_metadata_block_index |
nir_metadata_dominance,
NULL);
&data);
}

View file

@ -1645,26 +1645,13 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
break;
case nir_op_fcsel:
/* NIR fcsel is src0 != 0 ? src1 : src2.
* TGSI CMP is src0 < 0 ? src1 : src2.
*
* However, fcsel so far as I can find only appears on bools-as-floats
* (1.0 or 0.0), so we can just negate it for the TGSI op. It's
* important to not have an abs here, as i915g has to make extra
* instructions to do the abs.
/* If CMP isn't supported, then the flags that enable NIR to generate
* this opcode should also not be set.
*/
if (c->options->lower_cmp) {
/* If the HW doesn't support TGSI CMP (r300 VS), then lower it to a
* LRP on the boolean 1.0/0.0 value, instead of requiring the
* backend to turn the src0 into 1.0/0.0 first.
*
* We don't use this in general because some hardware (i915 FS) the
* LRP gets expanded to MUL/MAD.
*/
ntt_LRP(c, dst, src[0], src[1], src[2]);
} else {
ntt_CMP(c, dst, ureg_negate(src[0]), src[1], src[2]);
}
assert(!c->options->lower_cmp);
/* Implement this as CMP(-abs(src0), src1, src2). */
ntt_CMP(c, dst, ureg_negate(ureg_abs(src[0])), src[1], src[2]);
break;
case nir_op_fcsel_gt:
@ -3942,7 +3929,8 @@ const void *nir_to_tgsi_options(struct nir_shader *s,
NIR_PASS_V(s, nir_lower_bool_to_int32);
} else {
NIR_PASS_V(s, nir_lower_int_to_float);
NIR_PASS_V(s, nir_lower_bool_to_float);
NIR_PASS_V(s, nir_lower_bool_to_float,
!options->lower_cmp && !options->lower_fabs);
/* bool_to_float generates MOVs for b2f32 that we want to clean up. */
NIR_PASS_V(s, nir_copy_prop);
NIR_PASS_V(s, nir_opt_dce);

View file

@ -1158,7 +1158,7 @@ etna_compile_shader(struct etna_shader_variant *v)
*/
NIR_PASS_V(s, nir_lower_int_to_float);
NIR_PASS_V(s, nir_opt_algebraic);
NIR_PASS_V(s, nir_lower_bool_to_float);
NIR_PASS_V(s, nir_lower_bool_to_float, true);
} else {
NIR_PASS_V(s, nir_lower_bool_to_int32);
}

View file

@ -1111,7 +1111,7 @@ ir2_nir_compile(struct ir2_context *ctx, bool binning)
OPT_V(ctx->nir, nir_opt_move, nir_move_comparisons);
OPT_V(ctx->nir, nir_lower_int_to_float);
OPT_V(ctx->nir, nir_lower_bool_to_float);
OPT_V(ctx->nir, nir_lower_bool_to_float, true);
while (OPT(ctx->nir, nir_opt_algebraic))
;
OPT_V(ctx->nir, nir_opt_algebraic_late);

View file

@ -147,7 +147,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s)
NIR_PASS_V(s, nir_lower_int_to_float);
/* int_to_float pass generates ftrunc, so lower it */
NIR_PASS(progress, s, lima_nir_lower_ftrunc);
NIR_PASS_V(s, nir_lower_bool_to_float);
NIR_PASS_V(s, nir_lower_bool_to_float, true);
NIR_PASS_V(s, nir_copy_prop);
NIR_PASS_V(s, nir_opt_dce);
@ -255,7 +255,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
} while (progress);
NIR_PASS_V(s, nir_lower_int_to_float);
NIR_PASS_V(s, nir_lower_bool_to_float);
NIR_PASS_V(s, nir_lower_bool_to_float, true);
/* Some ops must be lowered after being converted from int ops,
* so re-run nir_opt_algebraic after int lowering. */