diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 736b20c261b..8ad4b11768c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4972,7 +4972,7 @@ bool nir_scale_fdiv(nir_shader *shader); bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); bool nir_lower_alu_width(nir_shader *shader, nir_vectorize_cb cb, const void *data); bool nir_lower_bool_to_bitsize(nir_shader *shader); -bool nir_lower_bool_to_float(nir_shader *shader); +bool nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne); bool nir_lower_bool_to_int32(nir_shader *shader); bool nir_opt_simplify_convert_alu_types(nir_shader *shader); bool nir_lower_const_arrays_to_uniforms(nir_shader *shader, diff --git a/src/compiler/nir/nir_lower_bool_to_float.c b/src/compiler/nir/nir_lower_bool_to_float.c index 593a9efe625..8ede52b3287 100644 --- a/src/compiler/nir/nir_lower_bool_to_float.c +++ b/src/compiler/nir/nir_lower_bool_to_float.c @@ -43,7 +43,8 @@ rewrite_1bit_ssa_def_to_32bit(nir_ssa_def *def, void *_progress) } static bool -lower_alu_instr(nir_builder *b, nir_alu_instr *alu) +lower_alu_instr(nir_builder *b, nir_alu_instr *alu, bool has_fcsel_ne, + bool has_fcsel_gt) { const nir_op_info *op_info = &nir_op_infos[alu->op]; @@ -96,7 +97,22 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu) case nir_op_bany_inequal3: alu->op = nir_op_fany_nequal3; break; case nir_op_bany_inequal4: alu->op = nir_op_fany_nequal4; break; - case nir_op_bcsel: alu->op = nir_op_fcsel; break; + case nir_op_bcsel: + if (has_fcsel_gt) + alu->op = nir_op_fcsel_gt; + else if (has_fcsel_ne) + alu->op = nir_op_fcsel; + else { + /* Only a few pre-VS 4.0 platforms (e.g., r300 vertex shaders) should + * hit this path. + */ + rep = nir_flrp(b, + nir_ssa_for_alu_src(b, alu, 2), + nir_ssa_for_alu_src(b, alu, 1), + nir_ssa_for_alu_src(b, alu, 0)); + } + + break; case nir_op_iand: alu->op = nir_op_fmul; break; case nir_op_ixor: alu->op = nir_op_sne; break; @@ -138,14 +154,22 @@ lower_tex_instr(nir_tex_instr *tex) return progress; } +struct lower_bool_to_float_data { + bool has_fcsel_ne; + bool has_fcsel_gt; +}; + static bool nir_lower_bool_to_float_instr(nir_builder *b, nir_instr *instr, - UNUSED void *cb_data) + void *cb_data) { + struct lower_bool_to_float_data *data = cb_data; + switch (instr->type) { case nir_instr_type_alu: - return lower_alu_instr(b, nir_instr_as_alu(instr)); + return lower_alu_instr(b, nir_instr_as_alu(instr), + data->has_fcsel_ne, data->has_fcsel_gt); case nir_instr_type_load_const: { nir_load_const_instr *load = nir_instr_as_load_const(instr); @@ -177,10 +201,15 @@ nir_lower_bool_to_float_instr(nir_builder *b, } bool -nir_lower_bool_to_float(nir_shader *shader) +nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne) { + struct lower_bool_to_float_data data = { + .has_fcsel_ne = has_fcsel_ne, + .has_fcsel_gt = shader->options->has_fused_comp_and_csel + }; + return nir_shader_instructions_pass(shader, nir_lower_bool_to_float_instr, nir_metadata_block_index | nir_metadata_dominance, - NULL); + &data); } diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index f3d69c09ed0..08d19fced26 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -1645,26 +1645,13 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr) break; case nir_op_fcsel: - /* NIR fcsel is src0 != 0 ? src1 : src2. - * TGSI CMP is src0 < 0 ? src1 : src2. - * - * However, fcsel so far as I can find only appears on bools-as-floats - * (1.0 or 0.0), so we can just negate it for the TGSI op. It's - * important to not have an abs here, as i915g has to make extra - * instructions to do the abs. + /* If CMP isn't supported, then the flags that enable NIR to generate + * this opcode should also not be set. */ - if (c->options->lower_cmp) { - /* If the HW doesn't support TGSI CMP (r300 VS), then lower it to a - * LRP on the boolean 1.0/0.0 value, instead of requiring the - * backend to turn the src0 into 1.0/0.0 first. - * - * We don't use this in general because some hardware (i915 FS) the - * LRP gets expanded to MUL/MAD. - */ - ntt_LRP(c, dst, src[0], src[1], src[2]); - } else { - ntt_CMP(c, dst, ureg_negate(src[0]), src[1], src[2]); - } + assert(!c->options->lower_cmp); + + /* Implement this as CMP(-abs(src0), src1, src2). */ + ntt_CMP(c, dst, ureg_negate(ureg_abs(src[0])), src[1], src[2]); break; case nir_op_fcsel_gt: @@ -3942,7 +3929,8 @@ const void *nir_to_tgsi_options(struct nir_shader *s, NIR_PASS_V(s, nir_lower_bool_to_int32); } else { NIR_PASS_V(s, nir_lower_int_to_float); - NIR_PASS_V(s, nir_lower_bool_to_float); + NIR_PASS_V(s, nir_lower_bool_to_float, + !options->lower_cmp && !options->lower_fabs); /* bool_to_float generates MOVs for b2f32 that we want to clean up. */ NIR_PASS_V(s, nir_copy_prop); NIR_PASS_V(s, nir_opt_dce); diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c index e64eb8b2811..b4ec5071795 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c @@ -1158,7 +1158,7 @@ etna_compile_shader(struct etna_shader_variant *v) */ NIR_PASS_V(s, nir_lower_int_to_float); NIR_PASS_V(s, nir_opt_algebraic); - NIR_PASS_V(s, nir_lower_bool_to_float); + NIR_PASS_V(s, nir_lower_bool_to_float, true); } else { NIR_PASS_V(s, nir_lower_bool_to_int32); } diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c index 0218975625d..9aa23b8b976 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c +++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c @@ -1111,7 +1111,7 @@ ir2_nir_compile(struct ir2_context *ctx, bool binning) OPT_V(ctx->nir, nir_opt_move, nir_move_comparisons); OPT_V(ctx->nir, nir_lower_int_to_float); - OPT_V(ctx->nir, nir_lower_bool_to_float); + OPT_V(ctx->nir, nir_lower_bool_to_float, true); while (OPT(ctx->nir, nir_opt_algebraic)) ; OPT_V(ctx->nir, nir_opt_algebraic_late); diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index 0c6b3c0f550..418808afcba 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -147,7 +147,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s) NIR_PASS_V(s, nir_lower_int_to_float); /* int_to_float pass generates ftrunc, so lower it */ NIR_PASS(progress, s, lima_nir_lower_ftrunc); - NIR_PASS_V(s, nir_lower_bool_to_float); + NIR_PASS_V(s, nir_lower_bool_to_float, true); NIR_PASS_V(s, nir_copy_prop); NIR_PASS_V(s, nir_opt_dce); @@ -255,7 +255,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s, } while (progress); NIR_PASS_V(s, nir_lower_int_to_float); - NIR_PASS_V(s, nir_lower_bool_to_float); + NIR_PASS_V(s, nir_lower_bool_to_float, true); /* Some ops must be lowered after being converted from int ops, * so re-run nir_opt_algebraic after int lowering. */