mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 15:50:11 +01:00
nir,ir3: Add icsel_eqz
In IR3, `sel.b32` selects based on comparing its condition against 0, so add `icsel_eqz` to fuse the cmp and sel that we'd otherwise need.

total Instruction Count in shared programs: 1112814 -> 1110473 (-0.21%)
Instruction Count in affected programs: 162701 -> 160360 (-1.44%)
helped: 81
HURT: 29
Instruction count are helped.

total MOV Count in shared programs: 86777 -> 88671 (2.18%)
MOV Count in affected programs: 28119 -> 30013 (6.74%)
helped: 1
HURT: 292
Mov count are HURT.

total COV Count in shared programs: 15070 -> 14962 (-0.72%)
COV Count in affected programs: 5770 -> 5662 (-1.87%)
helped: 76
HURT: 2
Cov count are helped.

total Last helper instruction in shared programs: 592729 -> 590638 (-0.35%)
Last helper instruction in affected programs: 91331 -> 89240 (-2.29%)
helped: 30
HURT: 1
Last helper instruction are helped.

total Instructions with SS sync bit in shared programs: 29336 -> 29546 (0.72%)
Instructions with SS sync bit in affected programs: 4702 -> 4912 (4.47%)
helped: 8
HURT: 43
Instructions with ss sync bit are HURT.

total Estimated cycles stalled on SS in shared programs: 111590 -> 112401 (0.73%)
Estimated cycles stalled on SS in affected programs: 27708 -> 28519 (2.93%)
helped: 21
HURT: 61
Estimated cycles stalled on ss are HURT.

total cat1 instructions in shared programs: 101933 -> 103695 (1.73%)
cat1 instructions in affected programs: 35804 -> 37566 (4.92%)
helped: 18
HURT: 290
Cat1 instructions are HURT.

total cat2 instructions in shared programs: 380299 -> 377499 (-0.74%)
cat2 instructions in affected programs: 128609 -> 125809 (-2.18%)
helped: 322
HURT: 0
Cat2 instructions are helped.

Signed-off-by: Karmjit Mahil <karmjit.mahil@igalia.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32189>
This commit is contained in:
parent
aad0aa0a9c
commit
b79994e92d
5 changed files with 22 additions and 0 deletions
|
|
@ -4180,6 +4180,10 @@ typedef struct nir_shader_compiler_options {
|
||||||
|
|
||||||
/* Backend supports fused compare against zero and csel */
|
/* Backend supports fused compare against zero and csel */
|
||||||
bool has_fused_comp_and_csel;
|
bool has_fused_comp_and_csel;
|
||||||
|
/* Backend supports fused int eq/ne against zero and csel. */
|
||||||
|
bool has_icsel_eqz64;
|
||||||
|
bool has_icsel_eqz32;
|
||||||
|
bool has_icsel_eqz16;
|
||||||
|
|
||||||
/* Backend supports fneo, fequ, fltu, fgeu. */
|
/* Backend supports fneo, fequ, fltu, fgeu. */
|
||||||
bool has_fneo_fcmpu;
|
bool has_fneo_fcmpu;
|
||||||
|
|
|
||||||
|
|
@ -1064,6 +1064,8 @@ opcode("b32csel", 0, tuint, [0, 0, 0],
|
||||||
[tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2",
|
[tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2",
|
||||||
description = csel_description.format("a 32-bit", "0 vs ~0"))
|
description = csel_description.format("a 32-bit", "0 vs ~0"))
|
||||||
|
|
||||||
|
triop("icsel_eqz", tint, selection, "(src0 == 0) ? src1 : src2")
|
||||||
|
|
||||||
triop("i32csel_gt", tint32, selection, "(src0 > 0) ? src1 : src2")
|
triop("i32csel_gt", tint32, selection, "(src0 > 0) ? src1 : src2")
|
||||||
triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2")
|
triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3677,6 +3677,12 @@ late_optimizations += [
|
||||||
(('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
|
(('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
for s in [16, 32, 64]:
|
||||||
|
late_optimizations.extend([
|
||||||
|
(('bcsel@{}'.format(s), ('ieq', 0, 'a@{}'.format(s)), 'b@{}'.format(s), 'c@{}'.format(s)), ('icsel_eqz', a, b, c), "options->has_icsel_eqz{} && !options->no_integers".format(s)),
|
||||||
|
(('bcsel@{}'.format(s), ('ine', 0, 'a@{}'.format(s)), 'b@{}'.format(s), 'c@{}'.format(s)), ('icsel_eqz', a, c, b), "options->has_icsel_eqz{} && !options->no_integers".format(s)),
|
||||||
|
])
|
||||||
|
|
||||||
distribute_src_mods = [
|
distribute_src_mods = [
|
||||||
# Try to remove some spurious negations rather than pushing them down.
|
# Try to remove some spurious negations rather than pushing them down.
|
||||||
(('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
|
(('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
|
||||||
|
|
|
||||||
|
|
@ -98,6 +98,8 @@ static const nir_shader_compiler_options ir3_base_options = {
|
||||||
.lower_pack_split = true,
|
.lower_pack_split = true,
|
||||||
.lower_to_scalar = true,
|
.lower_to_scalar = true,
|
||||||
.has_imul24 = true,
|
.has_imul24 = true,
|
||||||
|
.has_icsel_eqz32 = true,
|
||||||
|
.has_icsel_eqz16 = true,
|
||||||
.has_fsub = true,
|
.has_fsub = true,
|
||||||
.has_isub = true,
|
.has_isub = true,
|
||||||
.force_indirect_unrolling_sampler = true,
|
.force_indirect_unrolling_sampler = true,
|
||||||
|
|
|
||||||
|
|
@ -922,6 +922,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
|
||||||
set_cat2_condition(dst.rpts, dst_sz, IR3_COND_GE);
|
set_cat2_condition(dst.rpts, dst_sz, IR3_COND_GE);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case nir_op_icsel_eqz:
|
||||||
case nir_op_bcsel: {
|
case nir_op_bcsel: {
|
||||||
struct ir3_instruction_rpt conds;
|
struct ir3_instruction_rpt conds;
|
||||||
|
|
||||||
|
|
@ -965,12 +966,19 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
|
||||||
conds.rpts[rpt] = cond;
|
conds.rpts[rpt] = cond;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (alu->op == nir_op_icsel_eqz) {
|
||||||
|
struct ir3_instruction_rpt tmp = src[1];
|
||||||
|
src[1] = src[2];
|
||||||
|
src[2] = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
if (is_half(src[1].rpts[0]))
|
if (is_half(src[1].rpts[0]))
|
||||||
dst = ir3_SEL_B16_rpt(b, dst_sz, src[1], 0, conds, 0, src[2], 0);
|
dst = ir3_SEL_B16_rpt(b, dst_sz, src[1], 0, conds, 0, src[2], 0);
|
||||||
else
|
else
|
||||||
dst = ir3_SEL_B32_rpt(b, dst_sz, src[1], 0, conds, 0, src[2], 0);
|
dst = ir3_SEL_B32_rpt(b, dst_sz, src[1], 0, conds, 0, src[2], 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case nir_op_bit_count: {
|
case nir_op_bit_count: {
|
||||||
if (ctx->compiler->gen < 5 ||
|
if (ctx->compiler->gen < 5 ||
|
||||||
(src[0].rpts[0]->dsts[0]->flags & IR3_REG_HALF)) {
|
(src[0].rpts[0]->dsts[0]->flags & IR3_REG_HALF)) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue