From 501a66cb5ce76ffca6aa663ea9f8b31d2cd71f07 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 6 Jan 2022 17:12:56 -0500 Subject: [PATCH] pan/bi: Fuse result types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In NIR, comparison instructions always produce 0/~0 results. For other result types, a separate b2f32 or b2i32 instruction is used to transform the result. However, Mali's comparison instructions have modifiers for these alternate result types, so we can implement expressions like int(a < b) and float(a == b) in a single instruction. Add a peephole optimization to fuse comparisons with result type transformations. Results on Mali-G52: total instructions in shared programs: 2439696 -> 2434339 (-0.22%) instructions in affected programs: 418703 -> 413346 (-1.28%) helped: 1630 HURT: 0 helped stats (abs) min: 1.0 max: 28.0 x̄: 3.29 x̃: 2 helped stats (rel) min: 0.11% max: 19.35% x̄: 1.64% x̃: 1.39% 95% mean confidence interval for instructions value: -3.44 -3.13 95% mean confidence interval for instructions %-change: -1.72% -1.56% Instructions are helped. total tuples in shared programs: 1946581 -> 1943005 (-0.18%) tuples in affected programs: 251742 -> 248166 (-1.42%) helped: 1113 HURT: 11 helped stats (abs) min: 1.0 max: 32.0 x̄: 3.23 x̃: 2 helped stats (rel) min: 0.17% max: 15.38% x̄: 1.80% x̃: 1.38% HURT stats (abs) min: 1.0 max: 2.0 x̄: 1.45 x̃: 1 HURT stats (rel) min: 0.21% max: 3.12% x̄: 1.23% x̃: 0.89% 95% mean confidence interval for tuples value: -3.35 -3.01 95% mean confidence interval for tuples %-change: -1.88% -1.66% Tuples are helped. 
total clauses in shared programs: 357791 -> 357349 (-0.12%) clauses in affected programs: 15879 -> 15437 (-2.78%) helped: 371 HURT: 3 helped stats (abs) min: 1.0 max: 8.0 x̄: 1.20 x̃: 1 helped stats (rel) min: 0.80% max: 33.33% x̄: 3.85% x̃: 2.17% HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 HURT stats (rel) min: 2.94% max: 5.26% x̄: 4.49% x̃: 5.26% 95% mean confidence interval for clauses value: -1.27 -1.09 95% mean confidence interval for clauses %-change: -4.21% -3.36% Clauses are helped. total cycles in shared programs: 167922.04 -> 167810.71 (-0.07%) cycles in affected programs: 6772.08 -> 6660.75 (-1.64%) helped: 655 HURT: 12 helped stats (abs) min: 0.041665999999999315 max: 1.3333319999999986 x̄: 0.17 x̃: 0 helped stats (rel) min: 0.18% max: 20.00% x̄: 2.02% x̃: 1.60% HURT stats (abs) min: 0.041665999999999315 max: 0.125 x̄: 0.05 x̃: 0 HURT stats (rel) min: 0.21% max: 3.80% x̄: 1.23% x̃: 0.88% 95% mean confidence interval for cycles value: -0.18 -0.16 95% mean confidence interval for cycles %-change: -2.10% -1.81% Cycles are helped. total arith in shared programs: 74393.17 -> 74243.08 (-0.20%) arith in affected programs: 10157.50 -> 10007.42 (-1.48%) helped: 1129 HURT: 12 helped stats (abs) min: 0.041665999999999315 max: 1.3333319999999986 x̄: 0.13 x̃: 0 helped stats (rel) min: 0.18% max: 50.00% x̄: 1.94% x̃: 1.40% HURT stats (abs) min: 0.041665999999999315 max: 0.125 x̄: 0.05 x̃: 0 HURT stats (rel) min: 0.21% max: 3.80% x̄: 1.23% x̃: 0.88% 95% mean confidence interval for arith value: -0.14 -0.12 95% mean confidence interval for arith %-change: -2.06% -1.76% Arith are helped. 
total quadwords in shared programs: 1692019 -> 1688164 (-0.23%) quadwords in affected programs: 216669 -> 212814 (-1.78%) helped: 1148 HURT: 11 helped stats (abs) min: 1.0 max: 41.0 x̄: 3.37 x̃: 2 helped stats (rel) min: 0.17% max: 17.24% x̄: 2.25% x̃: 1.73% HURT stats (abs) min: 1.0 max: 2.0 x̄: 1.09 x̃: 1 HURT stats (rel) min: 0.60% max: 1.32% x̄: 0.85% x̃: 0.83% 95% mean confidence interval for quadwords value: -3.49 -3.16 95% mean confidence interval for quadwords %-change: -2.33% -2.10% Quadwords are helped. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bi_opt_mod_props.c | 80 ++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/src/panfrost/bifrost/bi_opt_mod_props.c b/src/panfrost/bifrost/bi_opt_mod_props.c index b0458b362e1..5e9b76df3ae 100644 --- a/src/panfrost/bifrost/bi_opt_mod_props.c +++ b/src/panfrost/bifrost/bi_opt_mod_props.c @@ -228,6 +228,83 @@ bi_optimizer_clamp(bi_instr *I, bi_instr *use) return true; } +static enum bi_opcode +bi_sized_mux_op(unsigned size) +{ + switch (size) { + case 8: return BI_OPCODE_MUX_V4I8; + case 16: return BI_OPCODE_MUX_V2I16; + case 32: return BI_OPCODE_MUX_I32; + default: unreachable("invalid size"); + } +} + +static bool +bi_is_fixed_mux(bi_instr *I, unsigned size, bi_index v1) +{ + return I->op == bi_sized_mux_op(size) && + bi_is_value_equiv(I->src[0], bi_zero()) && + bi_is_value_equiv(I->src[1], v1); +} + +static bool +bi_takes_int_result_type(enum bi_opcode op) +{ + switch (op) { + case BI_OPCODE_ICMP_I32: + case BI_OPCODE_ICMP_S32: + case BI_OPCODE_ICMP_U32: + case BI_OPCODE_ICMP_V2I16: + case BI_OPCODE_ICMP_V2S16: + case BI_OPCODE_ICMP_V2U16: + case BI_OPCODE_ICMP_V4I8: + case BI_OPCODE_ICMP_V4S8: + case BI_OPCODE_ICMP_V4U8: + case BI_OPCODE_FCMP_F32: + case BI_OPCODE_FCMP_V2F16: + return true; + default: + return false; + } +} + +static bool +bi_takes_float_result_type(enum bi_opcode op) +{ + return (op == BI_OPCODE_FCMP_F32) || + (op == 
BI_OPCODE_FCMP_V2F16); +} + +/* CMP+MUX -> CMP with result type */ +static bool +bi_optimizer_result_type(bi_instr *I, bi_instr *mux) +{ + if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size) + return false; + + if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) || + bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) { + + if (!bi_takes_float_result_type(I->op)) + return false; + + I->result_type = BI_RESULT_TYPE_F1; + } else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) || + bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) || + bi_is_fixed_mux(mux, 8, bi_imm_u8(1))) { + + if (!bi_takes_int_result_type(I->op)) + return false; + + I->result_type = BI_RESULT_TYPE_I1; + } else { + return false; + } + + I->dest[0] = mux->dest[0]; + return true; +} + static bool bi_is_var_tex(bi_instr *var, bi_instr *tex) { @@ -289,7 +366,8 @@ bi_opt_mod_prop_backward(bi_context *ctx) /* Destination has a single use, try to propagate */ bool propagated = - bi_optimizer_clamp(I, use); + bi_optimizer_clamp(I, use) || + bi_optimizer_result_type(I, use); if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM && use->op == BI_OPCODE_SPLIT_I32) { /* Need to see through the split in a