From 04a182417e21f31a2e4e8faa2e8109f4e1d3d524 Mon Sep 17 00:00:00 2001
From: Job Noorman
Date: Fri, 25 Jul 2025 15:09:27 +0200
Subject: [PATCH] ir3/shared_ra: don't reuse src of different halfness

Avoid reusing the src of an ALU/SFU instruction when its halfness is
different from the dst. A difference in halfness would introduce a (ss)
sync and unnecessarily increase ss-stall.

Totals from 8261 (5.02% of 164705) affected shaders:
Instrs: 10044160 -> 10044390 (+0.00%); split: -0.04%, +0.04%
CodeSize: 19875094 -> 19879238 (+0.02%); split: -0.08%, +0.10%
NOPs: 2249893 -> 2249273 (-0.03%); split: -0.14%, +0.11%
MOVs: 426644 -> 426565 (-0.02%); split: -0.24%, +0.23%
COVs: 134819 -> 134810 (-0.01%); split: -0.06%, +0.05%
(ss): 264012 -> 260680 (-1.26%); split: -1.34%, +0.08%
(sy): 122711 -> 122851 (+0.11%); split: -0.07%, +0.18%
(ss)-stall: 1111161 -> 1100625 (-0.95%); split: -1.07%, +0.12%
(sy)-stall: 3650422 -> 3651422 (+0.03%); split: -0.15%, +0.17%
STPs: 8693 -> 8701 (+0.09%); split: -0.08%, +0.17%
LDPs: 16814 -> 16815 (+0.01%); split: -0.10%, +0.11%
Preamble Instrs: 2346201 -> 2351217 (+0.21%); split: -0.25%, +0.46%
Last helper: 3417842 -> 3417889 (+0.00%); split: -0.07%, +0.08%
Cat0: 2486420 -> 2485758 (-0.03%); split: -0.13%, +0.10%
Cat1: 613403 -> 614417 (+0.17%); split: -0.21%, +0.37%
Cat2: 3742181 -> 3742109 (-0.00%); split: -0.00%, +0.00%
Cat6: 81897 -> 81906 (+0.01%); split: -0.03%, +0.04%
Cat7: 254014 -> 253955 (-0.02%); split: -0.11%, +0.09%

Note that the slight increase in Cat1 is mostly from lowered copies (to
swz) and seems to be RA bad luck.
Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3_shared_ra.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/freedreno/ir3/ir3_shared_ra.c b/src/freedreno/ir3/ir3_shared_ra.c index eb217a96e4e..3558b71b6a8 100644 --- a/src/freedreno/ir3/ir3_shared_ra.c +++ b/src/freedreno/ir3/ir3_shared_ra.c @@ -693,6 +693,12 @@ get_reg(struct ra_ctx *ctx, struct ir3_register *reg, bool src) struct ir3_register *src = reg->instr->srcs[i]; if (!ra_reg_is_src(src)) continue; + /* When src and dst are overlapping registers with different halfness, + * a (ss) sync is necessary. Avoid this to not unnecessarily increase + * ss-stall. + */ + if ((reg->flags & IR3_REG_HALF) != (src->flags & IR3_REG_HALF)) + continue; if ((src->flags & IR3_REG_SHARED) && reg_size(src) >= size) { struct ra_interval *src_interval = ra_interval_get(ctx, src->def); physreg_t src_physreg = ra_interval_get_physreg(src_interval);