From 67a662ed058a7ab95711f106d617de89c10cbdbd Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Wed, 3 Sep 2025 15:51:46 +0000 Subject: [PATCH] pan/bi: Propagate MKVEC.v2i8 and V2X8_TO_V2X16 for replicate swizzle On Valhall, we can end up with a lot of conversions for 8-bit and 16-bit values. However, since Valhall, we have access to a lot more swizzles on widen sources. The idea of this pass is to propagate replicate swizzle usages to simplify things. We do not attempt to propagate MKVEC.v2i16 as it is already handled by bi_lower_swizzle. This changes the following: 9 = V2S8_TO_V2S16 !7.b0 11 = IADD.v2s16 !9.h00, u4 88 = MKVEC.v2i8 11.b0, u256.b0, u256 13 = IMUL.v4i8 !88.b0, 8.b0 14 = V2S8_TO_V2S16 !13.b0 15 = IADD.v2s16 14.h00, !11.h00 89 = MKVEC.v2i8 !15.b0, u256.b0, u256 17 = IMUL.v4i8 !89.b0, !8.b0 Into this: 11 = IADD.v2s16 !7.b0, u4 13 = IMUL.v4i8 11.b0, 8.b0 15 = IADD.v2s16 13.b0, !11.h00 17 = IMUL.v4i8 !15.b0, !8.b0 Signed-off-by: Mary Guillemard Reviewed-by: Olivia Lee Reviewed-by: Christoph Pillmayer Part-of: --- src/panfrost/compiler/bi_lower_swizzle.c | 29 ----------- src/panfrost/compiler/compiler.h | 29 +++++++++++ src/panfrost/compiler/valhall/va_optimize.c | 57 +++++++++++++++++++++ 3 files changed, 86 insertions(+), 29 deletions(-) diff --git a/src/panfrost/compiler/bi_lower_swizzle.c b/src/panfrost/compiler/bi_lower_swizzle.c index d02059b33cf..4f686be28ae 100644 --- a/src/panfrost/compiler/bi_lower_swizzle.c +++ b/src/panfrost/compiler/bi_lower_swizzle.c @@ -33,20 +33,6 @@ * recombine swizzles where we can as an optimization. 
*/ -static bool -bi_swizzle_replicates_8(enum bi_swizzle swz) -{ - switch (swz) { - case BI_SWIZZLE_B0000: - case BI_SWIZZLE_B1111: - case BI_SWIZZLE_B2222: - case BI_SWIZZLE_B3333: - return true; - default: - return false; - } -} - static uint32_t va_op_swizzles(enum bi_opcode op, unsigned src) { @@ -185,21 +171,6 @@ lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src) ins->src[src].swizzle = BI_SWIZZLE_H01; } -static bool -bi_swizzle_replicates_16(enum bi_swizzle swz) -{ - switch (swz) { - case BI_SWIZZLE_H00: - case BI_SWIZZLE_H11: - return true; - default: - /* If a swizzle replicates every 8-bits, it also replicates - * every 16-bits, so allow 8-bit replicating swizzles. - */ - return bi_swizzle_replicates_8(swz); - } -} - static bool bi_instr_replicates(bi_instr *I, BITSET_WORD *replicates_16) { diff --git a/src/panfrost/compiler/compiler.h b/src/panfrost/compiler/compiler.h index 0db46b846bc..63e3b5ab3d9 100644 --- a/src/panfrost/compiler/compiler.h +++ b/src/panfrost/compiler/compiler.h @@ -205,6 +205,35 @@ bi_apply_swizzle(uint32_t value, enum bi_swizzle swz) UNREACHABLE("Invalid swizzle"); } +static inline bool +bi_swizzle_replicates_8(enum bi_swizzle swz) +{ + switch (swz) { + case BI_SWIZZLE_B0000: + case BI_SWIZZLE_B1111: + case BI_SWIZZLE_B2222: + case BI_SWIZZLE_B3333: + return true; + default: + return false; + } +} + +static inline bool +bi_swizzle_replicates_16(enum bi_swizzle swz) +{ + switch (swz) { + case BI_SWIZZLE_H00: + case BI_SWIZZLE_H11: + return true; + default: + /* If a swizzle replicates every 8-bits, it also replicates + * every 16-bits, so allow 8-bit replicating swizzles. 
+ */ + return bi_swizzle_replicates_8(swz); + } +} + enum bi_index_type { BI_INDEX_NULL = 0, BI_INDEX_NORMAL = 1, diff --git a/src/panfrost/compiler/valhall/va_optimize.c b/src/panfrost/compiler/valhall/va_optimize.c index a19a62ed582..c55c99e352f 100644 --- a/src/panfrost/compiler/valhall/va_optimize.c +++ b/src/panfrost/compiler/valhall/va_optimize.c @@ -299,6 +299,62 @@ va_fuse_cmp(bi_context *ctx, bi_instr **lut, const BITSET_WORD *multiple, return true; } +static bool +va_propagate_replicate_wide(bi_context *ctx, bi_instr **lut, bi_instr *I) +{ + struct va_opcode_info info = valhall_opcodes[I->op]; + bool progress = false; + + bi_foreach_ssa_src(I, s) { + if (!info.srcs[s].widen) + continue; + + bi_index *src = &I->src[s]; + bi_instr *src_ins = lut[src->value]; + + assert(src_ins && "src has no corresponding instruction"); + + bi_index new_src = bi_null(); + unsigned tmp[4]; + + /* If we have a MKVEC.v2i8 and current instruction only replicate, we + * should propagate */ + if (src_ins->op == BI_OPCODE_MKVEC_V2I8 && + bi_swizzle_replicates_8(src->swizzle) && + bi_swizzle_to_byte_channels(src->swizzle, tmp)) { + unsigned byte_idx = *tmp; + + /* In case of the top 16-bit, src2 contains the value we want without + * any swizzles */ + if (byte_idx >= 2) { + /* src2 should not have non identity swizzle */ + assert(src_ins->src[2].swizzle == BI_SWIZZLE_H01); + + new_src = src_ins->src[2]; + new_src.swizzle = BI_SWIZZLE_B0 + (byte_idx - 2); + } else { + new_src = src_ins->src[byte_idx]; + } + } + /* In case of 16-bit source, attempt to propagate trivial conversions from + 8-bit */ + else if (bi_swizzle_replicates_16(src->swizzle) && + !bi_swizzle_replicates_8(src->swizzle) && + ((src_ins->op == BI_OPCODE_V2S8_TO_V2S16 && info.is_signed) || + (src_ins->op == BI_OPCODE_V2U8_TO_V2U16 && !info.is_signed)) && + bi_swizzle_replicates_8(src_ins->src[0].swizzle)) { + new_src = src_ins->src[0]; + } + + if (!bi_is_null(new_src)) { + *src = new_src; + progress = true; + } + } + 
+ return progress; +} + static void va_optimize_forward(bi_context *ctx) { @@ -330,6 +386,7 @@ va_optimize_forward(bi_context *ctx) } bi_foreach_instr_global_safe(ctx, I) { + progress |= va_propagate_replicate_wide(ctx, lut, I); progress |= va_fuse_cmp(ctx, lut, multiple, I); }