pan/bi: Propagate MKVEC.v2i8 and V2X8_TO_V2X16 for replicate swizzle

On Valhall, we can end up with a lot of conversions for 8-bit and 16-bit
values.

However, since Valhall, we have access to a lot more swizzles on widen
sources.

The idea of this pass is to propagate replicate swizzle usages to
simplify things.

We do not attempt to propagate MKVEC.v2i16 as it is already handled by
bi_lower_swizzle.

This changes the following:
   9 = V2S8_TO_V2S16 !7.b0
   11 = IADD.v2s16 !9.h00, u4
   88 = MKVEC.v2i8 11.b0, u256.b0, u256
   13 = IMUL.v4i8 !88.b0, 8.b0
   14 = V2S8_TO_V2S16 !13.b0
   15 = IADD.v2s16 14.h00, !11.h00
   89 = MKVEC.v2i8 !15.b0, u256.b0, u256
   17 = IMUL.v4i8 !89.b0, !8.b0

Into this:
   11 = IADD.v2s16 !7.b0, u4
   13 = IMUL.v4i8 11.b0, 8.b0
   15 = IADD.v2s16 13.b0, !11.h00
   17 = IMUL.v4i8 !15.b0, !8.b0

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Olivia Lee <olivia.lee@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37167>
This commit is contained in:
Mary Guillemard 2025-09-03 15:51:46 +00:00 committed by Marge Bot
parent 59e0a15c47
commit 67a662ed05
3 changed files with 86 additions and 29 deletions

View file

@ -33,20 +33,6 @@
* recombine swizzles where we can as an optimization.
*/
/* Returns true when swz is one of the 8-bit replicating swizzles
 * (B0000, B1111, B2222 or B3333), false for every other swizzle.
 */
static bool
bi_swizzle_replicates_8(enum bi_swizzle swz)
{
   return swz == BI_SWIZZLE_B0000 || swz == BI_SWIZZLE_B1111 ||
          swz == BI_SWIZZLE_B2222 || swz == BI_SWIZZLE_B3333;
}
static uint32_t
va_op_swizzles(enum bi_opcode op, unsigned src)
{
@ -185,21 +171,6 @@ lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
ins->src[src].swizzle = BI_SWIZZLE_H01;
}
/* Returns true when swz replicates every 16 bits: either H00 or H11, or any
 * swizzle accepted by bi_swizzle_replicates_8().
 */
static bool
bi_swizzle_replicates_16(enum bi_swizzle swz)
{
   if (swz == BI_SWIZZLE_H00 || swz == BI_SWIZZLE_H11)
      return true;

   /* A swizzle that replicates every 8 bits necessarily replicates every
    * 16 bits as well, so the 8-bit replicating swizzles are accepted too.
    */
   return bi_swizzle_replicates_8(swz);
}
static bool
bi_instr_replicates(bi_instr *I, BITSET_WORD *replicates_16)
{

View file

@ -205,6 +205,35 @@ bi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
UNREACHABLE("Invalid swizzle");
}
/* Returns true when swz is one of the 8-bit replicating swizzles
 * (B0000, B1111, B2222 or B3333), false for every other swizzle.
 */
static inline bool
bi_swizzle_replicates_8(enum bi_swizzle swz)
{
   return swz == BI_SWIZZLE_B0000 || swz == BI_SWIZZLE_B1111 ||
          swz == BI_SWIZZLE_B2222 || swz == BI_SWIZZLE_B3333;
}
/* Returns true when swz replicates every 16 bits: either H00 or H11, or any
 * swizzle accepted by bi_swizzle_replicates_8().
 */
static inline bool
bi_swizzle_replicates_16(enum bi_swizzle swz)
{
   if (swz == BI_SWIZZLE_H00 || swz == BI_SWIZZLE_H11)
      return true;

   /* A swizzle that replicates every 8 bits necessarily replicates every
    * 16 bits as well, so the 8-bit replicating swizzles are accepted too.
    */
   return bi_swizzle_replicates_8(swz);
}
enum bi_index_type {
BI_INDEX_NULL = 0,
BI_INDEX_NORMAL = 1,

View file

@ -299,6 +299,62 @@ va_fuse_cmp(bi_context *ctx, bi_instr **lut, const BITSET_WORD *multiple,
return true;
}
/*
 * For every SSA source of I that the Valhall opcode table marks as "widen",
 * try to read directly from the operand of the instruction producing it when
 * that producer is a MKVEC.v2i8 or an 8-bit to 16-bit conversion and the
 * swizzle used by I only replicates.
 *
 * lut is indexed by SSA value and gives the instruction producing that value.
 * ctx is not used by this function.
 *
 * Returns true if at least one source of I was rewritten.
 */
static bool
va_propagate_replicate_wide(bi_context *ctx, bi_instr **lut, bi_instr *I)
{
   const struct va_opcode_info *info = &valhall_opcodes[I->op];
   bool progress = false;

   bi_foreach_ssa_src(I, s) {
      if (!info->srcs[s].widen)
         continue;

      bi_index *use = &I->src[s];
      bi_instr *def = lut[use->value];
      assert(def && "src has no corresponding instruction");

      const bool use_rep8 = bi_swizzle_replicates_8(use->swizzle);
      bi_index replacement = bi_null();
      unsigned channels[4];

      if (def->op == BI_OPCODE_MKVEC_V2I8 && use_rep8 &&
          bi_swizzle_to_byte_channels(use->swizzle, channels)) {
         /* The producer is a MKVEC.v2i8 and this use only replicates one
          * byte of it: forward the MKVEC operand providing that byte.
          */
         unsigned byte = channels[0];

         if (byte < 2) {
            replacement = def->src[byte];
         } else {
            /* The top 16 bits come from src2, which holds the value we
             * want without any swizzle; it must carry the identity swizzle.
             */
            assert(def->src[2].swizzle == BI_SWIZZLE_H01);

            replacement = def->src[2];
            replacement.swizzle = BI_SWIZZLE_B0 + (byte - 2);
         }
      } else if (!use_rep8 && bi_swizzle_replicates_16(use->swizzle)) {
         /* 16-bit replicating use: attempt to propagate a trivial
          * conversion from 8-bit whose signedness matches this opcode.
          */
         bool conv_matches = info->is_signed
                                ? def->op == BI_OPCODE_V2S8_TO_V2S16
                                : def->op == BI_OPCODE_V2U8_TO_V2U16;

         if (conv_matches && bi_swizzle_replicates_8(def->src[0].swizzle))
            replacement = def->src[0];
      }

      if (bi_is_null(replacement))
         continue;

      *use = replacement;
      progress = true;
   }

   return progress;
}
static void
va_optimize_forward(bi_context *ctx)
{
@ -330,6 +386,7 @@ va_optimize_forward(bi_context *ctx)
}
bi_foreach_instr_global_safe(ctx, I) {
progress |= va_propagate_replicate_wide(ctx, lut, I);
progress |= va_fuse_cmp(ctx, lut, multiple, I);
}