mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 10:40:11 +01:00
pan/bi: Propagate MKVEC.v2i8 and V2X8_TO_V2X16 for replicate swizzle
On Valhall, we can end up with a lot of conversions for 8-bit and 16-bit values. However, starting with Valhall, we have access to many more swizzles on widen sources. The idea of this pass is to propagate replicate swizzle usages to simplify things. We do not attempt to propagate MKVEC.v2i16 as it is already handled by bi_lower_swizzle. This changes the following: 9 = V2S8_TO_V2S16 !7.b0 11 = IADD.v2s16 !9.h00, u4 88 = MKVEC.v2i8 11.b0, u256.b0, u256 13 = IMUL.v4i8 !88.b0, 8.b0 14 = V2S8_TO_V2S16 !13.b0 15 = IADD.v2s16 14.h00, !11.h00 89 = MKVEC.v2i8 !15.b0, u256.b0, u256 17 = IMUL.v4i8 !89.b0, !8.b0 Into this: 11 = IADD.v2s16 !7.b0, u4 13 = IMUL.v4i8 11.b0, 8.b0 15 = IADD.v2s16 13.b0, !11.h00 17 = IMUL.v4i8 !15.b0, !8.b0 Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com> Reviewed-by: Olivia Lee <olivia.lee@collabora.com> Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37167>
This commit is contained in:
parent
59e0a15c47
commit
67a662ed05
3 changed files with 86 additions and 29 deletions
|
|
@ -33,20 +33,6 @@
|
|||
* recombine swizzles where we can as an optimization.
|
||||
*/
|
||||
|
||||
static bool
|
||||
bi_swizzle_replicates_8(enum bi_swizzle swz)
|
||||
{
|
||||
switch (swz) {
|
||||
case BI_SWIZZLE_B0000:
|
||||
case BI_SWIZZLE_B1111:
|
||||
case BI_SWIZZLE_B2222:
|
||||
case BI_SWIZZLE_B3333:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
va_op_swizzles(enum bi_opcode op, unsigned src)
|
||||
{
|
||||
|
|
@ -185,21 +171,6 @@ lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
|
|||
ins->src[src].swizzle = BI_SWIZZLE_H01;
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_swizzle_replicates_16(enum bi_swizzle swz)
|
||||
{
|
||||
switch (swz) {
|
||||
case BI_SWIZZLE_H00:
|
||||
case BI_SWIZZLE_H11:
|
||||
return true;
|
||||
default:
|
||||
/* If a swizzle replicates every 8-bits, it also replicates
|
||||
* every 16-bits, so allow 8-bit replicating swizzles.
|
||||
*/
|
||||
return bi_swizzle_replicates_8(swz);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_instr_replicates(bi_instr *I, BITSET_WORD *replicates_16)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -205,6 +205,35 @@ bi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
|
|||
UNREACHABLE("Invalid swizzle");
|
||||
}
|
||||
|
||||
static inline bool
|
||||
bi_swizzle_replicates_8(enum bi_swizzle swz)
|
||||
{
|
||||
switch (swz) {
|
||||
case BI_SWIZZLE_B0000:
|
||||
case BI_SWIZZLE_B1111:
|
||||
case BI_SWIZZLE_B2222:
|
||||
case BI_SWIZZLE_B3333:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
bi_swizzle_replicates_16(enum bi_swizzle swz)
|
||||
{
|
||||
switch (swz) {
|
||||
case BI_SWIZZLE_H00:
|
||||
case BI_SWIZZLE_H11:
|
||||
return true;
|
||||
default:
|
||||
/* If a swizzle replicates every 8-bits, it also replicates
|
||||
* every 16-bits, so allow 8-bit replicating swizzles.
|
||||
*/
|
||||
return bi_swizzle_replicates_8(swz);
|
||||
}
|
||||
}
|
||||
|
||||
enum bi_index_type {
|
||||
BI_INDEX_NULL = 0,
|
||||
BI_INDEX_NORMAL = 1,
|
||||
|
|
|
|||
|
|
@ -299,6 +299,62 @@ va_fuse_cmp(bi_context *ctx, bi_instr **lut, const BITSET_WORD *multiple,
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
va_propagate_replicate_wide(bi_context *ctx, bi_instr **lut, bi_instr *I)
|
||||
{
|
||||
struct va_opcode_info info = valhall_opcodes[I->op];
|
||||
bool progress = false;
|
||||
|
||||
bi_foreach_ssa_src(I, s) {
|
||||
if (!info.srcs[s].widen)
|
||||
continue;
|
||||
|
||||
bi_index *src = &I->src[s];
|
||||
bi_instr *src_ins = lut[src->value];
|
||||
|
||||
assert(src_ins && "src has no corresponding instruction");
|
||||
|
||||
bi_index new_src = bi_null();
|
||||
unsigned tmp[4];
|
||||
|
||||
/* If we have a MKVEC.v2i8 and current instruction only replicate, we
|
||||
* should propagate */
|
||||
if (src_ins->op == BI_OPCODE_MKVEC_V2I8 &&
|
||||
bi_swizzle_replicates_8(src->swizzle) &&
|
||||
bi_swizzle_to_byte_channels(src->swizzle, tmp)) {
|
||||
unsigned byte_idx = *tmp;
|
||||
|
||||
/* In case of the top 16-bit, src2 contains the value we want without
|
||||
* any swizzles */
|
||||
if (byte_idx >= 2) {
|
||||
/* src2 should not have non identity swizzle */
|
||||
assert(src_ins->src[2].swizzle == BI_SWIZZLE_H01);
|
||||
|
||||
new_src = src_ins->src[2];
|
||||
new_src.swizzle = BI_SWIZZLE_B0 + (byte_idx - 2);
|
||||
} else {
|
||||
new_src = src_ins->src[byte_idx];
|
||||
}
|
||||
}
|
||||
/* In case of 16-bit source, attempt to propagate trivial conversions from
|
||||
8-bit */
|
||||
else if (bi_swizzle_replicates_16(src->swizzle) &&
|
||||
!bi_swizzle_replicates_8(src->swizzle) &&
|
||||
((src_ins->op == BI_OPCODE_V2S8_TO_V2S16 && info.is_signed) ||
|
||||
(src_ins->op == BI_OPCODE_V2U8_TO_V2U16 && !info.is_signed)) &&
|
||||
bi_swizzle_replicates_8(src_ins->src[0].swizzle)) {
|
||||
new_src = src_ins->src[0];
|
||||
}
|
||||
|
||||
if (!bi_is_null(new_src)) {
|
||||
*src = new_src;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static void
|
||||
va_optimize_forward(bi_context *ctx)
|
||||
{
|
||||
|
|
@ -330,6 +386,7 @@ va_optimize_forward(bi_context *ctx)
|
|||
}
|
||||
|
||||
bi_foreach_instr_global_safe(ctx, I) {
|
||||
progress |= va_propagate_replicate_wide(ctx, lut, I);
|
||||
progress |= va_fuse_cmp(ctx, lut, multiple, I);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue