pan/bi: Pack 8-bit vec2s

We used to splat out 8-bit vec2s to 16-bit by repeating both 8-bit
halves twice with the B0011 swizzle.  I think the original idea here was
that 16-bit swizzles were more widely available in the hardware and that
this would make swizzling things easier.  The problem is that nothing
actually knows that the value is half-repeated like this so nothing
knows it can upgrade a swizzle from B0022 to B0123 (H01).  So instead we
get a bunch of B0022 swizzles, which nothing supports.

We can shave a lot of instructions if we just stop trying to be so
clever and instead repeat the whole thing with a B0101 swizzle.

The only real issue here is that v2[fiu]8_to_v2[fiu]16 needs a B0011
swizzle, which we have to apply on-the-fly.  Fortunately, any swizzle
can be composed with B0011.

Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40720>
This commit is contained in:
Faith Ekstrand 2026-03-30 15:42:54 -04:00 committed by Marge Bot
parent db8cb73b34
commit 15d5675e8e

View file

@ -2307,35 +2307,49 @@ bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps)
} else if (bitsize == 8 && comps == 1) {
idx.swizzle = BI_SWIZZLE_B0000 + (src.swizzle[0] & 3);
} else if (bitsize == 8) {
if (comps == 2 || comps == 4) {
/* For a vec2, place the two components in 0 and 2 instead of
* 0 and 1. For a scalar, splat it out to all channels.
*/
unsigned c[4] = {0};
for (unsigned i = 0; i < 4; ++i)
c[i] = src.swizzle[i * comps / 4] & 3;
bool has_swizzle = false;
enum bi_swizzle swizzle = BI_SWIZZLE_H01;
if (comps == 3) {
unsigned c[4];
for (unsigned i = 0; i < 3; ++i)
c[i] = src.swizzle[i] & 3;
enum bi_swizzle swizzle;
if (bi_swizzle_from_byte_channels(c, &swizzle)) {
idx.swizzle = swizzle;
return idx;
/* Try to find a swizzle that starts with the given v3i8 swizzle */
for (unsigned i = 0; i < 4; i++) {
c[3] = i;
if (bi_swizzle_from_byte_channels(c, &swizzle)) {
has_swizzle = true;
break;
}
}
} else {
/* For 1 and 2-component, repeat the swizzle to increase the chances
* that it's a valid bi_swizzle.
*/
unsigned c[4];
for (unsigned i = 0; i < 4; ++i)
c[i] = src.swizzle[i % comps] & 3;
has_swizzle = bi_swizzle_from_byte_channels(c, &swizzle);
}
/* XXX: Use optimized swizzle when posisble */
bi_index unoffset_srcs[NIR_MAX_VEC_COMPONENTS] = {bi_null()};
unsigned channels[NIR_MAX_VEC_COMPONENTS] = {0};
for (unsigned i = 0; i < comps; ++i) {
unoffset_srcs[i] = bi_src_index(&src.src);
channels[i] = src.swizzle[i];
if (has_swizzle) {
idx.swizzle = swizzle;
return idx;
}
bi_index temp = bi_temp(b->shader);
bi_make_vec_to(b, temp, unoffset_srcs, channels, comps, bitsize);
bi_index v4_srcs[4];
for (unsigned i = 0; i < comps; i++) {
v4_srcs[i] = idx;
v4_srcs[i].swizzle = BI_SWIZZLE_B0 + src.swizzle[i];
}
for (unsigned i = comps; i < 4; i++)
v4_srcs[i] = bi_imm_u8(0);
bi_index temp = bi_mkvec_v4i8(b, v4_srcs[0], v4_srcs[1],
v4_srcs[2], v4_srcs[3]);
static const enum bi_swizzle swizzle_lut[] = {
BI_SWIZZLE_B0000, BI_SWIZZLE_B0011, BI_SWIZZLE_B0123, BI_SWIZZLE_B0123
BI_SWIZZLE_B0000, BI_SWIZZLE_B0101, BI_SWIZZLE_B0123, BI_SWIZZLE_B0123
};
assert(comps - 1 < ARRAY_SIZE(swizzle_lut));
@ -2348,6 +2362,17 @@ bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps)
return idx;
}
static bi_index
bi_swiz_b01(bi_index idx)
{
enum bi_swizzle swizzle;
bool valid = bi_try_compose_swizzles(&swizzle, BI_SWIZZLE_B01, idx.swizzle);
assert(valid);
idx.swizzle = swizzle;
return idx;
}
static enum bi_round
bi_nir_round(nir_op op)
{
@ -2865,12 +2890,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_index s2 =
srcs > 2 ? bi_alu_src_index(b, instr->src[2], comps) : bi_null();
bool need_post_swizzle = sz == 8 && comps == 2;
bi_index post_swizzle_dst = dst;
if (need_post_swizzle) {
dst = bi_temp(b->shader);
}
switch (instr->op) {
case nir_op_ffma:
bi_fma_to(b, sz, dst, s0, s1, s2);
@ -3148,7 +3167,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
if (src_sz == 16)
bi_v2u16_to_v2f16_to(b, dst, s0);
else if (src_sz == 8)
bi_v2u8_to_v2f16_to(b, dst, s0);
bi_v2u8_to_v2f16_to(b, dst, bi_swiz_b01(s0));
break;
case nir_op_u2f32:
@ -3174,7 +3193,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
if (src_sz == 16)
bi_v2s16_to_v2f16_to(b, dst, s0);
else if (src_sz == 8)
bi_v2s8_to_v2f16_to(b, dst, s0);
bi_v2s8_to_v2f16_to(b, dst, bi_swiz_b01(s0));
break;
case nir_op_i2f32:
@ -3216,7 +3235,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
assert(src_sz == 8 || src_sz == 32);
if (src_sz == 8)
bi_v2s8_to_v2s16_to(b, dst, s0);
bi_v2s8_to_v2s16_to(b, dst, bi_swiz_b01(s0));
else
bi_mov_i32_to(b, dst, s0);
break;
@ -3225,7 +3244,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
assert(src_sz == 8 || src_sz == 32);
if (src_sz == 8)
bi_v2u8_to_v2u16_to(b, dst, s0);
bi_v2u8_to_v2u16_to(b, dst, bi_swiz_b01(s0));
else
bi_mov_i32_to(b, dst, s0);
break;
@ -3440,12 +3459,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
UNREACHABLE("Unknown ALU op");
}
if (need_post_swizzle) {
bi_index srcs[2] = {dst, dst};
unsigned channels[2] = {0, 2};
bi_make_vec_to(b, post_swizzle_dst, srcs, channels, 2, 8);
}
}
/* Returns dimension with 0 special casing cubemaps. Shamelessly copied from