pan/bi: Emit MKVEC directly

Now that we have bi_lower_mkvec_swz(), there's no need to be so careful
in the NIR -> bi translation.  We can just emit MKVEC and move on.  The
lowering pass will sort out the detaisl.

Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40720>
This commit is contained in:
Faith Ekstrand 2026-03-28 22:34:14 -04:00 committed by Marge Bot
parent b9e33c7897
commit a4e9002660
2 changed files with 38 additions and 93 deletions

View file

@ -758,80 +758,6 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr)
bi_split_def(b, &intr->def);
}
static bi_index
bi_make_vec8_helper(bi_builder *b, bi_index *src, unsigned *channel,
unsigned count)
{
assert(1 <= count && count <= 4);
bi_index bytes[4] = {bi_imm_u8(0), bi_imm_u8(0), bi_imm_u8(0), bi_imm_u8(0)};
for (unsigned i = 0; i < count; ++i) {
unsigned chan = channel ? channel[i] : 0;
unsigned lane = chan & 3;
bi_index raw_data = bi_extract(b, src[i], chan >> 2);
/* On Bifrost, MKVEC.v4i8 cannot select b1 or b3 */
if (b->shader->arch < 9 && lane != 0 && lane != 2) {
bytes[i] = bi_byte(bi_rshift_or(b, 32, raw_data, bi_zero(),
bi_imm_u8(lane * 8), false),
0);
} else {
bytes[i] = bi_byte(raw_data, lane);
}
assert(b->shader->arch >= 9 || bytes[i].swizzle == BI_SWIZZLE_B0 ||
bytes[i].swizzle == BI_SWIZZLE_B2);
}
if (b->shader->arch >= 9) {
bi_index vec = bi_zero();
if (count >= 3) {
if ((count == 3 && bytes[2].swizzle == BI_SWIZZLE_B0) ||
(count == 4 && bi_is_word_equiv(bytes[2], bytes[3]) &&
bytes[2].swizzle == BI_SWIZZLE_B0 &&
bytes[3].swizzle == BI_SWIZZLE_B1)) {
vec = bytes[2];
vec.swizzle = BI_SWIZZLE_B0123;
} else {
vec = bi_mkvec_v2i8(b, bytes[2], bytes[3], vec);
}
}
return bi_mkvec_v2i8(b, bytes[0], bytes[1], vec);
} else {
return bi_mkvec_v4i8(b, bytes[0], bytes[1], bytes[2], bytes[3]);
}
}
static bi_index
bi_make_vec16_helper(bi_builder *b, bi_index *src, unsigned *channel,
unsigned count)
{
unsigned chan0 = channel ? channel[0] : 0;
bi_index w0 = bi_extract(b, src[0], chan0 >> 1);
bi_index h0 = bi_half(w0, chan0 & 1);
/* Zero extend */
if (count == 1)
return bi_mkvec_v2i16(b, h0, bi_imm_u16(0));
/* Else, create a vector */
assert(count == 2);
unsigned chan1 = channel ? channel[1] : 0;
bi_index w1 = bi_extract(b, src[1], chan1 >> 1);
bi_index h1 = bi_half(w1, chan1 & 1);
if (bi_is_word_equiv(w0, w1) && (chan0 & 1) == 0 && ((chan1 & 1) == 1))
return bi_mov_i32(b, w0);
else if (bi_is_word_equiv(w0, w1))
return bi_swz_v2i16(b, bi_swz_16(w0, chan0 & 1, chan1 & 1));
else
return bi_mkvec_v2i16(b, h0, h1);
}
bi_instr *
bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel,
unsigned count, unsigned bitsize)
@ -839,6 +765,7 @@ bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel,
assert(DIV_ROUND_UP(count * bitsize, 32) <= BI_MAX_SRCS &&
"unnecessarily large vector should have been lowered");
assert(count <= BI_MAX_VEC);
bi_index srcs[BI_MAX_VEC];
if (bitsize == 64) {
@ -848,25 +775,43 @@ bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel,
srcs[i * 2 + 1] = bi_extract(b, src[i], c * 2 + 1);
}
return bi_emit_collect_to(b, dst, srcs, count * 2);
} else if (bitsize == 32) {
for (unsigned i = 0; i < count; i++) {
const unsigned c = channel ? channel[i] : 0;
srcs[i] = bi_extract(b, src[i], c);
}
return bi_emit_collect_to(b, dst, srcs, count);
} else if (bitsize == 16) {
for (unsigned i = 0; i < count; i++) {
const unsigned c = channel ? channel[i] : 0;
srcs[i] = bi_half(bi_extract(b, src[i], c >> 1), c & 1);
}
for (unsigned i = count; i < align(count, 2); i++)
srcs[i] = bi_imm_u16(0);
for (unsigned i = 0; i < count; i += 2)
srcs[i / 2] = bi_mkvec_v2i16(b, srcs[i], srcs[i + 1]);
return bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, 2));
} else if (bitsize == 8) {
for (unsigned i = 0; i < count; i++) {
const unsigned c = channel ? channel[i] : 0;
srcs[i] = bi_byte(bi_extract(b, src[i], c >> 2), c & 3);
}
for (unsigned i = count; i < align(count, 4); i++)
srcs[i] = bi_imm_u8(0);
for (unsigned i = 0; i < count; i += 4) {
srcs[i / 4] = bi_mkvec_v4i8(b, srcs[i], srcs[i + 1],
srcs[i + 2], srcs[i + 3]);
}
return bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, 4));
} else {
UNREACHABLE("Unsupported bit size");
}
assert(bitsize == 8 || bitsize == 16 || bitsize == 32);
unsigned shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2;
unsigned chan_per_word = 1 << shift;
for (unsigned i = 0; i < count; i += chan_per_word) {
unsigned rem = MIN2(count - i, chan_per_word);
unsigned *channel_offset = channel ? (channel + i) : NULL;
if (bitsize == 32)
srcs[i] = bi_extract(b, src[i], channel_offset ? *channel_offset : 0);
else if (bitsize == 16)
srcs[i >> 1] = bi_make_vec16_helper(b, src + i, channel_offset, rem);
else
srcs[i >> 2] = bi_make_vec8_helper(b, src + i, channel_offset, rem);
}
return bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word));
}
static inline bi_instr *

View file

@ -574,7 +574,7 @@ bi_is_value_equiv(bi_index left, bi_index right)
}
}
#define BI_MAX_VEC 8
#define BI_MAX_VEC 16
#define BI_MAX_DESTS 4
#define BI_MAX_SRCS 8