pan/bi: Implement general 8-bit vector construction

Refactor the 16-bit vector construction code to separate the collect of separate
32-bit words from the construction of single 32-bit vectors from sub-32-bit
parts. Then add a path to combine (up to 4) bytes into a 32-bit word using
Valhall's MKVEC.v2i8 instruction.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17220>
This commit is contained in:
Alyssa Rosenzweig 2022-06-22 14:33:43 -04:00 committed by Marge Bot
parent faaf4f6c45
commit 6b87a65e38

View file

@ -588,33 +588,60 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
bi_copy_component(b, instr, dest);
}
static void
bi_make_vec16_to(bi_builder *b, bi_index dst, bi_index *src,
unsigned *channel, unsigned count)
static bi_index
bi_make_vec8_helper(bi_builder *b, bi_index *src, unsigned *channel, unsigned count)
{
bi_index srcs[BI_MAX_VEC];
assert(1 <= count && count <= 4);
for (unsigned i = 0; i < count; i += 2) {
bool next = (i + 1) < count;
bi_index bytes[4] = {
bi_imm_u8(0),
bi_imm_u8(0),
bi_imm_u8(0),
bi_imm_u8(0)
};
for (unsigned i = 0; i < count; ++i) {
unsigned chan = channel ? channel[i] : 0;
unsigned nextc = next && channel ? channel[i + 1] : 0;
bi_index w0 = bi_extract(b, src[i], chan >> 1);
bi_index w1 = next ? bi_extract(b, src[i + 1], nextc >> 1) : bi_zero();
bi_index h0 = bi_half(w0, chan & 1);
bi_index h1 = bi_half(w1, nextc & 1);
if (bi_is_word_equiv(w0, w1) && (chan & 1) == 0 && ((nextc & 1) == 1))
srcs[i >> 1] = bi_mov_i32(b, w0);
else if (bi_is_word_equiv(w0, w1))
srcs[i >> 1] = bi_swz_v2i16(b, bi_swz_16(w0, chan & 1, nextc & 1));
else
srcs[i >> 1] = bi_mkvec_v2i16(b, h0, h1);
bytes[i] = bi_byte(bi_extract(b, src[i], chan >> 2), chan & 3);
}
bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, 2));
if (b->shader->arch >= 9) {
bi_index vec = bi_zero();
if (count >= 3)
vec = bi_mkvec_v2i8(b, bytes[2], bytes[3], vec);
return bi_mkvec_v2i8(b, bytes[0], bytes[1], vec);
} else {
return bi_mkvec_v4i8(b, bytes[0], bytes[1], bytes[2], bytes[3]);
}
}
static bi_index
bi_make_vec16_helper(bi_builder *b, bi_index *src, unsigned *channel, unsigned count)
{
unsigned chan0 = channel ? channel[0] : 0;
bi_index w0 = bi_extract(b, src[0], chan0 >> 1);
bi_index h0 = bi_half(w0, chan0 & 1);
/* Zero extend */
if (count == 1)
return bi_mkvec_v2i16(b, h0, bi_imm_u16(0));
/* Else, create a vector */
assert(count == 2);
unsigned chan1 = channel ? channel[1] : 0;
bi_index w1 = bi_extract(b, src[1], chan1 >> 1);
bi_index h1 = bi_half(w1, chan1 & 1);
if (bi_is_word_equiv(w0, w1) && (chan0 & 1) == 0 && ((chan1 & 1) == 1))
return bi_mov_i32(b, w0);
else if (bi_is_word_equiv(w0, w1))
return bi_swz_v2i16(b, bi_swz_16(w0, chan0 & 1, chan1 & 1));
else
return bi_mkvec_v2i16(b, h0, h1);
}
static void
@ -624,22 +651,25 @@ bi_make_vec_to(bi_builder *b, bi_index dst,
unsigned count,
unsigned bitsize)
{
if (bitsize == 32) {
bi_index srcs[BI_MAX_VEC];
assert(bitsize == 8 || bitsize == 16 || bitsize == 32);
unsigned shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2;
unsigned chan_per_word = 1 << shift;
for (unsigned i = 0; i < count; ++i)
srcs[i] = bi_extract(b, src[i], channel ? channel[i] : 0);
bi_index srcs[BI_MAX_VEC];
bi_emit_collect_to(b, dst, srcs, count);
} else if (bitsize == 16) {
bi_make_vec16_to(b, dst, src, channel, count);
} else if (bitsize == 8 && count == 1) {
bi_swz_v4i8_to(b, dst, bi_byte(
bi_extract(b, src[0], channel[0] >> 2),
channel[0] & 3));
} else {
unreachable("8-bit mkvec not yet supported");
for (unsigned i = 0; i < count; i += chan_per_word) {
unsigned rem = MIN2(count - i, chan_per_word);
unsigned *channel_offset = channel ? (channel + i) : NULL;
if (bitsize == 32)
srcs[i] = bi_extract(b, src[i], channel_offset ? *channel_offset : 0);
else if (bitsize == 16)
srcs[i >> 1] = bi_make_vec16_helper(b, src + i, channel_offset, rem);
else
srcs[i >> 2] = bi_make_vec8_helper(b, src + i, channel_offset, rem);
}
bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word));
}
static inline bi_instr *