From 6b87a65e383eae2225a45b41090ad4f4f53efcda Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 22 Jun 2022 14:33:43 -0400 Subject: [PATCH] pan/bi: Implement general 8-bit vector construction Refactor the 16-bit vector construction code to separate the collect of separate 32-bit words from the construction of single 32-bit vectors from sub-32-bit parts. Then add a path to combine (up to 4) bytes into a 32-bit word using Valhall's MKVEC.v2i8 instruction. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bifrost_compile.c | 96 +++++++++++++++++--------- 1 file changed, 63 insertions(+), 33 deletions(-) diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index df090f78e56..0c44be3cd56 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -588,33 +588,60 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) bi_copy_component(b, instr, dest); } -static void -bi_make_vec16_to(bi_builder *b, bi_index dst, bi_index *src, - unsigned *channel, unsigned count) +static bi_index +bi_make_vec8_helper(bi_builder *b, bi_index *src, unsigned *channel, unsigned count) { - bi_index srcs[BI_MAX_VEC]; + assert(1 <= count && count <= 4); - for (unsigned i = 0; i < count; i += 2) { - bool next = (i + 1) < count; + bi_index bytes[4] = { + bi_imm_u8(0), + bi_imm_u8(0), + bi_imm_u8(0), + bi_imm_u8(0) + }; + for (unsigned i = 0; i < count; ++i) { unsigned chan = channel ? channel[i] : 0; - unsigned nextc = next && channel ? channel[i + 1] : 0; - bi_index w0 = bi_extract(b, src[i], chan >> 1); - bi_index w1 = next ? bi_extract(b, src[i + 1], nextc >> 1) : bi_zero(); - - bi_index h0 = bi_half(w0, chan & 1); - bi_index h1 = bi_half(w1, nextc & 1); - - if (bi_is_word_equiv(w0, w1) && (chan & 1) == 0 && ((nextc & 1) == 1)) - srcs[i >> 1] = bi_mov_i32(b, w0); - else if (bi_is_word_equiv(w0, w1)) - srcs[i >> 1] = bi_swz_v2i16(b, bi_swz_16(w0, chan & 1, nextc & 1)); - else - srcs[i >> 1] = bi_mkvec_v2i16(b, h0, h1); + bytes[i] = bi_byte(bi_extract(b, src[i], chan >> 2), chan & 3); } - bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, 2)); + if (b->shader->arch >= 9) { + bi_index vec = bi_zero(); + + if (count >= 3) + vec = bi_mkvec_v2i8(b, bytes[2], bytes[3], vec); + + return bi_mkvec_v2i8(b, bytes[0], bytes[1], vec); + } else { + return bi_mkvec_v4i8(b, bytes[0], bytes[1], bytes[2], bytes[3]); + } +} + +static bi_index +bi_make_vec16_helper(bi_builder *b, bi_index *src, unsigned *channel, unsigned count) +{ + unsigned chan0 = channel ? channel[0] : 0; + bi_index w0 = bi_extract(b, src[0], chan0 >> 1); + bi_index h0 = bi_half(w0, chan0 & 1); + + /* Zero extend */ + if (count == 1) + return bi_mkvec_v2i16(b, h0, bi_imm_u16(0)); + + /* Else, create a vector */ + assert(count == 2); + + unsigned chan1 = channel ? channel[1] : 0; + bi_index w1 = bi_extract(b, src[1], chan1 >> 1); + bi_index h1 = bi_half(w1, chan1 & 1); + + if (bi_is_word_equiv(w0, w1) && (chan0 & 1) == 0 && ((chan1 & 1) == 1)) + return bi_mov_i32(b, w0); + else if (bi_is_word_equiv(w0, w1)) + return bi_swz_v2i16(b, bi_swz_16(w0, chan0 & 1, chan1 & 1)); + else + return bi_mkvec_v2i16(b, h0, h1); } static void @@ -624,22 +651,25 @@ bi_make_vec_to(bi_builder *b, bi_index dst, unsigned count, unsigned bitsize) { - if (bitsize == 32) { - bi_index srcs[BI_MAX_VEC]; + assert(bitsize == 8 || bitsize == 16 || bitsize == 32); + unsigned shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2; + unsigned chan_per_word = 1 << shift; - for (unsigned i = 0; i < count; ++i) - srcs[i] = bi_extract(b, src[i], channel ? channel[i] : 0); + bi_index srcs[BI_MAX_VEC]; - bi_emit_collect_to(b, dst, srcs, count); - } else if (bitsize == 16) { - bi_make_vec16_to(b, dst, src, channel, count); - } else if (bitsize == 8 && count == 1) { - bi_swz_v4i8_to(b, dst, bi_byte( - bi_extract(b, src[0], channel[0] >> 2), - channel[0] & 3)); - } else { - unreachable("8-bit mkvec not yet supported"); + for (unsigned i = 0; i < count; i += chan_per_word) { + unsigned rem = MIN2(count - i, chan_per_word); + unsigned *channel_offset = channel ? (channel + i) : NULL; + + if (bitsize == 32) + srcs[i] = bi_extract(b, src[i], channel_offset ? *channel_offset : 0); + else if (bitsize == 16) + srcs[i >> 1] = bi_make_vec16_helper(b, src + i, channel_offset, rem); + else + srcs[i >> 2] = bi_make_vec8_helper(b, src + i, channel_offset, rem); } + + bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word)); } static inline bi_instr *