From a4e9002660f3fe9ef8b34a6c48cf8a12b9e7006f Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Sat, 28 Mar 2026 22:34:14 -0400 Subject: [PATCH] pan/bi: Emit MKVEC directly Now that we have bi_lower_mkvec_swz(), there's no need to be so careful in the NIR -> bi translation. We can just emit MKVEC and move on. The lowering pass will sort out the detaisl. Reviewed-by: Lorenzo Rossi Part-of: --- .../compiler/bifrost/bifrost_compile.c | 129 +++++------------- src/panfrost/compiler/bifrost/compiler.h | 2 +- 2 files changed, 38 insertions(+), 93 deletions(-) diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 18042236db7..3f09e565361 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -758,80 +758,6 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr) bi_split_def(b, &intr->def); } -static bi_index -bi_make_vec8_helper(bi_builder *b, bi_index *src, unsigned *channel, - unsigned count) -{ - assert(1 <= count && count <= 4); - - bi_index bytes[4] = {bi_imm_u8(0), bi_imm_u8(0), bi_imm_u8(0), bi_imm_u8(0)}; - - for (unsigned i = 0; i < count; ++i) { - unsigned chan = channel ? channel[i] : 0; - unsigned lane = chan & 3; - bi_index raw_data = bi_extract(b, src[i], chan >> 2); - - /* On Bifrost, MKVEC.v4i8 cannot select b1 or b3 */ - if (b->shader->arch < 9 && lane != 0 && lane != 2) { - bytes[i] = bi_byte(bi_rshift_or(b, 32, raw_data, bi_zero(), - bi_imm_u8(lane * 8), false), - 0); - } else { - bytes[i] = bi_byte(raw_data, lane); - } - - assert(b->shader->arch >= 9 || bytes[i].swizzle == BI_SWIZZLE_B0 || - bytes[i].swizzle == BI_SWIZZLE_B2); - } - - if (b->shader->arch >= 9) { - bi_index vec = bi_zero(); - - if (count >= 3) { - if ((count == 3 && bytes[2].swizzle == BI_SWIZZLE_B0) || - (count == 4 && bi_is_word_equiv(bytes[2], bytes[3]) && - bytes[2].swizzle == BI_SWIZZLE_B0 && - bytes[3].swizzle == BI_SWIZZLE_B1)) { - vec = bytes[2]; - vec.swizzle = BI_SWIZZLE_B0123; - } else { - vec = bi_mkvec_v2i8(b, bytes[2], bytes[3], vec); - } - } - - return bi_mkvec_v2i8(b, bytes[0], bytes[1], vec); - } else { - return bi_mkvec_v4i8(b, bytes[0], bytes[1], bytes[2], bytes[3]); - } -} - -static bi_index -bi_make_vec16_helper(bi_builder *b, bi_index *src, unsigned *channel, - unsigned count) -{ - unsigned chan0 = channel ? channel[0] : 0; - bi_index w0 = bi_extract(b, src[0], chan0 >> 1); - bi_index h0 = bi_half(w0, chan0 & 1); - - /* Zero extend */ - if (count == 1) - return bi_mkvec_v2i16(b, h0, bi_imm_u16(0)); - - /* Else, create a vector */ - assert(count == 2); - - unsigned chan1 = channel ? channel[1] : 0; - bi_index w1 = bi_extract(b, src[1], chan1 >> 1); - bi_index h1 = bi_half(w1, chan1 & 1); - - if (bi_is_word_equiv(w0, w1) && (chan0 & 1) == 0 && ((chan1 & 1) == 1)) - return bi_mov_i32(b, w0); - else if (bi_is_word_equiv(w0, w1)) - return bi_swz_v2i16(b, bi_swz_16(w0, chan0 & 1, chan1 & 1)); - else - return bi_mkvec_v2i16(b, h0, h1); -} - bi_instr * bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel, unsigned count, unsigned bitsize) @@ -839,6 +765,7 @@ bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel, assert(DIV_ROUND_UP(count * bitsize, 32) <= BI_MAX_SRCS && "unnecessarily large vector should have been lowered"); + assert(count <= BI_MAX_VEC); bi_index srcs[BI_MAX_VEC]; if (bitsize == 64) { @@ -848,25 +775,43 @@ bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel, srcs[i * 2 + 1] = bi_extract(b, src[i], c * 2 + 1); } return bi_emit_collect_to(b, dst, srcs, count * 2); + } else if (bitsize == 32) { + for (unsigned i = 0; i < count; i++) { + const unsigned c = channel ? channel[i] : 0; + srcs[i] = bi_extract(b, src[i], c); + } + return bi_emit_collect_to(b, dst, srcs, count); + } else if (bitsize == 16) { + for (unsigned i = 0; i < count; i++) { + const unsigned c = channel ? channel[i] : 0; + srcs[i] = bi_half(bi_extract(b, src[i], c >> 1), c & 1); + } + + for (unsigned i = count; i < align(count, 2); i++) + srcs[i] = bi_imm_u16(0); + + for (unsigned i = 0; i < count; i += 2) + srcs[i / 2] = bi_mkvec_v2i16(b, srcs[i], srcs[i + 1]); + + return bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, 2)); + } else if (bitsize == 8) { + for (unsigned i = 0; i < count; i++) { + const unsigned c = channel ? channel[i] : 0; + srcs[i] = bi_byte(bi_extract(b, src[i], c >> 2), c & 3); + } + + for (unsigned i = count; i < align(count, 4); i++) + srcs[i] = bi_imm_u8(0); + + for (unsigned i = 0; i < count; i += 4) { + srcs[i / 4] = bi_mkvec_v4i8(b, srcs[i], srcs[i + 1], + srcs[i + 2], srcs[i + 3]); + } + + return bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, 4)); + } else { + UNREACHABLE("Unsupported bit size"); } - - assert(bitsize == 8 || bitsize == 16 || bitsize == 32); - unsigned shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2; - unsigned chan_per_word = 1 << shift; - - for (unsigned i = 0; i < count; i += chan_per_word) { - unsigned rem = MIN2(count - i, chan_per_word); - unsigned *channel_offset = channel ? (channel + i) : NULL; - - if (bitsize == 32) - srcs[i] = bi_extract(b, src[i], channel_offset ? *channel_offset : 0); - else if (bitsize == 16) - srcs[i >> 1] = bi_make_vec16_helper(b, src + i, channel_offset, rem); - else - srcs[i >> 2] = bi_make_vec8_helper(b, src + i, channel_offset, rem); - } - - return bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word)); } static inline bi_instr * diff --git a/src/panfrost/compiler/bifrost/compiler.h b/src/panfrost/compiler/bifrost/compiler.h index d77ffd77820..245eb96c279 100644 --- a/src/panfrost/compiler/bifrost/compiler.h +++ b/src/panfrost/compiler/bifrost/compiler.h @@ -574,7 +574,7 @@ bi_is_value_equiv(bi_index left, bi_index right) } } -#define BI_MAX_VEC 8 +#define BI_MAX_VEC 16 #define BI_MAX_DESTS 4 #define BI_MAX_SRCS 8