diff --git a/src/panfrost/compiler/bifrost/bi_schedule.c b/src/panfrost/compiler/bifrost/bi_schedule.c index f12254fc82f..835cd73169c 100644 --- a/src/panfrost/compiler/bifrost/bi_schedule.c +++ b/src/panfrost/compiler/bifrost/bi_schedule.c @@ -703,7 +703,7 @@ bi_impacted_t_modifiers(bi_instr *I, unsigned src) case BI_OPCODE_S8_TO_S32: case BI_OPCODE_U8_TO_F32: case BI_OPCODE_U8_TO_U32: - return (swizzle != BI_SWIZZLE_B0000); + return (swizzle != BI_SWIZZLE_B0); case BI_OPCODE_V2S8_TO_V2F16: case BI_OPCODE_V2S8_TO_V2S16: diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 48eae98a128..c1619db42b2 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -752,8 +752,8 @@ bi_make_vec8_helper(bi_builder *b, bi_index *src, unsigned *channel, bytes[i] = bi_byte(raw_data, lane); } - assert(b->shader->arch >= 9 || bytes[i].swizzle == BI_SWIZZLE_B0000 || - bytes[i].swizzle == BI_SWIZZLE_B2222); + assert(b->shader->arch >= 9 || bytes[i].swizzle == BI_SWIZZLE_B0 || + bytes[i].swizzle == BI_SWIZZLE_B2); } if (b->shader->arch >= 9) { @@ -2362,7 +2362,8 @@ bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps) bi_make_vec_to(b, temp, unoffset_srcs, channels, comps, bitsize); static const enum bi_swizzle swizzle_lut[] = { - BI_SWIZZLE_B0000, BI_SWIZZLE_B0011, BI_SWIZZLE_H01, BI_SWIZZLE_H01}; + BI_SWIZZLE_B0000, BI_SWIZZLE_B0011, BI_SWIZZLE_B0123, BI_SWIZZLE_B0123 + }; assert(comps - 1 < ARRAY_SIZE(swizzle_lut)); /* Assign a coherent swizzle for the vector */ @@ -5085,7 +5086,6 @@ bi_vectorize_filter(const nir_instr *instr, const void *data) break; } - /* Vectorized instructions cannot write more than 32-bit */ int dst_bit_size = alu->def.bit_size; if (dst_bit_size == 16) return 2; diff --git a/src/panfrost/compiler/compiler.h b/src/panfrost/compiler/compiler.h index 8eac2b5fd42..516a75bbbb0 100644 --- a/src/panfrost/compiler/compiler.h +++ b/src/panfrost/compiler/compiler.h @@ 
-51,25 +51,56 @@ extern "C" { */ enum bi_swizzle { - /* 16-bit swizzle ordering deliberate for fast compute */ - BI_SWIZZLE_H00 = 0, /* = B0101 */ - BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */ - BI_SWIZZLE_H10 = 2, /* = B2301 */ - BI_SWIZZLE_H11 = 3, /* = B2323 */ + /* 16-bit swizzles, ordered sequentially for fast compute */ + BI_SWIZZLE_H00 = 0, + BI_SWIZZLE_H01 = 1, + BI_SWIZZLE_H10 = 2, + BI_SWIZZLE_H11 = 3, - /* replication order should be maintained for fast compute */ + /* 8-bit swizzle equivalents */ + BI_SWIZZLE_B0101 = BI_SWIZZLE_H00, + BI_SWIZZLE_B0123 = BI_SWIZZLE_H01, + BI_SWIZZLE_B2301 = BI_SWIZZLE_H10, + BI_SWIZZLE_B2323 = BI_SWIZZLE_H11, + + /* 8-bit replication swizzles, ordered sequentially for fast compute */ BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */ BI_SWIZZLE_B1111 = 5, BI_SWIZZLE_B2222 = 6, BI_SWIZZLE_B3333 = 7, - /* totally special for explicit pattern matching */ + /* remaining 8-bit swizzles in arbitrary order */ BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */ BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */ BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */ BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */ - BI_SWIZZLE_B0022 = 12, /* for b02 lanes */ + /* 8-bit swizzles that only exist in HW as 8-bit half swizzles */ + BI_SWIZZLE_B0022 = 12, + + /* 16-bit single-lane, values ordered sequentially */ + BI_SWIZZLE_H0 = BI_SWIZZLE_H00, + BI_SWIZZLE_H1 = BI_SWIZZLE_H11, + + /* 8-bit single-lane, values ordered sequentially */ + BI_SWIZZLE_B0 = BI_SWIZZLE_B0000, + BI_SWIZZLE_B1 = BI_SWIZZLE_B1111, + BI_SWIZZLE_B2 = BI_SWIZZLE_B2222, + BI_SWIZZLE_B3 = BI_SWIZZLE_B3333, + + /* 8-bit half-swizzle + * + * Values for replication are sequential. Other half-swizzles have + * arbitrary value ordering. 
+ * + * TODO: rest of these */ + BI_SWIZZLE_B00 = BI_SWIZZLE_B0000, + BI_SWIZZLE_B01 = BI_SWIZZLE_B0011, + BI_SWIZZLE_B11 = BI_SWIZZLE_B1111, + BI_SWIZZLE_B02 = BI_SWIZZLE_B0022, + BI_SWIZZLE_B22 = BI_SWIZZLE_B2222, + BI_SWIZZLE_B23 = BI_SWIZZLE_B2233, + BI_SWIZZLE_B33 = BI_SWIZZLE_B3333, }; /* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant @@ -232,9 +263,9 @@ bi_half(bi_index idx, bool upper) static inline bi_index bi_byte(bi_index idx, unsigned lane) { - assert(idx.swizzle == BI_SWIZZLE_H01); + assert(idx.swizzle == BI_SWIZZLE_B0123); assert(lane < 4); - idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0000 + lane); + idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0 + lane); return idx; } diff --git a/src/panfrost/compiler/valhall/va_pack.c b/src/panfrost/compiler/valhall/va_pack.c index e63e0334619..6d48f7630e5 100644 --- a/src/panfrost/compiler/valhall/va_pack.c +++ b/src/panfrost/compiler/valhall/va_pack.c @@ -231,9 +231,9 @@ va_pack_widen_f32(const bi_instr *I, enum bi_swizzle swz) switch (swz) { case BI_SWIZZLE_H01: return VA_WIDEN_NONE; - case BI_SWIZZLE_H00: + case BI_SWIZZLE_H0: return VA_WIDEN_H0; - case BI_SWIZZLE_H11: + case BI_SWIZZLE_H1: return VA_WIDEN_H1; default: invalid_instruction(I, "widen"); @@ -262,11 +262,11 @@ va_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size) { if (size == VA_SIZE_8) { switch (swz) { - case BI_SWIZZLE_H01: + case BI_SWIZZLE_B0123: return VA_SWIZZLES_8_BIT_B0123; - case BI_SWIZZLE_H00: + case BI_SWIZZLE_B0101: return VA_SWIZZLES_8_BIT_B0101; - case BI_SWIZZLE_H11: + case BI_SWIZZLE_B2323: return VA_SWIZZLES_8_BIT_B2323; case BI_SWIZZLE_B0000: return VA_SWIZZLES_8_BIT_B0000; @@ -289,13 +289,13 @@ va_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size) return VA_SWIZZLES_16_BIT_H01; case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11; - case BI_SWIZZLE_B0000: + case BI_SWIZZLE_B00: return VA_SWIZZLES_16_BIT_B00; - case BI_SWIZZLE_B1111: + case BI_SWIZZLE_B11: return 
VA_SWIZZLES_16_BIT_B11; - case BI_SWIZZLE_B2222: + case BI_SWIZZLE_B22: return VA_SWIZZLES_16_BIT_B22; - case BI_SWIZZLE_B3333: + case BI_SWIZZLE_B33: return VA_SWIZZLES_16_BIT_B33; default: invalid_instruction(I, "16-bit widen"); @@ -304,17 +304,17 @@ va_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size) switch (swz) { case BI_SWIZZLE_H01: return VA_SWIZZLES_32_BIT_NONE; - case BI_SWIZZLE_H00: + case BI_SWIZZLE_H0: return VA_SWIZZLES_32_BIT_H0; - case BI_SWIZZLE_H11: + case BI_SWIZZLE_H1: return VA_SWIZZLES_32_BIT_H1; - case BI_SWIZZLE_B0000: + case BI_SWIZZLE_B0: return VA_SWIZZLES_32_BIT_B0; - case BI_SWIZZLE_B1111: + case BI_SWIZZLE_B1: return VA_SWIZZLES_32_BIT_B1; - case BI_SWIZZLE_B2222: + case BI_SWIZZLE_B2: return VA_SWIZZLES_32_BIT_B2; - case BI_SWIZZLE_B3333: + case BI_SWIZZLE_B3: return VA_SWIZZLES_32_BIT_B3; default: invalid_instruction(I, "32-bit widen"); @@ -328,19 +328,19 @@ static enum va_half_swizzles_8_bit va_pack_halfswizzle(const bi_instr *I, enum bi_swizzle swz) { switch (swz) { - case BI_SWIZZLE_B0000: + case BI_SWIZZLE_B00: return VA_HALF_SWIZZLES_8_BIT_B00; - case BI_SWIZZLE_B1111: + case BI_SWIZZLE_B11: return VA_HALF_SWIZZLES_8_BIT_B11; - case BI_SWIZZLE_B2222: + case BI_SWIZZLE_B22: return VA_HALF_SWIZZLES_8_BIT_B22; - case BI_SWIZZLE_B3333: + case BI_SWIZZLE_B33: return VA_HALF_SWIZZLES_8_BIT_B33; - case BI_SWIZZLE_B0011: + case BI_SWIZZLE_B01: return VA_HALF_SWIZZLES_8_BIT_B01; - case BI_SWIZZLE_B2233: + case BI_SWIZZLE_B23: return VA_HALF_SWIZZLES_8_BIT_B23; - case BI_SWIZZLE_B0022: + case BI_SWIZZLE_B02: return VA_HALF_SWIZZLES_8_BIT_B02; default: invalid_instruction(I, "v2u8 swizzle"); @@ -351,13 +351,13 @@ static enum va_lanes_8_bit va_pack_shift_lanes(const bi_instr *I, enum bi_swizzle swz) { switch (swz) { - case BI_SWIZZLE_B0000: + case BI_SWIZZLE_B00: return VA_LANES_8_BIT_B00; - case BI_SWIZZLE_B1111: + case BI_SWIZZLE_B11: return VA_LANES_8_BIT_B11; - case BI_SWIZZLE_B2222: + case BI_SWIZZLE_B22: return 
VA_LANES_8_BIT_B22; - case BI_SWIZZLE_B3333: + case BI_SWIZZLE_B33: return VA_LANES_8_BIT_B33; default: invalid_instruction(I, "lane shift"); @@ -370,9 +370,9 @@ va_pack_combine(const bi_instr *I, enum bi_swizzle swz) switch (swz) { case BI_SWIZZLE_H01: return VA_COMBINE_NONE; - case BI_SWIZZLE_H00: + case BI_SWIZZLE_H0: return VA_COMBINE_H0; - case BI_SWIZZLE_H11: + case BI_SWIZZLE_H1: return VA_COMBINE_H1; default: invalid_instruction(I, "branch lane"); @@ -618,10 +618,10 @@ va_pack_alu(const bi_instr *I, unsigned arch) (I->op == BI_OPCODE_MKVEC_V2I8) ? ((i == 0) ? 38 : 36) : 28; if (src_info.size == VA_SIZE_16) { - hex |= (src.swizzle == BI_SWIZZLE_H11 ? 1 : 0) << offs; + hex |= (src.swizzle == BI_SWIZZLE_H1 ? 1 : 0) << offs; } else { pack_assert(I, src_info.size == VA_SIZE_8); - unsigned comp = src.swizzle - BI_SWIZZLE_B0000; + unsigned comp = src.swizzle - BI_SWIZZLE_B0; pack_assert(I, comp < 4); hex |= (uint64_t)comp << offs; }