diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 81eb6213253..0bc5222c175 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -45,6 +45,7 @@ libpanfrost_bifrost_files = files( 'bi_validate.c', 'bir.c', 'bifrost_compile.c', + 'valhall/va_lower_constants.c', 'valhall/va_optimize.c', 'valhall/va_pack.c', 'valhall/va_validate.c', diff --git a/src/panfrost/bifrost/valhall/va_compiler.h b/src/panfrost/bifrost/valhall/va_compiler.h index 1d2a76a3a80..59b7634e092 100644 --- a/src/panfrost/bifrost/valhall/va_compiler.h +++ b/src/panfrost/bifrost/valhall/va_compiler.h @@ -38,6 +38,7 @@ bool va_validate_fau(bi_instr *I); void va_validate(FILE *fp, bi_context *ctx); void va_repair_fau(bi_builder *b, bi_instr *I); void va_fuse_add_imm(bi_instr *I); +void va_lower_constants(bi_context *ctx, bi_instr *I); uint64_t va_pack_instr(const bi_instr *I, unsigned flow); static inline unsigned @@ -79,6 +80,15 @@ va_select_fau_page(const bi_instr *I) return 0; } +/** Cycle model for Valhall. Results need to be normalized */ +struct va_stats { + /** Counts per pipe */ + unsigned fma, cvt, sfu, v, ls, t; +}; + +void +va_count_instr_stats(bi_instr *I, struct va_stats *stats); + #ifdef __cplusplus } /* extern C */ #endif diff --git a/src/panfrost/bifrost/valhall/va_lower_constants.c b/src/panfrost/bifrost/valhall/va_lower_constants.c new file mode 100644 index 00000000000..ede38e871ee --- /dev/null +++ b/src/panfrost/bifrost/valhall/va_lower_constants.c @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2021 Collabora Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "va_compiler.h" +#include "valhall.h" +#include "bi_builder.h" + +/* Only some special immediates are available, as specified in the Table of + * Immediates in the specification. Other immediates must be lowered, either to + * uniforms or to moves. 
+ */
+
+/* Fallback that materializes an arbitrary 32-bit constant: emits
+ * bi_iadd_imm_i32 of `imm` onto FAU immediate slot 0 (held in the local named
+ * `zero`; presumably that slot contains 0 -- confirm against the immediate
+ * table) and returns the index produced by the builder.
+ */
+static bi_index
+va_mov_imm(bi_builder *b, uint32_t imm)
+{
+   bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false);
+   return bi_iadd_imm_i32(b, zero, imm);
+}
+
+/* Looks for `imm` as a whole 32-bit entry of valhall_immediates. Returns
+ * va_lut() of the first matching entry, or bi_null() if there is none.
+ */
+static bi_index
+va_lut_index_32(uint32_t imm)
+{
+   for (unsigned i = 0; i < ARRAY_SIZE(valhall_immediates); ++i) {
+      if (valhall_immediates[i] == imm)
+         return va_lut(i);
+   }
+
+   return bi_null();
+}
+
+/* Looks for `imm` as either 16-bit half of an entry of valhall_immediates.
+ * Entries are scanned in order, bits 15:0 (half 0) before bits 31:16 (half 1).
+ * Returns bi_half() of the first match, or bi_null() if there is none.
+ *
+ * The halves are extracted with shifts instead of reinterpreting the table as
+ * an array of uint16_t, so the half number does not depend on host byte order
+ * and the table is only ever read through its own element type.
+ */
+static bi_index
+va_lut_index_16(uint16_t imm)
+{
+   for (unsigned i = 0; i < ARRAY_SIZE(valhall_immediates); ++i) {
+      uint32_t word = valhall_immediates[i];
+
+      for (unsigned half = 0; half < 2; ++half) {
+         if (((word >> (16 * half)) & 0xFFFF) == imm)
+            return bi_half(va_lut(i), half);
+      }
+   }
+
+   return bi_null();
+}
+
+/* Looks for `imm` as any of the four bytes of an entry of valhall_immediates.
+ * Entries are scanned in order, least significant byte (lane 0) first.
+ * Returns bi_byte() of the first match, or bi_null() if there is none.
+ *
+ * Bytes are extracted with shifts so that the lane number does not depend on
+ * host byte order. Only referenced from the disabled widening paths in
+ * va_resolve_constant, hence UNUSED.
+ */
+UNUSED static bi_index
+va_lut_index_8(uint8_t imm)
+{
+   for (unsigned i = 0; i < ARRAY_SIZE(valhall_immediates); ++i) {
+      uint32_t word = valhall_immediates[i];
+
+      for (unsigned lane = 0; lane < 4; ++lane) {
+         if (((word >> (8 * lane)) & 0xFF) == imm)
+            return bi_byte(va_lut(i), lane);
+      }
+   }
+
+   return bi_null();
+}
+
+/* Treats `value` as the bit pattern of an FP32 number and tries to express it
+ * as a 16-bit half of a table entry. The conversion to FP16 is accepted only
+ * if converting back reproduces `value` bit-for-bit. Returns the result of
+ * va_lut_index_16() on the FP16 pattern, or bi_null() if the conversion is
+ * inexact.
+ */
+static bi_index
+va_demote_constant_fp16(uint32_t value)
+{
+   uint16_t fp16 = _mesa_float_to_half(uif(value));
+
+   /* Only convert if it is exact */
+   if (fui(_mesa_half_to_float(fp16)) == value)
+      return va_lut_index_16(fp16);
+   else
+      return bi_null();
+}
+
+/* Picks an operand that provides the (already swizzle-resolved) constant
+ * `value` for a source described by `info`.
+ *
+ * For non-staging sources the immediate table is tried first: as a full
+ * 32-bit entry, then as a replicated 16-bit half (16-bit sources that accept a
+ * swizzle), then as an exactly representable FP16 value (32-bit sources that
+ * accept a swizzle). Staging sources skip every table lookup. If nothing
+ * matches, the constant is materialized with va_mov_imm(), which emits an
+ * instruction through `b`.
+ */
+static bi_index
+va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool staging)
+{
+   /* Try the constant as-is */
+   if (!staging) {
+      bi_index lut = va_lut_index_32(value);
+      if (!bi_is_null(lut)) return lut;
+   }
+
+   /* Try using a single half of a FP16 constant */
+   bool replicated_halves = (value & 0xFFFF) == (value >> 16);
+   if (!staging && info.swizzle && info.size == VA_SIZE_16 && replicated_halves) {
+      bi_index lut = va_lut_index_16(value & 0xFFFF);
+      if (!bi_is_null(lut)) return lut;
+   }
+
+   /* TODO: Distinguish sign extend from zero extend */
+#if 0
+   /* Try zero-extending a single byte */
+   if (!staging && info.widen && value <= UINT8_MAX) {
+      bi_index lut = va_lut_index_8(value);
+      if (!bi_is_null(lut)) return lut;
+   }
+
+   /* Try zero-extending a single halfword */
+   if (!staging && info.widen && value <= UINT16_MAX) {
+      bi_index lut = va_lut_index_16(value);
+      if (!bi_is_null(lut)) return lut;
+   }
+#endif
+
+   /* Try demoting the constant to FP16 */
+   if (!staging && info.swizzle && info.size == VA_SIZE_32) {
+      bi_index lut = va_demote_constant_fp16(value);
+      if (!bi_is_null(lut)) return lut;
+   }
+
+   /* TODO: Optimize to uniform */
+   return va_mov_imm(b, value);
+}
+
+/* Replaces every BI_INDEX_CONSTANT source of `I` with an operand chosen by
+ * va_resolve_constant(). Any instruction needed to materialize a constant is
+ * inserted before `I`.
+ *
+ * The source's swizzle is folded into the constant value first, and the
+ * source's neg modifier is carried over onto the replacement operand. A
+ * constant source with the abs modifier set is rejected by assertion.
+ */
+void
+va_lower_constants(bi_context *ctx, bi_instr *I)
+{
+   bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
+
+   bi_foreach_src(I, s) {
+      if (I->src[s].type == BI_INDEX_CONSTANT) {
+         /* abs(#c) is pointless, but -#c occurs in transcendental sequences */
+         assert(!I->src[s].abs && "redundant .abs modifier");
+
+         /* The first nr_staging_srcs sources of an opcode are staging sources */
+         bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
+         struct va_src_info info = va_src_info(I->op, s);
+         uint32_t value = I->src[s].value;
+         enum bi_swizzle swz = I->src[s].swizzle;
+
+         /* Resolve any swizzle, keeping in mind the different interpretations
+          * of swizzles in different contexts.
+          */
+         if (info.size == VA_SIZE_32) {
+            /* Extracting a half from the 32-bit value */
+            if (swz == BI_SWIZZLE_H00)
+               value = (value & 0xFFFF);
+            else if (swz == BI_SWIZZLE_H11)
+               value = (value >> 16);
+            else
+               assert(swz == BI_SWIZZLE_H01);
+
+            /* FP16 -> FP32: only when the source accepts a swizzle and a
+             * single half was selected; otherwise the value is left as
+             * extracted above.
+             */
+            if (info.swizzle && swz != BI_SWIZZLE_H01)
+               value = fui(_mesa_half_to_float(value));
+         } else if (info.size == VA_SIZE_16) {
+            assert(swz >= BI_SWIZZLE_H00 && swz <= BI_SWIZZLE_H11);
+            value = bi_apply_swizzle(value, swz);
+         } else if (info.size == VA_SIZE_8 && info.lanes) {
+            /* 8-bit extract: the byte number is the swizzle's offset from
+             * BI_SWIZZLE_B0000
+             */
+            unsigned chan = (swz - BI_SWIZZLE_B0000);
+            assert(chan < 4);
+
+            value = (value >> (8 * chan)) & 0xFF;
+         } else {
+            /* TODO: Any other special handling? */
+            value = bi_apply_swizzle(value, swz);
+         }
+
+         bi_index cons = va_resolve_constant(&b, value, info, staging);
+
+         /* XOR rather than assign, so a neg already set on the replacement
+          * operand is combined with the neg of the original source.
+          */
+         cons.neg ^= I->src[s].neg;
+         I->src[s] = cons;
+      }
+   }
+}