diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index e6383b67737..ac302ba3165 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -62,6 +62,8 @@ template = """\ #include "util/softfloat.h" #include "util/bigmath.h" #include "util/format/format_utils.h" +#include "util/format_r11g11b10f.h" +#include "util/u_math.h" #include "nir_constant_expressions.h" /** @@ -277,6 +279,102 @@ unpack_half_1x16(uint16_t u) return _mesa_half_to_float(u); } +/* Broadcom v3d specific instructions */ +/** + * Packs 2 2x16 floating split into a r11g11b10f: + * + * dst[10:0] = float16_to_float11 (src0[15:0]) + * dst[21:11] = float16_to_float11 (src0[31:16]) + * dst[31:22] = float16_to_float10 (src1[15:0]) + */ +static uint32_t pack_32_to_r11g11b10_v3d(const uint32_t src0, + const uint32_t src1) +{ + float rgb[3] = { + unpack_half_1x16((src0 & 0xffff)), + unpack_half_1x16((src0 >> 16)), + unpack_half_1x16((src1 & 0xffff)), + }; + + return float3_to_r11g11b10f(rgb); +} + +/** + * The three methods below are basically wrappers over pack_s/unorm_1x8/1x16, + * as they receives a uint16_t val instead of a float + */ +static inline uint8_t _mesa_half_to_snorm8(uint16_t val) +{ + return pack_snorm_1x8(_mesa_half_to_float(val)); +} + +static uint16_t _mesa_float_to_snorm16(uint32_t val) +{ + union fi aux; + aux.ui = val; + return pack_snorm_1x16(aux.f); +} + +static uint16_t _mesa_float_to_unorm16(uint32_t val) +{ + union fi aux; + aux.ui = val; + return pack_unorm_1x16(aux.f); +} + +static inline uint32_t float_pack16_v3d(uint32_t f32) +{ + return _mesa_float_to_half(uif(f32)); +} + +static inline uint32_t float_unpack16_v3d(uint32_t f16) +{ + return fui(_mesa_half_to_float(f16)); +} + +static inline uint32_t vfpack_v3d(uint32_t a, uint32_t b) +{ + return float_pack16_v3d(b) << 16 | float_pack16_v3d(a); +} + +static inline uint32_t vfsat_v3d(uint32_t a) +{ + const uint32_t low = fui(SATURATE(_mesa_half_to_float(a & 0xffff))); + const uint32_t high = fui(SATURATE(_mesa_half_to_float(a >> 16))); + + return vfpack_v3d(low, high); +} + +static inline uint32_t fmul_v3d(uint32_t a, uint32_t b) +{ + return fui(uif(a) * uif(b)); +} + +static uint32_t vfmul_v3d(uint32_t a, uint32_t b) +{ + const uint32_t low = fmul_v3d(float_unpack16_v3d(a & 0xffff), + float_unpack16_v3d(b & 0xffff)); + const uint32_t high = fmul_v3d(float_unpack16_v3d(a >> 16), + float_unpack16_v3d(b >> 16)); + + return vfpack_v3d(low, high); +} + +/* Convert 2x16-bit floating point to 2x10-bit unorm */ +static uint32_t pack_2x16_to_unorm_2x10(uint32_t src0) +{ + return vfmul_v3d(vfsat_v3d(src0), 0x03ff03ff); +} + +/* + * Convert 2x16-bit floating point to one 2-bit and one + * 10-bit unorm + */ +static uint32_t pack_2x16_to_unorm_10_2(uint32_t src0) +{ + return vfmul_v3d(vfsat_v3d(src0), 0x000303ff); +} + /* Some typed vector structures to make things like src0.y work */ typedef int8_t int1_t; typedef uint8_t uint1_t; diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 0f81328f441..da53ea56f43 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -1413,6 +1413,58 @@ for (int i = 0; i < 32; i += 8) { } """) +# v3d-specific opcodes + +# v3d-specific (v71) instruction that packs bits of 2 2x16 floating point into +# r11g11b10 bits, rounding to nearest even, so +# dst[10:0] = float16_to_float11 (src0[15:0]) +# dst[21:11] = float16_to_float11 (src0[31:16]) +# dst[31:22] = float16_to_float10 (src1[15:0]) +binop_convert("pack_32_to_r11g11b10_v3d", tuint32, tuint32, "", + "pack_32_to_r11g11b10_v3d(src0, src1)") + +# v3d-specific (v71) instruction that packs 2x32 bit to 2x16 bit integer. The +# difference with pack_32_2x16_split is that the sources are 32bit too. So it +# receives 2 32-bit integer, and packs the lower halfword as 2x16 on a 32-bit +# integer. +binop_horiz("pack_2x32_to_2x16_v3d", 1, tuint32, 1, tuint32, 1, tuint32, + "(src0.x & 0xffff) | (src1.x << 16)") + +# v3d-specific (v71) instruction that packs bits of 2 2x16 integers into +# r10g10b10a2: +# dst[9:0] = src0[9:0] +# dst[19:10] = src0[25:16] +# dst[29:20] = src1[9:0] +# dst[31:30] = src1[17:16] +binop_convert("pack_uint_32_to_r10g10b10a2_v3d", tuint32, tuint32, "", + "(src0 & 0x3ff) | ((src0 >> 16) & 0x3ff) << 10 | (src1 & 0x3ff) << 20 | ((src1 >> 16) & 0x3ff) << 30") + +# v3d-specific (v71) instruction that packs 2 2x16 bit integers into 4x8 bits: +# dst[7:0] = src0[7:0] +# dst[15:8] = src0[23:16] +# dst[23:16] = src1[7:0] +# dst[31:24] = src1[23:16] +opcode("pack_4x16_to_4x8_v3d", 0, tuint32, [0, 0], [tuint32, tuint32], + False, "", + "(src0 & 0x000000ff) | (src0 & 0x00ff0000) >> 8 | (src1 & 0x000000ff) << 16 | (src1 & 0x00ff0000) << 8") + +# v3d-specific (v71) instructions to convert 2x16 floating point to 2x8 bit unorm/snorm +unop("pack_2x16_to_unorm_2x8_v3d", tuint32, + "_mesa_half_to_unorm(src0 & 0xffff, 8) | (_mesa_half_to_unorm(src0 >> 16, 8) << 16)") +unop("pack_2x16_to_snorm_2x8_v3d", tuint32, + "_mesa_half_to_snorm(src0 & 0xffff, 8) | (_mesa_half_to_snorm(src0 >> 16, 8) << 16)") + +# v3d-specific (v71) instructions to convert 32-bit floating point to 16 bit unorm/snorm +unop("f2unorm_16_v3d", tuint32, "_mesa_float_to_unorm16(src0)") +unop("f2snorm_16_v3d", tuint32, "_mesa_float_to_snorm16(src0)") + +# v3d-specific (v71) instructions to convert 2x16 bit floating points to 2x10 bit unorm +unop("pack_2x16_to_unorm_2x10_v3d", tuint32, "pack_2x16_to_unorm_2x10(src0)") + +# v3d-specific (v71) instructions to convert 2x16 bit floating points to one 2-bit +# and one 10 bit unorm +unop("pack_2x16_to_unorm_10_2_v3d", tuint32, "pack_2x16_to_unorm_10_2(src0)") + # Mali-specific opcodes unop("fsat_signed_mali", tfloat, ("fmin(fmax(src0, -1.0), 1.0)")) unop("fclamp_pos_mali", tfloat, ("fmax(src0, 0.0)"))