mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
nir: add new opcodes to map new v71 packing/conversion instructions
Since v71, Broadcom hardware includes specific packing/conversion
instructions, so this commit adds opcodes to make use of
them, especially for image stores:
* pack_2x16_to_unorm_2x8 (on backend vftounorm8/vftosnorm8):
2x16-bit floating point to 2x8-bit unorm/snorm
* f2unorm_16/f2snorm_16 (on backend ftounorm16/ftosnorm16):
floating point to 16-bit unorm/snorm
* pack_2x16_to_unorm_2x10/pack_2x16_to_unorm_10_2 (on backend
vftounorm10lo/vftounorm10hi): used to convert a floating point to
a r10g10b10a2 unorm
* pack_32_to_r11g11b10 (on backend v11fpack): packs 2 2x16 FP into
R11G11B10.
* pack_uint_32_to_r10g10b10a2 (on backend v10pack): pack 2 2x16
integer into R10G10B10A2
* pack_4x16_to_4x8 (on backend v8pack): packs 2 2x16 bit integer
into 4x8 bits.
* pack_2x32_to_2x16 (on backend vpack): 2x32 bit to 2x16 integer
pack
The latter can easily be confused with the existing
pack_32_2x16_split. Note, however, that pack_32_2x16_split receives two
16-bit integers and packs them into a 32-bit integer, whereas the Broadcom
opcode takes two 32-bit integers, takes the lower halfword of each, and
packs them as 2x16 into a 32-bit integer.
Interestingly broadcom also defines a similar one that packs the
higher halfword. Not used yet.
Note that at this point we use agnostic names, even if we add a _v3d
suffix as they are only available for broadcom, in order to follow
current NIR conventions.
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25726>
This commit is contained in:
parent
fc044928b2
commit
c0cfa4f53b
2 changed files with 150 additions and 0 deletions
|
|
@ -62,6 +62,8 @@ template = """\
|
||||||
#include "util/softfloat.h"
|
#include "util/softfloat.h"
|
||||||
#include "util/bigmath.h"
|
#include "util/bigmath.h"
|
||||||
#include "util/format/format_utils.h"
|
#include "util/format/format_utils.h"
|
||||||
|
#include "util/format_r11g11b10f.h"
|
||||||
|
#include "util/u_math.h"
|
||||||
#include "nir_constant_expressions.h"
|
#include "nir_constant_expressions.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -277,6 +279,102 @@ unpack_half_1x16(uint16_t u)
|
||||||
return _mesa_half_to_float(u);
|
return _mesa_half_to_float(u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Broadcom v3d specific instructions */
|
||||||
|
/**
 * Builds an r11g11b10f word from three half floats carried in two 32-bit
 * sources:
 *
 * dst[10:0]  = float16_to_float11 (src0[15:0])
 * dst[21:11] = float16_to_float11 (src0[31:16])
 * dst[31:22] = float16_to_float10 (src1[15:0])
 *
 * The upper halfword of src1 is not read.
 */
static uint32_t pack_32_to_r11g11b10_v3d(const uint32_t src0,
                                         const uint32_t src1)
{
   float channels[3];

   /* Expand each 16-bit half to a float before handing over to the packer. */
   channels[0] = unpack_half_1x16(src0 & 0xffff);
   channels[1] = unpack_half_1x16(src0 >> 16);
   channels[2] = unpack_half_1x16(src1 & 0xffff);

   return float3_to_r11g11b10f(channels);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
 * The three helpers below are thin wrappers over pack_snorm_1x8,
 * pack_snorm_1x16 and pack_unorm_1x16: they receive the raw bit pattern
 * (a uint16_t half or a uint32_t float) instead of a float value.
|
||||||
|
*/
|
||||||
|
/* Takes a raw half-float bit pattern and returns pack_snorm_1x8 of its value. */
static inline uint8_t _mesa_half_to_snorm8(uint16_t val)
{
   const float as_float = _mesa_half_to_float(val);

   return pack_snorm_1x8(as_float);
}
|
||||||
|
|
||||||
|
static uint16_t _mesa_float_to_snorm16(uint32_t val)
|
||||||
|
{
|
||||||
|
union fi aux;
|
||||||
|
aux.ui = val;
|
||||||
|
return pack_snorm_1x16(aux.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint16_t _mesa_float_to_unorm16(uint32_t val)
|
||||||
|
{
|
||||||
|
union fi aux;
|
||||||
|
aux.ui = val;
|
||||||
|
return pack_unorm_1x16(aux.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Converts the float whose raw bits are in f32 to a half-float bit pattern. */
static inline uint32_t float_pack16_v3d(uint32_t f32)
{
   const float value = uif(f32);

   return _mesa_float_to_half(value);
}
|
||||||
|
|
||||||
|
/* Expands a half-float bit pattern to the raw bits of the equivalent float. */
static inline uint32_t float_unpack16_v3d(uint32_t f16)
{
   const float value = _mesa_half_to_float(f16);

   return fui(value);
}
|
||||||
|
|
||||||
|
/*
 * Packs two floats (given as raw 32-bit patterns) into one word as two half
 * floats: a lands in bits [15:0], b in bits [31:16].
 */
static inline uint32_t vfpack_v3d(uint32_t a, uint32_t b)
{
   const uint32_t lo = float_pack16_v3d(a);
   const uint32_t hi = float_pack16_v3d(b);

   return (hi << 16) | lo;
}
|
||||||
|
|
||||||
|
/*
 * Applies SATURATE to each of the two half floats packed in a and repacks
 * the results as two half floats in the same lanes.
 */
static inline uint32_t vfsat_v3d(uint32_t a)
{
   const float lo = SATURATE(_mesa_half_to_float(a & 0xffff));
   const float hi = SATURATE(_mesa_half_to_float(a >> 16));

   return vfpack_v3d(fui(lo), fui(hi));
}
|
||||||
|
|
||||||
|
/* Multiplies two floats passed and returned as raw 32-bit patterns. */
static inline uint32_t fmul_v3d(uint32_t a, uint32_t b)
{
   const float product = uif(a) * uif(b);

   return fui(product);
}
|
||||||
|
|
||||||
|
/*
 * Lane-wise multiply of two words that each hold two half floats. Every lane
 * is widened to a float, multiplied, and the two products are packed back as
 * half floats in the same lanes.
 */
static uint32_t vfmul_v3d(uint32_t a, uint32_t b)
{
   /* Low lanes, bits [15:0]. */
   const uint32_t a_lo = float_unpack16_v3d(a & 0xffff);
   const uint32_t b_lo = float_unpack16_v3d(b & 0xffff);

   /* High lanes, bits [31:16]. */
   const uint32_t a_hi = float_unpack16_v3d(a >> 16);
   const uint32_t b_hi = float_unpack16_v3d(b >> 16);

   return vfpack_v3d(fmul_v3d(a_lo, b_lo), fmul_v3d(a_hi, b_hi));
}
|
||||||
|
|
||||||
|
/*
 * Convert 2x16-bit floating point to 2x10-bit unorm.
 *
 * Both lanes are saturated and then multiplied, as half floats, by 0x03ff.
 * NOTE(review): 0x03ff read as a half float is the subnormal 1023 * 2^-24,
 * so the product's half-float bit pattern presumably is the 10-bit unorm
 * value itself - confirm against the hardware documentation.
 */
static uint32_t pack_2x16_to_unorm_2x10(uint32_t src0)
{
   const uint32_t scale = 0x03ff03ff;
   const uint32_t saturated = vfsat_v3d(src0);

   return vfmul_v3d(saturated, scale);
}
|
||||||
|
|
||||||
|
/*
 * Convert 2x16-bit floating point to one 2-bit and one
 * 10-bit unorm.
 *
 * Both lanes are saturated and then multiplied, as half floats, by a
 * per-lane factor: 0x03ff for the low lane and 0x0003 for the high lane.
 * NOTE(review): read as half floats these are the subnormals 1023 * 2^-24
 * and 3 * 2^-24, so the low lane presumably yields the 10-bit value and the
 * high lane the 2-bit value - confirm against the hardware documentation.
 */
static uint32_t pack_2x16_to_unorm_10_2(uint32_t src0)
{
   const uint32_t scale = 0x000303ff;
   const uint32_t saturated = vfsat_v3d(src0);

   return vfmul_v3d(saturated, scale);
}
|
||||||
|
|
||||||
/* Some typed vector structures to make things like src0.y work */
|
/* Some typed vector structures to make things like src0.y work */
|
||||||
typedef int8_t int1_t;
|
typedef int8_t int1_t;
|
||||||
typedef uint8_t uint1_t;
|
typedef uint8_t uint1_t;
|
||||||
|
|
|
||||||
|
|
@ -1413,6 +1413,58 @@ for (int i = 0; i < 32; i += 8) {
|
||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
# v3d-specific opcodes
|
||||||
|
|
||||||
|
# v3d-specific (v71): packs three of the half floats held in two 2x16 sources
# into r11g11b10 bits, rounding to nearest even:
#   dst[10:0]  = float16_to_float11 (src0[15:0])
#   dst[21:11] = float16_to_float11 (src0[31:16])
#   dst[31:22] = float16_to_float10 (src1[15:0])
binop_convert(
   "pack_32_to_r11g11b10_v3d", tuint32, tuint32, "",
   "pack_32_to_r11g11b10_v3d(src0, src1)")
|
||||||
|
|
||||||
|
# v3d-specific (v71): 2x32-bit to 2x16-bit integer pack. Unlike
# pack_32_2x16_split, the sources are 32-bit as well: the lower halfword of
# src0 goes to dst[15:0] and the lower halfword of src1 to dst[31:16].
binop_horiz(
   "pack_2x32_to_2x16_v3d", 1, tuint32, 1, tuint32, 1, tuint32,
   "(src0.x & 0xffff) | (src1.x << 16)")
|
||||||
|
|
||||||
|
# v3d-specific (v71) instruction that packs bits of 2 2x16 integers into
# r10g10b10a2:
#   dst[9:0]   = src0[9:0]
#   dst[19:10] = src0[25:16]
#   dst[29:20] = src1[9:0]
#   dst[31:30] = src1[17:16]
#
# The alpha field is only 2 bits wide, so it is masked with 0x3. A wider mask
# would only give the right result because the 32-bit shift drops the extra
# bits.
binop_convert("pack_uint_32_to_r10g10b10a2_v3d", tuint32, tuint32, "",
              "(src0 & 0x3ff) | ((src0 >> 16) & 0x3ff) << 10 | "
              "(src1 & 0x3ff) << 20 | ((src1 >> 16) & 0x3) << 30")
|
||||||
|
|
||||||
|
# v3d-specific (v71): packs the low byte of each halfword of two 2x16-bit
# integer sources into 4x8 bits:
#   dst[7:0]   = src0[7:0]
#   dst[15:8]  = src0[23:16]
#   dst[23:16] = src1[7:0]
#   dst[31:24] = src1[23:16]
opcode(
   "pack_4x16_to_4x8_v3d", 0, tuint32, [0, 0], [tuint32, tuint32],
   False, "",
   "(src0 & 0x000000ff) | (src0 & 0x00ff0000) >> 8 | "
   "(src1 & 0x000000ff) << 16 | (src1 & 0x00ff0000) << 8")
|
||||||
|
|
||||||
|
# v3d-specific (v71) instructions to convert 2x16 floating point to 2x8 bit
# unorm/snorm. Each converted value is placed in the low byte of the halfword
# its source half float came from.
unop(
   "pack_2x16_to_unorm_2x8_v3d", tuint32,
   "_mesa_half_to_unorm(src0 & 0xffff, 8) | "
   "(_mesa_half_to_unorm(src0 >> 16, 8) << 16)")
|
||||||
|
# snorm variant of the 2x16 float to 2x8 conversion. Each converted lane is
# masked to 8 bits before being combined.
# NOTE(review): _mesa_half_to_snorm appears to return a signed int (confirm in
# util/format/format_utils.h). Without the masks, a negative low lane would
# sign-extend over the high lane and a negative high lane would be
# left-shifted, which is undefined behaviour in C.
unop("pack_2x16_to_snorm_2x8_v3d", tuint32,
     "(_mesa_half_to_snorm(src0 & 0xffff, 8) & 0xff) | "
     "((_mesa_half_to_snorm(src0 >> 16, 8) & 0xff) << 16)")
|
||||||
|
|
||||||
|
# v3d-specific (v71): 32-bit floating point to 16-bit unorm/snorm
# conversions. The source is the raw 32-bit pattern of the float.
unop("f2unorm_16_v3d", tuint32,
     "_mesa_float_to_unorm16(src0)")
unop("f2snorm_16_v3d", tuint32,
     "_mesa_float_to_snorm16(src0)")
|
||||||
|
|
||||||
|
# v3d-specific (v71): converts both half floats of a 2x16 source to 10-bit
# unorm values.
unop("pack_2x16_to_unorm_2x10_v3d", tuint32,
     "pack_2x16_to_unorm_2x10(src0)")
|
||||||
|
|
||||||
|
# v3d-specific (v71): converts the two half floats of a 2x16 source to one
# 2-bit and one 10-bit unorm value.
unop("pack_2x16_to_unorm_10_2_v3d", tuint32,
     "pack_2x16_to_unorm_10_2(src0)")
|
||||||
|
|
||||||
# Mali-specific opcodes
|
# Mali-specific opcodes
|
||||||
unop("fsat_signed_mali", tfloat, ("fmin(fmax(src0, -1.0), 1.0)"))
|
unop("fsat_signed_mali", tfloat, ("fmin(fmax(src0, -1.0), 1.0)"))
|
||||||
unop("fclamp_pos_mali", tfloat, ("fmax(src0, 0.0)"))
|
unop("fclamp_pos_mali", tfloat, ("fmax(src0, 0.0)"))
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue