mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-08 12:50:12 +01:00
nir: Make ALU descriptions machine-readable
We already document a lot of ALU opcodes, let's make this machine-readable so we can put the descriptions in our generated HTML documentation. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22929>
This commit is contained in:
parent
6b4f00a3ac
commit
bd466195b9
1 changed files with 131 additions and 102 deletions
|
|
@ -199,25 +199,28 @@ unop("mov", tuint, "src0")
|
|||
|
||||
unop("ineg", tint, "-src0")
|
||||
unop("fneg", tfloat, "-src0")
|
||||
unop("inot", tint, "~src0") # invert every bit of the integer
|
||||
unop("inot", tint, "~src0", description = "Invert every bit of the integer")
|
||||
|
||||
# nir_op_fsign roughly implements the OpenGL / Vulkan rules for sign(float).
|
||||
# The GLSL.std.450 FSign instruction is defined as:
|
||||
#
|
||||
# Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
|
||||
#
|
||||
# If the source is equal to zero, there is a preference for the result to have
|
||||
# the same sign, but this is not required (it is required by OpenCL). If the
|
||||
# source is not a number, there is a preference for the result to be +0.0, but
|
||||
# this is not required (it is required by OpenCL). If the source is not a
|
||||
# number, and the result is not +0.0, the result should definitely **not** be
|
||||
# NaN.
|
||||
#
|
||||
# The values returned for constant folding match the behavior required by
|
||||
# OpenCL.
|
||||
unop("fsign", tfloat, ("bit_size == 64 ? " +
|
||||
"(isnan(src0) ? 0.0 : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0 : -1.0 )) : " +
|
||||
"(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"))
|
||||
"(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"),
|
||||
description = """
|
||||
Roughly implements the OpenGL / Vulkan rules for ``sign(float)``.
|
||||
The ``GLSL.std.450 FSign`` instruction is defined as:
|
||||
|
||||
Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
|
||||
|
||||
If the source is equal to zero, there is a preference for the result to have
|
||||
the same sign, but this is not required (it is required by OpenCL). If the
|
||||
source is not a number, there is a preference for the result to be +0.0, but
|
||||
this is not required (it is required by OpenCL). If the source is not a
|
||||
number, and the result is not +0.0, the result should definitely **not** be
|
||||
NaN.
|
||||
|
||||
The values returned for constant folding match the behavior required by
|
||||
OpenCL.
|
||||
""")
|
||||
|
||||
unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
|
||||
unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
|
||||
unop("fabs", tfloat, "fabs(src0)")
|
||||
|
|
@ -286,17 +289,23 @@ for src_t in [tint, tuint, tfloat, tbool]:
|
|||
dst_bit_size),
|
||||
dst_t + str(dst_bit_size), src_t, conv_expr)
|
||||
|
||||
# Special opcode that is the same as f2f16, i2i16, u2u16 except that it is safe
|
||||
# to remove it if the result is immediately converted back to 32 bits again.
|
||||
# This is generated as part of the precision lowering pass. mp stands for medium
|
||||
# precision.
|
||||
unop_numeric_convert("f2fmp", tfloat16, tfloat32, opcodes["f2f16"].const_expr)
|
||||
unop_numeric_convert("i2imp", tint16, tint32, opcodes["i2i16"].const_expr)
|
||||
def unop_numeric_convert_mp(base, src_t, dst_t):
|
||||
op_like = base + "16"
|
||||
unop_numeric_convert(base + "mp", src_t, dst_t, opcodes[op_like].const_expr,
|
||||
description = """
|
||||
Special opcode that is the same as :nir:alu-op:`{}` except that it is safe to
|
||||
remove it if the result is immediately converted back to 32 bits again. This is
|
||||
generated as part of the precision lowering pass. ``mp`` stands for medium
|
||||
precision.
|
||||
""".format(op_like))
|
||||
|
||||
unop_numeric_convert_mp("f2f", tfloat16, tfloat32)
|
||||
unop_numeric_convert_mp("i2i", tint16, tint32)
|
||||
# u2ump isn't defined, because the behavior is equal to i2imp
|
||||
unop_numeric_convert("f2imp", tint16, tfloat32, opcodes["f2i16"].const_expr)
|
||||
unop_numeric_convert("f2ump", tuint16, tfloat32, opcodes["f2u16"].const_expr)
|
||||
unop_numeric_convert("i2fmp", tfloat16, tint32, opcodes["i2f16"].const_expr)
|
||||
unop_numeric_convert("u2fmp", tfloat16, tuint32, opcodes["u2f16"].const_expr)
|
||||
unop_numeric_convert_mp("f2i", tint16, tfloat32)
|
||||
unop_numeric_convert_mp("f2u", tuint16, tfloat32)
|
||||
unop_numeric_convert_mp("i2f", tfloat16, tint32)
|
||||
unop_numeric_convert_mp("u2f", tfloat16, tuint32)
|
||||
|
||||
# Unary floating-point rounding operations.
|
||||
|
||||
|
|
@ -320,15 +329,16 @@ unop_convert("frexp_exp", tint32, tfloat, "frexp(src0, &dst);")
|
|||
unop_convert("frexp_sig", tfloat, tfloat, "int n; dst = frexp(src0, &n);")
|
||||
|
||||
# Partial derivatives.
|
||||
deriv_template = """
|
||||
Calculate the screen-space partial derivative using {} derivatives of the input
|
||||
with respect to the {}-axis. The constant folding is trivial as the derivative
|
||||
of a constant is 0.
|
||||
"""
|
||||
|
||||
|
||||
unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
|
||||
unop("fddy", tfloat, "0.0")
|
||||
unop("fddx_fine", tfloat, "0.0")
|
||||
unop("fddy_fine", tfloat, "0.0")
|
||||
unop("fddx_coarse", tfloat, "0.0")
|
||||
unop("fddy_coarse", tfloat, "0.0")
|
||||
|
||||
for mode, suffix in [("either fine or coarse", ""), ("fine", "_fine"), ("coarse", "_coarse")]:
|
||||
for axis in ["x", "y"]:
|
||||
unop(f"fdd{axis}{suffix}", tfloat, "0.0",
|
||||
description = deriv_template.format(mode, axis.upper()))
|
||||
|
||||
# Floating point pack and unpack operations.
|
||||
|
||||
|
|
@ -372,16 +382,18 @@ unpack_2x16("unorm")
|
|||
unpack_4x8("unorm")
|
||||
unpack_2x16("half")
|
||||
|
||||
# Convert two unsigned integers into a packed unsigned short (clamp is applied).
|
||||
unop_horiz("pack_uint_2x16", 1, tuint32, 2, tuint32, """
|
||||
dst.x = _mesa_unsigned_to_unsigned(src0.x, 16);
|
||||
dst.x |= _mesa_unsigned_to_unsigned(src0.y, 16) << 16;
|
||||
""", description = """
|
||||
Convert two unsigned integers into a packed unsigned short (clamp is applied).
|
||||
""")
|
||||
|
||||
# Convert two signed integers into a packed signed short (clamp is applied).
|
||||
unop_horiz("pack_sint_2x16", 1, tint32, 2, tint32, """
|
||||
dst.x = _mesa_signed_to_signed(src0.x, 16) & 0xffff;
|
||||
dst.x |= _mesa_signed_to_signed(src0.y, 16) << 16;
|
||||
""", description = """
|
||||
Convert two signed integers into a packed signed short (clamp is applied).
|
||||
""")
|
||||
|
||||
unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
|
||||
|
|
@ -560,8 +572,8 @@ if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4;
|
|||
if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
|
||||
""")
|
||||
|
||||
# Sum of vector components
|
||||
unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}")
|
||||
unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}",
|
||||
description = "Sum of vector components")
|
||||
|
||||
def binop_convert(name, out_type, in_type, alg_props, const_expr, description=""):
|
||||
opcode(name, 0, out_type, [0, 0], [in_type, in_type],
|
||||
|
|
@ -683,10 +695,6 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
|||
}
|
||||
""")
|
||||
|
||||
# Unlike fmul, anything (even infinity or NaN) multiplied by zero is always zero.
|
||||
# fmulz(0.0, inf) and fmulz(0.0, nan) must be +/-0.0, even if
|
||||
# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
|
||||
# the result must be a positive zero if either operand is zero.
|
||||
binop("fmulz", tfloat32, _2src_commutative + associative, """
|
||||
if (src0 == 0.0 || src1 == 0.0)
|
||||
dst = 0.0;
|
||||
|
|
@ -694,21 +702,27 @@ else if (nir_is_rounding_mode_rtz(execution_mode, 32))
|
|||
dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
|
||||
else
|
||||
dst = src0 * src1;
|
||||
""", description = """
|
||||
Unlike :nir:alu-op:`fmul`, anything (even infinity or NaN) multiplied by zero is
|
||||
always zero. ``fmulz(0.0, inf)`` and ``fmulz(0.0, nan)`` must be +/-0.0, even
|
||||
if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If
|
||||
``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive
|
||||
zero if either operand is zero.
|
||||
""")
|
||||
|
||||
# low 32-bits of signed/unsigned integer multiply
|
||||
|
||||
binop("imul", tint, _2src_commutative + associative, """
|
||||
/* Use 64-bit multiplies to prevent overflow of signed arithmetic */
|
||||
dst = (uint64_t)src0 * (uint64_t)src1;
|
||||
""")
|
||||
""", description = "Low 32-bits of signed/unsigned integer multiply")
|
||||
|
||||
# Generate 64 bit result from 2 32 bits quantity
|
||||
binop_convert("imul_2x32_64", tint64, tint32, _2src_commutative,
|
||||
"(int64_t)src0 * (int64_t)src1")
|
||||
"(int64_t)src0 * (int64_t)src1",
|
||||
description = "Multiply signed 32-bit integers, 64-bit result")
|
||||
binop_convert("umul_2x32_64", tuint64, tuint32, _2src_commutative,
|
||||
"(uint64_t)src0 * (uint64_t)src1")
|
||||
"(uint64_t)src0 * (uint64_t)src1",
|
||||
description = "Multiply unsigned 32-bit integers, 64-bit result")
|
||||
|
||||
# high 32-bits of signed integer multiply
|
||||
binop("imul_high", tint, _2src_commutative, """
|
||||
if (bit_size == 64) {
|
||||
/* We need to do a full 128-bit x 128-bit multiply in order for the sign
|
||||
|
|
@ -735,9 +749,8 @@ if (bit_size == 64) {
|
|||
* potential overflow of signed multiply */
|
||||
dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size;
|
||||
}
|
||||
""")
|
||||
""", description = "High 32-bits of signed integer multiply")
|
||||
|
||||
# high 32-bits of unsigned integer multiply
|
||||
binop("umul_high", tuint, _2src_commutative, """
|
||||
if (bit_size == 64) {
|
||||
/* The casts are kind-of annoying but needed to prevent compiler warnings. */
|
||||
|
|
@ -749,31 +762,33 @@ if (bit_size == 64) {
|
|||
} else {
|
||||
dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
|
||||
}
|
||||
""")
|
||||
""", description = "High 32-bits of unsigned integer multiply")
|
||||
|
||||
# low 32-bits of unsigned integer multiply
|
||||
binop("umul_low", tuint32, _2src_commutative, """
|
||||
uint64_t mask = (1 << (bit_size / 2)) - 1;
|
||||
dst = ((uint64_t)src0 & mask) * ((uint64_t)src1 & mask);
|
||||
""")
|
||||
""", description = "Low 32-bits of unsigned integer multiply")
|
||||
|
||||
# Multiply 32-bits with low 16-bits.
|
||||
binop("imul_32x16", tint32, "", "src0 * (int16_t) src1")
|
||||
binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1")
|
||||
binop("imul_32x16", tint32, "", "src0 * (int16_t) src1",
|
||||
description = "Multiply 32-bits with low 16-bits, with sign extension")
|
||||
binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1",
|
||||
description = "Multiply 32-bits with low 16-bits, with zero extension")
|
||||
|
||||
binop("fdiv", tfloat, "", "src0 / src1")
|
||||
binop("idiv", tint, "", "src1 == 0 ? 0 : (src0 / src1)")
|
||||
binop("udiv", tuint, "", "src1 == 0 ? 0 : (src0 / src1)")
|
||||
|
||||
# returns an integer (1 or 0) representing the carry resulting from the
|
||||
# addition of the two unsigned arguments.
|
||||
binop_convert("uadd_carry", tuint, tuint, _2src_commutative,
|
||||
"src0 + src1 < src0",
|
||||
description = """
|
||||
Return an integer (1 or 0) representing the carry resulting from the
|
||||
addition of the two unsigned arguments.
|
||||
""")
|
||||
|
||||
binop_convert("uadd_carry", tuint, tuint, _2src_commutative, "src0 + src1 < src0")
|
||||
|
||||
# returns an integer (1 or 0) representing the borrow resulting from the
|
||||
# subtraction of the two unsigned arguments.
|
||||
|
||||
binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1")
|
||||
binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1", description = """
|
||||
Return an integer (1 or 0) representing the borrow resulting from the
|
||||
subtraction of the two unsigned arguments.
|
||||
""")
|
||||
|
||||
# hadd: (a + b) >> 1 (without overflow)
|
||||
# x + y = x - (x & ~y) + (x & ~y) + y - (~x & y) + (~x & y)
|
||||
|
|
@ -862,15 +877,21 @@ binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equ
|
|||
binop("seq", tfloat, _2src_commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
|
||||
binop("sne", tfloat, _2src_commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
|
||||
|
||||
# SPIRV shifts are undefined for shift-operands >= bitsize,
|
||||
# but SM5 shifts are defined to use only the least significant bits.
|
||||
# The NIR definition is according to the SM5 specification.
|
||||
shift_note = """
|
||||
SPIRV shifts are undefined for shift-operands >= bitsize,
|
||||
but SM5 shifts are defined to use only the least significant bits.
|
||||
The NIR definition is according to the SM5 specification.
|
||||
"""
|
||||
|
||||
opcode("ishl", 0, tint, [0, 0], [tint, tuint32], False, "",
|
||||
"(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))")
|
||||
"(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))",
|
||||
description = "Left shift." + shift_note)
|
||||
opcode("ishr", 0, tint, [0, 0], [tint, tuint32], False, "",
|
||||
"src0 >> (src1 & (sizeof(src0) * 8 - 1))")
|
||||
"src0 >> (src1 & (sizeof(src0) * 8 - 1))",
|
||||
description = "Signed right-shift." + shift_note)
|
||||
opcode("ushr", 0, tuint, [0, 0], [tuint, tuint32], False, "",
|
||||
"src0 >> (src1 & (sizeof(src0) * 8 - 1))")
|
||||
"src0 >> (src1 & (sizeof(src0) * 8 - 1))",
|
||||
description = "Unsigned right-shift." + shift_note)
|
||||
|
||||
opcode("urol", 0, tuint, [0, 0], [tuint, tuint32], False, "", """
|
||||
uint32_t rotate_mask = sizeof(src0) * 8 - 1;
|
||||
|
|
@ -883,15 +904,16 @@ opcode("uror", 0, tuint, [0, 0], [tuint, tuint32], False, "", """
|
|||
(src0 << (-src1 & rotate_mask));
|
||||
""")
|
||||
|
||||
# bitwise logic operators
|
||||
#
|
||||
# These are also used as boolean and, or, xor for hardware supporting
|
||||
# integers.
|
||||
bitwise_description = """
|
||||
Bitwise {0}, also used as a boolean {0} for hardware supporting integers.
|
||||
"""
|
||||
|
||||
|
||||
binop("iand", tuint, _2src_commutative + associative, "src0 & src1")
|
||||
binop("ior", tuint, _2src_commutative + associative, "src0 | src1")
|
||||
binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1")
|
||||
binop("iand", tuint, _2src_commutative + associative, "src0 & src1",
|
||||
description = bitwise_description.format("AND"))
|
||||
binop("ior", tuint, _2src_commutative + associative, "src0 | src1",
|
||||
description = bitwise_description.format("OR"))
|
||||
binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1",
|
||||
description = bitwise_description.format("XOR"))
|
||||
|
||||
|
||||
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
|
||||
|
|
@ -931,13 +953,14 @@ opcode("pack_32_4x8_split", 0, tuint32, [0, 0, 0, 0], [tuint8, tuint8, tuint8, t
|
|||
False, "",
|
||||
"src0 | ((uint32_t)src1 << 8) | ((uint32_t)src2 << 16) | ((uint32_t)src3 << 24)")
|
||||
|
||||
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
|
||||
# and that of the "bfi1" i965 instruction. That is, the bits and offset values
|
||||
# are from the low five bits of src0 and src1, respectively.
|
||||
binop_convert("bfm", tuint32, tint32, "", """
|
||||
int bits = src0 & 0x1F;
|
||||
int offset = src1 & 0x1F;
|
||||
dst = ((1u << bits) - 1) << offset;
|
||||
""", description = """
|
||||
Implements the behavior of the first operation of the SM5 "bfi" assembly
|
||||
and that of the "bfi1" i965 instruction. That is, the bits and offset values
|
||||
are from the low five bits of src0 and src1, respectively.
|
||||
""")
|
||||
|
||||
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint32], False, "", """
|
||||
|
|
@ -947,11 +970,11 @@ if (!isnormal(dst))
|
|||
dst = copysignf(0.0f, src0);
|
||||
""")
|
||||
|
||||
# Combines the first component of each input to make a 2-component vector.
|
||||
|
||||
binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
|
||||
dst.x = src0.x;
|
||||
dst.y = src1.x;
|
||||
""", description = """
|
||||
Combines the first component of each input to make a 2-component vector.
|
||||
""")
|
||||
|
||||
# Byte extraction
|
||||
|
|
@ -992,10 +1015,6 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
|||
}
|
||||
""")
|
||||
|
||||
# Unlike ffma, anything (even infinity or NaN) multiplied by zero is always zero.
|
||||
# ffmaz(0.0, inf, src2) and ffmaz(0.0, nan, src2) must be +/-0.0 + src2, even if
|
||||
# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
|
||||
# the result must be a positive zero plus src2 if either src0 or src1 is zero.
|
||||
triop("ffmaz", tfloat32, _2src_commutative, """
|
||||
if (src0 == 0.0 || src1 == 0.0)
|
||||
dst = 0.0 + src2;
|
||||
|
|
@ -1003,29 +1022,40 @@ else if (nir_is_rounding_mode_rtz(execution_mode, 32))
|
|||
dst = _mesa_float_fma_rtz(src0, src1, src2);
|
||||
else
|
||||
dst = fmaf(src0, src1, src2);
|
||||
""", description = """
|
||||
Floating-point multiply-add with modified zero handling.
|
||||
|
||||
Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by zero is
|
||||
always zero. ``ffmaz(0.0, inf, src2)`` and ``ffmaz(0.0, nan, src2)`` must be
|
||||
``+/-0.0 + src2``, even if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If
|
||||
``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive
|
||||
zero plus src2 if either src0 or src1 is zero.
|
||||
""")
|
||||
|
||||
triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
|
||||
|
||||
# Ternary addition
|
||||
triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2")
|
||||
triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2",
|
||||
description = "Ternary addition")
|
||||
|
||||
# Conditional Select
|
||||
#
|
||||
# A vector conditional select instruction (like ?:, but operating per-
|
||||
# component on vectors). There are two versions, one for floating point
|
||||
# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
|
||||
|
||||
triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2")
|
||||
csel_description = """
|
||||
A vector conditional select instruction (like ?:, but operating per-
|
||||
component on vectors). The condition is {} bool ({}).
|
||||
"""
|
||||
|
||||
triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2",
|
||||
description = csel_description.format("a floating point", "0.0 vs 1.0"))
|
||||
opcode("bcsel", 0, tuint, [0, 0, 0],
|
||||
[tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2")
|
||||
[tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2",
|
||||
description = csel_description.format("a 1-bit", "0 vs 1"))
|
||||
opcode("b8csel", 0, tuint, [0, 0, 0],
|
||||
[tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2")
|
||||
[tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2",
|
||||
description = csel_description.format("an 8-bit", "0 vs ~0"))
|
||||
opcode("b16csel", 0, tuint, [0, 0, 0],
|
||||
[tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2")
|
||||
[tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2",
|
||||
description = csel_description.format("a 16-bit", "0 vs ~0"))
|
||||
opcode("b32csel", 0, tuint, [0, 0, 0],
|
||||
[tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2")
|
||||
[tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2",
|
||||
description = csel_description.format("a 32-bit", "0 vs ~0"))
|
||||
|
||||
triop("i32csel_gt", tint32, selection, "(src0 > 0) ? src1 : src2")
|
||||
triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2")
|
||||
|
|
@ -1033,7 +1063,6 @@ triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2")
|
|||
triop("fcsel_gt", tfloat32, selection, "(src0 > 0.0f) ? src1 : src2")
|
||||
triop("fcsel_ge", tfloat32, selection, "(src0 >= 0.0f) ? src1 : src2")
|
||||
|
||||
# SM5 bfi assembly
|
||||
triop("bfi", tuint32, "", """
|
||||
unsigned mask = src0, insert = src1, base = src2;
|
||||
if (mask == 0) {
|
||||
|
|
@ -1046,7 +1075,7 @@ if (mask == 0) {
|
|||
}
|
||||
dst = (base & ~mask) | (insert & mask);
|
||||
}
|
||||
""")
|
||||
""", description = "SM5 bfi assembly")
|
||||
|
||||
|
||||
triop("bitfield_select", tuint, "", "(src0 & src1) | (~src0 & src2)")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue