nir: Make ALU descriptions machine-readable

We already document a lot of ALU opcodes, let's make this machine-readable so we
can put the descriptions in our generated HTML documentation.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22929>
This commit is contained in:
Alyssa Rosenzweig 2023-05-09 17:10:24 -04:00 committed by Marge Bot
parent 6b4f00a3ac
commit bd466195b9

View file

@ -199,25 +199,28 @@ unop("mov", tuint, "src0")
unop("ineg", tint, "-src0")
unop("fneg", tfloat, "-src0")
unop("inot", tint, "~src0") # invert every bit of the integer
unop("inot", tint, "~src0", description = "Invert every bit of the integer")
# nir_op_fsign roughly implements the OpenGL / Vulkan rules for sign(float).
# The GLSL.std.450 FSign instruction is defined as:
#
# Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
#
# If the source is equal to zero, there is a preference for the result to have
# the same sign, but this is not required (it is required by OpenCL). If the
# source is not a number, there is a preference for the result to be +0.0, but
# this is not required (it is required by OpenCL). If the source is not a
# number, and the result is not +0.0, the result should definitely **not** be
# NaN.
#
# The values returned for constant folding match the behavior required by
# OpenCL.
unop("fsign", tfloat, ("bit_size == 64 ? " +
"(isnan(src0) ? 0.0 : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0 : -1.0 )) : " +
"(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"))
"(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"),
description = """
Roughly implements the OpenGL / Vulkan rules for ``sign(float)``.
The ``GLSL.std.450 FSign`` instruction is defined as:
Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
If the source is equal to zero, there is a preference for the result to have
the same sign, but this is not required (it is required by OpenCL). If the
source is not a number, there is a preference for the result to be +0.0, but
this is not required (it is required by OpenCL). If the source is not a
number, and the result is not +0.0, the result should definitely **not** be
NaN.
The values returned for constant folding match the behavior required by
OpenCL.
""")
unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
unop("fabs", tfloat, "fabs(src0)")
@ -286,17 +289,23 @@ for src_t in [tint, tuint, tfloat, tbool]:
dst_bit_size),
dst_t + str(dst_bit_size), src_t, conv_expr)
# Special opcode that is the same as f2f16, i2i16, u2u16 except that it is safe
# to remove it if the result is immediately converted back to 32 bits again.
# This is generated as part of the precision lowering pass. mp stands for medium
# precision.
unop_numeric_convert("f2fmp", tfloat16, tfloat32, opcodes["f2f16"].const_expr)
unop_numeric_convert("i2imp", tint16, tint32, opcodes["i2i16"].const_expr)
def unop_numeric_convert_mp(base, src_t, dst_t):
op_like = base + "16"
unop_numeric_convert(base + "mp", src_t, dst_t, opcodes[op_like].const_expr,
description = """
Special opcode that is the same as :nir:alu-op:`{}` except that it is safe to
remove it if the result is immediately converted back to 32 bits again. This is
generated as part of the precision lowering pass. ``mp`` stands for medium
precision.
""".format(op_like))
unop_numeric_convert_mp("f2f", tfloat16, tfloat32)
unop_numeric_convert_mp("i2i", tint16, tint32)
# u2ump isn't defined, because the behavior is equal to i2imp
unop_numeric_convert("f2imp", tint16, tfloat32, opcodes["f2i16"].const_expr)
unop_numeric_convert("f2ump", tuint16, tfloat32, opcodes["f2u16"].const_expr)
unop_numeric_convert("i2fmp", tfloat16, tint32, opcodes["i2f16"].const_expr)
unop_numeric_convert("u2fmp", tfloat16, tuint32, opcodes["u2f16"].const_expr)
unop_numeric_convert_mp("f2i", tint16, tfloat32)
unop_numeric_convert_mp("f2u", tuint16, tfloat32)
unop_numeric_convert_mp("i2f", tfloat16, tint32)
unop_numeric_convert_mp("u2f", tfloat16, tuint32)
# Unary floating-point rounding operations.
@ -320,15 +329,16 @@ unop_convert("frexp_exp", tint32, tfloat, "frexp(src0, &dst);")
unop_convert("frexp_sig", tfloat, tfloat, "int n; dst = frexp(src0, &n);")
# Partial derivatives.
deriv_template = """
Calculate the screen-space partial derivative using {} derivatives of the input
with respect to the {}-axis. The constant folding is trivial as the derivative
of a constant is 0.
"""
unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
unop("fddy", tfloat, "0.0")
unop("fddx_fine", tfloat, "0.0")
unop("fddy_fine", tfloat, "0.0")
unop("fddx_coarse", tfloat, "0.0")
unop("fddy_coarse", tfloat, "0.0")
for mode, suffix in [("either fine or coarse", ""), ("fine", "_fine"), ("coarse", "_coarse")]:
for axis in ["x", "y"]:
unop(f"fdd{axis}{suffix}", tfloat, "0.0",
description = deriv_template.format(mode, axis.upper()))
# Floating point pack and unpack operations.
@ -372,16 +382,18 @@ unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")
# Convert two unsigned integers into a packed unsigned short (clamp is applied).
unop_horiz("pack_uint_2x16", 1, tuint32, 2, tuint32, """
dst.x = _mesa_unsigned_to_unsigned(src0.x, 16);
dst.x |= _mesa_unsigned_to_unsigned(src0.y, 16) << 16;
""", description = """
Convert two unsigned integers into a packed unsigned short (clamp is applied).
""")
# Convert two signed integers into a packed signed short (clamp is applied).
unop_horiz("pack_sint_2x16", 1, tint32, 2, tint32, """
dst.x = _mesa_signed_to_signed(src0.x, 16) & 0xffff;
dst.x |= _mesa_signed_to_signed(src0.y, 16) << 16;
""", description = """
Convert two signed integers into a packed signed short (clamp is applied).
""")
unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
@ -560,8 +572,8 @@ if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4;
if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
""")
# Sum of vector components
unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}")
unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}",
description = "Sum of vector components")
def binop_convert(name, out_type, in_type, alg_props, const_expr, description=""):
opcode(name, 0, out_type, [0, 0], [in_type, in_type],
@ -683,10 +695,6 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
}
""")
# Unlike fmul, anything (even infinity or NaN) multiplied by zero is always zero.
# fmulz(0.0, inf) and fmulz(0.0, nan) must be +/-0.0, even if
# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
# the result must be a positive zero if either operand is zero.
binop("fmulz", tfloat32, _2src_commutative + associative, """
if (src0 == 0.0 || src1 == 0.0)
dst = 0.0;
@ -694,21 +702,27 @@ else if (nir_is_rounding_mode_rtz(execution_mode, 32))
dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
else
dst = src0 * src1;
""", description = """
Unlike :nir:alu-op:`fmul`, anything (even infinity or NaN) multiplied by zero is
always zero. ``fmulz(0.0, inf)`` and ``fmulz(0.0, nan)`` must be +/-0.0, even
if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If
``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive
zero if either operand is zero.
""")
# low 32-bits of signed/unsigned integer multiply
binop("imul", tint, _2src_commutative + associative, """
/* Use 64-bit multiplies to prevent overflow of signed arithmetic */
dst = (uint64_t)src0 * (uint64_t)src1;
""")
""", description = "Low 32-bits of signed/unsigned integer multiply")
# Generate 64 bit result from 2 32 bits quantity
binop_convert("imul_2x32_64", tint64, tint32, _2src_commutative,
"(int64_t)src0 * (int64_t)src1")
"(int64_t)src0 * (int64_t)src1",
description = "Multiply signed 32-bit integers, 64-bit result")
binop_convert("umul_2x32_64", tuint64, tuint32, _2src_commutative,
"(uint64_t)src0 * (uint64_t)src1")
"(uint64_t)src0 * (uint64_t)src1",
description = "Multiply unsigned 32-bit integers, 64-bit result")
# high 32-bits of signed integer multiply
binop("imul_high", tint, _2src_commutative, """
if (bit_size == 64) {
/* We need to do a full 128-bit x 128-bit multiply in order for the sign
@ -735,9 +749,8 @@ if (bit_size == 64) {
* potential overflow of signed multiply */
dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size;
}
""")
""", description = "High 32-bits of signed integer multiply")
# high 32-bits of unsigned integer multiply
binop("umul_high", tuint, _2src_commutative, """
if (bit_size == 64) {
/* The casts are kind-of annoying but needed to prevent compiler warnings. */
@ -749,31 +762,33 @@ if (bit_size == 64) {
} else {
dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
}
""")
""", description = "High 32-bits of unsigned integer multiply")
# low 32-bits of unsigned integer multiply
binop("umul_low", tuint32, _2src_commutative, """
uint64_t mask = (1 << (bit_size / 2)) - 1;
dst = ((uint64_t)src0 & mask) * ((uint64_t)src1 & mask);
""")
""", description = "Low 32-bits of unsigned integer multiply")
# Multiply 32-bits with low 16-bits.
binop("imul_32x16", tint32, "", "src0 * (int16_t) src1")
binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1")
binop("imul_32x16", tint32, "", "src0 * (int16_t) src1",
description = "Multiply 32-bits with low 16-bits, with sign extension")
binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1",
description = "Multiply 32-bits with low 16-bits, with zero extension")
binop("fdiv", tfloat, "", "src0 / src1")
binop("idiv", tint, "", "src1 == 0 ? 0 : (src0 / src1)")
binop("udiv", tuint, "", "src1 == 0 ? 0 : (src0 / src1)")
# returns an integer (1 or 0) representing the carry resulting from the
# addition of the two unsigned arguments.
binop_convert("uadd_carry", tuint, tuint, _2src_commutative,
"src0 + src1 < src0",
description = """
Return an integer (1 or 0) representing the carry resulting from the
addition of the two unsigned arguments.
""")
binop_convert("uadd_carry", tuint, tuint, _2src_commutative, "src0 + src1 < src0")
# returns an integer (1 or 0) representing the borrow resulting from the
# subtraction of the two unsigned arguments.
binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1")
binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1", description = """
Return an integer (1 or 0) representing the borrow resulting from the
subtraction of the two unsigned arguments.
""")
# hadd: (a + b) >> 1 (without overflow)
# x + y = x - (x & ~y) + (x & ~y) + y - (~x & y) + (~x & y)
@ -862,15 +877,21 @@ binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equ
binop("seq", tfloat, _2src_commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
binop("sne", tfloat, _2src_commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
# SPIRV shifts are undefined for shift-operands >= bitsize,
# but SM5 shifts are defined to use only the least significant bits.
# The NIR definition is according to the SM5 specification.
shift_note = """
SPIRV shifts are undefined for shift-operands >= bitsize,
but SM5 shifts are defined to use only the least significant bits.
The NIR definition is according to the SM5 specification.
"""
opcode("ishl", 0, tint, [0, 0], [tint, tuint32], False, "",
"(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))")
"(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))",
description = "Left shift." + shift_note)
opcode("ishr", 0, tint, [0, 0], [tint, tuint32], False, "",
"src0 >> (src1 & (sizeof(src0) * 8 - 1))")
"src0 >> (src1 & (sizeof(src0) * 8 - 1))",
description = "Signed right-shift." + shift_note)
opcode("ushr", 0, tuint, [0, 0], [tuint, tuint32], False, "",
"src0 >> (src1 & (sizeof(src0) * 8 - 1))")
"src0 >> (src1 & (sizeof(src0) * 8 - 1))",
description = "Unsigned right-shift." + shift_note)
opcode("urol", 0, tuint, [0, 0], [tuint, tuint32], False, "", """
uint32_t rotate_mask = sizeof(src0) * 8 - 1;
@ -883,15 +904,16 @@ opcode("uror", 0, tuint, [0, 0], [tuint, tuint32], False, "", """
(src0 << (-src1 & rotate_mask));
""")
# bitwise logic operators
#
# These are also used as boolean and, or, xor for hardware supporting
# integers.
bitwise_description = """
Bitwise {0}, also used as a boolean {0} for hardware supporting integers.
"""
binop("iand", tuint, _2src_commutative + associative, "src0 & src1")
binop("ior", tuint, _2src_commutative + associative, "src0 | src1")
binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1")
binop("iand", tuint, _2src_commutative + associative, "src0 & src1",
description = bitwise_description.format("AND"))
binop("ior", tuint, _2src_commutative + associative, "src0 | src1",
description = bitwise_description.format("OR"))
binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1",
description = bitwise_description.format("XOR"))
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
@ -931,13 +953,14 @@ opcode("pack_32_4x8_split", 0, tuint32, [0, 0, 0, 0], [tuint8, tuint8, tuint8, t
False, "",
"src0 | ((uint32_t)src1 << 8) | ((uint32_t)src2 << 16) | ((uint32_t)src3 << 24)")
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
# and that of the "bfi1" i965 instruction. That is, the bits and offset values
# are from the low five bits of src0 and src1, respectively.
binop_convert("bfm", tuint32, tint32, "", """
int bits = src0 & 0x1F;
int offset = src1 & 0x1F;
dst = ((1u << bits) - 1) << offset;
""", description = """
Implements the behavior of the first operation of the SM5 "bfi" assembly
and that of the "bfi1" i965 instruction. That is, the bits and offset values
are from the low five bits of src0 and src1, respectively.
""")
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint32], False, "", """
@ -947,11 +970,11 @@ if (!isnormal(dst))
dst = copysignf(0.0f, src0);
""")
# Combines the first component of each input to make a 2-component vector.
binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
dst.x = src0.x;
dst.y = src1.x;
""", description = """
Combines the first component of each input to make a 2-component vector.
""")
# Byte extraction
@ -992,10 +1015,6 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
}
""")
# Unlike ffma, anything (even infinity or NaN) multiplied by zero is always zero.
# ffmaz(0.0, inf, src2) and ffmaz(0.0, nan, src2) must be +/-0.0 + src2, even if
# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
# the result must be a positive zero plus src2 if either src0 or src1 is zero.
triop("ffmaz", tfloat32, _2src_commutative, """
if (src0 == 0.0 || src1 == 0.0)
dst = 0.0 + src2;
@ -1003,29 +1022,40 @@ else if (nir_is_rounding_mode_rtz(execution_mode, 32))
dst = _mesa_float_fma_rtz(src0, src1, src2);
else
dst = fmaf(src0, src1, src2);
""", description = """
Floating-point multiply-add with modified zero handling.
Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by zero is
always zero. ``ffmaz(0.0, inf, src2)`` and ``ffmaz(0.0, nan, src2)`` must be
``+/-0.0 + src2``, even if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If
``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive
zero plus src2 if either src0 or src1 is zero.
""")
triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
# Ternary addition
triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2")
triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2",
description = "Ternary addition")
# Conditional Select
#
# A vector conditional select instruction (like ?:, but operating per-
# component on vectors). There are two versions, one for floating point
# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2")
csel_description = """
A vector conditional select instruction (like ?:, but operating per-
component on vectors). The condition is {} bool ({}).
"""
triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2",
description = csel_description.format("a floating point", "0.0 vs 1.0"))
opcode("bcsel", 0, tuint, [0, 0, 0],
[tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2")
[tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2",
description = csel_description.format("a 1-bit", "0 vs 1"))
opcode("b8csel", 0, tuint, [0, 0, 0],
[tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2")
[tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2",
description = csel_description.format("an 8-bit", "0 vs ~0"))
opcode("b16csel", 0, tuint, [0, 0, 0],
[tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2")
[tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2",
description = csel_description.format("a 16-bit", "0 vs ~0"))
opcode("b32csel", 0, tuint, [0, 0, 0],
[tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2")
[tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2",
description = csel_description.format("a 32-bit", "0 vs ~0"))
triop("i32csel_gt", tint32, selection, "(src0 > 0) ? src1 : src2")
triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2")
@ -1033,7 +1063,6 @@ triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2")
triop("fcsel_gt", tfloat32, selection, "(src0 > 0.0f) ? src1 : src2")
triop("fcsel_ge", tfloat32, selection, "(src0 >= 0.0f) ? src1 : src2")
# SM5 bfi assembly
triop("bfi", tuint32, "", """
unsigned mask = src0, insert = src1, base = src2;
if (mask == 0) {
@ -1046,7 +1075,7 @@ if (mask == 0) {
}
dst = (base & ~mask) | (insert & mask);
}
""")
""", description = "SM5 bfi assembly")
triop("bitfield_select", tuint, "", "(src0 & src1) | (~src0 & src2)")