From bd466195b9314997936fe6ea98e4d9eb3aa49eae Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Date: Tue, 9 May 2023 17:10:24 -0400
Subject: [PATCH] nir: Make ALU descriptions machine-readable

We already document a lot of ALU opcodes, let's make this machine-readable so we
can put the descriptions in our generated HTML documentation.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22929>
---
 src/compiler/nir/nir_opcodes.py | 233 ++++++++++++++++++--------------
 1 file changed, 131 insertions(+), 102 deletions(-)

diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index c44a0b902b0..f7e9df073f7 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -199,25 +199,28 @@ unop("mov", tuint, "src0")
 
 unop("ineg", tint, "-src0")
 unop("fneg", tfloat, "-src0")
-unop("inot", tint, "~src0") # invert every bit of the integer
+unop("inot", tint, "~src0", description = "Invert every bit of the integer")
 
-# nir_op_fsign roughly implements the OpenGL / Vulkan rules for sign(float).
-# The GLSL.std.450 FSign instruction is defined as:
-#
-#    Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
-#
-# If the source is equal to zero, there is a preference for the result to have
-# the same sign, but this is not required (it is required by OpenCL).  If the
-# source is not a number, there is a preference for the result to be +0.0, but
-# this is not required (it is required by OpenCL).  If the source is not a
-# number, and the result is not +0.0, the result should definitely **not** be
-# NaN.
-#
-# The values returned for constant folding match the behavior required by
-# OpenCL.
 unop("fsign", tfloat, ("bit_size == 64 ? " +
                        "(isnan(src0) ? 0.0  : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0  : -1.0 )) : " +
-                       "(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"))
+                       "(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"),
+     description = """
+Roughly implements the OpenGL / Vulkan rules for ``sign(float)``.
+The ``GLSL.std.450 FSign`` instruction is defined as:
+
+    Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
+
+If the source is equal to zero, there is a preference for the result to have
+the same sign, but this is not required (it is required by OpenCL).  If the
+source is not a number, there is a preference for the result to be +0.0, but
+this is not required (it is required by OpenCL).  If the source is not a
+number, and the result is not +0.0, the result should definitely **not** be
+NaN.
+
+The values returned for constant folding match the behavior required by
+OpenCL.
+     """)
+
 unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
 unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
 unop("fabs", tfloat, "fabs(src0)")
@@ -286,17 +289,23 @@ for src_t in [tint, tuint, tfloat, tbool]:
                                                        dst_bit_size),
                                    dst_t + str(dst_bit_size), src_t, conv_expr)
 
-# Special opcode that is the same as f2f16, i2i16, u2u16 except that it is safe
-# to remove it if the result is immediately converted back to 32 bits again.
-# This is generated as part of the precision lowering pass. mp stands for medium
-# precision.
-unop_numeric_convert("f2fmp", tfloat16, tfloat32, opcodes["f2f16"].const_expr)
-unop_numeric_convert("i2imp", tint16, tint32, opcodes["i2i16"].const_expr)
+def unop_numeric_convert_mp(base, src_t, dst_t):
+    op_like = base + "16"
+    unop_numeric_convert(base + "mp", src_t, dst_t, opcodes[op_like].const_expr,
+                         description = """
+Special opcode that is the same as :nir:alu-op:`{}` except that it is safe to
+remove it if the result is immediately converted back to 32 bits again. This is
+generated as part of the precision lowering pass. ``mp`` stands for medium
+precision.
+                         """.format(op_like))
+
+unop_numeric_convert_mp("f2f", tfloat16, tfloat32)
+unop_numeric_convert_mp("i2i", tint16, tint32)
 # u2ump isn't defined, because the behavior is equal to i2imp
-unop_numeric_convert("f2imp", tint16, tfloat32, opcodes["f2i16"].const_expr)
-unop_numeric_convert("f2ump", tuint16, tfloat32, opcodes["f2u16"].const_expr)
-unop_numeric_convert("i2fmp", tfloat16, tint32, opcodes["i2f16"].const_expr)
-unop_numeric_convert("u2fmp", tfloat16, tuint32, opcodes["u2f16"].const_expr)
+unop_numeric_convert_mp("f2i", tint16, tfloat32)
+unop_numeric_convert_mp("f2u", tuint16, tfloat32)
+unop_numeric_convert_mp("i2f", tfloat16, tint32)
+unop_numeric_convert_mp("u2f", tfloat16, tuint32)
 
 # Unary floating-point rounding operations.
 
@@ -320,15 +329,16 @@ unop_convert("frexp_exp", tint32, tfloat, "frexp(src0, &dst);")
 unop_convert("frexp_sig", tfloat, tfloat, "int n; dst = frexp(src0, &n);")
 
 # Partial derivatives.
+deriv_template = """
+Calculate the screen-space partial derivative using {} derivatives of the input
+with respect to the {}-axis. The constant folding is trivial as the derivative
+of a constant is 0.
+"""
 
-
-unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
-unop("fddy", tfloat, "0.0")
-unop("fddx_fine", tfloat, "0.0")
-unop("fddy_fine", tfloat, "0.0")
-unop("fddx_coarse", tfloat, "0.0")
-unop("fddy_coarse", tfloat, "0.0")
-
+for mode, suffix in [("either fine or coarse", ""), ("fine", "_fine"), ("coarse", "_coarse")]:
+    for axis in ["x", "y"]:
+        unop(f"fdd{axis}{suffix}", tfloat, "0.0",
+             description = deriv_template.format(mode, axis.upper()))
 
 # Floating point pack and unpack operations.
 
@@ -372,16 +382,18 @@ unpack_2x16("unorm")
 unpack_4x8("unorm")
 unpack_2x16("half")
 
-# Convert two unsigned integers into a packed unsigned short (clamp is applied).
 unop_horiz("pack_uint_2x16", 1, tuint32, 2, tuint32, """
 dst.x = _mesa_unsigned_to_unsigned(src0.x, 16);
 dst.x |= _mesa_unsigned_to_unsigned(src0.y, 16) << 16;
+""", description = """
+Convert two unsigned integers into a packed unsigned short (clamp is applied).
 """)
 
-# Convert two signed integers into a packed signed short (clamp is applied).
 unop_horiz("pack_sint_2x16", 1, tint32, 2, tint32, """
 dst.x = _mesa_signed_to_signed(src0.x, 16) & 0xffff;
 dst.x |= _mesa_signed_to_signed(src0.y, 16) << 16;
+""", description = """
+Convert two signed integers into a packed signed short (clamp is applied).
 """)
 
 unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
@@ -560,8 +572,8 @@ if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4;
 if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
 """)
 
-# Sum of vector components
-unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}")
+unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}",
+            description = "Sum of vector components")
 
 def binop_convert(name, out_type, in_type, alg_props, const_expr, description=""):
    opcode(name, 0, out_type, [0, 0], [in_type, in_type],
@@ -683,10 +695,6 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
 }
 """)
 
-# Unlike fmul, anything (even infinity or NaN) multiplied by zero is always zero.
-# fmulz(0.0, inf) and fmulz(0.0, nan) must be +/-0.0, even if
-# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
-# the result must be a positive zero if either operand is zero.
 binop("fmulz", tfloat32, _2src_commutative + associative, """
 if (src0 == 0.0 || src1 == 0.0)
    dst = 0.0;
@@ -694,21 +702,27 @@ else if (nir_is_rounding_mode_rtz(execution_mode, 32))
    dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
 else
    dst = src0 * src1;
+""", description = """
+Unlike :nir:alu-op:`fmul`, anything (even infinity or NaN) multiplied by zero is
+always zero. ``fmulz(0.0, inf)`` and ``fmulz(0.0, nan)`` must be +/-0.0, even
+if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If
+``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive
+zero if either operand is zero.
 """)
 
-# low 32-bits of signed/unsigned integer multiply
+
 binop("imul", tint, _2src_commutative + associative, """
    /* Use 64-bit multiplies to prevent overflow of signed arithmetic */
    dst = (uint64_t)src0 * (uint64_t)src1;
-""")
+""", description = "Low 32-bits of signed/unsigned integer multiply")
 
-# Generate 64 bit result from 2 32 bits quantity
 binop_convert("imul_2x32_64", tint64, tint32, _2src_commutative,
-              "(int64_t)src0 * (int64_t)src1")
+              "(int64_t)src0 * (int64_t)src1",
+              description = "Multiply signed 32-bit integers, 64-bit result")
 binop_convert("umul_2x32_64", tuint64, tuint32, _2src_commutative,
-              "(uint64_t)src0 * (uint64_t)src1")
+              "(uint64_t)src0 * (uint64_t)src1",
+              description = "Multiply unsigned 32-bit integers, 64-bit result")
 
-# high 32-bits of signed integer multiply
 binop("imul_high", tint, _2src_commutative, """
 if (bit_size == 64) {
    /* We need to do a full 128-bit x 128-bit multiply in order for the sign
@@ -735,9 +749,8 @@ if (bit_size == 64) {
     * potential overflow of signed multiply */
    dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size;
 }
-""")
+""", description = "High 32-bits of signed integer multiply")
 
-# high 32-bits of unsigned integer multiply
 binop("umul_high", tuint, _2src_commutative, """
 if (bit_size == 64) {
    /* The casts are kind-of annoying but needed to prevent compiler warnings. */
@@ -749,31 +762,33 @@ if (bit_size == 64) {
 } else {
    dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
 }
-""")
+""", description = "High 32-bits of unsigned integer multiply")
 
-# low 32-bits of unsigned integer multiply
 binop("umul_low", tuint32, _2src_commutative, """
 uint64_t mask = (1 << (bit_size / 2)) - 1;
 dst = ((uint64_t)src0 & mask) * ((uint64_t)src1 & mask);
-""")
+""", description = "Low 32-bits of unsigned integer multiply")
 
-# Multiply 32-bits with low 16-bits.
-binop("imul_32x16", tint32, "", "src0 * (int16_t) src1")
-binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1")
+binop("imul_32x16", tint32, "", "src0 * (int16_t) src1",
+      description = "Multiply 32-bits with low 16-bits, with sign extension")
+binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1",
+      description = "Multiply 32-bits with low 16-bits, with zero extension")
 
 binop("fdiv", tfloat, "", "src0 / src1")
 binop("idiv", tint, "", "src1 == 0 ? 0 : (src0 / src1)")
 binop("udiv", tuint, "", "src1 == 0 ? 0 : (src0 / src1)")
 
-# returns an integer (1 or 0) representing the carry resulting from the
-# addition of the two unsigned arguments.
+binop_convert("uadd_carry", tuint, tuint, _2src_commutative,
+              "src0 + src1 < src0",
+              description = """
+Return an integer (1 or 0) representing the carry resulting from the
+addition of the two unsigned arguments.
+              """)
 
-binop_convert("uadd_carry", tuint, tuint, _2src_commutative, "src0 + src1 < src0")
-
-# returns an integer (1 or 0) representing the borrow resulting from the
-# subtraction of the two unsigned arguments.
-
-binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1")
+binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1", description = """
+Return an integer (1 or 0) representing the borrow resulting from the
+subtraction of the two unsigned arguments.
+              """)
 
 # hadd: (a + b) >> 1 (without overflow)
 # x + y = x - (x & ~y) + (x & ~y) + y - (~x & y) + (~x & y)
@@ -862,15 +877,21 @@ binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equ
 binop("seq", tfloat, _2src_commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
 binop("sne", tfloat, _2src_commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
 
-# SPIRV shifts are undefined for shift-operands >= bitsize,
-# but SM5 shifts are defined to use only the least significant bits.
-# The NIR definition is according to the SM5 specification.
+shift_note = """
+SPIRV shifts are undefined for shift-operands >= bitsize,
+but SM5 shifts are defined to use only the least significant bits.
+The NIR definition is according to the SM5 specification.
+"""
+
 opcode("ishl", 0, tint, [0, 0], [tint, tuint32], False, "",
-       "(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))")
+       "(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))",
+       description = "Left shift." + shift_note)
 opcode("ishr", 0, tint, [0, 0], [tint, tuint32], False, "",
-       "src0 >> (src1 & (sizeof(src0) * 8 - 1))")
+       "src0 >> (src1 & (sizeof(src0) * 8 - 1))",
+       description = "Signed right-shift." + shift_note)
 opcode("ushr", 0, tuint, [0, 0], [tuint, tuint32], False, "",
-       "src0 >> (src1 & (sizeof(src0) * 8 - 1))")
+       "src0 >> (src1 & (sizeof(src0) * 8 - 1))",
+       description = "Unsigned right-shift." + shift_note)
 
 opcode("urol", 0, tuint, [0, 0], [tuint, tuint32], False, "", """
    uint32_t rotate_mask = sizeof(src0) * 8 - 1;
@@ -883,15 +904,16 @@ opcode("uror", 0, tuint, [0, 0], [tuint, tuint32], False, "", """
          (src0 << (-src1 & rotate_mask));
 """)
 
-# bitwise logic operators
-#
-# These are also used as boolean and, or, xor for hardware supporting
-# integers.
+bitwise_description = """
+Bitwise {0}, also used as a boolean {0} for hardware supporting integers.
+"""
 
-
-binop("iand", tuint, _2src_commutative + associative, "src0 & src1")
-binop("ior", tuint, _2src_commutative + associative, "src0 | src1")
-binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1")
+binop("iand", tuint, _2src_commutative + associative, "src0 & src1",
+      description = bitwise_description.format("AND"))
+binop("ior", tuint, _2src_commutative + associative, "src0 | src1",
+      description = bitwise_description.format("OR"))
+binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1",
+      description = bitwise_description.format("XOR"))
 
 
 binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
@@ -931,13 +953,14 @@ opcode("pack_32_4x8_split", 0, tuint32, [0, 0, 0, 0], [tuint8, tuint8, tuint8, t
        False, "",
        "src0 | ((uint32_t)src1 << 8) | ((uint32_t)src2 << 16) | ((uint32_t)src3 << 24)")
 
-# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
-# and that of the "bfi1" i965 instruction. That is, the bits and offset values
-# are from the low five bits of src0 and src1, respectively.
 binop_convert("bfm", tuint32, tint32, "", """
 int bits = src0 & 0x1F;
 int offset = src1 & 0x1F;
 dst = ((1u << bits) - 1) << offset;
+""", description = """
+Implements the behavior of the first operation of the SM5 "bfi" assembly
+and that of the "bfi1" i965 instruction. That is, the bits and offset values
+are from the low five bits of src0 and src1, respectively.
 """)
 
 opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint32], False, "", """
@@ -947,11 +970,11 @@ if (!isnormal(dst))
    dst = copysignf(0.0f, src0);
 """)
 
-# Combines the first component of each input to make a 2-component vector.
-
 binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
 dst.x = src0.x;
 dst.y = src1.x;
+""", description = """
+Combines the first component of each input to make a 2-component vector.
 """)
 
 # Byte extraction
@@ -992,10 +1015,6 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
 }
 """)
 
-# Unlike ffma, anything (even infinity or NaN) multiplied by zero is always zero.
-# ffmaz(0.0, inf, src2) and ffmaz(0.0, nan, src2) must be +/-0.0 + src2, even if
-# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
-# the result must be a positive zero plus src2 if either src0 or src1 is zero.
 triop("ffmaz", tfloat32, _2src_commutative, """
 if (src0 == 0.0 || src1 == 0.0)
    dst = 0.0 + src2;
@@ -1003,29 +1022,40 @@ else if (nir_is_rounding_mode_rtz(execution_mode, 32))
    dst = _mesa_float_fma_rtz(src0, src1, src2);
 else
    dst = fmaf(src0, src1, src2);
+""", description = """
+Floating-point multiply-add with modified zero handling.
+
+Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by zero is
+always zero. ``ffmaz(0.0, inf, src2)`` and ``ffmaz(0.0, nan, src2)`` must be
+``+/-0.0 + src2``, even if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If
+``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive
+zero plus src2 if either src0 or src1 is zero.
 """)
 
 triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
 
-# Ternary addition
-triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2")
+triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2",
+      description = "Ternary addition")
 
-# Conditional Select
-#
-# A vector conditional select instruction (like ?:, but operating per-
-# component on vectors). There are two versions, one for floating point
-# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
-
-triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2")
+csel_description = """
+A vector conditional select instruction (like ?:, but operating per-
+component on vectors). The condition is {} bool ({}).
+"""
 
+triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2",
+      description = csel_description.format("a floating point", "0.0 vs 1.0"))
 opcode("bcsel", 0, tuint, [0, 0, 0],
-       [tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2")
+       [tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2",
+       description = csel_description.format("a 1-bit", "0 vs 1"))
 opcode("b8csel", 0, tuint, [0, 0, 0],
-       [tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2")
+       [tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2",
+       description = csel_description.format("an 8-bit", "0 vs ~0"))
 opcode("b16csel", 0, tuint, [0, 0, 0],
-       [tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2")
+       [tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2",
+       description = csel_description.format("a 16-bit", "0 vs ~0"))
 opcode("b32csel", 0, tuint, [0, 0, 0],
-       [tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2")
+       [tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2",
+       description = csel_description.format("a 32-bit", "0 vs ~0"))
 
 triop("i32csel_gt", tint32, selection, "(src0 > 0) ? src1 : src2")
 triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2")
@@ -1033,7 +1063,6 @@ triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2")
 triop("fcsel_gt", tfloat32, selection, "(src0 > 0.0f) ? src1 : src2")
 triop("fcsel_ge", tfloat32, selection, "(src0 >= 0.0f) ? src1 : src2")
 
-# SM5 bfi assembly
 triop("bfi", tuint32, "", """
 unsigned mask = src0, insert = src1, base = src2;
 if (mask == 0) {
@@ -1046,7 +1075,7 @@ if (mask == 0) {
    }
    dst = (base & ~mask) | (insert & mask);
 }
-""")
+""", description = "SM5 bfi assembly")
 
 
 triop("bitfield_select", tuint, "", "(src0 & src1) | (~src0 & src2)")