From bd466195b9314997936fe6ea98e4d9eb3aa49eae Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 9 May 2023 17:10:24 -0400 Subject: [PATCH] nir: Make ALU descriptions machine-readable We already document a lot of ALU opcodes, let's make this machine-readable so we can put the descriptions in our generated HTML documentation. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Rhys Perry Part-of: --- src/compiler/nir/nir_opcodes.py | 233 ++++++++++++++++++-------------- 1 file changed, 131 insertions(+), 102 deletions(-) diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index c44a0b902b0..f7e9df073f7 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -199,25 +199,28 @@ unop("mov", tuint, "src0") unop("ineg", tint, "-src0") unop("fneg", tfloat, "-src0") -unop("inot", tint, "~src0") # invert every bit of the integer +unop("inot", tint, "~src0", description = "Invert every bit of the integer") -# nir_op_fsign roughly implements the OpenGL / Vulkan rules for sign(float). -# The GLSL.std.450 FSign instruction is defined as: -# -# Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0. -# -# If the source is equal to zero, there is a preference for the result to have -# the same sign, but this is not required (it is required by OpenCL). If the -# source is not a number, there is a preference for the result to be +0.0, but -# this is not required (it is required by OpenCL). If the source is not a -# number, and the result is not +0.0, the result should definitely **not** be -# NaN. -# -# The values returned for constant folding match the behavior required by -# OpenCL. unop("fsign", tfloat, ("bit_size == 64 ? " + "(isnan(src0) ? 0.0 : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0 : -1.0 )) : " + - "(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))")) + "(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"), + description = """ +Roughly implements the OpenGL / Vulkan rules for ``sign(float)``. +The ``GLSL.std.450 FSign`` instruction is defined as: + + Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0. + +If the source is equal to zero, there is a preference for the result to have +the same sign, but this is not required (it is required by OpenCL). If the +source is not a number, there is a preference for the result to be +0.0, but +this is not required (it is required by OpenCL). If the source is not a +number, and the result is not +0.0, the result should definitely **not** be +NaN. + +The values returned for constant folding match the behavior required by +OpenCL. + """) + unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)") unop("iabs", tint, "(src0 < 0) ? -src0 : src0") unop("fabs", tfloat, "fabs(src0)") @@ -286,17 +289,23 @@ for src_t in [tint, tuint, tfloat, tbool]: dst_bit_size), dst_t + str(dst_bit_size), src_t, conv_expr) -# Special opcode that is the same as f2f16, i2i16, u2u16 except that it is safe -# to remove it if the result is immediately converted back to 32 bits again. -# This is generated as part of the precision lowering pass. mp stands for medium -# precision. -unop_numeric_convert("f2fmp", tfloat16, tfloat32, opcodes["f2f16"].const_expr) -unop_numeric_convert("i2imp", tint16, tint32, opcodes["i2i16"].const_expr) +def unop_numeric_convert_mp(base, src_t, dst_t): + op_like = base + "16" + unop_numeric_convert(base + "mp", src_t, dst_t, opcodes[op_like].const_expr, + description = """ +Special opcode that is the same as :nir:alu-op:`{}` except that it is safe to +remove it if the result is immediately converted back to 32 bits again. This is +generated as part of the precision lowering pass. ``mp`` stands for medium +precision. + """.format(op_like)) + +unop_numeric_convert_mp("f2f", tfloat16, tfloat32) +unop_numeric_convert_mp("i2i", tint16, tint32) # u2ump isn't defined, because the behavior is equal to i2imp -unop_numeric_convert("f2imp", tint16, tfloat32, opcodes["f2i16"].const_expr) -unop_numeric_convert("f2ump", tuint16, tfloat32, opcodes["f2u16"].const_expr) -unop_numeric_convert("i2fmp", tfloat16, tint32, opcodes["i2f16"].const_expr) -unop_numeric_convert("u2fmp", tfloat16, tuint32, opcodes["u2f16"].const_expr) +unop_numeric_convert_mp("f2i", tint16, tfloat32) +unop_numeric_convert_mp("f2u", tuint16, tfloat32) +unop_numeric_convert_mp("i2f", tfloat16, tint32) +unop_numeric_convert_mp("u2f", tfloat16, tuint32) # Unary floating-point rounding operations. @@ -320,15 +329,16 @@ unop_convert("frexp_exp", tint32, tfloat, "frexp(src0, &dst);") unop_convert("frexp_sig", tfloat, tfloat, "int n; dst = frexp(src0, &n);") # Partial derivatives. +deriv_template = """ +Calculate the screen-space partial derivative using {} derivatives of the input +with respect to the {}-axis. The constant folding is trivial as the derivative +of a constant is 0. +""" - -unop("fddx", tfloat, "0.0") # the derivative of a constant is 0. -unop("fddy", tfloat, "0.0") -unop("fddx_fine", tfloat, "0.0") -unop("fddy_fine", tfloat, "0.0") -unop("fddx_coarse", tfloat, "0.0") -unop("fddy_coarse", tfloat, "0.0") - +for mode, suffix in [("either fine or coarse", ""), ("fine", "_fine"), ("coarse", "_coarse")]: + for axis in ["x", "y"]: + unop(f"fdd{axis}{suffix}", tfloat, "0.0", + description = deriv_template.format(mode, axis.upper())) # Floating point pack and unpack operations. @@ -372,16 +382,18 @@ unpack_2x16("unorm") unpack_4x8("unorm") unpack_2x16("half") -# Convert two unsigned integers into a packed unsigned short (clamp is applied). unop_horiz("pack_uint_2x16", 1, tuint32, 2, tuint32, """ dst.x = _mesa_unsigned_to_unsigned(src0.x, 16); dst.x |= _mesa_unsigned_to_unsigned(src0.y, 16) << 16; +""", description = """ +Convert two unsigned integers into a packed unsigned short (clamp is applied). """) -# Convert two signed integers into a packed signed short (clamp is applied). unop_horiz("pack_sint_2x16", 1, tint32, 2, tint32, """ dst.x = _mesa_signed_to_signed(src0.x, 16) & 0xffff; dst.x |= _mesa_signed_to_signed(src0.y, 16) << 16; +""", description = """ +Convert two signed integers into a packed signed short (clamp is applied). """) unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """ @@ -560,8 +572,8 @@ if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4; if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5; """) -# Sum of vector components -unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}") +unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}", + description = "Sum of vector components") def binop_convert(name, out_type, in_type, alg_props, const_expr, description=""): opcode(name, 0, out_type, [0, 0], [in_type, in_type], @@ -683,10 +695,6 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { } """) -# Unlike fmul, anything (even infinity or NaN) multiplied by zero is always zero. -# fmulz(0.0, inf) and fmulz(0.0, nan) must be +/-0.0, even if -# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then -# the result must be a positive zero if either operand is zero. binop("fmulz", tfloat32, _2src_commutative + associative, """ if (src0 == 0.0 || src1 == 0.0) dst = 0.0; @@ -694,21 +702,27 @@ else if (nir_is_rounding_mode_rtz(execution_mode, 32)) dst = _mesa_double_to_float_rtz((double)src0 * (double)src1); else dst = src0 * src1; +""", description = """ +Unlike :nir:alu-op:`fmul`, anything (even infinity or NaN) multiplied by zero is +always zero. ``fmulz(0.0, inf)`` and ``fmulz(0.0, nan)`` must be +/-0.0, even +if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If +``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive +zero if either operand is zero. """) -# low 32-bits of signed/unsigned integer multiply + binop("imul", tint, _2src_commutative + associative, """ /* Use 64-bit multiplies to prevent overflow of signed arithmetic */ dst = (uint64_t)src0 * (uint64_t)src1; -""") +""", description = "Low 32-bits of signed/unsigned integer multiply") -# Generate 64 bit result from 2 32 bits quantity binop_convert("imul_2x32_64", tint64, tint32, _2src_commutative, - "(int64_t)src0 * (int64_t)src1") + "(int64_t)src0 * (int64_t)src1", + description = "Multiply signed 32-bit integers, 64-bit result") binop_convert("umul_2x32_64", tuint64, tuint32, _2src_commutative, - "(uint64_t)src0 * (uint64_t)src1") + "(uint64_t)src0 * (uint64_t)src1", + description = "Multiply unsigned 32-bit integers, 64-bit result") -# high 32-bits of signed integer multiply binop("imul_high", tint, _2src_commutative, """ if (bit_size == 64) { /* We need to do a full 128-bit x 128-bit multiply in order for the sign @@ -735,9 +749,8 @@ if (bit_size == 64) { * potential overflow of signed multiply */ dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size; } -""") +""", description = "High 32-bits of signed integer multiply") -# high 32-bits of unsigned integer multiply binop("umul_high", tuint, _2src_commutative, """ if (bit_size == 64) { /* The casts are kind-of annoying but needed to prevent compiler warnings. */ @@ -749,31 +762,33 @@ if (bit_size == 64) { } else { dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; } -""") +""", description = "High 32-bits of unsigned integer multiply") -# low 32-bits of unsigned integer multiply binop("umul_low", tuint32, _2src_commutative, """ uint64_t mask = (1 << (bit_size / 2)) - 1; dst = ((uint64_t)src0 & mask) * ((uint64_t)src1 & mask); -""") +""", description = "Low 32-bits of unsigned integer multiply") -# Multiply 32-bits with low 16-bits. -binop("imul_32x16", tint32, "", "src0 * (int16_t) src1") -binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1") +binop("imul_32x16", tint32, "", "src0 * (int16_t) src1", + description = "Multiply 32-bits with low 16-bits, with sign extension") +binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1", + description = "Multiply 32-bits with low 16-bits, with zero extension") binop("fdiv", tfloat, "", "src0 / src1") binop("idiv", tint, "", "src1 == 0 ? 0 : (src0 / src1)") binop("udiv", tuint, "", "src1 == 0 ? 0 : (src0 / src1)") -# returns an integer (1 or 0) representing the carry resulting from the -# addition of the two unsigned arguments. +binop_convert("uadd_carry", tuint, tuint, _2src_commutative, + "src0 + src1 < src0", + description = """ +Return an integer (1 or 0) representing the carry resulting from the +addition of the two unsigned arguments. + """) -binop_convert("uadd_carry", tuint, tuint, _2src_commutative, "src0 + src1 < src0") - -# returns an integer (1 or 0) representing the borrow resulting from the -# subtraction of the two unsigned arguments. - -binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1") +binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1", description = """ +Return an integer (1 or 0) representing the borrow resulting from the +subtraction of the two unsigned arguments. + """) # hadd: (a + b) >> 1 (without overflow) # x + y = x - (x & ~y) + (x & ~y) + y - (~x & y) + (~x & y) @@ -862,15 +877,21 @@ binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equ binop("seq", tfloat, _2src_commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal binop("sne", tfloat, _2src_commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal -# SPIRV shifts are undefined for shift-operands >= bitsize, -# but SM5 shifts are defined to use only the least significant bits. -# The NIR definition is according to the SM5 specification. +shift_note = """ +SPIRV shifts are undefined for shift-operands >= bitsize, +but SM5 shifts are defined to use only the least significant bits. +The NIR definition is according to the SM5 specification. +""" + opcode("ishl", 0, tint, [0, 0], [tint, tuint32], False, "", - "(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))") + "(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))", + description = "Left shift." + shift_note) opcode("ishr", 0, tint, [0, 0], [tint, tuint32], False, "", - "src0 >> (src1 & (sizeof(src0) * 8 - 1))") + "src0 >> (src1 & (sizeof(src0) * 8 - 1))", + description = "Signed right-shift." + shift_note) opcode("ushr", 0, tuint, [0, 0], [tuint, tuint32], False, "", - "src0 >> (src1 & (sizeof(src0) * 8 - 1))") + "src0 >> (src1 & (sizeof(src0) * 8 - 1))", + description = "Unsigned right-shift." + shift_note) opcode("urol", 0, tuint, [0, 0], [tuint, tuint32], False, "", """ uint32_t rotate_mask = sizeof(src0) * 8 - 1; @@ -883,15 +904,16 @@ opcode("uror", 0, tuint, [0, 0], [tuint, tuint32], False, "", """ (src0 << (-src1 & rotate_mask)); """) -# bitwise logic operators -# -# These are also used as boolean and, or, xor for hardware supporting -# integers. +bitwise_description = """ +Bitwise {0}, also used as a boolean {0} for hardware supporting integers. +""" - -binop("iand", tuint, _2src_commutative + associative, "src0 & src1") -binop("ior", tuint, _2src_commutative + associative, "src0 | src1") -binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1") +binop("iand", tuint, _2src_commutative + associative, "src0 & src1", + description = bitwise_description.format("AND")) +binop("ior", tuint, _2src_commutative + associative, "src0 | src1", + description = bitwise_description.format("OR")) +binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1", + description = bitwise_description.format("XOR")) binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}", @@ -931,13 +953,14 @@ opcode("pack_32_4x8_split", 0, tuint32, [0, 0, 0, 0], [tuint8, tuint8, tuint8, t False, "", "src0 | ((uint32_t)src1 << 8) | ((uint32_t)src2 << 16) | ((uint32_t)src3 << 24)") -# bfm implements the behavior of the first operation of the SM5 "bfi" assembly -# and that of the "bfi1" i965 instruction. That is, the bits and offset values -# are from the low five bits of src0 and src1, respectively. binop_convert("bfm", tuint32, tint32, "", """ int bits = src0 & 0x1F; int offset = src1 & 0x1F; dst = ((1u << bits) - 1) << offset; +""", description = """ +Implements the behavior of the first operation of the SM5 "bfi" assembly +and that of the "bfi1" i965 instruction. That is, the bits and offset values +are from the low five bits of src0 and src1, respectively. """) opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint32], False, "", """ @@ -947,11 +970,11 @@ if (!isnormal(dst)) dst = copysignf(0.0f, src0); """) -# Combines the first component of each input to make a 2-component vector. - binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """ dst.x = src0.x; dst.y = src1.x; +""", description = """ +Combines the first component of each input to make a 2-component vector. """) # Byte extraction @@ -992,10 +1015,6 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { } """) -# Unlike ffma, anything (even infinity or NaN) multiplied by zero is always zero. -# ffmaz(0.0, inf, src2) and ffmaz(0.0, nan, src2) must be +/-0.0 + src2, even if -# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then -# the result must be a positive zero plus src2 if either src0 or src1 is zero. triop("ffmaz", tfloat32, _2src_commutative, """ if (src0 == 0.0 || src1 == 0.0) dst = 0.0 + src2; @@ -1003,29 +1022,40 @@ else if (nir_is_rounding_mode_rtz(execution_mode, 32)) dst = _mesa_float_fma_rtz(src0, src1, src2); else dst = fmaf(src0, src1, src2); +""", description = """ +Floating-point multiply-add with modified zero handling. + +Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by zero is +always zero. ``ffmaz(0.0, inf, src2)`` and ``ffmaz(0.0, nan, src2)`` must be +``+/-0.0 + src2``, even if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If +``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive +zero plus src2 if either src0 or src1 is zero. """) triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2") -# Ternary addition -triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2") +triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2", + description = "Ternary addition") -# Conditional Select -# -# A vector conditional select instruction (like ?:, but operating per- -# component on vectors). There are two versions, one for floating point -# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0). - -triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2") +csel_description = """ +A vector conditional select instruction (like ?:, but operating per- +component on vectors). The condition is {} bool ({}). +""" +triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2", + description = csel_description.format("a floating point", "0.0 vs 1.0")) opcode("bcsel", 0, tuint, [0, 0, 0], - [tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2") + [tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2", + description = csel_description.format("a 1-bit", "0 vs 1")) opcode("b8csel", 0, tuint, [0, 0, 0], - [tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2") + [tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2", + description = csel_description.format("an 8-bit", "0 vs ~0")) opcode("b16csel", 0, tuint, [0, 0, 0], - [tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2") + [tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2", + description = csel_description.format("a 16-bit", "0 vs ~0")) opcode("b32csel", 0, tuint, [0, 0, 0], - [tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2") + [tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2", + description = csel_description.format("a 32-bit", "0 vs ~0")) triop("i32csel_gt", tint32, selection, "(src0 > 0) ? src1 : src2") triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2") @@ -1033,7 +1063,6 @@ triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2") triop("fcsel_gt", tfloat32, selection, "(src0 > 0.0f) ? src1 : src2") triop("fcsel_ge", tfloat32, selection, "(src0 >= 0.0f) ? src1 : src2") -# SM5 bfi assembly triop("bfi", tuint32, "", """ unsigned mask = src0, insert = src1, base = src2; if (mask == 0) { @@ -1046,7 +1075,7 @@ if (mask == 0) { } dst = (base & ~mask) | (insert & mask); } -""") +""", description = "SM5 bfi assembly") triop("bitfield_select", tuint, "", "(src0 & src1) | (~src0 & src2)")