diff --git a/src/util/lut.c b/src/util/lut.c
new file mode 100644
index 00000000000..336b4e90cae
--- /dev/null
+++ b/src/util/lut.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2020 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+/*
+ * Human-readable expression for every 3-source lookup table, indexed by the
+ * util_lut3 value. Each string uses C operator precedence (~ binds tightest,
+ * then &, then ^, then |) and evaluates to its own index when the sources are
+ * a = 0xAA, b = 0xCC and c = 0xF0, truncated to 8 bits.
+ *
+ * This table was generated by an offline tool in the following manner:
+ * for each function value, we generated a tree of all expressions that evaluate
+ * to that function. Then we normalized, simplified, and pruned the tree to take
+ * the first minimal size expression.
+ *
+ * https://github.com/intel/intel-graphics-compiler/blob/eb7d24be2244c6e2225d7b33eefcd6ec62b6f80e/visa/iga/IGALibrary/api/iga_bxml_enums.cpp
+ */
+const char *util_lut3_to_str[256] = {
+   [0x00] = "zeros",
+   [0x01] = "~a & ~b & ~c",
+   [0x02] = "a & ~b & ~c",
+   [0x03] = "~b & ~c",
+   [0x04] = "~a & b & ~c",
+   [0x05] = "~a & ~c",
+   [0x06] = "(a ^ b) & ~c",
+   [0x07] = "(~a | ~b) & ~c",
+   [0x08] = "a & b & ~c",
+   [0x09] = "(a ^ ~b) & ~c",
+   [0x0A] = "a & ~c",
+   [0x0B] = "(a | ~b) & ~c",
+   [0x0C] = "b & ~c",
+   [0x0D] = "(~a | b) & ~c",
+   [0x0E] = "(a | b) & ~c",
+   [0x0F] = "~c",
+   [0x10] = "~a & ~b & c",
+   [0x11] = "~a & ~b",
+   [0x12] = "(a ^ c) & ~b",
+   [0x13] = "(~a | ~c) & ~b",
+   [0x14] = "~a & (b ^ c)",
+   [0x15] = "~a & (~b | ~c)",
+   [0x16] = "a ^ (a & b | b ^ c)",
+   [0x17] = "~a & ~b | (~a | ~b) & ~c",
+   [0x18] = "(a ^ c) & (b ^ c)",
+   [0x19] = "a ^ (a & c | ~b)",
+   [0x1A] = "a ^ (a | ~b) & c",
+   [0x1B] = "a & ~c | ~a & ~b",
+   [0x1C] = "(a & c | b) ^ c",
+   [0x1D] = "~a & ~b | b & ~c",
+   [0x1E] = "(a | b) ^ c",
+   [0x1F] = "~a & ~b | ~c",
+   [0x20] = "a & ~b & c",
+   [0x21] = "(a ^ ~c) & ~b",
+   [0x22] = "a & ~b",
+   [0x23] = "(a | ~c) & ~b",
+   [0x24] = "(a ^ b) & (b ^ c)",
+   [0x25] = "a ^ (a & b | ~c)",
+   [0x26] = "a ^ (a | ~c) & b",
+   [0x27] = "a & ~b | ~a & ~c",
+   [0x28] = "a & (b ^ c)",
+   [0x29] = "a ^ (a | ~b) & (b ^ ~c)",
+   [0x2A] = "a & (~b | ~c)",
+   [0x2B] = "a & ~b | (a | ~b) & ~c",
+   [0x2C] = "(~a & c | b) ^ c",
+   [0x2D] = "(~a | b) ^ c",
+   [0x2E] = "a & ~b | b & ~c",
+   [0x2F] = "a & ~b | ~c",
+   [0x30] = "~b & c",
+   [0x31] = "(~a | c) & ~b",
+   [0x32] = "(a | c) & ~b",
+   [0x33] = "~b",
+   [0x34] = "(a & b | c) ^ b",
+   [0x35] = "~a & ~c | ~b & c",
+   [0x36] = "(a | c) ^ b",
+   [0x37] = "~a & ~c | ~b",
+   [0x38] = "(~a & b | c) ^ b",
+   [0x39] = "(~a | c) ^ b",
+   [0x3A] = "a & ~c | ~b & c",
+   [0x3B] = "a & ~c | ~b",
+   [0x3C] = "b ^ c",
+   [0x3D] = "~a & ~b | b ^ c",
+   [0x3E] = "a & ~b | b ^ c",
+   [0x3F] = "~b | ~c",
+   [0x40] = "~a & b & c",
+   [0x41] = "~a & (b ^ ~c)",
+   [0x42] = "(a ^ b) & (b ^ ~c)",
+   [0x43] = "(a & b | ~c) ^ b",
+   [0x44] = "~a & b",
+   [0x45] = "~a & (b | ~c)",
+   [0x46] = "a ^ (a & c | b)",
+   [0x47] = "~a & b | ~b & ~c",
+   [0x48] = "(a ^ c) & b",
+   [0x49] = "a ^ (a & c | b ^ ~c)",
+   [0x4A] = "a ^ (a | b) & c",
+   [0x4B] = "(a | ~b) ^ c",
+   [0x4C] = "(~a | ~c) & b",
+   [0x4D] = "~a & b | (~a | b) & ~c",
+   [0x4E] = "a & ~c | ~a & b",
+   [0x4F] = "~a & b | ~c",
+   [0x50] = "~a & c",
+   [0x51] = "~a & (~b | c)",
+   [0x52] = "a ^ (a & b | c)",
+   [0x53] = "~a & c | ~b & ~c",
+   [0x54] = "~a & (b | c)",
+   [0x55] = "~a",
+   [0x56] = "a ^ (b | c)",
+   [0x57] = "~a | ~b & ~c",
+   [0x58] = "a ^ (a & ~b | c)",
+   [0x59] = "a ^ (~b | c)",
+   [0x5A] = "a ^ c",
+   [0x5B] = "a ^ c | ~a & ~b",
+   [0x5C] = "~a & c | b & ~c",
+   [0x5D] = "~a | b & ~c",
+   [0x5E] = "a ^ c | b & ~c",
+   [0x5F] = "~a | ~c",
+   [0x60] = "(a ^ b) & c",
+   [0x61] = "a ^ (a & b | b ^ ~c)",
+   [0x62] = "a ^ (a | c) & b",
+   [0x63] = "(a | ~c) ^ b",
+   [0x64] = "a ^ (a & ~c | b)",
+   [0x65] = "a ^ (b | ~c)",
+   [0x66] = "a ^ b",
+   [0x67] = "a ^ b | ~a & ~c",
+   [0x68] = "a ^ (a | b) & (b ^ ~c)",
+   [0x69] = "a ^ b ^ ~c",
+   [0x6A] = "a ^ b & c",
+   [0x6B] = "a ^ b & c | ~b & ~c",
+   [0x6C] = "(~a | ~c) ^ ~b",
+   [0x6D] = "a ^ (b | ~c) | b & ~c",
+   [0x6E] = "a ^ b | a & ~c",
+   [0x6F] = "a ^ b | ~c",
+   [0x70] = "(~a | ~b) & c",
+   [0x71] = "~a & c | (~a | c) & ~b",
+   [0x72] = "a & ~b | ~a & c",
+   [0x73] = "~a & c | ~b",
+   [0x74] = "~a & b | ~b & c",
+   [0x75] = "~a | ~b & c",
+   [0x76] = "a ^ b | ~a & c",
+   [0x77] = "~a | ~b",
+   [0x78] = "(~a | ~b) ^ ~c",
+   [0x79] = "a ^ (~b | c) | ~b & c",
+   [0x7A] = "a ^ c | a & ~b",
+   [0x7B] = "a ^ c | ~b",
+   [0x7C] = "~a & b | b ^ c",
+   [0x7D] = "~a | b ^ c",
+   [0x7E] = "a ^ b | b ^ c",
+   [0x7F] = "~a | ~b | ~c",
+   [0x80] = "a & b & c",
+   [0x81] = "(a ^ ~b) & (b ^ ~c)",
+   [0x82] = "a & (b ^ ~c)",
+   [0x83] = "(~a & b | ~c) ^ b",
+   [0x84] = "(a ^ ~c) & b",
+   [0x85] = "a ^ (a & ~b | ~c)",
+   [0x86] = "a ^ (a | b) & (b ^ c)",
+   [0x87] = "(~a | ~b) ^ c",
+   [0x88] = "a & b",
+   [0x89] = "a ^ (a | ~c) & ~b",
+   [0x8A] = "a & (b | ~c)",
+   [0x8B] = "a & b | ~b & ~c",
+   [0x8C] = "(a | ~c) & b",
+   [0x8D] = "a & b | ~a & ~c",
+   [0x8E] = "a & b | (a | b) & ~c",
+   [0x8F] = "a & b | ~c",
+   [0x90] = "(a ^ ~b) & c",
+   [0x91] = "a ^ (a & ~c | ~b)",
+   [0x92] = "a ^ (a | c) & (b ^ c)",
+   [0x93] = "(~a | ~c) ^ b",
+   [0x94] = "a ^ (a & ~b | b ^ c)",
+   [0x95] = "a ^ (~b | ~c)",
+   [0x96] = "a ^ b ^ c",
+   [0x97] = "a ^ (~b | ~c) | ~b & ~c",
+   [0x98] = "a ^ (a | c) & ~b",
+   [0x99] = "a ^ ~b",
+   [0x9A] = "a ^ ~b & c",
+   [0x9B] = "a ^ ~b | a & ~c",
+   [0x9C] = "(a | ~c) ^ ~b",
+   [0x9D] = "a ^ ~b | b & ~c",
+   [0x9E] = "a ^ ~b & c | b & ~c",
+   [0x9F] = "a ^ ~b | ~c",
+   [0xA0] = "a & c",
+   [0xA1] = "a ^ (a | ~b) & ~c",
+   [0xA2] = "a & (~b | c)",
+   [0xA3] = "a & c | ~b & ~c",
+   [0xA4] = "a ^ (a | b) & ~c",
+   [0xA5] = "a ^ ~c",
+   [0xA6] = "a ^ b & ~c",
+   [0xA7] = "a ^ ~c | a & ~b",
+   [0xA8] = "a & (b | c)",
+   [0xA9] = "a ^ ~b & ~c",
+   [0xAA] = "a",
+   [0xAB] = "a | ~b & ~c",
+   [0xAC] = "a & c | b & ~c",
+   [0xAD] = "a ^ ~c | a & b",
+   [0xAE] = "a | b & ~c",
+   [0xAF] = "a | ~c",
+   [0xB0] = "(a | ~b) & c",
+   [0xB1] = "a & c | ~a & ~b",
+   [0xB2] = "a & c | (a | c) & ~b",
+   [0xB3] = "a & c | ~b",
+   [0xB4] = "(a | ~b) ^ ~c",
+   [0xB5] = "a ^ ~c | ~a & ~b",
+   [0xB6] = "a ^ b & ~c | ~b & c",
+   [0xB7] = "a ^ ~c | ~b",
+   [0xB8] = "a & b | ~b & c",
+   [0xB9] = "a ^ ~b | a & c",
+   [0xBA] = "a | ~b & c",
+   [0xBB] = "a | ~b",
+   [0xBC] = "a & b | b ^ c",
+   [0xBD] = "a ^ ~b | b ^ c",
+   [0xBE] = "a | b ^ c",
+   [0xBF] = "a | ~b | ~c",
+   [0xC0] = "b & c",
+   [0xC1] = "(a & ~b | c) ^ ~b",
+   [0xC2] = "(~a & ~b | c) ^ ~b",
+   [0xC3] = "b ^ ~c",
+   [0xC4] = "(~a | c) & b",
+   [0xC5] = "~a & ~c | b & c",
+   [0xC6] = "(~a | c) ^ ~b",
+   [0xC7] = "~a & b | b ^ ~c",
+   [0xC8] = "(a | c) & b",
+   [0xC9] = "(a | c) ^ ~b",
+   [0xCA] = "a & ~c | b & c",
+   [0xCB] = "a & b | b ^ ~c",
+   [0xCC] = "b",
+   [0xCD] = "~a & ~c | b",
+   [0xCE] = "a & ~c | b",
+   [0xCF] = "b | ~c",
+   [0xD0] = "(~a | b) & c",
+   [0xD1] = "~a & ~b | b & c",
+   [0xD2] = "(~a | b) ^ ~c",
+   [0xD3] = "~a & c | b ^ ~c",
+   [0xD4] = "~a & b | (~a | b) & c",
+   [0xD5] = "~a | b & c",
+   [0xD6] = "a ^ (b | c) | b & c",
+   [0xD7] = "~a | b ^ ~c",
+   [0xD8] = "a & b | ~a & c",
+   [0xD9] = "a ^ ~b | b & c",
+   [0xDA] = "a ^ c | a & b",
+   [0xDB] = "a ^ c | b ^ ~c",
+   [0xDC] = "~a & c | b",
+   [0xDD] = "~a | b",
+   [0xDE] = "a ^ c | b",
+   [0xDF] = "~a | b | ~c",
+   [0xE0] = "(a | b) & c",
+   [0xE1] = "(a | b) ^ ~c",
+   [0xE2] = "a & ~b | b & c",
+   [0xE3] = "a & c | b ^ ~c",
+   [0xE4] = "a & c | ~a & b",
+   [0xE5] = "a ^ ~c | b & c",
+   [0xE6] = "a ^ b | a & c",
+   [0xE7] = "a ^ b | b ^ ~c",
+   [0xE8] = "a & b | (a | b) & c",
+   [0xE9] = "a ^ ~b & ~c | b & c",
+   [0xEA] = "a | b & c",
+   [0xEB] = "a | b ^ ~c",
+   [0xEC] = "a & c | b",
+   [0xED] = "a ^ ~c | b",
+   [0xEE] = "a | b",
+   [0xEF] = "a | b | ~c",
+   [0xF0] = "c",
+   [0xF1] = "~a & ~b | c",
+   [0xF2] = "a & ~b | c",
+   [0xF3] = "~b | c",
+   [0xF4] = "~a & b | c",
+   [0xF5] = "~a | c",
+   [0xF6] = "a ^ b | c",
+   [0xF7] = "~a | ~b | c",
+   [0xF8] = "a & b | c",
+   [0xF9] = "a ^ ~b | c",
+   [0xFA] = "a | c",
+   [0xFB] = "a | ~b | c",
+   [0xFC] = "b | c",
+   [0xFD] = "~a | b | c",
+   [0xFE] = "a | b | c",
+   [0xFF] = "ones",
+};
diff --git a/src/util/lut.h b/src/util/lut.h
new file mode 100644
index 00000000000..0e9965e7a1f
--- /dev/null
+++ b/src/util/lut.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2025 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <stdint.h>
+#include "util/macros.h"
+
+/*
+ * Represents a boolean lookup table in sum-of-minterms form. These are
+ * natural encodings, matching the Intel BFN and Apple BITOP instructions.
+ *
+ * Bit i of a table is the output for the input combination encoded by i:
+ * source a is bit 0 of i, b is bit 1 and (util_lut3 only) c is bit 2. A
+ * util_lut2 therefore only uses the low 4 bits of its byte.
+ */
+typedef uint8_t util_lut2;
+typedef uint8_t util_lut3;
+
+#if !defined(_MSC_VER)
+/*
+ * Build a lookup table from a boolean expression. Bitwise operations are
+ * supported: &, |, ^, and ~. Note ~ must be used, not !.
+ *
+ * The implementation uses a GNU statement-expression with the appropriate
+ * masks, such that the AND of all three masks (with arbitrary complements)
+ * equals the single bit for the corresponding min-term. This matches how Intel
+ * describes BFN in the bspec, but it obscures the meaning.
+ *
+ * Casting to uint8_t masks the out-of-bounds bits in ~a & ~b & ~c.
+ *
+ * Example: UTIL_LUT3((a & b) | (~a & c))
+ */
+#define UTIL_LUT3(expr_involving_a_b_c) \
+   ({ \
+      UNUSED const uint8_t a = 0xAA, b = 0xCC, c = 0xF0; \
+      (util_lut3)(expr_involving_a_b_c); \
+   })
+
+/*
+ * Two-source variant of UTIL_LUT3. ANDing with ~c keeps only the min-terms
+ * where c is false, which are the low 4 bits.
+ */
+#define UTIL_LUT2(expr_involving_a_b) \
+   ((util_lut2)UTIL_LUT3((expr_involving_a_b) & ~c))
+
+/*
+ * Return a lookup table with source s (0 = a, 1 = b, 2 = c) inverted. We
+ * exchange the minterms for "source s is true" and "source s is false".
+ */
+static inline util_lut3
+util_lut3_invert_source(util_lut3 l, unsigned s)
+{
+   uint8_t masks[] = {UTIL_LUT3(a), UTIL_LUT3(b), UTIL_LUT3(c)};
+   assert(s < ARRAY_SIZE(masks));
+
+   /* Each "source s is true" min-term sits exactly shift bits above its
+    * "source s is false" partner, so one shift in each direction exchanges
+    * the two halves.
+    */
+   uint8_t mask = masks[s];
+   uint8_t shift = __builtin_ctz(mask);
+   uint8_t true_bits = l & mask;
+   uint8_t false_bits = l & ~mask;
+   return (false_bits << shift) | (true_bits >> shift);
+}
+
+/*
+ * Two-source variant: s must be 0 (a) or 1 (b). Inverting c would shift every
+ * min-term of a valid util_lut2 out of the low 4 bits and silently return an
+ * all-zero table, so reject it up front.
+ */
+static inline util_lut2
+util_lut2_invert_source(util_lut2 l, unsigned s)
+{
+   assert(s < 2);
+   return (util_lut2)(util_lut3_invert_source((util_lut3)l, s) & 0xf);
+}
+#endif
+
+/*
+ * Helpers to invert a LUT. This is easy: invert all the min-terms.
+ */
+static inline util_lut2
+util_lut2_invert(util_lut2 l)
+{
+   return l ^ 0xf;
+}
+
+static inline util_lut3
+util_lut3_invert(util_lut3 l)
+{
+   return l ^ 0xff;
+}
+
+/*
+ * Return a lookup table equivalent to the input but with sources a & b swapped.
+ * To implement, we swap the corresponding minterms.
+ */
+static inline util_lut2
+util_lut2_swap_sources(util_lut2 l)
+{
+   return util_bit_swap(l, 1, 2);
+}
+
+/*
+ * Three-source variant. a and b are source indices (0 = a, 1 = b, 2 = c) and
+ * may be given in either order; swapping a source with itself is a no-op.
+ */
+static inline util_lut3
+util_lut3_swap_sources(util_lut3 l, unsigned a, unsigned b)
+{
+   /* Swapping is symmetric, so only the a <= b orderings need handling. */
+   if (a > b) {
+      unsigned tmp = a;
+      a = b;
+      b = tmp;
+   }
+
+   if (a == b && b <= 2) {
+      return l;
+   } else if (a == 0 && b == 1) {
+      return util_bit_swap(util_bit_swap(l, 1, 2), 5, 6);
+   } else if (a == 0 && b == 2) {
+      return util_bit_swap(util_bit_swap(l, 1, 4), 3, 6);
+   } else if (a == 1 && b == 2) {
+      return util_bit_swap(util_bit_swap(l, 2, 4), 3, 5);
+   }
+
+   UNREACHABLE("invalid source selection");
+}
+
+/* Finding minimal string forms of LUTs is tricky, so we precompute. */
+extern const char *util_lut3_to_str[256];
diff --git a/src/util/meson.build b/src/util/meson.build
index a4dda94e92e..d1b398d503d 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -64,6 +64,8 @@ files_mesa_util = files(
   'u_idalloc.h',
   'list.h',
   'log.c',
+  'lut.c',
+  'lut.h',
   'macros.h',
   'memstream.c',
   'memstream.h',