From 95badb6b1dd0189ea3c8f116e0d3e28a8b81e552 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Fri, 5 Sep 2025 11:48:11 -0400
Subject: [PATCH] util: add boolean lookup table helpers

Many instruction sets (Intel, Apple, NVIDIA) implement bitwise operations
with a single general instruction that takes a lookup table in
sum-of-products (minterms) form. Working with these tables manually is a bit
gnarly, and multiple backends need to do this.

This adds common code for representing such lookup tables with 2 or 3
sources, with a rich set of helpers for building, inspecting, and
manipulating the LUTs.

Eventually, we may want to introduce a nir_op_bitop3 instruction to NIR to
build common code for fusing boolean expression trees into lookup tables.
That NIR pass will presumably use the helpers here.

NAK already has this abstraction internally (in Rust). Possibly NAK will be
able to drop (or cut down) that code once we handle this in NIR.

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/util/lut.c       | 275 +++++++++++++++++++++++++++++++++++++++++++
 src/util/lut.h       | 137 +++++++++++++++++++++
 src/util/meson.build |   1 +
 3 files changed, 413 insertions(+)
 create mode 100644 src/util/lut.c
 create mode 100644 src/util/lut.h

diff --git a/src/util/lut.c b/src/util/lut.c
new file mode 100644
index 00000000000..336b4e90cae
--- /dev/null
+++ b/src/util/lut.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright 2020 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+/*
+ * This table was generated by an offline tool in the following manner:
+ * for each function value, we generated a tree of all expressions that evaluate
+ * to that function. Then we normalized, simplified, and pruned the tree to take
+ * the first minimal size expression.
+ *
+ * https://github.com/intel/intel-graphics-compiler/blob/eb7d24be2244c6e2225d7b33eefcd6ec62b6f80e/visa/iga/IGALibrary/api/iga_bxml_enums.cpp
+ *
+ * The table is indexed by util_lut3 value, with the sources named as in
+ * UTIL_LUT3 (a = 0xAA, b = 0xCC, c = 0xF0). The strings follow C operator
+ * precedence, so "a ^ b & c" means a ^ (b & c).
+ */
+const char *util_lut3_to_str[256] = {
+   [0x00] = "zeros",
+   [0x01] = "~a & ~b & ~c",
+   [0x02] = "a & ~b & ~c",
+   [0x03] = "~b & ~c",
+   [0x04] = "~a & b & ~c",
+   [0x05] = "~a & ~c",
+   [0x06] = "(a ^ b) & ~c",
+   [0x07] = "(~a | ~b) & ~c",
+   [0x08] = "a & b & ~c",
+   [0x09] = "(a ^ ~b) & ~c",
+   [0x0A] = "a & ~c",
+   [0x0B] = "(a | ~b) & ~c",
+   [0x0C] = "b & ~c",
+   [0x0D] = "(~a | b) & ~c",
+   [0x0E] = "(a | b) & ~c",
+   [0x0F] = "~c",
+   [0x10] = "~a & ~b & c",
+   [0x11] = "~a & ~b",
+   [0x12] = "(a ^ c) & ~b",
+   [0x13] = "(~a | ~c) & ~b",
+   [0x14] = "~a & (b ^ c)",
+   [0x15] = "~a & (~b | ~c)",
+   [0x16] = "a ^ (a & b | b ^ c)",
+   [0x17] = "~a & ~b | (~a | ~b) & ~c",
+   [0x18] = "(a ^ c) & (b ^ c)",
+   [0x19] = "a ^ (a & c | ~b)",
+   [0x1A] = "a ^ (a | ~b) & c",
+   [0x1B] = "a & ~c | ~a & ~b",
+   [0x1C] = "(a & c | b) ^ c",
+   [0x1D] = "~a & ~b | b & ~c",
+   [0x1E] = "(a | b) ^ c",
+   [0x1F] = "~a & ~b | ~c",
+   [0x20] = "a & ~b & c",
+   [0x21] = "(a ^ ~c) & ~b",
+   [0x22] = "a & ~b",
+   [0x23] = "(a | ~c) & ~b",
+   [0x24] = "(a ^ b) & (b ^ c)",
+   [0x25] = "a ^ (a & b | ~c)",
+   [0x26] = "a ^ (a | ~c) & b",
+   [0x27] = "a & ~b | ~a & ~c",
+   [0x28] = "a & (b ^ c)",
+   [0x29] = "a ^ (a | ~b) & (b ^ ~c)",
+   [0x2A] = "a & (~b | ~c)",
+   [0x2B] = "a & ~b | (a | ~b) & ~c",
+   [0x2C] = "(~a & c | b) ^ c",
+   [0x2D] = "(~a | b) ^ c",
+   [0x2E] = "a & ~b | b & ~c",
+   [0x2F] = "a & ~b | ~c",
+   [0x30] = "~b & c",
+   [0x31] = "(~a | c) & ~b",
+   [0x32] = "(a | c) & ~b",
+   [0x33] = "~b",
+   [0x34] = "(a & b | c) ^ b",
+   [0x35] = "~a & ~c | ~b & c",
+   [0x36] = "(a | c) ^ b",
+   [0x37] = "~a & ~c | ~b",
+   [0x38] = "(~a & b | c) ^ b",
+   [0x39] = "(~a | c) ^ b",
+   [0x3A] = "a & ~c | ~b & c",
+   [0x3B] = "a & ~c | ~b",
+   [0x3C] = "b ^ c",
+   [0x3D] = "~a & ~b | b ^ c",
+   [0x3E] = "a & ~b | b ^ c",
+   [0x3F] = "~b | ~c",
+   [0x40] = "~a & b & c",
+   [0x41] = "~a & (b ^ ~c)",
+   [0x42] = "(a ^ b) & (b ^ ~c)",
+   [0x43] = "(a & b | ~c) ^ b",
+   [0x44] = "~a & b",
+   [0x45] = "~a & (b | ~c)",
+   [0x46] = "a ^ (a & c | b)",
+   [0x47] = "~a & b | ~b & ~c",
+   [0x48] = "(a ^ c) & b",
+   [0x49] = "a ^ (a & c | b ^ ~c)",
+   [0x4A] = "a ^ (a | b) & c",
+   [0x4B] = "(a | ~b) ^ c",
+   [0x4C] = "(~a | ~c) & b",
+   [0x4D] = "~a & b | (~a | b) & ~c",
+   [0x4E] = "a & ~c | ~a & b",
+   [0x4F] = "~a & b | ~c",
+   [0x50] = "~a & c",
+   [0x51] = "~a & (~b | c)",
+   [0x52] = "a ^ (a & b | c)",
+   [0x53] = "~a & c | ~b & ~c",
+   [0x54] = "~a & (b | c)",
+   [0x55] = "~a",
+   [0x56] = "a ^ (b | c)",
+   [0x57] = "~a | ~b & ~c",
+   [0x58] = "a ^ (a & ~b | c)",
+   [0x59] = "a ^ (~b | c)",
+   [0x5A] = "a ^ c",
+   [0x5B] = "a ^ c | ~a & ~b",
+   [0x5C] = "~a & c | b & ~c",
+   [0x5D] = "~a | b & ~c",
+   [0x5E] = "a ^ c | b & ~c",
+   [0x5F] = "~a | ~c",
+   [0x60] = "(a ^ b) & c",
+   [0x61] = "a ^ (a & b | b ^ ~c)",
+   [0x62] = "a ^ (a | c) & b",
+   [0x63] = "(a | ~c) ^ b",
+   [0x64] = "a ^ (a & ~c | b)",
+   [0x65] = "a ^ (b | ~c)",
+   [0x66] = "a ^ b",
+   [0x67] = "a ^ b | ~a & ~c",
+   [0x68] = "a ^ (a | b) & (b ^ ~c)",
+   [0x69] = "a ^ b ^ ~c",
+   [0x6A] = "a ^ b & c",
+   [0x6B] = "a ^ b & c | ~b & ~c",
+   [0x6C] = "(~a | ~c) ^ ~b",
+   [0x6D] = "a ^ (b | ~c) | b & ~c",
+   [0x6E] = "a ^ b | a & ~c",
+   [0x6F] = "a ^ b | ~c",
+   [0x70] = "(~a | ~b) & c",
+   [0x71] = "~a & c | (~a | c) & ~b",
+   [0x72] = "a & ~b | ~a & c",
+   [0x73] = "~a & c | ~b",
+   [0x74] = "~a & b | ~b & c",
+   [0x75] = "~a | ~b & c",
+   [0x76] = "a ^ b | ~a & c",
+   [0x77] = "~a | ~b",
+   [0x78] = "(~a | ~b) ^ ~c",
+   [0x79] = "a ^ (~b | c) | ~b & c",
+   [0x7A] = "a ^ c | a & ~b",
+   [0x7B] = "a ^ c | ~b",
+   [0x7C] = "~a & b | b ^ c",
+   [0x7D] = "~a | b ^ c",
+   [0x7E] = "a ^ b | b ^ c",
+   [0x7F] = "~a | ~b | ~c",
+   [0x80] = "a & b & c",
+   [0x81] = "(a ^ ~b) & (b ^ ~c)",
+   [0x82] = "a & (b ^ ~c)",
+   [0x83] = "(~a & b | ~c) ^ b",
+   [0x84] = "(a ^ ~c) & b",
+   [0x85] = "a ^ (a & ~b | ~c)",
+   [0x86] = "a ^ (a | b) & (b ^ c)",
+   [0x87] = "(~a | ~b) ^ c",
+   [0x88] = "a & b",
+   [0x89] = "a ^ (a | ~c) & ~b",
+   [0x8A] = "a & (b | ~c)",
+   [0x8B] = "a & b | ~b & ~c",
+   [0x8C] = "(a | ~c) & b",
+   [0x8D] = "a & b | ~a & ~c",
+   [0x8E] = "a & b | (a | b) & ~c",
+   [0x8F] = "a & b | ~c",
+   [0x90] = "(a ^ ~b) & c",
+   [0x91] = "a ^ (a & ~c | ~b)",
+   [0x92] = "a ^ (a | c) & (b ^ c)",
+   [0x93] = "(~a | ~c) ^ b",
+   [0x94] = "a ^ (a & ~b | b ^ c)",
+   [0x95] = "a ^ (~b | ~c)",
+   [0x96] = "a ^ b ^ c",
+   [0x97] = "a ^ (~b | ~c) | ~b & ~c",
+   [0x98] = "a ^ (a | c) & ~b",
+   [0x99] = "a ^ ~b",
+   [0x9A] = "a ^ ~b & c",
+   [0x9B] = "a ^ ~b | a & ~c",
+   [0x9C] = "(a | ~c) ^ ~b",
+   [0x9D] = "a ^ ~b | b & ~c",
+   [0x9E] = "a ^ ~b & c | b & ~c",
+   [0x9F] = "a ^ ~b | ~c",
+   [0xA0] = "a & c",
+   [0xA1] = "a ^ (a | ~b) & ~c",
+   [0xA2] = "a & (~b | c)",
+   [0xA3] = "a & c | ~b & ~c",
+   [0xA4] = "a ^ (a | b) & ~c",
+   [0xA5] = "a ^ ~c",
+   [0xA6] = "a ^ b & ~c",
+   [0xA7] = "a ^ ~c | a & ~b",
+   [0xA8] = "a & (b | c)",
+   [0xA9] = "a ^ ~b & ~c",
+   [0xAA] = "a",
+   [0xAB] = "a | ~b & ~c",
+   [0xAC] = "a & c | b & ~c",
+   [0xAD] = "a ^ ~c | a & b",
+   [0xAE] = "a | b & ~c",
+   [0xAF] = "a | ~c",
+   [0xB0] = "(a | ~b) & c",
+   [0xB1] = "a & c | ~a & ~b",
+   [0xB2] = "a & c | (a | c) & ~b",
+   [0xB3] = "a & c | ~b",
+   [0xB4] = "(a | ~b) ^ ~c",
+   [0xB5] = "a ^ ~c | ~a & ~b",
+   [0xB6] = "a ^ b & ~c | ~b & c",
+   [0xB7] = "a ^ ~c | ~b",
+   [0xB8] = "a & b | ~b & c",
+   [0xB9] = "a ^ ~b | a & c",
+   [0xBA] = "a | ~b & c",
+   [0xBB] = "a | ~b",
+   [0xBC] = "a & b | b ^ c",
+   [0xBD] = "a ^ ~b | b ^ c",
+   [0xBE] = "a | b ^ c",
+   [0xBF] = "a | ~b | ~c",
+   [0xC0] = "b & c",
+   [0xC1] = "(a & ~b | c) ^ ~b",
+   [0xC2] = "(~a & ~b | c) ^ ~b",
+   [0xC3] = "b ^ ~c",
+   [0xC4] = "(~a | c) & b",
+   [0xC5] = "~a & ~c | b & c",
+   [0xC6] = "(~a | c) ^ ~b",
+   [0xC7] = "~a & b | b ^ ~c",
+   [0xC8] = "(a | c) & b",
+   [0xC9] = "(a | c) ^ ~b",
+   [0xCA] = "a & ~c | b & c",
+   [0xCB] = "a & b | b ^ ~c",
+   [0xCC] = "b",
+   [0xCD] = "~a & ~c | b",
+   [0xCE] = "a & ~c | b",
+   [0xCF] = "b | ~c",
+   [0xD0] = "(~a | b) & c",
+   [0xD1] = "~a & ~b | b & c",
+   [0xD2] = "(~a | b) ^ ~c",
+   [0xD3] = "~a & c | b ^ ~c",
+   [0xD4] = "~a & b | (~a | b) & c",
+   [0xD5] = "~a | b & c",
+   [0xD6] = "a ^ (b | c) | b & c",
+   [0xD7] = "~a | b ^ ~c",
+   [0xD8] = "a & b | ~a & c",
+   [0xD9] = "a ^ ~b | b & c",
+   [0xDA] = "a ^ c | a & b",
+   [0xDB] = "a ^ c | b ^ ~c",
+   [0xDC] = "~a & c | b",
+   [0xDD] = "~a | b",
+   [0xDE] = "a ^ c | b",
+   [0xDF] = "~a | b | ~c",
+   [0xE0] = "(a | b) & c",
+   [0xE1] = "(a | b) ^ ~c",
+   [0xE2] = "a & ~b | b & c",
+   [0xE3] = "a & c | b ^ ~c",
+   [0xE4] = "a & c | ~a & b",
+   [0xE5] = "a ^ ~c | b & c",
+   [0xE6] = "a ^ b | a & c",
+   [0xE7] = "a ^ b | b ^ ~c",
+   [0xE8] = "a & b | (a | b) & c",
+   [0xE9] = "a ^ ~b & ~c | b & c",
+   [0xEA] = "a | b & c",
+   [0xEB] = "a | b ^ ~c",
+   [0xEC] = "a & c | b",
+   [0xED] = "a ^ ~c | b",
+   [0xEE] = "a | b",
+   [0xEF] = "a | b | ~c",
+   [0xF0] = "c",
+   [0xF1] = "~a & ~b | c",
+   [0xF2] = "a & ~b | c",
+   [0xF3] = "~b | c",
+   [0xF4] = "~a & b | c",
+   [0xF5] = "~a | c",
+   [0xF6] = "a ^ b | c",
+   [0xF7] = "~a | ~b | c",
+   [0xF8] = "a & b | c",
+   [0xF9] = "a ^ ~b | c",
+   [0xFA] = "a | c",
+   [0xFB] = "a | ~b | c",
+   [0xFC] = "b | c",
+   [0xFD] = "~a | b | c",
+   [0xFE] = "a | b | c",
+   [0xFF] = "ones",
+};
diff --git a/src/util/lut.h b/src/util/lut.h
new file mode 100644
index 00000000000..0e9965e7a1f
--- /dev/null
+++ b/src/util/lut.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2025 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <stdint.h>
+#include "util/macros.h"
+
+/*
+ * Represents a boolean lookup table in sum-of-minterms form. These are
+ * natural encodings, matching the Intel BFN and Apple BITOP instructions.
+ *
+ * Bit i of a table holds the function's value for the inputs a = bit 0 of i,
+ * b = bit 1 of i and c = bit 2 of i. A util_lut2 has no source c and only
+ * uses the low 4 bits.
+ */
+typedef uint8_t util_lut2;
+typedef uint8_t util_lut3;
+
+#if !defined(_MSC_VER)
+/*
+ * Build a lookup table from a boolean expression. Bitwise operations are
+ * supported: &, |, ^, and ~. Note ~ must be used, not !.
+ *
+ * The implementation uses a GNU statement-expression with the appropriate
+ * masks, such that the AND of all three masks (with arbitrary complements)
+ * equals the single bit for the corresponding min-term. This matches how Intel
+ * describes BFN in the bspec, but it obscures the meaning.
+ *
+ * Casting to uint8_t masks the out-of-bounds bits in ~a & ~b & ~c.
+ *
+ * Example: UTIL_LUT3((a & b) | (~a & c))
+ */
+#define UTIL_LUT3(expr_involving_a_b_c) \
+   ({ \
+      UNUSED const uint8_t a = 0xAA, b = 0xCC, c = 0xF0; \
+      (util_lut3)(expr_involving_a_b_c); \
+   })
+
+/*
+ * Two-source variant. ANDing with ~c keeps only the minterms where c is
+ * false, i.e. the low 4 bits.
+ */
+#define UTIL_LUT2(expr_involving_a_b) \
+   ((util_lut2)UTIL_LUT3((expr_involving_a_b) & ~c))
+
+/*
+ * Return a lookup table with source s inverted, where s = 0, 1, 2 selects
+ * a, b, c. We exchange the minterms for "source s is true" and "source s is
+ * false".
+ */
+static inline util_lut3
+util_lut3_invert_source(util_lut3 l, unsigned s)
+{
+   const uint8_t masks[] = {UTIL_LUT3(a), UTIL_LUT3(b), UTIL_LUT3(c)};
+   assert(s < ARRAY_SIZE(masks));
+
+   /* Each "true" minterm sits 1 << s bits above its "false" twin, which is
+    * also the index of the lowest set bit of the mask.
+    */
+   uint8_t mask = masks[s];
+   uint8_t shift = __builtin_ctz(mask);
+   uint8_t true_bits = l & mask;
+   uint8_t false_bits = l & ~mask;
+   return (false_bits << shift) | (true_bits >> shift);
+}
+
+/*
+ * Two-source variant of util_lut3_invert_source. Only sources a (s = 0) and
+ * b (s = 1) exist in a util_lut2.
+ */
+static inline util_lut2
+util_lut2_invert_source(util_lut2 l, unsigned s)
+{
+   assert(s < 2);
+   return (util_lut2)(util_lut3_invert_source((util_lut3)l, s) & 0xf);
+}
+#endif /* !defined(_MSC_VER) */
+
+/*
+ * Helpers to invert a LUT. This is easy: invert all the min-terms.
+ */
+static inline util_lut2
+util_lut2_invert(util_lut2 l)
+{
+   return l ^ 0xf;
+}
+
+static inline util_lut3
+util_lut3_invert(util_lut3 l)
+{
+   return l ^ 0xff;
+}
+
+/*
+ * Return a lookup table equivalent to the input but with sources a & b swapped.
+ * To implement, we swap the corresponding minterms.
+ */
+static inline util_lut2
+util_lut2_swap_sources(util_lut2 l)
+{
+   return util_bit_swap(l, 1, 2);
+}
+
+/*
+ * Three-source variant: a and b are the indices of the two sources to
+ * exchange (0, 1, 2 select sources a, b, c). The pair may be given in either
+ * order, and exchanging a source with itself returns the table unchanged.
+ */
+static inline util_lut3
+util_lut3_swap_sources(util_lut3 l, unsigned a, unsigned b)
+{
+   /* Swapping is symmetric, so order the pair to halve the cases below. */
+   if (a > b) {
+      unsigned tmp = a;
+      a = b;
+      b = tmp;
+   }
+
+   if (a == b && b < 3) {
+      return l;
+   } else if (a == 0 && b == 1) {
+      return util_bit_swap(util_bit_swap(l, 1, 2), 5, 6);
+   } else if (a == 0 && b == 2) {
+      return util_bit_swap(util_bit_swap(l, 1, 4), 3, 6);
+   } else if (a == 1 && b == 2) {
+      return util_bit_swap(util_bit_swap(l, 2, 4), 3, 5);
+   }
+
+   UNREACHABLE("invalid source selection");
+}
+
+/* Finding minimal string forms of LUTs is tricky, so we precompute. */
+extern const char *util_lut3_to_str[256];
diff --git a/src/util/meson.build b/src/util/meson.build
index a4dda94e92e..d1b398d503d 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -64,6 +64,7 @@ files_mesa_util = files(
   'u_idalloc.h',
   'list.h',
   'log.c',
+  'lut.c',
   'macros.h',
   'memstream.c',
   'memstream.h',