util: add boolean lookup table helpers

Many instruction sets (Intel, Apple, NVIDIA) implement bitwise operations with a
single general instruction that takes a lookup table in sum-of-products
(minterms) form. Working with these tables manually is a bit gnarly, and
multiple backends need to do this. This adds common code for representing such
lookup tables with 2 or 3 sources, with a rich set of helpers for building,
inspecting, and manipulating the LUTs.

Eventually, we may want to introduce a nir_op_bitop3 instruction to NIR to build
common code for fusing boolean expression trees into lookup tables. That NIR
pass will presumably use the helpers here.

NAK already has this abstraction internally (in Rust). Possibly NAK will be able
to drop (or cut down) that code once we handle this in NIR.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37200>
This commit is contained in:
Alyssa Rosenzweig 2025-09-05 11:48:11 -04:00 committed by Marge Bot
parent 86a5dd10ac
commit 95badb6b1d
3 changed files with 379 additions and 0 deletions

271
src/util/lut.c Normal file
View file

@ -0,0 +1,271 @@
/*
* Copyright 2020 Intel Corporation
* SPDX-License-Identifier: MIT
*/
/*
* Human-readable expression for every 3-source lookup table, indexed by the
* 8-bit LUT value.
*
* Bit i of the index is the function's output for the input combination where
* a is bit 0 of i, b is bit 1 of i and c is bit 2 of i. For example, index 0x02
* has only bit 1 set and reads "a & ~b & ~c".
*
* The strings use C operator precedence without redundant parentheses: ~ binds
* tightest, then &, then ^, then |. The two constant functions are spelled
* "zeros" and "ones".
*
* This table was generated by an offline tool in the following manner:
* for each function value, we generated a tree of all expressions that evaluate
* to that function. Then we normalized, simplified, and pruned the tree to take
* the first minimal size expression.
*
* https://github.com/intel/intel-graphics-compiler/blob/eb7d24be2244c6e2225d7b33eefcd6ec62b6f80e/visa/iga/IGALibrary/api/iga_bxml_enums.cpp
*/
const char *util_lut3_to_str[256] = {
[0x00] = "zeros",
[0x01] = "~a & ~b & ~c",
[0x02] = "a & ~b & ~c",
[0x03] = "~b & ~c",
[0x04] = "~a & b & ~c",
[0x05] = "~a & ~c",
[0x06] = "(a ^ b) & ~c",
[0x07] = "(~a | ~b) & ~c",
[0x08] = "a & b & ~c",
[0x09] = "(a ^ ~b) & ~c",
[0x0A] = "a & ~c",
[0x0B] = "(a | ~b) & ~c",
[0x0C] = "b & ~c",
[0x0D] = "(~a | b) & ~c",
[0x0E] = "(a | b) & ~c",
[0x0F] = "~c",
[0x10] = "~a & ~b & c",
[0x11] = "~a & ~b",
[0x12] = "(a ^ c) & ~b",
[0x13] = "(~a | ~c) & ~b",
[0x14] = "~a & (b ^ c)",
[0x15] = "~a & (~b | ~c)",
[0x16] = "a ^ (a & b | b ^ c)",
[0x17] = "~a & ~b | (~a | ~b) & ~c",
[0x18] = "(a ^ c) & (b ^ c)",
[0x19] = "a ^ (a & c | ~b)",
[0x1A] = "a ^ (a | ~b) & c",
[0x1B] = "a & ~c | ~a & ~b",
[0x1C] = "(a & c | b) ^ c",
[0x1D] = "~a & ~b | b & ~c",
[0x1E] = "(a | b) ^ c",
[0x1F] = "~a & ~b | ~c",
[0x20] = "a & ~b & c",
[0x21] = "(a ^ ~c) & ~b",
[0x22] = "a & ~b",
[0x23] = "(a | ~c) & ~b",
[0x24] = "(a ^ b) & (b ^ c)",
[0x25] = "a ^ (a & b | ~c)",
[0x26] = "a ^ (a | ~c) & b",
[0x27] = "a & ~b | ~a & ~c",
[0x28] = "a & (b ^ c)",
[0x29] = "a ^ (a | ~b) & (b ^ ~c)",
[0x2A] = "a & (~b | ~c)",
[0x2B] = "a & ~b | (a | ~b) & ~c",
[0x2C] = "(~a & c | b) ^ c",
[0x2D] = "(~a | b) ^ c",
[0x2E] = "a & ~b | b & ~c",
[0x2F] = "a & ~b | ~c",
[0x30] = "~b & c",
[0x31] = "(~a | c) & ~b",
[0x32] = "(a | c) & ~b",
[0x33] = "~b",
[0x34] = "(a & b | c) ^ b",
[0x35] = "~a & ~c | ~b & c",
[0x36] = "(a | c) ^ b",
[0x37] = "~a & ~c | ~b",
[0x38] = "(~a & b | c) ^ b",
[0x39] = "(~a | c) ^ b",
[0x3A] = "a & ~c | ~b & c",
[0x3B] = "a & ~c | ~b",
[0x3C] = "b ^ c",
[0x3D] = "~a & ~b | b ^ c",
[0x3E] = "a & ~b | b ^ c",
[0x3F] = "~b | ~c",
[0x40] = "~a & b & c",
[0x41] = "~a & (b ^ ~c)",
[0x42] = "(a ^ b) & (b ^ ~c)",
[0x43] = "(a & b | ~c) ^ b",
[0x44] = "~a & b",
[0x45] = "~a & (b | ~c)",
[0x46] = "a ^ (a & c | b)",
[0x47] = "~a & b | ~b & ~c",
[0x48] = "(a ^ c) & b",
[0x49] = "a ^ (a & c | b ^ ~c)",
[0x4A] = "a ^ (a | b) & c",
[0x4B] = "(a | ~b) ^ c",
[0x4C] = "(~a | ~c) & b",
[0x4D] = "~a & b | (~a | b) & ~c",
[0x4E] = "a & ~c | ~a & b",
[0x4F] = "~a & b | ~c",
[0x50] = "~a & c",
[0x51] = "~a & (~b | c)",
[0x52] = "a ^ (a & b | c)",
[0x53] = "~a & c | ~b & ~c",
[0x54] = "~a & (b | c)",
[0x55] = "~a",
[0x56] = "a ^ (b | c)",
[0x57] = "~a | ~b & ~c",
[0x58] = "a ^ (a & ~b | c)",
[0x59] = "a ^ (~b | c)",
[0x5A] = "a ^ c",
[0x5B] = "a ^ c | ~a & ~b",
[0x5C] = "~a & c | b & ~c",
[0x5D] = "~a | b & ~c",
[0x5E] = "a ^ c | b & ~c",
[0x5F] = "~a | ~c",
[0x60] = "(a ^ b) & c",
[0x61] = "a ^ (a & b | b ^ ~c)",
[0x62] = "a ^ (a | c) & b",
[0x63] = "(a | ~c) ^ b",
[0x64] = "a ^ (a & ~c | b)",
[0x65] = "a ^ (b | ~c)",
[0x66] = "a ^ b",
[0x67] = "a ^ b | ~a & ~c",
[0x68] = "a ^ (a | b) & (b ^ ~c)",
[0x69] = "a ^ b ^ ~c",
[0x6A] = "a ^ b & c",
[0x6B] = "a ^ b & c | ~b & ~c",
[0x6C] = "(~a | ~c) ^ ~b",
[0x6D] = "a ^ (b | ~c) | b & ~c",
[0x6E] = "a ^ b | a & ~c",
[0x6F] = "a ^ b | ~c",
[0x70] = "(~a | ~b) & c",
[0x71] = "~a & c | (~a | c) & ~b",
[0x72] = "a & ~b | ~a & c",
[0x73] = "~a & c | ~b",
[0x74] = "~a & b | ~b & c",
[0x75] = "~a | ~b & c",
[0x76] = "a ^ b | ~a & c",
[0x77] = "~a | ~b",
[0x78] = "(~a | ~b) ^ ~c",
[0x79] = "a ^ (~b | c) | ~b & c",
[0x7A] = "a ^ c | a & ~b",
[0x7B] = "a ^ c | ~b",
[0x7C] = "~a & b | b ^ c",
[0x7D] = "~a | b ^ c",
[0x7E] = "a ^ b | b ^ c",
[0x7F] = "~a | ~b | ~c",
[0x80] = "a & b & c",
[0x81] = "(a ^ ~b) & (b ^ ~c)",
[0x82] = "a & (b ^ ~c)",
[0x83] = "(~a & b | ~c) ^ b",
[0x84] = "(a ^ ~c) & b",
[0x85] = "a ^ (a & ~b | ~c)",
[0x86] = "a ^ (a | b) & (b ^ c)",
[0x87] = "(~a | ~b) ^ c",
[0x88] = "a & b",
[0x89] = "a ^ (a | ~c) & ~b",
[0x8A] = "a & (b | ~c)",
[0x8B] = "a & b | ~b & ~c",
[0x8C] = "(a | ~c) & b",
[0x8D] = "a & b | ~a & ~c",
[0x8E] = "a & b | (a | b) & ~c",
[0x8F] = "a & b | ~c",
[0x90] = "(a ^ ~b) & c",
[0x91] = "a ^ (a & ~c | ~b)",
[0x92] = "a ^ (a | c) & (b ^ c)",
[0x93] = "(~a | ~c) ^ b",
[0x94] = "a ^ (a & ~b | b ^ c)",
[0x95] = "a ^ (~b | ~c)",
[0x96] = "a ^ b ^ c",
[0x97] = "a ^ (~b | ~c) | ~b & ~c",
[0x98] = "a ^ (a | c) & ~b",
[0x99] = "a ^ ~b",
[0x9A] = "a ^ ~b & c",
[0x9B] = "a ^ ~b | a & ~c",
[0x9C] = "(a | ~c) ^ ~b",
[0x9D] = "a ^ ~b | b & ~c",
[0x9E] = "a ^ ~b & c | b & ~c",
[0x9F] = "a ^ ~b | ~c",
[0xA0] = "a & c",
[0xA1] = "a ^ (a | ~b) & ~c",
[0xA2] = "a & (~b | c)",
[0xA3] = "a & c | ~b & ~c",
[0xA4] = "a ^ (a | b) & ~c",
[0xA5] = "a ^ ~c",
[0xA6] = "a ^ b & ~c",
[0xA7] = "a ^ ~c | a & ~b",
[0xA8] = "a & (b | c)",
[0xA9] = "a ^ ~b & ~c",
[0xAA] = "a",
[0xAB] = "a | ~b & ~c",
[0xAC] = "a & c | b & ~c",
[0xAD] = "a ^ ~c | a & b",
[0xAE] = "a | b & ~c",
[0xAF] = "a | ~c",
[0xB0] = "(a | ~b) & c",
[0xB1] = "a & c | ~a & ~b",
[0xB2] = "a & c | (a | c) & ~b",
[0xB3] = "a & c | ~b",
[0xB4] = "(a | ~b) ^ ~c",
[0xB5] = "a ^ ~c | ~a & ~b",
[0xB6] = "a ^ b & ~c | ~b & c",
[0xB7] = "a ^ ~c | ~b",
[0xB8] = "a & b | ~b & c",
[0xB9] = "a ^ ~b | a & c",
[0xBA] = "a | ~b & c",
[0xBB] = "a | ~b",
[0xBC] = "a & b | b ^ c",
[0xBD] = "a ^ ~b | b ^ c",
[0xBE] = "a | b ^ c",
[0xBF] = "a | ~b | ~c",
[0xC0] = "b & c",
[0xC1] = "(a & ~b | c) ^ ~b",
[0xC2] = "(~a & ~b | c) ^ ~b",
[0xC3] = "b ^ ~c",
[0xC4] = "(~a | c) & b",
[0xC5] = "~a & ~c | b & c",
[0xC6] = "(~a | c) ^ ~b",
[0xC7] = "~a & b | b ^ ~c",
[0xC8] = "(a | c) & b",
[0xC9] = "(a | c) ^ ~b",
[0xCA] = "a & ~c | b & c",
[0xCB] = "a & b | b ^ ~c",
[0xCC] = "b",
[0xCD] = "~a & ~c | b",
[0xCE] = "a & ~c | b",
[0xCF] = "b | ~c",
[0xD0] = "(~a | b) & c",
[0xD1] = "~a & ~b | b & c",
[0xD2] = "(~a | b) ^ ~c",
[0xD3] = "~a & c | b ^ ~c",
[0xD4] = "~a & b | (~a | b) & c",
[0xD5] = "~a | b & c",
[0xD6] = "a ^ (b | c) | b & c",
[0xD7] = "~a | b ^ ~c",
[0xD8] = "a & b | ~a & c",
[0xD9] = "a ^ ~b | b & c",
[0xDA] = "a ^ c | a & b",
[0xDB] = "a ^ c | b ^ ~c",
[0xDC] = "~a & c | b",
[0xDD] = "~a | b",
[0xDE] = "a ^ c | b",
[0xDF] = "~a | b | ~c",
[0xE0] = "(a | b) & c",
[0xE1] = "(a | b) ^ ~c",
[0xE2] = "a & ~b | b & c",
[0xE3] = "a & c | b ^ ~c",
[0xE4] = "a & c | ~a & b",
[0xE5] = "a ^ ~c | b & c",
[0xE6] = "a ^ b | a & c",
[0xE7] = "a ^ b | b ^ ~c",
[0xE8] = "a & b | (a | b) & c",
[0xE9] = "a ^ ~b & ~c | b & c",
[0xEA] = "a | b & c",
[0xEB] = "a | b ^ ~c",
[0xEC] = "a & c | b",
[0xED] = "a ^ ~c | b",
[0xEE] = "a | b",
[0xEF] = "a | b | ~c",
[0xF0] = "c",
[0xF1] = "~a & ~b | c",
[0xF2] = "a & ~b | c",
[0xF3] = "~b | c",
[0xF4] = "~a & b | c",
[0xF5] = "~a | c",
[0xF6] = "a ^ b | c",
[0xF7] = "~a | ~b | c",
[0xF8] = "a & b | c",
[0xF9] = "a ^ ~b | c",
[0xFA] = "a | c",
[0xFB] = "a | ~b | c",
[0xFC] = "b | c",
[0xFD] = "~a | b | c",
[0xFE] = "a | b | c",
[0xFF] = "ones",
};

107
src/util/lut.h Normal file
View file

@ -0,0 +1,107 @@
/*
* Copyright 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <assert.h>
#include <stdint.h>
#include "util/macros.h"
/*
* Represents a boolean lookup table in sum-of-minterms form. These are
* natural encodings, matching the Intel BFN and Apple BITOP instructions.
*
* Both types are a single byte. With the source masks used by UTIL_LUT3
* (a = 0xAA, b = 0xCC, c = 0xF0), bit i of a table is the function's output
* for the inputs a = bit 0 of i, b = bit 1 of i, c = bit 2 of i.
*
* A util_lut2 only uses the four minterms where c is false, i.e. the low
* nibble (UTIL_LUT2 masks its result with ~c).
*/
typedef uint8_t util_lut2;
typedef uint8_t util_lut3;
#if !defined(_MSC_VER)
/*
 * UTIL_LUT3(expr) turns a boolean expression over the names a, b and c into
 * the matching 3-source lookup table. Only the bitwise operators &, |, ^ and ~
 * may be used; in particular, negate with ~ and never with !.
 *
 * Inside a GNU statement-expression, a, b and c are bound to the masks 0xAA,
 * 0xCC and 0xF0. ANDing the three masks together, each one optionally
 * complemented, leaves exactly the single bit that belongs to that min-term,
 * so evaluating the expression on the masks evaluates it for all eight input
 * combinations at once. This matches how Intel describes BFN in the bspec,
 * but it obscures the meaning.
 *
 * Complementing a mask (as in ~a & ~b & ~c) also sets bits above bit 7; the
 * final cast to the 8-bit util_lut3 drops those out-of-bounds bits.
 *
 * Example: UTIL_LUT3((a & b) | (~a & c))
 */
#define UTIL_LUT3(expr_involving_a_b_c)                                        \
   ({                                                                          \
      UNUSED const uint8_t a = 0xAA;                                           \
      UNUSED const uint8_t b = 0xCC;                                           \
      UNUSED const uint8_t c = 0xF0;                                           \
      (util_lut3)(expr_involving_a_b_c);                                       \
   })
/*
 * Two-source counterpart of UTIL_LUT3: build a util_lut2 from a bitwise
 * expression over the names a and b.
 *
 * The expression is ANDed with ~c before being handed to UTIL_LUT3, which
 * keeps only the four minterms where c is false. The result therefore lives
 * in the low nibble, with a = 0xA and b = 0xC.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand regardless of the operators surrounding its use.
 *
 * Example: UTIL_LUT2(a ^ b) evaluates to 0x6.
 */
#define UTIL_LUT2(expr_involving_a_b)                                          \
   ((util_lut2)UTIL_LUT3((expr_involving_a_b) & ~c))
/*
* Return a lookup table with source s inverted. We exchange the minterms for
* "source a is true" and "source a is false".
*/
static inline util_lut3
util_lut3_invert_source(util_lut3 l, unsigned s)
{
uint8_t masks[] = {UTIL_LUT3(a), UTIL_LUT3(b), UTIL_LUT3(c)};
assert(s < ARRAY_SIZE(masks));
uint8_t mask = masks[s];
uint8_t shift = __builtin_ctz(mask);
uint8_t true_bits = l & mask;
uint8_t false_bits = l & ~mask;
return (false_bits << shift) | (true_bits >> shift);
}
/*
 * Return the 2-source lookup table obtained by inverting source s (0 = a,
 * 1 = b) of table l.
 *
 * This reuses the 3-source helper and keeps the low nibble of its result.
 *
 * A util_lut2 has no source c, so s must be 0 or 1. s == 2 would satisfy the
 * 3-source helper's own assertion, but it would shift every minterm of a
 * well-formed util_lut2 into the high nibble, which the final mask then
 * throws away. Catch that here instead of silently returning 0.
 */
static inline util_lut2
util_lut2_invert_source(util_lut2 l, unsigned s)
{
   assert(s < 2);
   return (util_lut2)(util_lut3_invert_source((util_lut3)l, s) & 0xf);
}
#endif
/*
 * Invert a 2-source LUT, i.e. return the table of the negated function.
 * Negating a function flips every one of its min-terms; a util_lut2 has four
 * of them, in the low nibble.
 */
static inline util_lut2
util_lut2_invert(util_lut2 l)
{
   const util_lut2 all_minterms = 0xf;

   return l ^ all_minterms;
}
/*
 * Invert a 3-source LUT, i.e. return the table of the negated function, by
 * flipping all eight min-terms.
 */
static inline util_lut3
util_lut3_invert(util_lut3 l)
{
   const util_lut3 all_minterms = 0xff;

   return l ^ all_minterms;
}
/*
 * Return a 2-source lookup table equivalent to l but with sources a and b
 * exchanged.
 *
 * With a = 0xA and b = 0xC, minterm 1 is "a only" and minterm 2 is "b only".
 * Those are the only two minterms that tell a and b apart, so exchanging the
 * sources amounts to exchanging those two bits.
 */
static inline util_lut2
util_lut2_swap_sources(util_lut2 l)
{
   const unsigned a_only = 1;
   const unsigned b_only = 2;

   return util_bit_swap(l, a_only, b_only);
}
/*
 * Return a 3-source lookup table equivalent to l but with sources a and b
 * exchanged, where a and b are distinct source indices (0 = a, 1 = b, 2 = c).
 *
 * Minterm i encodes source 0 in bit 0 of i, source 1 in bit 1 and source 2 in
 * bit 2. Exchanging two sources exchanges every pair of minterms whose
 * indices differ only by having those two bits swapped; there are two such
 * pairs for each choice of sources.
 *
 * Swapping is symmetric, so the two indices may be given in either order.
 * Passing the same index twice, or an index above 2, is invalid.
 */
static inline util_lut3
util_lut3_swap_sources(util_lut3 l, unsigned a, unsigned b)
{
   /* Normalize the pair so each case below is written once. */
   const unsigned lo = a < b ? a : b;
   const unsigned hi = a < b ? b : a;

   if (lo == 0 && hi == 1) {
      return util_bit_swap(util_bit_swap(l, 1, 2), 5, 6);
   } else if (lo == 0 && hi == 2) {
      return util_bit_swap(util_bit_swap(l, 1, 4), 3, 6);
   } else if (lo == 1 && hi == 2) {
      return util_bit_swap(util_bit_swap(l, 2, 4), 3, 5);
   }

   UNREACHABLE("invalid source selection");
}
/*
* Finding minimal string forms of LUTs is tricky, so we precompute.
*
* Indexed by the util_lut3 value; each entry is an expression over a, b and c
* using &, |, ^ and ~ (the constant tables 0x00 and 0xFF are "zeros" and
* "ones").
*/
extern const char *util_lut3_to_str[256];

View file

@ -64,6 +64,7 @@ files_mesa_util = files(
'u_idalloc.h',
'list.h',
'log.c',
'lut.c',
'macros.h',
'memstream.c',
'memstream.h',