mesa/src/util/lut.h
Alyssa Rosenzweig 95badb6b1d util: add boolean lookup table helpers
Many instruction sets (Intel, Apple, NVIDIA) implement bitwise operations with a
single general instruction that takes a lookup table in sum-of-products
(minterms) form. Working with these tables manually is a bit gnarly, and
multiple backends need to do this. This adds common code for representing such
lookup tables with 2 or 3 sources, with a rich set of helpers for building,
inspecting, and manipulating the LUTs.

Eventually, we may want to introduce a nir_op_bitop3 instruction to NIR to build
common code for fusing boolean expression trees into lookup tables. That NIR
pass will presumably use the helpers here.

NAK already has this abstraction internally (in Rust). Possibly NAK will be able
to drop (or cut down) that code once we handle this in NIR.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37200>
2025-09-16 21:48:37 +00:00

107 lines
3 KiB
C

/*
* Copyright 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <assert.h>
#include <stdint.h>
#include "util/macros.h"
/*
 * Represents a boolean lookup table in sum-of-minterms form. These are
 * natural encodings, matching the Intel BFN and Apple BITOP instructions.
 *
 * Bit i of a table is the result for the input combination numbered i, where
 * source a contributes bit 0 of i, source b contributes bit 1 and source c
 * contributes bit 2 (see the masks used by UTIL_LUT3).
 */
/* 2-source table: the helpers in this file only treat the low 4 bits as meaningful. */
typedef uint8_t util_lut2;
/* 3-source table: all 8 bits are used, one per minterm. */
typedef uint8_t util_lut3;
#if !defined(_MSC_VER)
/*
 * Construct a lookup table from a boolean expression over the sources a, b
 * and c. The expression may combine them with the bitwise operators &, |, ^
 * and ~. Logical negation (!) does not work here; use ~ instead.
 *
 * Inside a GNU statement-expression, a, b and c are bound to the masks of the
 * minterms in which the respective source is true. ANDing the three masks,
 * each optionally complemented, selects exactly the bit of one minterm, so
 * evaluating the expression on the masks yields the table. This matches how
 * Intel describes BFN in the bspec, although it obscures the meaning.
 *
 * The final cast to an 8-bit type discards the out-of-bounds bits produced by
 * complementing (for instance in ~a & ~b & ~c).
 *
 * Example: UTIL_LUT3((a & b) | (~a & c))
 */
#define UTIL_LUT3(expr_involving_a_b_c)                                      \
   ({                                                                        \
      UNUSED const uint8_t a = 0xAA;                                         \
      UNUSED const uint8_t b = 0xCC;                                         \
      UNUSED const uint8_t c = 0xF0;                                         \
      (util_lut3)(expr_involving_a_b_c);                                     \
   })
/*
 * Two-source variant of UTIL_LUT3: build a lookup table from a boolean
 * expression over a and b only. ANDing with ~c keeps just the minterms in
 * which c is false, i.e. the low 4 bits, so the result is a 4-bit table.
 *
 * The whole expansion is parenthesized so that it behaves as a single primary
 * expression regardless of the operators surrounding the macro invocation.
 *
 * Example: UTIL_LUT2(a ^ b)
 */
#define UTIL_LUT2(expr_involving_a_b) \
   ((util_lut2)UTIL_LUT3((expr_involving_a_b) & ~c))
/*
 * Return the lookup table obtained by inverting source s (0 = a, 1 = b,
 * 2 = c). Inverting a source exchanges the minterms for "source s is true"
 * with the minterms for "source s is false".
 */
static inline util_lut3
util_lut3_invert_source(util_lut3 l, unsigned s)
{
   /* For each source, the minterms in which that source is true. */
   const uint8_t source_masks[] = {UTIL_LUT3(a), UTIL_LUT3(b), UTIL_LUT3(c)};
   assert(s < ARRAY_SIZE(source_masks));

   const uint8_t when_true = source_masks[s];

   /* Every "true" minterm sits this many bits above its "false" partner:
    * 1, 2 or 4 for a, b or c respectively.
    */
   const uint8_t distance = __builtin_ctz(when_true);

   /* Move each half of the table into the other half's position. */
   const uint8_t moved_down = (l & when_true) >> distance;
   const uint8_t moved_up = (l & ~when_true) << distance;

   return moved_up | moved_down;
}
/*
 * Two-source counterpart of util_lut3_invert_source: return the lookup table
 * with source s inverted, where s is 0 (source a) or 1 (source b).
 *
 * The work is delegated to the 3-source helper; the result is masked back
 * down to the 4 bits of a 2-source table.
 */
static inline util_lut2
util_lut2_invert_source(util_lut2 l, unsigned s)
{
   /* A 2-source table has no source c. The 3-source helper would accept
    * s == 2, but that would shift every minterm out of the 4-bit range and
    * silently produce a meaningless table, so reject it here.
    */
   assert(s < 2);

   return (util_lut2)(util_lut3_invert_source((util_lut3)l, s) & 0xf);
}
#endif
/*
* Helpers to invert a LUT. This is easy: invert all the min-terms.
*/
/*
 * Return the complement of a 2-source lookup table by flipping each of its
 * 4 minterm bits. Bits above the low 4 are passed through unchanged.
 */
static inline util_lut2
util_lut2_invert(util_lut2 l)
{
   const util_lut2 all_minterms = 0xf;

   return (util_lut2)(l ^ all_minterms);
}
/*
 * Return the complement of a 3-source lookup table by flipping each of its
 * 8 minterm bits.
 */
static inline util_lut3
util_lut3_invert(util_lut3 l)
{
   const util_lut3 all_minterms = 0xff;

   return (util_lut3)(l ^ all_minterms);
}
/*
* Return a lookup table equivalent to the input but with sources a & b swapped.
* To implement, we swap the corresponding minterms.
*/
/*
 * Return the 2-source lookup table equivalent to l with sources a and b
 * exchanged.
 *
 * Minterm 1 is "a only" and minterm 2 is "b only", so those two bits trade
 * places. Minterms 0 (neither) and 3 (both) are symmetric in a and b and stay
 * where they are. util_bit_swap is declared elsewhere; it is presumably the
 * helper that exchanges the bits at the two given positions.
 */
static inline util_lut2
util_lut2_swap_sources(util_lut2 l)
{
   enum { only_a = 1, only_b = 2 };

   return util_bit_swap(l, only_a, only_b);
}
/*
 * Return the 3-source lookup table equivalent to l with the sources numbered
 * a and b exchanged. Here a and b are source indices (0 = a, 1 = b, 2 = c),
 * not source values.
 *
 * The pair may be given in either order, and swapping a source with itself
 * returns l unchanged. Indices above 2 are invalid.
 *
 * Each swap exchanges the two pairs of minterms whose indices differ only in
 * which of the two selected sources is set; all other minterms are symmetric
 * in those sources and do not move.
 */
static inline util_lut3
util_lut3_swap_sources(util_lut3 l, unsigned a, unsigned b)
{
   /* Swapping is symmetric, so canonicalize the pair to a <= b. */
   if (a > b) {
      unsigned tmp = a;
      a = b;
      b = tmp;
   }

   /* Exchanging a valid source with itself is the identity. */
   if (a == b && b <= 2) {
      return l;
   }

   if (a == 0 && b == 1) {
      /* a <-> b: minterms 1 <-> 2 (c clear) and 5 <-> 6 (c set). */
      return util_bit_swap(util_bit_swap(l, 1, 2), 5, 6);
   } else if (a == 0 && b == 2) {
      /* a <-> c: minterms 1 <-> 4 (b clear) and 3 <-> 6 (b set). */
      return util_bit_swap(util_bit_swap(l, 1, 4), 3, 6);
   } else if (a == 1 && b == 2) {
      /* b <-> c: minterms 2 <-> 4 (a clear) and 3 <-> 5 (a set). */
      return util_bit_swap(util_bit_swap(l, 2, 4), 3, 5);
   }

   UNREACHABLE("invalid source selection");
}
/*
 * Finding minimal string forms of LUTs is tricky, so we precompute.
 *
 * Table of 256 strings, one for each possible 8-bit util_lut3 value
 * (presumably indexed by the LUT value itself -- confirm against the
 * definition). This is only a declaration; the array is defined elsewhere.
 */
extern const char *util_lut3_to_str[256];