nir/serialize: Make more space for intrinsic_op allowing 1024 ops

We are close to the limit of 512 intrinsics, make more space to
be able to support up to 1024 intrinsics.

Take one bit from packed_const_indices, they shouldn't suffer in
a common case.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13456>
This commit is contained in:
Danylo Piliaiev 2021-10-20 17:36:27 +03:00 committed by Marge Bot
parent dc74285d32
commit b7c7abded7

View file

@ -575,14 +575,12 @@ union packed_dest {
};
enum intrinsic_const_indices_encoding {
/* Use the 9 bits of packed_const_indices to store 1-9 indices.
* 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
* 4 2-bit indices, or 5-9 1-bit indices.
/* Use packed_const_indices to store tightly packed indices.
*
* The common case for load_ubo is 0, 0, 0, which is trivially represented.
* The common cases for load_interpolated_input also fit here, e.g.: 7, 3
*/
const_indices_9bit_all_combined,
const_indices_all_combined,
const_indices_8bit, /* 8 bits per element */
const_indices_16bit, /* 16 bits per element */
@ -641,9 +639,9 @@ union packed_instr {
} deref_var;
struct {
unsigned instr_type:4;
unsigned intrinsic:9;
unsigned intrinsic:10;
unsigned const_indices_encoding:2;
unsigned packed_const_indices:9;
unsigned packed_const_indices:8;
unsigned dest:8;
} intrinsic;
struct {
@ -1167,11 +1165,11 @@ read_deref(read_ctx *ctx, union packed_instr header)
static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
/* 9 bits for nir_intrinsic_op */
STATIC_ASSERT(nir_num_intrinsics <= 512);
/* 10 bits for nir_intrinsic_op */
STATIC_ASSERT(nir_num_intrinsics <= 1024);
unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
assert(intrin->intrinsic < 512);
assert(intrin->intrinsic < 1024);
union packed_instr header;
header.u32 = 0;
@ -1187,11 +1185,11 @@ write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
max_bits = MAX2(max_bits, max);
}
if (max_bits * num_indices <= 9) {
header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;
if (max_bits * num_indices <= 8) {
header.intrinsic.const_indices_encoding = const_indices_all_combined;
/* Pack all const indices into 6 bits. */
unsigned bit_size = 9 / num_indices;
/* Pack all const indices into 8 bits. */
unsigned bit_size = 8 / num_indices;
for (unsigned i = 0; i < num_indices; i++) {
header.intrinsic.packed_const_indices |=
intrin->const_index[i] << (i * bit_size);
@ -1262,8 +1260,8 @@ read_intrinsic(read_ctx *ctx, union packed_instr header)
if (num_indices) {
switch (header.intrinsic.const_indices_encoding) {
case const_indices_9bit_all_combined: {
unsigned bit_size = 9 / num_indices;
case const_indices_all_combined: {
unsigned bit_size = 8 / num_indices;
unsigned bit_mask = u_bit_consecutive(0, bit_size);
for (unsigned i = 0; i < num_indices; i++) {
intrin->const_index[i] =