From abfd6a4df91ad0b476e302bceb0c268bdbf10331 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 9 Mar 2026 12:57:17 +0100 Subject: [PATCH] nir: don't assume indices are always 32bit when accessing them as raw data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alyssa Rosenzweig Reviewed-by: Marek Olšák Part-of: --- src/compiler/nir/nir.h | 3 ++ src/compiler/nir/nir_instr_set.c | 4 +-- src/compiler/nir/nir_intrinsics_c.py | 1 + .../nir/nir_lower_mem_access_bit_sizes.c | 2 +- src/compiler/nir/nir_serialize.c | 34 +++++++++---------- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index fb061c88d6d..30aa8bdcd98 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2116,6 +2116,9 @@ typedef struct nir_intrinsic_info { /** the number of constant indices used by the intrinsic */ uint8_t num_indices; + /** the number of 32bit slots used for storing constant indices. 
*/ + uint8_t num_index_slots; + /** list of indices */ uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX]; diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index 871699408c3..b163b361fb1 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -242,7 +242,7 @@ hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr) hash = XXH32(v, sizeof(v), hash); } - hash = XXH32(instr->const_index, info->num_indices * sizeof(instr->const_index[0]), hash); + hash = XXH32(instr->const_index, info->num_index_slots * sizeof(instr->const_index[0]), hash); for (unsigned i = 0; i < nir_intrinsic_infos[instr->intrinsic].num_srcs; i++) hash = hash_src(hash, &instr->src[i]); @@ -747,7 +747,7 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) return false; } - for (unsigned i = 0; i < info->num_indices; i++) { + for (unsigned i = 0; i < info->num_index_slots; i++) { if (intrinsic1->const_index[i] != intrinsic2->const_index[i]) return false; } diff --git a/src/compiler/nir/nir_intrinsics_c.py b/src/compiler/nir/nir_intrinsics_c.py index 72a591dc35a..63cc73fe295 100644 --- a/src/compiler/nir/nir_intrinsics_c.py +++ b/src/compiler/nir/nir_intrinsics_c.py @@ -41,6 +41,7 @@ const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = { .dest_bit_sizes = ${hex(reduce(operator.or_, opcode.bit_sizes, 0))}, .bit_size_src = ${opcode.bit_size_src}, .num_indices = ${opcode.num_indices}, + .num_index_slots = ${opcode.num_indices}, % if opcode.indices: .indices = { % for i in range(len(opcode.indices)): diff --git a/src/compiler/nir/nir_lower_mem_access_bit_sizes.c b/src/compiler/nir/nir_lower_mem_access_bit_sizes.c index 418d45f3f7b..f2b94f6181d 100644 --- a/src/compiler/nir/nir_lower_mem_access_bit_sizes.c +++ b/src/compiler/nir/nir_lower_mem_access_bit_sizes.c @@ -52,7 +52,7 @@ dup_mem_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, } dup->num_components = num_components; - for (unsigned i = 0; i < 
info->num_indices; i++) + for (unsigned i = 0; i < info->num_index_slots; i++) dup->const_index[i] = intrin->const_index[i]; nir_set_io_offset(dup, offset); diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index e4248c78d86..24fb6ca16d3 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -1053,7 +1053,7 @@ write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) /* 10 bits for nir_intrinsic_op */ STATIC_ASSERT(nir_num_intrinsics <= 1024); unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs; - unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices; + unsigned num_index_slots = nir_intrinsic_infos[intrin->intrinsic].num_index_slots; assert(intrin->intrinsic < 1024); union packed_instr header; @@ -1063,19 +1063,19 @@ write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) header.intrinsic.intrinsic = intrin->intrinsic; /* Analyze constant indices to decide how to encode them. */ - if (num_indices) { + if (num_index_slots) { unsigned max_bits = 0; - for (unsigned i = 0; i < num_indices; i++) { + for (unsigned i = 0; i < num_index_slots; i++) { unsigned max = util_last_bit(intrin->const_index[i]); max_bits = MAX2(max_bits, max); } - if (max_bits * num_indices <= 8) { + if (max_bits * num_index_slots <= 8) { header.intrinsic.const_indices_encoding = const_indices_all_combined; /* Pack all const indices into 8 bits. 
*/ - unsigned bit_size = 8 / num_indices; - for (unsigned i = 0; i < num_indices; i++) { + unsigned bit_size = 8 / num_index_slots; + for (unsigned i = 0; i < num_index_slots; i++) { header.intrinsic.packed_const_indices |= intrin->const_index[i] << (i * bit_size); } @@ -1095,18 +1095,18 @@ write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) for (unsigned i = 0; i < num_srcs; i++) write_src(ctx, &intrin->src[i]); - if (num_indices) { + if (num_index_slots) { switch (header.intrinsic.const_indices_encoding) { case const_indices_8bit: - for (unsigned i = 0; i < num_indices; i++) + for (unsigned i = 0; i < num_index_slots; i++) blob_write_uint8(ctx->blob, intrin->const_index[i]); break; case const_indices_16bit: - for (unsigned i = 0; i < num_indices; i++) + for (unsigned i = 0; i < num_index_slots; i++) blob_write_uint16(ctx->blob, intrin->const_index[i]); break; case const_indices_32bit: - for (unsigned i = 0; i < num_indices; i++) + for (unsigned i = 0; i < num_index_slots; i++) blob_write_uint32(ctx->blob, intrin->const_index[i]); break; } @@ -1120,7 +1120,7 @@ read_intrinsic(read_ctx *ctx, union packed_instr header) nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op); unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; - unsigned num_indices = nir_intrinsic_infos[op].num_indices; + unsigned num_index_slots = nir_intrinsic_infos[op].num_index_slots; if (nir_intrinsic_infos[op].has_dest) read_def(ctx, &intrin->def, &intrin->instr, header); @@ -1143,12 +1143,12 @@ read_intrinsic(read_ctx *ctx, union packed_instr header) } } - if (num_indices) { + if (num_index_slots) { switch (header.intrinsic.const_indices_encoding) { case const_indices_all_combined: { - unsigned bit_size = 8 / num_indices; + unsigned bit_size = 8 / num_index_slots; unsigned bit_mask = u_bit_consecutive(0, bit_size); - for (unsigned i = 0; i < num_indices; i++) { + for (unsigned i = 0; i < num_index_slots; i++) { intrin->const_index[i] = 
(header.intrinsic.packed_const_indices >> (i * bit_size)) & bit_mask; @@ -1156,15 +1156,15 @@ read_intrinsic(read_ctx *ctx, union packed_instr header) break; } case const_indices_8bit: - for (unsigned i = 0; i < num_indices; i++) + for (unsigned i = 0; i < num_index_slots; i++) intrin->const_index[i] = blob_read_uint8(ctx->blob); break; case const_indices_16bit: - for (unsigned i = 0; i < num_indices; i++) + for (unsigned i = 0; i < num_index_slots; i++) intrin->const_index[i] = blob_read_uint16(ctx->blob); break; case const_indices_32bit: - for (unsigned i = 0; i < num_indices; i++) + for (unsigned i = 0; i < num_index_slots; i++) intrin->const_index[i] = blob_read_uint32(ctx->blob); break; }