From 372c1a23dcf60c82556d7c09b3caac0165819111 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 18 Mar 2026 14:34:33 +0100 Subject: [PATCH] nir/opt_large_constants: support negative small constants Foz-DB Navi48: Totals from 511 (0.45% of 114655) affected shaders: MaxWaves: 14554 -> 14552 (-0.01%) Instrs: 767577 -> 768334 (+0.10%); split: -0.17%, +0.27% CodeSize: 4171036 -> 4181400 (+0.25%); split: -0.10%, +0.35% VGPRs: 27676 -> 27724 (+0.17%) SpillSGPRs: 144 -> 183 (+27.08%) Latency: 4053919 -> 4027092 (-0.66%); split: -0.88%, +0.22% InvThroughput: 817990 -> 819490 (+0.18%); split: -0.21%, +0.39% VClause: 11573 -> 11172 (-3.46%); split: -3.47%, +0.01% SClause: 14418 -> 14579 (+1.12%); split: -0.46%, +1.57% Copies: 71638 -> 71365 (-0.38%); split: -1.54%, +1.16% Branches: 20212 -> 20425 (+1.05%); split: -0.39%, +1.44% PreSGPRs: 21765 -> 21743 (-0.10%); split: -0.23%, +0.12% PreVGPRs: 19475 -> 19307 (-0.86%); split: -0.91%, +0.05% VALU: 411365 -> 413642 (+0.55%); split: -0.02%, +0.57% SALU: 126940 -> 125411 (-1.20%); split: -1.53%, +0.32% VMEM: 20574 -> 20062 (-2.49%) SMEM: 23724 -> 23677 (-0.20%); split: -0.25%, +0.05% VOPD: 19838 -> 19847 (+0.05%) Reviewed-by: Alyssa Rosenzweig Part-of: --- src/compiler/nir/nir_opt_large_constants.c | 87 +++++++++++++++---- .../nir/tests/opt_large_constants_tests.cpp | 23 ++--- 2 files changed, 83 insertions(+), 27 deletions(-) diff --git a/src/compiler/nir/nir_opt_large_constants.c b/src/compiler/nir/nir_opt_large_constants.c index b815da367c0..771dfdb2010 100644 --- a/src/compiler/nir/nir_opt_large_constants.c +++ b/src/compiler/nir/nir_opt_large_constants.c @@ -112,6 +112,7 @@ write_const_values(void *dst, const nir_const_value *src, struct small_constant { uint64_t data; + int64_t min; uint32_t bit_size; bool is_float; uint32_t bit_stride; @@ -222,10 +223,14 @@ handle_constant_store(void *mem_ctx, struct var_info *info, bit_size); } +#define NIR_SMALL_CONSTANT_MAX_ABS_VALUE 255 + static bool get_small_constant_component(struct small_constant *info, uint32_t array_len, uint32_t bit_size, nir_const_value *values) { + int64_t min = INT64_MAX; + bool is_float = true; if (bit_size < 16) { is_float = false; @@ -233,23 +238,47 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len, for (unsigned i = 0; i < array_len; i++) { /* See if it's an easily convertible float. * TODO: Compute greatest common divisor to support non-integer floats. - * TODO: Compute min value and add it to the result of - * build_small_constant_load for handling negative floats. */ - uint64_t u = nir_const_value_as_float(values[i], bit_size); - nir_const_value fc = nir_const_value_for_float(u, bit_size); + double float_value = nir_const_value_as_float(values[i], bit_size); + if (fabs(float_value) > NIR_SMALL_CONSTANT_MAX_ABS_VALUE) { + is_float = false; + break; + } + + int64_t int_value = float_value; + nir_const_value fc = nir_const_value_for_float(int_value, bit_size); is_float &= !memcmp(&fc, &values[i], bit_size / 8); + + min = MIN2(min, int_value); + } + } + + if (bit_size == 1) { + min = 0; + } else if (!is_float) { + min = INT64_MAX; + for (unsigned i = 0; i < array_len; i++) { + int64_t integer = nir_const_value_as_int(values[i], bit_size); + min = MIN2(min, integer); } } uint32_t used_bits = 0; for (unsigned i = 0; i < array_len; i++) { - uint64_t u64_elem = is_float ? nir_const_value_as_float(values[i], bit_size) - : nir_const_value_as_uint(values[i], bit_size); - if (!u64_elem) + int64_t i64_elem; + + if (is_float) + i64_elem = nir_const_value_as_float(values[i], bit_size); + else if (bit_size == 1) + i64_elem = nir_const_value_as_uint(values[i], bit_size); + else + i64_elem = nir_const_value_as_int(values[i], bit_size); + + i64_elem -= min; + if (!i64_elem) continue; - uint32_t elem_bits = util_logbase2_64(u64_elem) + 1; + uint32_t elem_bits = util_logbase2_64(i64_elem) + 1; used_bits = MAX2(used_bits, elem_bits); } @@ -262,15 +291,25 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len, return false; for (unsigned i = 0; i < array_len; i++) { - uint64_t u64_elem = is_float ? nir_const_value_as_float(values[i], bit_size) - : nir_const_value_as_uint(values[i], bit_size); + int64_t i64_elem; - info->data |= u64_elem << (i * used_bits); + if (is_float) + i64_elem = nir_const_value_as_float(values[i], bit_size); + else if (bit_size == 1) + i64_elem = nir_const_value_as_uint(values[i], bit_size); + else + i64_elem = nir_const_value_as_int(values[i], bit_size); + + i64_elem -= min; + if (!i64_elem) + continue; + + info->data |= ((uint64_t)i64_elem) << (i * used_bits); } /* Limit bit_size >= 32 to avoid unnecessary conversions. */ - info->bit_size = - MAX2(util_next_power_of_two(used_bits * array_len), 32); + info->bit_size = MAX2(util_next_power_of_two(used_bits * array_len), 32); + info->min = min; info->is_float = is_float; info->bit_stride = used_bits; return true; @@ -328,6 +367,8 @@ build_small_constant_load(nir_builder *b, nir_deref_instr *deref, nir_def *ret[NIR_MAX_VEC_COMPONENTS]; + const unsigned bit_size = glsl_get_bit_size(deref->type); + for (unsigned c = 0; c < info->num_components; c++) { const struct small_constant *constant = &info->small_constant[c]; nir_def *imm = nir_imm_intN_t(b, constant->data, constant->bit_size); @@ -337,16 +378,28 @@ build_small_constant_load(nir_builder *b, nir_deref_instr *deref, ret[c] = nir_ushr(b, imm, shift); ret[c] = nir_iand_imm(b, ret[c], BITFIELD64_MASK(constant->bit_stride)); - const unsigned bit_size = glsl_get_bit_size(deref->type); + assert(constant->bit_stride <= 32); + if (ret[c]->bit_size == 64) + ret[c] = nir_unpack_64_2x32_split_x(b, ret[c]); + + if (bit_size == 64 && !constant->is_float) + ret[c] = nir_u2u64(b, ret[c]); + + ret[c] = nir_iadd_imm(b, ret[c], constant->min); + if (bit_size < 8) { /* Booleans are special-cased to be 32-bit */ assert(glsl_type_is_boolean(deref->type)); ret[c] = nir_ine_imm(b, ret[c], 0); } else { - if (constant->is_float) - ret[c] = nir_u2fN(b, ret[c], bit_size); - else if (bit_size != constant->bit_size) + if (constant->is_float) { + if (constant->min >= 0) + ret[c] = nir_u2fN(b, ret[c], bit_size); + else + ret[c] = nir_i2fN(b, ret[c], bit_size); + } else { ret[c] = nir_u2uN(b, ret[c], bit_size); + } } } diff --git a/src/compiler/nir/tests/opt_large_constants_tests.cpp b/src/compiler/nir/tests/opt_large_constants_tests.cpp index b3566810ad2..3c4b60e84ab 100644 --- a/src/compiler/nir/tests/opt_large_constants_tests.cpp +++ b/src/compiler/nir/tests/opt_large_constants_tests.cpp @@ -273,19 +273,22 @@ TEST_F(nir_large_constants_test, small_float_whole_numbers_array) workgroup_size: 1, 1, 1 max_subgroup_size: 128 min_subgroup_size: 1 - constants: 32 decl_function main () (entrypoint) impl main { - block b0: // preds: - 32 %0 = @load_workgroup_index - 32 %1 = load_const (0x00000000) - 32 %2 = load_const (0x00000002) - 32 %3 = ishl %0, %2 (0x2) - 32 %4 = iadd %1 (0x0), %3 - 32 %5 = @load_constant (%4) (base=0, range=32, access=none, align_mul=4, align_offset=0) - @use (%5) - // succs: b1 + block b0: // preds: + 32 %0 = @load_workgroup_index + 32 %1 = load_const (0x76543210 = 1985229328) + 32 %2 = load_const (0x00000002) + 32 %3 = ishl %0, %2 (0x2) + 32 %4 = ushr %1 (0x76543210), %3 + 32 %5 = load_const (0x0000000f = 15) + 32 %6 = iand %4, %5 (0xf) + 32 %7 = load_const (0xfffffffc = -4 = 4294967292) + 32 %8 = iadd %6, %7 (0xfffffffc) + 32 %9 = i2f32 %8 // exact, preserve:sz + @use (%9) + // succs: b1 block b1: } )"));