From 5f37788ae9a8839b97984dba918aa1636463a6d2 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 18 Mar 2026 15:05:56 +0100 Subject: [PATCH] nir/opt_large_constants: handle floating point power of two fractions Foz-DB Navi48: Totals from 365 (0.32% of 114655) affected shaders: MaxWaves: 10020 -> 10016 (-0.04%) Instrs: 486252 -> 486097 (-0.03%); split: -0.21%, +0.18% CodeSize: 2629536 -> 2628452 (-0.04%); split: -0.19%, +0.14% VGPRs: 19884 -> 19896 (+0.06%); split: -0.06%, +0.12% SpillSGPRs: 210 -> 212 (+0.95%) Latency: 3818610 -> 3765549 (-1.39%); split: -1.50%, +0.11% InvThroughput: 598445 -> 596281 (-0.36%); split: -0.58%, +0.22% VClause: 10053 -> 9698 (-3.53%); split: -3.54%, +0.01% SClause: 17548 -> 17334 (-1.22%); split: -1.24%, +0.02% Copies: 43196 -> 42249 (-2.19%); split: -2.34%, +0.14% Branches: 16695 -> 16628 (-0.40%); split: -0.47%, +0.07% PreSGPRs: 17988 -> 17971 (-0.09%) PreVGPRs: 13552 -> 13520 (-0.24%) VALU: 244842 -> 246611 (+0.72%); split: -0.02%, +0.74% SALU: 79163 -> 77778 (-1.75%); split: -2.05%, +0.30% VMEM: 13468 -> 13084 (-2.85%) SMEM: 23571 -> 23393 (-0.76%) VOPD: 8384 -> 8372 (-0.14%) Reviewed-by: Alyssa Rosenzweig Part-of: --- src/compiler/nir/nir_opt_large_constants.c | 53 +++++++++++++++---- .../nir/tests/opt_large_constants_tests.cpp | 30 ++++++----- 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/src/compiler/nir/nir_opt_large_constants.c b/src/compiler/nir/nir_opt_large_constants.c index 771dfdb2010..e544ba3ab93 100644 --- a/src/compiler/nir/nir_opt_large_constants.c +++ b/src/compiler/nir/nir_opt_large_constants.c @@ -115,6 +115,7 @@ struct small_constant { int64_t min; uint32_t bit_size; bool is_float; + uint32_t denom; uint32_t bit_stride; }; @@ -232,24 +233,54 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len, int64_t min = INT64_MAX; bool is_float = true; + uint32_t denom = 1; if (bit_size < 16) { is_float = false; } else { for (unsigned i = 0; i < array_len; i++) { - /* See if it's an easily convertible float. - * TODO: Compute greatest common divisor to support non-integer floats. - */ double float_value = nir_const_value_as_float(values[i], bit_size); if (fabs(float_value) > NIR_SMALL_CONSTANT_MAX_ABS_VALUE) { is_float = false; break; } - int64_t int_value = float_value; - nir_const_value fc = nir_const_value_for_float(int_value, bit_size); - is_float &= !memcmp(&fc, &values[i], bit_size / 8); + /* Try out small denominators. Handling large denominators is not worth it + * because the numerators will be large in that case, making it unlikely that + * they will fit into 64 bits. + * Limit to power of two for now, to avoid any rounding issues. + */ + uint32_t value_denom = 0; + for (uint32_t candidate_denom = 1; candidate_denom <= 16; candidate_denom *= 2) { + double expanded = float_value * candidate_denom; + if (floor(expanded) * (1.0f / (float)candidate_denom) == float_value) { + value_denom = candidate_denom; + break; + } + } - min = MIN2(min, int_value); + if (!value_denom) { + denom = 0; + break; + } else { + denom = MAX2(denom, value_denom); + } + } + + if (denom) { + for (unsigned i = 0; i < array_len; i++) { + double fp_val = nir_const_value_as_float(values[i], bit_size) * denom; + /* quantize to target precision */ + fp_val = nir_const_value_as_float(nir_const_value_for_float(fp_val, bit_size), bit_size); + + int64_t int_value = (int64_t)fp_val; + + nir_const_value fc = nir_const_value_for_float(int_value * (1.0f / (float)denom), bit_size); + is_float &= !memcmp(&fc, &values[i], bit_size / 8); + + min = MIN2(min, int_value); + } + } else { + is_float = false; } } @@ -268,7 +299,7 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len, int64_t i64_elem; if (is_float) - i64_elem = nir_const_value_as_float(values[i], bit_size); + i64_elem = nir_const_value_as_float(values[i], bit_size) * denom; else if (bit_size == 1) i64_elem = nir_const_value_as_uint(values[i], bit_size); else @@ -294,7 +325,7 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len, int64_t i64_elem; if (is_float) - i64_elem = nir_const_value_as_float(values[i], bit_size); + i64_elem = nir_const_value_as_float(values[i], bit_size) * denom; else if (bit_size == 1) i64_elem = nir_const_value_as_uint(values[i], bit_size); else @@ -311,6 +342,7 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len, info->bit_size = MAX2(util_next_power_of_two(used_bits * array_len), 32); info->min = min; info->is_float = is_float; + info->denom = denom; info->bit_stride = used_bits; return true; } @@ -397,6 +429,9 @@ build_small_constant_load(nir_builder *b, nir_deref_instr *deref, ret[c] = nir_u2fN(b, ret[c], bit_size); else ret[c] = nir_i2fN(b, ret[c], bit_size); + + if (constant->denom != 1) + ret[c] = nir_fmul_imm(b, ret[c], 1.0f / (float)constant->denom); } else { ret[c] = nir_u2uN(b, ret[c], bit_size); } diff --git a/src/compiler/nir/tests/opt_large_constants_tests.cpp b/src/compiler/nir/tests/opt_large_constants_tests.cpp index 3c4b60e84ab..5fd613a99db 100644 --- a/src/compiler/nir/tests/opt_large_constants_tests.cpp +++ b/src/compiler/nir/tests/opt_large_constants_tests.cpp @@ -299,9 +299,9 @@ TEST_F(nir_large_constants_test, small_fraction_array) int32_t length = 8; array = nir_local_variable_create(b->impl, glsl_array_type(glsl_float_type(), length, 0), "array"); for (int32_t i = 0; i < length / 2; i++) - nir_store_array_var_imm(b, array, i, nir_imm_float(b, i / 2.0 - 2), 0x1); + nir_store_array_var_imm(b, array, i, nir_imm_float(b, i + 2.25), 0x1); for (int32_t i = length / 2; i < length; i++) - nir_store_array_var_imm(b, array, i, nir_imm_float(b, (i - length / 2) / 3.0), 0x1); + nir_store_array_var_imm(b, array, i, nir_imm_float(b, (i - length / 2) + 0.5), 0x1); run_test(); @@ -311,19 +311,25 @@ TEST_F(nir_large_constants_test, small_fraction_array) workgroup_size: 1, 1, 1 max_subgroup_size: 128 min_subgroup_size: 1 - constants: 32 decl_function main () (entrypoint) impl main { - block b0: // preds: - 32 %0 = @load_workgroup_index - 32 %1 = load_const (0x00000000) - 32 %2 = load_const (0x00000002) - 32 %3 = ishl %0, %2 (0x2) - 32 %4 = iadd %1 (0x0), %3 - 32 %5 = @load_constant (%4) (base=0, range=32, access=none, align_mul=4, align_offset=0) - @use (%5) - // succs: b1 + block b0: // preds: + 32 %0 = @load_workgroup_index + 64 %1 = load_const (0x0c080400130f0b07 = 866947326635084551) + 32 %2 = load_const (0x00000003) + 32 %3 = ishl %0, %2 (0x3) + 64 %4 = ushr %1 (0xc080400130f0b07), %3 + 64 %5 = load_const (0x00000000000000ff = 255) + 64 %6 = iand %4, %5 (0xff) + 32 %7 = unpack_64_2x32_split_x %6 + 32 %8 = load_const (0x00000002) + 32 %9 = iadd %7, %8 (0x2) + 32 %10 = u2f32 %9 // exact, preserve:sz + 32 %11 = load_const (0x3e800000 = 0.250000) + 32 %12 = fmul %10, %11 (0.250000) // exact, preserve:sz + @use (%12) + // succs: b1 block b1: } )"));