mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 13:38:06 +02:00
nir/opt_large_constants: handle floating point power of two fractions
Foz-DB Navi48: Totals from 365 (0.32% of 114655) affected shaders: MaxWaves: 10020 -> 10016 (-0.04%) Instrs: 486252 -> 486097 (-0.03%); split: -0.21%, +0.18% CodeSize: 2629536 -> 2628452 (-0.04%); split: -0.19%, +0.14% VGPRs: 19884 -> 19896 (+0.06%); split: -0.06%, +0.12% SpillSGPRs: 210 -> 212 (+0.95%) Latency: 3818610 -> 3765549 (-1.39%); split: -1.50%, +0.11% InvThroughput: 598445 -> 596281 (-0.36%); split: -0.58%, +0.22% VClause: 10053 -> 9698 (-3.53%); split: -3.54%, +0.01% SClause: 17548 -> 17334 (-1.22%); split: -1.24%, +0.02% Copies: 43196 -> 42249 (-2.19%); split: -2.34%, +0.14% Branches: 16695 -> 16628 (-0.40%); split: -0.47%, +0.07% PreSGPRs: 17988 -> 17971 (-0.09%) PreVGPRs: 13552 -> 13520 (-0.24%) VALU: 244842 -> 246611 (+0.72%); split: -0.02%, +0.74% SALU: 79163 -> 77778 (-1.75%); split: -2.05%, +0.30% VMEM: 13468 -> 13084 (-2.85%) SMEM: 23571 -> 23393 (-0.76%) VOPD: 8384 -> 8372 (-0.14%) Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33002>
This commit is contained in:
parent
372c1a23dc
commit
5f37788ae9
2 changed files with 62 additions and 21 deletions
|
|
@ -115,6 +115,7 @@ struct small_constant {
|
|||
int64_t min;
|
||||
uint32_t bit_size;
|
||||
bool is_float;
|
||||
uint32_t denom;
|
||||
uint32_t bit_stride;
|
||||
};
|
||||
|
||||
|
|
@ -232,24 +233,54 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len,
|
|||
int64_t min = INT64_MAX;
|
||||
|
||||
bool is_float = true;
|
||||
uint32_t denom = 1;
|
||||
if (bit_size < 16) {
|
||||
is_float = false;
|
||||
} else {
|
||||
for (unsigned i = 0; i < array_len; i++) {
|
||||
/* See if it's an easily convertible float.
|
||||
* TODO: Compute greatest common divisor to support non-integer floats.
|
||||
*/
|
||||
double float_value = nir_const_value_as_float(values[i], bit_size);
|
||||
if (fabs(float_value) > NIR_SMALL_CONSTANT_MAX_ABS_VALUE) {
|
||||
is_float = false;
|
||||
break;
|
||||
}
|
||||
|
||||
int64_t int_value = float_value;
|
||||
nir_const_value fc = nir_const_value_for_float(int_value, bit_size);
|
||||
is_float &= !memcmp(&fc, &values[i], bit_size / 8);
|
||||
/* Try out small denominators. Handling large denominators is not worth it
|
||||
* because the numerators will be large in that case, making it unlikely that
|
||||
* they will fit into 64 bits.
|
||||
* Limit to power of two for now, to avoid any rounding issues.
|
||||
*/
|
||||
uint32_t value_denom = 0;
|
||||
for (uint32_t candidate_denom = 1; candidate_denom <= 16; candidate_denom *= 2) {
|
||||
double expanded = float_value * candidate_denom;
|
||||
if (floor(expanded) * (1.0f / (float)candidate_denom) == float_value) {
|
||||
value_denom = candidate_denom;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
min = MIN2(min, int_value);
|
||||
if (!value_denom) {
|
||||
denom = 0;
|
||||
break;
|
||||
} else {
|
||||
denom = MAX2(denom, value_denom);
|
||||
}
|
||||
}
|
||||
|
||||
if (denom) {
|
||||
for (unsigned i = 0; i < array_len; i++) {
|
||||
double fp_val = nir_const_value_as_float(values[i], bit_size) * denom;
|
||||
/* quantize to target precision */
|
||||
fp_val = nir_const_value_as_float(nir_const_value_for_float(fp_val, bit_size), bit_size);
|
||||
|
||||
int64_t int_value = (int64_t)fp_val;
|
||||
|
||||
nir_const_value fc = nir_const_value_for_float(int_value * (1.0f / (float)denom), bit_size);
|
||||
is_float &= !memcmp(&fc, &values[i], bit_size / 8);
|
||||
|
||||
min = MIN2(min, int_value);
|
||||
}
|
||||
} else {
|
||||
is_float = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -268,7 +299,7 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len,
|
|||
int64_t i64_elem;
|
||||
|
||||
if (is_float)
|
||||
i64_elem = nir_const_value_as_float(values[i], bit_size);
|
||||
i64_elem = nir_const_value_as_float(values[i], bit_size) * denom;
|
||||
else if (bit_size == 1)
|
||||
i64_elem = nir_const_value_as_uint(values[i], bit_size);
|
||||
else
|
||||
|
|
@ -294,7 +325,7 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len,
|
|||
int64_t i64_elem;
|
||||
|
||||
if (is_float)
|
||||
i64_elem = nir_const_value_as_float(values[i], bit_size);
|
||||
i64_elem = nir_const_value_as_float(values[i], bit_size) * denom;
|
||||
else if (bit_size == 1)
|
||||
i64_elem = nir_const_value_as_uint(values[i], bit_size);
|
||||
else
|
||||
|
|
@ -311,6 +342,7 @@ get_small_constant_component(struct small_constant *info, uint32_t array_len,
|
|||
info->bit_size = MAX2(util_next_power_of_two(used_bits * array_len), 32);
|
||||
info->min = min;
|
||||
info->is_float = is_float;
|
||||
info->denom = denom;
|
||||
info->bit_stride = used_bits;
|
||||
return true;
|
||||
}
|
||||
|
|
@ -397,6 +429,9 @@ build_small_constant_load(nir_builder *b, nir_deref_instr *deref,
|
|||
ret[c] = nir_u2fN(b, ret[c], bit_size);
|
||||
else
|
||||
ret[c] = nir_i2fN(b, ret[c], bit_size);
|
||||
|
||||
if (constant->denom != 1)
|
||||
ret[c] = nir_fmul_imm(b, ret[c], 1.0f / (float)constant->denom);
|
||||
} else {
|
||||
ret[c] = nir_u2uN(b, ret[c], bit_size);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -299,9 +299,9 @@ TEST_F(nir_large_constants_test, small_fraction_array)
|
|||
int32_t length = 8;
|
||||
array = nir_local_variable_create(b->impl, glsl_array_type(glsl_float_type(), length, 0), "array");
|
||||
for (int32_t i = 0; i < length / 2; i++)
|
||||
nir_store_array_var_imm(b, array, i, nir_imm_float(b, i / 2.0 - 2), 0x1);
|
||||
nir_store_array_var_imm(b, array, i, nir_imm_float(b, i + 2.25), 0x1);
|
||||
for (int32_t i = length / 2; i < length; i++)
|
||||
nir_store_array_var_imm(b, array, i, nir_imm_float(b, (i - length / 2) / 3.0), 0x1);
|
||||
nir_store_array_var_imm(b, array, i, nir_imm_float(b, (i - length / 2) + 0.5), 0x1);
|
||||
|
||||
run_test();
|
||||
|
||||
|
|
@ -311,19 +311,25 @@ TEST_F(nir_large_constants_test, small_fraction_array)
|
|||
workgroup_size: 1, 1, 1
|
||||
max_subgroup_size: 128
|
||||
min_subgroup_size: 1
|
||||
constants: 32
|
||||
decl_function main () (entrypoint)
|
||||
|
||||
impl main {
|
||||
block b0: // preds:
|
||||
32 %0 = @load_workgroup_index
|
||||
32 %1 = load_const (0x00000000)
|
||||
32 %2 = load_const (0x00000002)
|
||||
32 %3 = ishl %0, %2 (0x2)
|
||||
32 %4 = iadd %1 (0x0), %3
|
||||
32 %5 = @load_constant (%4) (base=0, range=32, access=none, align_mul=4, align_offset=0)
|
||||
@use (%5)
|
||||
// succs: b1
|
||||
block b0: // preds:
|
||||
32 %0 = @load_workgroup_index
|
||||
64 %1 = load_const (0x0c080400130f0b07 = 866947326635084551)
|
||||
32 %2 = load_const (0x00000003)
|
||||
32 %3 = ishl %0, %2 (0x3)
|
||||
64 %4 = ushr %1 (0xc080400130f0b07), %3
|
||||
64 %5 = load_const (0x00000000000000ff = 255)
|
||||
64 %6 = iand %4, %5 (0xff)
|
||||
32 %7 = unpack_64_2x32_split_x %6
|
||||
32 %8 = load_const (0x00000002)
|
||||
32 %9 = iadd %7, %8 (0x2)
|
||||
32 %10 = u2f32 %9 // exact, preserve:sz
|
||||
32 %11 = load_const (0x3e800000 = 0.250000)
|
||||
32 %12 = fmul %10, %11 (0.250000) // exact, preserve:sz
|
||||
@use (%12)
|
||||
// succs: b1
|
||||
block b1:
|
||||
}
|
||||
)"));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue