util: Add power-of-two divisor support to compute_fast_udiv_info

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Marek Olšák 2018-10-05 20:42:16 -05:00 committed by Jason Ekstrand
parent 7cde4dbcd7
commit a9be8dddfe
2 changed files with 23 additions and 2 deletions

View file

@ -52,6 +52,27 @@ util_compute_fast_udiv_info(uint64_t D, unsigned num_bits, unsigned UINT_BITS)
/* The eventual result */
struct util_fast_udiv_info result;
if (util_is_power_of_two_or_zero64(D)) {
unsigned div_shift = util_logbase2_64(D);
if (div_shift) {
/* Dividing by a power of two. */
result.multiplier = 1ull << (UINT_BITS - div_shift);
result.pre_shift = 0;
result.post_shift = 0;
result.increment = 0;
return result;
} else {
/* Dividing by 1. */
/* Assuming: floor((num + 1) * (2^32 - 1) / 2^32) = num */
result.multiplier = UINT_BITS == 64 ? UINT64_MAX :
(1ull << UINT_BITS) - 1;
result.pre_shift = 0;
result.post_shift = 0;
result.increment = 1;
return result;
}
}
/* The extra shift implicit in the difference between UINT_BITS and num_bits
*/

View file

@ -98,8 +98,8 @@ util_compute_fast_sdiv_info(int64_t D, unsigned SINT_BITS);
* emit("result >>>= UINT_BITS")
* if m.post_shift > 0: emit("result >>>= m.post_shift")
*
* The shifts by UINT_BITS may be "free" if the high half of the full multiply
* is put in a separate register.
* This second version works even if D is 1. The shifts by UINT_BITS may be
* "free" if the high half of the full multiply is put in a separate register.
*
* saturated_increment(n) means "increment n unless it would wrap to 0," i.e.
* if n == (1 << UINT_BITS)-1: result = n