mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
nir/lower_int64: replace uadd_sat with ior for find_lsb64 and ufind_msb64
Using ior here is equivalent to using uadd_sat, but works for every driver and shouldn't hurt anywhere. I forgot to fix this up when fixing up some vvl errors with zink. Fixes crashes with the integer_ctz CL CTS tests in zink. Fixes:39ec184db6("zink: lower 64 bit find_lsb, ufind_msb and bit_count") Reviewed-by: Karol Herbst <kherbst@redhat.com> Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30535> (cherry picked from commit48acf9d358)
This commit is contained in:
parent
f3f0b87bf8
commit
ff7759cdfe
2 changed files with 10 additions and 23 deletions
|
|
@ -2204,7 +2204,7 @@
|
|||
"description": "nir/lower_int64: replace uadd_sat with ior for find_lsb64 and ufind_msb64",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "39ec184db6e9d614fd540b89e9cba4e6cb77de50",
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -683,24 +683,13 @@ lower_ufind_msb64(nir_builder *b, nir_def *x)
|
|||
nir_def *lo_count = nir_ufind_msb(b, x_lo);
|
||||
nir_def *hi_count = nir_ufind_msb(b, x_hi);
|
||||
|
||||
if (b->shader->options->lower_uadd_sat) {
|
||||
nir_def *valid_hi_bits = nir_ine_imm(b, x_hi, 0);
|
||||
nir_def *hi_res = nir_iadd_imm(b, hi_count, 32);
|
||||
return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
|
||||
} else {
|
||||
/* If hi_count was -1, it will still be -1 after this uadd_sat. As a
|
||||
* result, hi_count is either -1 or the correct return value for 64-bit
|
||||
* ufind_msb.
|
||||
*/
|
||||
nir_def *hi_res = nir_uadd_sat(b, nir_imm_intN_t(b, 32, 32), hi_count);
|
||||
|
||||
/* hi_res is either -1 or a value in the range [63, 32]. lo_count is
|
||||
* either -1 or a value in the range [31, 0]. The imax will pick
|
||||
* lo_count only when hi_res is -1. In those cases, lo_count is
|
||||
* guaranteed to be the correct answer.
|
||||
*/
|
||||
return nir_imax(b, hi_res, lo_count);
|
||||
}
|
||||
/* hi_count is either -1 or a value in the range [31, 0]. lo_count is
|
||||
* the same. The imax will pick lo_count only when hi_count is -1. In those
|
||||
* cases, lo_count is guaranteed to be the correct answer.
|
||||
* The ior 32 is always safe here as with -1 the value won't change,
|
||||
* otherwise it adds 32, which is what we want anyway.
|
||||
*/
|
||||
return nir_imax(b, lo_count, nir_ior_imm(b, hi_count, 32));
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
|
|
@ -713,11 +702,9 @@ lower_find_lsb64(nir_builder *b, nir_def *x)
|
|||
|
||||
/* Use umin so that -1 (no bits found) becomes larger (0xFFFFFFFF)
|
||||
* than any actual bit position, so we return a found bit instead.
|
||||
* This is similar to the ufind_msb lowering. If you need this lowering
|
||||
* without uadd_sat, add code like in lower_ufind_msb64.
|
||||
* This is similar to the ufind_msb lowering.
|
||||
*/
|
||||
assert(!b->shader->options->lower_uadd_sat);
|
||||
return nir_umin(b, lo_lsb, nir_uadd_sat(b, hi_lsb, nir_imm_int(b, 32)));
|
||||
return nir_umin(b, lo_lsb, nir_ior_imm(b, hi_lsb, 32));
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue