mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
nir/lower_int64: Fix [iu]mul_high handling
e551040c60, which added a new mechanism for 64-bit imul that is more efficient on BDW and later Intel hardware, also introduced a bug where we weren't properly walking both X and Y. No idea how testing didn't find this. Fixes: e551040c60 ("nir/glsl: Add another way of doing lower_imul64 for gen8+") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6306 Reviewed-by: Matt Turner <mattst88@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15829> (cherry picked from commit d0ace28790)
This commit is contained in:
parent
5ab0185c2a
commit
c1346ca615
2 changed files with 3 additions and 3 deletions
|
|
@ -2484,7 +2484,7 @@
|
|||
"description": "nir/lower_int64: Fix [iu]mul_high handling",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"because_sha": "e551040c602d392019e68f54d9a3a310d2a937a3"
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -455,7 +455,7 @@ lower_mul_high64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
|
|||
for (unsigned i = 0; i < 4; i++) {
|
||||
nir_ssa_def *carry = NULL;
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
/* The maximum values of x32[i] and y32[i] are UINT32_MAX so the
|
||||
/* The maximum values of x32[i] and y32[j] are UINT32_MAX so the
|
||||
* maximum value of tmp is UINT32_MAX * UINT32_MAX. The maximum
|
||||
* value that will fit in tmp is
|
||||
*
|
||||
|
|
@ -466,7 +466,7 @@ lower_mul_high64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
|
|||
* so we're guaranteed that we can add in two more 32-bit values
|
||||
* without overflowing tmp.
|
||||
*/
|
||||
nir_ssa_def *tmp = nir_umul_2x32_64(b, x32[i], y32[i]);
|
||||
nir_ssa_def *tmp = nir_umul_2x32_64(b, x32[i], y32[j]);
|
||||
|
||||
if (res[i + j])
|
||||
tmp = nir_iadd(b, tmp, nir_u2u64(b, res[i + j]));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue