zink: scalarize when rewriting explicit 64bit io

All of ntv requires scalarized io, since the offsets are now array indices
instead of byte offsets. Enforce scalarization here to avoid breaking
the universe.

Fixes: 150d6ee97e ("zink: move all 64-32bit shader load rewriting to nir pass")

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16669>
(cherry picked from commit cdaa601de3)
This commit is contained in:
Mike Blumenkrantz 2022-05-26 17:27:39 -04:00 committed by Dylan Baker
parent 692375bf9d
commit 594dc7af9a
2 changed files with 18 additions and 14 deletions

View file

@ -850,7 +850,7 @@
"description": "zink: scalarize when rewriting explicit 64bit io",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "150d6ee97e374b5f520fc1ec3817a8a09c4b80fc"
},

View file

@ -888,14 +888,16 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
/* this is always scalarized */
assert(intr->dest.ssa.num_components == 1);
/* rewrite as 2x32 */
nir_ssa_def *load;
if (intr->intrinsic == nir_intrinsic_load_ssbo)
load = nir_load_ssbo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0);
else
load = nir_load_ubo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0, .range = 4);
nir_intrinsic_set_access(nir_instr_as_intrinsic(load->parent_instr), nir_intrinsic_access(intr));
nir_ssa_def *load[2];
for (unsigned i = 0; i < 2; i++) {
if (intr->intrinsic == nir_intrinsic_load_ssbo)
load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
else
load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
}
/* cast back to 64bit */
nir_ssa_def *casted = nir_pack_64_2x32(b, load);
nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
nir_instr_remove(instr);
}
@ -909,9 +911,11 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
/* this is always scalarized */
assert(intr->dest.ssa.num_components == 1);
/* rewrite as 2x32 */
nir_ssa_def *load = nir_load_shared(b, 2, 32, intr->src[0].ssa, .align_mul = 4, .align_offset = 0);
nir_ssa_def *load[2];
for (unsigned i = 0; i < 2; i++)
load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
/* cast back to 64bit */
nir_ssa_def *casted = nir_pack_64_2x32(b, load);
nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
nir_instr_remove(instr);
return true;
@ -926,8 +930,8 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
assert(intr->src[0].ssa->num_components == 1);
/* cast to 32bit: nir_unpack_64_2x32 not supported by ntv */
nir_ssa_def *casted = nir_vec2(b, nir_u2u32(b, intr->src[0].ssa), nir_u2u32(b, nir_ushr_imm(b, intr->src[0].ssa, 32)));
/* rewrite as 2x32 */
nir_store_ssbo(b, casted, intr->src[1].ssa, intr->src[2].ssa, .align_mul = 4, .align_offset = 0);
for (unsigned i = 0; i < 2; i++)
nir_store_ssbo(b, nir_channel(b, casted, i), intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
nir_instr_remove(instr);
}
return true;
@ -940,8 +944,8 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
assert(intr->src[0].ssa->num_components == 1);
/* cast to 32bit: nir_unpack_64_2x32 not supported by ntv */
nir_ssa_def *casted = nir_vec2(b, nir_u2u32(b, intr->src[0].ssa), nir_u2u32(b, nir_ushr_imm(b, intr->src[0].ssa, 32)));
/* rewrite as 2x32 */
nir_store_shared(b, casted, intr->src[1].ssa, .align_mul = 4, .align_offset = 0);
for (unsigned i = 0; i < 2; i++)
nir_store_shared(b, nir_channel(b, casted, i), nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
nir_instr_remove(instr);
}
return true;