mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 21:40:20 +01:00
nir/opt_vectorize_io: don't vectorize 16-bit IO to vec8 - it's illegal
NIR represents low bits of 16-bit IO as a separate vec4, and high bits as another separate vec4. There is no representation that allows vec8.

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35315>
This commit is contained in:
parent
1f80ff5550
commit
caddd67b8c
1 changed file with 26 additions and 4 deletions
|
|
@@ -127,6 +127,7 @@ compare_intr(const void *xa, const void *xb)
|
|||
|
||||
typedef enum {
|
||||
merge_low_high_16_to_32,
|
||||
vectorize_high_16_separately,
|
||||
vectorize_the_rest,
|
||||
} nir_vectorize_op_step;
|
||||
|
||||
|
|
@@ -157,16 +158,26 @@ vectorize_load(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
|
|||
memcpy(new_intr->src, first->src,
|
||||
nir_intrinsic_infos[first->intrinsic].num_srcs * sizeof(nir_src));
|
||||
nir_intrinsic_copy_const_indices(new_intr, first);
|
||||
nir_intrinsic_set_component(new_intr, start);
|
||||
nir_intrinsic_set_component(new_intr, start & 0x3); /* Bits 4..7 should map to 0..3 */
|
||||
assert(start % 4 + count <= 4);
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(new_intr);
|
||||
|
||||
if (step == vectorize_high_16_separately) {
|
||||
assert(start >= 4);
|
||||
sem.high_16bits = 1;
|
||||
} else {
|
||||
assert(start <= 3);
|
||||
}
|
||||
|
||||
if (step == merge_low_high_16_to_32) {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(new_intr);
|
||||
sem.high_16bits = 0;
|
||||
nir_intrinsic_set_io_semantics(new_intr, sem);
|
||||
nir_intrinsic_set_dest_type(new_intr,
|
||||
(nir_intrinsic_dest_type(new_intr) & ~16) | 32);
|
||||
}
|
||||
|
||||
nir_intrinsic_set_io_semantics(new_intr, sem);
|
||||
|
||||
nir_builder_instr_insert(&b, &new_intr->instr);
|
||||
nir_def *def = &new_intr->def;
|
||||
|
||||
|
|
@@ -260,6 +271,13 @@ vectorize_store(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
|
|||
nir_io_semantics sem = nir_intrinsic_io_semantics(last);
|
||||
sem.gs_streams = gs_streams;
|
||||
|
||||
if (step == vectorize_high_16_separately) {
|
||||
assert(start >= 4);
|
||||
sem.high_16bits = 1;
|
||||
} else {
|
||||
assert(start <= 3);
|
||||
}
|
||||
|
||||
/* Update other flags. */
|
||||
for (unsigned i = start; i < start + count; i++) {
|
||||
if (!nir_intrinsic_io_semantics(chan[i]).no_sysval_output)
|
||||
|
|
@@ -287,7 +305,8 @@ vectorize_store(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
|
|||
|
||||
/* Update the rest. */
|
||||
nir_intrinsic_set_io_semantics(last, sem);
|
||||
nir_intrinsic_set_component(last, start);
|
||||
nir_intrinsic_set_component(last, start & 0x3); /* Bits 4..7 should map to 0..3 */
|
||||
assert(start % 4 + count <= 4);
|
||||
nir_intrinsic_set_write_mask(last, BITFIELD_MASK(count));
|
||||
last->num_components = count;
|
||||
|
||||
|
|
@@ -368,6 +387,9 @@ vectorize_slot(nir_intrinsic_instr *chan[8], unsigned mask)
|
|||
mask &= ~low_high_bits;
|
||||
}
|
||||
}
|
||||
} else if (step == vectorize_high_16_separately) {
|
||||
scan_mask = mask & BITFIELD_RANGE(4, 4);
|
||||
mask &= ~scan_mask;
|
||||
} else {
|
||||
scan_mask = mask;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue