mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
nir/opt_varyings: fix compaction with sparse indirect FS inputs
Without this, compaction can put inputs into vec4 slots already occupied by indirectly-accessed inputs while ignoring their interpolation qualifier, which is incorrect. All input components sharing the same vec4 slot must use interpolation qualifiers that are compatible with each other.

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32424>
This commit is contained in:
parent
b01f3cea7a
commit
1aa9fec542
1 changed files with 32 additions and 13 deletions
|
|
@ -4222,19 +4222,6 @@ fs_assign_slots(struct linkage_info *linkage,
|
|||
continue;
|
||||
}
|
||||
|
||||
/* Copy the FS vec4 type if indexed indirectly, and move to
|
||||
* the next slot.
|
||||
*/
|
||||
if (BITSET_TEST32(linkage->indirect_mask, slot_index)) {
|
||||
if (assigned_fs_vec4_type) {
|
||||
assigned_fs_vec4_type[vec4_slot(slot_index)] =
|
||||
linkage->fs_vec4_type[vec4_slot(slot_index)];
|
||||
}
|
||||
assert(slot_index % 2 == 0);
|
||||
slot_index += 2; /* increment by 32 bits */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* This slot is already assigned (assigned_mask is set). Move to
|
||||
* the next one.
|
||||
*/
|
||||
|
|
@ -4559,6 +4546,38 @@ compact_varyings(struct linkage_info *linkage,
|
|||
BITSET_DECLARE(assigned_mask, NUM_SCALAR_SLOTS);
|
||||
BITSET_ZERO(assigned_mask);
|
||||
|
||||
/* Iterate over all indirectly accessed inputs and set the assigned vec4
|
||||
* type of each occupied slot to the vec4 type of indirect inputs, so
|
||||
* that compaction doesn't put inputs of a different vec4 type in
|
||||
* the same vec4.
|
||||
*
|
||||
* We don't try to compact indirect input arrays, though we could.
|
||||
*/
|
||||
unsigned i;
|
||||
BITSET_FOREACH_SET(i, linkage->indirect_mask, NUM_SCALAR_SLOTS) {
|
||||
struct scalar_slot *slot = &linkage->slot[i];
|
||||
|
||||
/* The slot of the first array element contains all loads for all
|
||||
* elements, including all direct accesses, while all other array
|
||||
* elements are empty (on purpose).
|
||||
*/
|
||||
if (list_is_empty(&linkage->slot[i].consumer.loads))
|
||||
continue;
|
||||
|
||||
assert(slot->num_slots >= 2);
|
||||
|
||||
for (unsigned array_index = 0; array_index < slot->num_slots;
|
||||
array_index++) {
|
||||
unsigned vec4_index = vec4_slot(i) + array_index;
|
||||
unsigned scalar_index = i + array_index * 8;
|
||||
assigned_fs_vec4_type[vec4_index] = linkage->fs_vec4_type[vec4_index];
|
||||
/* Indirectly-indexed slots are marked to always occupy 32 bits
|
||||
* (2 16-bit slots), though we waste the high 16 bits if they are unused.
|
||||
*/
|
||||
BITSET_SET_RANGE_INSIDE_WORD(assigned_mask, scalar_index, scalar_index + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (linkage->has_flexible_interp) {
|
||||
/* This codepath packs convergent varyings with both interpolated and
|
||||
* flat, whichever has free space.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue