ac/lower_ngg: Fix collecting buffer offsets from 4 lanes on gfx12

Also use readlane for improved performance.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32743>
This commit is contained in:
Daniel Schürmann 2024-12-19 11:45:02 +01:00 committed by Marge Bot
parent ed58b869e1
commit 39dcd9dedb

View file

@@ -2144,14 +2144,9 @@ ngg_build_streamout_buffer_info(nir_builder *b,
nir_def *offset[4] = {undef, undef, undef, undef};
for (unsigned buffer = 0; buffer < 4; buffer++) {
-if (info->buffers_written & BITFIELD_BIT(buffer)) {
-if (!buffer) {
-offset[buffer] = buffer_offset_per_lane;
-} else {
-offset[buffer] = nir_quad_swizzle_amd(b, buffer_offset_per_lane,
-.swizzle_mask = BITFIELD_BIT(buffer));
-}
-}
+if (info->buffers_written & BITFIELD_BIT(buffer))
+offset[buffer] = nir_read_invocation(b, buffer_offset_per_lane,
+nir_imm_int(b, buffer));
}
buffer_offsets = nir_vec(b, offset, 4);
}