mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
radv/bvh: Fix calculating the vertex payload/prefix sizes
This calculation needs to happen in the same loop as the
geometry/triangle ID calculations, in case the selected invocation comes
before all invocations that were already selected.
Totals from 1269 (15.10% of 8406) affected BVHs:
compacted_size: 137581888 -> 137606464 (+0.02%); split: -0.08%, +0.10%
sah: 6496048424 -> 6496048450 (+0.00%); split: -0.00%, +0.00%
primitive_node_count: 604384 -> 604656 (+0.05%); split: -0.14%, +0.19%
Fixes: c18a7d0 ("radv: Emit compressed primitive nodes on GFX12")
Reviewed-by: Natalie Vock <natalie.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38462>
This commit is contained in:
parent
3a3810647e
commit
2749b5b713
1 changed file with 19 additions and 29 deletions
|
|
@ -223,7 +223,24 @@ main()
|
|||
uint32_t triangle_id_payload_bit_size;
|
||||
uint32_t geometry_id_base_bit_size;
|
||||
uint32_t geometry_id_payload_bit_size;
|
||||
uint32_t trailing_zero_bits;
|
||||
uvec3 vertex_payload_bit_size;
|
||||
for (uint32_t i = 0; i <= first_assigned_invocation; i++) {
|
||||
uvec3 vertex_prefix = radv_read_invocation(cluster, i, floatBitsToUint(vertices[0]));
|
||||
uvec3 vertex_payload_mask = uvec3(0);
|
||||
uint32_t vertex_non_zero_mask = 0;
|
||||
for (uint32_t i = 0; i < invocation_vertex_count; i++) {
|
||||
vertex_payload_mask |= vertex_prefix ^ floatBitsToUint(vertices[i]);
|
||||
vertex_non_zero_mask |=
|
||||
floatBitsToUint(vertices[i].x) | floatBitsToUint(vertices[i].y) | floatBitsToUint(vertices[i].z);
|
||||
}
|
||||
uint32_t invoc_trailing_zero_bits = min(findLSB(vertex_non_zero_mask), 32u);
|
||||
uvec3 invoc_vertex_payload_bit_size = min(findMSB(vertex_payload_mask), 31u) + 1;
|
||||
trailing_zero_bits =
|
||||
subgroupClusteredMin(assigned ? invoc_trailing_zero_bits : 32, RADV_TRIANGLE_ENCODE_TASK_INVOCATION_COUNT);
|
||||
vertex_payload_bit_size =
|
||||
subgroupClusteredMax(assigned ? invoc_vertex_payload_bit_size : uvec3(0), RADV_TRIANGLE_ENCODE_TASK_INVOCATION_COUNT);
|
||||
|
||||
/* Determine the number of bits required to represent the node ids in the hw's encoding format.
|
||||
* Base and "offset" are masked and OR'd together, so look at the highest-ordered differing bit.
|
||||
*/
|
||||
|
|
@ -242,6 +259,8 @@ main()
|
|||
RADV_TRIANGLE_ENCODE_TASK_INVOCATION_COUNT);
|
||||
|
||||
if (!assigned) {
|
||||
trailing_zero_bits = min(trailing_zero_bits, invoc_trailing_zero_bits);
|
||||
vertex_payload_bit_size = max(vertex_payload_bit_size, invoc_vertex_payload_bit_size);
|
||||
triangle_id_payload_bit_size = max(triangle_id_payload_bit_size, invoc_triangle_id_payload_bit_size);
|
||||
geometry_id_payload_bit_size = max(geometry_id_payload_bit_size, invoc_geometry_id_payload_bit_size);
|
||||
}
|
||||
|
|
@ -252,35 +271,6 @@ main()
|
|||
|
||||
geometry_id_payload_bit_size = align(geometry_id_payload_bit_size, 2);
|
||||
|
||||
/* vertex_used[0] is guaranteed to be true for at least one invocation. */
|
||||
uvec3 vertex_prefix = first_assigned_invocation == 0xffffffff
|
||||
? floatBitsToUint(vertices[0])
|
||||
: radv_read_invocation(cluster, first_assigned_invocation, floatBitsToUint(vertices[0]));
|
||||
uvec3 vertex_payload_mask = uvec3(0);
|
||||
uint32_t vertex_non_zero_mask = 0;
|
||||
for (uint32_t i = 0; i < invocation_vertex_count; i++) {
|
||||
vertex_payload_mask |= vertex_prefix ^ floatBitsToUint(vertices[i]);
|
||||
vertex_non_zero_mask |=
|
||||
floatBitsToUint(vertices[i].x) | floatBitsToUint(vertices[i].y) | floatBitsToUint(vertices[i].z);
|
||||
}
|
||||
|
||||
uint32_t trailing_zero_bits = min(findLSB(vertex_non_zero_mask), 32u);
|
||||
uvec3 vertex_payload_bit_size = min(findMSB(vertex_payload_mask), 31u) + 1;
|
||||
|
||||
if (!assigned) {
|
||||
trailing_zero_bits = 32;
|
||||
vertex_payload_bit_size = uvec3(0);
|
||||
}
|
||||
|
||||
trailing_zero_bits = subgroupClusteredMin(trailing_zero_bits, RADV_TRIANGLE_ENCODE_TASK_INVOCATION_COUNT);
|
||||
vertex_payload_bit_size =
|
||||
subgroupClusteredMax(vertex_payload_bit_size, RADV_TRIANGLE_ENCODE_TASK_INVOCATION_COUNT);
|
||||
|
||||
if (!assigned) {
|
||||
trailing_zero_bits = min(trailing_zero_bits, min(findLSB(vertex_non_zero_mask), 32u));
|
||||
vertex_payload_bit_size = max(vertex_payload_bit_size, min(findMSB(vertex_payload_mask), 31u) + 1);
|
||||
}
|
||||
|
||||
vertex_payload_bit_size.x =
|
||||
vertex_payload_bit_size.x > trailing_zero_bits ? vertex_payload_bit_size.x - trailing_zero_bits : 1;
|
||||
vertex_payload_bit_size.y =
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue