tu: Don't keep track of acceleration structure sizes

We don't actually need to put the size of the AS in the descriptor, as
the blob driver does, because we don't need to rely on bounds checking:
the index is controlled by the driver. The only way to get a "garbage"
index is if ray query functions are called in the wrong order, which is
already undefined behavior. By always using the maximum size we can
stop tracking sizes entirely, which is required for
VK_EXT_descriptor_buffer, where only the AS address is provided.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36502>
This commit is contained in:
Connor Abbott 2025-07-31 16:31:46 -04:00 committed by Marge Bot
parent 91cee75e63
commit c0884d04d8
4 changed files with 12 additions and 31 deletions

View file

@@ -234,11 +234,6 @@ encode(VkCommandBuffer commandBuffer,
tu_dispatch_unaligned_indirect(commandBuffer,
intermediate_header_addr +
offsetof(struct vk_ir_header, ir_internal_node_count));
*(VkDeviceSize *)
util_sparse_array_get(&device->accel_struct_ranges,
vk_acceleration_structure_get_va(dst)) = dst->size;
}
static VkResult

View file

@@ -1160,15 +1160,16 @@ write_sampler_descriptor(uint32_t *dst, VkSampler _sampler)
}
static void
write_accel_struct(uint32_t *dst, uint64_t va, uint64_t size)
write_accel_struct(uint32_t *dst, uint64_t va)
{
dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT);
/* The overall range of the entire AS may be more than the max range, but
* the SSBO is only used to access the instance descriptors and header.
* Make sure that we don't specify a too-large range.
/* We don't actually use the bounds checking in the shader, since the
* instance array is accessed entirely with a driver-controlled offset.
* Therefore just always specify the maximum possible size to avoid having
* to keep track of the size.
*/
dst[1] = MAX2(DIV_ROUND_UP(size, AS_RECORD_SIZE), MAX_TEXEL_ELEMENTS);
dst[1] = MAX_TEXEL_ELEMENTS;
dst[2] =
A6XX_TEX_CONST_2_STRUCTSIZETEXELS(AS_RECORD_SIZE / 4) |
A6XX_TEX_CONST_2_STARTOFFSETTEXELS(0) |
@@ -1229,13 +1230,9 @@ tu_GetDescriptorEXT(
break;
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
if (pDescriptorInfo->data.accelerationStructure == 0) {
write_accel_struct(dest, device->null_accel_struct_bo->iova,
device->null_accel_struct_bo->size);
write_accel_struct(dest, device->null_accel_struct_bo->iova);
} else {
VkDeviceSize size = *(VkDeviceSize *)
util_sparse_array_get(&device->accel_struct_ranges,
pDescriptorInfo->data.accelerationStructure);
write_accel_struct(dest, pDescriptorInfo->data.accelerationStructure, size);
write_accel_struct(dest, pDescriptorInfo->data.accelerationStructure);
}
break;
}
@@ -1361,11 +1358,9 @@ tu_update_descriptor_sets(const struct tu_device *device,
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accel_structs->pAccelerationStructures[j]);
if (accel_struct) {
write_accel_struct(ptr,
vk_acceleration_structure_get_va(accel_struct),
accel_struct->size);
vk_acceleration_structure_get_va(accel_struct));
} else {
write_accel_struct(ptr, device->null_accel_struct_bo->iova,
device->null_accel_struct_bo->size);
write_accel_struct(ptr, device->null_accel_struct_bo->iova);
}
break;
}
@@ -1703,11 +1698,9 @@ tu_update_descriptor_set_with_template(
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, *(const VkAccelerationStructureKHR *)src);
if (accel_struct) {
write_accel_struct(ptr,
vk_acceleration_structure_get_va(accel_struct),
accel_struct->size);
vk_acceleration_structure_get_va(accel_struct));
} else {
write_accel_struct(ptr, device->null_accel_struct_bo->iova,
device->null_accel_struct_bo->size);
write_accel_struct(ptr, device->null_accel_struct_bo->iova);
}
break;
}

View file

@@ -2658,8 +2658,6 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
if (result != VK_SUCCESS)
goto fail_queues;
util_sparse_array_init(&device->accel_struct_ranges, sizeof(VkDeviceSize), 256);
mtx_init(&device->radix_sort_mutex, mtx_plain);
{
@@ -2948,7 +2946,6 @@ fail_free_zombie_vma:
u_vector_finish(&device->zombie_vmas);
ir3_compiler_destroy(device->compiler);
fail_compiler:
util_sparse_array_finish(&device->accel_struct_ranges);
vk_meta_device_finish(&device->vk, &device->meta);
fail_queues:
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
@@ -3001,8 +2998,6 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
vk_meta_device_finish(&device->vk, &device->meta);
util_sparse_array_finish(&device->accel_struct_ranges);
ir3_compiler_destroy(device->compiler);
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);

View file

@@ -300,8 +300,6 @@ struct tu_device
radix_sort_vk_t *radix_sort;
mtx_t radix_sort_mutex;
struct util_sparse_array accel_struct_ranges;
#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
/* Currently the kernel driver uses a 32-bit GPU address space, but it