From d18b637a7cf75b625f3a881aa22829ec4f52d473 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Wed, 10 Jun 2026 08:16:12 +0200 Subject: [PATCH] tu: Specify max texel buffer and storage buffer limits via GPU props A8XX has a different storage buffer range limit, so move both limits from the driver-wide MAX_STORAGE_BUFFER_RANGE / MAX_TEXEL_ELEMENTS macros into per-GPU props. The values themselves (1 << 27) are unchanged for all generations in this patch. Signed-off-by: Danylo Piliaiev Reviewed-by: Emma Anholt Part-of: --- src/freedreno/common/freedreno_dev_info.h | 3 +++ src/freedreno/common/freedreno_devices.py | 6 ++++++ src/freedreno/vulkan/tu_common.h | 3 --- src/freedreno/vulkan/tu_descriptor_set.cc | 19 +++++++++++-------- src/freedreno/vulkan/tu_device.cc | 4 ++-- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index edb1dae1d4d..4302b8a4bb9 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -485,6 +485,9 @@ struct fd_dev_info { * being able to avoid setting ij_linear_sample for FragFace/FragCoord. */ bool has_implicit_fragface_fragcoord_ij_linear; + + uint32_t max_texel_buffer_range_elements; + uint32_t max_storage_buffer_range_bytes; } props; }; diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 387f8558dd9..0aaa1a90f07 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -148,6 +148,8 @@ a6xx_base = GPUProps( line_width_max = 1.0, mov_half_shared_quirk = True, max_draw_states = 32, + max_texel_buffer_range_elements = 1 << 27, + max_storage_buffer_range_bytes = 1 << 27, ) @@ -800,6 +802,8 @@ a7xx_base = GPUProps( has_eolm_eogm = True, round_robin_errata = True, + max_texel_buffer_range_elements = 1 << 27, + max_storage_buffer_range_bytes = 1 << 27, ) a7xx_gen1 = GPUProps( @@ -1243,6 +1247,8 @@ a8xx_base = GPUProps( supports_double_threadsize = False, has_dual_wave_dispatch = True, round_robin_errata = False, + max_texel_buffer_range_elements = 1 << 27, + max_storage_buffer_range_bytes = 1 
<< 27, ) # For a8xx, the chicken bit and most other non-ctx reg diff --git a/src/freedreno/vulkan/tu_common.h b/src/freedreno/vulkan/tu_common.h index afb180794a3..9878d28a448 100644 --- a/src/freedreno/vulkan/tu_common.h +++ b/src/freedreno/vulkan/tu_common.h @@ -107,9 +107,6 @@ #define MAX_VIEWS 6 #define MAX_HW_SCALED_VIEWS 6 #define MAX_BIND_POINTS 2 /* compute + graphics */ -/* match the latest Qualcomm driver which is also a hw limit on later gens */ -#define MAX_STORAGE_BUFFER_RANGE (1u << 27) -#define MAX_TEXEL_ELEMENTS (1u << 27) /* We use ldc for uniform buffer loads, just like the Qualcomm driver, so * expose the same maximum range. * TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual diff --git a/src/freedreno/vulkan/tu_descriptor_set.cc b/src/freedreno/vulkan/tu_descriptor_set.cc index de35489f3e3..5f421a7c591 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.cc +++ b/src/freedreno/vulkan/tu_descriptor_set.cc @@ -1168,7 +1168,7 @@ write_sampler_descriptor(uint32_t *dst, VkSampler _sampler) template static void -write_accel_struct(uint32_t *dst, uint64_t va) +write_accel_struct(uint32_t *dst, uint64_t va, uint32_t max_texel_elements) { /* We don't actually use the bounds checking in the shader, since the * instance array is accessed entirely with a driver-controlled offset. 
@@ -1177,7 +1177,7 @@ write_accel_struct(uint32_t *dst, uint64_t va) */ fdl6_buffer_view_init(dst, PIPE_FORMAT_R32_UINT, tu_swiz(X, X, X, X), va, - MAX_TEXEL_ELEMENTS, AS_RECORD_SIZE / 4); + max_texel_elements, AS_RECORD_SIZE / 4); } /* note: this is used with immutable samplers in push descriptors */ @@ -1232,10 +1232,11 @@ tu_GetDescriptorEXT( write_sampler_descriptor(dest, *pDescriptorInfo->data.pSampler); break; case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { + uint32_t max_texel_elements = device->physical_device->info->props.max_texel_buffer_range_elements; if (pDescriptorInfo->data.accelerationStructure == 0) { - write_accel_struct(dest, device->null_accel_struct_bo->iova); + write_accel_struct(dest, device->null_accel_struct_bo->iova, max_texel_elements); } else { - write_accel_struct(dest, pDescriptorInfo->data.accelerationStructure); + write_accel_struct(dest, pDescriptorInfo->data.accelerationStructure, max_texel_elements); } break; } @@ -1365,11 +1366,12 @@ tu_update_descriptor_sets(const struct tu_device *device, break; case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accel_structs->pAccelerationStructures[j]); + uint32_t max_texel_elements = device->physical_device->info->props.max_texel_buffer_range_elements; if (accel_struct) { write_accel_struct(ptr, - vk_acceleration_structure_get_va(accel_struct)); + vk_acceleration_structure_get_va(accel_struct), max_texel_elements); } else { - write_accel_struct(ptr, device->null_accel_struct_bo->iova); + write_accel_struct(ptr, device->null_accel_struct_bo->iova, max_texel_elements); } break; } @@ -1716,11 +1718,12 @@ tu_update_descriptor_set_with_template( break; case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, *(const VkAccelerationStructureKHR *)src); + uint32_t max_texel_elements = device->physical_device->info->props.max_texel_buffer_range_elements; if (accel_struct) { 
write_accel_struct(ptr, - vk_acceleration_structure_get_va(accel_struct)); + vk_acceleration_structure_get_va(accel_struct), max_texel_elements); } else { - write_accel_struct(ptr, device->null_accel_struct_bo->iova); + write_accel_struct(ptr, device->null_accel_struct_bo->iova, max_texel_elements); } break; } diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index dbffcc82954..3973eeb0d27 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -1149,9 +1149,9 @@ tu_get_properties(struct tu_physical_device *pdevice, props->maxImageDimension3D = (1 << 11); props->maxImageDimensionCube = (1 << 14); props->maxImageArrayLayers = (1 << (pdevice->info->props.is_a702 ? 8 : 11)); - props->maxTexelBufferElements = MAX_TEXEL_ELEMENTS; + props->maxTexelBufferElements = pdevice->info->props.max_texel_buffer_range_elements; props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE; - props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE; + props->maxStorageBufferRange = pdevice->info->props.max_storage_buffer_range_bytes; props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE; props->maxMemoryAllocationCount = UINT32_MAX; props->maxSamplerAllocationCount = 64 * 1024;