tu: Specify max texel buffer and storage buffer limits via GPU props

A8XX has a different storage buffer range limit.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41477>
This commit is contained in:
Danylo Piliaiev 2026-06-10 08:16:12 +02:00 committed by Marge Bot
parent fd99d813af
commit d18b637a7c
5 changed files with 22 additions and 13 deletions

View file

@ -485,6 +485,9 @@ struct fd_dev_info {
* being able to avoid setting ij_linear_sample for FragFace/FragCoord.
*/
bool has_implicit_fragface_fragcoord_ij_linear;
uint32_t max_texel_buffer_range_elements;
uint32_t max_storage_buffer_range_bytes;
} props;
};

View file

@ -148,6 +148,8 @@ a6xx_base = GPUProps(
line_width_max = 1.0,
mov_half_shared_quirk = True,
max_draw_states = 32,
max_texel_buffer_range_elements = 1 << 27,
max_storage_buffer_range_bytes = 1 << 27,
)
@ -800,6 +802,8 @@ a7xx_base = GPUProps(
has_eolm_eogm = True,
round_robin_errata = True,
max_texel_buffer_range_elements = 1 << 27,
max_storage_buffer_range_bytes = 1 << 27,
)
a7xx_gen1 = GPUProps(
@ -1243,6 +1247,8 @@ a8xx_base = GPUProps(
supports_double_threadsize = False,
has_dual_wave_dispatch = True,
round_robin_errata = False,
max_texel_buffer_range_elements = 1 << 27,
max_storage_buffer_range_bytes = 1 << 27,
)
# For a8xx, the chicken bit and most other non-ctx reg

View file

@ -107,9 +107,6 @@
#define MAX_VIEWS 6
#define MAX_HW_SCALED_VIEWS 6
#define MAX_BIND_POINTS 2 /* compute + graphics */
/* match the latest Qualcomm driver which is also a hw limit on later gens */
#define MAX_STORAGE_BUFFER_RANGE (1u << 27)
#define MAX_TEXEL_ELEMENTS (1u << 27)
/* We use ldc for uniform buffer loads, just like the Qualcomm driver, so
* expose the same maximum range.
* TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual

View file

@ -1168,7 +1168,7 @@ write_sampler_descriptor(uint32_t *dst, VkSampler _sampler)
template <chip CHIP>
static void
write_accel_struct(uint32_t *dst, uint64_t va)
write_accel_struct(uint32_t *dst, uint64_t va, uint32_t max_texel_elements)
{
/* We don't actually use the bounds checking in the shader, since the
* instance array is accessed entirely with a driver-controlled offset.
@ -1177,7 +1177,7 @@ write_accel_struct(uint32_t *dst, uint64_t va)
*/
fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R32_UINT,
tu_swiz(X, X, X, X), va,
MAX_TEXEL_ELEMENTS, AS_RECORD_SIZE / 4);
max_texel_elements, AS_RECORD_SIZE / 4);
}
/* note: this is used with immutable samplers in push descriptors */
@ -1232,10 +1232,11 @@ tu_GetDescriptorEXT(
write_sampler_descriptor(dest, *pDescriptorInfo->data.pSampler);
break;
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
uint32_t max_texel_elements = device->physical_device->info->props.max_texel_buffer_range_elements;
if (pDescriptorInfo->data.accelerationStructure == 0) {
write_accel_struct<CHIP>(dest, device->null_accel_struct_bo->iova);
write_accel_struct<CHIP>(dest, device->null_accel_struct_bo->iova, max_texel_elements);
} else {
write_accel_struct<CHIP>(dest, pDescriptorInfo->data.accelerationStructure);
write_accel_struct<CHIP>(dest, pDescriptorInfo->data.accelerationStructure, max_texel_elements);
}
break;
}
@ -1365,11 +1366,12 @@ tu_update_descriptor_sets(const struct tu_device *device,
break;
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accel_structs->pAccelerationStructures[j]);
uint32_t max_texel_elements = device->physical_device->info->props.max_texel_buffer_range_elements;
if (accel_struct) {
write_accel_struct<CHIP>(ptr,
vk_acceleration_structure_get_va(accel_struct));
vk_acceleration_structure_get_va(accel_struct), max_texel_elements);
} else {
write_accel_struct<CHIP>(ptr, device->null_accel_struct_bo->iova);
write_accel_struct<CHIP>(ptr, device->null_accel_struct_bo->iova, max_texel_elements);
}
break;
}
@ -1716,11 +1718,12 @@ tu_update_descriptor_set_with_template(
break;
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, *(const VkAccelerationStructureKHR *)src);
uint32_t max_texel_elements = device->physical_device->info->props.max_texel_buffer_range_elements;
if (accel_struct) {
write_accel_struct<CHIP>(ptr,
vk_acceleration_structure_get_va(accel_struct));
vk_acceleration_structure_get_va(accel_struct), max_texel_elements);
} else {
write_accel_struct<CHIP>(ptr, device->null_accel_struct_bo->iova);
write_accel_struct<CHIP>(ptr, device->null_accel_struct_bo->iova, max_texel_elements);
}
break;
}

View file

@ -1149,9 +1149,9 @@ tu_get_properties(struct tu_physical_device *pdevice,
props->maxImageDimension3D = (1 << 11);
props->maxImageDimensionCube = (1 << 14);
props->maxImageArrayLayers = (1 << (pdevice->info->props.is_a702 ? 8 : 11));
props->maxTexelBufferElements = MAX_TEXEL_ELEMENTS;
props->maxTexelBufferElements = pdevice->info->props.max_texel_buffer_range_elements;
props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
props->maxStorageBufferRange = pdevice->info->props.max_storage_buffer_range_bytes;
props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
props->maxMemoryAllocationCount = UINT32_MAX;
props->maxSamplerAllocationCount = 64 * 1024;