mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-18 23:48:22 +02:00
tu: Specify max texel buffer and storage buffer limits via GPU props
A8XX has a different storage buffer range limit.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41477>
This commit is contained in:
parent
fd99d813af
commit
d18b637a7c
5 changed files with 22 additions and 13 deletions
|
|
@@ -485,6 +485,9 @@ struct fd_dev_info {
|
|||
* being able to avoid setting ij_linear_sample for FragFace/FragCoord.
|
||||
*/
|
||||
bool has_implicit_fragface_fragcoord_ij_linear;
|
||||
|
||||
uint32_t max_texel_buffer_range_elements;
|
||||
uint32_t max_storage_buffer_range_bytes;
|
||||
} props;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@@ -148,6 +148,8 @@ a6xx_base = GPUProps(
|
|||
line_width_max = 1.0,
|
||||
mov_half_shared_quirk = True,
|
||||
max_draw_states = 32,
|
||||
max_texel_buffer_range_elements = 1 << 27,
|
||||
max_storage_buffer_range_bytes = 1 << 27,
|
||||
)
|
||||
|
||||
|
||||
|
|
@@ -800,6 +802,8 @@ a7xx_base = GPUProps(
|
|||
has_eolm_eogm = True,
|
||||
|
||||
round_robin_errata = True,
|
||||
max_texel_buffer_range_elements = 1 << 27,
|
||||
max_storage_buffer_range_bytes = 1 << 27,
|
||||
)
|
||||
|
||||
a7xx_gen1 = GPUProps(
|
||||
|
|
@@ -1243,6 +1247,8 @@ a8xx_base = GPUProps(
|
|||
supports_double_threadsize = False,
|
||||
has_dual_wave_dispatch = True,
|
||||
round_robin_errata = False,
|
||||
max_texel_buffer_range_elements = 1 << 27,
|
||||
max_storage_buffer_range_bytes = 1 << 27,
|
||||
)
|
||||
|
||||
# For a8xx, the chicken bit and most other non-ctx reg
|
||||
|
|
|
|||
|
|
@@ -107,9 +107,6 @@
|
|||
#define MAX_VIEWS 6
|
||||
#define MAX_HW_SCALED_VIEWS 6
|
||||
#define MAX_BIND_POINTS 2 /* compute + graphics */
|
||||
/* match the latest Qualcomm driver which is also a hw limit on later gens */
|
||||
#define MAX_STORAGE_BUFFER_RANGE (1u << 27)
|
||||
#define MAX_TEXEL_ELEMENTS (1u << 27)
|
||||
/* We use ldc for uniform buffer loads, just like the Qualcomm driver, so
|
||||
* expose the same maximum range.
|
||||
* TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual
|
||||
|
|
|
|||
|
|
@@ -1168,7 +1168,7 @@ write_sampler_descriptor(uint32_t *dst, VkSampler _sampler)
|
|||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
write_accel_struct(uint32_t *dst, uint64_t va)
|
||||
write_accel_struct(uint32_t *dst, uint64_t va, uint32_t max_texel_elements)
|
||||
{
|
||||
/* We don't actually use the bounds checking in the shader, since the
|
||||
* instance array is accessed entirely with a driver-controlled offset.
|
||||
|
|
@@ -1177,7 +1177,7 @@ write_accel_struct(uint32_t *dst, uint64_t va)
|
|||
*/
|
||||
fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R32_UINT,
|
||||
tu_swiz(X, X, X, X), va,
|
||||
MAX_TEXEL_ELEMENTS, AS_RECORD_SIZE / 4);
|
||||
max_texel_elements, AS_RECORD_SIZE / 4);
|
||||
}
|
||||
|
||||
/* note: this is used with immutable samplers in push descriptors */
|
||||
|
|
@@ -1232,10 +1232,11 @@ tu_GetDescriptorEXT(
|
|||
write_sampler_descriptor(dest, *pDescriptorInfo->data.pSampler);
|
||||
break;
|
||||
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
|
||||
uint32_t max_texel_elements = device->physical_device->info->props.max_texel_buffer_range_elements;
|
||||
if (pDescriptorInfo->data.accelerationStructure == 0) {
|
||||
write_accel_struct<CHIP>(dest, device->null_accel_struct_bo->iova);
|
||||
write_accel_struct<CHIP>(dest, device->null_accel_struct_bo->iova, max_texel_elements);
|
||||
} else {
|
||||
write_accel_struct<CHIP>(dest, pDescriptorInfo->data.accelerationStructure);
|
||||
write_accel_struct<CHIP>(dest, pDescriptorInfo->data.accelerationStructure, max_texel_elements);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@@ -1365,11 +1366,12 @@ tu_update_descriptor_sets(const struct tu_device *device,
|
|||
break;
|
||||
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
|
||||
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accel_structs->pAccelerationStructures[j]);
|
||||
uint32_t max_texel_elements = device->physical_device->info->props.max_texel_buffer_range_elements;
|
||||
if (accel_struct) {
|
||||
write_accel_struct<CHIP>(ptr,
|
||||
vk_acceleration_structure_get_va(accel_struct));
|
||||
vk_acceleration_structure_get_va(accel_struct), max_texel_elements);
|
||||
} else {
|
||||
write_accel_struct<CHIP>(ptr, device->null_accel_struct_bo->iova);
|
||||
write_accel_struct<CHIP>(ptr, device->null_accel_struct_bo->iova, max_texel_elements);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@@ -1716,11 +1718,12 @@ tu_update_descriptor_set_with_template(
|
|||
break;
|
||||
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
|
||||
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, *(const VkAccelerationStructureKHR *)src);
|
||||
uint32_t max_texel_elements = device->physical_device->info->props.max_texel_buffer_range_elements;
|
||||
if (accel_struct) {
|
||||
write_accel_struct<CHIP>(ptr,
|
||||
vk_acceleration_structure_get_va(accel_struct));
|
||||
vk_acceleration_structure_get_va(accel_struct), max_texel_elements);
|
||||
} else {
|
||||
write_accel_struct<CHIP>(ptr, device->null_accel_struct_bo->iova);
|
||||
write_accel_struct<CHIP>(ptr, device->null_accel_struct_bo->iova, max_texel_elements);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -1149,9 +1149,9 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
|||
props->maxImageDimension3D = (1 << 11);
|
||||
props->maxImageDimensionCube = (1 << 14);
|
||||
props->maxImageArrayLayers = (1 << (pdevice->info->props.is_a702 ? 8 : 11));
|
||||
props->maxTexelBufferElements = MAX_TEXEL_ELEMENTS;
|
||||
props->maxTexelBufferElements = pdevice->info->props.max_texel_buffer_range_elements;
|
||||
props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
|
||||
props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
|
||||
props->maxStorageBufferRange = pdevice->info->props.max_storage_buffer_range_bytes;
|
||||
props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
|
||||
props->maxMemoryAllocationCount = UINT32_MAX;
|
||||
props->maxSamplerAllocationCount = 64 * 1024;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue