From 989e5e4c70225f2d4e0d5446c68879bc8905cf39 Mon Sep 17 00:00:00 2001 From: Matt Coster Date: Tue, 15 Aug 2023 10:45:27 +0100 Subject: [PATCH] pvr: Minor refactor of pvr_device.c Moving a few functions further up here to prepare for the next commit; should make the diffs a lot nicer. No (intentional) functional changes. Signed-off-by: Matt Coster Reviewed-by: Karmjit Mahil Part-of: --- src/imagination/vulkan/pvr_device.c | 806 ++++++++++++++-------------- 1 file changed, 403 insertions(+), 403 deletions(-) diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c index 75a078659e7..5578756e3da 100644 --- a/src/imagination/vulkan/pvr_device.c +++ b/src/imagination/vulkan/pvr_device.c @@ -247,6 +247,409 @@ static void pvr_physical_device_get_supported_features( }; } +static VkResult +pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice) +{ + struct mesa_sha1 sha1_ctx; + unsigned build_id_len; + uint8_t sha1[20]; + uint64_t bvnc; + + const struct build_id_note *note = + build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids); + if (!note) { + return vk_errorf(pdevice, + VK_ERROR_INITIALIZATION_FAILED, + "Failed to find build-id"); + } + + build_id_len = build_id_length(note); + if (build_id_len < 20) { + return vk_errorf(pdevice, + VK_ERROR_INITIALIZATION_FAILED, + "Build-id too short. It needs to be a SHA"); + } + + bvnc = pvr_get_packed_bvnc(&pdevice->dev_info); + + _mesa_sha1_init(&sha1_ctx); + _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len); + _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc)); + _mesa_sha1_final(&sha1_ctx, sha1); + memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE); + + return VK_SUCCESS; +} + +struct pvr_descriptor_limits { + uint32_t max_per_stage_resources; + uint32_t max_per_stage_samplers; + uint32_t max_per_stage_uniform_buffers; + uint32_t max_per_stage_storage_buffers; + uint32_t max_per_stage_sampled_images; + uint32_t max_per_stage_storage_images; + uint32_t max_per_stage_input_attachments; +}; + +static const struct pvr_descriptor_limits * +pvr_get_physical_device_descriptor_limits( + const struct pvr_device_info *dev_info, + const struct pvr_device_runtime_info *dev_runtime_info) +{ + enum pvr_descriptor_cs_level { + /* clang-format off */ + CS4096, /* 6XT and some XE cores with large CS. */ + CS2560, /* Mid range Rogue XE cores. */ + CS2048, /* Low end Rogue XE cores. */ + CS1536, /* Ultra-low-end 9XEP. */ + CS680, /* lower limits for older devices. */ + CS408, /* 7XE. */ + /* clang-format on */ + }; + + static const struct pvr_descriptor_limits descriptor_limits[] = { + [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, }, + [CS2560] = { 648U, 128U, 128U, 128U, 128U, 128U, 8U, }, + [CS2048] = { 584U, 128U, 96U, 64U, 128U, 128U, 8U, }, + [CS1536] = { 456U, 64U, 96U, 64U, 128U, 64U, 8U, }, + [CS680] = { 224U, 32U, 64U, 36U, 48U, 8U, 8U, }, + [CS408] = { 128U, 16U, 40U, 28U, 16U, 8U, 8U, }, + }; + + const uint32_t common_size = + pvr_calc_fscommon_size_and_tiles_in_flight(dev_info, + dev_runtime_info, + UINT32_MAX, + 1); + enum pvr_descriptor_cs_level cs_level; + + if (common_size >= 2048) { + cs_level = CS2048; + } else if (common_size >= 1526) { + cs_level = CS1536; + } else if (common_size >= 680) { + cs_level = CS680; + } else if (common_size >= 408) { + cs_level = CS408; + } else { + mesa_loge("This core appears to have a very limited amount of shared " + "register space and may not meet the Vulkan spec limits."); + abort(); + } + + return &descriptor_limits[cs_level]; +} + +static void +pvr_get_physical_device_properties_1_1(struct pvr_physical_device *pdevice, + VkPhysicalDeviceVulkan11Properties *p) +{ + assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES); +} + +static void +pvr_get_physical_device_properties_1_2(struct pvr_physical_device *pdevice, + VkPhysicalDeviceVulkan12Properties *p) +{ + assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES); + + /* VK_KHR_driver_properties */ + p->driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA; + memset(p->driverName, 0, sizeof(p->driverName)); + snprintf(p->driverName, + VK_MAX_DRIVER_NAME_SIZE, + "Imagination open-source Mesa driver"); + memset(p->driverInfo, 0, sizeof(p->driverInfo)); + snprintf(p->driverInfo, + VK_MAX_DRIVER_INFO_SIZE, + ("Mesa " PACKAGE_VERSION MESA_GIT_SHA1)); + p->conformanceVersion = (VkConformanceVersion){ + .major = 1, + .minor = 3, + .subminor = 4, + .patch = 1, + }; + + /* VK_KHR_timeline_semaphore */ + p->maxTimelineSemaphoreValueDifference = UINT64_MAX; +} + +static void +pvr_get_physical_device_properties_1_3(struct pvr_physical_device *pdevice, + VkPhysicalDeviceVulkan13Properties *p) +{ + assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES); +} + +void pvr_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties *pProperties) +{ + PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); + const struct pvr_device_info *const dev_info = &pdevice->dev_info; + + const struct pvr_descriptor_limits *descriptor_limits = + pvr_get_physical_device_descriptor_limits(dev_info, + &pdevice->dev_runtime_info); + + /* Default value based on the minimum value found in all existing cores. */ + const uint32_t max_multisample = + PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4); + + /* Default value based on the minimum value found in all existing cores. */ + const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2); + + /* Default value based on the minimum value found in all existing cores. */ + const uint32_t uvs_pba_entries = + PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160); + + /* Default value based on the minimum value found in all existing cores. */ + const uint32_t num_user_clip_planes = + PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8); + + const uint32_t sub_pixel_precision = + PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U; + + const uint32_t max_render_size = rogue_get_render_size_max(dev_info); + + const uint32_t max_sample_bits = ((max_multisample << 1) - 1); + + const uint32_t max_user_vertex_components = + ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U; + + /* The workgroup invocations are limited by the case where we have a compute + * barrier - each slot has a fixed number of invocations, the whole workgroup + * may need to span multiple slots. As each slot will WAIT at the barrier + * until the last invocation completes, all have to be schedulable at the + * same time. + * + * Typically all Rogue cores have 16 slots. Some of the smallest cores are + * reduced to 14. + * + * The compute barrier slot exhaustion scenario can be tested with: + * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence + * .atomicwrite*guard*comp + */ + + /* Default value based on the minimum value found in all existing cores. */ + const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14); + + /* Default value based on the minimum value found in all existing cores. */ + const uint32_t max_instances_per_pds_task = + PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U); + + const uint32_t max_compute_work_group_invocations = + (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U; + + VkPhysicalDeviceLimits limits = { + .maxImageDimension1D = max_render_size, + .maxImageDimension2D = max_render_size, + .maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z, + .maxImageDimensionCube = max_render_size, + .maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS, + .maxTexelBufferElements = 64U * 1024U, + .maxUniformBufferRange = 128U * 1024U * 1024U, + .maxStorageBufferRange = 128U * 1024U * 1024U, + .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE, + .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = UINT32_MAX, + .bufferImageGranularity = 1U, + .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL, + + /* Maximum number of descriptor sets that can be bound at the same time. + */ + .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS, + + .maxPerStageResources = descriptor_limits->max_per_stage_resources, + .maxPerStageDescriptorSamplers = + descriptor_limits->max_per_stage_samplers, + .maxPerStageDescriptorUniformBuffers = + descriptor_limits->max_per_stage_uniform_buffers, + .maxPerStageDescriptorStorageBuffers = + descriptor_limits->max_per_stage_storage_buffers, + .maxPerStageDescriptorSampledImages = + descriptor_limits->max_per_stage_sampled_images, + .maxPerStageDescriptorStorageImages = + descriptor_limits->max_per_stage_storage_images, + .maxPerStageDescriptorInputAttachments = + descriptor_limits->max_per_stage_input_attachments, + + .maxDescriptorSetSamplers = 256U, + .maxDescriptorSetUniformBuffers = 256U, + .maxDescriptorSetUniformBuffersDynamic = + PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS, + .maxDescriptorSetStorageBuffers = 256U, + .maxDescriptorSetStorageBuffersDynamic = + PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS, + .maxDescriptorSetSampledImages = 256U, + .maxDescriptorSetStorageImages = 256U, + .maxDescriptorSetInputAttachments = 256U, + + /* Vertex Shader Limits */ + .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS, + .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS, + .maxVertexInputAttributeOffset = 0xFFFF, + .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U, + .maxVertexOutputComponents = max_user_vertex_components, + + /* Tessellation Limits */ + .maxTessellationGenerationLevel = 0, + .maxTessellationPatchSize = 0, + .maxTessellationControlPerVertexInputComponents = 0, + .maxTessellationControlPerVertexOutputComponents = 0, + .maxTessellationControlPerPatchOutputComponents = 0, + .maxTessellationControlTotalOutputComponents = 0, + .maxTessellationEvaluationInputComponents = 0, + .maxTessellationEvaluationOutputComponents = 0, + + /* Geometry Shader Limits */ + .maxGeometryShaderInvocations = 0, + .maxGeometryInputComponents = 0, + .maxGeometryOutputComponents = 0, + .maxGeometryOutputVertices = 0, + .maxGeometryTotalOutputComponents = 0, + + /* Fragment Shader Limits */ + .maxFragmentInputComponents = max_user_vertex_components, + .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS, + .maxFragmentDualSrcAttachments = 0, + .maxFragmentCombinedOutputResources = + descriptor_limits->max_per_stage_storage_buffers + + descriptor_limits->max_per_stage_storage_images + + PVR_MAX_COLOR_ATTACHMENTS, + + /* Compute Shader Limits */ + .maxComputeSharedMemorySize = 16U * 1024U, + .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U }, + .maxComputeWorkGroupInvocations = max_compute_work_group_invocations, + .maxComputeWorkGroupSize = { max_compute_work_group_invocations, + max_compute_work_group_invocations, + 64U }, + + /* Rasterization Limits */ + .subPixelPrecisionBits = sub_pixel_precision, + .subTexelPrecisionBits = 8U, + .mipmapPrecisionBits = 8U, + + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U, + .maxSamplerLodBias = 16.0f, + .maxSamplerAnisotropy = 1.0f, + .maxViewports = PVR_MAX_VIEWPORTS, + + .maxViewportDimensions[0] = max_render_size, + .maxViewportDimensions[1] = max_render_size, + .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size), + .viewportBoundsRange[1] = 2U * max_render_size, + + .viewportSubPixelBits = 0, + .minMemoryMapAlignment = 64U, + .minTexelBufferOffsetAlignment = 16U, + .minUniformBufferOffsetAlignment = 4U, + .minStorageBufferOffsetAlignment = 4U, + + .minTexelOffset = -8, + .maxTexelOffset = 7U, + .minTexelGatherOffset = -8, + .maxTexelGatherOffset = 7, + .minInterpolationOffset = -0.5, + .maxInterpolationOffset = 0.5, + .subPixelInterpolationOffsetBits = 4U, + + .maxFramebufferWidth = max_render_size, + .maxFramebufferHeight = max_render_size, + .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS, + + .framebufferColorSampleCounts = max_sample_bits, + .framebufferDepthSampleCounts = max_sample_bits, + .framebufferStencilSampleCounts = max_sample_bits, + .framebufferNoAttachmentsSampleCounts = max_sample_bits, + .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS, + .sampledImageColorSampleCounts = max_sample_bits, + .sampledImageIntegerSampleCounts = max_sample_bits, + .sampledImageDepthSampleCounts = max_sample_bits, + .sampledImageStencilSampleCounts = max_sample_bits, + .storageImageSampleCounts = max_sample_bits, + .maxSampleMaskWords = 1U, + .timestampComputeAndGraphics = false, + .timestampPeriod = 0.0f, + .maxClipDistances = num_user_clip_planes, + .maxCullDistances = num_user_clip_planes, + .maxCombinedClipAndCullDistances = num_user_clip_planes, + .discreteQueuePriorities = 2U, + .pointSizeRange[0] = 1.0f, + .pointSizeRange[1] = 511.0f, + .pointSizeGranularity = 0.0625f, + .lineWidthRange[0] = 1.0f / 16.0f, + .lineWidthRange[1] = 16.0f, + .lineWidthGranularity = 1.0f / 16.0f, + .strictLines = false, + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 4U, + .optimalBufferCopyRowPitchAlignment = 4U, + .nonCoherentAtomSize = 1U, + }; + + *pProperties = (VkPhysicalDeviceProperties){ + .apiVersion = PVR_API_VERSION, + .driverVersion = vk_get_driver_version(), + .vendorID = VK_VENDOR_ID_IMAGINATION, + .deviceID = dev_info->ident.device_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .limits = limits, + .sparseProperties = { 0 }, + }; + + snprintf(pProperties->deviceName, + sizeof(pProperties->deviceName), + "%s", + pdevice->name); + + memcpy(pProperties->pipelineCacheUUID, + pdevice->pipeline_cache_uuid, + VK_UUID_SIZE); +} + +void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties2 *pProperties) +{ + PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); + + VkPhysicalDeviceVulkan11Properties core_1_1 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES, + }; + + VkPhysicalDeviceVulkan12Properties core_1_2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES, + }; + + VkPhysicalDeviceVulkan13Properties core_1_3 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES, + }; + + pvr_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); + pvr_get_physical_device_properties_1_1(pdevice, &core_1_1); + pvr_get_physical_device_properties_1_2(pdevice, &core_1_2); + pvr_get_physical_device_properties_1_3(pdevice, &core_1_3); + + vk_foreach_struct (ext, pProperties->pNext) { + if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1)) + continue; + + if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2)) + continue; + + if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3)) + continue; + + switch (ext->sType) { + default: { + pvr_debug_ignored_stype(ext->sType); + break; + } + } + } +} + VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion) { *pApiVersion = PVR_API_VERSION; @@ -309,40 +712,6 @@ void pvr_DestroyInstance(VkInstance _instance, vk_free(&instance->vk.alloc, instance); } -static VkResult -pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice) -{ - struct mesa_sha1 sha1_ctx; - unsigned build_id_len; - uint8_t sha1[20]; - uint64_t bvnc; - - const struct build_id_note *note = - build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids); - if (!note) { - return vk_errorf(pdevice, - VK_ERROR_INITIALIZATION_FAILED, - "Failed to find build-id"); - } - - build_id_len = build_id_length(note); - if (build_id_len < 20) { - return vk_errorf(pdevice, - VK_ERROR_INITIALIZATION_FAILED, - "Build-id too short. It needs to be a SHA"); - } - - bvnc = pvr_get_packed_bvnc(&pdevice->dev_info); - - _mesa_sha1_init(&sha1_ctx); - _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len); - _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc)); - _mesa_sha1_final(&sha1_ctx, sha1); - memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE); - - return VK_SUCCESS; -} - static uint64_t pvr_compute_heap_size(void) { /* Query the total ram from the system */ @@ -857,375 +1226,6 @@ uint32_t pvr_calc_fscommon_size_and_tiles_in_flight( return MIN2(num_tile_in_flight, max_tiles_in_flight); } -struct pvr_descriptor_limits { - uint32_t max_per_stage_resources; - uint32_t max_per_stage_samplers; - uint32_t max_per_stage_uniform_buffers; - uint32_t max_per_stage_storage_buffers; - uint32_t max_per_stage_sampled_images; - uint32_t max_per_stage_storage_images; - uint32_t max_per_stage_input_attachments; -}; - -static const struct pvr_descriptor_limits * -pvr_get_physical_device_descriptor_limits( - const struct pvr_device_info *dev_info, - const struct pvr_device_runtime_info *dev_runtime_info) -{ - enum pvr_descriptor_cs_level { - /* clang-format off */ - CS4096, /* 6XT and some XE cores with large CS. */ - CS2560, /* Mid range Rogue XE cores. */ - CS2048, /* Low end Rogue XE cores. */ - CS1536, /* Ultra-low-end 9XEP. */ - CS680, /* lower limits for older devices. */ - CS408, /* 7XE. */ - /* clang-format on */ - }; - - static const struct pvr_descriptor_limits descriptor_limits[] = { - [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, }, - [CS2560] = { 648U, 128U, 128U, 128U, 128U, 128U, 8U, }, - [CS2048] = { 584U, 128U, 96U, 64U, 128U, 128U, 8U, }, - [CS1536] = { 456U, 64U, 96U, 64U, 128U, 64U, 8U, }, - [CS680] = { 224U, 32U, 64U, 36U, 48U, 8U, 8U, }, - [CS408] = { 128U, 16U, 40U, 28U, 16U, 8U, 8U, }, - }; - - const uint32_t common_size = - pvr_calc_fscommon_size_and_tiles_in_flight(dev_info, - dev_runtime_info, - UINT32_MAX, - 1); - enum pvr_descriptor_cs_level cs_level; - - if (common_size >= 2048) { - cs_level = CS2048; - } else if (common_size >= 1526) { - cs_level = CS1536; - } else if (common_size >= 680) { - cs_level = CS680; - } else if (common_size >= 408) { - cs_level = CS408; - } else { - mesa_loge("This core appears to have a very limited amount of shared " - "register space and may not meet the Vulkan spec limits."); - abort(); - } - - return &descriptor_limits[cs_level]; -} - -static void -pvr_get_physical_device_properties_1_1(struct pvr_physical_device *pdevice, - VkPhysicalDeviceVulkan11Properties *p) -{ - assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES); -} - -static void -pvr_get_physical_device_properties_1_2(struct pvr_physical_device *pdevice, - VkPhysicalDeviceVulkan12Properties *p) -{ - assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES); - - /* VK_KHR_driver_properties */ - p->driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA; - memset(p->driverName, 0, sizeof(p->driverName)); - snprintf(p->driverName, - VK_MAX_DRIVER_NAME_SIZE, - "Imagination open-source Mesa driver"); - memset(p->driverInfo, 0, sizeof(p->driverInfo)); - snprintf(p->driverInfo, - VK_MAX_DRIVER_INFO_SIZE, - ("Mesa " PACKAGE_VERSION MESA_GIT_SHA1)); - p->conformanceVersion = (VkConformanceVersion){ - .major = 1, - .minor = 3, - .subminor = 4, - .patch = 1, - }; - - /* VK_KHR_timeline_semaphore */ - p->maxTimelineSemaphoreValueDifference = UINT64_MAX; -} - -static void -pvr_get_physical_device_properties_1_3(struct pvr_physical_device *pdevice, - VkPhysicalDeviceVulkan13Properties *p) -{ - assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES); -} - -void pvr_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties *pProperties) -{ - PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); - const struct pvr_device_info *const dev_info = &pdevice->dev_info; - - const struct pvr_descriptor_limits *descriptor_limits = - pvr_get_physical_device_descriptor_limits(dev_info, - &pdevice->dev_runtime_info); - - /* Default value based on the minimum value found in all existing cores. */ - const uint32_t max_multisample = - PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4); - - /* Default value based on the minimum value found in all existing cores. */ - const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2); - - /* Default value based on the minimum value found in all existing cores. */ - const uint32_t uvs_pba_entries = - PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160); - - /* Default value based on the minimum value found in all existing cores. */ - const uint32_t num_user_clip_planes = - PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8); - - const uint32_t sub_pixel_precision = - PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U; - - const uint32_t max_render_size = rogue_get_render_size_max(dev_info); - - const uint32_t max_sample_bits = ((max_multisample << 1) - 1); - - const uint32_t max_user_vertex_components = - ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U; - - /* The workgroup invocations are limited by the case where we have a compute - * barrier - each slot has a fixed number of invocations, the whole workgroup - * may need to span multiple slots. As each slot will WAIT at the barrier - * until the last invocation completes, all have to be schedulable at the - * same time. - * - * Typically all Rogue cores have 16 slots. Some of the smallest cores are - * reduced to 14. - * - * The compute barrier slot exhaustion scenario can be tested with: - * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence - * .atomicwrite*guard*comp - */ - - /* Default value based on the minimum value found in all existing cores. */ - const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14); - - /* Default value based on the minimum value found in all existing cores. */ - const uint32_t max_instances_per_pds_task = - PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U); - - const uint32_t max_compute_work_group_invocations = - (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U; - - VkPhysicalDeviceLimits limits = { - .maxImageDimension1D = max_render_size, - .maxImageDimension2D = max_render_size, - .maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z, - .maxImageDimensionCube = max_render_size, - .maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS, - .maxTexelBufferElements = 64U * 1024U, - .maxUniformBufferRange = 128U * 1024U * 1024U, - .maxStorageBufferRange = 128U * 1024U * 1024U, - .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE, - .maxMemoryAllocationCount = UINT32_MAX, - .maxSamplerAllocationCount = UINT32_MAX, - .bufferImageGranularity = 1U, - .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL, - - /* Maximum number of descriptor sets that can be bound at the same time. - */ - .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS, - - .maxPerStageResources = descriptor_limits->max_per_stage_resources, - .maxPerStageDescriptorSamplers = - descriptor_limits->max_per_stage_samplers, - .maxPerStageDescriptorUniformBuffers = - descriptor_limits->max_per_stage_uniform_buffers, - .maxPerStageDescriptorStorageBuffers = - descriptor_limits->max_per_stage_storage_buffers, - .maxPerStageDescriptorSampledImages = - descriptor_limits->max_per_stage_sampled_images, - .maxPerStageDescriptorStorageImages = - descriptor_limits->max_per_stage_storage_images, - .maxPerStageDescriptorInputAttachments = - descriptor_limits->max_per_stage_input_attachments, - - .maxDescriptorSetSamplers = 256U, - .maxDescriptorSetUniformBuffers = 256U, - .maxDescriptorSetUniformBuffersDynamic = - PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS, - .maxDescriptorSetStorageBuffers = 256U, - .maxDescriptorSetStorageBuffersDynamic = - PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS, - .maxDescriptorSetSampledImages = 256U, - .maxDescriptorSetStorageImages = 256U, - .maxDescriptorSetInputAttachments = 256U, - - /* Vertex Shader Limits */ - .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS, - .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS, - .maxVertexInputAttributeOffset = 0xFFFF, - .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U, - .maxVertexOutputComponents = max_user_vertex_components, - - /* Tessellation Limits */ - .maxTessellationGenerationLevel = 0, - .maxTessellationPatchSize = 0, - .maxTessellationControlPerVertexInputComponents = 0, - .maxTessellationControlPerVertexOutputComponents = 0, - .maxTessellationControlPerPatchOutputComponents = 0, - .maxTessellationControlTotalOutputComponents = 0, - .maxTessellationEvaluationInputComponents = 0, - .maxTessellationEvaluationOutputComponents = 0, - - /* Geometry Shader Limits */ - .maxGeometryShaderInvocations = 0, - .maxGeometryInputComponents = 0, - .maxGeometryOutputComponents = 0, - .maxGeometryOutputVertices = 0, - .maxGeometryTotalOutputComponents = 0, - - /* Fragment Shader Limits */ - .maxFragmentInputComponents = max_user_vertex_components, - .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS, - .maxFragmentDualSrcAttachments = 0, - .maxFragmentCombinedOutputResources = - descriptor_limits->max_per_stage_storage_buffers + - descriptor_limits->max_per_stage_storage_images + - PVR_MAX_COLOR_ATTACHMENTS, - - /* Compute Shader Limits */ - .maxComputeSharedMemorySize = 16U * 1024U, - .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U }, - .maxComputeWorkGroupInvocations = max_compute_work_group_invocations, - .maxComputeWorkGroupSize = { max_compute_work_group_invocations, - max_compute_work_group_invocations, - 64U }, - - /* Rasterization Limits */ - .subPixelPrecisionBits = sub_pixel_precision, - .subTexelPrecisionBits = 8U, - .mipmapPrecisionBits = 8U, - - .maxDrawIndexedIndexValue = UINT32_MAX, - .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U, - .maxSamplerLodBias = 16.0f, - .maxSamplerAnisotropy = 1.0f, - .maxViewports = PVR_MAX_VIEWPORTS, - - .maxViewportDimensions[0] = max_render_size, - .maxViewportDimensions[1] = max_render_size, - .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size), - .viewportBoundsRange[1] = 2U * max_render_size, - - .viewportSubPixelBits = 0, - .minMemoryMapAlignment = 64U, - .minTexelBufferOffsetAlignment = 16U, - .minUniformBufferOffsetAlignment = 4U, - .minStorageBufferOffsetAlignment = 4U, - - .minTexelOffset = -8, - .maxTexelOffset = 7U, - .minTexelGatherOffset = -8, - .maxTexelGatherOffset = 7, - .minInterpolationOffset = -0.5, - .maxInterpolationOffset = 0.5, - .subPixelInterpolationOffsetBits = 4U, - - .maxFramebufferWidth = max_render_size, - .maxFramebufferHeight = max_render_size, - .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS, - - .framebufferColorSampleCounts = max_sample_bits, - .framebufferDepthSampleCounts = max_sample_bits, - .framebufferStencilSampleCounts = max_sample_bits, - .framebufferNoAttachmentsSampleCounts = max_sample_bits, - .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS, - .sampledImageColorSampleCounts = max_sample_bits, - .sampledImageIntegerSampleCounts = max_sample_bits, - .sampledImageDepthSampleCounts = max_sample_bits, - .sampledImageStencilSampleCounts = max_sample_bits, - .storageImageSampleCounts = max_sample_bits, - .maxSampleMaskWords = 1U, - .timestampComputeAndGraphics = false, - .timestampPeriod = 0.0f, - .maxClipDistances = num_user_clip_planes, - .maxCullDistances = num_user_clip_planes, - .maxCombinedClipAndCullDistances = num_user_clip_planes, - .discreteQueuePriorities = 2U, - .pointSizeRange[0] = 1.0f, - .pointSizeRange[1] = 511.0f, - .pointSizeGranularity = 0.0625f, - .lineWidthRange[0] = 1.0f / 16.0f, - .lineWidthRange[1] = 16.0f, - .lineWidthGranularity = 1.0f / 16.0f, - .strictLines = false, - .standardSampleLocations = true, - .optimalBufferCopyOffsetAlignment = 4U, - .optimalBufferCopyRowPitchAlignment = 4U, - .nonCoherentAtomSize = 1U, - }; - - *pProperties = (VkPhysicalDeviceProperties){ - .apiVersion = PVR_API_VERSION, - .driverVersion = vk_get_driver_version(), - .vendorID = VK_VENDOR_ID_IMAGINATION, - .deviceID = dev_info->ident.device_id, - .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, - .limits = limits, - .sparseProperties = { 0 }, - }; - - snprintf(pProperties->deviceName, - sizeof(pProperties->deviceName), - "%s", - pdevice->name); - - memcpy(pProperties->pipelineCacheUUID, - pdevice->pipeline_cache_uuid, - VK_UUID_SIZE); -} - -void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2 *pProperties) -{ - PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); - - VkPhysicalDeviceVulkan11Properties core_1_1 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES, - }; - - VkPhysicalDeviceVulkan12Properties core_1_2 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES, - }; - - VkPhysicalDeviceVulkan13Properties core_1_3 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES, - }; - - pvr_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); - pvr_get_physical_device_properties_1_1(pdevice, &core_1_1); - pvr_get_physical_device_properties_1_2(pdevice, &core_1_2); - pvr_get_physical_device_properties_1_3(pdevice, &core_1_3); - - vk_foreach_struct (ext, pProperties->pNext) { - if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1)) - continue; - - if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2)) - continue; - - if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3)) - continue; - - switch (ext->sType) { - default: { - pvr_debug_ignored_stype(ext->sType); - break; - } - } - } -} - const static VkQueueFamilyProperties pvr_queue_family_properties = { .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT,