diff --git a/src/intel/vulkan_hasvk/anv_device.c b/src/intel/vulkan_hasvk/anv_device.c index 9bc84adbbda..b2645352cc4 100644 --- a/src/intel/vulkan_hasvk/anv_device.c +++ b/src/intel/vulkan_hasvk/anv_device.c @@ -653,6 +653,582 @@ get_features(const struct anv_physical_device *pdevice, features->depthBounds = true; } + +#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64 + +#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64 +#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256 + +#define MAX_CUSTOM_BORDER_COLORS 4096 + +static void +get_properties_1_1(const struct anv_physical_device *pdevice, + struct vk_properties *p) +{ + memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); + memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); + memset(p->deviceLUID, 0, VK_LUID_SIZE); + p->deviceNodeMask = 0; + p->deviceLUIDValid = false; + + p->subgroupSize = ELK_SUBGROUP_SIZE; + VkShaderStageFlags scalar_stages = 0; + for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) { + if (pdevice->compiler->scalar_stage[stage]) + scalar_stages |= mesa_to_vk_shader_stage(stage); + } + p->subgroupSupportedStages = scalar_stages; + p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | + VK_SUBGROUP_FEATURE_VOTE_BIT | + VK_SUBGROUP_FEATURE_BALLOT_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | + VK_SUBGROUP_FEATURE_QUAD_BIT; + if (pdevice->info.ver >= 8) { + /* TODO: There's no technical reason why these can't be made to + * work on gfx7 but they don't at the moment so it's best to leave + * the feature disabled than enabled and broken. + */ + p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | + VK_SUBGROUP_FEATURE_CLUSTERED_BIT; + } + p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8; + + p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY; + p->maxMultiviewViewCount = 16; + p->maxMultiviewInstanceIndex = UINT32_MAX / 16; + p->protectedNoFault = false; + /* This value doesn't matter for us today as our per-stage descriptors are + * the real limit. + */ + p->maxPerSetDescriptors = 1024; + p->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE; +} + +static void +get_properties_1_2(const struct anv_physical_device *pdevice, + struct vk_properties *p) +{ + p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; + memset(p->driverName, 0, sizeof(p->driverName)); + snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, + "Intel open-source Mesa driver"); + memset(p->driverInfo, 0, sizeof(p->driverInfo)); + snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, + "Mesa " PACKAGE_VERSION MESA_GIT_SHA1); + + /* Don't advertise conformance with a particular version if the hardware's + * support is incomplete/alpha. + */ + if (pdevice->is_alpha) { + p->conformanceVersion = (VkConformanceVersion) { + .major = 0, + .minor = 0, + .subminor = 0, + .patch = 0, + }; + } + else { + p->conformanceVersion = (VkConformanceVersion) { + .major = 1, + .minor = pdevice->use_softpin ? 3 : 2, + .subminor = 0, + .patch = 0, + }; + } + + p->denormBehaviorIndependence = + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL; + p->roundingModeIndependence = + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE; + + /* Broadwell does not support HF denorms and there are restrictions + * other gens. According to Kabylake's PRM: + * + * "math - Extended Math Function + * [...] + * Restriction : Half-float denorms are always retained." + */ + p->shaderDenormFlushToZeroFloat16 = false; + p->shaderDenormPreserveFloat16 = pdevice->info.ver > 8; + p->shaderRoundingModeRTEFloat16 = true; + p->shaderRoundingModeRTZFloat16 = true; + p->shaderSignedZeroInfNanPreserveFloat16 = true; + + p->shaderDenormFlushToZeroFloat32 = true; + p->shaderDenormPreserveFloat32 = pdevice->info.ver >= 8; + p->shaderRoundingModeRTEFloat32 = true; + p->shaderRoundingModeRTZFloat32 = true; + p->shaderSignedZeroInfNanPreserveFloat32 = true; + + p->shaderDenormFlushToZeroFloat64 = true; + p->shaderDenormPreserveFloat64 = true; + p->shaderRoundingModeRTEFloat64 = true; + p->shaderRoundingModeRTZFloat64 = true; + p->shaderSignedZeroInfNanPreserveFloat64 = true; + + /* It's a bit hard to exactly map our implementation to the limits + * described by Vulkan. The bindless surface handle in the extended + * message descriptors is 20 bits and it's an index into the table of + * RENDER_SURFACE_STATE structs that starts at bindless surface base + * address. This means that we can have at must 1M surface states + * allocated at any given time. Since most image views take two + * descriptors, this means we have a limit of about 500K image views. + * + * However, since we allocate surface states at vkCreateImageView time, + * this means our limit is actually something on the order of 500K image + * views allocated at any time. The actual limit describe by Vulkan, on + * the other hand, is a limit of how many you can have in a descriptor set. + * Assuming anyone using 1M descriptors will be using the same image view + * twice a bunch of times (or a bunch of null descriptors), we can safely + * advertise a larger limit here. + */ + const unsigned max_bindless_views = 1 << 20; + p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views; + p->shaderUniformBufferArrayNonUniformIndexingNative = false; + p->shaderSampledImageArrayNonUniformIndexingNative = false; + p->shaderStorageBufferArrayNonUniformIndexingNative = true; + p->shaderStorageImageArrayNonUniformIndexingNative = false; + p->shaderInputAttachmentArrayNonUniformIndexingNative = false; + p->robustBufferAccessUpdateAfterBind = true; + p->quadDivergentImplicitLod = false; + p->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views; + p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS; + p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX; + p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views; + p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views; + p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS; + p->maxPerStageUpdateAfterBindResources = UINT32_MAX; + p->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views; + p->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS; + p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2; + p->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX; + p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2; + p->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views; + p->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views; + p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS; + + /* We support all of the depth resolve modes */ + p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | + VK_RESOLVE_MODE_AVERAGE_BIT | + VK_RESOLVE_MODE_MIN_BIT | + VK_RESOLVE_MODE_MAX_BIT; + /* Average doesn't make sense for stencil so we don't support that */ + p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + if (pdevice->info.ver >= 8) { + /* The advanced stencil resolve modes currently require stencil + * sampling be supported by the hardware. + */ + p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT | + VK_RESOLVE_MODE_MAX_BIT; + } + p->independentResolveNone = true; + p->independentResolve = true; + + p->filterMinmaxSingleComponentFormats = false; + p->filterMinmaxImageComponentMapping = false; + + p->maxTimelineSemaphoreValueDifference = UINT64_MAX; + + p->framebufferIntegerColorSampleCounts = + pdevice->info.ver == 7 ? VK_SAMPLE_COUNT_1_BIT : isl_device_get_sample_counts(&pdevice->isl_dev); +} + +static void +get_properties_1_3(const struct anv_physical_device *pdevice, + struct vk_properties *p) +{ + p->minSubgroupSize = 8; + p->maxSubgroupSize = 32; + p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads; + p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT; + + p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE; + p->maxPerStageDescriptorInlineUniformBlocks = + MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS; + p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = + MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS; + p->maxDescriptorSetInlineUniformBlocks = + MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS; + p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = + MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS; + p->maxInlineUniformTotalSize = UINT16_MAX; + + p->integerDotProduct8BitUnsignedAccelerated = false; + p->integerDotProduct8BitSignedAccelerated = false; + p->integerDotProduct8BitMixedSignednessAccelerated = false; + p->integerDotProduct4x8BitPackedUnsignedAccelerated = false; + p->integerDotProduct4x8BitPackedSignedAccelerated = false; + p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false; + p->integerDotProduct16BitUnsignedAccelerated = false; + p->integerDotProduct16BitSignedAccelerated = false; + p->integerDotProduct16BitMixedSignednessAccelerated = false; + p->integerDotProduct32BitUnsignedAccelerated = false; + p->integerDotProduct32BitSignedAccelerated = false; + p->integerDotProduct32BitMixedSignednessAccelerated = false; + p->integerDotProduct64BitUnsignedAccelerated = false; + p->integerDotProduct64BitSignedAccelerated = false; + p->integerDotProduct64BitMixedSignednessAccelerated = false; + p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false; + p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false; + p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false; + p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false; + p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false; + p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false; + p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false; + p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false; + p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false; + p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false; + p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false; + p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false; + p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false; + p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false; + p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false; + + /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface + * Base Address: + * + * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field + * specifies the base address of the first element of the surface, + * computed in software by adding the surface base address to the + * byte offset of the element in the buffer. The base address must + * be aligned to element size." + * + * The typed dataport messages require that things be texel aligned. + * Otherwise, we may just load/store the wrong data or, in the worst + * case, there may be hangs. + */ + p->storageTexelBufferOffsetAlignmentBytes = 16; + p->storageTexelBufferOffsetSingleTexelAlignment = true; + + /* The sampler, however, is much more forgiving and it can handle + * arbitrary byte alignment for linear and buffer surfaces. It's + * hard to find a good PRM citation for this but years of empirical + * experience demonstrate that this is true. + */ + p->uniformTexelBufferOffsetAlignmentBytes = 1; + p->uniformTexelBufferOffsetSingleTexelAlignment = true; + + p->maxBufferSize = pdevice->isl_dev.max_buffer_size; +} + +static void +get_properties(const struct anv_physical_device *pdevice, + struct vk_properties *props) +{ + const struct intel_device_info *devinfo = &pdevice->info; + + const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64; + const uint32_t max_textures = 128; + const uint32_t max_samplers = + pdevice->has_bindless_samplers ? UINT16_MAX : + (devinfo->verx10 >= 75) ? 128 : 16; + const uint32_t max_images = MAX_IMAGES; + + /* If we can use bindless for everything, claim a high per-stage limit, + * otherwise use the binding table size, minus the slots reserved for + * render targets and one slot for the descriptor buffer. */ + const uint32_t max_per_stage = MAX_BINDING_TABLE_SIZE - MAX_RTS - 1; + + const uint32_t max_workgroup_size = + MIN2(1024, 32 * devinfo->max_cs_workgroup_threads); + + VkSampleCountFlags sample_counts = + isl_device_get_sample_counts(&pdevice->isl_dev); + + *props = (struct vk_properties) { +#if DETECT_OS_ANDROID + .apiVersion = ANV_API_VERSION, +#else + .apiVersion = pdevice->use_softpin ? ANV_API_VERSION_1_3 : ANV_API_VERSION_1_2, +#endif /* DETECT_OS_ANDROID */ + .driverVersion = vk_get_driver_version(), + .vendorID = 0x8086, + .deviceID = pdevice->info.pci_device_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + + /* Limits: */ + .maxImageDimension1D = (1 << 14), + /* Gfx7 doesn't support 8xMSAA with depth/stencil images when their width + * is greater than 8192 pixels. */ + .maxImageDimension2D = devinfo->ver == 7 ? (1 << 13) : (1 << 14), + .maxImageDimension3D = (1 << 11), + .maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 11), + .maxTexelBufferElements = 128 * 1024 * 1024, + .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30), + .maxStorageBufferRange = MIN2(pdevice->isl_dev.max_buffer_size, UINT32_MAX), + .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, + .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = 64 * 1024, + .bufferImageGranularity = 1, + .sparseAddressSpaceSize = 0, + .maxBoundDescriptorSets = MAX_SETS, + .maxPerStageDescriptorSamplers = max_samplers, + .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, + .maxPerStageDescriptorStorageBuffers = max_ssbos, + .maxPerStageDescriptorSampledImages = max_textures, + .maxPerStageDescriptorStorageImages = max_images, + .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS, + .maxPerStageResources = max_per_stage, + .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */ + .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */ + .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */ + .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */ + .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */ + .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS, + .maxVertexInputAttributes = MAX_VES, + .maxVertexInputBindings = MAX_VBS, + /* Broadwell PRMs: Volume 2d: Command Reference: Structures: + * + * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047] + */ + .maxVertexInputAttributeOffset = 2047, + /* Broadwell PRMs: Volume 2d: Command Reference: Structures: + * + * VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048] + * + * Skylake PRMs: Volume 2d: Command Reference: Structures: + * + * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095] + */ + .maxVertexInputBindingStride = devinfo->ver < 9 ? 2048 : 4095, + .maxVertexOutputComponents = 128, + .maxTessellationGenerationLevel = 64, + .maxTessellationPatchSize = 32, + .maxTessellationControlPerVertexInputComponents = 128, + .maxTessellationControlPerVertexOutputComponents = 128, + .maxTessellationControlPerPatchOutputComponents = 128, + .maxTessellationControlTotalOutputComponents = 2048, + .maxTessellationEvaluationInputComponents = 128, + .maxTessellationEvaluationOutputComponents = 128, + .maxGeometryShaderInvocations = 32, + .maxGeometryInputComponents = devinfo->ver >= 8 ? 128 : 64, + .maxGeometryOutputComponents = 128, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */ + .maxFragmentOutputAttachments = 8, + .maxFragmentDualSrcAttachments = 1, + .maxFragmentCombinedOutputResources = MAX_RTS + max_ssbos + max_images, + .maxComputeSharedMemorySize = 64 * 1024, + .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, + .maxComputeWorkGroupInvocations = max_workgroup_size, + .maxComputeWorkGroupSize = { + max_workgroup_size, + max_workgroup_size, + max_workgroup_size, + }, + .subPixelPrecisionBits = 8, + .subTexelPrecisionBits = 8, + .mipmapPrecisionBits = 8, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = MAX_VIEWPORTS, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { INT16_MIN, INT16_MAX }, + .viewportSubPixelBits = 13, /* We take a float? */ + .minMemoryMapAlignment = 4096, /* A page */ + /* The dataport requires texel alignment so we need to assume a worst + * case of R32G32B32A32 which is 16 bytes. + */ + .minTexelBufferOffsetAlignment = 16, + .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT, + .minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT, + .minTexelOffset = -8, + .maxTexelOffset = 7, + .minTexelGatherOffset = -32, + .maxTexelGatherOffset = 31, + .minInterpolationOffset = -0.5, + .maxInterpolationOffset = 0.4375, + .subPixelInterpolationOffsetBits = 4, + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 11), + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, + .maxColorAttachments = MAX_RTS, + .sampledImageColorSampleCounts = sample_counts, + /* Multisampling with SINT formats is not supported on gfx7 */ + .sampledImageIntegerSampleCounts = devinfo->ver == 7 ? VK_SAMPLE_COUNT_1_BIT : sample_counts, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = true, + .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency, + .maxClipDistances = 8, + .maxCullDistances = 8, + .maxCombinedClipAndCullDistances = 8, + .discreteQueuePriorities = 2, + .pointSizeRange = { 0.125, 255.875 }, + /* While SKL and up support much wider lines than we are setting here, + * in practice we run into conformance issues if we go past this limit. + * Since the Windows driver does the same, it's probably fair to assume + * that no one needs more than this. + */ + .lineWidthRange = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + .strictLines = false, + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 128, + .optimalBufferCopyRowPitchAlignment = 128, + .nonCoherentAtomSize = 64, + + /* Broadwell doesn't do sparse. */ + .sparseResidencyStandard2DBlockShape = false, + .sparseResidencyStandard2DMultisampleBlockShape = false, + .sparseResidencyStandard3DBlockShape = false, + .sparseResidencyAlignedMipSize = false, + .sparseResidencyNonResidentStrict = false, + }; + + snprintf(props->deviceName, sizeof(props->deviceName), + "%s", pdevice->info.name); + memcpy(props->pipelineCacheUUID, + pdevice->pipeline_cache_uuid, VK_UUID_SIZE); + + get_properties_1_1(pdevice, props); + get_properties_1_2(pdevice, props); + get_properties_1_3(pdevice, props); + + /* VK_KHR_performance_query */ + { + /* We could support this by spawning a shader to do the equation normalization. */ + props->allowCommandBufferQueryCopies = false; + } + + /* VK_KHR_push_descriptor */ + { + props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; + } + + /* VK_KHR_vertex_attribute_divisor */ + { + /* We have to restrict this a bit for multiview */ + props->maxVertexAttribDivisor = UINT32_MAX / 16; + } + + /* VK_EXT_custom_border_color */ + { + props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS; + } + + /* VK_EXT_external_memory_host */ + { + /* Userptr needs page aligned memory. */ + props->minImportedHostPointerAlignment = 4096; + } + + /* VK_EXT_line_rasterization */ + { + /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling + * Rules - Legacy Mode", it says the following: + * + * "Note that the device divides a pixel into a 16x16 array of + * subpixels, referenced by their upper left corners." + * + * This is the only known reference in the PRMs to the subpixel + * precision of line rasterization and a "16x16 array of subpixels" + * implies 4 subpixel precision bits. Empirical testing has shown that 4 + * subpixel precision bits applies to all line rasterization types. + */ + props->lineSubPixelPrecisionBits = 4; + } + + /* VK_EXT_multi_draw */ + { + props->maxMultiDrawCount = 2048; + } + + /* VK_EXT_pci_bus_info */ + { + props->pciDomain = pdevice->info.pci_domain; + props->pciBus = pdevice->info.pci_bus; + props->pciDevice = pdevice->info.pci_dev; + props->pciFunction = pdevice->info.pci_func; + } + + /* VK_EXT_physical_device_drm */ + { + props->drmHasPrimary = pdevice->has_master; + props->drmPrimaryMajor = pdevice->master_major; + props->drmPrimaryMinor = pdevice->master_minor; + props->drmHasRender = pdevice->has_local; + props->drmRenderMajor = pdevice->local_major; + props->drmRenderMinor = pdevice->local_minor; + } + + /* VK_EXT_provoking_vertex */ + { + props->provokingVertexModePerPipeline = true; + props->transformFeedbackPreservesTriangleFanProvokingVertex = false; + } + + /* VK_EXT_robustness2 */ + { + props->robustStorageBufferAccessSizeAlignment = + ANV_SSBO_BOUNDS_CHECK_ALIGNMENT; + props->robustUniformBufferAccessSizeAlignment = + ANV_UBO_ALIGNMENT; + } + + /* VK_EXT_sample_locations */ + { + props->sampleLocationSampleCounts = + isl_device_get_sample_counts(&pdevice->isl_dev); + + /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */ + props->maxSampleLocationGridSize.width = 1; + props->maxSampleLocationGridSize.height = 1; + + props->sampleLocationCoordinateRange[0] = 0; + props->sampleLocationCoordinateRange[1] = 0.9375; + props->sampleLocationSubPixelBits = 4; + + props->variableSampleLocations = true; + } + + /* VK_EXT_shader_module_identifier */ + { + STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == + sizeof(props->shaderModuleIdentifierAlgorithmUUID)); + memcpy(props->shaderModuleIdentifierAlgorithmUUID, + vk_shaderModuleIdentifierAlgorithmUUID, + sizeof(props->shaderModuleIdentifierAlgorithmUUID)); + } + + /* VK_EXT_transform_feedback */ + { + props->maxTransformFeedbackStreams = MAX_XFB_STREAMS; + props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS; + props->maxTransformFeedbackBufferSize = (1ull << 32); + props->maxTransformFeedbackStreamDataSize = 128 * 4; + props->maxTransformFeedbackBufferDataSize = 128 * 4; + props->maxTransformFeedbackBufferDataStride = 2048; + props->transformFeedbackQueries = true; + props->transformFeedbackStreamsLinesTriangles = false; + props->transformFeedbackRasterizationStreamSelect = false; + /* This requires MI_MATH */ + props->transformFeedbackDraw = pdevice->info.verx10 >= 75; + } + + /* VK_ANDROID_native_buffer */ +#if DETECT_OS_ANDROID + { + props->sharedImage = VK_FALSE; + } +#endif /* DETECT_OS_ANDROID */ + +} + static uint64_t anv_compute_sys_heap_size(struct anv_physical_device *device, uint64_t available_ram) @@ -1226,19 +1802,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, anv_physical_device_init_perf(device, fd); - get_device_extensions(device, &device->vk.supported_extensions); - get_features(device, &device->vk.supported_features); - - result = anv_init_wsi(device); - if (result != VK_SUCCESS) - goto fail_perf; - - anv_measure_device_init(device); - - anv_genX(&device->info, init_physical_device_state)(device); - - *out = &device->vk; - + /* Gather major/minor before WSI. */ struct stat st; if (stat(primary_path, &st) == 0) { @@ -1261,6 +1825,20 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->local_minor = 0; } + get_device_extensions(device, &device->vk.supported_extensions); + get_features(device, &device->vk.supported_features); + get_properties(device, &device->vk.properties); + + result = anv_init_wsi(device); + if (result != VK_SUCCESS) + goto fail_perf; + + anv_measure_device_init(device); + + anv_genX(&device->info, init_physical_device_state)(device); + + *out = &device->vk; + return VK_SUCCESS; fail_perf: @@ -1400,664 +1978,6 @@ void anv_DestroyInstance( vk_free(&instance->vk.alloc, instance); } -#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64 - -#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64 -#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256 - -#define MAX_CUSTOM_BORDER_COLORS 4096 - -void anv_GetPhysicalDeviceProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties* pProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - const struct intel_device_info *devinfo = &pdevice->info; - - const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64; - const uint32_t max_textures = 128; - const uint32_t max_samplers = - pdevice->has_bindless_samplers ? UINT16_MAX : - (devinfo->verx10 >= 75) ? 128 : 16; - const uint32_t max_images = MAX_IMAGES; - - /* If we can use bindless for everything, claim a high per-stage limit, - * otherwise use the binding table size, minus the slots reserved for - * render targets and one slot for the descriptor buffer. */ - const uint32_t max_per_stage = MAX_BINDING_TABLE_SIZE - MAX_RTS - 1; - - const uint32_t max_workgroup_size = - MIN2(1024, 32 * devinfo->max_cs_workgroup_threads); - - VkSampleCountFlags sample_counts = - isl_device_get_sample_counts(&pdevice->isl_dev); - - - VkPhysicalDeviceLimits limits = { - .maxImageDimension1D = (1 << 14), - /* Gfx7 doesn't support 8xMSAA with depth/stencil images when their width - * is greater than 8192 pixels. */ - .maxImageDimension2D = devinfo->ver == 7 ? (1 << 13) : (1 << 14), - .maxImageDimension3D = (1 << 11), - .maxImageDimensionCube = (1 << 14), - .maxImageArrayLayers = (1 << 11), - .maxTexelBufferElements = 128 * 1024 * 1024, - .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30), - .maxStorageBufferRange = MIN2(pdevice->isl_dev.max_buffer_size, UINT32_MAX), - .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, - .maxMemoryAllocationCount = UINT32_MAX, - .maxSamplerAllocationCount = 64 * 1024, - .bufferImageGranularity = 1, - .sparseAddressSpaceSize = 0, - .maxBoundDescriptorSets = MAX_SETS, - .maxPerStageDescriptorSamplers = max_samplers, - .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, - .maxPerStageDescriptorStorageBuffers = max_ssbos, - .maxPerStageDescriptorSampledImages = max_textures, - .maxPerStageDescriptorStorageImages = max_images, - .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS, - .maxPerStageResources = max_per_stage, - .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */ - .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */ - .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, - .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */ - .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, - .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */ - .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */ - .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS, - .maxVertexInputAttributes = MAX_VES, - .maxVertexInputBindings = MAX_VBS, - /* Broadwell PRMs: Volume 2d: Command Reference: Structures: - * - * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047] - */ - .maxVertexInputAttributeOffset = 2047, - /* Broadwell PRMs: Volume 2d: Command Reference: Structures: - * - * VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048] - * - * Skylake PRMs: Volume 2d: Command Reference: Structures: - * - * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095] - */ - .maxVertexInputBindingStride = devinfo->ver < 9 ? 2048 : 4095, - .maxVertexOutputComponents = 128, - .maxTessellationGenerationLevel = 64, - .maxTessellationPatchSize = 32, - .maxTessellationControlPerVertexInputComponents = 128, - .maxTessellationControlPerVertexOutputComponents = 128, - .maxTessellationControlPerPatchOutputComponents = 128, - .maxTessellationControlTotalOutputComponents = 2048, - .maxTessellationEvaluationInputComponents = 128, - .maxTessellationEvaluationOutputComponents = 128, - .maxGeometryShaderInvocations = 32, - .maxGeometryInputComponents = devinfo->ver >= 8 ? 128 : 64, - .maxGeometryOutputComponents = 128, - .maxGeometryOutputVertices = 256, - .maxGeometryTotalOutputComponents = 1024, - .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */ - .maxFragmentOutputAttachments = 8, - .maxFragmentDualSrcAttachments = 1, - .maxFragmentCombinedOutputResources = MAX_RTS + max_ssbos + max_images, - .maxComputeSharedMemorySize = 64 * 1024, - .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, - .maxComputeWorkGroupInvocations = max_workgroup_size, - .maxComputeWorkGroupSize = { - max_workgroup_size, - max_workgroup_size, - max_workgroup_size, - }, - .subPixelPrecisionBits = 8, - .subTexelPrecisionBits = 8, - .mipmapPrecisionBits = 8, - .maxDrawIndexedIndexValue = UINT32_MAX, - .maxDrawIndirectCount = UINT32_MAX, - .maxSamplerLodBias = 16, - .maxSamplerAnisotropy = 16, - .maxViewports = MAX_VIEWPORTS, - .maxViewportDimensions = { (1 << 14), (1 << 14) }, - .viewportBoundsRange = { INT16_MIN, INT16_MAX }, - .viewportSubPixelBits = 13, /* We take a float? */ - .minMemoryMapAlignment = 4096, /* A page */ - /* The dataport requires texel alignment so we need to assume a worst - * case of R32G32B32A32 which is 16 bytes. - */ - .minTexelBufferOffsetAlignment = 16, - .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT, - .minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT, - .minTexelOffset = -8, - .maxTexelOffset = 7, - .minTexelGatherOffset = -32, - .maxTexelGatherOffset = 31, - .minInterpolationOffset = -0.5, - .maxInterpolationOffset = 0.4375, - .subPixelInterpolationOffsetBits = 4, - .maxFramebufferWidth = (1 << 14), - .maxFramebufferHeight = (1 << 14), - .maxFramebufferLayers = (1 << 11), - .framebufferColorSampleCounts = sample_counts, - .framebufferDepthSampleCounts = sample_counts, - .framebufferStencilSampleCounts = sample_counts, - .framebufferNoAttachmentsSampleCounts = sample_counts, - .maxColorAttachments = MAX_RTS, - .sampledImageColorSampleCounts = sample_counts, - /* Multisampling with SINT formats is not supported on gfx7 */ - .sampledImageIntegerSampleCounts = devinfo->ver == 7 ? VK_SAMPLE_COUNT_1_BIT : sample_counts, - .sampledImageDepthSampleCounts = sample_counts, - .sampledImageStencilSampleCounts = sample_counts, - .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = true, - .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency, - .maxClipDistances = 8, - .maxCullDistances = 8, - .maxCombinedClipAndCullDistances = 8, - .discreteQueuePriorities = 2, - .pointSizeRange = { 0.125, 255.875 }, - /* While SKL and up support much wider lines than we are setting here, - * in practice we run into conformance issues if we go past this limit. - * Since the Windows driver does the same, it's probably fair to assume - * that no one needs more than this. - */ - .lineWidthRange = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 }, - .pointSizeGranularity = (1.0 / 8.0), - .lineWidthGranularity = (1.0 / 128.0), - .strictLines = false, - .standardSampleLocations = true, - .optimalBufferCopyOffsetAlignment = 128, - .optimalBufferCopyRowPitchAlignment = 128, - .nonCoherentAtomSize = 64, - }; - - *pProperties = (VkPhysicalDeviceProperties) { -#ifdef ANDROID_STRICT - .apiVersion = ANV_API_VERSION, -#else - .apiVersion = (pdevice->use_softpin || pdevice->instance->report_vk_1_3) ? - ANV_API_VERSION_1_3 : ANV_API_VERSION_1_2, -#endif - .driverVersion = vk_get_driver_version(), - .vendorID = 0x8086, - .deviceID = pdevice->info.pci_device_id, - .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, - .limits = limits, - .sparseProperties = {0}, /* Broadwell doesn't do sparse. */ - }; - - snprintf(pProperties->deviceName, sizeof(pProperties->deviceName), - "%s", pdevice->info.name); - memcpy(pProperties->pipelineCacheUUID, - pdevice->pipeline_cache_uuid, VK_UUID_SIZE); -} - -static void -anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice, - VkPhysicalDeviceVulkan11Properties *p) -{ - assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES); - - memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); - memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); - memset(p->deviceLUID, 0, VK_LUID_SIZE); - p->deviceNodeMask = 0; - p->deviceLUIDValid = false; - - p->subgroupSize = ELK_SUBGROUP_SIZE; - VkShaderStageFlags scalar_stages = 0; - for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) { - if (pdevice->compiler->scalar_stage[stage]) - scalar_stages |= mesa_to_vk_shader_stage(stage); - } - p->subgroupSupportedStages = scalar_stages; - p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | - VK_SUBGROUP_FEATURE_VOTE_BIT | - VK_SUBGROUP_FEATURE_BALLOT_BIT | - VK_SUBGROUP_FEATURE_SHUFFLE_BIT | - VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | - VK_SUBGROUP_FEATURE_QUAD_BIT; - if (pdevice->info.ver >= 8) { - /* TODO: There's no technical reason why these can't be made to - * work on gfx7 but they don't at the moment so it's best to leave - * the feature disabled than enabled and broken. - */ - p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | - VK_SUBGROUP_FEATURE_CLUSTERED_BIT; - } - p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8; - - p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY; - p->maxMultiviewViewCount = 16; - p->maxMultiviewInstanceIndex = UINT32_MAX / 16; - p->protectedNoFault = false; - /* This value doesn't matter for us today as our per-stage descriptors are - * the real limit. - */ - p->maxPerSetDescriptors = 1024; - p->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE; -} - -static void -anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice, - VkPhysicalDeviceVulkan12Properties *p) -{ - assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES); - - p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; - memset(p->driverName, 0, sizeof(p->driverName)); - snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, - "Intel open-source Mesa driver"); - memset(p->driverInfo, 0, sizeof(p->driverInfo)); - snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, - "Mesa " PACKAGE_VERSION MESA_GIT_SHA1); - - /* Don't advertise conformance with a particular version if the hardware's - * support is incomplete/alpha. - */ - if (pdevice->is_alpha) { - p->conformanceVersion = (VkConformanceVersion) { - .major = 0, - .minor = 0, - .subminor = 0, - .patch = 0, - }; - } - else { - p->conformanceVersion = (VkConformanceVersion) { - .major = 1, - .minor = pdevice->use_softpin ? 3 : 2, - .subminor = 0, - .patch = 0, - }; - } - - p->denormBehaviorIndependence = - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL; - p->roundingModeIndependence = - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE; - - /* Broadwell does not support HF denorms and there are restrictions - * other gens. According to Kabylake's PRM: - * - * "math - Extended Math Function - * [...] - * Restriction : Half-float denorms are always retained." - */ - p->shaderDenormFlushToZeroFloat16 = false; - p->shaderDenormPreserveFloat16 = pdevice->info.ver > 8; - p->shaderRoundingModeRTEFloat16 = true; - p->shaderRoundingModeRTZFloat16 = true; - p->shaderSignedZeroInfNanPreserveFloat16 = true; - - p->shaderDenormFlushToZeroFloat32 = true; - p->shaderDenormPreserveFloat32 = pdevice->info.ver >= 8; - p->shaderRoundingModeRTEFloat32 = true; - p->shaderRoundingModeRTZFloat32 = true; - p->shaderSignedZeroInfNanPreserveFloat32 = true; - - p->shaderDenormFlushToZeroFloat64 = true; - p->shaderDenormPreserveFloat64 = true; - p->shaderRoundingModeRTEFloat64 = true; - p->shaderRoundingModeRTZFloat64 = true; - p->shaderSignedZeroInfNanPreserveFloat64 = true; - - /* It's a bit hard to exactly map our implementation to the limits - * described by Vulkan. The bindless surface handle in the extended - * message descriptors is 20 bits and it's an index into the table of - * RENDER_SURFACE_STATE structs that starts at bindless surface base - * address. This means that we can have at must 1M surface states - * allocated at any given time. Since most image views take two - * descriptors, this means we have a limit of about 500K image views. - * - * However, since we allocate surface states at vkCreateImageView time, - * this means our limit is actually something on the order of 500K image - * views allocated at any time. The actual limit describe by Vulkan, on - * the other hand, is a limit of how many you can have in a descriptor set. - * Assuming anyone using 1M descriptors will be using the same image view - * twice a bunch of times (or a bunch of null descriptors), we can safely - * advertise a larger limit here. - */ - const unsigned max_bindless_views = 1 << 20; - p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views; - p->shaderUniformBufferArrayNonUniformIndexingNative = false; - p->shaderSampledImageArrayNonUniformIndexingNative = false; - p->shaderStorageBufferArrayNonUniformIndexingNative = true; - p->shaderStorageImageArrayNonUniformIndexingNative = false; - p->shaderInputAttachmentArrayNonUniformIndexingNative = false; - p->robustBufferAccessUpdateAfterBind = true; - p->quadDivergentImplicitLod = false; - p->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views; - p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS; - p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX; - p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views; - p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views; - p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS; - p->maxPerStageUpdateAfterBindResources = UINT32_MAX; - p->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views; - p->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS; - p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2; - p->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX; - p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2; - p->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views; - p->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views; - p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS; - - /* We support all of the depth resolve modes */ - p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | - VK_RESOLVE_MODE_AVERAGE_BIT | - VK_RESOLVE_MODE_MIN_BIT | - VK_RESOLVE_MODE_MAX_BIT; - /* Average doesn't make sense for stencil so we don't support that */ - p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; - if (pdevice->info.ver >= 8) { - /* The advanced stencil resolve modes currently require stencil - * sampling be supported by the hardware. - */ - p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT | - VK_RESOLVE_MODE_MAX_BIT; - } - p->independentResolveNone = true; - p->independentResolve = true; - - p->filterMinmaxSingleComponentFormats = false; - p->filterMinmaxImageComponentMapping = false; - - p->maxTimelineSemaphoreValueDifference = UINT64_MAX; - - p->framebufferIntegerColorSampleCounts = - pdevice->info.ver == 7 ? VK_SAMPLE_COUNT_1_BIT : isl_device_get_sample_counts(&pdevice->isl_dev); -} - -static void -anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice, - VkPhysicalDeviceVulkan13Properties *p) -{ - assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES); - - p->minSubgroupSize = 8; - p->maxSubgroupSize = 32; - p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads; - p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT; - - p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE; - p->maxPerStageDescriptorInlineUniformBlocks = - MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS; - p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = - MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS; - p->maxDescriptorSetInlineUniformBlocks = - MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS; - p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = - MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS; - p->maxInlineUniformTotalSize = UINT16_MAX; - - p->integerDotProduct8BitUnsignedAccelerated = false; - p->integerDotProduct8BitSignedAccelerated = false; - p->integerDotProduct8BitMixedSignednessAccelerated = false; - p->integerDotProduct4x8BitPackedUnsignedAccelerated = false; - p->integerDotProduct4x8BitPackedSignedAccelerated = false; - p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false; - p->integerDotProduct16BitUnsignedAccelerated = false; - p->integerDotProduct16BitSignedAccelerated = false; - p->integerDotProduct16BitMixedSignednessAccelerated = false; - p->integerDotProduct32BitUnsignedAccelerated = false; - p->integerDotProduct32BitSignedAccelerated = false; - p->integerDotProduct32BitMixedSignednessAccelerated = false; - p->integerDotProduct64BitUnsignedAccelerated = false; - p->integerDotProduct64BitSignedAccelerated = false; - p->integerDotProduct64BitMixedSignednessAccelerated = false; - p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false; - p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false; - p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false; - p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false; - p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false; - p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false; - p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false; - p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false; - p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false; - p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false; - p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false; - p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false; - p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false; - p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false; - p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false; - - /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface - * Base Address: - * - * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field - * specifies the base address of the first element of the surface, - * computed in software by adding the surface base address to the - * byte offset of the element in the buffer. The base address must - * be aligned to element size." - * - * The typed dataport messages require that things be texel aligned. - * Otherwise, we may just load/store the wrong data or, in the worst - * case, there may be hangs. - */ - p->storageTexelBufferOffsetAlignmentBytes = 16; - p->storageTexelBufferOffsetSingleTexelAlignment = true; - - /* The sampler, however, is much more forgiving and it can handle - * arbitrary byte alignment for linear and buffer surfaces. It's - * hard to find a good PRM citation for this but years of empirical - * experience demonstrate that this is true. - */ - p->uniformTexelBufferOffsetAlignmentBytes = 1; - p->uniformTexelBufferOffsetSingleTexelAlignment = true; - - p->maxBufferSize = pdevice->isl_dev.max_buffer_size; -} - -void anv_GetPhysicalDeviceProperties2( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2* pProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - - anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); - - VkPhysicalDeviceVulkan11Properties core_1_1 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES, - }; - anv_get_physical_device_properties_1_1(pdevice, &core_1_1); - - VkPhysicalDeviceVulkan12Properties core_1_2 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES, - }; - anv_get_physical_device_properties_1_2(pdevice, &core_1_2); - - VkPhysicalDeviceVulkan13Properties core_1_3 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES, - }; - anv_get_physical_device_properties_1_3(pdevice, &core_1_3); - - vk_foreach_struct(ext, pProperties->pNext) { - if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1)) - continue; - if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2)) - continue; - if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3)) - continue; - - switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: { - VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties = - (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext; - properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: { - VkPhysicalDeviceDrmPropertiesEXT *props = - (VkPhysicalDeviceDrmPropertiesEXT *)ext; - - props->hasPrimary = pdevice->has_master; - props->primaryMajor = pdevice->master_major; - props->primaryMinor = pdevice->master_minor; - - props->hasRender = pdevice->has_local; - props->renderMajor = pdevice->local_major; - props->renderMinor = pdevice->local_minor; - - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: { - VkPhysicalDeviceExternalMemoryHostPropertiesEXT *props = - (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext; - /* Userptr needs page aligned memory. */ - props->minImportedHostPointerAlignment = 4096; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: { - VkPhysicalDeviceLineRasterizationPropertiesEXT *props = - (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext; - /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) - * Sampling Rules - Legacy Mode", it says the following: - * - * "Note that the device divides a pixel into a 16x16 array of - * subpixels, referenced by their upper left corners." - * - * This is the only known reference in the PRMs to the subpixel - * precision of line rasterization and a "16x16 array of subpixels" - * implies 4 subpixel precision bits. Empirical testing has shown - * that 4 subpixel precision bits applies to all line rasterization - * types. - */ - props->lineSubPixelPrecisionBits = 4; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: { - VkPhysicalDeviceMaintenance4Properties *properties = - (VkPhysicalDeviceMaintenance4Properties *)ext; - properties->maxBufferSize = pdevice->isl_dev.max_buffer_size; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: { - VkPhysicalDevicePCIBusInfoPropertiesEXT *properties = - (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext; - properties->pciDomain = pdevice->info.pci_domain; - properties->pciBus = pdevice->info.pci_bus; - properties->pciDevice = pdevice->info.pci_dev; - properties->pciFunction = pdevice->info.pci_func; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: { - VkPhysicalDevicePerformanceQueryPropertiesKHR *properties = - (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext; - /* We could support this by spawning a shader to do the equation - * normalization. - */ - properties->allowCommandBufferQueryCopies = false; - break; - } - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wswitch" - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: { - VkPhysicalDevicePresentationPropertiesANDROID *props = - (VkPhysicalDevicePresentationPropertiesANDROID *)ext; - props->sharedImage = VK_FALSE; - break; - } -#pragma GCC diagnostic pop - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: { - VkPhysicalDeviceProvokingVertexPropertiesEXT *properties = - (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext; - properties->provokingVertexModePerPipeline = true; - properties->transformFeedbackPreservesTriangleFanProvokingVertex = false; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { - VkPhysicalDevicePushDescriptorPropertiesKHR *properties = - (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; - properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: { - VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext; - properties->robustStorageBufferAccessSizeAlignment = - ANV_SSBO_BOUNDS_CHECK_ALIGNMENT; - properties->robustUniformBufferAccessSizeAlignment = - ANV_UBO_ALIGNMENT; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: { - VkPhysicalDeviceSampleLocationsPropertiesEXT *props = - (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext; - - props->sampleLocationSampleCounts = - isl_device_get_sample_counts(&pdevice->isl_dev); - - /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */ - props->maxSampleLocationGridSize.width = 1; - props->maxSampleLocationGridSize.height = 1; - - props->sampleLocationCoordinateRange[0] = 0; - props->sampleLocationCoordinateRange[1] = 0.9375; - props->sampleLocationSubPixelBits = 4; - - props->variableSampleLocations = true; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: { - VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props = - (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext; - STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == - sizeof(props->shaderModuleIdentifierAlgorithmUUID)); - memcpy(props->shaderModuleIdentifierAlgorithmUUID, - vk_shaderModuleIdentifierAlgorithmUUID, - sizeof(props->shaderModuleIdentifierAlgorithmUUID)); - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: { - VkPhysicalDeviceTransformFeedbackPropertiesEXT *props = - (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext; - - props->maxTransformFeedbackStreams = MAX_XFB_STREAMS; - props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS; - props->maxTransformFeedbackBufferSize = (1ull << 32); - props->maxTransformFeedbackStreamDataSize = 128 * 4; - props->maxTransformFeedbackBufferDataSize = 128 * 4; - props->maxTransformFeedbackBufferDataStride = 2048; - props->transformFeedbackQueries = true; - props->transformFeedbackStreamsLinesTriangles = false; - props->transformFeedbackRasterizationStreamSelect = false; - /* This requires MI_MATH */ - props->transformFeedbackDraw = pdevice->info.verx10 >= 75; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { - VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props = - (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext; - /* We have to restrict this a bit for multiview */ - props->maxVertexAttribDivisor = UINT32_MAX / 16; - break; - } - - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: { - VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext; - props->maxMultiDrawCount = 2048; - break; - } - - default: - anv_debug_ignored_stype(ext->sType); - break; - } - } -} - static int vk_priority_to_gen(int priority) {