mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-09 23:08:18 +02:00
radv: implement VK_KHR_device_fault
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41648>
This commit is contained in:
parent
f6b06ea3de
commit
7dcd2a4c87
1 changed files with 104 additions and 34 deletions
|
|
@ -1880,38 +1880,67 @@ radv_GetDeviceImageSubresourceLayout(VkDevice device, const VkDeviceImageSubreso
|
|||
radv_DestroyImage(device, image, NULL);
|
||||
}
|
||||
|
||||
static VkDeviceFaultAddressInfoKHR
|
||||
radv_get_device_fault_addr_info(struct radv_device *device, bool *vm_fault_occurred)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_winsys_gpuvm_fault_info fault_info = {0};
|
||||
VkDeviceFaultAddressInfoKHR addr_fault_info = {0};
|
||||
|
||||
*vm_fault_occurred = radv_vm_fault_occurred(device, &fault_info);
|
||||
|
||||
if (*vm_fault_occurred) {
|
||||
addr_fault_info.reportedAddress = ((int64_t)fault_info.addr << 16) >> 16;
|
||||
addr_fault_info.addressPrecision = 4096; /* 4K page granularity */
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_KHR
|
||||
: VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_KHR;
|
||||
} else {
|
||||
/* Not sure how to get the access status on GFX6-9. */
|
||||
addr_fault_info.addressType = VK_DEVICE_FAULT_ADDRESS_TYPE_NONE_KHR;
|
||||
}
|
||||
}
|
||||
|
||||
return addr_fault_info;
|
||||
}
|
||||
|
||||
static VkDeviceFaultVendorBinaryHeaderVersionOneKHR
|
||||
radv_get_device_fault_vendor_binary_header(struct radv_device *device)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
VkDeviceFaultVendorBinaryHeaderVersionOneKHR hdr;
|
||||
|
||||
hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneKHR);
|
||||
hdr.headerVersion = VK_DEVICE_FAULT_VENDOR_BINARY_HEADER_VERSION_ONE_KHR;
|
||||
hdr.vendorID = pdev->vk.properties.vendorID;
|
||||
hdr.deviceID = pdev->vk.properties.deviceID;
|
||||
hdr.driverVersion = pdev->vk.properties.driverVersion;
|
||||
memcpy(hdr.pipelineCacheUUID, pdev->cache_uuid, VK_UUID_SIZE);
|
||||
hdr.applicationNameOffset = 0;
|
||||
hdr.applicationVersion = instance->vk.app_info.app_version;
|
||||
hdr.engineNameOffset = 0;
|
||||
hdr.engineVersion = instance->vk.app_info.engine_version;
|
||||
hdr.apiVersion = instance->vk.app_info.api_version;
|
||||
|
||||
return hdr;
|
||||
}
|
||||
|
||||
/* VK_EXT_device_fault */
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCounts, VkDeviceFaultInfoEXT *pFaultInfo)
|
||||
{
|
||||
VK_OUTARRAY_MAKE_TYPED(VkDeviceFaultAddressInfoEXT, out, pFaultInfo ? pFaultInfo->pAddressInfos : NULL,
|
||||
VK_OUTARRAY_MAKE_TYPED(VkDeviceFaultAddressInfoKHR, out, pFaultInfo ? pFaultInfo->pAddressInfos : NULL,
|
||||
&pFaultCounts->addressInfoCount);
|
||||
struct radv_winsys_gpuvm_fault_info fault_info = {0};
|
||||
VK_FROM_HANDLE(radv_device, device, _device);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
bool vm_fault_occurred = false;
|
||||
|
||||
/* Query if a GPUVM fault happened. */
|
||||
vm_fault_occurred = radv_vm_fault_occurred(device, &fault_info);
|
||||
|
||||
pFaultCounts->vendorInfoCount = 0;
|
||||
pFaultCounts->vendorBinarySize = 0;
|
||||
|
||||
if (device->gpu_hang_report) {
|
||||
VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr;
|
||||
|
||||
hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT);
|
||||
hdr.headerVersion = VK_DEVICE_FAULT_VENDOR_BINARY_HEADER_VERSION_ONE_EXT;
|
||||
hdr.vendorID = pdev->vk.properties.vendorID;
|
||||
hdr.deviceID = pdev->vk.properties.deviceID;
|
||||
hdr.driverVersion = pdev->vk.properties.driverVersion;
|
||||
memcpy(hdr.pipelineCacheUUID, pdev->cache_uuid, VK_UUID_SIZE);
|
||||
hdr.applicationNameOffset = 0;
|
||||
hdr.applicationVersion = instance->vk.app_info.app_version;
|
||||
hdr.engineNameOffset = 0;
|
||||
hdr.engineVersion = instance->vk.app_info.engine_version;
|
||||
hdr.apiVersion = instance->vk.app_info.api_version;
|
||||
VkDeviceFaultVendorBinaryHeaderVersionOneKHR hdr = radv_get_device_fault_vendor_binary_header(device);
|
||||
|
||||
pFaultCounts->vendorBinarySize = sizeof(hdr) + strlen(device->gpu_hang_report);
|
||||
if (pFaultInfo) {
|
||||
|
|
@ -1921,24 +1950,65 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
|
|||
}
|
||||
}
|
||||
|
||||
if (vm_fault_occurred) {
|
||||
VkDeviceFaultAddressInfoEXT addr_fault_info = {
|
||||
.reportedAddress = ((int64_t)fault_info.addr << 16) >> 16,
|
||||
.addressPrecision = 4096, /* 4K page granularity */
|
||||
};
|
||||
VkDeviceFaultAddressInfoKHR addr_fault_info = radv_get_device_fault_addr_info(device, &vm_fault_occurred);
|
||||
|
||||
if (vm_fault_occurred) {
|
||||
if (pFaultInfo)
|
||||
strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description));
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT
|
||||
: VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT;
|
||||
} else {
|
||||
/* Not sure how to get the access status on GFX6-9. */
|
||||
addr_fault_info.addressType = VK_DEVICE_FAULT_ADDRESS_TYPE_NONE_EXT;
|
||||
}
|
||||
vk_outarray_append_typed(VkDeviceFaultAddressInfoEXT, &out, elem) *elem = addr_fault_info;
|
||||
vk_outarray_append_typed(VkDeviceFaultAddressInfoKHR, &out, elem) *elem = addr_fault_info;
|
||||
}
|
||||
|
||||
return vk_outarray_status(&out);
|
||||
}
|
||||
|
||||
/* VK_KHR_device_fault */
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
radv_GetDeviceFaultDebugInfoKHR(VkDevice _device, VkDeviceFaultDebugInfoKHR *pDebugInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(radv_device, device, _device);
|
||||
|
||||
pDebugInfo->vendorBinarySize = 0;
|
||||
|
||||
if (device->gpu_hang_report) {
|
||||
VkDeviceFaultVendorBinaryHeaderVersionOneKHR hdr = radv_get_device_fault_vendor_binary_header(device);
|
||||
|
||||
pDebugInfo->vendorBinarySize = sizeof(hdr) + strlen(device->gpu_hang_report);
|
||||
if (pDebugInfo->pVendorBinaryData) {
|
||||
memcpy(pDebugInfo->pVendorBinaryData, &hdr, sizeof(hdr));
|
||||
memcpy((char *)pDebugInfo->pVendorBinaryData + sizeof(hdr), device->gpu_hang_report,
|
||||
strlen(device->gpu_hang_report));
|
||||
}
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
radv_GetDeviceFaultReportsKHR(VkDevice _device, uint64_t timeout, uint32_t *pFaultCounts,
|
||||
VkDeviceFaultInfoKHR *pFaultInfo)
|
||||
{
|
||||
VK_OUTARRAY_MAKE_TYPED(VkDeviceFaultInfoKHR, out, pFaultInfo, pFaultCounts);
|
||||
VK_FROM_HANDLE(radv_device, device, _device);
|
||||
VkDeviceFaultAddressInfoKHR addr_fault_info;
|
||||
bool vm_fault_occurred = false;
|
||||
bool timed_out = false;
|
||||
|
||||
uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
|
||||
do {
|
||||
addr_fault_info = radv_get_device_fault_addr_info(device, &vm_fault_occurred);
|
||||
} while (timeout > 0 && !vm_fault_occurred && !(timed_out = (abs_timeout < os_time_get_nano())));
|
||||
|
||||
if (!vm_fault_occurred)
|
||||
return VK_TIMEOUT;
|
||||
|
||||
VkDeviceFaultInfoKHR fault_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_KHR,
|
||||
.flags = VK_DEVICE_FAULT_FLAG_MEMORY_ADDRESS_KHR,
|
||||
.faultAddressInfo = addr_fault_info,
|
||||
};
|
||||
strncpy(fault_info.description, "A GPUVM fault has been detected", sizeof(fault_info.description));
|
||||
|
||||
vk_outarray_append_typed(VkDeviceFaultInfoKHR, &out, elem) *elem = fault_info;
|
||||
|
||||
return vk_outarray_status(&out);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue