/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <fcntl.h>
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/drm.h"
#include <xf86drm.h>

#include "anv_private.h"
#include "anv_measure.h"
#include "anv_shader.h"
#include "anv_slab_bo.h"
#include "util/u_debug.h"
#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_atomic.h"
#include "util/u_string.h"
#include "vk_common_entrypoints.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "vk_drm_syncobj.h"
#include "common/intel_aux_map.h"
#include "common/intel_common.h"
#include "common/intel_debug_identifier.h"

#include "i915/anv_device.h"
#include "xe/anv_device.h"

#include "genxml/gen70_pack.h"
#include "genxml/genX_bits.h"
#include "wsi_common_private.h"

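/* Default border color palette, indexed by VkBorderColor. It gets uploaded
 * into the dynamic state pool once at device creation (see
 * anv_device_init_border_colors below) so samplers can reference the colors
 * by fixed offsets.
 */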
const struct gfx8_border_color anv_default_border_colors[] = {
   [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
   [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
   [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
   [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
   [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
   [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
};

static void
anv_device_init_border_colors(struct anv_device *device)
{
   device->border_colors =
      anv_state_pool_emit_data(&device->dynamic_state_pool,
                               sizeof(anv_default_border_colors),
                               64, anv_default_border_colors);
}

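/* Build a minimal, valid batch containing just MI_BATCH_BUFFER_END followed
 * by MI_NOOP, usable wherever a submission needs a batch but has no real
 * work to execute.
 */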
static VkResult
anv_device_init_trivial_batch(struct anv_device *device)
{
   VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
                                         ANV_BO_ALLOC_BATCH_BUFFER_INTERNAL_FLAGS,
                                         0 /* explicit_address */,
                                         &device->trivial_batch_bo);
   ANV_DMR_BO_ALLOC(&device->vk.base, device->trivial_batch_bo, result);
   if (result != VK_SUCCESS)
      return result;

   struct anv_batch batch = {
      .start = device->trivial_batch_bo->map,
      .next = device->trivial_batch_bo->map,
      .end = device->trivial_batch_bo->map + 4096,
   };

   anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
   anv_batch_emit(&batch, GFX7_MI_NOOP, noop);

   return VK_SUCCESS;
}

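/* Helpers for the batch decoder: map a 48-bit GPU address back to the BO
 * (and its CPU mapping) that contains it.
 */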
static bool
get_bo_from_pool(struct intel_batch_decode_bo *ret,
                 struct anv_block_pool *pool,
                 uint64_t address)
{
   anv_block_pool_foreach_bo(bo, pool) {
      uint64_t bo_address = intel_48b_address(bo->offset);
      if (address >= bo_address && address < (bo_address + bo->size)) {
         *ret = (struct intel_batch_decode_bo) {
            .addr = bo_address,
            .size = bo->size,
            .map = bo->map,
         };
         return true;
      }
   }
   return false;
}

/* Shader heap: find the backing BO for a GPU VA */
static bool
get_bo_from_shader_heap(struct intel_batch_decode_bo *ret,
                        const struct anv_device *device,
                        uint64_t address)
{
   unsigned i;
   BITSET_FOREACH_SET(i, device->shader_heap.allocated_bos, ANV_SHADER_HEAP_MAX_BOS) {
      struct anv_bo *bo = device->shader_heap.bos[i].bo;

      /* Match the 48b-addressing convention used elsewhere */
      uint64_t base = intel_48b_address(bo->offset);
      uint64_t size = bo->size;

      if (address >= base && address < base + size) {
         *ret = (struct intel_batch_decode_bo) {
            .addr = base,
            .size = size,
            .map = bo->map,
         };
         return true;
      }
   }
   return false;
}

/* Find the buffer backing a GPU address for batch decoding. */
static struct intel_batch_decode_bo
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
{
   struct anv_device *device = v_batch;
   struct intel_batch_decode_bo ret_bo = {};

   assert(ppgtt);

   if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
      return ret_bo;
   if (get_bo_from_shader_heap(&ret_bo, device, address))
      return ret_bo;
   if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
      return ret_bo;
   if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
      return ret_bo;
   if (device->physical->indirect_descriptors &&
       get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
      return ret_bo;
   if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
      return ret_bo;
   if (device->physical->indirect_descriptors &&
       get_bo_from_pool(&ret_bo, &device->indirect_push_descriptor_pool.block_pool, address))
      return ret_bo;
   if (device->info->has_aux_map &&
       get_bo_from_pool(&ret_bo, &device->aux_tt_pool.block_pool, address))
      return ret_bo;

   if (!device->cmd_buffer_being_decoded)
      return (struct intel_batch_decode_bo) { };

   struct anv_batch_bo **bbo;
   u_vector_foreach(bbo, &device->cmd_buffer_being_decoded->seen_bbos) {
      struct anv_bo *bo = (*bbo)->bo;
      /* The decoder zeroes out the top 16 bits, so we need to as well */
      uint64_t bo_address = bo->offset & (~0ull >> 16);

      if (address >= bo_address &&
          address < (bo_address + bo->size)) {
         return (struct intel_batch_decode_bo) {
            .addr = bo_address,
            .size = bo->size,
            .map = bo->map,
         };
      }
   }

   u_vector_foreach(bbo, &device->cmd_buffer_being_decoded->seen_bbos) {
      uint32_t dep_words = (*bbo)->relocs.dep_words;
      BITSET_WORD *deps = (*bbo)->relocs.deps;
      for (uint32_t w = 0; w < dep_words; w++) {
         BITSET_WORD mask = deps[w];
         while (mask) {
            int i = u_bit_scan(&mask);
            uint32_t gem_handle = w * BITSET_WORDBITS + i;
            struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
            assert(bo->refcount > 0);
            uint64_t bo_address = bo->offset & (~0ull >> 16);
            if (address >= bo_address && address < bo_address + bo->size) {
               return (struct intel_batch_decode_bo) {
                  .addr = bo_address,
                  .size = bo->size,
                  .map = bo->map,
               };
            }
         }
      }
   }

   return (struct intel_batch_decode_bo) { };
}

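/* Allocator callbacks handed to the common intel_aux_map code; aux-map
 * tables are carved out of the device's aux_tt_pool.
 */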
struct intel_aux_map_buffer {
   struct intel_buffer base;
   struct anv_state state;
};

static struct intel_buffer *
intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
{
   struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
   if (!buf)
      return NULL;

   struct anv_device *device = (struct anv_device*)driver_ctx;

   struct anv_state_pool *pool = &device->aux_tt_pool;
   buf->state = anv_state_pool_alloc(pool, size, size);

   buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
   buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
   buf->base.map = buf->state.map;
   buf->base.driver_bo = &buf->state;
   return &buf->base;
}

static void
intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
{
   struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
   struct anv_device *device = (struct anv_device*)driver_ctx;
   struct anv_state_pool *pool = &device->aux_tt_pool;
   anv_state_pool_free(pool, buf->state);
   free(buf);
}

static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
   .alloc = intel_aux_map_buffer_alloc,
   .free = intel_aux_map_buffer_free,
};

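/* The two kernel drivers differ here: i915 submissions go through a GEM
 * context while Xe uses an explicit VM, so create/destroy whichever the
 * device's KMD requires.
 */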
static VkResult
anv_device_setup_context_or_vm(struct anv_device *device,
                               const VkDeviceCreateInfo *pCreateInfo,
                               const uint32_t num_queues)
{
   switch (device->info->kmd_type) {
   case INTEL_KMD_TYPE_I915:
      return anv_i915_device_setup_context(device, pCreateInfo, num_queues);
   case INTEL_KMD_TYPE_XE:
      return anv_xe_device_setup_vm(device);
   default:
      UNREACHABLE("Missing");
      return VK_ERROR_UNKNOWN;
   }
}

static bool
anv_device_destroy_context_or_vm(struct anv_device *device)
{
   switch (device->info->kmd_type) {
   case INTEL_KMD_TYPE_I915:
      if (device->physical->has_vm_control)
         return anv_i915_device_destroy_vm(device);
      else
         return intel_gem_destroy_context(device->fd, device->context_id);
   case INTEL_KMD_TYPE_XE:
      return anv_xe_device_destroy_vm(device);
   default:
      UNREACHABLE("Missing");
      return false;
   }
}

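/* TR-TT is the translation-table based sparse binding path used when the
 * physical device reports ANV_SPARSE_TYPE_TRTT. Set up the timeline sync
 * object and the bookkeeping used to track in-flight page-table update
 * batches.
 */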
static VkResult
anv_device_init_trtt(struct anv_device *device)
{
   if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
       !device->vk.enabled_features.sparseBinding)
      return VK_SUCCESS;

   struct anv_trtt *trtt = &device->trtt;

   VkResult result =
      vk_sync_create(&device->vk,
                     &device->physical->sync_syncobj_type,
                     VK_SYNC_IS_TIMELINE,
                     0 /* initial_value */,
                     &trtt->timeline);
   if (result != VK_SUCCESS)
      return result;

   simple_mtx_init(&trtt->mutex, mtx_plain);

   list_inithead(&trtt->in_flight_batches);

   return VK_SUCCESS;
}

static void
anv_device_finish_trtt(struct anv_device *device)
{
   if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
       !device->vk.enabled_features.sparseBinding)
      return;

   struct anv_trtt *trtt = &device->trtt;

   anv_sparse_trtt_garbage_collect_batches(device, true);

   vk_sync_destroy(&device->vk, trtt->timeline);

   simple_mtx_destroy(&trtt->mutex);

   vk_free(&device->vk.alloc, trtt->l3_mirror);
   vk_free(&device->vk.alloc, trtt->l2_mirror);

   for (int i = 0; i < trtt->num_page_table_bos; i++) {
      struct anv_bo *bo = trtt->page_table_bos[i];
      ANV_DMR_BO_FREE(&device->vk.base, bo);
      anv_device_release_bo(device, trtt->page_table_bos[i]);
   }

   vk_free(&device->vk.alloc, trtt->page_table_bos);
}

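/* On platforms with LSC, allocate two RAW buffer surface states: one
 * covering the descriptor-buffer VA ranges (dynamic visible + push
 * descriptor buffer pools) and one covering the surface-state VA ranges
 * (internal + bindless pools).
 */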
static void
anv_device_init_descriptors_view(struct anv_device *device)
{
   if (!device->info->has_lsc)
      return;

   struct anv_physical_device *pdevice = device->physical;

   /* For descriptor buffers */
   {
      device->descriptor_buffer_view_state =
         anv_state_pool_alloc(&device->scratch_surface_state_pool,
                              device->isl_dev.ss.size, 64);

      const uint64_t size = pdevice->va.dynamic_visible_pool.size +
                            pdevice->va.push_descriptor_buffer_pool.size;
      assert(size <= 4ull * 1024 * 1024 * 1024);

      isl_buffer_fill_state(&device->isl_dev,
                            device->descriptor_buffer_view_state.map,
                            .address = pdevice->va.dynamic_visible_pool.addr,
                            .size_B = size,
                            .mocs = anv_mocs(device, NULL, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT),
                            .format = ISL_FORMAT_RAW,
                            .swizzle = ISL_SWIZZLE_IDENTITY,
                            .stride_B = 1,
                            .is_scratch = false,
                            .usage = ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
   }

   /* For descriptors */
   {
      device->descriptor_view_state =
         anv_state_pool_alloc(&device->scratch_surface_state_pool,
                              device->isl_dev.ss.size, 64);

      const uint64_t size =
         pdevice->va.internal_surface_state_pool.size +
         pdevice->va.bindless_surface_state_pool.size;

      isl_buffer_fill_state(&device->isl_dev,
                            device->descriptor_view_state.map,
                            .address = pdevice->va.internal_surface_state_pool.addr,
                            .size_B = size,
                            .mocs = anv_mocs(device, NULL, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT),
                            .format = ISL_FORMAT_RAW,
                            .swizzle = ISL_SWIZZLE_IDENTITY,
                            .stride_B = 1,
                            .is_scratch = false,
                            .usage = ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
   }
}

static void
anv_device_finish_descriptors_view(struct anv_device *device)
{
   if (!device->info->has_lsc)
      return;

   anv_state_pool_free(&device->scratch_surface_state_pool,
                       device->descriptor_buffer_view_state);
   anv_state_pool_free(&device->scratch_surface_state_pool,
                       device->descriptor_view_state);
}

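/* Device creation proceeds roughly as: validate the queue create infos,
 * build the dispatch table (with app-specific workaround layers), open the
 * DRM fd and set up the kernel context/VM, initialize the VA heaps and
 * state pools, allocate the internal BOs, then create the queues. The
 * fail_* labels at the bottom unwind in exactly the reverse order.
 */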
VkResult anv_CreateDevice(
    VkPhysicalDevice                            physicalDevice,
    const VkDeviceCreateInfo*                   pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDevice*                                   pDevice)
{
   anv_wait_for_attach();
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VkResult result;
   struct anv_device *device;
   bool device_has_compute_queue = false;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

   /* Check requested queues and fail if we are requested to create any
    * queues with flags we don't support.
    */
   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      if (pCreateInfo->pQueueCreateInfos[i].flags & ~(VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT |
                                                      VK_DEVICE_QUEUE_CREATE_INTERNALLY_SYNCHRONIZED_BIT_KHR))
         return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);

      const struct anv_queue_family *family =
         &physical_device->queue.families[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex];
      device_has_compute_queue |= family->engine_class == INTEL_ENGINE_CLASS_COMPUTE;
   }

   device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
                       sizeof(*device), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);

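   /* Assemble the dispatch table: app-specific workaround entrypoints
    * (HITMAN3, DOOM 64, GeeXLab), plus Android and RMV-tracing layers when
    * applicable, then the per-generation table, with the common anv and WSI
    * entrypoints filling the remaining slots.
    */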
   struct vk_device_dispatch_table dispatch_table;

   bool override_initial_entrypoints = true;
   if (physical_device->instance->vk.app_info.app_name &&
       !strcmp(physical_device->instance->vk.app_info.app_name, "HITMAN3.exe")) {
      vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                &anv_hitman3_device_entrypoints,
                                                true);
      override_initial_entrypoints = false;
   }
   if (physical_device->info.ver < 12 &&
       physical_device->instance->vk.app_info.app_name &&
       !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
      vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                &anv_doom64_device_entrypoints,
                                                true);
      override_initial_entrypoints = false;
   }

   if (physical_device->info.ver < 12 &&
       physical_device->instance->vk.app_info.app_name &&
       !strcmp(physical_device->instance->vk.app_info.app_name, "GeeXLab")) {
      vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                &anv_furmark_device_entrypoints,
                                                true);
      override_initial_entrypoints = false;
   }
#if DETECT_OS_ANDROID
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &anv_android_device_entrypoints,
                                             true);
   override_initial_entrypoints = false;
#endif
   if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
      vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                &anv_rmv_device_entrypoints,
                                                true);
      override_initial_entrypoints = false;
   }
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             anv_genX(&physical_device->info, device_entrypoints),
                                             override_initial_entrypoints);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &anv_device_entrypoints, false);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints, false);

   result = vk_device_init(&device->vk, &physical_device->vk,
                           &dispatch_table, pCreateInfo, pAllocator);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   device->vk.shader_ops = &anv_device_shader_ops;

   if (INTEL_DEBUG(DEBUG_BATCH) || INTEL_DEBUG(DEBUG_BATCH_STATS)) {
      for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
         struct intel_batch_decode_ctx *decoder = &device->decoder[i];

         const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;

         intel_batch_decode_ctx_init_brw(decoder,
                                         &physical_device->compiler->isa,
                                         &physical_device->info,
                                         stderr, decode_flags, NULL,
                                         decode_get_bo, NULL, device);
         intel_batch_stats_reset(decoder);

         decoder->engine = physical_device->queue.families[i].engine_class;
         decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
         decoder->surface_base = physical_device->va.internal_surface_state_pool.addr;
         decoder->instruction_base = physical_device->va.shader_heap.addr;
      }
   }

   anv_device_set_physical(device, physical_device);
   device->kmd_backend = anv_kmd_backend_get(device->info->kmd_type);

   /* XXX(chadv): Can we dup() physicalDevice->fd here? */
   device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
   if (device->fd == -1) {
      result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
      goto fail_device;
   }

   if (intel_virtio_init_fd(device->fd) < 0) {
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
      goto fail_fd;
   }

   switch (device->info->kmd_type) {
   case INTEL_KMD_TYPE_I915:
      device->vk.check_status = anv_i915_device_check_status;
      break;
   case INTEL_KMD_TYPE_XE:
      device->vk.check_status = anv_xe_device_check_status;
      break;
   default:
      UNREACHABLE("Missing");
   }

   device->vk.copy_sync_payloads = vk_drm_syncobj_copy_payloads;
   device->vk.command_buffer_ops = &anv_cmd_buffer_ops;

   if (physical_device->info.is_virtio)
      device->vk.sync = intel_virtio_sync_provider(device->fd);
   else
      vk_device_set_drm_fd(&device->vk, device->fd);

   uint32_t num_queues = 0;
   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
      num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;

   result = anv_device_setup_context_or_vm(device, pCreateInfo, num_queues);
   if (result != VK_SUCCESS)
      goto fail_fd;

   device->queues =
      vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (device->queues == NULL) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_context_id;
   }

   if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
      result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
      goto fail_queues_alloc;
   }

   /* keep the page with address zero out of the allocator */
   util_vma_heap_init(&device->vma_lo,
                      device->physical->va.low_heap.addr,
                      device->physical->va.low_heap.size);

   util_vma_heap_init(&device->vma_hi,
                      device->physical->va.high_heap.addr,
                      device->physical->va.high_heap.size);

   if (device->physical->indirect_descriptors) {
      util_vma_heap_init(&device->vma_desc,
                         device->physical->va.indirect_descriptor_pool.addr,
                         device->physical->va.indirect_descriptor_pool.size);
   } else {
      util_vma_heap_init(&device->vma_desc,
                         device->physical->va.bindless_surface_state_pool.addr,
                         device->physical->va.bindless_surface_state_pool.size);
   }

   /* Always initialized because the memory types point to this and they are
    * on the physical device.
    */
   util_vma_heap_init(&device->vma_dynamic_visible,
                      device->physical->va.dynamic_visible_pool.addr,
                      device->physical->va.dynamic_visible_pool.size);
   util_vma_heap_init(&device->vma_trtt,
                      device->physical->va.trtt.addr,
                      device->physical->va.trtt.size);

   list_inithead(&device->memory_objects);
   list_inithead(&device->image_private_objects);

   if (pthread_mutex_init(&device->mutex, NULL) != 0) {
      result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
      goto fail_vmas;
   }

   if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
      anv_memory_trace_init(device);

   result = anv_bo_cache_init(&device->bo_cache, device);
   if (result != VK_SUCCESS)
      goto fail_mutex;

   if (!anv_slab_bo_init(device))
      goto fail_cache;

   anv_bo_pool_init(&device->batch_bo_pool, device, "batch",
                    ANV_BO_ALLOC_BATCH_BUFFER_FLAGS);
   if (device->vk.enabled_extensions.KHR_acceleration_structure) {
      anv_bo_pool_init(&device->bvh_bo_pool, device, "bvh build",
                       0 /* alloc_flags */);
   }

   /* Because scratch is also relative to General State Base Address, we leave
    * the base address 0 and start the pool memory at an offset. This way we
    * get the correct offsets in the anv_states that get allocated from it.
    */
   result = anv_state_pool_init(&device->general_state_pool, device,
                                &(struct anv_state_pool_params) {
                                   .name = "general pool",
                                   .base_address = 0,
                                   .start_offset = device->physical->va.general_state_pool.addr,
                                   .block_size = 16384,
                                   .max_size = device->physical->va.general_state_pool.size
                                });
   if (result != VK_SUCCESS)
      goto fail_batch_bo_pool;

   result = anv_state_pool_init(&device->dynamic_state_pool, device,
                                &(struct anv_state_pool_params) {
                                   .name = "dynamic pool",
                                   .base_address = device->physical->va.dynamic_state_pool.addr,
                                   .block_size = 16384,
                                   .max_size = device->physical->va.dynamic_state_pool.size,
                                });
   if (result != VK_SUCCESS)
      goto fail_general_state_pool;

   /* The border color pointer is limited to 24 bits, so we need to make
    * sure that any such color used at any point in the program doesn't
    * exceed that limit.
    * We achieve that by reserving all the custom border colors we support
    * right off the bat, so they are close to the base address.
    */
   result = anv_state_reserved_array_pool_init(&device->custom_border_colors,
                                               &device->dynamic_state_pool,
                                               MAX_CUSTOM_BORDER_COLORS,
                                               sizeof(struct gfx8_border_color), 64);
   if (result != VK_SUCCESS)
      goto fail_dynamic_state_pool;

   result = anv_shader_heap_init(&device->shader_heap, device,
                                 device->physical->va.shader_heap,
                                 21 /* 2MiB */, 27 /* 64MiB */);
   if (result != VK_SUCCESS)
      goto fail_custom_border_color_pool;

   if (device->info->verx10 >= 125) {
      /* Put the scratch surface states at the beginning of the internal
       * surface state pool.
       */
      result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
                                   &(struct anv_state_pool_params) {
                                      .name = "scratch surface state pool",
                                      .base_address = device->physical->va.scratch_surface_state_pool.addr,
                                      .block_size = 4096,
                                      .max_size = device->physical->va.scratch_surface_state_pool.size,
                                   });
      if (result != VK_SUCCESS)
         goto fail_shader_vma_heap;

      result = anv_state_pool_init(&device->internal_surface_state_pool, device,
                                   &(struct anv_state_pool_params) {
                                      .name = "internal surface state pool",
                                      .base_address = device->physical->va.internal_surface_state_pool.addr,
                                      .start_offset = device->physical->va.scratch_surface_state_pool.size,
                                      .block_size = 4096,
                                      .max_size = device->physical->va.internal_surface_state_pool.size,
                                   });
   } else {
      result = anv_state_pool_init(&device->internal_surface_state_pool, device,
                                   &(struct anv_state_pool_params) {
                                      .name = "internal surface state pool",
                                      .base_address = device->physical->va.internal_surface_state_pool.addr,
                                      .block_size = 4096,
                                      .max_size = device->physical->va.internal_surface_state_pool.size,
                                   });
   }
   if (result != VK_SUCCESS)
      goto fail_scratch_surface_state_pool;

   if (device->physical->indirect_descriptors) {
      result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
                                   &(struct anv_state_pool_params) {
                                      .name = "bindless surface state pool",
                                      .base_address = device->physical->va.bindless_surface_state_pool.addr,
                                      .block_size = 4096,
                                      .max_size = device->physical->va.bindless_surface_state_pool.size,
                                   });
      if (result != VK_SUCCESS)
         goto fail_internal_surface_state_pool;
   }

   if (device->info->verx10 >= 125) {
      /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
       * table its own base address separately from surface state base.
       */
      result = anv_state_pool_init(&device->binding_table_pool, device,
                                   &(struct anv_state_pool_params) {
                                      .name = "binding table pool",
                                      .base_address = device->physical->va.binding_table_pool.addr,
                                      .block_size = device->physical->instance->binding_table_block_size,
                                      .max_size = device->physical->va.binding_table_pool.size,
                                   });
   } else {
      /* The binding table should be in front of the surface states in virtual
       * address space so that all surface states can be expressed as relative
       * offsets from the binding table location.
       */
      assert(device->physical->va.binding_table_pool.addr <
             device->physical->va.internal_surface_state_pool.addr);
      int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr -
                               (int64_t)device->physical->va.internal_surface_state_pool.addr;
      assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
      result = anv_state_pool_init(&device->binding_table_pool, device,
                                   &(struct anv_state_pool_params) {
                                      .name = "binding table pool",
                                      .base_address = device->physical->va.internal_surface_state_pool.addr,
                                      .start_offset = bt_pool_offset,
                                      .block_size = 64 * 1024,
                                      .max_size = device->physical->va.internal_surface_state_pool.size,
                                   });
   }
   if (result != VK_SUCCESS)
      goto fail_bindless_surface_state_pool;

   if (device->physical->indirect_descriptors) {
      result = anv_state_pool_init(&device->indirect_push_descriptor_pool, device,
                                   &(struct anv_state_pool_params) {
                                      .name = "indirect push descriptor pool",
                                      .base_address = device->physical->va.indirect_push_descriptor_pool.addr,
                                      .block_size = 4096,
                                      .max_size = device->physical->va.indirect_push_descriptor_pool.size,
                                   });
      if (result != VK_SUCCESS)
         goto fail_binding_table_pool;
   }

   if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
       device->info->verx10 >= 125) {
      /* On Gfx12.5+, because of the bindless stages (Mesh, Task, RT), the
       * only way we can wire push descriptors is through the bindless heap.
       * This state pool is a 1GiB carve-out of the 4GiB HW heap.
       */
      result = anv_state_pool_init(&device->push_descriptor_buffer_pool, device,
                                   &(struct anv_state_pool_params) {
                                      .name = "push descriptor buffer state pool",
                                      .base_address = device->physical->va.push_descriptor_buffer_pool.addr,
                                      .block_size = 4096,
                                      .max_size = device->physical->va.push_descriptor_buffer_pool.size,
                                   });
      if (result != VK_SUCCESS)
         goto fail_indirect_push_descriptor_pool;
   }

   if (device->info->has_aux_map) {
      result = anv_state_pool_init(&device->aux_tt_pool, device,
                                   &(struct anv_state_pool_params) {
                                      .name = "aux-tt pool",
                                      .base_address = device->physical->va.aux_tt_pool.addr,
                                      .block_size = 16384,
                                      .max_size = device->physical->va.aux_tt_pool.size,
                                   });
      if (result != VK_SUCCESS)
         goto fail_push_descriptor_buffer_pool;

      device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
                                               &physical_device->info);
      if (!device->aux_map_ctx)
         goto fail_aux_tt_pool;
   }

   result = anv_device_alloc_bo(device, "workaround", 8192,
                                ANV_BO_ALLOC_CAPTURE |
                                ANV_BO_ALLOC_HOST_COHERENT |
                                ANV_BO_ALLOC_MAPPED |
                                ANV_BO_ALLOC_INTERNAL,
                                0 /* explicit_address */,
                                &device->workaround_bo);
   ANV_DMR_BO_ALLOC(&device->vk.base, device->workaround_bo, result);
   if (result != VK_SUCCESS)
      goto fail_surface_aux_map_pool;

   if (intel_needs_workaround(device->info, 14019708328)) {
      result = anv_device_alloc_bo(device, "dummy_aux", 4096,
                                   0 /* alloc_flags */,
                                   0 /* explicit_address */,
                                   &device->dummy_aux_bo);
      ANV_DMR_BO_ALLOC(&device->vk.base, device->dummy_aux_bo, result);
      if (result != VK_SUCCESS)
         goto fail_alloc_device_bo;

      device->isl_dev.dummy_aux_address = device->dummy_aux_bo->offset;
   }

   /* Programming note from MI_MEM_FENCE specification:
    *
    *    Software must ensure STATE_SYSTEM_MEM_FENCE_ADDRESS command is
    *    programmed prior to programming this command.
    *
    * HAS 1607240579 then provides the size information: 4K
    */
   if (device->info->verx10 >= 200) {
      result = anv_device_alloc_bo(device, "mem_fence", 4096,
                                   ANV_BO_ALLOC_NO_LOCAL_MEM, 0,
                                   &device->mem_fence_bo);
      ANV_DMR_BO_ALLOC(&device->vk.base, device->mem_fence_bo, result);
      if (result != VK_SUCCESS)
         goto fail_alloc_device_bo;
   }

   struct anv_address wa_addr = (struct anv_address) {
      .bo = device->workaround_bo,
   };

   wa_addr = anv_address_add_aligned(wa_addr,
                                     intel_debug_write_identifiers(
                                        device->workaround_bo->map,
                                        device->workaround_bo->size,
                                        "Anv"), 32);

   device->rt_uuid_addr = wa_addr;
   memcpy(device->rt_uuid_addr.bo->map + device->rt_uuid_addr.offset,
          physical_device->rt_uuid,
          sizeof(physical_device->rt_uuid));

   /* Make sure the workaround address is the last one in the workaround BO,
    * so that writes never overwrite other bits of data stored in the
    * workaround BO.
    */
   wa_addr = anv_address_add_aligned(wa_addr,
                                     sizeof(physical_device->rt_uuid), 64);
   device->workaround_address = wa_addr;

   /* Make sure we don't overflow the allocated BO. */
   assert(device->workaround_address.offset < device->workaround_bo->size);
   /* We also need 64B (maximum GRF size) from the workaround address (see
    * TBIMR workaround)
    */
   assert((device->workaround_bo->size -
           device->workaround_address.offset) >= 64);

   device->workarounds.doom64_images = NULL;

   device->debug_frame_desc =
      intel_debug_get_identifier_block(device->workaround_bo->map,
                                       device->workaround_bo->size,
                                       INTEL_DEBUG_BLOCK_TYPE_FRAME);

   if (device->vk.enabled_extensions.KHR_ray_query) {
      uint32_t ray_queries_size =
         align(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);

      result = anv_device_alloc_bo(device, "ray queries",
                                   ray_queries_size,
                                   ANV_BO_ALLOC_INTERNAL,
                                   0 /* explicit_address */,
                                   &device->ray_query_bo[0]);
      ANV_DMR_BO_ALLOC(&device->vk.base, device->ray_query_bo[0], result);
      if (result != VK_SUCCESS)
         goto fail_alloc_device_bo;

      /* We need a separate ray query bo for CCS engine with Wa_14022863161. */
      if (intel_needs_workaround(device->isl_dev.info, 14022863161) &&
          device_has_compute_queue) {
         result = anv_device_alloc_bo(device, "ray queries",
                                      ray_queries_size,
                                      ANV_BO_ALLOC_INTERNAL,
                                      0 /* explicit_address */,
                                      &device->ray_query_bo[1]);
         ANV_DMR_BO_ALLOC(&device->vk.base, device->ray_query_bo[1], result);
         if (result != VK_SUCCESS)
            goto fail_ray_query_bo;
      }
   }

   result = anv_device_init_trivial_batch(device);
   if (result != VK_SUCCESS)
      goto fail_ray_query_bo;

   /* Emit the CPS states before running the initialization batch as those
    * structures are referenced.
    */
   if (device->info->ver >= 12 && device->info->ver < 30) {
      uint32_t n_cps_states = 3 * 3; /* All combinations of X by Y CP sizes (1, 2, 4) */

      if (device->info->has_coarse_pixel_primitive_and_cb)
         n_cps_states *= 5 * 5; /* 5 possible operations for each of the 2 combiners */

      n_cps_states += 1; /* Disable CPS */

      /* Each of the combinations must be replicated on all viewports */
      n_cps_states *= MAX_VIEWPORTS;

      device->cps_states =
         anv_state_pool_alloc(&device->dynamic_state_pool,
                              n_cps_states * CPS_STATE_length(device->info) * 4,
                              32);
      if (device->cps_states.map == NULL)
         goto fail_trivial_batch;

      anv_genX(device->info, init_cps_device_state)(device);
   }

   if (device->physical->indirect_descriptors) {
      /* Allocate a null surface state at surface state offset 0. This makes
       * NULL descriptor handling trivial because we can just memset
       * structures to zero and they have a valid descriptor.
       */
      device->null_surface_state =
         anv_state_pool_alloc(&device->bindless_surface_state_pool,
                              device->isl_dev.ss.size,
                              device->isl_dev.ss.align);
      isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
                          .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
      assert(device->null_surface_state.offset == 0);
   } else {
      /* When using direct descriptors, those can hold the null surface state
       * directly. We still need a null surface for the binding table entries
       * though, but this one can live anywhere in the internal surface state
       * pool.
       */
      device->null_surface_state =
         anv_state_pool_alloc(&device->internal_surface_state_pool,
                              device->isl_dev.ss.size,
                              device->isl_dev.ss.align);
      isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
                          .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
   }

   isl_null_fill_state(&device->isl_dev, &device->host_null_surface_state,
                       .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);

   anv_scratch_pool_init(device, &device->scratch_pool, false);
   anv_scratch_pool_init(device, &device->protected_scratch_pool, true);

   /* TODO(RT): Do we want some sort of data structure for this? */
   memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));

   if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
      /* The docs say to always allocate 128KB per DSS */
      const uint32_t btd_fifo_bo_size =
         128 * 1024 * intel_device_info_dual_subslice_id_bound(device->info);
      result = anv_device_alloc_bo(device,
                                   "rt-btd-fifo",
                                   btd_fifo_bo_size,
                                   ANV_BO_ALLOC_INTERNAL,
                                   0 /* explicit_address */,
                                   &device->btd_fifo_bo);
      ANV_DMR_BO_ALLOC(&device->vk.base, device->btd_fifo_bo, result);
      if (result != VK_SUCCESS)
         goto fail_trivial_batch_bo_and_scratch_pool;
   }

   struct vk_pipeline_cache_create_info pcc_info = { .weak_ref = true, };
   device->vk.mem_cache =
      vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
   if (!device->vk.mem_cache) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_btd_fifo_bo;
   }

   /* Internal shaders need their own pipeline cache because, unlike the rest
    * of ANV, it won't work at all without the cache. It depends on it for
    * shaders to remain resident while it runs. Therefore, we need a special
    * cache just for BLORP/RT that's forced to always be enabled.
    */
   struct vk_pipeline_cache_create_info internal_pcc_info = {
      .force_enable = true,
      .weak_ref = false,
   };
   device->internal_cache =
      vk_pipeline_cache_create(&device->vk, &internal_pcc_info, NULL);
   if (device->internal_cache == NULL) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_default_pipeline_cache;
   }

   /* The device (currently ICL/TGL) does not have float64 support. */
   if (!device->info->has_64bit_float)
      anv_load_fp64_shader(device);

   if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
      result = anv_device_print_init(device);
      if (result != VK_SUCCESS)
         goto fail_internal_cache;
   }

   device->robust_buffer_access =
      device->vk.enabled_features.robustBufferAccess ||
      device->vk.enabled_features.nullDescriptor;

   device->breakpoint = anv_state_pool_alloc(&device->dynamic_state_pool, 4,
                                             4);
   p_atomic_set(&device->draw_call_count, 0);
   p_atomic_set(&device->dispatch_call_count, 0);

   /* Create a separate command pool for companion RCS command buffer. */
   if (device->info->verx10 >= 125) {
      VkCommandPoolCreateInfo pool_info = {
         .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
         .queueFamilyIndex =
            anv_get_first_render_queue_index(device->physical),
      };

      result = vk_common_CreateCommandPool(anv_device_to_handle(device),
                                           &pool_info, NULL,
                                           &device->companion_rcs_cmd_pool);
      if (result != VK_SUCCESS) {
         goto fail_print;
      }
   }

   result = anv_device_init_trtt(device);
   if (result != VK_SUCCESS)
      goto fail_companion_cmd_pool;

   result = anv_device_init_rt_shaders(device);
   if (result != VK_SUCCESS) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_trtt;
   }

   anv_device_init_blorp(device);

   anv_device_init_border_colors(device);

   anv_device_init_internal_kernels(device);

   anv_device_init_astc_emu(device);

   anv_device_perf_init(device);

   anv_device_init_embedded_samplers(device);

   anv_device_init_descriptors_view(device);

   BITSET_ONES(device->gfx_dirty_state);
   BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
   BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
   if (device->info->ver < 11)
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_VF_SGVS_2);
   if (device->info->ver < 12) {
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_DEPTH_BOUNDS);
   }
   if (!device->vk.enabled_extensions.EXT_sample_locations)
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SAMPLE_PATTERN);
   if (!device->vk.enabled_extensions.KHR_fragment_shading_rate) {
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CPS);
   }
   if (!device->vk.enabled_extensions.EXT_mesh_shader) {
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SBE_MESH);
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CLIP_MESH);
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_CONTROL);
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_SHADER);
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_DISTRIB);
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_CONTROL);
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_SHADER);
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_REDISTRIB);
   }
   if (!intel_needs_workaround(device->info, 18019816803))
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_18019816803);
   if (!intel_needs_workaround(device->info, 14018283232))
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_14018283232);
   if (device->info->ver > 9)
      BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PMA_FIX);

   BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_14024997852);

   device->queue_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queueCreateInfo =
         &pCreateInfo->pQueueCreateInfos[i];

      for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
         result = anv_queue_init(device, &device->queues[device->queue_count],
                                 queueCreateInfo, j);
         if (result != VK_SUCCESS)
            goto fail_queues;

         device->queue_count++;
      }
   }

   anv_device_utrace_init(device);

   result = vk_meta_device_init(&device->vk, &device->meta_device);
   if (result != VK_SUCCESS)
      goto fail_utrace;

   result = anv_genX(device->info, init_device_state)(device);
   if (result != VK_SUCCESS)
      goto fail_meta_device;

   device->vk.disable_lto = device->physical->instance->disable_lto;

   simple_mtx_init(&device->accel_struct_build.mutex, mtx_plain);

   *pDevice = anv_device_to_handle(device);

   return VK_SUCCESS;

 fail_meta_device:
   vk_meta_device_finish(&device->vk, &device->meta_device);
 fail_utrace:
   anv_device_utrace_finish(device);
 fail_queues:
   for (uint32_t i = 0; i < device->queue_count; i++)
      anv_queue_finish(&device->queues[i]);
   anv_device_finish_descriptors_view(device);
   anv_device_finish_embedded_samplers(device);
   anv_device_finish_blorp(device);
   anv_device_finish_astc_emu(device);
   anv_device_finish_internal_kernels(device);
   anv_device_finish_rt_shaders(device);
 fail_trtt:
   anv_device_finish_trtt(device);
 fail_companion_cmd_pool:
   if (device->info->verx10 >= 125) {
      vk_common_DestroyCommandPool(anv_device_to_handle(device),
                                   device->companion_rcs_cmd_pool, NULL);
   }
 fail_print:
   if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
      anv_device_print_fini(device);
 fail_internal_cache:
   vk_pipeline_cache_destroy(device->internal_cache, NULL);
 fail_default_pipeline_cache:
   vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);
 fail_btd_fifo_bo:
   if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
      ANV_DMR_BO_FREE(&device->vk.base, device->btd_fifo_bo);
      anv_device_release_bo(device, device->btd_fifo_bo);
   }
 fail_trivial_batch_bo_and_scratch_pool:
   anv_scratch_pool_finish(device, &device->scratch_pool);
   anv_scratch_pool_finish(device, &device->protected_scratch_pool);
 fail_trivial_batch:
   ANV_DMR_BO_FREE(&device->vk.base, device->trivial_batch_bo);
   anv_device_release_bo(device, device->trivial_batch_bo);
 fail_ray_query_bo:
   for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_bo); i++) {
      if (device->ray_query_bo[i]) {
         ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_bo[i]);
         anv_device_release_bo(device, device->ray_query_bo[i]);
      }
   }
 fail_alloc_device_bo:
   if (device->mem_fence_bo) {
      ANV_DMR_BO_FREE(&device->vk.base, device->mem_fence_bo);
      anv_device_release_bo(device, device->mem_fence_bo);
   }
   if (device->dummy_aux_bo) {
      ANV_DMR_BO_FREE(&device->vk.base, device->dummy_aux_bo);
      anv_device_release_bo(device, device->dummy_aux_bo);
   }
   ANV_DMR_BO_FREE(&device->vk.base, device->workaround_bo);
   anv_device_release_bo(device, device->workaround_bo);
 fail_surface_aux_map_pool:
   if (device->info->has_aux_map) {
      intel_aux_map_finish(device->aux_map_ctx);
      device->aux_map_ctx = NULL;
   }
 fail_aux_tt_pool:
   if (device->info->has_aux_map)
      anv_state_pool_finish(&device->aux_tt_pool);
 fail_push_descriptor_buffer_pool:
   if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
       device->info->verx10 >= 125)
      anv_state_pool_finish(&device->push_descriptor_buffer_pool);
 fail_indirect_push_descriptor_pool:
   if (device->physical->indirect_descriptors)
      anv_state_pool_finish(&device->indirect_push_descriptor_pool);
 fail_binding_table_pool:
   anv_state_pool_finish(&device->binding_table_pool);
 fail_bindless_surface_state_pool:
   if (device->physical->indirect_descriptors)
      anv_state_pool_finish(&device->bindless_surface_state_pool);
 fail_internal_surface_state_pool:
   anv_state_pool_finish(&device->internal_surface_state_pool);
 fail_scratch_surface_state_pool:
   if (device->info->verx10 >= 125)
      anv_state_pool_finish(&device->scratch_surface_state_pool);
 fail_shader_vma_heap:
   anv_shader_heap_finish(&device->shader_heap);
 fail_custom_border_color_pool:
   anv_state_reserved_array_pool_finish(&device->custom_border_colors);
 fail_dynamic_state_pool:
   anv_state_pool_finish(&device->dynamic_state_pool);
 fail_general_state_pool:
   anv_state_pool_finish(&device->general_state_pool);
 fail_batch_bo_pool:
   if (device->vk.enabled_extensions.KHR_acceleration_structure)
      anv_bo_pool_finish(&device->bvh_bo_pool);
   anv_bo_pool_finish(&device->batch_bo_pool);
   anv_slab_bo_deinit(device);
 fail_cache:
   anv_bo_cache_finish(&device->bo_cache);
 fail_mutex:
   pthread_mutex_destroy(&device->mutex);
 fail_vmas:
   util_vma_heap_finish(&device->vma_trtt);
   util_vma_heap_finish(&device->vma_dynamic_visible);
   util_vma_heap_finish(&device->vma_desc);
   util_vma_heap_finish(&device->vma_hi);
   util_vma_heap_finish(&device->vma_lo);
   pthread_mutex_destroy(&device->vma_mutex);
 fail_queues_alloc:
   vk_free(&device->vk.alloc, device->queues);
 fail_context_id:
   anv_device_destroy_context_or_vm(device);
 fail_fd:
   intel_virtio_unref_fd(device->fd);
   close(device->fd);
 fail_device:
   vk_device_finish(&device->vk);
 fail_alloc:
   vk_free(&device->vk.alloc, device);

   return result;
}

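/* Device teardown; mirrors anv_CreateDevice in reverse. TRTT batch garbage
 * collection has to happen before the queues are destroyed (see below).
 */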
void anv_DestroyDevice(
    VkDevice                                    _device,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device)
      return;

   anv_memory_trace_finish(device);

   struct anv_physical_device *pdevice = device->physical;

   /* Do TRTT batch garbage collection before destroying queues. */
   anv_device_finish_trtt(device);

   if (device->accel_struct_build.radix_sort) {
      radix_sort_vk_destroy(device->accel_struct_build.radix_sort,
                            _device, &device->vk.alloc);
   }
   vk_meta_device_finish(&device->vk, &device->meta_device);

   anv_device_utrace_finish(device);

   for (uint32_t i = 0; i < device->queue_count; i++)
      anv_queue_finish(&device->queues[i]);
   vk_free(&device->vk.alloc, device->queues);

   anv_device_finish_blorp(device);

   anv_device_finish_rt_shaders(device);

   anv_device_finish_astc_emu(device);

   anv_device_finish_internal_kernels(device);

   anv_device_finish_descriptors_view(device);

   if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
      anv_device_print_fini(device);

   vk_pipeline_cache_destroy(device->internal_cache, NULL);
   vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);

   anv_device_finish_embedded_samplers(device);

   if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
      ANV_DMR_BO_FREE(&device->vk.base, device->btd_fifo_bo);
      anv_device_release_bo(device, device->btd_fifo_bo);
   }

   if (device->info->verx10 >= 125) {
      vk_common_DestroyCommandPool(anv_device_to_handle(device),
                                   device->companion_rcs_cmd_pool, NULL);
   }

   anv_state_reserved_array_pool_finish(&device->custom_border_colors);
#ifdef HAVE_VALGRIND
   /* We only need to free these to prevent valgrind errors. The backing
    * BO will go away in a couple of lines so we don't actually leak.
    */
   anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
   anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
   anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
   anv_state_pool_free(&device->dynamic_state_pool, device->breakpoint);
#endif

   for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
      if (device->rt_scratch_bos[i] != NULL) {
         struct anv_bo *bo = device->rt_scratch_bos[i];
         ANV_DMR_BO_FREE(&device->vk.base, bo);
         anv_device_release_bo(device, bo);
      }
   }

   anv_scratch_pool_finish(device, &device->scratch_pool);
   anv_scratch_pool_finish(device, &device->protected_scratch_pool);

   if (device->vk.enabled_extensions.KHR_ray_query) {
      for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_bo); i++) {
         for (unsigned j = 0; j < ARRAY_SIZE(device->ray_query_shadow_bos[0]); j++) {
            if (device->ray_query_shadow_bos[i][j] != NULL) {
               ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_shadow_bos[i][j]);
               anv_device_release_bo(device, device->ray_query_shadow_bos[i][j]);
            }
         }
         if (device->ray_query_bo[i]) {
            ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_bo[i]);
            anv_device_release_bo(device, device->ray_query_bo[i]);
         }
      }
   }
   ANV_DMR_BO_FREE(&device->vk.base, device->workaround_bo);
   anv_device_release_bo(device, device->workaround_bo);
   if (device->dummy_aux_bo) {
      ANV_DMR_BO_FREE(&device->vk.base, device->dummy_aux_bo);
      anv_device_release_bo(device, device->dummy_aux_bo);
   }
   if (device->mem_fence_bo) {
      ANV_DMR_BO_FREE(&device->vk.base, device->mem_fence_bo);
      anv_device_release_bo(device, device->mem_fence_bo);
   }
   ANV_DMR_BO_FREE(&device->vk.base, device->trivial_batch_bo);
   anv_device_release_bo(device, device->trivial_batch_bo);

   if (device->info->has_aux_map) {
      intel_aux_map_finish(device->aux_map_ctx);
      device->aux_map_ctx = NULL;
      anv_state_pool_finish(&device->aux_tt_pool);
   }
   if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
       device->info->verx10 >= 125)
      anv_state_pool_finish(&device->push_descriptor_buffer_pool);
   if (device->physical->indirect_descriptors)
      anv_state_pool_finish(&device->indirect_push_descriptor_pool);
   anv_state_pool_finish(&device->binding_table_pool);
   if (device->info->verx10 >= 125)
      anv_state_pool_finish(&device->scratch_surface_state_pool);
   anv_state_pool_finish(&device->internal_surface_state_pool);
   if (device->physical->indirect_descriptors)
      anv_state_pool_finish(&device->bindless_surface_state_pool);

   anv_shader_heap_finish(&device->shader_heap);
   anv_state_pool_finish(&device->dynamic_state_pool);
   anv_state_pool_finish(&device->general_state_pool);

   if (device->vk.enabled_extensions.KHR_acceleration_structure)
      anv_bo_pool_finish(&device->bvh_bo_pool);
   anv_bo_pool_finish(&device->batch_bo_pool);

   anv_slab_bo_deinit(device);
   anv_bo_cache_finish(&device->bo_cache);

   util_vma_heap_finish(&device->vma_trtt);
   util_vma_heap_finish(&device->vma_dynamic_visible);
   util_vma_heap_finish(&device->vma_desc);
   util_vma_heap_finish(&device->vma_hi);
   util_vma_heap_finish(&device->vma_lo);
   pthread_mutex_destroy(&device->vma_mutex);

   pthread_mutex_destroy(&device->mutex);

   simple_mtx_destroy(&device->accel_struct_build.mutex);

   ralloc_free(device->fp64_nir);

   anv_device_destroy_context_or_vm(device);

   if (INTEL_DEBUG(DEBUG_BATCH) || INTEL_DEBUG(DEBUG_BATCH_STATS)) {
      for (unsigned i = 0; i < pdevice->queue.family_count; i++) {
         if (INTEL_DEBUG(DEBUG_BATCH_STATS))
            intel_batch_print_stats(&device->decoder[i]);
         intel_batch_decode_ctx_finish(&device->decoder[i]);
      }
   }

   close(device->fd);

   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult anv_EnumerateInstanceLayerProperties(
    uint32_t*                                   pPropertyCount,
    VkLayerProperties*                          pProperties)
{
   if (pProperties == NULL) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   /* None supported at this time */
   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

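/* Block until the kernel reports the BO idle, translating errno into a
 * Vulkan result: ETIME becomes VK_TIMEOUT, any other failure marks the
 * device lost.
 */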
VkResult
anv_device_wait(struct anv_device *device, struct anv_bo *bo,
                int64_t timeout)
{
   int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
   if (ret == -1 && errno == ETIME) {
      return VK_TIMEOUT;
   } else if (ret == -1) {
      /* We don't know the real error. */
      return vk_device_set_lost(&device->vk, "gem wait failed: %m");
   } else {
      return VK_SUCCESS;
   }
}

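/* Pick the VA heap matching the allocation flags; anything without a
 * special placement requirement comes from the high heap.
 */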
static struct util_vma_heap *
anv_vma_heap_for_flags(struct anv_device *device,
                       enum anv_bo_alloc_flags alloc_flags)
{
   if (alloc_flags & ANV_BO_ALLOC_TRTT)
      return &device->vma_trtt;

   if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
      return &device->vma_lo;

   if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL)
      return &device->vma_desc;

   if (alloc_flags & ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL)
      return &device->vma_dynamic_visible;

   return &device->vma_hi;
}

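/* Allocate GPU VA from the heap selected by anv_vma_heap_for_flags().
 * Client-visible allocations either claim the exact client_address or
 * allocate bottom-up, and never fall back to another heap; the returned
 * address is in canonical (sign-extended) form.
 */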
uint64_t
anv_vma_alloc(struct anv_device *device,
              uint64_t size, uint64_t align,
              enum anv_bo_alloc_flags alloc_flags,
              uint64_t client_address,
              struct util_vma_heap **out_vma_heap)
{
   pthread_mutex_lock(&device->vma_mutex);

   uint64_t addr = 0;
   *out_vma_heap = anv_vma_heap_for_flags(device, alloc_flags);

   if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
      assert(*out_vma_heap == &device->vma_hi ||
             *out_vma_heap == &device->vma_dynamic_visible ||
             *out_vma_heap == &device->vma_trtt);

      if (client_address) {
         if (util_vma_heap_alloc_addr(*out_vma_heap,
                                      client_address, size)) {
            addr = client_address;
         }
      } else {
         (*out_vma_heap)->alloc_high = false;
         addr = util_vma_heap_alloc(*out_vma_heap, size, align);
         (*out_vma_heap)->alloc_high = true;
      }
      /* We don't want to fall back to other heaps */
      goto done;
   }

   assert(client_address == 0);

   addr = util_vma_heap_alloc(*out_vma_heap, size, align);

done:
   pthread_mutex_unlock(&device->vma_mutex);

   assert(addr == intel_48b_address(addr));
   return intel_canonical_address(addr);
}

void
anv_vma_free(struct anv_device *device,
             struct util_vma_heap *vma_heap,
             uint64_t address, uint64_t size)
{
   assert(vma_heap == &device->vma_lo ||
          vma_heap == &device->vma_hi ||
          vma_heap == &device->vma_desc ||
          vma_heap == &device->vma_dynamic_visible ||
          vma_heap == &device->vma_trtt);

   const uint64_t addr_48b = intel_48b_address(address);

   pthread_mutex_lock(&device->vma_mutex);

   util_vma_heap_free(vma_heap, addr_48b, size);

   pthread_mutex_unlock(&device->vma_mutex);
}

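/* vkAllocateMemory: walk the pNext chain for import/export/capture info,
 * translate the memory type into anv_bo_alloc_flags, and account the
 * allocation against its heap before allocating the BO.
 */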
VkResult anv_AllocateMemory(
|
||
VkDevice _device,
|
||
const VkMemoryAllocateInfo* pAllocateInfo,
|
||
const VkAllocationCallbacks* pAllocator,
|
||
VkDeviceMemory* pMem)
|
||
{
|
||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||
struct anv_physical_device *pdevice = device->physical;
|
||
struct anv_device_memory *mem;
|
||
VkResult result = VK_SUCCESS;
|
||
|
||
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
|
||
|
||
VkDeviceSize aligned_alloc_size =
|
||
align64(pAllocateInfo->allocationSize, 4096);
|
||
|
||
assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
|
||
const struct anv_memory_type *mem_type =
|
||
&pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
|
||
assert(mem_type->heapIndex < pdevice->memory.heap_count);
|
||
struct anv_memory_heap *mem_heap =
|
||
&pdevice->memory.heaps[mem_type->heapIndex];
|
||
|
||
if (aligned_alloc_size > mem_heap->size)
|
||
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||
|
||
uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
|
||
if (mem_heap_used + aligned_alloc_size > mem_heap->size)
|
||
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|

   mem = vk_device_memory_create(&device->vk, pAllocateInfo,
                                 pAllocator, sizeof(*mem));
   if (mem == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   mem->type = mem_type;
   mem->map = NULL;
   mem->map_size = 0;
   mem->map_delta = 0;

   enum anv_bo_alloc_flags alloc_flags = 0;

   const VkImportMemoryFdInfoKHR *fd_info = NULL;
   const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
   const struct wsi_memory_allocate_info *wsi_info = NULL;
   uint64_t client_address = 0;

   vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
      /* VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA isn't a real enum
       * value, so use a cast to avoid a compiler warning.
       */
      switch ((uint32_t)ext->sType) {
      case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
      case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR:
      case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
         /* handled by vk_device_memory_create */
         break;

      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
         fd_info = (void *)ext;
         break;

      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
         dedicated_info = (void *)ext;
         break;

      case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
         const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
            (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
         client_address = addr_info->opaqueCaptureAddress;
         break;
      }

      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
         wsi_info = (void *)ext;
         break;

      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
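
   /* As a usage sketch of the capture/replay path handled above, a replay
    * tool re-allocates memory at a previously captured address by chaining
    * (the captured_address variable is hypothetical):
    *
    *    const VkMemoryOpaqueCaptureAddressAllocateInfo addr_info = {
    *       .sType =
    *          VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,
    *       .opaqueCaptureAddress = captured_address,
    *    };
    *
    * into VkMemoryAllocateInfo::pNext. The address lands in client_address
    * here and is honored by anv_vma_alloc() when the BO gets its address.
    */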

   /* If i915 reported mappable and non-mappable vram regions and the
    * application wants lmem that is mappable, then we need to use the
    * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
    */
   if (pdevice->vram_mappable.size > 0 &&
       pdevice->vram_non_mappable.size > 0 &&
       (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
       (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
      alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;

   if (!mem_heap->is_local_mem)
      alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM;

   if (mem->vk.alloc_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
      alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;

   if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_PROTECTED_BIT)
      alloc_flags |= ANV_BO_ALLOC_PROTECTED;

   /* For now, always allocate AUX-TT aligned memory, regardless of dedicated
    * allocations. An application can, for example, suballocate a large
    * VkDeviceMemory and try to bind an image created with a CCS modifier. In
    * that case we cannot disable CCS if the alignment doesn't meet the AUX-TT
    * requirements, so we need to ensure both the VkDeviceMemory and the
    * alignment reported through vkGetImageMemoryRequirements() meet the
    * AUX-TT requirement.
    *
    * Allocations with the special dynamic_visible mem type are for things
    * like descriptor buffers, so AUX-TT alignment is not needed there.
    */
   if (device->info->has_aux_map && !mem_type->dynamic_visible)
      alloc_flags |= ANV_BO_ALLOC_AUX_TT_ALIGNED;

   /* If the allocation is neither dedicated nor a host pointer, allocate
    * additional CCS space.
    *
    * Allocations with the special dynamic_visible mem type are for things
    * like descriptor buffers, which don't need any compression.
    */
   if (device->physical->alloc_aux_tt_mem &&
       dedicated_info == NULL &&
       mem->vk.host_ptr == NULL &&
       !mem_type->dynamic_visible)
      alloc_flags |= ANV_BO_ALLOC_AUX_CCS;

   /* TODO: Android, ChromeOS and other applications may need another way to
    * allocate buffers that can be scanned out to display, but it should be
    * pretty easy to catch those, as the Xe KMD prints warnings in dmesg when
    * scanning out buffers allocated without the proper flag set.
    */
   if (wsi_info)
      alloc_flags |= ANV_BO_ALLOC_SCANOUT;

   struct anv_image *image = dedicated_info ?
                             anv_image_from_handle(dedicated_info->image) :
                             NULL;
   mem->dedicated_image = image;

   /* If there is a dedicated image with a modifier, use that to determine
    * compression, otherwise use the memory type.
    */
   if (device->info->ver >= 20 && image &&
       image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      const bool needs_compression =
         isl_drm_modifier_has_aux(image->vk.drm_format_mod);
      assert(!needs_compression || !INTEL_DEBUG(DEBUG_NO_CCS));
      alloc_flags |= needs_compression ? ANV_BO_ALLOC_COMPRESSED : 0;
   } else {
      alloc_flags |= (mem_type->compressed && !INTEL_DEBUG(DEBUG_NO_CCS)) ?
                     ANV_BO_ALLOC_COMPRESSED : 0;
   }

   /* Anything imported or exported is EXTERNAL */
   if (mem->vk.export_handle_types || mem->vk.import_handle_type) {
      alloc_flags |= ANV_BO_ALLOC_EXTERNAL;

      /* wsi has its own way of synchronizing with the compositor */
      if (!wsi_info && image) {
         /* Apply implicit sync to be compatible with clients relying on
          * implicit fencing. This matches the behavior in iris i915_batch
          * submit. An example client is VA-API (iHD), so only the dedicated
          * image scenario has to be covered.
          */
         alloc_flags |= ANV_BO_ALLOC_IMPLICIT_SYNC;

         /* For color attachments, apply IMPLICIT_WRITE so that a client on
          * the consumer side relying on implicit fencing has a fence to
          * wait on for render completion.
          */
         if (pdevice->instance->external_memory_implicit_sync &&
             (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
            alloc_flags |= ANV_BO_ALLOC_IMPLICIT_WRITE;
      }
   }

   if (mem_type->dynamic_visible)
      alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL;

   if (mem->vk.ahardware_buffer) {
      result = anv_import_ahb_memory(_device, mem);
      if (result != VK_SUCCESS)
         goto fail;

      goto success;
   }

   /* The Vulkan spec permits handleType to be 0, in which case the struct is
    * ignored.
    */
   if (fd_info && fd_info->handleType) {
      /* At the moment, we support only the below handle types. */
      assert(fd_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             fd_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
      if (alloc_flags & ANV_BO_ALLOC_COMPRESSED) {
         /* First, when importing a compressed buffer on Xe2+, we can be sure
          * that the buffer comes from a resource created with modifiers
          * supporting compression, even though the modifier information is
          * not available on this allocation path. (Buffers created with
          * modifiers not supporting compression must be uncompressed or
          * resolved first for sharing.)
          *
          * We assume the source of the sharing (a GL driver or this driver)
          * would create the shared buffer for scanout usage as well, for the
          * above reasons. As a result, configure the imported buffer for
          * scanout.
          *
          * This assumption would fit pre-Xe2 platforms as well, but it
          * becomes more relevant on Xe2+ because the alloc flags determine
          * the BO's heap and then the PAT entry in the later vm_bind stage.
          */
         assert(device->info->ver >= 20);
         assert(image);
         if (vk_format_is_color(image->vk.format))
            alloc_flags |= ANV_BO_ALLOC_SCANOUT;
      }

      result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
                                    client_address, &mem->bo);
      if (result != VK_SUCCESS)
         goto fail;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size. This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try and texture from an image that goes
       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
       * in the trusted client. The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (mem->bo->size < aligned_alloc_size) {
         result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "aligned allocationSize too large for "
                            "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
                            "%"PRIu64"B > %"PRIu64"B",
                            aligned_alloc_size, mem->bo->size);
         anv_device_release_bo(device, mem->bo);
         goto fail;
      }

      /* From the Vulkan spec:
       *
       *    "Importing memory from a file descriptor transfers ownership of
       *    the file descriptor from the application to the Vulkan
       *    implementation. The application must not perform any operations on
       *    the file descriptor after a successful import."
       *
       * If the import fails, we leave the file descriptor open.
       */
      close(fd_info->fd);
      goto success;
   }

   if (mem->vk.host_ptr) {
      if (mem->vk.import_handle_type ==
          VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
         result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
         goto fail;
      }

      assert(mem->vk.import_handle_type ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);

      result = anv_device_import_bo_from_host_ptr(device,
                                                  mem->vk.host_ptr,
                                                  mem->vk.size,
                                                  alloc_flags,
                                                  client_address,
                                                  &mem->bo);
      if (result != VK_SUCCESS)
         goto fail;

      goto success;
   }

   if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT)) {
      alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
   } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
      if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
         alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
      if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
         alloc_flags |= ANV_BO_ALLOC_HOST_CACHED;
   } else {
      /* We are required to set some host mode so that a valid PAT index
       * gets selected.
       */
      alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
   }
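
   /* In short: external/scanout BOs are forced host-coherent, host-visible
    * memory types translate their Vulkan property flags directly, and
    * everything else gets HOST_COHERENT only so that a PAT entry can be
    * picked (see anv_device_get_pat_entry()).
    */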

   /* Regular allocation (not importing memory). */

   result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
                                alloc_flags, client_address, &mem->bo);
   if (result != VK_SUCCESS)
      goto fail;

   if (image && image->vk.wsi_legacy_scanout) {
      /* Some legacy (non-modifiers) consumers need the tiling to be set on
       * the BO. In this case, we have a dedicated allocation.
       */
      const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
      result = anv_device_set_bo_tiling(device, mem->bo,
                                        surf->row_pitch_B,
                                        surf->tiling);
      if (result != VK_SUCCESS) {
         anv_device_release_bo(device, mem->bo);
         goto fail;
      }
   }

 success:
   mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
   if (mem_heap_used > mem_heap->size) {
      p_atomic_add(&mem_heap->used, -mem->bo->size);
      anv_device_release_bo(device, mem->bo);
      result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                         "Out of heap memory");
      goto fail;
   }

   pthread_mutex_lock(&device->mutex);
   list_addtail(&mem->link, &device->memory_objects);
   pthread_mutex_unlock(&device->mutex);

   ANV_RMV(heap_create, device, mem, false, 0);
   ANV_DMR_BO_ALLOC_IMPORT(&mem->vk.base, mem->bo, result,
                           mem->vk.import_handle_type);

   *pMem = anv_device_memory_to_handle(mem);

   return VK_SUCCESS;

 fail:
   ANV_DMR_BO_ALLOC_IMPORT(&mem->vk.base, mem->bo, result,
                           mem->vk.import_handle_type);
   vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);

   return result;
}

VkResult anv_GetMemoryFdKHR(
    VkDevice                                    device_h,
    const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
    int*                                        pFd)
{
   ANV_FROM_HANDLE(anv_device, dev, device_h);
   ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return anv_device_export_bo(dev, mem->bo, pFd);
}

VkResult anv_GetMemoryFdPropertiesKHR(
    VkDevice                                    _device,
    VkExternalMemoryHandleTypeFlagBits          handleType,
    int                                         fd,
    VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* dma-buf can be imported as any memory type */
      pMemoryFdProperties->memoryTypeBits =
         (1 << device->physical->memory.type_count) - 1;
      return VK_SUCCESS;

   default:
      /* The valid usage section for this function says:
       *
       *    "handleType must not be one of the handle types defined as
       *    opaque."
       *
       * So opaque handle types fall into the default "unsupported" case.
       */
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

VkResult anv_GetMemoryHostPointerPropertiesEXT(
    VkDevice                                    _device,
    VkExternalMemoryHandleTypeFlagBits          handleType,
    const void*                                 pHostPointer,
    VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   assert(pMemoryHostPointerProperties->sType ==
          VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:

      pMemoryHostPointerProperties->memoryTypeBits =
         device->info->ver >= 20 ?
         device->physical->memory.default_buffer_mem_types :
         (1ull << device->physical->memory.type_count) - 1;

      return VK_SUCCESS;

   default:
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }
}

void anv_FreeMemory(
    VkDevice                                    _device,
    VkDeviceMemory                              _mem,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _mem);

   if (mem == NULL)
      return;

   pthread_mutex_lock(&device->mutex);
   list_del(&mem->link);
   pthread_mutex_unlock(&device->mutex);

   if (mem->map) {
      const VkMemoryUnmapInfoKHR unmap = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_UNMAP_INFO_KHR,
         .memory = _mem,
      };
      anv_UnmapMemory2KHR(_device, &unmap);
   }

   p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
                -mem->bo->size);

   ANV_DMR_BO_FREE_IMPORT(&mem->vk.base, mem->bo,
                          mem->vk.import_handle_type);

   anv_device_release_bo(device, mem->bo);

   ANV_RMV(resource_destroy, device, mem);

   vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
}

VkResult anv_MapMemory2KHR(
    VkDevice                                    _device,
    const VkMemoryMapInfoKHR*                   pMemoryMapInfo,
    void**                                      ppData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryMapInfo->memory);

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (mem->vk.host_ptr) {
      *ppData = mem->vk.host_ptr + pMemoryMapInfo->offset;
      return VK_SUCCESS;
   }

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * memory must have been created with a memory type that reports
    *    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
    */
   if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
      return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
                       "Memory object not mappable.");
   }

   assert(pMemoryMapInfo->size > 0);
   const VkDeviceSize offset = pMemoryMapInfo->offset;
   const VkDeviceSize size =
      vk_device_memory_range(&mem->vk, pMemoryMapInfo->offset,
                             pMemoryMapInfo->size);

   if (size != (size_t)size) {
      return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
                       "requested size 0x%"PRIx64" does not fit in %u bits",
                       size, (unsigned)(sizeof(size_t) * 8));
   }

   /* From the Vulkan 1.2.194 spec:
    *
    *    "memory must not be currently host mapped"
    */
   if (mem->map != NULL) {
      return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
                       "Memory object already mapped.");
   }

   void *placed_addr = NULL;
   if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
      const VkMemoryMapPlacedInfoEXT *placed_info =
         vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
      assert(placed_info != NULL);
      placed_addr = placed_info->pPlacedAddress;
   }

   uint64_t map_offset, map_size;
   anv_sanitize_map_params(device, mem->bo, offset, size, &map_offset, &map_size);

   void *map;
   VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
                                       map_size, placed_addr, &map);
   if (result != VK_SUCCESS)
      return result;

   mem->map = map;
   mem->map_size = map_size;
   mem->map_delta = (offset - map_offset);
   *ppData = mem->map + mem->map_delta;
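
   /* A worked example of the delta arithmetic above, assuming
    * anv_sanitize_map_params() rounds the offset down to a 4 KiB page:
    * offset == 0x1234 gives map_offset == 0x1000, so map_delta == 0x234 and
    * the application receives mem->map + 0x234.
    */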

   return VK_SUCCESS;
}

VkResult anv_UnmapMemory2KHR(
    VkDevice                                    _device,
    const VkMemoryUnmapInfoKHR*                 pMemoryUnmapInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryUnmapInfo->memory);

   if (mem == NULL || mem->vk.host_ptr)
      return VK_SUCCESS;

   VkResult result =
      anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size,
                          pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
   if (result != VK_SUCCESS)
      return result;

   mem->map = NULL;
   mem->map_size = 0;
   mem->map_delta = 0;

   return VK_SUCCESS;
}

VkResult anv_FlushMappedMemoryRanges(
    VkDevice                                    _device,
    uint32_t                                    memoryRangeCount,
    const VkMappedMemoryRange*                  pMemoryRanges)
{
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->memory.need_flush)
      return VK_SUCCESS;

   /* Make sure the writes we're flushing have landed. */
   __builtin_ia32_mfence();

   for (uint32_t i = 0; i < memoryRangeCount; i++) {
      ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
      if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
         continue;

      uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
      if (map_offset >= mem->map_size)
         continue;

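      /* The range size may be VK_WHOLE_SIZE, so clamp the flush to the end
       * of the mapping.
       */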
      util_flush_range(mem->map + map_offset,
                       MIN2(pMemoryRanges[i].size,
                            mem->map_size - map_offset));
   }
#endif
   return VK_SUCCESS;
}

VkResult anv_InvalidateMappedMemoryRanges(
    VkDevice                                    _device,
    uint32_t                                    memoryRangeCount,
    const VkMappedMemoryRange*                  pMemoryRanges)
{
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->memory.need_flush)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < memoryRangeCount; i++) {
      ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
      if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
         continue;

      uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
      if (map_offset >= mem->map_size)
         continue;

      util_flush_inval_range(mem->map + map_offset,
                             MIN2(pMemoryRanges[i].size,
                                  mem->map_size - map_offset));
   }

   /* Make sure no reads get moved up above the invalidate. */
   __builtin_ia32_mfence();
#endif
   return VK_SUCCESS;
}

void anv_GetDeviceMemoryCommitment(
    VkDevice                                    device,
    VkDeviceMemory                              memory,
    VkDeviceSize*                               pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

static inline VkTimeDomainKHR
anv_get_default_cpu_time_domain(void)
{
#ifdef CLOCK_MONOTONIC_RAW
   return VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
#else
   return VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR;
#endif
}

static inline clockid_t
vk_time_domain_to_clockid(VkTimeDomainKHR domain)
{
   switch (domain) {
#ifdef CLOCK_MONOTONIC_RAW
   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
      return CLOCK_MONOTONIC_RAW;
#endif
   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
      return CLOCK_MONOTONIC;
   default:
      UNREACHABLE("Missing");
      return CLOCK_MONOTONIC;
   }
}

static inline bool
is_cpu_time_domain(VkTimeDomainKHR domain)
{
   return domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR ||
          domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
}

static inline bool
is_gpu_time_domain(VkTimeDomainKHR domain)
{
   return domain == VK_TIME_DOMAIN_DEVICE_KHR;
}

static VkTimeDomainKHR
get_effective_time_domain(const VkCalibratedTimestampInfoKHR *timestamp)
{
   if (timestamp->timeDomain == VK_TIME_DOMAIN_PRESENT_STAGE_LOCAL_EXT) {
      const VkSwapchainCalibratedTimestampInfoEXT *swap =
         vk_find_struct_const(timestamp->pNext, SWAPCHAIN_CALIBRATED_TIMESTAMP_INFO_EXT);
      return wsi_common_get_time_domain(swap->swapchain, swap->presentStage, swap->timeDomainId);
   } else {
      return timestamp->timeDomain;
   }
}

VkResult anv_GetCalibratedTimestampsKHR(
    VkDevice                                    _device,
    uint32_t                                    timestampCount,
    const VkCalibratedTimestampInfoKHR*         pTimestampInfos,
    uint64_t*                                   pTimestamps,
    uint64_t*                                   pMaxDeviation)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   const uint64_t timestamp_frequency = device->info->timestamp_frequency;
   const uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
   uint32_t d, increment;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;
   const enum intel_kmd_type kmd_type = device->physical->info.kmd_type;
   const bool has_correlate_timestamp = kmd_type == INTEL_KMD_TYPE_XE;
   const VkTimeDomainKHR default_cpu_time_domain = anv_get_default_cpu_time_domain();
   const clockid_t default_cpu_clock_id = vk_time_domain_to_clockid(default_cpu_time_domain);
   clockid_t cpu_clock_id = -1;
   VkResult result;
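
   /* Sample the CPU clock once up front: this opens the deviation window
    * (begin) and, when the correlated-timestamp path below is not taken,
    * also serves as the CLOCK_MONOTONIC_RAW sample reported for that domain.
    */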
   result = vk_device_get_timestamp(&device->vk, default_cpu_time_domain, &end);
   if (result != VK_SUCCESS)
      return vk_error(device, result);
   begin = end;

   for (d = 0, increment = 1; d < timestampCount; d += increment) {
      const VkTimeDomainKHR current = get_effective_time_domain(&pTimestampInfos[d]);
      /* If we have a request pattern like this:
       *  - domain0 = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR or VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR
       *  - domain1 = VK_TIME_DOMAIN_DEVICE_KHR
       *  - domain2 = domain0 (optional)
       *
       * we can combine all of those into a single ioctl for maximum accuracy.
       */
      if (has_correlate_timestamp && (d + 1) < timestampCount) {
         const VkTimeDomainKHR next = get_effective_time_domain(&pTimestampInfos[d + 1]);

         if ((is_cpu_time_domain(current) && is_gpu_time_domain(next)) ||
             (is_gpu_time_domain(current) && is_cpu_time_domain(next))) {
            /* We'll consume at least 2 elements. */
            increment = 2;

            if (is_cpu_time_domain(current))
               cpu_clock_id = vk_time_domain_to_clockid(current);
            else
               cpu_clock_id = vk_time_domain_to_clockid(next);

            uint64_t cpu_timestamp, gpu_timestamp, cpu_delta_timestamp, cpu_end_timestamp;
            if (!intel_gem_read_correlate_cpu_gpu_timestamp(device->fd,
                                                            kmd_type,
                                                            INTEL_ENGINE_CLASS_RENDER,
                                                            0 /* engine_instance */,
                                                            cpu_clock_id,
                                                            &cpu_timestamp,
                                                            &gpu_timestamp,
                                                            &cpu_delta_timestamp))
               return vk_device_set_lost(&device->vk, "Failed to read correlate timestamp %m");

            cpu_end_timestamp = cpu_timestamp + cpu_delta_timestamp;
            if (is_cpu_time_domain(current)) {
               pTimestamps[d] = cpu_timestamp;
               pTimestamps[d + 1] = gpu_timestamp;
            } else {
               pTimestamps[d] = gpu_timestamp;
               pTimestamps[d + 1] = cpu_end_timestamp;
            }
            max_clock_period = MAX2(max_clock_period, device_period);

            /* If we can consume a third element */
            if ((d + 2) < timestampCount &&
                is_cpu_time_domain(current) &&
                current == get_effective_time_domain(&pTimestampInfos[d + 2])) {
               pTimestamps[d + 2] = cpu_end_timestamp;
               increment++;
            }

            /* If we're the first element, we can replace begin */
            if (d == 0 && cpu_clock_id == default_cpu_clock_id)
               begin = cpu_timestamp;

            /* If we're in the same clock domain as begin/end, we can set the end. */
            if (cpu_clock_id == default_cpu_clock_id)
               end = cpu_end_timestamp;

            continue;
         }
      }

      /* Fall back to the regular method. */
      increment = 1;
      switch (current) {
      case VK_TIME_DOMAIN_DEVICE_KHR:
         if (!intel_gem_read_render_timestamp(device->fd,
                                              device->info->kmd_type,
                                              &pTimestamps[d])) {
            return vk_device_set_lost(&device->vk, "Failed to read the "
                                      "TIMESTAMP register: %m");
         }
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
         result = vk_device_get_timestamp(
            &device->vk, VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR, &pTimestamps[d]);
         if (result != VK_SUCCESS)
            return vk_error(device, result);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

   for (uint32_t i = 0; i < timestampCount; i++) {
      if (pTimestampInfos[i].timeDomain == VK_TIME_DOMAIN_PRESENT_STAGE_LOCAL_EXT) {
         /* Need to rescale device timestamps to nanoseconds. */
         const VkSwapchainCalibratedTimestampInfoEXT *swap =
            vk_find_struct_const(pTimestampInfos[i].pNext, SWAPCHAIN_CALIBRATED_TIMESTAMP_INFO_EXT);
         if (wsi_common_get_time_domain(swap->swapchain, swap->presentStage, swap->timeDomainId) ==
             VK_TIME_DOMAIN_DEVICE_KHR) {
            pTimestamps[i] = (uint64_t)((double)pTimestamps[i] * 1e9 / (double)device->physical->info.timestamp_frequency);
         }

         /* Timestamps in QueueOperationsEnd are always derived from a device
          * timestamp, even if the reported time domain is not.
          */
         if (swap->presentStage == VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT)
            max_clock_period = MAX2(max_clock_period, device_period);
      }
   }

   /* If the last timestamp was not read with the correlated-timestamp
    * method, or if it was but the last CPU clock is not the default one,
    * read the time again.
    */
   if (increment == 1 || cpu_clock_id != default_cpu_clock_id) {
      result = vk_device_get_timestamp(&device->vk, default_cpu_time_domain, &end);
      if (result != VK_SUCCESS)
         return vk_error(device, result);
   }

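   /* The reported deviation is, roughly, the CPU sampling window
    * (end - begin) plus the coarsest clock period sampled above.
    */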
   *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);

   return VK_SUCCESS;
}
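
/* A summary of the selection order implemented below: compressed
 * allocations (Xe2+) take priority, then imports, then external/scanout
 * BOs, and only then the cacheability-based choices, which differ between
 * discrete and integrated platforms.
 */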
const struct intel_device_info_pat_entry *
anv_device_get_pat_entry(struct anv_device *device,
                         enum anv_bo_alloc_flags alloc_flags)
{
   if (alloc_flags & ANV_BO_ALLOC_COMPRESSED) {
      /* Compressed PAT entries are available on Xe2+. */
      assert(device->info->ver >= 20);
      return alloc_flags & ANV_BO_ALLOC_SCANOUT ?
             &device->info->pat.compressed_scanout :
             &device->info->pat.compressed;
   }

   if (alloc_flags & ANV_BO_ALLOC_IMPORTED)
      return &device->info->pat.cached_coherent;

   if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT))
      return &device->info->pat.scanout;

   /* PAT indices have no actual effect on DG2 and DG1: smem caches will
    * always be snooped by the GPU and lmem will always be WC.
    * This might change on future discrete platforms.
    */
   if (anv_physical_device_has_vram(device->physical)) {
      if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
         return &device->info->pat.cached_coherent;
      return &device->info->pat.writecombining;
   }

   /* Integrated platform handling only. */
   if ((alloc_flags & ANV_BO_ALLOC_HOST_CACHED_COHERENT) == ANV_BO_ALLOC_HOST_CACHED_COHERENT)
      return &device->info->pat.cached_coherent;
   else if (alloc_flags & ANV_BO_ALLOC_HOST_CACHED)
      return &device->info->pat.writeback_incoherent;
   else
      return &device->info->pat.writecombining;
}