anv: Make all VkDeviceMemory BOs resident permanently

We spend a lot of time in the driver adding things to hash sets to track
residency.  The reality is that a properly built Vulkan app uses large
memory objects and sub-allocates from them.  In a typical frame, most,
if not all, of those allocations are going to be resident for the entire
frame so we're really not saving ourselves much by tracking fine-grained
residency.  Just throwing everything in the validation list does make it
a little bit more expensive inside the kernel to walk the list and
ensure that all our VA is in order.  However, without relocations, the
overhead of that is pretty small.

If we ever do run into a memory pressure situation where the fine-
grained residency could even potentially help, we would likely be
swapping one page out to make room for another within the draw call and
performance is totally lost at that point.  We're better off swapping
out other apps and just letting ours run a whole frame.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
Jason Ekstrand 2019-02-26 18:05:34 -06:00 committed by Jason Ekstrand
parent a9241edfa3
commit 83b943cc2f
4 changed files with 48 additions and 46 deletions

View file

@@ -1393,18 +1393,13 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
    anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0,
                           &cmd_buffer->device->alloc);
 
-   /* Add the BOs for all the pinned buffers */
-   if (cmd_buffer->device->pinned_buffers->entries) {
-      struct set *pinned_bos = _mesa_pointer_set_create(NULL);
-      if (pinned_bos == NULL)
-         return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
-      set_foreach(cmd_buffer->device->pinned_buffers, entry) {
-         const struct anv_buffer *buffer = entry->key;
-         _mesa_set_add(pinned_bos, buffer->address.bo);
-      }
-      anv_execbuf_add_bo_set(execbuf, pinned_bos, 0,
-                             &cmd_buffer->device->alloc);
-      _mesa_set_destroy(pinned_bos, NULL);
+   /* Add the BOs for all memory objects */
+   list_for_each_entry(struct anv_device_memory, mem,
+                       &cmd_buffer->device->memory_objects, link) {
+      result = anv_execbuf_add_bo(execbuf, mem->bo, NULL, 0,
+                                  &cmd_buffer->device->alloc);
+      if (result != VK_SUCCESS)
+         return result;
    }
 
    struct anv_block_pool *pool;

View file

@@ -2005,6 +2005,8 @@ VkResult anv_CreateDevice(
          high_heap->size;
    }
 
+   list_inithead(&device->memory_objects);
+
    /* As per spec, the driver implementation may deny requests to acquire
     * a priority above the default priority (MEDIUM) if the caller does not
     * have sufficient privileges.  In this scenario VK_ERROR_NOT_PERMITTED_EXT
@@ -2118,9 +2120,6 @@ VkResult anv_CreateDevice(
    if (device->info.gen >= 10)
       anv_device_init_hiz_clear_value_bo(device);
 
-   if (physical_device->use_softpin)
-      device->pinned_buffers = _mesa_pointer_set_create(NULL);
-
    anv_scratch_pool_init(device, &device->scratch_pool);
 
    anv_queue_init(device, &device->queue);
@@ -2211,9 +2210,6 @@ void anv_DestroyDevice(
    anv_queue_finish(&device->queue);
 
-   if (physical_device->use_softpin)
-      _mesa_set_destroy(device->pinned_buffers, NULL);
-
 #ifdef HAVE_VALGRIND
    /* We only need to free these to prevent valgrind errors.  The backing
     * BO will go away in a couple of lines so we don't actually leak.
@@ -2698,6 +2694,10 @@ VkResult anv_AllocateMemory(
    }
 
  success:
+   pthread_mutex_lock(&device->mutex);
+   list_addtail(&mem->link, &device->memory_objects);
+   pthread_mutex_unlock(&device->mutex);
+
    *pMem = anv_device_memory_to_handle(mem);
 
    return VK_SUCCESS;
@@ -2789,6 +2789,10 @@ void anv_FreeMemory(
    if (mem == NULL)
       return;
 
+   pthread_mutex_lock(&device->mutex);
+   list_del(&mem->link);
+   pthread_mutex_unlock(&device->mutex);
+
    if (mem->map)
       anv_UnmapMemory(_device, _mem);
@@ -3324,12 +3328,6 @@ VkResult anv_CreateBuffer(
    buffer->usage = pCreateInfo->usage;
    buffer->address = ANV_NULL_ADDRESS;
 
-   if (buffer->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) {
-      pthread_mutex_lock(&device->mutex);
-      _mesa_set_add(device->pinned_buffers, buffer);
-      pthread_mutex_unlock(&device->mutex);
-   }
-
    *pBuffer = anv_buffer_to_handle(buffer);
 
    return VK_SUCCESS;
@@ -3346,12 +3344,6 @@ void anv_DestroyBuffer(
    if (!buffer)
       return;
 
-   if (buffer->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) {
-      pthread_mutex_lock(&device->mutex);
-      _mesa_set_remove_key(device->pinned_buffers, buffer);
-      pthread_mutex_unlock(&device->mutex);
-   }
-
    vk_free2(&device->alloc, pAllocator, buffer);
 }

View file

@@ -1093,6 +1093,9 @@ struct anv_device {
    uint64_t                                    vma_lo_available;
    uint64_t                                    vma_hi_available;
 
+   /** List of all anv_device_memory objects */
+   struct list_head                            memory_objects;
+
    struct anv_bo_pool                          batch_bo_pool;
 
    struct anv_bo_cache                         bo_cache;
@@ -1106,12 +1109,6 @@ struct anv_device {
    struct anv_bo                               trivial_batch_bo;
    struct anv_bo                               hiz_clear_bo;
 
-   /* Set of pointers to anv_buffer objects for all pinned buffers.  Pinned
-    * buffers are always resident because they could be used at any time via
-    * VK_EXT_buffer_device_address.
-    */
-   struct set *                                pinned_buffers;
-
    struct anv_pipeline_cache                   default_pipeline_cache;
 
    struct blorp_context                        blorp;
@@ -1483,6 +1480,8 @@ _anv_combine_address(struct anv_batch *batch, void *location,
 #define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
 
 struct anv_device_memory {
+   struct list_head                             link;
+
    struct anv_bo *                              bo;
    struct anv_memory_type *                     type;
    VkDeviceSize                                 map_size;

View file

@@ -2045,6 +2045,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
    if (bt_state->map == NULL)
       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
 
+   /* We only need to emit relocs if we're not using softpin.  If we are using
+    * softpin then we always keep all user-allocated memory objects resident.
+    */
+   const bool need_client_mem_relocs =
+      !cmd_buffer->device->instance->physicalDevice.use_softpin;
+
    /* We only use push constant space for images before gen9 */
    if (map->image_param_count > 0) {
       VkResult result =
@@ -2122,8 +2128,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                           cmd_buffer->state.compute.num_workgroups,
                           12, 1);
          bt_map[s] = surface_state.offset + state_offset;
-         add_surface_reloc(cmd_buffer, surface_state,
-                           cmd_buffer->state.compute.num_workgroups);
+         if (need_client_mem_relocs) {
+            add_surface_reloc(cmd_buffer, surface_state,
+                              cmd_buffer->state.compute.num_workgroups);
+         }
          continue;
       } else if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS) {
          /* This is a descriptor set buffer so the set index is actually
@@ -2155,6 +2163,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
          surface_state = sstate.state;
          assert(surface_state.alloc_size);
+         if (need_client_mem_relocs)
             add_surface_state_relocs(cmd_buffer, sstate);
          break;
       }
@@ -2170,6 +2179,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
             surface_state = sstate.state;
             assert(surface_state.alloc_size);
+            if (need_client_mem_relocs)
                add_surface_state_relocs(cmd_buffer, sstate);
          } else {
             /* For color input attachments, we create the surface state at
@@ -2189,6 +2199,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                : desc->image_view->planes[binding->plane].storage_surface_state;
          surface_state = sstate.state;
          assert(surface_state.alloc_size);
+         if (need_client_mem_relocs)
             add_surface_state_relocs(cmd_buffer, sstate);
 
          if (devinfo->gen < 9) {
            /* We only need the image params on gen8 and earlier.  No image
@@ -2210,8 +2221,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
          surface_state = desc->buffer_view->surface_state;
          assert(surface_state.alloc_size);
-         add_surface_reloc(cmd_buffer, surface_state,
-                           desc->buffer_view->address);
+         if (need_client_mem_relocs) {
+            add_surface_reloc(cmd_buffer, surface_state,
+                              desc->buffer_view->address);
+         }
          break;
 
       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -2235,6 +2248,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
          anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
                                        format, address, range, 1);
+         if (need_client_mem_relocs)
             add_surface_reloc(cmd_buffer, surface_state, address);
          break;
       }
@@ -2244,8 +2258,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
            ? desc->buffer_view->writeonly_storage_surface_state
            : desc->buffer_view->storage_surface_state;
          assert(surface_state.alloc_size);
-         add_surface_reloc(cmd_buffer, surface_state,
-                           desc->buffer_view->address);
+         if (need_client_mem_relocs) {
+            add_surface_reloc(cmd_buffer, surface_state,
+                              desc->buffer_view->address);
+         }
 
          if (devinfo->gen < 9) {
            assert(image < MAX_GEN8_IMAGES);
            struct brw_image_param *image_param =