anv: Rework locking for sparse binding with TR-TT

When sparse binding functions submit batches, they may modify the
exec_obj_index field of anv_bo structs. This field is used to ensure a
unique list of buffers is sent to the kernel (i915). Add a lock in these
functions to prevent multiple threads from modifying this field during
the batch submission process. To avoid creating a deadlock, also rework
the locking done in anv_queue_submit().

When running the Monster Hunter Wilds benchmark on a Mesa build that
enables slab allocation of batch buffers (6f7a32ec92), this avoids a
sporadic assertion failure:

nsterHunterWilds.exe:
   ../../src/intel/vulkan/i915/anv_batch_chain.c:489:
      setup_execbuf_for_cmd_buffers:
         Assertion `execbuf->bos[idx] == first_batch_bo_real' failed.

This issue was seemingly first introduced in 04bfe828db
("anv/sparse: allow sparse resouces to use TR-TT as its backend")

Backport-to: 25.2
Ref: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12582
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37307>
(cherry picked from commit 7c8e38ac67)
This commit is contained in:
Nanley Chery 2025-09-10 05:07:06 -04:00 committed by Eric Engestrom
parent bf21cd87dc
commit 63548eec0b
3 changed files with 22 additions and 6 deletions

View file

@ -3954,7 +3954,7 @@
"description": "anv: Rework locking for sparse binding with TR-TT",
"nominated": true,
"nomination_type": 4,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -1386,8 +1386,8 @@ can_chain_query_pools(struct anv_query_pool *p1, struct anv_query_pool *p2)
}
static VkResult
anv_queue_submit_sparse_bind_locked(struct anv_queue *queue,
struct vk_queue_submit *submit)
anv_queue_submit_sparse_bind(struct anv_queue *queue,
struct vk_queue_submit *submit)
{
struct anv_device *device = queue->device;
VkResult result;
@ -1647,16 +1647,16 @@ anv_queue_submit(struct vk_queue *vk_queue,
uint64_t start_ts = intel_ds_begin_submit(&queue->ds);
pthread_mutex_lock(&device->mutex);
if (submit->buffer_bind_count ||
submit->image_opaque_bind_count ||
submit->image_bind_count) {
result = anv_queue_submit_sparse_bind_locked(queue, submit);
result = anv_queue_submit_sparse_bind(queue, submit);
} else {
pthread_mutex_lock(&device->mutex);
result = anv_queue_submit_cmd_buffers_locked(queue, submit,
utrace_submit);
pthread_mutex_unlock(&device->mutex);
}
pthread_mutex_unlock(&device->mutex);
intel_ds_end_submit(&queue->ds, start_ts);
intel_ds_device_process(&device->ds, false);

View file

@ -686,11 +686,18 @@ anv_trtt_first_bind_init(struct anv_device *device)
return VK_SUCCESS;
}
/* We lock around execbuf because the algorithm we use for building the
* list of unique buffers isn't thread-safe. Lock the device mutex
* before the TRTT mutex for consistency with the order of other paths
* (e.g., anv_queue_submit_cmd_buffers_locked()).
*/
pthread_mutex_lock(&device->mutex);
simple_mtx_lock(&trtt->mutex);
/* This means we have already initialized the first bind. */
if (likely(trtt->l3_addr)) {
simple_mtx_unlock(&trtt->mutex);
pthread_mutex_unlock(&device->mutex);
return VK_SUCCESS;
}
@ -736,6 +743,7 @@ out:
trtt->l3_addr = 0;
simple_mtx_unlock(&trtt->mutex);
pthread_mutex_unlock(&device->mutex);
return result;
}
@ -765,6 +773,12 @@ anv_sparse_bind_trtt(struct anv_device *device,
if (result != VK_SUCCESS)
goto out_async;
/* We lock around execbuf because the algorithm we use for building the
 * list of unique buffers isn't thread-safe. Lock the device mutex
 * before the TRTT mutex to match the lock order used by other paths
 * (e.g., anv_queue_submit_cmd_buffers_locked()).
 */
pthread_mutex_lock(&device->mutex);
simple_mtx_lock(&trtt->mutex);
/* Do this so we can avoid reallocs later. */
@ -871,6 +885,7 @@ anv_sparse_bind_trtt(struct anv_device *device,
list_addtail(&submit->link, &trtt->in_flight_batches);
simple_mtx_unlock(&trtt->mutex);
pthread_mutex_unlock(&device->mutex);
ANV_RMV(vm_binds, device, sparse_submit->binds, sparse_submit->binds_len);
@ -881,6 +896,7 @@ anv_sparse_bind_trtt(struct anv_device *device,
util_dynarray_fini(&l3l2_binds);
out_add_bind:
simple_mtx_unlock(&trtt->mutex);
pthread_mutex_unlock(&device->mutex);
anv_async_submit_fini(&submit->base);
out_async:
vk_free(&device->vk.alloc, submit);