diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index bd516408f09..004977f3f44 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1717,3 +1717,156 @@ anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
    __builtin_ia32_mfence();
 #endif
 }
+
+static VkResult
+anv_async_submit_extend_batch(struct anv_batch *batch, uint32_t size,
+                              void *user_data)
+{
+   struct anv_async_submit *submit = user_data;
+
+   uint32_t alloc_size = 0;
+   util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
+      alloc_size += (*bo)->size;
+   alloc_size = MAX2(alloc_size * 2, 8192);
+
+   struct anv_bo *bo;
+   VkResult result = anv_bo_pool_alloc(submit->bo_pool,
+                                       align(alloc_size, 4096),
+                                       &bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   util_dynarray_append(&submit->batch_bos, struct anv_bo *, bo);
+
+   batch->end += 4 * GFX9_MI_BATCH_BUFFER_START_length;
+
+   anv_batch_emit(batch, GFX9_MI_BATCH_BUFFER_START, bbs) {
+      bbs.DWordLength = GFX9_MI_BATCH_BUFFER_START_length -
+                        GFX9_MI_BATCH_BUFFER_START_length_bias;
+      bbs.SecondLevelBatchBuffer = Firstlevelbatch;
+      bbs.AddressSpaceIndicator = ASI_PPGTT;
+      bbs.BatchBufferStartAddress = (struct anv_address) { bo, 0 };
+   }
+
+   anv_batch_set_storage(batch,
+                         (struct anv_address) { .bo = bo, },
+                         bo->map,
+                         bo->size - 4 * GFX9_MI_BATCH_BUFFER_START_length);
+
+   return VK_SUCCESS;
+}
+
+VkResult
+anv_async_submit_init(struct anv_async_submit *submit,
+                      struct anv_queue *queue,
+                      struct anv_bo_pool *bo_pool,
+                      bool use_companion_rcs,
+                      bool create_signal_sync)
+{
+   struct anv_device *device = queue->device;
+
+   memset(submit, 0, sizeof(*submit));
+
+   submit->use_companion_rcs = use_companion_rcs;
+   submit->queue = queue;
+   submit->bo_pool = bo_pool;
+
+   const bool uses_relocs = device->physical->uses_relocs;
+   VkResult result =
+      anv_reloc_list_init(&submit->relocs, &device->vk.alloc, uses_relocs);
+   if (result != VK_SUCCESS)
+      return result;
+
+   submit->batch = (struct anv_batch) {
+      .alloc = &device->vk.alloc,
+      .relocs = &submit->relocs,
+      .user_data = submit,
+      .extend_cb = anv_async_submit_extend_batch,
+   };
+
+   util_dynarray_init(&submit->batch_bos, NULL);
+
+   if (create_signal_sync) {
+      result = vk_sync_create(&device->vk,
+                              &device->physical->sync_syncobj_type,
+                              0, 0, &submit->signal.sync);
+      if (result != VK_SUCCESS) {
+         anv_reloc_list_finish(&submit->relocs);
+         util_dynarray_fini(&submit->batch_bos);
+         return result;
+      }
+      submit->owns_sync = true;
+   }
+
+   return VK_SUCCESS;
+}
+
+void
+anv_async_submit_fini(struct anv_async_submit *submit)
+{
+   struct anv_device *device = submit->queue->device;
+
+   if (submit->owns_sync)
+      vk_sync_destroy(&device->vk, submit->signal.sync);
+
+   util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
+      anv_bo_pool_free(submit->bo_pool, *bo);
+   util_dynarray_fini(&submit->batch_bos);
+   anv_reloc_list_finish(&submit->relocs);
+}
+
+VkResult
+anv_async_submit_create(struct anv_queue *queue,
+                        struct anv_bo_pool *bo_pool,
+                        bool use_companion_rcs,
+                        bool create_signal_sync,
+                        struct anv_async_submit **out_submit)
+{
+   struct anv_device *device = queue->device;
+
+   *out_submit =
+      vk_alloc(&device->vk.alloc, sizeof(struct anv_async_submit), 8,
+               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (*out_submit == NULL)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   VkResult result = anv_async_submit_init(*out_submit, queue,
+                                           bo_pool,
+                                           use_companion_rcs,
+                                           create_signal_sync);
+   if (result != VK_SUCCESS)
+      vk_free(&device->vk.alloc, *out_submit);
+
+   return result;
+}
+
+void
+anv_async_submit_destroy(struct anv_async_submit *submit)
+{
+   struct anv_device *device = submit->queue->device;
+   anv_async_submit_fini(submit);
+   vk_free(&device->vk.alloc, submit);
+}
+
+bool
+anv_async_submit_done(struct anv_async_submit *submit)
+{
+   struct anv_device *device = submit->queue->device;
+
+   return vk_sync_wait(&device->vk,
+                       submit->signal.sync,
+                       submit->signal.signal_value,
+                       VK_SYNC_WAIT_COMPLETE, 0) == VK_SUCCESS;
+}
+
+bool
+anv_async_submit_wait(struct anv_async_submit *submit)
+{
+   struct anv_device *device = submit->queue->device;
+
+   return vk_sync_wait(&device->vk,
+                       submit->signal.sync,
+                       submit->signal.signal_value,
+                       VK_SYNC_WAIT_COMPLETE,
+                       os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE)) == VK_SUCCESS;
+}
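For reference, a minimal sketch of how these helpers are meant to be used together, mirroring how anv_queue_trace() drives them later in this patch. The choice of device->utrace_bo_pool, the omitted error handling, and the `queue`/`device` locals are illustrative assumptions, not part of the change:

   struct anv_async_submit *submit;
   VkResult result =
      anv_async_submit_create(queue, &device->utrace_bo_pool,
                              false /* use_companion_rcs */,
                              true /* create_signal_sync */,
                              &submit);
   if (result != VK_SUCCESS)
      return result;

   /* Record commands into submit->batch; extra BOs are chained automatically
    * through anv_async_submit_extend_batch() when the batch runs out of space.
    */
   anv_batch_emit(&submit->batch, GFX9_MI_BATCH_BUFFER_END, bbe);
   anv_batch_emit(&submit->batch, GFX9_MI_NOOP, noop);

   /* No extra waits/signals; submit->signal.sync is signaled on completion
    * because create_signal_sync was true.
    */
   result = device->kmd_backend->queue_exec_async(submit, 0, NULL, 0, NULL);

   /* Block until the GPU is done, then release the batch BOs, the relocation
    * list and the owned sync.
    */
   if (result == VK_SUCCESS)
      anv_async_submit_wait(submit);
   anv_async_submit_destroy(submit);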
diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c
index 48795b431a8..6f170e29c5d 100644
--- a/src/intel/vulkan/anv_gem_stubs.c
+++ b/src/intel/vulkan/anv_gem_stubs.c
@@ -88,7 +88,11 @@ stub_queue_exec_locked(struct anv_queue *queue,
 }
 
 static VkResult
-stub_queue_exec_trace(struct anv_queue *queue, struct anv_utrace_submit *submit)
+stub_queue_exec_async(struct anv_async_submit *submit,
+                      uint32_t wait_count,
+                      const struct vk_sync_wait *waits,
+                      uint32_t signal_count,
+                      const struct vk_sync_signal *signals)
 {
    return VK_ERROR_UNKNOWN;
 }
@@ -178,7 +182,7 @@ const struct anv_kmd_backend *anv_stub_kmd_backend_get(void)
       .execute_simple_batch = stub_execute_simple_batch,
       .execute_trtt_batch = stub_execute_trtt_batch,
       .queue_exec_locked = stub_queue_exec_locked,
-      .queue_exec_trace = stub_queue_exec_trace,
+      .queue_exec_async = stub_queue_exec_async,
       .bo_alloc_flags_to_bo_flags = stub_bo_alloc_flags_to_bo_flags,
    };
    return &stub_backend;
diff --git a/src/intel/vulkan/anv_kmd_backend.h b/src/intel/vulkan/anv_kmd_backend.h
index 13d3799858e..6177709fd48 100644
--- a/src/intel/vulkan/anv_kmd_backend.h
+++ b/src/intel/vulkan/anv_kmd_backend.h
@@ -37,6 +37,7 @@ struct anv_cmd_buffer;
 struct anv_device;
 struct anv_queue;
 struct anv_query_pool;
+struct anv_async_submit;
 struct anv_utrace_submit;
 struct anv_sparse_submission;
 struct anv_trtt_batch_bo;
@@ -110,6 +111,8 @@ struct anv_kmd_backend {
                                      struct anv_bo *batch_bo,
                                      uint32_t batch_bo_size,
                                      bool is_companion_rcs_batch);
+   /* The caller is expected to hold device->mutex when calling this vfunc.
+    */
    VkResult (*execute_trtt_batch)(struct anv_sparse_submission *submit,
                                   struct anv_trtt_batch_bo *trtt_bbo);
    VkResult (*queue_exec_locked)(struct anv_queue *queue,
@@ -122,8 +125,14 @@ struct anv_kmd_backend {
                                  struct anv_query_pool *perf_query_pool,
                                  uint32_t perf_query_pass,
                                  struct anv_utrace_submit *utrace_submit);
-   VkResult (*queue_exec_trace)(struct anv_queue *queue,
-                                struct anv_utrace_submit *submit);
+   /* The caller is not expected to hold device->mutex when calling this
+    * vfunc.
+    */
+   VkResult (*queue_exec_async)(struct anv_async_submit *submit,
+                                uint32_t wait_count,
+                                const struct vk_sync_wait *waits,
+                                uint32_t signal_count,
+                                const struct vk_sync_signal *signals);
    uint32_t (*bo_alloc_flags_to_bo_flags)(struct anv_device *device,
                                           enum anv_bo_alloc_flags alloc_flags);
 };
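A hedged sketch of the calling convention for the new vfunc, mirroring the utrace tail submission added to the i915/xe backends further down. `extra_sync` is a hypothetical caller-owned vk_sync, and `submit`/`device` are assumed to be in scope:

   struct vk_sync_signal signal = {
      .sync = extra_sync,          /* hypothetical sync owned by the caller */
      .signal_value = 0,
   };
   /* device->mutex must not be held here; the backend also signals
    * submit->signal.sync and queue->sync on its own.
    */
   VkResult result =
      device->kmd_backend->queue_exec_async(submit,
                                            0, NULL,     /* extra waits */
                                            1, &signal); /* extra signals */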
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index da698a23567..660169593ec 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2477,6 +2477,51 @@ _anv_combine_address(struct anv_batch *batch, void *location,
 /* #define __gen_address_value anv_address_physical */
 /* #define __gen_address_offset anv_address_add */
 
+/* Base structure used to track a submission that needs some cleanup
+ * operations upon completion. Should be embedded into a larger structure.
+ */
+struct anv_async_submit {
+   struct anv_queue *queue;
+
+   struct anv_bo_pool *bo_pool;
+
+   bool use_companion_rcs;
+
+   bool owns_sync;
+   struct vk_sync_signal signal;
+
+   struct anv_reloc_list relocs;
+   struct anv_batch batch;
+   struct util_dynarray batch_bos;
+};
+
+VkResult
+anv_async_submit_init(struct anv_async_submit *submit,
+                      struct anv_queue *queue,
+                      struct anv_bo_pool *bo_pool,
+                      bool use_companion_rcs,
+                      bool create_signal_sync);
+
+void
+anv_async_submit_fini(struct anv_async_submit *submit);
+
+VkResult
+anv_async_submit_create(struct anv_queue *queue,
+                        struct anv_bo_pool *bo_pool,
+                        bool use_companion_rcs,
+                        bool create_signal_sync,
+                        struct anv_async_submit **out_submit);
+
+void
+anv_async_submit_destroy(struct anv_async_submit *submit);
+
+bool
+anv_async_submit_done(struct anv_async_submit *submit);
+
+bool
+anv_async_submit_wait(struct anv_async_submit *submit);
+
+
 struct anv_device_memory {
    struct vk_device_memory vk;
 
@@ -6072,12 +6117,7 @@ void anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
  * (vkQueueBeginDebugUtilsLabelEXT/vkQueueEndDebugUtilsLabelEXT)
  */
 struct anv_utrace_submit {
-   /* Batch stuff to implement of copy of timestamps recorded in another
-    * buffer.
-    */
-   struct anv_reloc_list relocs;
-   struct anv_batch batch;
-   struct util_dynarray batch_bos;
+   struct anv_async_submit base;
 
    /* structure used by the perfetto glue */
    struct intel_ds_flush_data ds;
@@ -6086,12 +6126,6 @@ struct anv_utrace_submit {
    struct anv_state_stream dynamic_state_stream;
    struct anv_state_stream general_state_stream;
 
-   /* Syncobj to be signaled when the batch completes */
-   struct vk_sync *sync;
-
-   /* Queue on which all the recorded traces are submitted */
-   struct anv_queue *queue;
-
    /* Buffer of 64bits timestamps (only used for timestamp copies) */
    struct anv_bo *trace_bo;
 
diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c
index 8d1ed6cde27..55fff661260 100644
--- a/src/intel/vulkan/anv_utrace.c
+++ b/src/intel/vulkan/anv_utrace.c
@@ -90,11 +90,7 @@ anv_utrace_delete_submit(struct u_trace_context *utctx, void *submit_data)
    if (submit->trace_bo)
       anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo);
 
-   util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
-      anv_bo_pool_free(&device->utrace_bo_pool, *bo);
-   util_dynarray_fini(&submit->batch_bos);
-
-   vk_sync_destroy(&device->vk, submit->sync);
+   anv_async_submit_fini(&submit->base);
 
    vk_free(&device->vk.alloc, submit);
 }
@@ -150,44 +146,6 @@ anv_device_utrace_emit_cs_copy_ts_buffer(struct u_trace_context *utctx,
                                             push_data_state);
 }
 
-static VkResult
-anv_utrace_submit_extend_batch(struct anv_batch *batch, uint32_t size,
-                               void *user_data)
-{
-   struct anv_utrace_submit *submit = user_data;
-
-   uint32_t alloc_size = 0;
-   util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
-      alloc_size += (*bo)->size;
-   alloc_size = MAX2(alloc_size * 2, 8192);
-
-   struct anv_bo *bo;
-   VkResult result = anv_bo_pool_alloc(&submit->queue->device->utrace_bo_pool,
-                                       align(alloc_size, 4096),
-                                       &bo);
-   if (result != VK_SUCCESS)
-      return result;
-
-   util_dynarray_append(&submit->batch_bos, struct anv_bo *, bo);
-
-   batch->end += 4 * GFX9_MI_BATCH_BUFFER_START_length;
-
-   anv_batch_emit(batch, GFX9_MI_BATCH_BUFFER_START, bbs) {
-      bbs.DWordLength = GFX9_MI_BATCH_BUFFER_START_length -
-                        GFX9_MI_BATCH_BUFFER_START_length_bias;
-      bbs.SecondLevelBatchBuffer = Firstlevelbatch;
-      bbs.AddressSpaceIndicator = ASI_PPGTT;
-      bbs.BatchBufferStartAddress = (struct anv_address) { bo, 0 };
-   }
-
-   anv_batch_set_storage(batch,
-                         (struct anv_address) { .bo = bo, },
-                         bo->map,
-                         bo->size - 4 * GFX9_MI_BATCH_BUFFER_START_length);
-
-   return VK_SUCCESS;
-}
-
 VkResult
 anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
                                     uint32_t cmd_buffer_count,
@@ -212,41 +170,27 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
    if (!submit)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   submit->queue = queue;
+   result = anv_async_submit_init(&submit->base, queue,
+                                  &device->utrace_bo_pool,
+                                  false, true);
+   if (result != VK_SUCCESS)
+      goto error_async;
 
    intel_ds_flush_data_init(&submit->ds, &queue->ds, queue->ds.submission_id);
 
-   result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
-                           0, 0, &submit->sync);
-   if (result != VK_SUCCESS)
-      goto error_sync;
-
-   util_dynarray_init(&submit->batch_bos, NULL);
-
+   struct anv_batch *batch = &submit->base.batch;
    if (utrace_copies > 0) {
       result = anv_bo_pool_alloc(&device->utrace_bo_pool,
                                  utrace_copies * 4096,
                                  &submit->trace_bo);
       if (result != VK_SUCCESS)
-         goto error_trace_buf;
-
-      const bool uses_relocs = device->physical->uses_relocs;
-      result = anv_reloc_list_init(&submit->relocs, &device->vk.alloc, uses_relocs);
-      if (result != VK_SUCCESS)
-         goto error_reloc_list;
+         goto error_sync;
 
       anv_state_stream_init(&submit->dynamic_state_stream,
                             &device->dynamic_state_pool, 16384);
       anv_state_stream_init(&submit->general_state_stream,
                             &device->general_state_pool, 16384);
 
-      submit->batch = (struct anv_batch) {
-         .alloc = &device->vk.alloc,
-         .relocs = &submit->relocs,
-         .user_data = submit,
-         .extend_cb = anv_utrace_submit_extend_batch,
-      };
-
       /* Only engine class where we support timestamp copies
        *
        * TODO: add INTEL_ENGINE_CLASS_COPY support (should be trivial ;)
@@ -255,12 +199,10 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
              queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE);
 
       if (queue->family->engine_class == INTEL_ENGINE_CLASS_RENDER) {
-         trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch);
+         trace_intel_begin_trace_copy_cb(&submit->ds.trace, batch);
 
         anv_genX(device->info, emit_so_memcpy_init)(&submit->memcpy_state,
-                                                     device,
-                                                     NULL,
-                                                     &submit->batch);
+                                                     device, NULL, batch);
         uint32_t num_traces = 0;
         for (uint32_t i = 0; i < cmd_buffer_count; i++) {
            if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
@@ -277,8 +219,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
          }
 
          anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state);
-         trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch,
-                                       num_traces);
+         trace_intel_end_trace_copy_cb(&submit->ds.trace, batch, num_traces);
 
          anv_genX(device->info, emit_so_memcpy_end)(&submit->memcpy_state);
       } else {
@@ -290,13 +231,13 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
          if (ret != VK_SUCCESS)
             goto error_batch;
 
-         trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch);
+         trace_intel_begin_trace_copy_cb(&submit->ds.trace, batch);
 
          submit->simple_state = (struct anv_simple_shader) {
             .device = device,
             .dynamic_state_stream = &submit->dynamic_state_stream,
             .general_state_stream = &submit->general_state_stream,
-            .batch = &submit->batch,
+            .batch = batch,
             .kernel = copy_kernel,
             .l3_config = device->internal_kernels_l3_config,
          };
@@ -318,19 +259,19 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
             }
          }
 
-         trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch,
-                                       num_traces);
+         trace_intel_end_trace_copy_cb(&submit->ds.trace, batch, num_traces);
 
         anv_genX(device->info, emit_simple_shader_end)(&submit->simple_state);
      }
 
-      intel_ds_queue_flush_data(&queue->ds, &submit->ds.trace, &submit->ds,
-                                device->vk.current_frame, true);
-
-      if (submit->batch.status != VK_SUCCESS) {
-         result = submit->batch.status;
+      if (batch->status != VK_SUCCESS) {
+         result = batch->status;
          goto error_batch;
       }
+
+      intel_ds_queue_flush_data(&queue->ds, &submit->ds.trace, &submit->ds,
+                                device->vk.current_frame, true);
    } else {
       for (uint32_t i = 0; i < cmd_buffer_count; i++) {
          assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
@@ -345,15 +286,11 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
    return VK_SUCCESS;
 
  error_batch:
-   anv_reloc_list_finish(&submit->relocs);
-   util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
-      anv_bo_pool_free(&device->utrace_bo_pool, *bo);
- error_reloc_list:
    anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo);
- error_trace_buf:
-   vk_sync_destroy(&device->vk, submit->sync);
 error_sync:
    intel_ds_flush_data_fini(&submit->ds);
+   anv_async_submit_fini(&submit->base);
+ error_async:
    vk_free(&device->vk.alloc, submit);
    return result;
 }
@@ -458,8 +395,8 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
       MESA_TRACE_SCOPE("anv utrace wait timestamps");
       UNUSED VkResult result =
          vk_sync_wait(&device->vk,
-                      submit->sync,
-                      0,
+                      submit->base.signal.sync,
+                      submit->base.signal.signal_value,
                       VK_SYNC_WAIT_COMPLETE,
                       os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
       assert(result == VK_SUCCESS);
@@ -600,69 +537,53 @@ anv_queue_trace(struct anv_queue *queue, const char *label, bool frame, bool beg
    if (!submit)
       return;
 
-   submit->queue = queue;
+   result = anv_async_submit_init(&submit->base, queue,
+                                  &device->utrace_bo_pool,
+                                  false, true);
+   if (result != VK_SUCCESS)
+      goto error_async;
 
    intel_ds_flush_data_init(&submit->ds, &queue->ds, queue->ds.submission_id);
 
-   result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
-                           0, 0, &submit->sync);
-   if (result != VK_SUCCESS)
-      goto error_trace;
-
-   const bool uses_relocs = device->physical->uses_relocs;
-   result = anv_reloc_list_init(&submit->relocs, &device->vk.alloc, uses_relocs);
-   if (result != VK_SUCCESS)
-      goto error_sync;
-
-   submit->batch = (struct anv_batch) {
-      .alloc = &device->vk.alloc,
-      .relocs = &submit->relocs,
-      .user_data = submit,
-      .extend_cb = anv_utrace_submit_extend_batch,
-   };
-
+   struct anv_batch *batch = &submit->base.batch;
    if (frame) {
       if (begin)
-         trace_intel_begin_frame(&submit->ds.trace, &submit->batch);
+         trace_intel_begin_frame(&submit->ds.trace, batch);
       else
-         trace_intel_end_frame(&submit->ds.trace, &submit->batch,
+         trace_intel_end_frame(&submit->ds.trace, batch,
                                device->debug_frame_desc->frame_id);
    } else {
       if (begin) {
-         trace_intel_begin_queue_annotation(&submit->ds.trace, &submit->batch);
+         trace_intel_begin_queue_annotation(&submit->ds.trace, batch);
       } else {
-         trace_intel_end_queue_annotation(&submit->ds.trace,
-                                          &submit->batch,
-                                          strlen(label),
-                                          label);
+         trace_intel_end_queue_annotation(&submit->ds.trace, batch,
+                                          strlen(label), label);
       }
    }
 
-   anv_batch_emit(&submit->batch, GFX9_MI_BATCH_BUFFER_END, bbs);
-   anv_batch_emit(&submit->batch, GFX9_MI_NOOP, noop);
+   anv_batch_emit(batch, GFX9_MI_BATCH_BUFFER_END, bbs);
+   anv_batch_emit(batch, GFX9_MI_NOOP, noop);
 
-   if (submit->batch.status != VK_SUCCESS) {
-      result = submit->batch.status;
-      goto error_reloc_list;
+   if (batch->status != VK_SUCCESS) {
+      result = batch->status;
+      goto error_batch;
    }
 
    intel_ds_queue_flush_data(&queue->ds, &submit->ds.trace, &submit->ds,
                              device->vk.current_frame, true);
 
-   pthread_mutex_lock(&device->mutex);
-   device->kmd_backend->queue_exec_trace(queue, submit);
-   pthread_mutex_unlock(&device->mutex);
+   result =
+      device->kmd_backend->queue_exec_async(&submit->base,
+                                            0, NULL, 0, NULL);
+   if (result != VK_SUCCESS)
+      goto error_batch;
 
    return;
 
- error_reloc_list:
-   anv_reloc_list_finish(&submit->relocs);
-   util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
-      anv_bo_pool_free(&device->utrace_bo_pool, *bo);
- error_sync:
-   vk_sync_destroy(&device->vk, submit->sync);
- error_trace:
+ error_batch:
    intel_ds_flush_data_fini(&submit->ds);
+   anv_async_submit_fini(&submit->base);
+ error_async:
    vk_free(&device->vk.alloc, submit);
 }
diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c
index 785e467eedc..bda46d9a117 100644
--- a/src/intel/vulkan/i915/anv_batch_chain.c
+++ b/src/intel/vulkan/i915/anv_batch_chain.c
@@ -569,9 +569,14 @@ setup_execbuf_fence_params(struct anv_execbuf *execbuf)
 }
 
 static VkResult
-setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
-                     struct anv_utrace_submit *submit)
+setup_async_execbuf(struct anv_execbuf *execbuf,
+                    struct anv_async_submit *submit,
+                    uint32_t wait_count,
+                    const struct vk_sync_wait *waits,
+                    uint32_t signal_count,
+                    const struct vk_sync_signal *signals)
 {
+   struct anv_queue *queue = submit->queue;
    struct anv_device *device = queue->device;
 
    /* Always add the workaround BO as it includes a driver identifier for the
@@ -598,10 +603,38 @@ setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
 #endif
    }
 
-   result = anv_execbuf_add_sync(device, execbuf, submit->sync,
-                                 true /* is_signal */, 0 /* value */);
-   if (result != VK_SUCCESS)
-      return result;
+   for (uint32_t i = 0; i < wait_count; i++) {
+      result = anv_execbuf_add_sync(device, execbuf,
+                                    waits[i].sync,
+                                    false /* is_signal */,
+                                    waits[i].wait_value);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+   for (uint32_t i = 0; i < signal_count; i++) {
+      result = anv_execbuf_add_sync(device, execbuf,
+                                    signals[i].sync,
+                                    true /* is_signal */,
+                                    signals[i].signal_value);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+   if (submit->signal.sync) {
+      result = anv_execbuf_add_sync(device, execbuf,
+                                    submit->signal.sync,
+                                    true /* is_signal */,
+                                    submit->signal.signal_value);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+   if (queue->sync) {
+      result = anv_execbuf_add_sync(device, execbuf,
+                                    queue->sync,
+                                    true /* is_signal */,
+                                    0 /* signal_value */);
+      if (result != VK_SUCCESS)
+         return result;
+   }
 
    struct anv_bo *batch_bo = *util_dynarray_element(&submit->batch_bos,
                                                     struct anv_bo *, 0);
@@ -623,13 +656,13 @@ setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
 
    uint64_t exec_flags = 0;
    uint32_t context_id;
-   get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
+   get_context_and_exec_flags(queue, submit->use_companion_rcs,
+                              &exec_flags, &context_id);
 
    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
       .buffers_ptr = (uintptr_t) execbuf->objects,
       .buffer_count = execbuf->bo_count,
       .batch_start_offset = 0,
-      .batch_len = submit->batch.next - submit->batch.start,
       .flags = I915_EXEC_NO_RELOC |
                I915_EXEC_HANDLE_LUT |
                exec_flags,
@@ -658,36 +691,6 @@ anv_gem_execbuffer(struct anv_device *device,
    return ret;
 }
 
-static VkResult
-anv_queue_exec_utrace_locked(struct anv_queue *queue,
-                             struct anv_utrace_submit *submit)
-{
-   assert(util_dynarray_num_elements(&submit->batch_bos,
-                                     struct anv_bo *) > 0);
-
-   struct anv_device *device = queue->device;
-   struct anv_execbuf execbuf = {
-      .alloc = &device->vk.alloc,
-      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
-   };
-
-   VkResult result = setup_utrace_execbuf(&execbuf, queue, submit);
-   if (result != VK_SUCCESS)
-      goto error;
-
-   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
-
-   int ret = queue->device->info->no_hw ? 0 :
-             anv_gem_execbuffer(queue->device, &execbuf.execbuf);
-   if (ret)
-      result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
-
- error:
-   anv_execbuf_finish(&execbuf);
-
-   return result;
-}
-
 static void
 anv_i915_debug_submit(const struct anv_execbuf *execbuf)
 {
@@ -714,6 +717,47 @@ anv_i915_debug_submit(const struct anv_execbuf *execbuf)
    }
 }
 
+VkResult
+i915_queue_exec_async(struct anv_async_submit *submit,
+                      uint32_t wait_count,
+                      const struct vk_sync_wait *waits,
+                      uint32_t signal_count,
+                      const struct vk_sync_signal *signals)
+{
+   assert(util_dynarray_num_elements(&submit->batch_bos,
+                                     struct anv_bo *) > 0);
+
+   struct anv_queue *queue = submit->queue;
+   struct anv_device *device = queue->device;
+   struct anv_execbuf execbuf = {
+      .alloc = &device->vk.alloc,
+      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
+   };
+
+   VkResult result = setup_async_execbuf(&execbuf, submit,
+                                         wait_count, waits,
+                                         signal_count, signals);
+   if (result != VK_SUCCESS)
+      goto error;
+
+   if (INTEL_DEBUG(DEBUG_SUBMIT))
+      anv_i915_debug_submit(&execbuf);
+
+   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
+
+   int ret = queue->device->info->no_hw ? 0 :
+             anv_gem_execbuffer(queue->device, &execbuf.execbuf);
+   if (ret)
+      result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
+
+   result = anv_queue_post_submit(queue, result);
+
+ error:
+   anv_execbuf_finish(&execbuf);
+
+   return result;
+}
+
 static VkResult
 i915_companion_rcs_queue_exec_locked(struct anv_queue *queue,
                                      struct anv_cmd_buffer *companion_rcs_cmd_buffer,
@@ -796,17 +840,23 @@ i915_queue_exec_locked(struct anv_queue *queue,
    };
    VkResult result;
 
+   /* If there is a utrace submission but no batch, it means there are no
+    * commands to run for utrace. But we still have to signal the associated
+    * syncs, so add them to the submission.
+    */
    if (utrace_submit &&
-       util_dynarray_num_elements(&utrace_submit->batch_bos,
+       util_dynarray_num_elements(&utrace_submit->base.batch_bos,
                                   struct anv_bo *) == 0) {
       result = anv_execbuf_add_sync(device, &execbuf,
-                                    utrace_submit->sync,
+                                    utrace_submit->base.signal.sync,
                                     true /* is_signal */,
-                                    0);
+                                    utrace_submit->base.signal.signal_value);
      if (result != VK_SUCCESS)
         goto error;
 
-      /* When The utrace submission doesn't have its own batch buffer*/
+      /* Avoid doing a submission after the application's batch since there
+       * are no commands.
+       */
      utrace_submit = NULL;
   }
 
@@ -944,8 +994,13 @@ i915_queue_exec_locked(struct anv_queue *queue,
  error:
    anv_execbuf_finish(&execbuf);
 
-   if (result == VK_SUCCESS && utrace_submit)
-      result = anv_queue_exec_utrace_locked(queue, utrace_submit);
+   if (result == VK_SUCCESS && utrace_submit) {
+      struct vk_sync_signal signal = {
+         .sync = utrace_submit->base.signal.sync,
+         .signal_value = utrace_submit->base.signal.signal_value,
+      };
+      result = i915_queue_exec_async(&utrace_submit->base, 0, NULL, 1, &signal);
+   }
 
    return result;
 }
@@ -1098,13 +1153,3 @@ out:
    anv_execbuf_finish(&execbuf);
    return result;
 }
-
-VkResult
-i915_queue_exec_trace(struct anv_queue *queue,
-                      struct anv_utrace_submit *submit)
-{
-   assert(util_dynarray_num_elements(&submit->batch_bos,
-                                     struct anv_bo *) > 0);
-
-   return anv_queue_exec_utrace_locked(queue, submit);
-}
diff --git a/src/intel/vulkan/i915/anv_batch_chain.h b/src/intel/vulkan/i915/anv_batch_chain.h
index fc799582828..6a780b24bbb 100644
--- a/src/intel/vulkan/i915/anv_batch_chain.h
+++ b/src/intel/vulkan/i915/anv_batch_chain.h
@@ -34,13 +34,18 @@ struct anv_queue;
 struct anv_bo;
 struct anv_cmd_buffer;
 struct anv_query_pool;
+struct anv_async_submit;
 struct anv_utrace_submit;
 struct anv_sparse_submission;
 struct anv_trtt_batch_bo;
 
 VkResult
-i915_queue_exec_trace(struct anv_queue *queue,
-                      struct anv_utrace_submit *submit);
+i915_queue_exec_async(struct anv_async_submit *submit,
+                      uint32_t wait_count,
+                      const struct vk_sync_wait *waits,
+                      uint32_t signal_count,
+                      const struct vk_sync_signal *signals);
+
 VkResult
 i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
                           uint32_t batch_bo_size, bool is_companion_rcs_batch);
diff --git a/src/intel/vulkan/i915/anv_kmd_backend.c b/src/intel/vulkan/i915/anv_kmd_backend.c
index 253abfd959e..539cdad6173 100644
--- a/src/intel/vulkan/i915/anv_kmd_backend.c
+++ b/src/intel/vulkan/i915/anv_kmd_backend.c
@@ -299,7 +299,7 @@ anv_i915_kmd_backend_get(void)
       .execute_simple_batch = i915_execute_simple_batch,
       .execute_trtt_batch = i915_execute_trtt_batch,
       .queue_exec_locked = i915_queue_exec_locked,
-      .queue_exec_trace = i915_queue_exec_trace,
+      .queue_exec_async = i915_queue_exec_async,
       .bo_alloc_flags_to_bo_flags = i915_bo_alloc_flags_to_bo_flags,
    };
    return &i915_backend;
diff --git a/src/intel/vulkan/xe/anv_batch_chain.c b/src/intel/vulkan/xe/anv_batch_chain.c
index 69a5ed69949..61f912a158d 100644
--- a/src/intel/vulkan/xe/anv_batch_chain.c
+++ b/src/intel/vulkan/xe/anv_batch_chain.c
@@ -117,8 +117,9 @@ xe_exec_process_syncs(struct anv_queue *queue,
    /* Signal the utrace sync only if it doesn't have a batch. Otherwise the
     * it's the utrace batch that should signal its own sync.
     */
-   const bool has_utrace_sync = utrace_submit &&
-      util_dynarray_num_elements(&utrace_submit->batch_bos, struct anv_bo *) == 0;
+   const bool has_utrace_sync =
+      utrace_submit &&
+      util_dynarray_num_elements(&utrace_submit->base.batch_bos, struct anv_bo *) == 0;
    const uint32_t num_syncs = wait_count + signal_count + extra_sync_count +
                               (has_utrace_sync ? 1 : 0) +
                               ((queue->sync && !is_companion_rcs_queue) ? 1 : 0) +
@@ -132,7 +133,8 @@ xe_exec_process_syncs(struct anv_queue *queue,
 
    uint32_t count = 0;
    if (has_utrace_sync) {
-      xe_syncs[count++] = vk_sync_to_drm_xe_sync(utrace_submit->sync, 0,
+      xe_syncs[count++] = vk_sync_to_drm_xe_sync(utrace_submit->base.signal.sync,
+                                                 utrace_submit->base.signal.signal_value,
                                                  TYPE_SIGNAL);
    }
 
@@ -234,41 +236,73 @@ out:
 }
 
 VkResult
-xe_queue_exec_utrace_locked(struct anv_queue *queue,
-                            struct anv_utrace_submit *utrace_submit)
+xe_queue_exec_async(struct anv_async_submit *submit,
+                    uint32_t wait_count,
+                    const struct vk_sync_wait *waits,
+                    uint32_t signal_count,
+                    const struct vk_sync_signal *signals)
 {
+   struct anv_queue *queue = submit->queue;
    struct anv_device *device = queue->device;
-   struct drm_xe_sync xe_syncs[2] = {};
+   STACK_ARRAY(struct drm_xe_sync, xe_syncs,
+               wait_count + signal_count +
+               ((submit->signal.sync != NULL) ? 1 : 0) +
+               (queue->sync != NULL ? 1 : 0) +
+               1);
+   uint32_t n_syncs = 0;
 
-   xe_syncs[0] = vk_sync_to_drm_xe_sync(utrace_submit->sync, 0, TYPE_SIGNAL);
+   for (uint32_t i = 0; i < wait_count; i++) {
+      xe_syncs[n_syncs++] = vk_sync_to_drm_xe_sync(waits[i].sync,
+                                                   waits[i].wait_value,
+                                                   TYPE_WAIT);
+   }
+   for (uint32_t i = 0; i < signal_count; i++) {
+      xe_syncs[n_syncs++] = vk_sync_to_drm_xe_sync(signals[i].sync,
+                                                   signals[i].signal_value,
+                                                   TYPE_SIGNAL);
+   }
+   if (submit->signal.sync) {
+      xe_syncs[n_syncs++] = vk_sync_to_drm_xe_sync(submit->signal.sync,
+                                                   submit->signal.signal_value,
+                                                   TYPE_SIGNAL);
+   }
+   if (queue->sync)
+      xe_syncs[n_syncs++] = vk_sync_to_drm_xe_sync(queue->sync, 0, TYPE_SIGNAL);
 
-   xe_syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
-   xe_syncs[1].handle = intel_bind_timeline_get_syncobj(&device->bind_timeline);
-   xe_syncs[1].timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline);
+   xe_syncs[n_syncs++] = (struct drm_xe_sync) {
+      .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
+      .flags = 0 /* TYPE_WAIT */,
+      .handle = intel_bind_timeline_get_syncobj(&device->bind_timeline),
+      .timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline),
+   };
 
 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
    if (device->physical->memory.need_flush &&
        anv_bo_needs_host_cache_flush(device->utrace_bo_pool.bo_alloc_flags)) {
-      util_dynarray_foreach(&utrace_submit->batch_bos, struct anv_bo *, bo)
+      util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
          intel_flush_range((*bo)->map, (*bo)->size);
    }
 #endif
 
    struct anv_bo *batch_bo =
-      *util_dynarray_element(&utrace_submit->batch_bos, struct anv_bo *, 0);
+      *util_dynarray_element(&submit->batch_bos, struct anv_bo *, 0);
    struct drm_xe_exec exec = {
-      .exec_queue_id = queue->exec_queue_id,
+      .exec_queue_id = submit->use_companion_rcs ?
+                       queue->companion_rcs_id : queue->exec_queue_id,
       .num_batch_buffer = 1,
       .syncs = (uintptr_t)xe_syncs,
-      .num_syncs = ARRAY_SIZE(xe_syncs),
+      .num_syncs = n_syncs,
       .address = batch_bo->offset,
    };
+
+   xe_exec_print_debug(queue, 0, NULL, NULL, 0, &exec);
+
    if (likely(!device->info->no_hw)) {
      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
        return vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
   }
 
-   return VK_SUCCESS;
+   return anv_queue_post_submit(queue, VK_SUCCESS);
 }
 
 static VkResult
@@ -346,9 +380,11 @@ xe_queue_exec_locked(struct anv_queue *queue,
    if (result != VK_SUCCESS)
       return result;
 
-   /* If we have no batch for utrace, just forget about it now. */
+   /* If there is a utrace submission but no batch, it means there are no
+    * commands to run for utrace, so ignore the submission.
+    */
    if (utrace_submit &&
-       util_dynarray_num_elements(&utrace_submit->batch_bos,
+       util_dynarray_num_elements(&utrace_submit->base.batch_bos,
                                   struct anv_bo *) == 0)
       utrace_submit = NULL;
 
@@ -402,8 +438,14 @@ xe_queue_exec_locked(struct anv_queue *queue,
 
    result = anv_queue_post_submit(queue, result);
 
-   if (result == VK_SUCCESS && utrace_submit)
-      result = xe_queue_exec_utrace_locked(queue, utrace_submit);
+   if (result == VK_SUCCESS && utrace_submit) {
+      struct vk_sync_signal signal = {
+         .sync = utrace_submit->base.signal.sync,
+         .signal_value = utrace_submit->base.signal.signal_value,
+      };
+      result = xe_queue_exec_async(&utrace_submit->base,
+                                   0, NULL, 1, &signal);
+   }
 
    return result;
 }
diff --git a/src/intel/vulkan/xe/anv_batch_chain.h b/src/intel/vulkan/xe/anv_batch_chain.h
index 9afd8f06b6a..f176d98d269 100644
--- a/src/intel/vulkan/xe/anv_batch_chain.h
+++ b/src/intel/vulkan/xe/anv_batch_chain.h
@@ -34,6 +34,7 @@ struct anv_queue;
 struct anv_bo;
 struct anv_cmd_buffer;
 struct anv_query_pool;
+struct anv_async_submit;
 struct anv_utrace_submit;
 struct anv_sparse_submission;
 struct anv_trtt_batch_bo;
@@ -41,6 +42,7 @@ struct anv_trtt_batch_bo;
 VkResult
 xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
                         uint32_t batch_bo_size, bool is_companion_rcs_batch);
+
 VkResult
 xe_execute_trtt_batch(struct anv_sparse_submission *submit,
                       struct anv_trtt_batch_bo *trtt_bbo);
@@ -58,8 +60,11 @@ xe_queue_exec_locked(struct anv_queue *queue,
                      struct anv_utrace_submit *utrace_submit);
 
 VkResult
-xe_queue_exec_utrace_locked(struct anv_queue *queue,
-                            struct anv_utrace_submit *utrace_submit);
+xe_queue_exec_async(struct anv_async_submit *submit,
+                    uint32_t wait_count,
+                    const struct vk_sync_wait *waits,
+                    uint32_t signal_count,
+                    const struct vk_sync_signal *signals);
 
 struct drm_xe_sync
 vk_sync_to_drm_xe_sync(struct vk_sync *vk_sync, uint64_t value, bool signal);
diff --git a/src/intel/vulkan/xe/anv_kmd_backend.c b/src/intel/vulkan/xe/anv_kmd_backend.c
index 19cb1caecf4..98a95f72bc0 100644
--- a/src/intel/vulkan/xe/anv_kmd_backend.c
+++ b/src/intel/vulkan/xe/anv_kmd_backend.c
@@ -348,7 +348,7 @@ anv_xe_kmd_backend_get(void)
       .execute_simple_batch = xe_execute_simple_batch,
       .execute_trtt_batch = xe_execute_trtt_batch,
       .queue_exec_locked = xe_queue_exec_locked,
-      .queue_exec_trace = xe_queue_exec_utrace_locked,
+      .queue_exec_async = xe_queue_exec_async,
       .bo_alloc_flags_to_bo_flags = xe_bo_alloc_flags_to_bo_flags,
    };
    return &xe_backend;
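Finally, a small sketch of the two completion queries added in anv_batch_chain.c: anv_async_submit_done() polls the signal sync with a zero timeout while anv_async_submit_wait() blocks with an infinite timeout. The `submit` variable and the trailing cleanup call are illustrative assumptions:

   if (!anv_async_submit_done(submit)) {
      /* The GPU is still executing the batch; either come back later or
       * block until completion.
       */
      anv_async_submit_wait(submit);
   }
   anv_async_submit_destroy(submit);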