anv: rework utrace submission

We want to make this more generic so that it can be reused for device
initialization as well as TRTT submissions.
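
As a sketch of the intended usage (hypothetical caller, error handling
elided; every helper referenced below is introduced by this change):

   struct anv_async_submit *submit;
   if (anv_async_submit_create(queue, &device->utrace_bo_pool,
                               false /* use_companion_rcs */,
                               true /* create_signal_sync */,
                               &submit) == VK_SUCCESS) {
      /* Record commands into submit->batch; storage grows on demand
       * through the extend callback, chaining BOs together with
       * MI_BATCH_BUFFER_START.
       */
      anv_batch_emit(&submit->batch, GFX9_MI_BATCH_BUFFER_END, bbe);

      /* Submit without holding device->mutex; the backend signals
       * submit->signal when the batch completes.
       */
      device->kmd_backend->queue_exec_async(submit, 0, NULL, 0, NULL);

      anv_async_submit_wait(submit);
      anv_async_submit_destroy(submit);
   }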

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28975>
Author: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date:   2024-04-28 16:35:30 +03:00 (committed by Marge Bot)
Commit: 1adafbddbd (parent dd19e4240e)

11 changed files with 439 additions and 221 deletions


@@ -1717,3 +1717,156 @@ anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
__builtin_ia32_mfence();
#endif
}
static VkResult
anv_async_submit_extend_batch(struct anv_batch *batch, uint32_t size,
void *user_data)
{
struct anv_async_submit *submit = user_data;
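/* Grow geometrically: request twice the combined size of all batch BOs
 * allocated so far, with an 8kB floor, rounded up to a 4kB page below.
 */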
uint32_t alloc_size = 0;
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
alloc_size += (*bo)->size;
alloc_size = MAX2(alloc_size * 2, 8192);
struct anv_bo *bo;
VkResult result = anv_bo_pool_alloc(submit->bo_pool,
align(alloc_size, 4096),
&bo);
if (result != VK_SUCCESS)
return result;
util_dynarray_append(&submit->batch_bos, struct anv_bo *, bo);
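/* Chain into the new BO: reclaim the room for one MI_BATCH_BUFFER_START
 * that anv_batch_set_storage() holds back at the end of each buffer, and
 * emit the jump to the new BO there.
 */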
batch->end += 4 * GFX9_MI_BATCH_BUFFER_START_length;
anv_batch_emit(batch, GFX9_MI_BATCH_BUFFER_START, bbs) {
bbs.DWordLength = GFX9_MI_BATCH_BUFFER_START_length -
GFX9_MI_BATCH_BUFFER_START_length_bias;
bbs.SecondLevelBatchBuffer = Firstlevelbatch;
bbs.AddressSpaceIndicator = ASI_PPGTT;
bbs.BatchBufferStartAddress = (struct anv_address) { bo, 0 };
}
anv_batch_set_storage(batch,
(struct anv_address) { .bo = bo, },
bo->map,
bo->size - 4 * GFX9_MI_BATCH_BUFFER_START_length);
return VK_SUCCESS;
}
VkResult
anv_async_submit_init(struct anv_async_submit *submit,
struct anv_queue *queue,
struct anv_bo_pool *bo_pool,
bool use_companion_rcs,
bool create_signal_sync)
{
struct anv_device *device = queue->device;
memset(submit, 0, sizeof(*submit));
submit->use_companion_rcs = use_companion_rcs;
submit->queue = queue;
submit->bo_pool = bo_pool;
const bool uses_relocs = device->physical->uses_relocs;
VkResult result =
anv_reloc_list_init(&submit->relocs, &device->vk.alloc, uses_relocs);
if (result != VK_SUCCESS)
return result;
submit->batch = (struct anv_batch) {
.alloc = &device->vk.alloc,
.relocs = &submit->relocs,
.user_data = submit,
.extend_cb = anv_async_submit_extend_batch,
};
util_dynarray_init(&submit->batch_bos, NULL);
if (create_signal_sync) {
result = vk_sync_create(&device->vk,
&device->physical->sync_syncobj_type,
0, 0, &submit->signal.sync);
if (result != VK_SUCCESS) {
anv_reloc_list_finish(&submit->relocs);
util_dynarray_fini(&submit->batch_bos);
return result;
}
submit->owns_sync = true;
}
return VK_SUCCESS;
}
void
anv_async_submit_fini(struct anv_async_submit *submit)
{
struct anv_device *device = submit->queue->device;
if (submit->owns_sync)
vk_sync_destroy(&device->vk, submit->signal.sync);
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
anv_bo_pool_free(submit->bo_pool, *bo);
util_dynarray_fini(&submit->batch_bos);
anv_reloc_list_finish(&submit->relocs);
}
VkResult
anv_async_submit_create(struct anv_queue *queue,
struct anv_bo_pool *bo_pool,
bool use_companion_rcs,
bool create_signal_sync,
struct anv_async_submit **out_submit)
{
struct anv_device *device = queue->device;
*out_submit =
vk_alloc(&device->vk.alloc, sizeof(struct anv_async_submit), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (*out_submit == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
VkResult result = anv_async_submit_init(*out_submit, queue,
bo_pool,
use_companion_rcs,
create_signal_sync);
if (result != VK_SUCCESS)
vk_free(&device->vk.alloc, *out_submit);
return result;
}
void
anv_async_submit_destroy(struct anv_async_submit *submit)
{
struct anv_device *device = submit->queue->device;
anv_async_submit_fini(submit);
vk_free(&device->vk.alloc, submit);
}
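/* Non-blocking completion check: polls the signal sync with a zero timeout. */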
bool
anv_async_submit_done(struct anv_async_submit *submit)
{
struct anv_device *device = submit->queue->device;
return vk_sync_wait(&device->vk,
submit->signal.sync,
submit->signal.signal_value,
VK_SYNC_WAIT_COMPLETE, 0) == VK_SUCCESS;
}
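/* Blocking variant: waits on the signal sync with an infinite timeout. */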
bool
anv_async_submit_wait(struct anv_async_submit *submit)
{
struct anv_device *device = submit->queue->device;
return vk_sync_wait(&device->vk,
submit->signal.sync,
submit->signal.signal_value,
VK_SYNC_WAIT_COMPLETE,
os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE)) == VK_SUCCESS;
}


@@ -88,7 +88,11 @@ stub_queue_exec_locked(struct anv_queue *queue,
}
static VkResult
stub_queue_exec_trace(struct anv_queue *queue, struct anv_utrace_submit *submit)
stub_queue_exec_async(struct anv_async_submit *submit,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t signal_count,
const struct vk_sync_signal *signals)
{
return VK_ERROR_UNKNOWN;
}
@@ -178,7 +182,7 @@ const struct anv_kmd_backend *anv_stub_kmd_backend_get(void)
.execute_simple_batch = stub_execute_simple_batch,
.execute_trtt_batch = stub_execute_trtt_batch,
.queue_exec_locked = stub_queue_exec_locked,
.queue_exec_trace = stub_queue_exec_trace,
.queue_exec_async = stub_queue_exec_async,
.bo_alloc_flags_to_bo_flags = stub_bo_alloc_flags_to_bo_flags,
};
return &stub_backend;


@@ -37,6 +37,7 @@ struct anv_cmd_buffer;
struct anv_device;
struct anv_queue;
struct anv_query_pool;
struct anv_async_submit;
struct anv_utrace_submit;
struct anv_sparse_submission;
struct anv_trtt_batch_bo;
@@ -110,6 +111,8 @@ struct anv_kmd_backend {
struct anv_bo *batch_bo,
uint32_t batch_bo_size,
bool is_companion_rcs_batch);
/* The caller is expected to hold device->mutex when calling this vfunc.
*/
VkResult (*execute_trtt_batch)(struct anv_sparse_submission *submit,
struct anv_trtt_batch_bo *trtt_bbo);
VkResult (*queue_exec_locked)(struct anv_queue *queue,
@@ -122,8 +125,14 @@ struct anv_kmd_backend {
struct anv_query_pool *perf_query_pool,
uint32_t perf_query_pass,
struct anv_utrace_submit *utrace_submit);
VkResult (*queue_exec_trace)(struct anv_queue *queue,
struct anv_utrace_submit *submit);
/* The caller is not expected to hold device->mutex when calling this
* vfunc.
*/
VkResult (*queue_exec_async)(struct anv_async_submit *submit,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t signal_count,
const struct vk_sync_signal *signals);
uint32_t (*bo_alloc_flags_to_bo_flags)(struct anv_device *device,
enum anv_bo_alloc_flags alloc_flags);
};


@@ -2477,6 +2477,51 @@ _anv_combine_address(struct anv_batch *batch, void *location,
/* #define __gen_address_value anv_address_physical */
/* #define __gen_address_offset anv_address_add */
/* Base structure used to track a submission that needs some cleanup
 * operations upon completion. Should be embedded into a larger structure.
*/
struct anv_async_submit {
struct anv_queue *queue;
struct anv_bo_pool *bo_pool;
bool use_companion_rcs;
bool owns_sync;
struct vk_sync_signal signal;
struct anv_reloc_list relocs;
struct anv_batch batch;
struct util_dynarray batch_bos;
};
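For illustration, a hypothetical wrapper showing the embedding pattern the
comment above describes (the reworked anv_utrace_submit below follows the
same pattern):

struct anv_example_submit {
   struct anv_async_submit base;

   /* owner-specific state for this kind of submission */
   struct anv_bo *payload_bo;
};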
VkResult
anv_async_submit_init(struct anv_async_submit *submit,
struct anv_queue *queue,
struct anv_bo_pool *bo_pool,
bool use_companion_rcs,
bool create_signal_sync);
void
anv_async_submit_fini(struct anv_async_submit *submit);
VkResult
anv_async_submit_create(struct anv_queue *queue,
struct anv_bo_pool *bo_pool,
bool use_companion_rcs,
bool create_signal_sync,
struct anv_async_submit **out_submit);
void
anv_async_submit_destroy(struct anv_async_submit *submit);
bool
anv_async_submit_done(struct anv_async_submit *submit);
bool
anv_async_submit_wait(struct anv_async_submit *submit);
struct anv_device_memory {
struct vk_device_memory vk;
@@ -6072,12 +6117,7 @@ void anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
* (vkQueueBeginDebugUtilsLabelEXT/vkQueueEndDebugUtilsLabelEXT)
*/
struct anv_utrace_submit {
/* Batch stuff to implement of copy of timestamps recorded in another
* buffer.
*/
struct anv_reloc_list relocs;
struct anv_batch batch;
struct util_dynarray batch_bos;
struct anv_async_submit base;
/* structure used by the perfetto glue */
struct intel_ds_flush_data ds;
@@ -6086,12 +6126,6 @@ struct anv_utrace_submit {
struct anv_state_stream dynamic_state_stream;
struct anv_state_stream general_state_stream;
/* Syncobj to be signaled when the batch completes */
struct vk_sync *sync;
/* Queue on which all the recorded traces are submitted */
struct anv_queue *queue;
/* Buffer of 64-bit timestamps (only used for timestamp copies) */
struct anv_bo *trace_bo;


@@ -90,11 +90,7 @@ anv_utrace_delete_submit(struct u_trace_context *utctx, void *submit_data)
if (submit->trace_bo)
anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo);
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
anv_bo_pool_free(&device->utrace_bo_pool, *bo);
util_dynarray_fini(&submit->batch_bos);
vk_sync_destroy(&device->vk, submit->sync);
anv_async_submit_fini(&submit->base);
vk_free(&device->vk.alloc, submit);
}
@@ -150,44 +146,6 @@ anv_device_utrace_emit_cs_copy_ts_buffer(struct u_trace_context *utctx,
push_data_state);
}
static VkResult
anv_utrace_submit_extend_batch(struct anv_batch *batch, uint32_t size,
void *user_data)
{
struct anv_utrace_submit *submit = user_data;
uint32_t alloc_size = 0;
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
alloc_size += (*bo)->size;
alloc_size = MAX2(alloc_size * 2, 8192);
struct anv_bo *bo;
VkResult result = anv_bo_pool_alloc(&submit->queue->device->utrace_bo_pool,
align(alloc_size, 4096),
&bo);
if (result != VK_SUCCESS)
return result;
util_dynarray_append(&submit->batch_bos, struct anv_bo *, bo);
batch->end += 4 * GFX9_MI_BATCH_BUFFER_START_length;
anv_batch_emit(batch, GFX9_MI_BATCH_BUFFER_START, bbs) {
bbs.DWordLength = GFX9_MI_BATCH_BUFFER_START_length -
GFX9_MI_BATCH_BUFFER_START_length_bias;
bbs.SecondLevelBatchBuffer = Firstlevelbatch;
bbs.AddressSpaceIndicator = ASI_PPGTT;
bbs.BatchBufferStartAddress = (struct anv_address) { bo, 0 };
}
anv_batch_set_storage(batch,
(struct anv_address) { .bo = bo, },
bo->map,
bo->size - 4 * GFX9_MI_BATCH_BUFFER_START_length);
return VK_SUCCESS;
}
VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
uint32_t cmd_buffer_count,
@@ -212,41 +170,27 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
if (!submit)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
submit->queue = queue;
result = anv_async_submit_init(&submit->base, queue,
&device->utrace_bo_pool,
false, true);
if (result != VK_SUCCESS)
goto error_async;
intel_ds_flush_data_init(&submit->ds, &queue->ds, queue->ds.submission_id);
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
0, 0, &submit->sync);
if (result != VK_SUCCESS)
goto error_sync;
util_dynarray_init(&submit->batch_bos, NULL);
struct anv_batch *batch = &submit->base.batch;
if (utrace_copies > 0) {
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
utrace_copies * 4096,
&submit->trace_bo);
if (result != VK_SUCCESS)
goto error_trace_buf;
const bool uses_relocs = device->physical->uses_relocs;
result = anv_reloc_list_init(&submit->relocs, &device->vk.alloc, uses_relocs);
if (result != VK_SUCCESS)
goto error_reloc_list;
goto error_sync;
anv_state_stream_init(&submit->dynamic_state_stream,
&device->dynamic_state_pool, 16384);
anv_state_stream_init(&submit->general_state_stream,
&device->general_state_pool, 16384);
submit->batch = (struct anv_batch) {
.alloc = &device->vk.alloc,
.relocs = &submit->relocs,
.user_data = submit,
.extend_cb = anv_utrace_submit_extend_batch,
};
/* Only engine class where we support timestamp copies
*
* TODO: add INTEL_ENGINE_CLASS_COPY support (should be trivial ;)
@@ -255,12 +199,10 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE);
if (queue->family->engine_class == INTEL_ENGINE_CLASS_RENDER) {
trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch);
trace_intel_begin_trace_copy_cb(&submit->ds.trace, batch);
anv_genX(device->info, emit_so_memcpy_init)(&submit->memcpy_state,
device,
NULL,
&submit->batch);
device, NULL, batch);
uint32_t num_traces = 0;
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
@@ -277,8 +219,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
}
anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state);
trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch,
num_traces);
trace_intel_end_trace_copy_cb(&submit->ds.trace, batch, num_traces);
anv_genX(device->info, emit_so_memcpy_end)(&submit->memcpy_state);
} else {
@@ -290,13 +231,13 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
if (ret != VK_SUCCESS)
goto error_batch;
trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch);
trace_intel_begin_trace_copy_cb(&submit->ds.trace, batch);
submit->simple_state = (struct anv_simple_shader) {
.device = device,
.dynamic_state_stream = &submit->dynamic_state_stream,
.general_state_stream = &submit->general_state_stream,
.batch = &submit->batch,
.batch = batch,
.kernel = copy_kernel,
.l3_config = device->internal_kernels_l3_config,
};
@@ -318,19 +259,19 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
}
}
trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch,
num_traces);
trace_intel_end_trace_copy_cb(&submit->ds.trace, batch, num_traces);
anv_genX(device->info, emit_simple_shader_end)(&submit->simple_state);
}
intel_ds_queue_flush_data(&queue->ds, &submit->ds.trace, &submit->ds,
device->vk.current_frame, true);
if (submit->batch.status != VK_SUCCESS) {
result = submit->batch.status;
if (batch->status != VK_SUCCESS) {
result = batch->status;
goto error_batch;
}
intel_ds_queue_flush_data(&queue->ds, &submit->ds.trace, &submit->ds,
device->vk.current_frame, true);
} else {
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
@@ -345,15 +286,11 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
return VK_SUCCESS;
error_batch:
anv_reloc_list_finish(&submit->relocs);
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
anv_bo_pool_free(&device->utrace_bo_pool, *bo);
error_reloc_list:
anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo);
error_trace_buf:
vk_sync_destroy(&device->vk, submit->sync);
error_sync:
intel_ds_flush_data_fini(&submit->ds);
anv_async_submit_fini(&submit->base);
error_async:
vk_free(&device->vk.alloc, submit);
return result;
}
@@ -458,8 +395,8 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
MESA_TRACE_SCOPE("anv utrace wait timestamps");
UNUSED VkResult result =
vk_sync_wait(&device->vk,
submit->sync,
0,
submit->base.signal.sync,
submit->base.signal.signal_value,
VK_SYNC_WAIT_COMPLETE,
os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
assert(result == VK_SUCCESS);
@@ -600,69 +537,53 @@ anv_queue_trace(struct anv_queue *queue, const char *label, bool frame, bool beg
if (!submit)
return;
submit->queue = queue;
result = anv_async_submit_init(&submit->base, queue,
&device->utrace_bo_pool,
false, true);
if (result != VK_SUCCESS)
goto error_async;
intel_ds_flush_data_init(&submit->ds, &queue->ds, queue->ds.submission_id);
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
0, 0, &submit->sync);
if (result != VK_SUCCESS)
goto error_trace;
const bool uses_relocs = device->physical->uses_relocs;
result = anv_reloc_list_init(&submit->relocs, &device->vk.alloc, uses_relocs);
if (result != VK_SUCCESS)
goto error_sync;
submit->batch = (struct anv_batch) {
.alloc = &device->vk.alloc,
.relocs = &submit->relocs,
.user_data = submit,
.extend_cb = anv_utrace_submit_extend_batch,
};
struct anv_batch *batch = &submit->base.batch;
if (frame) {
if (begin)
trace_intel_begin_frame(&submit->ds.trace, &submit->batch);
trace_intel_begin_frame(&submit->ds.trace, batch);
else
trace_intel_end_frame(&submit->ds.trace, &submit->batch,
trace_intel_end_frame(&submit->ds.trace, batch,
device->debug_frame_desc->frame_id);
} else {
if (begin) {
trace_intel_begin_queue_annotation(&submit->ds.trace, &submit->batch);
trace_intel_begin_queue_annotation(&submit->ds.trace, batch);
} else {
trace_intel_end_queue_annotation(&submit->ds.trace,
&submit->batch,
strlen(label),
label);
trace_intel_end_queue_annotation(&submit->ds.trace, batch,
strlen(label), label);
}
}
anv_batch_emit(&submit->batch, GFX9_MI_BATCH_BUFFER_END, bbs);
anv_batch_emit(&submit->batch, GFX9_MI_NOOP, noop);
anv_batch_emit(batch, GFX9_MI_BATCH_BUFFER_END, bbs);
anv_batch_emit(batch, GFX9_MI_NOOP, noop);
if (submit->batch.status != VK_SUCCESS) {
result = submit->batch.status;
goto error_reloc_list;
if (batch->status != VK_SUCCESS) {
result = batch->status;
goto error_batch;
}
intel_ds_queue_flush_data(&queue->ds, &submit->ds.trace, &submit->ds,
device->vk.current_frame, true);
pthread_mutex_lock(&device->mutex);
device->kmd_backend->queue_exec_trace(queue, submit);
pthread_mutex_unlock(&device->mutex);
result =
device->kmd_backend->queue_exec_async(&submit->base,
0, NULL, 0, NULL);
if (result != VK_SUCCESS)
goto error_batch;
return;
error_reloc_list:
anv_reloc_list_finish(&submit->relocs);
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
anv_bo_pool_free(&device->utrace_bo_pool, *bo);
error_sync:
vk_sync_destroy(&device->vk, submit->sync);
error_trace:
error_batch:
intel_ds_flush_data_fini(&submit->ds);
anv_async_submit_fini(&submit->base);
error_async:
vk_free(&device->vk.alloc, submit);
}


@@ -569,9 +569,14 @@ setup_execbuf_fence_params(struct anv_execbuf *execbuf)
}
static VkResult
setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
struct anv_utrace_submit *submit)
setup_async_execbuf(struct anv_execbuf *execbuf,
struct anv_async_submit *submit,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t signal_count,
const struct vk_sync_signal *signals)
{
struct anv_queue *queue = submit->queue;
struct anv_device *device = queue->device;
/* Always add the workaround BO as it includes a driver identifier for the
@@ -598,10 +603,38 @@
#endif
}
result = anv_execbuf_add_sync(device, execbuf, submit->sync,
true /* is_signal */, 0 /* value */);
if (result != VK_SUCCESS)
return result;
for (uint32_t i = 0; i < wait_count; i++) {
result = anv_execbuf_add_sync(device, execbuf,
waits[i].sync,
false /* is_signal */,
waits[i].wait_value);
if (result != VK_SUCCESS)
return result;
}
for (uint32_t i = 0; i < signal_count; i++) {
result = anv_execbuf_add_sync(device, execbuf,
signals[i].sync,
true /* is_signal */,
signals[i].signal_value);
if (result != VK_SUCCESS)
return result;
}
if (submit->signal.sync) {
result = anv_execbuf_add_sync(device, execbuf,
submit->signal.sync,
true /* is_signal */,
submit->signal.signal_value);
if (result != VK_SUCCESS)
return result;
}
if (queue->sync) {
result = anv_execbuf_add_sync(device, execbuf,
queue->sync,
true /* is_signal */,
0 /* signal_value */);
if (result != VK_SUCCESS)
return result;
}
struct anv_bo *batch_bo =
*util_dynarray_element(&submit->batch_bos, struct anv_bo *, 0);
@@ -623,13 +656,13 @@ setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
uint64_t exec_flags = 0;
uint32_t context_id;
get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
get_context_and_exec_flags(queue, submit->use_companion_rcs,
&exec_flags, &context_id);
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf->objects,
.buffer_count = execbuf->bo_count,
.batch_start_offset = 0,
.batch_len = submit->batch.next - submit->batch.start,
.flags = I915_EXEC_NO_RELOC |
I915_EXEC_HANDLE_LUT |
exec_flags,
@@ -658,36 +691,6 @@ anv_gem_execbuffer(struct anv_device *device,
return ret;
}
static VkResult
anv_queue_exec_utrace_locked(struct anv_queue *queue,
struct anv_utrace_submit *submit)
{
assert(util_dynarray_num_elements(&submit->batch_bos,
struct anv_bo *) > 0);
struct anv_device *device = queue->device;
struct anv_execbuf execbuf = {
.alloc = &device->vk.alloc,
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
};
VkResult result = setup_utrace_execbuf(&execbuf, queue, submit);
if (result != VK_SUCCESS)
goto error;
ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
int ret = queue->device->info->no_hw ? 0 :
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
if (ret)
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
error:
anv_execbuf_finish(&execbuf);
return result;
}
static void
anv_i915_debug_submit(const struct anv_execbuf *execbuf)
{
@@ -714,6 +717,47 @@ anv_i915_debug_submit(const struct anv_execbuf *execbuf)
}
}
VkResult
i915_queue_exec_async(struct anv_async_submit *submit,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t signal_count,
const struct vk_sync_signal *signals)
{
assert(util_dynarray_num_elements(&submit->batch_bos,
struct anv_bo *) > 0);
struct anv_queue *queue = submit->queue;
struct anv_device *device = queue->device;
struct anv_execbuf execbuf = {
.alloc = &device->vk.alloc,
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
};
VkResult result = setup_async_execbuf(&execbuf, submit,
wait_count, waits,
signal_count, signals);
if (result != VK_SUCCESS)
goto error;
if (INTEL_DEBUG(DEBUG_SUBMIT))
anv_i915_debug_submit(&execbuf);
ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
int ret = queue->device->info->no_hw ? 0 :
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
if (ret)
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
result = anv_queue_post_submit(queue, result);
error:
anv_execbuf_finish(&execbuf);
return result;
}
static VkResult
i915_companion_rcs_queue_exec_locked(struct anv_queue *queue,
struct anv_cmd_buffer *companion_rcs_cmd_buffer,
@@ -796,17 +840,23 @@ i915_queue_exec_locked(struct anv_queue *queue,
};
VkResult result;
/* If there is a utrace submission but no batch, it means there are no
* commands to run for utrace. But we still have to signal the associated
* syncs, so add them to the submission.
*/
if (utrace_submit &&
util_dynarray_num_elements(&utrace_submit->batch_bos,
util_dynarray_num_elements(&utrace_submit->base.batch_bos,
struct anv_bo *) == 0) {
result = anv_execbuf_add_sync(device, &execbuf,
utrace_submit->sync,
utrace_submit->base.signal.sync,
true /* is_signal */,
0);
utrace_submit->base.signal.signal_value);
if (result != VK_SUCCESS)
goto error;
/* When The utrace submission doesn't have its own batch buffer*/
/* Avoid doing a submission after the application's batch since there
* are no commands.
*/
utrace_submit = NULL;
}
@@ -944,8 +994,13 @@ i915_queue_exec_locked(struct anv_queue *queue,
error:
anv_execbuf_finish(&execbuf);
if (result == VK_SUCCESS && utrace_submit)
result = anv_queue_exec_utrace_locked(queue, utrace_submit);
if (result == VK_SUCCESS && utrace_submit) {
struct vk_sync_signal signal = {
.sync = utrace_submit->base.signal.sync,
.signal_value = utrace_submit->base.signal.signal_value,
};
result = i915_queue_exec_async(&utrace_submit->base, 0, NULL, 1, &signal);
}
return result;
}
@@ -1098,13 +1153,3 @@ out:
anv_execbuf_finish(&execbuf);
return result;
}
VkResult
i915_queue_exec_trace(struct anv_queue *queue,
struct anv_utrace_submit *submit)
{
assert(util_dynarray_num_elements(&submit->batch_bos,
struct anv_bo *) > 0);
return anv_queue_exec_utrace_locked(queue, submit);
}


@@ -34,13 +34,18 @@ struct anv_queue;
struct anv_bo;
struct anv_cmd_buffer;
struct anv_query_pool;
struct anv_async_submit;
struct anv_utrace_submit;
struct anv_sparse_submission;
struct anv_trtt_batch_bo;
VkResult
i915_queue_exec_trace(struct anv_queue *queue,
struct anv_utrace_submit *submit);
i915_queue_exec_async(struct anv_async_submit *submit,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t signal_count,
const struct vk_sync_signal *signals);
VkResult
i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
uint32_t batch_bo_size, bool is_companion_rcs_batch);

View file

@@ -299,7 +299,7 @@ anv_i915_kmd_backend_get(void)
.execute_simple_batch = i915_execute_simple_batch,
.execute_trtt_batch = i915_execute_trtt_batch,
.queue_exec_locked = i915_queue_exec_locked,
.queue_exec_trace = i915_queue_exec_trace,
.queue_exec_async = i915_queue_exec_async,
.bo_alloc_flags_to_bo_flags = i915_bo_alloc_flags_to_bo_flags,
};
return &i915_backend;


@@ -117,8 +117,9 @@ xe_exec_process_syncs(struct anv_queue *queue,
/* Signal the utrace sync only if it doesn't have a batch. Otherwise
 * it's the utrace batch that should signal its own sync.
*/
const bool has_utrace_sync = utrace_submit &&
util_dynarray_num_elements(&utrace_submit->batch_bos, struct anv_bo *) == 0;
const bool has_utrace_sync =
utrace_submit &&
util_dynarray_num_elements(&utrace_submit->base.batch_bos, struct anv_bo *) == 0;
const uint32_t num_syncs = wait_count + signal_count + extra_sync_count +
(has_utrace_sync ? 1 : 0) +
((queue->sync && !is_companion_rcs_queue) ? 1 : 0) +
@@ -132,7 +133,8 @@
uint32_t count = 0;
if (has_utrace_sync) {
xe_syncs[count++] = vk_sync_to_drm_xe_sync(utrace_submit->sync, 0,
xe_syncs[count++] = vk_sync_to_drm_xe_sync(utrace_submit->base.signal.sync,
utrace_submit->base.signal.signal_value,
TYPE_SIGNAL);
}
@@ -234,41 +236,73 @@ out:
}
VkResult
xe_queue_exec_utrace_locked(struct anv_queue *queue,
struct anv_utrace_submit *utrace_submit)
xe_queue_exec_async(struct anv_async_submit *submit,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t signal_count,
const struct vk_sync_signal *signals)
{
struct anv_queue *queue = submit->queue;
struct anv_device *device = queue->device;
struct drm_xe_sync xe_syncs[2] = {};
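/* Worst-case sync count: caller waits + caller signals + the submit's own
 * signal sync + the queue sync + one slot for the VM bind timeline.
 */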
STACK_ARRAY(struct drm_xe_sync, xe_syncs,
wait_count + signal_count +
((submit->signal.sync != NULL) ? 1 : 0) +
(queue->sync != NULL ? 1 : 0) +
1);
uint32_t n_syncs = 0;
xe_syncs[0] = vk_sync_to_drm_xe_sync(utrace_submit->sync, 0, TYPE_SIGNAL);
for (uint32_t i = 0; i < wait_count; i++) {
xe_syncs[n_syncs++] = vk_sync_to_drm_xe_sync(waits[i].sync,
waits[i].wait_value,
TYPE_WAIT);
}
for (uint32_t i = 0; i < signal_count; i++) {
xe_syncs[n_syncs++] = vk_sync_to_drm_xe_sync(signals[i].sync,
signals[i].signal_value,
TYPE_SIGNAL);
}
if (submit->signal.sync) {
xe_syncs[n_syncs++] = vk_sync_to_drm_xe_sync(submit->signal.sync,
submit->signal.signal_value,
TYPE_SIGNAL);
}
if (queue->sync)
xe_syncs[n_syncs++] = vk_sync_to_drm_xe_sync(queue->sync, 0, TYPE_SIGNAL);
xe_syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
xe_syncs[1].handle = intel_bind_timeline_get_syncobj(&device->bind_timeline);
xe_syncs[1].timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline);
xe_syncs[n_syncs++] = (struct drm_xe_sync) {
.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
.flags = 0 /* TYPE_WAIT */,
.handle = intel_bind_timeline_get_syncobj(&device->bind_timeline),
.timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline),
};
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_flush &&
anv_bo_needs_host_cache_flush(device->utrace_bo_pool.bo_alloc_flags)) {
util_dynarray_foreach(&utrace_submit->batch_bos, struct anv_bo *, bo)
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
intel_flush_range((*bo)->map, (*bo)->size);
}
#endif
struct anv_bo *batch_bo =
*util_dynarray_element(&utrace_submit->batch_bos, struct anv_bo *, 0);
*util_dynarray_element(&submit->batch_bos, struct anv_bo *, 0);
struct drm_xe_exec exec = {
.exec_queue_id = queue->exec_queue_id,
.exec_queue_id = submit->use_companion_rcs ?
queue->companion_rcs_id : queue->exec_queue_id,
.num_batch_buffer = 1,
.syncs = (uintptr_t)xe_syncs,
.num_syncs = ARRAY_SIZE(xe_syncs),
.num_syncs = n_syncs,
.address = batch_bo->offset,
};
xe_exec_print_debug(queue, 0, NULL, NULL, 0, &exec);
if (likely(!device->info->no_hw)) {
if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
return vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
}
return VK_SUCCESS;
return anv_queue_post_submit(queue, VK_SUCCESS);
}
static VkResult
@@ -346,9 +380,11 @@ xe_queue_exec_locked(struct anv_queue *queue,
if (result != VK_SUCCESS)
return result;
/* If we have no batch for utrace, just forget about it now. */
/* If there is a utrace submission but no batch, it means there are no
 * commands to run for utrace, so ignore the submission.
*/
if (utrace_submit &&
util_dynarray_num_elements(&utrace_submit->batch_bos,
util_dynarray_num_elements(&utrace_submit->base.batch_bos,
struct anv_bo *) == 0)
utrace_submit = NULL;
@@ -402,8 +438,14 @@ xe_queue_exec_locked(struct anv_queue *queue,
result = anv_queue_post_submit(queue, result);
if (result == VK_SUCCESS && utrace_submit)
result = xe_queue_exec_utrace_locked(queue, utrace_submit);
if (result == VK_SUCCESS && utrace_submit) {
struct vk_sync_signal signal = {
.sync = utrace_submit->base.signal.sync,
.signal_value = utrace_submit->base.signal.signal_value,
};
result = xe_queue_exec_async(&utrace_submit->base,
0, NULL, 1, &signal);
}
return result;
}


@@ -34,6 +34,7 @@ struct anv_queue;
struct anv_bo;
struct anv_cmd_buffer;
struct anv_query_pool;
struct anv_async_submit;
struct anv_utrace_submit;
struct anv_sparse_submission;
struct anv_trtt_batch_bo;
@@ -41,6 +42,7 @@ struct anv_trtt_batch_bo;
VkResult
xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
uint32_t batch_bo_size, bool is_companion_rcs_batch);
VkResult
xe_execute_trtt_batch(struct anv_sparse_submission *submit,
struct anv_trtt_batch_bo *trtt_bbo);
@@ -58,8 +60,11 @@ xe_queue_exec_locked(struct anv_queue *queue,
struct anv_utrace_submit *utrace_submit);
VkResult
xe_queue_exec_utrace_locked(struct anv_queue *queue,
struct anv_utrace_submit *utrace_submit);
xe_queue_exec_async(struct anv_async_submit *submit,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t signal_count,
const struct vk_sync_signal *signals);
struct drm_xe_sync
vk_sync_to_drm_xe_sync(struct vk_sync *vk_sync, uint64_t value, bool signal);


@@ -348,7 +348,7 @@ anv_xe_kmd_backend_get(void)
.execute_simple_batch = xe_execute_simple_batch,
.execute_trtt_batch = xe_execute_trtt_batch,
.queue_exec_locked = xe_queue_exec_locked,
.queue_exec_trace = xe_queue_exec_utrace_locked,
.queue_exec_async = xe_queue_exec_async,
.bo_alloc_flags_to_bo_flags = xe_bo_alloc_flags_to_bo_flags,
};
return &xe_backend;