mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 05:48:07 +02:00
anv: dynamically allocate utrace batch buffers
Estimating the batch space required can be tricky because of all the workarounds. So implement chaining of batches like we do for command buffers. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26087>
This commit is contained in:
parent
9ebb7721b5
commit
2dc452ec7c
4 changed files with 106 additions and 70 deletions
|
|
@ -5509,7 +5509,7 @@ struct anv_utrace_submit {
|
|||
*/
|
||||
struct anv_reloc_list relocs;
|
||||
struct anv_batch batch;
|
||||
struct anv_bo *batch_bo;
|
||||
struct util_dynarray batch_bos;
|
||||
|
||||
/* Stream for temporary allocations */
|
||||
struct anv_state_stream dynamic_state_stream;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
#include "anv_internal_kernels.h"
|
||||
|
||||
#include "ds/intel_tracepoints.h"
|
||||
#include "genxml/gen8_pack.h"
|
||||
#include "genxml/gen9_pack.h"
|
||||
#include "perf/intel_perf.h"
|
||||
#include "util/perf/cpu_trace.h"
|
||||
|
||||
|
|
@ -88,10 +88,9 @@ anv_utrace_delete_submit(struct u_trace_context *utctx, void *submit_data)
|
|||
if (submit->trace_bo)
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo);
|
||||
|
||||
if (submit->batch_bo) {
|
||||
anv_reloc_list_finish(&submit->relocs);
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, submit->batch_bo);
|
||||
}
|
||||
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, *bo);
|
||||
util_dynarray_fini(&submit->batch_bos);
|
||||
|
||||
vk_sync_destroy(&device->vk, submit->sync);
|
||||
|
||||
|
|
@ -151,6 +150,44 @@ anv_device_utrace_emit_cs_copy_ts_buffer(struct u_trace_context *utctx,
|
|||
push_data_state);
|
||||
}
|
||||
|
||||
/* Batch-extension callback for a utrace submission.
 *
 * Allocates one more BO from the device's utrace BO pool, records it in
 * submit->batch_bos, emits an MI_BATCH_BUFFER_START into the current batch
 * that targets the start of the new BO, and then re-points the batch storage
 * at the new BO.
 *
 * batch:     batch being extended.
 * size:      not read anywhere in this function; the new BO is sized from the
 *            BOs already recorded, not from this value.
 * user_data: the struct anv_utrace_submit that owns the batch.
 *
 * Returns the error from anv_bo_pool_alloc() if the allocation fails (nothing
 * is appended or emitted in that case), VK_SUCCESS otherwise.
 *
 * NOTE(review): what happens on the very first extension, when batch_bos is
 * still empty, depends on how anv_batch_emit() treats the batch's existing
 * storage pointers, which is not visible here. Confirm against the batch
 * helpers.
 */
static VkResult
|
||||
anv_utrace_submit_extend_batch(struct anv_batch *batch, uint32_t size,
|
||||
void *user_data)
|
||||
{
|
||||
struct anv_utrace_submit *submit = user_data;
|
||||
|
||||
/* New BO size: twice the combined size of every BO already in batch_bos,
 * but never less than 8192.
 */
uint32_t alloc_size = 0;
|
||||
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
|
||||
alloc_size += (*bo)->size;
|
||||
alloc_size = MAX2(alloc_size * 2, 8192);
|
||||
|
||||
struct anv_bo *bo;
|
||||
VkResult result = anv_bo_pool_alloc(&submit->queue->device->utrace_bo_pool,
|
||||
align(alloc_size, 4096),
|
||||
&bo);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
util_dynarray_append(&submit->batch_bos, struct anv_bo *, bo);
|
||||
|
||||
/* Move the batch end forward by one MI_BATCH_BUFFER_START (presumably
 * _length is in dwords, hence the * 4 -- TODO confirm) so the chaining
 * command below can be emitted. The same amount is subtracted from the
 * storage size handed to anv_batch_set_storage() further down.
 */
batch->end += 4 * GFX9_MI_BATCH_BUFFER_START_length;
|
||||
|
||||
/* Chain to offset 0 of the newly allocated BO. */
anv_batch_emit(batch, GFX9_MI_BATCH_BUFFER_START, bbs) {
|
||||
bbs.DWordLength = GFX9_MI_BATCH_BUFFER_START_length -
|
||||
GFX9_MI_BATCH_BUFFER_START_length_bias;
|
||||
bbs.SecondLevelBatchBuffer = Firstlevelbatch;
|
||||
bbs.AddressSpaceIndicator = ASI_PPGTT;
|
||||
bbs.BatchBufferStartAddress = (struct anv_address) { bo, 0 };
|
||||
}
|
||||
|
||||
/* Subsequent commands go into the new BO; the usable size excludes one
 * MI_BATCH_BUFFER_START worth of space at the end.
 */
anv_batch_set_storage(batch,
|
||||
(struct anv_address) { .bo = bo, },
|
||||
bo->map,
|
||||
bo->size - 4 * GFX9_MI_BATCH_BUFFER_START_length);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
||||
uint32_t cmd_buffer_count,
|
||||
|
|
@ -175,6 +212,8 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
|||
if (!submit)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->queue = queue;
|
||||
|
||||
intel_ds_flush_data_init(&submit->ds, &queue->ds, queue->ds.submission_id);
|
||||
|
||||
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
|
||||
|
|
@ -182,6 +221,8 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
|||
if (result != VK_SUCCESS)
|
||||
goto error_sync;
|
||||
|
||||
util_dynarray_init(&submit->batch_bos, NULL);
|
||||
|
||||
if (utrace_copies > 0) {
|
||||
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
|
||||
utrace_copies * 4096,
|
||||
|
|
@ -189,22 +230,6 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
|||
if (result != VK_SUCCESS)
|
||||
goto error_trace_buf;
|
||||
|
||||
uint32_t batch_size = 512; /* 128 dwords of setup */
|
||||
if (intel_needs_workaround(device->info, 16013994831)) {
|
||||
/* Enable/Disable preemption at the begin/end */
|
||||
batch_size += 2 * (250 /* 250 MI_NOOPs*/ +
|
||||
6 /* PIPE_CONTROL */ +
|
||||
3 /* MI_LRI */) * 4 /* dwords */;
|
||||
}
|
||||
batch_size += 256 * utrace_copies; /* 64 dwords per copy */
|
||||
batch_size = align(batch_size + 4, 8); /* MI_BATCH_BUFFER_END */
|
||||
|
||||
result = anv_bo_pool_alloc(&device->utrace_bo_pool,
|
||||
align(batch_size, 4096),
|
||||
&submit->batch_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_batch_buf;
|
||||
|
||||
const bool uses_relocs = device->physical->uses_relocs;
|
||||
result = anv_reloc_list_init(&submit->relocs, &device->vk.alloc, uses_relocs);
|
||||
if (result != VK_SUCCESS)
|
||||
|
|
@ -215,11 +240,12 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
|||
anv_state_stream_init(&submit->general_state_stream,
|
||||
&device->general_state_pool, 16384);
|
||||
|
||||
submit->batch.alloc = &device->vk.alloc;
|
||||
submit->batch.relocs = &submit->relocs;
|
||||
anv_batch_set_storage(&submit->batch,
|
||||
(struct anv_address) { .bo = submit->batch_bo, },
|
||||
submit->batch_bo->map, submit->batch_bo->size);
|
||||
submit->batch = (struct anv_batch) {
|
||||
.alloc = &device->vk.alloc,
|
||||
.relocs = &submit->relocs,
|
||||
.user_data = submit,
|
||||
.extend_cb = anv_utrace_submit_extend_batch,
|
||||
};
|
||||
|
||||
/* Only engine class where we support timestamp copies
|
||||
*
|
||||
|
|
@ -304,17 +330,15 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
|||
}
|
||||
}
|
||||
|
||||
submit->queue = queue;
|
||||
|
||||
*out_submit = submit;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
error_batch:
|
||||
anv_reloc_list_finish(&submit->relocs);
|
||||
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, *bo);
|
||||
error_reloc_list:
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, submit->batch_bo);
|
||||
error_batch_buf:
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, submit->trace_bo);
|
||||
error_trace_buf:
|
||||
vk_sync_destroy(&device->vk, submit->sync);
|
||||
|
|
@ -555,21 +579,17 @@ anv_queue_trace(struct anv_queue *queue, const char *label, bool frame, bool beg
|
|||
if (result != VK_SUCCESS)
|
||||
goto error_trace;
|
||||
|
||||
result = anv_bo_pool_alloc(&device->utrace_bo_pool, 4096,
|
||||
&submit->batch_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_sync;
|
||||
|
||||
const bool uses_relocs = device->physical->uses_relocs;
|
||||
result = anv_reloc_list_init(&submit->relocs, &device->vk.alloc, uses_relocs);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_batch_bo;
|
||||
goto error_sync;
|
||||
|
||||
submit->batch.alloc = &device->vk.alloc;
|
||||
submit->batch.relocs = &submit->relocs;
|
||||
anv_batch_set_storage(&submit->batch,
|
||||
(struct anv_address) { .bo = submit->batch_bo, },
|
||||
submit->batch_bo->map, submit->batch_bo->size);
|
||||
submit->batch = (struct anv_batch) {
|
||||
.alloc = &device->vk.alloc,
|
||||
.relocs = &submit->relocs,
|
||||
.user_data = submit,
|
||||
.extend_cb = anv_utrace_submit_extend_batch,
|
||||
};
|
||||
|
||||
if (frame) {
|
||||
if (begin)
|
||||
|
|
@ -588,8 +608,8 @@ anv_queue_trace(struct anv_queue *queue, const char *label, bool frame, bool beg
|
|||
}
|
||||
}
|
||||
|
||||
anv_batch_emit(&submit->batch, GFX8_MI_BATCH_BUFFER_END, bbs);
|
||||
anv_batch_emit(&submit->batch, GFX8_MI_NOOP, noop);
|
||||
anv_batch_emit(&submit->batch, GFX9_MI_BATCH_BUFFER_END, bbs);
|
||||
anv_batch_emit(&submit->batch, GFX9_MI_NOOP, noop);
|
||||
|
||||
if (submit->batch.status != VK_SUCCESS) {
|
||||
result = submit->batch.status;
|
||||
|
|
@ -606,8 +626,8 @@ anv_queue_trace(struct anv_queue *queue, const char *label, bool frame, bool beg
|
|||
|
||||
error_reloc_list:
|
||||
anv_reloc_list_finish(&submit->relocs);
|
||||
error_batch_bo:
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, submit->batch_bo);
|
||||
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, bo)
|
||||
anv_bo_pool_free(&device->utrace_bo_pool, *bo);
|
||||
error_sync:
|
||||
vk_sync_destroy(&device->vk, submit->sync);
|
||||
error_trace:
|
||||
|
|
|
|||
|
|
@ -522,38 +522,43 @@ setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
|
|||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = anv_execbuf_add_bo(device, execbuf,
|
||||
submit->batch_bo,
|
||||
&submit->relocs, 0);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, _bo) {
|
||||
struct anv_bo *bo = *_bo;
|
||||
|
||||
result = anv_execbuf_add_bo(device, execbuf, bo,
|
||||
&submit->relocs, 0);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
||||
if (device->physical->memory.need_flush)
|
||||
intel_flush_range(bo->map, bo->size);
|
||||
#endif
|
||||
}
|
||||
|
||||
result = anv_execbuf_add_sync(device, execbuf, submit->sync,
|
||||
true /* is_signal */, 0 /* value */);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
if (submit->batch_bo->exec_obj_index != execbuf->bo_count - 1) {
|
||||
uint32_t idx = submit->batch_bo->exec_obj_index;
|
||||
struct anv_bo *batch_bo =
|
||||
*util_dynarray_element(&submit->batch_bos, struct anv_bo *, 0);
|
||||
if (batch_bo->exec_obj_index != execbuf->bo_count - 1) {
|
||||
uint32_t idx = batch_bo->exec_obj_index;
|
||||
uint32_t last_idx = execbuf->bo_count - 1;
|
||||
|
||||
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
|
||||
assert(execbuf->bos[idx] == submit->batch_bo);
|
||||
assert(execbuf->bos[idx] == batch_bo);
|
||||
|
||||
execbuf->objects[idx] = execbuf->objects[last_idx];
|
||||
execbuf->bos[idx] = execbuf->bos[last_idx];
|
||||
execbuf->bos[idx]->exec_obj_index = idx;
|
||||
|
||||
execbuf->objects[last_idx] = tmp_obj;
|
||||
execbuf->bos[last_idx] = submit->batch_bo;
|
||||
submit->batch_bo->exec_obj_index = last_idx;
|
||||
execbuf->bos[last_idx] = batch_bo;
|
||||
batch_bo->exec_obj_index = last_idx;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
||||
if (device->physical->memory.need_flush)
|
||||
intel_flush_range(submit->batch_bo->map, submit->batch_bo->size);
|
||||
#endif
|
||||
|
||||
uint64_t exec_flags = 0;
|
||||
uint32_t context_id;
|
||||
get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
|
||||
|
|
@ -596,7 +601,8 @@ static VkResult
|
|||
anv_queue_exec_utrace_locked(struct anv_queue *queue,
|
||||
struct anv_utrace_submit *submit)
|
||||
{
|
||||
assert(submit->batch_bo);
|
||||
assert(util_dynarray_num_elements(&submit->batch_bos,
|
||||
struct anv_bo *) > 0);
|
||||
|
||||
struct anv_device *device = queue->device;
|
||||
struct anv_execbuf execbuf = {
|
||||
|
|
@ -740,7 +746,9 @@ i915_queue_exec_locked(struct anv_queue *queue,
|
|||
};
|
||||
VkResult result;
|
||||
|
||||
if (utrace_submit && !utrace_submit->batch_bo) {
|
||||
if (utrace_submit &&
|
||||
util_dynarray_num_elements(&utrace_submit->batch_bos,
|
||||
struct anv_bo *) == 0) {
|
||||
result = anv_execbuf_add_sync(device, &execbuf,
|
||||
utrace_submit->sync,
|
||||
true /* is_signal */,
|
||||
|
|
@ -950,7 +958,8 @@ VkResult
|
|||
i915_queue_exec_trace(struct anv_queue *queue,
|
||||
struct anv_utrace_submit *submit)
|
||||
{
|
||||
assert(submit->batch_bo);
|
||||
assert(util_dynarray_num_elements(&submit->batch_bos,
|
||||
struct anv_bo *) > 0);
|
||||
|
||||
return anv_queue_exec_utrace_locked(queue, submit);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -126,7 +126,9 @@ xe_exec_process_syncs(struct anv_queue *queue,
|
|||
/* Signal the utrace sync only if it doesn't have a batch. Otherwise
|
||||
* it's the utrace batch that should signal its own sync.
|
||||
*/
|
||||
if (utrace_submit && !utrace_submit->batch_bo) {
|
||||
if (utrace_submit &&
|
||||
util_dynarray_num_elements(&utrace_submit->batch_bos,
|
||||
struct anv_bo *) == 0) {
|
||||
struct drm_xe_sync *xe_sync = &xe_syncs[count++];
|
||||
|
||||
xe_exec_fill_sync(xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
|
||||
|
|
@ -186,17 +188,20 @@ xe_queue_exec_utrace_locked(struct anv_queue *queue,
|
|||
xe_exec_fill_sync(&xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
|
||||
|
||||
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
||||
if (device->physical->memory.need_flush)
|
||||
intel_flush_range(utrace_submit->batch_bo->map,
|
||||
utrace_submit->batch_bo->size);
|
||||
if (device->physical->memory.need_flush) {
|
||||
util_dynarray_foreach(&utrace_submit->batch_bos, struct anv_bo *, bo)
|
||||
intel_flush_range((*bo)->map, (*bo)->size);
|
||||
}
|
||||
#endif
|
||||
|
||||
struct anv_bo *batch_bo =
|
||||
*util_dynarray_element(&utrace_submit->batch_bos, struct anv_bo *, 0);
|
||||
struct drm_xe_exec exec = {
|
||||
.exec_queue_id = queue->exec_queue_id,
|
||||
.num_batch_buffer = 1,
|
||||
.syncs = (uintptr_t)&xe_sync,
|
||||
.num_syncs = 1,
|
||||
.address = utrace_submit->batch_bo->offset,
|
||||
.address = batch_bo->offset,
|
||||
};
|
||||
if (likely(!device->info->no_hw)) {
|
||||
if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
|
||||
|
|
@ -283,7 +288,9 @@ xe_queue_exec_locked(struct anv_queue *queue,
|
|||
return result;
|
||||
|
||||
/* If we have no batch for utrace, just forget about it now. */
|
||||
if (utrace_submit && !utrace_submit->batch_bo)
|
||||
if (utrace_submit &&
|
||||
util_dynarray_num_elements(&utrace_submit->batch_bos,
|
||||
struct anv_bo *) == 0)
|
||||
utrace_submit = NULL;
|
||||
|
||||
struct drm_xe_exec exec = {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue