mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 09:00:10 +01:00
anv: move trtt submissions over to the anv_async_submit
We can remove a bunch of TRTT specific code from the backends as well as manual submission tracking. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28975>
This commit is contained in:
parent
1adafbddbd
commit
7da5b1caef
15 changed files with 297 additions and 504 deletions
|
|
@ -1668,37 +1668,6 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult
|
|
||||||
anv_queue_submit_trtt_batch(struct anv_sparse_submission *submit,
|
|
||||||
struct anv_batch *batch)
|
|
||||||
{
|
|
||||||
struct anv_queue *queue = submit->queue;
|
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
VkResult result = VK_SUCCESS;
|
|
||||||
|
|
||||||
uint32_t batch_size = align(batch->next - batch->start, 8);
|
|
||||||
struct anv_trtt_batch_bo *trtt_bbo;
|
|
||||||
result = anv_trtt_batch_bo_new(device, batch_size, &trtt_bbo);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
memcpy(trtt_bbo->bo->map, batch->start, trtt_bbo->size);
|
|
||||||
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
|
||||||
if (device->physical->memory.need_flush &&
|
|
||||||
anv_bo_needs_host_cache_flush(trtt_bbo->bo->alloc_flags))
|
|
||||||
intel_flush_range(trtt_bbo->bo->map, trtt_bbo->size);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (INTEL_DEBUG(DEBUG_BATCH)) {
|
|
||||||
intel_print_batch(queue->decoder, trtt_bbo->bo->map, trtt_bbo->bo->size,
|
|
||||||
trtt_bbo->bo->offset, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
result = device->kmd_backend->execute_trtt_batch(submit, trtt_bbo);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
|
anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
|
||||||
uint32_t num_cmd_buffers)
|
uint32_t num_cmd_buffers)
|
||||||
|
|
|
||||||
|
|
@ -3246,14 +3246,25 @@ anv_device_destroy_context_or_vm(struct anv_device *device)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static VkResult
|
||||||
anv_device_init_trtt(struct anv_device *device)
|
anv_device_init_trtt(struct anv_device *device)
|
||||||
{
|
{
|
||||||
struct anv_trtt *trtt = &device->trtt;
|
struct anv_trtt *trtt = &device->trtt;
|
||||||
|
|
||||||
|
VkResult result =
|
||||||
|
vk_sync_create(&device->vk,
|
||||||
|
&device->physical->sync_syncobj_type,
|
||||||
|
VK_SYNC_IS_TIMELINE,
|
||||||
|
0 /* initial_value */,
|
||||||
|
&trtt->timeline);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
simple_mtx_init(&trtt->mutex, mtx_plain);
|
simple_mtx_init(&trtt->mutex, mtx_plain);
|
||||||
|
|
||||||
list_inithead(&trtt->in_flight_batches);
|
list_inithead(&trtt->in_flight_batches);
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -3261,31 +3272,9 @@ anv_device_finish_trtt(struct anv_device *device)
|
||||||
{
|
{
|
||||||
struct anv_trtt *trtt = &device->trtt;
|
struct anv_trtt *trtt = &device->trtt;
|
||||||
|
|
||||||
if (trtt->timeline_val > 0) {
|
anv_sparse_trtt_garbage_collect_batches(device, true);
|
||||||
struct drm_syncobj_timeline_wait wait = {
|
|
||||||
.handles = (uintptr_t)&trtt->timeline_handle,
|
|
||||||
.points = (uintptr_t)&trtt->timeline_val,
|
|
||||||
.timeout_nsec = INT64_MAX,
|
|
||||||
.count_handles = 1,
|
|
||||||
.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
|
|
||||||
.first_signaled = false,
|
|
||||||
};
|
|
||||||
if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &wait))
|
|
||||||
fprintf(stderr, "TR-TT syncobj wait failed!\n");
|
|
||||||
|
|
||||||
list_for_each_entry_safe(struct anv_trtt_batch_bo, trtt_bbo,
|
vk_sync_destroy(&device->vk, trtt->timeline);
|
||||||
&trtt->in_flight_batches, link)
|
|
||||||
anv_trtt_batch_bo_free(device, trtt_bbo);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (trtt->timeline_handle > 0) {
|
|
||||||
struct drm_syncobj_destroy destroy = {
|
|
||||||
.handle = trtt->timeline_handle,
|
|
||||||
};
|
|
||||||
if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy))
|
|
||||||
fprintf(stderr, "TR-TT syncobj destroy failed!\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
simple_mtx_destroy(&trtt->mutex);
|
simple_mtx_destroy(&trtt->mutex);
|
||||||
|
|
||||||
|
|
@ -3915,6 +3904,10 @@ VkResult anv_CreateDevice(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
result = anv_device_init_trtt(device);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto fail_companion_cmd_pool;
|
||||||
|
|
||||||
anv_device_init_blorp(device);
|
anv_device_init_blorp(device);
|
||||||
|
|
||||||
anv_device_init_border_colors(device);
|
anv_device_init_border_colors(device);
|
||||||
|
|
@ -3929,8 +3922,6 @@ VkResult anv_CreateDevice(
|
||||||
|
|
||||||
anv_device_init_embedded_samplers(device);
|
anv_device_init_embedded_samplers(device);
|
||||||
|
|
||||||
anv_device_init_trtt(device);
|
|
||||||
|
|
||||||
BITSET_ONES(device->gfx_dirty_state);
|
BITSET_ONES(device->gfx_dirty_state);
|
||||||
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
|
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
|
||||||
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
|
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
|
||||||
|
|
@ -3963,13 +3954,13 @@ VkResult anv_CreateDevice(
|
||||||
|
|
||||||
result = anv_genX(device->info, init_device_state)(device);
|
result = anv_genX(device->info, init_device_state)(device);
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
goto fail_companion_cmd_pool;
|
goto fail_inits;
|
||||||
|
|
||||||
*pDevice = anv_device_to_handle(device);
|
*pDevice = anv_device_to_handle(device);
|
||||||
|
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
|
|
||||||
fail_companion_cmd_pool:
|
fail_inits:
|
||||||
anv_device_finish_trtt(device);
|
anv_device_finish_trtt(device);
|
||||||
anv_device_finish_embedded_samplers(device);
|
anv_device_finish_embedded_samplers(device);
|
||||||
anv_device_utrace_finish(device);
|
anv_device_utrace_finish(device);
|
||||||
|
|
@ -3977,7 +3968,7 @@ VkResult anv_CreateDevice(
|
||||||
anv_device_finish_rt_shaders(device);
|
anv_device_finish_rt_shaders(device);
|
||||||
anv_device_finish_astc_emu(device);
|
anv_device_finish_astc_emu(device);
|
||||||
anv_device_finish_internal_kernels(device);
|
anv_device_finish_internal_kernels(device);
|
||||||
|
fail_companion_cmd_pool:
|
||||||
if (device->info->verx10 >= 125) {
|
if (device->info->verx10 >= 125) {
|
||||||
vk_common_DestroyCommandPool(anv_device_to_handle(device),
|
vk_common_DestroyCommandPool(anv_device_to_handle(device),
|
||||||
device->companion_rcs_cmd_pool, NULL);
|
device->companion_rcs_cmd_pool, NULL);
|
||||||
|
|
@ -4089,6 +4080,7 @@ void anv_DestroyDevice(
|
||||||
|
|
||||||
struct anv_physical_device *pdevice = device->physical;
|
struct anv_physical_device *pdevice = device->physical;
|
||||||
|
|
||||||
|
/* Do TRTT batch garbage collection before destroying queues. */
|
||||||
anv_device_finish_trtt(device);
|
anv_device_finish_trtt(device);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < device->queue_count; i++)
|
for (uint32_t i = 0; i < device->queue_count; i++)
|
||||||
|
|
|
||||||
|
|
@ -65,13 +65,6 @@ stub_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
|
||||||
return VK_ERROR_UNKNOWN;
|
return VK_ERROR_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
static VkResult
|
|
||||||
stub_execute_trtt_batch(struct anv_sparse_submission *submit,
|
|
||||||
struct anv_trtt_batch_bo *trtt_bbo)
|
|
||||||
{
|
|
||||||
return VK_ERROR_UNKNOWN;
|
|
||||||
}
|
|
||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
stub_queue_exec_locked(struct anv_queue *queue,
|
stub_queue_exec_locked(struct anv_queue *queue,
|
||||||
uint32_t wait_count,
|
uint32_t wait_count,
|
||||||
|
|
@ -180,7 +173,6 @@ const struct anv_kmd_backend *anv_stub_kmd_backend_get(void)
|
||||||
.vm_bind_bo = stub_vm_bind_bo,
|
.vm_bind_bo = stub_vm_bind_bo,
|
||||||
.vm_unbind_bo = stub_vm_bind_bo,
|
.vm_unbind_bo = stub_vm_bind_bo,
|
||||||
.execute_simple_batch = stub_execute_simple_batch,
|
.execute_simple_batch = stub_execute_simple_batch,
|
||||||
.execute_trtt_batch = stub_execute_trtt_batch,
|
|
||||||
.queue_exec_locked = stub_queue_exec_locked,
|
.queue_exec_locked = stub_queue_exec_locked,
|
||||||
.queue_exec_async = stub_queue_exec_async,
|
.queue_exec_async = stub_queue_exec_async,
|
||||||
.bo_alloc_flags_to_bo_flags = stub_bo_alloc_flags_to_bo_flags,
|
.bo_alloc_flags_to_bo_flags = stub_bo_alloc_flags_to_bo_flags,
|
||||||
|
|
|
||||||
|
|
@ -38,8 +38,10 @@
|
||||||
|
|
||||||
struct intel_sample_positions;
|
struct intel_sample_positions;
|
||||||
struct intel_urb_config;
|
struct intel_urb_config;
|
||||||
|
struct anv_async_submit;
|
||||||
struct anv_embedded_sampler;
|
struct anv_embedded_sampler;
|
||||||
struct anv_pipeline_embedded_sampler_binding;
|
struct anv_pipeline_embedded_sampler_binding;
|
||||||
|
struct anv_trtt_bind;
|
||||||
|
|
||||||
typedef struct nir_builder nir_builder;
|
typedef struct nir_builder nir_builder;
|
||||||
typedef struct nir_shader nir_shader;
|
typedef struct nir_shader nir_shader;
|
||||||
|
|
@ -351,9 +353,16 @@ genX(simple_shader_push_state_address)(struct anv_simple_shader *state,
|
||||||
void
|
void
|
||||||
genX(emit_simple_shader_end)(struct anv_simple_shader *state);
|
genX(emit_simple_shader_end)(struct anv_simple_shader *state);
|
||||||
|
|
||||||
VkResult genX(init_trtt_context_state)(struct anv_queue *queue);
|
VkResult genX(init_trtt_context_state)(struct anv_device *device,
|
||||||
|
struct anv_async_submit *submit);
|
||||||
|
|
||||||
VkResult genX(write_trtt_entries)(struct anv_trtt_submission *submit);
|
void genX(write_trtt_entries)(struct anv_async_submit *submit,
|
||||||
|
struct anv_trtt_bind *l3l2_binds,
|
||||||
|
uint32_t n_l3l2_binds,
|
||||||
|
struct anv_trtt_bind *l1_binds,
|
||||||
|
uint32_t n_l1_binds);
|
||||||
|
|
||||||
|
void genX(async_submit_end)(struct anv_async_submit *submit);
|
||||||
|
|
||||||
void
|
void
|
||||||
genX(cmd_buffer_emit_push_descriptor_buffer_surface)(struct anv_cmd_buffer *cmd_buffer,
|
genX(cmd_buffer_emit_push_descriptor_buffer_surface)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,6 @@ struct anv_query_pool;
|
||||||
struct anv_async_submit;
|
struct anv_async_submit;
|
||||||
struct anv_utrace_submit;
|
struct anv_utrace_submit;
|
||||||
struct anv_sparse_submission;
|
struct anv_sparse_submission;
|
||||||
struct anv_trtt_batch_bo;
|
|
||||||
|
|
||||||
enum anv_vm_bind_op {
|
enum anv_vm_bind_op {
|
||||||
/* bind vma specified in anv_vm_bind */
|
/* bind vma specified in anv_vm_bind */
|
||||||
|
|
@ -113,8 +112,6 @@ struct anv_kmd_backend {
|
||||||
bool is_companion_rcs_batch);
|
bool is_companion_rcs_batch);
|
||||||
/* The caller is expected to hold device->mutex when calling this vfunc.
|
/* The caller is expected to hold device->mutex when calling this vfunc.
|
||||||
*/
|
*/
|
||||||
VkResult (*execute_trtt_batch)(struct anv_sparse_submission *submit,
|
|
||||||
struct anv_trtt_batch_bo *trtt_bbo);
|
|
||||||
VkResult (*queue_exec_locked)(struct anv_queue *queue,
|
VkResult (*queue_exec_locked)(struct anv_queue *queue,
|
||||||
uint32_t wait_count,
|
uint32_t wait_count,
|
||||||
const struct vk_sync_wait *waits,
|
const struct vk_sync_wait *waits,
|
||||||
|
|
|
||||||
|
|
@ -768,35 +768,6 @@ struct anv_state_stream {
|
||||||
struct util_dynarray all_blocks;
|
struct util_dynarray all_blocks;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct anv_sparse_submission {
|
|
||||||
struct anv_queue *queue;
|
|
||||||
|
|
||||||
struct anv_vm_bind *binds;
|
|
||||||
int binds_len;
|
|
||||||
int binds_capacity;
|
|
||||||
|
|
||||||
uint32_t wait_count;
|
|
||||||
uint32_t signal_count;
|
|
||||||
|
|
||||||
struct vk_sync_wait *waits;
|
|
||||||
struct vk_sync_signal *signals;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct anv_trtt_bind {
|
|
||||||
uint64_t pte_addr;
|
|
||||||
uint64_t entry_addr;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct anv_trtt_submission {
|
|
||||||
struct anv_sparse_submission *sparse;
|
|
||||||
|
|
||||||
struct anv_trtt_bind *l3l2_binds;
|
|
||||||
struct anv_trtt_bind *l1_binds;
|
|
||||||
|
|
||||||
int l3l2_binds_len;
|
|
||||||
int l1_binds_len;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* The block_pool functions exported for testing only. The block pool should
|
/* The block_pool functions exported for testing only. The block pool should
|
||||||
* only be used via a state pool (see below).
|
* only be used via a state pool (see below).
|
||||||
*/
|
*/
|
||||||
|
|
@ -1788,19 +1759,6 @@ struct anv_device_astc_emu {
|
||||||
VkPipeline pipeline;
|
VkPipeline pipeline;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct anv_trtt_batch_bo {
|
|
||||||
struct anv_bo *bo;
|
|
||||||
uint32_t size;
|
|
||||||
|
|
||||||
/* Once device->trtt.timeline_handle signals timeline_val as complete we
|
|
||||||
* can free this struct and its members.
|
|
||||||
*/
|
|
||||||
uint64_t timeline_val;
|
|
||||||
|
|
||||||
/* Part of device->trtt.in_flight_batches. */
|
|
||||||
struct list_head link;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct anv_device {
|
struct anv_device {
|
||||||
struct vk_device vk;
|
struct vk_device vk;
|
||||||
|
|
||||||
|
|
@ -2028,12 +1986,11 @@ struct anv_device {
|
||||||
struct anv_bo *cur_page_table_bo;
|
struct anv_bo *cur_page_table_bo;
|
||||||
uint64_t next_page_table_bo_offset;
|
uint64_t next_page_table_bo_offset;
|
||||||
|
|
||||||
/* Timeline syncobj used to track completion of the TR-TT batch BOs. */
|
struct vk_sync *timeline;
|
||||||
uint32_t timeline_handle;
|
|
||||||
uint64_t timeline_val;
|
uint64_t timeline_val;
|
||||||
|
|
||||||
/* List of struct anv_trtt_batch_bo batches that are in flight and can
|
/* List of struct anv_trtt_submission that are in flight and can be
|
||||||
* be freed once their timeline gets signaled.
|
* freed once their vk_sync gets signaled.
|
||||||
*/
|
*/
|
||||||
struct list_head in_flight_batches;
|
struct list_head in_flight_batches;
|
||||||
} trtt;
|
} trtt;
|
||||||
|
|
@ -2203,17 +2160,6 @@ VkResult anv_queue_submit(struct vk_queue *queue,
|
||||||
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
|
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
|
||||||
struct anv_batch *batch,
|
struct anv_batch *batch,
|
||||||
bool is_companion_rcs_batch);
|
bool is_companion_rcs_batch);
|
||||||
VkResult anv_queue_submit_trtt_batch(struct anv_sparse_submission *submit,
|
|
||||||
struct anv_batch *batch);
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
anv_trtt_batch_bo_free(struct anv_device *device,
|
|
||||||
struct anv_trtt_batch_bo *trtt_bbo)
|
|
||||||
{
|
|
||||||
anv_bo_pool_free(&device->batch_bo_pool, trtt_bbo->bo);
|
|
||||||
list_del(&trtt_bbo->link);
|
|
||||||
vk_free(&device->vk.alloc, trtt_bbo);
|
|
||||||
}
|
|
||||||
|
|
||||||
void anv_queue_trace(struct anv_queue *queue, const char *label,
|
void anv_queue_trace(struct anv_queue *queue, const char *label,
|
||||||
bool frame, bool begin);
|
bool frame, bool begin);
|
||||||
|
|
@ -2521,6 +2467,32 @@ anv_async_submit_done(struct anv_async_submit *submit);
|
||||||
bool
|
bool
|
||||||
anv_async_submit_wait(struct anv_async_submit *submit);
|
anv_async_submit_wait(struct anv_async_submit *submit);
|
||||||
|
|
||||||
|
struct anv_sparse_submission {
|
||||||
|
struct anv_queue *queue;
|
||||||
|
|
||||||
|
struct anv_vm_bind *binds;
|
||||||
|
int binds_len;
|
||||||
|
int binds_capacity;
|
||||||
|
|
||||||
|
uint32_t wait_count;
|
||||||
|
uint32_t signal_count;
|
||||||
|
|
||||||
|
struct vk_sync_wait *waits;
|
||||||
|
struct vk_sync_signal *signals;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct anv_trtt_bind {
|
||||||
|
uint64_t pte_addr;
|
||||||
|
uint64_t entry_addr;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct anv_trtt_submission {
|
||||||
|
struct anv_async_submit base;
|
||||||
|
|
||||||
|
struct anv_sparse_submission *sparse;
|
||||||
|
|
||||||
|
struct list_head link;
|
||||||
|
};
|
||||||
|
|
||||||
struct anv_device_memory {
|
struct anv_device_memory {
|
||||||
struct vk_device_memory vk;
|
struct vk_device_memory vk;
|
||||||
|
|
@ -3217,6 +3189,9 @@ VkResult anv_sparse_bind_image_memory(struct anv_queue *queue,
|
||||||
VkResult anv_sparse_bind(struct anv_device *device,
|
VkResult anv_sparse_bind(struct anv_device *device,
|
||||||
struct anv_sparse_submission *sparse_submit);
|
struct anv_sparse_submission *sparse_submit);
|
||||||
|
|
||||||
|
VkResult anv_sparse_trtt_garbage_collect_batches(struct anv_device *device,
|
||||||
|
bool wait_completion);
|
||||||
|
|
||||||
VkSparseImageFormatProperties
|
VkSparseImageFormatProperties
|
||||||
anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
|
anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
|
||||||
VkImageAspectFlags aspect,
|
VkImageAspectFlags aspect,
|
||||||
|
|
@ -3236,8 +3211,6 @@ VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice,
|
||||||
VkSampleCountFlagBits samples,
|
VkSampleCountFlagBits samples,
|
||||||
VkImageType type,
|
VkImageType type,
|
||||||
VkFormat format);
|
VkFormat format);
|
||||||
VkResult anv_trtt_batch_bo_new(struct anv_device *device, uint32_t batch_size,
|
|
||||||
struct anv_trtt_batch_bo **out_trtt_bbo);
|
|
||||||
|
|
||||||
struct anv_buffer {
|
struct anv_buffer {
|
||||||
struct vk_buffer vk;
|
struct vk_buffer vk;
|
||||||
|
|
|
||||||
|
|
@ -396,20 +396,11 @@ trtt_get_page_table_bo(struct anv_device *device, struct anv_bo **bo,
|
||||||
}
|
}
|
||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
anv_trtt_init_context_state(struct anv_queue *queue)
|
anv_trtt_init_context_state(struct anv_device *device,
|
||||||
|
struct anv_async_submit *submit)
|
||||||
{
|
{
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
struct anv_trtt *trtt = &device->trtt;
|
struct anv_trtt *trtt = &device->trtt;
|
||||||
|
|
||||||
struct drm_syncobj_create create = {
|
|
||||||
.handle = 0,
|
|
||||||
.flags = 0,
|
|
||||||
};
|
|
||||||
if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create))
|
|
||||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
||||||
assert(create.handle != 0);
|
|
||||||
trtt->timeline_handle = create.handle;
|
|
||||||
|
|
||||||
struct anv_bo *l3_bo;
|
struct anv_bo *l3_bo;
|
||||||
VkResult result = trtt_get_page_table_bo(device, &l3_bo, &trtt->l3_addr);
|
VkResult result = trtt_get_page_table_bo(device, &l3_bo, &trtt->l3_addr);
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
|
|
@ -430,7 +421,7 @@ anv_trtt_init_context_state(struct anv_queue *queue)
|
||||||
goto fail_free_l3;
|
goto fail_free_l3;
|
||||||
}
|
}
|
||||||
|
|
||||||
result = anv_genX(device->info, init_trtt_context_state)(queue);
|
result = anv_genX(device->info, init_trtt_context_state)(device, submit);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
|
|
@ -439,17 +430,6 @@ fail_free_l3:
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
anv_trtt_bind_list_add_entry(struct anv_trtt_bind *binds, int *binds_len,
|
|
||||||
uint64_t pte_addr, uint64_t entry_addr)
|
|
||||||
{
|
|
||||||
binds[*binds_len] = (struct anv_trtt_bind) {
|
|
||||||
.pte_addr = pte_addr,
|
|
||||||
.entry_addr = entry_addr,
|
|
||||||
};
|
|
||||||
(*binds_len)++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* For L3 and L2 pages, null and invalid entries are indicated by bits 1 and 0
|
/* For L3 and L2 pages, null and invalid entries are indicated by bits 1 and 0
|
||||||
* respectively. For L1 entries, the hardware compares the addresses against
|
* respectively. For L1 entries, the hardware compares the addresses against
|
||||||
* what we program to the GFX_TRTT_NULL and GFX_TRTT_INVAL registers.
|
* what we program to the GFX_TRTT_NULL and GFX_TRTT_INVAL registers.
|
||||||
|
|
@ -457,13 +437,27 @@ anv_trtt_bind_list_add_entry(struct anv_trtt_bind *binds, int *binds_len,
|
||||||
#define ANV_TRTT_L3L2_NULL_ENTRY (1 << 1)
|
#define ANV_TRTT_L3L2_NULL_ENTRY (1 << 1)
|
||||||
#define ANV_TRTT_L3L2_INVALID_ENTRY (1 << 0)
|
#define ANV_TRTT_L3L2_INVALID_ENTRY (1 << 0)
|
||||||
|
|
||||||
|
static void
|
||||||
|
anv_trtt_bind_list_add_entry(struct anv_trtt_bind *binds, uint32_t *binds_len,
|
||||||
|
uint64_t pte_addr, uint64_t entry_addr)
|
||||||
|
{
|
||||||
|
binds[*binds_len] = (struct anv_trtt_bind) {
|
||||||
|
.pte_addr = pte_addr,
|
||||||
|
.entry_addr = entry_addr,
|
||||||
|
};
|
||||||
|
(*binds_len)++;
|
||||||
|
}
|
||||||
|
|
||||||
/* Adds elements to the anv_trtt_bind structs passed. This doesn't write the
|
/* Adds elements to the anv_trtt_bind structs passed. This doesn't write the
|
||||||
* entries to the HW yet.
|
* entries to the HW yet.
|
||||||
*/
|
*/
|
||||||
static VkResult
|
static VkResult
|
||||||
anv_trtt_bind_add(struct anv_device *device,
|
anv_trtt_bind_add(struct anv_device *device,
|
||||||
uint64_t trtt_addr, uint64_t dest_addr,
|
uint64_t trtt_addr, uint64_t dest_addr,
|
||||||
struct anv_trtt_submission *s)
|
struct anv_trtt_bind *l3l2_binds,
|
||||||
|
uint32_t *n_l3l2_binds,
|
||||||
|
struct anv_trtt_bind *l1_binds,
|
||||||
|
uint32_t *n_l1_binds)
|
||||||
{
|
{
|
||||||
VkResult result = VK_SUCCESS;
|
VkResult result = VK_SUCCESS;
|
||||||
struct anv_trtt *trtt = &device->trtt;
|
struct anv_trtt *trtt = &device->trtt;
|
||||||
|
|
@ -480,8 +474,9 @@ anv_trtt_bind_add(struct anv_device *device,
|
||||||
if (is_null_bind) {
|
if (is_null_bind) {
|
||||||
trtt->l3_mirror[l3_index] = ANV_TRTT_L3L2_NULL_ENTRY;
|
trtt->l3_mirror[l3_index] = ANV_TRTT_L3L2_NULL_ENTRY;
|
||||||
|
|
||||||
anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
|
anv_trtt_bind_list_add_entry(l3l2_binds, n_l3l2_binds,
|
||||||
trtt->l3_addr + l3_index * sizeof(uint64_t),
|
trtt->l3_addr + l3_index *
|
||||||
|
sizeof(uint64_t),
|
||||||
ANV_TRTT_L3L2_NULL_ENTRY);
|
ANV_TRTT_L3L2_NULL_ENTRY);
|
||||||
|
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
|
|
@ -494,8 +489,9 @@ anv_trtt_bind_add(struct anv_device *device,
|
||||||
|
|
||||||
trtt->l3_mirror[l3_index] = l2_addr;
|
trtt->l3_mirror[l3_index] = l2_addr;
|
||||||
|
|
||||||
anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
|
anv_trtt_bind_list_add_entry(l3l2_binds, n_l3l2_binds,
|
||||||
trtt->l3_addr + l3_index * sizeof(uint64_t), l2_addr);
|
trtt->l3_addr + l3_index *
|
||||||
|
sizeof(uint64_t), l2_addr);
|
||||||
}
|
}
|
||||||
assert(l2_addr != 0 && l2_addr != ANV_TRTT_L3L2_NULL_ENTRY);
|
assert(l2_addr != 0 && l2_addr != ANV_TRTT_L3L2_NULL_ENTRY);
|
||||||
|
|
||||||
|
|
@ -508,7 +504,7 @@ anv_trtt_bind_add(struct anv_device *device,
|
||||||
trtt->l2_mirror[l3_index * 512 + l2_index] =
|
trtt->l2_mirror[l3_index * 512 + l2_index] =
|
||||||
ANV_TRTT_L3L2_NULL_ENTRY;
|
ANV_TRTT_L3L2_NULL_ENTRY;
|
||||||
|
|
||||||
anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
|
anv_trtt_bind_list_add_entry(l3l2_binds, n_l3l2_binds,
|
||||||
l2_addr + l2_index * sizeof(uint64_t),
|
l2_addr + l2_index * sizeof(uint64_t),
|
||||||
ANV_TRTT_L3L2_NULL_ENTRY);
|
ANV_TRTT_L3L2_NULL_ENTRY);
|
||||||
|
|
||||||
|
|
@ -522,13 +518,65 @@ anv_trtt_bind_add(struct anv_device *device,
|
||||||
|
|
||||||
trtt->l2_mirror[l3_index * 512 + l2_index] = l1_addr;
|
trtt->l2_mirror[l3_index * 512 + l2_index] = l1_addr;
|
||||||
|
|
||||||
anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
|
anv_trtt_bind_list_add_entry(l3l2_binds, n_l3l2_binds,
|
||||||
l2_addr + l2_index * sizeof(uint64_t), l1_addr);
|
l2_addr + l2_index * sizeof(uint64_t),
|
||||||
|
l1_addr);
|
||||||
}
|
}
|
||||||
assert(l1_addr != 0 && l1_addr != ANV_TRTT_L3L2_NULL_ENTRY);
|
assert(l1_addr != 0 && l1_addr != ANV_TRTT_L3L2_NULL_ENTRY);
|
||||||
|
|
||||||
anv_trtt_bind_list_add_entry(s->l1_binds, &s->l1_binds_len,
|
anv_trtt_bind_list_add_entry(l1_binds, n_l1_binds,
|
||||||
l1_addr + l1_index * sizeof(uint32_t), dest_addr);
|
l1_addr + l1_index * sizeof(uint32_t),
|
||||||
|
dest_addr);
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkResult
|
||||||
|
anv_sparse_trtt_garbage_collect_batches(struct anv_device *device,
|
||||||
|
bool wait_completion)
|
||||||
|
{
|
||||||
|
struct anv_trtt *trtt = &device->trtt;
|
||||||
|
|
||||||
|
uint64_t last_value;
|
||||||
|
if (!wait_completion) {
|
||||||
|
VkResult result =
|
||||||
|
vk_sync_get_value(&device->vk, trtt->timeline, &last_value);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
|
last_value = trtt->timeline_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_for_each_entry_safe(struct anv_trtt_submission, submit,
|
||||||
|
&trtt->in_flight_batches, link) {
|
||||||
|
if (submit->base.signal.signal_value <= last_value) {
|
||||||
|
list_del(&submit->link);
|
||||||
|
anv_async_submit_fini(&submit->base);
|
||||||
|
vk_free(&device->vk.alloc, submit);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!wait_completion)
|
||||||
|
break;
|
||||||
|
|
||||||
|
VkResult result = vk_sync_wait(
|
||||||
|
&device->vk,
|
||||||
|
submit->base.signal.sync,
|
||||||
|
submit->base.signal.signal_value,
|
||||||
|
VK_SYNC_WAIT_COMPLETE,
|
||||||
|
os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
|
||||||
|
if (result == VK_SUCCESS) {
|
||||||
|
list_del(&submit->link);
|
||||||
|
anv_async_submit_fini(&submit->base);
|
||||||
|
vk_free(&device->vk.alloc, submit);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the wait failed but the caller wanted completion, return the
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
@ -545,6 +593,35 @@ anv_sparse_bind_trtt(struct anv_device *device,
|
||||||
if (!sparse_submit->queue)
|
if (!sparse_submit->queue)
|
||||||
sparse_submit->queue = trtt->queue;
|
sparse_submit->queue = trtt->queue;
|
||||||
|
|
||||||
|
struct anv_trtt_submission *submit =
|
||||||
|
vk_zalloc(&device->vk.alloc, sizeof(*submit), 8,
|
||||||
|
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||||
|
if (submit == NULL)
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
|
||||||
|
result = anv_async_submit_init(&submit->base, sparse_submit->queue,
|
||||||
|
&device->batch_bo_pool,
|
||||||
|
false, false);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error_async;
|
||||||
|
|
||||||
|
simple_mtx_lock(&trtt->mutex);
|
||||||
|
|
||||||
|
anv_sparse_trtt_garbage_collect_batches(device, false);
|
||||||
|
|
||||||
|
submit->base.signal = (struct vk_sync_signal) {
|
||||||
|
.sync = trtt->timeline,
|
||||||
|
.signal_value = ++trtt->timeline_val,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* If the TRTT L3 table was never set, initialize it as part of this
|
||||||
|
* submission.
|
||||||
|
*/
|
||||||
|
if (!trtt->l3_addr)
|
||||||
|
anv_trtt_init_context_state(device, &submit->base);
|
||||||
|
|
||||||
|
assert(trtt->l3_addr);
|
||||||
|
|
||||||
/* These capacities are conservative estimations. For L1 binds the
|
/* These capacities are conservative estimations. For L1 binds the
|
||||||
* number will match exactly unless we skip NULL binds due to L2 already
|
* number will match exactly unless we skip NULL binds due to L2 already
|
||||||
* being NULL. For L3/L2 things are harder to estimate, but the resulting
|
* being NULL. For L3/L2 things are harder to estimate, but the resulting
|
||||||
|
|
@ -561,26 +638,15 @@ anv_sparse_bind_trtt(struct anv_device *device,
|
||||||
l3l2_binds_capacity += (pages / 1024 + 1) * 2;
|
l3l2_binds_capacity += (pages / 1024 + 1) * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Turn a series of virtual address maps, into a list of L3/L2/L1 TRTT page
|
||||||
|
* table updates.
|
||||||
|
*/
|
||||||
STACK_ARRAY(struct anv_trtt_bind, l3l2_binds, l3l2_binds_capacity);
|
STACK_ARRAY(struct anv_trtt_bind, l3l2_binds, l3l2_binds_capacity);
|
||||||
STACK_ARRAY(struct anv_trtt_bind, l1_binds, l1_binds_capacity);
|
STACK_ARRAY(struct anv_trtt_bind, l1_binds, l1_binds_capacity);
|
||||||
struct anv_trtt_submission trtt_submit = {
|
uint32_t n_l3l2_binds = 0, n_l1_binds = 0;
|
||||||
.sparse = sparse_submit,
|
for (int b = 0; b < sparse_submit->binds_len && result == VK_SUCCESS; b++) {
|
||||||
.l3l2_binds = l3l2_binds,
|
|
||||||
.l1_binds = l1_binds,
|
|
||||||
.l3l2_binds_len = 0,
|
|
||||||
.l1_binds_len = 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
simple_mtx_lock(&trtt->mutex);
|
|
||||||
|
|
||||||
if (!trtt->l3_addr)
|
|
||||||
anv_trtt_init_context_state(sparse_submit->queue);
|
|
||||||
|
|
||||||
assert(trtt->l3_addr);
|
|
||||||
|
|
||||||
for (int b = 0; b < sparse_submit->binds_len; b++) {
|
|
||||||
struct anv_vm_bind *vm_bind = &sparse_submit->binds[b];
|
struct anv_vm_bind *vm_bind = &sparse_submit->binds[b];
|
||||||
for (size_t i = 0; i < vm_bind->size; i += 64 * 1024) {
|
for (size_t i = 0; i < vm_bind->size && result == VK_SUCCESS; i += 64 * 1024) {
|
||||||
uint64_t trtt_addr = vm_bind->address + i;
|
uint64_t trtt_addr = vm_bind->address + i;
|
||||||
uint64_t dest_addr =
|
uint64_t dest_addr =
|
||||||
(vm_bind->op == ANV_VM_BIND && vm_bind->bo) ?
|
(vm_bind->op == ANV_VM_BIND && vm_bind->bo) ?
|
||||||
|
|
@ -588,29 +654,74 @@ anv_sparse_bind_trtt(struct anv_device *device,
|
||||||
ANV_TRTT_L1_NULL_TILE_VAL;
|
ANV_TRTT_L1_NULL_TILE_VAL;
|
||||||
|
|
||||||
result = anv_trtt_bind_add(device, trtt_addr, dest_addr,
|
result = anv_trtt_bind_add(device, trtt_addr, dest_addr,
|
||||||
&trtt_submit);
|
l3l2_binds, &n_l3l2_binds,
|
||||||
if (result != VK_SUCCESS)
|
l1_binds, &n_l1_binds);
|
||||||
goto out;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(trtt_submit.l3l2_binds_len <= l3l2_binds_capacity);
|
assert(n_l3l2_binds <= l3l2_binds_capacity);
|
||||||
assert(trtt_submit.l1_binds_len <= l1_binds_capacity);
|
assert(n_l1_binds <= l1_binds_capacity);
|
||||||
|
|
||||||
|
/* Convert the L3/L2/L1 TRTT page table updates in anv_trtt_bind elements
|
||||||
|
* into MI commands.
|
||||||
|
*/
|
||||||
|
if (result == VK_SUCCESS) {
|
||||||
sparse_debug("trtt_binds: num_vm_binds:%02d l3l2:%04d l1:%04d\n",
|
sparse_debug("trtt_binds: num_vm_binds:%02d l3l2:%04d l1:%04d\n",
|
||||||
sparse_submit->binds_len, trtt_submit.l3l2_binds_len,
|
sparse_submit->binds_len, n_l3l2_binds, n_l1_binds);
|
||||||
trtt_submit.l1_binds_len);
|
|
||||||
|
|
||||||
if (trtt_submit.l3l2_binds_len || trtt_submit.l1_binds_len)
|
if (n_l3l2_binds || n_l1_binds) {
|
||||||
result = anv_genX(device->info, write_trtt_entries)(&trtt_submit);
|
anv_genX(device->info, write_trtt_entries)(
|
||||||
|
&submit->base, l3l2_binds, n_l3l2_binds, l1_binds, n_l1_binds);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (result == VK_SUCCESS)
|
|
||||||
ANV_RMV(vm_binds, device, sparse_submit->binds, sparse_submit->binds_len);
|
|
||||||
|
|
||||||
out:
|
|
||||||
simple_mtx_unlock(&trtt->mutex);
|
|
||||||
STACK_ARRAY_FINISH(l1_binds);
|
STACK_ARRAY_FINISH(l1_binds);
|
||||||
STACK_ARRAY_FINISH(l3l2_binds);
|
STACK_ARRAY_FINISH(l3l2_binds);
|
||||||
|
|
||||||
|
anv_genX(device->info, async_submit_end)(&submit->base);
|
||||||
|
|
||||||
|
if (submit->base.batch.status != VK_SUCCESS) {
|
||||||
|
result = submit->base.batch.status;
|
||||||
|
goto error_add_bind;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add all the BOs backing TRTT page tables to the reloc list.
|
||||||
|
*
|
||||||
|
* TODO: we could narrow down the list by using anv_address structures in
|
||||||
|
* anv_trtt_bind for the pte_addr.
|
||||||
|
*/
|
||||||
|
if (device->physical->uses_relocs) {
|
||||||
|
for (int i = 0; i < trtt->num_page_table_bos; i++) {
|
||||||
|
result = anv_reloc_list_add_bo(&submit->base.relocs,
|
||||||
|
trtt->page_table_bos[i]);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error_add_bind;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result =
|
||||||
|
device->kmd_backend->queue_exec_async(&submit->base,
|
||||||
|
sparse_submit->wait_count,
|
||||||
|
sparse_submit->waits,
|
||||||
|
sparse_submit->signal_count,
|
||||||
|
sparse_submit->signals);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error_add_bind;
|
||||||
|
|
||||||
|
|
||||||
|
list_addtail(&submit->link, &trtt->in_flight_batches);
|
||||||
|
|
||||||
|
simple_mtx_unlock(&trtt->mutex);
|
||||||
|
|
||||||
|
ANV_RMV(vm_binds, device, sparse_submit->binds, sparse_submit->binds_len);
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
|
||||||
|
error_add_bind:
|
||||||
|
simple_mtx_unlock(&trtt->mutex);
|
||||||
|
anv_async_submit_fini(&submit->base);
|
||||||
|
error_async:
|
||||||
|
vk_free(&device->vk.alloc, submit);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1299,65 +1410,3 @@ anv_sparse_image_check_support(struct anv_physical_device *pdevice,
|
||||||
|
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static VkResult
|
|
||||||
anv_trtt_garbage_collect_batches(struct anv_device *device)
|
|
||||||
{
|
|
||||||
struct anv_trtt *trtt = &device->trtt;
|
|
||||||
|
|
||||||
if (trtt->timeline_val % 8 != 7)
|
|
||||||
return VK_SUCCESS;
|
|
||||||
|
|
||||||
uint64_t cur_timeline_val = 0;
|
|
||||||
struct drm_syncobj_timeline_array array = {
|
|
||||||
.handles = (uintptr_t)&trtt->timeline_handle,
|
|
||||||
.points = (uintptr_t)&cur_timeline_val,
|
|
||||||
.count_handles = 1,
|
|
||||||
.flags = 0,
|
|
||||||
};
|
|
||||||
if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_QUERY, &array))
|
|
||||||
return vk_error(device, VK_ERROR_UNKNOWN);
|
|
||||||
|
|
||||||
list_for_each_entry_safe(struct anv_trtt_batch_bo, trtt_bbo,
|
|
||||||
&trtt->in_flight_batches, link) {
|
|
||||||
if (trtt_bbo->timeline_val > cur_timeline_val)
|
|
||||||
return VK_SUCCESS;
|
|
||||||
|
|
||||||
anv_trtt_batch_bo_free(device, trtt_bbo);
|
|
||||||
}
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
VkResult
|
|
||||||
anv_trtt_batch_bo_new(struct anv_device *device, uint32_t batch_size,
|
|
||||||
struct anv_trtt_batch_bo **out_trtt_bbo)
|
|
||||||
{
|
|
||||||
struct anv_trtt *trtt = &device->trtt;
|
|
||||||
VkResult result;
|
|
||||||
|
|
||||||
anv_trtt_garbage_collect_batches(device);
|
|
||||||
|
|
||||||
struct anv_trtt_batch_bo *trtt_bbo =
|
|
||||||
vk_alloc(&device->vk.alloc, sizeof(*trtt_bbo), 8,
|
|
||||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
|
||||||
if (!trtt_bbo)
|
|
||||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
||||||
|
|
||||||
result = anv_bo_pool_alloc(&device->batch_bo_pool, batch_size,
|
|
||||||
&trtt_bbo->bo);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
trtt_bbo->size = batch_size;
|
|
||||||
trtt_bbo->timeline_val = ++trtt->timeline_val;
|
|
||||||
|
|
||||||
list_addtail(&trtt_bbo->link, &trtt->in_flight_batches);
|
|
||||||
|
|
||||||
*out_trtt_bbo = trtt_bbo;
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
out:
|
|
||||||
vk_free(&device->vk.alloc, trtt_bbo);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -6094,22 +6094,17 @@ genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult
|
void
|
||||||
genX(write_trtt_entries)(struct anv_trtt_submission *submit)
|
genX(write_trtt_entries)(struct anv_async_submit *submit,
|
||||||
|
struct anv_trtt_bind *l3l2_binds,
|
||||||
|
uint32_t n_l3l2_binds,
|
||||||
|
struct anv_trtt_bind *l1_binds,
|
||||||
|
uint32_t n_l1_binds)
|
||||||
{
|
{
|
||||||
#if GFX_VER >= 12
|
#if GFX_VER >= 12
|
||||||
const struct intel_device_info *devinfo =
|
const struct intel_device_info *devinfo =
|
||||||
submit->sparse->queue->device->info;
|
submit->queue->device->info;
|
||||||
|
struct anv_batch *batch = &submit->batch;
|
||||||
size_t batch_size = submit->l3l2_binds_len * 20 +
|
|
||||||
submit->l1_binds_len * 16 +
|
|
||||||
GENX(PIPE_CONTROL_length) * sizeof(uint32_t) + 8;
|
|
||||||
STACK_ARRAY(uint32_t, cmds, batch_size);
|
|
||||||
struct anv_batch batch = {
|
|
||||||
.start = cmds,
|
|
||||||
.next = cmds,
|
|
||||||
.end = (void *)cmds + batch_size,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* BSpec says:
|
/* BSpec says:
|
||||||
* "DWord Length programmed must not exceed 0x3FE."
|
* "DWord Length programmed must not exceed 0x3FE."
|
||||||
|
|
@ -6127,90 +6122,86 @@ genX(write_trtt_entries)(struct anv_trtt_submission *submit)
|
||||||
* contiguous addresses.
|
* contiguous addresses.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (int i = 0; i < submit->l3l2_binds_len; i++) {
|
for (uint32_t i = 0; i < n_l3l2_binds; i++) {
|
||||||
int extra_writes = 0;
|
int extra_writes = 0;
|
||||||
for (int j = i + 1;
|
for (uint32_t j = i + 1;
|
||||||
j < submit->l3l2_binds_len &&
|
j < n_l3l2_binds && extra_writes <= max_qword_extra_writes;
|
||||||
extra_writes <= max_qword_extra_writes;
|
|
||||||
j++) {
|
j++) {
|
||||||
if (submit->l3l2_binds[i].pte_addr + (j - i) * 8 ==
|
if (l3l2_binds[i].pte_addr + (j - i) * 8 == l3l2_binds[j].pte_addr) {
|
||||||
submit->l3l2_binds[j].pte_addr) {
|
|
||||||
extra_writes++;
|
extra_writes++;
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool is_last_write = submit->l1_binds_len == 0 &&
|
bool is_last_write = n_l1_binds == 0 &&
|
||||||
i + extra_writes + 1 == submit->l3l2_binds_len;
|
i + extra_writes + 1 == n_l3l2_binds;
|
||||||
|
|
||||||
uint32_t total_len = GENX(MI_STORE_DATA_IMM_length_bias) +
|
uint32_t total_len = GENX(MI_STORE_DATA_IMM_length_bias) +
|
||||||
qword_write_len + (extra_writes * 2);
|
qword_write_len + (extra_writes * 2);
|
||||||
uint32_t *dw;
|
uint32_t *dw;
|
||||||
dw = anv_batch_emitn(&batch, total_len, GENX(MI_STORE_DATA_IMM),
|
dw = anv_batch_emitn(batch, total_len, GENX(MI_STORE_DATA_IMM),
|
||||||
.ForceWriteCompletionCheck = is_last_write,
|
.ForceWriteCompletionCheck = is_last_write,
|
||||||
.StoreQword = true,
|
.StoreQword = true,
|
||||||
.Address = anv_address_from_u64(submit->l3l2_binds[i].pte_addr),
|
.Address = anv_address_from_u64(l3l2_binds[i].pte_addr),
|
||||||
);
|
);
|
||||||
dw += 3;
|
dw += 3;
|
||||||
for (int j = 0; j < extra_writes + 1; j++) {
|
for (uint32_t j = 0; j < extra_writes + 1; j++) {
|
||||||
uint64_t entry_addr_64b = submit->l3l2_binds[i + j].entry_addr;
|
uint64_t entry_addr_64b = l3l2_binds[i + j].entry_addr;
|
||||||
*dw = entry_addr_64b & 0xFFFFFFFF;
|
*dw = entry_addr_64b & 0xFFFFFFFF;
|
||||||
dw++;
|
dw++;
|
||||||
*dw = (entry_addr_64b >> 32) & 0xFFFFFFFF;
|
*dw = (entry_addr_64b >> 32) & 0xFFFFFFFF;
|
||||||
dw++;
|
dw++;
|
||||||
}
|
}
|
||||||
assert(dw == batch.next);
|
assert(dw == batch->next);
|
||||||
|
|
||||||
i += extra_writes;
|
i += extra_writes;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < submit->l1_binds_len; i++) {
|
for (uint32_t i = 0; i < n_l1_binds; i++) {
|
||||||
int extra_writes = 0;
|
int extra_writes = 0;
|
||||||
for (int j = i + 1;
|
for (uint32_t j = i + 1;
|
||||||
j < submit->l1_binds_len && extra_writes <= max_dword_extra_writes;
|
j < n_l1_binds && extra_writes <= max_dword_extra_writes;
|
||||||
j++) {
|
j++) {
|
||||||
if (submit->l1_binds[i].pte_addr + (j - i) * 4 ==
|
if (l1_binds[i].pte_addr + (j - i) * 4 ==
|
||||||
submit->l1_binds[j].pte_addr) {
|
l1_binds[j].pte_addr) {
|
||||||
extra_writes++;
|
extra_writes++;
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_last_write = i + extra_writes + 1 == submit->l1_binds_len;
|
bool is_last_write = i + extra_writes + 1 == n_l1_binds;
|
||||||
|
|
||||||
uint32_t total_len = GENX(MI_STORE_DATA_IMM_length_bias) +
|
uint32_t total_len = GENX(MI_STORE_DATA_IMM_length_bias) +
|
||||||
dword_write_len + extra_writes;
|
dword_write_len + extra_writes;
|
||||||
uint32_t *dw;
|
uint32_t *dw;
|
||||||
dw = anv_batch_emitn(&batch, total_len, GENX(MI_STORE_DATA_IMM),
|
dw = anv_batch_emitn(batch, total_len, GENX(MI_STORE_DATA_IMM),
|
||||||
.ForceWriteCompletionCheck = is_last_write,
|
.ForceWriteCompletionCheck = is_last_write,
|
||||||
.Address = anv_address_from_u64(submit->l1_binds[i].pte_addr),
|
.Address = anv_address_from_u64(l1_binds[i].pte_addr),
|
||||||
);
|
);
|
||||||
dw += 3;
|
dw += 3;
|
||||||
for (int j = 0; j < extra_writes + 1; j++) {
|
for (uint32_t j = 0; j < extra_writes + 1; j++) {
|
||||||
*dw = (submit->l1_binds[i + j].entry_addr >> 16) & 0xFFFFFFFF;
|
*dw = (l1_binds[i + j].entry_addr >> 16) & 0xFFFFFFFF;
|
||||||
dw++;
|
dw++;
|
||||||
}
|
}
|
||||||
assert(dw == batch.next);
|
assert(dw == batch->next);
|
||||||
|
|
||||||
i += extra_writes;
|
i += extra_writes;
|
||||||
}
|
}
|
||||||
|
|
||||||
genx_batch_emit_pipe_control(&batch, devinfo, _3D,
|
genx_batch_emit_pipe_control(batch, devinfo, _3D,
|
||||||
ANV_PIPE_CS_STALL_BIT |
|
ANV_PIPE_CS_STALL_BIT |
|
||||||
ANV_PIPE_TLB_INVALIDATE_BIT);
|
ANV_PIPE_TLB_INVALIDATE_BIT);
|
||||||
|
#else
|
||||||
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
|
unreachable("Not implemented");
|
||||||
|
|
||||||
assert(batch.next <= batch.end);
|
|
||||||
|
|
||||||
VkResult result = anv_queue_submit_trtt_batch(submit->sparse, &batch);
|
|
||||||
STACK_ARRAY_FINISH(cmds);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
return VK_SUCCESS;
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
genX(async_submit_end)(struct anv_async_submit *submit)
|
||||||
|
{
|
||||||
|
struct anv_batch *batch = &submit->batch;
|
||||||
|
anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_END), bbe);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
||||||
|
|
@ -1396,31 +1396,25 @@ genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
genX(init_trtt_context_state)(struct anv_queue *queue)
|
genX(init_trtt_context_state)(struct anv_device *device,
|
||||||
|
struct anv_async_submit *submit)
|
||||||
{
|
{
|
||||||
#if GFX_VER >= 12
|
#if GFX_VER >= 12
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
struct anv_trtt *trtt = &device->trtt;
|
struct anv_trtt *trtt = &device->trtt;
|
||||||
|
struct anv_batch *batch = &submit->batch;
|
||||||
|
|
||||||
uint32_t cmds[128];
|
anv_batch_write_reg(batch, GENX(GFX_TRTT_INVAL), trtt_inval) {
|
||||||
struct anv_batch batch = {
|
|
||||||
.start = cmds,
|
|
||||||
.next = cmds,
|
|
||||||
.end = (void *)cmds + sizeof(cmds),
|
|
||||||
};
|
|
||||||
|
|
||||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_INVAL), trtt_inval) {
|
|
||||||
trtt_inval.InvalidTileDetectionValue = ANV_TRTT_L1_INVALID_TILE_VAL;
|
trtt_inval.InvalidTileDetectionValue = ANV_TRTT_L1_INVALID_TILE_VAL;
|
||||||
}
|
}
|
||||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_NULL), trtt_null) {
|
anv_batch_write_reg(batch, GENX(GFX_TRTT_NULL), trtt_null) {
|
||||||
trtt_null.NullTileDetectionValue = ANV_TRTT_L1_NULL_TILE_VAL;
|
trtt_null.NullTileDetectionValue = ANV_TRTT_L1_NULL_TILE_VAL;
|
||||||
}
|
}
|
||||||
#if GFX_VER >= 20
|
#if GFX_VER >= 20
|
||||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) {
|
anv_batch_write_reg(batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) {
|
||||||
trtt_va_range.TRVABase = device->physical->va.trtt.addr >> 44;
|
trtt_va_range.TRVABase = device->physical->va.trtt.addr >> 44;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) {
|
anv_batch_write_reg(batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) {
|
||||||
trtt_va_range.TRVAMaskValue = 0xF;
|
trtt_va_range.TRVAMaskValue = 0xF;
|
||||||
trtt_va_range.TRVADataValue = 0xF;
|
trtt_va_range.TRVADataValue = 0xF;
|
||||||
}
|
}
|
||||||
|
|
@ -1428,28 +1422,24 @@ genX(init_trtt_context_state)(struct anv_queue *queue)
|
||||||
|
|
||||||
uint64_t l3_addr = trtt->l3_addr;
|
uint64_t l3_addr = trtt->l3_addr;
|
||||||
assert((l3_addr & 0xFFF) == 0);
|
assert((l3_addr & 0xFFF) == 0);
|
||||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_L3_BASE_LOW), trtt_base_low) {
|
anv_batch_write_reg(batch, GENX(GFX_TRTT_L3_BASE_LOW), trtt_base_low) {
|
||||||
trtt_base_low.TRVAL3PointerLowerAddress =
|
trtt_base_low.TRVAL3PointerLowerAddress =
|
||||||
(l3_addr & 0xFFFFF000) >> 12;
|
(l3_addr & 0xFFFFF000) >> 12;
|
||||||
}
|
}
|
||||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_L3_BASE_HIGH),
|
anv_batch_write_reg(batch, GENX(GFX_TRTT_L3_BASE_HIGH),
|
||||||
trtt_base_high) {
|
trtt_base_high) {
|
||||||
trtt_base_high.TRVAL3PointerUpperAddress =
|
trtt_base_high.TRVAL3PointerUpperAddress =
|
||||||
(l3_addr >> 32) & 0xFFFF;
|
(l3_addr >> 32) & 0xFFFF;
|
||||||
}
|
}
|
||||||
/* Enabling TR-TT needs to be done after setting up the other registers.
|
/* Enabling TR-TT needs to be done after setting up the other registers.
|
||||||
*/
|
*/
|
||||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_CR), trtt_cr) {
|
anv_batch_write_reg(batch, GENX(GFX_TRTT_CR), trtt_cr) {
|
||||||
trtt_cr.TRTTEnable = true;
|
trtt_cr.TRTTEnable = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
|
genx_batch_emit_pipe_control(batch, device->info, _3D,
|
||||||
assert(batch.next <= batch.end);
|
ANV_PIPE_CS_STALL_BIT |
|
||||||
|
ANV_PIPE_TLB_INVALIDATE_BIT);
|
||||||
VkResult res = anv_queue_submit_simple_batch(queue, &batch, false);
|
|
||||||
if (res != VK_SUCCESS)
|
|
||||||
return res;
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1051,105 +1051,3 @@ fail:
|
||||||
anv_execbuf_finish(&execbuf);
|
anv_execbuf_finish(&execbuf);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult
|
|
||||||
i915_execute_trtt_batch(struct anv_sparse_submission *submit,
|
|
||||||
struct anv_trtt_batch_bo *trtt_bbo)
|
|
||||||
{
|
|
||||||
struct anv_queue *queue = submit->queue;
|
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
struct anv_trtt *trtt = &device->trtt;
|
|
||||||
struct anv_execbuf execbuf = {
|
|
||||||
.alloc = &device->vk.alloc,
|
|
||||||
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
|
|
||||||
};
|
|
||||||
VkResult result;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < submit->wait_count; i++) {
|
|
||||||
result = anv_execbuf_add_sync(device, &execbuf, submit->waits[i].sync,
|
|
||||||
false /* is_signal */,
|
|
||||||
submit->waits[i].wait_value);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < submit->signal_count; i++) {
|
|
||||||
result = anv_execbuf_add_sync(device, &execbuf, submit->signals[i].sync,
|
|
||||||
true /* is_signal */,
|
|
||||||
submit->signals[i].signal_value);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = anv_execbuf_add_syncobj(device, &execbuf, trtt->timeline_handle,
|
|
||||||
I915_EXEC_FENCE_SIGNAL,
|
|
||||||
trtt_bbo->timeline_val);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
|
|
||||||
result = anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL,
|
|
||||||
0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
for (int i = 0; i < trtt->num_page_table_bos; i++) {
|
|
||||||
result = anv_execbuf_add_bo(device, &execbuf, trtt->page_table_bos[i],
|
|
||||||
NULL, EXEC_OBJECT_WRITE);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (queue->sync) {
|
|
||||||
result = anv_execbuf_add_sync(device, &execbuf, queue->sync,
|
|
||||||
true /* is_signal */,
|
|
||||||
0 /* signal_value */);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = anv_execbuf_add_bo(device, &execbuf, trtt_bbo->bo, NULL, 0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
if (INTEL_DEBUG(DEBUG_SUBMIT))
|
|
||||||
anv_i915_debug_submit(&execbuf);
|
|
||||||
|
|
||||||
uint64_t exec_flags = 0;
|
|
||||||
uint32_t context_id;
|
|
||||||
get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
|
|
||||||
|
|
||||||
execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
|
|
||||||
.buffers_ptr = (uintptr_t) execbuf.objects,
|
|
||||||
.buffer_count = execbuf.bo_count,
|
|
||||||
.batch_start_offset = 0,
|
|
||||||
.batch_len = trtt_bbo->size,
|
|
||||||
.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | exec_flags,
|
|
||||||
.rsvd1 = context_id,
|
|
||||||
.rsvd2 = 0,
|
|
||||||
};
|
|
||||||
setup_execbuf_fence_params(&execbuf);
|
|
||||||
|
|
||||||
ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
|
|
||||||
|
|
||||||
int ret = queue->device->info->no_hw ? 0 :
|
|
||||||
anv_gem_execbuffer(device, &execbuf.execbuf);
|
|
||||||
if (ret) {
|
|
||||||
result = vk_device_set_lost(&device->vk,
|
|
||||||
"trtt anv_gem_execbuffer failed: %m");
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (queue->sync) {
|
|
||||||
result = vk_sync_wait(&device->vk, queue->sync, 0,
|
|
||||||
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
|
|
||||||
if (result != VK_SUCCESS) {
|
|
||||||
result = vk_queue_set_lost(&queue->vk, "trtt sync wait failed");
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
out:
|
|
||||||
anv_execbuf_finish(&execbuf);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -29,15 +29,12 @@
|
||||||
|
|
||||||
#include "vk_sync.h"
|
#include "vk_sync.h"
|
||||||
|
|
||||||
struct anv_device;
|
|
||||||
struct anv_queue;
|
struct anv_queue;
|
||||||
struct anv_bo;
|
struct anv_bo;
|
||||||
struct anv_cmd_buffer;
|
struct anv_cmd_buffer;
|
||||||
struct anv_query_pool;
|
struct anv_query_pool;
|
||||||
struct anv_async_submit;
|
struct anv_async_submit;
|
||||||
struct anv_utrace_submit;
|
struct anv_utrace_submit;
|
||||||
struct anv_sparse_submission;
|
|
||||||
struct anv_trtt_batch_bo;
|
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
i915_queue_exec_async(struct anv_async_submit *submit,
|
i915_queue_exec_async(struct anv_async_submit *submit,
|
||||||
|
|
@ -50,10 +47,6 @@ VkResult
|
||||||
i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
|
i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
|
||||||
uint32_t batch_bo_size, bool is_companion_rcs_batch);
|
uint32_t batch_bo_size, bool is_companion_rcs_batch);
|
||||||
|
|
||||||
VkResult
|
|
||||||
i915_execute_trtt_batch(struct anv_sparse_submission *submit,
|
|
||||||
struct anv_trtt_batch_bo *trtt_bbo);
|
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
i915_queue_exec_locked(struct anv_queue *queue,
|
i915_queue_exec_locked(struct anv_queue *queue,
|
||||||
uint32_t wait_count,
|
uint32_t wait_count,
|
||||||
|
|
|
||||||
|
|
@ -297,7 +297,6 @@ anv_i915_kmd_backend_get(void)
|
||||||
.vm_bind_bo = i915_vm_bind_bo,
|
.vm_bind_bo = i915_vm_bind_bo,
|
||||||
.vm_unbind_bo = i915_vm_bind_bo,
|
.vm_unbind_bo = i915_vm_bind_bo,
|
||||||
.execute_simple_batch = i915_execute_simple_batch,
|
.execute_simple_batch = i915_execute_simple_batch,
|
||||||
.execute_trtt_batch = i915_execute_trtt_batch,
|
|
||||||
.queue_exec_locked = i915_queue_exec_locked,
|
.queue_exec_locked = i915_queue_exec_locked,
|
||||||
.queue_exec_async = i915_queue_exec_async,
|
.queue_exec_async = i915_queue_exec_async,
|
||||||
.bo_alloc_flags_to_bo_flags = i915_bo_alloc_flags_to_bo_flags,
|
.bo_alloc_flags_to_bo_flags = i915_bo_alloc_flags_to_bo_flags,
|
||||||
|
|
|
||||||
|
|
@ -183,58 +183,6 @@ xe_exec_print_debug(struct anv_queue *queue, uint32_t cmd_buffer_count,
|
||||||
perf_query_pool, perf_query_pass);
|
perf_query_pool, perf_query_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult
|
|
||||||
xe_execute_trtt_batch(struct anv_sparse_submission *submit,
|
|
||||||
struct anv_trtt_batch_bo *trtt_bbo)
|
|
||||||
{
|
|
||||||
struct anv_queue *queue = submit->queue;
|
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
struct anv_trtt *trtt = &device->trtt;
|
|
||||||
VkResult result = VK_SUCCESS;
|
|
||||||
|
|
||||||
struct drm_xe_sync extra_sync = {
|
|
||||||
.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
|
|
||||||
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
|
|
||||||
.handle = trtt->timeline_handle,
|
|
||||||
.timeline_value = trtt_bbo->timeline_val,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct drm_xe_sync *xe_syncs = NULL;
|
|
||||||
uint32_t xe_syncs_count = 0;
|
|
||||||
result = xe_exec_process_syncs(queue, submit->wait_count, submit->waits,
|
|
||||||
submit->signal_count, submit->signals,
|
|
||||||
1, &extra_sync,
|
|
||||||
NULL, /* utrace_submit */
|
|
||||||
false, /* is_companion_rcs_queue */
|
|
||||||
&xe_syncs, &xe_syncs_count);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
struct drm_xe_exec exec = {
|
|
||||||
.exec_queue_id = queue->exec_queue_id,
|
|
||||||
.num_syncs = xe_syncs_count,
|
|
||||||
.syncs = (uintptr_t)xe_syncs,
|
|
||||||
.address = trtt_bbo->bo->offset,
|
|
||||||
.num_batch_buffer = 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!device->info->no_hw) {
|
|
||||||
if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) {
|
|
||||||
result = vk_device_set_lost(&device->vk, "XE_EXEC failed: %m");
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (queue->sync) {
|
|
||||||
result = vk_sync_wait(&device->vk, queue->sync, 0,
|
|
||||||
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
|
|
||||||
}
|
|
||||||
|
|
||||||
out:
|
|
||||||
vk_free(&device->vk.alloc, xe_syncs);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
xe_queue_exec_async(struct anv_async_submit *submit,
|
xe_queue_exec_async(struct anv_async_submit *submit,
|
||||||
uint32_t wait_count,
|
uint32_t wait_count,
|
||||||
|
|
|
||||||
|
|
@ -36,17 +36,11 @@ struct anv_cmd_buffer;
|
||||||
struct anv_query_pool;
|
struct anv_query_pool;
|
||||||
struct anv_async_submit;
|
struct anv_async_submit;
|
||||||
struct anv_utrace_submit;
|
struct anv_utrace_submit;
|
||||||
struct anv_sparse_submission;
|
|
||||||
struct anv_trtt_batch_bo;
|
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
|
xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
|
||||||
uint32_t batch_bo_size, bool is_companion_rcs_batch);
|
uint32_t batch_bo_size, bool is_companion_rcs_batch);
|
||||||
|
|
||||||
VkResult
|
|
||||||
xe_execute_trtt_batch(struct anv_sparse_submission *submit,
|
|
||||||
struct anv_trtt_batch_bo *trtt_bbo);
|
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
xe_queue_exec_locked(struct anv_queue *queue,
|
xe_queue_exec_locked(struct anv_queue *queue,
|
||||||
uint32_t wait_count,
|
uint32_t wait_count,
|
||||||
|
|
|
||||||
|
|
@ -346,7 +346,6 @@ anv_xe_kmd_backend_get(void)
|
||||||
.vm_bind_bo = xe_vm_bind_bo,
|
.vm_bind_bo = xe_vm_bind_bo,
|
||||||
.vm_unbind_bo = xe_vm_unbind_bo,
|
.vm_unbind_bo = xe_vm_unbind_bo,
|
||||||
.execute_simple_batch = xe_execute_simple_batch,
|
.execute_simple_batch = xe_execute_simple_batch,
|
||||||
.execute_trtt_batch = xe_execute_trtt_batch,
|
|
||||||
.queue_exec_locked = xe_queue_exec_locked,
|
.queue_exec_locked = xe_queue_exec_locked,
|
||||||
.queue_exec_async = xe_queue_exec_async,
|
.queue_exec_async = xe_queue_exec_async,
|
||||||
.bo_alloc_flags_to_bo_flags = xe_bo_alloc_flags_to_bo_flags,
|
.bo_alloc_flags_to_bo_flags = xe_bo_alloc_flags_to_bo_flags,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue