From 7edae456e2c64d0328037c42b61fd829e29c97c2 Mon Sep 17 00:00:00 2001
From: Giancarlo Devich
Date: Tue, 14 Mar 2023 12:21:19 -0700
Subject: [PATCH] d3d12: Track up to 16 contexts worth of batch references locally in bos

Part-of:
---
Review note (not part of the commit message): d3d12_bo::local_reference_state
is declared as 128 bytes, i.e. 16 contexts x 8 batch slots, matching the 8-bit
per-context local_reference_mask. It is therefore indexed with a stride of 8,
(ctx_id * 8) + ctx_index. A stride of 16 would index past the end of the array
for any ctx_id >= 8. This assumes a context owns at most 8 batches, which the
uint8_t mask already requires.

 src/gallium/drivers/d3d12/d3d12_batch.cpp     | 84 +++++++++++++++----
 src/gallium/drivers/d3d12/d3d12_batch.h       |  2 +
 src/gallium/drivers/d3d12/d3d12_bufmgr.h      |  9 ++
 src/gallium/drivers/d3d12/d3d12_context.cpp   |  5 ++
 src/gallium/drivers/d3d12/d3d12_residency.cpp | 51 ++++++-----
 .../drivers/d3d12/d3d12_resource_state.cpp    | 36 ++++----
 6 files changed, 134 insertions(+), 53 deletions(-)

diff --git a/src/gallium/drivers/d3d12/d3d12_batch.cpp b/src/gallium/drivers/d3d12/d3d12_batch.cpp
index 465adf8da0c..ac096d7d5e4 100644
--- a/src/gallium/drivers/d3d12/d3d12_batch.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_batch.cpp
@@ -72,6 +72,7 @@ d3d12_init_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
       return false;
 
    util_dynarray_init(&batch->zombie_samplers, NULL);
+   util_dynarray_init(&batch->local_bos, NULL);
 
    if (FAILED(screen->dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT,
                                                   IID_PPV_ARGS(&batch->cmdalloc))))
@@ -95,8 +96,13 @@ d3d12_init_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
    return true;
 }
 
+static inline void
+delete_bo(d3d12_bo *bo)
+{
+   d3d12_bo_unreference(bo);
+}
 static void
-delete_bo(hash_entry *entry)
+delete_bo_entry(hash_entry *entry)
 {
    struct d3d12_bo *bo = (struct d3d12_bo *)entry->key;
    d3d12_bo_unreference(bo);
@@ -143,12 +149,19 @@ d3d12_reset_batch(struct d3d12_context *ctx, struct d3d12_batch *batch, uint64_t
       d3d12_fence_reference(&batch->fence, NULL);
    }
 
-   _mesa_hash_table_clear(batch->bos, delete_bo);
+   _mesa_hash_table_clear(batch->bos, delete_bo_entry);
    _mesa_hash_table_clear(batch->sampler_tables, delete_sampler_view_table);
    _mesa_set_clear(batch->sampler_views, delete_sampler_view);
    _mesa_set_clear(batch->surfaces, delete_surface);
    _mesa_set_clear(batch->objects, delete_object);
 
+
+   util_dynarray_foreach(&batch->local_bos, d3d12_bo*, bo) {
+      (*bo)->local_reference_mask[batch->ctx_id] &= ~(1 << batch->ctx_index);
+      delete_bo(*bo);
+   }
+   util_dynarray_clear(&batch->local_bos);
+
    util_dynarray_foreach(&batch->zombie_samplers, d3d12_descriptor_handle, handle)
       d3d12_descriptor_handle_free(handle);
    util_dynarray_clear(&batch->zombie_samplers);
@@ -178,6 +191,7 @@ d3d12_destroy_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
    _mesa_set_destroy(batch->surfaces, NULL);
    _mesa_set_destroy(batch->objects, NULL);
    util_dynarray_fini(&batch->zombie_samplers);
+   util_dynarray_fini(&batch->local_bos);
 }
 
 void
@@ -260,21 +274,60 @@ d3d12_end_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
    mtx_unlock(&screen->submit_mutex);
 }
 
-enum batch_bo_reference_state
+
+inline uint8_t*
+d3d12_batch_get_reference(struct d3d12_batch *batch,
+                          struct d3d12_bo *bo)
 {
-   batch_bo_reference_read = (1 << 0),
-   batch_bo_reference_written = (1 << 1),
-};
+   if (batch->ctx_id != D3D12_CONTEXT_NO_ID) {
+      if ((bo->local_reference_mask[batch->ctx_id] & (1 << batch->ctx_index)) != 0) {
+         return &bo->local_reference_state[(batch->ctx_id * 8) + batch->ctx_index];
+      }
+      else
+         return NULL;
+   }
+   else {
+      hash_entry* entry = _mesa_hash_table_search(batch->bos, bo);
+      if (entry == NULL)
+         return NULL;
+      else
+         return (uint8_t*)&entry->data;
+   }
+}
+
+inline uint8_t*
+d3d12_batch_acquire_reference(struct d3d12_batch *batch,
+                              struct d3d12_bo *bo)
+{
+   if (batch->ctx_id != D3D12_CONTEXT_NO_ID) {
+      if ((bo->local_reference_mask[batch->ctx_id] & (1 << batch->ctx_index)) == 0) {
+         d3d12_bo_reference(bo);
+         util_dynarray_append(&batch->local_bos, d3d12_bo*, bo);
+         bo->local_reference_mask[batch->ctx_id] |= (1 << batch->ctx_index);
+         bo->local_reference_state[(batch->ctx_id * 8) + batch->ctx_index] = batch_bo_reference_none;
+      }
+      return &bo->local_reference_state[(batch->ctx_id * 8) + batch->ctx_index];
+   }
+   else {
+      hash_entry* entry = _mesa_hash_table_search(batch->bos, bo);
+      if (entry == NULL) {
+         d3d12_bo_reference(bo);
+         entry = _mesa_hash_table_insert(batch->bos, bo, NULL);
+      }
+
+      return (uint8_t*)&entry->data;
+   }
+}
 
 bool
 d3d12_batch_has_references(struct d3d12_batch *batch,
                            struct d3d12_bo *bo,
                            bool want_to_write)
 {
-   hash_entry *entry = _mesa_hash_table_search(batch->bos, bo);
-   if (entry == NULL)
+   uint8_t*state = d3d12_batch_get_reference(batch, bo);
+   if (state == NULL)
       return false;
-   bool resource_was_written = ((batch_bo_reference_state)(size_t)entry->data & batch_bo_reference_written) != 0;
+   bool resource_was_written = ((batch_bo_reference_state)(size_t)*state & batch_bo_reference_written) != 0;
    return want_to_write || resource_was_written;
 }
 
@@ -283,14 +336,11 @@ d3d12_batch_reference_resource(struct d3d12_batch *batch,
                                struct d3d12_resource *res,
                                bool write)
 {
-   hash_entry *entry = _mesa_hash_table_search(batch->bos, res->bo);
-   if (entry == NULL) {
-      d3d12_bo_reference(res->bo);
-      entry = _mesa_hash_table_insert(batch->bos, res->bo, NULL);
-   }
-   size_t new_data = write ? batch_bo_reference_written : batch_bo_reference_read;
-   size_t old_data = (size_t)entry->data;
-   entry->data = (void*)(old_data | new_data);
+   uint8_t*state = d3d12_batch_acquire_reference(batch, res->bo);
+
+   uint8_t new_data = write ? batch_bo_reference_written : batch_bo_reference_read;
+   uint8_t old_data = (uint8_t)*state;
+   *state = (old_data | new_data);
 }
 
 void
diff --git a/src/gallium/drivers/d3d12/d3d12_batch.h b/src/gallium/drivers/d3d12/d3d12_batch.h
index db37a2c5676..1c4af99ae77 100644
--- a/src/gallium/drivers/d3d12/d3d12_batch.h
+++ b/src/gallium/drivers/d3d12/d3d12_batch.h
@@ -46,6 +46,7 @@ struct d3d12_batch {
    struct d3d12_fence *fence;
 
    struct hash_table *bos;
+   struct util_dynarray local_bos;
    struct hash_table *sampler_tables;
    struct set *sampler_views;
    struct set *surfaces;
@@ -60,6 +61,7 @@ struct d3d12_batch {
    bool pending_memory_barrier;
 
    uint64_t submit_id;
+   uint32_t ctx_id, ctx_index;
 };
 
 bool
diff --git a/src/gallium/drivers/d3d12/d3d12_bufmgr.h b/src/gallium/drivers/d3d12/d3d12_bufmgr.h
index 170580601f6..96d68bfd3b3 100644
--- a/src/gallium/drivers/d3d12/d3d12_bufmgr.h
+++ b/src/gallium/drivers/d3d12/d3d12_bufmgr.h
@@ -41,6 +41,12 @@ enum d3d12_residency_status {
    d3d12_permanently_resident,
 };
 
+enum batch_bo_reference_state {
+   batch_bo_reference_none = 0,
+   batch_bo_reference_read = (1 << 0),
+   batch_bo_reference_written = (1 << 1),
+};
+
 struct d3d12_bo {
    struct pipe_reference reference;
    struct d3d12_screen *screen;
@@ -62,7 +68,10 @@ struct d3d12_bo {
 
    uint16_t local_needs_resolve_state;
    unsigned local_context_state_mask;
+   uint8_t local_reference_mask[16];
+
    d3d12_context_state_table_entry local_context_states[16];
+   uint8_t local_reference_state[128];
 };
 
 struct d3d12_buffer {
diff --git a/src/gallium/drivers/d3d12/d3d12_context.cpp b/src/gallium/drivers/d3d12/d3d12_context.cpp
index af3dbf089fa..5cc9ec76db6 100644
--- a/src/gallium/drivers/d3d12/d3d12_context.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_context.cpp
@@ -2610,6 +2610,11 @@ d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
       ctx->id = D3D12_CONTEXT_NO_ID;
    mtx_unlock(&screen->submit_mutex);
 
+   for (unsigned i = 0; i < ARRAY_SIZE(ctx->batches); ++i) {
+      ctx->batches[i].ctx_id = ctx->id;
+      ctx->batches[i].ctx_index = i;
+   }
+
    if (flags & PIPE_CONTEXT_PREFER_THREADED)
       return threaded_context_create(&ctx->base,
                                      &screen->transfer_pool,
diff --git a/src/gallium/drivers/d3d12/d3d12_residency.cpp b/src/gallium/drivers/d3d12/d3d12_residency.cpp
index dd413f3cfbd..02a64c54b49 100644
--- a/src/gallium/drivers/d3d12/d3d12_residency.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_residency.cpp
@@ -126,6 +126,31 @@ get_eviction_grace_period(struct d3d12_memory_info *mem_info)
    return INT64_MAX;
 }
 
+static void
+gather_base_bos(struct d3d12_screen *screen, set *base_bo_set, struct d3d12_bo *bo, uint64_t &size_to_make_resident, uint64_t pending_fence_value, int64_t current_time)
+{
+   uint64_t offset;
+   struct d3d12_bo *base_bo = d3d12_bo_get_base(bo, &offset);
+
+   if (base_bo->residency_status == d3d12_evicted) {
+      bool added = false;
+      _mesa_set_search_or_add(base_bo_set, base_bo, &added);
+      assert(!added);
+
+      base_bo->residency_status = d3d12_resident;
+      size_to_make_resident += base_bo->estimated_size;
+      list_addtail(&base_bo->residency_list_entry, &screen->residency_list);
+   } else if (base_bo->last_used_fence != pending_fence_value &&
+              base_bo->residency_status == d3d12_resident) {
+      /* First time seeing this already-resident base bo in this batch */
+      list_del(&base_bo->residency_list_entry);
+      list_addtail(&base_bo->residency_list_entry, &screen->residency_list);
+   }
+
+   base_bo->last_used_fence = pending_fence_value;
+   base_bo->last_used_timestamp = current_time;
+}
+
 void
 d3d12_process_batch_residency(struct d3d12_screen *screen, struct d3d12_batch *batch)
 {
@@ -140,29 +165,11 @@ d3d12_process_batch_residency(struct d3d12_screen *screen, struct d3d12_batch *b
    /* Gather base bos for the batch */
    uint64_t size_to_make_resident = 0;
    set *base_bo_set = _mesa_pointer_set_create(nullptr);
-   hash_table_foreach(batch->bos, entry) {
-      struct d3d12_bo *bo = (struct d3d12_bo *)entry->key;
-      uint64_t offset;
-      struct d3d12_bo *base_bo = d3d12_bo_get_base(bo, &offset);
 
-      if (base_bo->residency_status == d3d12_evicted) {
-         bool added = false;
-         _mesa_set_search_or_add(base_bo_set, base_bo, &added);
-         assert(!added);
-
-         base_bo->residency_status = d3d12_resident;
-         size_to_make_resident += base_bo->estimated_size;
-         list_addtail(&base_bo->residency_list_entry, &screen->residency_list);
-      } else if (base_bo->last_used_fence != pending_fence_value &&
-                 base_bo->residency_status == d3d12_resident) {
-         /* First time seeing this already-resident base bo in this batch */
-         list_del(&base_bo->residency_list_entry);
-         list_addtail(&base_bo->residency_list_entry, &screen->residency_list);
-      }
-
-      base_bo->last_used_fence = pending_fence_value;
-      base_bo->last_used_timestamp = current_time;
-   }
+   util_dynarray_foreach(&batch->local_bos, d3d12_bo*, bo)
+      gather_base_bos(screen, base_bo_set, *bo, size_to_make_resident, pending_fence_value, current_time);
+   hash_table_foreach(batch->bos, entry)
+      gather_base_bos(screen, base_bo_set, (struct d3d12_bo *)entry->key, size_to_make_resident, pending_fence_value, current_time);
 
    /* Now that bos referenced by this batch are moved to the end of the LRU, trim it */
    evict_aged_allocations(screen, completed_fence_value, current_time, grace_period);
diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp
index 71906af92db..45a01a717f5 100644
--- a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp
@@ -339,6 +339,22 @@ resolve_global_state(struct d3d12_context *ctx, ID3D12Resource *res, d3d12_resou
    }
 }
 
+static void
+context_state_resolve_submission(struct d3d12_context *ctx, d3d12_bo *bo)
+{
+   d3d12_context_state_table_entry *bo_state = find_or_create_state_entry(ctx, bo);
+   if (!bo_state->batch_end.supports_simultaneous_access) {
+      assert(bo->res && bo->global_state.subresource_states);
+
+      resolve_global_state(ctx, bo->res, &bo_state->batch_begin, &bo->global_state);
+
+      copy_resource_state(&bo_state->batch_begin, &bo_state->batch_end);
+      copy_resource_state(&bo->global_state, &bo_state->batch_end);
+   } else {
+      reset_resource_state(&bo_state->batch_end);
+   }
+}
+
 bool
 d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct d3d12_batch *batch)
 {
@@ -351,20 +367,12 @@ d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct d3d12_b
 
    util_dynarray_clear(&ctx->recently_destroyed_bos);
 
-   hash_table_foreach(batch->bos, bo_entry) {
-      d3d12_bo *bo = (d3d12_bo *)bo_entry->key;
-      d3d12_context_state_table_entry *bo_state = find_or_create_state_entry(ctx, bo);
-      if (!bo_state->batch_end.supports_simultaneous_access) {
-         assert(bo->res && bo->global_state.subresource_states);
-
-         resolve_global_state(ctx, bo->res, &bo_state->batch_begin, &bo->global_state);
-
-         copy_resource_state(&bo_state->batch_begin, &bo_state->batch_end);
-         copy_resource_state(&bo->global_state, &bo_state->batch_end);
-      } else {
-         reset_resource_state(&bo_state->batch_end);
-      }
-   }
+
+   util_dynarray_foreach(&batch->local_bos, d3d12_bo*, bo)
+      context_state_resolve_submission(ctx, *bo);
+   hash_table_foreach(batch->bos, bo_entry)
+      context_state_resolve_submission(ctx, (d3d12_bo *)bo_entry->key);
+
 
    bool needs_execute_fixup = false;
    if (ctx->barrier_scratch.size) {