d3d12: Track up to 16 active context resource states locally in d3d12_bo

After 16 entries, we fall back to the previous logic that used a hash
map to link the resource's state per context.

Preventing hash map churn by cheaply tracking up to 16 context's worth
of states per resource significantly reduces CPU cost in
find_or_create_state_entry

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21528>
This commit is contained in:
Giancarlo Devich 2023-02-24 16:20:21 -08:00 committed by Marge Bot
parent 2c00c069fe
commit cb7403b909
4 changed files with 44 additions and 16 deletions

View file

@ -81,9 +81,10 @@ d3d12_bo_wrap_res(struct d3d12_screen *screen, ID3D12Resource *res, enum d3d12_r
{
struct d3d12_bo *bo;
bo = CALLOC_STRUCT(d3d12_bo);
bo = MALLOC_STRUCT(d3d12_bo);
if (!bo)
return NULL;
memset(bo, 0, offsetof(d3d12_bo, local_context_states));
D3D12_RESOURCE_DESC desc = GetDesc(res);
unsigned array_size = desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 1 : desc.DepthOrArraySize;
@ -158,9 +159,10 @@ d3d12_bo_wrap_buffer(struct d3d12_screen *screen, struct pb_buffer *buf)
{
struct d3d12_bo *bo;
bo = CALLOC_STRUCT(d3d12_bo);
bo = MALLOC_STRUCT(d3d12_bo);
if (!bo)
return NULL;
memset(bo, 0, offsetof(d3d12_bo, local_context_states));
pipe_reference_init(&bo->reference, 1);
bo->screen = screen;
@ -193,13 +195,21 @@ d3d12_bo_unreference(struct d3d12_bo *bo)
/* MSVC's offsetof fails when the name is ambiguous between struct and function */
typedef struct d3d12_context d3d12_context_type;
list_for_each_entry(d3d12_context_type, ctx, &bo->screen->context_list, context_list_entry)
util_dynarray_append(&ctx->recently_destroyed_bos, uint64_t, bo->unique_id);
if (ctx->id == D3D12_CONTEXT_NO_ID)
util_dynarray_append(&ctx->recently_destroyed_bos, uint64_t, bo->unique_id);
mtx_unlock(&bo->screen->submit_mutex);
d3d12_resource_state_cleanup(&bo->global_state);
if (bo->res)
bo->res->Release();
uint64_t mask = bo->local_context_state_mask;
while (mask) {
int ctxid = u_bit_scan64(&mask);
d3d12_destroy_context_state_table_entry(&bo->local_context_states[ctxid]);
}
FREE(bo);
}
}

View file

@ -59,6 +59,9 @@ struct d3d12_bo {
int64_t last_used_timestamp;
uint64_t last_used_fence;
enum d3d12_residency_status residency_status;
unsigned local_context_state_mask;
d3d12_context_state_table_entry local_context_states[16];
};
struct d3d12_buffer {

View file

@ -187,13 +187,12 @@ copy_resource_state(d3d12_resource_state *dest, d3d12_resource_state *src)
}
}
static void
destroy_context_state_table_entry(d3d12_context_state_table_entry *entry)
void
d3d12_destroy_context_state_table_entry(d3d12_context_state_table_entry *entry)
{
desired_resource_state_cleanup(&entry->desired);
d3d12_resource_state_cleanup(&entry->batch_begin);
d3d12_resource_state_cleanup(&entry->batch_end);
free(entry);
}
void
@ -206,8 +205,10 @@ d3d12_context_state_table_init(struct d3d12_context *ctx)
void
d3d12_context_state_table_destroy(struct d3d12_context *ctx)
{
hash_table_foreach(ctx->bo_state_table->table, entry)
destroy_context_state_table_entry((d3d12_context_state_table_entry *)entry->data);
hash_table_foreach(ctx->bo_state_table->table, entry) {
d3d12_destroy_context_state_table_entry((d3d12_context_state_table_entry *)entry->data);
free(entry->data);
}
_mesa_hash_table_u64_destroy(ctx->bo_state_table);
util_dynarray_fini(&ctx->barrier_scratch);
if (ctx->state_fixup_cmdlist)
@ -240,17 +241,28 @@ init_state_table_entry(d3d12_context_state_table_entry *bo_state, d3d12_bo *bo)
/* We'll never need state fixups for simultaneous access resources, so don't bother initializing this second state */
if (!supports_simultaneous_access)
d3d12_resource_state_init(&bo_state->batch_begin, subresource_count, supports_simultaneous_access);
else
memset(&bo_state->batch_begin, 0, sizeof(bo_state->batch_begin));
}
static d3d12_context_state_table_entry *
find_or_create_state_entry(struct hash_table_u64 *table, d3d12_bo *bo)
find_or_create_state_entry(struct d3d12_context *ctx, d3d12_bo *bo)
{
if (ctx->id != D3D12_CONTEXT_NO_ID) {
unsigned context_bit = 1 << ctx->id;
if ((bo->local_context_state_mask & context_bit) == 0) {
init_state_table_entry(&bo->local_context_states[ctx->id], bo);
bo->local_context_state_mask |= context_bit;
}
return &bo->local_context_states[ctx->id];
}
d3d12_context_state_table_entry *bo_state =
(d3d12_context_state_table_entry *) _mesa_hash_table_u64_search(table, bo->unique_id);
(d3d12_context_state_table_entry *) _mesa_hash_table_u64_search(ctx->bo_state_table, bo->unique_id);
if (!bo_state) {
bo_state = CALLOC_STRUCT(d3d12_context_state_table_entry);
init_state_table_entry(bo_state, bo);
_mesa_hash_table_u64_insert(table, bo->unique_id, bo_state);
_mesa_hash_table_u64_insert(ctx->bo_state_table, bo->unique_id, bo_state);
}
return bo_state;
}
@ -330,7 +342,7 @@ d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct d3d12_b
util_dynarray_foreach(&ctx->recently_destroyed_bos, uint64_t, id) {
void *data = _mesa_hash_table_u64_search(ctx->bo_state_table, *id);
if (data)
destroy_context_state_table_entry((d3d12_context_state_table_entry *)data);
d3d12_destroy_context_state_table_entry((d3d12_context_state_table_entry *)data);
_mesa_hash_table_u64_remove(ctx->bo_state_table, *id);
}
@ -338,7 +350,7 @@ d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct d3d12_b
hash_table_foreach(batch->bos, bo_entry) {
d3d12_bo *bo = (d3d12_bo *)bo_entry->key;
d3d12_context_state_table_entry *bo_state = find_or_create_state_entry(ctx->bo_state_table, bo);
d3d12_context_state_table_entry *bo_state = find_or_create_state_entry(ctx, bo);
if (!bo_state->batch_end.supports_simultaneous_access) {
assert(bo->res && bo->global_state.subresource_states);
@ -447,7 +459,7 @@ d3d12_transition_resource_state(struct d3d12_context *ctx,
if (flags & D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS)
d3d12_invalidate_context_bindings(ctx, res);
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo);
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo);
if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) {
set_desired_resource_state(&state_entry->desired, state);
_mesa_set_add(ctx->pending_barriers_bos, res->bo);
@ -472,7 +484,7 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx,
if(flags & D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS)
d3d12_invalidate_context_bindings(ctx, res);
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo);
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo);
bool is_whole_resource = num_levels * num_layers * num_planes == state_entry->batch_end.num_subresources;
bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0;
@ -509,7 +521,7 @@ d3d12_apply_resource_states(struct d3d12_context *ctx, bool is_implicit_dispatch
set_foreach_remove(ctx->pending_barriers_bos, entry) {
d3d12_bo *bo = (d3d12_bo *)entry->key;
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, bo);
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, bo);
d3d12_desired_resource_state *destination_state = &state_entry->desired;
d3d12_resource_state *current_state = &state_entry->batch_end;

View file

@ -98,4 +98,7 @@ d3d12_context_state_table_destroy(struct d3d12_context *ctx);
bool
d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct d3d12_batch *batch);
void
d3d12_destroy_context_state_table_entry(d3d12_context_state_table_entry* entry);
#endif // D3D12_RESOURCE_STATE_H