From e4def8a61c3853705a1f8aca56550b63a533442e Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Thu, 23 Apr 2026 11:42:16 +0200 Subject: [PATCH] radeonsi: add si_context.c And move out most si_context code to the new file. Reviewed-by: David Rosca Reviewed-by: Qiang Yu Part-of: --- src/gallium/drivers/radeonsi/gfx/si_gfx.h | 2 - .../drivers/radeonsi/gfx/si_gfx_context.c | 15 + src/gallium/drivers/radeonsi/meson.build | 1 + src/gallium/drivers/radeonsi/si_context.c | 328 ++++++++++++++ src/gallium/drivers/radeonsi/si_pipe.c | 410 ------------------ src/gallium/drivers/radeonsi/si_pipe.h | 9 +- src/gallium/drivers/radeonsi/si_shader.h | 2 + .../drivers/radeonsi/si_state_shaders.cpp | 21 + 8 files changed, 371 insertions(+), 417 deletions(-) create mode 100644 src/gallium/drivers/radeonsi/si_context.c diff --git a/src/gallium/drivers/radeonsi/gfx/si_gfx.h b/src/gallium/drivers/radeonsi/gfx/si_gfx.h index a1b52541a22..f71e4617c7e 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_gfx.h +++ b/src/gallium/drivers/radeonsi/gfx/si_gfx.h @@ -24,8 +24,6 @@ struct ac_llvm_compiler; MESAPROC bool si_init_gfx_context(struct si_screen *sscreen, struct si_context *sctx, unsigned flags) TAILB; MESAPROC void si_fini_gfx_context(struct si_context *sctx) TAILV; void si_destroy_llvm_compiler(struct ac_llvm_compiler *compiler); -MESAPROC void si_get_scratch_tmpring_size(struct si_context *sctx, unsigned bytes_per_wave, - bool is_compute, unsigned *spi_tmpring_size) TAILV; void si_init_aux_async_compute_ctx(struct si_screen *sscreen); /* si_gfx_screen.c */ diff --git a/src/gallium/drivers/radeonsi/gfx/si_gfx_context.c b/src/gallium/drivers/radeonsi/gfx/si_gfx_context.c index 10fe24e9642..016d5b27737 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_gfx_context.c +++ b/src/gallium/drivers/radeonsi/gfx/si_gfx_context.c @@ -17,6 +17,21 @@ #include "util/hash_table.h" #include "driver_ddebug/dd_util.h" +void si_init_aux_async_compute_ctx(struct si_screen *sscreen) +{ + assert(!sscreen->async_compute_context); + sscreen->async_compute_context = + si_create_context(&sscreen->b, + SI_CONTEXT_FLAG_AUX | + PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET | + (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) | + PIPE_CONTEXT_COMPUTE_ONLY); + + /* Limit the numbers of waves allocated for this context. */ + if (sscreen->async_compute_context) + ((struct si_context*)sscreen->async_compute_context)->cs_max_waves_per_sh = 2; +} + struct ac_llvm_compiler *si_create_llvm_compiler(struct si_screen *sscreen) { #if AMD_LLVM_AVAILABLE diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index 9720049a919..c16288ad61e 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -26,6 +26,7 @@ files_libradeonsi = files( 'si_buffer.c', 'si_build_pm4.h', 'si_clear.c', + 'si_context.c', 'si_cp_dma.c', 'si_cp_utils.c', 'si_debug.c', diff --git a/src/gallium/drivers/radeonsi/si_context.c b/src/gallium/drivers/radeonsi/si_context.c new file mode 100644 index 00000000000..febe32fe4e3 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_context.c @@ -0,0 +1,328 @@ +/* + * Copyright 2010 Jerome Glisse + * Copyright 2018 Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: MIT + */ + +#include "si_pipe.h" + +#include "compiler/nir/nir.h" +#include "gfx/si_gfx.h" +#include "mm/si_mm.h" + +static void decref_implicit_resource(struct hash_entry *entry) +{ + pipe_resource_reference((struct pipe_resource**)&entry->data, NULL); +} + +static void si_destroy_context(struct pipe_context *context) +{ + struct si_context *sctx = (struct si_context *)context; + + si_fini_gfx_context(sctx); + si_fini_mm_context(sctx); + + if (sctx->ctx) + sctx->ws->ctx_destroy(sctx->ctx); + + if (sctx->dirty_implicit_resources) + _mesa_hash_table_destroy(sctx->dirty_implicit_resources, + decref_implicit_resource); + + if (sctx->b.stream_uploader) + u_upload_destroy(sctx->b.stream_uploader); + if (sctx->b.const_uploader && sctx->b.const_uploader != sctx->b.stream_uploader) + u_upload_destroy(sctx->b.const_uploader); + if (sctx->cached_gtt_allocator) + u_upload_destroy(sctx->cached_gtt_allocator); + + slab_destroy_child(&sctx->pool_transfers); + slab_destroy_child(&sctx->pool_transfers_unsync); + + u_suballocator_destroy(&sctx->allocator_zeroed_memory); + + _mesa_hash_table_destroy(sctx->tex_handles, NULL); + _mesa_hash_table_destroy(sctx->img_handles, NULL); + + util_dynarray_fini(&sctx->resident_tex_handles); + util_dynarray_fini(&sctx->resident_img_handles); + util_dynarray_fini(&sctx->resident_tex_needs_color_decompress); + util_dynarray_fini(&sctx->resident_img_needs_color_decompress); + util_dynarray_fini(&sctx->resident_tex_needs_depth_decompress); + + if (!(sctx->context_flags & SI_CONTEXT_FLAG_AUX)) + p_atomic_dec(&context->screen->num_contexts); + + FREE(sctx); +} + +static enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx) +{ + struct si_context *sctx = (struct si_context *)ctx; + if (sctx->context_flags & SI_CONTEXT_FLAG_AUX) + return PIPE_NO_RESET; + + bool needs_reset, reset_completed; + enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx, false, + &needs_reset, &reset_completed); + + if (status != PIPE_NO_RESET) { + if (sctx->has_reset_been_notified && reset_completed) + return PIPE_NO_RESET; + + sctx->has_reset_been_notified = true; + + if (!(sctx->context_flags & SI_CONTEXT_FLAG_AUX)) { + /* Call the gallium frontend to set a no-op API dispatch. */ + if (needs_reset && sctx->device_reset_callback.reset) + sctx->device_reset_callback.reset(sctx->device_reset_callback.data, status); + } + } + return status; +} + +static void si_set_device_reset_callback(struct pipe_context *ctx, + const struct pipe_device_reset_callback *cb) +{ + struct si_context *sctx = (struct si_context *)ctx; + + if (cb) + sctx->device_reset_callback = *cb; + else + memset(&sctx->device_reset_callback, 0, sizeof(sctx->device_reset_callback)); +} + +static void si_set_debug_callback(struct pipe_context *ctx, const struct util_debug_callback *cb) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct si_screen *screen = sctx->screen; + + util_queue_finish(&screen->shader_compiler_queue); + util_queue_finish(&screen->shader_compiler_queue_opt_variants); + + if (cb) + sctx->debug = *cb; + else + memset(&sctx->debug, 0, sizeof(sctx->debug)); +} + +static void si_set_log_context(struct pipe_context *ctx, struct u_log_context *log) +{ + struct si_context *sctx = (struct si_context *)ctx; + sctx->log = log; + + if (log) + u_log_add_auto_logger(log, si_auto_log_cs, sctx); +} + +static void si_set_context_param(struct pipe_context *ctx, enum pipe_context_param param, + unsigned value) +{ + struct radeon_winsys *ws = ((struct si_context *)ctx)->ws; + + switch (param) { + case PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING: + ws->pin_threads_to_L3_cache(ws, value); + break; + default:; + } +} + +static void si_set_frontend_noop(struct pipe_context *ctx, bool enable) +{ + struct si_context *sctx = (struct si_context *)ctx; + + ctx->flush(ctx, NULL, PIPE_FLUSH_ASYNC); + sctx->is_noop = enable; +} + +struct pipe_context *si_create_context(struct pipe_screen *screen, unsigned flags) +{ + struct si_screen *sscreen = (struct si_screen *)screen; + STATIC_ASSERT(DBG_COUNT <= 64); + + struct si_context *sctx = CALLOC_STRUCT(si_context); + + if (!sctx) { + mesa_loge("can't allocate a context"); + return NULL; + } + + sctx->b.screen = screen; /* this must be set first */ + sctx->b.priv = NULL; + sctx->b.destroy = si_destroy_context; + sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ + sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; + sctx->context_flags = flags; + + slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers); + slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers); + + sctx->ws = sscreen->ws; + sctx->family = sscreen->info.family; + sctx->gfx_level = sscreen->info.gfx_level; + sctx->vcn_ip_ver = sscreen->info.vcn_ip_version; + + /* Initialize the context handle and the command stream. */ + sctx->ctx = sctx->ws->ctx_create(sctx->ws, sctx->context_flags); + if (!sctx->ctx) { + mesa_loge("can't create radeon_winsys_ctx"); + goto fail; + } + + /* Initialize private allocators. */ + u_suballocator_init(&sctx->allocator_zeroed_memory, &sctx->b, 128 * 1024, 0, + PIPE_USAGE_DEFAULT, + SI_RESOURCE_FLAG_CLEAR | SI_RESOURCE_FLAG_32BIT, false); + + sctx->cached_gtt_allocator = u_upload_create(&sctx->b, 16 * 1024, 0, PIPE_USAGE_STAGING, 0); + if (!sctx->cached_gtt_allocator) { + mesa_loge("can't create cached_gtt_allocator"); + goto fail; + } + + /* Initialize public allocators. Unify uploaders as follows: + * - dGPUs: The const uploader writes to VRAM and the stream uploader writes to RAM. + * - APUs: There is only one uploader instance writing to RAM. VRAM has the same perf on APUs. + */ + bool is_apu = !sscreen->info.has_dedicated_vram; + sctx->b.stream_uploader = + u_upload_create(&sctx->b, 1024 * 1024, 0, + sscreen->debug_flags & DBG(NO_WC_STREAM) ? PIPE_USAGE_STAGING + : PIPE_USAGE_STREAM, + SI_RESOURCE_FLAG_32BIT); /* same flags as const_uploader */ + if (!sctx->b.stream_uploader) { + mesa_loge("can't create stream_uploader"); + goto fail; + } + + if (is_apu) { + sctx->b.const_uploader = sctx->b.stream_uploader; + } else { + sctx->b.const_uploader = + u_upload_create(&sctx->b, 256 * 1024, 0, PIPE_USAGE_DEFAULT, + SI_RESOURCE_FLAG_32BIT); + if (!sctx->b.const_uploader) { + mesa_loge("can't create const_uploader"); + goto fail; + } + } + + sctx->b.set_debug_callback = si_set_debug_callback; + sctx->b.set_log_context = si_set_log_context; + sctx->b.set_context_param = si_set_context_param; + sctx->b.get_device_reset_status = si_get_reset_status; + sctx->b.set_device_reset_callback = si_set_device_reset_callback; + sctx->b.set_frontend_noop = si_set_frontend_noop; + + list_inithead(&sctx->active_queries); + si_init_buffer_functions(sctx); + si_init_fence_functions(sctx); + si_init_context_texture_functions(sctx); + + /* Bindless handles. */ + sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + + sctx->resident_tex_handles = UTIL_DYNARRAY_INIT; + sctx->resident_img_handles = UTIL_DYNARRAY_INIT; + sctx->resident_tex_needs_color_decompress = UTIL_DYNARRAY_INIT; + sctx->resident_img_needs_color_decompress = UTIL_DYNARRAY_INIT; + sctx->resident_tex_needs_depth_decompress = UTIL_DYNARRAY_INIT; + + sctx->dirty_implicit_resources = _mesa_pointer_hash_table_create(NULL); + if (!sctx->dirty_implicit_resources) { + mesa_loge("can't create dirty_implicit_resources"); + goto fail; + } + + if (!(flags & PIPE_CONTEXT_MEDIA_ONLY)) { + if (!si_init_gfx_context(sscreen, sctx, flags)) + goto fail; + } + + /* PIPE_CONTEXT_COMPUTE_ONLY doesn't mean no multimedia, it means no graphics so always + * init mm but don't fail if it reports an error. + */ + si_init_mm_context(sscreen, sctx, flags); + + if (!(flags & SI_CONTEXT_FLAG_AUX)) { + p_atomic_inc(&screen->num_contexts); + + /* Check if the aux_context needs to be recreated */ + for (unsigned i = 0; i < ARRAY_SIZE(sscreen->aux_contexts); i++) { + if (!sscreen->aux_contexts[i].ctx) + continue; + + struct si_context *saux = si_get_aux_context(sscreen, &sscreen->aux_contexts[i]); + enum pipe_reset_status status = + sctx->ws->ctx_query_reset_status(saux->ctx, true, NULL, NULL); + + if (status != PIPE_NO_RESET) { + /* We lost the aux_context, create a new one */ + unsigned context_flags = saux->context_flags; + saux->b.destroy(&saux->b); + + saux = (struct si_context *)si_create_context(&sscreen->b, context_flags); + if (sscreen->options.aux_debug) + saux->b.set_log_context(&saux->b, &sscreen->aux_contexts[i].log); + + sscreen->aux_contexts[i].ctx = &saux->b; + } + si_put_aux_context_flush(&sscreen->aux_contexts[i]); + } + + simple_mtx_lock(&sscreen->async_compute_context_lock); + if (sscreen->async_compute_context) { + struct si_context *compute_ctx = (struct si_context*)sscreen->async_compute_context; + enum pipe_reset_status status = + sctx->ws->ctx_query_reset_status(compute_ctx->ctx, true, NULL, NULL); + + if (status != PIPE_NO_RESET) { + sscreen->async_compute_context->destroy(sscreen->async_compute_context); + sscreen->async_compute_context = NULL; + } + } + simple_mtx_unlock(&sscreen->async_compute_context_lock); + + si_reset_debug_log_buffer(sctx); + } + + return &sctx->b; +fail: + mesa_loge("Failed to create a context."); + si_destroy_context(&sctx->b); + return NULL; +} + +struct si_context *si_get_aux_context(struct si_screen *sscreen, struct si_aux_context *actx) +{ + mtx_lock(&actx->lock); + /* Init aux_context on demand. */ + if (actx->ctx == NULL) { + bool compute = !sscreen->info.has_graphics || + actx == &sscreen->aux_context.compute_resource_init || + actx == &sscreen->aux_context.shader_upload; + actx->ctx = + si_create_context(&sscreen->b, + SI_CONTEXT_FLAG_AUX | PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET | + (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) | + (compute ? PIPE_CONTEXT_COMPUTE_ONLY : 0)); + assert(actx->ctx); + + if (sscreen->options.aux_debug) { + u_log_context_init(&actx->log); + + struct pipe_context *ctx = actx->ctx; + ctx->set_log_context(ctx, &actx->log); + } + } + return (struct si_context*)actx->ctx; +} + +void si_put_aux_context_flush(struct si_aux_context *ctx) +{ + ctx->ctx->flush(ctx->ctx, NULL, 0); + mtx_unlock(&ctx->lock); +} diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 3ec1f3c5e0a..edbebaa1a46 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -12,21 +12,12 @@ #include "driver_ddebug/dd_util.h" #include "si_public.h" #include "sid.h" -#include "ac_shader_util.h" -#include "ac_shadowed_regs.h" -#include "compiler/nir/nir.h" -#include "util/disk_cache.h" -#include "util/hex.h" -#include "util/u_cpu_detect.h" #include "util/u_memory.h" #include "util/u_suballoc.h" #include "util/u_tests.h" #include "util/u_upload_mgr.h" #include "util/xmlconfig.h" #include "si_utrace.h" -#include "si_video.h" - -#include "aco_interface.h" #if AMD_LLVM_AVAILABLE #include "ac_llvm_util.h" @@ -38,8 +29,6 @@ #include -static struct pipe_context *si_create_context(struct pipe_screen *screen, unsigned flags); - static const struct debug_named_value radeonsi_debug_options[] = { /* Information logging options: */ {"info", DBG(INFO), "Print driver information"}, @@ -109,174 +98,6 @@ static const struct debug_named_value test_options[] = { DEBUG_NAMED_VALUE_END /* must be last */ }; -void si_init_aux_async_compute_ctx(struct si_screen *sscreen) -{ - assert(!sscreen->async_compute_context); - sscreen->async_compute_context = - si_create_context(&sscreen->b, - SI_CONTEXT_FLAG_AUX | - PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET | - (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) | - PIPE_CONTEXT_COMPUTE_ONLY); - - /* Limit the numbers of waves allocated for this context. */ - if (sscreen->async_compute_context) - ((struct si_context*)sscreen->async_compute_context)->cs_max_waves_per_sh = 2; -} - -static void decref_implicit_resource(struct hash_entry *entry) -{ - pipe_resource_reference((struct pipe_resource**)&entry->data, NULL); -} - -/* - * pipe_context - */ -static void si_destroy_context(struct pipe_context *context) -{ - struct si_context *sctx = (struct si_context *)context; - - si_fini_gfx_context(sctx); - si_fini_mm_context(sctx); - - if (sctx->ctx) - sctx->ws->ctx_destroy(sctx->ctx); - - if (sctx->dirty_implicit_resources) - _mesa_hash_table_destroy(sctx->dirty_implicit_resources, - decref_implicit_resource); - - if (sctx->b.stream_uploader) - u_upload_destroy(sctx->b.stream_uploader); - if (sctx->b.const_uploader && sctx->b.const_uploader != sctx->b.stream_uploader) - u_upload_destroy(sctx->b.const_uploader); - if (sctx->cached_gtt_allocator) - u_upload_destroy(sctx->cached_gtt_allocator); - - slab_destroy_child(&sctx->pool_transfers); - slab_destroy_child(&sctx->pool_transfers_unsync); - - u_suballocator_destroy(&sctx->allocator_zeroed_memory); - - _mesa_hash_table_destroy(sctx->tex_handles, NULL); - _mesa_hash_table_destroy(sctx->img_handles, NULL); - - util_dynarray_fini(&sctx->resident_tex_handles); - util_dynarray_fini(&sctx->resident_img_handles); - util_dynarray_fini(&sctx->resident_tex_needs_color_decompress); - util_dynarray_fini(&sctx->resident_img_needs_color_decompress); - util_dynarray_fini(&sctx->resident_tex_needs_depth_decompress); - - if (!(sctx->context_flags & SI_CONTEXT_FLAG_AUX)) - p_atomic_dec(&context->screen->num_contexts); - - FREE(sctx); -} - -static enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx) -{ - struct si_context *sctx = (struct si_context *)ctx; - if (sctx->context_flags & SI_CONTEXT_FLAG_AUX) - return PIPE_NO_RESET; - - bool needs_reset, reset_completed; - enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx, false, - &needs_reset, &reset_completed); - - if (status != PIPE_NO_RESET) { - if (sctx->has_reset_been_notified && reset_completed) - return PIPE_NO_RESET; - - sctx->has_reset_been_notified = true; - - if (!(sctx->context_flags & SI_CONTEXT_FLAG_AUX)) { - /* Call the gallium frontend to set a no-op API dispatch. */ - if (needs_reset && sctx->device_reset_callback.reset) - sctx->device_reset_callback.reset(sctx->device_reset_callback.data, status); - } - } - return status; -} - -static void si_set_device_reset_callback(struct pipe_context *ctx, - const struct pipe_device_reset_callback *cb) -{ - struct si_context *sctx = (struct si_context *)ctx; - - if (cb) - sctx->device_reset_callback = *cb; - else - memset(&sctx->device_reset_callback, 0, sizeof(sctx->device_reset_callback)); -} - -/* Apitrace profiling: - * 1) qapitrace : Tools -> Profile: Measure CPU & GPU times - * 2) In the middle panel, zoom in (mouse wheel) on some bad draw call - * and remember its number. - * 3) In Mesa, enable queries and performance counters around that draw - * call and print the results. - * 4) glretrace --benchmark --markers .. - */ -static void si_emit_string_marker(struct pipe_context *ctx, const char *string, int len) -{ - struct si_context *sctx = (struct si_context *)ctx; - - dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number); - - if (sctx->sqtt_enabled) - si_write_user_event(sctx, &sctx->gfx_cs, UserEventTrigger, string, len); - - if (sctx->log) - u_log_printf(sctx->log, "\nString marker: %*s\n", len, string); -} - -static void si_set_debug_callback(struct pipe_context *ctx, const struct util_debug_callback *cb) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_screen *screen = sctx->screen; - - util_queue_finish(&screen->shader_compiler_queue); - util_queue_finish(&screen->shader_compiler_queue_opt_variants); - - if (cb) - sctx->debug = *cb; - else - memset(&sctx->debug, 0, sizeof(sctx->debug)); -} - -static void si_set_log_context(struct pipe_context *ctx, struct u_log_context *log) -{ - struct si_context *sctx = (struct si_context *)ctx; - sctx->log = log; - - if (log) - u_log_add_auto_logger(log, si_auto_log_cs, sctx); -} - -static void si_set_context_param(struct pipe_context *ctx, enum pipe_context_param param, - unsigned value) -{ - struct radeon_winsys *ws = ((struct si_context *)ctx)->ws; - - switch (param) { - case PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING: - ws->pin_threads_to_L3_cache(ws, value); - break; - default:; - } -} - -static void si_set_frontend_noop(struct pipe_context *ctx, bool enable) -{ - struct si_context *sctx = (struct si_context *)ctx; - - ctx->flush(ctx, NULL, PIPE_FLUSH_ASYNC); - sctx->is_noop = enable; -} - -/* Function used by the pipe_loader to decide which driver to use when - * the KMD is virtio_gpu. - */ bool si_virtgpu_probe_nctx(int fd, const struct virgl_renderer_capset_drm *caps) { #ifdef HAVE_AMDGPU_VIRTIO @@ -286,186 +107,6 @@ bool si_virtgpu_probe_nctx(int fd, const struct virgl_renderer_capset_drm *caps) #endif } -struct pipe_context *si_create_context(struct pipe_screen *screen, unsigned flags) -{ - struct si_screen *sscreen = (struct si_screen *)screen; - STATIC_ASSERT(DBG_COUNT <= 64); - - struct si_context *sctx = CALLOC_STRUCT(si_context); - - if (!sctx) { - mesa_loge("can't allocate a context"); - return NULL; - } - - sctx->b.screen = screen; /* this must be set first */ - sctx->b.priv = NULL; - sctx->b.destroy = si_destroy_context; - sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ - sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; - sctx->context_flags = flags; - - slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers); - slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers); - - sctx->ws = sscreen->ws; - sctx->family = sscreen->info.family; - sctx->gfx_level = sscreen->info.gfx_level; - sctx->vcn_ip_ver = sscreen->info.vcn_ip_version; - - /* Initialize the context handle and the command stream. */ - sctx->ctx = sctx->ws->ctx_create(sctx->ws, sctx->context_flags); - if (!sctx->ctx) { - mesa_loge("can't create radeon_winsys_ctx"); - goto fail; - } - - /* Initialize private allocators. */ - u_suballocator_init(&sctx->allocator_zeroed_memory, &sctx->b, 128 * 1024, 0, - PIPE_USAGE_DEFAULT, - SI_RESOURCE_FLAG_CLEAR | SI_RESOURCE_FLAG_32BIT, false); - - sctx->cached_gtt_allocator = u_upload_create(&sctx->b, 16 * 1024, 0, PIPE_USAGE_STAGING, 0); - if (!sctx->cached_gtt_allocator) { - mesa_loge("can't create cached_gtt_allocator"); - goto fail; - } - - /* Initialize public allocators. Unify uploaders as follows: - * - dGPUs: The const uploader writes to VRAM and the stream uploader writes to RAM. - * - APUs: There is only one uploader instance writing to RAM. VRAM has the same perf on APUs. - */ - bool is_apu = !sscreen->info.has_dedicated_vram; - sctx->b.stream_uploader = - u_upload_create(&sctx->b, 1024 * 1024, 0, - sscreen->debug_flags & DBG(NO_WC_STREAM) ? PIPE_USAGE_STAGING - : PIPE_USAGE_STREAM, - SI_RESOURCE_FLAG_32BIT); /* same flags as const_uploader */ - if (!sctx->b.stream_uploader) { - mesa_loge("can't create stream_uploader"); - goto fail; - } - - if (is_apu) { - sctx->b.const_uploader = sctx->b.stream_uploader; - } else { - sctx->b.const_uploader = - u_upload_create(&sctx->b, 256 * 1024, 0, PIPE_USAGE_DEFAULT, - SI_RESOURCE_FLAG_32BIT); - if (!sctx->b.const_uploader) { - mesa_loge("can't create const_uploader"); - goto fail; - } - } - - sctx->b.set_debug_callback = si_set_debug_callback; - sctx->b.set_log_context = si_set_log_context; - sctx->b.set_context_param = si_set_context_param; - sctx->b.get_device_reset_status = si_get_reset_status; - sctx->b.set_device_reset_callback = si_set_device_reset_callback; - sctx->b.set_frontend_noop = si_set_frontend_noop; - - list_inithead(&sctx->active_queries); - si_init_buffer_functions(sctx); - si_init_fence_functions(sctx); - si_init_context_texture_functions(sctx); - - /* Bindless handles. */ - sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - - sctx->resident_tex_handles = UTIL_DYNARRAY_INIT; - sctx->resident_img_handles = UTIL_DYNARRAY_INIT; - sctx->resident_tex_needs_color_decompress = UTIL_DYNARRAY_INIT; - sctx->resident_img_needs_color_decompress = UTIL_DYNARRAY_INIT; - sctx->resident_tex_needs_depth_decompress = UTIL_DYNARRAY_INIT; - - sctx->dirty_implicit_resources = _mesa_pointer_hash_table_create(NULL); - if (!sctx->dirty_implicit_resources) { - mesa_loge("can't create dirty_implicit_resources"); - goto fail; - } - - if (!(flags & PIPE_CONTEXT_MEDIA_ONLY)) { - if (!si_init_gfx_context(sscreen, sctx, flags)) - goto fail; - } - - /* PIPE_CONTEXT_COMPUTE_ONLY doesn't mean no multimedia, it means no graphics so always - * init mm but don't fail if it reports an error. - */ - si_init_mm_context(sscreen, sctx, flags); - - if (!(flags & SI_CONTEXT_FLAG_AUX)) { - p_atomic_inc(&screen->num_contexts); - - /* Check if the aux_context needs to be recreated */ - for (unsigned i = 0; i < ARRAY_SIZE(sscreen->aux_contexts); i++) { - if (!sscreen->aux_contexts[i].ctx) - continue; - - struct si_context *saux = si_get_aux_context(sscreen, &sscreen->aux_contexts[i]); - enum pipe_reset_status status = - sctx->ws->ctx_query_reset_status(saux->ctx, true, NULL, NULL); - - if (status != PIPE_NO_RESET) { - /* We lost the aux_context, create a new one */ - unsigned context_flags = saux->context_flags; - saux->b.destroy(&saux->b); - - saux = (struct si_context *)si_create_context(&sscreen->b, context_flags); - if (sscreen->options.aux_debug) - saux->b.set_log_context(&saux->b, &sscreen->aux_contexts[i].log); - - sscreen->aux_contexts[i].ctx = &saux->b; - } - si_put_aux_context_flush(&sscreen->aux_contexts[i]); - } - - simple_mtx_lock(&sscreen->async_compute_context_lock); - if (sscreen->async_compute_context) { - struct si_context *compute_ctx = (struct si_context*)sscreen->async_compute_context; - enum pipe_reset_status status = - sctx->ws->ctx_query_reset_status(compute_ctx->ctx, true, NULL, NULL); - - if (status != PIPE_NO_RESET) { - sscreen->async_compute_context->destroy(sscreen->async_compute_context); - sscreen->async_compute_context = NULL; - } - } - simple_mtx_unlock(&sscreen->async_compute_context_lock); - - si_reset_debug_log_buffer(sctx); - } - - return &sctx->b; -fail: - mesa_loge("Failed to create a context."); - si_destroy_context(&sctx->b); - return NULL; -} - -void -si_get_scratch_tmpring_size(struct si_context *sctx, unsigned bytes_per_wave, - bool is_compute, unsigned *spi_tmpring_size) -{ - bytes_per_wave = ac_compute_scratch_wavesize(&sctx->screen->info, bytes_per_wave); - - if (is_compute) { - sctx->max_seen_compute_scratch_bytes_per_wave = - MAX2(sctx->max_seen_compute_scratch_bytes_per_wave, bytes_per_wave); - } else { - sctx->max_seen_scratch_bytes_per_wave = - MAX2(sctx->max_seen_scratch_bytes_per_wave, bytes_per_wave); - } - - /* TODO: We could decrease WAVES to make the whole buffer fit into the infinity cache. */ - ac_get_scratch_tmpring_size(&sctx->screen->info, sctx->screen->info.max_scratch_waves, - is_compute ? sctx->max_seen_compute_scratch_bytes_per_wave - : sctx->max_seen_scratch_bytes_per_wave, - spi_tmpring_size); -} - static bool si_is_resource_busy(struct pipe_screen *screen, struct pipe_resource *resource, unsigned usage) { @@ -492,26 +133,6 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen, v ctx = si_create_context(screen, flags); sctx = (struct si_context *)ctx; - if (ctx && sscreen->info.gfx_level >= GFX9 && sscreen->debug_flags & DBG(SQTT)) { - /* Auto-enable stable performance profile if possible. */ - if (screen->num_contexts == 1) - sscreen->ws->cs_set_pstate(&sctx->gfx_cs, RADEON_CTX_PSTATE_PEAK); - - if (ac_check_profile_state(&sscreen->info)) { - mesa_loge("Canceling RGP trace request as a hang condition has been " - "detected. Force the GPU into a profiling mode with e.g. " - "\"echo profile_peak > " - "/sys/class/drm/card0/device/power_dpm_force_performance_level\""); - } else { - if (!si_init_sqtt(sctx)) { - FREE(ctx); - return NULL; - } - - si_handle_sqtt(sctx, &sctx->gfx_cs); - } - } - if (!(flags & PIPE_CONTEXT_PREFER_THREADED)) return ctx; @@ -740,34 +361,3 @@ struct pipe_screen *radeonsi_screen_create(int fd, const struct pipe_screen_conf drmFreeVersion(version); return rw ? rw->screen : NULL; } - -struct si_context *si_get_aux_context(struct si_screen *sscreen, struct si_aux_context *actx) -{ - mtx_lock(&actx->lock); - /* Init aux_context on demand. */ - if (actx->ctx == NULL) { - bool compute = !sscreen->info.has_graphics || - actx == &sscreen->aux_context.compute_resource_init || - actx == &sscreen->aux_context.shader_upload; - actx->ctx = - si_create_context(&sscreen->b, - SI_CONTEXT_FLAG_AUX | PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET | - (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) | - (compute ? PIPE_CONTEXT_COMPUTE_ONLY : 0)); - assert(actx->ctx); - - if (sscreen->options.aux_debug) { - u_log_context_init(&actx->log); - - struct pipe_context *ctx = actx->ctx; - ctx->set_log_context(ctx, &actx->log); - } - } - return (struct si_context*)actx->ctx; -} - -void si_put_aux_context_flush(struct si_aux_context *ctx) -{ - ctx->ctx->flush(ctx->ctx, NULL, 0); - mtx_unlock(&ctx->lock); -} diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 21f5d3fc2ac..b14aba06a13 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1621,13 +1621,12 @@ MESAPROC void si_init_compute_functions(struct si_context *sctx) TAILV; /* si_pipe.c */ struct ac_llvm_compiler *si_create_llvm_compiler(struct si_screen *sscreen); -void si_destroy_llvm_compiler(struct ac_llvm_compiler *compiler); -void si_init_aux_async_compute_ctx(struct si_screen *sscreen); +void si_destroy_screen(struct pipe_screen *pscreen); + +/* si_context.c */ +struct pipe_context *si_create_context(struct pipe_screen *screen, unsigned flags); struct si_context *si_get_aux_context(struct si_screen *sscreen, struct si_aux_context *ctx); void si_put_aux_context_flush(struct si_aux_context *ctx); -void si_get_scratch_tmpring_size(struct si_context *sctx, unsigned bytes_per_wave, - bool is_compute, unsigned *spi_tmpring_size); -void si_destroy_screen(struct pipe_screen *pscreen); /* si_perfcounters.c */ void si_init_perfcounters(struct si_screen *screen); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d1ce5850466..28dcd61a631 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -914,6 +914,8 @@ unsigned si_shader_lshs_vertex_stride(struct si_shader *ls); bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *shader); unsigned si_get_output_prim_simplified(const struct si_shader_selector *sel, const union si_shader_key *key); +MESAPROC void si_get_scratch_tmpring_size(struct si_context *sctx, unsigned bytes_per_wave, + bool is_compute, unsigned *spi_tmpring_size) TAILV; /* si_shader_binary.c */ unsigned si_get_shader_binary_size(struct si_screen *screen, struct si_shader *shader); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index ba480878ed3..b2fd0c01863 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4618,6 +4618,27 @@ static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index) } } +void +si_get_scratch_tmpring_size(struct si_context *sctx, unsigned bytes_per_wave, + bool is_compute, unsigned *spi_tmpring_size) +{ + bytes_per_wave = ac_compute_scratch_wavesize(&sctx->screen->info, bytes_per_wave); + + if (is_compute) { + sctx->max_seen_compute_scratch_bytes_per_wave = + MAX2(sctx->max_seen_compute_scratch_bytes_per_wave, bytes_per_wave); + } else { + sctx->max_seen_scratch_bytes_per_wave = + MAX2(sctx->max_seen_scratch_bytes_per_wave, bytes_per_wave); + } + + /* TODO: We could decrease WAVES to make the whole buffer fit into the infinity cache. */ + ac_get_scratch_tmpring_size(&sctx->screen->info, sctx->screen->info.max_scratch_waves, + is_compute ? sctx->max_seen_compute_scratch_bytes_per_wave + : sctx->max_seen_scratch_bytes_per_wave, + spi_tmpring_size); +} + void si_init_shader_functions(struct si_context *sctx) { sctx->atoms.s.vgt_pipeline_state.emit = si_emit_vgt_pipeline_state;