From 8f7f7a90b78d1636e9194bde40df9ecbde57262c Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Wed, 7 Jan 2026 11:16:29 +0100 Subject: [PATCH] radeonsi/sqtt: use pipe_aligned_buffer_create to allocate bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pipe_aligned_buffer_create can allow allocate 4GB but that's large enough for now. PIPE_USAGE_STREAM is used for now to keep the 2 BOs in GTT. Reviewed-by: Marek Olšák Part-of: --- src/amd/common/ac_spm.h | 2 +- src/amd/common/ac_sqtt.h | 2 +- src/gallium/drivers/radeonsi/si_perfcounter.c | 16 +++----- src/gallium/drivers/radeonsi/si_sqtt.c | 37 ++++++++++--------- 4 files changed, 27 insertions(+), 30 deletions(-) diff --git a/src/amd/common/ac_spm.h b/src/amd/common/ac_spm.h index 36ffa0a66a1..d692d1c3ebb 100644 --- a/src/amd/common/ac_spm.h +++ b/src/amd/common/ac_spm.h @@ -209,7 +209,7 @@ struct ac_spm_block_select { }; struct ac_spm { - /* struct radeon_winsys_bo or struct pb_buffer */ + /* struct radeon_winsys_bo or struct si_resource */ void *bo; void *ptr; uint8_t ptr_granularity; diff --git a/src/amd/common/ac_sqtt.h b/src/amd/common/ac_sqtt.h index 931c4a970c8..7148ef13ab2 100644 --- a/src/amd/common/ac_sqtt.h +++ b/src/amd/common/ac_sqtt.h @@ -37,7 +37,7 @@ struct ac_sqtt { /* Only used by RadeonSI */ void *start_cs[2]; void *stop_cs[2]; - /* VkBuffer or struct pb_buffer */ + /* VkBuffer or struct si_resource */ void *bo; uint64_t buffer_va; void *ptr; diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 9f0c5e6dc6d..3a90d56770e 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -670,17 +670,13 @@ void si_init_perfcounters(struct si_screen *screen) static bool si_spm_init_bo(struct si_context *sctx) { - struct radeon_winsys *ws = sctx->ws; uint64_t size = 32 * 1024 * 1024; /* Default to 32MB. */ sctx->spm.buffer_size = size; - sctx->spm.bo = ws->buffer_create( - ws, size, 4096, - RADEON_DOMAIN_GTT, - RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_GTT_WC | - RADEON_FLAG_NO_SUBALLOC); + sctx->spm.bo = + pipe_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_STREAM, size, 4096); return sctx->spm.bo != NULL; } @@ -690,7 +686,7 @@ si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs) { const enum amd_ip_type ip_type = sctx->ws->cs_get_ip_type(cs); struct ac_spm *spm = &sctx->spm; - uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm->bo); + uint64_t va = si_resource(spm->bo)->gpu_address; ac_emit_spm_setup(&cs->current, sctx->gfx_level, ip_type, spm, va); } @@ -721,8 +717,8 @@ si_spm_init(struct si_context *sctx) void si_spm_finish(struct si_context *sctx) { - struct pb_buffer_lean *bo = sctx->spm.bo; - radeon_bo_reference(sctx->screen->ws, &bo, NULL); + struct pipe_resource *bo = sctx->spm.bo; + pipe_resource_reference(&bo, NULL); ac_destroy_spm(&sctx->spm); } diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index 6e5d1d0bede..2cc8768c58c 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -23,7 +23,6 @@ si_emit_spi_config_cntl(struct si_context *sctx, static bool si_sqtt_init_bo(struct si_context *sctx) { unsigned max_se = sctx->screen->info.max_se; - struct radeon_winsys *ws = sctx->ws; uint64_t size; /* The buffer size and address need to be aligned in HW regs. Align the @@ -37,14 +36,16 @@ static bool si_sqtt_init_bo(struct si_context *sctx) 1ull << SQTT_BUFFER_ALIGN_SHIFT); size += sctx->sqtt->buffer_size * (uint64_t)max_se; + if (size > UINT32_MAX) + return false; + sctx->sqtt->bo = - ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_GTT, - RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_SUBALLOC); + pipe_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_STREAM, size, 4096); if (!sctx->sqtt->bo) return false; - sctx->sqtt->buffer_va = sctx->ws->buffer_get_virtual_address(sctx->sqtt->bo); + sctx->sqtt->buffer_va = si_resource(sctx->sqtt->bo)->gpu_address; return true; } @@ -120,10 +121,10 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs) } radeon_end(); - ws->cs_add_buffer(cs, sctx->sqtt->bo, RADEON_USAGE_READWRITE, + ws->cs_add_buffer(cs, si_resource(sctx->sqtt->bo)->buf, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); if (sctx->spm.bo) - ws->cs_add_buffer(cs, sctx->spm.bo, RADEON_USAGE_READWRITE, + ws->cs_add_buffer(cs, si_resource(sctx->spm.bo)->buf, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); si_cp_dma_wait_for_idle(sctx, cs); @@ -175,11 +176,11 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs) } radeon_end(); - ws->cs_add_buffer(cs, sctx->sqtt->bo, RADEON_USAGE_READWRITE, + ws->cs_add_buffer(cs, si_resource(sctx->sqtt->bo)->buf, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); if (sctx->spm.bo) - ws->cs_add_buffer(cs, sctx->spm.bo, RADEON_USAGE_READWRITE, + ws->cs_add_buffer(cs, si_resource(sctx->spm.bo)->buf, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); si_cp_dma_wait_for_idle(sctx, cs); @@ -258,19 +259,19 @@ static bool si_sqtt_resize_bo(struct si_context *sctx) { /* Destroy the previous thread trace BO. */ - struct pb_buffer_lean *bo = sctx->sqtt->bo; - radeon_bo_reference(sctx->screen->ws, &bo, NULL); + struct pipe_resource *bo = sctx->sqtt->bo; + pipe_resource_reference(&bo, NULL); sctx->sqtt->bo = NULL; if (sctx->sqtt->buffer_size < UINT32_MAX / 2) { /* Double the size of the thread trace buffer per SE. */ sctx->sqtt->buffer_size *= 2; mesa_loge("Failed to get the thread trace because the buffer " - "was too small, resizing to %d kB", + "was too small, resizing to %d kB per se", sctx->sqtt->buffer_size / 1024); } else { mesa_loge("Failed to get the thread trace because the buffer " - "was too small (%d kB). Cancelling trace capture.", + "was too small (%d kB per se). Cancelling trace capture.", sctx->sqtt->buffer_size / 1024); if (sctx->sqtt->instruction_timing_enabled) mesa_loge("Try again with AMD_THREAD_TRACE_INSTRUCTION_TIMING=false" @@ -288,7 +289,7 @@ static bool si_get_sqtt_trace(struct si_context *sctx, memset(sqtt, 0, sizeof(*sqtt)); sctx->sqtt->ptr = - sctx->ws->buffer_map(sctx->ws, sctx->sqtt->bo, NULL, PIPE_MAP_READ); + sctx->ws->buffer_map(sctx->ws, si_resource(sctx->sqtt->bo)->buf, NULL, PIPE_MAP_READ); if (!sctx->sqtt->ptr) return false; @@ -374,8 +375,8 @@ bool si_init_sqtt(struct si_context *sctx) void si_destroy_sqtt(struct si_context *sctx) { struct si_screen *sscreen = sctx->screen; - struct pb_buffer_lean *bo = sctx->sqtt->bo; - radeon_bo_reference(sctx->screen->ws, &bo, NULL); + struct pipe_resource *bo = sctx->sqtt->bo; + pipe_resource_reference(&bo, NULL); free(sctx->sqtt->trigger_file); @@ -489,7 +490,7 @@ void si_handle_sqtt(struct si_context *sctx, struct radeon_cmdbuf *rcs) /* Map the SPM counter buffer */ if (sctx->spm.bo) { sctx->spm.ptr = sctx->ws->buffer_map( - sctx->ws, sctx->spm.bo, NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY); + sctx->ws, si_resource(sctx->spm.bo)->buf, NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY); ac_spm_get_trace(&sctx->spm, &spm_trace); } @@ -497,7 +498,7 @@ void si_handle_sqtt(struct si_context *sctx, struct radeon_cmdbuf *rcs) sctx->spm.bo ? &spm_trace : NULL); if (sctx->spm.ptr) - sctx->ws->buffer_unmap(sctx->ws, sctx->spm.bo); + sctx->ws->buffer_unmap(sctx->ws, si_resource(sctx->spm.bo)->buf); } else if (sctx->sqtt->bo) { if (!sctx->sqtt->trigger_file) { sctx->sqtt->start_frame = num_frames + 10;