panfrost: Add CPU traces

A few function-scope traces are added to instrument blits, clears,
flushes, BO and resource handling, shader compilation and caching,
draws, compute shader job emission and AFBC packing.

Give the panfrost_flush_all_batches() call from panfrost_flush() a
more specific reason ("Gallium flush") to improve traces.

Signed-off-by: Loïc Molinari <loic.molinari@collabora.com>
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Benjamin Lee <benjamin.lee@collabora.com>
Acked-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34385>
This commit is contained in:
Loïc Molinari 2025-03-06 17:11:01 +01:00 committed by Marge Bot
parent a7727f692f
commit 5cd89d48ee
10 changed files with 74 additions and 3 deletions

View file

@ -28,6 +28,7 @@
*/
#include "util/format/u_format.h"
#include "util/perf/cpu_trace.h"
#include "pan_context.h"
#include "pan_resource.h"
#include "pan_util.h"
@ -85,6 +86,8 @@ void
panfrost_blit_no_afbc_legalization(struct pipe_context *pipe,
const struct pipe_blit_info *info)
{
MESA_TRACE_FUNC();
struct panfrost_context *ctx = pan_context(pipe);
panfrost_blitter_save(ctx, info->render_condition_enable
@ -96,6 +99,8 @@ panfrost_blit_no_afbc_legalization(struct pipe_context *pipe,
void
panfrost_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
MESA_TRACE_FUNC();
struct panfrost_context *ctx = pan_context(pipe);
if (info->render_condition_enable && !panfrost_render_condition_check(ctx))

View file

@ -38,6 +38,7 @@
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/perf/cpu_trace.h"
/* This file implements a userspace BO cache. Allocating and freeing
* GPU-visible buffers is very expensive, and even the extra kernel roundtrips
@ -124,6 +125,8 @@ err_alloc:
static void
panfrost_bo_free(struct panfrost_bo *bo)
{
MESA_TRACE_FUNC();
struct pan_kmod_bo *kmod_bo = bo->kmod_bo;
struct pan_kmod_vm *vm = bo->dev->kmod.vm;
uint64_t gpu_va = bo->ptr.gpu;
@ -155,6 +158,8 @@ panfrost_bo_free(struct panfrost_bo *bo)
bool
panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
{
MESA_TRACE_FUNC();
/* If the BO has been exported or imported we can't rely on the cached
* state, we need to call the WAIT_BO ioctl.
*/
@ -336,6 +341,8 @@ panfrost_bo_cache_evict_all(struct panfrost_device *dev)
int
panfrost_bo_mmap(struct panfrost_bo *bo)
{
MESA_TRACE_FUNC();
if (bo->ptr.cpu)
return 0;
@ -352,6 +359,8 @@ panfrost_bo_mmap(struct panfrost_bo *bo)
static void
panfrost_bo_munmap(struct panfrost_bo *bo)
{
MESA_TRACE_FUNC();
if (!bo->ptr.cpu)
return;
@ -367,6 +376,9 @@ struct panfrost_bo *
panfrost_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags,
const char *label)
{
assert(label);
MESA_TRACE_SCOPE("%s size=%zu label=\"%s\"", __func__, size, label);
struct panfrost_bo *bo;
if (dev->debug & PAN_DBG_DUMP) {

View file

@ -35,6 +35,7 @@
#include "util/u_sample_positions.h"
#include "util/u_vbuf.h"
#include "util/u_viewport.h"
#include "util/perf/cpu_trace.h"
#include "decode.h"
@ -3291,6 +3292,8 @@ panfrost_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
const struct pipe_draw_start_count_bias *draws,
unsigned num_draws)
{
MESA_TRACE_FUNC();
struct panfrost_context *ctx = pan_context(pipe);
if (!panfrost_render_condition_check(ctx))
@ -3431,6 +3434,8 @@ panfrost_afbc_size(struct panfrost_batch *batch, struct panfrost_resource *src,
struct panfrost_bo *metadata, unsigned offset,
unsigned level)
{
MESA_TRACE_FUNC();
struct pan_image_slice_layout *slice = &src->image.layout.slices[level];
struct panfrost_afbc_size_info consts = {
.src =
@ -3451,6 +3456,8 @@ panfrost_afbc_pack(struct panfrost_batch *batch, struct panfrost_resource *src,
struct panfrost_bo *metadata, unsigned metadata_offset,
unsigned level)
{
MESA_TRACE_FUNC();
struct pan_image_slice_layout *src_slice = &src->image.layout.slices[level];
struct panfrost_afbc_pack_info consts = {
.src = src->image.data.base + src->image.data.offset +
@ -3472,6 +3479,8 @@ panfrost_afbc_pack(struct panfrost_batch *batch, struct panfrost_resource *src,
static void
panfrost_mtk_detile_compute(struct panfrost_context *ctx, struct pipe_blit_info *info)
{
MESA_TRACE_FUNC();
struct pipe_context *pipe = &ctx->base;
struct pipe_resource *y_src = info->src.resource;
struct pipe_resource *uv_src = y_src->next;

View file

@ -47,6 +47,7 @@
#include "util/u_surface.h"
#include "util/u_upload_mgr.h"
#include "util/u_vbuf.h"
#include "util/perf/cpu_trace.h"
#include "clc/panfrost_compile.h"
#include "compiler/nir/nir_serialize.h"
@ -63,6 +64,8 @@ panfrost_clear(struct pipe_context *pipe, unsigned buffers,
const union pipe_color_union *color, double depth,
unsigned stencil)
{
MESA_TRACE_FUNC();
if (!panfrost_render_condition_check(pan_context(pipe)))
return;
@ -106,11 +109,13 @@ void
panfrost_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence,
unsigned flags)
{
MESA_TRACE_FUNC();
struct panfrost_context *ctx = pan_context(pipe);
struct panfrost_device *dev = pan_device(pipe->screen);
/* Submit all pending jobs */
panfrost_flush_all_batches(ctx, NULL);
panfrost_flush_all_batches(ctx, "Gallium flush");
if (fence) {
struct pipe_fence_handle *f = panfrost_fence_create(ctx);

View file

@ -74,6 +74,8 @@ panfrost_disk_cache_store(struct disk_cache *cache,
if (!cache)
return;
MESA_TRACE_FUNC();
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);
@ -116,6 +118,8 @@ panfrost_disk_cache_retrieve(struct disk_cache *cache,
if (!cache)
return false;
MESA_TRACE_FUNC();
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);

View file

@ -32,6 +32,7 @@
#include "util/rounding.h"
#include "util/u_framebuffer.h"
#include "util/u_pack_color.h"
#include "util/perf/cpu_trace.h"
#include "pan_bo.h"
#include "pan_context.h"
#include "pan_util.h"
@ -656,6 +657,8 @@ static void
panfrost_batch_submit(struct panfrost_context *ctx,
struct panfrost_batch *batch)
{
MESA_TRACE_FUNC();
struct pipe_screen *pscreen = ctx->base.screen;
struct panfrost_screen *screen = pan_screen(pscreen);
bool has_frag = panfrost_has_fragment_job(batch);
@ -724,8 +727,9 @@ out:
void
panfrost_flush_all_batches(struct panfrost_context *ctx, const char *reason)
{
if (reason)
perf_debug(ctx, "Flushing everything due to: %s", reason);
assert(reason);
MESA_TRACE_SCOPE("%s reason=\"%s\"", __func__, reason);
perf_debug(ctx, "Flushing everything due to: %s", reason);
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
if (!batch)
@ -743,6 +747,9 @@ void
panfrost_flush_writer(struct panfrost_context *ctx,
struct panfrost_resource *rsrc, const char *reason)
{
assert(reason);
MESA_TRACE_SCOPE("%s reason=\"%s\"", __func__, reason);
struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc);
if (entry) {
@ -756,6 +763,9 @@ panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
const char *reason)
{
assert(reason);
MESA_TRACE_SCOPE("%s reason=\"%s\"", __func__, reason);
unsigned i;
foreach_batch(ctx, i) {
struct panfrost_batch *batch = &ctx->batches.slots[i];

View file

@ -45,6 +45,7 @@
#include "util/u_surface.h"
#include "util/u_transfer.h"
#include "util/u_transfer_helper.h"
#include "util/perf/cpu_trace.h"
#include "decode.h"
#include "pan_bo.h"
@ -722,6 +723,8 @@ panfrost_resource_create_with_modifier(struct pipe_screen *screen,
const struct pipe_resource *template,
uint64_t modifier)
{
MESA_TRACE_FUNC();
struct panfrost_device *dev = pan_device(screen);
struct panfrost_resource *so = CALLOC_STRUCT(panfrost_resource);
@ -900,6 +903,8 @@ panfrost_resource_create_with_modifiers(struct pipe_screen *screen,
static void
panfrost_resource_destroy(struct pipe_screen *screen, struct pipe_resource *pt)
{
MESA_TRACE_FUNC();
struct panfrost_device *dev = pan_device(screen);
struct panfrost_resource *rsrc = (struct panfrost_resource *)pt;
@ -1212,6 +1217,8 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource,
const struct pipe_box *box,
struct pipe_transfer **out_transfer)
{
MESA_TRACE_FUNC();
struct panfrost_context *ctx = pan_context(pctx);
struct panfrost_device *dev = pan_device(pctx->screen);
struct panfrost_resource *rsrc = pan_resource(resource);
@ -1448,6 +1455,8 @@ pan_resource_modifier_convert(struct panfrost_context *ctx,
struct panfrost_resource *rsrc, uint64_t modifier,
bool copy_resource, const char *reason)
{
MESA_TRACE_FUNC();
bool need_shadow = rsrc->modifier_constant;
assert(!rsrc->modifier_constant || copy_resource);
@ -1683,6 +1692,8 @@ void
panfrost_pack_afbc(struct panfrost_context *ctx,
struct panfrost_resource *prsrc)
{
MESA_TRACE_FUNC();
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
struct panfrost_device *dev = pan_device(ctx->base.screen);
struct panfrost_bo *metadata_bo;
@ -1811,6 +1822,8 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
static void
panfrost_ptr_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
{
MESA_TRACE_FUNC();
/* Gallium expects writeback here, so we tile */
struct panfrost_context *ctx = pan_context(pctx);

View file

@ -38,6 +38,7 @@
#include "util/u_screen.h"
#include "util/u_video.h"
#include "util/xmlconfig.h"
#include "util/perf/cpu_trace.h"
#include <fcntl.h>
@ -819,6 +820,8 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
screen->max_afbc_packing_ratio = debug_get_num_option(
"PAN_MAX_AFBC_PACKING_RATIO", DEFAULT_MAX_AFBC_PACKING_RATIO);
util_cpu_trace_init();
if (panfrost_open_device(screen, fd, dev)) {
ralloc_free(screen);
return NULL;

View file

@ -32,6 +32,7 @@
#include "nir/tgsi_to_nir.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/perf/cpu_trace.h"
#include "nir_builder.h"
#include "nir_serialize.h"
#include "pan_bo.h"
@ -115,6 +116,8 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
unsigned fixed_varying_mask,
struct panfrost_shader_binary *out)
{
MESA_TRACE_FUNC();
struct panfrost_device *dev = pan_device(&screen->base);
nir_shader *s = nir_shader_clone(NULL, ir);
@ -452,6 +455,8 @@ static void *
panfrost_create_shader_state(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
MESA_TRACE_FUNC();
nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI)
? tgsi_to_nir(cso->tokens, pctx->screen, false)
: cso->ir.nir;

View file

@ -29,6 +29,7 @@
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_builder.h"
#include "panfrost/util/pan_ir.h"
#include "util/perf/cpu_trace.h"
#include "util/u_debug.h"
#include "bifrost/disassemble.h"
@ -5684,6 +5685,8 @@ bi_lower_ldexp16(nir_builder *b, nir_alu_instr *alu, UNUSED void *data)
void
bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
{
MESA_TRACE_FUNC();
/* Ensure that halts are translated to returns and get rid of them */
NIR_PASS(_, nir, nir_shader_instructions_pass, bi_lower_halt_to_return,
nir_metadata_all, NULL);
@ -6254,6 +6257,8 @@ bifrost_compile_shader_nir(nir_shader *nir,
struct util_dynarray *binary,
struct pan_shader_info *info)
{
MESA_TRACE_FUNC();
bifrost_debug = debug_get_option_bifrost_debug();
/* Combine stores late, to give the driver a chance to lower dual-source