mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-19 08:00:36 +02:00
broadcom/vc4: Add support for HW perfmon
The V3D engine provides several perf counters. Implement ->get_driver_query_[group_]info() so that these counters are exposed through the GL_AMD_performance_monitor extension. Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com> Signed-off-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
parent
5924379a58
commit
9ea90ffb98
5 changed files with 252 additions and 15 deletions
|
|
@ -219,6 +219,13 @@ struct vc4_job_key {
|
||||||
struct pipe_surface *zsbuf;
|
struct pipe_surface *zsbuf;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct vc4_hwperfmon {
|
||||||
|
uint32_t id;
|
||||||
|
uint64_t last_seqno;
|
||||||
|
uint8_t events[DRM_VC4_MAX_PERF_COUNTERS];
|
||||||
|
uint64_t counters[DRM_VC4_MAX_PERF_COUNTERS];
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A complete bin/render job.
|
* A complete bin/render job.
|
||||||
*
|
*
|
||||||
|
|
@ -309,6 +316,9 @@ struct vc4_job {
|
||||||
/** Any flags to be passed in drm_vc4_submit_cl.flags. */
|
/** Any flags to be passed in drm_vc4_submit_cl.flags. */
|
||||||
uint32_t flags;
|
uint32_t flags;
|
||||||
|
|
||||||
|
/* Performance monitor attached to this job. */
|
||||||
|
struct vc4_hwperfmon *perfmon;
|
||||||
|
|
||||||
struct vc4_job_key key;
|
struct vc4_job_key key;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -390,6 +400,8 @@ struct vc4_context {
|
||||||
struct pipe_viewport_state viewport;
|
struct pipe_viewport_state viewport;
|
||||||
struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
|
struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
|
||||||
struct vc4_vertexbuf_stateobj vertexbuf;
|
struct vc4_vertexbuf_stateobj vertexbuf;
|
||||||
|
|
||||||
|
struct vc4_hwperfmon *perfmon;
|
||||||
/** @} */
|
/** @} */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -447,6 +459,12 @@ vc4_sampler_state(struct pipe_sampler_state *psampler)
|
||||||
return (struct vc4_sampler_state *)psampler;
|
return (struct vc4_sampler_state *)psampler;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Screen hooks enumerating the driver-specific query groups and queries. */
int vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
                                    unsigned index,
                                    struct pipe_driver_query_group_info *info);
int vc4_get_driver_query_info(struct pipe_screen *pscreen,
                              unsigned index,
                              struct pipe_driver_query_info *info);
|
||||||
|
|
||||||
struct pipe_context *vc4_context_create(struct pipe_screen *pscreen,
|
struct pipe_context *vc4_context_create(struct pipe_screen *pscreen,
|
||||||
void *priv, unsigned flags);
|
void *priv, unsigned flags);
|
||||||
void vc4_draw_init(struct pipe_context *pctx);
|
void vc4_draw_init(struct pipe_context *pctx);
|
||||||
|
|
|
||||||
|
|
@ -92,6 +92,9 @@ vc4_job_create(struct vc4_context *vc4)
|
||||||
|
|
||||||
job->last_gem_handle_hindex = ~0;
|
job->last_gem_handle_hindex = ~0;
|
||||||
|
|
||||||
|
if (vc4->perfmon)
|
||||||
|
job->perfmon = vc4->perfmon;
|
||||||
|
|
||||||
return job;
|
return job;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -455,6 +458,8 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
|
||||||
submit.shader_rec_count = job->shader_rec_count;
|
submit.shader_rec_count = job->shader_rec_count;
|
||||||
submit.uniforms = (uintptr_t)job->uniforms.base;
|
submit.uniforms = (uintptr_t)job->uniforms.base;
|
||||||
submit.uniforms_size = cl_offset(&job->uniforms);
|
submit.uniforms_size = cl_offset(&job->uniforms);
|
||||||
|
if (job->perfmon)
|
||||||
|
submit.perfmonid = job->perfmon->id;
|
||||||
|
|
||||||
assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
|
assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
|
||||||
submit.min_x_tile = job->draw_min_x / job->tile_width;
|
submit.min_x_tile = job->draw_min_x / job->tile_width;
|
||||||
|
|
@ -487,6 +492,8 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
|
||||||
warned = true;
|
warned = true;
|
||||||
} else if (!ret) {
|
} else if (!ret) {
|
||||||
vc4->last_emit_seqno = submit.seqno;
|
vc4->last_emit_seqno = submit.seqno;
|
||||||
|
if (job->perfmon)
|
||||||
|
job->perfmon->last_seqno = submit.seqno;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,9 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stub support for occlusion queries.
|
* Expose V3D HW perf counters.
|
||||||
*
|
*
|
||||||
|
* We also have code to fake support for occlusion queries.
|
||||||
* Since we expose support for GL 2.0, we have to expose occlusion queries,
|
* Since we expose support for GL 2.0, we have to expose occlusion queries,
|
||||||
* but the spec allows you to expose 0 query counter bits, so we just return 0
|
* but the spec allows you to expose 0 query counter bits, so we just return 0
|
||||||
* as the result of all our queries.
|
* as the result of all our queries.
|
||||||
|
|
@ -32,49 +33,252 @@
|
||||||
|
|
||||||
/**
 * Driver-side object hidden behind an opaque struct pipe_query pointer.
 */
struct vc4_query {
        /* Number of counters requested; only filled in for HW counter
         * queries.
         */
        unsigned num_queries;

        /* Perfmon state for HW counter queries, NULL for every other
         * (faked) query.
         */
        struct vc4_hwperfmon *hwperfmon;
};
|
||||||
|
|
||||||
|
/*
 * Names of the V3D hardware performance counters.
 *
 * The position in this table is the event index: driver query type
 * PIPE_QUERY_DRIVER_SPECIFIC + i is reported under the name in entry i, so
 * entries must not be reordered.  Both the strings and the table itself are
 * read-only.
 */
static const char *const v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discarded-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-clk-cycles-vertex-coord-shading",
        "QPU-total-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-processed",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "L2C-total-cache-hit",
        "L2C-total-cache-miss",
};
|
||||||
|
|
||||||
|
int vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
|
||||||
|
unsigned index,
|
||||||
|
struct pipe_driver_query_group_info *info)
|
||||||
|
{
|
||||||
|
struct vc4_screen *screen = vc4_screen(pscreen);
|
||||||
|
|
||||||
|
if (!screen->has_perfmon_ioctl)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!info)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (index > 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
info->name = "V3D counters";
|
||||||
|
info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS;
|
||||||
|
info->num_queries = ARRAY_SIZE(v3d_counter_names);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
|
||||||
|
struct pipe_driver_query_info *info)
|
||||||
|
{
|
||||||
|
struct vc4_screen *screen = vc4_screen(pscreen);
|
||||||
|
|
||||||
|
if (!screen->has_perfmon_ioctl)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!info)
|
||||||
|
return ARRAY_SIZE(v3d_counter_names);
|
||||||
|
|
||||||
|
if (index >= ARRAY_SIZE(v3d_counter_names))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
info->group_id = 0;
|
||||||
|
info->name = v3d_counter_names[index];
|
||||||
|
info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
|
||||||
|
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
|
||||||
|
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
|
||||||
|
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct pipe_query *
|
||||||
|
vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
|
||||||
|
unsigned *query_types)
|
||||||
|
{
|
||||||
|
struct vc4_query *query = calloc(1, sizeof(*query));
|
||||||
|
struct vc4_hwperfmon *hwperfmon;
|
||||||
|
unsigned i, nhwqueries = 0;
|
||||||
|
|
||||||
|
if (!query)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
for (i = 0; i < num_queries; i++) {
|
||||||
|
if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC)
|
||||||
|
nhwqueries++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We can't mix HW and non-HW queries. */
|
||||||
|
if (nhwqueries && nhwqueries != num_queries)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!nhwqueries)
|
||||||
|
return (struct pipe_query *)query;
|
||||||
|
|
||||||
|
hwperfmon = calloc(1, sizeof(*hwperfmon));
|
||||||
|
if (!hwperfmon)
|
||||||
|
goto err_free_query;
|
||||||
|
|
||||||
|
for (i = 0; i < num_queries; i++)
|
||||||
|
hwperfmon->events[i] = query_types[i] -
|
||||||
|
PIPE_QUERY_DRIVER_SPECIFIC;
|
||||||
|
|
||||||
|
query->hwperfmon = hwperfmon;
|
||||||
|
query->num_queries = num_queries;
|
||||||
|
|
||||||
|
/* Note that struct pipe_query isn't actually defined anywhere. */
|
||||||
|
return (struct pipe_query *)query;
|
||||||
|
|
||||||
|
err_free_query:
|
||||||
|
free(query);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/**
 * Create a single query by treating it as a batch of one.
 *
 * The index argument is not used.
 */
static struct pipe_query *
vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
        unsigned types[1] = { query_type };

        return vc4_create_batch_query(ctx, 1, types);
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
vc4_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
|
vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
|
||||||
{
|
{
|
||||||
|
struct vc4_context *ctx = vc4_context(pctx);
|
||||||
|
struct vc4_query *query = (struct vc4_query *)pquery;
|
||||||
|
|
||||||
|
if (query->hwperfmon && query->hwperfmon->id) {
|
||||||
|
if (query->hwperfmon->id) {
|
||||||
|
struct drm_vc4_perfmon_destroy req = { };
|
||||||
|
|
||||||
|
req.id = query->hwperfmon->id;
|
||||||
|
vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY,
|
||||||
|
&req);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(query->hwperfmon);
|
||||||
|
}
|
||||||
|
|
||||||
free(query);
|
free(query);
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean
|
static boolean
|
||||||
vc4_begin_query(struct pipe_context *ctx, struct pipe_query *query)
|
vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
|
||||||
{
|
{
|
||||||
|
struct vc4_query *query = (struct vc4_query *)pquery;
|
||||||
|
struct vc4_context *ctx = vc4_context(pctx);
|
||||||
|
struct drm_vc4_perfmon_create req = { };
|
||||||
|
unsigned i;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!query->hwperfmon)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* Only one perfmon can be activated per context. */
|
||||||
|
if (ctx->perfmon)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Reset the counters by destroying the previously allocated perfmon */
|
||||||
|
if (query->hwperfmon->id) {
|
||||||
|
struct drm_vc4_perfmon_destroy destroyreq = { };
|
||||||
|
|
||||||
|
destroyreq.id = query->hwperfmon->id;
|
||||||
|
vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < query->num_queries; i++)
|
||||||
|
req.events[i] = query->hwperfmon->events[i];
|
||||||
|
|
||||||
|
req.ncounters = query->num_queries;
|
||||||
|
ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req);
|
||||||
|
if (ret)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
query->hwperfmon->id = req.id;
|
||||||
|
|
||||||
|
/* Make sure all pendings jobs are flushed before activating the
|
||||||
|
* perfmon.
|
||||||
|
*/
|
||||||
|
vc4_flush(pctx);
|
||||||
|
ctx->perfmon = query->hwperfmon;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
vc4_end_query(struct pipe_context *ctx, struct pipe_query *query)
|
vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
|
||||||
{
|
{
|
||||||
|
struct vc4_query *query = (struct vc4_query *)pquery;
|
||||||
|
struct vc4_context *ctx = vc4_context(pctx);
|
||||||
|
|
||||||
|
if (!query->hwperfmon)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (ctx->perfmon != query->hwperfmon)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Make sure all pendings jobs are flushed before deactivating the
|
||||||
|
* perfmon.
|
||||||
|
*/
|
||||||
|
vc4_flush(pctx);
|
||||||
|
ctx->perfmon = NULL;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean
|
static boolean
|
||||||
vc4_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
|
vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
|
||||||
boolean wait, union pipe_query_result *vresult)
|
boolean wait, union pipe_query_result *vresult)
|
||||||
{
|
{
|
||||||
uint64_t *result = &vresult->u64;
|
struct vc4_context *ctx = vc4_context(pctx);
|
||||||
|
struct vc4_query *query = (struct vc4_query *)pquery;
|
||||||
|
struct drm_vc4_perfmon_get_values req;
|
||||||
|
unsigned i;
|
||||||
|
int ret;
|
||||||
|
|
||||||
*result = 0;
|
if (!query->hwperfmon) {
|
||||||
|
vresult->u64 = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno,
|
||||||
|
wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon"))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
req.id = query->hwperfmon->id;
|
||||||
|
req.values_ptr = (uintptr_t)query->hwperfmon->counters;
|
||||||
|
ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req);
|
||||||
|
if (ret)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (i = 0; i < query->num_queries; i++)
|
||||||
|
vresult->batch[i].u64 = query->hwperfmon->counters[i];
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
vc4_set_active_query_state(struct pipe_context *pipe, boolean enable)
|
vc4_set_active_query_state(struct pipe_context *pctx, boolean enable)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,10 +286,10 @@ void
|
||||||
vc4_query_init(struct pipe_context *pctx)
|
vc4_query_init(struct pipe_context *pctx)
|
||||||
{
|
{
|
||||||
pctx->create_query = vc4_create_query;
|
pctx->create_query = vc4_create_query;
|
||||||
|
pctx->create_batch_query = vc4_create_batch_query;
|
||||||
pctx->destroy_query = vc4_destroy_query;
|
pctx->destroy_query = vc4_destroy_query;
|
||||||
pctx->begin_query = vc4_begin_query;
|
pctx->begin_query = vc4_begin_query;
|
||||||
pctx->end_query = vc4_end_query;
|
pctx->end_query = vc4_end_query;
|
||||||
pctx->get_query_result = vc4_get_query_result;
|
pctx->get_query_result = vc4_get_query_result;
|
||||||
pctx->set_active_query_state = vc4_set_active_query_state;
|
pctx->set_active_query_state = vc4_set_active_query_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -680,6 +680,8 @@ vc4_screen_create(int fd, struct renderonly *ro)
|
||||||
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
|
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
|
||||||
screen->has_madvise =
|
screen->has_madvise =
|
||||||
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_MADVISE);
|
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_MADVISE);
|
||||||
|
screen->has_perfmon_ioctl =
|
||||||
|
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_PERFMON);
|
||||||
|
|
||||||
if (!vc4_get_chip_info(screen))
|
if (!vc4_get_chip_info(screen))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
@ -706,6 +708,11 @@ vc4_screen_create(int fd, struct renderonly *ro)
|
||||||
pscreen->get_compiler_options = vc4_screen_get_compiler_options;
|
pscreen->get_compiler_options = vc4_screen_get_compiler_options;
|
||||||
pscreen->query_dmabuf_modifiers = vc4_screen_query_dmabuf_modifiers;
|
pscreen->query_dmabuf_modifiers = vc4_screen_query_dmabuf_modifiers;
|
||||||
|
|
||||||
|
if (screen->has_perfmon_ioctl) {
|
||||||
|
pscreen->get_driver_query_group_info = vc4_get_driver_query_group_info;
|
||||||
|
pscreen->get_driver_query_info = vc4_get_driver_query_info;
|
||||||
|
}
|
||||||
|
|
||||||
return pscreen;
|
return pscreen;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
|
|
|
||||||
|
|
@ -97,6 +97,7 @@ struct vc4_screen {
|
||||||
bool has_threaded_fs;
|
bool has_threaded_fs;
|
||||||
bool has_madvise;
|
bool has_madvise;
|
||||||
bool has_tiling_ioctl;
|
bool has_tiling_ioctl;
|
||||||
|
bool has_perfmon_ioctl;
|
||||||
|
|
||||||
struct vc4_simulator_file *sim_file;
|
struct vc4_simulator_file *sim_file;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue