mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-22 12:50:35 +01:00
i965: Add support for GL_AMD_performance_monitor on Ironlake.
Ironlake's counters are always enabled; userspace can simply send a
MI_REPORT_PERF_COUNT packet to take a snapshot of them. This makes it
easy to implement.
The counters are documented in the source code for the intel-gpu-tools
intel_perf_counters utility.
v2: Adjust for core data structure changes. Add a table mapping buffer
object offsets to exposed counters (which changes each generation).
Finally, add report ID assertions to sanity check the BO layout
(thanks to Carl Worth).
v3: Update for core BeginPerfMonitor hook changes (requested by Brian).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
This commit is contained in:
parent
b2e327e08f
commit
0f2da77307
6 changed files with 420 additions and 0 deletions
|
|
@ -69,6 +69,7 @@ i965_FILES = \
|
|||
brw_lower_texture_gradients.cpp \
|
||||
brw_misc_state.c \
|
||||
brw_object_purgeable.c \
|
||||
brw_performance_monitor.c \
|
||||
brw_program.c \
|
||||
brw_primitive_restart.c \
|
||||
brw_queryobj.c \
|
||||
|
|
|
|||
|
|
@ -503,6 +503,10 @@ brwCreateContext(int api,
|
|||
_mesa_initialize_dispatch_tables(ctx);
|
||||
_mesa_initialize_vbo_vtxfmt(ctx);
|
||||
|
||||
if (ctx->Extensions.AMD_performance_monitor) {
|
||||
brw_init_performance_monitors(brw);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -128,6 +128,7 @@ struct brw_vs_prog_key;
|
|||
struct brw_vec4_prog_key;
|
||||
struct brw_wm_prog_key;
|
||||
struct brw_wm_prog_data;
|
||||
struct brw_perf_bo_layout;
|
||||
|
||||
enum brw_state_id {
|
||||
BRW_STATE_URB_FENCE,
|
||||
|
|
@ -1313,6 +1314,16 @@ struct brw_context
|
|||
bool begin_emitted;
|
||||
} query;
|
||||
|
||||
struct {
|
||||
/* A map describing which counters are stored at a particular 32-bit
|
||||
* offset in the buffer object.
|
||||
*/
|
||||
const struct brw_perf_bo_layout *bo_layout;
|
||||
|
||||
/* Number of 32-bit entries in the buffer object. */
|
||||
int entries_in_bo;
|
||||
} perfmon;
|
||||
|
||||
int num_atoms;
|
||||
const struct brw_tracked_state **atoms;
|
||||
|
||||
|
|
@ -1485,6 +1496,9 @@ bool brw_is_hiz_depth_format(struct brw_context *ctx, gl_format format);
|
|||
bool brw_render_target_supported(struct brw_context *brw,
|
||||
struct gl_renderbuffer *rb);
|
||||
|
||||
/* brw_performance_monitor.c */
|
||||
void brw_init_performance_monitors(struct brw_context *brw);
|
||||
|
||||
/* gen6_sol.c */
|
||||
void
|
||||
brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
|
||||
|
|
|
|||
|
|
@ -1817,6 +1817,13 @@ enum brw_wm_barycentric_interp_mode {
|
|||
|
||||
#define CMD_MI_FLUSH 0x0200
|
||||
|
||||
#define GEN5_MI_REPORT_PERF_COUNT ((0x26 << 23) | (3 - 2))
|
||||
/* DW0 */
|
||||
# define GEN5_MI_COUNTER_SET_0 (0 << 6)
|
||||
# define GEN5_MI_COUNTER_SET_1 (1 << 6)
|
||||
/* DW1 */
|
||||
# define MI_COUNTER_ADDRESS_GTT (1 << 0)
|
||||
/* DW2: a user-defined report ID (written to the buffer but can be anything) */
|
||||
|
||||
/* Bitfields for the URB_WRITE message, DW2 of message header: */
|
||||
#define URB_WRITE_PRIM_END 0x1
|
||||
|
|
|
|||
391
src/mesa/drivers/dri/i965/brw_performance_monitor.c
Normal file
391
src/mesa/drivers/dri/i965/brw_performance_monitor.c
Normal file
|
|
@ -0,0 +1,391 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file brw_performance_monitor.c
|
||||
*
|
||||
* Implementation of the GL_AMD_performance_monitor extension.
|
||||
*
|
||||
* Currently only for Ironlake.
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "main/bitset.h"
|
||||
#include "main/macros.h"
|
||||
#include "main/mtypes.h"
|
||||
#include "main/performance_monitor.h"
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_defines.h"
|
||||
#include "intel_batchbuffer.h"
|
||||
|
||||
/**
|
||||
* i965 representation of a performance monitor object.
|
||||
*/
|
||||
struct brw_perf_monitor_object
|
||||
{
|
||||
/** The base class. */
|
||||
struct gl_perf_monitor_object base;
|
||||
|
||||
/**
|
||||
* BO containing raw counter data in a hardware specific form.
|
||||
*/
|
||||
drm_intel_bo *bo;
|
||||
};
|
||||
|
||||
/** Downcasting convenience macro. */
|
||||
static inline struct brw_perf_monitor_object *
|
||||
brw_perf_monitor(struct gl_perf_monitor_object *m)
|
||||
{
|
||||
return (struct brw_perf_monitor_object *) m;
|
||||
}
|
||||
|
||||
#define SECOND_SNAPSHOT_OFFSET_IN_BYTES 2048
|
||||
|
||||
/* Two random values used to ensure we're getting valid snapshots. */
|
||||
#define FIRST_SNAPSHOT_REPORT_ID 0xd2e9c607
|
||||
#define SECOND_SNAPSHOT_REPORT_ID 0xad584b1d
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
#define COUNTER(name) \
|
||||
{ \
|
||||
.Name = name, \
|
||||
.Type = GL_UNSIGNED_INT, \
|
||||
.Minimum = { .u32 = 0 }, \
|
||||
.Maximum = { .u32 = ~0 }, \
|
||||
}
|
||||
|
||||
#define GROUP(name, max_active, counter_list) \
|
||||
{ \
|
||||
.Name = name, \
|
||||
.MaxActiveCounters = max_active, \
|
||||
.Counters = counter_list, \
|
||||
.NumCounters = ARRAY_SIZE(counter_list), \
|
||||
}
|
||||
|
||||
struct brw_perf_bo_layout {
|
||||
int group;
|
||||
int counter;
|
||||
};
|
||||
|
||||
/**
|
||||
* Ironlake:
|
||||
* @{
|
||||
*/
|
||||
const static struct gl_perf_monitor_counter gen5_raw_aggregating_counters[] = {
|
||||
COUNTER("cycles the CS unit is starved"),
|
||||
COUNTER("cycles the CS unit is stalled"),
|
||||
COUNTER("cycles the VF unit is starved"),
|
||||
COUNTER("cycles the VF unit is stalled"),
|
||||
COUNTER("cycles the VS unit is starved"),
|
||||
COUNTER("cycles the VS unit is stalled"),
|
||||
COUNTER("cycles the GS unit is starved"),
|
||||
COUNTER("cycles the GS unit is stalled"),
|
||||
COUNTER("cycles the CL unit is starved"),
|
||||
COUNTER("cycles the CL unit is stalled"),
|
||||
COUNTER("cycles the SF unit is starved"),
|
||||
COUNTER("cycles the SF unit is stalled"),
|
||||
COUNTER("cycles the WZ unit is starved"),
|
||||
COUNTER("cycles the WZ unit is stalled"),
|
||||
COUNTER("Z buffer read/write"),
|
||||
COUNTER("cycles each EU was active"),
|
||||
COUNTER("cycles each EU was suspended"),
|
||||
COUNTER("cycles threads loaded all EUs"),
|
||||
COUNTER("cycles filtering active"),
|
||||
COUNTER("cycles PS threads executed"),
|
||||
COUNTER("subspans written to RC"),
|
||||
COUNTER("bytes read for texture reads"),
|
||||
COUNTER("texels returned from sampler"),
|
||||
COUNTER("polygons not culled"),
|
||||
COUNTER("clocks MASF has valid message"),
|
||||
COUNTER("64b writes/reads from RC"),
|
||||
COUNTER("reads on dataport"),
|
||||
COUNTER("clocks MASF has valid msg not consumed by sampler"),
|
||||
COUNTER("cycles any EU is stalled for math"),
|
||||
};
|
||||
|
||||
const static struct gl_perf_monitor_group gen5_groups[] = {
|
||||
GROUP("Aggregating Counters", INT_MAX, gen5_raw_aggregating_counters),
|
||||
};
|
||||
|
||||
const static struct brw_perf_bo_layout gen5_perf_bo_layout[] =
|
||||
{
|
||||
{ -1, -1, }, /* Report ID */
|
||||
{ -1, -1, }, /* TIMESTAMP (64-bit) */
|
||||
{ -1, -1, }, /* ...second half... */
|
||||
{ 0, 0, }, /* cycles the CS unit is starved */
|
||||
{ 0, 1, }, /* cycles the CS unit is stalled */
|
||||
{ 0, 2, }, /* cycles the VF unit is starved */
|
||||
{ 0, 3, }, /* cycles the VF unit is stalled */
|
||||
{ 0, 4, }, /* cycles the VS unit is starved */
|
||||
{ 0, 5, }, /* cycles the VS unit is stalled */
|
||||
{ 0, 6, }, /* cycles the GS unit is starved */
|
||||
{ 0, 7, }, /* cycles the GS unit is stalled */
|
||||
{ 0, 8, }, /* cycles the CL unit is starved */
|
||||
{ 0, 9, }, /* cycles the CL unit is stalled */
|
||||
{ 0, 10, }, /* cycles the SF unit is starved */
|
||||
{ 0, 11, }, /* cycles the SF unit is stalled */
|
||||
{ 0, 12, }, /* cycles the WZ unit is starved */
|
||||
{ 0, 13, }, /* cycles the WZ unit is stalled */
|
||||
{ 0, 14, }, /* Z buffer read/write */
|
||||
{ 0, 15, }, /* cycles each EU was active */
|
||||
{ 0, 16, }, /* cycles each EU was suspended */
|
||||
{ 0, 17, }, /* cycles threads loaded all EUs */
|
||||
{ 0, 18, }, /* cycles filtering active */
|
||||
{ 0, 19, }, /* cycles PS threads executed */
|
||||
{ 0, 20, }, /* subspans written to RC */
|
||||
{ 0, 21, }, /* bytes read for texture reads */
|
||||
{ 0, 22, }, /* texels returned from sampler */
|
||||
{ 0, 23, }, /* polygons not culled */
|
||||
{ 0, 24, }, /* clocks MASF has valid message */
|
||||
{ 0, 25, }, /* 64b writes/reads from RC */
|
||||
{ 0, 26, }, /* reads on dataport */
|
||||
{ 0, 27, }, /* clocks MASF has valid msg not consumed by sampler */
|
||||
{ 0, 28, }, /* cycles any EU is stalled for math */
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
static void
|
||||
snapshot_aggregating_counters(struct brw_context *brw,
|
||||
drm_intel_bo *bo, uint32_t offset_in_bytes)
|
||||
{
|
||||
uint32_t report_id = offset_in_bytes == 0 ? FIRST_SNAPSHOT_REPORT_ID
|
||||
: SECOND_SNAPSHOT_REPORT_ID;
|
||||
|
||||
if (brw->gen == 5) {
|
||||
/* Ironlake requires two MI_REPORT_PERF_COUNT commands to write all
|
||||
* the counters. The report ID is ignored in the second set.
|
||||
*/
|
||||
BEGIN_BATCH(6);
|
||||
OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | GEN5_MI_COUNTER_SET_0);
|
||||
OUT_RELOC(bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
offset_in_bytes);
|
||||
OUT_BATCH(report_id);
|
||||
|
||||
OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | GEN5_MI_COUNTER_SET_1);
|
||||
OUT_RELOC(bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
offset_in_bytes + 64);
|
||||
OUT_BATCH(report_id);
|
||||
ADVANCE_BATCH();
|
||||
} else {
|
||||
assert(!"Unsupported generation for performance counters.");
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
aggregating_counters_needed(struct brw_context *brw,
|
||||
struct gl_perf_monitor_object *m)
|
||||
{
|
||||
return m->ActiveGroups[0];
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/**
|
||||
* Create a new performance monitor object.
|
||||
*/
|
||||
static struct gl_perf_monitor_object *
|
||||
brw_new_perf_monitor(struct gl_context *ctx)
|
||||
{
|
||||
return calloc(1, sizeof(struct brw_perf_monitor_object));
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a performance monitor object.
|
||||
*/
|
||||
static void
|
||||
brw_delete_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
|
||||
{
|
||||
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
||||
|
||||
if (monitor->bo)
|
||||
drm_intel_bo_unreference(monitor->bo);
|
||||
|
||||
free(monitor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Driver hook for glBeginPerformanceMonitorAMD().
|
||||
*/
|
||||
static GLboolean
|
||||
brw_begin_perf_monitor(struct gl_context *ctx,
|
||||
struct gl_perf_monitor_object *m)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
||||
|
||||
/* If the BO already exists, throw it away. It contains old results
|
||||
* that we're not interested in any more.
|
||||
*/
|
||||
if (monitor->bo)
|
||||
drm_intel_bo_unreference(monitor->bo);
|
||||
|
||||
/* Create a new BO. */
|
||||
monitor->bo =
|
||||
drm_intel_bo_alloc(brw->bufmgr, "performance monitor", 4096, 64);
|
||||
drm_intel_bo_map(monitor->bo, true);
|
||||
memset((char *) monitor->bo->virtual, 0xff, 4096);
|
||||
drm_intel_bo_unmap(monitor->bo);
|
||||
|
||||
/* Take a shapshot of all active counters */
|
||||
if (aggregating_counters_needed(brw, m)) {
|
||||
snapshot_aggregating_counters(brw, monitor->bo, 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Driver hook for glEndPerformanceMonitorAMD().
|
||||
*/
|
||||
static void
|
||||
brw_end_perf_monitor(struct gl_context *ctx,
|
||||
struct gl_perf_monitor_object *m)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
||||
if (aggregating_counters_needed(brw, m)) {
|
||||
snapshot_aggregating_counters(brw, monitor->bo,
|
||||
SECOND_SNAPSHOT_OFFSET_IN_BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset a performance monitor, throwing away any results.
|
||||
*/
|
||||
static void
|
||||
brw_reset_perf_monitor(struct gl_context *ctx,
|
||||
struct gl_perf_monitor_object *m)
|
||||
{
|
||||
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
||||
|
||||
if (monitor->bo) {
|
||||
drm_intel_bo_unreference(monitor->bo);
|
||||
monitor->bo = NULL;
|
||||
}
|
||||
|
||||
if (m->Active) {
|
||||
brw_begin_perf_monitor(ctx, m);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Is a performance monitor result available?
|
||||
*/
|
||||
static GLboolean
|
||||
brw_is_perf_monitor_result_available(struct gl_context *ctx,
|
||||
struct gl_perf_monitor_object *m)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
||||
return !m->Active && monitor->bo &&
|
||||
!drm_intel_bo_references(brw->batch.bo, monitor->bo) &&
|
||||
!drm_intel_bo_busy(monitor->bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the performance monitor result.
|
||||
*/
|
||||
static void
|
||||
brw_get_perf_monitor_result(struct gl_context *ctx,
|
||||
struct gl_perf_monitor_object *m,
|
||||
GLsizei data_size,
|
||||
GLuint *data,
|
||||
GLint *bytes_written)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
||||
|
||||
/* This hook should only be called when results are available. */
|
||||
assert(monitor->bo != NULL);
|
||||
|
||||
drm_intel_bo_map(monitor->bo, false);
|
||||
unsigned *gpu_bo = monitor->bo->virtual;
|
||||
|
||||
/* Copy data from the BO to the supplied array.
|
||||
*
|
||||
* The output data format is: <group ID, counter ID, value> for each
|
||||
* active counter. The API allows counters to appear in any order.
|
||||
*/
|
||||
GLsizei offset = 0;
|
||||
|
||||
/* Look for expected report ID values to ensure data is present. */
|
||||
assert(gpu_bo[0] == FIRST_SNAPSHOT_REPORT_ID);
|
||||
assert(gpu_bo[SECOND_SNAPSHOT_OFFSET_IN_BYTES/4] == SECOND_SNAPSHOT_REPORT_ID);
|
||||
|
||||
for (int i = 0; i < brw->perfmon.entries_in_bo; i++) {
|
||||
int group = brw->perfmon.bo_layout[i].group;
|
||||
int counter = brw->perfmon.bo_layout[i].counter;
|
||||
|
||||
if (group < 0 || !BITSET_TEST(m->ActiveCounters[group], counter))
|
||||
continue;
|
||||
|
||||
const struct gl_perf_monitor_group *group_obj =
|
||||
&ctx->PerfMonitor.Groups[group];
|
||||
|
||||
const struct gl_perf_monitor_counter *c = &group_obj->Counters[counter];
|
||||
|
||||
data[offset++] = group;
|
||||
data[offset++] = counter;
|
||||
|
||||
uint32_t second_snapshot_index =
|
||||
SECOND_SNAPSHOT_OFFSET_IN_BYTES / sizeof(uint32_t) + i;
|
||||
|
||||
/* Won't work for uint64_t values, but we don't expose any yet. */
|
||||
data[offset] = gpu_bo[second_snapshot_index] - gpu_bo[i];
|
||||
offset += _mesa_perf_monitor_counter_size(c) / sizeof(uint32_t);
|
||||
}
|
||||
|
||||
drm_intel_bo_unmap(monitor->bo);
|
||||
|
||||
if (bytes_written)
|
||||
*bytes_written = offset * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
void
|
||||
brw_init_performance_monitors(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
|
||||
ctx->Driver.NewPerfMonitor = brw_new_perf_monitor;
|
||||
ctx->Driver.DeletePerfMonitor = brw_delete_perf_monitor;
|
||||
ctx->Driver.BeginPerfMonitor = brw_begin_perf_monitor;
|
||||
ctx->Driver.EndPerfMonitor = brw_end_perf_monitor;
|
||||
ctx->Driver.ResetPerfMonitor = brw_reset_perf_monitor;
|
||||
ctx->Driver.IsPerfMonitorResultAvailable = brw_is_perf_monitor_result_available;
|
||||
ctx->Driver.GetPerfMonitorResult = brw_get_perf_monitor_result;
|
||||
|
||||
if (brw->gen == 5) {
|
||||
ctx->PerfMonitor.Groups = gen5_groups;
|
||||
ctx->PerfMonitor.NumGroups = ARRAY_SIZE(gen5_groups);
|
||||
brw->perfmon.bo_layout = gen5_perf_bo_layout;
|
||||
brw->perfmon.entries_in_bo = ARRAY_SIZE(gen5_perf_bo_layout);
|
||||
}
|
||||
}
|
||||
|
|
@ -160,6 +160,9 @@ intelInitExtensions(struct gl_context *ctx)
|
|||
ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130;
|
||||
}
|
||||
|
||||
if (brw->gen == 5)
|
||||
ctx->Extensions.AMD_performance_monitor = true;
|
||||
|
||||
if (ctx->API == API_OPENGL_CORE)
|
||||
ctx->Extensions.ARB_base_instance = true;
|
||||
if (ctx->API != API_OPENGL_CORE)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue