nvc0: change prefix of MP performance counters to HW_SM

According to NVIDIA, local performance counters (MP) are prefixed
with SM, while global performance counters (PCOUNTER) are called PM.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Samuel Pitoiset 2015-08-29 10:58:49 +02:00
parent 21bdb4d8f3
commit c8a61ea4fb
2 changed files with 149 additions and 149 deletions

View file

@ -56,10 +56,10 @@ struct nvc0_query {
#define NVC0_QUERY_ALLOC_SPACE 256
static boolean nvc0_mp_pm_query_begin(struct nvc0_context *,
static boolean nvc0_hw_sm_query_begin(struct nvc0_context *,
struct nvc0_query *);
static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *);
static boolean nvc0_hw_sm_query_result(struct nvc0_context *,
struct nvc0_query *, void *, boolean);
static inline struct nvc0_query *
@ -159,7 +159,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
} else
#endif
if (nvc0->screen->base.device->drm_version >= 0x01000101) {
if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) {
/* for each MP:
* [00] = WS0.C0
* [04] = WS0.C1
@ -189,7 +189,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
break;
} else
if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) {
/* for each MP:
* [00] = MP.C0
* [04] = MP.C1
@ -327,9 +327,9 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
q->u.value = 0;
} else
#endif
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
ret = nvc0_mp_pm_query_begin(nvc0, q);
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
ret = nvc0_hw_sm_query_begin(nvc0, q);
}
break;
}
@ -412,9 +412,9 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
return;
} else
#endif
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
nvc0_mp_pm_query_end(nvc0, q);
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
nvc0_hw_sm_query_end(nvc0, q);
}
break;
}
@ -453,9 +453,9 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
return true;
} else
#endif
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
return nvc0_mp_pm_query_result(nvc0, q, result, wait);
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
return nvc0_hw_sm_query_result(nvc0, q, result, wait);
}
if (q->state != NVC0_QUERY_STATE_READY)
@ -692,7 +692,7 @@ static const char *nvc0_drv_stat_names[] =
* We could add a kernel interface for it, but reading the counters like this
* has the advantage of being async (if get_result isn't called immediately).
*/
static const uint64_t nve4_read_mp_pm_counters_code[] =
static const uint64_t nve4_read_hw_sm_counters_code[] =
{
/* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
* mov b32 $r8 $tidx
@ -852,7 +852,7 @@ struct nvc0_mp_counter_cfg
#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
struct nvc0_mp_pm_query_cfg
struct nvc0_hw_sm_query_cfg
{
struct nvc0_mp_counter_cfg ctr[4];
uint8_t num_counters;
@ -860,17 +860,17 @@ struct nvc0_mp_pm_query_cfg
uint8_t norm[2]; /* normalization num,denom */
};
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
@ -881,7 +881,7 @@ struct nvc0_mp_pm_query_cfg
* metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
* this is inaccurate !
*/
static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] =
{
_Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
_Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
@ -940,7 +940,7 @@ static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
#undef _M2B
/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
static const uint64_t nvc0_read_mp_pm_counters_code[] =
static const uint64_t nvc0_read_hw_sm_counters_code[] =
{
/* mov b32 $r8 $tidx
* mov b32 $r9 $physid
@ -1026,9 +1026,9 @@ static const char *nvc0_pm_query_names[] =
"warps_launched",
};
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
{
_Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
@ -1065,34 +1065,34 @@ static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
#undef _Q
static const struct nvc0_mp_pm_query_cfg *
nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
static const struct nvc0_hw_sm_query_cfg *
nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
if (screen->base.class_3d >= NVE4_3D_CLASS)
return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)];
}
boolean
nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
const struct nvc0_mp_pm_query_cfg *cfg;
const struct nvc0_hw_sm_query_cfg *cfg;
unsigned i, c;
unsigned num_ab[2] = { 0, 0 };
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
/* check if we have enough free counter slots */
for (i = 0; i < cfg->num_counters; ++i)
num_ab[cfg->ctr[i].sig_dom]++;
if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
NOUVEAU_ERR("Not enough free MP counter slots !\n");
return false;
}
@ -1113,14 +1113,14 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
for (i = 0; i < cfg->num_counters; ++i) {
const unsigned d = cfg->ctr[i].sig_dom;
if (!screen->pm.num_mp_pm_active[d]) {
if (!screen->pm.num_hw_sm_active[d]) {
uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
if (screen->pm.num_mp_pm_active[!d])
if (screen->pm.num_hw_sm_active[!d])
m |= 1 << (7 + (8 * d));
BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
PUSH_DATA (push, m);
}
screen->pm.num_mp_pm_active[d]++;
screen->pm.num_hw_sm_active[d]++;
for (c = d * 4; c < (d * 4 + 4); ++c) {
if (!screen->pm.mp_counter[c]) {
@ -1163,7 +1163,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
}
static void
nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
nvc0_hw_sm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
struct pipe_context *pipe = &nvc0->base.pipe;
@ -1174,9 +1174,9 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
const uint grid[3] = { screen->mp_count, 1, 1 };
unsigned c;
const struct nvc0_mp_pm_query_cfg *cfg;
const struct nvc0_hw_sm_query_cfg *cfg;
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
if (unlikely(!screen->pm.prog)) {
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
@ -1185,11 +1185,11 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
prog->num_gprs = 14;
prog->parm_size = 12;
if (is_nve4) {
prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
} else {
prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
}
screen->pm.prog = prog;
}
@ -1207,7 +1207,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
/* release counters for this query */
for (c = 0; c < 8; ++c) {
if (nvc0_query(screen->pm.mp_counter[c]) == q) {
screen->pm.num_mp_pm_active[c / 4]--;
screen->pm.num_hw_sm_active[c / 4]--;
screen->pm.mp_counter[c] = NULL;
}
}
@ -1234,7 +1234,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
q = nvc0_query(screen->pm.mp_counter[c]);
if (!q)
continue;
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
for (i = 0; i < cfg->num_counters; ++i) {
if (mask & (1 << q->ctr[i]))
break;
@ -1250,10 +1250,10 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
}
static inline bool
nvc0_mp_pm_query_read_data(uint32_t count[32][4],
nvc0_hw_sm_query_read_data(uint32_t count[32][4],
struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
const struct nvc0_mp_pm_query_cfg *cfg,
const struct nvc0_hw_sm_query_cfg *cfg,
unsigned mp_count)
{
unsigned p, c;
@ -1275,10 +1275,10 @@ nvc0_mp_pm_query_read_data(uint32_t count[32][4],
}
static inline bool
nve4_mp_pm_query_read_data(uint32_t count[32][4],
nve4_hw_sm_query_read_data(uint32_t count[32][4],
struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
const struct nvc0_mp_pm_query_cfg *cfg,
const struct nvc0_hw_sm_query_cfg *cfg,
unsigned mp_count)
{
unsigned p, c, d;
@ -1317,22 +1317,22 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4],
* NOTE: Interpretation of IPC requires knowledge of MP count.
*/
static boolean
nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
void *result, boolean wait)
{
uint32_t count[32][4];
uint64_t value = 0;
unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
unsigned p, c;
const struct nvc0_mp_pm_query_cfg *cfg;
const struct nvc0_hw_sm_query_cfg *cfg;
bool ret;
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
else
ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
if (!ret)
return false;
@ -1410,11 +1410,11 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
if (screen->base.device->drm_version >= 0x01000101) {
if (screen->compute) {
if (screen->base.class_3d == NVE4_3D_CLASS) {
count += NVE4_PM_QUERY_COUNT;
count += NVE4_HW_SM_QUERY_COUNT;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
/* NVC0_COMPUTE is not always enabled */
count += NVC0_PM_QUERY_COUNT;
count += NVC0_HW_SM_QUERY_COUNT;
}
}
}
@ -1444,15 +1444,15 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
if (screen->compute) {
if (screen->base.class_3d == NVE4_3D_CLASS) {
info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->query_type = NVE4_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->max_value.u64 =
(id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
(id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
return 1;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->query_type = NVC0_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
return 1;
}
@ -1494,7 +1494,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
if (screen->base.class_3d == NVE4_3D_CLASS) {
info->num_queries = NVE4_PM_QUERY_COUNT;
info->num_queries = NVE4_HW_SM_QUERY_COUNT;
/* On NVE4+, each multiprocessor have 8 hardware counters separated
* in two distinct domains, but we allow only one active query
@ -1504,7 +1504,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
return 1;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->num_queries = NVC0_PM_QUERY_COUNT;
info->num_queries = NVC0_HW_SM_QUERY_COUNT;
/* On NVC0:NVE4, each multiprocessor have 8 hardware counters
* in a single domain. */

View file

@ -95,7 +95,7 @@ struct nvc0_screen {
struct {
struct nvc0_program *prog; /* compute state object to read MP counters */
struct pipe_query *mp_counter[8]; /* counter to query allocation */
uint8_t num_mp_pm_active[2];
uint8_t num_hw_sm_active[2];
bool mp_counters_enabled;
} pm;
@ -120,98 +120,98 @@ nvc0_screen(struct pipe_screen *screen)
/* Performance counter queries:
*/
#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
#define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
#define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1)
enum nve4_pm_queries
{
NVE4_PM_QUERY_ACTIVE_CYCLES = 0,
NVE4_PM_QUERY_ACTIVE_WARPS,
NVE4_PM_QUERY_ATOM_COUNT,
NVE4_PM_QUERY_BRANCH,
NVE4_PM_QUERY_DIVERGENT_BRANCH,
NVE4_PM_QUERY_GLD_REQUEST,
NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY,
NVE4_PM_QUERY_GST_TRANSACTIONS,
NVE4_PM_QUERY_GST_MEM_DIV_REPLAY,
NVE4_PM_QUERY_GRED_COUNT,
NVE4_PM_QUERY_GST_REQUEST,
NVE4_PM_QUERY_INST_EXECUTED,
NVE4_PM_QUERY_INST_ISSUED,
NVE4_PM_QUERY_INST_ISSUED1,
NVE4_PM_QUERY_INST_ISSUED2,
NVE4_PM_QUERY_L1_GLD_HIT,
NVE4_PM_QUERY_L1_GLD_MISS,
NVE4_PM_QUERY_L1_LOCAL_LD_HIT,
NVE4_PM_QUERY_L1_LOCAL_LD_MISS,
NVE4_PM_QUERY_L1_LOCAL_ST_HIT,
NVE4_PM_QUERY_L1_LOCAL_ST_MISS,
NVE4_PM_QUERY_L1_SHARED_LD_TRANSACTIONS,
NVE4_PM_QUERY_L1_SHARED_ST_TRANSACTIONS,
NVE4_PM_QUERY_LOCAL_LD,
NVE4_PM_QUERY_LOCAL_LD_TRANSACTIONS,
NVE4_PM_QUERY_LOCAL_ST,
NVE4_PM_QUERY_LOCAL_ST_TRANSACTIONS,
NVE4_PM_QUERY_PROF_TRIGGER_0,
NVE4_PM_QUERY_PROF_TRIGGER_1,
NVE4_PM_QUERY_PROF_TRIGGER_2,
NVE4_PM_QUERY_PROF_TRIGGER_3,
NVE4_PM_QUERY_PROF_TRIGGER_4,
NVE4_PM_QUERY_PROF_TRIGGER_5,
NVE4_PM_QUERY_PROF_TRIGGER_6,
NVE4_PM_QUERY_PROF_TRIGGER_7,
NVE4_PM_QUERY_SHARED_LD,
NVE4_PM_QUERY_SHARED_LD_REPLAY,
NVE4_PM_QUERY_SHARED_ST,
NVE4_PM_QUERY_SHARED_ST_REPLAY,
NVE4_PM_QUERY_SM_CTA_LAUNCHED,
NVE4_PM_QUERY_THREADS_LAUNCHED,
NVE4_PM_QUERY_UNCACHED_GLD_TRANSACTIONS,
NVE4_PM_QUERY_WARPS_LAUNCHED,
NVE4_PM_QUERY_METRIC_IPC,
NVE4_PM_QUERY_METRIC_IPAC,
NVE4_PM_QUERY_METRIC_IPEC,
NVE4_PM_QUERY_METRIC_MP_OCCUPANCY,
NVE4_PM_QUERY_METRIC_MP_EFFICIENCY,
NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD,
NVE4_PM_QUERY_COUNT
NVE4_HW_SM_QUERY_ACTIVE_CYCLES = 0,
NVE4_HW_SM_QUERY_ACTIVE_WARPS,
NVE4_HW_SM_QUERY_ATOM_COUNT,
NVE4_HW_SM_QUERY_BRANCH,
NVE4_HW_SM_QUERY_DIVERGENT_BRANCH,
NVE4_HW_SM_QUERY_GLD_REQUEST,
NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY,
NVE4_HW_SM_QUERY_GST_TRANSACTIONS,
NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY,
NVE4_HW_SM_QUERY_GRED_COUNT,
NVE4_HW_SM_QUERY_GST_REQUEST,
NVE4_HW_SM_QUERY_INST_EXECUTED,
NVE4_HW_SM_QUERY_INST_ISSUED,
NVE4_HW_SM_QUERY_INST_ISSUED1,
NVE4_HW_SM_QUERY_INST_ISSUED2,
NVE4_HW_SM_QUERY_L1_GLD_HIT,
NVE4_HW_SM_QUERY_L1_GLD_MISS,
NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT,
NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS,
NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT,
NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS,
NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS,
NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS,
NVE4_HW_SM_QUERY_LOCAL_LD,
NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS,
NVE4_HW_SM_QUERY_LOCAL_ST,
NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS,
NVE4_HW_SM_QUERY_PROF_TRIGGER_0,
NVE4_HW_SM_QUERY_PROF_TRIGGER_1,
NVE4_HW_SM_QUERY_PROF_TRIGGER_2,
NVE4_HW_SM_QUERY_PROF_TRIGGER_3,
NVE4_HW_SM_QUERY_PROF_TRIGGER_4,
NVE4_HW_SM_QUERY_PROF_TRIGGER_5,
NVE4_HW_SM_QUERY_PROF_TRIGGER_6,
NVE4_HW_SM_QUERY_PROF_TRIGGER_7,
NVE4_HW_SM_QUERY_SHARED_LD,
NVE4_HW_SM_QUERY_SHARED_LD_REPLAY,
NVE4_HW_SM_QUERY_SHARED_ST,
NVE4_HW_SM_QUERY_SHARED_ST_REPLAY,
NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED,
NVE4_HW_SM_QUERY_THREADS_LAUNCHED,
NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS,
NVE4_HW_SM_QUERY_WARPS_LAUNCHED,
NVE4_HW_SM_QUERY_METRIC_IPC,
NVE4_HW_SM_QUERY_METRIC_IPAC,
NVE4_HW_SM_QUERY_METRIC_IPEC,
NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY,
NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY,
NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD,
NVE4_HW_SM_QUERY_COUNT
};
#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
#define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
#define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1)
enum nvc0_pm_queries
{
NVC0_PM_QUERY_ACTIVE_CYCLES = 0,
NVC0_PM_QUERY_ACTIVE_WARPS,
NVC0_PM_QUERY_ATOM_COUNT,
NVC0_PM_QUERY_BRANCH,
NVC0_PM_QUERY_DIVERGENT_BRANCH,
NVC0_PM_QUERY_GLD_REQUEST,
NVC0_PM_QUERY_GRED_COUNT,
NVC0_PM_QUERY_GST_REQUEST,
NVC0_PM_QUERY_INST_EXECUTED,
NVC0_PM_QUERY_INST_ISSUED1_0,
NVC0_PM_QUERY_INST_ISSUED1_1,
NVC0_PM_QUERY_INST_ISSUED2_0,
NVC0_PM_QUERY_INST_ISSUED2_1,
NVC0_PM_QUERY_LOCAL_LD,
NVC0_PM_QUERY_LOCAL_ST,
NVC0_PM_QUERY_PROF_TRIGGER_0,
NVC0_PM_QUERY_PROF_TRIGGER_1,
NVC0_PM_QUERY_PROF_TRIGGER_2,
NVC0_PM_QUERY_PROF_TRIGGER_3,
NVC0_PM_QUERY_PROF_TRIGGER_4,
NVC0_PM_QUERY_PROF_TRIGGER_5,
NVC0_PM_QUERY_PROF_TRIGGER_6,
NVC0_PM_QUERY_PROF_TRIGGER_7,
NVC0_PM_QUERY_SHARED_LD,
NVC0_PM_QUERY_SHARED_ST,
NVC0_PM_QUERY_THREADS_LAUNCHED,
NVC0_PM_QUERY_TH_INST_EXECUTED_0,
NVC0_PM_QUERY_TH_INST_EXECUTED_1,
NVC0_PM_QUERY_TH_INST_EXECUTED_2,
NVC0_PM_QUERY_TH_INST_EXECUTED_3,
NVC0_PM_QUERY_WARPS_LAUNCHED,
NVC0_PM_QUERY_COUNT
NVC0_HW_SM_QUERY_ACTIVE_CYCLES = 0,
NVC0_HW_SM_QUERY_ACTIVE_WARPS,
NVC0_HW_SM_QUERY_ATOM_COUNT,
NVC0_HW_SM_QUERY_BRANCH,
NVC0_HW_SM_QUERY_DIVERGENT_BRANCH,
NVC0_HW_SM_QUERY_GLD_REQUEST,
NVC0_HW_SM_QUERY_GRED_COUNT,
NVC0_HW_SM_QUERY_GST_REQUEST,
NVC0_HW_SM_QUERY_INST_EXECUTED,
NVC0_HW_SM_QUERY_INST_ISSUED1_0,
NVC0_HW_SM_QUERY_INST_ISSUED1_1,
NVC0_HW_SM_QUERY_INST_ISSUED2_0,
NVC0_HW_SM_QUERY_INST_ISSUED2_1,
NVC0_HW_SM_QUERY_LOCAL_LD,
NVC0_HW_SM_QUERY_LOCAL_ST,
NVC0_HW_SM_QUERY_PROF_TRIGGER_0,
NVC0_HW_SM_QUERY_PROF_TRIGGER_1,
NVC0_HW_SM_QUERY_PROF_TRIGGER_2,
NVC0_HW_SM_QUERY_PROF_TRIGGER_3,
NVC0_HW_SM_QUERY_PROF_TRIGGER_4,
NVC0_HW_SM_QUERY_PROF_TRIGGER_5,
NVC0_HW_SM_QUERY_PROF_TRIGGER_6,
NVC0_HW_SM_QUERY_PROF_TRIGGER_7,
NVC0_HW_SM_QUERY_SHARED_LD,
NVC0_HW_SM_QUERY_SHARED_ST,
NVC0_HW_SM_QUERY_THREADS_LAUNCHED,
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0,
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1,
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2,
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3,
NVC0_HW_SM_QUERY_WARPS_LAUNCHED,
NVC0_HW_SM_QUERY_COUNT
};
/* Driver statistics queries: