mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-05 06:50:10 +01:00
nvc0: change prefix of MP performance counters to HW_SM
According to NVIDIA, local performance counters (MP) are prefixed with SM, while global performance counters (PCOUNTER) are called PM.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
parent
21bdb4d8f3
commit
c8a61ea4fb
2 changed files with 149 additions and 149 deletions
|
|
@ -56,10 +56,10 @@ struct nvc0_query {
|
|||
|
||||
#define NVC0_QUERY_ALLOC_SPACE 256
|
||||
|
||||
static boolean nvc0_mp_pm_query_begin(struct nvc0_context *,
|
||||
static boolean nvc0_hw_sm_query_begin(struct nvc0_context *,
|
||||
struct nvc0_query *);
|
||||
static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
|
||||
static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
|
||||
static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *);
|
||||
static boolean nvc0_hw_sm_query_result(struct nvc0_context *,
|
||||
struct nvc0_query *, void *, boolean);
|
||||
|
||||
static inline struct nvc0_query *
|
||||
|
|
@ -159,7 +159,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
|
|||
} else
|
||||
#endif
|
||||
if (nvc0->screen->base.device->drm_version >= 0x01000101) {
|
||||
if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
|
||||
if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) {
|
||||
/* for each MP:
|
||||
* [00] = WS0.C0
|
||||
* [04] = WS0.C1
|
||||
|
|
@ -189,7 +189,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
|
|||
space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
|
||||
break;
|
||||
} else
|
||||
if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
|
||||
if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) {
|
||||
/* for each MP:
|
||||
* [00] = MP.C0
|
||||
* [04] = MP.C1
|
||||
|
|
@ -327,9 +327,9 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
|
|||
q->u.value = 0;
|
||||
} else
|
||||
#endif
|
||||
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
|
||||
ret = nvc0_mp_pm_query_begin(nvc0, q);
|
||||
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
|
||||
ret = nvc0_hw_sm_query_begin(nvc0, q);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -412,9 +412,9 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
|
|||
return;
|
||||
} else
|
||||
#endif
|
||||
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
|
||||
nvc0_mp_pm_query_end(nvc0, q);
|
||||
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
|
||||
nvc0_hw_sm_query_end(nvc0, q);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -453,9 +453,9 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
return true;
|
||||
} else
|
||||
#endif
|
||||
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
|
||||
return nvc0_mp_pm_query_result(nvc0, q, result, wait);
|
||||
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
|
||||
return nvc0_hw_sm_query_result(nvc0, q, result, wait);
|
||||
}
|
||||
|
||||
if (q->state != NVC0_QUERY_STATE_READY)
|
||||
|
|
@ -692,7 +692,7 @@ static const char *nvc0_drv_stat_names[] =
|
|||
* We could add a kernel interface for it, but reading the counters like this
|
||||
* has the advantage of being async (if get_result isn't called immediately).
|
||||
*/
|
||||
static const uint64_t nve4_read_mp_pm_counters_code[] =
|
||||
static const uint64_t nve4_read_hw_sm_counters_code[] =
|
||||
{
|
||||
/* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
|
||||
* mov b32 $r8 $tidx
|
||||
|
|
@ -852,7 +852,7 @@ struct nvc0_mp_counter_cfg
|
|||
#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
|
||||
#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0] */
|
||||
|
||||
struct nvc0_mp_pm_query_cfg
|
||||
struct nvc0_hw_sm_query_cfg
|
||||
{
|
||||
struct nvc0_mp_counter_cfg ctr[4];
|
||||
uint8_t num_counters;
|
||||
|
|
@ -860,17 +860,17 @@ struct nvc0_mp_pm_query_cfg
|
|||
uint8_t norm[2]; /* normalization num,denom */
|
||||
};
|
||||
|
||||
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
|
||||
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
|
||||
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
|
||||
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
|
||||
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
|
||||
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
|
||||
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
|
||||
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
|
||||
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
|
||||
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
|
||||
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
|
||||
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
|
||||
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
|
||||
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
|
||||
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
|
||||
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
|
||||
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
|
||||
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
|
||||
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
|
||||
|
|
@ -881,7 +881,7 @@ struct nvc0_mp_pm_query_cfg
|
|||
* metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
|
||||
* this is inaccurate !
|
||||
*/
|
||||
static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
|
||||
static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] =
|
||||
{
|
||||
_Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
|
||||
_Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
|
||||
|
|
@ -940,7 +940,7 @@ static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
|
|||
#undef _M2B
|
||||
|
||||
/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
|
||||
static const uint64_t nvc0_read_mp_pm_counters_code[] =
|
||||
static const uint64_t nvc0_read_hw_sm_counters_code[] =
|
||||
{
|
||||
/* mov b32 $r8 $tidx
|
||||
* mov b32 $r9 $physid
|
||||
|
|
@ -1026,9 +1026,9 @@ static const char *nvc0_pm_query_names[] =
|
|||
"warps_launched",
|
||||
};
|
||||
|
||||
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
|
||||
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
|
||||
|
||||
static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
|
||||
static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
|
||||
{
|
||||
_Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
|
||||
|
|
@ -1065,34 +1065,34 @@ static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
|
|||
|
||||
#undef _Q
|
||||
|
||||
static const struct nvc0_mp_pm_query_cfg *
|
||||
nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
static const struct nvc0_hw_sm_query_cfg *
|
||||
nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
|
||||
if (screen->base.class_3d >= NVE4_3D_CLASS)
|
||||
return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
|
||||
return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
|
||||
return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
|
||||
return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)];
|
||||
}
|
||||
|
||||
boolean
|
||||
nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
|
||||
const struct nvc0_mp_pm_query_cfg *cfg;
|
||||
const struct nvc0_hw_sm_query_cfg *cfg;
|
||||
unsigned i, c;
|
||||
unsigned num_ab[2] = { 0, 0 };
|
||||
|
||||
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
|
||||
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
|
||||
|
||||
/* check if we have enough free counter slots */
|
||||
for (i = 0; i < cfg->num_counters; ++i)
|
||||
num_ab[cfg->ctr[i].sig_dom]++;
|
||||
|
||||
if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
|
||||
screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
|
||||
if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
|
||||
screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
|
||||
NOUVEAU_ERR("Not enough free MP counter slots !\n");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1113,14 +1113,14 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
for (i = 0; i < cfg->num_counters; ++i) {
|
||||
const unsigned d = cfg->ctr[i].sig_dom;
|
||||
|
||||
if (!screen->pm.num_mp_pm_active[d]) {
|
||||
if (!screen->pm.num_hw_sm_active[d]) {
|
||||
uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
|
||||
if (screen->pm.num_mp_pm_active[!d])
|
||||
if (screen->pm.num_hw_sm_active[!d])
|
||||
m |= 1 << (7 + (8 * d));
|
||||
BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
|
||||
PUSH_DATA (push, m);
|
||||
}
|
||||
screen->pm.num_mp_pm_active[d]++;
|
||||
screen->pm.num_hw_sm_active[d]++;
|
||||
|
||||
for (c = d * 4; c < (d * 4 + 4); ++c) {
|
||||
if (!screen->pm.mp_counter[c]) {
|
||||
|
|
@ -1163,7 +1163,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
}
|
||||
|
||||
static void
|
||||
nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
nvc0_hw_sm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
struct pipe_context *pipe = &nvc0->base.pipe;
|
||||
|
|
@ -1174,9 +1174,9 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
|
||||
const uint grid[3] = { screen->mp_count, 1, 1 };
|
||||
unsigned c;
|
||||
const struct nvc0_mp_pm_query_cfg *cfg;
|
||||
const struct nvc0_hw_sm_query_cfg *cfg;
|
||||
|
||||
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
|
||||
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
|
||||
|
||||
if (unlikely(!screen->pm.prog)) {
|
||||
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
|
||||
|
|
@ -1185,11 +1185,11 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
prog->num_gprs = 14;
|
||||
prog->parm_size = 12;
|
||||
if (is_nve4) {
|
||||
prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
|
||||
prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
|
||||
prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
|
||||
prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
|
||||
} else {
|
||||
prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
|
||||
prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
|
||||
prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
|
||||
prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
|
||||
}
|
||||
screen->pm.prog = prog;
|
||||
}
|
||||
|
|
@ -1207,7 +1207,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
/* release counters for this query */
|
||||
for (c = 0; c < 8; ++c) {
|
||||
if (nvc0_query(screen->pm.mp_counter[c]) == q) {
|
||||
screen->pm.num_mp_pm_active[c / 4]--;
|
||||
screen->pm.num_hw_sm_active[c / 4]--;
|
||||
screen->pm.mp_counter[c] = NULL;
|
||||
}
|
||||
}
|
||||
|
|
@ -1234,7 +1234,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
q = nvc0_query(screen->pm.mp_counter[c]);
|
||||
if (!q)
|
||||
continue;
|
||||
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
|
||||
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
|
||||
for (i = 0; i < cfg->num_counters; ++i) {
|
||||
if (mask & (1 << q->ctr[i]))
|
||||
break;
|
||||
|
|
@ -1250,10 +1250,10 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
}
|
||||
|
||||
static inline bool
|
||||
nvc0_mp_pm_query_read_data(uint32_t count[32][4],
|
||||
nvc0_hw_sm_query_read_data(uint32_t count[32][4],
|
||||
struct nvc0_context *nvc0, bool wait,
|
||||
struct nvc0_query *q,
|
||||
const struct nvc0_mp_pm_query_cfg *cfg,
|
||||
const struct nvc0_hw_sm_query_cfg *cfg,
|
||||
unsigned mp_count)
|
||||
{
|
||||
unsigned p, c;
|
||||
|
|
@ -1275,10 +1275,10 @@ nvc0_mp_pm_query_read_data(uint32_t count[32][4],
|
|||
}
|
||||
|
||||
static inline bool
|
||||
nve4_mp_pm_query_read_data(uint32_t count[32][4],
|
||||
nve4_hw_sm_query_read_data(uint32_t count[32][4],
|
||||
struct nvc0_context *nvc0, bool wait,
|
||||
struct nvc0_query *q,
|
||||
const struct nvc0_mp_pm_query_cfg *cfg,
|
||||
const struct nvc0_hw_sm_query_cfg *cfg,
|
||||
unsigned mp_count)
|
||||
{
|
||||
unsigned p, c, d;
|
||||
|
|
@ -1317,22 +1317,22 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4],
|
|||
* NOTE: Interpretation of IPC requires knowledge of MP count.
|
||||
*/
|
||||
static boolean
|
||||
nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
|
||||
nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
|
||||
void *result, boolean wait)
|
||||
{
|
||||
uint32_t count[32][4];
|
||||
uint64_t value = 0;
|
||||
unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
|
||||
unsigned p, c;
|
||||
const struct nvc0_mp_pm_query_cfg *cfg;
|
||||
const struct nvc0_hw_sm_query_cfg *cfg;
|
||||
bool ret;
|
||||
|
||||
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
|
||||
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
|
||||
|
||||
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
|
||||
ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
|
||||
ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
|
||||
else
|
||||
ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
|
||||
ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
|
||||
if (!ret)
|
||||
return false;
|
||||
|
||||
|
|
@ -1410,11 +1410,11 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
|
|||
if (screen->base.device->drm_version >= 0x01000101) {
|
||||
if (screen->compute) {
|
||||
if (screen->base.class_3d == NVE4_3D_CLASS) {
|
||||
count += NVE4_PM_QUERY_COUNT;
|
||||
count += NVE4_HW_SM_QUERY_COUNT;
|
||||
} else
|
||||
if (screen->base.class_3d < NVE4_3D_CLASS) {
|
||||
/* NVC0_COMPUTE is not always enabled */
|
||||
count += NVC0_PM_QUERY_COUNT;
|
||||
count += NVC0_HW_SM_QUERY_COUNT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1444,15 +1444,15 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
|
|||
if (screen->compute) {
|
||||
if (screen->base.class_3d == NVE4_3D_CLASS) {
|
||||
info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
|
||||
info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
|
||||
info->query_type = NVE4_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
|
||||
info->max_value.u64 =
|
||||
(id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
|
||||
(id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
|
||||
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
|
||||
return 1;
|
||||
} else
|
||||
if (screen->base.class_3d < NVE4_3D_CLASS) {
|
||||
info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
|
||||
info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
|
||||
info->query_type = NVC0_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
|
||||
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
|
||||
return 1;
|
||||
}
|
||||
|
|
@ -1494,7 +1494,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
|
|||
info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
|
||||
|
||||
if (screen->base.class_3d == NVE4_3D_CLASS) {
|
||||
info->num_queries = NVE4_PM_QUERY_COUNT;
|
||||
info->num_queries = NVE4_HW_SM_QUERY_COUNT;
|
||||
|
||||
/* On NVE4+, each multiprocessor has 8 hardware counters separated
|
||||
* in two distinct domains, but we allow only one active query
|
||||
|
|
@ -1504,7 +1504,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
|
|||
return 1;
|
||||
} else
|
||||
if (screen->base.class_3d < NVE4_3D_CLASS) {
|
||||
info->num_queries = NVC0_PM_QUERY_COUNT;
|
||||
info->num_queries = NVC0_HW_SM_QUERY_COUNT;
|
||||
|
||||
/* On NVC0:NVE4, each multiprocessor has 8 hardware counters
|
||||
* in a single domain. */
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ struct nvc0_screen {
|
|||
struct {
|
||||
struct nvc0_program *prog; /* compute state object to read MP counters */
|
||||
struct pipe_query *mp_counter[8]; /* counter to query allocation */
|
||||
uint8_t num_mp_pm_active[2];
|
||||
uint8_t num_hw_sm_active[2];
|
||||
bool mp_counters_enabled;
|
||||
} pm;
|
||||
|
||||
|
|
@ -120,98 +120,98 @@ nvc0_screen(struct pipe_screen *screen)
|
|||
|
||||
/* Performance counter queries:
|
||||
*/
|
||||
#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
|
||||
#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
|
||||
#define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
|
||||
#define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1)
|
||||
enum nve4_pm_queries
|
||||
{
|
||||
NVE4_PM_QUERY_ACTIVE_CYCLES = 0,
|
||||
NVE4_PM_QUERY_ACTIVE_WARPS,
|
||||
NVE4_PM_QUERY_ATOM_COUNT,
|
||||
NVE4_PM_QUERY_BRANCH,
|
||||
NVE4_PM_QUERY_DIVERGENT_BRANCH,
|
||||
NVE4_PM_QUERY_GLD_REQUEST,
|
||||
NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY,
|
||||
NVE4_PM_QUERY_GST_TRANSACTIONS,
|
||||
NVE4_PM_QUERY_GST_MEM_DIV_REPLAY,
|
||||
NVE4_PM_QUERY_GRED_COUNT,
|
||||
NVE4_PM_QUERY_GST_REQUEST,
|
||||
NVE4_PM_QUERY_INST_EXECUTED,
|
||||
NVE4_PM_QUERY_INST_ISSUED,
|
||||
NVE4_PM_QUERY_INST_ISSUED1,
|
||||
NVE4_PM_QUERY_INST_ISSUED2,
|
||||
NVE4_PM_QUERY_L1_GLD_HIT,
|
||||
NVE4_PM_QUERY_L1_GLD_MISS,
|
||||
NVE4_PM_QUERY_L1_LOCAL_LD_HIT,
|
||||
NVE4_PM_QUERY_L1_LOCAL_LD_MISS,
|
||||
NVE4_PM_QUERY_L1_LOCAL_ST_HIT,
|
||||
NVE4_PM_QUERY_L1_LOCAL_ST_MISS,
|
||||
NVE4_PM_QUERY_L1_SHARED_LD_TRANSACTIONS,
|
||||
NVE4_PM_QUERY_L1_SHARED_ST_TRANSACTIONS,
|
||||
NVE4_PM_QUERY_LOCAL_LD,
|
||||
NVE4_PM_QUERY_LOCAL_LD_TRANSACTIONS,
|
||||
NVE4_PM_QUERY_LOCAL_ST,
|
||||
NVE4_PM_QUERY_LOCAL_ST_TRANSACTIONS,
|
||||
NVE4_PM_QUERY_PROF_TRIGGER_0,
|
||||
NVE4_PM_QUERY_PROF_TRIGGER_1,
|
||||
NVE4_PM_QUERY_PROF_TRIGGER_2,
|
||||
NVE4_PM_QUERY_PROF_TRIGGER_3,
|
||||
NVE4_PM_QUERY_PROF_TRIGGER_4,
|
||||
NVE4_PM_QUERY_PROF_TRIGGER_5,
|
||||
NVE4_PM_QUERY_PROF_TRIGGER_6,
|
||||
NVE4_PM_QUERY_PROF_TRIGGER_7,
|
||||
NVE4_PM_QUERY_SHARED_LD,
|
||||
NVE4_PM_QUERY_SHARED_LD_REPLAY,
|
||||
NVE4_PM_QUERY_SHARED_ST,
|
||||
NVE4_PM_QUERY_SHARED_ST_REPLAY,
|
||||
NVE4_PM_QUERY_SM_CTA_LAUNCHED,
|
||||
NVE4_PM_QUERY_THREADS_LAUNCHED,
|
||||
NVE4_PM_QUERY_UNCACHED_GLD_TRANSACTIONS,
|
||||
NVE4_PM_QUERY_WARPS_LAUNCHED,
|
||||
NVE4_PM_QUERY_METRIC_IPC,
|
||||
NVE4_PM_QUERY_METRIC_IPAC,
|
||||
NVE4_PM_QUERY_METRIC_IPEC,
|
||||
NVE4_PM_QUERY_METRIC_MP_OCCUPANCY,
|
||||
NVE4_PM_QUERY_METRIC_MP_EFFICIENCY,
|
||||
NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD,
|
||||
NVE4_PM_QUERY_COUNT
|
||||
NVE4_HW_SM_QUERY_ACTIVE_CYCLES = 0,
|
||||
NVE4_HW_SM_QUERY_ACTIVE_WARPS,
|
||||
NVE4_HW_SM_QUERY_ATOM_COUNT,
|
||||
NVE4_HW_SM_QUERY_BRANCH,
|
||||
NVE4_HW_SM_QUERY_DIVERGENT_BRANCH,
|
||||
NVE4_HW_SM_QUERY_GLD_REQUEST,
|
||||
NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY,
|
||||
NVE4_HW_SM_QUERY_GST_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY,
|
||||
NVE4_HW_SM_QUERY_GRED_COUNT,
|
||||
NVE4_HW_SM_QUERY_GST_REQUEST,
|
||||
NVE4_HW_SM_QUERY_INST_EXECUTED,
|
||||
NVE4_HW_SM_QUERY_INST_ISSUED,
|
||||
NVE4_HW_SM_QUERY_INST_ISSUED1,
|
||||
NVE4_HW_SM_QUERY_INST_ISSUED2,
|
||||
NVE4_HW_SM_QUERY_L1_GLD_HIT,
|
||||
NVE4_HW_SM_QUERY_L1_GLD_MISS,
|
||||
NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT,
|
||||
NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS,
|
||||
NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT,
|
||||
NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS,
|
||||
NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_LOCAL_LD,
|
||||
NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_LOCAL_ST,
|
||||
NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_0,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_1,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_2,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_3,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_4,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_5,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_6,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_7,
|
||||
NVE4_HW_SM_QUERY_SHARED_LD,
|
||||
NVE4_HW_SM_QUERY_SHARED_LD_REPLAY,
|
||||
NVE4_HW_SM_QUERY_SHARED_ST,
|
||||
NVE4_HW_SM_QUERY_SHARED_ST_REPLAY,
|
||||
NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED,
|
||||
NVE4_HW_SM_QUERY_THREADS_LAUNCHED,
|
||||
NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_WARPS_LAUNCHED,
|
||||
NVE4_HW_SM_QUERY_METRIC_IPC,
|
||||
NVE4_HW_SM_QUERY_METRIC_IPAC,
|
||||
NVE4_HW_SM_QUERY_METRIC_IPEC,
|
||||
NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY,
|
||||
NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY,
|
||||
NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD,
|
||||
NVE4_HW_SM_QUERY_COUNT
|
||||
};
|
||||
|
||||
#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
|
||||
#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
|
||||
#define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
|
||||
#define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1)
|
||||
enum nvc0_pm_queries
|
||||
{
|
||||
NVC0_PM_QUERY_ACTIVE_CYCLES = 0,
|
||||
NVC0_PM_QUERY_ACTIVE_WARPS,
|
||||
NVC0_PM_QUERY_ATOM_COUNT,
|
||||
NVC0_PM_QUERY_BRANCH,
|
||||
NVC0_PM_QUERY_DIVERGENT_BRANCH,
|
||||
NVC0_PM_QUERY_GLD_REQUEST,
|
||||
NVC0_PM_QUERY_GRED_COUNT,
|
||||
NVC0_PM_QUERY_GST_REQUEST,
|
||||
NVC0_PM_QUERY_INST_EXECUTED,
|
||||
NVC0_PM_QUERY_INST_ISSUED1_0,
|
||||
NVC0_PM_QUERY_INST_ISSUED1_1,
|
||||
NVC0_PM_QUERY_INST_ISSUED2_0,
|
||||
NVC0_PM_QUERY_INST_ISSUED2_1,
|
||||
NVC0_PM_QUERY_LOCAL_LD,
|
||||
NVC0_PM_QUERY_LOCAL_ST,
|
||||
NVC0_PM_QUERY_PROF_TRIGGER_0,
|
||||
NVC0_PM_QUERY_PROF_TRIGGER_1,
|
||||
NVC0_PM_QUERY_PROF_TRIGGER_2,
|
||||
NVC0_PM_QUERY_PROF_TRIGGER_3,
|
||||
NVC0_PM_QUERY_PROF_TRIGGER_4,
|
||||
NVC0_PM_QUERY_PROF_TRIGGER_5,
|
||||
NVC0_PM_QUERY_PROF_TRIGGER_6,
|
||||
NVC0_PM_QUERY_PROF_TRIGGER_7,
|
||||
NVC0_PM_QUERY_SHARED_LD,
|
||||
NVC0_PM_QUERY_SHARED_ST,
|
||||
NVC0_PM_QUERY_THREADS_LAUNCHED,
|
||||
NVC0_PM_QUERY_TH_INST_EXECUTED_0,
|
||||
NVC0_PM_QUERY_TH_INST_EXECUTED_1,
|
||||
NVC0_PM_QUERY_TH_INST_EXECUTED_2,
|
||||
NVC0_PM_QUERY_TH_INST_EXECUTED_3,
|
||||
NVC0_PM_QUERY_WARPS_LAUNCHED,
|
||||
NVC0_PM_QUERY_COUNT
|
||||
NVC0_HW_SM_QUERY_ACTIVE_CYCLES = 0,
|
||||
NVC0_HW_SM_QUERY_ACTIVE_WARPS,
|
||||
NVC0_HW_SM_QUERY_ATOM_COUNT,
|
||||
NVC0_HW_SM_QUERY_BRANCH,
|
||||
NVC0_HW_SM_QUERY_DIVERGENT_BRANCH,
|
||||
NVC0_HW_SM_QUERY_GLD_REQUEST,
|
||||
NVC0_HW_SM_QUERY_GRED_COUNT,
|
||||
NVC0_HW_SM_QUERY_GST_REQUEST,
|
||||
NVC0_HW_SM_QUERY_INST_EXECUTED,
|
||||
NVC0_HW_SM_QUERY_INST_ISSUED1_0,
|
||||
NVC0_HW_SM_QUERY_INST_ISSUED1_1,
|
||||
NVC0_HW_SM_QUERY_INST_ISSUED2_0,
|
||||
NVC0_HW_SM_QUERY_INST_ISSUED2_1,
|
||||
NVC0_HW_SM_QUERY_LOCAL_LD,
|
||||
NVC0_HW_SM_QUERY_LOCAL_ST,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_0,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_1,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_2,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_3,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_4,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_5,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_6,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_7,
|
||||
NVC0_HW_SM_QUERY_SHARED_LD,
|
||||
NVC0_HW_SM_QUERY_SHARED_ST,
|
||||
NVC0_HW_SM_QUERY_THREADS_LAUNCHED,
|
||||
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0,
|
||||
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1,
|
||||
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2,
|
||||
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3,
|
||||
NVC0_HW_SM_QUERY_WARPS_LAUNCHED,
|
||||
NVC0_HW_SM_QUERY_COUNT
|
||||
};
|
||||
|
||||
/* Driver statistics queries:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue