v3d/simulator: implement performance counters

Add support for performance counters when using the simulator.

v2 (Iago):
 - Remove brackets from single-line conditionals
 - Rename channel to channels
 - Ensure perfmon start/stop functions are implemented in all versions
 - Use an array for perfmons instead of a hash table
 - Implement performance counters in CSD

v3 (Iago):
 - Rename PERFMON_CHUNKS to PERFMONS_ALLOC_SIZE.
 - Merge increasing lastid and ensuring space in a single function.

v4 (Iago):
 - Assert perfid <= perfmons_size.

v7 (Iago):
 - Do not stop perfmon on each submission

v8 (Iago):
 - Add comment about stopping the perfmon when retrieving values.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10666>
This commit is contained in:
Juan A. Suarez Romero 2021-04-27 18:11:18 +02:00 committed by Marge Bot
parent 685281278e
commit 4f7043fc0c
3 changed files with 209 additions and 1 deletions

View file

@ -87,6 +87,9 @@ static struct v3d_simulator_state {
/** Mapping from GEM fd to struct v3d_simulator_file * */
struct hash_table *fd_map;
/** Last performance monitor ID. */
uint32_t last_perfid;
struct util_dynarray bin_oom;
int refcount;
} sim_state = {
@ -100,6 +103,11 @@ struct v3d_simulator_file {
/** Mapping from GEM handle to struct v3d_simulator_bo * */
struct hash_table *bo_map;
/** Dynamic array with performance monitors */
struct v3d_simulator_perfmon **perfmons;
uint32_t perfmons_size;
uint32_t active_perfid;
struct mem_block *gmp;
void *gmp_vaddr;
@ -121,12 +129,34 @@ struct v3d_simulator_bo {
int handle;
};
/**
 * A performance monitor created through the PERFMON_CREATE ioctl.
 *
 * Instances are allocated with the owning simulator file as ralloc parent
 * and stored in that file's perfmons array at index (id - 1).
 */
struct v3d_simulator_perfmon {
/** Number of entries of counters[] and values[] that are in use. */
uint32_t ncounters;
/** Counter IDs to sample, copied from the create ioctl arguments. */
uint8_t counters[DRM_V3D_MAX_PERF_COUNTERS];
/**
 * Accumulated results, one per counter.  Each time the perfmon is
 * stopped the counter registers are read and added to these totals;
 * the get-values ioctl copies the first ncounters entries out.
 */
uint64_t values[DRM_V3D_MAX_PERF_COUNTERS];
};
/**
 * Turns an integer into a pointer-sized value usable as a hash table key.
 *
 * The integer is widened to uintptr_t first so that the conversion to a
 * pointer happens between types of the same size.
 */
static void *
int_to_key(int key)
{
        const uintptr_t widened = (uintptr_t)key;

        return (void *)widened;
}
/** Granularity, in slots, by which a file's perfmon array grows. */
#define PERFMONS_ALLOC_SIZE 100

/**
 * Reserves the next performance monitor ID and guarantees that sim_file's
 * perfmon array has a slot for it.
 *
 * IDs are 1-based (ID n lives in slot n - 1) and come from the
 * simulator-wide sim_state.last_perfid counter, whereas the array belongs
 * to a single file.  The new ID can therefore be ahead of this file's array
 * by more than one growth step, so the array is grown by as many
 * PERFMONS_ALLOC_SIZE steps as needed rather than by exactly one.  Every
 * newly added slot is set to NULL so that looking up an ID that was never
 * stored in this file does not return an uninitialized pointer.
 *
 * The create ioctl calls this with sim_state.mutex held.
 *
 * NOTE(review): a failed reralloc() is not reported to the caller.
 *
 * \return the newly reserved ID.
 */
static uint32_t
perfmons_next_id(struct v3d_simulator_file *sim_file) {
        sim_state.last_perfid++;

        if (sim_state.last_perfid > sim_file->perfmons_size) {
                const uint32_t old_size = sim_file->perfmons_size;
                const uint32_t missing = sim_state.last_perfid - old_size;
                /* Round the shortfall up to whole allocation steps. */
                const uint32_t steps = (missing + PERFMONS_ALLOC_SIZE - 1) /
                                       PERFMONS_ALLOC_SIZE;
                const uint32_t new_size = old_size +
                                          steps * PERFMONS_ALLOC_SIZE;

                sim_file->perfmons = reralloc(sim_file,
                                              sim_file->perfmons,
                                              struct v3d_simulator_perfmon *,
                                              new_size);

                /* reralloc() leaves the added tail uninitialized. */
                for (uint32_t slot = old_size; slot < new_size; slot++)
                        sim_file->perfmons[slot] = NULL;

                sim_file->perfmons_size = new_size;
        }

        return sim_state.last_perfid;
}
static struct v3d_simulator_file *
v3d_get_simulator_file_for_fd(int fd)
{
@ -357,6 +387,46 @@ v3d_simulator_unpin_bos(struct v3d_simulator_file *file,
return 0;
}
/**
 * Looks up a performance monitor by ID in the simulator file bound to fd.
 *
 * IDs are 1-based; ID n is stored in slot n - 1 of the file's perfmons
 * array.  The array is read with sim_state.mutex held.
 *
 * \return the content of the slot (NULL once the perfmon has been
 *         destroyed), or NULL when perfid is 0, is larger than the last ID
 *         handed out, or lies beyond this file's array.
 */
static struct v3d_simulator_perfmon *
v3d_get_simulator_perfmon(int fd, uint32_t perfid)
{
        if (!perfid || perfid > sim_state.last_perfid)
                return NULL;

        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
        struct v3d_simulator_perfmon *perfmon = NULL;

        mtx_lock(&sim_state.mutex);
        /* last_perfid is shared by all files while the array is per file,
         * so an ID created on another file passes the check above and can
         * still be out of range here.  Use a runtime check (an assert
         * would compile out with NDEBUG) and report "not found".
         */
        if (perfid <= file->perfmons_size)
                perfmon = file->perfmons[perfid - 1];
        mtx_unlock(&sim_state.mutex);

        return perfmon;
}
/**
 * Makes perfid the active performance monitor of the file bound to fd.
 *
 * Nothing is done when perfid is already the active one.  Otherwise the
 * previously active perfmon, if it can still be looked up, is stopped so
 * that its values array receives the counter readings, and the requested
 * perfmon, if it exists, is started with its list of counters.
 * active_perfid is updated in either case, so passing an ID that resolves
 * to no perfmon (such as 0) simply leaves no perfmon running.
 *
 * Both operations go through the v3d41_ entry points regardless of
 * sim_state.ver.
 */
static void
v3d_simulator_perfmon_switch(int fd, uint32_t perfid)
{
        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);

        if (file->active_perfid != perfid) {
                struct v3d_simulator_perfmon *outgoing =
                        v3d_get_simulator_perfmon(fd, file->active_perfid);
                if (outgoing)
                        v3d41_simulator_perfmon_stop(sim_state.v3d,
                                                     outgoing->ncounters,
                                                     outgoing->values);

                struct v3d_simulator_perfmon *incoming =
                        v3d_get_simulator_perfmon(fd, perfid);
                if (incoming)
                        v3d41_simulator_perfmon_start(sim_state.v3d,
                                                      incoming->ncounters,
                                                      incoming->counters);

                file->active_perfid = perfid;
        }
}
static int
v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
{
@ -369,6 +439,9 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
mtx_lock(&sim_state.submit_lock);
bin_fd = fd;
v3d_simulator_perfmon_switch(fd, submit->perfmon_id);
if (sim_state.ver >= 41)
v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
else
@ -530,6 +603,8 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
for (int i = 0; i < args->bo_handle_count; i++)
v3d_simulator_copy_in_handle(file, bo_handles[i]);
v3d_simulator_perfmon_switch(fd, args->perfmon_id);
if (sim_state.ver >= 41)
ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
file->gmp->ofs);
@ -542,6 +617,79 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
return ret;
}
/**
 * Handles DRM_IOCTL_V3D_PERFMON_CREATE for the simulator.
 *
 * Validates the request, allocates a perfmon owned by the simulator file
 * bound to fd, copies the requested counter IDs into it and stores it in
 * the file's perfmons array under a freshly reserved ID, which is returned
 * to the caller through args->id.
 *
 * \return 0 on success; -EINVAL when ncounters is 0 or exceeds
 *         DRM_V3D_MAX_PERF_COUNTERS, or when any counter ID is not below
 *         V3D_PERFCNT_NUM; -ENOMEM when the perfmon cannot be allocated.
 */
static int
v3d_simulator_perfmon_create_ioctl(int fd, struct drm_v3d_perfmon_create *args)
{
        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);

        if (args->ncounters == 0 ||
            args->ncounters > DRM_V3D_MAX_PERF_COUNTERS)
                return -EINVAL;

        /* Reject bad counter IDs up front so that nothing has to be
         * allocated and released again for an invalid request.
         */
        for (uint32_t i = 0; i < args->ncounters; i++) {
                if (args->counters[i] >= V3D_PERFCNT_NUM)
                        return -EINVAL;
        }

        struct v3d_simulator_perfmon *perfmon =
                rzalloc(file, struct v3d_simulator_perfmon);
        if (!perfmon)
                return -ENOMEM;

        perfmon->ncounters = args->ncounters;
        for (uint32_t i = 0; i < args->ncounters; i++)
                perfmon->counters[i] = args->counters[i];

        /* Reserving the ID and publishing the perfmon in its slot happen
         * under the same lock that lookups take.
         */
        mtx_lock(&sim_state.mutex);
        args->id = perfmons_next_id(file);
        file->perfmons[args->id - 1] = perfmon;
        mtx_unlock(&sim_state.mutex);

        return 0;
}
/**
 * Handles DRM_IOCTL_V3D_PERFMON_DESTROY for the simulator.
 *
 * Looks up the perfmon named by args->id, clears its slot in the file's
 * perfmons array while holding sim_state.mutex, and then releases it.
 *
 * \return 0 on success, -EINVAL when args->id does not resolve to a
 *         perfmon.
 */
static int
v3d_simulator_perfmon_destroy_ioctl(int fd, struct drm_v3d_perfmon_destroy *args)
{
        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
        struct v3d_simulator_perfmon *victim =
                v3d_get_simulator_perfmon(fd, args->id);
        int ret = -EINVAL;

        if (victim) {
                /* Unpublish the perfmon before freeing it. */
                mtx_lock(&sim_state.mutex);
                file->perfmons[args->id - 1] = NULL;
                mtx_unlock(&sim_state.mutex);

                ralloc_free(victim);
                ret = 0;
        }

        return ret;
}
/**
 * Handles DRM_IOCTL_V3D_PERFMON_GET_VALUES for the simulator.
 *
 * Copies the accumulated values of the perfmon named by args->id, one
 * uint64_t per configured counter, to the buffer addressed by
 * args->values_ptr.
 *
 * \return 0 on success, -EINVAL when args->id does not resolve to a
 *         perfmon.
 */
static int
v3d_simulator_perfmon_get_values_ioctl(int fd, struct drm_v3d_perfmon_get_values *args)
{
        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);

        mtx_lock(&sim_state.submit_lock);

        /* Stop the perfmon if it is still active: the counter readings are
         * only added to the perfmon's values when it is stopped, so this
         * brings them up to date before they are copied out.
         */
        if (args->id == file->active_perfid)
                v3d_simulator_perfmon_switch(fd, 0);

        mtx_unlock(&sim_state.submit_lock);

        struct v3d_simulator_perfmon *perfmon =
                v3d_get_simulator_perfmon(fd, args->id);
        if (!perfmon)
                return -EINVAL;

        /* values_ptr carries the destination address; convert it through
         * uintptr_t so the cast to a pointer is well-formed even if the
         * field is wider than a pointer.
         */
        memcpy((void *)(uintptr_t)args->values_ptr, perfmon->values,
               perfmon->ncounters * sizeof(perfmon->values[0]));

        return 0;
}
int
v3d_simulator_ioctl(int fd, unsigned long request, void *args)
{
@ -575,6 +723,15 @@ v3d_simulator_ioctl(int fd, unsigned long request, void *args)
case DRM_IOCTL_V3D_SUBMIT_CSD:
return v3d_simulator_submit_csd_ioctl(fd, args);
case DRM_IOCTL_V3D_PERFMON_CREATE:
return v3d_simulator_perfmon_create_ioctl(fd, args);
case DRM_IOCTL_V3D_PERFMON_DESTROY:
return v3d_simulator_perfmon_destroy_ioctl(fd, args);
case DRM_IOCTL_V3D_PERFMON_GET_VALUES:
return v3d_simulator_perfmon_get_values_ioctl(fd, args);
case DRM_IOCTL_GEM_OPEN:
case DRM_IOCTL_GEM_FLINK:
return drmIoctl(fd, request, args);

View file

@ -267,7 +267,7 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
args->value = 1;
return 0;
case DRM_V3D_PARAM_SUPPORTS_PERFMON:
args->value = 0;
args->value = V3D_VERSION >= 41;
return 0;
}
@ -501,4 +501,49 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
}
}
#if V3D_VERSION >= 41
/* Address of the register holding the value of performance counter x;
 * consecutive counters are 4 apart starting at V3D_PCTR_0_PCTR0.
 */
#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
/* Address of the x-th source-select register; each one configures four
 * counters, starting with V3D_PCTR_0_SRC_0_3.
 */
#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
/* Bit position of channel x (0..3) inside a source-select register. */
#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
/* Width in bits of one channel's source field.
 * NOTE(review): 7 bits (bits 6:0 of each byte) matches the Linux v3d
 * register definitions; confirm against the simulator's register headers.
 */
#define V3D_PCTR_0_SRC_N_WIDTH 7
/* Mask covering channel x's source field.  BITFIELD_RANGE takes the first
 * bit and a bit count (simulator_perfmon_start uses it that way to build
 * its counter mask), so the second argument is the field width rather
 * than the position of the field's last bit.
 */
#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
V3D_PCTR_0_SRC_N_WIDTH))
#endif
/**
 * Programs and enables the first ncounters performance counters.
 *
 * events[k] selects what counter k samples.  The selections are packed
 * four to a source register, one byte-aligned field per counter, and then
 * the affected counters are cleared, their overflow bits are written and
 * they are enabled.  Compiles to a no-op for V3D versions below 4.1.
 *
 * \param v3d        simulator hardware handle.
 * \param ncounters  number of entries in events.
 * \param events     counter selections, ncounters entries.
 */
void
v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
                              uint32_t ncounters,
                              uint8_t *events)
{
#if V3D_VERSION >= 41
        /* One bit per counter being configured. */
        const uint32_t mask = BITFIELD_RANGE(0, ncounters);

        for (uint32_t i = 0; i < ncounters; i += 4) {
                const uint32_t source = i / 4;
                uint32_t channels = 0;

                /* Widen before shifting: a uint8_t promotes to signed int,
                 * and shifting a value with bit 7 set by 24 would overflow
                 * it.
                 */
                for (uint32_t j = 0; j < 4 && (i + j) < ncounters; j++)
                        channels |= (uint32_t)events[i + j] <<
                                    V3D_PCTR_0_SRC_N_SHIFT(j);

                V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels);
        }

        V3D_WRITE(V3D_PCTR_0_CLR, mask);
        V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
        V3D_WRITE(V3D_PCTR_0_EN, mask);
#endif
}
/**
 * Collects the counter readings and disables the performance counters.
 *
 * The current value of each of the first ncounters counter registers is
 * added to the matching entry of values, after which the enable register
 * is written with 0.  Compiles to a no-op for V3D versions below 4.1.
 *
 * \param v3d        simulator hardware handle.
 * \param ncounters  number of counters to read.
 * \param values     running totals to add the readings to, ncounters
 *                   entries.
 */
void
v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
                             uint32_t ncounters,
                             uint64_t *values)
{
#if V3D_VERSION >= 41
        uint32_t idx = 0;

        while (idx < ncounters) {
                values[idx] += V3D_READ(V3D_PCTR_0_PCTR_N(idx));
                idx++;
        }

        V3D_WRITE(V3D_PCTR_0_EN, 0);
#endif
}
#endif /* USE_V3D_SIMULATOR */

View file

@ -44,3 +44,9 @@ int v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
/* Version-specific handler for CSD submissions.  The generic
 * v3d_simulator_submit_csd_ioctl() forwards its ioctl arguments together
 * with the submitting file's gmp->ofs as gmp_offset and returns this
 * function's result.
 */
int v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
struct drm_v3d_submit_csd *args,
uint32_t gmp_offset);
/* Configures the first ncounters performance counters to sample the
 * selections in events (ncounters entries), then clears and enables them.
 * Does nothing when built for V3D versions below 4.1.
 */
void v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
uint32_t ncounters,
uint8_t *events);
/* Adds the current reading of each of the first ncounters performance
 * counters to the matching entry of values, then disables the counters.
 * Does nothing when built for V3D versions below 4.1.
 */
void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
uint32_t ncounters,
uint64_t *values);