gallium/radeon: add a new HUD query for the number of mapped buffers

Useful when debugging applications which map a ton of buffers,
and also because we used to run into Linux's limit on the number
of simultaneous memory mappings per process (vm.max_map_count).

v2: - update the commit message

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Samuel Pitoiset 2017-01-23 21:44:45 +01:00
parent 56495080ed
commit cff199ceb7
9 changed files with 18 additions and 0 deletions

View file

@ -65,6 +65,7 @@ static enum radeon_value_id winsys_id_from_type(unsigned type)
case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM;
case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT;
case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS;
case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
@ -133,6 +134,7 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
case R600_QUERY_NUM_MAPPED_BUFFERS:
query->begin_result = 0;
break;
case R600_QUERY_BUFFER_WAIT_TIME:
@ -241,6 +243,7 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_MAPPED_BUFFERS:
case R600_QUERY_NUM_GFX_IBS:
case R600_QUERY_NUM_SDMA_IBS:
case R600_QUERY_NUM_BYTES_MOVED:
@ -1722,6 +1725,7 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),

View file

@ -60,6 +60,7 @@ enum {
R600_QUERY_MAPPED_VRAM,
R600_QUERY_MAPPED_GTT,
R600_QUERY_BUFFER_WAIT_TIME,
R600_QUERY_NUM_MAPPED_BUFFERS,
R600_QUERY_NUM_GFX_IBS,
R600_QUERY_NUM_SDMA_IBS,
R600_QUERY_NUM_BYTES_MOVED,

View file

@ -81,6 +81,7 @@ enum radeon_value_id {
RADEON_MAPPED_VRAM,
RADEON_MAPPED_GTT,
RADEON_BUFFER_WAIT_TIME_NS,
RADEON_NUM_MAPPED_BUFFERS,
RADEON_TIMESTAMP,
RADEON_NUM_GFX_IBS,
RADEON_NUM_SDMA_IBS,

View file

@ -181,6 +181,7 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
bo->ws->mapped_vram -= bo->base.size;
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
bo->ws->mapped_gtt -= bo->base.size;
bo->ws->num_mapped_buffers--;
}
FREE(bo);
@ -308,6 +309,7 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
real->ws->mapped_vram += real->base.size;
else if (real->initial_domain & RADEON_DOMAIN_GTT)
real->ws->mapped_gtt += real->base.size;
real->ws->num_mapped_buffers++;
}
return (uint8_t*)cpu + offset;
}
@ -327,6 +329,7 @@ static void amdgpu_bo_unmap(struct pb_buffer *buf)
real->ws->mapped_vram -= real->base.size;
else if (real->initial_domain & RADEON_DOMAIN_GTT)
real->ws->mapped_gtt -= real->base.size;
real->ws->num_mapped_buffers--;
}
amdgpu_bo_cpu_unmap(real->bo);

View file

@ -424,6 +424,8 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
return ws->mapped_gtt;
case RADEON_BUFFER_WAIT_TIME_NS:
return ws->buffer_wait_time;
case RADEON_NUM_MAPPED_BUFFERS:
return ws->num_mapped_buffers;
case RADEON_TIMESTAMP:
amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
return retval;

View file

@ -64,6 +64,7 @@ struct amdgpu_winsys {
uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
uint64_t num_gfx_IBs;
uint64_t num_sdma_IBs;
uint64_t num_mapped_buffers;
struct radeon_info info;

View file

@ -382,6 +382,7 @@ void radeon_bo_destroy(struct pb_buffer *_buf)
bo->rws->mapped_vram -= bo->base.size;
else
bo->rws->mapped_gtt -= bo->base.size;
bo->rws->num_mapped_buffers--;
}
FREE(bo);
@ -458,6 +459,7 @@ void *radeon_bo_do_map(struct radeon_bo *bo)
bo->rws->mapped_vram += bo->base.size;
else
bo->rws->mapped_gtt += bo->base.size;
bo->rws->num_mapped_buffers++;
pipe_mutex_unlock(bo->u.real.map_mutex);
return (uint8_t*)bo->u.real.ptr + offset;
@ -570,6 +572,7 @@ static void radeon_bo_unmap(struct pb_buffer *_buf)
bo->rws->mapped_vram -= bo->base.size;
else
bo->rws->mapped_gtt -= bo->base.size;
bo->rws->num_mapped_buffers--;
pipe_mutex_unlock(bo->u.real.map_mutex);
}

View file

@ -614,6 +614,8 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws,
return ws->mapped_gtt;
case RADEON_BUFFER_WAIT_TIME_NS:
return ws->buffer_wait_time;
case RADEON_NUM_MAPPED_BUFFERS:
return ws->num_mapped_buffers;
case RADEON_TIMESTAMP:
if (ws->info.drm_minor < 20 || ws->gen < DRV_R600) {
assert(0);

View file

@ -81,6 +81,7 @@ struct radeon_drm_winsys {
uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
uint64_t num_gfx_IBs;
uint64_t num_sdma_IBs;
uint64_t num_mapped_buffers;
uint32_t next_bo_hash;
enum radeon_generation gen;