mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
gallium/radeon: add a heuristic for better (S)DMA performance
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
bb74152597
commit
60946c0d60
4 changed files with 32 additions and 0 deletions
|
|
@ -176,6 +176,20 @@ void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
|
|||
/* done at the end of DMA calls, so increment this. */
|
||||
rctx->num_dma_calls++;
|
||||
|
||||
/* IBs using too little memory are limited by the IB submission overhead.
|
||||
* IBs using too much memory are limited by the kernel/TTM overhead.
|
||||
* Too long IBs create CPU-GPU pipeline bubbles and add latency.
|
||||
*
|
||||
* This heuristic makes sure that DMA requests are executed
|
||||
* very soon after the call is made and lowers memory usage.
|
||||
* It improves texture upload performance by keeping the DMA
|
||||
* engine busy while uploads are being submitted.
|
||||
*/
|
||||
if (rctx->ws->cs_query_memory_usage(rctx->dma.cs) > 64 * 1024 * 1024) {
|
||||
rctx->dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
r600_need_dma_space(rctx, 1, NULL, NULL);
|
||||
|
||||
if (cs->cdw == 0) /* empty queue */
|
||||
|
|
|
|||
|
|
@ -683,6 +683,8 @@ struct radeon_winsys {
|
|||
*/
|
||||
boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
|
||||
|
||||
uint64_t (*cs_query_memory_usage)(struct radeon_winsys_cs *cs);
|
||||
|
||||
/**
|
||||
* Return the buffer list.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -563,6 +563,13 @@ static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64
|
|||
return gtt < ws->info.gart_size * 0.7;
|
||||
}
|
||||
|
||||
static uint64_t amdgpu_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
|
||||
{
|
||||
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
||||
|
||||
return cs->used_vram + cs->used_gart;
|
||||
}
|
||||
|
||||
static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_bo_list_item *list)
|
||||
{
|
||||
|
|
@ -828,6 +835,7 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
|
|||
ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer;
|
||||
ws->base.cs_validate = amdgpu_cs_validate;
|
||||
ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
|
||||
ws->base.cs_query_memory_usage = amdgpu_cs_query_memory_usage;
|
||||
ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
|
||||
ws->base.cs_flush = amdgpu_cs_flush;
|
||||
ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
|
||||
|
|
|
|||
|
|
@ -398,6 +398,13 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui
|
|||
return gtt < cs->ws->info.gart_size * 0.7;
|
||||
}
|
||||
|
||||
static uint64_t radeon_drm_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
|
||||
{
|
||||
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
|
||||
|
||||
return cs->csc->used_vram + cs->csc->used_gart;
|
||||
}
|
||||
|
||||
static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_bo_list_item *list)
|
||||
{
|
||||
|
|
@ -671,6 +678,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
|
|||
ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
|
||||
ws->base.cs_validate = radeon_drm_cs_validate;
|
||||
ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
|
||||
ws->base.cs_query_memory_usage = radeon_drm_cs_query_memory_usage;
|
||||
ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
|
||||
ws->base.cs_flush = radeon_drm_cs_flush;
|
||||
ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue