lima: implement PLB PP stream cache

Generating a PLB PP stream is expensive. The content of a PLB PP stream
depends on the damage region, and if the damage consists of several rects
it's impossible to come up with a simple cache key.

Simplify the damage to a single bounding box so that we have a simple key
and can cache the PLB PP stream. The cache size is limited to 0.1% of system
RAM, and once the limit is reached the least recently used entries are dropped.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3834>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3834>
This commit is contained in:
Vasily Khoruzhick 2020-02-16 02:25:10 -08:00 committed by Marge Bot
parent 7edde3d26b
commit 8021daeb1f
6 changed files with 121 additions and 120 deletions

View file

@ -120,6 +120,16 @@ lima_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
job->resolve &= ~PIPE_CLEAR_COLOR0;
}
/* Per-entry delete callback for the PLB PP stream hash table.
 *
 * Takes the cached stream stored in entry->data, unlinks it from the LRU
 * list it is on, releases the reference held on its BO and finally frees
 * the stream object itself.
 */
static void
plb_pp_stream_delete_fn(struct hash_entry *entry)
{
   struct lima_ctx_plb_pp_stream *stream = entry->data;

   list_del(&stream->lru_list);
   lima_bo_unreference(stream->bo);
   ralloc_free(stream);
}
static void
lima_context_destroy(struct pipe_context *pctx)
{
@ -154,8 +164,8 @@ lima_context_destroy(struct pipe_context *pctx)
if (ctx->gp_output)
lima_bo_unreference(ctx->gp_output);
if (ctx->plb_pp_stream)
assert(!_mesa_hash_table_num_entries(ctx->plb_pp_stream));
_mesa_hash_table_destroy(ctx->plb_pp_stream,
plb_pp_stream_delete_fn);
lima_context_free_drm_ctx(screen, ctx->id);
@ -267,12 +277,11 @@ lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
plb_gp_stream[j] = ctx->plb[i]->va + LIMA_CTX_PLB_BLK_SIZE * j;
}
if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) {
ctx->plb_pp_stream = _mesa_hash_table_create(
ctx, plb_pp_stream_hash, plb_pp_stream_compare);
if (!ctx->plb_pp_stream)
goto err_out;
}
list_inithead(&ctx->plb_pp_stream_lru_list);
ctx->plb_pp_stream = _mesa_hash_table_create(
ctx, plb_pp_stream_hash, plb_pp_stream_compare);
if (!ctx->plb_pp_stream)
goto err_out;
if (!lima_job_init(ctx))
goto err_out;

View file

@ -25,6 +25,7 @@
#ifndef H_LIMA_CONTEXT
#define H_LIMA_CONTEXT
#include "util/list.h"
#include "util/slab.h"
#include "pipe/p_context.h"
@ -138,16 +139,19 @@ struct lima_texture_stateobj {
};
struct lima_ctx_plb_pp_stream_key {
uint32_t plb_index;
uint32_t tiled_w;
uint32_t tiled_h;
uint16_t plb_index;
/* Coordinates are in tiles */
uint16_t minx, miny, maxx, maxy;
/* FB params */
uint16_t shift_w, shift_h;
uint16_t block_w, block_h;
};
struct lima_ctx_plb_pp_stream {
struct list_head lru_list;
struct lima_ctx_plb_pp_stream_key key;
uint32_t refcnt;
struct lima_bo *bo;
uint32_t offset[4];
uint32_t offset[8];
};
struct lima_pp_stream_state {
@ -217,7 +221,9 @@ struct lima_context {
uint32_t gp_output_point_size_offt;
struct hash_table *plb_pp_stream;
struct list_head plb_pp_stream_lru_list;
uint32_t plb_index;
size_t plb_stream_cache_size;
struct lima_ctx_buff_state buffer_state[lima_ctx_buff_num];

View file

@ -494,22 +494,6 @@ lima_get_pp_stream_size(int num_pp, int tiled_w, int tiled_h, uint32_t *off)
return offset;
}
/* Tell whether the point (x, y) is covered by the damage description.
 *
 * Returns true when there is no damage state or no region array at all,
 * or when (x, y) lies inside at least one of the num_region rects
 * (min bounds inclusive, max bounds exclusive). Returns false otherwise.
 */
static bool
inside_damage_region(int x, int y, struct lima_damage_region *ds)
{
   /* No damage information: every point counts as damaged. */
   if (!ds)
      return true;
   if (!ds->region)
      return true;

   for (int idx = 0; idx < ds->num_region; idx++) {
      struct pipe_scissor_state *rect = &ds->region[idx];
      bool in_x = x >= rect->minx && x < rect->maxx;
      bool in_y = y >= rect->miny && y < rect->maxy;

      if (in_x && in_y)
         return true;
   }

   return false;
}
static void
lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y,
int tiled_w, int tiled_h)
@ -517,7 +501,6 @@ lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y,
struct lima_context *ctx = job->ctx;
struct lima_pp_stream_state *ps = &ctx->pp_stream;
struct lima_job_fb_info *fb = &job->fb;
struct lima_damage_region *damage = lima_job_get_damage(job);
struct lima_screen *screen = lima_screen(ctx->base.screen);
int i, num_pp = screen->num_pp;
@ -551,9 +534,6 @@ lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y,
x += off_x;
y += off_y;
if (!inside_damage_region(x, y, damage))
continue;
int pp = index % num_pp;
int offset = ((y >> fb->shift_h) * fb->block_w +
(x >> fb->shift_w)) * LIMA_CTX_PLB_BLK_SIZE;
@ -581,6 +561,27 @@ lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y,
}
}
/* Evict cached PLB PP streams while the cache is over its size limit.
 *
 * Walks ctx->plb_pp_stream_lru_list from the head. For as long as
 * ctx->plb_stream_cache_size exceeds lima_plb_pp_stream_cache_size, each
 * visited stream is removed from the hash table (if present there) and from
 * the LRU list, its BO size is subtracted from the running total, its BO
 * reference is dropped and the stream object is freed.
 *
 * NOTE(review): the running total is decreased by bo->size here, while the
 * insertion path increases it by the requested stream size. Confirm that
 * lima_bo_create() does not round the size up, otherwise the counter can
 * drift.
 */
static void
lima_free_stale_pp_stream_bo(struct lima_context *ctx)
{
   list_for_each_entry_safe(struct lima_ctx_plb_pp_stream, stream,
                            &ctx->plb_pp_stream_lru_list, lru_list) {
      /* Back within budget: nothing more to evict. */
      if (ctx->plb_stream_cache_size <= lima_plb_pp_stream_cache_size)
         break;

      struct lima_bo *bo = stream->bo;
      struct hash_entry *he =
         _mesa_hash_table_search(ctx->plb_pp_stream, &stream->key);

      /* Unlink from both containers before releasing anything. */
      list_del(&stream->lru_list);
      if (he)
         _mesa_hash_table_remove(ctx->plb_pp_stream, he);

      ctx->plb_stream_cache_size -= bo->size;
      lima_bo_unreference(bo);

      ralloc_free(stream);
   }
}
static void
lima_update_damage_pp_stream(struct lima_job *job)
{
@ -609,52 +610,69 @@ lima_update_damage_pp_stream(struct lima_job *job)
bound.maxx = MIN2(bound.maxx, fb->tiled_w);
bound.maxy = MIN2(bound.maxy, fb->tiled_h);
int tiled_w = bound.maxx - bound.minx;
int tiled_h = bound.maxy - bound.miny;
struct lima_screen *screen = lima_screen(ctx->base.screen);
int size = lima_get_pp_stream_size(
screen->num_pp, tiled_w, tiled_h, ctx->pp_stream.offset);
ctx->pp_stream.map = lima_job_create_stream_bo(
job, LIMA_PIPE_PP, size, &ctx->pp_stream.va);
lima_generate_pp_stream(job, bound.minx, bound.miny, tiled_w, tiled_h);
}
static void
lima_update_full_pp_stream(struct lima_job *job)
{
struct lima_context *ctx = job->ctx;
struct lima_job_fb_info *fb = &job->fb;
struct lima_ctx_plb_pp_stream_key key = {
.plb_index = ctx->plb_index,
.tiled_w = fb->tiled_w,
.tiled_h = fb->tiled_h,
.minx = bound.minx,
.miny = bound.miny,
.maxx = bound.maxx,
.maxy = bound.maxy,
.shift_w = fb->shift_w,
.shift_h = fb->shift_h,
.block_w = fb->block_w,
.block_h = fb->block_h,
};
struct hash_entry *entry =
_mesa_hash_table_search(ctx->plb_pp_stream, &key);
struct lima_ctx_plb_pp_stream *s = entry->data;
if (entry) {
struct lima_ctx_plb_pp_stream *s = entry->data;
if (s->bo) {
ctx->pp_stream.map = lima_bo_map(s->bo);
ctx->pp_stream.va = s->bo->va;
memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
}
else {
struct lima_screen *screen = lima_screen(ctx->base.screen);
int size = lima_get_pp_stream_size(
screen->num_pp, fb->tiled_w, fb->tiled_h, s->offset);
s->bo = lima_bo_create(screen, size, 0);
list_del(&s->lru_list);
list_addtail(&s->lru_list, &ctx->plb_pp_stream_lru_list);
ctx->pp_stream.map = lima_bo_map(s->bo);
ctx->pp_stream.va = s->bo->va;
memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
lima_generate_pp_stream(job, 0, 0, fb->tiled_w, fb->tiled_h);
lima_job_add_bo(job, LIMA_PIPE_PP, s->bo, LIMA_SUBMIT_BO_READ);
return;
}
lima_free_stale_pp_stream_bo(ctx);
struct lima_screen *screen = lima_screen(ctx->base.screen);
struct lima_ctx_plb_pp_stream *s =
rzalloc(ctx->plb_pp_stream, struct lima_ctx_plb_pp_stream);
list_inithead(&s->lru_list);
s->key.plb_index = ctx->plb_index;
s->key.minx = bound.minx;
s->key.maxx = bound.maxx;
s->key.miny = bound.miny;
s->key.maxy = bound.maxy;
s->key.shift_w = fb->shift_w;
s->key.shift_h = fb->shift_h;
s->key.block_w = fb->block_w;
s->key.block_h = fb->block_h;
int tiled_w = bound.maxx - bound.minx;
int tiled_h = bound.maxy - bound.miny;
int size = lima_get_pp_stream_size(
screen->num_pp, tiled_w, tiled_h, s->offset);
s->bo = lima_bo_create(screen, size, 0);
ctx->pp_stream.map = lima_bo_map(s->bo);
ctx->pp_stream.va = s->bo->va;
memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
lima_generate_pp_stream(job, bound.minx, bound.miny, tiled_w, tiled_h);
ctx->plb_stream_cache_size += size;
list_addtail(&s->lru_list, &ctx->plb_pp_stream_lru_list);
_mesa_hash_table_insert(ctx->plb_pp_stream, &s->key, s);
lima_job_add_bo(job, LIMA_PIPE_PP, s->bo, LIMA_SUBMIT_BO_READ);
}
@ -673,12 +691,13 @@ static void
lima_update_pp_stream(struct lima_job *job)
{
struct lima_context *ctx = job->ctx;
struct lima_screen *screen = lima_screen(ctx->base.screen);
struct lima_damage_region *damage = lima_job_get_damage(job);
if ((damage && damage->region) || !lima_damage_fullscreen(job))
if ((screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) ||
(damage && damage->region) || !lima_damage_fullscreen(job))
lima_update_damage_pp_stream(job);
else if (ctx->plb_pp_stream)
lima_update_full_pp_stream(job);
else
/* Mali450 doesn't need full PP stream */
ctx->pp_stream.map = NULL;
}

View file

@ -507,35 +507,6 @@ lima_surface_create(struct pipe_context *pctx,
surf->reload = true;
struct lima_context *ctx = lima_context(pctx);
if (ctx->plb_pp_stream) {
struct lima_ctx_plb_pp_stream_key key = {
.tiled_w = surf->tiled_w,
.tiled_h = surf->tiled_h,
};
for (int i = 0; i < lima_ctx_num_plb; i++) {
key.plb_index = i;
struct hash_entry *entry =
_mesa_hash_table_search(ctx->plb_pp_stream, &key);
if (entry) {
struct lima_ctx_plb_pp_stream *s = entry->data;
s->refcnt++;
}
else {
struct lima_ctx_plb_pp_stream *s =
ralloc(ctx->plb_pp_stream, struct lima_ctx_plb_pp_stream);
s->key.plb_index = i;
s->key.tiled_w = surf->tiled_w;
s->key.tiled_h = surf->tiled_h;
s->refcnt = 1;
s->bo = NULL;
_mesa_hash_table_insert(ctx->plb_pp_stream, &s->key, s);
}
}
}
return &surf->base;
}
@ -543,29 +514,6 @@ static void
lima_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
{
struct lima_surface *surf = lima_surface(psurf);
/* psurf->context may be not equal with pctx (i.e. glxinfo) */
struct lima_context *ctx = lima_context(psurf->context);
if (ctx->plb_pp_stream) {
struct lima_ctx_plb_pp_stream_key key = {
.tiled_w = surf->tiled_w,
.tiled_h = surf->tiled_h,
};
for (int i = 0; i < lima_ctx_num_plb; i++) {
key.plb_index = i;
struct hash_entry *entry =
_mesa_hash_table_search(ctx->plb_pp_stream, &key);
struct lima_ctx_plb_pp_stream *s = entry->data;
if (--s->refcnt == 0) {
if (s->bo)
lima_bo_unreference(s->bo);
_mesa_hash_table_remove(ctx->plb_pp_stream, entry);
ralloc_free(s);
}
}
}
pipe_resource_reference(&psurf->texture, NULL);
FREE(surf);

View file

@ -44,6 +44,7 @@
#include "xf86drm.h"
int lima_plb_max_blk = 0;
int lima_plb_pp_stream_cache_size = 0;
static void
lima_screen_destroy(struct pipe_screen *pscreen)
@ -501,11 +502,19 @@ lima_screen_parse_env(void)
"reset to default 0\n", lima_ppir_force_spilling);
lima_ppir_force_spilling = 0;
}
lima_plb_pp_stream_cache_size = debug_get_num_option("LIMA_PLB_PP_STREAM_CACHE_SIZE", 0);
if (lima_plb_pp_stream_cache_size < 0) {
fprintf(stderr, "lima: LIMA_PLB_PP_STREAM_CACHE_SIZE %d less than 0, "
"reset to default 0\n", lima_plb_pp_stream_cache_size);
lima_plb_pp_stream_cache_size = 0;
}
}
struct pipe_screen *
lima_screen_create(int fd, struct renderonly *ro)
{
uint64_t system_memory;
struct lima_screen *screen;
screen = rzalloc(NULL, struct lima_screen);
@ -516,6 +525,15 @@ lima_screen_create(int fd, struct renderonly *ro)
lima_screen_parse_env();
/* Limit PP PLB stream cache size to 0.1% of system memory */
if (!lima_plb_pp_stream_cache_size &&
os_get_total_physical_memory(&system_memory))
lima_plb_pp_stream_cache_size = system_memory >> 10;
/* Set lower limit on PP PLB cache size */
lima_plb_pp_stream_cache_size = MAX2(128 * 1024 * lima_ctx_num_plb,
lima_plb_pp_stream_cache_size);
if (!lima_screen_query_info(screen))
goto err_out0;

View file

@ -47,6 +47,7 @@ extern uint32_t lima_debug;
extern int lima_ctx_num_plb;
extern int lima_plb_max_blk;
extern int lima_ppir_force_spilling;
extern int lima_plb_pp_stream_cache_size;
struct ra_regs;