mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
panfrost: upload blend shaders to bin pool
Upload blend shaders to the bin pool once, instead of re-uploading them to a fresh BO every time the blend state changes. This also allows us to drop the separate blend shader cache for the fb preload shaders. This improves gfxbench gl_driver FPS on G610 from 42.39 to 61.94, which is now slightly faster than the DDK (57.76).
Signed-off-by: Olivia Lee <olivia.lee@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Acked-by: Ryan Mckeever <ryan.mckeever@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34666>
This commit is contained in:
parent
7fe842663e
commit
04bbe45b76
7 changed files with 30 additions and 82 deletions
|
|
@ -18,9 +18,10 @@ DERIVE_HASH_TABLE(pan_blend_shader_key);
|
|||
|
||||
void
|
||||
pan_blend_shader_cache_init(struct pan_blend_shader_cache *cache,
|
||||
unsigned gpu_id)
|
||||
unsigned gpu_id, struct pan_pool *bin_pool)
|
||||
{
|
||||
cache->gpu_id = gpu_id;
|
||||
cache->bin_pool = bin_pool;
|
||||
cache->shaders = pan_blend_shader_key_table_create(NULL);
|
||||
pthread_mutex_init(&cache->lock, NULL);
|
||||
}
|
||||
|
|
@ -118,12 +119,20 @@ GENX(pan_blend_get_shader_locked)(struct pan_blend_shader_cache *cache,
|
|||
cache->gpu_id < 0x700);
|
||||
#endif
|
||||
|
||||
GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &info);
|
||||
struct util_dynarray binary;
|
||||
util_dynarray_init(&binary, NULL);
|
||||
GENX(pan_shader_compile)(nir, &inputs, &binary, &info);
|
||||
|
||||
struct panfrost_ptr bin =
|
||||
pan_pool_alloc_aligned(cache->bin_pool, binary.size, 64);
|
||||
memcpy(bin.cpu, binary.data, binary.size);
|
||||
util_dynarray_fini(&binary);
|
||||
|
||||
shader->work_reg_count = info.work_reg_count;
|
||||
|
||||
shader->address = bin.gpu;
|
||||
#if PAN_ARCH <= 5
|
||||
shader->first_tag = info.midgard.first_tag;
|
||||
shader->address |= info.midgard.first_tag;
|
||||
#endif
|
||||
|
||||
ralloc_free(nir);
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
#include "nir.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pan_blend.h"
|
||||
#include "pan_pool.h"
|
||||
|
||||
struct panfrost_bo;
|
||||
struct panfrost_batch;
|
||||
|
|
@ -61,6 +62,7 @@ struct panfrost_blend_state {
|
|||
|
||||
struct pan_blend_shader_cache {
|
||||
unsigned gpu_id;
|
||||
struct pan_pool *bin_pool;
|
||||
struct hash_table *shaders;
|
||||
pthread_mutex_t lock;
|
||||
};
|
||||
|
|
@ -68,16 +70,14 @@ struct pan_blend_shader_cache {
|
|||
struct pan_blend_shader {
|
||||
struct pan_blend_shader_key key;
|
||||
|
||||
struct util_dynarray binary;
|
||||
unsigned first_tag;
|
||||
uint64_t address;
|
||||
unsigned work_reg_count;
|
||||
};
|
||||
|
||||
uint64_t panfrost_get_blend(struct panfrost_batch *batch, unsigned rt,
|
||||
struct panfrost_bo **bo, unsigned *shader_offset);
|
||||
uint64_t panfrost_get_blend(struct panfrost_batch *batch, unsigned rt);
|
||||
|
||||
void pan_blend_shader_cache_init(struct pan_blend_shader_cache *cache,
|
||||
unsigned gpu_id);
|
||||
unsigned gpu_id, struct pan_pool *bin_pool);
|
||||
|
||||
void pan_blend_shader_cache_cleanup(struct pan_blend_shader_cache *cache);
|
||||
|
||||
|
|
|
|||
|
|
@ -274,17 +274,17 @@ static void
|
|||
panfrost_get_blend_shaders(struct panfrost_batch *batch,
|
||||
uint64_t *blend_shaders)
|
||||
{
|
||||
unsigned shader_offset = 0;
|
||||
struct panfrost_bo *shader_bo = NULL;
|
||||
bool used = false;
|
||||
|
||||
for (unsigned c = 0; c < batch->key.nr_cbufs; ++c) {
|
||||
if (batch->key.cbufs[c]) {
|
||||
blend_shaders[c] =
|
||||
panfrost_get_blend(batch, c, &shader_bo, &shader_offset);
|
||||
blend_shaders[c] = panfrost_get_blend(batch, c);
|
||||
if (blend_shaders[c])
|
||||
used = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (shader_bo)
|
||||
if (used)
|
||||
perf_debug(batch->ctx, "Blend shader use");
|
||||
}
|
||||
|
||||
|
|
@ -4230,6 +4230,7 @@ screen_destroy(struct pipe_screen *pscreen)
|
|||
{
|
||||
struct panfrost_device *dev = pan_device(pscreen);
|
||||
GENX(pan_fb_preload_cache_cleanup)(&dev->fb_preload_cache);
|
||||
pan_blend_shader_cache_cleanup(&dev->blend_shaders);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -4394,6 +4395,9 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
|
|||
screen->vtbl.emit_write_timestamp = emit_write_timestamp;
|
||||
screen->vtbl.select_tile_size = GENX(pan_select_tile_size);
|
||||
|
||||
pan_blend_shader_cache_init(&dev->blend_shaders, panfrost_device_gpu_id(dev),
|
||||
&screen->mempools.bin.base);
|
||||
|
||||
GENX(pan_fb_preload_cache_init)
|
||||
(&dev->fb_preload_cache, panfrost_device_gpu_id(dev), &dev->blend_shaders,
|
||||
&screen->mempools.bin.base, &screen->mempools.desc.base);
|
||||
|
|
|
|||
|
|
@ -170,8 +170,7 @@ panfrost_set_blend_color(struct pipe_context *pipe,
|
|||
/* Create a final blend given the context */
|
||||
|
||||
uint64_t
|
||||
panfrost_get_blend(struct panfrost_batch *batch, unsigned rti,
|
||||
struct panfrost_bo **bo, unsigned *shader_offset)
|
||||
panfrost_get_blend(struct panfrost_batch *batch, unsigned rti)
|
||||
{
|
||||
struct panfrost_context *ctx = batch->ctx;
|
||||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||
|
|
@ -212,16 +211,6 @@ panfrost_get_blend(struct panfrost_batch *batch, unsigned rti,
|
|||
memcpy(pan_blend.constants, ctx->blend_color.color,
|
||||
sizeof(pan_blend.constants));
|
||||
|
||||
/* Upload the shader, sharing a BO */
|
||||
if (!(*bo)) {
|
||||
*bo = panfrost_batch_create_bo(batch, 4096, PAN_BO_EXECUTE,
|
||||
PIPE_SHADER_FRAGMENT, "Blend shader");
|
||||
if (!(*bo)) {
|
||||
mesa_loge("failed to allocate blend-shader");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
struct panfrost_compiled_shader *ss = ctx->prog[PIPE_SHADER_FRAGMENT];
|
||||
|
||||
/* Default for Midgard */
|
||||
|
|
@ -239,15 +228,10 @@ panfrost_get_blend(struct panfrost_batch *batch, unsigned rti,
|
|||
pan_screen(ctx->base.screen)
|
||||
->vtbl.get_blend_shader(&dev->blend_shaders, &pan_blend, col0_type,
|
||||
col1_type, rti);
|
||||
|
||||
/* Size check and upload */
|
||||
unsigned offset = *shader_offset;
|
||||
assert((offset + shader->binary.size) < 4096);
|
||||
memcpy((*bo)->ptr.cpu + offset, shader->binary.data, shader->binary.size);
|
||||
*shader_offset += shader->binary.size;
|
||||
uint64_t address = shader->address;
|
||||
pthread_mutex_unlock(&dev->blend_shaders.lock);
|
||||
|
||||
return ((*bo)->ptr.gpu + offset) | shader->first_tag;
|
||||
return address;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -99,19 +99,6 @@ struct pan_preload_shader_data {
|
|||
nir_alu_type blend_types[8];
|
||||
};
|
||||
|
||||
struct pan_preload_blend_shader_key {
|
||||
enum pipe_format format;
|
||||
nir_alu_type type;
|
||||
unsigned rt : 3;
|
||||
unsigned nr_samples : 5;
|
||||
unsigned pad : 24;
|
||||
};
|
||||
|
||||
struct pan_preload_blend_shader_data {
|
||||
struct pan_preload_blend_shader_key key;
|
||||
uint64_t address;
|
||||
};
|
||||
|
||||
struct pan_preload_rsd_key {
|
||||
struct {
|
||||
enum pipe_format format;
|
||||
|
|
@ -335,27 +322,6 @@ pan_preload_get_blend_shaders(struct pan_fb_preload_cache *cache,
|
|||
if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
|
||||
continue;
|
||||
|
||||
struct pan_preload_blend_shader_key key = {
|
||||
.format = rts[i]->format,
|
||||
.rt = i,
|
||||
.nr_samples = pan_image_view_get_nr_samples(rts[i]),
|
||||
.type = preload_shader->blend_types[i],
|
||||
};
|
||||
|
||||
pthread_mutex_lock(&cache->shaders.lock);
|
||||
struct hash_entry *he =
|
||||
_mesa_hash_table_search(cache->shaders.blend, &key);
|
||||
struct pan_preload_blend_shader_data *blend_shader = he ? he->data : NULL;
|
||||
if (blend_shader) {
|
||||
blend_shaders[i] = blend_shader->address;
|
||||
pthread_mutex_unlock(&cache->shaders.lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
blend_shader =
|
||||
rzalloc(cache->shaders.blend, struct pan_preload_blend_shader_data);
|
||||
blend_shader->key = key;
|
||||
|
||||
blend_state.rts[i] = (struct pan_blend_rt_state){
|
||||
.format = rts[i]->format,
|
||||
.nr_samples = pan_image_view_get_nr_samples(rts[i]),
|
||||
|
|
@ -373,16 +339,8 @@ pan_preload_get_blend_shaders(struct pan_fb_preload_cache *cache,
|
|||
i);
|
||||
|
||||
assert(b->work_reg_count <= 4);
|
||||
struct panfrost_ptr bin =
|
||||
pan_pool_alloc_aligned(cache->shaders.pool, b->binary.size, 64);
|
||||
memcpy(bin.cpu, b->binary.data, b->binary.size);
|
||||
|
||||
blend_shader->address = bin.gpu | b->first_tag;
|
||||
blend_shaders[i] = b->address;
|
||||
pthread_mutex_unlock(&cache->blend_shader_cache->lock);
|
||||
_mesa_hash_table_insert(cache->shaders.blend, &blend_shader->key,
|
||||
blend_shader);
|
||||
pthread_mutex_unlock(&cache->shaders.lock);
|
||||
blend_shaders[i] = blend_shader->address;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1397,7 +1355,6 @@ GENX(pan_preload_fb)(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
|
|||
}
|
||||
|
||||
DERIVE_HASH_TABLE(pan_preload_shader_key);
|
||||
DERIVE_HASH_TABLE(pan_preload_blend_shader_key);
|
||||
DERIVE_HASH_TABLE(pan_preload_rsd_key);
|
||||
|
||||
static void
|
||||
|
|
@ -1445,7 +1402,6 @@ GENX(pan_fb_preload_cache_init)(
|
|||
{
|
||||
cache->gpu_id = gpu_id;
|
||||
cache->shaders.preload = pan_preload_shader_key_table_create(NULL);
|
||||
cache->shaders.blend = pan_preload_blend_shader_key_table_create(NULL);
|
||||
cache->shaders.pool = bin_pool;
|
||||
pthread_mutex_init(&cache->shaders.lock, NULL);
|
||||
pan_preload_prefill_preload_shader_cache(cache);
|
||||
|
|
@ -1460,7 +1416,6 @@ void
|
|||
GENX(pan_fb_preload_cache_cleanup)(struct pan_fb_preload_cache *cache)
|
||||
{
|
||||
_mesa_hash_table_destroy(cache->shaders.preload, NULL);
|
||||
_mesa_hash_table_destroy(cache->shaders.blend, NULL);
|
||||
pthread_mutex_destroy(&cache->shaders.lock);
|
||||
_mesa_hash_table_destroy(cache->rsds.rsds, NULL);
|
||||
pthread_mutex_destroy(&cache->rsds.lock);
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ struct pan_fb_preload_cache {
|
|||
struct {
|
||||
struct pan_pool *pool;
|
||||
struct hash_table *preload;
|
||||
struct hash_table *blend;
|
||||
pthread_mutex_t lock;
|
||||
} shaders;
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -722,7 +722,6 @@ panfrost_destroy_screen(struct pipe_screen *pscreen)
|
|||
panfrost_resource_screen_destroy(pscreen);
|
||||
panfrost_pool_cleanup(&screen->mempools.bin);
|
||||
panfrost_pool_cleanup(&screen->mempools.desc);
|
||||
pan_blend_shader_cache_cleanup(&dev->blend_shaders);
|
||||
|
||||
if (screen->vtbl.screen_destroy)
|
||||
screen->vtbl.screen_destroy(pscreen);
|
||||
|
|
@ -902,8 +901,6 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
|
|||
panfrost_query_compression_modifiers;
|
||||
|
||||
panfrost_resource_screen_init(&screen->base);
|
||||
pan_blend_shader_cache_init(&dev->blend_shaders,
|
||||
panfrost_device_gpu_id(dev));
|
||||
|
||||
panfrost_init_shader_caps(screen);
|
||||
panfrost_init_compute_caps(screen);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue