panfrost: upload blend shaders to bin pool

Upload blend shaders to the bin pool when they are compiled, instead of
reuploading them to a fresh BO every time the blend state changes. This
allows us to drop the separate blend shader cache for the fb preload shaders.

This improves gfxbench gl_driver FPS on G610 from 42.39 to 61.94,
which is now slightly faster than the DDK (57.76).

Signed-off-by: Olivia Lee <olivia.lee@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Acked-by: Ryan Mckeever <ryan.mckeever@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34666>
This commit is contained in:
Olivia Lee 2025-04-22 22:20:43 -07:00 committed by Marge Bot
parent 7fe842663e
commit 04bbe45b76
7 changed files with 30 additions and 82 deletions

View file

@ -18,9 +18,10 @@ DERIVE_HASH_TABLE(pan_blend_shader_key);
void
pan_blend_shader_cache_init(struct pan_blend_shader_cache *cache,
unsigned gpu_id)
unsigned gpu_id, struct pan_pool *bin_pool)
{
cache->gpu_id = gpu_id;
cache->bin_pool = bin_pool;
cache->shaders = pan_blend_shader_key_table_create(NULL);
pthread_mutex_init(&cache->lock, NULL);
}
@ -118,12 +119,20 @@ GENX(pan_blend_get_shader_locked)(struct pan_blend_shader_cache *cache,
cache->gpu_id < 0x700);
#endif
GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &info);
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
GENX(pan_shader_compile)(nir, &inputs, &binary, &info);
struct panfrost_ptr bin =
pan_pool_alloc_aligned(cache->bin_pool, binary.size, 64);
memcpy(bin.cpu, binary.data, binary.size);
util_dynarray_fini(&binary);
shader->work_reg_count = info.work_reg_count;
shader->address = bin.gpu;
#if PAN_ARCH <= 5
shader->first_tag = info.midgard.first_tag;
shader->address |= info.midgard.first_tag;
#endif
ralloc_free(nir);

View file

@ -32,6 +32,7 @@
#include "nir.h"
#include "pipe/p_state.h"
#include "pan_blend.h"
#include "pan_pool.h"
struct panfrost_bo;
struct panfrost_batch;
@ -61,6 +62,7 @@ struct panfrost_blend_state {
struct pan_blend_shader_cache {
unsigned gpu_id;
struct pan_pool *bin_pool;
struct hash_table *shaders;
pthread_mutex_t lock;
};
@ -68,16 +70,14 @@ struct pan_blend_shader_cache {
struct pan_blend_shader {
struct pan_blend_shader_key key;
struct util_dynarray binary;
unsigned first_tag;
uint64_t address;
unsigned work_reg_count;
};
uint64_t panfrost_get_blend(struct panfrost_batch *batch, unsigned rt,
struct panfrost_bo **bo, unsigned *shader_offset);
uint64_t panfrost_get_blend(struct panfrost_batch *batch, unsigned rt);
void pan_blend_shader_cache_init(struct pan_blend_shader_cache *cache,
unsigned gpu_id);
unsigned gpu_id, struct pan_pool *bin_pool);
void pan_blend_shader_cache_cleanup(struct pan_blend_shader_cache *cache);

View file

@ -274,17 +274,17 @@ static void
panfrost_get_blend_shaders(struct panfrost_batch *batch,
uint64_t *blend_shaders)
{
unsigned shader_offset = 0;
struct panfrost_bo *shader_bo = NULL;
bool used = false;
for (unsigned c = 0; c < batch->key.nr_cbufs; ++c) {
if (batch->key.cbufs[c]) {
blend_shaders[c] =
panfrost_get_blend(batch, c, &shader_bo, &shader_offset);
blend_shaders[c] = panfrost_get_blend(batch, c);
if (blend_shaders[c])
used = true;
}
}
if (shader_bo)
if (used)
perf_debug(batch->ctx, "Blend shader use");
}
@ -4230,6 +4230,7 @@ screen_destroy(struct pipe_screen *pscreen)
{
struct panfrost_device *dev = pan_device(pscreen);
GENX(pan_fb_preload_cache_cleanup)(&dev->fb_preload_cache);
pan_blend_shader_cache_cleanup(&dev->blend_shaders);
}
static void
@ -4394,6 +4395,9 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
screen->vtbl.emit_write_timestamp = emit_write_timestamp;
screen->vtbl.select_tile_size = GENX(pan_select_tile_size);
pan_blend_shader_cache_init(&dev->blend_shaders, panfrost_device_gpu_id(dev),
&screen->mempools.bin.base);
GENX(pan_fb_preload_cache_init)
(&dev->fb_preload_cache, panfrost_device_gpu_id(dev), &dev->blend_shaders,
&screen->mempools.bin.base, &screen->mempools.desc.base);

View file

@ -170,8 +170,7 @@ panfrost_set_blend_color(struct pipe_context *pipe,
/* Create a final blend given the context */
uint64_t
panfrost_get_blend(struct panfrost_batch *batch, unsigned rti,
struct panfrost_bo **bo, unsigned *shader_offset)
panfrost_get_blend(struct panfrost_batch *batch, unsigned rti)
{
struct panfrost_context *ctx = batch->ctx;
struct panfrost_device *dev = pan_device(ctx->base.screen);
@ -212,16 +211,6 @@ panfrost_get_blend(struct panfrost_batch *batch, unsigned rti,
memcpy(pan_blend.constants, ctx->blend_color.color,
sizeof(pan_blend.constants));
/* Upload the shader, sharing a BO */
if (!(*bo)) {
*bo = panfrost_batch_create_bo(batch, 4096, PAN_BO_EXECUTE,
PIPE_SHADER_FRAGMENT, "Blend shader");
if (!(*bo)) {
mesa_loge("failed to allocate blend-shader");
return 0;
}
}
struct panfrost_compiled_shader *ss = ctx->prog[PIPE_SHADER_FRAGMENT];
/* Default for Midgard */
@ -239,15 +228,10 @@ panfrost_get_blend(struct panfrost_batch *batch, unsigned rti,
pan_screen(ctx->base.screen)
->vtbl.get_blend_shader(&dev->blend_shaders, &pan_blend, col0_type,
col1_type, rti);
/* Size check and upload */
unsigned offset = *shader_offset;
assert((offset + shader->binary.size) < 4096);
memcpy((*bo)->ptr.cpu + offset, shader->binary.data, shader->binary.size);
*shader_offset += shader->binary.size;
uint64_t address = shader->address;
pthread_mutex_unlock(&dev->blend_shaders.lock);
return ((*bo)->ptr.gpu + offset) | shader->first_tag;
return address;
}
static void

View file

@ -99,19 +99,6 @@ struct pan_preload_shader_data {
nir_alu_type blend_types[8];
};
struct pan_preload_blend_shader_key {
enum pipe_format format;
nir_alu_type type;
unsigned rt : 3;
unsigned nr_samples : 5;
unsigned pad : 24;
};
struct pan_preload_blend_shader_data {
struct pan_preload_blend_shader_key key;
uint64_t address;
};
struct pan_preload_rsd_key {
struct {
enum pipe_format format;
@ -335,27 +322,6 @@ pan_preload_get_blend_shaders(struct pan_fb_preload_cache *cache,
if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
continue;
struct pan_preload_blend_shader_key key = {
.format = rts[i]->format,
.rt = i,
.nr_samples = pan_image_view_get_nr_samples(rts[i]),
.type = preload_shader->blend_types[i],
};
pthread_mutex_lock(&cache->shaders.lock);
struct hash_entry *he =
_mesa_hash_table_search(cache->shaders.blend, &key);
struct pan_preload_blend_shader_data *blend_shader = he ? he->data : NULL;
if (blend_shader) {
blend_shaders[i] = blend_shader->address;
pthread_mutex_unlock(&cache->shaders.lock);
continue;
}
blend_shader =
rzalloc(cache->shaders.blend, struct pan_preload_blend_shader_data);
blend_shader->key = key;
blend_state.rts[i] = (struct pan_blend_rt_state){
.format = rts[i]->format,
.nr_samples = pan_image_view_get_nr_samples(rts[i]),
@ -373,16 +339,8 @@ pan_preload_get_blend_shaders(struct pan_fb_preload_cache *cache,
i);
assert(b->work_reg_count <= 4);
struct panfrost_ptr bin =
pan_pool_alloc_aligned(cache->shaders.pool, b->binary.size, 64);
memcpy(bin.cpu, b->binary.data, b->binary.size);
blend_shader->address = bin.gpu | b->first_tag;
blend_shaders[i] = b->address;
pthread_mutex_unlock(&cache->blend_shader_cache->lock);
_mesa_hash_table_insert(cache->shaders.blend, &blend_shader->key,
blend_shader);
pthread_mutex_unlock(&cache->shaders.lock);
blend_shaders[i] = blend_shader->address;
}
}
#endif
@ -1397,7 +1355,6 @@ GENX(pan_preload_fb)(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
}
DERIVE_HASH_TABLE(pan_preload_shader_key);
DERIVE_HASH_TABLE(pan_preload_blend_shader_key);
DERIVE_HASH_TABLE(pan_preload_rsd_key);
static void
@ -1445,7 +1402,6 @@ GENX(pan_fb_preload_cache_init)(
{
cache->gpu_id = gpu_id;
cache->shaders.preload = pan_preload_shader_key_table_create(NULL);
cache->shaders.blend = pan_preload_blend_shader_key_table_create(NULL);
cache->shaders.pool = bin_pool;
pthread_mutex_init(&cache->shaders.lock, NULL);
pan_preload_prefill_preload_shader_cache(cache);
@ -1460,7 +1416,6 @@ void
GENX(pan_fb_preload_cache_cleanup)(struct pan_fb_preload_cache *cache)
{
_mesa_hash_table_destroy(cache->shaders.preload, NULL);
_mesa_hash_table_destroy(cache->shaders.blend, NULL);
pthread_mutex_destroy(&cache->shaders.lock);
_mesa_hash_table_destroy(cache->rsds.rsds, NULL);
pthread_mutex_destroy(&cache->rsds.lock);

View file

@ -41,7 +41,6 @@ struct pan_fb_preload_cache {
struct {
struct pan_pool *pool;
struct hash_table *preload;
struct hash_table *blend;
pthread_mutex_t lock;
} shaders;
struct {

View file

@ -722,7 +722,6 @@ panfrost_destroy_screen(struct pipe_screen *pscreen)
panfrost_resource_screen_destroy(pscreen);
panfrost_pool_cleanup(&screen->mempools.bin);
panfrost_pool_cleanup(&screen->mempools.desc);
pan_blend_shader_cache_cleanup(&dev->blend_shaders);
if (screen->vtbl.screen_destroy)
screen->vtbl.screen_destroy(pscreen);
@ -902,8 +901,6 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
panfrost_query_compression_modifiers;
panfrost_resource_screen_init(&screen->base);
pan_blend_shader_cache_init(&dev->blend_shaders,
panfrost_device_gpu_id(dev));
panfrost_init_shader_caps(screen);
panfrost_init_compute_caps(screen);