freedreno/a6xx: Extract out GMEM cache helper

Extract out a helper to calculate placement of various caches that live
in GMEM.

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39052>
This commit is contained in:
Rob Clark 2025-12-20 12:28:55 -08:00
parent ebf1454410
commit d197ea37d1
3 changed files with 127 additions and 104 deletions

View file

@ -0,0 +1,121 @@
/*
* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
* SPDX-License-Identifier: MIT
*/
#ifndef FD6_GMEM_CACHE_H
#define FD6_GMEM_CACHE_H
#include <stdint.h>
#include "common/freedreno_dev_info.h"
/* Offset within GMEM of various "non-GMEM" things that GMEM is used to
* cache. These offsets differ for gmem vs sysmem rendering (in sysmem
* mode, the entire GMEM can be used)
*
* One instance describes a single rendering mode; the helpers below fill
* in one instance for gmem rendering and one for sysmem rendering.
*
* The VPC buffer sizes and the depth/color cache sizes are per-CCU
* amounts: the offset calculation multiplies each of them by the number
* of CCUs when carving out space.
*/
struct fd6_gmem_config {
/* Color/depth CCU cache: */
uint32_t color_ccu_offset;
uint32_t depth_ccu_offset;
/* Vertex attrib cache (a750+): */
uint32_t vpc_attr_buf_size;
uint32_t vpc_attr_buf_offset;
/* Vertex position cache (a8xx+): */
uint32_t vpc_pos_buf_size;
uint32_t vpc_pos_buf_offset;
uint32_t vpc_bv_pos_buf_size;
uint32_t vpc_bv_pos_buf_offset;
/* see enum a6xx_ccu_cache_size */
uint32_t depth_cache_fraction;
uint32_t color_cache_fraction;
/* Per-CCU cache sizes; only written by the chip == 8 setup path below. */
uint32_t depth_cache_size;
uint32_t color_cache_size;
};
/**
 * Lay out the GMEM-resident caches back to front, starting at @offset and
 * working towards lower addresses.
 *
 * The per-CCU sizes in @config (vpc_bv_pos_buf_size, vpc_attr_buf_size,
 * vpc_pos_buf_size, color_cache_size, depth_cache_size) must already be
 * filled in by the caller; this function only writes the matching *_offset
 * fields.  Each region occupies (size * num_ccu) bytes directly below the
 * previously placed region.
 *
 * @param info    device description; num_ccu and chip are read
 * @param offset  upper bound (exclusive) from which regions are carved
 * @param config  in: per-CCU sizes, out: region offsets
 * @return the offset of the lowest region placed (the depth CCU cache)
 */
static inline unsigned
__calc_gmem_cache_offsets(const struct fd_dev_info *info, unsigned offset,
                          struct fd6_gmem_config *config)
{
   const unsigned ccus = info->num_ccu;
   unsigned cursor = offset;

   /* Gap left at the top of GMEM on gen8+.  It does not appear to be
    * load bearing, but is kept for now to match the blob driver:
    */
   if (info->chip >= 8)
      cursor -= 0x78000;

   /* Placement order, from the highest address downwards: */
   cursor -= ccus * config->vpc_bv_pos_buf_size;
   config->vpc_bv_pos_buf_offset = cursor;

   cursor -= ccus * config->vpc_attr_buf_size;
   config->vpc_attr_buf_offset = cursor;

   cursor -= ccus * config->vpc_pos_buf_size;
   config->vpc_pos_buf_offset = cursor;

   cursor -= ccus * config->color_cache_size;
   config->color_ccu_offset = cursor;

   cursor -= ccus * config->depth_cache_size;
   config->depth_ccu_offset = cursor;

   return cursor;
}
/**
 * Compute where the various caches live in GMEM, for both gmem and sysmem
 * rendering.
 *
 * Three device classes are handled:
 *  - chip == 8: all sizes/fractions are copied from info->props into both
 *    configs and every offset is derived by __calc_gmem_cache_offsets().
 *  - has_gmem_vpc_attr_buf: the vertex attrib buffer is placed at the end
 *    of GMEM with the color CCU cache directly below it.
 *  - otherwise: only the CCU cache offsets are set.
 *
 * @param info            device description
 * @param gmemsize_bytes  total GMEM size in bytes
 * @param gmem            out: layout used for gmem rendering
 * @param sysmem          out: layout used for sysmem rendering
 * @return the lowest offset claimed by the caches in gmem mode; when no
 *         cache is carved off the top this is gmemsize_bytes unchanged
 */
static inline unsigned
fd6_calc_gmem_cache_offsets(const struct fd_dev_info *info, unsigned gmemsize_bytes,
                            struct fd6_gmem_config *gmem, struct fd6_gmem_config *sysmem)
{
   /* Totals across all CCUs, based on the sysmem per-CCU sizes: */
   const uint32_t depth_total =
      info->num_ccu * info->props.sysmem_per_ccu_depth_cache_size;
   const uint32_t color_total =
      info->num_ccu * info->props.sysmem_per_ccu_color_cache_size;
   /* In gmem mode only a fraction of the color cache is used: */
   const uint32_t color_total_gmem =
      color_total / (1 << info->props.gmem_ccu_color_cache_fraction);

   /* Sysmem layout starts at the bottom of GMEM: depth first, then color.
    * (For chip == 8 these two offsets are recomputed further down.)
    */
   sysmem->depth_ccu_offset = 0;
   sysmem->color_ccu_offset = depth_total;

   /* TODO we could unify gen7/gen8 setup.. gen7 is a subset.. */
   if (info->chip == 8) {
      gmem->depth_cache_fraction   = info->props.gmem_ccu_depth_cache_fraction;
      sysmem->depth_cache_fraction = info->props.sysmem_ccu_depth_cache_fraction;

      gmem->depth_cache_size   = info->props.gmem_per_ccu_depth_cache_size;
      sysmem->depth_cache_size = info->props.sysmem_per_ccu_depth_cache_size;

      gmem->color_cache_fraction   = info->props.gmem_ccu_color_cache_fraction;
      sysmem->color_cache_fraction = info->props.sysmem_ccu_color_cache_fraction;

      gmem->color_cache_size   = info->props.gmem_per_ccu_color_cache_size;
      sysmem->color_cache_size = info->props.sysmem_per_ccu_color_cache_size;

      gmem->vpc_attr_buf_size   = info->props.gmem_vpc_attr_buf_size;
      sysmem->vpc_attr_buf_size = info->props.sysmem_vpc_attr_buf_size;

      gmem->vpc_pos_buf_size   = info->props.gmem_vpc_pos_buf_size;
      sysmem->vpc_pos_buf_size = info->props.sysmem_vpc_pos_buf_size;

      gmem->vpc_bv_pos_buf_size   = info->props.gmem_vpc_bv_pos_buf_size;
      sysmem->vpc_bv_pos_buf_size = info->props.sysmem_vpc_bv_pos_buf_size;

      /* Both layouts are carved from the end of GMEM; only the gmem
       * result is reported back to the caller.
       */
      __calc_gmem_cache_offsets(info, gmemsize_bytes, sysmem);
      return __calc_gmem_cache_offsets(info, gmemsize_bytes, gmem);
   }

   if (info->props.has_gmem_vpc_attr_buf) {
      /* sysmem: attrib buffer follows the color cache */
      sysmem->vpc_attr_buf_size = info->props.sysmem_vpc_attr_buf_size;
      sysmem->vpc_attr_buf_offset = sysmem->color_ccu_offset + color_total;

      /* gmem: attrib buffer sits at the very end, color cache below it.
       * NOTE(review): gmem->depth_ccu_offset is not written on this path;
       * presumably the caller provides a zero-initialized struct -- confirm.
       */
      gmem->vpc_attr_buf_size = info->props.gmem_vpc_attr_buf_size;
      gmem->vpc_attr_buf_offset =
         gmemsize_bytes - (gmem->vpc_attr_buf_size * info->num_ccu);
      gmem->color_ccu_offset = gmem->vpc_attr_buf_offset - color_total_gmem;

      return gmem->vpc_attr_buf_offset;
   }

   /* No VPC buffers: the color cache sits at the end of GMEM and the
    * reported size is left unchanged.
    */
   gmem->depth_ccu_offset = 0;
   gmem->color_ccu_offset = gmemsize_bytes - color_total_gmem;

   return gmemsize_bytes;
}
#endif /* FD6_GMEM_CACHE_H */

View file

@ -160,91 +160,17 @@ static const enum pc_di_primtype primtypes[] = {
};
/* clang-format on */
/* Place the GMEM-resident caches for one rendering mode, working downwards
* from the end of GMEM.  The per-CCU *_size fields of config are read, the
* matching *_offset fields are written, and the offset of the lowest region
* (the depth CCU cache) is returned.
*/
static unsigned
calc_gmem_cache_offsets(struct fd_screen *screen, struct fd6_gmem_config *config)
{
unsigned num_ccu = screen->info->num_ccu;
/* Layout from end of gmem: */
unsigned offset = screen->gmemsize_bytes;
/* NOTE(review): the reason for skipping 0x78000 bytes at the top of GMEM
* is not documented here.
*/
offset -= 0x78000;
/* Each region takes (num_ccu * per-CCU size) bytes below the previous one: */
config->vpc_bv_pos_buf_offset = offset - (num_ccu * config->vpc_bv_pos_buf_size);
offset = config->vpc_bv_pos_buf_offset;
config->vpc_attr_buf_offset = offset - (num_ccu * config->vpc_attr_buf_size);
offset = config->vpc_attr_buf_offset;
config->vpc_pos_buf_offset = offset - (num_ccu * config->vpc_pos_buf_size);
offset = config->vpc_pos_buf_offset;
config->color_ccu_offset = offset - (num_ccu * config->color_cache_size);
offset = config->color_ccu_offset;
config->depth_ccu_offset = offset - (num_ccu * config->depth_cache_size);
offset = config->depth_ccu_offset;
return offset;
}
void
fd6_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
const struct fd_dev_info *info = screen->info;
screen->max_rts = A6XX_MAX_RENDER_TARGETS;
uint32_t depth_cache_size =
screen->info->num_ccu * screen->info->props.sysmem_per_ccu_depth_cache_size;
uint32_t color_cache_size =
(screen->info->num_ccu * screen->info->props.sysmem_per_ccu_color_cache_size);
uint32_t color_cache_size_gmem =
color_cache_size /
(1 << screen->info->props.gmem_ccu_color_cache_fraction);
struct fd6_gmem_config *gmem = &screen->config_gmem;
struct fd6_gmem_config *sysmem = &screen->config_sysmem;
sysmem->depth_ccu_offset = 0;
sysmem->color_ccu_offset = sysmem->depth_ccu_offset + depth_cache_size;
/* TODO we could unify gen7/gen8 setup.. gen7 is a subset.. */
if (info->chip == 8) {
gmem->depth_cache_fraction = info->props.gmem_ccu_depth_cache_fraction;
gmem->depth_cache_size = info->props.gmem_per_ccu_depth_cache_size;
gmem->color_cache_fraction = info->props.gmem_ccu_color_cache_fraction;
gmem->color_cache_size = info->props.gmem_per_ccu_color_cache_size;
gmem->vpc_attr_buf_size = info->props.gmem_vpc_attr_buf_size;
gmem->vpc_pos_buf_size = info->props.gmem_vpc_pos_buf_size;
gmem->vpc_bv_pos_buf_size = info->props.gmem_vpc_bv_pos_buf_size;
sysmem->depth_cache_fraction = info->props.sysmem_ccu_depth_cache_fraction;
sysmem->depth_cache_size = info->props.sysmem_per_ccu_depth_cache_size;
sysmem->color_cache_fraction = info->props.sysmem_ccu_color_cache_fraction;
sysmem->color_cache_size = info->props.sysmem_per_ccu_color_cache_size;
sysmem->vpc_attr_buf_size = info->props.sysmem_vpc_attr_buf_size;
sysmem->vpc_pos_buf_size = info->props.sysmem_vpc_pos_buf_size;
sysmem->vpc_bv_pos_buf_size = info->props.sysmem_vpc_bv_pos_buf_size;
calc_gmem_cache_offsets(screen, sysmem);
screen->gmemsize_bytes = calc_gmem_cache_offsets(screen, gmem);
} else if (screen->info->props.has_gmem_vpc_attr_buf) {
sysmem->vpc_attr_buf_size = screen->info->props.sysmem_vpc_attr_buf_size;
sysmem->vpc_attr_buf_offset = sysmem->color_ccu_offset + color_cache_size;
gmem->vpc_attr_buf_size = screen->info->props.gmem_vpc_attr_buf_size;
gmem->vpc_attr_buf_offset = screen->gmemsize_bytes -
(gmem->vpc_attr_buf_size * screen->info->num_ccu);
gmem->color_ccu_offset = gmem->vpc_attr_buf_offset - color_cache_size_gmem;
screen->gmemsize_bytes = gmem->vpc_attr_buf_offset;
} else {
gmem->depth_ccu_offset = 0;
gmem->color_ccu_offset = screen->gmemsize_bytes - color_cache_size_gmem;
}
screen->gmemsize_bytes =
fd6_calc_gmem_cache_offsets(screen->info, screen->gmemsize_bytes,
&screen->config_gmem,
&screen->config_sysmem);
/* Currently only FB_READ forces GMEM path, mostly because we'd have to
* deal with cmdstream patching otherwise..

View file

@ -23,6 +23,8 @@
#include "util/u_memory.h"
#include "util/u_queue.h"
#include "common/fd6_gmem_cache.h"
#include "freedreno_batch_cache.h"
#include "freedreno_gmem.h"
#include "freedreno_util.h"
@ -48,32 +50,6 @@ enum fd_gmem_reason {
FD_GMEM_FB_READ = BIT(5),
};
/* Offset within GMEM of various "non-GMEM" things that GMEM is used to
* cache. These offsets differ for gmem vs sysmem rendering (in sysmem
* mode, the entire GMEM can be used)
*
* The VPC buffer sizes and the depth/color cache sizes are per-CCU
* amounts: the offset calculation multiplies each of them by the number
* of CCUs when carving out space.
*/
struct fd6_gmem_config {
/* Color/depth CCU cache: */
uint32_t color_ccu_offset;
uint32_t depth_ccu_offset;
/* Vertex attrib cache (a750+): */
uint32_t vpc_attr_buf_size;
uint32_t vpc_attr_buf_offset;
/* Vertex position cache (a8xx+): */
uint32_t vpc_pos_buf_size;
uint32_t vpc_pos_buf_offset;
uint32_t vpc_bv_pos_buf_size;
uint32_t vpc_bv_pos_buf_offset;
/* see enum a6xx_ccu_cache_size */
uint32_t depth_cache_fraction;
uint32_t color_cache_fraction;
/* Per-CCU cache sizes: */
uint32_t depth_cache_size;
uint32_t color_cache_size;
};
struct fd_screen {
struct pipe_screen base;