mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-15 15:20:31 +01:00
freedreno,turnip: Make number of VSC pipes configurable
a610/a608 have fewer pipes, so we need to make the number of VSC pipes configurable. In particular, we need to program all of the VSC_PIPE_CONFIG_REG[n] rather than leaving garbage values for the unused pipes. Pointing multiple VSC pipes at the same bin makes the hw angry. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20991>
This commit is contained in:
parent
0814c2c7a6
commit
05fffc7b25
4 changed files with 53 additions and 35 deletions
|
|
@ -126,14 +126,13 @@ class A6xxGPUInfo(GPUInfo):
|
|||
into distinct sub-generations. The template parameter avoids
|
||||
duplication of parameters that are unique to the sub-generation.
|
||||
"""
|
||||
def __init__(self, chip, template, num_ccu, tile_align_w, tile_align_h, magic_regs):
|
||||
def __init__(self, chip, template, num_ccu, tile_align_w, tile_align_h, num_vsc_pipes, magic_regs):
|
||||
super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4,
|
||||
tile_align_w = tile_align_w,
|
||||
tile_align_h = tile_align_h,
|
||||
tile_max_w = 1024, # max_bitfield_val(5, 0, 5)
|
||||
tile_max_h = max_bitfield_val(14, 8, 4),
|
||||
num_vsc_pipes = 32)
|
||||
|
||||
num_vsc_pipes = num_vsc_pipes)
|
||||
# The # of SP cores seems to always match # of CCU
|
||||
self.num_sp_cores = num_ccu
|
||||
self.num_ccu = num_ccu
|
||||
|
|
@ -307,6 +306,7 @@ add_gpus([
|
|||
num_ccu = 1,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 32,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict(
|
||||
PC_POWER_CNTL = 0,
|
||||
TPL1_DBG_ECO_CNTL = 0x00108000,
|
||||
|
|
@ -332,6 +332,7 @@ add_gpus([
|
|||
num_ccu = 1,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 16,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict(
|
||||
PC_POWER_CNTL = 0,
|
||||
TPL1_DBG_ECO_CNTL = 0x01008000,
|
||||
|
|
@ -357,6 +358,7 @@ add_gpus([
|
|||
num_ccu = 2,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 16,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict(
|
||||
PC_POWER_CNTL = 1,
|
||||
TPL1_DBG_ECO_CNTL = 0x00108000,
|
||||
|
|
@ -382,6 +384,7 @@ add_gpus([
|
|||
num_ccu = 2,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 16,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict(
|
||||
PC_POWER_CNTL = 1,
|
||||
TPL1_DBG_ECO_CNTL = 0x00008000,
|
||||
|
|
@ -407,6 +410,7 @@ add_gpus([
|
|||
num_ccu = 4,
|
||||
tile_align_w = 64,
|
||||
tile_align_h = 32,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict(
|
||||
PC_POWER_CNTL = 3,
|
||||
TPL1_DBG_ECO_CNTL = 0x00108000,
|
||||
|
|
@ -432,6 +436,7 @@ add_gpus([
|
|||
num_ccu = 3,
|
||||
tile_align_w = 96,
|
||||
tile_align_h = 16,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict(
|
||||
PC_POWER_CNTL = 2,
|
||||
# this seems to be a chicken bit that fixes cubic filtering:
|
||||
|
|
@ -462,6 +467,7 @@ add_gpus([
|
|||
num_ccu = 2,
|
||||
tile_align_w = 32,
|
||||
tile_align_h = 16,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict(
|
||||
PC_POWER_CNTL = 1,
|
||||
TPL1_DBG_ECO_CNTL = 0x05008000,
|
||||
|
|
@ -487,6 +493,7 @@ add_gpus([
|
|||
num_ccu = 3,
|
||||
tile_align_w = 96,
|
||||
tile_align_h = 16,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict(
|
||||
PC_POWER_CNTL = 2,
|
||||
TPL1_DBG_ECO_CNTL = 0x05008000,
|
||||
|
|
@ -512,6 +519,7 @@ add_gpus([
|
|||
num_ccu = 8,
|
||||
tile_align_w = 64,
|
||||
tile_align_h = 32,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict(
|
||||
PC_POWER_CNTL = 7,
|
||||
TPL1_DBG_ECO_CNTL = 0x01008000,
|
||||
|
|
@ -539,6 +547,7 @@ add_gpus([
|
|||
num_ccu = 4,
|
||||
tile_align_w = 64,
|
||||
tile_align_h = 32,
|
||||
num_vsc_pipes = 32,
|
||||
magic_regs = dict()
|
||||
))
|
||||
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ static void
|
|||
tu6_lazy_emit_vsc(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
struct tu_device *dev = cmd->device;
|
||||
uint32_t num_vsc_pipes = dev->physical_device->info->num_vsc_pipes;
|
||||
|
||||
/* VSC buffers:
|
||||
* use vsc pitches from the largest values used so far with this device
|
||||
|
|
@ -114,18 +115,19 @@ tu6_lazy_emit_vsc(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
mtx_unlock(&dev->mutex);
|
||||
|
||||
struct tu_bo *vsc_bo;
|
||||
uint32_t size0 = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES +
|
||||
cmd->vsc_draw_strm_pitch * MAX_VSC_PIPES;
|
||||
uint32_t size0 = cmd->vsc_prim_strm_pitch * num_vsc_pipes +
|
||||
cmd->vsc_draw_strm_pitch * num_vsc_pipes;
|
||||
|
||||
tu_get_scratch_bo(dev, size0 + MAX_VSC_PIPES * 4, &vsc_bo);
|
||||
tu_get_scratch_bo(dev, size0 + num_vsc_pipes * 4, &vsc_bo);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = vsc_bo, .bo_offset = size0));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_PRIM_STRM_ADDRESS(.bo = vsc_bo));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_DRAW_STRM_ADDRESS(.bo = vsc_bo,
|
||||
.bo_offset = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES));
|
||||
tu_cs_emit_regs(
|
||||
cs, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = vsc_bo,
|
||||
.bo_offset = cmd->vsc_prim_strm_pitch *
|
||||
num_vsc_pipes));
|
||||
|
||||
cmd->vsc_initialized = true;
|
||||
}
|
||||
|
|
@ -1144,7 +1146,9 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
}
|
||||
|
||||
static void
|
||||
update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
update_vsc_pipe(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t num_vsc_pipes)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
|
||||
|
|
@ -1156,8 +1160,8 @@ update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
A6XX_VSC_BIN_COUNT(.nx = tiling->tile_count.width,
|
||||
.ny = tiling->tile_count.height));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
|
||||
tu_cs_emit_array(cs, tiling->pipe_config, 32);
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), num_vsc_pipes);
|
||||
tu_cs_emit_array(cs, tiling->pipe_config, num_vsc_pipes);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_PRIM_STRM_PITCH(cmd->vsc_prim_strm_pitch),
|
||||
|
|
@ -1244,7 +1248,7 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_emit_regs(cs,
|
||||
A6XX_VFD_MODE_CNTL(.render_mode = BINNING_PASS));
|
||||
|
||||
update_vsc_pipe(cmd, cs);
|
||||
update_vsc_pipe(cmd, cs, phys_dev->info->num_vsc_pipes);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_PC_POWER_CNTL(phys_dev->info->a6xx.magic.PC_POWER_CNTL));
|
||||
|
|
|
|||
|
|
@ -222,7 +222,8 @@ static void
|
|||
tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
|
||||
const struct tu_device *dev)
|
||||
{
|
||||
const uint32_t max_pipe_count = 32; /* A6xx */
|
||||
const uint32_t max_pipe_count =
|
||||
dev->physical_device->info->num_vsc_pipes;
|
||||
|
||||
/* start from 1 tile per pipe */
|
||||
tiling->pipe0 = (VkExtent2D) {
|
||||
|
|
@ -248,7 +249,8 @@ static void
|
|||
tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
|
||||
const struct tu_device *dev)
|
||||
{
|
||||
const uint32_t max_pipe_count = 32; /* A6xx */
|
||||
const uint32_t max_pipe_count =
|
||||
dev->physical_device->info->num_vsc_pipes;
|
||||
const uint32_t used_pipe_count =
|
||||
tiling->pipe_count.width * tiling->pipe_count.height;
|
||||
const VkExtent2D last_pipe = {
|
||||
|
|
|
|||
|
|
@ -494,10 +494,6 @@ update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
|
|||
}
|
||||
}
|
||||
|
||||
/* extra size to store VSC_DRAW_STRM_SIZE: */
|
||||
#define VSC_DRAW_STRM_SIZE(pitch) ((pitch)*32 + 0x100)
|
||||
#define VSC_PRIM_STRM_SIZE(pitch) ((pitch)*32)
|
||||
|
||||
static void
|
||||
update_vsc_pipe(struct fd_batch *batch)
|
||||
{
|
||||
|
|
@ -505,6 +501,7 @@ update_vsc_pipe(struct fd_batch *batch)
|
|||
struct fd6_context *fd6_ctx = fd6_context(ctx);
|
||||
const struct fd_gmem_stateobj *gmem = batch->gmem_state;
|
||||
struct fd_ringbuffer *ring = batch->gmem;
|
||||
unsigned max_vsc_pipes = batch->ctx->screen->info->num_vsc_pipes;
|
||||
int i;
|
||||
|
||||
if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {
|
||||
|
|
@ -530,27 +527,31 @@ update_vsc_pipe(struct fd_batch *batch)
|
|||
}
|
||||
|
||||
if (!fd6_ctx->vsc_draw_strm) {
|
||||
fd6_ctx->vsc_draw_strm = fd_bo_new(
|
||||
ctx->screen->dev, VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),
|
||||
FD_BO_NOMAP, "vsc_draw_strm");
|
||||
/* We also use four bytes per vsc pipe at the end of the draw
|
||||
* stream buffer for VSC_DRAW_STRM_SIZE written back by hw
|
||||
* (see VSC_DRAW_STRM_SIZE_ADDRESS)
|
||||
*/
|
||||
unsigned sz = (max_vsc_pipes * fd6_ctx->vsc_draw_strm_pitch) +
|
||||
(max_vsc_pipes * 4);
|
||||
fd6_ctx->vsc_draw_strm =
|
||||
fd_bo_new(ctx->screen->dev, sz, FD_BO_NOMAP, "vsc_draw_strm");
|
||||
}
|
||||
|
||||
if (!fd6_ctx->vsc_prim_strm) {
|
||||
fd6_ctx->vsc_prim_strm = fd_bo_new(
|
||||
ctx->screen->dev, VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),
|
||||
FD_BO_NOMAP, "vsc_prim_strm");
|
||||
unsigned sz = max_vsc_pipes * fd6_ctx->vsc_prim_strm_pitch;
|
||||
fd6_ctx->vsc_prim_strm =
|
||||
fd_bo_new(ctx->screen->dev, sz, FD_BO_NOMAP, "vsc_prim_strm");
|
||||
}
|
||||
|
||||
OUT_REG(
|
||||
ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
|
||||
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
|
||||
.bo_offset =
|
||||
32 * fd6_ctx->vsc_draw_strm_pitch));
|
||||
OUT_REG(ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
|
||||
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
|
||||
.bo_offset = max_vsc_pipes *
|
||||
fd6_ctx->vsc_draw_strm_pitch));
|
||||
|
||||
OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
|
||||
for (i = 0; i < 32; i++) {
|
||||
OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), max_vsc_pipes);
|
||||
for (i = 0; i < max_vsc_pipes; i++) {
|
||||
const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
|
||||
OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
|
||||
A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
|
||||
|
|
@ -1088,6 +1089,7 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
|
|||
|
||||
if (use_hw_binning(batch)) {
|
||||
const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
|
||||
unsigned num_vsc_pipes = ctx->screen->info->num_vsc_pipes;
|
||||
|
||||
OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
|
||||
|
||||
|
|
@ -1099,9 +1101,10 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
|
|||
CP_SET_BIN_DATA5_0_VSC_N(tile->n));
|
||||
OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
|
||||
(tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
|
||||
OUT_RELOC(ring,
|
||||
fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
|
||||
(tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
|
||||
OUT_RELOC(
|
||||
ring, fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
|
||||
(tile->p * 4) + (num_vsc_pipes * fd6_ctx->vsc_draw_strm_pitch),
|
||||
0, 0);
|
||||
OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
|
||||
(tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue