freedreno+tu: Big GMEM support

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21573>
This commit is contained in:
Rob Clark 2023-02-24 14:41:14 -08:00 committed by Marge Bot
parent 60bc7c0e22
commit b012a4a9cd
12 changed files with 60 additions and 39 deletions

View file

@ -1529,7 +1529,7 @@ registers:
00000000 0xa630: 00000000
00100000 RB_DBG_ECO_CNTL: 0x100000
00000001 RB_ADDR_MODE_CNTL: ADDR_64B
00000000 RB_CCU_CNTL: { COLOR_OFFSET = 0 | DEPTH_OFFSET = 0 }
00000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0 }
00000004 RB_NC_MODE_CNTL: { LOWER_BIT = 2 | UPPER_BIT = 0 }
00000000 RB_PERFCTR_RB_SEL[0]+0: 00000000
00000000 RB_PERFCTR_RB_SEL[0x1]+0: 00000000

View file

@ -1744,7 +1744,7 @@ registers:
00000000 0xa630: 00000000
00100000 RB_DBG_ECO_CNTL: 0x100000
00000001 RB_ADDR_MODE_CNTL: ADDR_64B
08000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x10000 | DEPTH_OFFSET = 0 }
08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
00000002 RB_NC_MODE_CNTL: { LOWER_BIT = 1 | UPPER_BIT = 0 }
00000000 RB_PERFCTR_RB_SEL[0]+0: 00000000
00000000 RB_PERFCTR_RB_SEL[0x1]+0: 00000000
@ -2022,7 +2022,7 @@ got cmdszdw=83
+ 00000000 RB_2D_SRC_SOLID_C3: 0
!+ 00000001 RB_UNKNOWN_8E01: 0x1
!+ 00100000 RB_DBG_ECO_CNTL: 0x100000
!+ 08000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x10000 | DEPTH_OFFSET = 0 }
!+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
+ 00000000 VPC_UNKNOWN_9210: 0
+ 00000000 VPC_UNKNOWN_9211: 0
+ 00000000 VPC_POINT_COORD_INVERT: { 0 }
@ -2332,7 +2332,7 @@ got cmdszdw=83
+ 00000000 RB_UNKNOWN_88F0: 0
+ 00000001 RB_UNKNOWN_8E01: 0x1
+ 00100000 RB_DBG_ECO_CNTL: 0x100000
+ 08000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x10000 | DEPTH_OFFSET = 0 }
+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
+ 00000000 VPC_UNKNOWN_9210: 0
+ 00000000 VPC_UNKNOWN_9211: 0
+ 00000000 VPC_POINT_COORD_INVERT: { 0 }
@ -5212,7 +5212,7 @@ ESTIMATED CRASH LOCATION!
!+ 00000000 RB_2D_DST_PITCH: 0
+ 00000001 RB_UNKNOWN_8E01: 0x1
+ 00100000 RB_DBG_ECO_CNTL: 0x100000
+ 08000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x10000 | DEPTH_OFFSET = 0 }
+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
+ 00000000 VPC_UNKNOWN_9210: 0
+ 00000000 VPC_UNKNOWN_9211: 0
+ 00000000 VPC_POINT_COORD_INVERT: { 0 }

View file

@ -12,7 +12,7 @@ cmdstream[0]: 265 dwords
opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
0000000001058010: 0000: 70268000
write RB_CCU_CNTL (8e07)
RB_CCU_CNTL: { COLOR_OFFSET = 0x20000 | DEPTH_OFFSET = 0 }
RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x20000 }
0000000001058014: 0000: 408e0701 10000000
write RB_DBG_ECO_CNTL (8e04)
RB_DBG_ECO_CNTL: 0x100000
@ -310,7 +310,7 @@ cmdstream[0]: 265 dwords
!+ 000000ff RB_2D_SRC_SOLID_C3: 0xff
+ 00000000 RB_UNKNOWN_8E01: 0
!+ 00100000 RB_DBG_ECO_CNTL: 0x100000
!+ 10000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x20000 | DEPTH_OFFSET = 0 }
!+ 10000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x20000 }
+ 00000000 VPC_UNKNOWN_9107: { 0 }
+ 00000000 VPC_UNKNOWN_9210: 0
+ 00000000 VPC_UNKNOWN_9211: 0
@ -384,7 +384,7 @@ cmdstream[0]: 265 dwords
opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
000000000105832c: 0000: 70268000
write RB_CCU_CNTL (8e07)
RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | DEPTH_OFFSET = 0 | GMEM }
RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
0000000001058330: 0000: 408e0701 7c400000
write VPC_SO_DISABLE (9306)
VPC_SO_DISABLE: { 0 }
@ -485,7 +485,7 @@ cmdstream[0]: 265 dwords
!+ 01012000 RB_BLIT_FLAG_DST: 0x1012000
!+ 00004001 RB_BLIT_FLAG_DST_PITCH: { PITCH = 64 | ARRAY_PITCH = 1024 }
!+ 00000003 RB_BLIT_INFO: { UNK0 | GMEM | CLEAR_MASK = 0 | LAST = 0 | BUFFER_ID = 0 }
!+ 7c400000 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | DEPTH_OFFSET = 0 | GMEM }
!+ 7c400000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
!+ 00000000 VPC_SO_DISABLE: { 0 }
+ 00000000 SP_TP_WINDOW_OFFSET: { X = 0 | Y = 0 }
+ 00000000 SP_WINDOW_OFFSET: { X = 0 | Y = 0 }

View file

@ -245,7 +245,7 @@ cmdstream[0]: 1023 dwords
opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
0000000001d91278: 0000: 70268000
write RB_CCU_CNTL (8e07)
RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | DEPTH_OFFSET = 0 | GMEM | CONCURRENT_RESOLVE }
RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
0000000001d9127c: 0000: 408e0701 7c400004
write RB_DEPTH_BUFFER_INFO (8872)
RB_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
@ -1007,7 +1007,7 @@ cmdstream[0]: 1023 dwords
+ 00000000 RB_MRT_FLAG_BUFFER[0].PITCH: { PITCH = 0 | ARRAY_PITCH = 0 }
!+ 00000001 RB_UNKNOWN_8E01: 0x1
+ 00000000 RB_DBG_ECO_CNTL: 0
!+ 7c400004 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | DEPTH_OFFSET = 0 | GMEM | CONCURRENT_RESOLVE }
!+ 7c400004 RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
!+ 00ffff00 VPC_VS_CLIP_CNTL: { CLIP_MASK = 0 | CLIP_DIST_03_LOC = 255 | CLIP_DIST_47_LOC = 255 }
!+ 0000ffff VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
+ 00000000 VPC_UNKNOWN_9107: { 0 }
@ -1498,7 +1498,7 @@ cmdstream[0]: 1023 dwords
opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
0000000001d91938: 0000: 70268000
write RB_CCU_CNTL (8e07)
RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | DEPTH_OFFSET = 0 | GMEM | CONCURRENT_RESOLVE }
RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
0000000001d9193c: 0000: 408e0701 7c400004
write VPC_SO_DISABLE (9306)
VPC_SO_DISABLE: { DISABLE }
@ -1678,7 +1678,7 @@ cmdstream[0]: 1023 dwords
+ 00000000 RB_BLIT_CLEAR_COLOR_DW2: 0
+ 00000000 RB_BLIT_CLEAR_COLOR_DW3: 0
!+ 000000f2 RB_BLIT_INFO: { GMEM | CLEAR_MASK = 0xf | LAST = 0 | BUFFER_ID = 0 }
+ 7c400004 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | DEPTH_OFFSET = 0 | GMEM | CONCURRENT_RESOLVE }
+ 7c400004 RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
!+ 00000001 VPC_SO_DISABLE: { DISABLE }
+ 00000001 PC_POWER_CNTL: 0x1
!+ 00000000 VFD_MODE_CNTL: { RENDER_MODE = RENDERING_PASS }

View file

@ -2323,7 +2323,7 @@ registers:
00000000 0xa630: 00000000
00000000 RB_DBG_ECO_CNTL: 0
00000001 RB_ADDR_MODE_CNTL: ADDR_64B
08000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x10000 | DEPTH_OFFSET = 0 }
08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
00000002 RB_NC_MODE_CNTL: { LOWER_BIT = 1 | UPPER_BIT = 0 }
00000000 RB_PERFCTR_RB_SEL[0]+0: 00000000
00000000 RB_PERFCTR_RB_SEL[0x1]+0: 00000000
@ -3108,7 +3108,7 @@ got cmdszdw=438
!+ 00004001 RB_MRT_FLAG_BUFFER[0].PITCH: { PITCH = 64 | ARRAY_PITCH = 1024 }
!+ 10000ad30 RB_SAMPLE_COUNT_ADDR: 0x10000ad30
!+ 00000000 RB_DBG_ECO_CNTL: 0
!+ 08000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x10000 | DEPTH_OFFSET = 0 }
!+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
!+ 00ffff00 VPC_VS_CLIP_CNTL: { CLIP_MASK = 0 | CLIP_DIST_03_LOC = 255 | CLIP_DIST_47_LOC = 255 }
!+ 0000ffff VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
+ 00000000 VPC_UNKNOWN_9107: { 0 }

View file

@ -2493,6 +2493,13 @@ to upconvert to 32b float internally?
<reg32 offset="0x8e05" name="RB_ADDR_MODE_CNTL" pos="0" type="a5xx_address_mode"/>
<!-- 0x8e06 invalid -->
<reg32 offset="0x8e07" name="RB_CCU_CNTL">
<!-- concurrent resolves are apparently a 2-bit enum on a650+ -->
<bitfield name="CONCURRENT_RESOLVE" pos="2" type="boolean"/>
<bitfield name="DEPTH_OFFSET_HI" pos="7" type="hex"/>
<bitfield name="COLOR_OFFSET_HI" pos="9" type="hex"/>
<!-- GMEM offset of CCU depth cache -->
<bitfield name="DEPTH_OFFSET" low="12" high="20" shr="12" type="hex"/>
<bitfield name="GMEM" pos="22" type="boolean"/> <!-- set for GMEM path -->
<!-- GMEM offset of CCU color cache
for GMEM rendering, we set it to GMEM size minus the minimum
CCU color cache size. CCU color cache will be needed in some
@ -2500,11 +2507,6 @@ to upconvert to 32b float internally?
of GMEM for color cache.
-->
<bitfield name="COLOR_OFFSET" low="23" high="31" shr="12" type="hex"/>
<!-- GMEM offset of CCU depth cache -->
<bitfield name="DEPTH_OFFSET" low="12" high="20" shr="12" type="hex"/>
<bitfield name="GMEM" pos="22" type="boolean"/> <!-- set for GMEM path -->
<!-- concurrent resolves are apparently a 2-bit enum on a650+ -->
<bitfield name="CONCURRENT_RESOLVE" pos="2" type="boolean"/>
<!--TODO: valid mask 0xfffffc1f -->
</reg32>
<reg32 offset="0x8e08" name="RB_NC_MODE_CNTL">

View file

@ -199,6 +199,18 @@ tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer)
&cmd_buffer->state.renderpass_cache);
}
static struct fd_reg_pair
rb_ccu_cntl(uint32_t color_offset, bool gmem)
{
uint32_t color_offset_hi = color_offset >> 21;
color_offset &= 0x1fffff;
return A6XX_RB_CCU_CNTL(
.color_offset = color_offset,
.color_offset_hi = color_offset_hi,
.gmem = gmem,
);
}
/* Cache flushes for things that use the color/depth read/write path (i.e.
* blits and draws). This deals with changing CCU state as well as the usual
* cache flushing.
@ -242,11 +254,10 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
if (ccu_state != cmd_buffer->state.ccu_state) {
struct tu_physical_device *phys_dev = cmd_buffer->device->physical_device;
tu_cs_emit_regs(cs,
A6XX_RB_CCU_CNTL(.color_offset =
ccu_state == TU_CMD_CCU_GMEM ?
phys_dev->ccu_offset_gmem :
phys_dev->ccu_offset_bypass,
.gmem = ccu_state == TU_CMD_CCU_GMEM));
rb_ccu_cntl(ccu_state == TU_CMD_CCU_GMEM ?
phys_dev->ccu_offset_gmem :
phys_dev->ccu_offset_bypass,
ccu_state == TU_CMD_CCU_GMEM));
cmd_buffer->state.ccu_state = ccu_state;
}
}
@ -946,8 +957,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
cmd->state.cache.pending_flush_bits &=
~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE);
tu_cs_emit_regs(cs,
A6XX_RB_CCU_CNTL(.color_offset = phys_dev->ccu_offset_bypass));
tu_cs_emit_regs(cs, rb_ccu_cntl(phys_dev->ccu_offset_bypass, false));
cmd->state.ccu_state = TU_CMD_CCU_SYSMEM;
tu_cs_emit_write_reg(cs, REG_A6XX_RB_DBG_ECO_CNTL,
phys_dev->info->a6xx.magic.RB_DBG_ECO_CNTL);

View file

@ -259,8 +259,7 @@ emit_setup(struct fd_batch *batch)
/* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
OUT_WFI5(ring);
OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
OUT_RING(ring, A6XX_RB_CCU_CNTL_COLOR_OFFSET(screen->ccu_offset_bypass));
fd6_emit_ccu_cntl(ring, screen, false);
}
static void

View file

@ -433,7 +433,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) a
OUT_WFI5(ring);
OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));
fd6_emit_ccu_cntl(ring, screen, false);
OUT_REG(ring,
A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,

View file

@ -733,6 +733,21 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd6_state_emit(&state, ring);
}
void
fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem)
{
uint32_t offset = gmem ? screen->ccu_offset_gmem : screen->ccu_offset_bypass;
uint32_t offset_hi = offset >> 21;
offset &= 0x1fffff;
OUT_REG(ring, A6XX_RB_CCU_CNTL(
.concurrent_resolve = gmem && screen->info->a6xx.concurrent_resolve,
.color_offset_hi = offset_hi,
.gmem = gmem,
.color_offset = offset,
));
}
/* emit setup at begin of new cmdstream buffer (don't rely on previous
* state, there could have been a context switch between ioctls):
*/

View file

@ -341,6 +341,7 @@ struct fd6_compute_state;
void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd6_compute_state *cs) assert_dt;
void fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem);
void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
void fd6_emit_init_screen(struct pipe_screen *pscreen);

View file

@ -768,10 +768,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt
OUT_WFI5(ring);
OUT_REG(ring,
A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
.gmem = true,
.concurrent_resolve = screen->info->a6xx.concurrent_resolve));
fd6_emit_ccu_cntl(ring, screen, true);
}
static void
@ -836,10 +833,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
OUT_RING(ring, 0x1);
fd_wfi(batch, ring);
OUT_REG(ring,
A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
.gmem = true,
.concurrent_resolve = screen->info->a6xx.concurrent_resolve));
fd6_emit_ccu_cntl(ring, screen, true);
emit_zs(ring, pfb->zsbuf, batch->gmem_state);
emit_mrt(ring, pfb, batch->gmem_state);
@ -1625,7 +1619,7 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
fd6_cache_inv(batch, ring);
fd_wfi(batch, ring);
OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));
fd6_emit_ccu_cntl(ring, screen, false);
/* enable stream-out, with sysmem there is only one pass: */
OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));