mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
tu/a7xx: Update CCU layout logic for A7XX
A7XX introduces some changes to the CCU: depth and color now have different amounts of memory per CCU, and CCU control is divided across two registers, A7XX_RB_CCU_CNTL and A7XX_RB_CCU_CNTL2. CNTL2 no longer requires a complete flush to be updated. We currently don't take advantage of this, as any CCU update sets both registers, but it's a potential optimization we can add in the future. Signed-off-by: Mark Collins <mark@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26934>
This commit is contained in:
parent
98d6d93a82
commit
5266815ca9
13 changed files with 117 additions and 60 deletions
|
|
@ -1529,7 +1529,7 @@ registers:
|
|||
00000000 0xa630: 00000000
|
||||
00100000 RB_DBG_ECO_CNTL: 0x100000
|
||||
00000001 RB_ADDR_MODE_CNTL: ADDR_64B
|
||||
00000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0 }
|
||||
00000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_FULL | COLOR_OFFSET = 0 }
|
||||
00000004 RB_NC_MODE_CNTL: { LOWER_BIT = 2 | UPPER_BIT = 0 }
|
||||
00000000 RB_PERFCTR_RB_SEL[0]+0: 00000000
|
||||
00000000 RB_PERFCTR_RB_SEL[0x1]+0: 00000000
|
||||
|
|
|
|||
|
|
@ -1744,7 +1744,7 @@ registers:
|
|||
00000000 0xa630: 00000000
|
||||
00100000 RB_DBG_ECO_CNTL: 0x100000
|
||||
00000001 RB_ADDR_MODE_CNTL: ADDR_64B
|
||||
08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
00000002 RB_NC_MODE_CNTL: { LOWER_BIT = 1 | UPPER_BIT = 0 }
|
||||
00000000 RB_PERFCTR_RB_SEL[0]+0: 00000000
|
||||
00000000 RB_PERFCTR_RB_SEL[0x1]+0: 00000000
|
||||
|
|
@ -2022,7 +2022,7 @@ got cmdszdw=83
|
|||
+ 00000000 RB_2D_SRC_SOLID_C3: 0
|
||||
!+ 00000001 RB_UNKNOWN_8E01: 0x1
|
||||
!+ 00100000 RB_DBG_ECO_CNTL: 0x100000
|
||||
!+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
!+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
+ 00000000 VPC_UNKNOWN_9210: 0
|
||||
+ 00000000 VPC_UNKNOWN_9211: 0
|
||||
+ 00000000 VPC_POINT_COORD_INVERT: { 0 }
|
||||
|
|
@ -2332,7 +2332,7 @@ got cmdszdw=83
|
|||
+ 00000000 RB_UNKNOWN_88F0: 0
|
||||
+ 00000001 RB_UNKNOWN_8E01: 0x1
|
||||
+ 00100000 RB_DBG_ECO_CNTL: 0x100000
|
||||
+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
+ 00000000 VPC_UNKNOWN_9210: 0
|
||||
+ 00000000 VPC_UNKNOWN_9211: 0
|
||||
+ 00000000 VPC_POINT_COORD_INVERT: { 0 }
|
||||
|
|
@ -5212,7 +5212,7 @@ ESTIMATED CRASH LOCATION!
|
|||
!+ 00000000 RB_2D_DST_PITCH: 0
|
||||
+ 00000001 RB_UNKNOWN_8E01: 0x1
|
||||
+ 00100000 RB_DBG_ECO_CNTL: 0x100000
|
||||
+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
+ 00000000 VPC_UNKNOWN_9210: 0
|
||||
+ 00000000 VPC_UNKNOWN_9211: 0
|
||||
+ 00000000 VPC_POINT_COORD_INVERT: { 0 }
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ cmdstream[0]: 265 dwords
|
|||
opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
|
||||
0000000001058010: 0000: 70268000
|
||||
write RB_CCU_CNTL (8e07)
|
||||
RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x20000 }
|
||||
RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_FULL | COLOR_OFFSET = 0x20000 }
|
||||
0000000001058014: 0000: 408e0701 10000000
|
||||
write RB_DBG_ECO_CNTL (8e04)
|
||||
RB_DBG_ECO_CNTL: 0x100000
|
||||
|
|
@ -310,7 +310,7 @@ cmdstream[0]: 265 dwords
|
|||
!+ 000000ff RB_2D_SRC_SOLID_C3: 0xff
|
||||
+ 00000000 RB_UNKNOWN_8E01: 0
|
||||
!+ 00100000 RB_DBG_ECO_CNTL: 0x100000
|
||||
!+ 10000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x20000 }
|
||||
!+ 10000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_FULL | COLOR_OFFSET = 0x20000 }
|
||||
+ 00000000 VPC_UNKNOWN_9107: { 0 }
|
||||
+ 00000000 VPC_UNKNOWN_9210: 0
|
||||
+ 00000000 VPC_UNKNOWN_9211: 0
|
||||
|
|
@ -384,7 +384,7 @@ cmdstream[0]: 265 dwords
|
|||
opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
|
||||
000000000105832c: 0000: 70268000
|
||||
write RB_CCU_CNTL (8e07)
|
||||
RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
0000000001058330: 0000: 408e0701 7c400000
|
||||
write VPC_SO_DISABLE (9306)
|
||||
VPC_SO_DISABLE: { 0 }
|
||||
|
|
@ -485,7 +485,7 @@ cmdstream[0]: 265 dwords
|
|||
!+ 01012000 RB_BLIT_FLAG_DST: 0x1012000
|
||||
!+ 00004001 RB_BLIT_FLAG_DST_PITCH: { PITCH = 64 | ARRAY_PITCH = 1024 }
|
||||
!+ 00000003 RB_BLIT_INFO: { UNK0 | GMEM | CLEAR_MASK = 0 | LAST = 0 | BUFFER_ID = 0 }
|
||||
!+ 7c400000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
!+ 7c400000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
!+ 00000000 VPC_SO_DISABLE: { 0 }
|
||||
+ 00000000 SP_TP_WINDOW_OFFSET: { X = 0 | Y = 0 }
|
||||
+ 00000000 SP_WINDOW_OFFSET: { X = 0 | Y = 0 }
|
||||
|
|
|
|||
|
|
@ -245,7 +245,7 @@ cmdstream[0]: 1023 dwords
|
|||
opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
|
||||
0000000001d91278: 0000: 70268000
|
||||
write RB_CCU_CNTL (8e07)
|
||||
RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
0000000001d9127c: 0000: 408e0701 7c400004
|
||||
write RB_DEPTH_BUFFER_INFO (8872)
|
||||
RB_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
|
||||
|
|
@ -1007,7 +1007,7 @@ cmdstream[0]: 1023 dwords
|
|||
+ 00000000 RB_MRT_FLAG_BUFFER[0].PITCH: { PITCH = 0 | ARRAY_PITCH = 0 }
|
||||
!+ 00000001 RB_UNKNOWN_8E01: 0x1
|
||||
+ 00000000 RB_DBG_ECO_CNTL: 0
|
||||
!+ 7c400004 RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
!+ 7c400004 RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
!+ 00ffff00 VPC_VS_CLIP_CNTL: { CLIP_MASK = 0 | CLIP_DIST_03_LOC = 255 | CLIP_DIST_47_LOC = 255 }
|
||||
!+ 0000ffff VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
|
||||
+ 00000000 VPC_UNKNOWN_9107: { 0 }
|
||||
|
|
@ -1498,7 +1498,7 @@ cmdstream[0]: 1023 dwords
|
|||
opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
|
||||
0000000001d91938: 0000: 70268000
|
||||
write RB_CCU_CNTL (8e07)
|
||||
RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
0000000001d9193c: 0000: 408e0701 7c400004
|
||||
write VPC_SO_DISABLE (9306)
|
||||
VPC_SO_DISABLE: { DISABLE }
|
||||
|
|
@ -1678,7 +1678,7 @@ cmdstream[0]: 1023 dwords
|
|||
+ 00000000 RB_BLIT_CLEAR_COLOR_DW2: 0
|
||||
+ 00000000 RB_BLIT_CLEAR_COLOR_DW3: 0
|
||||
!+ 000000f2 RB_BLIT_INFO: { GMEM | CLEAR_MASK = 0xf | LAST = 0 | BUFFER_ID = 0 }
|
||||
+ 7c400004 RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
+ 7c400004 RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
|
||||
!+ 00000001 VPC_SO_DISABLE: { DISABLE }
|
||||
+ 00000001 PC_POWER_CNTL: 0x1
|
||||
!+ 00000000 VFD_MODE_CNTL: { RENDER_MODE = RENDERING_PASS }
|
||||
|
|
|
|||
|
|
@ -2323,7 +2323,7 @@ registers:
|
|||
00000000 0xa630: 00000000
|
||||
00000000 RB_DBG_ECO_CNTL: 0
|
||||
00000001 RB_ADDR_MODE_CNTL: ADDR_64B
|
||||
08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
00000002 RB_NC_MODE_CNTL: { LOWER_BIT = 1 | UPPER_BIT = 0 }
|
||||
00000000 RB_PERFCTR_RB_SEL[0]+0: 00000000
|
||||
00000000 RB_PERFCTR_RB_SEL[0x1]+0: 00000000
|
||||
|
|
@ -3108,7 +3108,7 @@ got cmdszdw=438
|
|||
!+ 00004001 RB_MRT_FLAG_BUFFER[0].PITCH: { PITCH = 64 | ARRAY_PITCH = 1024 }
|
||||
!+ 10000ad30 RB_SAMPLE_COUNT_ADDR: 0x10000ad30
|
||||
!+ 00000000 RB_DBG_ECO_CNTL: 0
|
||||
!+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
!+ 08000000 RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = CCU_CACHE_SIZE_FULL | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
|
||||
!+ 00ffff00 VPC_VS_CLIP_CNTL: { CLIP_MASK = 0 | CLIP_DIST_03_LOC = 255 | CLIP_DIST_47_LOC = 255 }
|
||||
!+ 0000ffff VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
|
||||
+ 00000000 VPC_UNKNOWN_9107: { 0 }
|
||||
|
|
|
|||
|
|
@ -149,7 +149,8 @@ struct fd_dev_info {
|
|||
|
||||
/* Per CCU GMEM amount reserved for each of DEPTH and COLOR caches
|
||||
* in sysmem rendering. */
|
||||
uint32_t sysmem_per_ccu_cache_size;
|
||||
uint32_t sysmem_per_ccu_depth_cache_size;
|
||||
uint32_t sysmem_per_ccu_color_cache_size;
|
||||
/* Per CCU GMEM amount reserved for color cache used by GMEM resolves
|
||||
* which require color cache (non-BLIT event case).
|
||||
* The size is expressed as a fraction of ccu cache used by sysmem
|
||||
|
|
@ -157,7 +158,7 @@ struct fd_dev_info {
|
|||
* to make sure it will not overwrite pixel data in GMEM that is still
|
||||
* needed.
|
||||
*/
|
||||
/* see enum a6xx_ccu_color_cache_size */
|
||||
/* see enum a6xx_ccu_cache_size */
|
||||
uint32_t gmem_ccu_color_cache_fraction;
|
||||
|
||||
/* Corresponds to HLSQ_CONTROL_1_REG::PRIMALLOCTHRESHOLD */
|
||||
|
|
|
|||
|
|
@ -314,7 +314,8 @@ a6xx_base = A6XXProps(
|
|||
|
||||
supports_double_threadsize = True,
|
||||
|
||||
sysmem_per_ccu_cache_size = 64 * 1024,
|
||||
sysmem_per_ccu_depth_cache_size = 64 * 1024,
|
||||
sysmem_per_ccu_color_cache_size = 64 * 1024,
|
||||
gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value,
|
||||
|
||||
prim_alloc_threshold = 0x7,
|
||||
|
|
@ -337,7 +338,8 @@ a6xx_gen1_low = A6XXProps(
|
|||
has_hw_multiview = False,
|
||||
has_sampler_minmax = False,
|
||||
has_fs_tex_prefetch = False,
|
||||
sysmem_per_ccu_cache_size = 8 * 1024,
|
||||
sysmem_per_ccu_color_cache_size = 8 * 1024,
|
||||
sysmem_per_ccu_depth_cache_size = 8 * 1024,
|
||||
gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value,
|
||||
vs_max_inputs_count = 16,
|
||||
supports_double_threadsize = False,
|
||||
|
|
@ -751,8 +753,9 @@ a7xx_base = A6XXProps(
|
|||
|
||||
supports_double_threadsize = True,
|
||||
|
||||
sysmem_per_ccu_cache_size = 64 * 1024,
|
||||
gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value,
|
||||
sysmem_per_ccu_depth_cache_size = 256 * 1024,
|
||||
sysmem_per_ccu_color_cache_size = 64 * 1024,
|
||||
gmem_ccu_color_cache_fraction = CCUColorCacheFraction.EIGHTH.value,
|
||||
|
||||
prim_alloc_threshold = 0x7,
|
||||
vs_max_inputs_count = 32,
|
||||
|
|
|
|||
|
|
@ -2780,7 +2780,28 @@ to upconvert to 32b float internally?
|
|||
<!-- Value conditioned based on predicate, changed before blits -->
|
||||
<bitfield name="UNK0" pos="0" type="boolean"/>
|
||||
</reg32>
|
||||
<reg32 offset="0x88e5" name="RB_UNKNOWN_88E5" variants="A7XX-" usage="rp_blit"/>
|
||||
|
||||
<enum name="a6xx_ccu_cache_size">
|
||||
<value value="0x0" name="CCU_CACHE_SIZE_FULL"/>
|
||||
<value value="0x1" name="CCU_CACHE_SIZE_HALF"/>
|
||||
<value value="0x2" name="CCU_CACHE_SIZE_QUARTER"/>
|
||||
<value value="0x3" name="CCU_CACHE_SIZE_EIGHTH"/>
|
||||
</enum>
|
||||
<reg32 offset="0x88e5" name="RB_CCU_CNTL2" variants="A7XX-" usage="rp_blit">
|
||||
<bitfield name="DEPTH_OFFSET_HI" pos="0" type="hex"/>
|
||||
<bitfield name="COLOR_OFFSET_HI" pos="2" type="hex"/>
|
||||
<bitfield name="DEPTH_CACHE_SIZE" low="10" high="11" type="a6xx_ccu_cache_size"/>
|
||||
<!-- GMEM offset of CCU depth cache -->
|
||||
<bitfield name="DEPTH_OFFSET" low="12" high="20" shr="12" type="hex"/>
|
||||
<bitfield name="COLOR_CACHE_SIZE" low="21" high="22" type="a6xx_ccu_cache_size"/>
|
||||
<!-- GMEM offset of CCU color cache
|
||||
for GMEM rendering, we set it to GMEM size minus the minimum
|
||||
CCU color cache size. CCU color cache will be needed in some
|
||||
resolve cases, and in those cases we need to reserve the end
|
||||
of GMEM for color cache.
|
||||
-->
|
||||
<bitfield name="COLOR_OFFSET" low="23" high="31" shr="12" type="hex"/>
|
||||
</reg32>
|
||||
<!-- 0x88e6-0x88ef invalid -->
|
||||
<!-- always 0x0 ? -->
|
||||
<reg32 offset="0x88f0" name="RB_UNKNOWN_88F0" low="0" high="11" usage="cmd"/>
|
||||
|
|
@ -2876,23 +2897,17 @@ to upconvert to 32b float internally?
|
|||
<reg32 offset="0x8e05" name="RB_ADDR_MODE_CNTL" pos="0" type="a5xx_address_mode"/>
|
||||
<!-- 0x02080000 in GMEM, zero otherwise? -->
|
||||
<reg32 offset="0x8e06" name="RB_UNKNOWN_8E06" variants="A7XX-" usage="cmd"/>
|
||||
<enum name="a6xx_ccu_color_cache_size">
|
||||
<value value="0x0" name="CCU_COLOR_CACHE_SIZE_FULL"/>
|
||||
<value value="0x1" name="CCU_COLOR_CACHE_SIZE_HALF"/>
|
||||
<value value="0x2" name="CCU_COLOR_CACHE_SIZE_QUARTER"/>
|
||||
<value value="0x3" name="CCU_COLOR_CACHE_SIZE_EIGHTH"/>
|
||||
</enum>
|
||||
|
||||
<reg32 offset="0x8e07" name="RB_CCU_CNTL" usage="cmd">
|
||||
<reg32 offset="0x8e07" name="RB_CCU_CNTL" usage="cmd" variants="A6XX">
|
||||
<bitfield name="GMEM_FAST_CLEAR_DISABLE" pos="0" type="boolean"/>
|
||||
<!-- concurrent resolves are apparently a 2-bit enum on a650+ -->
|
||||
<bitfield name="CONCURRENT_RESOLVE" pos="2" type="boolean"/>
|
||||
<bitfield name="DEPTH_OFFSET_HI" pos="7" type="hex"/>
|
||||
<bitfield name="COLOR_OFFSET_HI" pos="9" type="hex"/>
|
||||
<bitfield name="DEPTH_CACHE_SIZE" low="10" high="11" type="uint"/>
|
||||
<bitfield name="DEPTH_CACHE_SIZE" low="10" high="11" type="a6xx_ccu_cache_size"/>
|
||||
<!-- GMEM offset of CCU depth cache -->
|
||||
<bitfield name="DEPTH_OFFSET" low="12" high="20" shr="12" type="hex"/>
|
||||
<bitfield name="COLOR_CACHE_SIZE" low="21" high="22" type="a6xx_ccu_color_cache_size"/>
|
||||
<bitfield name="COLOR_CACHE_SIZE" low="21" high="22" type="a6xx_ccu_cache_size"/>
|
||||
<!-- GMEM offset of CCU color cache
|
||||
for GMEM rendering, we set it to GMEM size minus the minimum
|
||||
CCU color cache size. CCU color cache will be needed in some
|
||||
|
|
@ -2902,6 +2917,11 @@ to upconvert to 32b float internally?
|
|||
<bitfield name="COLOR_OFFSET" low="23" high="31" shr="12" type="hex"/>
|
||||
<!--TODO: valid mask 0xfffffc1f -->
|
||||
</reg32>
|
||||
<reg32 offset="0x8e07" name="RB_CCU_CNTL" usage="cmd" variants="A7XX-">
|
||||
<bitfield name="GMEM_FAST_CLEAR_DISABLE" pos="0" type="boolean"/>
|
||||
<bitfield name="CONCURRENT_RESOLVE" pos="2" type="boolean"/>
|
||||
<!-- rest of the bits were moved to RB_CCU_CNTL2 -->
|
||||
</reg32>
|
||||
<reg32 offset="0x8e08" name="RB_NC_MODE_CNTL">
|
||||
<bitfield name="MODE" pos="0" type="boolean"/>
|
||||
<bitfield name="LOWER_BIT" low="1" high="2" type="uint"/>
|
||||
|
|
|
|||
|
|
@ -1474,7 +1474,6 @@ r3d_setup(struct tu_cmd_buffer *cmd,
|
|||
|
||||
if (CHIP >= A7XX) {
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_8812(0x3ff));
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_88E5(0x50120004));
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_RB_UNKNOWN_8E06(cmd->device->physical_device->info->a6xx.magic.RB_UNKNOWN_8E06));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -218,30 +218,66 @@ tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer)
|
|||
TU_GENX(tu_emit_cache_flush_renderpass);
|
||||
|
||||
template <chip CHIP>
|
||||
static struct fd_reg_pair
|
||||
rb_ccu_cntl(struct tu_device *dev, bool gmem)
|
||||
static void
|
||||
emit_rb_ccu_cntl(struct tu_cs *cs, struct tu_device *dev, bool gmem)
|
||||
{
|
||||
if (CHIP == A7XX) {
|
||||
return A6XX_RB_CCU_CNTL(.dword = gmem ? 0x68 : 0);
|
||||
}
|
||||
|
||||
/* The CCUs are a cache that allocates memory from GMEM while facilitating
|
||||
* framebuffer caching for sysmem rendering. The CCU is split into two parts,
|
||||
* one for color and one for depth. The size and offset of these in GMEM can
|
||||
* be configured separately.
|
||||
*
|
||||
* The most common configuration for the CCU is to occupy as much as possible
|
||||
* of GMEM (CACHE_SIZE_FULL) during sysmem rendering as GMEM is unused. On
|
||||
* the other hand, when rendering to GMEM, the CCUs can be left enabled at
|
||||
* any configuration as they don't interfere with GMEM rendering and only
|
||||
* overwrite GMEM when sysmem operations are performed.
|
||||
*
|
||||
* The vast majority of GMEM rendering doesn't need any sysmem operations
|
||||
* but there are some cases where it is required. For example, when the
|
||||
* framebuffer isn't aligned to the tile size or with certain MSAA resolves.
|
||||
*
|
||||
* To correctly handle these cases, we need to be able to switch between
|
||||
* sysmem and GMEM rendering. We do this by allocating a carveout at the
|
||||
* end of GMEM for the color CCU (as none of these operations are depth)
|
||||
* which the color CCU offset is set to and the GMEM size available to the
|
||||
* GMEM layout calculations is adjusted accordingly.
|
||||
*/
|
||||
uint32_t color_offset = gmem ? dev->physical_device->ccu_offset_gmem
|
||||
: dev->physical_device->ccu_offset_bypass;
|
||||
|
||||
uint32_t color_offset_hi = color_offset >> 21;
|
||||
color_offset &= 0x1fffff;
|
||||
enum a6xx_ccu_color_cache_size cache_size =
|
||||
(a6xx_ccu_color_cache_size)(dev->physical_device->info->a6xx.gmem_ccu_color_cache_fraction);
|
||||
enum a6xx_ccu_cache_size cache_size =
|
||||
(a6xx_ccu_cache_size)(dev->physical_device->info->a6xx.gmem_ccu_color_cache_fraction);
|
||||
bool concurrent_resolve = dev->physical_device->info->a6xx.concurrent_resolve;
|
||||
return A6XX_RB_CCU_CNTL(.gmem_fast_clear_disable =
|
||||
!dev->physical_device->info->a6xx.has_gmem_fast_clear,
|
||||
.concurrent_resolve = concurrent_resolve,
|
||||
.depth_offset_hi = 0,
|
||||
.color_offset_hi = color_offset_hi,
|
||||
.depth_cache_size = 0,
|
||||
.depth_offset = 0,
|
||||
.color_cache_size = cache_size,
|
||||
.color_offset = color_offset);
|
||||
|
||||
if (CHIP == A7XX) {
|
||||
tu_cs_emit_regs(cs, A7XX_RB_CCU_CNTL(
|
||||
.gmem_fast_clear_disable =
|
||||
!dev->physical_device->info->a6xx.has_gmem_fast_clear,
|
||||
.concurrent_resolve = concurrent_resolve,
|
||||
));
|
||||
tu_cs_emit_regs(cs, A7XX_RB_CCU_CNTL2(
|
||||
.depth_offset_hi = 0,
|
||||
.color_offset_hi = color_offset_hi,
|
||||
.depth_cache_size = CCU_CACHE_SIZE_FULL,
|
||||
.depth_offset = 0,
|
||||
.color_cache_size = cache_size,
|
||||
.color_offset = color_offset
|
||||
));
|
||||
} else {
|
||||
tu_cs_emit_regs(cs, A6XX_RB_CCU_CNTL(
|
||||
.gmem_fast_clear_disable =
|
||||
!dev->physical_device->info->a6xx.has_gmem_fast_clear,
|
||||
.concurrent_resolve = concurrent_resolve,
|
||||
.depth_offset_hi = 0,
|
||||
.color_offset_hi = color_offset_hi,
|
||||
.depth_cache_size = CCU_CACHE_SIZE_FULL,
|
||||
.depth_offset = 0,
|
||||
.color_cache_size = cache_size,
|
||||
.color_offset = color_offset
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
/* Cache flushes for things that use the color/depth read/write path (i.e.
|
||||
|
|
@ -285,8 +321,8 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
|
|||
tu6_emit_flushes<CHIP>(cmd_buffer, cs, &cmd_buffer->state.cache);
|
||||
|
||||
if (ccu_state != cmd_buffer->state.ccu_state) {
|
||||
tu_cs_emit_regs(cs, rb_ccu_cntl<CHIP>(cmd_buffer->device,
|
||||
ccu_state == TU_CMD_CCU_GMEM));
|
||||
emit_rb_ccu_cntl<CHIP>(cs, cmd_buffer->device,
|
||||
ccu_state == TU_CMD_CCU_GMEM);
|
||||
cmd_buffer->state.ccu_state = ccu_state;
|
||||
}
|
||||
}
|
||||
|
|
@ -1159,7 +1195,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
cmd->state.cache.pending_flush_bits &=
|
||||
~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE);
|
||||
|
||||
tu_cs_emit_regs(cs, rb_ccu_cntl<CHIP>(cmd->device, false));
|
||||
emit_rb_ccu_cntl<CHIP>(cs, cmd->device, false);
|
||||
cmd->state.ccu_state = TU_CMD_CCU_SYSMEM;
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(phys_dev->info->a6xx.magic_raw); i++) {
|
||||
|
|
@ -1650,8 +1686,6 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
if (CHIP == A7XX) {
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_RB_UNKNOWN_8812(0x3ff)); // all buffers in sysmem
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_RB_UNKNOWN_88E5(0x50120004));
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_RB_UNKNOWN_8E06(cmd->device->physical_device->info->a6xx.magic.RB_UNKNOWN_8E06));
|
||||
|
||||
|
|
|
|||
|
|
@ -638,10 +638,10 @@ tu_physical_device_init(struct tu_physical_device *device,
|
|||
device->dev_info = info;
|
||||
device->info = &device->dev_info;
|
||||
uint32_t depth_cache_size =
|
||||
device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_cache_size;
|
||||
device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_depth_cache_size;
|
||||
uint32_t color_cache_size =
|
||||
(device->info->num_ccu *
|
||||
device->info->a6xx.sysmem_per_ccu_cache_size) /
|
||||
device->info->a6xx.sysmem_per_ccu_color_cache_size) /
|
||||
(1 << device->info->a6xx.gmem_ccu_color_cache_fraction);
|
||||
|
||||
device->ccu_offset_bypass = depth_cache_size;
|
||||
|
|
|
|||
|
|
@ -780,7 +780,7 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
void
|
||||
fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem)
|
||||
{
|
||||
enum a6xx_ccu_color_cache_size cache_size = (a6xx_ccu_color_cache_size)(screen->info->a6xx.gmem_ccu_color_cache_fraction);
|
||||
enum a6xx_ccu_cache_size cache_size = (enum a6xx_ccu_cache_size)(screen->info->a6xx.gmem_ccu_color_cache_fraction);
|
||||
uint32_t offset = gmem ? screen->ccu_offset_gmem : screen->ccu_offset_bypass;
|
||||
uint32_t offset_hi = offset >> 21;
|
||||
offset &= 0x1fffff;
|
||||
|
|
@ -792,7 +792,7 @@ fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gme
|
|||
screen->info->a6xx.concurrent_resolve,
|
||||
.depth_offset_hi = 0,
|
||||
.color_offset_hi = offset_hi,
|
||||
.depth_cache_size = 0,
|
||||
.depth_cache_size = CCU_CACHE_SIZE_FULL,
|
||||
.depth_offset = 0,
|
||||
.color_cache_size = cache_size,
|
||||
.color_offset = offset,
|
||||
|
|
|
|||
|
|
@ -163,9 +163,9 @@ fd6_screen_init(struct pipe_screen *pscreen)
|
|||
screen->max_rts = A6XX_MAX_RENDER_TARGETS;
|
||||
|
||||
uint32_t depth_cache_size =
|
||||
screen->info->num_ccu * screen->info->a6xx.sysmem_per_ccu_cache_size;
|
||||
screen->info->num_ccu * screen->info->a6xx.sysmem_per_ccu_depth_cache_size;
|
||||
uint32_t color_cache_size =
|
||||
(screen->info->num_ccu * screen->info->a6xx.sysmem_per_ccu_cache_size) /
|
||||
(screen->info->num_ccu * screen->info->a6xx.sysmem_per_ccu_color_cache_size) /
|
||||
(1 << screen->info->a6xx.gmem_ccu_color_cache_fraction);
|
||||
|
||||
screen->ccu_offset_bypass = depth_cache_size;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue