From c569fb56692ed6afad857b6e467ad5278edc20d4 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 22 Jan 2026 16:34:38 -0500 Subject: [PATCH] freedreno, ir3: Fix branchstack register definitions on a5xx+ The branchstack starts one bit lower, and we have to round to the next even value instead of dividing by 2. This matches the actual HW definition and will make the next commits simpler. Part-of: --- src/freedreno/ir3/ir3_shader.h | 2 +- src/freedreno/registers/adreno/a5xx.xml | 2 +- src/freedreno/registers/adreno/a6xx.xml | 4 +--- src/freedreno/tests/reference/crash.log | 4 ++-- src/freedreno/tests/reference/fd-clouds.log | 4 ++-- src/freedreno/tests/reference/prefetch-test.log | 12 ++++++------ 6 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index e084b5a4919..f1c3c61b6da 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -1458,7 +1458,7 @@ ir3_shader_branchstack_hw(const struct ir3_shader_variant *v) if (v->compiler->gen < 5) return v->branchstack; - return DIV_ROUND_UP(MIN2(v->branchstack, v->compiler->branchstack_size), 2); + return align(MIN2(v->branchstack, v->compiler->branchstack_size), 2); } ENDC; diff --git a/src/freedreno/registers/adreno/a5xx.xml b/src/freedreno/registers/adreno/a5xx.xml index bd8df594516..014e5faa22b 100644 --- a/src/freedreno/registers/adreno/a5xx.xml +++ b/src/freedreno/registers/adreno/a5xx.xml @@ -2475,7 +2475,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set - + diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 2bf19db4e80..468986a3cee 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -3601,10 +3601,8 @@ by a particular renderpass/blit. --> - - - + diff --git a/src/freedreno/tests/reference/crash.log b/src/freedreno/tests/reference/crash.log index 1048baaf5c0..ebca29f3390 100644 --- a/src/freedreno/tests/reference/crash.log +++ b/src/freedreno/tests/reference/crash.log @@ -7100,7 +7100,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d) 00000000 SP_PS_INITIAL_TEX_INDEX[0x3].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000080 SP_PS_TSIZE: 128 0000f000 SP_UNKNOWN_A9A8: 0xf000 - 00421800 SP_CS_CNTL_0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 } + 00421800 SP_CS_CNTL_0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 16 } 0000001f SP_CS_CNTL_1: { SHARED_SIZE = 31 | CONSTANTRAMMODE = CONSTLEN_128 } 00000000 SP_CS_BOOLEAN_CF_MASK: 0 00000000 SP_CS_PROGRAM_COUNTER_OFFSET: 0 @@ -7168,7 +7168,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d) 00000000 SP_PS_INITIAL_TEX_INDEX[0x3].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000080 SP_PS_TSIZE: 128 0000f000 SP_UNKNOWN_A9A8: 0xf000 - 00421800 SP_CS_CNTL_0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 } + 00421800 SP_CS_CNTL_0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 16 } 0000001f SP_CS_CNTL_1: { SHARED_SIZE = 31 | CONSTANTRAMMODE = CONSTLEN_128 } 00000000 SP_CS_BOOLEAN_CF_MASK: 0 00000000 SP_CS_PROGRAM_COUNTER_OFFSET: 0 diff --git a/src/freedreno/tests/reference/fd-clouds.log b/src/freedreno/tests/reference/fd-clouds.log index 1704d73e1a9..205710df306 100644 --- a/src/freedreno/tests/reference/fd-clouds.log +++ b/src/freedreno/tests/reference/fd-clouds.log @@ -1889,7 +1889,7 @@ cmdstream[0]: 1023 dwords SP_PS_WAVE_CNTL: { THREADSIZE = THREAD128 } 00000000011200b8: 0000: 48b98001 00000001 write SP_PS_CNTL_0 (a980) - SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 } + SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 4 } 00000000011200c0: 0000: 40a98001 81508980 write SP_PS_PROGRAM_COUNTER_OFFSET (a982) SP_PS_PROGRAM_COUNTER_OFFSET: 0 @@ -5198,7 +5198,7 @@ cmdstream[0]: 1023 dwords + 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NUAV = 0 } + 00000000 SP_GS_CNTL_1: 0 + 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NUAV = 0 } -!+ 81508980 SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 } +!+ 81508980 SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 4 } + 00000000 SP_PS_PROGRAM_COUNTER_OFFSET: 0 !+ 01013000 SP_PS_BASE: 0x1013000 base=1013000, offset=0, size=11264 0000000001013000: 0000: 40400000 204cc000 00000000 204cc006 3e99999a 204cc004 20080014 42700008 diff --git a/src/freedreno/tests/reference/prefetch-test.log b/src/freedreno/tests/reference/prefetch-test.log index 04fe9364c20..d64a3bf0062 100644 --- a/src/freedreno/tests/reference/prefetch-test.log +++ b/src/freedreno/tests/reference/prefetch-test.log @@ -7633,7 +7633,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d) 103adc200 HLSQ_LOAD_STATE_GEOM_EXT_SRC_ADDR: 0x103adc200 - cluster-name: CLUSTER_SP_VS - context: 0 - 00108280 SP_VS_CNTL_0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 5 | BRANCHSTACK = 2 } + 00108280 SP_VS_CNTL_0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 5 | BRANCHSTACK = 4 } 00000000 SP_VS_BOOLEAN_CF_MASK: 0 00000002 SP_VS_OUTPUT_CNTL: { OUT = 2 } 0f0e0312 SP_VS_OUTPUT[0].REG: { A_REGID = r4.z | A_COMPMASK = 0x3 | B_REGID = r3.z | B_COMPMASK = 0xf } @@ -7763,7 +7763,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d) 00000000 0xa8c2: 00000000 00000000 0xa8c3: 00000000 - context: 1 - 00108280 SP_VS_CNTL_0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 5 | BRANCHSTACK = 2 } + 00108280 SP_VS_CNTL_0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 5 | BRANCHSTACK = 4 } 00000000 SP_VS_BOOLEAN_CF_MASK: 0 00000002 SP_VS_OUTPUT_CNTL: { OUT = 2 } 0f0e0312 SP_VS_OUTPUT[0].REG: { A_REGID = r4.z | A_COMPMASK = 0x3 | B_REGID = r3.z | B_COMPMASK = 0xf } @@ -8024,7 +8024,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d) deadbeef HLSQ_2D_EVENT_CMD: { STATE_ID = 0xbe | EVENT = 0x6f | 0xdead0080 } - cluster-name: CLUSTER_SP_PS - context: 0 - 85508180 SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | PIXLODENABLE | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 2 } + 85508180 SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | PIXLODENABLE | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 4 } 00000000 SP_PS_BOOLEAN_CF_MASK: 0 00000000 SP_PS_PROGRAM_COUNTER_OFFSET: 0 10372c000 SP_PS_BASE: 0x10372c000 base=10372c000, offset=0, size=4096 @@ -8092,7 +8092,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d) 00000000 0xaa30: 00000000 00000000 0xaa31: 00000000 - context: 1 - 85508180 SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | PIXLODENABLE | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 2 } + 85508180 SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | PIXLODENABLE | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 4 } 00000000 SP_PS_BOOLEAN_CF_MASK: 0 00000000 SP_PS_PROGRAM_COUNTER_OFFSET: 0 10372c000 SP_PS_BASE: 0x10372c000 base=10372c000, offset=0, size=4096 @@ -8926,7 +8926,7 @@ got cmdszdw=416 !+ c7400000 VFD_FETCH_INSTR[0].INSTR: { IDX = 0 | OFFSET = 0 | FORMAT = FMT6_32_32_32_FLOAT | SWAP = WZYX | UNK30 | FLOAT } !+ 00000001 VFD_FETCH_INSTR[0].STEP_RATE: 1 !+ 00000007 VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0x7 | REGID = r0.x } -!+ 00108280 SP_VS_CNTL_0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 5 | BRANCHSTACK = 2 } +!+ 00108280 SP_VS_CNTL_0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 5 | BRANCHSTACK = 4 } !+ 00000002 SP_VS_OUTPUT_CNTL: { OUT = 2 } !+ 0f0e0312 SP_VS_OUTPUT[0].REG: { A_REGID = r4.z | A_COMPMASK = 0x3 | B_REGID = r3.z | B_COMPMASK = 0xf } !+ 00000200 SP_VS_VPC_DEST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 2 | OUTLOC2 = 0 | OUTLOC3 = 0 } @@ -9003,7 +9003,7 @@ got cmdszdw=416 + 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NUAV = 0 } + 00000000 SP_GS_CNTL_1: 0 + 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NUAV = 0 } -!+ 85508180 SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | PIXLODENABLE | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 2 } +!+ 85508180 SP_PS_CNTL_0: { THREADSIZE = THREAD128 | VARYING | INOUTREGOVERLAP | PIXLODENABLE | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 4 } + 00000000 SP_PS_PROGRAM_COUNTER_OFFSET: 0 !+ 10372c000 SP_PS_BASE: 0x10372c000 base=10372c000, offset=0, size=4096 000000010372c000: 0000: 00002000 473080fc 0000000e 03820000 0000000d 02820000 0000b800 20488007