freedreno/a5xx: Set num_sp_cores and set PC/VFD_POWER_CNTL accordingly.

Based on libwrap tracing of the blob.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24358>
2025-12-23 13:20:14 +01:00 · 2023-08-08 09:53:16 -07:00 · 2023-08-08 09:53:16 -07:00 · c4874b4cee
commit c4874b4cee
parent c9f9d71412
2 changed files with 35 additions and 5 deletions
--- a/src/freedreno/common/freedreno_devices.py
+++ b/src/freedreno/common/freedreno_devices.py
@@ -232,8 +232,36 @@ add_gpus([
        GPUId(506),
        GPUId(508),
        GPUId(509),
+    ], GPUInfo(
+        CHIP.A5XX,
+        gmem_align_w = 64,  gmem_align_h = 32,
+        tile_align_w = 64,  tile_align_h = 32,
+        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
+        tile_max_h   = max_bitfield_val(16, 9, 5),
+        num_vsc_pipes = 16,
+        cs_shared_mem_size = 32 * 1024,
+        num_sp_cores = 1,
+        wave_granularity = 2,
+        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
+    ))
+
+add_gpus([
        GPUId(510),
        GPUId(512),
+    ], GPUInfo(
+        CHIP.A5XX,
+        gmem_align_w = 64,  gmem_align_h = 32,
+        tile_align_w = 64,  tile_align_h = 32,
+        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
+        tile_max_h   = max_bitfield_val(16, 9, 5),
+        num_vsc_pipes = 16,
+        cs_shared_mem_size = 32 * 1024,
+        num_sp_cores = 2,
+        wave_granularity = 2,
+        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
+    ))
+
+add_gpus([
        GPUId(530),
        GPUId(540),
    ], GPUInfo(
@@ -244,7 +272,7 @@ add_gpus([
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
-        num_sp_cores = 0, # TODO
+        num_sp_cores = 4,
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
    ))
--- a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
@@ -394,6 +394,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt
 static void
 fd5_emit_tile_init(struct fd_batch *batch) assert_dt
 {
+   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

@@ -411,10 +412,10 @@ fd5_emit_tile_init(struct fd_batch *batch) assert_dt
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
-   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
+   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
-   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
+   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
@@ -715,6 +716,7 @@ fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
 static void
 fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
 {
+   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);
@@ -730,10 +732,10 @@ fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
-   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
+   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
-   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
+   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);