freedreno/a6xx: Rework where we emit ccu cache cntl

We don't need to re-emit it for each tile. But we do need to set up the preemption preamble to restore us to GMEM mode in case we get preempted on a tile boundary. While we are at it, rename the function to something more sensible. Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38450>
2026-04-27 11:00:37 +02:00 · 2025-11-02 09:12:28 -08:00 · 2025-11-02 09:12:28 -08:00 · 4ca78a296f
commit 4ca78a296f
parent 2439606518
4 changed files with 15 additions and 13 deletions
--- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc
@ -268,7 +268,7 @@ emit_setup(struct fd_context *ctx, fd_cs &cs)
                          FD6_INVALIDATE_CCU_DEPTH);

   /* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
-   fd6_emit_ccu_cntl<CHIP>(cs, ctx->screen, false);
+   fd6_emit_gmem_cache_cntl<CHIP>(cs, ctx->screen, false);
 }

 template <chip CHIP>
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc
@ -756,9 +756,13 @@ fd6_emit_cs_state(struct fd_context *ctx, fd_cs &cs, struct fd6_compute_state *c
 }
 FD_GENX(fd6_emit_cs_state);

+/**
+ * Configure RB_CCU_CNTL and various caches that reside in GMEM for either
+ * GMEM or sysmem mode.
+ */
 template <chip CHIP>
 void
-fd6_emit_ccu_cntl(fd_cs &cs, struct fd_screen *screen, bool gmem)
+fd6_emit_gmem_cache_cntl(fd_cs &cs, struct fd_screen *screen, bool gmem)
 {
   const struct fd6_gmem_config *cfg = gmem ? &screen->config_gmem : &screen->config_sysmem;
   enum a6xx_ccu_cache_size color_cache_size = !gmem ? CCU_CACHE_SIZE_FULL :
@ -806,7 +810,7 @@ fd6_emit_ccu_cntl(fd_cs &cs, struct fd_screen *screen, bool gmem)
      );
   }
 }
-FD_GENX(fd6_emit_ccu_cntl);
+FD_GENX(fd6_emit_gmem_cache_cntl);

 template <chip CHIP>
 static void
@ -1103,7 +1107,7 @@ fd6_emit_restore(fd_cs &cs, struct fd_batch *batch)
   fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);

   fd6_emit_ib<CHIP>(cs, fd6_context(ctx)->restore);
-   fd6_emit_ccu_cntl<CHIP>(cs, screen, false);
+   fd6_emit_gmem_cache_cntl<CHIP>(cs, screen, false);

   uint32_t dwords;

--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
@ -374,7 +374,7 @@ void fd6_emit_cs_state(struct fd_context *ctx, fd_cs &cs,
                       struct fd6_compute_state *cp) assert_dt;

 template <chip CHIP>
-void fd6_emit_ccu_cntl(fd_cs &cs, struct fd_screen *screen, bool gmem);
+void fd6_emit_gmem_cache_cntl(fd_cs &cs, struct fd_screen *screen, bool gmem);

 template <chip CHIP>
 void fd6_emit_static_regs(fd_cs &cs, struct fd_context *ctx);
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc
@ -298,7 +298,7 @@ emit_lrz_clears(struct fd_batch *batch)

      /* prep before first clear: */
      if (count == 0) {
-         fd6_emit_ccu_cntl<CHIP>(cs, ctx->screen, false);
+         fd6_emit_gmem_cache_cntl<CHIP>(cs, ctx->screen, false);

         fd_pkt7(cs, CP_SET_MARKER, 1)
            .add(A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
@ -984,7 +984,7 @@ emit_binning_pass(fd_cs &cs, struct fd_batch *batch) assert_dt
   fd_pkt7(cs, CP_SET_MODE, 1)
      .add(0x0);

-   fd6_emit_ccu_cntl<CHIP>(cs, screen, true);
+   fd6_emit_gmem_cache_cntl<CHIP>(cs, screen, true);
 }

 /* nregs: 7 */
@ -1042,7 +1042,7 @@ fd6_build_preemption_preamble(struct fd_context *ctx)
   fd_cs cs(ctx->pipe, 0x1000);

   fd6_emit_static_regs<CHIP>(cs, ctx);
-   fd6_emit_ccu_cntl<CHIP>(cs, screen, false);
+   fd6_emit_gmem_cache_cntl<CHIP>(cs, screen, true);

   if (CHIP >= A7XX) {
      fd7_emit_static_binning_regs<CHIP>(cs);
@ -1099,7 +1099,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
   fd_pkt7(cs, CP_SKIP_IB2_ENABLE_LOCAL, 1)
      .add(0x1);

-   fd6_emit_ccu_cntl<CHIP>(cs, screen, true);
+   fd6_emit_gmem_cache_cntl<CHIP>(cs, screen, true);

   with_crb (cs, 150) {
      emit_zs<CHIP>(crb, &pfb->zsbuf, batch->gmem_state);
@ -1219,8 +1219,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
   set_scissor<CHIP>(cs, x1, y1, x2, y2);
   set_tessfactor_bo<CHIP>(cs, batch);

-   fd6_emit_ccu_cntl<CHIP>(cs, screen, true);
-
   with_crb (cs, 150) {
      emit_zs<CHIP>(crb, &pfb->zsbuf, batch->gmem_state);
      emit_mrt<CHIP>(crb, pfb, batch->gmem_state);
@ -2051,6 +2049,8 @@ fd6_emit_sysmem(struct fd_batch *batch)
   struct fd_screen *screen = batch->ctx->screen;
   fd_cs cs(batch->gmem);

+   fd6_emit_gmem_cache_cntl<CHIP>(cs, screen, false);
+
   foreach_subpass (subpass, batch) {
      if (subpass->fast_cleared) {
         unsigned flushes = 0;
@ -2063,8 +2063,6 @@ fd6_emit_sysmem(struct fd_batch *batch)
         emit_sysmem_clears<CHIP>(cs, batch, subpass);
      }

-      fd6_emit_ccu_cntl<CHIP>(cs, screen, false);
-
      struct pipe_framebuffer_state *pfb = &batch->framebuffer;
      update_render_cntl<CHIP>(cs, screen, pfb, false);