panvk: Wrap draws and dispatches with conditional rendering

Use panvk_cond_render() to wrap the RUN_IDVS and RUN_COMPUTE GPU
commands. When conditional rendering is active, the predicate is loaded
from the condition buffer and a cs_if branches over the wrapped GPU
commands when the condition indicates they should be skipped.
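
Conceptually, the wrapper emits a predicate load followed by a
conditional branch around the wrapped commands. The sketch below is
illustrative only: the scratch register choice and the load/wait helper
names (cs_load32_to, cs_wait_slot, SB_ID) are assumptions about the
cs_builder API, not taken from this change.

   /* Hypothetical expansion of panvk_cond_render(cmdbuf, b) { ... } */
   struct cs_index pred = cs_scratch_reg32(b, 16);

   /* Load the 32-bit predicate from the conditional rendering buffer
    * (address assumed to be tracked in the command buffer state). */
   cs_load32_to(b, pred, cond_buf_addr, 0);
   cs_wait_slot(b, SB_ID(LS));

   /* Run the wrapped commands only when the predicate is non-zero;
    * inverted conditional rendering would flip the condition. */
   cs_if(b, MALI_CS_CONDITION_NEQUAL, pred) {
      /* ... wrapped RUN_IDVS / RUN_COMPUTE commands ... */
   }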

This per-command approach correctly leaves render pass operations
(BeginRendering, EndRendering, load ops) unaffected, as required by the
spec: only draws, dispatches, and CmdClearAttachments are made
conditional.
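
For reference, this implements VK_EXT_conditional_rendering on the app
side: the predicate is a 32-bit value in a buffer, and commands recorded
between begin/end are skipped when it reads zero (or non-zero with the
inverted flag). A minimal usage sketch (predicate_buffer is a
hypothetical VkBuffer holding the predicate):

   VkConditionalRenderingBeginInfoEXT cond_info = {
      .sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT,
      .buffer = predicate_buffer,
      .offset = 0,
      .flags = 0, /* or VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT */
   };

   vkCmdBeginConditionalRenderingEXT(cmd, &cond_info);
   vkCmdDraw(cmd, 3, 1, 0, 0); /* skipped when the predicate reads zero */
   vkCmdEndConditionalRenderingEXT(cmd);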

Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40452>
Author: Christian Gmeiner (committed by Marge Bot)
Date:   2026-03-17 08:42:46 +01:00
Commit: 516c68268a (parent: 62f3db0e6e)

2 changed files with 57 additions and 48 deletions

@@ -280,26 +280,30 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    cs_next_iter_sb(cmdbuf, PANVK_SUBQUEUE_COMPUTE,
                    cs_scratch_reg_tuple(b, 0, 2));
-   if (indirect) {
-      /* Use run_compute with a set task axis instead of run_compute_indirect as
-       * run_compute_indirect has been found to cause intermittent hangs. This
-       * is safe, as the task increment will be clamped by the job size along
-       * the specified axis.
-       * The chosen task axis is potentially suboptimal, as choosing good
-       * increment/axis parameters requires knowledge of job dimensions, but
-       * this is somewhat offset by run_compute being a native instruction. */
-      unsigned task_axis = MALI_TASK_AXIS_X;
-      cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                           wg_per_task, task_axis,
-                           cs_shader_res_sel(0, 0, 0, 0));
-   } else {
-      unsigned task_axis = MALI_TASK_AXIS_X;
-      unsigned task_increment = 0;
-      panvk_per_arch(calculate_task_axis_and_increment)(
-         cs, phys_dev, &dim, &task_axis, &task_increment);
-      cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                           task_increment, task_axis,
-                           cs_shader_res_sel(0, 0, 0, 0));
-   }
+   panvk_cond_render(cmdbuf, b)
+   {
+      if (indirect) {
+         /* Use run_compute with a set task axis instead of
+          * run_compute_indirect as run_compute_indirect has been found to
+          * cause intermittent hangs. This is safe, as the task increment
+          * will be clamped by the job size along the specified axis.
+          * The chosen task axis is potentially suboptimal, as choosing good
+          * increment/axis parameters requires knowledge of job dimensions,
+          * but this is somewhat offset by run_compute being a native
+          * instruction. */
+         unsigned task_axis = MALI_TASK_AXIS_X;
+         cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
+                              wg_per_task, task_axis,
+                              cs_shader_res_sel(0, 0, 0, 0));
+      } else {
+         unsigned task_axis = MALI_TASK_AXIS_X;
+         unsigned task_increment = 0;
+         panvk_per_arch(calculate_task_axis_and_increment)(
+            cs, phys_dev, &dim, &task_axis, &task_increment);
+         cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
+                              task_increment, task_axis,
+                              cs_shader_res_sel(0, 0, 0, 0));
+      }
+   }
 
 #if PAN_ARCH >= 11

@@ -2519,13 +2519,38 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
    uint32_t idvs_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count,
                                       MAX_LAYERS_PER_TILER_DESC);
-   if (idvs_count > 1) {
-      struct cs_index counter_reg = cs_scratch_reg32(b, 17);
-      struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX);
-      cs_move32_to(b, counter_reg, idvs_count);
-      cs_while(b, MALI_CS_CONDITION_GREATER, counter_reg) {
+   panvk_cond_render(cmdbuf, b)
+   {
+      if (idvs_count > 1) {
+         struct cs_index counter_reg = cs_scratch_reg32(b, 17);
+         struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX);
+         cs_move32_to(b, counter_reg, idvs_count);
+         cs_while(b, MALI_CS_CONDITION_GREATER, counter_reg) {
+#if PAN_ARCH >= 12
+            cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
+                               flags_override.opaque[0], true, cs_undef(),
+                               MALI_IDVS_SHADING_MODE_EARLY);
+#else
+            cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
+                              flags_override.opaque[0], true,
+                              cs_shader_res_sel(0, 0, 1, 0),
+                              cs_shader_res_sel(2, 2, 2, 0), cs_undef());
+#endif
+            cs_add32(b, counter_reg, counter_reg, -1);
+            cs_update_vt_ctx(b) {
+               cs_add64(b, tiler_ctx_addr, tiler_ctx_addr,
+                        pan_size(TILER_CONTEXT));
+            }
+         }
+         cs_update_vt_ctx(b) {
+            cs_add64(b, tiler_ctx_addr, tiler_ctx_addr,
+                     -(idvs_count * pan_size(TILER_CONTEXT)));
+         }
+      } else {
+#if PAN_ARCH >= 12
+         cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
+                            flags_override.opaque[0], true, cs_undef(),
+                            MALI_IDVS_SHADING_MODE_EARLY);
+#else
+         cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
+                           flags_override.opaque[0], true,
+                           cs_shader_res_sel(0, 0, 1, 0),
+                           cs_shader_res_sel(2, 2, 2, 0), cs_undef());
+#endif
+      }
+   }
@@ -2536,29 +2561,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
-         cs_add32(b, counter_reg, counter_reg, -1);
-         cs_update_vt_ctx(b) {
-            cs_add64(b, tiler_ctx_addr, tiler_ctx_addr,
-                     pan_size(TILER_CONTEXT));
-         }
-      }
-      cs_update_vt_ctx(b) {
-         cs_add64(b, tiler_ctx_addr, tiler_ctx_addr,
-                  -(idvs_count * pan_size(TILER_CONTEXT)));
-      }
-   } else {
-#if PAN_ARCH >= 12
-      cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                         flags_override.opaque[0], true, cs_undef(),
-                         MALI_IDVS_SHADING_MODE_EARLY);
-#else
-      cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                        flags_override.opaque[0], true,
-                        cs_shader_res_sel(0, 0, 1, 0),
-                        cs_shader_res_sel(2, 2, 2, 0), cs_undef());
-#endif
-   }
@@ -2725,7 +2728,9 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
    cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr);
    cs_move32_to(b, draw_id, 0);
-   cs_while(b, MALI_CS_CONDITION_GREATER, draw_count) {
+   panvk_cond_render(cmdbuf, b)
+      cs_while(b, MALI_CS_CONDITION_GREATER, draw_count)
+      {
       cs_update_vt_ctx(b) {
          cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
          /* Load SR33-37 from indirect buffer. */