radv: implement indirect compute pipeline binds with DGC

This also supports push constants.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27495>
This commit is contained in:
Samuel Pitoiset 2024-02-06 18:33:49 +01:00 committed by Marge Bot
parent 12b015940f
commit a2d67adff1
3 changed files with 361 additions and 68 deletions

View file

@ -10307,7 +10307,8 @@ radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer)
if (compute_shader->info.cs.regalloc_hang_bug)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
radv_emit_compute_pipeline(cmd_buffer, pipeline);
if (pipeline)
radv_emit_compute_pipeline(cmd_buffer, pipeline);
radv_emit_cache_flush(cmd_buffer);
radv_upload_compute_shader_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);

View file

@ -35,13 +35,37 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size)
{
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
struct radv_shader *cs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_COMPUTE);
/* dispatch */
*cmd_size += 5 * 4;
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
if (pipeline) {
struct radv_shader *cs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_COMPUTE);
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
if (device->load_grid_size_from_user_sgpr) {
/* PKT3_SET_SH_REG for immediate values */
*cmd_size += 5 * 4;
} else {
/* PKT3_SET_SH_REG for pointer */
*cmd_size += 4 * 4;
}
}
} else {
/* COMPUTE_PGM_{LO,RSRC1,RSRC2} */
*cmd_size += 7 * 4;
if (device->physical_device->rad_info.gfx_level >= GFX10) {
/* COMPUTE_PGM_RSRC3 */
*cmd_size += 3 * 4;
}
/* COMPUTE_{RESOURCE_LIMITS,NUM_THREADS_X} */
*cmd_size += 8 * 4;
/* Assume the compute shader needs grid size because we can't know the information for
* indirect pipelines.
*/
if (device->load_grid_size_from_user_sgpr) {
/* PKT3_SET_SH_REG for immediate values */
*cmd_size += 5 * 4;
@ -116,20 +140,32 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
if (layout->push_constant_mask) {
bool need_copy = false;
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
if (!pipeline->shaders[i])
continue;
if (pipeline) {
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
if (!pipeline->shaders[i])
continue;
struct radv_userdata_locations *locs = &pipeline->shaders[i]->info.user_sgprs_locs;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
/* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
*cmd_size += 3 * 4;
need_copy = true;
struct radv_userdata_locations *locs = &pipeline->shaders[i]->info.user_sgprs_locs;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
/* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
*cmd_size += 3 * 4;
need_copy = true;
}
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0)
/* One PKT3_SET_SH_REG writing all inline push constants. */
*cmd_size += (3 * util_bitcount64(layout->push_constant_mask)) * 4;
}
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0)
/* One PKT3_SET_SH_REG writing all inline push constants. */
*cmd_size += (3 * util_bitcount64(layout->push_constant_mask)) * 4;
} else {
/* Assume the compute shader needs both user SGPRs because we can't know the information
* for indirect pipelines.
*/
assert(layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
*cmd_size += 3 * 4;
need_copy = true;
*cmd_size += (3 * util_bitcount64(layout->push_constant_mask)) * 4;
}
if (need_copy) {
*upload_size += align(layout->push_constant_size + 16 * layout->dynamic_offset_count, 16);
}
@ -145,7 +181,7 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
radv_get_sequence_size_graphics(layout, graphics_pipeline, cmd_size, upload_size);
} else {
assert(layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
struct radv_compute_pipeline *compute_pipeline = pipeline ? radv_pipeline_to_compute(pipeline) : NULL;
radv_get_sequence_size_compute(layout, compute_pipeline, cmd_size);
}
}
@ -236,6 +272,9 @@ struct radv_dgc_params {
uint8_t predicating;
uint8_t predication_type;
uint64_t predication_va;
uint8_t bind_pipeline;
uint16_t pipeline_params_offset;
};
enum {
@ -292,6 +331,15 @@ dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *value)
nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \
.base = offsetof(struct radv_dgc_params, field), .range = 8))
/* Pipeline metadata */
#define load_metadata32(b, field) \
nir_load_global( \
b, nir_iadd(b, pipeline_va, nir_imm_int64(b, offsetof(struct radv_compute_pipeline_metadata, field))), 4, 1, 32)
#define load_metadata64(b, field) \
nir_load_global( \
b, nir_iadd(b, pipeline_va, nir_imm_int64(b, offsetof(struct radv_compute_pipeline_metadata, field))), 4, 1, 64)
static nir_def *
nir_pkt3_base(nir_builder *b, unsigned op, nir_def *len, bool predicate)
{
@ -849,12 +897,127 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
/**
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV.
*/
/* Return the number of shaders that consume push constants for this sequence.
 *
 * When the layout binds the compute pipeline indirectly (bind_pipeline == 1),
 * the count cannot be recorded at prepare time, so it is derived from the
 * pipeline metadata referenced by the VA fetched from the indirect command
 * stream; otherwise the CPU-recorded value is used.
 */
static nir_def *
dgc_get_push_constant_shader_cnt(nir_builder *b, nir_def *stream_buf, nir_def *stream_base,
                                 nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      /* Indirect bind implies a single compute shader; count it only if it
       * actually uses push constants (non-zero push_const_sgpr metadata).
       */
      res1 = nir_b2i32(b, nir_ine_imm(b, load_metadata32(b, push_const_sgpr), 0));
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = load_param16(b, push_constant_shader_cnt);
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
/* Return the SH register offset used to upload the push constants pointer.
 *
 * For an indirect pipeline bind (bind_pipeline == 1), the packed SGPR dword is
 * read from the pipeline metadata referenced by the VA in the command stream;
 * otherwise it comes from the per-shader table uploaded at prepare time
 * (12 bytes per shader). The upload SGPR lives in the low 16 bits.
 */
static nir_def *
dgc_get_upload_sgpr(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *param_buf,
                    nir_def *param_offset, nir_def *cur_shader_idx, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = load_metadata32(b, push_const_sgpr);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
   }
   nir_pop_if(b, NULL);

   nir_def *res = nir_if_phi(b, res1, res2);

   return nir_ubfe_imm(b, res, 0, 16);
}
/* Return the SH register offset used for inline push constants.
 *
 * Same packed dword as dgc_get_upload_sgpr() (pipeline metadata for indirect
 * binds, the uploaded per-shader table otherwise); the inline SGPR lives in
 * the high 16 bits.
 */
static nir_def *
dgc_get_inline_sgpr(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *param_buf,
                    nir_def *param_offset, nir_def *cur_shader_idx, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = load_metadata32(b, push_const_sgpr);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
   }
   nir_pop_if(b, NULL);

   nir_def *res = nir_if_phi(b, res1, res2);

   return nir_ubfe_imm(b, res, 16, 16);
}
/* Return the 64-bit inline push constant mask for the current shader.
 *
 * Indirect binds read it from the pipeline metadata; otherwise it is packed
 * in the 2nd/3rd dwords of the 12-byte per-shader record in the param buffer.
 */
static nir_def *
dgc_get_inline_mask(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *param_buf,
                    nir_def *param_offset, nir_def *cur_shader_idx, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata64() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = load_metadata64(b, inline_push_const_mask);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      /* Skip the first dword (packed SGPR info) of the 12-byte record. */
      nir_def *reg_info = nir_load_ssbo(
         b, 2, 32, param_buf, nir_iadd(b, param_offset, nir_iadd_imm(b, nir_imul_imm(b, cur_shader_idx, 12), 4)));
      res2 = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x3));
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
/* Return whether the push constants must be copied to the upload buffer.
 *
 * Indirect binds check the metadata's upload SGPR (low 16 bits of
 * push_const_sgpr); otherwise the CPU-recorded const_copy flag is used.
 */
static nir_def *
dgc_push_constant_needs_copy(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = nir_ine_imm(b, nir_ubfe_imm(b, load_metadata32(b, push_const_sgpr), 0, 16), 0);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = nir_ine_imm(b, load_param8(b, const_copy), 0);
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
static void
dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *push_const_mask, nir_variable *upload_offset)
nir_def *pipeline_params_offset, nir_def *push_const_mask, nir_variable *upload_offset)
{
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_def *const_copy = nir_ine_imm(b, load_param8(b, const_copy), 0);
nir_def *const_copy = dgc_push_constant_needs_copy(b, stream_buf, stream_base, pipeline_params_offset);
nir_def *const_copy_size = load_param16(b, const_copy_size);
nir_def *const_copy_words = nir_ushr_imm(b, const_copy_size, 2);
const_copy_words = nir_bcsel(b, const_copy, const_copy_words, nir_imm_int(b, 0));
@ -906,7 +1069,7 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
nir_variable *shader_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
nir_store_var(b, shader_idx, nir_imm_int(b, 0), 0x1);
nir_def *shader_cnt = load_param16(b, push_constant_shader_cnt);
nir_def *shader_cnt = dgc_get_push_constant_shader_cnt(b, stream_buf, stream_base, pipeline_params_offset);
nir_push_loop(b);
{
@ -917,11 +1080,12 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
}
nir_pop_if(b, NULL);
nir_def *reg_info =
nir_load_ssbo(b, 3, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
nir_def *upload_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 0, 16);
nir_def *inline_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 16, 16);
nir_def *inline_mask = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x6));
nir_def *upload_sgpr = dgc_get_upload_sgpr(b, stream_buf, stream_base, param_buf, param_offset, cur_shader_idx,
pipeline_params_offset);
nir_def *inline_sgpr = dgc_get_inline_sgpr(b, stream_buf, stream_base, param_buf, param_offset, cur_shader_idx,
pipeline_params_offset);
nir_def *inline_mask = dgc_get_inline_mask(b, stream_buf, stream_base, param_buf, param_offset, cur_shader_idx,
pipeline_params_offset);
nir_push_if(b, nir_ine_imm(b, upload_sgpr, 0));
{
@ -974,6 +1138,26 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
dgc_emit(b, cs, nir_vec(b, pkt, 3));
}
nir_push_else(b, NULL);
{
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
{
/* For indirect pipeline binds, partial push constant updates can't be emitted
* when the DGC execute is called because there is no bound pipeline and they have
* to be emitted from the DGC prepare shader.
*/
nir_def *new_data =
nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2)));
nir_store_var(b, data, new_data, 0x1);
nir_def *pkt[3] = {nir_pkt3(b, PKT3_SET_SH_REG, nir_imm_int(b, 1)),
nir_iadd(b, inline_sgpr, nir_load_var(b, pc_idx)), nir_load_var(b, data)};
dgc_emit(b, cs, nir_vec(b, pkt, 3));
}
nir_pop_if(b, NULL);
}
nir_pop_if(b, NULL);
nir_store_var(b, idx, nir_iadd_imm(b, cur_idx, 1), 0x1);
@ -1129,9 +1313,53 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
/**
* For emitting VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NV.
*/
/* Return the base SGPR used to load the dispatch grid size.
 *
 * Indirect binds read it from the pipeline metadata referenced by the VA in
 * the command stream; otherwise the CPU-recorded grid_base_sgpr is used.
 */
static nir_def *
dgc_get_grid_sgpr(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = load_metadata32(b, grid_base_sgpr);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = load_param16(b, grid_base_sgpr);
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
/* Return the COMPUTE_DISPATCH_INITIATOR value for this sequence.
 *
 * For indirect pipeline binds the wave size is unknown at prepare time, so
 * CS_W32_EN is ORed in at execution time from the pipeline metadata; for a
 * direct bind the CPU-recorded initiator already contains the right bits.
 */
static nir_def *
dgc_get_dispatch_initiator(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
      nir_def *dispatch_initiator = load_param32(b, dispatch_initiator);

      nir_def *wave32 = nir_ieq_imm(b, load_metadata32(b, wave32), 1);
      res1 = nir_bcsel(b, wave32, nir_ior_imm(b, dispatch_initiator, S_00B800_CS_W32_EN(1)), dispatch_initiator);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = load_param32(b, dispatch_initiator);
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
static void
dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *dispatch_params_offset, nir_def *sequence_id, const struct radv_device *device)
nir_def *dispatch_params_offset, nir_def *pipeline_params_offset, nir_def *sequence_id,
const struct radv_device *device)
{
nir_def *stream_offset = nir_iadd(b, dispatch_params_offset, stream_base);
@ -1140,7 +1368,7 @@ dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, ni
nir_def *wg_y = nir_channel(b, dispatch_data, 1);
nir_def *wg_z = nir_channel(b, dispatch_data, 2);
nir_def *grid_sgpr = load_param16(b, grid_base_sgpr);
nir_def *grid_sgpr = dgc_get_grid_sgpr(b, stream_buf, stream_base, pipeline_params_offset);
nir_push_if(b, nir_ine_imm(b, grid_sgpr, 0));
{
if (device->load_grid_size_from_user_sgpr) {
@ -1156,7 +1384,8 @@ dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, ni
dgc_emit_sqtt_begin_api_marker(b, cs, ApiCmdDispatch);
dgc_emit_sqtt_marker_event_with_dims(b, cs, sequence_id, wg_x, wg_y, wg_z, EventCmdDispatch);
dgc_emit_dispatch_direct(b, cs, wg_x, wg_y, wg_z, load_param32(b, dispatch_initiator));
nir_def *dispatch_initiator = dgc_get_dispatch_initiator(b, stream_buf, stream_base, pipeline_params_offset);
dgc_emit_dispatch_direct(b, cs, wg_x, wg_y, wg_z, dispatch_initiator);
dgc_emit_sqtt_thread_trace_marker(b, cs);
dgc_emit_sqtt_end_api_marker(b, cs, ApiCmdDispatch);
@ -1200,6 +1429,48 @@ dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_
nir_pop_if(b, NULL);
}
/**
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NV.
*/
/* Emit the two-dword header of a PKT3_SET_SH_REG packet covering "num"
 * consecutive SH registers starting at "reg". The register payload dwords
 * are emitted separately by the caller.
 */
static void
dgc_emit_set_sh_reg_seq(nir_builder *b, struct dgc_cmdbuf *cs, unsigned reg, unsigned num)
{
   nir_def *header = nir_imm_int(b, PKT3(PKT3_SET_SH_REG, num, false));
   nir_def *reg_offset = nir_imm_int(b, (reg - SI_SH_REG_OFFSET) >> 2);
   nir_def *packet[] = {header, reg_offset};

   dgc_emit(b, cs, nir_vec(b, packet, 2));
}
/* Emit the PKT3_SET_SH_REG packets that bind an indirectly-specified compute
 * pipeline: the pipeline VA is read from the indirect command stream and the
 * shader register values are loaded from the radv_compute_pipeline_metadata
 * it points to. The emission order below is the order the dwords land in the
 * generated command buffer.
 */
static void
dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
                       nir_def *pipeline_params_offset, const struct radv_device *device)
{
   nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
   /* pipeline_va is consumed implicitly by the load_metadata32() macro below. */
   nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

   /* Shader code address. */
   dgc_emit_set_sh_reg_seq(b, cs, R_00B830_COMPUTE_PGM_LO, 1);
   dgc_emit(b, cs, load_metadata32(b, shader_va));

   /* Shader resource registers. */
   dgc_emit_set_sh_reg_seq(b, cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
   dgc_emit(b, cs, load_metadata32(b, rsrc1));
   dgc_emit(b, cs, load_metadata32(b, rsrc2));

   /* RSRC3 only exists on GFX10+. */
   if (device->physical_device->rad_info.gfx_level >= GFX10) {
      dgc_emit_set_sh_reg_seq(b, cs, R_00B8A0_COMPUTE_PGM_RSRC3, 1);
      dgc_emit(b, cs, load_metadata32(b, rsrc3));
   }

   dgc_emit_set_sh_reg_seq(b, cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 1);
   dgc_emit(b, cs, load_metadata32(b, compute_resource_limits));

   /* Workgroup dimensions (NUM_THREAD_X/Y/Z are consecutive registers). */
   dgc_emit_set_sh_reg_seq(b, cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
   dgc_emit(b, cs, load_metadata32(b, block_size_x));
   dgc_emit(b, cs, load_metadata32(b, block_size_y));
   dgc_emit(b, cs, load_metadata32(b, block_size_z));
}
static nir_def *
dgc_is_cond_render_enabled(nir_builder *b)
{
@ -1300,7 +1571,14 @@ build_dgc_prepare_shader(struct radv_device *dev)
nir_def *push_const_mask = load_param64(&b, push_constant_mask);
nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0));
{
dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, push_const_mask, upload_offset);
dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, pipeline_params_offset),
push_const_mask, upload_offset);
}
nir_pop_if(&b, 0);
nir_push_if(&b, nir_ieq_imm(&b, load_param8(&b, bind_pipeline), 1));
{
dgc_emit_bind_pipeline(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, pipeline_params_offset), dev);
}
nir_pop_if(&b, 0);
@ -1353,8 +1631,8 @@ build_dgc_prepare_shader(struct radv_device *dev)
}
nir_push_else(&b, NULL);
{
dgc_emit_dispatch(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, dispatch_params_offset), sequence_id,
dev);
dgc_emit_dispatch(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, dispatch_params_offset),
load_param16(&b, pipeline_params_offset), sequence_id, dev);
}
nir_pop_if(&b, NULL);
@ -1551,6 +1829,10 @@ radv_CreateIndirectCommandsLayoutNV(VkDevice _device, const VkIndirectCommandsLa
layout->draw_mesh_tasks = true;
layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NV:
layout->bind_pipeline = true;
layout->pipeline_params_offset = pCreateInfo->pTokens[i].offset;
break;
default:
unreachable("Unhandled token type");
}
@ -1761,8 +2043,6 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
{
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE);
*upload_size = MAX2(*upload_size, 16);
@ -1771,15 +2051,8 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
return;
}
uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator;
dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
if (cs->info.wave_size == 32) {
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
dispatch_initiator |= S_00B800_CS_W32_EN(1);
}
params->dispatch_params_offset = layout->dispatch_params_offset;
params->dispatch_initiator = dispatch_initiator;
params->dispatch_initiator = cmd_buffer->device->dispatch_initiator | S_00B800_FORCE_START_AT_000(1);
params->is_dispatch = 1;
if (cond_render_enabled) {
@ -1788,9 +2061,22 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
params->predication_type = cmd_buffer->state.predication_type;
}
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
if (pipeline) {
struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE);
if (cs->info.wave_size == 32) {
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
params->dispatch_initiator |= S_00B800_CS_W32_EN(1);
}
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
}
} else {
params->bind_pipeline = 1;
params->pipeline_params_offset = layout->pipeline_params_offset;
}
}
@ -1849,35 +2135,38 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsIn
upload_data = (char *)upload_data + ARRAY_SIZE(pipeline->shaders) * 12;
unsigned idx = 0;
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
if (!pipeline->shaders[i])
continue;
const struct radv_shader *shader = pipeline->shaders[i];
const struct radv_userdata_locations *locs = &shader->info.user_sgprs_locs;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0)
params.const_copy = 1;
if (pipeline) {
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
if (!pipeline->shaders[i])
continue;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 ||
locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
unsigned upload_sgpr = 0;
unsigned inline_sgpr = 0;
const struct radv_shader *shader = pipeline->shaders[i];
const struct radv_userdata_locations *locs = &shader->info.user_sgprs_locs;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0)
params.const_copy = 1;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
upload_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
SI_SH_REG_OFFSET) >>
2;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 ||
locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
unsigned upload_sgpr = 0;
unsigned inline_sgpr = 0;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
upload_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
SI_SH_REG_OFFSET) >>
2;
}
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
inline_sgpr = (shader->info.user_data_0 +
4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx - SI_SH_REG_OFFSET) >>
2;
desc[idx * 3 + 1] = pipeline->shaders[i]->info.inline_push_constant_mask;
desc[idx * 3 + 2] = pipeline->shaders[i]->info.inline_push_constant_mask >> 32;
}
desc[idx * 3] = upload_sgpr | (inline_sgpr << 16);
++idx;
}
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
inline_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx -
SI_SH_REG_OFFSET) >>
2;
desc[idx * 3 + 1] = pipeline->shaders[i]->info.inline_push_constant_mask;
desc[idx * 3 + 2] = pipeline->shaders[i]->info.inline_push_constant_mask >> 32;
}
desc[idx * 3] = upload_sgpr | (inline_sgpr << 16);
++idx;
}
}

View file

@ -3280,6 +3280,9 @@ struct radv_indirect_command_layout {
uint16_t dispatch_params_offset;
bool bind_pipeline;
uint16_t pipeline_params_offset;
uint32_t bind_vbo_mask;
uint32_t vbo_offsets[MAX_VBS];