anv,iris: program the maximum number of threads on compute queue init

Fixes: 90a39cac87 ("intel/blorp: Emit compute program based on BLORP_BATCH_USE_COMPUTE")
Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23934>
(cherry picked from commit e7e7042093)
2026-05-01 18:58:10 +02:00 · 2023-06-29 14:24:55 +02:00 · 2023-06-29 14:24:55 +02:00 · 7fbf06e503
commit 7fbf06e503
parent f874ebe024
4 changed files with 31 additions and 7 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -15084,7 +15084,7 @@
        "description": "anv,iris: program the maximum number of threads on compute queue init",
        "nominated": true,
        "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": "90a39cac87f415375a70e1cb2f7ba2c486f941e4",
        "notes": null
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -1392,6 +1392,13 @@ iris_init_compute_context(struct iris_batch *batch)
   init_aux_map_state(batch);
 #endif

+#if GFX_VERx10 >= 125
+   iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
+      cfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * devinfo->subslice_total;
+   }
+#endif
+
   iris_batch_sync_region_end(batch);
 }

--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -2170,12 +2170,6 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
 #endif /* GFX_VER >= 7 */

 #if GFX_VERx10 >= 125
-
-   blorp_emit(batch, GENX(CFE_STATE), cfe) {
-      cfe.MaximumNumberofThreads =
-         devinfo->max_cs_threads * devinfo->subslice_total;
-   }
-
   assert(cs_prog_data->push.per_thread.regs == 0);
   blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
      cw.SIMDSize                       = dispatch.simd_size / 16;
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -343,6 +343,7 @@ static VkResult
 init_render_queue_state(struct anv_queue *queue)
 {
   struct anv_device *device = queue->device;
+   UNUSED const struct intel_device_info *devinfo = queue->device->info;
   uint32_t cmds[128];
   struct anv_batch batch = {
      .start = cmds,
@@ -586,6 +587,20 @@ init_render_queue_state(struct anv_queue *queue)
 #if GFX_VERx10 >= 125
   anv_batch_emit(&batch, GENX(3DSTATE_MESH_CONTROL), zero);
   anv_batch_emit(&batch, GENX(3DSTATE_TASK_CONTROL), zero);
+   genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
+                                       ANV_NULL_ADDRESS,
+                                       0,
+                                       ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
+   genX(emit_pipeline_select)(&batch, GPGPU);
+   anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
+      cfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * devinfo->subslice_total;
+   }
+   genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
+                                       ANV_NULL_ADDRESS,
+                                       0,
+                                       ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
+   genX(emit_pipeline_select)(&batch, _3D);
 #endif

   anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
@@ -599,6 +614,7 @@ static VkResult
 init_compute_queue_state(struct anv_queue *queue)
 {
   struct anv_batch batch;
+   UNUSED const struct intel_device_info *devinfo = queue->device->info;

   uint32_t cmds[64];
   batch.start = batch.next = cmds;
@@ -626,6 +642,13 @@ init_compute_queue_state(struct anv_queue *queue)

   init_common_queue_state(queue, &batch);

+#if GFX_VERx10 >= 125
+   anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
+      cfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * devinfo->subslice_total;
+   }
+#endif
+
   anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);

   assert(batch.next <= batch.end);