anv,iris: program the maximum number of threads on compute queue init

Fixes: 90a39cac87 ("intel/blorp: Emit compute program based on BLORP_BATCH_USE_COMPUTE")
Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23934>
(cherry picked from commit e7e7042093)
This commit is contained in:
Rohan Garg 2023-06-29 14:24:55 +02:00 committed by Dylan Baker
parent f874ebe024
commit 7fbf06e503
4 changed files with 31 additions and 7 deletions

View file

@ -15084,7 +15084,7 @@
"description": "anv,iris: program the maximum number of threads on compute queue init",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "90a39cac87f415375a70e1cb2f7ba2c486f941e4",
"notes": null

View file

@ -1392,6 +1392,13 @@ iris_init_compute_context(struct iris_batch *batch)
init_aux_map_state(batch);
#endif
#if GFX_VERx10 >= 125
iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total;
}
#endif
iris_batch_sync_region_end(batch);
}

View file

@ -2170,12 +2170,6 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
#endif /* GFX_VER >= 7 */
#if GFX_VERx10 >= 125
blorp_emit(batch, GENX(CFE_STATE), cfe) {
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total;
}
assert(cs_prog_data->push.per_thread.regs == 0);
blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
cw.SIMDSize = dispatch.simd_size / 16;

View file

@ -343,6 +343,7 @@ static VkResult
init_render_queue_state(struct anv_queue *queue)
{
struct anv_device *device = queue->device;
UNUSED const struct intel_device_info *devinfo = queue->device->info;
uint32_t cmds[128];
struct anv_batch batch = {
.start = cmds,
@ -586,6 +587,20 @@ init_render_queue_state(struct anv_queue *queue)
#if GFX_VERx10 >= 125
anv_batch_emit(&batch, GENX(3DSTATE_MESH_CONTROL), zero);
anv_batch_emit(&batch, GENX(3DSTATE_TASK_CONTROL), zero);
genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
genX(emit_pipeline_select)(&batch, GPGPU);
anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total;
}
genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
genX(emit_pipeline_select)(&batch, _3D);
#endif
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
@ -599,6 +614,7 @@ static VkResult
init_compute_queue_state(struct anv_queue *queue)
{
struct anv_batch batch;
UNUSED const struct intel_device_info *devinfo = queue->device->info;
uint32_t cmds[64];
batch.start = batch.next = cmds;
@ -626,6 +642,13 @@ init_compute_queue_state(struct anv_queue *queue)
init_common_queue_state(queue, &batch);
#if GFX_VERx10 >= 125
anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total;
}
#endif
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
assert(batch.next <= batch.end);