anv: Set PIPELINE_SELECT systolic mode enable flag

Set the flag when selecting the GPGPU (compute) pipeline if the
application has enabled the KHR_cooperative_matrix extension and its
cooperativeMatrix feature. It may still be preferable to enable this only
when DPAS is actually used. The current method is based on many
suggestions from Lionel.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25994>
This commit is contained in:
Ian Romanick 2023-09-25 09:16:55 -07:00
parent 7bfbeb79a7
commit b741a9a851
5 changed files with 20 additions and 9 deletions

View file

@ -1764,6 +1764,7 @@
</field>
<field name="Media Sampler DOP Clock Gate Enable" start="4" end="4" type="bool" />
<field name="Force Media Awake" start="5" end="5" type="bool" />
<field name="Systolic Mode Enable" start="7" end="7" type="bool" />
<field name="Mask Bits" start="8" end="15" type="uint" />
<field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="4" />
<field name="3D Command Opcode" start="24" end="26" type="uint" default="1" />

View file

@ -90,7 +90,8 @@ void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
/* Emits a PIPELINE_SELECT command into `batch` for the given `pipeline`.
 * The variant taking `device` reads the device's enabled extensions and
 * features when filling in the command.
 */
void genX(emit_pipeline_select)(struct anv_batch *batch, uint32_t pipeline);
void genX(emit_pipeline_select)(struct anv_batch *batch, uint32_t pipeline,
const struct anv_device *device);
void genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer);

View file

@ -6815,14 +6815,23 @@ genX(CmdTraceRaysIndirect2KHR)(
* flush_pipeline_select()
*/
void
genX(emit_pipeline_select)(struct anv_batch *batch, uint32_t pipeline)
genX(emit_pipeline_select)(struct anv_batch *batch, uint32_t pipeline,
const struct anv_device *device)
{
/* NOTE(review): two signatures above and two MaskBits assignments below
 * appear back to back; this looks like a diff view showing both the
 * removed and the added line -- confirm which one is live.
 */
anv_batch_emit(batch, GENX(PIPELINE_SELECT), ps) {
/* MaskBits constants: 0x3 covers bits 0-1, 0x13 additionally sets bit 4,
 * and 0x93 additionally sets bit 7. Bits 4 and 7 match the positions of
 * the "Media Sampler DOP Clock Gate Enable" and "Systolic Mode Enable"
 * fields in the PIPELINE_SELECT layout. Presumably each mask bit
 * write-enables the corresponding low bit of the dword -- TODO confirm
 * against the hardware documentation.
 */
ps.MaskBits = GFX_VER == 12 ? 0x13 : 0x3;
ps.MaskBits = GFX_VERx10 >= 125 ? 0x93 : GFX_VER >= 12 ? 0x13 : 0x3;
#if GFX_VER == 12
ps.MediaSamplerDOPClockGateEnable = true;
#endif
ps.PipelineSelection = pipeline;
#if GFX_VERx10 == 125
/* Systolic mode is requested only when the GPGPU pipeline is being
 * selected and the device has both the KHR_cooperative_matrix extension
 * and the cooperativeMatrix feature enabled; otherwise the field is
 * written as false.
 *
 * It might still be better to only enable this when the compute
 * pipeline will have DPAS instructions.
 */
ps.SystolicModeEnable = pipeline == GPGPU &&
device->vk.enabled_extensions.KHR_cooperative_matrix &&
device->vk.enabled_features.cooperativeMatrix;
#endif
}
}
@ -6972,7 +6981,7 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
}
#endif
genX(emit_pipeline_select)(&cmd_buffer->batch, pipeline);
genX(emit_pipeline_select)(&cmd_buffer->batch, pipeline, cmd_buffer->device);
#if GFX_VER == 9
if (devinfo->platform == INTEL_PLATFORM_GLK) {

View file

@ -256,7 +256,7 @@ genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
const struct intel_l3_config *cfg = intel_get_default_l3_config(device->info);
genX(emit_l3_config)(batch, device, cfg);
genX(emit_pipeline_select)(batch, _3D);
genX(emit_pipeline_select)(batch, _3D, device);
emit_common_so_memcpy(batch, device, cfg);
}

View file

@ -352,7 +352,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch)
};
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, device->empty_vs_input, &empty_ve);
genX(emit_pipeline_select)(&batch, _3D);
genX(emit_pipeline_select)(&batch, _3D, device);
#if GFX_VER == 9
anv_batch_write_reg(&batch, GENX(CACHE_MODE_1), cm1) {
@ -595,7 +595,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch)
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
genX(emit_pipeline_select)(&batch, GPGPU);
genX(emit_pipeline_select)(&batch, GPGPU, device);
anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total;
@ -604,7 +604,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch)
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
genX(emit_pipeline_select)(&batch, _3D);
genX(emit_pipeline_select)(&batch, _3D, device);
#endif
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
@ -628,7 +628,7 @@ init_compute_queue_state(struct anv_queue *queue)
.end = (void *) cmds + sizeof(cmds),
};
genX(emit_pipeline_select)(&batch, GPGPU);
genX(emit_pipeline_select)(&batch, GPGPU, queue->device);
#if GFX_VER == 12
if (queue->device->info->has_aux_map) {