diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 72fc4cdea4f..17aaba3c612 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -660,9 +660,9 @@ iris_rewrite_compute_walker_pc(struct iris_batch *batch,
uint32_t dwords[GENX(COMPUTE_WALKER_length)];
_iris_pack_command(batch, GENX(COMPUTE_WALKER), dwords, cw) {
- cw.PostSync.Operation = WriteTimestamp;
- cw.PostSync.DestinationAddress = addr;
- cw.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
+ cw.body.PostSync.Operation = WriteTimestamp;
+ cw.body.PostSync.DestinationAddress = addr;
+ cw.body.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
}
for (uint32_t i = 0; i < GENX(COMPUTE_WALKER_length); i++)
@@ -9012,29 +9012,33 @@ iris_upload_compute_walker(struct iris_context *ice,
ice->utrace.last_compute_walker =
iris_emit_dwords(batch, GENX(COMPUTE_WALKER_length));
+
+ struct GENX(COMPUTE_WALKER_BODY) body = { /* walker body fields, assembled up front and assigned to cw.body in the pack block below */
+ .SIMDSize = dispatch.simd_size / 16,
+ .MessageSIMD = dispatch.simd_size / 16,
+ .LocalXMaximum = grid->block[0] - 1, /* the three maxima are the block sizes minus one */
+ .LocalYMaximum = grid->block[1] - 1,
+ .LocalZMaximum = grid->block[2] - 1,
+ .ThreadGroupIDXDimension = grid->grid[0],
+ .ThreadGroupIDYDimension = grid->grid[1],
+ .ThreadGroupIDZDimension = grid->grid[2],
+ .ExecutionMask = dispatch.right_mask,
+ .PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0),
+ .InterfaceDescriptor = idd,
+
+#if GFX_VERx10 >= 125
+ .GenerateLocalID = cs_data->generate_local_id != 0,
+ .EmitLocal = cs_data->generate_local_id,
+ .WalkOrder = cs_data->walk_order,
+ .TileLayout = cs_data->walk_order == INTEL_WALK_ORDER_YXZ ? /* TileY32bpe for the YXZ walk order, Linear for every other order */
+ TileY32bpe : Linear,
+#endif
+ };
+
_iris_pack_command(batch, GENX(COMPUTE_WALKER),
ice->utrace.last_compute_walker, cw) {
cw.IndirectParameterEnable = grid->indirect;
- cw.SIMDSize = dispatch.simd_size / 16;
- cw.MessageSIMD = dispatch.simd_size / 16;
- cw.LocalXMaximum = grid->block[0] - 1;
- cw.LocalYMaximum = grid->block[1] - 1;
- cw.LocalZMaximum = grid->block[2] - 1;
- cw.ThreadGroupIDXDimension = grid->grid[0];
- cw.ThreadGroupIDYDimension = grid->grid[1];
- cw.ThreadGroupIDZDimension = grid->grid[2];
- cw.ExecutionMask = dispatch.right_mask;
- cw.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
- cw.InterfaceDescriptor = idd;
-
-#if GFX_VERx10 >= 125
- cw.GenerateLocalID = cs_data->generate_local_id != 0;
- cw.EmitLocal = cs_data->generate_local_id;
- cw.WalkOrder = cs_data->walk_order;
- cw.TileLayout = cs_data->walk_order == INTEL_WALK_ORDER_YXZ ?
- TileY32bpe : Linear;
-#endif
-
+ cw.body = body;
assert(iris_cs_push_const_total_size(shader, dispatch.threads) == 0);
}
}
diff --git a/src/intel/blorp/blorp_genX_exec_brw.h b/src/intel/blorp/blorp_genX_exec_brw.h
index 97a549a57e3..dc5b633bd30 100644
--- a/src/intel/blorp/blorp_genX_exec_brw.h
+++ b/src/intel/blorp/blorp_genX_exec_brw.h
@@ -1653,43 +1653,42 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
assert(cs_prog_data->local_size[2] == 1);
#if GFX_VERx10 >= 125
- assert(cs_prog_data->push.per_thread.regs == 0);
- blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
- cw.SIMDSize = dispatch.simd_size / 16;
- cw.MessageSIMD = dispatch.simd_size / 16,
- cw.LocalXMaximum = cs_prog_data->local_size[0] - 1;
- cw.LocalYMaximum = cs_prog_data->local_size[1] - 1;
- cw.LocalZMaximum = cs_prog_data->local_size[2] - 1;
- cw.ThreadGroupIDStartingX = group_x0;
- cw.ThreadGroupIDStartingY = group_y0;
- cw.ThreadGroupIDStartingZ = group_z0;
- cw.ThreadGroupIDXDimension = group_x1;
- cw.ThreadGroupIDYDimension = group_y1;
- cw.ThreadGroupIDZDimension = group_z1;
- cw.ExecutionMask = 0xffffffff;
- cw.PostSync.MOCS = isl_mocs(batch->blorp->isl_dev, 0, false);
+ uint32_t surfaces_offset = blorp_setup_binding_table(batch, params);
- uint32_t surfaces_offset = blorp_setup_binding_table(batch, params);
+ uint32_t samplers_offset =
+ params->src.enabled ? blorp_emit_sampler_state(batch) : 0;
- uint32_t samplers_offset =
- params->src.enabled ? blorp_emit_sampler_state(batch) : 0;
+ uint32_t push_const_offset;
+ unsigned push_const_size;
+ blorp_get_compute_push_const(batch, params, dispatch.threads,
+ &push_const_offset, &push_const_size);
+ struct GENX(COMPUTE_WALKER_BODY) body = {
+ .SIMDSize = dispatch.simd_size / 16,
+ .MessageSIMD = dispatch.simd_size / 16,
+ .LocalXMaximum = cs_prog_data->local_size[0] - 1,
+ .LocalYMaximum = cs_prog_data->local_size[1] - 1,
+ .LocalZMaximum = cs_prog_data->local_size[2] - 1,
+ .ThreadGroupIDStartingX = group_x0,
+ .ThreadGroupIDStartingY = group_y0,
+ .ThreadGroupIDStartingZ = group_z0,
+ .ThreadGroupIDXDimension = group_x1,
+ .ThreadGroupIDYDimension = group_y1,
+ .ThreadGroupIDZDimension = group_z1,
+ .ExecutionMask = 0xffffffff,
+ .PostSync.MOCS = isl_mocs(batch->blorp->isl_dev, 0, false),
- uint32_t push_const_offset;
- unsigned push_const_size;
- blorp_get_compute_push_const(batch, params, dispatch.threads,
- &push_const_offset, &push_const_size);
- cw.IndirectDataStartAddress = push_const_offset;
- cw.IndirectDataLength = push_const_size;
+ .IndirectDataStartAddress = push_const_offset,
+ .IndirectDataLength = push_const_size,
#if GFX_VERx10 >= 125
- cw.GenerateLocalID = cs_prog_data->generate_local_id != 0;
- cw.EmitLocal = cs_prog_data->generate_local_id;
- cw.WalkOrder = cs_prog_data->walk_order;
- cw.TileLayout = cs_prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
- TileY32bpe : Linear;
+ .GenerateLocalID = cs_prog_data->generate_local_id != 0,
+ .EmitLocal = cs_prog_data->generate_local_id,
+ .WalkOrder = cs_prog_data->walk_order,
+ .TileLayout = cs_prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
+ TileY32bpe : Linear,
#endif
- cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
+ .InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.KernelStartPointer = params->cs_prog_kernel,
.SamplerStatePointer = samplers_offset,
.SamplerCount = params->src.enabled ? 1 : 0,
@@ -1704,7 +1703,12 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
dispatch.group_size,
dispatch.simd_size),
.NumberOfBarriers = cs_prog_data->uses_barrier,
- };
+ },
+ };
+
+ assert(cs_prog_data->push.per_thread.regs == 0);
+ blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
+ cw.body = body;
}
#else
diff --git a/src/intel/executor/executor_genx.c b/src/intel/executor/executor_genx.c
index a5548dc1d25..c523ca91757 100644
--- a/src/intel/executor/executor_genx.c
+++ b/src/intel/executor/executor_genx.c
@@ -143,17 +143,23 @@ genX(emit_execute)(executor_context *ec, const executor_params *params)
emit_pipe_control(ec);
#if GFX_VERx10 >= 125
- executor_batch_emit(GENX(COMPUTE_WALKER), cw) {
+ struct GENX(COMPUTE_WALKER_BODY) body = { /* filled in here, then copied into the command through cw.body by executor_batch_emit() below */
#if GFX_VERx10 >= 200
- cw.SIMDSize = 1;
- cw.MessageSIMD = 1;
+ .SIMDSize = 1,
+ .MessageSIMD = 1,
#endif
- cw.ThreadGroupIDXDimension = 1;
- cw.ThreadGroupIDYDimension = 1;
- cw.ThreadGroupIDZDimension = 1;
- cw.ExecutionMask = 0xFFFFFFFF;
- cw.PostSync.MOCS = mocs;
- cw.InterfaceDescriptor = desc;
+ .ThreadGroupIDXDimension = 1, /* all three group dimensions are 1 */
+ .ThreadGroupIDYDimension = 1,
+ .ThreadGroupIDZDimension = 1,
+ .ExecutionMask = 0xFFFFFFFF,
+ .PostSync.MOCS = mocs,
+ .InterfaceDescriptor = desc,
+ };
+#endif
+
+#if GFX_VERx10 >= 125
+ executor_batch_emit(GENX(COMPUTE_WALKER), cw) {
+ cw.body = body;
};
#else
uint32_t *idd = executor_alloc_bytes_aligned(&ec->bo.extra, 8 * 4, 256);
diff --git a/src/intel/genxml/gen125.xml b/src/intel/genxml/gen125.xml
index 13e861ac492..c78cdef9ff7 100644
--- a/src/intel/genxml/gen125.xml
+++ b/src/intel/genxml/gen125.xml
@@ -1590,66 +1590,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
diff --git a/src/intel/genxml/gen20.xml b/src/intel/genxml/gen20.xml
index 18b6aa47bcd..c43a0bc292d 100644
--- a/src/intel/genxml/gen20.xml
+++ b/src/intel/genxml/gen20.xml
@@ -936,64 +936,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 7274837768a..c97350245a3 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -6197,11 +6197,13 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
uint32_t dwords[GENX(COMPUTE_WALKER_length)];
GENX(COMPUTE_WALKER_pack)(batch, dwords, &(struct GENX(COMPUTE_WALKER)) {
- .PostSync = (struct GENX(POSTSYNC_DATA)) {
- .Operation = WriteTimestamp,
- .DestinationAddress = addr,
- .MOCS = anv_mocs(device, NULL, 0),
- },
+ .body = {
+ .PostSync = (struct GENX(POSTSYNC_DATA)) {
+ .Operation = WriteTimestamp,
+ .DestinationAddress = addr,
+ .MOCS = anv_mocs(device, NULL, 0),
+ },
+ }
});
for (uint32_t i = 0; i < ARRAY_SIZE(dwords); i++) {
diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c
index 132e2d56088..6db84bc23f6 100644
--- a/src/intel/vulkan/genX_cmd_compute.c
+++ b/src/intel/vulkan/genX_cmd_compute.c
@@ -437,6 +437,37 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
num_workgroup_data[2] = groupCountZ;
}
+ struct GENX(COMPUTE_WALKER_BODY) body = { /* dispatch parameters gathered into a standalone struct; handed to anv_batch_emitn() as .body below */
+ .SIMDSize = dispatch.simd_size / 16,
+ .MessageSIMD = dispatch.simd_size / 16,
+ .IndirectDataStartAddress = comp_state->base.push_constants_state.offset, /* offset and size both come from the push constants state */
+ .IndirectDataLength = comp_state->base.push_constants_state.alloc_size,
+ .GenerateLocalID = prog_data->generate_local_id != 0,
+ .EmitLocal = prog_data->generate_local_id,
+ .WalkOrder = prog_data->walk_order,
+ .TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ? /* TileY32bpe for the YXZ walk order, Linear for every other order */
+ TileY32bpe : Linear,
+ .LocalXMaximum = prog_data->local_size[0] - 1,
+ .LocalYMaximum = prog_data->local_size[1] - 1,
+ .LocalZMaximum = prog_data->local_size[2] - 1,
+ .ThreadGroupIDXDimension = groupCountX,
+ .ThreadGroupIDYDimension = groupCountY,
+ .ThreadGroupIDZDimension = groupCountZ,
+ .ExecutionMask = dispatch.right_mask,
+ .PostSync = {
+ .MOCS = anv_mocs(pipeline->base.device, NULL, 0),
+ },
+ .InterfaceDescriptor =
+ get_interface_descriptor_data(cmd_buffer, pipeline->cs,
+ prog_data, &dispatch),
+ .EmitInlineParameter = prog_data->uses_inline_data,
+ .InlineData = { /* num_workgroup_data[0..2] go into three consecutive entries starting at index ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 */
+ [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = num_workgroup_data[0],
+ [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1],
+ [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2],
+ }
+ };
+
cmd_buffer->state.last_compute_walker =
anv_batch_emitn(
&cmd_buffer->batch,
@@ -444,38 +475,11 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
GENX(COMPUTE_WALKER),
.IndirectParameterEnable = !anv_address_is_null(indirect_addr),
.PredicateEnable = predicate,
- .SIMDSize = dispatch.simd_size / 16,
- .MessageSIMD = dispatch.simd_size / 16,
- .IndirectDataStartAddress = comp_state->base.push_constants_state.offset,
- .IndirectDataLength = comp_state->base.push_constants_state.alloc_size,
+ .body = body,
#if GFX_VERx10 == 125
.SystolicModeEnable = prog_data->uses_systolic,
#endif
- .GenerateLocalID = prog_data->generate_local_id != 0,
- .EmitLocal = prog_data->generate_local_id,
- .WalkOrder = prog_data->walk_order,
- .TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
- TileY32bpe : Linear,
- .LocalXMaximum = prog_data->local_size[0] - 1,
- .LocalYMaximum = prog_data->local_size[1] - 1,
- .LocalZMaximum = prog_data->local_size[2] - 1,
- .ThreadGroupIDXDimension = groupCountX,
- .ThreadGroupIDYDimension = groupCountY,
- .ThreadGroupIDZDimension = groupCountZ,
- .ExecutionMask = dispatch.right_mask,
- .PostSync = {
- .MOCS = anv_mocs(pipeline->base.device, NULL, 0),
- },
- .InterfaceDescriptor =
- get_interface_descriptor_data(cmd_buffer, pipeline->cs,
- prog_data, &dispatch),
- .EmitInlineParameter = prog_data->uses_inline_data,
- .InlineData = {
- [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = num_workgroup_data[0],
- [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1],
- [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2],
- });
-
+ );
}
#else /* #if GFX_VERx10 >= 125 */
@@ -724,33 +728,39 @@ genX(cmd_buffer_dispatch_kernel)(struct anv_cmd_buffer *cmd_buffer,
struct intel_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
- anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
- cw.PredicateEnable = false;
- cw.SIMDSize = dispatch.simd_size / 16;
- cw.MessageSIMD = dispatch.simd_size / 16;
- cw.IndirectDataStartAddress = indirect_data.offset;
- cw.IndirectDataLength = indirect_data.alloc_size;
- cw.LocalXMaximum = cs_prog_data->local_size[0] - 1;
- cw.LocalYMaximum = cs_prog_data->local_size[1] - 1;
- cw.LocalZMaximum = cs_prog_data->local_size[2] - 1;
- cw.ExecutionMask = dispatch.right_mask;
- cw.PostSync.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
-
- if (global_size != NULL) {
- cw.ThreadGroupIDXDimension = global_size[0];
- cw.ThreadGroupIDYDimension = global_size[1];
- cw.ThreadGroupIDZDimension = global_size[2];
- } else {
- cw.IndirectParameterEnable = true;
- }
-
- cw.InterfaceDescriptor =
+ struct GENX(COMPUTE_WALKER_BODY) body = { /* walker body for the kernel dispatch; the group dimensions are filled in after this initializer */
+ .SIMDSize = dispatch.simd_size / 16,
+ .MessageSIMD = dispatch.simd_size / 16,
+ .IndirectDataStartAddress = indirect_data.offset,
+ .IndirectDataLength = indirect_data.alloc_size,
+ .LocalXMaximum = cs_prog_data->local_size[0] - 1,
+ .LocalYMaximum = cs_prog_data->local_size[1] - 1,
+ .LocalZMaximum = cs_prog_data->local_size[2] - 1,
+ .ExecutionMask = dispatch.right_mask,
+ .PostSync.MOCS = cmd_buffer->device->isl_dev.mocs.internal,
+ .InterfaceDescriptor =
get_interface_descriptor_data(cmd_buffer,
kernel->bin,
cs_prog_data,
- &dispatch);
+ &dispatch),
+ };
+
+ if (global_size != NULL) { /* explicit group counts were supplied; otherwise the dimensions keep their zero initialization and IndirectParameterEnable is set below */
+ body.ThreadGroupIDXDimension = global_size[0];
+ body.ThreadGroupIDYDimension = global_size[1];
+ body.ThreadGroupIDZDimension = global_size[2];
}
+ cmd_buffer->state.last_compute_walker = /* NOTE(review): the anv_batch_emit() form removed above did not write last_compute_walker -- confirm this new assignment is intended */
+ anv_batch_emitn(
+ &cmd_buffer->batch,
+ GENX(COMPUTE_WALKER_length),
+ GENX(COMPUTE_WALKER),
+ .IndirectParameterEnable = global_size == NULL, /* same condition as the removed else-branch that set it to true */
+ .PredicateEnable = false,
+ .body = body,
+ );
+
/* We just blew away the compute pipeline state */
cmd_buffer->state.compute.pipeline_dirty = true;
}
@@ -1132,26 +1142,39 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
struct intel_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(device->info, cs_prog_data, NULL);
- anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
- cw.IndirectParameterEnable = params->is_launch_size_indirect;
- cw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
- cw.SIMDSize = dispatch.simd_size / 16;
- cw.MessageSIMD = dispatch.simd_size / 16;
- cw.LocalXMaximum = (1 << local_size_log2[0]) - 1;
- cw.LocalYMaximum = (1 << local_size_log2[1]) - 1;
- cw.LocalZMaximum = (1 << local_size_log2[2]) - 1;
- cw.ThreadGroupIDXDimension = global_size[0];
- cw.ThreadGroupIDYDimension = global_size[1];
- cw.ThreadGroupIDZDimension = global_size[2];
- cw.ExecutionMask = 0xff;
- cw.EmitInlineParameter = true;
- cw.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0);
+ const gl_shader_stage s = MESA_SHADER_RAYGEN;
+ struct anv_state *surfaces = &cmd_buffer->state.binding_tables[s];
+ struct anv_state *samplers = &cmd_buffer->state.samplers[s];
+ struct brw_rt_raygen_trampoline_params trampoline_params = {
+ .rt_disp_globals_addr = anv_address_physical(rtdg_addr),
+ .raygen_bsr_addr =
+ params->is_sbt_indirect ?
+ (params->indirect_sbts_addr +
+ offsetof(VkTraceRaysIndirectCommand2KHR,
+ raygenShaderRecordAddress)) :
+ params->raygen_sbt->deviceAddress,
+ .is_indirect = params->is_sbt_indirect,
+ .local_group_size_log2 = {
+ local_size_log2[0],
+ local_size_log2[1],
+ local_size_log2[2],
+ },
+ };
- const gl_shader_stage s = MESA_SHADER_RAYGEN;
- struct anv_device *device = cmd_buffer->device;
- struct anv_state *surfaces = &cmd_buffer->state.binding_tables[s];
- struct anv_state *samplers = &cmd_buffer->state.samplers[s];
- cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
+ struct GENX(COMPUTE_WALKER_BODY) body = {
+ .SIMDSize = dispatch.simd_size / 16,
+ .MessageSIMD = dispatch.simd_size / 16,
+ .LocalXMaximum = (1 << local_size_log2[0]) - 1,
+ .LocalYMaximum = (1 << local_size_log2[1]) - 1,
+ .LocalZMaximum = (1 << local_size_log2[2]) - 1,
+ .ThreadGroupIDXDimension = global_size[0],
+ .ThreadGroupIDYDimension = global_size[1],
+ .ThreadGroupIDZDimension = global_size[2],
+ .ExecutionMask = 0xff,
+ .EmitInlineParameter = true,
+ .PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0),
+
+ .InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.KernelStartPointer = device->rt_trampoline->kernel.offset,
.SamplerStatePointer = samplers->offset,
/* i965: DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4), */
@@ -1162,26 +1185,21 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
#if INTEL_NEEDS_WA_14017794102
.ThreadPreemption = false,
#endif
- };
+ },
+ };
- struct brw_rt_raygen_trampoline_params trampoline_params = {
- .rt_disp_globals_addr = anv_address_physical(rtdg_addr),
- .raygen_bsr_addr =
- params->is_sbt_indirect ?
- (params->indirect_sbts_addr +
- offsetof(VkTraceRaysIndirectCommand2KHR,
- raygenShaderRecordAddress)) :
- params->raygen_sbt->deviceAddress,
- .is_indirect = params->is_sbt_indirect,
- .local_group_size_log2 = {
- local_size_log2[0],
- local_size_log2[1],
- local_size_log2[2],
- },
- };
- STATIC_ASSERT(sizeof(trampoline_params) == 32);
- memcpy(cw.InlineData, &trampoline_params, sizeof(trampoline_params));
- }
+ STATIC_ASSERT(sizeof(trampoline_params) == 32); /* size check for the copy on the next line */
+ memcpy(body.InlineData, &trampoline_params, sizeof(trampoline_params)); /* the trampoline parameters are placed at the start of the walker body's InlineData */
+
+ cmd_buffer->state.last_compute_walker = /* NOTE(review): the anv_batch_emit() form removed above did not write last_compute_walker -- confirm this new assignment is intended */
+ anv_batch_emitn(
+ &cmd_buffer->batch,
+ GENX(COMPUTE_WALKER_length),
+ GENX(COMPUTE_WALKER),
+ .IndirectParameterEnable = params->is_launch_size_indirect,
+ .PredicateEnable = cmd_buffer->state.conditional_render_enabled,
+ .body = body,
+ );
trace_intel_end_rays(&cmd_buffer->trace,
params->launch_size[0],
diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c
index 3e44b23a806..062db96c4f0 100644
--- a/src/intel/vulkan/genX_simple_shader.c
+++ b/src/intel/vulkan/genX_simple_shader.c
@@ -565,30 +565,30 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
#if GFX_VERx10 >= 125
- anv_batch_emit(batch, GENX(COMPUTE_WALKER), cw) {
- cw.SIMDSize = dispatch.simd_size / 16;
- cw.MessageSIMD = dispatch.simd_size / 16,
- cw.IndirectDataStartAddress = push_state.offset;
- cw.IndirectDataLength = push_state.alloc_size;
- cw.LocalXMaximum = prog_data->local_size[0] - 1;
- cw.LocalYMaximum = prog_data->local_size[1] - 1;
- cw.LocalZMaximum = prog_data->local_size[2] - 1;
- cw.ThreadGroupIDXDimension = DIV_ROUND_UP(num_threads,
- dispatch.simd_size);
- cw.ThreadGroupIDYDimension = 1;
- cw.ThreadGroupIDZDimension = 1;
- cw.ExecutionMask = dispatch.right_mask;
- cw.PostSync.MOCS = anv_mocs(device, NULL, 0);
+ struct GENX(COMPUTE_WALKER_BODY) body = {
+ .SIMDSize = dispatch.simd_size / 16,
+ .MessageSIMD = dispatch.simd_size / 16,
+ .IndirectDataStartAddress = push_state.offset,
+ .IndirectDataLength = push_state.alloc_size,
+ .LocalXMaximum = prog_data->local_size[0] - 1,
+ .LocalYMaximum = prog_data->local_size[1] - 1,
+ .LocalZMaximum = prog_data->local_size[2] - 1,
+ .ThreadGroupIDXDimension = DIV_ROUND_UP(num_threads,
+ dispatch.simd_size),
+ .ThreadGroupIDYDimension = 1,
+ .ThreadGroupIDZDimension = 1,
+ .ExecutionMask = dispatch.right_mask,
+ .PostSync.MOCS = anv_mocs(device, NULL, 0),
#if GFX_VERx10 >= 125
- cw.GenerateLocalID = prog_data->generate_local_id != 0;
- cw.EmitLocal = prog_data->generate_local_id;
- cw.WalkOrder = prog_data->walk_order;
- cw.TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
- TileY32bpe : Linear;
+ .GenerateLocalID = prog_data->generate_local_id != 0,
+ .EmitLocal = prog_data->generate_local_id,
+ .WalkOrder = prog_data->walk_order,
+ .TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
+ TileY32bpe : Linear,
#endif
- cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
+ .InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.KernelStartPointer = state->kernel->kernel.offset +
brw_cs_prog_data_prog_offset(prog_data,
dispatch.simd_size),
@@ -599,7 +599,11 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
prog_data->base.total_shared),
.NumberOfBarriers = prog_data->uses_barrier,
- };
+ },
+ };
+
+ anv_batch_emit(batch, GENX(COMPUTE_WALKER), cw) {
+ cw.body = body;
}
#else
const uint32_t vfe_curbe_allocation =