mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 22:00:13 +01:00
anv: actually use the COMPUTE_WALKER_BODY prepacked field
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36711>
This commit is contained in:
parent
9c8571794a
commit
5a2fb0da32
4 changed files with 104 additions and 127 deletions
|
|
@ -54,6 +54,8 @@ genX_bits_included_symbols = [
|
||||||
'3DSTATE_SO_BUFFER::Stream Offset',
|
'3DSTATE_SO_BUFFER::Stream Offset',
|
||||||
'3DSTATE_CPSIZE_CONTROL_BUFFER::Surface Base Address',
|
'3DSTATE_CPSIZE_CONTROL_BUFFER::Surface Base Address',
|
||||||
'3DSTATE_CPSIZE_CONTROL_BUFFER::Surface Pitch',
|
'3DSTATE_CPSIZE_CONTROL_BUFFER::Surface Pitch',
|
||||||
|
'COMPUTE_WALKER::body',
|
||||||
|
'EXECUTE_INDIRECT_DISPATCH::body',
|
||||||
# structures
|
# structures
|
||||||
'RENDER_SURFACE_STATE::Surface Base Address',
|
'RENDER_SURFACE_STATE::Surface Base Address',
|
||||||
'RENDER_SURFACE_STATE::Surface Pitch',
|
'RENDER_SURFACE_STATE::Surface Pitch',
|
||||||
|
|
|
||||||
|
|
@ -2725,6 +2725,31 @@ _anv_combine_address(struct anv_batch *batch, void *location,
|
||||||
__dst; \
|
__dst; \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
/* Emit an instruction with fields set in the arguments of this macro and
|
||||||
|
* combine it with a prepacked instruction.
|
||||||
|
*/
|
||||||
|
#define anv_batch_emitn_merge_at(batch, n, offset, to_merge, cmd, ...) ({ \
|
||||||
|
void *__dst = anv_batch_emit_dwords(batch, n); \
|
||||||
|
if (__dst) { \
|
||||||
|
struct cmd __template = { \
|
||||||
|
__anv_cmd_header(cmd), \
|
||||||
|
.DWordLength = n - __anv_cmd_length_bias(cmd), \
|
||||||
|
__VA_ARGS__ \
|
||||||
|
}; \
|
||||||
|
uint32_t __partial[__anv_cmd_length(cmd)]; \
|
||||||
|
__anv_cmd_pack(cmd)(batch, __partial, &__template); \
|
||||||
|
for (uint32_t i = 0; i < (offset); i++) \
|
||||||
|
((uint32_t *)__dst)[i] = __partial[i]; \
|
||||||
|
for (uint32_t i = (offset); i < n; i++) { \
|
||||||
|
((uint32_t *)__dst)[i] = \
|
||||||
|
(to_merge)[i - (offset)] | __partial[i]; \
|
||||||
|
} \
|
||||||
|
VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, \
|
||||||
|
__anv_cmd_length(cmd) * 4)); \
|
||||||
|
} \
|
||||||
|
__dst; \
|
||||||
|
})
|
||||||
|
|
||||||
#define anv_batch_emit_merge(batch, cmd, pipeline, state, name) \
|
#define anv_batch_emit_merge(batch, cmd, pipeline, state, name) \
|
||||||
for (struct cmd name = { 0 }, \
|
for (struct cmd name = { 0 }, \
|
||||||
*_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
|
*_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
|
||||||
|
|
@ -5260,7 +5285,7 @@ struct anv_compute_pipeline {
|
||||||
uint32_t gpgpu_walker[15];
|
uint32_t gpgpu_walker[15];
|
||||||
} gfx9;
|
} gfx9;
|
||||||
struct {
|
struct {
|
||||||
uint32_t compute_walker[40];
|
uint32_t compute_walker_body[39];
|
||||||
} gfx125;
|
} gfx125;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -354,33 +354,11 @@ compute_store_indirect_params(struct anv_cmd_buffer *cmd_buffer,
|
||||||
#if GFX_VERx10 >= 125
|
#if GFX_VERx10 >= 125
|
||||||
|
|
||||||
static inline struct GENX(INTERFACE_DESCRIPTOR_DATA)
|
static inline struct GENX(INTERFACE_DESCRIPTOR_DATA)
|
||||||
get_interface_descriptor_data(struct anv_cmd_buffer *cmd_buffer,
|
get_interface_descriptor_data_tables(struct anv_cmd_buffer *cmd_buffer)
|
||||||
const struct anv_shader_bin *shader,
|
|
||||||
const struct brw_cs_prog_data *prog_data,
|
|
||||||
const struct intel_cs_dispatch_info *dispatch)
|
|
||||||
{
|
{
|
||||||
const struct intel_device_info *devinfo = cmd_buffer->device->info;
|
|
||||||
|
|
||||||
return (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
return (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
||||||
.SamplerCount = DIV_ROUND_UP(CLAMP(shader->bind_map.sampler_count, 0, 16), 4),
|
|
||||||
.KernelStartPointer = shader->kernel.offset,
|
|
||||||
.SamplerStatePointer = cmd_buffer->state.samplers[MESA_SHADER_COMPUTE].offset,
|
.SamplerStatePointer = cmd_buffer->state.samplers[MESA_SHADER_COMPUTE].offset,
|
||||||
.BindingTablePointer = cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
|
.BindingTablePointer = cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
|
||||||
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
|
||||||
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
|
||||||
0 : MIN2(shader->bind_map.surface_count, 30),
|
|
||||||
.NumberofThreadsinGPGPUThreadGroup = dispatch->threads,
|
|
||||||
.ThreadGroupDispatchSize = intel_compute_threads_group_dispatch_size(dispatch->threads),
|
|
||||||
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER, prog_data->base.total_shared),
|
|
||||||
.PreferredSLMAllocationSize =
|
|
||||||
intel_compute_preferred_slm_calc_encode_size(devinfo,
|
|
||||||
prog_data->base.total_shared,
|
|
||||||
dispatch->group_size,
|
|
||||||
dispatch->simd_size),
|
|
||||||
.NumberOfBarriers = prog_data->uses_barrier,
|
|
||||||
#if GFX_VER >= 30
|
|
||||||
.RegistersPerThread = ptl_register_blocks(prog_data->base.grf_used),
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -447,7 +425,6 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
|
||||||
const struct intel_cs_dispatch_info dispatch =
|
const struct intel_cs_dispatch_info dispatch =
|
||||||
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
|
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
|
||||||
const int dispatch_size = dispatch.simd_size / 16;
|
|
||||||
|
|
||||||
uint64_t indirect_addr64 = anv_address_physical(indirect_addr);
|
uint64_t indirect_addr64 = anv_address_physical(indirect_addr);
|
||||||
|
|
||||||
|
|
@ -457,51 +434,29 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
|
||||||
compute_update_async_threads_limit(cmd_buffer, prog_data, &dispatch);
|
compute_update_async_threads_limit(cmd_buffer, prog_data, &dispatch);
|
||||||
|
|
||||||
struct GENX(COMPUTE_WALKER_BODY) body = {
|
|
||||||
.SIMDSize = dispatch_size,
|
|
||||||
/* HSD 14016252163: Use of Morton walk order (and batching using a batch
|
|
||||||
* size of 4) is expected to increase sampler cache hit rates by
|
|
||||||
* increasing sample address locality within a subslice.
|
|
||||||
*/
|
|
||||||
#if GFX_VER >= 30
|
|
||||||
.DispatchWalkOrder = prog_data->uses_sampler ?
|
|
||||||
MortonWalk :
|
|
||||||
LinearWalk,
|
|
||||||
.ThreadGroupBatchSize = prog_data->uses_sampler ? TG_BATCH_4 :
|
|
||||||
TG_BATCH_1,
|
|
||||||
#endif
|
|
||||||
.MessageSIMD = dispatch_size,
|
|
||||||
.GenerateLocalID = prog_data->generate_local_id != 0,
|
|
||||||
.EmitLocal = prog_data->generate_local_id,
|
|
||||||
.WalkOrder = prog_data->walk_order,
|
|
||||||
.TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
|
|
||||||
TileY32bpe : Linear,
|
|
||||||
.LocalXMaximum = prog_data->local_size[0] - 1,
|
|
||||||
.LocalYMaximum = prog_data->local_size[1] - 1,
|
|
||||||
.LocalZMaximum = prog_data->local_size[2] - 1,
|
|
||||||
.ExecutionMask = dispatch.right_mask,
|
|
||||||
.PostSync.MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
|
|
||||||
.InterfaceDescriptor =
|
|
||||||
get_interface_descriptor_data(cmd_buffer, comp_state->shader,
|
|
||||||
prog_data, &dispatch),
|
|
||||||
.EmitInlineParameter = prog_data->uses_inline_data,
|
|
||||||
.InlineData = {
|
|
||||||
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff,
|
|
||||||
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32,
|
|
||||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = UINT32_MAX,
|
|
||||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = indirect_addr64 & 0xffffffff,
|
|
||||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = indirect_addr64 >> 32,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
cmd_buffer->state.last_indirect_dispatch =
|
cmd_buffer->state.last_indirect_dispatch =
|
||||||
anv_batch_emitn(
|
anv_batch_emitn_merge_at(
|
||||||
&cmd_buffer->batch,
|
&cmd_buffer->batch,
|
||||||
GENX(EXECUTE_INDIRECT_DISPATCH_length),
|
GENX(EXECUTE_INDIRECT_DISPATCH_length),
|
||||||
|
GENX(EXECUTE_INDIRECT_DISPATCH_body_start) / 32,
|
||||||
|
anv_pipeline_to_compute(comp_state->base.pipeline)->gfx125.compute_walker_body,
|
||||||
GENX(EXECUTE_INDIRECT_DISPATCH),
|
GENX(EXECUTE_INDIRECT_DISPATCH),
|
||||||
.PredicateEnable = predicate,
|
.PredicateEnable = predicate,
|
||||||
.MaxCount = 1,
|
.MaxCount = 1,
|
||||||
.body = body,
|
.body = {
|
||||||
|
.InterfaceDescriptor = get_interface_descriptor_data_tables(cmd_buffer),
|
||||||
|
.ExecutionMask = dispatch.right_mask,
|
||||||
|
.InlineData = {
|
||||||
|
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff,
|
||||||
|
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32,
|
||||||
|
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = UINT32_MAX,
|
||||||
|
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = indirect_addr64 & 0xffffffff,
|
||||||
|
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = indirect_addr64 >> 32,
|
||||||
|
},
|
||||||
|
.PostSync = {
|
||||||
|
.MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
|
||||||
|
},
|
||||||
|
},
|
||||||
.ArgumentBufferStartAddress = indirect_addr,
|
.ArgumentBufferStartAddress = indirect_addr,
|
||||||
.MOCS = anv_mocs(cmd_buffer->device,
|
.MOCS = anv_mocs(cmd_buffer->device,
|
||||||
indirect_addr.bo, 0),
|
indirect_addr.bo, 0),
|
||||||
|
|
@ -538,27 +493,11 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
||||||
comp_state->base.push_constants_state));
|
comp_state->base.push_constants_state));
|
||||||
|
|
||||||
struct GENX(COMPUTE_WALKER_BODY) body = {
|
struct GENX(COMPUTE_WALKER_BODY) body = {
|
||||||
.SIMDSize = dispatch.simd_size / 16,
|
.InterfaceDescriptor = get_interface_descriptor_data_tables(cmd_buffer),
|
||||||
.MessageSIMD = dispatch.simd_size / 16,
|
|
||||||
.GenerateLocalID = prog_data->generate_local_id != 0,
|
|
||||||
.EmitLocal = prog_data->generate_local_id,
|
|
||||||
.WalkOrder = prog_data->walk_order,
|
|
||||||
.TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
|
|
||||||
TileY32bpe : Linear,
|
|
||||||
.LocalXMaximum = prog_data->local_size[0] - 1,
|
|
||||||
.LocalYMaximum = prog_data->local_size[1] - 1,
|
|
||||||
.LocalZMaximum = prog_data->local_size[2] - 1,
|
|
||||||
.ThreadGroupIDXDimension = groupCountX,
|
.ThreadGroupIDXDimension = groupCountX,
|
||||||
.ThreadGroupIDYDimension = groupCountY,
|
.ThreadGroupIDYDimension = groupCountY,
|
||||||
.ThreadGroupIDZDimension = groupCountZ,
|
.ThreadGroupIDZDimension = groupCountZ,
|
||||||
.ExecutionMask = dispatch.right_mask,
|
.ExecutionMask = dispatch.right_mask,
|
||||||
.PostSync = {
|
|
||||||
.MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
|
|
||||||
},
|
|
||||||
.InterfaceDescriptor =
|
|
||||||
get_interface_descriptor_data(cmd_buffer, comp_state->shader,
|
|
||||||
prog_data, &dispatch),
|
|
||||||
.EmitInlineParameter = prog_data->uses_inline_data,
|
|
||||||
.InlineData = {
|
.InlineData = {
|
||||||
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff,
|
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff,
|
||||||
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32,
|
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32,
|
||||||
|
|
@ -566,17 +505,17 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
||||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1],
|
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1],
|
||||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2],
|
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2],
|
||||||
},
|
},
|
||||||
#if GFX_VER >= 30
|
.PostSync = {
|
||||||
/* HSD 14016252163 */
|
.MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
|
||||||
.DispatchWalkOrder = prog_data->uses_sampler ? MortonWalk : LinearWalk,
|
},
|
||||||
.ThreadGroupBatchSize = prog_data->uses_sampler ? TG_BATCH_4 : TG_BATCH_1,
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
cmd_buffer->state.last_compute_walker =
|
cmd_buffer->state.last_compute_walker =
|
||||||
anv_batch_emitn(
|
anv_batch_emitn_merge_at(
|
||||||
&cmd_buffer->batch,
|
&cmd_buffer->batch,
|
||||||
GENX(COMPUTE_WALKER_length),
|
GENX(COMPUTE_WALKER_length),
|
||||||
|
GENX(COMPUTE_WALKER_body_start) / 32,
|
||||||
|
anv_pipeline_to_compute(comp_state->base.pipeline)->gfx125.compute_walker_body,
|
||||||
GENX(COMPUTE_WALKER),
|
GENX(COMPUTE_WALKER),
|
||||||
.IndirectParameterEnable = !anv_address_is_null(indirect_addr),
|
.IndirectParameterEnable = !anv_address_is_null(indirect_addr),
|
||||||
.PredicateEnable = predicate,
|
.PredicateEnable = predicate,
|
||||||
|
|
|
||||||
|
|
@ -1542,49 +1542,57 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
||||||
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
|
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
|
||||||
const struct anv_shader_bin *shader = pipeline->cs;
|
const struct anv_shader_bin *shader = pipeline->cs;
|
||||||
|
|
||||||
struct GENX(COMPUTE_WALKER) walker = {
|
struct GENX(COMPUTE_WALKER_BODY) walker = {
|
||||||
GENX(COMPUTE_WALKER_header),
|
/* HSD 14016252163: Use of Morton walk order (and batching using a batch
|
||||||
#if GFX_VERx10 == 125
|
* size of 4) is expected to increase sampler cache hit rates by
|
||||||
.SystolicModeEnable = prog_data->uses_systolic,
|
* increasing sample address locality within a subslice.
|
||||||
|
*/
|
||||||
|
#if GFX_VER >= 30
|
||||||
|
.DispatchWalkOrder = prog_data->uses_sampler ?
|
||||||
|
MortonWalk :
|
||||||
|
LinearWalk,
|
||||||
|
.ThreadGroupBatchSize = prog_data->uses_sampler ? TG_BATCH_4 :
|
||||||
|
TG_BATCH_1,
|
||||||
#endif
|
#endif
|
||||||
.body = {
|
.SIMDSize = dispatch.simd_size / 16,
|
||||||
.SIMDSize = dispatch.simd_size / 16,
|
.MessageSIMD = dispatch.simd_size / 16,
|
||||||
.MessageSIMD = dispatch.simd_size / 16,
|
.GenerateLocalID = prog_data->generate_local_id != 0,
|
||||||
.GenerateLocalID = prog_data->generate_local_id != 0,
|
.EmitLocal = prog_data->generate_local_id,
|
||||||
.EmitLocal = prog_data->generate_local_id,
|
.WalkOrder = prog_data->walk_order,
|
||||||
.WalkOrder = prog_data->walk_order,
|
.TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
|
||||||
.TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
|
TileY32bpe : Linear,
|
||||||
TileY32bpe : Linear,
|
.LocalXMaximum = prog_data->local_size[0] - 1,
|
||||||
.LocalXMaximum = prog_data->local_size[0] - 1,
|
.LocalYMaximum = prog_data->local_size[1] - 1,
|
||||||
.LocalYMaximum = prog_data->local_size[1] - 1,
|
.LocalZMaximum = prog_data->local_size[2] - 1,
|
||||||
.LocalZMaximum = prog_data->local_size[2] - 1,
|
.PostSync = {
|
||||||
.ExecutionMask = dispatch.right_mask,
|
.MOCS = anv_mocs(pipeline->base.device, NULL, 0),
|
||||||
.PostSync = {
|
|
||||||
.MOCS = anv_mocs(pipeline->base.device, NULL, 0),
|
|
||||||
},
|
|
||||||
.InterfaceDescriptor = {
|
|
||||||
.KernelStartPointer = shader->kernel.offset,
|
|
||||||
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
|
||||||
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
|
||||||
0 : 1 + MIN2(shader->bind_map.surface_count, 30),
|
|
||||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
|
||||||
.ThreadGroupDispatchSize =
|
|
||||||
intel_compute_threads_group_dispatch_size(dispatch.threads),
|
|
||||||
.SharedLocalMemorySize =
|
|
||||||
intel_compute_slm_encode_size(GFX_VER, prog_data->base.total_shared),
|
|
||||||
.PreferredSLMAllocationSize =
|
|
||||||
intel_compute_preferred_slm_calc_encode_size(devinfo,
|
|
||||||
prog_data->base.total_shared,
|
|
||||||
dispatch.group_size,
|
|
||||||
dispatch.simd_size),
|
|
||||||
.NumberOfBarriers = prog_data->uses_barrier,
|
|
||||||
},
|
|
||||||
.EmitInlineParameter = prog_data->uses_inline_push_addr,
|
|
||||||
},
|
},
|
||||||
|
.EmitInlineParameter = prog_data->uses_inline_push_addr,
|
||||||
|
.InterfaceDescriptor = {
|
||||||
|
.KernelStartPointer = shader->kernel.offset,
|
||||||
|
.SamplerCount = DIV_ROUND_UP(CLAMP(shader->bind_map.sampler_count, 0, 16), 4),
|
||||||
|
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
||||||
|
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||||
|
0 : 1 + MIN2(shader->bind_map.surface_count, 30),
|
||||||
|
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||||
|
.SharedLocalMemorySize = intel_compute_slm_encode_size(
|
||||||
|
GFX_VER, prog_data->base.total_shared),
|
||||||
|
.PreferredSLMAllocationSize = intel_compute_preferred_slm_calc_encode_size(
|
||||||
|
devinfo, prog_data->base.total_shared,
|
||||||
|
dispatch.group_size, dispatch.simd_size),
|
||||||
|
.NumberOfBarriers = prog_data->uses_barrier,
|
||||||
|
#if GFX_VER >= 30
|
||||||
|
.RegistersPerThread = ptl_register_blocks(prog_data->base.grf_used),
|
||||||
|
#endif
|
||||||
|
},
|
||||||
|
.EmitInlineParameter = prog_data->uses_inline_push_addr,
|
||||||
};
|
};
|
||||||
|
|
||||||
assert(ARRAY_SIZE(pipeline->gfx125.compute_walker) >= GENX(COMPUTE_WALKER_length));
|
assert(ARRAY_SIZE(pipeline->gfx125.compute_walker_body) >=
|
||||||
GENX(COMPUTE_WALKER_pack)(NULL, pipeline->gfx125.compute_walker, &walker);
|
GENX(COMPUTE_WALKER_BODY_length));
|
||||||
|
GENX(COMPUTE_WALKER_BODY_pack)(NULL,
|
||||||
|
pipeline->gfx125.compute_walker_body,
|
||||||
|
&walker);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else /* #if GFX_VERx10 >= 125 */
|
#else /* #if GFX_VERx10 >= 125 */
|
||||||
|
|
@ -1660,7 +1668,10 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
||||||
*/
|
*/
|
||||||
.ThreadPreemptionDisable = true,
|
.ThreadPreemptionDisable = true,
|
||||||
#endif
|
#endif
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
.ThreadGroupDispatchSize =
|
||||||
|
intel_compute_threads_group_dispatch_size(dispatch->threads),
|
||||||
|
#endif
|
||||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||||
};
|
};
|
||||||
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
|
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue