mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-26 14:38:13 +02:00
iris: Emit an EXECUTE_INDIRECT_DISPATCH when available
On newer platforms (Arrowlake and above) we can issue an EXECUTE_INDIRECT_DISPATCH that allows us to:

* Skip issuing mi load/store instructions for indirect parameters

Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26421>
This commit is contained in:
parent
580728564e
commit
d161e3c2e2
1 changed files with 81 additions and 31 deletions
|
|
@ -8332,8 +8332,52 @@ iris_load_indirect_location(struct iris_context *ice,
|
||||||
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMZ), size_z);
|
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMZ), size_z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool iris_emit_indirect_dispatch_supported(const struct intel_device_info *devinfo)
|
||||||
|
{
|
||||||
|
// TODO: Swizzling X and Y workgroup sizes is not supported in execute indirect dispatch
|
||||||
|
return devinfo->has_indirect_unroll;
|
||||||
|
}
|
||||||
|
|
||||||
#if GFX_VERx10 >= 125
|
#if GFX_VERx10 >= 125
|
||||||
|
|
||||||
|
static void iris_emit_execute_indirect_dispatch(struct iris_context *ice,
|
||||||
|
struct iris_batch *batch,
|
||||||
|
const struct pipe_grid_info *grid,
|
||||||
|
const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd)
|
||||||
|
{
|
||||||
|
const struct iris_screen *screen = batch->screen;
|
||||||
|
const struct intel_device_info *devinfo = screen->devinfo;
|
||||||
|
struct iris_compiled_shader *shader =
|
||||||
|
ice->shaders.prog[MESA_SHADER_COMPUTE];
|
||||||
|
struct brw_stage_prog_data *prog_data = shader->prog_data;
|
||||||
|
struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
|
||||||
|
const struct brw_cs_dispatch_info dispatch =
|
||||||
|
brw_cs_get_dispatch_info(devinfo, cs_prog_data, grid->block);
|
||||||
|
struct iris_bo *indirect = iris_resource_bo(grid->indirect);
|
||||||
|
const int dispatch_size = dispatch.simd_size / 16;
|
||||||
|
|
||||||
|
struct GENX(COMPUTE_WALKER_BODY) body = {};
|
||||||
|
body.SIMDSize = dispatch_size;
|
||||||
|
body.MessageSIMD = dispatch_size;
|
||||||
|
body.LocalXMaximum = grid->block[0] - 1;
|
||||||
|
body.LocalYMaximum = grid->block[1] - 1;
|
||||||
|
body.LocalZMaximum = grid->block[2] - 1;
|
||||||
|
body.ExecutionMask = dispatch.right_mask;
|
||||||
|
body.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
|
||||||
|
body.InterfaceDescriptor = idd;
|
||||||
|
|
||||||
|
struct iris_address indirect_bo = ro_bo(indirect, grid->indirect_offset);
|
||||||
|
iris_emit_cmd(batch, GENX(EXECUTE_INDIRECT_DISPATCH), ind) {
|
||||||
|
ind.PredicateEnable =
|
||||||
|
ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
|
||||||
|
ind.MaxCount = 1;
|
||||||
|
ind.COMPUTE_WALKER_BODY = body;
|
||||||
|
ind.ArgumentBufferStartAddress = indirect_bo;
|
||||||
|
ind.MOCS =
|
||||||
|
iris_mocs(indirect_bo.bo, &screen->isl_dev, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
iris_upload_compute_walker(struct iris_context *ice,
|
iris_upload_compute_walker(struct iris_context *ice,
|
||||||
struct iris_batch *batch,
|
struct iris_batch *batch,
|
||||||
|
|
@ -8363,6 +8407,25 @@ iris_upload_compute_walker(struct iris_context *ice,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {};
|
||||||
|
idd.KernelStartPointer = KSP(shader);
|
||||||
|
idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;
|
||||||
|
idd.SharedLocalMemorySize =
|
||||||
|
encode_slm_size(GFX_VER, prog_data->total_shared);
|
||||||
|
idd.SamplerStatePointer = shs->sampler_table.offset;
|
||||||
|
idd.SamplerCount = encode_sampler_count(shader),
|
||||||
|
idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
|
||||||
|
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
||||||
|
idd.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||||
|
0 : MIN2(shader->bt.size_bytes / 4, 31);
|
||||||
|
idd.PreferredSLMAllocationSize = preferred_slm_allocation_size(devinfo);
|
||||||
|
idd.NumberOfBarriers = cs_prog_data->uses_barrier;
|
||||||
|
|
||||||
|
iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_COMPUTE, NULL, NULL, NULL);
|
||||||
|
|
||||||
|
if (iris_emit_indirect_dispatch_supported(devinfo) && grid->indirect) {
|
||||||
|
iris_emit_execute_indirect_dispatch(ice, batch, grid, idd);
|
||||||
|
} else {
|
||||||
if (grid->indirect)
|
if (grid->indirect)
|
||||||
iris_load_indirect_location(ice, batch, grid);
|
iris_load_indirect_location(ice, batch, grid);
|
||||||
|
|
||||||
|
|
@ -8382,24 +8445,11 @@ iris_upload_compute_walker(struct iris_context *ice,
|
||||||
cw.ThreadGroupIDZDimension = grid->grid[2];
|
cw.ThreadGroupIDZDimension = grid->grid[2];
|
||||||
cw.ExecutionMask = dispatch.right_mask;
|
cw.ExecutionMask = dispatch.right_mask;
|
||||||
cw.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
|
cw.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
|
||||||
|
cw.InterfaceDescriptor = idd;
|
||||||
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
|
||||||
.KernelStartPointer = KSP(shader),
|
|
||||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
|
||||||
.SharedLocalMemorySize =
|
|
||||||
encode_slm_size(GFX_VER, prog_data->total_shared),
|
|
||||||
.PreferredSLMAllocationSize = preferred_slm_allocation_size(devinfo),
|
|
||||||
.NumberOfBarriers = cs_prog_data->uses_barrier,
|
|
||||||
.SamplerStatePointer = shs->sampler_table.offset,
|
|
||||||
.SamplerCount = encode_sampler_count(shader),
|
|
||||||
.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE],
|
|
||||||
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
|
||||||
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
|
||||||
0 : MIN2(shader->bt.size_bytes / 4, 31),
|
|
||||||
};
|
|
||||||
|
|
||||||
assert(brw_cs_push_const_total_size(cs_prog_data, dispatch.threads) == 0);
|
assert(brw_cs_push_const_total_size(cs_prog_data, dispatch.threads) == 0);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
trace_intel_end_compute(&batch->trace, grid->grid[0], grid->grid[1], grid->grid[2]);
|
trace_intel_end_compute(&batch->trace, grid->grid[0], grid->grid[1], grid->grid[2]);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue