mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-26 16:48:13 +02:00
iris: Emit EXECUTE_INDIRECT_DRAW when available
On newer platforms (Arrow Lake and above) we can issue an
EXECUTE_INDIRECT_DRAW command that allows us to:
* Skip issuing MI load/store instructions for indirect parameters
* Skip unrolling the indirect draw on the CPU side when the
appropriate stride is passed
Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26178>
This commit is contained in:
parent
e5b7e16f3b
commit
d1109f67bb
4 changed files with 231 additions and 53 deletions
|
|
@ -1152,6 +1152,30 @@ int iris_get_driver_query_group_info(struct pipe_screen *pscreen,
|
|||
void gfx9_toggle_preemption(struct iris_context *ice,
|
||||
struct iris_batch *batch,
|
||||
const struct pipe_draw_info *draw);
|
||||
static const bool
|
||||
iris_execute_indirect_draw_supported(const struct iris_context *ice,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_info *draw)
|
||||
{
|
||||
const struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
|
||||
const struct brw_vs_prog_data *vs_prog_data = (void *)
|
||||
ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;
|
||||
const bool is_multiview = draw->view_mask != 0;
|
||||
const size_t struct_size = draw->index_size ?
|
||||
sizeof(uint32_t) * 5 :
|
||||
sizeof(uint32_t) * 4;
|
||||
const bool aligned_stride =
|
||||
indirect && (indirect->stride == 0 || indirect->stride == struct_size);
|
||||
|
||||
return (screen->devinfo->has_indirect_unroll &&
|
||||
aligned_stride &&
|
||||
(indirect &&
|
||||
!indirect->count_from_stream_output) &&
|
||||
!is_multiview &&
|
||||
!(vs_prog_data->uses_firstvertex ||
|
||||
vs_prog_data->uses_baseinstance ||
|
||||
vs_prog_data->uses_drawid));
|
||||
}
|
||||
|
||||
#ifdef genX
|
||||
# include "iris_genx_protos.h"
|
||||
|
|
|
|||
|
|
@ -181,59 +181,6 @@ iris_update_draw_parameters(struct iris_context *ice,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
iris_indirect_draw_vbo(struct iris_context *ice,
|
||||
const struct pipe_draw_info *dinfo,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *dindirect,
|
||||
const struct pipe_draw_start_count_bias *draw)
|
||||
{
|
||||
struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
|
||||
struct pipe_draw_info info = *dinfo;
|
||||
struct pipe_draw_indirect_info indirect = *dindirect;
|
||||
|
||||
iris_emit_buffer_barrier_for(batch, iris_resource_bo(indirect.buffer),
|
||||
IRIS_DOMAIN_VF_READ);
|
||||
|
||||
if (indirect.indirect_draw_count) {
|
||||
struct iris_bo *draw_count_bo =
|
||||
iris_resource_bo(indirect.indirect_draw_count);
|
||||
iris_emit_buffer_barrier_for(batch, draw_count_bo,
|
||||
IRIS_DOMAIN_OTHER_READ);
|
||||
|
||||
if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
|
||||
/* Upload MI_PREDICATE_RESULT to GPR15.*/
|
||||
batch->screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);
|
||||
}
|
||||
}
|
||||
|
||||
const uint64_t orig_dirty = ice->state.dirty;
|
||||
const uint64_t orig_stage_dirty = ice->state.stage_dirty;
|
||||
|
||||
for (int i = 0; i < indirect.draw_count; i++) {
|
||||
iris_batch_maybe_flush(batch, 1500);
|
||||
|
||||
iris_update_draw_parameters(ice, &info, drawid_offset + i, &indirect, draw);
|
||||
|
||||
batch->screen->vtbl.upload_render_state(ice, batch, &info, drawid_offset + i, &indirect, draw);
|
||||
|
||||
ice->state.dirty &= ~IRIS_ALL_DIRTY_FOR_RENDER;
|
||||
ice->state.stage_dirty &= ~IRIS_ALL_STAGE_DIRTY_FOR_RENDER;
|
||||
|
||||
indirect.offset += indirect.stride;
|
||||
}
|
||||
|
||||
if (indirect.indirect_draw_count &&
|
||||
ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
|
||||
/* Restore MI_PREDICATE_RESULT. */
|
||||
batch->screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));
|
||||
}
|
||||
|
||||
/* Put this back for post-draw resolves, we'll clear it again after. */
|
||||
ice->state.dirty = orig_dirty;
|
||||
ice->state.stage_dirty = orig_stage_dirty;
|
||||
}
|
||||
|
||||
static void
|
||||
iris_simple_draw_vbo(struct iris_context *ice,
|
||||
const struct pipe_draw_info *draw,
|
||||
|
|
@ -250,6 +197,64 @@ iris_simple_draw_vbo(struct iris_context *ice,
|
|||
batch->screen->vtbl.upload_render_state(ice, batch, draw, drawid_offset, indirect, sc);
|
||||
}
|
||||
|
||||
static void
|
||||
iris_indirect_draw_vbo(struct iris_context *ice,
|
||||
const struct pipe_draw_info *dinfo,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *dindirect,
|
||||
const struct pipe_draw_start_count_bias *draw)
|
||||
{
|
||||
struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
|
||||
struct pipe_draw_info info = *dinfo;
|
||||
struct pipe_draw_indirect_info indirect = *dindirect;
|
||||
const bool use_predicate =
|
||||
ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
|
||||
|
||||
const uint64_t orig_dirty = ice->state.dirty;
|
||||
const uint64_t orig_stage_dirty = ice->state.stage_dirty;
|
||||
|
||||
if (iris_execute_indirect_draw_supported(ice, &indirect, &info)) {
|
||||
iris_batch_maybe_flush(batch, 1500);
|
||||
|
||||
iris_update_draw_parameters(ice, &info, drawid_offset, &indirect, draw);
|
||||
|
||||
batch->screen->vtbl.upload_indirect_render_state(ice, &info, &indirect, draw);
|
||||
} else {
|
||||
iris_emit_buffer_barrier_for(batch, iris_resource_bo(indirect.buffer),
|
||||
IRIS_DOMAIN_VF_READ);
|
||||
|
||||
if (indirect.indirect_draw_count) {
|
||||
struct iris_bo *draw_count_bo =
|
||||
iris_resource_bo(indirect.indirect_draw_count);
|
||||
iris_emit_buffer_barrier_for(batch, draw_count_bo,
|
||||
IRIS_DOMAIN_OTHER_READ);
|
||||
}
|
||||
|
||||
if (use_predicate) {
|
||||
/* Upload MI_PREDICATE_RESULT to GPR15.*/
|
||||
batch->screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);
|
||||
}
|
||||
|
||||
for (int i = 0; i < indirect.draw_count; i++) {
|
||||
iris_simple_draw_vbo(ice, &info, drawid_offset + i, &indirect, draw);
|
||||
|
||||
ice->state.dirty &= ~IRIS_ALL_DIRTY_FOR_RENDER;
|
||||
ice->state.stage_dirty &= ~IRIS_ALL_STAGE_DIRTY_FOR_RENDER;
|
||||
|
||||
indirect.offset += indirect.stride;
|
||||
}
|
||||
|
||||
if (use_predicate) {
|
||||
/* Restore MI_PREDICATE_RESULT. */
|
||||
batch->screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));
|
||||
}
|
||||
}
|
||||
|
||||
/* Put this back for post-draw resolves, we'll clear it again after. */
|
||||
ice->state.dirty = orig_dirty;
|
||||
ice->state.stage_dirty = orig_stage_dirty;
|
||||
}
|
||||
|
||||
/**
|
||||
* The pipe->draw_vbo() driver hook. Performs a draw on the GPU.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -69,6 +69,10 @@ struct iris_vtable {
|
|||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *sc);
|
||||
void (*upload_indirect_render_state)(struct iris_context *ice,
|
||||
const struct pipe_draw_info *draw,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *sc);
|
||||
void (*update_binder_address)(struct iris_batch *batch,
|
||||
struct iris_binder *binder);
|
||||
void (*upload_compute_state)(struct iris_context *ice,
|
||||
|
|
|
|||
|
|
@ -8317,6 +8317,150 @@ iris_upload_render_state(struct iris_context *ice,
|
|||
trace_intel_end_draw(&batch->trace, count);
|
||||
}
|
||||
|
||||
/**
 * Upload render state for an indirect draw and emit it as one
 * EXECUTE_INDIRECT_DRAW command on the render batch.
 *
 * Only compiled for GFX_VERx10 >= 125; on older generations the body is
 * unreachable().
 *
 * \param ice       context providing the render batch and dirty state
 * \param draw      draw info (index size/buffer, instance count)
 * \param indirect  indirect draw description; must not be NULL
 * \param sc        start/count/bias; may be NULL unless user indices are
 *                  supplied, in which case a vertex count of 0 is used for
 *                  the workaround and trace bookkeeping
 */
static void
iris_upload_indirect_render_state(struct iris_context *ice,
                                  const struct pipe_draw_info *draw,
                                  const struct pipe_draw_indirect_info *indirect,
                                  const struct pipe_draw_start_count_bias *sc)
{
#if GFX_VERx10 >= 125
   assert(indirect);

   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   UNUSED struct iris_screen *screen = batch->screen;
   UNUSED const struct intel_device_info *devinfo = screen->devinfo;
   const bool use_predicate =
      ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;

   /* The trace accounting at the end already treats sc as optional, so
    * resolve the vertex count once and use it everywhere rather than
    * dereferencing sc unguarded in one place and guarded in another.
    */
   const uint32_t vertex_count = sc ? sc->count : 0;

   trace_intel_begin_draw(&batch->trace);

   if (ice->state.dirty & IRIS_DIRTY_VERTEX_BUFFER_FLUSHES)
      flush_vbos(ice, batch);

   iris_batch_sync_region_start(batch);

   /* Always pin the binder.  If we're emitting new binding table pointers,
    * we need it.  If not, we're probably inheriting old tables via the
    * context, and need it anyway.  Since true zero-bindings cases are
    * practically non-existent, just pin it and avoid last_res tracking.
    */
   iris_use_pinned_bo(batch, ice->state.binder.bo, false,
                      IRIS_DOMAIN_NONE);

   if (!batch->contains_draw) {
      /* Re-emit constants when starting a new batch buffer in order to
       * work around push constant corruption on context switch.
       *
       * XXX - Provide hardware spec quotation when available.
       */
      ice->state.stage_dirty |= (IRIS_STAGE_DIRTY_CONSTANTS_VS  |
                                 IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                                 IRIS_STAGE_DIRTY_CONSTANTS_TES |
                                 IRIS_STAGE_DIRTY_CONSTANTS_GS  |
                                 IRIS_STAGE_DIRTY_CONSTANTS_FS);
      batch->contains_draw = true;
   }

   if (!batch->contains_draw_with_next_seqno) {
      iris_restore_render_saved_bos(ice, batch, draw);
      batch->contains_draw_with_next_seqno = true;
   }

   /* Wa_1306463417 - Send HS state for every primitive on gfx11.
    * Wa_16011107343 (same for gfx12)
    * We implement this by setting TCS dirty on each draw.
    */
   if ((INTEL_NEEDS_WA_1306463417 || INTEL_NEEDS_WA_16011107343) &&
       ice->shaders.prog[MESA_SHADER_TESS_CTRL]) {
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS;
   }

   iris_upload_dirty_render_state(ice, batch, draw);

   if (draw->index_size > 0) {
      unsigned offset;

      if (draw->has_user_indices) {
         /* Uploading user indices needs the start/count range. */
         assert(sc);

         unsigned start_offset = draw->index_size * sc->start;

         u_upload_data(ice->ctx.const_uploader, start_offset,
                       sc->count * draw->index_size, 4,
                       (char*)draw->index.user + start_offset,
                       &offset, &ice->state.last_res.index_buffer);
         /* Rebase so that index 0 maps to the start of the user array. */
         offset -= start_offset;
      } else {
         struct iris_resource *res = (void *) draw->index.resource;
         res->bind_history |= PIPE_BIND_INDEX_BUFFER;

         pipe_resource_reference(&ice->state.last_res.index_buffer,
                                 draw->index.resource);
         offset = 0;

         iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_VF_READ);
      }

      struct iris_genx_state *genx = ice->state.genx;
      struct iris_bo *bo = iris_resource_bo(ice->state.last_res.index_buffer);

      uint32_t ib_packet[GENX(3DSTATE_INDEX_BUFFER_length)];
      iris_pack_command(GENX(3DSTATE_INDEX_BUFFER), ib_packet, ib) {
         ib.IndexFormat = draw->index_size >> 1;
         ib.MOCS = iris_mocs(bo, &batch->screen->isl_dev,
                             ISL_SURF_USAGE_INDEX_BUFFER_BIT);
         ib.BufferSize = bo->size - offset;
         ib.BufferStartingAddress = ro_bo(NULL, bo->address + offset);
         ib.L3BypassDisable = true;
      }

      /* Only re-emit the index buffer packet when it actually changed. */
      if (memcmp(genx->last_index_buffer, ib_packet, sizeof(ib_packet)) != 0) {
         memcpy(genx->last_index_buffer, ib_packet, sizeof(ib_packet));
         iris_batch_emit(batch, ib_packet, sizeof(ib_packet));
         iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_VF_READ);
      }
   }

   iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_DRAW, draw, indirect, sc);

   genX(maybe_emit_breakpoint)(batch, true);

   iris_emit_cmd(batch, GENX(EXECUTE_INDIRECT_DRAW), ind) {
      ind.ArgumentFormat =
         draw->index_size > 0 ? DRAWINDEXED : DRAW;
      ind.PredicateEnable = use_predicate;
      ind.TBIMREnabled = ice->state.use_tbimr;
      ind.MaxCount = indirect->draw_count;

      if (indirect->buffer) {
         struct iris_bo *bo = iris_resource_bo(indirect->buffer);
         ind.ArgumentBufferStartAddress = ro_bo(bo, indirect->offset);
         ind.MOCS = iris_mocs(bo, &screen->isl_dev, 0);
      } else {
         ind.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
      }

      if (indirect->indirect_draw_count) {
         struct iris_bo *draw_count_bo =
            iris_resource_bo(indirect->indirect_draw_count);
         ind.CountBufferIndirectEnable = true;
         ind.CountBufferAddress =
            ro_bo(draw_count_bo, indirect->indirect_draw_count_offset);
      }
   }

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode,
                              vertex_count);
   genX(maybe_emit_breakpoint)(batch, false);

   iris_batch_sync_region_end(batch);

   uint32_t count = vertex_count;
   count *= draw->instance_count ? draw->instance_count : 1;
   trace_intel_end_draw(&batch->trace, count);
#else
   unreachable("Unsupported path");
#endif /* GFX_VERx10 >= 125 */
}
|
||||
|
||||
static void
|
||||
iris_load_indirect_location(struct iris_context *ice,
|
||||
struct iris_batch *batch,
|
||||
|
|
@ -9728,6 +9872,7 @@ genX(init_screen_state)(struct iris_screen *screen)
|
|||
screen->vtbl.init_render_context = iris_init_render_context;
|
||||
screen->vtbl.init_compute_context = iris_init_compute_context;
|
||||
screen->vtbl.upload_render_state = iris_upload_render_state;
|
||||
screen->vtbl.upload_indirect_render_state = iris_upload_indirect_render_state;
|
||||
screen->vtbl.update_binder_address = iris_update_binder_address;
|
||||
screen->vtbl.upload_compute_state = iris_upload_compute_state;
|
||||
screen->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue