From 6664fc41622f8ca89c7eddf1bec178fb2d5f2d95 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 13 Dec 2023 11:45:31 +0200 Subject: [PATCH] anv: wait for CS write completion before executing secondary Got a error state on DG2 with a jump to secondary. The secondary is empty and padded with MI_NOOPs to workaround the CS prefetching. According to the error state, the return jump address from the secondary to the primary is 0x0. The ACTHD register value is 0x10, so it seems that the command streamer indeed jumped to 0x0 and hanged on a few dwords after that. The return address should have been set edited by a previous MI_STORE_DATA_IMM instruction. So it appears it did not complete in time for the command stream to catch it. On Gfx12+ this can happend if we do not set ForceWriteCompletionCheck. This change also takes the opportunity to remove the padding MI_NOOPs at the end of secondaries on Gfx12+ by using disabling the prefetching just before jumping into secondaries and reenabling it at the beginning of each secondary. Signed-off-by: Lionel Landwerlin Cc: mesa-stable Reviewed-by: Ivan Briano Part-of: (cherry picked from commit 6a92af158dc132eee449c175bdee66d92c68d191) --- .pick_status.json | 2 +- src/intel/vulkan/anv_batch_chain.c | 36 +++--------------- src/intel/vulkan/anv_genX.h | 6 +++ src/intel/vulkan/genX_cmd_buffer.c | 60 ++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 32 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index d6598bc506d..384d551bcfc 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1254,7 +1254,7 @@ "description": "anv: wait for CS write completion before executing secondary", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": "" diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index eb39ae6e85b..433cf00e5ef 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1016,27 +1016,9 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start; if (cmd_buffer->device->physical->use_call_secondary) { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN; - /* If the secondary command buffer begins & ends in the same BO and - * its length is less than the length of CS prefetch, add some NOOPs - * instructions so the last MI_BATCH_BUFFER_START is outside the CS - * prefetch. - */ - if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) { - const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class; - /* Careful to have everything in signed integer. */ - int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class]; - int32_t batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start; - - for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4) - anv_batch_emit(&cmd_buffer->batch, GFX9_MI_NOOP, noop); - } void *jump_addr = - anv_batch_emitn(&cmd_buffer->batch, - GFX9_MI_BATCH_BUFFER_START_length, - GFX9_MI_BATCH_BUFFER_START, - .AddressSpaceIndicator = ASI_PPGTT, - .SecondLevelBatchBuffer = Firstlevelbatch) + + anv_genX(devinfo, batch_emit_return)(&cmd_buffer->batch) + (GFX9_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8); cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, jump_addr); @@ -1156,18 +1138,10 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_batch_bo *first_bbo = list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); - uint64_t *write_return_addr = - anv_batch_emitn(&primary->batch, - GFX9_MI_STORE_DATA_IMM_length + 1 /* QWord write */, - GFX9_MI_STORE_DATA_IMM, - .Address = secondary->return_addr) - + (GFX9_MI_STORE_DATA_IMM_ImmediateData_start / 8); - - emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0); - - *write_return_addr = - anv_address_physical(anv_batch_address(&primary->batch, - primary->batch.next)); + anv_genX(primary->device->info, batch_emit_secondary_call)( + &primary->batch, + (struct anv_address) { .bo = first_bbo->bo }, + secondary->return_addr); anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); break; diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 7799f1780eb..f59c98472f8 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -173,6 +173,12 @@ void genX(cmd_buffer_dispatch_kernel)(struct anv_cmd_buffer *cmd_buffer, void genX(blorp_exec)(struct blorp_batch *batch, const struct blorp_params *params); +void genX(batch_emit_secondary_call)(struct anv_batch *batch, + struct anv_address secondary_addr, + struct anv_address secondary_return_addr); + +void *genX(batch_emit_return)(struct anv_batch *batch); + void genX(cmd_emit_timestamp)(struct anv_batch *batch, struct anv_device *device, struct anv_address addr, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 076bc97737d..9142d066365 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3295,6 +3295,19 @@ genX(BeginCommandBuffer)( if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; +#if GFX_VER >= 12 + /* Reenable prefetching at the beginning of secondary command buffers. We + * do this so that the return instruction edition is not prefetched before + * completion. + */ + if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) { + arb.PreParserDisableMask = true; + arb.PreParserDisable = false; + } + } +#endif + trace_intel_begin_cmd_buffer(&cmd_buffer->trace); if (anv_cmd_buffer_is_video_queue(cmd_buffer) || @@ -8130,6 +8143,53 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch, } } +void genX(batch_emit_secondary_call)(struct anv_batch *batch, + struct anv_address secondary_addr, + struct anv_address secondary_return_addr) +{ + /* Emit a write to change the return address of the secondary */ + uint64_t *write_return_addr = + anv_batch_emitn(batch, + GENX(MI_STORE_DATA_IMM_length) + 1 /* QWord write */, + GENX(MI_STORE_DATA_IMM), +#if GFX_VER >= 12 + .ForceWriteCompletionCheck = true, +#endif + .Address = secondary_return_addr) + + GENX(MI_STORE_DATA_IMM_ImmediateData_start) / 8; + +#if GFX_VER >= 12 + /* Disable prefetcher before jumping into a secondary */ + anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) { + arb.PreParserDisableMask = true; + arb.PreParserDisable = true; + } +#endif + + /* Jump into the secondary */ + anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_START), bbs) { + bbs.AddressSpaceIndicator = ASI_PPGTT; + bbs.SecondLevelBatchBuffer = Firstlevelbatch; + bbs.BatchBufferStartAddress = secondary_addr; + } + + /* Replace the return address written by the MI_STORE_DATA_IMM above with + * the primary's current batch address (immediately after the jump). + */ + *write_return_addr = + anv_address_physical(anv_batch_current_address(batch)); +} + +void * +genX(batch_emit_return)(struct anv_batch *batch) +{ + return anv_batch_emitn(batch, + GENX(MI_BATCH_BUFFER_START_length), + GENX(MI_BATCH_BUFFER_START), + .AddressSpaceIndicator = ASI_PPGTT, + .SecondLevelBatchBuffer = Firstlevelbatch); +} + void genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch, struct anv_device *device,