diff --git a/.pick_status.json b/.pick_status.json index d6598bc506d..384d551bcfc 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1254,7 +1254,7 @@ "description": "anv: wait for CS write completion before executing secondary", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": "" diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index eb39ae6e85b..433cf00e5ef 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1016,27 +1016,9 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start; if (cmd_buffer->device->physical->use_call_secondary) { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN; - /* If the secondary command buffer begins & ends in the same BO and - * its length is less than the length of CS prefetch, add some NOOPs - * instructions so the last MI_BATCH_BUFFER_START is outside the CS - * prefetch. - */ - if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) { - const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class; - /* Careful to have everything in signed integer. */ - int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class]; - int32_t batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start; - - for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4) - anv_batch_emit(&cmd_buffer->batch, GFX9_MI_NOOP, noop); - } void *jump_addr = - anv_batch_emitn(&cmd_buffer->batch, - GFX9_MI_BATCH_BUFFER_START_length, - GFX9_MI_BATCH_BUFFER_START, - .AddressSpaceIndicator = ASI_PPGTT, - .SecondLevelBatchBuffer = Firstlevelbatch) + + anv_genX(devinfo, batch_emit_return)(&cmd_buffer->batch) + (GFX9_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8); cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, jump_addr); @@ -1156,18 +1138,10 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_batch_bo *first_bbo = list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); - uint64_t *write_return_addr = - anv_batch_emitn(&primary->batch, - GFX9_MI_STORE_DATA_IMM_length + 1 /* QWord write */, - GFX9_MI_STORE_DATA_IMM, - .Address = secondary->return_addr) - + (GFX9_MI_STORE_DATA_IMM_ImmediateData_start / 8); - - emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0); - - *write_return_addr = - anv_address_physical(anv_batch_address(&primary->batch, - primary->batch.next)); + anv_genX(primary->device->info, batch_emit_secondary_call)( + &primary->batch, + (struct anv_address) { .bo = first_bbo->bo }, + secondary->return_addr); anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); break; diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 7799f1780eb..f59c98472f8 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -173,6 +173,12 @@ void genX(cmd_buffer_dispatch_kernel)(struct anv_cmd_buffer *cmd_buffer, void genX(blorp_exec)(struct blorp_batch *batch, const struct blorp_params *params); +void genX(batch_emit_secondary_call)(struct anv_batch *batch, + struct anv_address secondary_addr, + struct anv_address secondary_return_addr); + +void *genX(batch_emit_return)(struct anv_batch *batch); + void genX(cmd_emit_timestamp)(struct anv_batch *batch, struct anv_device *device, struct anv_address addr, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 076bc97737d..9142d066365 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3295,6 +3295,19 @@ genX(BeginCommandBuffer)( if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; +#if GFX_VER >= 12 + /* Reenable prefetching at the beginning of secondary command buffers. We + * do this so that the return instruction edition is not prefetched before + * completion. + */ + if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) { + arb.PreParserDisableMask = true; + arb.PreParserDisable = false; + } + } +#endif + trace_intel_begin_cmd_buffer(&cmd_buffer->trace); if (anv_cmd_buffer_is_video_queue(cmd_buffer) || @@ -8130,6 +8143,53 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch, } } +void genX(batch_emit_secondary_call)(struct anv_batch *batch, + struct anv_address secondary_addr, + struct anv_address secondary_return_addr) +{ + /* Emit a write to change the return address of the secondary */ + uint64_t *write_return_addr = + anv_batch_emitn(batch, + GENX(MI_STORE_DATA_IMM_length) + 1 /* QWord write */, + GENX(MI_STORE_DATA_IMM), +#if GFX_VER >= 12 + .ForceWriteCompletionCheck = true, +#endif + .Address = secondary_return_addr) + + GENX(MI_STORE_DATA_IMM_ImmediateData_start) / 8; + +#if GFX_VER >= 12 + /* Disable prefetcher before jumping into a secondary */ + anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) { + arb.PreParserDisableMask = true; + arb.PreParserDisable = true; + } +#endif + + /* Jump into the secondary */ + anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_START), bbs) { + bbs.AddressSpaceIndicator = ASI_PPGTT; + bbs.SecondLevelBatchBuffer = Firstlevelbatch; + bbs.BatchBufferStartAddress = secondary_addr; + } + + /* Replace the return address written by the MI_STORE_DATA_IMM above with + * the primary's current batch address (immediately after the jump). + */ + *write_return_addr = + anv_address_physical(anv_batch_current_address(batch)); +} + +void * +genX(batch_emit_return)(struct anv_batch *batch) +{ + return anv_batch_emitn(batch, + GENX(MI_BATCH_BUFFER_START_length), + GENX(MI_BATCH_BUFFER_START), + .AddressSpaceIndicator = ASI_PPGTT, + .SecondLevelBatchBuffer = Firstlevelbatch); +} + void genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch, struct anv_device *device,