diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 8553327626a..a1e8c855a4d 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -22,6 +22,7 @@
  */
 
 #include "anv_private.h"
+#include "genxml/gen8_pack.h"
 
 static bool
 lookup_blorp_shader(struct blorp_batch *batch,
@@ -371,6 +372,45 @@ copy_image(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
+/* Make sure cmd_buffer has a companion RCS command buffer (creating it on
+ * first use) and open a syncpoint between the two.
+ *
+ * Returns the syncpoint state, to be handed to end_main_rcs_cmd_buffer_done()
+ * once the work for the companion has been recorded.
+ */
+static struct anv_state
+record_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer)
+{
+   const struct intel_device_info *info = cmd_buffer->device->info;
+
+   if (cmd_buffer->companion_rcs_cmd_buffer == NULL) {
+      anv_create_companion_rcs_command_buffer(cmd_buffer);
+      /* Re-emit the aux table register in every command buffer.  This way we're
+       * ensured that we have the table even if this command buffer doesn't
+       * initialize any images.
+       */
+      if (info->has_aux_map) {
+         assert(cmd_buffer->companion_rcs_cmd_buffer != NULL);
+         anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer,
+                                   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
+                                   "new cmd buffer with aux-tt");
+      }
+   }
+
+   assert(cmd_buffer->companion_rcs_cmd_buffer != NULL);
+   return anv_genX(info, cmd_buffer_begin_companion_rcs_syncpoint)(cmd_buffer);
+}
+
+/* Close a syncpoint opened by record_main_rcs_cmd_buffer_done(). */
+static void
+end_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer,
+                             struct anv_state syncpoint)
+{
+   const struct intel_device_info *info = cmd_buffer->device->info;
+   anv_genX(info, cmd_buffer_end_companion_rcs_syncpoint)(cmd_buffer,
+                                                          syncpoint);
+}
+
 void anv_CmdCopyImage2(
     VkCommandBuffer                             commandBuffer,
     const VkCopyImageInfo2*                     pCopyImageInfo)
@@ -379,6 +419,17 @@ void anv_CmdCopyImage2(
    ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
    ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);
 
+   struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
+   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
+
+   if (cmd_buffer->device->info->verx10 >= 125 &&
+       dst_image->vk.samples > 1 &&
+       (anv_cmd_buffer_is_blitter_queue(main_cmd_buffer) ||
+        anv_cmd_buffer_is_compute_queue(main_cmd_buffer))) {
+      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
+      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
+   }
+
    struct blorp_batch batch;
    anv_blorp_batch_init(cmd_buffer, &batch, 0);
 
@@ -390,6 +441,9 @@ void anv_CmdCopyImage2(
    }
 
    anv_blorp_batch_finish(&batch);
+
+   if (rcs_done.alloc_size)
+      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
 }
 
 static enum isl_format
@@ -974,6 +1028,17 @@ void anv_CmdClearColorImage(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_image, image, _image);
 
+   struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
+   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
+
+   if (cmd_buffer->device->info->verx10 >= 125 &&
+       image->vk.samples > 1 &&
+       (anv_cmd_buffer_is_blitter_queue(main_cmd_buffer) ||
+        anv_cmd_buffer_is_compute_queue(main_cmd_buffer))) {
+      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
+      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
+   }
+
    struct blorp_batch batch;
    anv_blorp_batch_init(cmd_buffer, &batch, 0);
 
@@ -1023,6 +1088,9 @@ void anv_CmdClearColorImage(
    }
 
    anv_blorp_batch_finish(&batch);
+
+   if (rcs_done.alloc_size)
+      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
 }
 
 void anv_CmdClearDepthStencilImage(
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 1f9971ca06a..43952a1f03c 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -246,3 +246,10 @@ genX(emit_breakpoint)(struct anv_batch *batch,
    if (INTEL_DEBUG(DEBUG_DRAW_BKP))
       genX(batch_emit_breakpoint)(batch, device, emit_before_draw);
 }
+
+struct anv_state
+genX(cmd_buffer_begin_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer);
+
+void
+genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer,
+                                             struct anv_state syncpoint);
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 3782919cbd1..95d905758bc 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -7961,3 +7961,125 @@ genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch,
    }
 }
 
+
+/* Emit a two-way syncpoint between cmd_buffer (the main command buffer) and
+ * its companion RCS command buffer.
+ *
+ * Two dwords are allocated from the dynamic state: the first one is polled by
+ * the main batch, the second one by the companion batch. The main batch
+ * flushes (compute/blitter queues), signals the companion and then waits; the
+ * companion batch waits for that signal. The main batch is released by
+ * genX(cmd_buffer_end_companion_rcs_syncpoint)(), which takes the state
+ * returned here.
+ */
+struct anv_state
+genX(cmd_buffer_begin_companion_rcs_syncpoint)(
+      struct anv_cmd_buffer   *cmd_buffer)
+{
+#if GFX_VERx10 >= 125
+   const struct intel_device_info *info = cmd_buffer->device->info;
+   struct anv_state syncpoint =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 2 * sizeof(uint32_t), 4);
+   struct anv_address xcs_wait_addr =
+      anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
+                                   syncpoint);
+   struct anv_address rcs_wait_addr = anv_address_add(xcs_wait_addr, 4);
+
+   /* Reset the sync point */
+   memset(syncpoint.map, 0, 2 * sizeof(uint32_t));
+
+   struct mi_builder b;
+
+   /* On the main (compute or blitter) command buffer:
+    *    - flush all caches & invalidate
+    *    - unblock RCS
+    *    - wait on RCS to complete
+    *    - clear the value we waited on
+    */
+
+   if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
+      anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_FLUSH_BITS |
+                                            ANV_PIPE_INVALIDATE_BITS |
+                                            ANV_PIPE_STALL_BITS,
+                                "post main cmd buffer invalidate");
+      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+   } else if (anv_cmd_buffer_is_blitter_queue(cmd_buffer)) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), fd) {
+         fd.FlushCCS = true; /* Maybe handle Flush LLC */
+      }
+   }
+
+   {
+      mi_builder_init(&b, info, &cmd_buffer->batch);
+      mi_store(&b, mi_mem32(rcs_wait_addr), mi_imm(0x1));
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), sem) {
+         sem.WaitMode            = PollingMode;
+         sem.CompareOperation    = COMPARE_SAD_EQUAL_SDD;
+         sem.SemaphoreDataDword  = 0x1;
+         sem.SemaphoreAddress    = xcs_wait_addr;
+      }
+      /* Make sure to reset the semaphore in case the command buffer is run
+       * multiple times.
+       */
+      mi_store(&b, mi_mem32(xcs_wait_addr), mi_imm(0x0));
+   }
+
+   /* On RCS:
+    *    - wait on the main command buffer signal
+    *    - clear the value we waited on
+    */
+   {
+      mi_builder_init(&b, info, &cmd_buffer->companion_rcs_cmd_buffer->batch);
+      anv_batch_emit(&cmd_buffer->companion_rcs_cmd_buffer->batch,
+                     GENX(MI_SEMAPHORE_WAIT),
+                     sem) {
+         sem.WaitMode            = PollingMode;
+         sem.CompareOperation    = COMPARE_SAD_EQUAL_SDD;
+         sem.SemaphoreDataDword  = 0x1;
+         sem.SemaphoreAddress    = rcs_wait_addr;
+      }
+      /* Make sure to reset the semaphore in case the command buffer is run
+       * multiple times.
+       */
+      mi_store(&b, mi_mem32(rcs_wait_addr), mi_imm(0x0));
+   }
+
+   return syncpoint;
+#else
+   unreachable("Not implemented");
+#endif
+}
+
+/* Release the main batch blocked by a syncpoint opened with
+ * genX(cmd_buffer_begin_companion_rcs_syncpoint)(): after flushing, the
+ * companion RCS batch writes the dword the main batch is polling.
+ */
+void
+genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer,
+                                             struct anv_state syncpoint)
+{
+#if GFX_VERx10 >= 125
+   struct anv_address xcs_wait_addr =
+      anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
+                                   syncpoint);
+
+   struct mi_builder b;
+
+   /* On RCS:
+    *    - flush all caches & invalidate
+    *    - unblock the main (compute or blitter) command buffer
+    */
+   anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer,
+                             ANV_PIPE_FLUSH_BITS |
+                             ANV_PIPE_INVALIDATE_BITS |
+                             ANV_PIPE_STALL_BITS,
+                             "post rcs flush");
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer->companion_rcs_cmd_buffer);
+
+   mi_builder_init(&b, cmd_buffer->device->info,
+                   &cmd_buffer->companion_rcs_cmd_buffer->batch);
+   mi_store(&b, mi_mem32(xcs_wait_addr), mi_imm(0x1));
+#else
+   unreachable("Not implemented");
+#endif
+}