diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 49f019c0c35..414893d2343 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -112,9 +112,16 @@ static void
 anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
                      struct blorp_batch *batch, enum blorp_batch_flags flags)
 {
-   if (!(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
-      assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
+   VkQueueFlags queue_flags = cmd_buffer->queue_family->queueFlags;
+
+   if (queue_flags & VK_QUEUE_GRAPHICS_BIT) {
+      /* blorp runs on render engine by default */
+   } else if (queue_flags & VK_QUEUE_COMPUTE_BIT) {
       flags |= BLORP_BATCH_USE_COMPUTE;
+   } else if (queue_flags & VK_QUEUE_TRANSFER_BIT) {
+      flags |= BLORP_BATCH_USE_BLITTER;
+   } else {
+      unreachable("unknown queue family");
    }
 
    blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index a06dd75f160..2c87338b9ff 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1145,6 +1145,12 @@ anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
    enum intel_engine_class compute_class = c_count < 1 ?
INTEL_ENGINE_CLASS_RENDER : INTEL_ENGINE_CLASS_COMPUTE; + int blit_count = 0; + if (debug_get_bool_option("INTEL_COPY_CLASS", false)) { + blit_count = intel_engines_count(pdevice->engine_info, + INTEL_ENGINE_CLASS_COPY); + } + anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count); if (gc_count > 0) { @@ -1192,6 +1198,13 @@ anv_physical_device_init_queue_families(struct anv_physical_device *pdevice) .engine_class = INTEL_ENGINE_CLASS_VIDEO, }; } + if (blit_count > 0) { + pdevice->queue.families[family_count++] = (struct anv_queue_family) { + .queueFlags = VK_QUEUE_TRANSFER_BIT, + .queueCount = blit_count, + .engine_class = INTEL_ENGINE_CLASS_COPY, + }; + } /* Increase count below when other families are added as a reminder to * increase the ANV_MAX_QUEUE_FAMILIES value. diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 498069403fb..db5e7530654 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -414,6 +414,18 @@ blorp_exec_on_compute(struct blorp_batch *batch, cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; } +static void +blorp_exec_on_blitter(struct blorp_batch *batch, + const struct blorp_params *params) +{ + assert(batch->flags & BLORP_BATCH_USE_BLITTER); + + struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; + assert(cmd_buffer->queue_family->queueFlags == VK_QUEUE_TRANSFER_BIT); + + blorp_exec(batch, params); +} + void genX(blorp_exec)(struct blorp_batch *batch, const struct blorp_params *params) @@ -430,7 +442,9 @@ genX(blorp_exec)(struct blorp_batch *batch, genX(cmd_buffer_config_l3)(cmd_buffer, cfg); } - if (batch->flags & BLORP_BATCH_USE_COMPUTE) + if (batch->flags & BLORP_BATCH_USE_BLITTER) + blorp_exec_on_blitter(batch, params); + else if (batch->flags & BLORP_BATCH_USE_COMPUTE) blorp_exec_on_compute(batch, params); else blorp_exec_on_render(batch, params); diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c index 51488794c3d..aa82d49e1c0 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1808,6 +1808,9 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) else if (bits == 0) return; + if (anv_cmd_buffer_is_blitter_queue(cmd_buffer)) + return; + const bool trace_flush = (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | @@ -3390,7 +3393,8 @@ genX(BeginCommandBuffer)( trace_intel_begin_cmd_buffer(&cmd_buffer->trace); - if (anv_cmd_buffer_is_video_queue(cmd_buffer)) + if (anv_cmd_buffer_is_video_queue(cmd_buffer) || + anv_cmd_buffer_is_blitter_queue(cmd_buffer)) return VK_SUCCESS; genX(cmd_buffer_emit_state_base_address)(cmd_buffer); @@ -3560,7 +3564,8 @@ end_command_buffer(struct anv_cmd_buffer *cmd_buffer) anv_measure_endcommandbuffer(cmd_buffer); - if (anv_cmd_buffer_is_video_queue(cmd_buffer)) { + if (anv_cmd_buffer_is_video_queue(cmd_buffer) || + anv_cmd_buffer_is_blitter_queue(cmd_buffer)) { trace_intel_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer->vk.level); anv_cmd_buffer_end_batch_buffer(cmd_buffer); return VK_SUCCESS; @@ -3947,6 +3952,88 @@ cmd_buffer_barrier_video(struct anv_cmd_buffer *cmd_buffer, } } +static void +cmd_buffer_barrier_blitter(struct anv_cmd_buffer *cmd_buffer, + const VkDependencyInfo *dep_info) +{ +#if GFX_VERx10 >= 125 + assert(anv_cmd_buffer_is_blitter_queue(cmd_buffer)); + + /* The blitter requires an MI_FLUSH_DW command when a buffer transitions + * from being a destination to a source. 
+    */
+   bool flush_llc = false;
+   bool flush_ccs = false;
+   for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
+      const VkImageMemoryBarrier2 *img_barrier =
+         &dep_info->pImageMemoryBarriers[i];
+
+      ANV_FROM_HANDLE(anv_image, image, img_barrier->image);
+      const VkImageSubresourceRange *range = &img_barrier->subresourceRange;
+
+      /* If srcQueueFamilyIndex is not equal to dstQueueFamilyIndex, this
+       * memory barrier defines a queue family transfer operation.
+       */
+      if (img_barrier->srcQueueFamilyIndex != img_barrier->dstQueueFamilyIndex)
+         flush_llc = true;
+
+      /* Flush cache if transfer command reads the output of the previous
+       * transfer command, ideally we should just wait for the completion but
+       * for now just flush the cache to make the data visible.
+       */
+      if ((img_barrier->oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ||
+           img_barrier->oldLayout == VK_IMAGE_LAYOUT_GENERAL) &&
+          (img_barrier->newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL ||
+           img_barrier->newLayout == VK_IMAGE_LAYOUT_GENERAL)) {
+         flush_llc = true;
+      }
+
+      VkImageAspectFlags img_aspects =
+         vk_image_expand_aspect_mask(&image->vk, range->aspectMask);
+      anv_foreach_image_aspect_bit(aspect_bit, image, img_aspects) {
+         const uint32_t plane =
+            anv_image_aspect_to_plane(image, 1UL << aspect_bit);
+         if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage)) {
+            flush_ccs = true;
+         }
+      }
+   }
+
+   for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
+      /* Flush the cache if something is written by the transfer command and
+       * used by any other stages except transfer stage or if
+       * srcQueueFamilyIndex is not equal to dstQueueFamilyIndex, this memory
+       * barrier defines a queue family transfer operation.
+       */
+      if ((stage_is_transfer(dep_info->pBufferMemoryBarriers[i].srcStageMask) &&
+           mask_is_write(dep_info->pBufferMemoryBarriers[i].srcAccessMask)) ||
+          (dep_info->pBufferMemoryBarriers[i].srcQueueFamilyIndex !=
+           dep_info->pBufferMemoryBarriers[i].dstQueueFamilyIndex)) {
+         flush_llc = true;
+         break;
+      }
+   }
+
+   for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
+      /* Flush the cache if something is written by the transfer command and
+       * used by any other stages except transfer stage.
+       */
+      if (stage_is_transfer(dep_info->pMemoryBarriers[i].srcStageMask) &&
+          mask_is_write(dep_info->pMemoryBarriers[i].srcAccessMask)) {
+         flush_llc = true;
+         break;
+      }
+   }
+
+   if (flush_ccs || flush_llc) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), fd) {
+         fd.FlushCCS = flush_ccs;
+         fd.FlushLLC = flush_llc;
+      }
+   }
+#endif
+}
+
 static void
 cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
                    const VkDependencyInfo *dep_info,
@@ -3957,6 +4044,11 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
       return;
    }
 
+   if (anv_cmd_buffer_is_blitter_queue(cmd_buffer)) {
+      cmd_buffer_barrier_blitter(cmd_buffer, dep_info);
+      return;
+   }
+
    struct anv_device *device = cmd_buffer->device;
 
    /* XXX: Right now, we're really dumb and just flush whatever categories
diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c
index 00d861604cf..965ff32f25e 100644
--- a/src/intel/vulkan/genX_init_state.c
+++ b/src/intel/vulkan/genX_init_state.c
@@ -711,6 +711,13 @@ genX(init_device_state)(struct anv_device *device)
    case INTEL_ENGINE_CLASS_VIDEO:
       res = VK_SUCCESS;
       break;
+   case INTEL_ENGINE_CLASS_COPY:
+      /**
+       * Execute RCS init batch by default on the companion RCS command buffer in
+       * order to support MSAA copy/clear operations on copy queue.
+       */
+      res = init_render_queue_state(queue, true /* is_companion_rcs_batch */);
+      break;
    default:
       res = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
       break;