mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 04:20:08 +01:00
anv: Add support for a transfer queue on Alchemist
Alchemist has an improved blitter that's sufficiently powerful to implement a transfer queue. Tigerlake's blitter lacks compression handling and other features we need, unfortunately.

Rework (Sagar):
- Check blitter command buffer in EndCommandBuffer

v2: (Lionel)
- Look at image, buffer and memory barriers as well
- Flush cache if there is queue ownership transfer

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18325>
This commit is contained in:
parent
5112b42146
commit
17b8b2cffd
5 changed files with 138 additions and 5 deletions
|
|
@ -112,9 +112,16 @@ static void
|
|||
anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct blorp_batch *batch, enum blorp_batch_flags flags)
|
||||
{
|
||||
if (!(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
|
||||
assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
|
||||
VkQueueFlags queue_flags = cmd_buffer->queue_family->queueFlags;
|
||||
|
||||
if (queue_flags & VK_QUEUE_GRAPHICS_BIT) {
|
||||
/* blorp runs on render engine by default */
|
||||
} else if (queue_flags & VK_QUEUE_COMPUTE_BIT) {
|
||||
flags |= BLORP_BATCH_USE_COMPUTE;
|
||||
} else if (queue_flags & VK_QUEUE_TRANSFER_BIT) {
|
||||
flags |= BLORP_BATCH_USE_BLITTER;
|
||||
} else {
|
||||
unreachable("unknown queue family");
|
||||
}
|
||||
|
||||
blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
|
||||
|
|
|
|||
|
|
@ -1145,6 +1145,12 @@ anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
|
|||
enum intel_engine_class compute_class =
|
||||
c_count < 1 ? INTEL_ENGINE_CLASS_RENDER : INTEL_ENGINE_CLASS_COMPUTE;
|
||||
|
||||
int blit_count = 0;
|
||||
if (debug_get_bool_option("INTEL_COPY_CLASS", false)) {
|
||||
blit_count = intel_engines_count(pdevice->engine_info,
|
||||
INTEL_ENGINE_CLASS_COPY);
|
||||
}
|
||||
|
||||
anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count);
|
||||
|
||||
if (gc_count > 0) {
|
||||
|
|
@ -1192,6 +1198,13 @@ anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
|
|||
.engine_class = INTEL_ENGINE_CLASS_VIDEO,
|
||||
};
|
||||
}
|
||||
if (blit_count > 0) {
|
||||
pdevice->queue.families[family_count++] = (struct anv_queue_family) {
|
||||
.queueFlags = VK_QUEUE_TRANSFER_BIT,
|
||||
.queueCount = blit_count,
|
||||
.engine_class = INTEL_ENGINE_CLASS_COPY,
|
||||
};
|
||||
}
|
||||
|
||||
/* Increase count below when other families are added as a reminder to
|
||||
* increase the ANV_MAX_QUEUE_FAMILIES value.
|
||||
|
|
|
|||
|
|
@ -414,6 +414,18 @@ blorp_exec_on_compute(struct blorp_batch *batch,
|
|||
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
}
|
||||
|
||||
/* Execute a blorp operation for a batch that targets the blitter.
 *
 * Asserts that the batch carries BLORP_BATCH_USE_BLITTER and that the owning
 * command buffer's queue family advertises exactly VK_QUEUE_TRANSFER_BIT,
 * then forwards the work to blorp_exec() unchanged.
 */
static void
|
||||
blorp_exec_on_blitter(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
assert(batch->flags & BLORP_BATCH_USE_BLITTER);
|
||||
|
||||
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
|
||||
/* Equality, not a bit test: the family must expose transfer and nothing else. */
assert(cmd_buffer->queue_family->queueFlags == VK_QUEUE_TRANSFER_BIT);
|
||||
|
||||
blorp_exec(batch, params);
|
||||
}
|
||||
|
||||
void
|
||||
genX(blorp_exec)(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
|
|
@ -430,7 +442,9 @@ genX(blorp_exec)(struct blorp_batch *batch,
|
|||
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
|
||||
}
|
||||
|
||||
if (batch->flags & BLORP_BATCH_USE_COMPUTE)
|
||||
if (batch->flags & BLORP_BATCH_USE_BLITTER)
|
||||
blorp_exec_on_blitter(batch, params);
|
||||
else if (batch->flags & BLORP_BATCH_USE_COMPUTE)
|
||||
blorp_exec_on_compute(batch, params);
|
||||
else
|
||||
blorp_exec_on_render(batch, params);
|
||||
|
|
|
|||
|
|
@ -1808,6 +1808,9 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
|
|||
else if (bits == 0)
|
||||
return;
|
||||
|
||||
if (anv_cmd_buffer_is_blitter_queue(cmd_buffer))
|
||||
return;
|
||||
|
||||
const bool trace_flush =
|
||||
(bits & (ANV_PIPE_FLUSH_BITS |
|
||||
ANV_PIPE_STALL_BITS |
|
||||
|
|
@ -3390,7 +3393,8 @@ genX(BeginCommandBuffer)(
|
|||
|
||||
trace_intel_begin_cmd_buffer(&cmd_buffer->trace);
|
||||
|
||||
if (anv_cmd_buffer_is_video_queue(cmd_buffer))
|
||||
if (anv_cmd_buffer_is_video_queue(cmd_buffer) ||
|
||||
anv_cmd_buffer_is_blitter_queue(cmd_buffer))
|
||||
return VK_SUCCESS;
|
||||
|
||||
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
|
||||
|
|
@ -3560,7 +3564,8 @@ end_command_buffer(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
anv_measure_endcommandbuffer(cmd_buffer);
|
||||
|
||||
if (anv_cmd_buffer_is_video_queue(cmd_buffer)) {
|
||||
if (anv_cmd_buffer_is_video_queue(cmd_buffer) ||
|
||||
anv_cmd_buffer_is_blitter_queue(cmd_buffer)) {
|
||||
trace_intel_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer->vk.level);
|
||||
anv_cmd_buffer_end_batch_buffer(cmd_buffer);
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -3947,6 +3952,88 @@ cmd_buffer_barrier_video(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
}
|
||||
|
||||
/* Handle a barrier recorded on a blitter-queue command buffer.
 *
 * Walks the image, buffer and global memory barriers in dep_info, decides
 * whether the LLC and/or CCS need flushing, and emits at most one
 * MI_FLUSH_DW carrying both flags.
 *
 * flush_llc is set when:
 *   - an image or buffer barrier has srcQueueFamilyIndex != dstQueueFamilyIndex;
 *   - an image barrier goes from TRANSFER_DST_OPTIMAL or GENERAL to
 *     TRANSFER_SRC_OPTIMAL or GENERAL;
 *   - a buffer or memory barrier satisfies both
 *     stage_is_transfer(srcStageMask) and mask_is_write(srcAccessMask).
 *
 * flush_ccs is set when any plane selected by an image barrier's aspect mask
 * has an aux usage for which isl_aux_usage_has_ccs() returns true.
 *
 * The body is compiled only when GFX_VERx10 >= 125; otherwise this function
 * does nothing.
 */
static void
|
||||
cmd_buffer_barrier_blitter(struct anv_cmd_buffer *cmd_buffer,
|
||||
const VkDependencyInfo *dep_info)
|
||||
{
|
||||
#if GFX_VERx10 >= 125
|
||||
assert(anv_cmd_buffer_is_blitter_queue(cmd_buffer));
|
||||
|
||||
/* The blitter requires an MI_FLUSH_DW command when a buffer transitions
|
||||
* from being a destination to a source.
|
||||
*/
|
||||
bool flush_llc = false;
|
||||
bool flush_ccs = false;
|
||||
/* No early break in this loop (unlike the two below): every image barrier is visited so flush_ccs is computed over all of them. */
for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
|
||||
const VkImageMemoryBarrier2 *img_barrier =
|
||||
&dep_info->pImageMemoryBarriers[i];
|
||||
|
||||
ANV_FROM_HANDLE(anv_image, image, img_barrier->image);
|
||||
const VkImageSubresourceRange *range = &img_barrier->subresourceRange;
|
||||
|
||||
/* If srcQueueFamilyIndex is not equal to dstQueueFamilyIndex, this
|
||||
* memory barrier defines a queue family transfer operation.
|
||||
*/
|
||||
if (img_barrier->srcQueueFamilyIndex != img_barrier->dstQueueFamilyIndex)
|
||||
flush_llc = true;
|
||||
|
||||
/* Flush the cache if a transfer command reads the output of the previous
|
||||
* transfer command. Ideally we should just wait for the completion, but
|
||||
* for now just flush the cache to make the data visible.
|
||||
*/
|
||||
if ((img_barrier->oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ||
|
||||
img_barrier->oldLayout == VK_IMAGE_LAYOUT_GENERAL) &&
|
||||
(img_barrier->newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL ||
|
||||
img_barrier->newLayout == VK_IMAGE_LAYOUT_GENERAL)) {
|
||||
flush_llc = true;
|
||||
}
|
||||
|
||||
/* Request a CCS flush if any plane covered by the barrier's aspects uses a CCS-based aux usage. */
VkImageAspectFlags img_aspects =
|
||||
vk_image_expand_aspect_mask(&image->vk, range->aspectMask);
|
||||
anv_foreach_image_aspect_bit(aspect_bit, image, img_aspects) {
|
||||
const uint32_t plane =
|
||||
anv_image_aspect_to_plane(image, 1UL << aspect_bit);
|
||||
if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage)) {
|
||||
flush_ccs = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
|
||||
/* Flush the cache if something is written by the transfer command and
|
||||
* used by any other stages except transfer stage or if
|
||||
* srcQueueFamilyIndex is not equal to dstQueueFamilyIndex, this memory
|
||||
* barrier defines a queue family transfer operation.
|
||||
*/
|
||||
/* NOTE(review): only srcStageMask/srcAccessMask are tested below; the destination stage is not inspected. */
if ((stage_is_transfer(dep_info->pBufferMemoryBarriers[i].srcStageMask) &&
|
||||
mask_is_write(dep_info->pBufferMemoryBarriers[i].srcAccessMask)) ||
|
||||
(dep_info->pBufferMemoryBarriers[i].srcQueueFamilyIndex !=
|
||||
dep_info->pBufferMemoryBarriers[i].dstQueueFamilyIndex)) {
|
||||
flush_llc = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
|
||||
/* Flush the cache if something is written by the transfer command and
|
||||
* used by any other stages except transfer stage.
|
||||
*/
|
||||
/* NOTE(review): as above, only the source stage/access masks are tested. */
if (stage_is_transfer(dep_info->pMemoryBarriers[i].srcStageMask) &&
|
||||
mask_is_write(dep_info->pMemoryBarriers[i].srcAccessMask)) {
|
||||
flush_llc = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit a single MI_FLUSH_DW only if at least one flush was requested. */
if (flush_ccs || flush_llc) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), fd) {
|
||||
fd.FlushCCS = flush_ccs;
|
||||
fd.FlushLLC = flush_llc;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
|
||||
const VkDependencyInfo *dep_info,
|
||||
|
|
@ -3957,6 +4044,11 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
|
|||
return;
|
||||
}
|
||||
|
||||
if (anv_cmd_buffer_is_blitter_queue(cmd_buffer)) {
|
||||
cmd_buffer_barrier_blitter(cmd_buffer, dep_info);
|
||||
return;
|
||||
}
|
||||
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
|
||||
/* XXX: Right now, we're really dumb and just flush whatever categories
|
||||
|
|
|
|||
|
|
@ -711,6 +711,13 @@ genX(init_device_state)(struct anv_device *device)
|
|||
case INTEL_ENGINE_CLASS_VIDEO:
|
||||
res = VK_SUCCESS;
|
||||
break;
|
||||
case INTEL_ENGINE_CLASS_COPY:
|
||||
/**
|
||||
* Execute RCS init batch by default on the companion RCS command buffer in
|
||||
* order to support MSAA copy/clear operations on copy queue.
|
||||
*/
|
||||
res = init_render_queue_state(queue, true /* is_companion_rcs_batch */);
|
||||
break;
|
||||
default:
|
||||
res = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue