anv: Copy/Clear MSAA images over companion RCS while we are on compute

When an MSAA copy/clear operation is recorded on the compute queue, use
the companion RCS command buffer to carry out the operation.

v2: (Sagar)
- Flush cache according to command buffer
- Invalidate the AUX table when we create a new companion RCS command
  buffer if the platform supports AUX-TT.

Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23661>
This commit is contained in:
Lionel Landwerlin 2023-05-21 23:11:13 -07:00 committed by Marge Bot
parent 5b8bef8650
commit 6f4fe3f81b
3 changed files with 176 additions and 0 deletions

View file

@ -22,6 +22,7 @@
*/
#include "anv_private.h"
#include "genxml/gen8_pack.h"
static bool
lookup_blorp_shader(struct blorp_batch *batch,
@ -371,6 +372,38 @@ copy_image(struct anv_cmd_buffer *cmd_buffer,
}
}
static struct anv_state
record_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer)
{
const struct intel_device_info *info = cmd_buffer->device->info;
if (cmd_buffer->companion_rcs_cmd_buffer == NULL) {
anv_create_companion_rcs_command_buffer(cmd_buffer);
/* Re-emit the aux table register in every command buffer. This way we're
* ensured that we have the table even if this command buffer doesn't
* initialize any images.
*/
if (cmd_buffer->device->info->has_aux_map) {
assert(cmd_buffer->companion_rcs_cmd_buffer != NULL);
anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer,
ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
"new cmd buffer with aux-tt");
}
}
assert(cmd_buffer->companion_rcs_cmd_buffer != NULL);
return anv_genX(info, cmd_buffer_begin_companion_rcs_syncpoint)(cmd_buffer);
}
/**
 * Close a sync point previously opened by record_main_rcs_cmd_buffer_done().
 *
 * Dispatches to the per-generation
 * cmd_buffer_end_companion_rcs_syncpoint() implementation, forwarding the
 * main command buffer and the sync point state unchanged.
 */
static void
end_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_state syncpoint)
{
   const struct intel_device_info *devinfo = cmd_buffer->device->info;

   anv_genX(devinfo,
            cmd_buffer_end_companion_rcs_syncpoint)(cmd_buffer, syncpoint);
}
void anv_CmdCopyImage2(
VkCommandBuffer commandBuffer,
const VkCopyImageInfo2* pCopyImageInfo)
@ -379,6 +412,17 @@ void anv_CmdCopyImage2(
ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);
struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
UNUSED struct anv_state rcs_done = ANV_STATE_NULL;;
if (cmd_buffer->device->info->verx10 >= 125 &&
dst_image->vk.samples > 1 &&
(anv_cmd_buffer_is_blitter_queue(main_cmd_buffer) ||
anv_cmd_buffer_is_compute_queue(main_cmd_buffer))) {
rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
}
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0);
@ -390,6 +434,9 @@ void anv_CmdCopyImage2(
}
anv_blorp_batch_finish(&batch);
if (rcs_done.alloc_size)
end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}
static enum isl_format
@ -974,6 +1021,17 @@ void anv_CmdClearColorImage(
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_image, image, _image);
struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
if (cmd_buffer->device->info->verx10 >= 125 &&
image->vk.samples > 1 &&
(anv_cmd_buffer_is_blitter_queue(main_cmd_buffer) ||
anv_cmd_buffer_is_compute_queue(main_cmd_buffer))) {
rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
}
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0);
@ -1023,6 +1081,9 @@ void anv_CmdClearColorImage(
}
anv_blorp_batch_finish(&batch);
if (rcs_done.alloc_size)
end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}
void anv_CmdClearDepthStencilImage(

View file

@ -246,3 +246,10 @@ genX(emit_breakpoint)(struct anv_batch *batch,
if (INTEL_DEBUG(DEBUG_DRAW_BKP))
genX(batch_emit_breakpoint)(batch, device, emit_before_draw);
}
/* Opens a sync point between cmd_buffer and its companion RCS command
 * buffer: allocates two semaphore dwords, emits the flush/signal/wait
 * sequence into cmd_buffer's batch and the matching wait into the
 * companion's batch. Returns the allocated state; only implemented for
 * GFX_VERx10 >= 125.
 */
struct anv_state
genX(cmd_buffer_begin_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer);
/* Closes a sync point returned by the begin call: flushes the companion RCS
 * command buffer and emits, in the companion's batch, the store that
 * satisfies the semaphore wait emitted in cmd_buffer's batch. Only
 * implemented for GFX_VERx10 >= 125.
 */
void
genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer,
struct anv_state syncpoint);

View file

@ -7961,3 +7961,111 @@ genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch,
}
}
/**
 * Emit the opening half of a handshake between cmd_buffer and its companion
 * RCS command buffer.
 *
 * Two consecutive dwords are allocated from dynamic state and zeroed:
 *   - dword 0 is polled by cmd_buffer's batch,
 *   - dword 1 is polled by the companion's batch.
 *
 * Commands are emitted into both batches (see the inline comments for the
 * exact sequence). The returned state must be passed to
 * cmd_buffer_end_companion_rcs_syncpoint(), which emits the write that
 * satisfies the wait on dword 0.
 *
 * Only implemented for GFX_VERx10 >= 125.
 */
struct anv_state
genX(cmd_buffer_begin_companion_rcs_syncpoint)(
   struct anv_cmd_buffer *cmd_buffer)
{
#if GFX_VERx10 >= 125
   const struct intel_device_info *devinfo = cmd_buffer->device->info;
   const size_t sync_size = 2 * sizeof(uint32_t);

   struct anv_state sync_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, sync_size, 4);

   /* Dword 0: waited on by the main batch. Dword 1 (byte offset 4): waited
    * on by the companion RCS batch.
    */
   struct anv_address main_sem_addr =
      anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
                                   sync_state);
   struct anv_address rcs_sem_addr = anv_address_add(main_sem_addr, 4);

   /* Both semaphores start out cleared. */
   memset(sync_state.map, 0, sync_size);

   struct mi_builder b;
   struct anv_batch *main_batch = &cmd_buffer->batch;

   /* Main batch, step 1: flush before handing over to the RCS. Compute
    * queues go through the pending pipe bits, blitter queues use
    * MI_FLUSH_DW.
    */
   if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_FLUSH_BITS |
                                ANV_PIPE_INVALIDATE_BITS |
                                ANV_PIPE_STALL_BITS,
                                "post main cmd buffer invalidate");
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
   } else if (anv_cmd_buffer_is_blitter_queue(cmd_buffer)) {
      anv_batch_emit(main_batch, GENX(MI_FLUSH_DW), flush) {
         flush.FlushCCS = true; /* Maybe handle Flush LLC */
      }
   }

   /* Main batch, steps 2-4: release the RCS, poll until our own semaphore
    * is set, then clear it so the command buffer can be executed again.
    */
   mi_builder_init(&b, devinfo, main_batch);
   mi_store(&b, mi_mem32(rcs_sem_addr), mi_imm(0x1));
   anv_batch_emit(main_batch, GENX(MI_SEMAPHORE_WAIT), wait) {
      wait.WaitMode = PollingMode;
      wait.CompareOperation = COMPARE_SAD_EQUAL_SDD;
      wait.SemaphoreDataDword = 0x1;
      wait.SemaphoreAddress = main_sem_addr;
   }
   mi_store(&b, mi_mem32(main_sem_addr), mi_imm(0x0));

   /* Companion RCS batch: poll until the main batch releases us, then clear
    * the semaphore so the command buffer can be executed again.
    */
   struct anv_batch *rcs_batch =
      &cmd_buffer->companion_rcs_cmd_buffer->batch;

   mi_builder_init(&b, devinfo, rcs_batch);
   anv_batch_emit(rcs_batch, GENX(MI_SEMAPHORE_WAIT), wait) {
      wait.WaitMode = PollingMode;
      wait.CompareOperation = COMPARE_SAD_EQUAL_SDD;
      wait.SemaphoreDataDword = 0x1;
      wait.SemaphoreAddress = rcs_sem_addr;
   }
   mi_store(&b, mi_mem32(rcs_sem_addr), mi_imm(0x0));

   return sync_state;
#else
   unreachable("Not implemented");
#endif
}
/**
 * Emit the closing half of the handshake opened by
 * cmd_buffer_begin_companion_rcs_syncpoint().
 *
 * Everything is emitted into the companion RCS batch: a flush + invalidate
 * + stall, followed by a store of 1 to the first dword of syncpoint, which
 * is the address the main batch's semaphore wait polls.
 *
 * Only implemented for GFX_VERx10 >= 125.
 */
void
genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer,
                                             struct anv_state syncpoint)
{
#if GFX_VERx10 >= 125
   /* Address the main batch is polling on. */
   struct anv_address main_sem_addr =
      anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
                                   syncpoint);
   struct anv_cmd_buffer *rcs_cmd_buffer =
      cmd_buffer->companion_rcs_cmd_buffer;
   struct mi_builder b;

   /* Flush the RCS work before signalling. */
   anv_add_pending_pipe_bits(rcs_cmd_buffer,
                             ANV_PIPE_FLUSH_BITS |
                             ANV_PIPE_INVALIDATE_BITS |
                             ANV_PIPE_STALL_BITS,
                             "post rcs flush");
   genX(cmd_buffer_apply_pipe_flushes)(rcs_cmd_buffer);

   /* Release the main batch. */
   mi_builder_init(&b, cmd_buffer->device->info, &rcs_cmd_buffer->batch);
   mi_store(&b, mi_mem32(main_sem_addr), mi_imm(0x1));
#else
   unreachable("Not implemented");
#endif
}