mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 07:08:04 +02:00
radv: make sure to wait for CP DMA when needed
This might fix some synchronization issues. I don't know if that will affect performance but it's required for correctness. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
parent
0ecf5dcedc
commit
27aaa64c55
3 changed files with 52 additions and 4 deletions
|
|
@ -2500,6 +2500,11 @@ VkResult radv_EndCommandBuffer(
|
|||
si_emit_cache_flush(cmd_buffer);
|
||||
}
|
||||
|
||||
/* Make sure CP DMA is idle at the end of IBs because the kernel
|
||||
* doesn't wait for it.
|
||||
*/
|
||||
si_cp_dma_wait_for_idle(cmd_buffer);
|
||||
|
||||
vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
|
||||
|
||||
if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
|
||||
|
|
@ -4054,6 +4059,11 @@ void radv_CmdPipelineBarrier(
|
|||
0);
|
||||
}
|
||||
|
||||
/* Make sure CP DMA is idle because the driver might have performed a
|
||||
* DMA operation for copying or filling buffers/images.
|
||||
*/
|
||||
si_cp_dma_wait_for_idle(cmd_buffer);
|
||||
|
||||
cmd_buffer->state.flush_bits |= dst_flush_bits;
|
||||
}
|
||||
|
||||
|
|
@ -4070,6 +4080,11 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
|
|||
|
||||
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18);
|
||||
|
||||
/* Make sure CP DMA is idle because the driver might have performed a
|
||||
* DMA operation for copying or filling buffers/images.
|
||||
*/
|
||||
si_cp_dma_wait_for_idle(cmd_buffer);
|
||||
|
||||
/* TODO: this is overkill. Probably should figure something out from
|
||||
* the stage mask. */
|
||||
|
||||
|
|
|
|||
|
|
@ -972,6 +972,9 @@ struct radv_cmd_state {
|
|||
uint32_t last_num_instances;
|
||||
uint32_t last_first_instance;
|
||||
uint32_t last_vertex_offset;
|
||||
|
||||
/* Whether CP DMA is busy/idle. */
|
||||
bool dma_is_busy;
|
||||
};
|
||||
|
||||
struct radv_cmd_pool {
|
||||
|
|
@ -1086,6 +1089,8 @@ void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
|
|||
unsigned size);
|
||||
void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
|
||||
uint64_t size, unsigned value);
|
||||
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
|
||||
bool
|
||||
radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
|
||||
|
|
|
|||
|
|
@ -1214,7 +1214,6 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
|
|||
struct radeon_winsys_cs *cs = cmd_buffer->cs;
|
||||
uint32_t header = 0, command = 0;
|
||||
|
||||
assert(size);
|
||||
assert(size <= cp_dma_max_byte_count(cmd_buffer));
|
||||
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
|
||||
|
|
@ -1273,9 +1272,14 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
|
|||
* indices. If we wanted to execute CP DMA in PFP, this packet
|
||||
* should precede it.
|
||||
*/
|
||||
if ((flags & CP_DMA_SYNC) && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
|
||||
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
|
||||
radeon_emit(cs, 0);
|
||||
if (flags & CP_DMA_SYNC) {
|
||||
if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
|
||||
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
|
||||
radeon_emit(cs, 0);
|
||||
}
|
||||
|
||||
/* CP will see the sync flag and wait for all DMAs to complete. */
|
||||
cmd_buffer->state.dma_is_busy = false;
|
||||
}
|
||||
|
||||
if (unlikely(cmd_buffer->device->trace_bo))
|
||||
|
|
@ -1339,6 +1343,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
|
|||
uint64_t main_src_va, main_dest_va;
|
||||
uint64_t skipped_size = 0, realign_size = 0;
|
||||
|
||||
/* Assume that we are not going to sync after the last DMA operation. */
|
||||
cmd_buffer->state.dma_is_busy = true;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
|
||||
cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
|
||||
|
|
@ -1402,6 +1408,9 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
|
|||
|
||||
assert(va % 4 == 0 && size % 4 == 0);
|
||||
|
||||
/* Assume that we are not going to sync after the last DMA operation. */
|
||||
cmd_buffer->state.dma_is_busy = true;
|
||||
|
||||
while (size) {
|
||||
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
|
||||
unsigned dma_flags = CP_DMA_CLEAR;
|
||||
|
|
@ -1417,6 +1426,25 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
|
|||
}
|
||||
}
|
||||
|
||||
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK)
|
||||
return;
|
||||
|
||||
if (!cmd_buffer->state.dma_is_busy)
|
||||
return;
|
||||
|
||||
/* Issue a dummy DMA that copies zero bytes.
|
||||
*
|
||||
* The DMA engine will see that there's no work to do and skip this
|
||||
* DMA request, however, the CP will see the sync flag and still wait
|
||||
* for all DMAs to complete.
|
||||
*/
|
||||
si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC);
|
||||
|
||||
cmd_buffer->state.dma_is_busy = false;
|
||||
}
|
||||
|
||||
/* For MSAA sample positions. */
|
||||
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
|
||||
(((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue