radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9

A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
counters) must immediately precede every timestamp event to
prevent a GPU hang on GFX9.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Samuel Pitoiset 2018-07-13 19:37:20 +02:00 committed by Dylan Baker
parent a0dd7bd498
commit 63ac3b50e7
5 changed files with 47 additions and 14 deletions

View file

@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
}
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends;
unsigned eop_bug_offset;
void *fence_ptr;
radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
&cmd_buffer->gfx9_fence_offset,
&fence_ptr);
cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
/* Allocate a buffer for the EOP bug on GFX9. */
radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
&eop_bug_offset, &fence_ptr);
cmd_buffer->gfx9_eop_bug_va =
radv_buffer_get_va(cmd_buffer->upload.upload_bo);
cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
}
cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->device->physical_device->rad_info.chip_class,
ptr, va,
radv_cmd_buffer_uses_mec(cmd_buffer),
flags);
flags, cmd_buffer->gfx9_eop_bug_va);
}
if (unlikely(cmd_buffer->device->trace_bo))
@ -4100,7 +4110,8 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->device->physical_device->rad_info.chip_class,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, va, 2, value);
1, va, 2, value,
cmd_buffer->gfx9_eop_bug_va);
assert(cmd_buffer->cs->cdw <= cdw_max);
}

View file

@ -2181,7 +2181,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_INV_GLOBAL_L2);
RADV_CMD_FLAG_INV_GLOBAL_L2, 0);
} else if (i == 1) {
si_cs_emit_cache_flush(cs,
queue->device->physical_device->rad_info.chip_class,
@ -2191,7 +2191,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_INV_GLOBAL_L2);
RADV_CMD_FLAG_INV_GLOBAL_L2, 0);
}
if (!queue->device->ws->cs_finalize(cs))

View file

@ -1037,6 +1037,7 @@ struct radv_cmd_buffer {
uint32_t gfx9_fence_offset;
struct radeon_winsys_bo *gfx9_fence_bo;
uint32_t gfx9_fence_idx;
uint64_t gfx9_eop_bug_va;
/**
* Whether a query pool has been resetted and we have to flush caches.
@ -1069,7 +1070,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
unsigned data_sel,
uint64_t va,
uint32_t old_fence,
uint32_t new_fence);
uint32_t new_fence,
uint64_t gfx9_eop_bug_va);
void si_emit_wait_fence(struct radeon_winsys_cs *cs,
bool predicated,
@ -1079,7 +1081,8 @@ void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
enum chip_class chip_class,
uint32_t *fence_ptr, uint64_t va,
bool is_mec,
enum radv_cmd_flush_bits flush_bits);
enum radv_cmd_flush_bits flush_bits,
uint64_t gfx9_eop_bug_va);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,

View file

@ -1169,7 +1169,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->device->physical_device->rad_info.chip_class,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, avail_va, 0, 1);
1, avail_va, 0, 1,
cmd_buffer->gfx9_eop_bug_va);
break;
default:
unreachable("ending unhandled query type");
@ -1292,13 +1293,15 @@ void radv_CmdWriteTimestamp(
cmd_buffer->device->physical_device->rad_info.chip_class,
mec,
V_028A90_BOTTOM_OF_PIPE_TS, 0,
3, query_va, 0, 0);
3, query_va, 0, 0,
cmd_buffer->gfx9_eop_bug_va);
si_cs_emit_write_event_eop(cs,
false,
cmd_buffer->device->physical_device->rad_info.chip_class,
mec,
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, avail_va, 0, 1);
1, avail_va, 0, 1,
cmd_buffer->gfx9_eop_bug_va);
break;
}
query_va += pool->stride;

View file

@ -852,7 +852,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
unsigned data_sel,
uint64_t va,
uint32_t old_fence,
uint32_t new_fence)
uint32_t new_fence,
uint64_t gfx9_eop_bug_va)
{
unsigned op = EVENT_TYPE(event) |
EVENT_INDEX(5) |
@ -860,6 +861,17 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
if (chip_class >= GFX9 || is_gfx8_mec) {
/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
* counters) must immediately precede every timestamp event to
* prevent a GPU hang on GFX9.
*/
if (chip_class == GFX9) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, gfx9_eop_bug_va);
radeon_emit(cs, gfx9_eop_bug_va >> 32);
}
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, predicated));
radeon_emit(cs, op);
radeon_emit(cs, EOP_DATA_SEL(data_sel));
@ -941,7 +953,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
uint32_t *flush_cnt,
uint64_t flush_va,
bool is_mec,
enum radv_cmd_flush_bits flush_bits)
enum radv_cmd_flush_bits flush_bits,
uint64_t gfx9_eop_bug_va)
{
unsigned cp_coher_cntl = 0;
uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
@ -971,7 +984,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
chip_class,
is_mec,
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
0, 0, 0, 0, 0);
0, 0, 0, 0, 0,
gfx9_eop_bug_va);
}
}
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
@ -1057,7 +1071,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
uint32_t old_fence = (*flush_cnt)++;
si_cs_emit_write_event_eop(cs, false, chip_class, false, cb_db_event, tc_flags, 1,
flush_va, old_fence, *flush_cnt);
flush_va, old_fence, *flush_cnt,
gfx9_eop_bug_va);
si_emit_wait_fence(cs, false, flush_va, *flush_cnt, 0xffffffff);
}
@ -1149,7 +1164,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->device->physical_device->rad_info.chip_class,
ptr, va,
radv_cmd_buffer_uses_mec(cmd_buffer),
cmd_buffer->state.flush_bits);
cmd_buffer->state.flush_bits,
cmd_buffer->gfx9_eop_bug_va);
if (unlikely(cmd_buffer->device->trace_bo))