radv: Dump command buffer on hang.

v2:
  - Now use the filename specified by RADV_TRACE_FILE env var.
  - Use the same var to enable tracing.

I thought we might as well always set the filename explicitly
instead of having some arbitrary default, and at that point
we don't need a separate option to enable the feature.

Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Bas Nieuwenhuizen 2016-12-23 23:51:18 +01:00
parent 0ef1b4d5b1
commit 97dfff5410
6 changed files with 150 additions and 9 deletions

View file

@ -32,6 +32,8 @@
#include "vk_format.h"
#include "radv_meta.h"
#include "ac_debug.h"
static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageLayout src_layout,
@ -272,6 +274,32 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
return true;
}
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = cmd_buffer->device;
struct radeon_winsys_cs *cs = cmd_buffer->cs;
uint64_t va;
if (!device->trace_bo)
return;
va = device->ws->buffer_get_va(device->trace_bo);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
++cmd_buffer->state.trace_id;
device->ws->cs_add_buffer(cs, device->trace_bo, 8);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, cmd_buffer->state.trace_id);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
}
static void
radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
@ -1929,6 +1957,8 @@ void radv_CmdDraw(
S_0287F0_USE_OPAQUE(0));
assert(cmd_buffer->cs->cdw <= cdw_max);
radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
@ -1984,6 +2014,7 @@ void radv_CmdDrawIndexed(
radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
assert(cmd_buffer->cs->cdw <= cdw_max);
radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void
@ -2035,6 +2066,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, count_va >> 32);
radeon_emit(cs, stride); /* stride */
radeon_emit(cs, di_src_sel);
radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void
@ -2188,6 +2220,7 @@ void radv_CmdDispatch(
radeon_emit(cmd_buffer->cs, 1);
assert(cmd_buffer->cs->cdw <= cdw_max);
radv_cmd_buffer_trace_emit(cmd_buffer);
}
void radv_CmdDispatchIndirect(
@ -2239,6 +2272,7 @@ void radv_CmdDispatchIndirect(
}
assert(cmd_buffer->cs->cdw <= cdw_max);
radv_cmd_buffer_trace_emit(cmd_buffer);
}
void radv_unaligned_dispatch(
@ -2292,6 +2326,7 @@ void radv_unaligned_dispatch(
S_00B800_PARTIAL_TG_EN(1));
assert(cmd_buffer->cs->cdw <= cdw_max);
radv_cmd_buffer_trace_emit(cmd_buffer);
}
void radv_CmdEndRenderPass(

View file

@ -760,16 +760,34 @@ VkResult radv_CreateDevice(
device->ws->cs_finalize(device->empty_cs[family]);
}
/* Tracing is opt-in: it is enabled only when RADV_TRACE_FILE is set.
 * Allocate a CPU-accessible buffer and keep it mapped so the trace id
 * can be read back from the CPU side.
 *
 * The fail path returns `result`, so it must hold an error code before
 * jumping there; otherwise a stale value would be returned after the
 * device has been freed.
 * NOTE(review): confirm these error codes match what callers expect. */
if (getenv("RADV_TRACE_FILE")) {
	device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
						     RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
	if (!device->trace_bo) {
		result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
		goto fail;
	}

	device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
	if (!device->trace_id_ptr) {
		result = VK_ERROR_INITIALIZATION_FAILED;
		goto fail;
	}
}
*pDevice = radv_device_to_handle(device);
return VK_SUCCESS;
fail:
if (device->trace_bo)
device->ws->buffer_destroy(device->trace_bo);
for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)
radv_queue_finish(&device->queues[i][q]);
if (device->queue_count[i])
vk_free(&device->alloc, device->queues[i]);
}
if (device->hw_ctx)
device->ws->ctx_destroy(device->hw_ctx);
vk_free(&device->alloc, device);
return result;
}
@ -780,6 +798,9 @@ void radv_DestroyDevice(
{
RADV_FROM_HANDLE(radv_device, device, _device);
if (device->trace_bo)
device->ws->buffer_destroy(device->trace_bo);
device->ws->ctx_destroy(device->hw_ctx);
for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)
@ -869,6 +890,21 @@ void radv_GetDeviceQueue(
*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}
/**
 * Write a dump of \p cs to the file named by the RADV_TRACE_FILE
 * environment variable.
 *
 * The file starts with the trace id currently stored in the device's
 * trace buffer; the same id is handed to the winsys cs_dump callback.
 * Failures are reported on stderr and the function returns without
 * dumping.
 */
static void radv_dump_trace(struct radv_device *device,
			    struct radeon_winsys_cs *cs)
{
	const char *filename = getenv("RADV_TRACE_FILE");
	uint32_t trace_id;
	FILE *f;

	/* The variable may have been unset since device creation; passing
	 * NULL to fopen() or to "%s" would be undefined behaviour. */
	if (!filename) {
		fprintf(stderr, "Failed to write trace dump: RADV_TRACE_FILE is not set\n");
		return;
	}

	f = fopen(filename, "w");
	if (!f) {
		fprintf(stderr, "Failed to write trace dump to %s\n", filename);
		return;
	}

	/* Read the id once so the header and the dump agree on its value. */
	trace_id = *device->trace_id_ptr;

	fprintf(f, "Trace ID: %x\n", (unsigned)trace_id);
	device->ws->cs_dump(cs, f, trace_id);
	fclose(f);
}
VkResult radv_QueueSubmit(
VkQueue _queue,
uint32_t submitCount,
@ -880,10 +916,12 @@ VkResult radv_QueueSubmit(
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
int ret;
uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
for (uint32_t i = 0; i < submitCount; i++) {
struct radeon_winsys_cs **cs_array;
bool can_patch = true;
uint32_t advance;
if (!pSubmits[i].commandBufferCount)
continue;
@ -900,15 +938,41 @@ VkResult radv_QueueSubmit(
if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
can_patch = false;
}
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
pSubmits[i].commandBufferCount,
(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
pSubmits[i].waitSemaphoreCount,
(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
pSubmits[i].signalSemaphoreCount,
can_patch, base_fence);
if (ret)
radv_loge("failed to submit CS %d\n", i);
/* Submit the command buffers of this VkSubmitInfo in chunks of at most
 * max_cs_submission entries. Wait semaphores are only attached to the
 * first chunk and signal semaphores only to the last one. */
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
	advance = MIN2(max_cs_submission,
		       pSubmits[i].commandBufferCount - j);
	bool b = j == 0;
	bool e = j + advance == pSubmits[i].commandBufferCount;

	/* Reset the trace id so a later dump reflects this chunk only. */
	if (queue->device->trace_bo)
		*queue->device->trace_id_ptr = 0;

	/* Submit only the current chunk [j, j + advance); passing the
	 * whole array here would resubmit every command buffer on each
	 * iteration. */
	ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
					   advance,
					   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
					   b ? pSubmits[i].waitSemaphoreCount : 0,
					   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
					   e ? pSubmits[i].signalSemaphoreCount : 0,
					   can_patch, base_fence);

	if (ret) {
		radv_loge("failed to submit CS %d\n", i);
		abort();
	}

	/* When tracing, wait for the queue to go idle after every chunk;
	 * a failed wait is treated as a hang and the offending command
	 * stream is dumped before aborting. */
	if (queue->device->trace_bo) {
		bool success = queue->device->ws->ctx_wait_idle(
					queue->device->hw_ctx,
					radv_queue_family_to_ring(
						queue->queue_family_index),
					queue->queue_idx);

		if (!success) { /* Hang */
			radv_dump_trace(queue->device, cs_array[j]);
			abort();
		}
	}
}
free(cs_array);
}

View file

@ -481,6 +481,9 @@ struct radv_device {
float sample_locations_4x[4][2];
float sample_locations_8x[8][2];
float sample_locations_16x[16][2];
struct radeon_winsys_bo *trace_bo;
uint32_t *trace_id_ptr;
};
struct radv_device_memory {
@ -671,6 +674,7 @@ struct radv_cmd_state {
unsigned active_occlusion_queries;
float offset_scale;
uint32_t descriptors_dirty;
uint32_t trace_id;
};
struct radv_cmd_pool {
@ -765,6 +769,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radeon_winsys_bo *bo,
uint64_t offset, uint64_t size, uint32_t value);
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
/*
* Takes x,y,z as exact numbers of invocations, instead of blocks.

View file

@ -319,6 +319,8 @@ struct radeon_winsys {
void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
struct radeon_winsys_cs *child);
void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
int (*surface_init)(struct radeon_winsys *ws,
struct radeon_surf *surf);

View file

@ -718,6 +718,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
}
}
if (cmd_buffer->state.flush_bits)
radv_cmd_buffer_trace_emit(cmd_buffer);
cmd_buffer->state.flush_bits = 0;
}
@ -780,6 +782,8 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
radv_cmd_buffer_trace_emit(cmd_buffer);
}
/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
@ -820,6 +824,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,

View file

@ -27,6 +27,7 @@
#include <amdgpu_drm.h>
#include <assert.h>
#include "ac_debug.h"
#include "amdgpu_id.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
@ -775,6 +776,34 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
return ret;
}
/**
 * Translate the GPU virtual address \p addr into a CPU pointer.
 *
 * The old IB buffers of \p cs are searched first, followed by the current
 * IB buffer. The first buffer whose [va, va + size) range contains the
 * address and which can be CPU-mapped provides the result. If no buffer
 * yields a successful mapping, whatever is left in the local mapping
 * pointer is returned (it starts out as NULL).
 */
static void *radv_amdgpu_winsys_get_cpu_addr(struct radv_amdgpu_cs *cs, uint64_t addr)
{
	void *mapping = NULL;

	for (unsigned idx = 0; idx <= cs->num_old_ib_buffers; ++idx) {
		struct radv_amdgpu_winsys_bo *candidate;

		/* The index one past the old buffers selects the current IB. */
		if (idx < cs->num_old_ib_buffers)
			candidate = (struct radv_amdgpu_winsys_bo *)cs->old_ib_buffers[idx];
		else
			candidate = (struct radv_amdgpu_winsys_bo *)cs->ib_buffer;

		/* Skip buffers that do not contain the address. */
		if (addr < candidate->va || addr - candidate->va >= candidate->size)
			continue;

		/* A failed mapping is not fatal; keep looking. */
		if (amdgpu_bo_cpu_map(candidate->bo, &mapping) != 0)
			continue;

		return (char *)mapping + (addr - candidate->va);
	}

	return mapping;
}
/**
 * Dump the main IB of \p _cs to \p file via ac_parse_ib().
 *
 * \p trace_id is forwarded unchanged to the parser. If the IB cannot be
 * resolved to a CPU address, a note is written to \p file and nothing is
 * parsed.
 */
static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
                                       FILE* file,
                                       uint32_t trace_id)
{
	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
	void *ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);

	/* The lookup returns NULL when no buffer could be mapped; the
	 * parser presumably reads through this pointer, so bail out. */
	if (!ib) {
		fprintf(file, "Failed to map the main IB for dumping\n");
		return;
	}

	ac_parse_ib(file, ib, cs->ib.size, trace_id, "main IB",
		    cs->ws->info.chip_class);
}
static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
@ -850,6 +879,7 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
ws->base.create_fence = radv_amdgpu_create_fence;
ws->base.destroy_fence = radv_amdgpu_destroy_fence;
ws->base.create_sem = radv_amdgpu_create_sem;