mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
radv: Dump command buffer on hang.
v2: - Now use the filename specified by the RADV_TRACE_FILE env var. - Use the same var to enable tracing. I thought we might as well always set the filename explicitly instead of having some arbitrary default, and at that point we don't need a separate feature enable. Signed-off-by: Bas Nieuwenhuizen <basni@google.com> Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
0ef1b4d5b1
commit
97dfff5410
6 changed files with 150 additions and 9 deletions
|
|
@ -32,6 +32,8 @@
|
|||
#include "vk_format.h"
|
||||
#include "radv_meta.h"
|
||||
|
||||
#include "ac_debug.h"
|
||||
|
||||
static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_image *image,
|
||||
VkImageLayout src_layout,
|
||||
|
|
@ -272,6 +274,32 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
|
|||
return true;
|
||||
}
|
||||
|
||||
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct radv_device *device = cmd_buffer->device;
|
||||
struct radeon_winsys_cs *cs = cmd_buffer->cs;
|
||||
uint64_t va;
|
||||
|
||||
if (!device->trace_bo)
|
||||
return;
|
||||
|
||||
va = device->ws->buffer_get_va(device->trace_bo);
|
||||
|
||||
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
|
||||
|
||||
++cmd_buffer->state.trace_id;
|
||||
device->ws->cs_add_buffer(cs, device->trace_bo, 8);
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
|
||||
radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
|
||||
S_370_WR_CONFIRM(1) |
|
||||
S_370_ENGINE_SEL(V_370_ME));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
radeon_emit(cs, cmd_buffer->state.trace_id);
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_pipeline *pipeline)
|
||||
|
|
@ -1929,6 +1957,8 @@ void radv_CmdDraw(
|
|||
S_0287F0_USE_OPAQUE(0));
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
}
|
||||
|
||||
static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
|
||||
|
|
@ -1984,6 +2014,7 @@ void radv_CmdDrawIndexed(
|
|||
radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2035,6 +2066,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
|
|||
radeon_emit(cs, count_va >> 32);
|
||||
radeon_emit(cs, stride); /* stride */
|
||||
radeon_emit(cs, di_src_sel);
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2188,6 +2220,7 @@ void radv_CmdDispatch(
|
|||
radeon_emit(cmd_buffer->cs, 1);
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
}
|
||||
|
||||
void radv_CmdDispatchIndirect(
|
||||
|
|
@ -2239,6 +2272,7 @@ void radv_CmdDispatchIndirect(
|
|||
}
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
}
|
||||
|
||||
void radv_unaligned_dispatch(
|
||||
|
|
@ -2292,6 +2326,7 @@ void radv_unaligned_dispatch(
|
|||
S_00B800_PARTIAL_TG_EN(1));
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
}
|
||||
|
||||
void radv_CmdEndRenderPass(
|
||||
|
|
|
|||
|
|
@ -760,16 +760,34 @@ VkResult radv_CreateDevice(
|
|||
device->ws->cs_finalize(device->empty_cs[family]);
|
||||
}
|
||||
|
||||
if (getenv("RADV_TRACE_FILE")) {
|
||||
device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
|
||||
RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
|
||||
if (!device->trace_bo)
|
||||
goto fail;
|
||||
|
||||
device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
|
||||
if (!device->trace_id_ptr)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
*pDevice = radv_device_to_handle(device);
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail:
|
||||
if (device->trace_bo)
|
||||
device->ws->buffer_destroy(device->trace_bo);
|
||||
|
||||
for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
|
||||
for (unsigned q = 0; q < device->queue_count[i]; q++)
|
||||
radv_queue_finish(&device->queues[i][q]);
|
||||
if (device->queue_count[i])
|
||||
vk_free(&device->alloc, device->queues[i]);
|
||||
}
|
||||
|
||||
if (device->hw_ctx)
|
||||
device->ws->ctx_destroy(device->hw_ctx);
|
||||
|
||||
vk_free(&device->alloc, device);
|
||||
return result;
|
||||
}
|
||||
|
|
@ -780,6 +798,9 @@ void radv_DestroyDevice(
|
|||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
|
||||
if (device->trace_bo)
|
||||
device->ws->buffer_destroy(device->trace_bo);
|
||||
|
||||
device->ws->ctx_destroy(device->hw_ctx);
|
||||
for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
|
||||
for (unsigned q = 0; q < device->queue_count[i]; q++)
|
||||
|
|
@ -869,6 +890,21 @@ void radv_GetDeviceQueue(
|
|||
*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
|
||||
}
|
||||
|
||||
static void radv_dump_trace(struct radv_device *device,
|
||||
struct radeon_winsys_cs *cs)
|
||||
{
|
||||
const char *filename = getenv("RADV_TRACE_FILE");
|
||||
FILE *f = fopen(filename, "w");
|
||||
if (!f) {
|
||||
fprintf(stderr, "Failed to write trace dump to %s\n", filename);
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
|
||||
device->ws->cs_dump(cs, f, *device->trace_id_ptr);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
VkResult radv_QueueSubmit(
|
||||
VkQueue _queue,
|
||||
uint32_t submitCount,
|
||||
|
|
@ -880,10 +916,12 @@ VkResult radv_QueueSubmit(
|
|||
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
|
||||
struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
|
||||
int ret;
|
||||
uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
|
||||
|
||||
for (uint32_t i = 0; i < submitCount; i++) {
|
||||
struct radeon_winsys_cs **cs_array;
|
||||
bool can_patch = true;
|
||||
uint32_t advance;
|
||||
|
||||
if (!pSubmits[i].commandBufferCount)
|
||||
continue;
|
||||
|
|
@ -900,15 +938,41 @@ VkResult radv_QueueSubmit(
|
|||
if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
|
||||
can_patch = false;
|
||||
}
|
||||
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
|
||||
pSubmits[i].commandBufferCount,
|
||||
(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
|
||||
pSubmits[i].waitSemaphoreCount,
|
||||
(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
|
||||
pSubmits[i].signalSemaphoreCount,
|
||||
can_patch, base_fence);
|
||||
if (ret)
|
||||
radv_loge("failed to submit CS %d\n", i);
|
||||
|
||||
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
|
||||
advance = MIN2(max_cs_submission,
|
||||
pSubmits[i].commandBufferCount - j);
|
||||
bool b = j == 0;
|
||||
bool e = j + advance == pSubmits[i].commandBufferCount;
|
||||
|
||||
if (queue->device->trace_bo)
|
||||
*queue->device->trace_id_ptr = 0;
|
||||
|
||||
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
|
||||
pSubmits[i].commandBufferCount,
|
||||
(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
|
||||
b ? pSubmits[i].waitSemaphoreCount : 0,
|
||||
(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
|
||||
e ? pSubmits[i].signalSemaphoreCount : 0,
|
||||
can_patch, base_fence);
|
||||
|
||||
if (ret) {
|
||||
radv_loge("failed to submit CS %d\n", i);
|
||||
abort();
|
||||
}
|
||||
if (queue->device->trace_bo) {
|
||||
bool success = queue->device->ws->ctx_wait_idle(
|
||||
queue->device->hw_ctx,
|
||||
radv_queue_family_to_ring(
|
||||
queue->queue_family_index),
|
||||
queue->queue_idx);
|
||||
|
||||
if (!success) { /* Hang */
|
||||
radv_dump_trace(queue->device, cs_array[j]);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
free(cs_array);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -481,6 +481,9 @@ struct radv_device {
|
|||
float sample_locations_4x[4][2];
|
||||
float sample_locations_8x[8][2];
|
||||
float sample_locations_16x[16][2];
|
||||
|
||||
struct radeon_winsys_bo *trace_bo;
|
||||
uint32_t *trace_id_ptr;
|
||||
};
|
||||
|
||||
struct radv_device_memory {
|
||||
|
|
@ -671,6 +674,7 @@ struct radv_cmd_state {
|
|||
unsigned active_occlusion_queries;
|
||||
float offset_scale;
|
||||
uint32_t descriptors_dirty;
|
||||
uint32_t trace_id;
|
||||
};
|
||||
|
||||
struct radv_cmd_pool {
|
||||
|
|
@ -765,6 +769,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
|
|||
void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radeon_winsys_bo *bo,
|
||||
uint64_t offset, uint64_t size, uint32_t value);
|
||||
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
|
||||
|
||||
/*
|
||||
* Takes x,y,z as exact numbers of invocations, instead of blocks.
|
||||
|
|
|
|||
|
|
@ -319,6 +319,8 @@ struct radeon_winsys {
|
|||
void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
|
||||
struct radeon_winsys_cs *child);
|
||||
|
||||
void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
|
||||
|
||||
int (*surface_init)(struct radeon_winsys *ws,
|
||||
struct radeon_surf *surf);
|
||||
|
||||
|
|
|
|||
|
|
@ -718,6 +718,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
|
|||
}
|
||||
}
|
||||
|
||||
if (cmd_buffer->state.flush_bits)
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
cmd_buffer->state.flush_bits = 0;
|
||||
}
|
||||
|
||||
|
|
@ -780,6 +782,8 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
|
|||
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
|
||||
radeon_emit(cs, 0);
|
||||
}
|
||||
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
}
|
||||
|
||||
/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
|
||||
|
|
@ -820,6 +824,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
|
|||
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
|
||||
radeon_emit(cs, 0);
|
||||
}
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
}
|
||||
|
||||
static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
#include <amdgpu_drm.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "ac_debug.h"
|
||||
#include "amdgpu_id.h"
|
||||
#include "radv_radeon_winsys.h"
|
||||
#include "radv_amdgpu_cs.h"
|
||||
|
|
@ -775,6 +776,34 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
|
|||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static void *radv_amdgpu_winsys_get_cpu_addr(struct radv_amdgpu_cs *cs, uint64_t addr)
|
||||
{
|
||||
void *ret = NULL;
|
||||
for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
|
||||
struct radv_amdgpu_winsys_bo *bo;
|
||||
|
||||
bo = (struct radv_amdgpu_winsys_bo*)
|
||||
(i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
|
||||
if (addr >= bo->va && addr - bo->va < bo->size) {
|
||||
if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
|
||||
return (char *)ret + (addr - bo->va);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
|
||||
FILE* file,
|
||||
uint32_t trace_id)
|
||||
{
|
||||
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
|
||||
|
||||
ac_parse_ib(file,
|
||||
radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
|
||||
cs->ib.size, trace_id, "main IB", cs->ws->info.chip_class);
|
||||
}
|
||||
|
||||
static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
|
||||
{
|
||||
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
|
||||
|
|
@ -850,6 +879,7 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
|
|||
ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
|
||||
ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
|
||||
ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
|
||||
ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
|
||||
ws->base.create_fence = radv_amdgpu_create_fence;
|
||||
ws->base.destroy_fence = radv_amdgpu_destroy_fence;
|
||||
ws->base.create_sem = radv_amdgpu_create_sem;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue