winsys/amdgpu: userq job log fwm packet debug count
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Add a WRITE_DATA packet before and after the FENCE_WAIT_MULTI packet. The
last number written by a WRITE_DATA packet shows whether or not the CP
firmware got past the FENCE_WAIT_MULTI packet.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39206>
This commit is contained in:
Yogesh Mohan Marimuthu 2024-10-29 10:51:43 +05:30 committed by Marge Bot
parent 2547fd0f59
commit ddf1e34dd6
3 changed files with 39 additions and 5 deletions

View file

@ -1416,6 +1416,15 @@ struct cond_exec_skip_count {
uint64_t start_wptr;
};
/* Emit a six-dword WRITE_DATA packet that stores the 64-bit value `number`
 * at userq->write_data_pkt_dbg_count_va.
 *
 * Dword layout: packet header, control word, destination VA low/high,
 * value low/high.
 *
 * Requirements at the expansion site: a `userq` pointer must be in scope
 * (the macro reads userq->write_data_pkt_dbg_count_va), along with whatever
 * amdgpu_pkt_add_dw() itself needs.
 *
 * `number` is evaluated exactly once and converted to uint64_t before it is
 * split, so callers may pass arbitrary expressions (e.g. `a | b`) without
 * operator-precedence surprises or duplicated side effects.
 */
#define add_dbg_count_write_data_pkt(number) do { \
   const uint64_t dbg_count_value_ = (uint64_t)(number); \
   amdgpu_pkt_add_dw(PKT3(PKT3_WRITE_DATA, 4, 0)); \
   amdgpu_pkt_add_dw(WRITE_DATA_DST_SEL(5) | WRITE_DATA_WR_CONFIRM | WRITE_DATA_CACHE_POLICY(3)); \
   amdgpu_pkt_add_dw(userq->write_data_pkt_dbg_count_va); \
   amdgpu_pkt_add_dw(userq->write_data_pkt_dbg_count_va >> 32); \
   amdgpu_pkt_add_dw((uint32_t)dbg_count_value_); \
   amdgpu_pkt_add_dw((uint32_t)(dbg_count_value_ >> 32)); \
} while (0)
static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws,
struct amdgpu_userq *userq,
struct amdgpu_cs_context *csc,
@ -1441,6 +1450,9 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws,
cond_exec_skip_counts[0].start_wptr = amdgpu_pkt_get_next_wptr();
}
if (aws->userq_job_log)
add_dbg_count_write_data_pkt(1);
if (num_fences) {
unsigned max_num_fences_fwm;
unsigned num_fences_in_iter;
@ -1473,6 +1485,9 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws,
}
}
if (aws->userq_job_log)
add_dbg_count_write_data_pkt(2);
amdgpu_pkt_add_dw(PKT3(PKT3_HDP_FLUSH, 0, 0));
amdgpu_pkt_add_dw(0x0);

View file

@ -29,7 +29,8 @@ static bool
amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq,
uint64_t *vm_timeline_point_to_wait)
{
/* Allocate ring and user fence in one buffer. */
/* Allocate ring and user fence in one buffer. Also allocate for wait packet debug count
* variable. */
uint32_t gtt_bo_size = AMDGPU_USERQ_RING_SIZE + aws->info.gart_page_size;
userq->gtt_bo = amdgpu_bo_create(aws, gtt_bo_size, 256, RADEON_DOMAIN_GTT,
RADEON_FLAG_GL2_BYPASS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
@ -59,6 +60,12 @@ amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq,
*userq->wptr_bo_map = 0;
userq->next_wptr = 0;
userq->write_data_pkt_dbg_count_ptr = (uint64_t*)(userq->gtt_bo_map +
AMDGPU_USERQ_RING_SIZE + 8);
userq->write_data_pkt_dbg_count_va = amdgpu_bo_get_va(userq->gtt_bo) +
AMDGPU_USERQ_RING_SIZE + 8;
*userq->write_data_pkt_dbg_count_ptr = 0;
/* Allocate memory for rptr. */
userq->vram_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256, RADEON_DOMAIN_VRAM,
RADEON_FLAG_CLEAR_VRAM | RADEON_FLAG_GL2_BYPASS |
@ -85,14 +92,18 @@ userq_job_log_thread(void *data)
if (userq->userq_handle) {
uint64_t last_submitted_job = *userq->wptr_bo_map;
uint64_t last_completed_job = *userq->user_fence_ptr;
uint64_t last_write_data_pkt_dbg_count = *userq->write_data_pkt_dbg_count_ptr;
if (userq->last_submitted_job != last_submitted_job ||
userq->last_completed_job != last_completed_job) {
mesa_logi("amdgpu: uq_log: %s: submitted_job=%llx completed_job=%llx\n",
amdgpu_userq_str[i], (long long)last_submitted_job,
(long long)last_completed_job);
userq->last_completed_job != last_completed_job ||
userq->last_write_data_pkt_dbg_count != last_write_data_pkt_dbg_count) {
mesa_logi("amdgpu: uq_log: %s: submitted_job=%llx completed_job=%llx"
" write_data_pkt_dbg_count=%llx\n", amdgpu_userq_str[i],
(long long)last_submitted_job, (long long)last_completed_job,
(long long)last_write_data_pkt_dbg_count);
userq->last_submitted_job = last_submitted_job;
userq->last_completed_job = last_completed_job;
userq->last_write_data_pkt_dbg_count = last_write_data_pkt_dbg_count;
}
}
}

View file

@ -79,6 +79,13 @@ struct amdgpu_userq {
struct pb_buffer_lean *doorbell_bo;
uint64_t *doorbell_bo_map;
/* For debugging where the ring is stuck, a WRITE_DATA packet with a unique number is
* inserted in the ring. The number indicates which packets have been parsed by the CP.
* This value is printed in the job log.
*/
uint64_t *write_data_pkt_dbg_count_ptr;
uint64_t write_data_pkt_dbg_count_va;
/* In case of gfx11.5 shadow register address has to be initialized using LOAD_* packet.
* Also for every new ib/job submission, the shadowed registers has to be loaded using LOAD_*
* packets.
@ -103,6 +110,7 @@ struct amdgpu_userq {
/* Used in userq job log thread to only print if data has changed */
uint64_t last_submitted_job;
uint64_t last_completed_job;
uint64_t last_write_data_pkt_dbg_count;
};
void