mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
winsys/amdgpu: print userq job info
Reviewed-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39206>
This commit is contained in:
parent
a542715271
commit
2547fd0f59
6 changed files with 81 additions and 0 deletions
|
|
@ -1823,6 +1823,8 @@ RadeonSI driver environment variables
|
||||||
Enable CP register shadowing in kernel queue.
|
Enable CP register shadowing in kernel queue.
|
||||||
``userqnoshadowregs``
|
``userqnoshadowregs``
|
||||||
Disable register shadowing in userqueue. This will also disable userqueue mcbp.
|
Disable register shadowing in userqueue. This will also disable userqueue mcbp.
|
||||||
|
``userqjoblog``
|
||||||
|
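Log job info for userqueues: the fences a submission waits on, each submitted job, and periodically the last submitted and last completed job of every queue.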
|
||||||
``novideotiling``
|
``novideotiling``
|
||||||
Disable tiling for video.
|
Disable tiling for video.
|
||||||
``nodectier1``
|
``nodectier1``
|
||||||
|
|
|
||||||
|
|
@ -1628,6 +1628,18 @@ static int amdgpu_cs_submit_ib_userq(struct amdgpu_userq *userq,
|
||||||
if (r)
|
if (r)
|
||||||
mesa_loge("amdgpu: getting wait fences failed\n");
|
mesa_loge("amdgpu: getting wait fences failed\n");
|
||||||
|
|
||||||
|
if (aws->userq_job_log) {
|
||||||
|
for (unsigned i = 0; i < userq_wait_data.num_fences; i++) {
|
||||||
|
/* The uq_va memory is allocated in kernel from a memory chunk. This memory chunk is
|
||||||
|
* mapped to same address for all process/apps. Once uq_va is guess mapped to a
|
||||||
|
* given queue, cross process/queue fence dependency can be analyzed.
|
||||||
|
*/
|
||||||
|
mesa_logi("amdgpu: uq_log: %s: num_wait_fences=%d uq_va=%llx job=%llx\n",
|
||||||
|
amdgpu_userq_str[acs->queue_index], userq_wait_data.num_fences, fence_info[i].va,
|
||||||
|
fence_info[i].value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
simple_mtx_lock(&userq->lock);
|
simple_mtx_lock(&userq->lock);
|
||||||
amdgpu_cs_add_userq_packets(aws, userq, csc, userq_wait_data.num_fences, fence_info);
|
amdgpu_cs_add_userq_packets(aws, userq, csc, userq_wait_data.num_fences, fence_info);
|
||||||
struct drm_amdgpu_userq_signal userq_signal_data = {
|
struct drm_amdgpu_userq_signal userq_signal_data = {
|
||||||
|
|
@ -1658,6 +1670,11 @@ static int amdgpu_cs_submit_ib_userq(struct amdgpu_userq *userq,
|
||||||
userq->doorbell_bo_map[AMDGPU_USERQ_DOORBELL_INDEX] = userq->next_wptr;
|
userq->doorbell_bo_map[AMDGPU_USERQ_DOORBELL_INDEX] = userq->next_wptr;
|
||||||
r = ac_drm_userq_signal(aws->dev, &userq_signal_data);
|
r = ac_drm_userq_signal(aws->dev, &userq_signal_data);
|
||||||
|
|
||||||
|
if (aws->userq_job_log) {
|
||||||
|
mesa_logi("amdgpu: uq_log: %s: submitted_job=%llx\n", amdgpu_userq_str[acs->queue_index],
|
||||||
|
(long long)*userq->wptr_bo_map);
|
||||||
|
}
|
||||||
|
|
||||||
*seq_no = userq->user_fence_seq_num;
|
*seq_no = userq->user_fence_seq_num;
|
||||||
simple_mtx_unlock(&userq->lock);
|
simple_mtx_unlock(&userq->lock);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,41 @@ amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void *
|
||||||
|
userq_job_log_thread(void *data)
|
||||||
|
{
|
||||||
|
struct amdgpu_winsys *aws = data;
|
||||||
|
struct amdgpu_userq *userq;
|
||||||
|
|
||||||
|
while (aws->userq_job_log) {
|
||||||
|
os_time_sleep(1000 * 700);
|
||||||
|
for (unsigned i = 0; i < AMDGPU_MAX_QUEUES; i++) {
|
||||||
|
userq = &aws->queues[i].userq;
|
||||||
|
if (userq->userq_handle) {
|
||||||
|
uint64_t last_submitted_job = *userq->wptr_bo_map;
|
||||||
|
uint64_t last_completed_job = *userq->user_fence_ptr;
|
||||||
|
|
||||||
|
if (userq->last_submitted_job != last_submitted_job ||
|
||||||
|
userq->last_completed_job != last_completed_job) {
|
||||||
|
mesa_logi("amdgpu: uq_log: %s: submitted_job=%llx completed_job=%llx\n",
|
||||||
|
amdgpu_userq_str[i], (long long)last_submitted_job,
|
||||||
|
(long long)last_completed_job);
|
||||||
|
userq->last_submitted_job = last_submitted_job;
|
||||||
|
userq->last_completed_job = last_completed_job;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
amdgpu_userq_start_job_log_thread(struct amdgpu_winsys *aws)
|
||||||
|
{
|
||||||
|
pthread_create(&aws->userq_job_log_thread, NULL, userq_job_log_thread, aws);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
amdgpu_userq_deinit(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
|
amdgpu_userq_deinit(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -99,8 +99,15 @@ struct amdgpu_userq {
|
||||||
struct amdgpu_userq_compute_data compute_data;
|
struct amdgpu_userq_compute_data compute_data;
|
||||||
struct amdgpu_userq_sdma_data sdma_data;
|
struct amdgpu_userq_sdma_data sdma_data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Used in userq job log thread to only print if data has changed */
|
||||||
|
uint64_t last_submitted_job;
|
||||||
|
uint64_t last_completed_job;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
amdgpu_userq_start_job_log_thread(struct amdgpu_winsys *aws);
|
||||||
|
|
||||||
bool
|
bool
|
||||||
amdgpu_userq_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq, enum amd_ip_type ip_type,
|
amdgpu_userq_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq, enum amd_ip_type ip_type,
|
||||||
unsigned queue_index);
|
unsigned queue_index);
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,13 @@
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include "sid.h"
|
#include "sid.h"
|
||||||
|
|
||||||
|
/* Short queue names used in userq log messages, indexed by queue index.
 * Each entry holds at most 7 characters plus the terminating NUL.
 */
char amdgpu_userq_str[AMDGPU_MAX_QUEUES][8] = {
   [0] = "gfx",
   [1] = "gfx_hi",
   [2] = "comp",
   [3] = "sdma",
};
|
||||||
|
|
||||||
static struct hash_table *dev_tab = NULL;
|
static struct hash_table *dev_tab = NULL;
|
||||||
static simple_mtx_t dev_tab_mutex = SIMPLE_MTX_INITIALIZER;
|
static simple_mtx_t dev_tab_mutex = SIMPLE_MTX_INITIALIZER;
|
||||||
|
|
||||||
|
|
@ -59,6 +66,7 @@ static bool do_winsys_init(struct amdgpu_winsys *aws,
|
||||||
strstr(debug_get_option("AMD_DEBUG", ""), "sqtt") != NULL;
|
strstr(debug_get_option("AMD_DEBUG", ""), "sqtt") != NULL;
|
||||||
aws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
|
aws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
|
||||||
driQueryOptionb(config->options, "radeonsi_zerovram");
|
driQueryOptionb(config->options, "radeonsi_zerovram");
|
||||||
|
aws->userq_job_log = strstr(debug_get_option("AMD_DEBUG", ""), "userqjoblog") != NULL;
|
||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(aws->queues); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(aws->queues); i++)
|
||||||
simple_mtx_init(&aws->queues[i].userq.lock, mtx_plain);
|
simple_mtx_init(&aws->queues[i].userq.lock, mtx_plain);
|
||||||
|
|
@ -67,6 +75,9 @@ static bool do_winsys_init(struct amdgpu_winsys *aws,
|
||||||
if (!aws->info.userq_ip_mask)
|
if (!aws->info.userq_ip_mask)
|
||||||
aws->info.has_vm_always_valid = false;
|
aws->info.has_vm_always_valid = false;
|
||||||
|
|
||||||
|
if (aws->userq_job_log)
|
||||||
|
amdgpu_userq_start_job_log_thread(aws);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
|
|
@ -80,6 +91,11 @@ static void do_winsys_deinit(struct amdgpu_winsys *aws)
|
||||||
if (aws->reserve_vmid)
|
if (aws->reserve_vmid)
|
||||||
ac_drm_vm_unreserve_vmid(aws->dev, 0);
|
ac_drm_vm_unreserve_vmid(aws->dev, 0);
|
||||||
|
|
||||||
|
if (aws->userq_job_log) {
|
||||||
|
aws->userq_job_log = false;
|
||||||
|
pthread_join(aws->userq_job_log_thread, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(aws->queues); i++) {
|
for (unsigned i = 0; i < ARRAY_SIZE(aws->queues); i++) {
|
||||||
for (unsigned j = 0; j < ARRAY_SIZE(aws->queues[i].fences); j++)
|
for (unsigned j = 0; j < ARRAY_SIZE(aws->queues[i].fences); j++)
|
||||||
amdgpu_fence_reference(&aws->queues[i].fences[j], NULL);
|
amdgpu_fence_reference(&aws->queues[i].fences[j], NULL);
|
||||||
|
|
|
||||||
|
|
@ -126,6 +126,8 @@ enum amdgpu_queue_index {
|
||||||
AMDGPU_QUEUE_USES_ALT_FENCE = INT_MAX,
|
AMDGPU_QUEUE_USES_ALT_FENCE = INT_MAX,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern char amdgpu_userq_str[AMDGPU_MAX_QUEUES][8];
|
||||||
|
|
||||||
/* This can use any integer type because the logic handles integer wraparounds robustly, but
|
/* This can use any integer type because the logic handles integer wraparounds robustly, but
|
||||||
* uint8_t wraps around so quickly that some BOs might never become idle because we don't
|
* uint8_t wraps around so quickly that some BOs might never become idle because we don't
|
||||||
* remove idle fences from BOs, so they become "busy" again after a queue sequence number wraps
|
* remove idle fences from BOs, so they become "busy" again after a queue sequence number wraps
|
||||||
|
|
@ -207,6 +209,8 @@ struct amdgpu_winsys {
|
||||||
|
|
||||||
/* Protected by bo_fence_lock. */
|
/* Protected by bo_fence_lock. */
|
||||||
struct amdgpu_queue queues[AMDGPU_MAX_QUEUES];
|
struct amdgpu_queue queues[AMDGPU_MAX_QUEUES];
|
||||||
|
pthread_t userq_job_log_thread;
|
||||||
|
bool userq_job_log; /* enable userq job log thread */
|
||||||
|
|
||||||
struct pb_cache bo_cache;
|
struct pb_cache bo_cache;
|
||||||
struct pb_slabs bo_slabs; /* Slab allocator. */
|
struct pb_slabs bo_slabs; /* Slab allocator. */
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue