ac,winsys/amdgpu: get userq_ip_mask supported from kernel info ioctl

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34370>
This commit is contained in:
Yogesh Mohan Marimuthu 2025-04-11 12:01:34 +05:30 committed by Marge Bot
parent b9054115d4
commit 61fd80a42e
5 changed files with 26 additions and 11 deletions

View file

@@ -202,6 +202,9 @@ struct drm_amdgpu_info_device {
uint32_t csa_size;
/* context save area base virtual alignment for gfx11 */
uint32_t csa_alignment;
/* Userq IP mask (1 << AMDGPU_HW_IP_*) */
uint32_t userq_ip_mask;
uint32_t pad;
};
struct drm_amdgpu_info_hw_ip {
uint32_t hw_ip_version_major;
@@ -568,13 +571,28 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
return false;
}
info->userq_ip_mask = device_info.userq_ip_mask;
for (unsigned ip_type = 0; ip_type < AMD_NUM_IP_TYPES; ip_type++) {
struct drm_amdgpu_info_hw_ip ip_info = {0};
r = ac_drm_query_hw_ip_info(dev, ip_type, 0, &ip_info);
if (r || !ip_info.available_rings)
if (r)
continue;
if (ip_info.available_rings) {
info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
/* Kernel can set both available_rings and userq_ip_mask. Clear userq_ip_mask. */
info->userq_ip_mask &= ~BITFIELD_BIT(ip_type);
} else if (info->userq_ip_mask & BITFIELD_BIT(ip_type)) {
/* info[ip_type].num_queues variable is also used to describe if that ip_type is
* supported or not. Setting this variable to 1 for userqueues.
*/
info->ip[ip_type].num_queues = 1;
} else {
continue;
}
/* Gfx6-8 don't set ip_discovery_version. */
if (info->drm_minor >= 48 && ip_info.ip_discovery_version) {
info->ip[ip_type].ver_major = (ip_info.ip_discovery_version >> 16) & 0xff;
@@ -597,7 +615,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
device_info.family == FAMILY_MDN)
info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 3;
}
info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
/* query ip count */
r = ac_drm_query_hw_ip_count(dev, ip_type, &num_instances);

View file

@@ -244,7 +244,7 @@ struct radeon_info {
bool has_tmz_support;
bool has_trap_handler_support;
bool kernel_has_modifiers;
bool use_userq;
uint32_t userq_ip_mask; /* AMD_IP_* bits */
/* If the kernel driver uses CU reservation for high priority compute on gfx10+, it programs
* a global CU mask in the hw that is AND'ed with CU_EN register fields set by userspace.

View file

@@ -180,7 +180,7 @@ static int amdgpu_bo_va_op_common(struct amdgpu_winsys *aws, struct amdgpu_winsy
{
int r;
if (aws->info.use_userq) {
if (aws->info.userq_ip_mask) {
uint32_t syncobj_arr[AMDGPU_MAX_QUEUES + 1];
uint32_t num_fences = 0;

View file

@@ -978,8 +978,7 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
if (!amdgpu_get_new_ib(ctx->aws, rcs, &acs->main_ib, acs))
goto fail;
/* Currently only gfx, compute and sdma queues supports user queue. */
if (acs->aws->info.use_userq && ip_type <= AMD_IP_SDMA) {
if (acs->aws->info.userq_ip_mask & BITFIELD_BIT(acs->ip_type)) {
if (!amdgpu_userq_init(acs->aws, &acs->aws->queues[acs->queue_index].userq, ip_type))
goto fail;
}
@@ -1202,7 +1201,8 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rcs,
util_queue_fence_wait(&fence->submitted);
if (!fence->imported) {
if (!aws->info.use_userq || fence->ip_type != acs->ip_type || acs->ip_type > AMD_IP_SDMA) {
if (!(aws->info.userq_ip_mask & BITFIELD_BIT(acs->ip_type)) ||
fence->ip_type != acs->ip_type) {
/* Ignore idle fences. This will only check the user fence in memory. */
if (!amdgpu_fence_wait((struct pipe_fence_handle *)fence, 0, false)) {
add_seq_no_to_list(acs->aws, &csc->seq_no_dependencies, fence->queue_index,
@@ -2162,8 +2162,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
csc_current = amdgpu_csc_get_current(acs);
struct amdgpu_cs_context *csc_submitted = amdgpu_csc_get_submitted(acs);
/* only gfx, compute and sdma queues are supported in userqueues. */
if (aws->info.use_userq && acs->ip_type <= AMD_IP_SDMA) {
if (aws->info.userq_ip_mask & BITFIELD_BIT(acs->ip_type)) {
util_queue_add_job(&aws->cs_queue, acs, &acs->flush_completed,
amdgpu_cs_submit_ib<USERQ>, NULL, 0);
} else {

View file

@@ -56,13 +56,12 @@ static bool do_winsys_init(struct amdgpu_winsys *aws,
strstr(debug_get_option("AMD_DEBUG", ""), "sqtt") != NULL;
aws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
driQueryOptionb(config->options, "radeonsi_zerovram");
aws->info.use_userq = debug_get_bool_option("AMD_USERQ", false);
for (unsigned i = 0; i < ARRAY_SIZE(aws->queues); i++)
simple_mtx_init(&aws->queues[i].userq.lock, mtx_plain);
/* TODO: Enable this once the kernel handles it efficiently. */
if (!aws->info.use_userq)
if (!aws->info.userq_ip_mask)
aws->info.has_local_buffers = false;
return true;