diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 890fe3881d8..d406c7ab29a 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -202,6 +202,9 @@ struct drm_amdgpu_info_device {
    uint32_t csa_size;
    /* context save area base virtual alignment for gfx11 */
    uint32_t csa_alignment;
+   /* Userq IP mask (1 << AMDGPU_HW_IP_*) */
+   uint32_t userq_ip_mask;
+   uint32_t pad;
 };
 struct drm_amdgpu_info_hw_ip {
    uint32_t hw_ip_version_major;
@@ -568,13 +571,28 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
       return false;
    }
 
+   info->userq_ip_mask = device_info.userq_ip_mask;
+
    for (unsigned ip_type = 0; ip_type < AMD_NUM_IP_TYPES; ip_type++) {
       struct drm_amdgpu_info_hw_ip ip_info = {0};
 
       r = ac_drm_query_hw_ip_info(dev, ip_type, 0, &ip_info);
-      if (r || !ip_info.available_rings)
+      if (r)
          continue;
 
+      if (ip_info.available_rings) {
+         info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
+         /* The kernel can set both available_rings and userq_ip_mask; clear the userq bit. */
+         info->userq_ip_mask &= ~BITFIELD_BIT(ip_type);
+      } else if (info->userq_ip_mask & BITFIELD_BIT(ip_type)) {
+         /* info->ip[ip_type].num_queues also indicates whether the ip_type is
+          * supported at all, so set it to 1 for user queues.
+          */
+         info->ip[ip_type].num_queues = 1;
+      } else {
+         continue;
+      }
+
       /* Gfx6-8 don't set ip_discovery_version. */
       if (info->drm_minor >= 48 && ip_info.ip_discovery_version) {
          info->ip[ip_type].ver_major = (ip_info.ip_discovery_version >> 16) & 0xff;
@@ -597,7 +615,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
           device_info.family == FAMILY_MDN)
          info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 3;
       }
-      info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
 
       /* query ip count */
       r = ac_drm_query_hw_ip_count(dev, ip_type, &num_instances);
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 846b0e79842..398f0a8d57f 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -244,7 +244,7 @@ struct radeon_info {
    bool has_tmz_support;
    bool has_trap_handler_support;
    bool kernel_has_modifiers;
-   bool use_userq;
+   uint32_t userq_ip_mask; /* AMD_IP_* bits */
 
    /* If the kernel driver uses CU reservation for high priority compute on gfx10+, it programs
    * a global CU mask in the hw that is AND'ed with CU_EN register fields set by userspace.
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index dfefc468bca..1f606b4e286 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -180,7 +180,7 @@ static int amdgpu_bo_va_op_common(struct amdgpu_winsys *aws, struct amdgpu_winsy
 {
    int r;
 
-   if (aws->info.use_userq) {
+   if (aws->info.userq_ip_mask) {
      uint32_t syncobj_arr[AMDGPU_MAX_QUEUES + 1];
      uint32_t num_fences = 0;
 
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp
index 40884772c9a..fe559836961 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp
@@ -978,8 +978,7 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
    if (!amdgpu_get_new_ib(ctx->aws, rcs, &acs->main_ib, acs))
       goto fail;
 
-   /* Currently only gfx, compute and sdma queues supports user queue. */
-   if (acs->aws->info.use_userq && ip_type <= AMD_IP_SDMA) {
+   if (acs->aws->info.userq_ip_mask & BITFIELD_BIT(acs->ip_type)) {
       if (!amdgpu_userq_init(acs->aws, &acs->aws->queues[acs->queue_index].userq, ip_type))
         goto fail;
    }
@@ -1202,7 +1201,8 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rcs,
    util_queue_fence_wait(&fence->submitted);
 
    if (!fence->imported) {
-      if (!aws->info.use_userq || fence->ip_type != acs->ip_type || acs->ip_type > AMD_IP_SDMA) {
+      if (!(aws->info.userq_ip_mask & BITFIELD_BIT(acs->ip_type)) ||
+          fence->ip_type != acs->ip_type) {
         /* Ignore idle fences. This will only check the user fence in memory. */
         if (!amdgpu_fence_wait((struct pipe_fence_handle *)fence, 0, false)) {
            add_seq_no_to_list(acs->aws, &csc->seq_no_dependencies, fence->queue_index,
@@ -2162,8 +2162,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
    csc_current = amdgpu_csc_get_current(acs);
    struct amdgpu_cs_context *csc_submitted = amdgpu_csc_get_submitted(acs);
 
-   /* only gfx, compute and sdma queues are supported in userqueues. */
-   if (aws->info.use_userq && acs->ip_type <= AMD_IP_SDMA) {
+   if (aws->info.userq_ip_mask & BITFIELD_BIT(acs->ip_type)) {
       util_queue_add_job(&aws->cs_queue, acs, &acs->flush_completed,
                          amdgpu_cs_submit_ib, NULL, 0);
    } else {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index d825b525d2c..f5a7f5ed9dd 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -56,13 +56,12 @@ static bool do_winsys_init(struct amdgpu_winsys *aws,
                      strstr(debug_get_option("AMD_DEBUG", ""), "sqtt") != NULL;
    aws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
                                driQueryOptionb(config->options, "radeonsi_zerovram");
-   aws->info.use_userq = debug_get_bool_option("AMD_USERQ", false);
 
    for (unsigned i = 0; i < ARRAY_SIZE(aws->queues); i++)
       simple_mtx_init(&aws->queues[i].userq.lock, mtx_plain);
 
    /* TODO: Enable this once the kernel handles it efficiently. */
-   if (!aws->info.use_userq)
+   if (!aws->info.userq_ip_mask)
      aws->info.has_local_buffers = false;
 
    return true;
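
For reference, a minimal standalone sketch (not part of the patch) of the new
queue-selection logic in ac_query_gpu_info(). It assumes BITFIELD_BIT expands
to (1u << b) as in mesa's util/macros.h, substitutes the GCC/clang builtin
__builtin_popcount for mesa's util_bitcount, and uses made-up AMD_IP_* values
and sample kernel query results purely for illustration:

/* Sketch of the per-IP decision: kernel rings win over user queues,
 * and num_queues != 0 doubles as "this IP type is supported". */
#include <stdint.h>
#include <stdio.h>

#define BITFIELD_BIT(b) (1u << (b))   /* mirrors mesa's util/macros.h */

enum { AMD_IP_GFX, AMD_IP_COMPUTE, AMD_IP_SDMA, AMD_NUM_IP_TYPES };

int main(void)
{
   /* Pretend the kernel reported user queues for GFX and SDMA... */
   uint32_t userq_ip_mask = BITFIELD_BIT(AMD_IP_GFX) | BITFIELD_BIT(AMD_IP_SDMA);
   /* ...but also exposed kernel rings for GFX and COMPUTE. */
   uint32_t available_rings[AMD_NUM_IP_TYPES] = {0x1, 0x3, 0x0};
   unsigned num_queues[AMD_NUM_IP_TYPES] = {0};

   for (unsigned ip = 0; ip < AMD_NUM_IP_TYPES; ip++) {
      if (available_rings[ip]) {
         /* Kernel rings win: count them and drop the userq bit. */
         num_queues[ip] = __builtin_popcount(available_rings[ip]);
         userq_ip_mask &= ~BITFIELD_BIT(ip);
      } else if (userq_ip_mask & BITFIELD_BIT(ip)) {
         /* No kernel rings, but user queues are available: mark as supported. */
         num_queues[ip] = 1;
      }
   }

   /* GFX stays on kernel rings, so only the SDMA bit survives: prints 0x4. */
   printf("userq_ip_mask = 0x%x, sdma queues = %u\n",
          userq_ip_mask, num_queues[AMD_IP_SDMA]);
   return 0;
}

This also shows why the winsys-side checks above can simply test
userq_ip_mask & BITFIELD_BIT(ip_type): the mask is pruned during device
queries, so a set bit means "this IP really submits through a user queue".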