ac,winsys/amdgpu: get userq_ip_mask supported from kernel info ioctl

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34370>
This commit is contained in:
Yogesh Mohan Marimuthu 2025-04-11 12:01:34 +05:30 committed by Marge Bot
parent b9054115d4
commit 61fd80a42e
5 changed files with 26 additions and 11 deletions

View file

@@ -202,6 +202,9 @@ struct drm_amdgpu_info_device {
uint32_t csa_size;
/* context save area base virtual alignment for gfx11 */
uint32_t csa_alignment;
/* Userq IP mask (1 << AMDGPU_HW_IP_*) */
uint32_t userq_ip_mask;
uint32_t pad;
};
struct drm_amdgpu_info_hw_ip {
uint32_t hw_ip_version_major;
@@ -568,13 +571,28 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
return false;
}
info->userq_ip_mask = device_info.userq_ip_mask;
for (unsigned ip_type = 0; ip_type < AMD_NUM_IP_TYPES; ip_type++) {
struct drm_amdgpu_info_hw_ip ip_info = {0};
r = ac_drm_query_hw_ip_info(dev, ip_type, 0, &ip_info);
if (r || !ip_info.available_rings)
if (r)
continue;
if (ip_info.available_rings) {
info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
/* Kernel can set both available_rings and userq_ip_mask. Clear userq_ip_mask. */
info->userq_ip_mask &= ~BITFIELD_BIT(ip_type);
} else if (info->userq_ip_mask & BITFIELD_BIT(ip_type)) {
/* info[ip_type].num_queues variable is also used to describe if that ip_type is
* supported or not. Setting this variable to 1 for userqueues.
*/
info->ip[ip_type].num_queues = 1;
} else {
continue;
}
/* Gfx6-8 don't set ip_discovery_version. */
if (info->drm_minor >= 48 && ip_info.ip_discovery_version) {
info->ip[ip_type].ver_major = (ip_info.ip_discovery_version >> 16) & 0xff;
@@ -597,7 +615,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
device_info.family == FAMILY_MDN)
info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 3;
}
info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
/* query ip count */
r = ac_drm_query_hw_ip_count(dev, ip_type, &num_instances);

View file

@@ -244,7 +244,7 @@ struct radeon_info {
bool has_tmz_support;
bool has_trap_handler_support;
bool kernel_has_modifiers;
bool use_userq;
uint32_t userq_ip_mask; /* AMD_IP_* bits */
/* If the kernel driver uses CU reservation for high priority compute on gfx10+, it programs
* a global CU mask in the hw that is AND'ed with CU_EN register fields set by userspace.

View file

@@ -180,7 +180,7 @@ static int amdgpu_bo_va_op_common(struct amdgpu_winsys *aws, struct amdgpu_winsy
{
int r;
if (aws->info.use_userq) {
if (aws->info.userq_ip_mask) {
uint32_t syncobj_arr[AMDGPU_MAX_QUEUES + 1];
uint32_t num_fences = 0;

View file

@@ -978,8 +978,7 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
if (!amdgpu_get_new_ib(ctx->aws, rcs, &acs->main_ib, acs))
goto fail;
/* Currently only gfx, compute and sdma queues supports user queue. */
if (acs->aws->info.use_userq && ip_type <= AMD_IP_SDMA) {
if (acs->aws->info.userq_ip_mask & BITFIELD_BIT(acs->ip_type)) {
if (!amdgpu_userq_init(acs->aws, &acs->aws->queues[acs->queue_index].userq, ip_type))
goto fail;
}
@@ -1202,7 +1201,8 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rcs,
util_queue_fence_wait(&fence->submitted);
if (!fence->imported) {
if (!aws->info.use_userq || fence->ip_type != acs->ip_type || acs->ip_type > AMD_IP_SDMA) {
if (!(aws->info.userq_ip_mask & BITFIELD_BIT(acs->ip_type)) ||
fence->ip_type != acs->ip_type) {
/* Ignore idle fences. This will only check the user fence in memory. */
if (!amdgpu_fence_wait((struct pipe_fence_handle *)fence, 0, false)) {
add_seq_no_to_list(acs->aws, &csc->seq_no_dependencies, fence->queue_index,
@@ -2162,8 +2162,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
csc_current = amdgpu_csc_get_current(acs);
struct amdgpu_cs_context *csc_submitted = amdgpu_csc_get_submitted(acs);
/* only gfx, compute and sdma queues are supported in userqueues. */
if (aws->info.use_userq && acs->ip_type <= AMD_IP_SDMA) {
if (aws->info.userq_ip_mask & BITFIELD_BIT(acs->ip_type)) {
util_queue_add_job(&aws->cs_queue, acs, &acs->flush_completed,
amdgpu_cs_submit_ib<USERQ>, NULL, 0);
} else {

View file

@@ -56,13 +56,12 @@ static bool do_winsys_init(struct amdgpu_winsys *aws,
strstr(debug_get_option("AMD_DEBUG", ""), "sqtt") != NULL;
aws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
driQueryOptionb(config->options, "radeonsi_zerovram");
aws->info.use_userq = debug_get_bool_option("AMD_USERQ", false);
for (unsigned i = 0; i < ARRAY_SIZE(aws->queues); i++)
simple_mtx_init(&aws->queues[i].userq.lock, mtx_plain);
/* TODO: Enable this once the kernel handles it efficiently. */
if (!aws->info.use_userq)
if (!aws->info.userq_ip_mask)
aws->info.has_local_buffers = false;
return true;