ac/gpu_info: add some more flags to ac_cu_info

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38701>
2026-03-18 16:40:34 +01:00 · 2025-11-27 11:43:06 +01:00 · 2025-11-27 11:43:06 +01:00 · f7c4aa48a0
commit f7c4aa48a0
parent f791e46c47
9 changed files with 57 additions and 49 deletions
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -299,6 +299,23 @@ ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_
   cu_info->max_vgpr_alloc = 256;

   cu_info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
+
+   /* Flags */
+   cu_info->has_lds_bank_count_16 = info->family == CHIP_KABINI || info->family == CHIP_STONEY;
+   cu_info->has_sram_ecc_enabled = info->family == CHIP_VEGA20 || info->family == CHIP_MI100 ||
+                                   info->family == CHIP_MI200 || info->family == CHIP_GFX940;
+   cu_info->has_fast_fma32 = info->gfx_level >= GFX9 || info->family == CHIP_TAHITI ||
+                             info->family == CHIP_HAWAII || info->family == CHIP_CARRIZO;
+   cu_info->has_fma_mix = info->gfx_level >= GFX10 ||
+      info->family == CHIP_VEGA12 || info->family == CHIP_VEGA20 ||
+       info->family == CHIP_MI100 || info->family == CHIP_MI200 ||
+       info->family == CHIP_GFX940;
+   cu_info->has_packed_math_16bit = info->gfx_level >= GFX9;
+   cu_info->has_accelerated_dot_product =
+      info->family == CHIP_VEGA20 ||
+      (info->family >= CHIP_MI100 && info->family != CHIP_NAVI10 && info->family != CHIP_GFX1013);
+   /* GFX1013 is GFX10 plus ray tracing instructions */
+   cu_info->has_image_bvh_intersect_ray = info->gfx_level >= GFX10_3 || info->family == CHIP_GFX1013;
 }

 enum ac_query_gpu_info_result
@@ -968,16 +985,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
   info->has_out_of_order_rast =
      info->gfx_level >= GFX8 && info->gfx_level <= GFX9 && info->max_se >= 2;

-   /* Whether chips support double rate packed math instructions. */
-   info->has_packed_math_16bit = info->gfx_level >= GFX9;
-
-   /* Whether chips support dot product instructions. A subset of these support a smaller
-    * instruction encoding which accumulates with the destination.
-    */
-   info->has_accelerated_dot_product =
-      info->family == CHIP_VEGA20 ||
-      (info->family >= CHIP_MI100 && info->family != CHIP_NAVI10 && info->family != CHIP_GFX1013);
-
   /* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */
   info->has_load_ctx_reg_pkt =
      info->gfx_level >= GFX9 || (info->gfx_level >= GFX8 && info->me_fw_feature >= 41);
@@ -1608,15 +1615,11 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
                                 info->max_good_cu_per_sa * info->max_sa_per_se * info->max_se;
   info->total_tess_ring_size = info->tess_offchip_ring_size + info->tess_factor_ring_size;

-   /* GFX1013 is GFX10 plus ray tracing instructions */
-   info->has_image_bvh_intersect_ray = info->gfx_level >= GFX10_3 ||
-                                       info->family == CHIP_GFX1013;
-
   if (info->gfx_level >= GFX12)
      info->rt_ip_version = RT_3_1;
   else if (info->gfx_level >= GFX11)
      info->rt_ip_version = RT_2_0;
-   else if (info->has_image_bvh_intersect_ray)
+   else if (info->cu_info.has_image_bvh_intersect_ray)
      info->rt_ip_version = RT_1_1;

   set_custom_cu_en_mask(info);
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -43,6 +43,25 @@ struct ac_cu_info {
   uint32_t min_wave64_vgpr_alloc;
   uint32_t max_vgpr_alloc;
   uint32_t wave64_vgpr_alloc_granularity;
+
+   /* Flags */
+   bool has_lds_bank_count_16 : 1;
+   bool has_sram_ecc_enabled : 1;
+   bool has_fast_fma32 : 1;
+   /* Whether chips support fused v_fma_mix* instructions.
+    * Otherwise, unfused v_mad_mix* is available on GFX9.
+    */
+   bool has_fma_mix : 1;
+   /* Whether chips support double rate packed math instructions. */
+   bool has_packed_math_16bit : 1;
+   /* Whether chips support dot product instructions. A subset of these support a smaller
+    * instruction encoding which accumulates with the destination.
+    */
+   bool has_accelerated_dot_product : 1;
+   /* Device supports hardware-accelerated raytracing using
+    * image_bvh*_intersect_ray instructions
+    */
+   bool has_image_bvh_intersect_ray : 1;
 };

 struct radeon_info {
@@ -101,8 +120,6 @@ struct radeon_info {
   bool rbplus_allowed; /* if RB+ is allowed */
   bool has_load_ctx_reg_pkt;
   bool has_out_of_order_rast;
-   bool has_packed_math_16bit;
-   bool has_accelerated_dot_product;
   bool cpdma_prefetch_writes_memory;
   bool has_gfx9_scissor_bug;
   bool has_htile_stencil_mipmap_bug;
@@ -350,11 +367,6 @@ struct radeon_info {
      uint32_t sdma_csa_size;
      uint32_t sdma_csa_alignment;
   } fw_based_mcbp;
-
-   /* Device supports hardware-accelerated raytracing using
-    * image_bvh*_intersect_ray instructions
-    */
-   bool has_image_bvh_intersect_ray;
 };

 enum ac_query_gpu_info_result {
--- a/src/amd/common/ac_null_device.c
+++ b/src/amd/common/ac_null_device.c
@@ -60,7 +60,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
   gpu_info->max_render_backends = pci_ids[gpu_info->family].num_render_backends;

   gpu_info->has_dedicated_vram = pci_ids[gpu_info->family].has_dedicated_vram;
-   gpu_info->has_packed_math_16bit = gpu_info->gfx_level >= GFX9;

   gpu_info->has_cb_lt16bit_int_clamp_bug = gpu_info->gfx_level <= GFX7 &&
                                            gpu_info->family != CHIP_HAWAII;
@@ -70,12 +69,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
   gpu_info->has_distributed_tess =
      gpu_info->gfx_level >= GFX10 || (gpu_info->gfx_level >= GFX8 && gpu_info->max_se >= 2);

-   gpu_info->has_accelerated_dot_product =
-      gpu_info->family == CHIP_VEGA20 ||
-      (gpu_info->family >= CHIP_MI100 && gpu_info->family != CHIP_NAVI10 && gpu_info->family != CHIP_GFX1013);
-
-   gpu_info->has_image_bvh_intersect_ray = gpu_info->gfx_level >= GFX10_3 || gpu_info->family == CHIP_GFX1013;
-
   gpu_info->address32_hi = gpu_info->gfx_level >= GFX9 ? 0xffff8000u : 0x0;

   gpu_info->has_rbplus = gpu_info->family == CHIP_STONEY || gpu_info->gfx_level >= GFX9;
--- a/src/amd/common/nir/ac_nir.c
+++ b/src/amd/common/nir/ac_nir.c
@@ -73,13 +73,13 @@ void ac_nir_set_options(struct radeon_info *info, bool use_llvm,
   options->has_ford_funord = true;
   options->has_fsub = true;
   options->has_isub = true;
-   options->has_sdot_4x8 = info->has_accelerated_dot_product;
-   options->has_sudot_4x8 = info->has_accelerated_dot_product && info->gfx_level >= GFX11;
-   options->has_udot_4x8 = info->has_accelerated_dot_product;
-   options->has_sdot_4x8_sat = info->has_accelerated_dot_product;
-   options->has_sudot_4x8_sat = info->has_accelerated_dot_product && info->gfx_level >= GFX11;
-   options->has_udot_4x8_sat = info->has_accelerated_dot_product;
-   options->has_dot_2x16 = info->has_accelerated_dot_product && info->gfx_level < GFX11;
+   options->has_sdot_4x8 = info->cu_info.has_accelerated_dot_product;
+   options->has_sudot_4x8 = info->cu_info.has_accelerated_dot_product && info->gfx_level >= GFX11;
+   options->has_udot_4x8 = info->cu_info.has_accelerated_dot_product;
+   options->has_sdot_4x8_sat = info->cu_info.has_accelerated_dot_product;
+   options->has_sudot_4x8_sat = info->cu_info.has_accelerated_dot_product && info->gfx_level >= GFX11;
+   options->has_udot_4x8_sat = info->cu_info.has_accelerated_dot_product;
+   options->has_dot_2x16 = info->cu_info.has_accelerated_dot_product && info->gfx_level < GFX11;
   options->has_bfdot2_bfadd = info->gfx_level >= GFX12;
   options->has_find_msb_rev = true;
   options->has_pack_32_4x8 = true;
@@ -103,7 +103,7 @@ void ac_nir_set_options(struct radeon_info *info, bool use_llvm,
   options->optimize_quad_vote_to_reduce = !use_llvm;
   options->lower_fisnormal = true;
   options->support_16bit_alu = info->gfx_level >= GFX8;
-   options->vectorize_vec2_16bit = info->has_packed_math_16bit;
+   options->vectorize_vec2_16bit = info->cu_info.has_packed_math_16bit;
   options->discard_is_demote = true;
   options->optimize_sample_mask_in = true;
   options->optimize_load_front_face_fsign = true;
--- a/src/amd/common/nir/tests/ac_nir_lower_mem_access_test.cpp
+++ b/src/amd/common/nir/tests/ac_nir_lower_mem_access_test.cpp
@@ -221,8 +221,8 @@ static void run_subtest(subtest *st, bool print = false)

   struct radeon_info info = {};
   info.gfx_level = st->gfx_level;
-   info.has_packed_math_16bit = true;
-   info.has_accelerated_dot_product = true;
+   info.cu_info.has_packed_math_16bit = true;
+   info.cu_info.has_accelerated_dot_product = true;

   nir_shader_compiler_options options = {};
   ac_nir_set_options(&info, st->use_llvm, &options);
--- a/src/amd/vulkan/nir/radv_nir_rt_common.c
+++ b/src/amd/vulkan/nir/radv_nir_rt_common.c
@@ -946,7 +946,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
      nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));

      bool has_result = false;
-      if (pdev->info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev)) {
+      if (pdev->info.cu_info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev)) {
         nir_store_var(
            b, intrinsic_result,
            nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),
--- a/src/amd/vulkan/radv_physical_device.c
+++ b/src/amd/vulkan/radv_physical_device.c
@@ -179,7 +179,7 @@ radv_shader_fp16_enabled(const struct radv_physical_device *pdev)
   /* GFX8 supports fp16, but not double rate packed math.  We don't enable
    * that by default because it can sometimes hurt perf.
    */
-   return pdev->info.has_packed_math_16bit ||
+   return pdev->info.cu_info.has_packed_math_16bit ||
          (pdev->info.gfx_level == GFX8 && instance->drirc.features.expose_float16_gfx8);
 }

@@ -193,7 +193,7 @@ radv_host_image_copy_enabled(const struct radv_physical_device *pdev)
 bool
 radv_enable_rt(const struct radv_physical_device *pdev)
 {
-   if (!pdev->info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev))
+   if (!pdev->info.cu_info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev))
      return false;

   if (pdev->use_llvm)
@@ -210,7 +210,7 @@ radv_emulate_rt(const struct radv_physical_device *pdev)
      return true;

   /* Do not force emulated RT on GPUs that have native support. */
-   return !pdev->info.has_image_bvh_intersect_ray && instance->drirc.features.emulate_rt;
+   return !pdev->info.cu_info.has_image_bvh_intersect_ray && instance->drirc.features.emulate_rt;
 }

 bool
@@ -830,8 +830,8 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
      .AMD_device_coherent_memory = pdev->info.has_l2_uncached,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
-      .AMD_gpu_shader_half_float = pdev->info.has_packed_math_16bit,
-      .AMD_gpu_shader_int16 = pdev->info.has_packed_math_16bit,
+      .AMD_gpu_shader_half_float = pdev->info.cu_info.has_packed_math_16bit,
+      .AMD_gpu_shader_int16 = pdev->info.cu_info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = pdev->info.has_out_of_order_rast,
@@ -931,7 +931,7 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc
      .storageBuffer16BitAccess = true,
      .uniformAndStorageBuffer16BitAccess = true,
      .storagePushConstant16 = true,
-      .storageInputOutput16 = pdev->info.has_packed_math_16bit,
+      .storageInputOutput16 = pdev->info.cu_info.has_packed_math_16bit,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = true,
@@ -1589,7 +1589,7 @@ radv_get_physical_device_properties(struct radv_physical_device *pdev)
      radv_taskmesh_enabled(pdev) ? VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT : 0;
   VkShaderStageFlags rt_stages = radv_enable_rt(pdev) ? RADV_RT_STAGE_BITS : 0;

-   bool accel_dot = pdev->info.has_accelerated_dot_product;
+   bool accel_dot = pdev->info.cu_info.has_accelerated_dot_product;
   bool gfx11plus = pdev->info.gfx_level >= GFX11;

   VkExtent2D vrs_texel_extent = radv_vrs_attachment_enabled(pdev) ? (VkExtent2D){8, 8} : (VkExtent2D){0, 0};
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -821,7 +821,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
   options->lower_uniforms_to_ubo = true;
   options->lower_to_scalar = true;
   options->lower_to_scalar_filter =
-      sscreen->info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL;
+      sscreen->info.cu_info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL;
   options->max_unroll_iterations = 128;
   options->max_unroll_iterations_aggressive = 128;
   /* For OpenGL, rounding mode is undefined. We want fast packing with v_cvt_pkrtz_f16,
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -122,7 +122,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_arr
      if (nir->info.stage == MESA_SHADER_FRAGMENT)
         NIR_PASS(_, nir, nir_opt_move_discards_to_top);

-      if (sscreen->info.has_packed_math_16bit)
+      if (sscreen->info.cu_info.has_packed_math_16bit)
         NIR_PASS(progress, nir, nir_opt_vectorize, si_vectorize_callback, NULL);
   } while (progress);