diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 233e95946ba..247077e481c 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -299,6 +299,23 @@ ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_ cu_info->max_vgpr_alloc = 256; cu_info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4; + + /* Flags */ + cu_info->has_lds_bank_count_16 = info->family == CHIP_KABINI || info->family == CHIP_STONEY; + cu_info->has_sram_ecc_enabled = info->family == CHIP_VEGA20 || info->family == CHIP_MI100 || + info->family == CHIP_MI200 || info->family == CHIP_GFX940; + cu_info->has_fast_fma32 = info->gfx_level >= GFX9 || info->family == CHIP_TAHITI || + info->family == CHIP_HAWAII || info->family == CHIP_CARRIZO; + cu_info->has_fma_mix = info->gfx_level >= GFX10 || + info->family == CHIP_VEGA12 || info->family == CHIP_VEGA20 || + info->family == CHIP_MI100 || info->family == CHIP_MI200 || + info->family == CHIP_GFX940; + cu_info->has_packed_math_16bit = info->gfx_level >= GFX9; + cu_info->has_accelerated_dot_product = + info->family == CHIP_VEGA20 || + (info->family >= CHIP_MI100 && info->family != CHIP_NAVI10 && info->family != CHIP_GFX1013); + /* GFX1013 is GFX10 plus ray tracing instructions */ + cu_info->has_image_bvh_intersect_ray = info->gfx_level >= GFX10_3 || info->family == CHIP_GFX1013; } enum ac_query_gpu_info_result @@ -968,16 +985,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->has_out_of_order_rast = info->gfx_level >= GFX8 && info->gfx_level <= GFX9 && info->max_se >= 2; - /* Whether chips support double rate packed math instructions. */ - info->has_packed_math_16bit = info->gfx_level >= GFX9; - - /* Whether chips support dot product instructions. A subset of these support a smaller - * instruction encoding which accumulates with the destination. - */ - info->has_accelerated_dot_product = - info->family == CHIP_VEGA20 || - (info->family >= CHIP_MI100 && info->family != CHIP_NAVI10 && info->family != CHIP_GFX1013); - /* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */ info->has_load_ctx_reg_pkt = info->gfx_level >= GFX9 || (info->gfx_level >= GFX8 && info->me_fw_feature >= 41); @@ -1608,15 +1615,11 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->max_good_cu_per_sa * info->max_sa_per_se * info->max_se; info->total_tess_ring_size = info->tess_offchip_ring_size + info->tess_factor_ring_size; - /* GFX1013 is GFX10 plus ray tracing instructions */ - info->has_image_bvh_intersect_ray = info->gfx_level >= GFX10_3 || - info->family == CHIP_GFX1013; - if (info->gfx_level >= GFX12) info->rt_ip_version = RT_3_1; else if (info->gfx_level >= GFX11) info->rt_ip_version = RT_2_0; - else if (info->has_image_bvh_intersect_ray) + else if (info->cu_info.has_image_bvh_intersect_ray) info->rt_ip_version = RT_1_1; set_custom_cu_en_mask(info); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 6687b9be569..1e95c9e2aa8 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -43,6 +43,25 @@ struct ac_cu_info { uint32_t min_wave64_vgpr_alloc; uint32_t max_vgpr_alloc; uint32_t wave64_vgpr_alloc_granularity; + + /* Flags */ + bool has_lds_bank_count_16 : 1; + bool has_sram_ecc_enabled : 1; + bool has_fast_fma32 : 1; + /* Whether chips support fused v_fma_mix* instructions. + * Otherwise, unfused v_mad_mix* is available on GFX9. + */ + bool has_fma_mix : 1; + /* Whether chips support double rate packed math instructions. */ + bool has_packed_math_16bit : 1; + /* Whether chips support dot product instructions. A subset of these support a smaller + * instruction encoding which accumulates with the destination. + */ + bool has_accelerated_dot_product : 1; + /* Device supports hardware-accelerated raytracing using + * image_bvh*_intersect_ray instructions + */ + bool has_image_bvh_intersect_ray : 1; }; struct radeon_info { @@ -101,8 +120,6 @@ struct radeon_info { bool rbplus_allowed; /* if RB+ is allowed */ bool has_load_ctx_reg_pkt; bool has_out_of_order_rast; - bool has_packed_math_16bit; - bool has_accelerated_dot_product; bool cpdma_prefetch_writes_memory; bool has_gfx9_scissor_bug; bool has_htile_stencil_mipmap_bug; @@ -350,11 +367,6 @@ struct radeon_info { uint32_t sdma_csa_size; uint32_t sdma_csa_alignment; } fw_based_mcbp; - - /* Device supports hardware-accelerated raytracing using - * image_bvh*_intersect_ray instructions - */ - bool has_image_bvh_intersect_ray; }; enum ac_query_gpu_info_result { diff --git a/src/amd/common/ac_null_device.c b/src/amd/common/ac_null_device.c index d299f905bf6..d4f91138c0f 100644 --- a/src/amd/common/ac_null_device.c +++ b/src/amd/common/ac_null_device.c @@ -60,7 +60,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family) gpu_info->max_render_backends = pci_ids[gpu_info->family].num_render_backends; gpu_info->has_dedicated_vram = pci_ids[gpu_info->family].has_dedicated_vram; - gpu_info->has_packed_math_16bit = gpu_info->gfx_level >= GFX9; gpu_info->has_cb_lt16bit_int_clamp_bug = gpu_info->gfx_level <= GFX7 && gpu_info->family != CHIP_HAWAII; @@ -70,12 +69,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family) gpu_info->has_distributed_tess = gpu_info->gfx_level >= GFX10 || (gpu_info->gfx_level >= GFX8 && gpu_info->max_se >= 2); - gpu_info->has_accelerated_dot_product = - gpu_info->family == CHIP_VEGA20 || - (gpu_info->family >= CHIP_MI100 && gpu_info->family != CHIP_NAVI10 && gpu_info->family != CHIP_GFX1013); - - gpu_info->has_image_bvh_intersect_ray = gpu_info->gfx_level >= GFX10_3 || gpu_info->family == CHIP_GFX1013; - gpu_info->address32_hi = gpu_info->gfx_level >= GFX9 ? 0xffff8000u : 0x0; gpu_info->has_rbplus = gpu_info->family == CHIP_STONEY || gpu_info->gfx_level >= GFX9; diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index b652dfcf996..9fe85f0a76e 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -73,13 +73,13 @@ void ac_nir_set_options(struct radeon_info *info, bool use_llvm, options->has_ford_funord = true; options->has_fsub = true; options->has_isub = true; - options->has_sdot_4x8 = info->has_accelerated_dot_product; - options->has_sudot_4x8 = info->has_accelerated_dot_product && info->gfx_level >= GFX11; - options->has_udot_4x8 = info->has_accelerated_dot_product; - options->has_sdot_4x8_sat = info->has_accelerated_dot_product; - options->has_sudot_4x8_sat = info->has_accelerated_dot_product && info->gfx_level >= GFX11; - options->has_udot_4x8_sat = info->has_accelerated_dot_product; - options->has_dot_2x16 = info->has_accelerated_dot_product && info->gfx_level < GFX11; + options->has_sdot_4x8 = info->cu_info.has_accelerated_dot_product; + options->has_sudot_4x8 = info->cu_info.has_accelerated_dot_product && info->gfx_level >= GFX11; + options->has_udot_4x8 = info->cu_info.has_accelerated_dot_product; + options->has_sdot_4x8_sat = info->cu_info.has_accelerated_dot_product; + options->has_sudot_4x8_sat = info->cu_info.has_accelerated_dot_product && info->gfx_level >= GFX11; + options->has_udot_4x8_sat = info->cu_info.has_accelerated_dot_product; + options->has_dot_2x16 = info->cu_info.has_accelerated_dot_product && info->gfx_level < GFX11; options->has_bfdot2_bfadd = info->gfx_level >= GFX12; options->has_find_msb_rev = true; options->has_pack_32_4x8 = true; @@ -103,7 +103,7 @@ void ac_nir_set_options(struct radeon_info *info, bool use_llvm, options->optimize_quad_vote_to_reduce = !use_llvm; options->lower_fisnormal = true; options->support_16bit_alu = info->gfx_level >= GFX8; - options->vectorize_vec2_16bit = info->has_packed_math_16bit; + options->vectorize_vec2_16bit = info->cu_info.has_packed_math_16bit; options->discard_is_demote = true; options->optimize_sample_mask_in = true; options->optimize_load_front_face_fsign = true; diff --git a/src/amd/common/nir/tests/ac_nir_lower_mem_access_test.cpp b/src/amd/common/nir/tests/ac_nir_lower_mem_access_test.cpp index 127431e7486..98dafeadaea 100644 --- a/src/amd/common/nir/tests/ac_nir_lower_mem_access_test.cpp +++ b/src/amd/common/nir/tests/ac_nir_lower_mem_access_test.cpp @@ -221,8 +221,8 @@ static void run_subtest(subtest *st, bool print = false) struct radeon_info info = {}; info.gfx_level = st->gfx_level; - info.has_packed_math_16bit = true; - info.has_accelerated_dot_product = true; + info.cu_info.has_packed_math_16bit = true; + info.cu_info.has_accelerated_dot_product = true; nir_shader_compiler_options options = {}; ac_nir_set_options(&info, st->use_llvm, &options); diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.c b/src/amd/vulkan/nir/radv_nir_rt_common.c index d12e3fcf4e3..03cf8ebc033 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.c +++ b/src/amd/vulkan/nir/radv_nir_rt_common.c @@ -946,7 +946,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node)); bool has_result = false; - if (pdev->info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev)) { + if (pdev->info.cu_info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev)) { nir_store_var( b, intrinsic_result, nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 75101273609..73670fbd3d4 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -179,7 +179,7 @@ radv_shader_fp16_enabled(const struct radv_physical_device *pdev) /* GFX8 supports fp16, but not double rate packed math. We don't enable * that by default because it can sometimes hurt perf. */ - return pdev->info.has_packed_math_16bit || + return pdev->info.cu_info.has_packed_math_16bit || (pdev->info.gfx_level == GFX8 && instance->drirc.features.expose_float16_gfx8); } @@ -193,7 +193,7 @@ radv_host_image_copy_enabled(const struct radv_physical_device *pdev) bool radv_enable_rt(const struct radv_physical_device *pdev) { - if (!pdev->info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev)) + if (!pdev->info.cu_info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev)) return false; if (pdev->use_llvm) @@ -210,7 +210,7 @@ radv_emulate_rt(const struct radv_physical_device *pdev) return true; /* Do not force emulated RT on GPUs that have native support. */ - return !pdev->info.has_image_bvh_intersect_ray && instance->drirc.features.emulate_rt; + return !pdev->info.cu_info.has_image_bvh_intersect_ray && instance->drirc.features.emulate_rt; } bool @@ -830,8 +830,8 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .AMD_device_coherent_memory = pdev->info.has_l2_uncached, .AMD_draw_indirect_count = true, .AMD_gcn_shader = true, - .AMD_gpu_shader_half_float = pdev->info.has_packed_math_16bit, - .AMD_gpu_shader_int16 = pdev->info.has_packed_math_16bit, + .AMD_gpu_shader_half_float = pdev->info.cu_info.has_packed_math_16bit, + .AMD_gpu_shader_int16 = pdev->info.cu_info.has_packed_math_16bit, .AMD_memory_overallocation_behavior = true, .AMD_mixed_attachment_samples = true, .AMD_rasterization_order = pdev->info.has_out_of_order_rast, @@ -931,7 +931,7 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc .storageBuffer16BitAccess = true, .uniformAndStorageBuffer16BitAccess = true, .storagePushConstant16 = true, - .storageInputOutput16 = pdev->info.has_packed_math_16bit, + .storageInputOutput16 = pdev->info.cu_info.has_packed_math_16bit, .multiview = true, .multiviewGeometryShader = true, .multiviewTessellationShader = true, @@ -1589,7 +1589,7 @@ radv_get_physical_device_properties(struct radv_physical_device *pdev) radv_taskmesh_enabled(pdev) ? VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT : 0; VkShaderStageFlags rt_stages = radv_enable_rt(pdev) ? RADV_RT_STAGE_BITS : 0; - bool accel_dot = pdev->info.has_accelerated_dot_product; + bool accel_dot = pdev->info.cu_info.has_accelerated_dot_product; bool gfx11plus = pdev->info.gfx_level >= GFX11; VkExtent2D vrs_texel_extent = radv_vrs_attachment_enabled(pdev) ? (VkExtent2D){8, 8} : (VkExtent2D){0, 0}; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 2648e6769aa..86be4cbfd6f 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -821,7 +821,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen) options->lower_uniforms_to_ubo = true; options->lower_to_scalar = true; options->lower_to_scalar_filter = - sscreen->info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL; + sscreen->info.cu_info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL; options->max_unroll_iterations = 128; options->max_unroll_iterations_aggressive = 128; /* For OpenGL, rounding mode is undefined. We want fast packing with v_cvt_pkrtz_f16, diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 63582dd265f..931a6ce1e01 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -122,7 +122,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_arr if (nir->info.stage == MESA_SHADER_FRAGMENT) NIR_PASS(_, nir, nir_opt_move_discards_to_top); - if (sscreen->info.has_packed_math_16bit) + if (sscreen->info.cu_info.has_packed_math_16bit) NIR_PASS(progress, nir, nir_opt_vectorize, si_vectorize_callback, NULL); } while (progress);