mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 06:30:11 +01:00
ac/gpu_info: add some more flags to ac_cu_info
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38701>
This commit is contained in:
parent
f791e46c47
commit
f7c4aa48a0
9 changed files with 57 additions and 49 deletions
|
|
@ -299,6 +299,23 @@ ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_
|
|||
cu_info->max_vgpr_alloc = 256;
|
||||
|
||||
cu_info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
|
||||
|
||||
/* Flags */
|
||||
cu_info->has_lds_bank_count_16 = info->family == CHIP_KABINI || info->family == CHIP_STONEY;
|
||||
cu_info->has_sram_ecc_enabled = info->family == CHIP_VEGA20 || info->family == CHIP_MI100 ||
|
||||
info->family == CHIP_MI200 || info->family == CHIP_GFX940;
|
||||
cu_info->has_fast_fma32 = info->gfx_level >= GFX9 || info->family == CHIP_TAHITI ||
|
||||
info->family == CHIP_HAWAII || info->family == CHIP_CARRIZO;
|
||||
cu_info->has_fma_mix = info->gfx_level >= GFX10 ||
|
||||
info->family == CHIP_VEGA12 || info->family == CHIP_VEGA20 ||
|
||||
info->family == CHIP_MI100 || info->family == CHIP_MI200 ||
|
||||
info->family == CHIP_GFX940;
|
||||
cu_info->has_packed_math_16bit = info->gfx_level >= GFX9;
|
||||
cu_info->has_accelerated_dot_product =
|
||||
info->family == CHIP_VEGA20 ||
|
||||
(info->family >= CHIP_MI100 && info->family != CHIP_NAVI10 && info->family != CHIP_GFX1013);
|
||||
/* GFX1013 is GFX10 plus ray tracing instructions */
|
||||
cu_info->has_image_bvh_intersect_ray = info->gfx_level >= GFX10_3 || info->family == CHIP_GFX1013;
|
||||
}
|
||||
|
||||
enum ac_query_gpu_info_result
|
||||
|
|
@ -968,16 +985,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->has_out_of_order_rast =
|
||||
info->gfx_level >= GFX8 && info->gfx_level <= GFX9 && info->max_se >= 2;
|
||||
|
||||
/* Whether chips support double rate packed math instructions. */
|
||||
info->has_packed_math_16bit = info->gfx_level >= GFX9;
|
||||
|
||||
/* Whether chips support dot product instructions. A subset of these support a smaller
|
||||
* instruction encoding which accumulates with the destination.
|
||||
*/
|
||||
info->has_accelerated_dot_product =
|
||||
info->family == CHIP_VEGA20 ||
|
||||
(info->family >= CHIP_MI100 && info->family != CHIP_NAVI10 && info->family != CHIP_GFX1013);
|
||||
|
||||
/* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */
|
||||
info->has_load_ctx_reg_pkt =
|
||||
info->gfx_level >= GFX9 || (info->gfx_level >= GFX8 && info->me_fw_feature >= 41);
|
||||
|
|
@ -1608,15 +1615,11 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->max_good_cu_per_sa * info->max_sa_per_se * info->max_se;
|
||||
info->total_tess_ring_size = info->tess_offchip_ring_size + info->tess_factor_ring_size;
|
||||
|
||||
/* GFX1013 is GFX10 plus ray tracing instructions */
|
||||
info->has_image_bvh_intersect_ray = info->gfx_level >= GFX10_3 ||
|
||||
info->family == CHIP_GFX1013;
|
||||
|
||||
if (info->gfx_level >= GFX12)
|
||||
info->rt_ip_version = RT_3_1;
|
||||
else if (info->gfx_level >= GFX11)
|
||||
info->rt_ip_version = RT_2_0;
|
||||
else if (info->has_image_bvh_intersect_ray)
|
||||
else if (info->cu_info.has_image_bvh_intersect_ray)
|
||||
info->rt_ip_version = RT_1_1;
|
||||
|
||||
set_custom_cu_en_mask(info);
|
||||
|
|
|
|||
|
|
@ -43,6 +43,25 @@ struct ac_cu_info {
|
|||
uint32_t min_wave64_vgpr_alloc;
|
||||
uint32_t max_vgpr_alloc;
|
||||
uint32_t wave64_vgpr_alloc_granularity;
|
||||
|
||||
/* Flags */
|
||||
bool has_lds_bank_count_16 : 1;
|
||||
bool has_sram_ecc_enabled : 1;
|
||||
bool has_fast_fma32 : 1;
|
||||
/* Whether chips support fused v_fma_mix* instructions.
|
||||
* Otherwise, unfused v_mad_mix* is available on GFX9.
|
||||
*/
|
||||
bool has_fma_mix : 1;
|
||||
/* Whether chips support double rate packed math instructions. */
|
||||
bool has_packed_math_16bit : 1;
|
||||
/* Whether chips support dot product instructions. A subset of these support a smaller
|
||||
* instruction encoding which accumulates with the destination.
|
||||
*/
|
||||
bool has_accelerated_dot_product : 1;
|
||||
/* Device supports hardware-accelerated raytracing using
|
||||
* image_bvh*_intersect_ray instructions
|
||||
*/
|
||||
bool has_image_bvh_intersect_ray : 1;
|
||||
};
|
||||
|
||||
struct radeon_info {
|
||||
|
|
@ -101,8 +120,6 @@ struct radeon_info {
|
|||
bool rbplus_allowed; /* if RB+ is allowed */
|
||||
bool has_load_ctx_reg_pkt;
|
||||
bool has_out_of_order_rast;
|
||||
bool has_packed_math_16bit;
|
||||
bool has_accelerated_dot_product;
|
||||
bool cpdma_prefetch_writes_memory;
|
||||
bool has_gfx9_scissor_bug;
|
||||
bool has_htile_stencil_mipmap_bug;
|
||||
|
|
@ -350,11 +367,6 @@ struct radeon_info {
|
|||
uint32_t sdma_csa_size;
|
||||
uint32_t sdma_csa_alignment;
|
||||
} fw_based_mcbp;
|
||||
|
||||
/* Device supports hardware-accelerated raytracing using
|
||||
* image_bvh*_intersect_ray instructions
|
||||
*/
|
||||
bool has_image_bvh_intersect_ray;
|
||||
};
|
||||
|
||||
enum ac_query_gpu_info_result {
|
||||
|
|
|
|||
|
|
@ -60,7 +60,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
|
|||
gpu_info->max_render_backends = pci_ids[gpu_info->family].num_render_backends;
|
||||
|
||||
gpu_info->has_dedicated_vram = pci_ids[gpu_info->family].has_dedicated_vram;
|
||||
gpu_info->has_packed_math_16bit = gpu_info->gfx_level >= GFX9;
|
||||
|
||||
gpu_info->has_cb_lt16bit_int_clamp_bug = gpu_info->gfx_level <= GFX7 &&
|
||||
gpu_info->family != CHIP_HAWAII;
|
||||
|
|
@ -70,12 +69,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
|
|||
gpu_info->has_distributed_tess =
|
||||
gpu_info->gfx_level >= GFX10 || (gpu_info->gfx_level >= GFX8 && gpu_info->max_se >= 2);
|
||||
|
||||
gpu_info->has_accelerated_dot_product =
|
||||
gpu_info->family == CHIP_VEGA20 ||
|
||||
(gpu_info->family >= CHIP_MI100 && gpu_info->family != CHIP_NAVI10 && gpu_info->family != CHIP_GFX1013);
|
||||
|
||||
gpu_info->has_image_bvh_intersect_ray = gpu_info->gfx_level >= GFX10_3 || gpu_info->family == CHIP_GFX1013;
|
||||
|
||||
gpu_info->address32_hi = gpu_info->gfx_level >= GFX9 ? 0xffff8000u : 0x0;
|
||||
|
||||
gpu_info->has_rbplus = gpu_info->family == CHIP_STONEY || gpu_info->gfx_level >= GFX9;
|
||||
|
|
|
|||
|
|
@ -73,13 +73,13 @@ void ac_nir_set_options(struct radeon_info *info, bool use_llvm,
|
|||
options->has_ford_funord = true;
|
||||
options->has_fsub = true;
|
||||
options->has_isub = true;
|
||||
options->has_sdot_4x8 = info->has_accelerated_dot_product;
|
||||
options->has_sudot_4x8 = info->has_accelerated_dot_product && info->gfx_level >= GFX11;
|
||||
options->has_udot_4x8 = info->has_accelerated_dot_product;
|
||||
options->has_sdot_4x8_sat = info->has_accelerated_dot_product;
|
||||
options->has_sudot_4x8_sat = info->has_accelerated_dot_product && info->gfx_level >= GFX11;
|
||||
options->has_udot_4x8_sat = info->has_accelerated_dot_product;
|
||||
options->has_dot_2x16 = info->has_accelerated_dot_product && info->gfx_level < GFX11;
|
||||
options->has_sdot_4x8 = info->cu_info.has_accelerated_dot_product;
|
||||
options->has_sudot_4x8 = info->cu_info.has_accelerated_dot_product && info->gfx_level >= GFX11;
|
||||
options->has_udot_4x8 = info->cu_info.has_accelerated_dot_product;
|
||||
options->has_sdot_4x8_sat = info->cu_info.has_accelerated_dot_product;
|
||||
options->has_sudot_4x8_sat = info->cu_info.has_accelerated_dot_product && info->gfx_level >= GFX11;
|
||||
options->has_udot_4x8_sat = info->cu_info.has_accelerated_dot_product;
|
||||
options->has_dot_2x16 = info->cu_info.has_accelerated_dot_product && info->gfx_level < GFX11;
|
||||
options->has_bfdot2_bfadd = info->gfx_level >= GFX12;
|
||||
options->has_find_msb_rev = true;
|
||||
options->has_pack_32_4x8 = true;
|
||||
|
|
@ -103,7 +103,7 @@ void ac_nir_set_options(struct radeon_info *info, bool use_llvm,
|
|||
options->optimize_quad_vote_to_reduce = !use_llvm;
|
||||
options->lower_fisnormal = true;
|
||||
options->support_16bit_alu = info->gfx_level >= GFX8;
|
||||
options->vectorize_vec2_16bit = info->has_packed_math_16bit;
|
||||
options->vectorize_vec2_16bit = info->cu_info.has_packed_math_16bit;
|
||||
options->discard_is_demote = true;
|
||||
options->optimize_sample_mask_in = true;
|
||||
options->optimize_load_front_face_fsign = true;
|
||||
|
|
|
|||
|
|
@ -221,8 +221,8 @@ static void run_subtest(subtest *st, bool print = false)
|
|||
|
||||
struct radeon_info info = {};
|
||||
info.gfx_level = st->gfx_level;
|
||||
info.has_packed_math_16bit = true;
|
||||
info.has_accelerated_dot_product = true;
|
||||
info.cu_info.has_packed_math_16bit = true;
|
||||
info.cu_info.has_accelerated_dot_product = true;
|
||||
|
||||
nir_shader_compiler_options options = {};
|
||||
ac_nir_set_options(&info, st->use_llvm, &options);
|
||||
|
|
|
|||
|
|
@ -946,7 +946,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
|
||||
|
||||
bool has_result = false;
|
||||
if (pdev->info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev)) {
|
||||
if (pdev->info.cu_info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev)) {
|
||||
nir_store_var(
|
||||
b, intrinsic_result,
|
||||
nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),
|
||||
|
|
|
|||
|
|
@ -179,7 +179,7 @@ radv_shader_fp16_enabled(const struct radv_physical_device *pdev)
|
|||
/* GFX8 supports fp16, but not double rate packed math. We don't enable
|
||||
* that by default because it can sometimes hurt perf.
|
||||
*/
|
||||
return pdev->info.has_packed_math_16bit ||
|
||||
return pdev->info.cu_info.has_packed_math_16bit ||
|
||||
(pdev->info.gfx_level == GFX8 && instance->drirc.features.expose_float16_gfx8);
|
||||
}
|
||||
|
||||
|
|
@ -193,7 +193,7 @@ radv_host_image_copy_enabled(const struct radv_physical_device *pdev)
|
|||
bool
|
||||
radv_enable_rt(const struct radv_physical_device *pdev)
|
||||
{
|
||||
if (!pdev->info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev))
|
||||
if (!pdev->info.cu_info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev))
|
||||
return false;
|
||||
|
||||
if (pdev->use_llvm)
|
||||
|
|
@ -210,7 +210,7 @@ radv_emulate_rt(const struct radv_physical_device *pdev)
|
|||
return true;
|
||||
|
||||
/* Do not force emulated RT on GPUs that have native support. */
|
||||
return !pdev->info.has_image_bvh_intersect_ray && instance->drirc.features.emulate_rt;
|
||||
return !pdev->info.cu_info.has_image_bvh_intersect_ray && instance->drirc.features.emulate_rt;
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -830,8 +830,8 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
|
|||
.AMD_device_coherent_memory = pdev->info.has_l2_uncached,
|
||||
.AMD_draw_indirect_count = true,
|
||||
.AMD_gcn_shader = true,
|
||||
.AMD_gpu_shader_half_float = pdev->info.has_packed_math_16bit,
|
||||
.AMD_gpu_shader_int16 = pdev->info.has_packed_math_16bit,
|
||||
.AMD_gpu_shader_half_float = pdev->info.cu_info.has_packed_math_16bit,
|
||||
.AMD_gpu_shader_int16 = pdev->info.cu_info.has_packed_math_16bit,
|
||||
.AMD_memory_overallocation_behavior = true,
|
||||
.AMD_mixed_attachment_samples = true,
|
||||
.AMD_rasterization_order = pdev->info.has_out_of_order_rast,
|
||||
|
|
@ -931,7 +931,7 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc
|
|||
.storageBuffer16BitAccess = true,
|
||||
.uniformAndStorageBuffer16BitAccess = true,
|
||||
.storagePushConstant16 = true,
|
||||
.storageInputOutput16 = pdev->info.has_packed_math_16bit,
|
||||
.storageInputOutput16 = pdev->info.cu_info.has_packed_math_16bit,
|
||||
.multiview = true,
|
||||
.multiviewGeometryShader = true,
|
||||
.multiviewTessellationShader = true,
|
||||
|
|
@ -1589,7 +1589,7 @@ radv_get_physical_device_properties(struct radv_physical_device *pdev)
|
|||
radv_taskmesh_enabled(pdev) ? VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT : 0;
|
||||
VkShaderStageFlags rt_stages = radv_enable_rt(pdev) ? RADV_RT_STAGE_BITS : 0;
|
||||
|
||||
bool accel_dot = pdev->info.has_accelerated_dot_product;
|
||||
bool accel_dot = pdev->info.cu_info.has_accelerated_dot_product;
|
||||
bool gfx11plus = pdev->info.gfx_level >= GFX11;
|
||||
|
||||
VkExtent2D vrs_texel_extent = radv_vrs_attachment_enabled(pdev) ? (VkExtent2D){8, 8} : (VkExtent2D){0, 0};
|
||||
|
|
|
|||
|
|
@ -821,7 +821,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
|||
options->lower_uniforms_to_ubo = true;
|
||||
options->lower_to_scalar = true;
|
||||
options->lower_to_scalar_filter =
|
||||
sscreen->info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL;
|
||||
sscreen->info.cu_info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL;
|
||||
options->max_unroll_iterations = 128;
|
||||
options->max_unroll_iterations_aggressive = 128;
|
||||
/* For OpenGL, rounding mode is undefined. We want fast packing with v_cvt_pkrtz_f16,
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_arr
|
|||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
NIR_PASS(_, nir, nir_opt_move_discards_to_top);
|
||||
|
||||
if (sscreen->info.has_packed_math_16bit)
|
||||
if (sscreen->info.cu_info.has_packed_math_16bit)
|
||||
NIR_PASS(progress, nir, nir_opt_vectorize, si_vectorize_callback, NULL);
|
||||
} while (progress);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue