diff --git a/src/gallium/winsys/radeon/drm/meson.build b/src/gallium/winsys/radeon/drm/meson.build index 8d2322661a3..90fa00e8082 100644 --- a/src/gallium/winsys/radeon/drm/meson.build +++ b/src/gallium/winsys/radeon/drm/meson.build @@ -1,6 +1,14 @@ # Copyright © 2017 Dylan Baker # SPDX-License-Identifier: MIT +libradeonwinsys_deps = [idep_mesautil, dep_libdrm] +libradeonwinsys_c_args = [] + +if with_gallium_radeonsi + libradeonwinsys_deps += [idep_amdgfxregs_h] + libradeonwinsys_c_args = ['-DHAVE_GALLIUM_RADEONSI'] +endif + libradeonwinsys = static_library( 'radeonwinsys', files('radeon_drm_bo.c', @@ -14,5 +22,6 @@ libradeonwinsys = static_library( 'radeon_surface.h'), include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux], gnu_symbol_visibility : 'hidden', - dependencies : [idep_mesautil, dep_libdrm], + c_args : libradeonwinsys_c_args, + dependencies : libradeonwinsys_deps, ) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index d56ef2392ea..5dda8e963fd 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -8,6 +8,10 @@ #include "radeon_drm_bo.h" #include "radeon_drm_cs.h" +#ifdef HAVE_GALLIUM_RADEONSI +#include "amdgfxregs.h" +#endif + #include "util/os_file.h" #include "util/simple_mtx.h" #include "util/thread_sched.h" @@ -105,6 +109,73 @@ static bool radeon_get_drm_value(int fd, unsigned request, return true; } +static void get_hs_info(struct radeon_info *info) +{ + /* Fills in info's hs_offchip_* fields and tess ring sizes. The value below is the size, in dwords, of all TCS outputs in memory per workgroup. + * Hawaii can't handle num_workgroups > 256 with 8K dwords per workgroup, so use 4K dwords. + */ + unsigned max_hs_out_vram_dwords_per_wg = info->family == CHIP_HAWAII ? 
4096 : 8192; + unsigned max_workgroups_per_se; + +#ifdef HAVE_GALLIUM_RADEONSI /* for gfx6+ register definitions (amdgfxregs.h is only included when this macro is defined) */ + unsigned max_hs_out_vram_dwords_enum = 0; + + switch (max_hs_out_vram_dwords_per_wg) { + case 8192: + max_hs_out_vram_dwords_enum = V_03093C_X_8K_DWORDS; + break; + case 4096: + max_hs_out_vram_dwords_enum = V_03093C_X_4K_DWORDS; + break; + case 2048: + max_hs_out_vram_dwords_enum = V_03093C_X_2K_DWORDS; + break; + case 1024: + max_hs_out_vram_dwords_enum = V_03093C_X_1K_DWORDS; + break; + default: + UNREACHABLE("invalid TCS workgroup size"); + } +#endif + + /* Gfx7 should limit num_workgroups to 508 (127 per SE). + * Gfx6 should limit num_workgroups to 126 (63 per SE); every gfx_level other than GFX7 takes this limit. + */ + if (info->gfx_level == GFX7) { + max_workgroups_per_se = 127; + } else { + max_workgroups_per_se = 63; + } + + /* Limit to 4 workgroups per CU for TCS, which exhausts LDS if each workgroup occupies 16KB, then clamp the per-SE total to max_workgroups_per_se. + * Note that the offchip allocation isn't deallocated until the corresponding TES waves finish. + */ + unsigned num_offchip_wg_per_cu = 4; + unsigned num_workgroups_per_se = MIN2(num_offchip_wg_per_cu * info->max_good_cu_per_sa * + info->max_sa_per_se, max_workgroups_per_se); + unsigned num_workgroups = num_workgroups_per_se * info->max_se; + +#ifdef HAVE_GALLIUM_RADEONSI /* for gfx6+ register definitions; without this macro hs_offchip_param is not written by this function */ + if (info->gfx_level == GFX7) { + info->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(num_workgroups) | + S_03093C_OFFCHIP_GRANULARITY_GFX7(max_hs_out_vram_dwords_enum); + } else { + info->hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(num_workgroups) | + S_0089B0_OFFCHIP_GRANULARITY(max_hs_out_vram_dwords_enum); + } +#endif + + /* The typical size of tess factors of 1 TCS workgroup if all patches are triangles. 
*/ + unsigned typical_tess_factor_size_per_wg = (192 / 3) * 16; + unsigned num_tess_factor_wg_per_cu = 3; + + info->hs_offchip_workgroup_dw_size = max_hs_out_vram_dwords_per_wg; + info->tess_offchip_ring_size = num_workgroups * max_hs_out_vram_dwords_per_wg * 4; + info->tess_factor_ring_size = typical_tess_factor_size_per_wg * num_tess_factor_wg_per_cu * + info->max_good_cu_per_sa * info->max_sa_per_se * info->max_se; + info->total_tess_ring_size = info->tess_offchip_ring_size + info->tess_factor_ring_size; +} + /* Helper function to do the ioctls needed for setup and init. */ static bool do_winsys_init(struct radeon_drm_winsys *ws) { @@ -639,6 +710,9 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) default:; } + if (ws->gen == DRV_SI) + get_hs_info(&ws->info); + ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL || strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL; ws->noop_cs = debug_get_bool_option("RADEON_NOOP", false);