mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 11:20:20 +01:00
winsys/radeon: fix completely broken tessellation for gfx6-7
The info was moved to radeon_info, but it was only set for the amdgpu
kernel driver. It was uninitialized for radeon.
Fixes: d82eda72a1 - ac/gpu_info: move HS info into radeon_info
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37910>
This commit is contained in:
parent
b1370e1935
commit
f5b648f6d3
2 changed files with 84 additions and 1 deletions
|
|
@ -1,6 +1,14 @@
|
|||
# Copyright © 2017 Dylan Baker
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
# Baseline dependencies and C arguments for the radeon winsys library.
# Both variables are consumed by the static_library() call further down.
libradeonwinsys_deps = [idep_mesautil, dep_libdrm]
|
||||
libradeonwinsys_c_args = []
|
||||
|
||||
# When radeonsi is enabled, also depend on idep_amdgfxregs_h and define
# HAVE_GALLIUM_RADEONSI for the C sources.
if with_gallium_radeonsi
|
||||
libradeonwinsys_deps += [idep_amdgfxregs_h]
|
||||
libradeonwinsys_c_args = ['-DHAVE_GALLIUM_RADEONSI']
|
||||
endif
|
||||
|
||||
libradeonwinsys = static_library(
|
||||
'radeonwinsys',
|
||||
files('radeon_drm_bo.c',
|
||||
|
|
@ -14,5 +22,6 @@ libradeonwinsys = static_library(
|
|||
'radeon_surface.h'),
|
||||
include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
dependencies : [idep_mesautil, dep_libdrm],
|
||||
c_args : libradeonwinsys_c_args,
|
||||
dependencies : libradeonwinsys_deps,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,10 @@
|
|||
#include "radeon_drm_bo.h"
|
||||
#include "radeon_drm_cs.h"
|
||||
|
||||
#ifdef HAVE_GALLIUM_RADEONSI
|
||||
#include "amdgfxregs.h"
|
||||
#endif
|
||||
|
||||
#include "util/os_file.h"
|
||||
#include "util/simple_mtx.h"
|
||||
#include "util/thread_sched.h"
|
||||
|
|
@ -105,6 +109,73 @@ static bool radeon_get_drm_value(int fd, unsigned request,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void get_hs_info(struct radeon_info *info)
|
||||
{
|
||||
/* This is the size of all TCS outputs in memory per workgroup.
|
||||
* Hawaii can't handle num_workgroups > 256 with 8K per workgroup, so use 4K.
|
||||
*/
|
||||
unsigned max_hs_out_vram_dwords_per_wg = info->family == CHIP_HAWAII ? 4096 : 8192;
|
||||
unsigned max_workgroups_per_se;
|
||||
|
||||
#ifdef HAVE_GALLIUM_RADEONSI /* for gfx6+ register definitions */
|
||||
unsigned max_hs_out_vram_dwords_enum = 0;
|
||||
|
||||
switch (max_hs_out_vram_dwords_per_wg) {
|
||||
case 8192:
|
||||
max_hs_out_vram_dwords_enum = V_03093C_X_8K_DWORDS;
|
||||
break;
|
||||
case 4096:
|
||||
max_hs_out_vram_dwords_enum = V_03093C_X_4K_DWORDS;
|
||||
break;
|
||||
case 2048:
|
||||
max_hs_out_vram_dwords_enum = V_03093C_X_2K_DWORDS;
|
||||
break;
|
||||
case 1024:
|
||||
max_hs_out_vram_dwords_enum = V_03093C_X_1K_DWORDS;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("invalid TCS workgroup size");
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Gfx7 should limit num_workgroups to 508 (127 per SE)
|
||||
* Gfx6 should limit num_workgroups to 126 (63 per SE)
|
||||
*/
|
||||
if (info->gfx_level == GFX7) {
|
||||
max_workgroups_per_se = 127;
|
||||
} else {
|
||||
max_workgroups_per_se = 63;
|
||||
}
|
||||
|
||||
/* Limit to 4 workgroups per CU for TCS, which exhausts LDS if each workgroup occupies 16KB.
|
||||
* Note that the offchip allocation isn't deallocated until the corresponding TES waves finish.
|
||||
*/
|
||||
unsigned num_offchip_wg_per_cu = 4;
|
||||
unsigned num_workgroups_per_se = MIN2(num_offchip_wg_per_cu * info->max_good_cu_per_sa *
|
||||
info->max_sa_per_se, max_workgroups_per_se);
|
||||
unsigned num_workgroups = num_workgroups_per_se * info->max_se;
|
||||
|
||||
#ifdef HAVE_GALLIUM_RADEONSI /* for gfx6+ register definitions */
|
||||
if (info->gfx_level == GFX7) {
|
||||
info->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(num_workgroups) |
|
||||
S_03093C_OFFCHIP_GRANULARITY_GFX7(max_hs_out_vram_dwords_enum);
|
||||
} else {
|
||||
info->hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(num_workgroups) |
|
||||
S_0089B0_OFFCHIP_GRANULARITY(max_hs_out_vram_dwords_enum);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* The typical size of tess factors of 1 TCS workgroup if all patches are triangles. */
|
||||
unsigned typical_tess_factor_size_per_wg = (192 / 3) * 16;
|
||||
unsigned num_tess_factor_wg_per_cu = 3;
|
||||
|
||||
info->hs_offchip_workgroup_dw_size = max_hs_out_vram_dwords_per_wg;
|
||||
info->tess_offchip_ring_size = num_workgroups * max_hs_out_vram_dwords_per_wg * 4;
|
||||
info->tess_factor_ring_size = typical_tess_factor_size_per_wg * num_tess_factor_wg_per_cu *
|
||||
info->max_good_cu_per_sa * info->max_sa_per_se * info->max_se;
|
||||
info->total_tess_ring_size = info->tess_offchip_ring_size + info->tess_factor_ring_size;
|
||||
}
|
||||
|
||||
/* Helper function to do the ioctls needed for setup and init. */
|
||||
static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
||||
{
|
||||
|
|
@ -639,6 +710,9 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||
default:;
|
||||
}
|
||||
|
||||
if (ws->gen == DRV_SI)
|
||||
get_hs_info(&ws->info);
|
||||
|
||||
ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
|
||||
strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
|
||||
ws->noop_cs = debug_get_bool_option("RADEON_NOOP", false);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue