mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 06:20:09 +01:00
radeonsi/gfx11: enable NGG-only draw paths
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16328>
This commit is contained in:
parent
7bd4dd79c8
commit
7c423a7ad0
5 changed files with 53 additions and 12 deletions
|
|
@ -103,7 +103,7 @@ radeonsi_include_dirs = [inc_src, inc_include, inc_gallium, inc_gallium_aux, inc
|
|||
radeonsi_deps = [dep_llvm, dep_clock, dep_libdrm_radeon, idep_nir_headers, idep_amdgfxregs_h, idep_mesautil]
|
||||
|
||||
radeonsi_gfx_libs = []
|
||||
foreach ver : ['6', '7', '8', '9', '10', '103']
|
||||
foreach ver : ['6', '7', '8', '9', '10', '103', '11']
|
||||
radeonsi_gfx_libs += static_library(
|
||||
'radeonsi_gfx@0@'.format(ver),
|
||||
['si_state_draw.cpp'],
|
||||
|
|
|
|||
|
|
@ -632,6 +632,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
|
|||
case GFX10_3:
|
||||
si_init_draw_functions_GFX10_3(sctx);
|
||||
break;
|
||||
case GFX11:
|
||||
si_init_draw_functions_GFX11(sctx);
|
||||
break;
|
||||
default:
|
||||
unreachable("unhandled chip class");
|
||||
}
|
||||
|
|
@ -1270,15 +1273,23 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
sscreen->has_out_of_order_rast =
|
||||
sscreen->info.has_out_of_order_rast && !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
|
||||
|
||||
sscreen->use_ngg = !(sscreen->debug_flags & DBG(NO_NGG)) &&
|
||||
sscreen->info.chip_class >= GFX10 &&
|
||||
(sscreen->info.family != CHIP_NAVI14 ||
|
||||
sscreen->info.is_pro_graphics);
|
||||
sscreen->use_ngg_culling = sscreen->use_ngg &&
|
||||
sscreen->info.max_render_backends >= 2 &&
|
||||
!((sscreen->debug_flags & DBG(NO_NGG_CULLING)) ||
|
||||
LLVM_VERSION_MAJOR <= 11 /* hangs on 11, see #4874 */);
|
||||
sscreen->use_ngg_streamout = false;
|
||||
if (sscreen->info.chip_class >= GFX11) {
|
||||
sscreen->use_ngg = true;
|
||||
sscreen->use_ngg_streamout = true;
|
||||
/* TODO: Disable for now. Investigate if it helps. */
|
||||
sscreen->use_ngg_culling = (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL)) &&
|
||||
!(sscreen->debug_flags & DBG(NO_NGG_CULLING));
|
||||
} else {
|
||||
sscreen->use_ngg = !(sscreen->debug_flags & DBG(NO_NGG)) &&
|
||||
sscreen->info.chip_class >= GFX10 &&
|
||||
(sscreen->info.family != CHIP_NAVI14 ||
|
||||
sscreen->info.is_pro_graphics);
|
||||
sscreen->use_ngg_streamout = false;
|
||||
sscreen->use_ngg_culling = sscreen->use_ngg &&
|
||||
sscreen->info.max_render_backends >= 2 &&
|
||||
!(sscreen->debug_flags & DBG(NO_NGG_CULLING)) &&
|
||||
LLVM_VERSION_MAJOR >= 12; /* hangs on 11, see #4874 */
|
||||
}
|
||||
|
||||
/* Only set this for the cases that are known to work, which are:
|
||||
* - GFX9 if bpp >= 4 (in bytes)
|
||||
|
|
|
|||
|
|
@ -601,6 +601,7 @@ void si_init_draw_functions_GFX8(struct si_context *sctx);
|
|||
void si_init_draw_functions_GFX9(struct si_context *sctx);
|
||||
void si_init_draw_functions_GFX10(struct si_context *sctx);
|
||||
void si_init_draw_functions_GFX10_3(struct si_context *sctx);
|
||||
void si_init_draw_functions_GFX11(struct si_context *sctx);
|
||||
void si_init_spi_map_functions(struct si_context *sctx);
|
||||
|
||||
/* si_state_msaa.c */
|
||||
|
|
|
|||
|
|
@ -42,6 +42,8 @@
|
|||
#define GFX(name) name##GFX10
|
||||
#elif (GFX_VER == 103)
|
||||
#define GFX(name) name##GFX10_3
|
||||
#elif (GFX_VER == 11)
|
||||
#define GFX(name) name##GFX11
|
||||
#else
|
||||
#error "Unknown gfx version"
|
||||
#endif
|
||||
|
|
@ -425,8 +427,26 @@ static void si_prefetch_shaders(struct si_context *sctx)
|
|||
return;
|
||||
|
||||
/* Prefetch shaders and VBO descriptors to TC L2. */
|
||||
if (GFX_VERSION >= GFX9) {
|
||||
/* Choose the right spot for the VBO prefetch. */
|
||||
if (GFX_VERSION >= GFX11) {
|
||||
if (HAS_TESS) {
|
||||
if (mode != PREFETCH_AFTER_DRAW) {
|
||||
if (mask & SI_PREFETCH_HS)
|
||||
si_prefetch_shader_async(sctx, sctx->queued.named.hs);
|
||||
|
||||
if (mode == PREFETCH_BEFORE_DRAW)
|
||||
return;
|
||||
}
|
||||
|
||||
if (mask & SI_PREFETCH_GS)
|
||||
si_prefetch_shader_async(sctx, sctx->queued.named.gs);
|
||||
} else if (mode != PREFETCH_AFTER_DRAW) {
|
||||
if (mask & SI_PREFETCH_GS)
|
||||
si_prefetch_shader_async(sctx, sctx->queued.named.gs);
|
||||
|
||||
if (mode == PREFETCH_BEFORE_DRAW)
|
||||
return;
|
||||
}
|
||||
} else if (GFX_VERSION >= GFX9) {
|
||||
if (HAS_TESS) {
|
||||
if (mode != PREFETCH_AFTER_DRAW) {
|
||||
if (mask & SI_PREFETCH_HS)
|
||||
|
|
@ -1735,6 +1755,9 @@ void si_set_vertex_buffer_descriptor(struct si_screen *sscreen, struct si_vertex
|
|||
case GFX10_3:
|
||||
si_set_vb_descriptor<GFX10_3>(velems, vb, element_index, out);
|
||||
break;
|
||||
case GFX11:
|
||||
si_set_vb_descriptor<GFX11>(velems, vb, element_index, out);
|
||||
break;
|
||||
default:
|
||||
unreachable("unhandled chip class");
|
||||
}
|
||||
|
|
@ -2572,6 +2595,9 @@ static void si_init_draw_vbo(struct si_context *sctx)
|
|||
if (NGG && GFX_VERSION < GFX10)
|
||||
return;
|
||||
|
||||
if (!NGG && GFX_VERSION >= GFX11)
|
||||
return;
|
||||
|
||||
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
|
||||
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG>;
|
||||
|
||||
|
|
|
|||
|
|
@ -999,6 +999,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
|||
unsigned max_stream = util_last_bit(sel->info.base.gs.active_stream_mask);
|
||||
unsigned offset;
|
||||
|
||||
assert(sscreen->info.chip_class < GFX11); /* gfx11 doesn't have the legacy pipeline */
|
||||
|
||||
pm4 = si_get_shader_pm4_state(shader);
|
||||
if (!pm4)
|
||||
return;
|
||||
|
|
@ -3069,6 +3071,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
* - LDS usage is too high
|
||||
*/
|
||||
sel->tess_turns_off_ngg = sscreen->info.chip_class >= GFX10 &&
|
||||
sscreen->info.chip_class <= GFX10_3 &&
|
||||
(sel->info.base.gs.invocations * sel->info.base.gs.vertices_out > 256 ||
|
||||
sel->info.base.gs.invocations * sel->info.base.gs.vertices_out *
|
||||
(sel->info.num_outputs * 4 + 1) > 6500 /* max dw per GS primitive */);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue