diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 335c4dda83c..d94490a5a1a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -764,7 +764,7 @@ struct gfx9_gs_info { unsigned esgs_ring_size; /* in bytes */ }; -#define SI_NUM_VGT_STAGES_KEY_BITS 5 +#define SI_NUM_VGT_STAGES_KEY_BITS 8 #define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS) /* The VGT_SHADER_STAGES key used to index the table of precomputed values. @@ -778,9 +778,13 @@ union si_vgt_stages_key { uint8_t ngg_passthrough : 1; uint8_t ngg : 1; /* gfx10+ */ uint8_t streamout : 1; /* only used with NGG */ - uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; + uint8_t hs_wave32 : 1; + uint8_t gs_wave32 : 1; + uint8_t vs_wave32 : 1; #else /* UTIL_ARCH_BIG_ENDIAN */ - uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; + uint8_t vs_wave32 : 1; + uint8_t gs_wave32 : 1; + uint8_t hs_wave32 : 1; uint8_t streamout : 1; uint8_t ngg : 1; uint8_t ngg_passthrough : 1; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 90b01e033a1..3fcb0ab9f05 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -229,12 +229,23 @@ static bool si_update_shaders(struct si_context *sctx) key.index = 0; /* Update VGT_SHADER_STAGES_EN. */ - if (HAS_TESS) + if (HAS_TESS) { key.u.tess = 1; + if (GFX_VERSION >= GFX10) + key.u.hs_wave32 = sctx->queued.named.hs->wave_size == 32; + } if (HAS_GS) key.u.gs = 1; - if (NGG) + if (NGG) { key.index |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ctx_reg.ngg.vgt_stages.index; + } else if (GFX_VERSION >= GFX10) { + if (HAS_GS) { + key.u.gs_wave32 = sctx->shader.gs.current->wave_size == 32; + key.u.vs_wave32 = sctx->shader.gs.cso->gs_copy_shader->wave_size == 32; + } else { + key.u.vs_wave32 = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->wave_size == 32; + } + } struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index]; if (unlikely(!*pm4)) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 44cd966a873..478e8d5bc85 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1342,6 +1342,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader shader->ctx_reg.ngg.vgt_stages.u.ngg = 1; shader->ctx_reg.ngg.vgt_stages.u.streamout = gs_sel->so.num_outputs; shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader); + shader->ctx_reg.ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32; } static void si_emit_shader_vs(struct si_context *sctx) @@ -4057,10 +4058,12 @@ struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, union if (screen->info.chip_class >= GFX9) stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2); - if (screen->info.chip_class >= GFX10 && screen->ge_wave_size == 32) { - stages |= S_028B54_HS_W32_EN(1) | - S_028B54_GS_W32_EN(key.u.ngg) | /* legacy GS only supports Wave64 */ - S_028B54_VS_W32_EN(1); + if (screen->info.chip_class >= GFX10) { + stages |= S_028B54_HS_W32_EN(key.u.hs_wave32) | + S_028B54_GS_W32_EN(key.u.gs_wave32) | + S_028B54_VS_W32_EN(key.u.vs_wave32); + /* Legacy GS only supports Wave64. Read it as an implication. */ + assert(!(key.u.gs && !key.u.ngg) || !key.u.gs_wave32); } si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);