diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index c65d7dbdf08..3823803b372 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -837,35 +837,6 @@ union si_vgt_param_key { uint16_t index; }; -#define SI_NUM_VGT_STAGES_KEY_BITS 6 -#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS) - -/* The VGT_SHADER_STAGES key used to index the table of precomputed values. - * Some fields are set by state-change calls, most are set by draw_vbo. - */ -union si_vgt_stages_key { - struct { -#if UTIL_ARCH_LITTLE_ENDIAN - uint8_t tess : 1; - uint8_t gs : 1; - uint8_t ngg_gs_fast_launch : 1; - uint8_t ngg_passthrough : 1; - uint8_t ngg : 1; /* gfx10+ */ - uint8_t streamout : 1; /* only used with NGG */ - uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; -#else /* UTIL_ARCH_BIG_ENDIAN */ - uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; - uint8_t streamout : 1; - uint8_t ngg : 1; - uint8_t ngg_passthrough : 1; - uint8_t ngg_gs_fast_launch : 1; - uint8_t gs : 1; - uint8_t tess : 1; -#endif - } u; - uint8_t index; -}; - struct si_texture_handle { unsigned desc_slot; bool desc_dirty; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index e943347f3ba..b14a9a27f28 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -729,6 +729,35 @@ struct gfx9_gs_info { unsigned esgs_ring_size; /* in bytes */ }; +#define SI_NUM_VGT_STAGES_KEY_BITS 6 +#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS) + +/* The VGT_SHADER_STAGES key used to index the table of precomputed values. + * Some fields are set by state-change calls, most are set by draw_vbo. + */ +union si_vgt_stages_key { + struct { +#if UTIL_ARCH_LITTLE_ENDIAN + uint8_t tess : 1; + uint8_t gs : 1; + uint8_t ngg_gs_fast_launch : 1; + uint8_t ngg_passthrough : 1; + uint8_t ngg : 1; /* gfx10+ */ + uint8_t streamout : 1; /* only used with NGG */ + uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; +#else /* UTIL_ARCH_BIG_ENDIAN */ + uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; + uint8_t streamout : 1; + uint8_t ngg : 1; + uint8_t ngg_passthrough : 1; + uint8_t ngg_gs_fast_launch : 1; + uint8_t gs : 1; + uint8_t tess : 1; +#endif + } u; + uint8_t index; +}; + struct si_shader { struct si_compiler_ctx_state compiler_ctx_state; @@ -812,6 +841,7 @@ struct si_shader { unsigned pa_cl_ngg_cntl; unsigned vgt_gs_max_vert_out; /* for API GS */ unsigned ge_pc_alloc; /* uconfig register */ + union si_vgt_stages_key vgt_stages; } ngg; struct { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index e66dc69bd3f..4b76d0cd6c5 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -178,16 +178,8 @@ static bool si_update_shaders(struct si_context *sctx) key.u.tess = 1; if (HAS_GS) key.u.gs = 1; - - if (NGG) { - struct si_shader *vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current; - - key.u.ngg = 1; - key.u.streamout = !!si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso->so.num_outputs; - /* These must be done after the shader variant is selected. */ - key.u.ngg_passthrough = gfx10_is_ngg_passthrough(vs); - key.u.ngg_gs_fast_launch = !!(vs->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL); - } + if (NGG) + key.index |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ctx_reg.ngg.vgt_stages.index; struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index]; if (unlikely(!*pm4)) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 358ae4a91d8..d04f538052a 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1346,6 +1346,12 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1); } + + shader->ctx_reg.ngg.vgt_stages.u.ngg = 1; + shader->ctx_reg.ngg.vgt_stages.u.streamout = gs_sel->so.num_outputs; + shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader); + shader->ctx_reg.ngg.vgt_stages.u.ngg_gs_fast_launch = + !!(shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL); } static void si_emit_shader_vs(struct si_context *sctx)