diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index bec1c1efc40..8bbe4ca5aa7 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -532,7 +532,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) /* Invalidate various draw states so that they are emitted before * the first draw call. */ - si_invalidate_draw_constants(ctx); + ctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN; ctx->last_index_size = -1; /* Primitive restart is set to false by the gfx preamble on GFX11+. */ ctx->last_primitive_restart_en = ctx->gfx_level >= GFX11 ? false : -1; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 154ab2bf138..57f9b62cb68 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -31,12 +31,10 @@ struct ac_llvm_compiler; /* special primitive types */ #define SI_PRIM_RECTANGLE_LIST MESA_PRIM_COUNT -/* The base vertex and primitive restart can be any number, but we must pick - * one which will mean "unknown" for the purpose of state tracking and - * the number shouldn't be a commonly-used one. */ -#define SI_BASE_VERTEX_UNKNOWN INT_MIN -#define SI_START_INSTANCE_UNKNOWN ((unsigned)INT_MIN) -#define SI_DRAW_ID_UNKNOWN ((unsigned)INT_MIN) +/* The primitive restart index can be any number, but we must pick one which will + * mean "unknown" for the purpose of state tracking and the number shouldn't + * be a commonly-used one. 
+ */ #define SI_RESTART_INDEX_UNKNOWN ((unsigned)INT_MIN) #define SI_INSTANCE_COUNT_UNKNOWN ((unsigned)INT_MIN) #define SI_NUM_SMOOTH_AA_SAMPLES 4 @@ -1167,11 +1165,7 @@ struct si_context { bool disable_instance_packing : 1; uint16_t ngg_culling; unsigned last_index_size; - int last_base_vertex; - unsigned last_start_instance; unsigned last_instance_count; - unsigned last_drawid; - unsigned last_sh_base_reg; int last_primitive_restart_en; unsigned last_restart_index; unsigned last_prim; @@ -1753,19 +1747,6 @@ static inline void si_context_add_resource_size(struct si_context *sctx, struct } } -static inline void si_invalidate_draw_sh_constants(struct si_context *sctx) -{ - sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN; - sctx->last_start_instance = SI_START_INSTANCE_UNKNOWN; - sctx->last_drawid = SI_DRAW_ID_UNKNOWN; -} - -static inline void si_invalidate_draw_constants(struct si_context *sctx) -{ - si_invalidate_draw_sh_constants(sctx); - sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN; -} - static inline unsigned si_get_atom_bit(struct si_context *sctx, struct si_atom *atom) { return 1 << (atom - sctx->atoms.array); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 3b8284c8e69..eba46d752c2 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -341,6 +341,18 @@ enum si_tracked_other_reg { SI_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_ADDR, SI_TRACKED_SPI_SHADER_USER_DATA_HS__VS_STATE_BITS, /* GFX6-8 */ + SI_TRACKED_SPI_SHADER_USER_DATA_LS__BASE_VERTEX, + SI_TRACKED_SPI_SHADER_USER_DATA_LS__DRAWID, + SI_TRACKED_SPI_SHADER_USER_DATA_LS__START_INSTANCE, + + SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX, + SI_TRACKED_SPI_SHADER_USER_DATA_ES__DRAWID, + SI_TRACKED_SPI_SHADER_USER_DATA_ES__START_INSTANCE, + + SI_TRACKED_SPI_SHADER_USER_DATA_VS__BASE_VERTEX, /* GFX6-10 */ + SI_TRACKED_SPI_SHADER_USER_DATA_VS__DRAWID, /* GFX6-10 */ + 
SI_TRACKED_SPI_SHADER_USER_DATA_VS__START_INSTANCE, /* GFX6-10 */ + SI_TRACKED_COMPUTE_RESOURCE_LIMITS, SI_TRACKED_COMPUTE_NUM_THREAD_X, SI_TRACKED_COMPUTE_NUM_THREAD_Y, @@ -359,6 +371,13 @@ enum si_tracked_other_reg { SI_NUM_TRACKED_OTHER_REGS, }; +/* Masks for 3 consecutive tracked regs (BaseVertex, DrawID, StartInstance), relative to BaseVertex. */ +#define BASEVERTEX_MASK 0x1 +#define DRAWID_MASK 0x2 +#define STARTINSTANCE_MASK 0x4 +#define BASEVERTEX_DRAWID_MASK (BASEVERTEX_MASK | DRAWID_MASK) +#define BASEVERTEX_DRAWID_STARTINSTANCE_MASK (BASEVERTEX_MASK | DRAWID_MASK | STARTINSTANCE_MASK) + struct si_tracked_regs { uint64_t context_reg_saved_mask; uint32_t context_reg_value[SI_NUM_TRACKED_CONTEXT_REGS]; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index f36f3030481..0911407d3b6 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -901,18 +901,24 @@ static void si_emit_tess_io_layout_state(struct si_context *sctx) unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL]; assert(tes_sh_base); - /* These can't be optimized because the user data SGPRs may have different meaning - * without tessellation. (they are VS and ES/GS user data SGPRs) + /* TES (as ES or VS) reuses the BaseVertex and DrawID user SGPRs that are used when + * tessellation is disabled. That's because those user SGPRs are only set in LS + * for tessellation. 
*/ if (sctx->screen->info.has_set_pairs_packets) { - radeon_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, - sctx->tcs_offchip_layout); - radeon_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_ADDR * 4, - sctx->tes_offchip_ring_va_sgpr); + radeon_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, + SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX, + sctx->tcs_offchip_layout); + radeon_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_ADDR * 4, + SI_TRACKED_SPI_SHADER_USER_DATA_ES__DRAWID, + sctx->tes_offchip_ring_va_sgpr); } else { - radeon_set_sh_reg_seq(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2); - radeon_emit(sctx->tcs_offchip_layout); - radeon_emit(sctx->tes_offchip_ring_va_sgpr); + bool has_gs = sctx->ngg || sctx->shader.gs.cso; + + radeon_opt_set_sh_reg2(sctx, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, + has_gs ? SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX + : SI_TRACKED_SPI_SHADER_USER_DATA_VS__BASE_VERTEX, + sctx->tcs_offchip_layout, sctx->tes_offchip_ring_va_sgpr); } radeon_end(); @@ -1678,6 +1684,14 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw unsigned sh_base_reg = si_get_user_data_base(GFX_VERSION, HAS_TESS, HAS_GS, NGG, PIPE_SHADER_VERTEX); bool render_cond_bit = sctx->render_cond_enabled; + unsigned tracked_base_vertex_reg; + + if (HAS_TESS) + tracked_base_vertex_reg = SI_TRACKED_SPI_SHADER_USER_DATA_LS__BASE_VERTEX; + else if (HAS_GS || NGG) + tracked_base_vertex_reg = SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX; + else + tracked_base_vertex_reg = SI_TRACKED_SPI_SHADER_USER_DATA_VS__BASE_VERTEX; if (!IS_DRAW_VERTEX_STATE && indirect) { assert(num_draws == 1); @@ -1692,7 +1706,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw radeon_begin_again(cs); } - si_invalidate_draw_constants(sctx); + /* Invalidate tracked draw constants because DrawIndirect overwrites them. 
*/ + sctx->tracked_regs.other_reg_saved_mask &= + ~(BASEVERTEX_DRAWID_STARTINSTANCE_MASK << tracked_base_vertex_reg); + sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN; radeon_emit(PKT3(PKT3_SET_BASE, 2, 0)); radeon_emit(1); @@ -1766,62 +1783,28 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw if (!is_blit) { /* Prefer SET_SH_REG_PAIRS_PACKED* on Gfx11+. */ if (HAS_PAIRS) { - bool shader_switch = sh_base_reg != sctx->last_sh_base_reg; - - if (shader_switch || - base_vertex != sctx->last_base_vertex || - sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN) { - radeon_push_gfx_sh_reg(sh_base_reg + SI_SGPR_BASE_VERTEX * 4, base_vertex); - sctx->last_base_vertex = base_vertex; + radeon_opt_push_gfx_sh_reg(sh_base_reg + SI_SGPR_BASE_VERTEX * 4, + tracked_base_vertex_reg, base_vertex); + if (set_draw_id) { + radeon_opt_push_gfx_sh_reg(sh_base_reg + SI_SGPR_DRAWID * 4, + tracked_base_vertex_reg + 1, drawid_base); } - - if (set_draw_id && - (shader_switch || - drawid_base != sctx->last_drawid || - sctx->last_drawid == SI_DRAW_ID_UNKNOWN)) { - radeon_push_gfx_sh_reg(sh_base_reg + SI_SGPR_DRAWID * 4, drawid_base); - sctx->last_drawid = drawid_base; - } - - if (set_base_instance && - (shader_switch || - info->start_instance != sctx->last_start_instance || - sctx->last_start_instance == SI_START_INSTANCE_UNKNOWN)) { - radeon_push_gfx_sh_reg(sh_base_reg + SI_SGPR_START_INSTANCE * 4, - info->start_instance); - sctx->last_start_instance = info->start_instance; - } - - sctx->last_sh_base_reg = sh_base_reg; - } else if (base_vertex != sctx->last_base_vertex || - sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN || - (set_base_instance && - (info->start_instance != sctx->last_start_instance || - sctx->last_start_instance == SI_START_INSTANCE_UNKNOWN)) || - (set_draw_id && - (drawid_base != sctx->last_drawid || - sctx->last_drawid == SI_DRAW_ID_UNKNOWN)) || - sh_base_reg != sctx->last_sh_base_reg) { if (set_base_instance) { - 
radeon_set_sh_reg_seq(sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 3); - radeon_emit(base_vertex); - radeon_emit(drawid_base); - radeon_emit(info->start_instance); - - sctx->last_start_instance = info->start_instance; - sctx->last_drawid = drawid_base; - } else if (set_draw_id) { - radeon_set_sh_reg_seq(sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2); - radeon_emit(base_vertex); - radeon_emit(drawid_base); - - sctx->last_drawid = drawid_base; - } else { - radeon_set_sh_reg(sh_base_reg + SI_SGPR_BASE_VERTEX * 4, base_vertex); + radeon_opt_push_gfx_sh_reg(sh_base_reg + SI_SGPR_START_INSTANCE * 4, + tracked_base_vertex_reg + 2, info->start_instance); + } + } else { + if (set_base_instance) { + radeon_opt_set_sh_reg3(sctx, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, + tracked_base_vertex_reg, base_vertex, drawid_base, + info->start_instance); + } else if (set_draw_id) { + radeon_opt_set_sh_reg2(sctx, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, + tracked_base_vertex_reg, base_vertex, drawid_base); + } else { + radeon_opt_set_sh_reg(sctx, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, + tracked_base_vertex_reg, base_vertex); } - - sctx->last_base_vertex = base_vertex; - sctx->last_sh_base_reg = sh_base_reg; } } @@ -1837,7 +1820,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw */ if (is_blit) { /* Re-emit draw constants after we leave u_blitter. */ - si_invalidate_draw_sh_constants(sctx); + sctx->tracked_regs.other_reg_saved_mask &= + ~(BASEVERTEX_DRAWID_STARTINSTANCE_MASK << tracked_base_vertex_reg); /* Blit VS doesn't use BASE_VERTEX, START_INSTANCE, and DRAWID. 
*/ radeon_set_sh_reg_seq(sh_base_reg + SI_SGPR_VS_BLIT_DATA * 4, sctx->num_vs_blit_sgprs); @@ -1886,8 +1870,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */ } if (num_draws > 1) { - sctx->last_base_vertex = draws[num_draws - 1].index_bias; - sctx->last_drawid = drawid_base + num_draws - 1; + sctx->tracked_regs.other_reg_saved_mask &= + ~(BASEVERTEX_DRAWID_MASK << tracked_base_vertex_reg); } } else { /* Only DrawID varies. */ @@ -1904,8 +1888,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw radeon_emit(draws[i].count); radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */ } - if (num_draws > 1) - sctx->last_drawid = drawid_base + num_draws - 1; + if (num_draws > 1) { + sctx->tracked_regs.other_reg_saved_mask &= + ~(DRAWID_MASK << tracked_base_vertex_reg); + } } } else { if (index_bias_varies) { @@ -1923,8 +1909,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw radeon_emit(draws[i].count); radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */ } - if (num_draws > 1) - sctx->last_base_vertex = draws[num_draws - 1].index_bias; + if (num_draws > 1) { + sctx->tracked_regs.other_reg_saved_mask &= + ~(BASEVERTEX_MASK << tracked_base_vertex_reg); + } } else { /* DrawID and BaseVertex are constant. 
*/ if (GFX_VERSION == GFX10) { @@ -1966,8 +1954,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); } if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) { - sctx->last_base_vertex = draws[num_draws - 1].start; - sctx->last_drawid = drawid_base + num_draws - 1; + sctx->tracked_regs.other_reg_saved_mask &= + ~(BASEVERTEX_DRAWID_MASK << tracked_base_vertex_reg); } } else { for (unsigned i = 0; i < num_draws; i++) { @@ -1978,8 +1966,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw radeon_emit(draws[i].count); radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); } - if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) - sctx->last_base_vertex = draws[num_draws - 1].start; + if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) { + sctx->tracked_regs.other_reg_saved_mask &= + ~(BASEVERTEX_MASK << tracked_base_vertex_reg); + } } } }