mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
radeonsi: don't cull front/back faces in the hw if the shader culls them
This reduces the number of context rolls by not setting the CULL_FRONT/CULL_BACK register fields. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33482>
This commit is contained in:
parent
bafab3324e
commit
ce0d213ac8
4 changed files with 36 additions and 5 deletions
|
|
@ -1166,6 +1166,8 @@ struct si_context {
|
|||
/* Emitted draw state. */
|
||||
bool ngg : 1;
|
||||
bool disable_instance_packing : 1;
|
||||
bool fixed_func_face_culling_needed : 1;
|
||||
bool fixed_func_face_culling_has_effect : 1;
|
||||
uint16_t ngg_culling;
|
||||
unsigned last_index_size;
|
||||
unsigned last_instance_count;
|
||||
|
|
|
|||
|
|
@ -1076,8 +1076,6 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
|
|||
(state->fill_back != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_BACK));
|
||||
|
||||
rs->pa_su_sc_mode_cntl = S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
|
||||
S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
|
||||
S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
|
||||
S_028814_FACE(!state->front_ccw) |
|
||||
S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
|
||||
S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
|
||||
|
|
@ -1090,6 +1088,9 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
|
|||
sscreen->info.gfx_level < GFX12 ?
|
||||
polygon_mode_enabled ||
|
||||
rs->perpendicular_end_caps : 0);
|
||||
rs->pa_su_cull_bits = S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
|
||||
S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0);
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX10) {
|
||||
rs->pa_cl_ngg_cntl = S_028838_INDEX_BUF_EDGE_FLAG_ENA(rs->polygon_mode_is_points ||
|
||||
rs->polygon_mode_is_lines) |
|
||||
|
|
@ -1147,6 +1148,22 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
|
|||
static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
|
||||
{
|
||||
struct si_state_rasterizer *state = sctx->queued.named.rasterizer;
|
||||
const unsigned cull_bits = S_028814_CULL_FRONT(1) | S_028814_CULL_BACK(1);
|
||||
unsigned last_pa_su_sc_mode_nctl = sctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SC_MODE_CNTL];
|
||||
unsigned pa_su_sc_mode_cntl;
|
||||
|
||||
if (!sctx->fixed_func_face_culling_has_effect &&
|
||||
(last_pa_su_sc_mode_nctl & ~cull_bits) == state->pa_su_sc_mode_cntl) {
|
||||
/* Keep the previous cull bits because they have no effect. */
|
||||
pa_su_sc_mode_cntl = last_pa_su_sc_mode_nctl;
|
||||
} else if (sctx->fixed_func_face_culling_needed) {
|
||||
pa_su_sc_mode_cntl = state->pa_su_sc_mode_cntl | state->pa_su_cull_bits;
|
||||
} else {
|
||||
pa_su_sc_mode_cntl = state->pa_su_sc_mode_cntl;
|
||||
}
|
||||
|
||||
if (sctx->fixed_func_face_culling_needed)
|
||||
pa_su_sc_mode_cntl |= state->pa_su_cull_bits;
|
||||
|
||||
if (sctx->screen->info.gfx_level >= GFX12) {
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
|
|
@ -1167,7 +1184,7 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
|
|||
gfx12_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, SI_TRACKED_PA_SC_MODE_CNTL_0,
|
||||
state->pa_sc_mode_cntl_0);
|
||||
gfx12_opt_set_context_reg(R_02881C_PA_SU_SC_MODE_CNTL, SI_TRACKED_PA_SU_SC_MODE_CNTL,
|
||||
state->pa_su_sc_mode_cntl);
|
||||
pa_su_sc_mode_cntl);
|
||||
gfx12_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL,
|
||||
state->pa_cl_ngg_cntl);
|
||||
gfx12_opt_set_context_reg(R_028230_PA_SC_EDGERULE, SI_TRACKED_PA_SC_EDGERULE,
|
||||
|
|
@ -1212,7 +1229,7 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
|
|||
gfx11_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, SI_TRACKED_PA_SC_MODE_CNTL_0,
|
||||
state->pa_sc_mode_cntl_0);
|
||||
gfx11_opt_set_context_reg(R_028814_PA_SU_SC_MODE_CNTL, SI_TRACKED_PA_SU_SC_MODE_CNTL,
|
||||
state->pa_su_sc_mode_cntl);
|
||||
pa_su_sc_mode_cntl);
|
||||
gfx11_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL,
|
||||
state->pa_cl_ngg_cntl);
|
||||
gfx11_opt_set_context_reg(R_028230_PA_SC_EDGERULE, SI_TRACKED_PA_SC_EDGERULE,
|
||||
|
|
@ -1257,7 +1274,7 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
|
|||
radeon_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, SI_TRACKED_PA_SC_MODE_CNTL_0,
|
||||
state->pa_sc_mode_cntl_0);
|
||||
radeon_opt_set_context_reg(R_028814_PA_SU_SC_MODE_CNTL,
|
||||
SI_TRACKED_PA_SU_SC_MODE_CNTL, state->pa_su_sc_mode_cntl);
|
||||
SI_TRACKED_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl);
|
||||
if (sctx->gfx_level >= GFX10) {
|
||||
radeon_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL,
|
||||
state->pa_cl_ngg_cntl);
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ struct si_state_rasterizer {
|
|||
unsigned pa_su_line_cntl;
|
||||
unsigned pa_sc_mode_cntl_0;
|
||||
unsigned pa_su_sc_mode_cntl;
|
||||
unsigned pa_su_cull_bits;
|
||||
unsigned pa_cl_ngg_cntl;
|
||||
unsigned pa_sc_edgerule;
|
||||
unsigned pa_su_poly_offset_db_fmt_cntl[3];
|
||||
|
|
|
|||
|
|
@ -249,6 +249,17 @@ static bool si_update_shaders(struct si_context *sctx)
|
|||
|
||||
struct si_shader *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
|
||||
|
||||
bool fixed_func_face_culling_needed = !NGG || !si_shader_culling_enabled(hw_vs);
|
||||
bool fixed_func_face_culling_has_effect = (!HAS_TESS && !HAS_GS) ||
|
||||
hw_vs->selector->rast_prim == MESA_PRIM_TRIANGLES;
|
||||
|
||||
if (sctx->fixed_func_face_culling_needed != fixed_func_face_culling_needed ||
|
||||
sctx->fixed_func_face_culling_has_effect != fixed_func_face_culling_has_effect) {
|
||||
sctx->fixed_func_face_culling_needed = fixed_func_face_culling_needed;
|
||||
sctx->fixed_func_face_culling_has_effect = fixed_func_face_culling_has_effect;
|
||||
sctx->dirty_atoms |= SI_STATE_BIT(rasterizer);
|
||||
}
|
||||
|
||||
if (old_pa_cl_vs_out_cntl != hw_vs->pa_cl_vs_out_cntl)
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue