radeonsi/gfx9: rework the gfx9 scissor bug workaround (v2)
This is needed to track context rolls caused by streamout and ACQUIRE_MEM,
because ACQUIRE_MEM can occur outside of draw calls.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110355

v2: squashed patches and did more rework

Cc: 19.0 <mesa-stable@lists.freedesktop.org>
This commit is contained in:
parent bc0d924507
commit 440135e5a0

8 changed files with 68 additions and 48 deletions
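The heart of the change is replacing the per-draw `context_roll_counter` with a sticky `context_roll` flag that any code path can set, including paths that run between draw calls (streamout end, ACQUIRE_MEM). The old counter was zeroed at the start of state emission, so rolls recorded outside of draws could be missed; the flag is cleared only at the end of a draw. A minimal standalone sketch of the idea, with hypothetical names standing in for the real driver state:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for si_context; not the real struct. */
struct ctx {
	bool context_roll; /* sticky until the next draw clears it */
};

/* Anything that writes context registers records the roll, even
 * between draw calls (e.g. streamout end, ACQUIRE_MEM). */
static void emit_between_draws(struct ctx *c)
{
	c->context_roll = true;
}

static void draw(struct ctx *c)
{
	if (c->context_roll)
		printf("re-emit PA_SC_VPORT_SCISSOR_* (Vega10/Raven)\n");
	/* ... emit draw packets ... */
	c->context_roll = false; /* cleared only after the draw */
}

int main(void)
{
	struct ctx c = { false };
	emit_between_draws(&c); /* the old per-draw counter lost this roll */
	draw(&c);               /* flag survives -> scissors re-emitted */
	draw(&c);               /* no roll since last draw -> nothing extra */
	return 0;
}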
src/gallium/drivers/radeonsi/si_pipe.c

@@ -1097,6 +1097,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 #include "si_debug_options.h"
 	}
 
+	sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
+					sscreen->info.family == CHIP_RAVEN;
 	sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
 					    sscreen->info.family <= CHIP_POLARIS12) ||
 					   sscreen->info.family == CHIP_VEGA10 ||
src/gallium/drivers/radeonsi/si_pipe.h

@@ -468,6 +468,7 @@ struct si_screen {
 	bool				has_out_of_order_rast;
 	bool				assume_no_z_fights;
 	bool				commutative_blend_add;
+	bool				has_gfx9_scissor_bug;
 	bool				has_msaa_sample_loc_bug;
 	bool				has_ls_vgpr_init_bug;
 	bool				has_dcc_constant_encode;
@@ -1075,7 +1076,7 @@ struct si_context {
 	unsigned			num_resident_handles;
 	uint64_t			num_alloc_tex_transfer_bytes;
 	unsigned			last_tex_ps_draw_ratio; /* for query */
-	unsigned			context_roll_counter;
+	unsigned			context_roll;
 
 	/* Queries. */
 	/* Maintain the list of active queries for pausing between IBs. */
src/gallium/drivers/radeonsi/si_state.c

@@ -256,7 +256,7 @@ static void si_emit_cb_render_state(struct si_context *sctx)
 					   sx_blend_opt_control);
 	}
 	if (initial_cdw != cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /*
@@ -793,7 +793,7 @@ static void si_emit_clip_regs(struct si_context *sctx)
 				    S_028810_CLIP_DISABLE(window_space));
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /*
@@ -1455,7 +1455,7 @@ static void si_emit_db_render_state(struct si_context *sctx)
 				   SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /*
@@ -3544,7 +3544,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
 				   SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1);
 
 	if (initial_cdw != cs->current.cdw) {
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 
 		/* GFX9: Flush DFSM when the AA mode changes. */
 		if (sctx->screen->dfsm_allowed) {
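Every hunk in this file follows the same pattern: `cdw` is the command stream's count of written dwords, and the tracked-register helpers skip redundant writes, so comparing `cdw` before and after tells whether any register was actually emitted, and hence whether the hardware rolled the context. Only what gets recorded changes. A standalone sketch of the pattern, with hypothetical bookkeeping in place of the real `radeon_cmdbuf`:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical command-stream state; not the real radeon_cmdbuf. */
struct ctx {
	unsigned cdw;          /* count of dwords written so far */
	bool context_roll;
	unsigned shadowed_reg; /* last value written, for redundancy elision */
};

/* Write a context register only if its value actually changes,
 * mimicking what radeon_opt_set_context_reg() does. */
static void opt_set_context_reg(struct ctx *c, unsigned value)
{
	if (c->shadowed_reg == value)
		return;        /* redundant write elided: cdw stays put */
	c->shadowed_reg = value;
	c->cdw += 2;           /* header dword + value dword */
}

static void emit_some_state(struct ctx *c, unsigned value)
{
	unsigned initial_cdw = c->cdw;

	opt_set_context_reg(c, value);

	/* If the write pointer moved, at least one register was written
	 * and the hardware rolled the context. */
	if (initial_cdw != c->cdw)
		c->context_roll = true; /* was: context_roll_counter++ */
}

int main(void)
{
	struct ctx c = { 0, false, 0 };
	emit_some_state(&c, 1); /* value changes -> roll recorded */
	printf("roll after change: %d\n", c.context_roll);
	c.context_roll = false;
	emit_some_state(&c, 1); /* redundant -> no roll */
	printf("roll after redundant write: %d\n", c.context_roll);
	return 0;
}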
src/gallium/drivers/radeonsi/si_state_binning.c

@@ -321,7 +321,7 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
 			       S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
 			       S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 void si_emit_dpbb_state(struct si_context *sctx)
@@ -443,5 +443,5 @@ void si_emit_dpbb_state(struct si_context *sctx)
 			       S_028060_PUNCHOUT_MODE(punchout_mode) |
 			       S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
src/gallium/drivers/radeonsi/si_state_draw.c

@@ -66,7 +66,7 @@ static unsigned si_conv_pipe_prim(unsigned mode)
  * The information about LDS and other non-compile-time parameters is then
  * written to userdata SGPRs.
  */
-static bool si_emit_derived_tess_state(struct si_context *sctx,
+static void si_emit_derived_tess_state(struct si_context *sctx,
 				       const struct pipe_draw_info *info,
 				       unsigned *num_patches)
 {
@@ -110,7 +110,7 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
 	    (!has_primid_instancing_bug ||
 	     (sctx->last_tess_uses_primid == tess_uses_primid))) {
 		*num_patches = sctx->last_num_patches;
-		return false;
+		return;
 	}
 
 	sctx->last_ls = ls_current;
@@ -305,9 +305,8 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
 					  ls_hs_config);
 		}
 		sctx->last_ls_hs_config = ls_hs_config;
-		return true; /* true if the context rolls */
+		sctx->context_roll = true;
 	}
-	return false;
 }
 
 static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
@@ -541,7 +540,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 }
 
 /* rast_prim is the primitive type after GS. */
-static bool si_emit_rasterizer_prim_state(struct si_context *sctx)
+static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	enum pipe_prim_type rast_prim = sctx->current_rast_prim;
@@ -549,11 +548,11 @@ static bool si_emit_rasterizer_prim_state(struct si_context *sctx)
 
 	/* Skip this if not rendering lines. */
 	if (!util_prim_is_lines(rast_prim))
-		return false;
+		return;
 
 	if (rast_prim == sctx->last_rast_prim &&
 	    rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
-		return false;
+		return;
 
 	/* For lines, reset the stipple pattern at each primitive. Otherwise,
 	 * reset the stipple pattern at each packet (line strips, line loops).
@@ -564,7 +563,7 @@ static bool si_emit_rasterizer_prim_state(struct si_context *sctx)
 
 	sctx->last_rast_prim = rast_prim;
 	sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
-	return true; /* true if the context rolls */
+	sctx->context_roll = true;
 }
 
 static void si_emit_vs_state(struct si_context *sctx,
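`si_emit_derived_tess_state()` and `si_emit_rasterizer_prim_state()` previously reported rolls through their return value, which every caller had to aggregate; they now return void and record the roll where it happens. A minimal before/after sketch of that refactor, with hypothetical names:

#include <stdbool.h>

struct ctx { bool context_roll; };

/* Before (sketch): the emitter returned "did the context roll" and
 * every caller had to OR the results together. */
static bool emit_state_old(void)
{
	/* ... set registers ... */
	return true; /* true if the context rolls */
}

/* After (sketch): the emitter records the roll itself. */
static void emit_state_new(struct ctx *c)
{
	/* ... set registers ... */
	c->context_roll = true;
}

int main(void)
{
	struct ctx c = { false };
	bool rolled = emit_state_old(); /* easy to drop on the floor */
	emit_state_new(&c);             /* cannot be forgotten */
	return (rolled && c.context_roll) ? 0 : 1;
}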
@@ -659,6 +658,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
 		radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
 				       info->restart_index);
 		sctx->last_restart_index = info->restart_index;
+		sctx->context_roll = true;
 	}
 }
 
@@ -886,6 +886,11 @@ static void si_emit_surface_sync(struct si_context *sctx,
 		radeon_emit(cs, 0);          /* CP_COHER_BASE */
 		radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
 	}
+
+	/* ACQUIRE_MEM has an implicit context roll if the current context
+	 * is busy. */
+	if (sctx->has_graphics)
+		sctx->context_roll = true;
 }
 
 void si_emit_cache_flush(struct si_context *sctx)
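ACQUIRE_MEM is also emitted by cache flushes that happen between draws, which is exactly why the sticky flag is needed. The `has_graphics` guard presumably skips compute-only contexts, where there is no graphics context to roll; a small sketch under that assumption:

#include <stdbool.h>

/* Hypothetical context; has_graphics mirrors the real field that says
 * whether this context can submit graphics work at all (assumption:
 * compute-only contexts exist and have it unset). */
struct ctx {
	bool has_graphics;
	bool context_roll;
};

/* Sketch of the si_emit_surface_sync() change. */
static void emit_surface_sync(struct ctx *c)
{
	/* ... emit the ACQUIRE_MEM packet ... */
	if (c->has_graphics)
		c->context_roll = true;
}

int main(void)
{
	struct ctx gfx = { true, false }, comp = { false, false };
	emit_surface_sync(&gfx);  /* graphics context: roll recorded */
	emit_surface_sync(&comp); /* compute-only: nothing to roll */
	return (gfx.context_roll && !comp.context_roll) ? 0 : 1;
}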
@@ -1213,26 +1218,10 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
 			       unsigned skip_atom_mask)
 {
 	unsigned num_patches = 0;
-	/* Vega10/Raven scissor bug workaround. When any context register is
-	 * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
-	 * registers must be written too.
-	 */
-	bool handle_scissor_bug = (sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
-				  !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors);
-	bool context_roll = false; /* set correctly for GFX9 only */
 
-	context_roll |= si_emit_rasterizer_prim_state(sctx);
+	si_emit_rasterizer_prim_state(sctx);
 	if (sctx->tes_shader.cso)
-		context_roll |= si_emit_derived_tess_state(sctx, info, &num_patches);
-
-	if (handle_scissor_bug &&
-	    (info->count_from_stream_output ||
-	     sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
-	     sctx->dirty_states & si_states_that_always_roll_context() ||
-	     si_prim_restart_index_changed(sctx, info)))
-		context_roll = true;
-
-	sctx->context_roll_counter = 0;
+		si_emit_derived_tess_state(sctx, info, &num_patches);
 
 	/* Emit state atoms. */
 	unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
@@ -1255,12 +1244,6 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
 	}
 	sctx->dirty_states = 0;
 
-	if (handle_scissor_bug &&
-	    (context_roll || sctx->context_roll_counter)) {
-		sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-		sctx->atoms.s.scissors.emit(sctx);
-	}
-
 	/* Emit draw states. */
 	si_emit_vs_state(sctx, info);
 	si_emit_draw_registers(sctx, info, num_patches);
@@ -1462,6 +1445,22 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 	if (!si_upload_vertex_buffer_descriptors(sctx))
 		goto return_cleanup;
 
+	/* Vega10/Raven scissor bug workaround. When any context register is
+	 * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
+	 * registers must be written too.
+	 */
+	bool has_gfx9_scissor_bug = sctx->screen->has_gfx9_scissor_bug;
+	unsigned masked_atoms = 0;
+
+	if (has_gfx9_scissor_bug) {
+		masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.scissors);
+
+		if (info->count_from_stream_output ||
+		    sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
+		    sctx->dirty_states & si_states_that_always_roll_context())
+			sctx->context_roll = true;
+	}
+
 	/* Use optimal packet order based on whether we need to sync the pipeline. */
 	if (unlikely(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
 				    SI_CONTEXT_FLUSH_AND_INV_DB |
@@ -1472,8 +1471,6 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 		 * Then draw and prefetch at the end. This ensures that the time
 		 * the CUs are idle is very short.
 		 */
-		unsigned masked_atoms = 0;
-
 		if (unlikely(sctx->flags & SI_CONTEXT_FLUSH_FOR_RENDER_COND))
 			masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
 
@@ -1487,6 +1484,13 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 
 		if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond))
 			sctx->atoms.s.render_cond.emit(sctx);
+
+		if (has_gfx9_scissor_bug &&
+		    (sctx->context_roll ||
+		     si_is_atom_dirty(sctx, &sctx->atoms.s.scissors))) {
+			sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+			sctx->atoms.s.scissors.emit(sctx);
+		}
 		sctx->dirty_atoms = 0;
 
 		si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
@@ -1511,7 +1515,16 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 		if (!si_upload_graphics_shader_descriptors(sctx))
 			return;
 
-		si_emit_all_states(sctx, info, 0);
+		si_emit_all_states(sctx, info, masked_atoms);
+
+		if (has_gfx9_scissor_bug &&
+		    (sctx->context_roll ||
+		     si_is_atom_dirty(sctx, &sctx->atoms.s.scissors))) {
+			sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+			sctx->atoms.s.scissors.emit(sctx);
+		}
+
 		sctx->dirty_atoms = 0;
 
 		si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
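Putting the draw path together: the scissors atom is masked out of the generic atom loop so it can be emitted last, after every other state emitter has had a chance to set `context_roll`. A standalone sketch of that ordering, with hypothetical atom bookkeeping in place of `si_get_atom_bit()`/`si_is_atom_dirty()`:

#include <stdbool.h>

/* Hypothetical atom bit; the real driver derives bits with si_get_atom_bit(). */
enum { ATOM_SCISSORS = 1u << 0 };

struct ctx {
	bool has_gfx9_scissor_bug;
	bool context_roll;
	unsigned dirty_atoms;
};

static void emit_scissors(struct ctx *c)
{
	/* ... write PA_SC_VPORT_SCISSOR_* for all viewports ... */
	c->dirty_atoms &= ~ATOM_SCISSORS;
}

static void emit_all_states(struct ctx *c, unsigned masked_atoms)
{
	unsigned mask = c->dirty_atoms & ~masked_atoms;
	(void)mask; /* ... emit each dirty, unmasked atom; any of them
		     * may set c->context_roll ... */
}

/* Sketch of the reworked si_draw_vbo() ordering. */
static void draw_vbo(struct ctx *c)
{
	unsigned masked_atoms = 0;

	/* Keep scissors out of the generic loop; emit them last, once
	 * the final roll status for this draw is known. */
	if (c->has_gfx9_scissor_bug)
		masked_atoms |= ATOM_SCISSORS;

	emit_all_states(c, masked_atoms);

	if (c->has_gfx9_scissor_bug &&
	    (c->context_roll || (c->dirty_atoms & ATOM_SCISSORS)))
		emit_scissors(c);

	c->dirty_atoms = 0;
	/* ... emit draw packets ... */
	c->context_roll = false; /* begin tracking rolls for the next draw */
}

int main(void)
{
	/* A roll recorded between draws (e.g. ACQUIRE_MEM) forces the
	 * scissor re-emit even though the scissors atom isn't dirty. */
	struct ctx c = { true, true, 0 };
	draw_vbo(&c);
	return 0;
}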
@@ -1520,6 +1533,9 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 			cik_emit_prefetch_L2(sctx, false);
 	}
 
+	/* Clear the context roll flag after the draw call. */
+	sctx->context_roll = false;
+
 	if (unlikely(sctx->current_saved_cs)) {
 		si_trace_emit(sctx);
 		si_log_draw_state(sctx, sctx->log);
src/gallium/drivers/radeonsi/si_state_shaders.c

@@ -576,7 +576,7 @@ static void si_emit_shader_es(struct si_context *sctx)
 					   shader->vgt_vertex_reuse_block_cntl);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@@ -825,7 +825,7 @@ static void si_emit_shader_gs(struct si_context *sctx)
 	}
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@@ -1002,7 +1002,7 @@ static void si_emit_shader_vs(struct si_context *sctx)
 					   shader->vgt_vertex_reuse_block_cntl);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /**
@@ -1194,7 +1194,7 @@ static void si_emit_shader_ps(struct si_context *sctx)
 					   shader->ctx_reg.ps.cb_shader_mask);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 static void si_shader_ps(struct si_shader *shader)
@@ -2877,7 +2877,7 @@ static void si_emit_spi_map(struct si_context *sctx)
 				    sctx->tracked_regs.spi_ps_input_cntl, num_interp);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /**
src/gallium/drivers/radeonsi/si_state_streamout.c

@@ -303,6 +303,7 @@ void si_emit_streamout_end(struct si_context *sctx)
 		 * buffer bound. This ensures that the primitives-emitted query
 		 * won't increment. */
 		radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+		sctx->context_roll = true;
 
 		t[i]->buf_filled_size_valid = true;
 	}
src/gallium/drivers/radeonsi/si_state_viewport.c

@@ -283,7 +283,7 @@ static void si_emit_guardband(struct si_context *ctx)
 			       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
 						   vp_as_scissor.quant_mode));
 	if (initial_cdw != ctx->gfx_cs->current.cdw)
-		ctx->context_roll_counter++;
+		ctx->context_roll = true;
 }
 
 static void si_emit_scissors(struct si_context *ctx)