mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 22:38:05 +02:00
radeonsi: merge pm4 state and atom emit loops into one
This merges both loops in si_draw by tracking which pm4 states are dirty using the state atom mechanism used for other states. pm4 states now have to set their own emit function. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24732>
This commit is contained in:
parent
c21ce04014
commit
3986f27396
5 changed files with 47 additions and 45 deletions
|
|
@@ -1033,11 +1033,9 @@ struct si_context {
|
|||
unsigned last_num_draw_calls;
|
||||
unsigned flags; /* flush flags */
|
||||
|
||||
/* Atoms (direct states). */
|
||||
/* Atoms (state emit functions). */
|
||||
union si_state_atoms atoms;
|
||||
unsigned dirty_atoms; /* mask */
|
||||
/* PM4 states (precomputed immutable states) */
|
||||
unsigned dirty_states;
|
||||
uint64_t dirty_atoms; /* mask */
|
||||
union si_state queued;
|
||||
union si_state emitted;
|
||||
/* Gfx11+: Buffered SH registers for SET_SH_REG_PAIRS_PACKED*. */
|
||||
|
|
@@ -1759,14 +1757,14 @@ static inline unsigned si_get_minimum_num_gfx_cs_dwords(struct si_context *sctx,
|
|||
return 2048 + sctx->num_cs_dw_queries_suspend + num_draws * 10;
|
||||
}
|
||||
|
||||
static inline unsigned si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
|
||||
static inline uint64_t si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
|
||||
{
|
||||
return 1 << (atom - sctx->atoms.array);
|
||||
return 1ull << (atom - sctx->atoms.array);
|
||||
}
|
||||
|
||||
static inline void si_set_atom_dirty(struct si_context *sctx, struct si_atom *atom, bool dirty)
|
||||
{
|
||||
unsigned bit = si_get_atom_bit(sctx, atom);
|
||||
uint64_t bit = si_get_atom_bit(sctx, atom);
|
||||
|
||||
if (dirty)
|
||||
sctx->dirty_atoms |= bit;
|
||||
|
|
|
|||
|
|
@@ -316,7 +316,7 @@ void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsi
|
|||
|
||||
if (sctx->queued.array[idx] == state) {
|
||||
sctx->queued.array[idx] = NULL;
|
||||
sctx->dirty_states &= ~BITFIELD_BIT(idx);
|
||||
sctx->dirty_atoms &= ~BITFIELD64_BIT(idx);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -361,7 +361,7 @@ void si_pm4_reset_emitted(struct si_context *sctx)
|
|||
|
||||
for (unsigned i = 0; i < SI_NUM_STATES; i++) {
|
||||
if (sctx->queued.array[i])
|
||||
sctx->dirty_states |= BITFIELD_BIT(i);
|
||||
sctx->dirty_atoms |= BITFIELD64_BIT(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -5413,6 +5413,9 @@ void si_init_state_compute_functions(struct si_context *sctx)
|
|||
|
||||
void si_init_state_functions(struct si_context *sctx)
|
||||
{
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(sctx->atoms.s.pm4_states); i++)
|
||||
sctx->atoms.s.pm4_states[i].emit = si_pm4_emit_state;
|
||||
|
||||
sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
|
||||
sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
|
||||
sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
|
||||
|
|
|
|||
|
|
@@ -176,18 +176,13 @@ union si_state {
|
|||
};
|
||||
|
||||
#define SI_STATE_IDX(name) (offsetof(union si_state, named.name) / sizeof(struct si_pm4_state *))
|
||||
#define SI_STATE_BIT(name) (1 << SI_STATE_IDX(name))
|
||||
#define SI_STATE_BIT(name) (1ull << SI_STATE_IDX(name))
|
||||
#define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
|
||||
|
||||
static inline unsigned si_states_that_always_roll_context(void)
|
||||
{
|
||||
return (SI_STATE_BIT(blend) | SI_STATE_BIT(rasterizer) | SI_STATE_BIT(dsa) |
|
||||
SI_STATE_BIT(poly_offset));
|
||||
}
|
||||
|
||||
union si_state_atoms {
|
||||
struct si_atoms_s {
|
||||
/* The order matters. */
|
||||
/* This must be first. */
|
||||
struct si_atom pm4_states[SI_NUM_STATES];
|
||||
struct si_atom render_cond;
|
||||
struct si_atom streamout_begin;
|
||||
struct si_atom streamout_enable; /* must be after streamout_begin */
|
||||
|
|
@@ -217,15 +212,17 @@ union si_state_atoms {
|
|||
struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)];
|
||||
};
|
||||
|
||||
#define SI_ATOM_BIT(name) (1 << (offsetof(union si_state_atoms, s.name) / sizeof(struct si_atom)))
|
||||
#define SI_ATOM_BIT(name) (1ull << (offsetof(union si_state_atoms, s.name) / sizeof(struct si_atom)))
|
||||
#define SI_NUM_ATOMS (sizeof(union si_state_atoms) / sizeof(struct si_atom))
|
||||
|
||||
static inline unsigned si_atoms_that_always_roll_context(void)
|
||||
static inline uint64_t si_atoms_that_always_roll_context(void)
|
||||
{
|
||||
return (SI_ATOM_BIT(streamout_begin) | SI_ATOM_BIT(streamout_enable) | SI_ATOM_BIT(framebuffer) |
|
||||
SI_ATOM_BIT(sample_locations) | SI_ATOM_BIT(sample_mask) | SI_ATOM_BIT(blend_color) |
|
||||
SI_ATOM_BIT(clip_state) | SI_ATOM_BIT(scissors) | SI_ATOM_BIT(viewports) |
|
||||
SI_ATOM_BIT(stencil_ref) | SI_ATOM_BIT(scratch_state) | SI_ATOM_BIT(window_rectangles));
|
||||
return SI_STATE_BIT(blend) | SI_STATE_BIT(rasterizer) | SI_STATE_BIT(dsa) |
|
||||
SI_STATE_BIT(poly_offset) |
|
||||
SI_ATOM_BIT(streamout_begin) | SI_ATOM_BIT(streamout_enable) | SI_ATOM_BIT(framebuffer) |
|
||||
SI_ATOM_BIT(sample_locations) | SI_ATOM_BIT(sample_mask) | SI_ATOM_BIT(blend_color) |
|
||||
SI_ATOM_BIT(clip_state) | SI_ATOM_BIT(scissors) | SI_ATOM_BIT(viewports) |
|
||||
SI_ATOM_BIT(stencil_ref) | SI_ATOM_BIT(scratch_state) | SI_ATOM_BIT(window_rectangles);
|
||||
}
|
||||
|
||||
struct si_shader_data {
|
||||
|
|
@@ -516,9 +513,9 @@ struct si_buffer_resources {
|
|||
do { \
|
||||
(sctx)->queued.named.member = (value); \
|
||||
if (value && value != (sctx)->emitted.named.member) \
|
||||
(sctx)->dirty_states |= SI_STATE_BIT(member); \
|
||||
(sctx)->dirty_atoms |= SI_STATE_BIT(member); \
|
||||
else \
|
||||
(sctx)->dirty_states &= ~SI_STATE_BIT(member); \
|
||||
(sctx)->dirty_atoms &= ~SI_STATE_BIT(member); \
|
||||
} while (0)
|
||||
|
||||
/* si_descriptors.c */
|
||||
|
|
|
|||
|
|
@@ -1936,28 +1936,33 @@ static void si_get_draw_start_count(struct si_context *sctx, const struct pipe_d
|
|||
}
|
||||
|
||||
ALWAYS_INLINE
|
||||
static void si_emit_all_states(struct si_context *sctx, unsigned skip_atom_mask)
|
||||
static void si_emit_all_states(struct si_context *sctx, uint64_t skip_atom_mask)
|
||||
{
|
||||
/* Emit state atoms. */
|
||||
unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
|
||||
if (mask) {
|
||||
do {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
sctx->atoms.array[i].emit(sctx, i);
|
||||
} while (mask);
|
||||
/* Emit states by calling their emit functions. */
|
||||
uint64_t dirty = sctx->dirty_atoms & ~skip_atom_mask;
|
||||
|
||||
if (dirty) {
|
||||
sctx->dirty_atoms &= skip_atom_mask;
|
||||
}
|
||||
|
||||
/* Emit states. */
|
||||
mask = sctx->dirty_states;
|
||||
if (mask) {
|
||||
do {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
si_pm4_emit_state(sctx, i);
|
||||
} while (mask);
|
||||
/* u_bit_scan64 is too slow on i386. */
|
||||
if (sizeof(void*) == 8) {
|
||||
do {
|
||||
unsigned i = u_bit_scan64(&dirty);
|
||||
sctx->atoms.array[i].emit(sctx, i);
|
||||
} while (dirty);
|
||||
} else {
|
||||
unsigned dirty_lo = dirty;
|
||||
unsigned dirty_hi = dirty >> 32;
|
||||
|
||||
sctx->dirty_states = 0;
|
||||
while (dirty_lo) {
|
||||
unsigned i = u_bit_scan(&dirty_lo);
|
||||
sctx->atoms.array[i].emit(sctx, i);
|
||||
}
|
||||
while (dirty_hi) {
|
||||
unsigned i = 32 + u_bit_scan(&dirty_hi);
|
||||
sctx->atoms.array[i].emit(sctx, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -2230,7 +2235,7 @@ static void si_draw(struct pipe_context *ctx,
|
|||
* It's better to draw before prefetches because we want to start fetching indices before
|
||||
* shaders. The idea is to minimize the time when the CUs are idle.
|
||||
*/
|
||||
unsigned masked_atoms = 0;
|
||||
uint64_t masked_atoms = 0;
|
||||
if (unlikely(sctx->flags & SI_CONTEXT_FLUSH_FOR_RENDER_COND)) {
|
||||
/* The render condition state should be emitted after cache flushes. */
|
||||
masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
|
||||
|
|
@@ -2247,8 +2252,7 @@ static void si_draw(struct pipe_context *ctx,
|
|||
gfx9_scissor_bug = true;
|
||||
|
||||
if ((!IS_DRAW_VERTEX_STATE && indirect && indirect->count_from_stream_output) ||
|
||||
sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
|
||||
sctx->dirty_states & si_states_that_always_roll_context())
|
||||
sctx->dirty_atoms & si_atoms_that_always_roll_context())
|
||||
sctx->context_roll = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue