radeonsi: add more variables into si_pm4_state and rework how it's created

The new fields (screen and is_compute_queue) are added now so that later commits can use them.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23687>
This commit is contained in:
Marek Olšák 2023-06-02 14:48:12 -04:00 committed by Marge Bot
parent 8c7e32fb33
commit 7d2a0bda77
5 changed files with 37 additions and 19 deletions

View file

@ -11,8 +11,7 @@
static void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
{
if (!state->max_dw)
state->max_dw = ARRAY_SIZE(state->pm4);
assert(state->max_dw);
assert(state->ndw < state->max_dw);
assert(opcode <= 254);
state->last_opcode = opcode;
@ -21,8 +20,7 @@ static void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
{
if (!state->max_dw)
state->max_dw = ARRAY_SIZE(state->pm4);
assert(state->max_dw);
assert(state->ndw < state->max_dw);
state->pm4[state->ndw++] = dw;
state->last_opcode = 255; /* invalid opcode */
@ -40,9 +38,7 @@ static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint
{
reg >>= 2;
if (!state->max_dw)
state->max_dw = ARRAY_SIZE(state->pm4);
assert(state->max_dw);
assert(state->ndw + 2 <= state->max_dw);
if (opcode != state->last_opcode || reg != (state->last_reg + 1) || idx != state->last_idx) {
@ -102,9 +98,15 @@ void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val)
state->reg_va_low_idx = state->ndw - 1;
}
/* Reset a PM4 state so it can be (re)built: store the screen and the queue
 * type, set the recorded dword count back to zero, and give max_dw a default
 * if it has not been set yet.
 */
void si_pm4_clear_state(struct si_pm4_state *state)
void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen,
bool is_compute_queue)
{
state->screen = sscreen;
state->ndw = 0;
state->is_compute_queue = is_compute_queue;
/* A zero max_dw means no capacity was chosen; fall back to the size of the
 * pm4 array declared in the struct.
 */
if (!state->max_dw)
state->max_dw = ARRAY_SIZE(state->pm4);
}
void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx)
@ -152,21 +154,24 @@ void si_pm4_reset_emitted(struct si_context *sctx)
}
}
/* Allocate a zero-initialized si_pm4_state with room for max_dw dwords and
 * initialize it through si_pm4_clear_state(). Returns NULL if calloc fails.
 */
struct si_pm4_state *si_pm4_create_sized(unsigned max_dw)
struct si_pm4_state *si_pm4_create_sized(struct si_screen *sscreen, unsigned max_dw,
bool is_compute_queue)
{
struct si_pm4_state *pm4;
/* Struct size adjusted by the difference between max_dw and the declared
 * pm4 array length, at 4 bytes per dword.
 * NOTE(review): presumably callers pass max_dw >= ARRAY_SIZE(pm4->pm4) — confirm.
 */
unsigned size = sizeof(*pm4) + 4 * (max_dw - ARRAY_SIZE(pm4->pm4));
pm4 = (struct si_pm4_state *)calloc(1, size);
if (pm4)
if (pm4) {
/* Set max_dw before clearing so si_pm4_clear_state() keeps the requested
 * capacity instead of applying its default.
 */
pm4->max_dw = max_dw;
si_pm4_clear_state(pm4, sscreen, is_compute_queue);
}
return pm4;
}
struct si_pm4_state *si_pm4_clone(struct si_pm4_state *orig)
{
struct si_pm4_state *pm4 = si_pm4_create_sized(orig->max_dw);
struct si_pm4_state *pm4 = si_pm4_create_sized(orig->screen, orig->max_dw,
orig->is_compute_queue);
if (pm4)
memcpy(pm4, orig, sizeof(*pm4) + 4 * (pm4->max_dw - ARRAY_SIZE(pm4->pm4)));
return pm4;

View file

@ -25,12 +25,15 @@ struct si_atom {
};
struct si_pm4_state {
struct si_screen *screen;
/* PKT3_SET_*_REG handling */
uint16_t last_reg; /* register offset in dwords */
uint16_t last_pm4;
uint16_t ndw; /* number of dwords in pm4 */
uint8_t last_opcode;
uint8_t last_idx;
bool is_compute_queue;
/* For shader states only */
bool is_shader;
@ -52,12 +55,14 @@ void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val);
void si_pm4_set_reg_idx3(struct si_screen *sscreen, struct si_pm4_state *state,
unsigned reg, uint32_t val);
void si_pm4_clear_state(struct si_pm4_state *state);
void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen,
bool is_compute_queue);
void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx);
void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
void si_pm4_reset_emitted(struct si_context *sctx);
struct si_pm4_state *si_pm4_create_sized(unsigned max_dw);
struct si_pm4_state *si_pm4_create_sized(struct si_screen *sscreen, unsigned max_dw,
bool is_compute_queue);
struct si_pm4_state *si_pm4_clone(struct si_pm4_state *orig);
#ifdef __cplusplus

View file

@ -434,6 +434,8 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
if (!blend)
return NULL;
si_pm4_clear_state(pm4, sctx->screen, false);
blend->alpha_to_coverage = state->alpha_to_coverage;
blend->alpha_to_one = state->alpha_to_one;
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
@ -940,6 +942,8 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
return NULL;
}
si_pm4_clear_state(pm4, sscreen, false);
rs->scissor_enable = state->scissor;
rs->clip_halfz = state->clip_halfz;
rs->two_side = state->light_twoside;
@ -1118,6 +1122,8 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
float offset_scale = state->offset_scale * 16.0f;
uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
si_pm4_clear_state(pm4, sscreen, false);
if (!state->offset_units_unscaled) {
switch (i) {
case 0: /* 16-bit zbuffer */
@ -1336,6 +1342,8 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
return NULL;
}
si_pm4_clear_state(pm4, (struct si_screen*)ctx->screen, false);
dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
@ -5548,7 +5556,7 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx, bool uses_reg_
bool has_clear_state = sscreen->info.has_clear_state;
/* We need more space because the preamble is large. */
struct si_pm4_state *pm4 = si_pm4_create_sized(214);
struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, 214, sctx->has_graphics);
if (!pm4)
return;
@ -5774,7 +5782,7 @@ static void cdna_init_compute_preamble_state(struct si_context *sctx)
uint32_t compute_cu_en = S_00B858_SH0_CU_EN(sscreen->info.spi_cu_en) |
S_00B858_SH1_CU_EN(sscreen->info.spi_cu_en);
struct si_pm4_state *pm4 = si_pm4_create_sized(48);
struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, 48, true);
if (!pm4)
return;
@ -5831,7 +5839,7 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx, bool uses_reg
}
/* We need more space because the preamble is large. */
struct si_pm4_state *pm4 = si_pm4_create_sized(214);
struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, 214, sctx->has_graphics);
if (!pm4)
return;

View file

@ -347,7 +347,7 @@ static bool si_update_shaders(struct si_context *sctx)
si_resource_reference(&pipeline->bo, bo);
/* Re-upload all gfx shaders and init PM4. */
si_pm4_clear_state(&pipeline->pm4);
si_pm4_clear_state(&pipeline->pm4, sctx->screen, false);
for (int i = 0; i < SI_NUM_GRAPHICS_SHADERS; i++) {
struct si_shader *shader = sctx->shaders[i].current;

View file

@ -607,7 +607,7 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen, struct si_sh
static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader,
void (*emit_func)(struct si_context *ctx))
{
si_pm4_clear_state(&shader->pm4);
si_pm4_clear_state(&shader->pm4, shader->selector->screen, false);
shader->pm4.is_shader = true;
shader->pm4.atom.emit = emit_func;
return &shader->pm4;