diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 2898f9a3c6c..de1d251847b 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -11,8 +11,7 @@ static void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode) { - if (!state->max_dw) - state->max_dw = ARRAY_SIZE(state->pm4); + assert(state->max_dw); assert(state->ndw < state->max_dw); assert(opcode <= 254); state->last_opcode = opcode; @@ -21,8 +20,7 @@ static void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode) void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw) { - if (!state->max_dw) - state->max_dw = ARRAY_SIZE(state->pm4); + assert(state->max_dw); assert(state->ndw < state->max_dw); state->pm4[state->ndw++] = dw; state->last_opcode = 255; /* invalid opcode */ @@ -40,9 +38,7 @@ static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint { reg >>= 2; - if (!state->max_dw) - state->max_dw = ARRAY_SIZE(state->pm4); - + assert(state->max_dw); assert(state->ndw + 2 <= state->max_dw); if (opcode != state->last_opcode || reg != (state->last_reg + 1) || idx != state->last_idx) { @@ -102,9 +98,15 @@ void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val) state->reg_va_low_idx = state->ndw - 1; } -void si_pm4_clear_state(struct si_pm4_state *state) +void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen, + bool is_compute_queue) { + state->screen = sscreen; state->ndw = 0; + state->is_compute_queue = is_compute_queue; + + if (!state->max_dw) + state->max_dw = ARRAY_SIZE(state->pm4); } void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx) @@ -152,21 +154,24 @@ void si_pm4_reset_emitted(struct si_context *sctx) } } -struct si_pm4_state *si_pm4_create_sized(unsigned max_dw) +struct si_pm4_state *si_pm4_create_sized(struct si_screen *sscreen, unsigned max_dw, + bool is_compute_queue) { struct si_pm4_state *pm4; unsigned size = sizeof(*pm4) + 4 * (max_dw - ARRAY_SIZE(pm4->pm4)); pm4 = (struct si_pm4_state *)calloc(1, size); - if (pm4) + if (pm4) { pm4->max_dw = max_dw; + si_pm4_clear_state(pm4, sscreen, is_compute_queue); + } return pm4; } struct si_pm4_state *si_pm4_clone(struct si_pm4_state *orig) { - struct si_pm4_state *pm4 = si_pm4_create_sized(orig->max_dw); - + struct si_pm4_state *pm4 = si_pm4_create_sized(orig->screen, orig->max_dw, + orig->is_compute_queue); if (pm4) memcpy(pm4, orig, sizeof(*pm4) + 4 * (pm4->max_dw - ARRAY_SIZE(pm4->pm4))); return pm4; diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 483b8881453..784e324444c 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -25,12 +25,15 @@ struct si_atom { }; struct si_pm4_state { + struct si_screen *screen; + /* PKT3_SET_*_REG handling */ uint16_t last_reg; /* register offset in dwords */ uint16_t last_pm4; uint16_t ndw; /* number of dwords in pm4 */ uint8_t last_opcode; uint8_t last_idx; + bool is_compute_queue; /* For shader states only */ bool is_shader; @@ -52,12 +55,14 @@ void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val); void si_pm4_set_reg_idx3(struct si_screen *sscreen, struct si_pm4_state *state, unsigned reg, uint32_t val); -void si_pm4_clear_state(struct si_pm4_state *state); +void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen, + bool is_compute_queue); void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx); void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state); void si_pm4_reset_emitted(struct si_context *sctx); -struct si_pm4_state *si_pm4_create_sized(unsigned max_dw); +struct si_pm4_state *si_pm4_create_sized(struct si_screen *sscreen, unsigned max_dw, + bool is_compute_queue); struct si_pm4_state *si_pm4_clone(struct si_pm4_state *orig); #ifdef __cplusplus diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3a4879d381c..f156cf053b9 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -434,6 +434,8 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, if (!blend) return NULL; + si_pm4_clear_state(pm4, sctx->screen, false); + blend->alpha_to_coverage = state->alpha_to_coverage; blend->alpha_to_one = state->alpha_to_one; blend->dual_src_blend = util_blend_state_is_dual(state, 0); @@ -940,6 +942,8 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast return NULL; } + si_pm4_clear_state(pm4, sscreen, false); + rs->scissor_enable = state->scissor; rs->clip_halfz = state->clip_halfz; rs->two_side = state->light_twoside; @@ -1118,6 +1122,8 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast float offset_scale = state->offset_scale * 16.0f; uint32_t pa_su_poly_offset_db_fmt_cntl = 0; + si_pm4_clear_state(pm4, sscreen, false); + if (!state->offset_units_unscaled) { switch (i) { case 0: /* 16-bit zbuffer */ @@ -1336,6 +1342,8 @@ static void *si_create_dsa_state(struct pipe_context *ctx, return NULL; } + si_pm4_clear_state(pm4, (struct si_screen*)ctx->screen, false); + dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; @@ -5548,7 +5556,7 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx, bool uses_reg_ bool has_clear_state = sscreen->info.has_clear_state; /* We need more space because the preamble is large. */ - struct si_pm4_state *pm4 = si_pm4_create_sized(214); + struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, 214, sctx->has_graphics); if (!pm4) return; @@ -5774,7 +5782,7 @@ static void cdna_init_compute_preamble_state(struct si_context *sctx) uint32_t compute_cu_en = S_00B858_SH0_CU_EN(sscreen->info.spi_cu_en) | S_00B858_SH1_CU_EN(sscreen->info.spi_cu_en); - struct si_pm4_state *pm4 = si_pm4_create_sized(48); + struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, 48, true); if (!pm4) return; @@ -5831,7 +5839,7 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx, bool uses_reg } /* We need more space because the preamble is large. */ - struct si_pm4_state *pm4 = si_pm4_create_sized(214); + struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, 214, sctx->has_graphics); if (!pm4) return; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 7f14cf40201..4dd979f30c2 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -347,7 +347,7 @@ static bool si_update_shaders(struct si_context *sctx) si_resource_reference(&pipeline->bo, bo); /* Re-upload all gfx shaders and init PM4. */ - si_pm4_clear_state(&pipeline->pm4); + si_pm4_clear_state(&pipeline->pm4, sctx->screen, false); for (int i = 0; i < SI_NUM_GRAPHICS_SHADERS; i++) { struct si_shader *shader = sctx->shaders[i].current; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 270910ebd39..14ddc858cd7 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -607,7 +607,7 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen, struct si_sh static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader, void (*emit_func)(struct si_context *ctx)) { - si_pm4_clear_state(&shader->pm4); + si_pm4_clear_state(&shader->pm4, shader->selector->screen, false); shader->pm4.is_shader = true; shader->pm4.atom.emit = emit_func; return &shader->pm4;