mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
amd,radv,radeonsi: add ac_emit_cp_acquire_mem()
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37870>
This commit is contained in:
parent
9ad7fb8569
commit
679332f9a9
4 changed files with 69 additions and 87 deletions
|
|
@ -1192,3 +1192,54 @@ ac_emit_cp_gfx_scratch(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
|||
|
||||
ac_cmdbuf_end();
|
||||
}
|
||||
|
||||
/* Execute plain ACQUIRE_MEM that just flushes caches. This optionally waits
|
||||
* for idle on older chips. "engine" determines whether to sync in PFP or ME.
|
||||
*/
|
||||
void
|
||||
ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
enum amd_ip_type ip_type, uint32_t engine,
|
||||
uint32_t gcr_cntl)
|
||||
{
|
||||
assert(engine == V_580_CP_PFP || engine == V_580_CP_ME);
|
||||
assert(gcr_cntl);
|
||||
|
||||
ac_cmdbuf_begin(cs);
|
||||
|
||||
if (gfx_level >= GFX10) {
|
||||
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
|
||||
const uint32_t engine_flag = engine == V_580_CP_ME ? BITFIELD_BIT(31) : 0;
|
||||
|
||||
/* Flush caches. This doesn't wait for idle. */
|
||||
ac_cmdbuf_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0));
|
||||
ac_cmdbuf_emit(engine_flag); /* which engine to use */
|
||||
ac_cmdbuf_emit(0xffffffff); /* CP_COHER_SIZE */
|
||||
ac_cmdbuf_emit(0x01ffffff); /* CP_COHER_SIZE_HI */
|
||||
ac_cmdbuf_emit(0); /* CP_COHER_BASE */
|
||||
ac_cmdbuf_emit(0); /* CP_COHER_BASE_HI */
|
||||
ac_cmdbuf_emit(0x0000000A); /* POLL_INTERVAL */
|
||||
ac_cmdbuf_emit(gcr_cntl); /* GCR_CNTL */
|
||||
} else {
|
||||
const bool is_mec = gfx_level >= GFX7 && ip_type == AMD_IP_COMPUTE;
|
||||
|
||||
if (gfx_level == GFX9 || is_mec) {
|
||||
/* Flush caches and wait for the caches to assert idle. */
|
||||
ac_cmdbuf_emit(PKT3(PKT3_ACQUIRE_MEM, 5, 0) | PKT3_SHADER_TYPE_S(is_mec));
|
||||
ac_cmdbuf_emit(gcr_cntl); /* CP_COHER_CNTL */
|
||||
ac_cmdbuf_emit(0xffffffff); /* CP_COHER_SIZE */
|
||||
ac_cmdbuf_emit(0xffffff); /* CP_COHER_SIZE_HI */
|
||||
ac_cmdbuf_emit(0); /* CP_COHER_BASE */
|
||||
ac_cmdbuf_emit(0); /* CP_COHER_BASE_HI */
|
||||
ac_cmdbuf_emit(0x0000000A); /* POLL_INTERVAL */
|
||||
} else {
|
||||
/* ACQUIRE_MEM is only required on the compute ring. */
|
||||
ac_cmdbuf_emit(PKT3(PKT3_SURFACE_SYNC, 3, 0));
|
||||
ac_cmdbuf_emit(gcr_cntl); /* CP_COHER_CNTL */
|
||||
ac_cmdbuf_emit(0xffffffff); /* CP_COHER_SIZE */
|
||||
ac_cmdbuf_emit(0); /* CP_COHER_BASE */
|
||||
ac_cmdbuf_emit(0x0000000A); /* POLL_INTERVAL */
|
||||
}
|
||||
}
|
||||
|
||||
ac_cmdbuf_end();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -160,6 +160,11 @@ void
|
|||
ac_emit_cp_gfx_scratch(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
uint64_t va, uint32_t size);
|
||||
|
||||
/* Emit a cache-flush packet into "cs": ACQUIRE_MEM on GFX10+, and either
 * ACQUIRE_MEM or SURFACE_SYNC on older chips depending on "gfx_level" and
 * "ip_type". "engine" must be V_580_CP_PFP or V_580_CP_ME, and "gcr_cntl"
 * must be non-zero (both are asserted by the implementation).
 */
void
|
||||
ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
enum amd_ip_type ip_type, uint32_t engine,
|
||||
uint32_t gcr_cntl);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -103,32 +103,6 @@ radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_
|
|||
radeon_end();
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_acquire_mem(struct radv_cmd_stream *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl)
|
||||
{
|
||||
radeon_begin(cs);
|
||||
|
||||
if (is_mec || is_gfx9) {
|
||||
uint32_t hi_val = is_gfx9 ? 0xffffff : 0xff;
|
||||
radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 5, false) | PKT3_SHADER_TYPE_S(is_mec));
|
||||
radeon_emit(cp_coher_cntl); /* CP_COHER_CNTL */
|
||||
radeon_emit(0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(hi_val); /* CP_COHER_SIZE_HI */
|
||||
radeon_emit(0); /* CP_COHER_BASE */
|
||||
radeon_emit(0); /* CP_COHER_BASE_HI */
|
||||
radeon_emit(0x0000000A); /* POLL_INTERVAL */
|
||||
} else {
|
||||
/* ACQUIRE_MEM is only required on a compute ring. */
|
||||
radeon_emit(PKT3(PKT3_SURFACE_SYNC, 3, false));
|
||||
radeon_emit(cp_coher_cntl); /* CP_COHER_CNTL */
|
||||
radeon_emit(0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(0); /* CP_COHER_BASE */
|
||||
radeon_emit(0x0000000A); /* POLL_INTERVAL */
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void
|
||||
gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt,
|
||||
uint64_t flush_va, enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
|
||||
|
|
@ -298,20 +272,7 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
|
|||
|
||||
/* Ignore fields that only modify the behavior of other fields. */
|
||||
if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
|
||||
/* Flush caches and wait for the caches to assert idle.
|
||||
* The cache flush is executed in the ME, but the PFP waits
|
||||
* for completion.
|
||||
*/
|
||||
radeon_begin(cs);
|
||||
radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0));
|
||||
radeon_emit(0); /* CP_COHER_CNTL */
|
||||
radeon_emit(0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(0xffffff); /* CP_COHER_SIZE_HI */
|
||||
radeon_emit(0); /* CP_COHER_BASE */
|
||||
radeon_emit(0); /* CP_COHER_BASE_HI */
|
||||
radeon_emit(0x0000000A); /* POLL_INTERVAL */
|
||||
radeon_emit(gcr_cntl); /* GCR_CNTL */
|
||||
radeon_end();
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, gcr_cntl);
|
||||
} else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
|
||||
RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&
|
||||
!is_mec) {
|
||||
|
|
@ -494,9 +455,9 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
|
|||
}
|
||||
|
||||
if ((flush_bits & RADV_CMD_FLAG_INV_L2) || (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
|
||||
radv_emit_acquire_mem(cs, is_mec, gfx_level == GFX9,
|
||||
cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
|
||||
S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8));
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP,
|
||||
cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
|
||||
S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_L2 | RGP_FLUSH_INVAL_VMEM_L0;
|
||||
|
|
@ -508,14 +469,14 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
|
|||
*
|
||||
* WB doesn't work without NC.
|
||||
*/
|
||||
radv_emit_acquire_mem(cs, is_mec, gfx_level == GFX9,
|
||||
cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP,
|
||||
cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
|
||||
}
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
|
||||
radv_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0;
|
||||
|
|
@ -526,7 +487,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
|
|||
* Therefore, it should be last. Done in PFP.
|
||||
*/
|
||||
if (cp_coher_cntl)
|
||||
radv_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, cp_coher_cntl);
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, cp_coher_cntl);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
|
|||
|
|
@ -37,30 +37,14 @@ void si_cp_release_acquire_mem_pws(struct si_context *sctx, struct radeon_cmdbuf
|
|||
si_cp_acquire_mem_pws(sctx, cs, event_type, stage_sel, 0, 0, sqtt_flush_flags);
|
||||
}
|
||||
|
||||
/* Execute plain ACQUIRE_MEM that just flushes caches. This optionally waits for idle on older
|
||||
* chips. "engine" determines whether to sync in PFP or ME.
|
||||
*/
|
||||
void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned gcr_cntl,
|
||||
unsigned engine)
|
||||
{
|
||||
assert(engine == V_580_CP_PFP || engine == V_580_CP_ME);
|
||||
assert(gcr_cntl);
|
||||
const enum amd_ip_type ip_type = sctx->is_gfx_queue ? AMD_IP_GFX : AMD_IP_COMPUTE;
|
||||
|
||||
if (sctx->gfx_level >= GFX10) {
|
||||
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
|
||||
unsigned engine_flag = engine == V_580_CP_ME ? BITFIELD_BIT(31) : 0;
|
||||
|
||||
/* Flush caches. This doesn't wait for idle. */
|
||||
radeon_begin(cs);
|
||||
radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0));
|
||||
radeon_emit(engine_flag); /* which engine to use */
|
||||
radeon_emit(0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(0x01ffffff); /* CP_COHER_SIZE_HI */
|
||||
radeon_emit(0); /* CP_COHER_BASE */
|
||||
radeon_emit(0); /* CP_COHER_BASE_HI */
|
||||
radeon_emit(0x0000000A); /* POLL_INTERVAL */
|
||||
radeon_emit(gcr_cntl); /* GCR_CNTL */
|
||||
radeon_end();
|
||||
ac_emit_cp_acquire_mem(&cs->current, sctx->gfx_level, ip_type, engine,
|
||||
gcr_cntl);
|
||||
} else {
|
||||
bool compute_ib = !sctx->is_gfx_queue;
|
||||
|
||||
|
|
@ -68,27 +52,8 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign
|
|||
if (sctx->gfx_level != GFX7)
|
||||
gcr_cntl |= 1u << 31; /* don't sync PFP, i.e. execute the sync in ME */
|
||||
|
||||
if (sctx->gfx_level == GFX9 || compute_ib) {
|
||||
/* Flush caches and wait for the caches to assert idle. */
|
||||
radeon_begin(cs);
|
||||
radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 5, 0));
|
||||
radeon_emit(gcr_cntl); /* CP_COHER_CNTL */
|
||||
radeon_emit(0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(0xffffff); /* CP_COHER_SIZE_HI */
|
||||
radeon_emit(0); /* CP_COHER_BASE */
|
||||
radeon_emit(0); /* CP_COHER_BASE_HI */
|
||||
radeon_emit(0x0000000A); /* POLL_INTERVAL */
|
||||
radeon_end();
|
||||
} else {
|
||||
/* ACQUIRE_MEM is only required on the compute ring. */
|
||||
radeon_begin(cs);
|
||||
radeon_emit(PKT3(PKT3_SURFACE_SYNC, 3, 0));
|
||||
radeon_emit(gcr_cntl); /* CP_COHER_CNTL */
|
||||
radeon_emit(0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(0); /* CP_COHER_BASE */
|
||||
radeon_emit(0x0000000A); /* POLL_INTERVAL */
|
||||
radeon_end();
|
||||
}
|
||||
ac_emit_cp_acquire_mem(&cs->current, sctx->gfx_level, ip_type, engine,
|
||||
gcr_cntl);
|
||||
|
||||
/* ACQUIRE_MEM & SURFACE_SYNC roll the context if the current context is busy. */
|
||||
if (!compute_ib)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue