mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
amd,radv,radeonsi: add ac_emit_cp_release_mem()
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37956>
This commit is contained in:
parent
22d73fc077
commit
457d8926e1
4 changed files with 109 additions and 115 deletions
|
|
@ -440,6 +440,89 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
|||
ac_cmdbuf_end();
|
||||
}
|
||||
|
||||
/* Emit an end-of-pipe / end-of-shader "release memory" event.
 *
 * Depending on the GFX level and queue, this is encoded as a RELEASE_MEM
 * packet (GFX9+, or the compute MEC on GFX7+) or as the legacy
 * EVENT_WRITE_EOS / EVENT_WRITE_EOP packets (GFX6-8 graphics queue).
 * The event can optionally write a fence value or timestamp to memory and
 * raise an interrupt, as selected by dst_sel/int_sel/data_sel.
 *
 * cs          command buffer to emit into
 * gfx_level   target GFX IP level; selects the packet encoding
 * ip_type     AMD_IP_GFX or AMD_IP_COMPUTE (picks the MEC path on GFX7+)
 * event       EVENT_WRITE event type (e.g. BOTTOM_OF_PIPE_TS, CS_DONE)
 * event_flags extra control bits OR'ed into the event dword
 * dst_sel     EOP destination select (where the data is written)
 * int_sel     EOP interrupt select
 * data_sel    EOP data select (no data / 32-bit value / timestamp / ...)
 * va          GPU address the fence/timestamp is written to
 * new_fence   32-bit immediate written when data_sel selects a value
 * eop_bug_va  scratch GPU address used by the GFX7-9 EOP bug workarounds
 */
void
ac_emit_cp_release_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                       enum amd_ip_type ip_type, uint32_t event,
                       uint32_t event_flags, uint32_t dst_sel,
                       uint32_t int_sel, uint32_t data_sel, uint64_t va,
                       uint32_t new_fence, uint64_t eop_bug_va)
{
   /* The MEC (compute micro-engine) exists on GFX7+ and only understands
    * RELEASE_MEM, never the legacy EVENT_WRITE_EOP/EOS packets.
    */
   const bool is_mec = gfx_level >= GFX7 && ip_type == AMD_IP_COMPUTE;

   /* EOS events may be buggy on GFX7, prefer not to use them. */
   if (gfx_level == GFX7 && (event == V_028A90_CS_DONE || event == V_028A90_PS_DONE))
      event = V_028A90_BOTTOM_OF_PIPE_TS;

   /* EVENT_INDEX 6 = end-of-shader (CS_DONE/PS_DONE), 5 = end-of-pipe. */
   const uint32_t op = EVENT_TYPE(event) |
                       EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
                       event_flags;
   const uint32_t sel = EOP_DST_SEL(dst_sel) |
                        EOP_INT_SEL(int_sel) |
                        EOP_DATA_SEL(data_sel);

   ac_cmdbuf_begin(cs);

   if (gfx_level >= GFX9 || is_mec) {
      /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion counters)
       * must immediately precede every timestamp event to prevent a GPU hang
       * on GFX9.
       */
      if (gfx_level == GFX9 && !is_mec && eop_bug_va) {
         ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
         ac_cmdbuf_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
         ac_cmdbuf_emit(eop_bug_va);
         ac_cmdbuf_emit(eop_bug_va >> 32);
      }

      /* RELEASE_MEM is one dword longer on GFX9+ (extra unused dword). */
      ac_cmdbuf_emit(PKT3(PKT3_RELEASE_MEM, gfx_level >= GFX9 ? 6 : 5, false));
      ac_cmdbuf_emit(op);
      ac_cmdbuf_emit(sel);
      ac_cmdbuf_emit(va);        /* address lo */
      ac_cmdbuf_emit(va >> 32);  /* address hi */
      ac_cmdbuf_emit(new_fence); /* immediate data lo */
      ac_cmdbuf_emit(0);         /* immediate data hi */
      if (gfx_level >= GFX9)
         ac_cmdbuf_emit(0); /* unused */
   } else {
      /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
       * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on the graphics
       * queue, and with RELEASE_MEM on the compute queue.
       */
      /* NOTE(review): this comparison uses V_028B9C_CS_DONE/PS_DONE while the
       * EVENT_INDEX selection above uses V_028A90_CS_DONE/PS_DONE — presumably
       * the numeric event values are identical in both register namespaces;
       * verify against sid.h, otherwise the two checks can disagree.
       */
      if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
         /* EVENT_WRITE_EOS only supports a plain 32-bit fence write. */
         assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && data_sel == EOP_DATA_SEL_VALUE_32BIT);

         ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
         ac_cmdbuf_emit(op);
         ac_cmdbuf_emit(va);
         ac_cmdbuf_emit(((va >> 32) & 0xffff) |
                        EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
         ac_cmdbuf_emit(new_fence);
      } else {
         if (gfx_level == GFX7 || gfx_level == GFX8) {
            /* Two EOP events are required to make all engines go idle (and
             * optional cache flushes executed) before the timestamp is
             * written.
             */
            /* First (dummy) EOP: same event/sel, but written to the scratch
             * address with a zero immediate.
             */
            ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
            ac_cmdbuf_emit(op);
            ac_cmdbuf_emit(eop_bug_va);
            ac_cmdbuf_emit(((eop_bug_va >> 32) & 0xffff) | sel);
            ac_cmdbuf_emit(0); /* immediate data */
            ac_cmdbuf_emit(0); /* unused */
         }

         /* The real EOP event, writing new_fence to va. */
         ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
         ac_cmdbuf_emit(op);
         ac_cmdbuf_emit(va);
         ac_cmdbuf_emit(((va >> 32) & 0xffff) | sel);
         ac_cmdbuf_emit(new_fence); /* immediate data */
         ac_cmdbuf_emit(0);         /* unused */
      }
   }

   ac_cmdbuf_end();
}
|
||||
|
||||
void
|
||||
ac_emit_cp_atomic_mem(struct ac_cmdbuf *cs, uint32_t atomic_op,
|
||||
uint32_t atomic_cmd, uint64_t va, uint64_t data,
|
||||
|
|
|
|||
|
|
@ -99,6 +99,13 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
|||
enum amd_ip_type ip_type, uint32_t engine,
|
||||
uint32_t gcr_cntl);
|
||||
|
||||
void
|
||||
ac_emit_cp_release_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
enum amd_ip_type ip_type, uint32_t event,
|
||||
uint32_t event_flags, uint32_t dst_sel,
|
||||
uint32_t int_sel, uint32_t data_sel, uint64_t va,
|
||||
uint32_t new_fence, uint64_t eop_bug_va);
|
||||
|
||||
void
|
||||
ac_emit_cp_atomic_mem(struct ac_cmdbuf *cs, uint32_t atomic_op,
|
||||
uint32_t atomic_cmd, uint64_t va, uint64_t data,
|
||||
|
|
|
|||
|
|
@ -26,76 +26,14 @@ radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_
|
|||
return;
|
||||
}
|
||||
|
||||
/* EOS events may be buggy on GFX7, prefer not to use them. */
|
||||
if (gfx_level == GFX7 && (event == V_028A90_CS_DONE || event == V_028A90_PS_DONE))
|
||||
event = V_028A90_BOTTOM_OF_PIPE_TS;
|
||||
/* The EOP bug is specific to GFX9. Though, RadeonSI also implements it for GFX6-8 but it
|
||||
* shouldn't be necessary because it's using SURFACE_SYNC to flush L2. See
|
||||
* waEventWriteEopPrematureL2Inv in PAL.
|
||||
*/
|
||||
const uint64_t eop_bug_va = gfx_level >= GFX9 ? gfx9_eop_bug_va : va;
|
||||
|
||||
const bool is_mec = cs->hw_ip == AMD_IP_COMPUTE && gfx_level >= GFX7;
|
||||
unsigned op =
|
||||
EVENT_TYPE(event) | EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | event_flags;
|
||||
unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
if (gfx_level >= GFX9 || is_mec) {
|
||||
/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
|
||||
* counters) must immediately precede every timestamp event to
|
||||
* prevent a GPU hang on GFX9.
|
||||
*/
|
||||
if (gfx_level == GFX9 && !is_mec) {
|
||||
radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
|
||||
radeon_emit(gfx9_eop_bug_va);
|
||||
radeon_emit(gfx9_eop_bug_va >> 32);
|
||||
}
|
||||
|
||||
radeon_emit(PKT3(PKT3_RELEASE_MEM, gfx_level >= GFX9 ? 6 : 5, false));
|
||||
radeon_emit(op);
|
||||
radeon_emit(sel);
|
||||
radeon_emit(va); /* address lo */
|
||||
radeon_emit(va >> 32); /* address hi */
|
||||
radeon_emit(new_fence); /* immediate data lo */
|
||||
radeon_emit(0); /* immediate data hi */
|
||||
if (gfx_level >= GFX9)
|
||||
radeon_emit(0); /* unused */
|
||||
} else {
|
||||
/* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
|
||||
* On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on
|
||||
* the graphics queue, and with RELEASE_MEM on the compute
|
||||
* queue.
|
||||
*/
|
||||
if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
|
||||
assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && data_sel == EOP_DATA_SEL_VALUE_32BIT);
|
||||
|
||||
radeon_emit(PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
|
||||
radeon_emit(op);
|
||||
radeon_emit(va);
|
||||
radeon_emit(((va >> 32) & 0xffff) | EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
|
||||
radeon_emit(new_fence);
|
||||
} else {
|
||||
if (gfx_level == GFX7 || gfx_level == GFX8) {
|
||||
/* Two EOP events are required to make all
|
||||
* engines go idle (and optional cache flushes
|
||||
* executed) before the timestamp is written.
|
||||
*/
|
||||
radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
|
||||
radeon_emit(op);
|
||||
radeon_emit(va);
|
||||
radeon_emit(((va >> 32) & 0xffff) | sel);
|
||||
radeon_emit(0); /* immediate data */
|
||||
radeon_emit(0); /* unused */
|
||||
}
|
||||
|
||||
radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
|
||||
radeon_emit(op);
|
||||
radeon_emit(va);
|
||||
radeon_emit(((va >> 32) & 0xffff) | sel);
|
||||
radeon_emit(new_fence); /* immediate data */
|
||||
radeon_emit(0); /* unused */
|
||||
}
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
ac_emit_cp_release_mem(cs->b, gfx_level, cs->hw_ip, event, event_flags, dst_sel, int_sel, data_sel, va, new_fence,
|
||||
eop_bug_va);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -50,13 +50,8 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
|
|||
struct si_resource *buf, uint64_t va, uint32_t new_fence,
|
||||
unsigned query_type)
|
||||
{
|
||||
unsigned op = EVENT_TYPE(event) |
|
||||
EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
|
||||
event_flags;
|
||||
unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel);
|
||||
bool compute_ib = !ctx->is_gfx_queue;
|
||||
|
||||
radeon_begin(cs);
|
||||
uint64_t eop_bug_va = 0;
|
||||
|
||||
if (ctx->gfx_level >= GFX9 || (compute_ib && ctx->gfx_level >= GFX7)) {
|
||||
/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
|
||||
|
|
@ -89,57 +84,28 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
|
|||
}
|
||||
|
||||
assert(16 * ctx->screen->info.max_render_backends <= scratch->b.b.width0);
|
||||
radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
|
||||
radeon_emit(scratch->gpu_address);
|
||||
radeon_emit(scratch->gpu_address >> 32);
|
||||
eop_bug_va = scratch->gpu_address;
|
||||
|
||||
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
|
||||
RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
|
||||
}
|
||||
} else if (ctx->gfx_level == GFX7 || ctx->gfx_level == GFX8) {
|
||||
struct si_resource *scratch = ctx->eop_bug_scratch;
|
||||
|
||||
radeon_emit(PKT3(PKT3_RELEASE_MEM, ctx->gfx_level >= GFX9 ? 6 : 5, 0));
|
||||
radeon_emit(op);
|
||||
radeon_emit(sel);
|
||||
radeon_emit(va); /* address lo */
|
||||
radeon_emit(va >> 32); /* address hi */
|
||||
radeon_emit(new_fence); /* immediate data lo */
|
||||
radeon_emit(0); /* immediate data hi */
|
||||
if (ctx->gfx_level >= GFX9)
|
||||
radeon_emit(0); /* unused */
|
||||
} else {
|
||||
if (ctx->gfx_level == GFX7 || ctx->gfx_level == GFX8) {
|
||||
struct si_resource *scratch = ctx->eop_bug_scratch;
|
||||
uint64_t va = scratch->gpu_address;
|
||||
eop_bug_va = scratch->gpu_address;
|
||||
|
||||
/* Two EOP events are required to make all engines go idle
|
||||
* (and optional cache flushes executed) before the timestamp
|
||||
* is written.
|
||||
*/
|
||||
radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
|
||||
radeon_emit(op);
|
||||
radeon_emit(va);
|
||||
radeon_emit(((va >> 32) & 0xffff) | sel);
|
||||
radeon_emit(0); /* immediate data */
|
||||
radeon_emit(0); /* unused */
|
||||
|
||||
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
|
||||
RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
|
||||
}
|
||||
|
||||
radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
|
||||
radeon_emit(op);
|
||||
radeon_emit(va);
|
||||
radeon_emit(((va >> 32) & 0xffff) | sel);
|
||||
radeon_emit(new_fence); /* immediate data */
|
||||
radeon_emit(0); /* unused */
|
||||
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
|
||||
RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
|
||||
if (buf) {
|
||||
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
|
||||
}
|
||||
|
||||
ac_emit_cp_release_mem(&cs->current, ctx->gfx_level,
|
||||
compute_ib ? AMD_IP_COMPUTE : AMD_IP_GFX, event,
|
||||
event_flags, dst_sel, int_sel, data_sel, va,
|
||||
new_fence, eop_bug_va);
|
||||
}
|
||||
|
||||
unsigned si_cp_write_fence_dwords(struct si_screen *screen)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue