amd,radv,radeonsi: add ac_emit_cp_release_mem()

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37956>
Samuel Pitoiset 2025-10-20 14:50:24 +02:00 committed by Marge Bot
parent 22d73fc077
commit 457d8926e1
4 changed files with 109 additions and 115 deletions
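
To illustrate the new helper, here is a hedged sketch of a call site that writes a 32-bit fence value at bottom-of-pipe; `fence_va` and `fence_value` are hypothetical placeholders, only the function signature and the EOP select enums come from this commit:

   /* Sketch (assumed call site): ask the CP to write the 32-bit value
    * `fence_value` to memory at `fence_va` once all prior work has drained.
    * Passing eop_bug_va = 0 skips the GFX9 ZPASS_DONE workaround, which the
    * helper only emits when a scratch address is provided.
    */
   ac_emit_cp_release_mem(cs, gfx_level, AMD_IP_GFX,
                          V_028A90_BOTTOM_OF_PIPE_TS, 0 /* event_flags */,
                          EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
                          EOP_DATA_SEL_VALUE_32BIT, fence_va,
                          fence_value, 0 /* eop_bug_va */);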


@@ -440,6 +440,89 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
   ac_cmdbuf_end();
}

void
ac_emit_cp_release_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                       enum amd_ip_type ip_type, uint32_t event,
                       uint32_t event_flags, uint32_t dst_sel,
                       uint32_t int_sel, uint32_t data_sel, uint64_t va,
                       uint32_t new_fence, uint64_t eop_bug_va)
{
   const bool is_mec = gfx_level >= GFX7 && ip_type == AMD_IP_COMPUTE;

   /* EOS events may be buggy on GFX7, prefer not to use them. */
   if (gfx_level == GFX7 && (event == V_028A90_CS_DONE || event == V_028A90_PS_DONE))
      event = V_028A90_BOTTOM_OF_PIPE_TS;

   const uint32_t op = EVENT_TYPE(event) |
                       EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
                       event_flags;
   const uint32_t sel = EOP_DST_SEL(dst_sel) |
                        EOP_INT_SEL(int_sel) |
                        EOP_DATA_SEL(data_sel);

   ac_cmdbuf_begin(cs);

   if (gfx_level >= GFX9 || is_mec) {
      /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion counters)
       * must immediately precede every timestamp event to prevent a GPU hang
       * on GFX9.
       */
      if (gfx_level == GFX9 && !is_mec && eop_bug_va) {
         ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
         ac_cmdbuf_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
         ac_cmdbuf_emit(eop_bug_va);
         ac_cmdbuf_emit(eop_bug_va >> 32);
      }

      ac_cmdbuf_emit(PKT3(PKT3_RELEASE_MEM, gfx_level >= GFX9 ? 6 : 5, false));
      ac_cmdbuf_emit(op);
      ac_cmdbuf_emit(sel);
      ac_cmdbuf_emit(va);        /* address lo */
      ac_cmdbuf_emit(va >> 32);  /* address hi */
      ac_cmdbuf_emit(new_fence); /* immediate data lo */
      ac_cmdbuf_emit(0);         /* immediate data hi */
      if (gfx_level >= GFX9)
         ac_cmdbuf_emit(0);      /* unused */
   } else {
      /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
       * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on the graphics
       * queue, and with RELEASE_MEM on the compute queue.
       */
      if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
         assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && data_sel == EOP_DATA_SEL_VALUE_32BIT);
         ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
         ac_cmdbuf_emit(op);
         ac_cmdbuf_emit(va);
         ac_cmdbuf_emit(((va >> 32) & 0xffff) |
                        EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
         ac_cmdbuf_emit(new_fence);
      } else {
         if (gfx_level == GFX7 || gfx_level == GFX8) {
            /* Two EOP events are required to make all engines go idle (and
             * optional cache flushes executed) before the timestamp is
             * written.
             */
            ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
            ac_cmdbuf_emit(op);
            ac_cmdbuf_emit(eop_bug_va);
            ac_cmdbuf_emit(((eop_bug_va >> 32) & 0xffff) | sel);
            ac_cmdbuf_emit(0); /* immediate data */
            ac_cmdbuf_emit(0); /* unused */
         }

         ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
         ac_cmdbuf_emit(op);
         ac_cmdbuf_emit(va);
         ac_cmdbuf_emit(((va >> 32) & 0xffff) | sel);
         ac_cmdbuf_emit(new_fence); /* immediate data */
         ac_cmdbuf_emit(0);         /* unused */
      }
   }

   ac_cmdbuf_end();
}

void
ac_emit_cp_atomic_mem(struct ac_cmdbuf *cs, uint32_t atomic_op,
                      uint32_t atomic_cmd, uint64_t va, uint64_t data,


@@ -99,6 +99,13 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                       enum amd_ip_type ip_type, uint32_t engine,
                       uint32_t gcr_cntl);

void
ac_emit_cp_release_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                       enum amd_ip_type ip_type, uint32_t event,
                       uint32_t event_flags, uint32_t dst_sel,
                       uint32_t int_sel, uint32_t data_sel, uint64_t va,
                       uint32_t new_fence, uint64_t eop_bug_va);

void
ac_emit_cp_atomic_mem(struct ac_cmdbuf *cs, uint32_t atomic_op,
                      uint32_t atomic_cmd, uint64_t va, uint64_t data,


@@ -26,76 +26,14 @@ radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_
      return;
   }

   /* EOS events may be buggy on GFX7, prefer not to use them. */
   if (gfx_level == GFX7 && (event == V_028A90_CS_DONE || event == V_028A90_PS_DONE))
      event = V_028A90_BOTTOM_OF_PIPE_TS;

   /* The EOP bug is specific to GFX9. Though, RadeonSI also implements it for GFX6-8 but it
    * shouldn't be necessary because it's using SURFACE_SYNC to flush L2. See
    * waEventWriteEopPrematureL2Inv in PAL.
    */
   const uint64_t eop_bug_va = gfx_level >= GFX9 ? gfx9_eop_bug_va : va;

   const bool is_mec = cs->hw_ip == AMD_IP_COMPUTE && gfx_level >= GFX7;
   unsigned op =
      EVENT_TYPE(event) | EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | event_flags;
   unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel);

   radeon_begin(cs);

   if (gfx_level >= GFX9 || is_mec) {
      /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
       * counters) must immediately precede every timestamp event to
       * prevent a GPU hang on GFX9.
       */
      if (gfx_level == GFX9 && !is_mec) {
         radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
         radeon_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
         radeon_emit(gfx9_eop_bug_va);
         radeon_emit(gfx9_eop_bug_va >> 32);
      }

      radeon_emit(PKT3(PKT3_RELEASE_MEM, gfx_level >= GFX9 ? 6 : 5, false));
      radeon_emit(op);
      radeon_emit(sel);
      radeon_emit(va);        /* address lo */
      radeon_emit(va >> 32);  /* address hi */
      radeon_emit(new_fence); /* immediate data lo */
      radeon_emit(0);         /* immediate data hi */
      if (gfx_level >= GFX9)
         radeon_emit(0);      /* unused */
   } else {
      /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
       * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on
       * the graphics queue, and with RELEASE_MEM on the compute
       * queue.
       */
      if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
         assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && data_sel == EOP_DATA_SEL_VALUE_32BIT);
         radeon_emit(PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
         radeon_emit(op);
         radeon_emit(va);
         radeon_emit(((va >> 32) & 0xffff) | EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
         radeon_emit(new_fence);
      } else {
         if (gfx_level == GFX7 || gfx_level == GFX8) {
            /* Two EOP events are required to make all
             * engines go idle (and optional cache flushes
             * executed) before the timestamp is written.
             */
            radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
            radeon_emit(op);
            radeon_emit(va);
            radeon_emit(((va >> 32) & 0xffff) | sel);
            radeon_emit(0); /* immediate data */
            radeon_emit(0); /* unused */
         }

         radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
         radeon_emit(op);
         radeon_emit(va);
         radeon_emit(((va >> 32) & 0xffff) | sel);
         radeon_emit(new_fence); /* immediate data */
         radeon_emit(0); /* unused */
      }
   }
   radeon_end();

   ac_emit_cp_release_mem(cs->b, gfx_level, cs->hw_ip, event, event_flags, dst_sel, int_sel, data_sel, va, new_fence,
                          eop_bug_va);
}

static void


@@ -50,13 +50,8 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
                       struct si_resource *buf, uint64_t va, uint32_t new_fence,
                       unsigned query_type)
{
   unsigned op = EVENT_TYPE(event) |
                 EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
                 event_flags;
   unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel);
   bool compute_ib = !ctx->is_gfx_queue;

   radeon_begin(cs);
   uint64_t eop_bug_va = 0;

   if (ctx->gfx_level >= GFX9 || (compute_ib && ctx->gfx_level >= GFX7)) {
      /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
@@ -89,57 +84,28 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
         }

         assert(16 * ctx->screen->info.max_render_backends <= scratch->b.b.width0);
         radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
         radeon_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
         radeon_emit(scratch->gpu_address);
         radeon_emit(scratch->gpu_address >> 32);
         eop_bug_va = scratch->gpu_address;

         radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
                                   RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
      }
   } else if (ctx->gfx_level == GFX7 || ctx->gfx_level == GFX8) {
      struct si_resource *scratch = ctx->eop_bug_scratch;

      radeon_emit(PKT3(PKT3_RELEASE_MEM, ctx->gfx_level >= GFX9 ? 6 : 5, 0));
      radeon_emit(op);
      radeon_emit(sel);
      radeon_emit(va);        /* address lo */
      radeon_emit(va >> 32);  /* address hi */
      radeon_emit(new_fence); /* immediate data lo */
      radeon_emit(0);         /* immediate data hi */
      if (ctx->gfx_level >= GFX9)
         radeon_emit(0);      /* unused */
   } else {
      if (ctx->gfx_level == GFX7 || ctx->gfx_level == GFX8) {
         struct si_resource *scratch = ctx->eop_bug_scratch;
         uint64_t va = scratch->gpu_address;
         eop_bug_va = scratch->gpu_address;

         /* Two EOP events are required to make all engines go idle
          * (and optional cache flushes executed) before the timestamp
          * is written.
          */
         radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
         radeon_emit(op);
         radeon_emit(va);
         radeon_emit(((va >> 32) & 0xffff) | sel);
         radeon_emit(0); /* immediate data */
         radeon_emit(0); /* unused */

         radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
                                   RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
      }

      radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
      radeon_emit(op);
      radeon_emit(va);
      radeon_emit(((va >> 32) & 0xffff) | sel);
      radeon_emit(new_fence); /* immediate data */
      radeon_emit(0); /* unused */

      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
                                RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
   }
   radeon_end();

   if (buf) {
      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
   }

   ac_emit_cp_release_mem(&cs->current, ctx->gfx_level,
                          compute_ib ? AMD_IP_COMPUTE : AMD_IP_GFX, event,
                          event_flags, dst_sel, int_sel, data_sel, va,
                          new_fence, eop_bug_va);
}

unsigned si_cp_write_fence_dwords(struct si_screen *screen)