diff --git a/src/amd/common/ac_cmdbuf_cp.c b/src/amd/common/ac_cmdbuf_cp.c
index 89800c3a767..04e7ecdca11 100644
--- a/src/amd/common/ac_cmdbuf_cp.c
+++ b/src/amd/common/ac_cmdbuf_cp.c
@@ -440,6 +440,89 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
    ac_cmdbuf_end();
 }
 
+void
+ac_emit_cp_release_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                       enum amd_ip_type ip_type, uint32_t event,
+                       uint32_t event_flags, uint32_t dst_sel,
+                       uint32_t int_sel, uint32_t data_sel, uint64_t va,
+                       uint32_t new_fence, uint64_t eop_bug_va)
+{
+   const bool is_mec = gfx_level >= GFX7 && ip_type == AMD_IP_COMPUTE;
+
+   /* EOS events may be buggy on GFX7, prefer not to use them. */
+   if (gfx_level == GFX7 && (event == V_028A90_CS_DONE || event == V_028A90_PS_DONE))
+      event = V_028A90_BOTTOM_OF_PIPE_TS;
+
+   const uint32_t op = EVENT_TYPE(event) |
+                       EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
+                       event_flags;
+   const uint32_t sel = EOP_DST_SEL(dst_sel) |
+                        EOP_INT_SEL(int_sel) |
+                        EOP_DATA_SEL(data_sel);
+
+   ac_cmdbuf_begin(cs);
+
+   if (gfx_level >= GFX9 || is_mec) {
+      /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion counters)
+       * must immediately precede every timestamp event to prevent a GPU hang
+       * on GFX9.
+       */
+      if (gfx_level == GFX9 && !is_mec && eop_bug_va) {
+         ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
+         ac_cmdbuf_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
+         ac_cmdbuf_emit(eop_bug_va);
+         ac_cmdbuf_emit(eop_bug_va >> 32);
+      }
+
+      ac_cmdbuf_emit(PKT3(PKT3_RELEASE_MEM, gfx_level >= GFX9 ? 6 : 5, false));
+      ac_cmdbuf_emit(op);
+      ac_cmdbuf_emit(sel);
+      ac_cmdbuf_emit(va);        /* address lo */
+      ac_cmdbuf_emit(va >> 32);  /* address hi */
+      ac_cmdbuf_emit(new_fence); /* immediate data lo */
+      ac_cmdbuf_emit(0);         /* immediate data hi */
+      if (gfx_level >= GFX9)
+         ac_cmdbuf_emit(0); /* unused */
+   } else {
+      /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
+       * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on the graphics
+       * queue, and with RELEASE_MEM on the compute queue.
+       */
+      if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
+         assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && data_sel == EOP_DATA_SEL_VALUE_32BIT);
+
+         ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
+         ac_cmdbuf_emit(op);
+         ac_cmdbuf_emit(va);
+         ac_cmdbuf_emit(((va >> 32) & 0xffff) |
+                        EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
+         ac_cmdbuf_emit(new_fence);
+      } else {
+         if (gfx_level == GFX7 || gfx_level == GFX8) {
+            /* Two EOP events are required to make all engines go idle (and
+             * optional cache flushes executed) before the timestamp is
+             * written.
+             */
+            ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
+            ac_cmdbuf_emit(op);
+            ac_cmdbuf_emit(eop_bug_va);
+            ac_cmdbuf_emit(((eop_bug_va >> 32) & 0xffff) | sel);
+            ac_cmdbuf_emit(0); /* immediate data */
+            ac_cmdbuf_emit(0); /* unused */
+         }
+
+         ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
+         ac_cmdbuf_emit(op);
+         ac_cmdbuf_emit(va);
+         ac_cmdbuf_emit(((va >> 32) & 0xffff) | sel);
+         ac_cmdbuf_emit(new_fence); /* immediate data */
+         ac_cmdbuf_emit(0); /* unused */
+      }
+   }
+
+   ac_cmdbuf_end();
+}
+
 void
 ac_emit_cp_atomic_mem(struct ac_cmdbuf *cs, uint32_t atomic_op,
                       uint32_t atomic_cmd, uint64_t va, uint64_t data,
diff --git a/src/amd/common/ac_cmdbuf_cp.h b/src/amd/common/ac_cmdbuf_cp.h
index 1680ce2c3a8..246683e383f 100644
--- a/src/amd/common/ac_cmdbuf_cp.h
+++ b/src/amd/common/ac_cmdbuf_cp.h
@@ -99,6 +99,13 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                        enum amd_ip_type ip_type, uint32_t engine,
                        uint32_t gcr_cntl);
 
+void
+ac_emit_cp_release_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                       enum amd_ip_type ip_type, uint32_t event,
+                       uint32_t event_flags, uint32_t dst_sel,
+                       uint32_t int_sel, uint32_t data_sel, uint64_t va,
+                       uint32_t new_fence, uint64_t eop_bug_va);
+
 void
 ac_emit_cp_atomic_mem(struct ac_cmdbuf *cs, uint32_t atomic_op,
                       uint32_t atomic_cmd, uint64_t va, uint64_t data,
diff --git a/src/amd/vulkan/radv_cs.c b/src/amd/vulkan/radv_cs.c
index d1e7fa837e1..44ac4561cfa 100644
--- a/src/amd/vulkan/radv_cs.c
+++ b/src/amd/vulkan/radv_cs.c
@@ -26,76 +26,14 @@ radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_
       return;
    }
 
-   /* EOS events may be buggy on GFX7, prefer not to use them. */
-   if (gfx_level == GFX7 && (event == V_028A90_CS_DONE || event == V_028A90_PS_DONE))
-      event = V_028A90_BOTTOM_OF_PIPE_TS;
+   /* The EOP bug is specific to GFX9. RadeonSI also implements the workaround
+    * on GFX6-8, but it shouldn't be necessary there because SURFACE_SYNC is
+    * used to flush L2. See waEventWriteEopPrematureL2Inv in PAL.
+    */
+   const uint64_t eop_bug_va = gfx_level >= GFX9 ? gfx9_eop_bug_va : va;
 
-   const bool is_mec = cs->hw_ip == AMD_IP_COMPUTE && gfx_level >= GFX7;
-   unsigned op =
-      EVENT_TYPE(event) | EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | event_flags;
-   unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel);
-
-   radeon_begin(cs);
-
-   if (gfx_level >= GFX9 || is_mec) {
-      /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
-       * counters) must immediately precede every timestamp event to
-       * prevent a GPU hang on GFX9.
-       */
-      if (gfx_level == GFX9 && !is_mec) {
-         radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
-         radeon_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
-         radeon_emit(gfx9_eop_bug_va);
-         radeon_emit(gfx9_eop_bug_va >> 32);
-      }
-
-      radeon_emit(PKT3(PKT3_RELEASE_MEM, gfx_level >= GFX9 ? 6 : 5, false));
-      radeon_emit(op);
-      radeon_emit(sel);
-      radeon_emit(va);        /* address lo */
-      radeon_emit(va >> 32);  /* address hi */
-      radeon_emit(new_fence); /* immediate data lo */
-      radeon_emit(0);         /* immediate data hi */
-      if (gfx_level >= GFX9)
-         radeon_emit(0); /* unused */
-   } else {
-      /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
-       * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on
-       * the graphics queue, and with RELEASE_MEM on the compute
-       * queue.
-       */
-      if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
-         assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && data_sel == EOP_DATA_SEL_VALUE_32BIT);
-
-         radeon_emit(PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
-         radeon_emit(op);
-         radeon_emit(va);
-         radeon_emit(((va >> 32) & 0xffff) | EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
-         radeon_emit(new_fence);
-      } else {
-         if (gfx_level == GFX7 || gfx_level == GFX8) {
-            /* Two EOP events are required to make all
-             * engines go idle (and optional cache flushes
-             * executed) before the timestamp is written.
-             */
-            radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
-            radeon_emit(op);
-            radeon_emit(va);
-            radeon_emit(((va >> 32) & 0xffff) | sel);
-            radeon_emit(0); /* immediate data */
-            radeon_emit(0); /* unused */
-         }
-
-         radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
-         radeon_emit(op);
-         radeon_emit(va);
-         radeon_emit(((va >> 32) & 0xffff) | sel);
-         radeon_emit(new_fence); /* immediate data */
-         radeon_emit(0); /* unused */
-      }
-   }
-
-   radeon_end();
+   ac_emit_cp_release_mem(cs->b, gfx_level, cs->hw_ip, event, event_flags, dst_sel, int_sel, data_sel, va, new_fence,
+                          eop_bug_va);
 }
 
 static void
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index 560f974529d..c75f26b6a11 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -50,13 +50,8 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
                        struct si_resource *buf, uint64_t va, uint32_t new_fence,
                        unsigned query_type)
 {
-   unsigned op = EVENT_TYPE(event) |
-                 EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
-                 event_flags;
-   unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel);
    bool compute_ib = !ctx->is_gfx_queue;
-
-   radeon_begin(cs);
+   uint64_t eop_bug_va = 0;
 
    if (ctx->gfx_level >= GFX9 || (compute_ib && ctx->gfx_level >= GFX7)) {
       /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
@@ -89,57 +84,28 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
       }
       assert(16 * ctx->screen->info.max_render_backends <= scratch->b.b.width0);
 
-      radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
-      radeon_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
-      radeon_emit(scratch->gpu_address);
-      radeon_emit(scratch->gpu_address >> 32);
+      eop_bug_va = scratch->gpu_address;
 
       radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch, RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
    }
+   } else if (ctx->gfx_level == GFX7 || ctx->gfx_level == GFX8) {
+      struct si_resource *scratch = ctx->eop_bug_scratch;
-      radeon_emit(PKT3(PKT3_RELEASE_MEM, ctx->gfx_level >= GFX9 ? 6 : 5, 0));
-      radeon_emit(op);
-      radeon_emit(sel);
-      radeon_emit(va);        /* address lo */
-      radeon_emit(va >> 32);  /* address hi */
-      radeon_emit(new_fence); /* immediate data lo */
-      radeon_emit(0);         /* immediate data hi */
-      if (ctx->gfx_level >= GFX9)
-         radeon_emit(0); /* unused */
-   } else {
-      if (ctx->gfx_level == GFX7 || ctx->gfx_level == GFX8) {
-         struct si_resource *scratch = ctx->eop_bug_scratch;
-         uint64_t va = scratch->gpu_address;
+      eop_bug_va = scratch->gpu_address;
 
-         /* Two EOP events are required to make all engines go idle
-          * (and optional cache flushes executed) before the timestamp
-          * is written.
-          */
-         radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-         radeon_emit(op);
-         radeon_emit(va);
-         radeon_emit(((va >> 32) & 0xffff) | sel);
-         radeon_emit(0); /* immediate data */
-         radeon_emit(0); /* unused */
-
-         radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
-                                   RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
-      }
-
-      radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-      radeon_emit(op);
-      radeon_emit(va);
-      radeon_emit(((va >> 32) & 0xffff) | sel);
-      radeon_emit(new_fence); /* immediate data */
-      radeon_emit(0); /* unused */
+      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
+                                RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
    }
 
-   radeon_end();
-
    if (buf) {
      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
    }
+
+   ac_emit_cp_release_mem(&cs->current, ctx->gfx_level,
+                          compute_ib ? AMD_IP_COMPUTE : AMD_IP_GFX, event,
+                          event_flags, dst_sel, int_sel, data_sel, va,
+                          new_fence, eop_bug_va);
 }
 
 unsigned si_cp_write_fence_dwords(struct si_screen *screen)
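
For reference, a minimal usage sketch of the new helper (not part of the patch): it emits a bottom-of-pipe timestamp event that writes a 32-bit fence value to memory, using the EOP_*/V_028A90_* definitions from sid.h. The command buffer `cs` and the GPU address `fence_va` (8-byte aligned, owned by the caller) are hypothetical placeholders.

/* Write the 32-bit value 1 to fence_va once all prior work has drained past
 * bottom-of-pipe. On GFX10 no EOP-bug workaround is emitted, so eop_bug_va
 * can be 0; on GFX9 a scratch buffer VA would have to be passed instead so
 * the ZPASS_DONE workaround precedes the timestamp event.
 */
ac_emit_cp_release_mem(cs, GFX10, AMD_IP_GFX,
                       V_028A90_BOTTOM_OF_PIPE_TS,
                       0,                        /* event_flags: no cache actions */
                       EOP_DST_SEL_MEM,          /* write destination: memory */
                       EOP_INT_SEL_NONE,         /* no interrupt */
                       EOP_DATA_SEL_VALUE_32BIT, /* write the immediate value */
                       fence_va,                 /* fence address */
                       1,                        /* new_fence */
                       0);                       /* eop_bug_va: unused on GFX10 */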