amd,radv,radeonsi: add ac_emit_cp_copy_data()

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37813>
This commit is contained in:
Samuel Pitoiset 2025-10-10 12:01:38 +02:00 committed by Marge Bot
parent af169d7393
commit 7ead034a06
7 changed files with 90 additions and 177 deletions

View file

@ -1035,3 +1035,28 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx
ac_cmdbuf_emit(0); /* INT_CTXID */
ac_cmdbuf_end();
}
/**
 * Emit one PKT3_COPY_DATA packet (six dwords) into \p cs.
 *
 * \param cs       Command buffer the packet is appended to.
 * \param src_sel  Value placed in the COPY_DATA_SRC_SEL field.
 * \param dst_sel  Value placed in the COPY_DATA_DST_SEL field.
 * \param src_va   Emitted as two dwords: the value itself, then value >> 32.
 *                 Callers also use this slot for immediates and register
 *                 dword offsets, depending on \p src_sel.
 * \param dst_va   Emitted the same way as \p src_va, after it.
 * \param flags    Bitmask of ac_cp_copy_data_flags; each set flag turns on
 *                 the matching COPY_DATA_* bit in the control dword.
 *
 * No space reservation is done here.
 * NOTE(review): presumably callers must reserve command-buffer space
 * beforehand - confirm against the ac_cmdbuf_* macro definitions.
 */
void
ac_emit_cp_copy_data(struct ac_cmdbuf *cs, uint32_t src_sel, uint32_t dst_sel,
                     uint64_t src_va, uint64_t dst_va,
                     enum ac_cp_copy_data_flags flags)
{
   /* Translate each API flag into its packet bit (0 when the flag is clear). */
   const uint32_t wr_confirm_bit =
      (flags & AC_CP_COPY_DATA_WR_CONFIRM) ? COPY_DATA_WR_CONFIRM : 0;
   const uint32_t count_sel_bit =
      (flags & AC_CP_COPY_DATA_COUNT_SEL) ? COPY_DATA_COUNT_SEL : 0;
   const uint32_t engine_pfp_bit =
      (flags & AC_CP_COPY_DATA_ENGINE_PFP) ? COPY_DATA_ENGINE_PFP : 0;

   /* Control dword: source/destination selectors plus the optional bits. */
   const uint32_t control = COPY_DATA_SRC_SEL(src_sel) |
                            COPY_DATA_DST_SEL(dst_sel) |
                            wr_confirm_bit | count_sel_bit | engine_pfp_bit;

   ac_cmdbuf_begin(cs);
   ac_cmdbuf_emit(PKT3(PKT3_COPY_DATA, 4, 0));
   ac_cmdbuf_emit(control);
   /* Source operand. NOTE(review): ac_cmdbuf_emit() presumably stores a
    * 32-bit dword, so the first emit keeps only the low half - confirm.
    */
   ac_cmdbuf_emit(src_va);
   ac_cmdbuf_emit(src_va >> 32);
   /* Destination operand, same low/high split. */
   ac_cmdbuf_emit(dst_va);
   ac_cmdbuf_emit(dst_va >> 32);
   ac_cmdbuf_end();
}

View file

@ -120,6 +120,17 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx
ASSERTED enum amd_ip_type ip_type, uint32_t event_type,
uint32_t gcr_cntl);
/* Optional bits accepted by ac_emit_cp_copy_data(); combine them with '|'. */
enum ac_cp_copy_data_flags {
   AC_CP_COPY_DATA_WR_CONFIRM = 0x1u, /* set COPY_DATA_WR_CONFIRM in the packet */
   AC_CP_COPY_DATA_COUNT_SEL = 0x2u,  /* set COPY_DATA_COUNT_SEL: 64 bits */
   AC_CP_COPY_DATA_ENGINE_PFP = 0x4u, /* set COPY_DATA_ENGINE_PFP in the packet */
};
/**
 * Emit a PKT3_COPY_DATA packet into \p cs.
 *
 * \p src_sel and \p dst_sel fill the packet's source/destination selector
 * fields. \p src_va and \p dst_va are each emitted as a low dword followed by
 * the value shifted right by 32; callers also pass immediates or register
 * dword offsets in these slots, depending on the selectors. \p flags is a
 * bitmask of ac_cp_copy_data_flags.
 */
void
ac_emit_cp_copy_data(struct ac_cmdbuf *cs, uint32_t src_sel, uint32_t dst_sel,
uint64_t src_va, uint64_t dst_va,
enum ac_cp_copy_data_flags flags);
#ifdef __cplusplus
}
#endif

View file

@ -5062,28 +5062,23 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct rad
uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
radeon_begin(cs);
if (pdev->info.has_load_ctx_reg_pkt) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
radeon_emit(va);
radeon_emit(va >> 32);
radeon_emit((reg - SI_CONTEXT_REG_OFFSET) >> 2);
radeon_emit(reg_count);
radeon_end();
} else {
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
(reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
radeon_emit(va);
radeon_emit(va >> 32);
radeon_emit(reg >> 2);
radeon_emit(0);
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, va, reg >> 2,
(reg_count == 2 ? AC_CP_COPY_DATA_COUNT_SEL : 0));
radeon_begin(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
}
radeon_end();
}
/*
@ -10061,15 +10056,8 @@ static void
radv_emit_copy_data_imm(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs, uint32_t src_imm,
uint64_t dst_va)
{
radeon_begin(cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM |
(pdev->info.gfx_level == GFX6 ? COPY_DATA_ENGINE_PFP : 0));
radeon_emit(src_imm);
radeon_emit(0);
radeon_emit(dst_va);
radeon_emit(dst_va >> 32);
radeon_end();
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, src_imm, dst_va,
AC_CP_COPY_DATA_WR_CONFIRM | (pdev->info.gfx_level == GFX6 ? AC_CP_COPY_DATA_ENGINE_PFP : 0));
}
/**
@ -10783,14 +10771,8 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc
* - When count != 0, write 0 to the workaround BO and execute the indirect dispatch
* - When workaround BO != 0 (count was 0), execute an empty direct dispatch
*/
radeon_begin(ace_cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(1);
radeon_emit(0);
radeon_emit(workaround_cond_va);
radeon_emit(workaround_cond_va >> 32);
radeon_end();
ac_emit_cp_copy_data(ace_cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 1, workaround_cond_va,
AC_CP_COPY_DATA_WR_CONFIRM);
/* 2x COND_EXEC + 1x COPY_DATA + Nx DISPATCH_TASKMESH_DIRECT_ACE */
ace_predication_size += 2 * 5 + 6 + 6 * num_views;
@ -10803,14 +10785,8 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc
ac_emit_cond_exec(ace_cs->b, pdev->info.gfx_level, info->count_va,
6 + 11 * num_views /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */);
radeon_begin(ace_cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(0);
radeon_emit(0);
radeon_emit(workaround_cond_va);
radeon_emit(workaround_cond_va >> 32);
radeon_end();
ac_emit_cp_copy_data(ace_cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 0, workaround_cond_va,
AC_CP_COPY_DATA_WR_CONFIRM);
}
if (!view_mask) {
@ -13147,21 +13123,12 @@ radv_save_dispatch_size(struct radv_cmd_buffer *cmd_buffer, uint64_t indirect_va
uint64_t va = radv_buffer_get_va(device->trace_bo) + offsetof(struct radv_trace_data, indirect_dispatch);
radeon_begin(cs);
for (uint32_t i = 0; i < 3; i++) {
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(indirect_va);
radeon_emit(indirect_va >> 32);
radeon_emit(va);
radeon_emit(va >> 32);
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, indirect_va, va, AC_CP_COPY_DATA_WR_CONFIRM);
indirect_va += 4;
va += 4;
}
radeon_end();
}
static void
@ -13255,15 +13222,8 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
const uint64_t src_va = unaligned_va + i * 4;
const uint64_t dst_va = indirect_va + i * 4;
radeon_begin(cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_WR_CONFIRM);
radeon_emit(src_va);
radeon_emit(src_va >> 32);
radeon_emit(dst_va);
radeon_emit(dst_va >> 32);
radeon_end();
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, src_va, dst_va,
AC_CP_COPY_DATA_WR_CONFIRM);
}
}
@ -14693,16 +14653,10 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va
emulated_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
radeon_check_space(device->ws, cs->b, 8);
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, emulated_va, AC_CP_COPY_DATA_WR_CONFIRM);
radeon_begin(cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_WR_CONFIRM);
radeon_emit(va);
radeon_emit(va >> 32);
radeon_emit(emulated_va);
radeon_emit(emulated_va >> 32);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
@ -14965,32 +14919,25 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
}
radeon_begin(cs);
if (pdev->info.gfx_level >= GFX12) {
if (append) {
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_WR_CONFIRM);
radeon_emit(va);
radeon_emit(va >> 32);
radeon_emit((so->state_va + i * 8 + 4));
radeon_emit((so->state_va + i * 8 + 4) >> 32);
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, so->state_va + i * 8 + 4,
AC_CP_COPY_DATA_WR_CONFIRM);
}
} else if (pdev->use_ngg_streamout) {
if (append) {
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
radeon_emit(va);
radeon_emit(va >> 32);
radeon_emit((R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
radeon_emit(0);
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, va,
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, AC_CP_COPY_DATA_WR_CONFIRM);
} else {
/* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */
radeon_begin(cs);
radeon_set_uconfig_perfctr_reg(pdev->info.gfx_level, ring, R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4,
0);
radeon_end();
}
} else {
radeon_begin(cs);
/* AMD GCN binds streamout buffers as shader resources.
* VGT only counts primitives and tells the shader through
* SGPRs what to do.
@ -15017,9 +14964,9 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
radeon_emit(0); /* unused */
radeon_emit(0); /* unused */
}
}
radeon_end();
radeon_end();
}
}
assert(cs->b->cdw <= cdw_max);
@ -15077,28 +15024,19 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
}
radeon_begin(cs);
if (pdev->info.gfx_level >= GFX12) {
if (append) {
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_WR_CONFIRM);
radeon_emit((so->state_va + i * 8 + 4));
radeon_emit((so->state_va + i * 8 + 4) >> 32);
radeon_emit(va);
radeon_emit(va >> 32);
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, so->state_va + i * 8 + 4, va,
AC_CP_COPY_DATA_WR_CONFIRM);
}
} else if (pdev->use_ngg_streamout) {
if (append) {
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_REG) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit((R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
radeon_emit(0);
radeon_emit(va);
radeon_emit(va >> 32);
ac_emit_cp_copy_data(cs->b, COPY_DATA_REG, COPY_DATA_DST_MEM,
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, va, AC_CP_COPY_DATA_WR_CONFIRM);
}
} else {
radeon_begin(cs);
if (append) {
radeon_emit(PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
@ -15116,10 +15054,10 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
*/
radeon_set_context_reg(R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0);
radeon_end();
cmd_buffer->cs->context_roll_without_scissor_emitted = true;
}
radeon_end();
}
assert(cs->b->cdw <= cdw_max);
@ -15150,12 +15088,11 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
radeon_end();
}
radeon_begin(cs);
if (gfx_level >= GFX10) {
/* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption
* (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+.
*/
radeon_begin(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
@ -15164,16 +15101,11 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
radeon_emit(draw_info->strmout_va >> 32);
radeon_emit((R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE - SI_CONTEXT_REG_OFFSET) >> 2);
radeon_emit(1); /* 1 DWORD */
radeon_end();
} else {
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
radeon_emit(draw_info->strmout_va);
radeon_emit(draw_info->strmout_va >> 32);
radeon_emit(R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
radeon_emit(0); /* unused */
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, draw_info->strmout_va,
R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2, AC_CP_COPY_DATA_WR_CONFIRM);
}
radeon_end();
}
VKAPI_ATTR void VKAPI_CALL
@ -15241,14 +15173,7 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 12);
if (!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(marker);
radeon_emit(0);
radeon_emit(va);
radeon_emit(va >> 32);
radeon_end();
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, marker, va, AC_CP_COPY_DATA_WR_CONFIRM);
} else {
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
EOP_DATA_SEL_VALUE_32BIT, va, marker, cmd_buffer->gfx9_eop_bug_va);

View file

@ -560,15 +560,8 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
if (regs->counters)
reg = regs->counters[idx];
radeon_begin(cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | COPY_DATA_WR_CONFIRM |
COPY_DATA_COUNT_SEL); /* 64 bits */
radeon_emit(reg >> 2);
radeon_emit(0); /* unused */
radeon_emit(va);
radeon_emit(va >> 32);
radeon_end();
ac_emit_cp_copy_data(cs->b, COPY_DATA_PERF, COPY_DATA_TC_L2, reg >> 2, va,
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL);
va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block);
reg += reg_delta;

View file

@ -37,14 +37,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkQueryType qu
static void
gfx10_copy_shader_query(struct radv_cmd_stream *cs, uint32_t src_sel, uint64_t src_va, uint64_t dst_va)
{
radeon_begin(cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(src_sel) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(src_va);
radeon_emit(src_va >> 32);
radeon_emit(dst_va);
radeon_emit(dst_va >> 32);
radeon_end();
ac_emit_cp_copy_data(cs->b, src_sel, COPY_DATA_DST_MEM, src_va, dst_va, AC_CP_COPY_DATA_WR_CONFIRM);
}
static void
@ -2724,15 +2717,8 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM | COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
radeon_emit(0);
radeon_emit(0);
radeon_emit(va);
radeon_emit(va >> 32);
radeon_end();
ac_emit_cp_copy_data(cs->b, COPY_DATA_TIMESTAMP, COPY_DATA_DST_MEM, 0, va,
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL);
} else {
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
EOP_DATA_SEL_TIMESTAMP, va, 0, cmd_buffer->gfx9_eop_bug_va);
@ -2810,8 +2796,6 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 6 * accelerationStructureCount);
radeon_begin(cs);
for (uint32_t i = 0; i < accelerationStructureCount; ++i) {
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pAccelerationStructures[i]);
uint64_t va = vk_acceleration_structure_get_va(accel_struct);
@ -2833,17 +2817,11 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,
UNREACHABLE("Unhandle accel struct query type.");
}
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL |
COPY_DATA_WR_CONFIRM);
radeon_emit(va);
radeon_emit(va >> 32);
radeon_emit(query_va);
radeon_emit(query_va >> 32);
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, query_va,
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL);
query_va += pool->stride;
}
radeon_end();
assert(cs->b->cdw <= cdw_max);
}

View file

@ -379,12 +379,6 @@ void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned
uint64_t dst_va = (dst ? dst->gpu_address : 0ull) + dst_offset;
uint64_t src_va = (src ? src->gpu_address : 0ull) + src_offset;
radeon_begin(cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(src_sel) | COPY_DATA_DST_SEL(dst_sel) | COPY_DATA_WR_CONFIRM);
radeon_emit(src_va);
radeon_emit(src_va >> 32);
radeon_emit(dst_va);
radeon_emit(dst_va >> 32);
radeon_end();
ac_emit_cp_copy_data(&cs->current, src_sel, dst_sel, src_va, dst_va,
AC_CP_COPY_DATA_WR_CONFIRM);
}

View file

@ -229,37 +229,24 @@ static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block,
unsigned reg = regs->counter0_lo;
unsigned reg_delta = 8;
radeon_begin(cs);
if (regs->select0) {
for (idx = 0; idx < count; ++idx) {
if (regs->counters)
reg = regs->counters[idx];
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_COUNT_SEL); /* 64 bits */
radeon_emit(reg >> 2);
radeon_emit(0); /* unused */
radeon_emit(va);
radeon_emit(va >> 32);
ac_emit_cp_copy_data(&cs->current, COPY_DATA_PERF, COPY_DATA_DST_MEM,
reg >> 2, va, AC_CP_COPY_DATA_COUNT_SEL);
va += sizeof(uint64_t);
reg += reg_delta;
}
} else {
/* Fake counters. */
for (idx = 0; idx < count; ++idx) {
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_COUNT_SEL);
radeon_emit(0); /* immediate */
radeon_emit(0);
radeon_emit(va);
radeon_emit(va >> 32);
ac_emit_cp_copy_data(&cs->current, COPY_DATA_IMM, COPY_DATA_DST_MEM,
0, va, AC_CP_COPY_DATA_COUNT_SEL);
va += sizeof(uint64_t);
}
}
radeon_end();
}
static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery)