diff --git a/src/amd/common/ac_cmdbuf.c b/src/amd/common/ac_cmdbuf.c index 23a8f6a0e10..cf726ffd241 100644 --- a/src/amd/common/ac_cmdbuf.c +++ b/src/amd/common/ac_cmdbuf.c @@ -1035,3 +1035,28 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx ac_cmdbuf_emit(0); /* INT_CTXID */ ac_cmdbuf_end(); } + +void +ac_emit_cp_copy_data(struct ac_cmdbuf *cs, uint32_t src_sel, uint32_t dst_sel, + uint64_t src_va, uint64_t dst_va, + enum ac_cp_copy_data_flags flags) +{ /* Emits one PKT3_COPY_DATA packet into cs using six ac_cmdbuf_emit() calls: packet header, control dword, src_va low/high, dst_va low/high. src_sel and dst_sel are encoded with COPY_DATA_SRC_SEL()/COPY_DATA_DST_SEL(); every AC_CP_COPY_DATA_* bit set in flags ORs the same-named COPY_DATA_* bit into the control dword. */ + uint32_t dword0 = COPY_DATA_SRC_SEL(src_sel) | + COPY_DATA_DST_SEL(dst_sel); + + if (flags & AC_CP_COPY_DATA_WR_CONFIRM) + dword0 |= COPY_DATA_WR_CONFIRM; + if (flags & AC_CP_COPY_DATA_COUNT_SEL) + dword0 |= COPY_DATA_COUNT_SEL; + if (flags & AC_CP_COPY_DATA_ENGINE_PFP) + dword0 |= COPY_DATA_ENGINE_PFP; + + ac_cmdbuf_begin(cs); + ac_cmdbuf_emit(PKT3(PKT3_COPY_DATA, 4, 0)); + ac_cmdbuf_emit(dword0); + ac_cmdbuf_emit(src_va); /* source, low dword; callers in this patch also pass an immediate value or a register dword offset (reg >> 2) here, depending on src_sel. NOTE(review): presumably the emit macro truncates to 32 bits - confirm */ + ac_cmdbuf_emit(src_va >> 32); /* source, high dword */ + ac_cmdbuf_emit(dst_va); /* destination, low dword; for register destinations callers in this patch pass the register dword offset (reg >> 2) */ + ac_cmdbuf_emit(dst_va >> 32); /* destination, high dword */ + ac_cmdbuf_end(); +} diff --git a/src/amd/common/ac_cmdbuf.h b/src/amd/common/ac_cmdbuf.h index a4b8f4df0af..05bbb799d79 100644 --- a/src/amd/common/ac_cmdbuf.h +++ b/src/amd/common/ac_cmdbuf.h @@ -120,6 +120,17 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx ASSERTED enum amd_ip_type ip_type, uint32_t event_type, uint32_t gcr_cntl); +enum ac_cp_copy_data_flags { /* Bit flags accepted by ac_emit_cp_copy_data(); each one requests the same-named COPY_DATA_* bit in the packet's control dword. */ + AC_CP_COPY_DATA_WR_CONFIRM = 1u << 0, /* sets COPY_DATA_WR_CONFIRM */ + AC_CP_COPY_DATA_COUNT_SEL = 1u << 1, /* 64 bits */ + AC_CP_COPY_DATA_ENGINE_PFP = 1u << 2, /* sets COPY_DATA_ENGINE_PFP */ +}; + +void +ac_emit_cp_copy_data(struct ac_cmdbuf *cs, uint32_t src_sel, uint32_t dst_sel, + uint64_t src_va, uint64_t dst_va, + enum ac_cp_copy_data_flags flags); /* Emits a single PKT3_COPY_DATA packet into cs; src_va/dst_va are split into low/high dwords, flags is a mask of ac_cp_copy_data_flags. */ + #ifdef __cplusplus } #endif diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 079652a783b..d6bf46ab350 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5062,28 +5062,23 @@ 
radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct rad uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset; - radeon_begin(cs); - if (pdev->info.has_load_ctx_reg_pkt) { + radeon_begin(cs); radeon_emit(PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0)); radeon_emit(va); radeon_emit(va >> 32); radeon_emit((reg - SI_CONTEXT_REG_OFFSET) >> 2); radeon_emit(reg_count); + radeon_end(); } else { - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | - (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); - radeon_emit(va); - radeon_emit(va >> 32); - radeon_emit(reg >> 2); - radeon_emit(0); + ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, va, reg >> 2, + (reg_count == 2 ? AC_CP_COPY_DATA_COUNT_SEL : 0)); + radeon_begin(cs); radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(0); + radeon_end(); } - - radeon_end(); } /* @@ -10061,15 +10056,8 @@ static void radv_emit_copy_data_imm(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs, uint32_t src_imm, uint64_t dst_va) { - radeon_begin(cs); - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM | - (pdev->info.gfx_level == GFX6 ? COPY_DATA_ENGINE_PFP : 0)); - radeon_emit(src_imm); - radeon_emit(0); - radeon_emit(dst_va); - radeon_emit(dst_va >> 32); - radeon_end(); + ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, src_imm, dst_va, + AC_CP_COPY_DATA_WR_CONFIRM | (pdev->info.gfx_level == GFX6 ? 
AC_CP_COPY_DATA_ENGINE_PFP : 0)); } /** @@ -10783,14 +10771,8 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc * - When count != 0, write 0 to the workaround BO and execute the indirect dispatch * - When workaround BO != 0 (count was 0), execute an empty direct dispatch */ - radeon_begin(ace_cs); - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); - radeon_emit(1); - radeon_emit(0); - radeon_emit(workaround_cond_va); - radeon_emit(workaround_cond_va >> 32); - radeon_end(); + ac_emit_cp_copy_data(ace_cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 1, workaround_cond_va, + AC_CP_COPY_DATA_WR_CONFIRM); /* 2x COND_EXEC + 1x COPY_DATA + Nx DISPATCH_TASKMESH_DIRECT_ACE */ ace_predication_size += 2 * 5 + 6 + 6 * num_views; @@ -10803,14 +10785,8 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc ac_emit_cond_exec(ace_cs->b, pdev->info.gfx_level, info->count_va, 6 + 11 * num_views /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */); - radeon_begin(ace_cs); - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); - radeon_emit(0); - radeon_emit(0); - radeon_emit(workaround_cond_va); - radeon_emit(workaround_cond_va >> 32); - radeon_end(); + ac_emit_cp_copy_data(ace_cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 0, workaround_cond_va, + AC_CP_COPY_DATA_WR_CONFIRM); } if (!view_mask) { @@ -13147,21 +13123,12 @@ radv_save_dispatch_size(struct radv_cmd_buffer *cmd_buffer, uint64_t indirect_va uint64_t va = radv_buffer_get_va(device->trace_bo) + offsetof(struct radv_trace_data, indirect_dispatch); - radeon_begin(cs); - for (uint32_t i = 0; i < 3; i++) { - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); - 
radeon_emit(indirect_va); - radeon_emit(indirect_va >> 32); - radeon_emit(va); - radeon_emit(va >> 32); + ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, indirect_va, va, AC_CP_COPY_DATA_WR_CONFIRM); indirect_va += 4; va += 4; } - - radeon_end(); } static void @@ -13255,15 +13222,8 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv const uint64_t src_va = unaligned_va + i * 4; const uint64_t dst_va = indirect_va + i * 4; - radeon_begin(cs); - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); - radeon_emit(src_va); - radeon_emit(src_va >> 32); - radeon_emit(dst_va); - radeon_emit(dst_va >> 32); - radeon_end(); + ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, src_va, dst_va, + AC_CP_COPY_DATA_WR_CONFIRM); } } @@ -14693,16 +14653,10 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va emulated_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; radeon_check_space(device->ws, cs->b, 8); + + ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, emulated_va, AC_CP_COPY_DATA_WR_CONFIRM); + radeon_begin(cs); - - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); - radeon_emit(va); - radeon_emit(va >> 32); - radeon_emit(emulated_va); - radeon_emit(emulated_va >> 32); - radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(0); radeon_end(); @@ -14965,32 +14919,25 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC radv_cs_add_buffer(device->ws, cs->b, buffer->bo); } - radeon_begin(cs); - if (pdev->info.gfx_level >= GFX12) { if (append) { - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); - 
radeon_emit(va); - radeon_emit(va >> 32); - radeon_emit((so->state_va + i * 8 + 4)); - radeon_emit((so->state_va + i * 8 + 4) >> 32); + ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, so->state_va + i * 8 + 4, + AC_CP_COPY_DATA_WR_CONFIRM); } } else if (pdev->use_ngg_streamout) { if (append) { - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM); - radeon_emit(va); - radeon_emit(va >> 32); - radeon_emit((R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i); - radeon_emit(0); + ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, va, + (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, AC_CP_COPY_DATA_WR_CONFIRM); } else { /* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */ + radeon_begin(cs); radeon_set_uconfig_perfctr_reg(pdev->info.gfx_level, ring, R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4, 0); + radeon_end(); } } else { + radeon_begin(cs); + /* AMD GCN binds streamout buffers as shader resources. * VGT only counts primitives and tells the shader through * SGPRs what to do. 
@@ -15017,9 +14964,9 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC radeon_emit(0); /* unused */ radeon_emit(0); /* unused */ } - } - radeon_end(); + radeon_end(); + } } assert(cs->b->cdw <= cdw_max); @@ -15077,28 +15024,19 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou radv_cs_add_buffer(device->ws, cs->b, buffer->bo); } - radeon_begin(cs); - if (pdev->info.gfx_level >= GFX12) { if (append) { - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); - radeon_emit((so->state_va + i * 8 + 4)); - radeon_emit((so->state_va + i * 8 + 4) >> 32); - radeon_emit(va); - radeon_emit(va >> 32); + ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, so->state_va + i * 8 + 4, va, + AC_CP_COPY_DATA_WR_CONFIRM); } } else if (pdev->use_ngg_streamout) { if (append) { - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_REG) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); - radeon_emit((R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i); - radeon_emit(0); - radeon_emit(va); - radeon_emit(va >> 32); + ac_emit_cp_copy_data(cs->b, COPY_DATA_REG, COPY_DATA_DST_MEM, + (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, va, AC_CP_COPY_DATA_WR_CONFIRM); } } else { + radeon_begin(cs); + if (append) { radeon_emit(PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); radeon_emit(STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ @@ -15116,10 +15054,10 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou */ radeon_set_context_reg(R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0); + radeon_end(); + cmd_buffer->cs->context_roll_without_scissor_emitted = true; } - - radeon_end(); } assert(cs->b->cdw <= cdw_max); @@ -15150,12 +15088,11 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d radeon_end(); 
} - radeon_begin(cs); - if (gfx_level >= GFX10) { /* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption * (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+. */ + radeon_begin(cs); radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(0); @@ -15164,16 +15101,11 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d radeon_emit(draw_info->strmout_va >> 32); radeon_emit((R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE - SI_CONTEXT_REG_OFFSET) >> 2); radeon_emit(1); /* 1 DWORD */ + radeon_end(); } else { - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM); - radeon_emit(draw_info->strmout_va); - radeon_emit(draw_info->strmout_va >> 32); - radeon_emit(R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); - radeon_emit(0); /* unused */ + ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, draw_info->strmout_va, + R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2, AC_CP_COPY_DATA_WR_CONFIRM); } - - radeon_end(); } VKAPI_ATTR void VKAPI_CALL @@ -15241,14 +15173,7 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 12); if (!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) { - radeon_begin(cs); - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); - radeon_emit(marker); - radeon_emit(0); - radeon_emit(va); - radeon_emit(va >> 32); - radeon_end(); + ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, marker, va, AC_CP_COPY_DATA_WR_CONFIRM); } else { radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker, cmd_buffer->gfx9_eop_bug_va); diff --git 
a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index 17aa7452ff2..0a2c160ec33 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -560,15 +560,8 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p if (regs->counters) reg = regs->counters[idx]; - radeon_begin(cs); - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | COPY_DATA_WR_CONFIRM | - COPY_DATA_COUNT_SEL); /* 64 bits */ - radeon_emit(reg >> 2); - radeon_emit(0); /* unused */ - radeon_emit(va); - radeon_emit(va >> 32); - radeon_end(); + ac_emit_cp_copy_data(cs->b, COPY_DATA_PERF, COPY_DATA_TC_L2, reg >> 2, va, + AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL); va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block); reg += reg_delta; diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index a1cf42cd924..a1a9c883ee3 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -37,14 +37,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkQueryType qu static void gfx10_copy_shader_query(struct radv_cmd_stream *cs, uint32_t src_sel, uint64_t src_va, uint64_t dst_va) { - radeon_begin(cs); - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(src_sel) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); - radeon_emit(src_va); - radeon_emit(src_va >> 32); - radeon_emit(dst_va); - radeon_emit(dst_va >> 32); - radeon_end(); + ac_emit_cp_copy_data(cs->b, src_sel, COPY_DATA_DST_MEM, src_va, dst_va, AC_CP_COPY_DATA_WR_CONFIRM); } static void @@ -2724,15 +2717,8 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline struct radv_cmd_stream *cs = cmd_buffer->cs; if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) { - radeon_begin(cs); - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_COUNT_SEL | 
COPY_DATA_WR_CONFIRM | COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM)); - radeon_emit(0); - radeon_emit(0); - radeon_emit(va); - radeon_emit(va >> 32); - radeon_end(); + ac_emit_cp_copy_data(cs->b, COPY_DATA_TIMESTAMP, COPY_DATA_DST_MEM, 0, va, + AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL); } else { radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, va, 0, cmd_buffer->gfx9_eop_bug_va); @@ -2810,8 +2796,6 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer, ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 6 * accelerationStructureCount); - radeon_begin(cs); - for (uint32_t i = 0; i < accelerationStructureCount; ++i) { VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pAccelerationStructures[i]); uint64_t va = vk_acceleration_structure_get_va(accel_struct); @@ -2833,17 +2817,11 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer, UNREACHABLE("Unhandle accel struct query type."); } - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL | - COPY_DATA_WR_CONFIRM); - radeon_emit(va); - radeon_emit(va >> 32); - radeon_emit(query_va); - radeon_emit(query_va >> 32); + ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, query_va, + AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL); query_va += pool->stride; } - radeon_end(); assert(cs->b->cdw <= cdw_max); } diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index c2eda8c4cd2..48e10d610e8 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -379,12 +379,6 @@ void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned uint64_t dst_va = (dst ? 
dst->gpu_address : 0ull) + dst_offset; uint64_t src_va = (src ? src->gpu_address : 0ull) + src_offset; - radeon_begin(cs); - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(src_sel) | COPY_DATA_DST_SEL(dst_sel) | COPY_DATA_WR_CONFIRM); - radeon_emit(src_va); - radeon_emit(src_va >> 32); - radeon_emit(dst_va); - radeon_emit(dst_va >> 32); - radeon_end(); + ac_emit_cp_copy_data(&cs->current, src_sel, dst_sel, src_va, dst_va, + AC_CP_COPY_DATA_WR_CONFIRM); } diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index bd1230631d8..326fc22adee 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -229,37 +229,24 @@ static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned reg = regs->counter0_lo; unsigned reg_delta = 8; - radeon_begin(cs); - if (regs->select0) { for (idx = 0; idx < count; ++idx) { if (regs->counters) reg = regs->counters[idx]; - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_COUNT_SEL); /* 64 bits */ - radeon_emit(reg >> 2); - radeon_emit(0); /* unused */ - radeon_emit(va); - radeon_emit(va >> 32); + ac_emit_cp_copy_data(&cs->current, COPY_DATA_PERF, COPY_DATA_DST_MEM, + reg >> 2, va, AC_CP_COPY_DATA_COUNT_SEL); va += sizeof(uint64_t); reg += reg_delta; } } else { /* Fake counters. 
*/ for (idx = 0; idx < count; ++idx) { - radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_COUNT_SEL); - radeon_emit(0); /* immediate */ - radeon_emit(0); - radeon_emit(va); - radeon_emit(va >> 32); + ac_emit_cp_copy_data(&cs->current, COPY_DATA_IMM, COPY_DATA_DST_MEM, + 0, va, AC_CP_COPY_DATA_COUNT_SEL); va += sizeof(uint64_t); } } - radeon_end(); } static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery)