mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-08 10:40:11 +01:00
amd: add a predicate parameter to ac_emit_cp_copy_data()
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37881>
This commit is contained in:
parent
29c2d02d64
commit
ed7f9df864
8 changed files with 30 additions and 26 deletions
|
|
@ -1040,7 +1040,7 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx
|
|||
void
|
||||
ac_emit_cp_copy_data(struct ac_cmdbuf *cs, uint32_t src_sel, uint32_t dst_sel,
|
||||
uint64_t src_va, uint64_t dst_va,
|
||||
enum ac_cp_copy_data_flags flags)
|
||||
enum ac_cp_copy_data_flags flags, bool predicate)
|
||||
{
|
||||
uint32_t dword0 = COPY_DATA_SRC_SEL(src_sel) |
|
||||
COPY_DATA_DST_SEL(dst_sel);
|
||||
|
|
@ -1053,7 +1053,7 @@ ac_emit_cp_copy_data(struct ac_cmdbuf *cs, uint32_t src_sel, uint32_t dst_sel,
|
|||
dword0 |= COPY_DATA_ENGINE_PFP;
|
||||
|
||||
ac_cmdbuf_begin(cs);
|
||||
ac_cmdbuf_emit(PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
ac_cmdbuf_emit(PKT3(PKT3_COPY_DATA, 4, predicate));
|
||||
ac_cmdbuf_emit(dword0);
|
||||
ac_cmdbuf_emit(src_va);
|
||||
ac_cmdbuf_emit(src_va >> 32);
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ enum ac_cp_copy_data_flags {
|
|||
void
|
||||
ac_emit_cp_copy_data(struct ac_cmdbuf *cs, uint32_t src_sel, uint32_t dst_sel,
|
||||
uint64_t src_va, uint64_t dst_va,
|
||||
enum ac_cp_copy_data_flags flags);
|
||||
enum ac_cp_copy_data_flags flags, bool predicate);
|
||||
|
||||
void
|
||||
ac_emit_cp_pfp_sync_me(struct ac_cmdbuf *cs, bool predicate);
|
||||
|
|
|
|||
|
|
@ -5065,7 +5065,7 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct rad
|
|||
ac_emit_cp_load_context_reg_index(cs->b, reg, reg_count, va, false);
|
||||
} else {
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, va, reg >> 2,
|
||||
(reg_count == 2 ? AC_CP_COPY_DATA_COUNT_SEL : 0));
|
||||
(reg_count == 2 ? AC_CP_COPY_DATA_COUNT_SEL : 0), false);
|
||||
|
||||
ac_emit_cp_pfp_sync_me(cs->b, false);
|
||||
}
|
||||
|
|
@ -10063,7 +10063,8 @@ radv_emit_copy_data_imm(const struct radv_physical_device *pdev, struct radv_cmd
|
|||
uint64_t dst_va)
|
||||
{
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, src_imm, dst_va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM | (pdev->info.gfx_level == GFX6 ? AC_CP_COPY_DATA_ENGINE_PFP : 0));
|
||||
AC_CP_COPY_DATA_WR_CONFIRM | (pdev->info.gfx_level == GFX6 ? AC_CP_COPY_DATA_ENGINE_PFP : 0),
|
||||
false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -10778,7 +10779,7 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc
|
|||
* - When workaround BO != 0 (count was 0), execute an empty direct dispatch
|
||||
*/
|
||||
ac_emit_cp_copy_data(ace_cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 1, workaround_cond_va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
|
||||
/* 2x COND_EXEC + 1x COPY_DATA + Nx DISPATCH_TASKMESH_DIRECT_ACE */
|
||||
ace_predication_size += 2 * 5 + 6 + 6 * num_views;
|
||||
|
|
@ -10792,7 +10793,7 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc
|
|||
6 + 11 * num_views /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */);
|
||||
|
||||
ac_emit_cp_copy_data(ace_cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 0, workaround_cond_va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
}
|
||||
|
||||
if (!view_mask) {
|
||||
|
|
@ -13130,7 +13131,8 @@ radv_save_dispatch_size(struct radv_cmd_buffer *cmd_buffer, uint64_t indirect_va
|
|||
uint64_t va = radv_buffer_get_va(device->trace_bo) + offsetof(struct radv_trace_data, indirect_dispatch);
|
||||
|
||||
for (uint32_t i = 0; i < 3; i++) {
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, indirect_va, va, AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, indirect_va, va, AC_CP_COPY_DATA_WR_CONFIRM,
|
||||
false);
|
||||
|
||||
indirect_va += 4;
|
||||
va += 4;
|
||||
|
|
@ -13229,7 +13231,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
|
|||
const uint64_t dst_va = indirect_va + i * 4;
|
||||
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, src_va, dst_va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -14650,7 +14652,8 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va
|
|||
|
||||
radeon_check_space(device->ws, cs->b, 8);
|
||||
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, emulated_va, AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, emulated_va, AC_CP_COPY_DATA_WR_CONFIRM,
|
||||
false);
|
||||
|
||||
ac_emit_cp_pfp_sync_me(cs->b, false);
|
||||
|
||||
|
|
@ -14887,12 +14890,12 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
|
|||
if (pdev->info.gfx_level >= GFX12) {
|
||||
if (append) {
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, so->state_va + i * 8 + 4,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
}
|
||||
} else if (pdev->use_ngg_streamout) {
|
||||
if (append) {
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, va,
|
||||
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
} else {
|
||||
/* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */
|
||||
radeon_begin(cs);
|
||||
|
|
@ -14992,12 +14995,13 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
|
|||
if (pdev->info.gfx_level >= GFX12) {
|
||||
if (append) {
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, so->state_va + i * 8 + 4, va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
}
|
||||
} else if (pdev->use_ngg_streamout) {
|
||||
if (append) {
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_REG, COPY_DATA_DST_MEM,
|
||||
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, va, AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, va, AC_CP_COPY_DATA_WR_CONFIRM,
|
||||
false);
|
||||
}
|
||||
} else {
|
||||
radeon_begin(cs);
|
||||
|
|
@ -15063,7 +15067,7 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
|
|||
draw_info->strmout_va, false);
|
||||
} else {
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, draw_info->strmout_va,
|
||||
R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2, AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2, AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -15132,7 +15136,7 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag
|
|||
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 12);
|
||||
|
||||
if (!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) {
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, marker, va, AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, marker, va, AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
} else {
|
||||
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
||||
EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, va, marker,
|
||||
|
|
|
|||
|
|
@ -561,7 +561,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
|
|||
reg = regs->counters[idx];
|
||||
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_PERF, COPY_DATA_TC_L2, reg >> 2, va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL);
|
||||
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL, false);
|
||||
|
||||
va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block);
|
||||
reg += reg_delta;
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkQueryType qu
|
|||
static void
|
||||
gfx10_copy_shader_query(struct radv_cmd_stream *cs, uint32_t src_sel, uint64_t src_va, uint64_t dst_va)
|
||||
{
|
||||
ac_emit_cp_copy_data(cs->b, src_sel, COPY_DATA_DST_MEM, src_va, dst_va, AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
ac_emit_cp_copy_data(cs->b, src_sel, COPY_DATA_DST_MEM, src_va, dst_va, AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2714,7 +2714,7 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline
|
|||
|
||||
if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) {
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_TIMESTAMP, COPY_DATA_DST_MEM, 0, va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL);
|
||||
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL, false);
|
||||
} else {
|
||||
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
||||
EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_TIMESTAMP, va, 0,
|
||||
|
|
@ -2815,7 +2815,7 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, query_va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL);
|
||||
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL, false);
|
||||
|
||||
query_va += pool->stride;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1522,16 +1522,16 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool
|
|||
uint64_t set_va = va + (unlock ? 0 : 8 * pass);
|
||||
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 0, unset_va,
|
||||
AC_CP_COPY_DATA_COUNT_SEL | AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
AC_CP_COPY_DATA_COUNT_SEL | AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 1, set_va,
|
||||
AC_CP_COPY_DATA_COUNT_SEL | AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
AC_CP_COPY_DATA_COUNT_SEL | AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
|
||||
if (unlock) {
|
||||
uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET;
|
||||
|
||||
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 0, mutex_va,
|
||||
AC_CP_COPY_DATA_COUNT_SEL | AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
AC_CP_COPY_DATA_COUNT_SEL | AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
}
|
||||
|
||||
assert(cs->b->cdw <= cdw);
|
||||
|
|
|
|||
|
|
@ -380,5 +380,5 @@ void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned
|
|||
uint64_t src_va = (src ? src->gpu_address : 0ull) + src_offset;
|
||||
|
||||
ac_emit_cp_copy_data(&cs->current, src_sel, dst_sel, src_va, dst_va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM);
|
||||
AC_CP_COPY_DATA_WR_CONFIRM, false);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -235,7 +235,7 @@ static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block,
|
|||
reg = regs->counters[idx];
|
||||
|
||||
ac_emit_cp_copy_data(&cs->current, COPY_DATA_PERF, COPY_DATA_DST_MEM,
|
||||
reg >> 2, va, AC_CP_COPY_DATA_COUNT_SEL);
|
||||
reg >> 2, va, AC_CP_COPY_DATA_COUNT_SEL, false);
|
||||
va += sizeof(uint64_t);
|
||||
reg += reg_delta;
|
||||
}
|
||||
|
|
@ -243,7 +243,7 @@ static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block,
|
|||
/* Fake counters. */
|
||||
for (idx = 0; idx < count; ++idx) {
|
||||
ac_emit_cp_copy_data(&cs->current, COPY_DATA_IMM, COPY_DATA_DST_MEM,
|
||||
0, va, AC_CP_COPY_DATA_COUNT_SEL);
|
||||
0, va, AC_CP_COPY_DATA_COUNT_SEL, false);
|
||||
va += sizeof(uint64_t);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue