amd: add missing _cp_ to some emit helpers

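Rename ac_emit_cond_exec() and ac_emit_write_data_imm() to ac_emit_cp_cond_exec() and ac_emit_cp_write_data_imm(), for consistency with the other ac_emit_cp_*() helpers (e.g. ac_emit_cp_wait_mem(), ac_emit_cp_copy_data()). No functional change.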

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37881>
This commit is contained in:
Samuel Pitoiset 2025-10-15 09:48:37 +02:00
parent a0117b5e74
commit 5801986f53
6 changed files with 33 additions and 32 deletions

View file

@ -859,8 +859,8 @@ ac_init_graphics_preamble_state(const struct ac_preamble_state *state,
}
void
ac_emit_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
uint64_t va, uint32_t count)
ac_emit_cp_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
uint64_t va, uint32_t count)
{
ac_cmdbuf_begin(cs);
if (gfx_level >= GFX7) {
@ -879,7 +879,8 @@ ac_emit_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
}
void
ac_emit_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t value)
ac_emit_cp_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel,
uint64_t va, uint32_t value)
{
ac_cmdbuf_begin(cs);
ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 3, 0));

View file

@ -121,12 +121,12 @@ ac_init_graphics_preamble_state(const struct ac_preamble_state *state,
struct ac_pm4_state *pm4);
void
ac_emit_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
uint64_t va, uint32_t count);
ac_emit_cp_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
uint64_t va, uint32_t count);
void
ac_emit_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel,
uint64_t va, uint32_t value);
ac_emit_cp_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel,
uint64_t va, uint32_t value);
void
ac_emit_cp_wait_mem(struct ac_cmdbuf *cs, uint64_t va, uint32_t ref,

View file

@ -4641,7 +4641,7 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_
if (requires_cond_exec) {
uint64_t va = radv_get_tc_compat_zrange_va(image, iview->vk.base_mip_level);
ac_emit_cond_exec(cmd_buffer->cs->b, pdev->info.gfx_level, va, 3 /* SET_CONTEXT_REG size */);
ac_emit_cp_cond_exec(cmd_buffer->cs->b, pdev->info.gfx_level, va, 3 /* SET_CONTEXT_REG size */);
}
radeon_begin(cmd_buffer->cs);
@ -5620,7 +5620,7 @@ radv_gfx12_emit_hiz_wa_full(struct radv_cmd_buffer *cmd_buffer)
} else {
const uint64_t va = radv_get_hiz_valid_va(iview->image, iview->vk.base_mip_level);
ac_emit_cond_exec(cmd_buffer->cs->b, pdev->info.gfx_level, va, num_dwords);
ac_emit_cp_cond_exec(cmd_buffer->cs->b, pdev->info.gfx_level, va, num_dwords);
radv_gfx12_override_hiz_enable(cmd_buffer, true);
}
@ -10109,7 +10109,7 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c
radv_emit_copy_data_imm(pdev, cs, 1, inv_va);
/* If the API predication VA == 0, skip next command. */
ac_emit_cond_exec(cs->b, pdev->info.gfx_level, va, 6 /* 1x COPY_DATA size */);
ac_emit_cp_cond_exec(cs->b, pdev->info.gfx_level, va, 6 /* 1x COPY_DATA size */);
/* Write 0 to the new predication VA (when the API condition != 0) */
radv_emit_copy_data_imm(pdev, cs, 0, inv_va);
@ -10118,7 +10118,7 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c
va = inv_va;
}
ac_emit_cond_exec(cs->b, pdev->info.gfx_level, va, dwords);
ac_emit_cp_cond_exec(cs->b, pdev->info.gfx_level, va, dwords);
}
ALWAYS_INLINE static void
@ -10803,8 +10803,8 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc
&cmd_state->mec_inv_pred_emitted, ace_predication_size);
if (workaround_cond_va) {
ac_emit_cond_exec(ace_cs->b, pdev->info.gfx_level, info->count_va,
6 + 11 * num_views /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */);
ac_emit_cp_cond_exec(ace_cs->b, pdev->info.gfx_level, info->count_va,
6 + 11 * num_views /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */);
ac_emit_cp_copy_data(ace_cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 0, workaround_cond_va,
AC_CP_COPY_DATA_WR_CONFIRM);
@ -10825,8 +10825,8 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc
}
if (workaround_cond_va) {
ac_emit_cond_exec(ace_cs->b, pdev->info.gfx_level, workaround_cond_va,
6 * num_views /* Nx DISPATCH_TASKMESH_DIRECT_ACE */);
ac_emit_cp_cond_exec(ace_cs->b, pdev->info.gfx_level, workaround_cond_va,
6 * num_views /* Nx DISPATCH_TASKMESH_DIRECT_ACE */);
for (unsigned v = 0; v < num_views; ++v) {
radv_cs_emit_dispatch_taskmesh_direct_ace_packet(device, cmd_state, ace_cs, 0, 0, 0);

View file

@ -625,7 +625,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
uint64_t pred_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
uint64_t reg_va = va + (end ? 8 : 0);
ac_emit_cond_exec(cs->b, pdev->info.gfx_level, pred_va, 0);
ac_emit_cp_cond_exec(cs->b, pdev->info.gfx_level, pred_va, 0);
uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1);
@ -652,7 +652,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
if (end) {
uint64_t signal_va = va + pool->b.stride - 8 - 8 * pass;
ac_emit_write_data_imm(cs->b, V_370_ME, signal_va, 1);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, signal_va, 1);
}
*skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1;
@ -680,7 +680,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo);
uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
ac_emit_write_data_imm(cs->b, V_370_ME, perf_ctr_va, 0);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, perf_ctr_va, 0);
radv_pc_wait_idle(cmd_buffer);
radv_perfcounter_emit_reset(cs, false);
@ -691,7 +691,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
uint64_t pred_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
ac_emit_cond_exec(cs->b, pdev->info.gfx_level, pred_va, 0);
ac_emit_cp_cond_exec(cs->b, pdev->info.gfx_level, pred_va, 0);
uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1);

View file

@ -677,7 +677,7 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q
radeon_check_space(device->ws, ace_cs->b, 11);
gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
ac_emit_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
ac_emit_cp_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
@ -751,7 +751,7 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que
radeon_check_space(device->ws, ace_cs->b, 11);
gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
ac_emit_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
ac_emit_cp_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
cmd_buffer->state.active_pipeline_ace_queries--;
@ -976,11 +976,11 @@ radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t i
if (pdev->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
/* written prim counter */
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 8);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
if (!cmd_buffer->state.active_emulated_prims_xfb_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
@ -1005,11 +1005,11 @@ radv_end_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t ind
if (pdev->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
/* written prim counter */
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 24);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 28, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 28, 0x80000000);
cmd_buffer->state.active_emulated_prims_xfb_queries--;
@ -1345,7 +1345,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *
if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query are always emulated. */
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
if (!cmd_buffer->state.active_emulated_prims_gen_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
@ -1369,7 +1369,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *
if (pool->uses_emulated_queries) {
/* generated prim counter */
gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 32);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 36, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 36, 0x80000000);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
@ -1394,7 +1394,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po
if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query are always emulated. */
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
cmd_buffer->state.active_emulated_prims_gen_queries--;
@ -1418,7 +1418,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po
if (pool->uses_emulated_queries) {
/* generated prim counter */
gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 40);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 44, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 44, 0x80000000);
cmd_buffer->state.active_emulated_prims_gen_queries--;
@ -1596,7 +1596,7 @@ radv_begin_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
radv_emit_event_write(&pdev->info, cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
} else {
gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
@ -1633,7 +1633,7 @@ radv_end_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t
cmd_buffer->gfx9_eop_bug_va);
} else {
gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8);
ac_emit_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
cmd_buffer->state.active_emulated_prims_gen_queries--;

View file

@ -844,7 +844,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
/* Clear the emulated counter end value. We don't clear start because it's unused. */
va += si_query_pipestat_end_dw_offset(sctx->screen, query->index) * 4;
ac_emit_write_data_imm(&cs->current, V_370_PFP, va, 0);
ac_emit_cp_write_data_imm(&cs->current, V_370_PFP, va, 0);
sctx->num_pipeline_stat_emulated_queries++;
} else {