diff --git a/src/amd/common/ac_cmdbuf.c b/src/amd/common/ac_cmdbuf.c
index 6774f381716..63bf0f5d03d 100644
--- a/src/amd/common/ac_cmdbuf.c
+++ b/src/amd/common/ac_cmdbuf.c
@@ -859,8 +859,8 @@ ac_init_graphics_preamble_state(const struct ac_preamble_state *state,
 }
 
 void
-ac_emit_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
-                  uint64_t va, uint32_t count)
+ac_emit_cp_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                     uint64_t va, uint32_t count)
 {
    ac_cmdbuf_begin(cs);
    if (gfx_level >= GFX7) {
@@ -879,7 +879,8 @@ ac_emit_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
 }
 
 void
-ac_emit_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t value)
+ac_emit_cp_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel,
+                          uint64_t va, uint32_t value)
 {
    ac_cmdbuf_begin(cs);
    ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 3, 0));
diff --git a/src/amd/common/ac_cmdbuf.h b/src/amd/common/ac_cmdbuf.h
index f0428c0da4f..ce46f2e6c9a 100644
--- a/src/amd/common/ac_cmdbuf.h
+++ b/src/amd/common/ac_cmdbuf.h
@@ -121,12 +121,12 @@ ac_init_graphics_preamble_state(const struct ac_preamble_state *state,
                                 struct ac_pm4_state *pm4);
 
 void
-ac_emit_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
-                  uint64_t va, uint32_t count);
+ac_emit_cp_cond_exec(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                     uint64_t va, uint32_t count);
 
 void
-ac_emit_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel,
-                       uint64_t va, uint32_t value);
+ac_emit_cp_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel,
+                          uint64_t va, uint32_t value);
 
 void
 ac_emit_cp_wait_mem(struct ac_cmdbuf *cs, uint64_t va, uint32_t ref,
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c4ed6f3bad3..f1468cbaf45 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -4641,7 +4641,7 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_
    if (requires_cond_exec) {
       uint64_t va = radv_get_tc_compat_zrange_va(image, iview->vk.base_mip_level);
 
-      ac_emit_cond_exec(cmd_buffer->cs->b, pdev->info.gfx_level, va, 3 /* SET_CONTEXT_REG size */);
+      ac_emit_cp_cond_exec(cmd_buffer->cs->b, pdev->info.gfx_level, va, 3 /* SET_CONTEXT_REG size */);
    }
 
    radeon_begin(cmd_buffer->cs);
@@ -5620,7 +5620,7 @@ radv_gfx12_emit_hiz_wa_full(struct radv_cmd_buffer *cmd_buffer)
    } else {
       const uint64_t va = radv_get_hiz_valid_va(iview->image, iview->vk.base_mip_level);
 
-      ac_emit_cond_exec(cmd_buffer->cs->b, pdev->info.gfx_level, va, num_dwords);
+      ac_emit_cp_cond_exec(cmd_buffer->cs->b, pdev->info.gfx_level, va, num_dwords);
 
       radv_gfx12_override_hiz_enable(cmd_buffer, true);
    }
@@ -10109,7 +10109,7 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c
       radv_emit_copy_data_imm(pdev, cs, 1, inv_va);
 
       /* If the API predication VA == 0, skip next command. */
-      ac_emit_cond_exec(cs->b, pdev->info.gfx_level, va, 6 /* 1x COPY_DATA size */);
+      ac_emit_cp_cond_exec(cs->b, pdev->info.gfx_level, va, 6 /* 1x COPY_DATA size */);
 
       /* Write 0 to the new predication VA (when the API condition != 0) */
       radv_emit_copy_data_imm(pdev, cs, 0, inv_va);
@@ -10118,7 +10118,7 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c
       va = inv_va;
    }
 
-   ac_emit_cond_exec(cs->b, pdev->info.gfx_level, va, dwords);
+   ac_emit_cp_cond_exec(cs->b, pdev->info.gfx_level, va, dwords);
 }
 
 ALWAYS_INLINE static void
@@ -10803,8 +10803,8 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc
                                          &cmd_state->mec_inv_pred_emitted, ace_predication_size);
 
    if (workaround_cond_va) {
-      ac_emit_cond_exec(ace_cs->b, pdev->info.gfx_level, info->count_va,
-                        6 + 11 * num_views /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */);
+      ac_emit_cp_cond_exec(ace_cs->b, pdev->info.gfx_level, info->count_va,
+                           6 + 11 * num_views /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */);
 
       ac_emit_cp_copy_data(ace_cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, 0, workaround_cond_va,
                            AC_CP_COPY_DATA_WR_CONFIRM);
@@ -10825,8 +10825,8 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc
    }
 
    if (workaround_cond_va) {
-      ac_emit_cond_exec(ace_cs->b, pdev->info.gfx_level, workaround_cond_va,
-                        6 * num_views /* Nx DISPATCH_TASKMESH_DIRECT_ACE */);
+      ac_emit_cp_cond_exec(ace_cs->b, pdev->info.gfx_level, workaround_cond_va,
+                           6 * num_views /* Nx DISPATCH_TASKMESH_DIRECT_ACE */);
 
       for (unsigned v = 0; v < num_views; ++v) {
          radv_cs_emit_dispatch_taskmesh_direct_ace_packet(device, cmd_state, ace_cs, 0, 0, 0);
diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c
index 47bb283f64d..b27aeff43d5 100644
--- a/src/amd/vulkan/radv_perfcounter.c
+++ b/src/amd/vulkan/radv_perfcounter.c
@@ -625,7 +625,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
       uint64_t pred_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
       uint64_t reg_va = va + (end ? 8 : 0);
 
-      ac_emit_cond_exec(cs->b, pdev->info.gfx_level, pred_va, 0);
+      ac_emit_cp_cond_exec(cs->b, pdev->info.gfx_level, pred_va, 0);
 
       uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1);
 
@@ -652,7 +652,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
       if (end) {
         uint64_t signal_va = va + pool->b.stride - 8 - 8 * pass;
 
-         ac_emit_write_data_imm(cs->b, V_370_ME, signal_va, 1);
+         ac_emit_cp_write_data_imm(cs->b, V_370_ME, signal_va, 1);
       }
 
       *skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1;
@@ -680,7 +680,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
    radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo);
 
    uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
-   ac_emit_write_data_imm(cs->b, V_370_ME, perf_ctr_va, 0);
+   ac_emit_cp_write_data_imm(cs->b, V_370_ME, perf_ctr_va, 0);
 
    radv_pc_wait_idle(cmd_buffer);
    radv_perfcounter_emit_reset(cs, false);
@@ -691,7 +691,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
    for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
       uint64_t pred_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
 
-      ac_emit_cond_exec(cs->b, pdev->info.gfx_level, pred_va, 0);
+      ac_emit_cp_cond_exec(cs->b, pdev->info.gfx_level, pred_va, 0);
 
       uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1);
 
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 865b1d10c2c..a5cd0968e44 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -677,7 +677,7 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q
       radeon_check_space(device->ws, ace_cs->b, 11);
 
       gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
-      ac_emit_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
+      ac_emit_cp_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
 
       /* Record that the command buffer needs GDS. */
       cmd_buffer->gds_needed = true;
@@ -751,7 +751,7 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que
      radeon_check_space(device->ws, ace_cs->b, 11);
 
      gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
-      ac_emit_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
+      ac_emit_cp_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
 
      cmd_buffer->state.active_pipeline_ace_queries--;
 
@@ -976,11 +976,11 @@ radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t i
    if (pdev->use_ngg_streamout) {
       /* generated prim counter */
       gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
-      ac_emit_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
+      ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
 
       /* written prim counter */
       gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 8);
-      ac_emit_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
+      ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
 
       if (!cmd_buffer->state.active_emulated_prims_xfb_queries)
          cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
@@ -1005,11 +1005,11 @@ radv_end_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t ind
    if (pdev->use_ngg_streamout) {
       /* generated prim counter */
       gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
-      ac_emit_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
+      ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
 
       /* written prim counter */
       gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 24);
-      ac_emit_write_data_imm(cs->b, V_370_ME, va + 28, 0x80000000);
+      ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 28, 0x80000000);
 
       cmd_buffer->state.active_emulated_prims_xfb_queries--;
 
@@ -1345,7 +1345,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *
    if (pdev->info.gfx_level >= GFX11) {
       /* On GFX11+, primitives generated query are always emulated. */
       gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
-      ac_emit_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
+      ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
 
       if (!cmd_buffer->state.active_emulated_prims_gen_queries)
          cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
@@ -1369,7 +1369,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *
       if (pool->uses_emulated_queries) {
          /* generated prim counter */
          gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 32);
-         ac_emit_write_data_imm(cs->b, V_370_ME, va + 36, 0x80000000);
+         ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 36, 0x80000000);
 
          /* Record that the command buffer needs GDS. */
         cmd_buffer->gds_needed = true;
@@ -1394,7 +1394,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po
    if (pdev->info.gfx_level >= GFX11) {
       /* On GFX11+, primitives generated query are always emulated. */
       gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
-      ac_emit_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
+      ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
 
       cmd_buffer->state.active_emulated_prims_gen_queries--;
 
@@ -1418,7 +1418,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po
       if (pool->uses_emulated_queries) {
          /* generated prim counter */
          gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 40);
-         ac_emit_write_data_imm(cs->b, V_370_ME, va + 44, 0x80000000);
+         ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 44, 0x80000000);
 
          cmd_buffer->state.active_emulated_prims_gen_queries--;
 
@@ -1596,7 +1596,7 @@ radv_begin_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
       radv_emit_event_write(&pdev->info, cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
    } else {
       gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va);
-      ac_emit_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
+      ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
 
       /* Record that the command buffer needs GDS. */
       cmd_buffer->gds_needed = true;
@@ -1633,7 +1633,7 @@ radv_end_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t
                                  cmd_buffer->gfx9_eop_bug_va);
    } else {
       gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8);
-      ac_emit_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
+      ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
 
       cmd_buffer->state.active_emulated_prims_gen_queries--;
 
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index b148412c81f..4f70c6e1383 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -844,7 +844,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
 
          /* Clear the emulated counter end value. We don't clear start because it's unused. */
          va += si_query_pipestat_end_dw_offset(sctx->screen, query->index) * 4;
-         ac_emit_write_data_imm(&cs->current, V_370_PFP, va, 0);
+         ac_emit_cp_write_data_imm(&cs->current, V_370_PFP, va, 0);
 
          sctx->num_pipeline_stat_emulated_queries++;
       } else {
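
Note (not part of the patch): this is a pure rename, ac_emit_cond_exec -> ac_emit_cp_cond_exec and ac_emit_write_data_imm -> ac_emit_cp_write_data_imm, so both helpers follow the existing ac_emit_cp_* naming (ac_emit_cp_wait_mem, ac_emit_cp_copy_data) visible in the diff. Below is a minimal sketch of a call site after the rename; the emit_predicated_write() helper, its parameters, and the 0x80000000 marker value are illustrative only, modeled on the call sites above, while the entry points and the V_370_ME engine select are taken from the diff.

#include "ac_cmdbuf.h"   /* ac_emit_cp_cond_exec(), ac_emit_cp_write_data_imm() */

/* Hypothetical helper, modeled on radv_cs_emit_compute_predication() and the
 * query-availability writes above: the following skip_dwords dwords of the
 * command stream are skipped when the value at pred_va is zero, otherwise the
 * WRITE_DATA below stores an availability marker at dst_va. */
static void
emit_predicated_write(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                      uint64_t pred_va, uint64_t dst_va, uint32_t skip_dwords)
{
   /* Formerly ac_emit_cond_exec(). */
   ac_emit_cp_cond_exec(cs, gfx_level, pred_va, skip_dwords);

   /* Formerly ac_emit_write_data_imm(); V_370_ME comes from the AMD register
    * headers already used by the callers above. */
   ac_emit_cp_write_data_imm(cs, V_370_ME, dst_va, 0x80000000);
}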