diff --git a/src/amd/common/ac_cmdbuf.c b/src/amd/common/ac_cmdbuf.c index a327fd26909..3a22176256f 100644 --- a/src/amd/common/ac_cmdbuf.c +++ b/src/amd/common/ac_cmdbuf.c @@ -1243,3 +1243,42 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level, ac_cmdbuf_end(); } + +void +ac_cmdbuf_flush_vgt_streamout(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level) +{ + uint32_t reg_strmout_cntl; + + ac_cmdbuf_begin(cs); + + /* The register is at different places on different ASICs. */ + if (gfx_level >= GFX9) { + reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; + + ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 3, 0)); + ac_cmdbuf_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_ENGINE_SEL(V_370_ME)); + ac_cmdbuf_emit(R_0300FC_CP_STRMOUT_CNTL >> 2); + ac_cmdbuf_emit(0); + ac_cmdbuf_emit(0); + } else if (gfx_level >= GFX7) { + reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; + + ac_cmdbuf_set_uconfig_reg(reg_strmout_cntl, 0); + } else { + reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; + + ac_cmdbuf_set_config_reg(reg_strmout_cntl, 0); + } + + ac_cmdbuf_event_write(V_028A90_SO_VGTSTREAMOUT_FLUSH); + + ac_cmdbuf_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + ac_cmdbuf_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ + ac_cmdbuf_emit(reg_strmout_cntl >> 2); /* register */ + ac_cmdbuf_emit(0); + ac_cmdbuf_emit(S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ + ac_cmdbuf_emit(S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ + ac_cmdbuf_emit(4); /* poll interval */ + + ac_cmdbuf_end(); +} diff --git a/src/amd/common/ac_cmdbuf.h b/src/amd/common/ac_cmdbuf.h index 74d34db86c8..361a29e2bd2 100644 --- a/src/amd/common/ac_cmdbuf.h +++ b/src/amd/common/ac_cmdbuf.h @@ -86,6 +86,19 @@ struct ac_cmdbuf { #define ac_cmdbuf_set_context_reg(reg, value) __ac_cmdbuf_set_reg(reg, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG) +#define ac_cmdbuf_event_write_predicate(event_type, predicate) \ + do { \ + unsigned __event_type = (event_type); \ + ac_cmdbuf_emit(PKT3(PKT3_EVENT_WRITE, 0, predicate)); \ + ac_cmdbuf_emit(EVENT_TYPE(__event_type) | \ + EVENT_INDEX(__event_type == V_028A90_VS_PARTIAL_FLUSH || \ + __event_type == V_028A90_PS_PARTIAL_FLUSH || \ + __event_type == V_028A90_CS_PARTIAL_FLUSH ? 4 : \ + __event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL ? 1 : 0)); \ + } while (0) + +#define ac_cmdbuf_event_write(event_type) ac_cmdbuf_event_write_predicate(event_type, false) + struct ac_preamble_state { uint64_t border_color_va; @@ -165,6 +178,9 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level, enum amd_ip_type ip_type, uint32_t engine, uint32_t gcr_cntl); +void +ac_cmdbuf_flush_vgt_streamout(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level); + #ifdef __cplusplus } #endif diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 3074a514c5c..356bfc0cb7d 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -14806,39 +14806,11 @@ radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_cmd_stream *cs = cmd_buffer->cs; - unsigned reg_strmout_cntl; ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 14); - radeon_begin(cs); + ac_cmdbuf_flush_vgt_streamout(cs->b, pdev->info.gfx_level); - /* The register is at different places on different ASICs. */ - if (pdev->info.gfx_level >= GFX9) { - reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; - radeon_emit(PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(R_0300FC_CP_STRMOUT_CNTL >> 2); - radeon_emit(0); - radeon_emit(0); - } else if (pdev->info.gfx_level >= GFX7) { - reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; - radeon_set_uconfig_reg(reg_strmout_cntl, 0); - } else { - reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; - radeon_set_config_reg(reg_strmout_cntl, 0); - } - - radeon_event_write(V_028A90_SO_VGTSTREAMOUT_FLUSH); - - radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ - radeon_emit(reg_strmout_cntl >> 2); /* register */ - radeon_emit(0); - radeon_emit(S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ - radeon_emit(S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ - radeon_emit(4); /* poll interval */ - - radeon_end(); assert(cs->b->cdw <= cdw_max); } diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index f9b13e0481a..0962a385d97 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -206,18 +206,9 @@ radeon_check_space(struct radeon_winsys *ws, struct ac_cmdbuf *cs, unsigned need radeon_emit(0); /* unused */ \ } while (0) -#define radeon_event_write_predicate(event_type, predicate) \ - do { \ - unsigned __event_type = (event_type); \ - radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, predicate)); \ - radeon_emit(EVENT_TYPE(__event_type) | EVENT_INDEX(__event_type == V_028A90_VS_PARTIAL_FLUSH || \ - __event_type == V_028A90_PS_PARTIAL_FLUSH || \ - __event_type == V_028A90_CS_PARTIAL_FLUSH \ - ? 4 \ - : 0)); \ - } while (0) +#define radeon_event_write_predicate(event_type, predicate) ac_cmdbuf_event_write_predicate(event_type, predicate) -#define radeon_event_write(event_type) radeon_event_write_predicate(event_type, false) +#define radeon_event_write(event_type) ac_cmdbuf_event_write(event_type) #define radeon_emit_32bit_pointer(sh_offset, va, info) \ do { \ diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h index 38102a2b71a..806547fad5a 100644 --- a/src/gallium/drivers/radeonsi/si_build_pm4.h +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h @@ -506,15 +506,8 @@ } while (0) /* Other packet helpers. */ -#define radeon_event_write(event_type) do { \ - unsigned __event_type = (event_type); \ - radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \ - radeon_emit(EVENT_TYPE(__event_type) | \ - EVENT_INDEX(__event_type == V_028A90_VS_PARTIAL_FLUSH || \ - __event_type == V_028A90_PS_PARTIAL_FLUSH || \ - __event_type == V_028A90_CS_PARTIAL_FLUSH ? 4 : \ - __event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL ? 1 : 0)); \ -} while (0) +#define radeon_event_write(event_type) \ + ac_cmdbuf_event_write(event_type) #define radeon_emit_alt_hiz_logic() do { \ static_assert(GFX_VERSION == GFX12 || !ALT_HIZ_LOGIC, ""); \ diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index 23ae4a32188..3674c67be13 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -259,36 +259,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ static void si_flush_vgt_streamout(struct si_context *sctx) { struct radeon_cmdbuf *cs = &sctx->gfx_cs; - unsigned reg_strmout_cntl; - radeon_begin(cs); - - /* The register is at different places on different ASICs. */ - if (sctx->gfx_level >= GFX9) { - reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; - radeon_emit(PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(R_0300FC_CP_STRMOUT_CNTL >> 2); - radeon_emit(0); - radeon_emit(0); - } else if (sctx->gfx_level >= GFX7) { - reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; - radeon_set_uconfig_reg(reg_strmout_cntl, 0); - } else { - reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; - radeon_set_config_reg(reg_strmout_cntl, 0); - } - - radeon_event_write(V_028A90_SO_VGTSTREAMOUT_FLUSH); - - radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ - radeon_emit(reg_strmout_cntl >> 2); /* register */ - radeon_emit(0); - radeon_emit(S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ - radeon_emit(S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ - radeon_emit(4); /* poll interval */ - radeon_end(); + ac_cmdbuf_flush_vgt_streamout(&cs->current, sctx->gfx_level); } static void si_emit_streamout_begin(struct si_context *sctx, unsigned index)