radv: implement streamout on GFX12

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29676>
This commit is contained in:
Samuel Pitoiset 2024-06-04 11:35:34 +02:00 committed by Marge Bot
parent aa9dfcad50
commit bd59478d2f

View file

@ -6351,6 +6351,22 @@ radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
}
}
static void
radv_emit_streamout_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
{
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_STREAMOUT_STATE);
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
uint32_t base_reg;
if (loc->sgpr_idx == -1)
return;
base_reg = last_vgt_shader->info.user_data_0;
radv_emit_shader_pointer(device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, false);
}
static void
radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
{
@ -6402,6 +6418,39 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
desc_va += so_offset;
radv_emit_streamout_buffers(cmd_buffer, desc_va);
if (pdev->info.gfx_level >= GFX12) {
const uint8_t first_target = ffs(so->enabled_mask) - 1;
unsigned state_offset;
uint64_t state_va;
void *state_ptr;
/* The layout is:
* struct {
* struct {
* uint32_t ordered_id; // equal for all buffers
* uint32_t dwords_written;
* } buffer[4];
* };
*
* The buffer must be initialized to 0 and the address must be aligned to 64
* because it's faster when the atomic doesn't straddle a 64B block boundary.
*/
if (!radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, MAX_SO_BUFFERS * 8, 64, &state_offset, &state_ptr))
return;
memset(state_ptr, 0, MAX_SO_BUFFERS * 8);
state_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
state_va += state_offset;
/* The first enabled streamout target will contain the ordered ID/offset buffer for all
* targets.
*/
state_va += first_target * 8;
radv_emit_streamout_state(cmd_buffer, state_va);
}
}
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER;
@ -7900,7 +7949,7 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
*/
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
if (pdev->use_ngg_streamout) {
if (pdev->use_ngg_streamout && pdev->info.gfx_level < GFX12) {
/* GFX11 needs GDS OA for streamout. */
cmd_buffer->gds_oa_needed = true;
}
@ -13217,6 +13266,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
bool first_target = true;
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
if (!pdev->use_ngg_streamout)
@ -13245,7 +13295,24 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
radv_cs_add_buffer(device->ws, cs, buffer->bo);
}
if (pdev->use_ngg_streamout) {
if (pdev->info.gfx_level >= GFX12) {
/* Only the first streamout target holds information. */
if (first_target) {
if (append) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(
cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, (R_0309B0_GE_GS_ORDERED_ID_BASE >> 2));
radeon_emit(cs, 0);
} else {
radeon_set_uconfig_reg(cs, R_0309B0_GE_GS_ORDERED_ID_BASE, 0);
}
first_target = false;
}
} else if (pdev->use_ngg_streamout) {
if (append) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs,
@ -13309,6 +13376,14 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
if (pdev->info.gfx_level >= GFX12) {
/* Nothing to do. The streamout state buffer already contains the next ordered ID, which
* is the only thing we need to restore.
*/
radv_set_streamout_enable(cmd_buffer, false);
return;
}
if (pdev->use_ngg_streamout) {
/* Wait for streamout to finish before reading GDS_STRMOUT registers. */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;