From 1cf778e011c9468303b79c1c57363937192c0720 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 5 Feb 2025 18:33:57 +0100 Subject: [PATCH] radv: fix fetching draw vertex data from counter buffers with transform feedback counterOffset was just ignored and nobody noticed (missing VKCTS coverage). VGT_STRMOUT_DRAW_OPAQUE_OFFSET will do the computation in hw for us. Cc: mesa-stable Signed-off-by: Samuel Pitoiset Part-of: (cherry picked from commit 8625decbcca61da5bbea26cd2b3f9c0a221d6bf7) --- .pick_status.json | 2 +- src/amd/common/ac_cmdbuf.c | 2 -- src/amd/vulkan/radv_cmd_buffer.c | 7 +++++-- src/gallium/drivers/radeonsi/si_state.c | 3 +++ 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 5db09ed2995..7dce363cba4 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -884,7 +884,7 @@ "description": "radv: fix fetching draw vertex data from counter buffers with transform feedback", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/common/ac_cmdbuf.c b/src/amd/common/ac_cmdbuf.c index 4182dbaa6c6..80c30d9a5bc 100644 --- a/src/amd/common/ac_cmdbuf.c +++ b/src/amd/common/ac_cmdbuf.c @@ -249,7 +249,6 @@ gfx6_init_graphics_preamble_state(const struct ac_preamble_state *state, /* CLEAR_STATE doesn't clear these correctly on certain generations. * I don't know why. Deduced by trial and error. */ - ac_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); ac_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); ac_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); } @@ -678,7 +677,6 @@ gfx12_init_graphics_preamble_state(const struct ac_preamble_state *state, ac_pm4_set_reg(pm4, R_028AA0_VGT_DRAW_PAYLOAD_CNTL, 0); ac_pm4_set_reg(pm4, R_028ABC_DB_HTILE_SURFACE, 0); - ac_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); ac_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, S_028B50_ACCUM_ISOLINE(128) | S_028B50_ACCUM_TRI(128) | diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index ee217b3c8fb..acc447c3d02 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -13618,7 +13618,8 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou } static void -radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info) +radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info, + uint32_t counter_offset) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); @@ -13651,6 +13652,8 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d radeon_emit(cs, 0); /* unused */ } + radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, counter_offset); + radv_cs_add_buffer(device->ws, cs, draw_info->strmout_buffer->bo); } @@ -13677,7 +13680,7 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc if (!radv_before_draw(cmd_buffer, &info, 1, false)) return; struct VkMultiDrawInfoEXT minfo = {0, 0}; - radv_emit_strmout_buffer(cmd_buffer, &info); + radv_emit_strmout_buffer(cmd_buffer, &info, counterOffset); radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, S_0287F0_USE_OPAQUE(1), 0); if (pdev->info.gfx_level == GFX12) { diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b76f2272cde..15f4222861a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5006,6 +5006,7 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx) } if (sctx->gfx_level <= GFX7 || !has_clear_state) { + ac_pm4_set_reg(&pm4->base, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); ac_pm4_set_reg(&pm4->base, R_028034_PA_SC_SCREEN_SCISSOR_BR, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); } @@ -5153,6 +5154,8 @@ static void gfx12_init_gfx_preamble_state(struct si_context *sctx) S_028648_IDX0_EXPORT_FORMAT(V_028648_SPI_SHADER_1COMP)); ac_pm4_set_reg(&pm4->base, R_028658_SPI_BARYC_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); + /* The rate combiners have no effect if they are disabled like this: * VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 1 * PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1