From 30f33ada9c697fa37a5cc63158e20f82856d3b4e Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 8 Aug 2025 11:12:02 +0200 Subject: [PATCH] ac,radv,radeonsi: fix programming PA_SU_PRIM_FILTER_CNTL on GFX12 GFX12 seems to behave slightly differently. Setting these bits to TRUE causes zero-area triangles to not pass the primitive clipping stage. So, the actual number of primitives output by the primitive clipping stage was wrong. After digging a lot, it seems PAL doesn't set these bits either on GFX12. CC: mesa-stable Signed-off-by: Samuel Pitoiset Part-of: (cherry picked from commit b2ea120732258cbd9de05623338f29e3145d0d34) --- .pick_status.json | 2 +- src/amd/common/ac_cmdbuf.c | 6 ++++++ src/amd/vulkan/radv_cmd_buffer.c | 2 +- src/amd/vulkan/radv_device.c | 2 +- src/gallium/drivers/radeonsi/si_state.c | 2 +- 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 98aeea762c7..f3a1c696d7c 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3914,7 +3914,7 @@ "description": "ac,radv,radeonsi: fix programming PA_SU_PRIM_FILTER_CNTL on GFX12", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/common/ac_cmdbuf.c b/src/amd/common/ac_cmdbuf.c index 36c1a5801f9..68b1d333083 100644 --- a/src/amd/common/ac_cmdbuf.c +++ b/src/amd/common/ac_cmdbuf.c @@ -808,6 +808,12 @@ gfx12_init_graphics_preamble_state(const struct ac_preamble_state *state, ac_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0); ac_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0); + /* On GFX12, this seems to behave slightly differently. Programming the + * EXCLUSION fields to TRUE causes zero-area triangles to not pass the + * primitive clipping stage. + */ + ac_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); + ac_pm4_set_reg(pm4, R_031128_SPI_GRP_LAUNCH_GUARANTEE_ENABLE, S_031128_ENABLE(1) | S_031128_GS_ASSIST_EN(1) | diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index b55dbf2ec37..eb0a47680c7 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1269,7 +1269,7 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer) radeon_emit(centroid_priority); radeon_emit(centroid_priority >> 32); - if (pdev->info.gfx_level >= GFX7) { + if (pdev->info.gfx_level >= GFX7 && pdev->info.gfx_level < GFX12) { /* The exclusion bits can be set to improve rasterization efficiency if no sample lies on the pixel boundary * (-8 sample offset). */ diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 0aff835a785..43da04c4189 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1024,7 +1024,7 @@ radv_emit_default_sample_locations(const struct radv_physical_device *pdev, stru * pixel boundary (-8 sample offset). It's currently always TRUE because the driver doesn't * support 16 samples. */ - if (pdev->info.gfx_level >= GFX7) { + if (pdev->info.gfx_level >= GFX7 && pdev->info.gfx_level < GFX12) { radeon_set_context_reg(R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(1) | S_02882C_YMAX_BOTTOM_EXCLUSION(1)); } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 78109b6c8a4..a0b7f5391dc 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4999,7 +4999,7 @@ static void si_init_graphics_preamble_state(struct si_context *sctx, ac_init_graphics_preamble_state(&preamble_state, &pm4->base); - if (sctx->gfx_level >= GFX7) { + if (sctx->gfx_level >= GFX7 && sctx->gfx_level < GFX12) { /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */ ac_pm4_set_reg(&pm4->base, R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(1) |