ac,radv,radeonsi: fix programming PA_SU_PRIM_FILTER_CNTL on GFX12

GFX12 seems to behave slightly differently from older generations. Setting
the XMAX_RIGHT_EXCLUSION and YMAX_BOTTOM_EXCLUSION bits to TRUE causes
zero-area triangles to not pass the primitive clipping stage, so the
number of primitives output by the primitive clipping stage was wrong.

After a lot of digging, it seems that PAL doesn't set these bits on
GFX12 either.

CC: mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36670>
(cherry picked from commit b2ea120732)
This commit is contained in:
Samuel Pitoiset 2025-08-08 11:12:02 +02:00 committed by Eric Engestrom
parent 768a7c3035
commit 16ef96fa07
5 changed files with 10 additions and 4 deletions

View file

@ -674,7 +674,7 @@
"description": "ac,radv,radeonsi: fix programming PA_SU_PRIM_FILTER_CNTL on GFX12",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -743,6 +743,12 @@ gfx12_init_graphics_preamble_state(const struct ac_preamble_state *state,
ac_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0);
ac_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0);
/* On GFX12, this seems to behave slightly differently. Programming the
* EXCLUSION fields to TRUE causes zero-area triangles to not pass the
* primitive clipping stage.
*/
ac_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
ac_pm4_set_reg(pm4, R_031128_SPI_GRP_LAUNCH_GUARANTEE_ENABLE,
S_031128_ENABLE(1) |
S_031128_GS_ASSIST_EN(1) |

View file

@ -1264,7 +1264,7 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(centroid_priority);
radeon_emit(centroid_priority >> 32);
if (pdev->info.gfx_level >= GFX7) {
if (pdev->info.gfx_level >= GFX7 && pdev->info.gfx_level < GFX12) {
/* The exclusion bits can be set to improve rasterization efficiency if no sample lies on the pixel boundary
* (-8 sample offset).
*/

View file

@ -1014,7 +1014,7 @@ radv_emit_default_sample_locations(const struct radv_physical_device *pdev, stru
* pixel boundary (-8 sample offset). It's currently always TRUE because the driver doesn't
* support 16 samples.
*/
if (pdev->info.gfx_level >= GFX7) {
if (pdev->info.gfx_level >= GFX7 && pdev->info.gfx_level < GFX12) {
radeon_set_context_reg(R_02882C_PA_SU_PRIM_FILTER_CNTL,
S_02882C_XMAX_RIGHT_EXCLUSION(1) | S_02882C_YMAX_BOTTOM_EXCLUSION(1));
}

View file

@ -4985,7 +4985,7 @@ static void si_init_graphics_preamble_state(struct si_context *sctx,
ac_init_graphics_preamble_state(&preamble_state, &pm4->base);
if (sctx->gfx_level >= GFX7) {
if (sctx->gfx_level >= GFX7 && sctx->gfx_level < GFX12) {
/* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */
ac_pm4_set_reg(&pm4->base, R_02882C_PA_SU_PRIM_FILTER_CNTL,
S_02882C_XMAX_RIGHT_EXCLUSION(1) |