From 8236b0fe2b6bbcc0434051c3742a430f9653ce39 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 12 Aug 2025 08:45:16 +0300 Subject: [PATCH] brw: compute consistent clip/cull distance masks with VUE We can optimize the VUE layout in cases where all shaders are compiled together and some outputs are unused. So we need to have consistent clip/cull_distance_mask with the VUE. Previously we could have a VUE without ClipDistance present in the header and yet have a non zero clip_distance_mask. This would trip the HW into taking into account a VUE field that doesn't exist. Here we set the clip/cull_distance_mask to 0 if the associated output is not written by the shader. The written outputs are always consistent with what's in the VUE. Signed-off-by: Lionel Landwerlin Fixes: 2d396f6085 ("intel: prepare VUE layout for more than 2 layouts") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13685 Reviewed-by: Ivan Briano Part-of: (cherry picked from commit 46c16f854e7450182058e3b47e0171a08cfd5469) --- .pick_status.json | 2 +- src/intel/compiler/brw_compile_gs.cpp | 12 +++++++++--- src/intel/compiler/brw_compile_mesh.cpp | 13 ++++++++++--- src/intel/compiler/brw_compile_tes.cpp | 12 +++++++++--- src/intel/compiler/brw_compile_vs.cpp | 12 +++++++++--- 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 69a572ba563..1c4279cd1ec 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3424,7 +3424,7 @@ "description": "brw: compute consistent clip/cull distance masks with VUE", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "2d396f608500d88fa00eeb99d14ed5727f1cece9", "notes": null diff --git a/src/intel/compiler/brw_compile_gs.cpp b/src/intel/compiler/brw_compile_gs.cpp index a49fe60c140..098ba4a8005 100644 --- a/src/intel/compiler/brw_compile_gs.cpp +++ b/src/intel/compiler/brw_compile_gs.cpp @@ -169,10 +169,16 @@ brw_compile_gs(const struct brw_compiler *compiler, brw_postprocess_nir(nir, compiler, debug_enabled, key->base.robust_flags); - prog_data->base.clip_distance_mask = - ((1 << nir->info.clip_distance_array_size) - 1); + const bool has_clip_cull_dist = + nir->info.outputs_written & (VARYING_BIT_CLIP_DIST0 | + VARYING_BIT_CLIP_DIST1 | + VARYING_BIT_CULL_DIST0 | + VARYING_BIT_CULL_DIST1); + prog_data->base.clip_distance_mask = has_clip_cull_dist ? + ((1 << nir->info.clip_distance_array_size) - 1) : 0; prog_data->base.cull_distance_mask = - ((1 << nir->info.cull_distance_array_size) - 1) << + (has_clip_cull_dist ? + ((1 << nir->info.cull_distance_array_size) - 1) : 0) << nir->info.clip_distance_array_size; prog_data->include_primitive_id = diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp index 1a3d654b5aa..73d46cfbc5c 100644 --- a/src/intel/compiler/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw_compile_mesh.cpp @@ -1171,10 +1171,17 @@ brw_compile_mesh(const struct brw_compiler *compiler, prog_data->base.local_size[1] = nir->info.workgroup_size[1]; prog_data->base.local_size[2] = nir->info.workgroup_size[2]; - prog_data->clip_distance_mask = (1 << nir->info.clip_distance_array_size) - 1; + const bool has_clip_cull_dist = + nir->info.outputs_written & (VARYING_BIT_CLIP_DIST0 | + VARYING_BIT_CLIP_DIST1 | + VARYING_BIT_CULL_DIST0 | + VARYING_BIT_CULL_DIST1); + prog_data->clip_distance_mask = has_clip_cull_dist ? + (1 << nir->info.clip_distance_array_size) - 1 : 0; prog_data->cull_distance_mask = - ((1 << nir->info.cull_distance_array_size) - 1) << - nir->info.clip_distance_array_size; + (has_clip_cull_dist ? + ((1 << nir->info.cull_distance_array_size) - 1) : 0) << + nir->info.clip_distance_array_size; prog_data->primitive_type = nir->info.mesh.primitive_type; /* Apply this workaround before trying to pack indices because this can diff --git a/src/intel/compiler/brw_compile_tes.cpp b/src/intel/compiler/brw_compile_tes.cpp index 88f29a84a8a..871c72e86da 100644 --- a/src/intel/compiler/brw_compile_tes.cpp +++ b/src/intel/compiler/brw_compile_tes.cpp @@ -95,10 +95,16 @@ brw_compile_tes(const struct brw_compiler *compiler, return NULL; } - prog_data->base.clip_distance_mask = - ((1 << nir->info.clip_distance_array_size) - 1); + const bool has_clip_cull_dist = + nir->info.outputs_written & (VARYING_BIT_CLIP_DIST0 | + VARYING_BIT_CLIP_DIST1 | + VARYING_BIT_CULL_DIST0 | + VARYING_BIT_CULL_DIST1); + prog_data->base.clip_distance_mask = has_clip_cull_dist ? + ((1 << nir->info.clip_distance_array_size) - 1) : 0; prog_data->base.cull_distance_mask = - ((1 << nir->info.cull_distance_array_size) - 1) << + (has_clip_cull_dist ? + ((1 << nir->info.cull_distance_array_size) - 1) : 0) << nir->info.clip_distance_array_size; prog_data->include_primitive_id = diff --git a/src/intel/compiler/brw_compile_vs.cpp b/src/intel/compiler/brw_compile_vs.cpp index 43cf6577938..7e177a9f024 100644 --- a/src/intel/compiler/brw_compile_vs.cpp +++ b/src/intel/compiler/brw_compile_vs.cpp @@ -266,10 +266,16 @@ brw_compile_vs(const struct brw_compiler *compiler, brw_postprocess_nir(nir, compiler, debug_enabled, key->base.robust_flags); - prog_data->base.clip_distance_mask = - ((1 << nir->info.clip_distance_array_size) - 1); + const bool has_clip_cull_dist = + nir->info.outputs_written & (VARYING_BIT_CLIP_DIST0 | + VARYING_BIT_CLIP_DIST1 | + VARYING_BIT_CULL_DIST0 | + VARYING_BIT_CULL_DIST1); + prog_data->base.clip_distance_mask = has_clip_cull_dist ? + ((1 << nir->info.clip_distance_array_size) - 1) : 0; prog_data->base.cull_distance_mask = - ((1 << nir->info.cull_distance_array_size) - 1) << + (has_clip_cull_dist ? + ((1 << nir->info.cull_distance_array_size) - 1) : 0) << nir->info.clip_distance_array_size; unsigned nr_attribute_slots = util_bitcount64(prog_data->inputs_read);