anv/brw: handle pipeline libraries with mesh

I always thought there was a massive issue with pipeline libraries &
mesh shaders. Indeed recent CTS tests have exposed a number of issues.

Some values delivered to the fragment shader are coming from different
places depending on whether the preceding shader is Mesh or not. For
example PrimitiveID is delivered in the per-primitive block in Mesh
pipelines whereas for other pipelines it's coming as a VUE slot (which
is per-vertex). Those are 2 different locations in the payload.

We have to find a layout for fragment shaders that is compatible with
everything. Leaving gaps here and there in the thread payload.

Fixes the following test pattern :

  dEQP-VK.mesh_shader.ext.smoke.fast_lib.shared_*

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Acked-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34109>
This commit is contained in:
Lionel Landwerlin 2025-03-18 20:56:02 +02:00 committed by Marge Bot
parent 18bbcf9a63
commit 5c7c1eceb5
8 changed files with 89 additions and 8 deletions

View file

@ -950,6 +950,8 @@ brw_nir_populate_wm_prog_data(nir_shader *shader,
*/
prog_data->alpha_to_coverage = key->alpha_to_coverage;
prog_data->mesh_input = key->mesh_input;
prog_data->uses_sample_mask =
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);

View file

@ -380,7 +380,9 @@ struct brw_wm_prog_key {
static inline bool
brw_wm_prog_key_is_dynamic(const struct brw_wm_prog_key *key)
{
return key->alpha_to_coverage == INTEL_SOMETIMES ||
return
key->mesh_input == INTEL_SOMETIMES ||
key->alpha_to_coverage == INTEL_SOMETIMES ||
key->persample_interp == INTEL_SOMETIMES ||
key->multisample_fbo == INTEL_SOMETIMES ||
key->base.vue_layout == INTEL_VUE_LAYOUT_SEPARATE_MESH;
@ -749,6 +751,11 @@ struct brw_wm_prog_data {
*/
enum intel_sometimes alpha_to_coverage;
/**
* Whether the shader is dispatched with a preceding mesh shader.
*/
enum intel_sometimes mesh_input;
/**
* Push constant location of intel_msaa_flags (dynamic configuration of the
* pixel shader).
@ -806,7 +813,8 @@ struct brw_wm_prog_data {
static inline bool
brw_wm_prog_data_is_dynamic(const struct brw_wm_prog_data *prog_data)
{
return prog_data->alpha_to_coverage == INTEL_SOMETIMES ||
return prog_data->mesh_input == INTEL_SOMETIMES ||
prog_data->alpha_to_coverage == INTEL_SOMETIMES ||
prog_data->coarse_pixel_dispatch == INTEL_SOMETIMES ||
prog_data->persample_dispatch == INTEL_SOMETIMES;
}

View file

@ -4189,7 +4189,8 @@ brw_per_primitive_reg(const brw_builder &bld, int location, unsigned comp)
{
brw_shader &s = *bld.shader;
assert(s.stage == MESA_SHADER_FRAGMENT);
assert(BITFIELD64_BIT(location) & s.nir->info.per_primitive_inputs);
assert((BITFIELD64_BIT(location) & s.nir->info.per_primitive_inputs) ||
location == VARYING_SLOT_PRIMITIVE_ID);
const struct brw_wm_prog_data *prog_data = brw_wm_prog_data(s.prog_data);

View file

@ -110,6 +110,7 @@ void anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
enum brw_robustness_flags robust_flags,
bool fragment_dynamic,
bool mesh_dynamic,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
const struct anv_pipeline_push_map *push_map,

View file

@ -31,6 +31,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
enum brw_robustness_flags robust_flags,
bool fragment_dynamic,
bool mesh_dynamic,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
const struct anv_pipeline_push_map *push_map,
@ -199,6 +200,34 @@ anv_nir_compute_push_layout(nir_shader *nir,
}
}
/* When platforms support Mesh and the fragment shader is not fully linked
* to the previous shader, payload format can change if the preceding
* shader is mesh or not, this is an issue in particular for PrimitiveID
* value (in legacy it's delivered as a VUE slot, in mesh it's delivered
* in the per-primitive block).
*
* Here is the difference in payload format:
*
* Legacy Mesh
* ------------------- -------------------
* | ... | | ... |
* |-----------------| |-----------------|
* | Constant data | | Constant data |
* |-----------------| |-----------------|
| VUE attributes  |   | Per Primitive   |
* ------------------- |-----------------|
* | VUE attributes |
* -------------------
*
* To solve that issue we push an additional dummy push constant buffer in
* legacy pipelines to align everything. The compiler then adds a SEL
* instruction to source the PrimitiveID from the right location based on a
* dynamic bit in fs_msaa_intel.
*/
const bool needs_padding_per_primitive =
mesh_dynamic &&
(nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID);
unsigned n_push_ranges = 0;
if (push_ubo_ranges) {
brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->ubo_ranges);
@ -224,6 +253,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
(push_reg_mask_offset - push_start) / 4;
}
const unsigned max_push_buffers = needs_padding_per_primitive ? 3 : 4;
unsigned range_start_reg = push_constant_range.length;
for (int i = 0; i < 4; i++) {
@ -231,7 +261,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
if (ubo_range->length == 0)
continue;
if (n_push_ranges >= 4) {
if (n_push_ranges >= max_push_buffers) {
memset(ubo_range, 0, sizeof(*ubo_range));
continue;
}
@ -288,6 +318,17 @@ anv_nir_compute_push_layout(nir_shader *nir,
prog_data->nr_params = 32 / 4;
}
if (needs_padding_per_primitive) {
struct anv_push_range push_constant_range = {
.set = ANV_DESCRIPTOR_SET_PER_PRIM_PADDING,
.start = 0,
.length = 1,
};
map->push_ranges[n_push_ranges++] = push_constant_range;
}
assert(n_push_ranges <= 4);
if (nir->info.stage == MESA_SHADER_FRAGMENT && fragment_dynamic) {
struct brw_wm_prog_data *wm_prog_data =
container_of(prog_data, struct brw_wm_prog_data, base);
@ -330,8 +371,12 @@ anv_nir_validate_push_layout(const struct anv_physical_device *pdevice,
prog_data_push_size += prog_data->ubo_ranges[i].length;
unsigned bind_map_push_size = 0;
for (unsigned i = 0; i < 4; i++)
for (unsigned i = 0; i < 4; i++) {
/* This is dynamic and doesn't count against prog_data->ubo_ranges[] */
if (map->push_ranges[i].set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING)
continue;
bind_map_push_size += map->push_ranges[i].length;
}
/* We could go through everything again but it should be enough to assert
* that they push the same number of registers. This should alert us if

View file

@ -604,6 +604,13 @@ anv_graphics_pipeline_stage_fragment_dynamic(const struct anv_pipeline_stage *st
brw_wm_prog_key_is_dynamic(&stage->key.wm);
}
static bool
anv_graphics_pipeline_stage_mesh_dynamic(const struct anv_pipeline_stage *stage)
{
return stage->stage == MESA_SHADER_FRAGMENT &&
stage->key.wm.mesh_input == INTEL_SOMETIMES;
}
static void
anv_pipeline_hash_common(struct mesa_sha1 *ctx,
const struct anv_pipeline *pipeline)
@ -1086,6 +1093,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
NIR_PASS_V(nir, anv_nir_compute_push_layout,
pdevice, stage->key.base.robust_flags,
anv_graphics_pipeline_stage_fragment_dynamic(stage),
anv_graphics_pipeline_stage_mesh_dynamic(stage),
prog_data, &stage->bind_map, &push_map,
pipeline->layout.type, mem_ctx);

View file

@ -3293,6 +3293,7 @@ anv_descriptor_set_write_template(struct anv_device *device,
const struct vk_descriptor_update_template *template,
const void *data);
#define ANV_DESCRIPTOR_SET_PER_PRIM_PADDING (UINT8_MAX - 5)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 2)

View file

@ -196,6 +196,7 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
}
case ANV_DESCRIPTOR_SET_NULL:
case ANV_DESCRIPTOR_SET_PER_PRIM_PADDING:
return cmd_buffer->device->workaround_address;
default: {
@ -263,6 +264,7 @@ get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
case ANV_DESCRIPTOR_SET_NULL:
case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
case ANV_DESCRIPTOR_SET_PER_PRIM_PADDING:
return (range->start + range->length) * 32;
default: {
@ -459,6 +461,12 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
if (range->length == 0)
continue;
/* Never clear this padding register as it might contain payload
* data.
*/
if (range->set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING)
continue;
unsigned bound_size =
get_push_range_bound_size(cmd_buffer, shader, range);
if (bound_size >= range->start * 32) {
@ -479,7 +487,7 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
}
}
/* Setting NULL resets the push constant state so that we allocate a new one
/* Setting NULL resets the push constant state so that we allocate a new one
* if needed. If push constant data not dirty, get_push_range_address can
* re-use existing allocation.
*
@ -511,14 +519,21 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
if (range->length == 0)
break;
if (range->set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
break;
buffers[i] = get_push_range_address(cmd_buffer, shader, range);
max_push_range = MAX2(max_push_range, range->length);
buffer_count++;
}
/* We have at most 4 buffers but they should be tightly packed */
for (unsigned i = buffer_count; i < 4; i++)
assert(bind_map->push_ranges[i].length == 0);
for (unsigned i = buffer_count; i < 4; i++) {
assert(bind_map->push_ranges[i].length == 0 ||
bind_map->push_ranges[i].set ==
ANV_DESCRIPTOR_SET_PER_PRIM_PADDING);
}
}
#if GFX_VER >= 12