diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index c7faffeffd7..bc694032c6c 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -950,6 +950,8 @@ brw_nir_populate_wm_prog_data(nir_shader *shader, */ prog_data->alpha_to_coverage = key->alpha_to_coverage; + prog_data->mesh_input = key->mesh_input; + prog_data->uses_sample_mask = BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 8727b3ec00f..c5fa20ca105 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -380,7 +380,9 @@ struct brw_wm_prog_key { static inline bool brw_wm_prog_key_is_dynamic(const struct brw_wm_prog_key *key) { - return key->alpha_to_coverage == INTEL_SOMETIMES || + return + key->mesh_input == INTEL_SOMETIMES || + key->alpha_to_coverage == INTEL_SOMETIMES || key->persample_interp == INTEL_SOMETIMES || key->multisample_fbo == INTEL_SOMETIMES || key->base.vue_layout == INTEL_VUE_LAYOUT_SEPARATE_MESH; @@ -749,6 +751,11 @@ struct brw_wm_prog_data { */ enum intel_sometimes alpha_to_coverage; + /** + * Whether the shader is dispatched with a preceding mesh shader. + */ + enum intel_sometimes mesh_input; + /** + * Push constant location of intel_msaa_flags (dynamic configuration of the + * pixel shader). 
@@ -806,7 +813,8 @@ struct brw_wm_prog_data { static inline bool brw_wm_prog_data_is_dynamic(const struct brw_wm_prog_data *prog_data) { - return prog_data->alpha_to_coverage == INTEL_SOMETIMES || + return prog_data->mesh_input == INTEL_SOMETIMES || + prog_data->alpha_to_coverage == INTEL_SOMETIMES || prog_data->coarse_pixel_dispatch == INTEL_SOMETIMES || prog_data->persample_dispatch == INTEL_SOMETIMES; } diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index a397f01cba0..7d11c1f0d08 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -4189,7 +4189,8 @@ brw_per_primitive_reg(const brw_builder &bld, int location, unsigned comp) { brw_shader &s = *bld.shader; assert(s.stage == MESA_SHADER_FRAGMENT); - assert(BITFIELD64_BIT(location) & s.nir->info.per_primitive_inputs); + assert((BITFIELD64_BIT(location) & s.nir->info.per_primitive_inputs) || + location == VARYING_SLOT_PRIMITIVE_ID); const struct brw_wm_prog_data *prog_data = brw_wm_prog_data(s.prog_data); diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h index 0284b522e06..f1469be59c5 100644 --- a/src/intel/vulkan/anv_nir.h +++ b/src/intel/vulkan/anv_nir.h @@ -110,6 +110,7 @@ void anv_nir_compute_push_layout(nir_shader *nir, const struct anv_physical_device *pdevice, enum brw_robustness_flags robust_flags, bool fragment_dynamic, + bool mesh_dynamic, struct brw_stage_prog_data *prog_data, struct anv_pipeline_bind_map *map, const struct anv_pipeline_push_map *push_map, diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index 8509e194fdd..e2e3f0d36cc 100644 --- a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -31,6 +31,7 @@ anv_nir_compute_push_layout(nir_shader *nir, const struct anv_physical_device *pdevice, enum brw_robustness_flags robust_flags, bool fragment_dynamic, + bool mesh_dynamic, struct 
brw_stage_prog_data *prog_data, struct anv_pipeline_bind_map *map, const struct anv_pipeline_push_map *push_map, @@ -199,6 +200,34 @@ anv_nir_compute_push_layout(nir_shader *nir, } } + /* When platforms support Mesh and the fragment shader is not fully linked + * to the previous shader, the payload format can change if the preceding + * shader is mesh or not; this is an issue in particular for PrimitiveID + * value (in legacy it's delivered as a VUE slot, in mesh it's delivered + * in the per-primitive block). + * + * Here is the difference in payload format: + * + * Legacy Mesh + * ------------------- ------------------- + * | ... | | ... | + * |-----------------| |-----------------| + * | Constant data | | Constant data | + * |-----------------| |-----------------| + * | VUE attributes | | Per Primitive data| + * ------------------- |-----------------| + * | VUE attributes | + * ------------------- + * + * To solve that issue we push an additional dummy push constant buffer in + * legacy pipelines to align everything. The compiler then adds a SEL + * instruction to source the PrimitiveID from the right location based on a + * dynamic bit in fs_msaa_intel. + */ + const bool needs_padding_per_primitive = + mesh_dynamic && + (nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID); + unsigned n_push_ranges = 0; if (push_ubo_ranges) { brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->ubo_ranges); @@ -224,6 +253,7 @@ anv_nir_compute_push_layout(nir_shader *nir, (push_reg_mask_offset - push_start) / 4; } + const unsigned max_push_buffers = needs_padding_per_primitive ? 
3 : 4; unsigned range_start_reg = push_constant_range.length; for (int i = 0; i < 4; i++) { @@ -231,7 +261,7 @@ anv_nir_compute_push_layout(nir_shader *nir, if (ubo_range->length == 0) continue; - if (n_push_ranges >= 4) { + if (n_push_ranges >= max_push_buffers) { memset(ubo_range, 0, sizeof(*ubo_range)); continue; } @@ -288,6 +318,17 @@ anv_nir_compute_push_layout(nir_shader *nir, prog_data->nr_params = 32 / 4; } + if (needs_padding_per_primitive) { + struct anv_push_range push_constant_range = { + .set = ANV_DESCRIPTOR_SET_PER_PRIM_PADDING, + .start = 0, + .length = 1, + }; + map->push_ranges[n_push_ranges++] = push_constant_range; + } + + assert(n_push_ranges <= 4); + if (nir->info.stage == MESA_SHADER_FRAGMENT && fragment_dynamic) { struct brw_wm_prog_data *wm_prog_data = container_of(prog_data, struct brw_wm_prog_data, base); @@ -330,8 +371,12 @@ anv_nir_validate_push_layout(const struct anv_physical_device *pdevice, prog_data_push_size += prog_data->ubo_ranges[i].length; unsigned bind_map_push_size = 0; - for (unsigned i = 0; i < 4; i++) + for (unsigned i = 0; i < 4; i++) { + /* This is dynamic and doesn't count against prog_data->ubo_ranges[] */ + if (map->push_ranges[i].set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING) + continue; bind_map_push_size += map->push_ranges[i].length; + } /* We could go through everything again but it should be enough to assert * that they push the same number of registers. 
This should alert us if diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 671924c5ee7..74d91b2737a 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -604,6 +604,13 @@ anv_graphics_pipeline_stage_fragment_dynamic(const struct anv_pipeline_stage *st brw_wm_prog_key_is_dynamic(&stage->key.wm); } +static bool +anv_graphics_pipeline_stage_mesh_dynamic(const struct anv_pipeline_stage *stage) +{ + return stage->stage == MESA_SHADER_FRAGMENT && + stage->key.wm.mesh_input == INTEL_SOMETIMES; +} + static void anv_pipeline_hash_common(struct mesa_sha1 *ctx, const struct anv_pipeline *pipeline) @@ -1086,6 +1093,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline, NIR_PASS_V(nir, anv_nir_compute_push_layout, pdevice, stage->key.base.robust_flags, anv_graphics_pipeline_stage_fragment_dynamic(stage), + anv_graphics_pipeline_stage_mesh_dynamic(stage), prog_data, &stage->bind_map, &push_map, pipeline->layout.type, mem_ctx); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ec891a7dae0..a2bda201b02 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -3293,6 +3293,7 @@ anv_descriptor_set_write_template(struct anv_device *device, const struct vk_descriptor_update_template *template, const void *data); +#define ANV_DESCRIPTOR_SET_PER_PRIM_PADDING (UINT8_MAX - 5) #define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 4) #define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 3) #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 2) diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index aba0f9090db..d2de18323d8 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -196,6 +196,7 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer, } case ANV_DESCRIPTOR_SET_NULL: + case ANV_DESCRIPTOR_SET_PER_PRIM_PADDING: return cmd_buffer->device->workaround_address; default: { @@ -263,6 +264,7 
@@ get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer, case ANV_DESCRIPTOR_SET_NULL: case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: + case ANV_DESCRIPTOR_SET_PER_PRIM_PADDING: return (range->start + range->length) * 32; default: { @@ -459,6 +461,12 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer, if (range->length == 0) continue; + /* Never clear this padding register as it might contain payload + * data. + */ + if (range->set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING) + continue; + unsigned bound_size = get_push_range_bound_size(cmd_buffer, shader, range); if (bound_size >= range->start * 32) { @@ -479,7 +487,7 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer, } } - /* Setting NULL resets the push constant state so that we allocate a new one + /* Setting NULL resets the push constant state so that we allocate a new one * if needed. If push constant data not dirty, get_push_range_address can * re-use existing allocation. * @@ -511,14 +519,21 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer, if (range->length == 0) break; + if (range->set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING && + anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) + break; + buffers[i] = get_push_range_address(cmd_buffer, shader, range); max_push_range = MAX2(max_push_range, range->length); buffer_count++; } /* We have at most 4 buffers but they should be tightly packed */ - for (unsigned i = buffer_count; i < 4; i++) - assert(bind_map->push_ranges[i].length == 0); + for (unsigned i = buffer_count; i < 4; i++) { + assert(bind_map->push_ranges[i].length == 0 || + bind_map->push_ranges[i].set == + ANV_DESCRIPTOR_SET_PER_PRIM_PADDING); + } } #if GFX_VER >= 12