From f48d7319533816c2e45963a9c98fca1d5ad67774 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 1 Dec 2025 10:24:26 +0200 Subject: [PATCH] anv: move load_num_workgroups tracking to driver Signed-off-by: Lionel Landwerlin Reviewed-by: Alyssa Rosenzweig (cherry picked from commit 578d2f0daad197b92ad4079a89d847a57bbd6358) Part-of: --- src/intel/vulkan/anv_nir_compute_push_layout.c | 7 ++----- src/intel/vulkan/anv_private.h | 8 +++++++- src/intel/vulkan/anv_shader.c | 6 ++++-- src/intel/vulkan/genX_cmd_compute.c | 17 +++++++++++------ 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index eb8eeae53b1..2092e2c2269 100644 --- a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -68,11 +68,8 @@ anv_nir_compute_push_layout(nir_shader *nir, */ if (nir->info.stage == MESA_SHADER_COMPUTE && base >= anv_drv_const_offset(cs.num_work_groups[0]) && - base < (anv_drv_const_offset(cs.num_work_groups[2]) + 4)) { - struct brw_cs_prog_data *cs_prog_data = - container_of(prog_data, struct brw_cs_prog_data, base); - cs_prog_data->uses_num_work_groups = true; - } + base < (anv_drv_const_offset(cs.num_work_groups[2]) + 4)) + map->binding_mask |= ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP; break; } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 1360bb6eaa8..90800b8db15 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1155,13 +1155,19 @@ struct anv_push_range { uint8_t length; }; +enum anv_pipeline_bind_mask { + ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP = BITFIELD_BIT(0), +}; + struct anv_pipeline_bind_map { unsigned char surface_sha1[20]; unsigned char sampler_sha1[20]; unsigned char push_sha1[20]; /* enum anv_descriptor_set_layout_type */ - uint32_t layout_type; + uint16_t layout_type; + /* enum anv_pipeline_bind_mask */ + uint16_t binding_mask; uint32_t surface_count; uint32_t sampler_count; diff --git a/src/intel/vulkan/anv_shader.c b/src/intel/vulkan/anv_shader.c index ed66e916e4c..afdeba16eac 100644 --- a/src/intel/vulkan/anv_shader.c +++ b/src/intel/vulkan/anv_shader.c @@ -66,7 +66,8 @@ anv_shader_deserialize(struct vk_device *vk_device, blob_copy_bytes(blob, data.bind_map.surface_sha1, sizeof(data.bind_map.surface_sha1)); blob_copy_bytes(blob, data.bind_map.sampler_sha1, sizeof(data.bind_map.sampler_sha1)); blob_copy_bytes(blob, data.bind_map.push_sha1, sizeof(data.bind_map.push_sha1)); - data.bind_map.layout_type = blob_read_uint32(blob); + data.bind_map.layout_type = blob_read_uint16(blob); + data.bind_map.binding_mask = blob_read_uint16(blob); data.bind_map.surface_count = blob_read_uint32(blob); data.bind_map.sampler_count = blob_read_uint32(blob); data.bind_map.embedded_sampler_count = blob_read_uint32(blob); @@ -143,7 +144,8 @@ anv_shader_serialize(struct vk_device *device, sizeof(shader->bind_map.sampler_sha1)); blob_write_bytes(blob, shader->bind_map.push_sha1, sizeof(shader->bind_map.push_sha1)); - blob_write_uint32(blob, shader->bind_map.layout_type); + blob_write_uint16(blob, shader->bind_map.layout_type); + blob_write_uint16(blob, shader->bind_map.binding_mask); blob_write_uint32(blob, shader->bind_map.surface_count); blob_write_uint32(blob, shader->bind_map.sampler_count); blob_write_uint32(blob, shader->bind_map.embedded_sampler_count); diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index cbe677fc668..03491fdb322 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -256,7 +256,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) static void anv_cmd_buffer_push_workgroups(struct anv_cmd_buffer *cmd_buffer, - const struct brw_cs_prog_data *prog_data, + const struct anv_pipeline_bind_map *bind_map, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, @@ -281,7 +281,8 @@ anv_cmd_buffer_push_workgroups(struct anv_cmd_buffer *cmd_buffer, } /* On Gfx12.5+ this value goes into the inline parameter register */ - if (GFX_VERx10 < 125 && prog_data->uses_num_work_groups) { + if (GFX_VERx10 < 125 && + (bind_map->binding_mask & ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP)) { if (anv_address_is_null(indirect_group)) { if (push->cs.num_work_groups[0] != groupCountX || push->cs.num_work_groups[1] != groupCountY || @@ -639,6 +640,7 @@ void genX(CmdDispatchBase)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute; + const struct anv_pipeline_bind_map *bind_map = &comp_state->shader->bind_map; const struct brw_cs_prog_data *prog_data = get_cs_prog_data(comp_state); struct intel_cs_dispatch_info dispatch = brw_cs_get_dispatch_info(cmd_buffer->device->info, prog_data, NULL); @@ -646,7 +648,7 @@ void genX(CmdDispatchBase)( if (anv_batch_has_error(&cmd_buffer->batch)) return; - anv_cmd_buffer_push_workgroups(cmd_buffer, prog_data, + anv_cmd_buffer_push_workgroups(cmd_buffer, bind_map, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ, ANV_NULL_ADDRESS); @@ -699,6 +701,7 @@ genX(cmd_dispatch_unaligned)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute; + const struct anv_pipeline_bind_map *bind_map = &comp_state->shader->bind_map; const struct brw_cs_prog_data *prog_data = get_cs_prog_data(comp_state); if (anv_batch_has_error(&cmd_buffer->batch)) return; @@ -711,7 +714,7 @@ genX(cmd_dispatch_unaligned)( struct intel_cs_dispatch_info dispatch = brw_cs_get_dispatch_info(cmd_buffer->device->info, prog_data, NULL); - anv_cmd_buffer_push_workgroups(cmd_buffer, prog_data, 0, 0, 0, groupCountX, + anv_cmd_buffer_push_workgroups(cmd_buffer, bind_map, 0, 0, 0, groupCountX, groupCountY, groupCountZ, ANV_NULL_ADDRESS); /* RT shaders have Y and Z local size set to 1 always. */ @@ -728,7 +731,8 @@ genX(cmd_dispatch_unaligned)( trace_intel_begin_compute(&cmd_buffer->trace); - assert(!prog_data->uses_num_work_groups); + assert((bind_map->binding_mask & + ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP) == 0); genX(cmd_buffer_flush_compute_state)(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -757,6 +761,7 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer, bool is_unaligned_size_x) { struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute; + const struct anv_pipeline_bind_map *bind_map = &comp_state->shader->bind_map; const struct brw_cs_prog_data *prog_data = get_cs_prog_data(comp_state); UNUSED struct anv_batch *batch = &cmd_buffer->batch; struct intel_cs_dispatch_info dispatch = @@ -765,7 +770,7 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer, if (anv_batch_has_error(&cmd_buffer->batch)) return; - anv_cmd_buffer_push_workgroups(cmd_buffer, prog_data, + anv_cmd_buffer_push_workgroups(cmd_buffer, bind_map, 0, 0, 0, 0, 0, 0, indirect_addr); anv_measure_snapshot(cmd_buffer,