anv: move load_num_workgroups tracking to driver

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
(cherry picked from commit 578d2f0daa)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39003>
This commit is contained in:
Lionel Landwerlin 2025-12-01 10:24:26 +02:00 committed by Dylan Baker
parent deaf382293
commit f48d731953
4 changed files with 24 additions and 14 deletions

View file

@ -68,11 +68,8 @@ anv_nir_compute_push_layout(nir_shader *nir,
*/
if (nir->info.stage == MESA_SHADER_COMPUTE &&
base >= anv_drv_const_offset(cs.num_work_groups[0]) &&
base < (anv_drv_const_offset(cs.num_work_groups[2]) + 4)) {
struct brw_cs_prog_data *cs_prog_data =
container_of(prog_data, struct brw_cs_prog_data, base);
cs_prog_data->uses_num_work_groups = true;
}
base < (anv_drv_const_offset(cs.num_work_groups[2]) + 4))
map->binding_mask |= ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP;
break;
}

View file

@ -1155,13 +1155,19 @@ struct anv_push_range {
uint8_t length;
};
/* Per-shader flags recorded in anv_pipeline_bind_map::binding_mask.
 *
 * binding_mask is a uint16_t and is serialized as a 16-bit value, so any
 * flag added here must fit in bits 0-15.
 */
enum anv_pipeline_bind_mask {
/* Set by anv_nir_compute_push_layout() for compute shaders when an access
 * falls within the cs.num_work_groups driver constants. Checked by
 * anv_cmd_buffer_push_workgroups() on GFX_VERx10 < 125 before it compares
 * the num_work_groups push constants against the dispatch group counts.
 */
ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP = BITFIELD_BIT(0),
};
struct anv_pipeline_bind_map {
unsigned char surface_sha1[20];
unsigned char sampler_sha1[20];
unsigned char push_sha1[20];
/* enum anv_descriptor_set_layout_type */
uint32_t layout_type;
uint16_t layout_type;
/* enum anv_pipeline_bind_mask */
uint16_t binding_mask;
uint32_t surface_count;
uint32_t sampler_count;

View file

@ -66,7 +66,8 @@ anv_shader_deserialize(struct vk_device *vk_device,
blob_copy_bytes(blob, data.bind_map.surface_sha1, sizeof(data.bind_map.surface_sha1));
blob_copy_bytes(blob, data.bind_map.sampler_sha1, sizeof(data.bind_map.sampler_sha1));
blob_copy_bytes(blob, data.bind_map.push_sha1, sizeof(data.bind_map.push_sha1));
data.bind_map.layout_type = blob_read_uint32(blob);
data.bind_map.layout_type = blob_read_uint16(blob);
data.bind_map.binding_mask = blob_read_uint16(blob);
data.bind_map.surface_count = blob_read_uint32(blob);
data.bind_map.sampler_count = blob_read_uint32(blob);
data.bind_map.embedded_sampler_count = blob_read_uint32(blob);
@ -143,7 +144,8 @@ anv_shader_serialize(struct vk_device *device,
sizeof(shader->bind_map.sampler_sha1));
blob_write_bytes(blob, shader->bind_map.push_sha1,
sizeof(shader->bind_map.push_sha1));
blob_write_uint32(blob, shader->bind_map.layout_type);
blob_write_uint16(blob, shader->bind_map.layout_type);
blob_write_uint16(blob, shader->bind_map.binding_mask);
blob_write_uint32(blob, shader->bind_map.surface_count);
blob_write_uint32(blob, shader->bind_map.sampler_count);
blob_write_uint32(blob, shader->bind_map.embedded_sampler_count);

View file

@ -256,7 +256,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
static void
anv_cmd_buffer_push_workgroups(struct anv_cmd_buffer *cmd_buffer,
const struct brw_cs_prog_data *prog_data,
const struct anv_pipeline_bind_map *bind_map,
uint32_t baseGroupX,
uint32_t baseGroupY,
uint32_t baseGroupZ,
@ -281,7 +281,8 @@ anv_cmd_buffer_push_workgroups(struct anv_cmd_buffer *cmd_buffer,
}
/* On Gfx12.5+ this value goes into the inline parameter register */
if (GFX_VERx10 < 125 && prog_data->uses_num_work_groups) {
if (GFX_VERx10 < 125 &&
(bind_map->binding_mask & ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP)) {
if (anv_address_is_null(indirect_group)) {
if (push->cs.num_work_groups[0] != groupCountX ||
push->cs.num_work_groups[1] != groupCountY ||
@ -639,6 +640,7 @@ void genX(CmdDispatchBase)(
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
const struct anv_pipeline_bind_map *bind_map = &comp_state->shader->bind_map;
const struct brw_cs_prog_data *prog_data = get_cs_prog_data(comp_state);
struct intel_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(cmd_buffer->device->info, prog_data, NULL);
@ -646,7 +648,7 @@ void genX(CmdDispatchBase)(
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_cmd_buffer_push_workgroups(cmd_buffer, prog_data,
anv_cmd_buffer_push_workgroups(cmd_buffer, bind_map,
baseGroupX, baseGroupY, baseGroupZ,
groupCountX, groupCountY, groupCountZ,
ANV_NULL_ADDRESS);
@ -699,6 +701,7 @@ genX(cmd_dispatch_unaligned)(
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
const struct anv_pipeline_bind_map *bind_map = &comp_state->shader->bind_map;
const struct brw_cs_prog_data *prog_data = get_cs_prog_data(comp_state);
if (anv_batch_has_error(&cmd_buffer->batch))
return;
@ -711,7 +714,7 @@ genX(cmd_dispatch_unaligned)(
struct intel_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(cmd_buffer->device->info, prog_data, NULL);
anv_cmd_buffer_push_workgroups(cmd_buffer, prog_data, 0, 0, 0, groupCountX,
anv_cmd_buffer_push_workgroups(cmd_buffer, bind_map, 0, 0, 0, groupCountX,
groupCountY, groupCountZ, ANV_NULL_ADDRESS);
/* RT shaders have Y and Z local size set to 1 always. */
@ -728,7 +731,8 @@ genX(cmd_dispatch_unaligned)(
trace_intel_begin_compute(&cmd_buffer->trace);
assert(!prog_data->uses_num_work_groups);
assert((bind_map->binding_mask &
ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP) == 0);
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
@ -757,6 +761,7 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
bool is_unaligned_size_x)
{
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
const struct anv_pipeline_bind_map *bind_map = &comp_state->shader->bind_map;
const struct brw_cs_prog_data *prog_data = get_cs_prog_data(comp_state);
UNUSED struct anv_batch *batch = &cmd_buffer->batch;
struct intel_cs_dispatch_info dispatch =
@ -765,7 +770,7 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_cmd_buffer_push_workgroups(cmd_buffer, prog_data,
anv_cmd_buffer_push_workgroups(cmd_buffer, bind_map,
0, 0, 0, 0, 0, 0, indirect_addr);
anv_measure_snapshot(cmd_buffer,