anv: stop using a binding table entry for gl_NumWorkgroups

This will make things easier in situations where we don't want to use
the binding table at all (indirect draws/dispatches).

The mechanism is simple: upload a vec3 either through push constants
(<= Gfx12.0) or through the inline parameter register (>= Gfx12.5).

In the shader, do this:

  if vec.x == 0xffffffff:
     addr = pack64_2x32 vec.y, vec.z
     vec = load_global addr

This works because we limit the maximum number of workgroups to 0xffff
in each dimension:
   maxComputeWorkGroupCount = { 65535, 65535, 65535 },

So we can use larger values to signal the need for indirect loading.
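
As an illustration, here is a minimal C sketch (not part of the commit;
all names are hypothetical) of the encode/decode protocol the three
dwords implement:

   uint32_t v[3];
   if (indirect_dispatch) {
      /* Direct counts never exceed 0xffff, so 0xffffffff works as a tag. */
      v[0] = UINT32_MAX;
      v[1] = (uint32_t)(param_buffer_addr & 0xffffffff); /* low 32 bits */
      v[2] = (uint32_t)(param_buffer_addr >> 32);        /* high 32 bits */
   } else {
      v[0] = countX; v[1] = countY; v[2] = countZ;
   }

   /* Shader-side decode, in C terms (this is what the NIR lowering emits): */
   if (v[0] == UINT32_MAX) {
      const uint32_t *num = (const uint32_t *)(uintptr_t)
         (((uint64_t)v[2] << 32) | v[1]);
      v[0] = num[0]; v[1] = num[1]; v[2] = num[2];
   }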

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31508>
Lionel Landwerlin, 2024-05-31 12:34:53 +03:00, committed by Marge Bot
parent 97b17aa0b1
commit 02294961ee
6 changed files with 160 additions and 82 deletions


@@ -1895,6 +1895,45 @@ lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin,
    return true;
 }
 
+static bool
+lower_num_workgroups(nir_builder *b, nir_intrinsic_instr *intrin,
+                     struct apply_pipeline_layout_state *state)
+{
+   /* For those stages, HW will generate values through payload registers. */
+   if (gl_shader_stage_is_mesh(b->shader->info.stage))
+      return false;
+
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_def *num_workgroups;
+   /* On Gfx12.5+ we use the inline register to push the values, on prior
+    * generations we use push constants.
+    */
+   if (state->pdevice->info.verx10 >= 125) {
+      num_workgroups =
+         nir_load_inline_data_intel(
+            b, 3, 32,
+            .base = ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET);
+   } else {
+      num_workgroups =
+         anv_load_driver_uniform(b, 3, cs.num_work_groups[0]);
+   }
+
+   nir_def *num_workgroups_indirect;
+   nir_push_if(b, nir_ieq_imm(b, nir_channel(b, num_workgroups, 0), UINT32_MAX));
+   {
+      nir_def *addr = nir_pack_64_2x32_split(b,
+                                             nir_channel(b, num_workgroups, 1),
+                                             nir_channel(b, num_workgroups, 2));
+      num_workgroups_indirect = nir_load_global_constant(b, addr, 4, 3, 32);
+   }
+   nir_pop_if(b, NULL);
+
+   num_workgroups = nir_if_phi(b, num_workgroups_indirect, num_workgroups);
+   nir_def_rewrite_uses(&intrin->def, num_workgroups);
+
+   return true;
+}
+
 static bool
 apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
 {
@@ -1930,6 +1969,8 @@ apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
       return lower_base_workgroup_id(b, intrin, state);
    case nir_intrinsic_load_ray_query_global_intel:
       return lower_ray_query_globals(b, intrin, state);
+   case nir_intrinsic_load_num_workgroups:
+      return lower_num_workgroups(b, intrin, state);
    default:
       return false;
    }
@@ -2434,7 +2475,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
    nir_opt_dce(shader);
 
    nir_shader_instructions_pass(shader, apply_pipeline_layout,
-                                nir_metadata_control_flow,
+                                nir_metadata_none,
                                 &state);
 
    ralloc_free(mem_ctx);


@@ -62,6 +62,16 @@ anv_nir_compute_push_layout(nir_shader *nir,
          unsigned range = nir_intrinsic_range(intrin);
          push_start = MIN2(push_start, base);
          push_end = MAX2(push_end, base + range);
+
+         /* We need to retain this information to update the push
+          * constant on vkCmdDispatch*().
+          */
+         if (nir->info.stage == MESA_SHADER_COMPUTE &&
+             base >= anv_drv_const_offset(cs.num_work_groups[0]) &&
+             base < (anv_drv_const_offset(cs.num_work_groups[2]) + 4)) {
+            struct brw_cs_prog_data *cs_prog_data =
+               container_of(prog_data, struct brw_cs_prog_data, base);
+            cs_prog_data->uses_num_work_groups = true;
+         }
          break;
       }
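
For clarity, the check above can be read as follows (a standalone sketch,
not from the commit, assuming anv_drv_const_offset() boils down to an
offsetof() into the driver's push constant struct; struct and helper names
are hypothetical):

   #include <stdbool.h>
   #include <stddef.h>
   #include <stdint.h>

   struct cs_push_consts {
      uint32_t base_work_group_id[3];
      uint32_t num_work_groups[3];
   };

   /* A pushed range marks the shader as reading gl_NumWorkgroups when its
    * base lands inside the 12 bytes of num_work_groups[]. */
   static bool
   base_hits_num_workgroups(unsigned base)
   {
      const unsigned lo = offsetof(struct cs_push_consts, num_work_groups[0]);
      const unsigned hi = offsetof(struct cs_push_consts, num_work_groups[2]) + 4;
      return base >= lo && base < hi;
   }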


@@ -1700,9 +1700,6 @@ anv_pipeline_add_executable(struct anv_pipeline *pipeline,
                    stage->bind_map.push_ranges[i].start * 32);
          break;
 
-      case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
-         unreachable("gl_NumWorkgroups is never pushed");
-
       case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
         unreachable("Color attachments can't be pushed");
@@ -2684,13 +2681,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
    anv_stage_allocate_bind_map_tables(&pipeline->base, &stage, mem_ctx);
 
-   /* Set up a binding for the gl_NumWorkGroups */
-   stage.bind_map.surface_count = 1;
-   stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
-      .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
-      .binding = UINT32_MAX,
-   };
-
    VkResult result = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 mem_ctx, &stage);
    if (result != VK_SUCCESS) {
@@ -2736,12 +2726,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
    anv_nir_validate_push_layout(device->physical, &stage.prog_data.base,
                                 &stage.bind_map);
 
-   if (!stage.prog_data.cs.uses_num_work_groups) {
-      assert(stage.bind_map.surface_to_descriptor[0].set ==
-             ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
-      stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
-   }
-
    struct anv_shader_upload_params upload_params = {
       .stage    = MESA_SHADER_COMPUTE,
       .key_data = &stage.cache_key,


@@ -222,6 +222,8 @@ struct intel_perf_query_result;
 #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
 
+#define ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET (8)
+
 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
  * and we can't put anything else there we use 64b.
  */
@@ -3076,11 +3078,10 @@ anv_descriptor_set_write_template(struct anv_device *device,
                                   const struct vk_descriptor_update_template *template,
                                   const void *data);
 
-#define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 5)
-#define ANV_DESCRIPTOR_SET_NULL               (UINT8_MAX - 4)
-#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS     (UINT8_MAX - 3)
-#define ANV_DESCRIPTOR_SET_DESCRIPTORS        (UINT8_MAX - 2)
-#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS    (UINT8_MAX - 1)
+#define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 4)
+#define ANV_DESCRIPTOR_SET_NULL               (UINT8_MAX - 3)
+#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS     (UINT8_MAX - 2)
+#define ANV_DESCRIPTOR_SET_DESCRIPTORS        (UINT8_MAX - 1)
 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS   UINT8_MAX
 
 struct anv_pipeline_binding {
@@ -3598,6 +3599,9 @@ struct anv_push_constants {
     */
    uint32_t base_work_group_id[3];
 
+   /** gl_NumWorkgroups */
+   uint32_t num_work_groups[3];
+
    /** Subgroup ID
    *
    * This is never set by software but is implicitly filled out when
@@ -3908,8 +3912,6 @@ struct anv_cmd_compute_state {
    bool pipeline_dirty;
 
-   struct anv_address num_workgroups;
-
    uint32_t scratch_size;
 };
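
One detail worth noting (illustrative sketch, helper name hypothetical):
ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET is a byte offset into the
COMPUTE_WALKER inline data, while the InlineData array is indexed in
dwords, hence the "/ 4" in the genX code below. The three values land in
dwords 2..4:

   static inline void
   fill_inline_num_workgroups(uint32_t *inline_data, const uint32_t v[3])
   {
      /* 8 bytes / 4 bytes per dword = dword index 2. */
      const unsigned base = ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4;
      inline_data[base + 0] = v[0];
      inline_data[base + 1] = v[1];
      inline_data[base + 2] = v[2];
   }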


@@ -2102,29 +2102,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
          bt_map[s] = surface_state.offset + state_offset;
          break;
 
-      case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS: {
-         /* This is always the first binding for compute shaders */
-         assert(shader->stage == MESA_SHADER_COMPUTE && s == 0);
-
-         struct anv_state surface_state =
-            anv_cmd_buffer_alloc_surface_states(cmd_buffer, 1);
-         if (surface_state.map == NULL)
-            return VK_ERROR_OUT_OF_DEVICE_MEMORY;
-
-         const enum isl_format format =
-            anv_isl_format_for_descriptor_type(cmd_buffer->device,
-                                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
-         anv_fill_buffer_surface_state(cmd_buffer->device, surface_state.map,
-                                       format, ISL_SWIZZLE_IDENTITY,
-                                       ISL_SURF_USAGE_CONSTANT_BUFFER_BIT,
-                                       cmd_buffer->state.compute.num_workgroups,
-                                       12, 1);
-
-         assert(surface_state.map);
-         bt_map[s] = surface_state.offset + state_offset;
-         break;
-      }
-
       case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
          struct anv_descriptor_set *set =
             pipe_state->descriptors[binding->index];
@@ -4762,6 +4739,20 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
       }
    }
 #endif
+
+#if GFX_VER == 9
+   /* Undocumented workaround: we need to reemit MEDIA_CURBE_LOAD on Gfx9 when
+    * switching from 3D->GPGPU, otherwise the shader gets corrupted push
+    * constants. Note that this doesn't trigger a push constant reallocation,
+    * we just reprogram the same pointer.
+    *
+    * The issue reproduces pretty much 100% of the time on the
+    * dEQP-VK.memory_model.transitive.* tests. Reducing the number of
+    * iterations in the test from 50 to < 10 makes the tests flaky.
+    */
+   if (pipeline == GPGPU)
+      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
+#endif
 #endif /* else of if GFX_VER >= 20 */
 
    cmd_buffer->state.current_pipeline = pipeline;
 }


@@ -145,11 +145,15 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
       genX(cmd_buffer_ensure_cfe_state)(cmd_buffer, prog_data->base.total_scratch);
 #endif
 
-      /* The workgroup size of the pipeline affects our push constant layout
-       * so flag push constants as dirty if we change the pipeline.
+#if GFX_VERx10 == 120
+      /* Normally we should not require any dirtying here, but for some reason
+       * on Gfx12.0, when running tests in parallel we see failures in the
+       * dEQP-VK.memory_model.* tests. This is likely a HW issue with push
+       * constants & context save/restore.
        */
       cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
       comp_state->base.push_constants_data_dirty = true;
+#endif
    }
 
    cmd_buffer->state.descriptors_dirty |=
@@ -217,23 +221,58 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
 }
 
 static void
-anv_cmd_buffer_push_base_group_id(struct anv_cmd_buffer *cmd_buffer,
-                                  uint32_t baseGroupX,
-                                  uint32_t baseGroupY,
-                                  uint32_t baseGroupZ)
+anv_cmd_buffer_push_workgroups(struct anv_cmd_buffer *cmd_buffer,
+                               const struct brw_cs_prog_data *prog_data,
+                               uint32_t baseGroupX,
+                               uint32_t baseGroupY,
+                               uint32_t baseGroupZ,
+                               uint32_t groupCountX,
+                               uint32_t groupCountY,
+                               uint32_t groupCountZ,
+                               struct anv_address indirect_group)
 {
    if (anv_batch_has_error(&cmd_buffer->batch))
       return;
 
    struct anv_push_constants *push =
       &cmd_buffer->state.compute.base.push_constants;
+   bool updated = false;
    if (push->cs.base_work_group_id[0] != baseGroupX ||
       push->cs.base_work_group_id[1] != baseGroupY ||
       push->cs.base_work_group_id[2] != baseGroupZ) {
       push->cs.base_work_group_id[0] = baseGroupX;
       push->cs.base_work_group_id[1] = baseGroupY;
       push->cs.base_work_group_id[2] = baseGroupZ;
+      updated = true;
+   }
+
+   /* On Gfx12.5+ this value goes into the inline parameter register */
+   if (GFX_VERx10 < 125 && prog_data->uses_num_work_groups) {
+      if (anv_address_is_null(indirect_group)) {
+         if (push->cs.num_work_groups[0] != groupCountX ||
+             push->cs.num_work_groups[1] != groupCountY ||
+             push->cs.num_work_groups[2] != groupCountZ) {
+            push->cs.num_work_groups[0] = groupCountX;
+            push->cs.num_work_groups[1] = groupCountY;
+            push->cs.num_work_groups[2] = groupCountZ;
+            updated = true;
+         }
+      } else {
+         uint64_t addr64 = anv_address_physical(indirect_group);
+         uint32_t lower_addr32 = addr64 & 0xffffffff;
+         uint32_t upper_addr32 = addr64 >> 32;
+         if (push->cs.num_work_groups[0] != UINT32_MAX ||
+             push->cs.num_work_groups[1] != lower_addr32 ||
+             push->cs.num_work_groups[2] != upper_addr32) {
+            push->cs.num_work_groups[0] = UINT32_MAX;
+            push->cs.num_work_groups[1] = lower_addr32;
+            push->cs.num_work_groups[2] = upper_addr32;
+            updated = true;
+         }
+      }
+   }
+
+   if (updated) {
       cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
       cmd_buffer->state.compute.base.push_constants_data_dirty = true;
    }
@@ -321,6 +360,8 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
       brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
    const int dispatch_size = dispatch.simd_size / 16;
 
+   uint64_t indirect_addr64 = anv_address_physical(indirect_addr);
+
    struct GENX(COMPUTE_WALKER_BODY) body = {
       .SIMDSize    = dispatch_size,
       .MessageSIMD = dispatch_size,
@@ -339,6 +380,11 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
       .InterfaceDescriptor =
          get_interface_descriptor_data(cmd_buffer, shader, prog_data,
                                        &dispatch),
+      .InlineData = {
+         [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = UINT32_MAX,
+         [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = indirect_addr64 & 0xffffffff,
+         [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = indirect_addr64 >> 32,
+      },
    };
 
    cmd_buffer->state.last_indirect_dispatch =
@@ -357,7 +403,8 @@
 static inline void
 emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
-                    const struct anv_compute_pipeline *pipeline, bool indirect,
+                    const struct anv_compute_pipeline *pipeline,
+                    struct anv_address indirect_addr,
                     const struct brw_cs_prog_data *prog_data,
                     uint32_t groupCountX, uint32_t groupCountY,
                     uint32_t groupCountZ)
@@ -369,12 +416,24 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
    const struct intel_cs_dispatch_info dispatch =
       brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
 
+   uint32_t num_workgroup_data[3];
+   if (!anv_address_is_null(indirect_addr)) {
+      uint64_t indirect_addr64 = anv_address_physical(indirect_addr);
+      num_workgroup_data[0] = 0xffffffff;
+      num_workgroup_data[1] = indirect_addr64 & 0xffffffff;
+      num_workgroup_data[2] = indirect_addr64 >> 32;
+   } else {
+      num_workgroup_data[0] = groupCountX;
+      num_workgroup_data[1] = groupCountY;
+      num_workgroup_data[2] = groupCountZ;
+   }
+
    cmd_buffer->state.last_compute_walker =
       anv_batch_emitn(
          &cmd_buffer->batch,
         GENX(COMPUTE_WALKER_length),
         GENX(COMPUTE_WALKER),
-        .IndirectParameterEnable = indirect,
+        .IndirectParameterEnable = !anv_address_is_null(indirect_addr),
         .PredicateEnable = predicate,
         .SIMDSize = dispatch.simd_size / 16,
         .MessageSIMD = dispatch.simd_size / 16,
@@ -401,7 +460,13 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
         .InterfaceDescriptor =
            get_interface_descriptor_data(cmd_buffer, pipeline->cs,
                                          prog_data, &dispatch),
-      );
+        .EmitInlineParameter = prog_data->uses_inline_data,
+        .InlineData = {
+           [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = num_workgroup_data[0],
+           [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1],
+           [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2],
+        });
 }
 
 #else /* #if GFX_VERx10 >= 125 */
@@ -459,7 +524,7 @@ emit_cs_walker(struct anv_cmd_buffer *cmd_buffer,
       compute_load_indirect_params(cmd_buffer, indirect_addr);
 
 #if GFX_VERx10 >= 125
-   emit_compute_walker(cmd_buffer, pipeline, is_indirect, prog_data,
+   emit_compute_walker(cmd_buffer, pipeline, indirect_addr, prog_data,
                        groupCountX, groupCountY, groupCountZ);
 #else
    emit_gpgpu_walker(cmd_buffer, pipeline, is_indirect, prog_data,
@@ -481,12 +546,14 @@ void genX(CmdDispatchBase)(
       anv_pipeline_to_compute(cmd_buffer->state.compute.base.pipeline);
    const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline);
 
-   anv_cmd_buffer_push_base_group_id(cmd_buffer, baseGroupX,
-                                     baseGroupY, baseGroupZ);
-
    if (anv_batch_has_error(&cmd_buffer->batch))
       return;
 
+   anv_cmd_buffer_push_workgroups(cmd_buffer, prog_data,
+                                  baseGroupX, baseGroupY, baseGroupZ,
+                                  groupCountX, groupCountY, groupCountZ,
+                                  ANV_NULL_ADDRESS);
+
    anv_measure_snapshot(cmd_buffer,
                         INTEL_SNAPSHOT_COMPUTE,
                         "compute",
@@ -496,20 +563,6 @@ void genX(CmdDispatchBase)(
    trace_intel_begin_compute(&cmd_buffer->trace);
 
-   if (prog_data->uses_num_work_groups) {
-      struct anv_state state =
-         anv_cmd_buffer_alloc_temporary_state(cmd_buffer, 12, 4);
-
-      uint32_t *sizes = state.map;
-      sizes[0] = groupCountX;
-      sizes[1] = groupCountY;
-      sizes[2] = groupCountZ;
-      cmd_buffer->state.compute.num_workgroups =
-         anv_cmd_buffer_temporary_state_address(cmd_buffer, state);
-
-      /* The num_workgroups buffer goes in the binding table */
-      cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
-   }
-
    genX(cmd_buffer_flush_compute_state)(cmd_buffer);
 
    if (cmd_buffer->state.conditional_render_enabled)
@@ -536,7 +589,11 @@ void genX(CmdDispatchIndirect)(
    struct anv_address addr = anv_address_add(buffer->address, offset);
    UNUSED struct anv_batch *batch = &cmd_buffer->batch;
 
-   anv_cmd_buffer_push_base_group_id(cmd_buffer, 0, 0, 0);
+   if (anv_batch_has_error(&cmd_buffer->batch))
+      return;
+
+   anv_cmd_buffer_push_workgroups(cmd_buffer, prog_data,
+                                  0, 0, 0, 0, 0, 0, addr);
 
    anv_measure_snapshot(cmd_buffer,
                         INTEL_SNAPSHOT_COMPUTE,
@@ -544,13 +601,6 @@ void genX(CmdDispatchIndirect)(
                         0);
 
    trace_intel_begin_compute_indirect(&cmd_buffer->trace);
 
-   if (prog_data->uses_num_work_groups) {
-      cmd_buffer->state.compute.num_workgroups = addr;
-
-      /* The num_workgroups buffer goes in the binding table */
-      cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
-   }
-
    genX(cmd_buffer_flush_compute_state)(cmd_buffer);
 
    if (cmd_buffer->state.conditional_render_enabled)