anv/brw/iris: get rid of param array on prog_data

Drivers can now do all the lowering to push constants themselves; the only
value in that array still useful to the backend was subgroup_id. Drivers
then call brw_cs_fill_push_const_info() to get the cross-thread/per-thread
constant layout computed in the prog_data.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38975>
Lionel Landwerlin 2025-12-17 11:37:22 +02:00 committed by Marge Bot
parent 05c3d427ba
commit f4a0e05970
17 changed files with 145 additions and 174 deletions
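For orientation before the per-file hunks, here is a hedged sketch of the
driver-side contract this commit establishes (pre-Gfx12.5 assumed; the pass
and helper names come from the diff below, while subgroup_id_offset stands
for whatever byte offset the driver picked and the surrounding code is
elided):

bool subgroup_id_lowered = false;
NIR_PASS(subgroup_id_lowered, nir, brw_nir_lower_cs_subgroup_id, devinfo,
         subgroup_id_offset /* byte offset chosen by the driver */);

if (subgroup_id_lowered) {
   /* subgroup_id is now the only (and last) push constant dword. */
   prog_data->base.nr_params = 1;
   brw_cs_fill_push_const_info(devinfo, prog_data, 0 /* dword index */);
} else {
   /* Nothing was lowered (e.g. Gfx12.5+ reads subgroup_id from the thread
    * payload), so there is no subgroup_id push constant. */
   brw_cs_fill_push_const_info(devinfo, prog_data, -1);
}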

@@ -128,7 +128,6 @@ iris_disk_cache_store(struct disk_cache *cache,
union brw_any_prog_data serializable;
assert(prog_data_s <= sizeof(serializable));
memcpy(&serializable, shader->brw_prog_data, prog_data_s);
serializable.base.param = NULL;
serializable.base.relocs = NULL;
blob_write_bytes(&blob, &serializable, prog_data_s);
} else {
@@ -152,8 +151,6 @@ iris_disk_cache_store(struct disk_cache *cache,
if (brw) {
blob_write_bytes(&blob, brw->relocs,
brw->num_relocs * sizeof(struct intel_shader_reloc));
blob_write_bytes(&blob, brw->param,
brw->nr_params * sizeof(uint32_t));
} else {
#ifdef INTEL_USE_ELK
blob_write_bytes(&blob, elk->relocs,
@@ -265,12 +262,6 @@ iris_disk_cache_retrieve(struct iris_screen *screen,
brw->num_relocs * sizeof(struct intel_shader_reloc));
brw->relocs = relocs;
}
brw->param = NULL;
if (brw->nr_params) {
brw->param = ralloc_array(NULL, uint32_t, brw->nr_params);
blob_copy_bytes(&blob, brw->param, brw->nr_params * sizeof(uint32_t));
}
} else {
#ifdef INTEL_USE_ELK
elk->relocs = NULL;

@@ -165,9 +165,8 @@ iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
iris->uses_sampler = brw->uses_sampler;
iris->prog_mask = brw->prog_mask;
iris->first_param_is_builtin_subgroup_id =
brw->base.nr_params > 0 &&
brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
/* The pushed constants only contain the subgroup_id */
iris->first_param_is_builtin_subgroup_id = brw->base.nr_params > 0;
}
static void
@@ -294,7 +293,6 @@ iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
ralloc_steal(shader, shader->brw_prog_data);
ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
ralloc_steal(shader->brw_prog_data, brw->param);
}
#ifdef INTEL_USE_ELK
@@ -1213,13 +1211,6 @@ iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
nir_validate_shader(nir, "after remap");
/* We don't use params[] but gallium leaves num_uniforms set. We use this
* to detect when cbuf0 exists but we don't need it anymore when we get
* here. Instead, zero it out so that the back-end doesn't get confused
* when nr_params * 4 != num_uniforms != nr_params * 4.
*/
nir->num_uniforms = 0;
*out_system_values = system_values;
*out_num_system_values = num_system_values;
*out_num_cbufs = num_cbufs;
@@ -3111,6 +3102,15 @@ iris_compile_cs(struct iris_screen *screen,
struct brw_cs_prog_data *brw_prog_data =
rzalloc(mem_ctx, struct brw_cs_prog_data);
bool subgroup_id_lowered = false;
NIR_PASS(subgroup_id_lowered, nir, brw_nir_lower_cs_subgroup_id, devinfo, 0);
if (subgroup_id_lowered) {
brw_prog_data->base.nr_params = 1;
brw_cs_fill_push_const_info(devinfo, brw_prog_data, 0);
} else {
brw_cs_fill_push_const_info(devinfo, brw_prog_data, -1);
}
struct brw_compile_cs_params params = {
.base = {
.mem_ctx = mem_ctx,

@@ -9410,21 +9410,34 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
if ((stage_dirty & IRIS_STAGE_DIRTY_CS) ||
(GFX_VER == 12 && !batch->contains_draw) ||
cs_data->local_size[0] == 0 /* Variable local group size */) {
uint32_t curbe_data_offset = 0;
assert(cs_data->push.cross_thread.dwords == 0 &&
cs_data->push.per_thread.dwords == 1 &&
cs_data->first_param_is_builtin_subgroup_id);
const unsigned push_const_size =
iris_cs_push_const_total_size(shader, dispatch.threads);
uint32_t *curbe_data_map =
stream_state(batch, ice->state.dynamic_uploader,
&ice->state.last_res.cs_thread_ids,
align(push_const_size, 64), 64,
&curbe_data_offset);
assert(curbe_data_map);
memset(curbe_data_map, 0x5a, align(push_const_size, 64));
iris_fill_cs_push_const_buffer(screen, shader, dispatch.threads,
curbe_data_map);
uint32_t curbe_data_offset, push_const_size;
uint32_t *curbe_data_map;
if (cs_data->push.cross_thread.dwords == 0 &&
cs_data->push.per_thread.dwords == 0) {
push_const_size = 64;
curbe_data_map =
stream_state(batch, ice->state.dynamic_uploader,
&ice->state.last_res.cs_thread_ids,
align(push_const_size, 64), 64,
&curbe_data_offset);
assert(curbe_data_map);
memset(curbe_data_map, 0x5a, align(push_const_size, 64));
} else {
assert(cs_data->push.cross_thread.dwords == 0 &&
cs_data->push.per_thread.dwords == 1 &&
cs_data->first_param_is_builtin_subgroup_id);
push_const_size =
iris_cs_push_const_total_size(shader, dispatch.threads);
curbe_data_map =
stream_state(batch, ice->state.dynamic_uploader,
&ice->state.last_res.cs_thread_ids,
align(push_const_size, 64), 64,
&curbe_data_offset);
assert(curbe_data_map);
memset(curbe_data_map, 0x5a, align(push_const_size, 64));
iris_fill_cs_push_const_buffer(screen, shader, dispatch.threads,
curbe_data_map);
}
iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength = align(push_const_size, 64);
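The CURBE size used above is derived from the push-constant layout filled in
at compile time. A hedged sketch of the arithmetic behind
iris_cs_push_const_total_size() (inferred from the fields this hunk uses,
not the verbatim helper): each block is padded to whole 32-byte registers,
the cross-thread block is loaded once, and the per-thread block repeats for
every dispatched thread.

/* Hedged sketch, assuming the total is cross-thread once plus per-thread
 * per dispatched thread. */
static unsigned
sketch_cs_push_const_total_size(const struct brw_cs_prog_data *cs_data,
                                unsigned threads)
{
   return cs_data->push.cross_thread.regs * 32 +
          threads * cs_data->push.per_thread.regs * 32;
}
/* e.g. per_thread.dwords == 1 -> 1 reg = 32 bytes; with 8 threads that is
 * 256 bytes, then align(256, 64) for CURBETotalDataLength. */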

@@ -28,7 +28,6 @@ blorp_compile_fs_brw(struct blorp_context *blorp, void *mem_ctx,
struct brw_wm_prog_data *wm_prog_data = rzalloc(mem_ctx, struct brw_wm_prog_data);
wm_prog_data->base.nr_params = 0;
wm_prog_data->base.param = NULL;
struct brw_nir_compiler_opts opts = {
.softfp64 = blorp->get_fp64_nir ? blorp->get_fp64_nir(blorp) : NULL,
@@ -147,10 +146,12 @@ blorp_compile_cs_brw(struct blorp_context *blorp, void *mem_ctx,
struct brw_cs_prog_data *cs_prog_data = rzalloc(mem_ctx, struct brw_cs_prog_data);
cs_prog_data->base.nr_params = nr_params;
cs_prog_data->base.param = rzalloc_array(NULL, uint32_t, nr_params);
brw_cs_fill_push_const_info(compiler->devinfo, cs_prog_data, nr_params);
NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo,
cs_prog_data);
NIR_PASS(_, nir, brw_nir_lower_cs_subgroup_id, compiler->devinfo,
offsetof(struct blorp_wm_inputs, subgroup_id));
NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_base_workgroup_id,
nir_metadata_control_flow, NULL);
@@ -170,9 +171,6 @@ blorp_compile_cs_brw(struct blorp_context *blorp, void *mem_ctx,
const unsigned *kernel = brw_compile_cs(compiler, &params);
ralloc_free(cs_prog_data->base.param);
cs_prog_data->base.param = NULL;
return (struct blorp_program) {
.kernel = kernel,
.kernel_size = cs_prog_data->base.program_size,

@@ -25,19 +25,15 @@ fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
block->size = block->regs * 32;
}
static void
cs_fill_push_const_info(const struct intel_device_info *devinfo,
struct brw_cs_prog_data *cs_prog_data)
extern "C" void
brw_cs_fill_push_const_info(const struct intel_device_info *devinfo,
struct brw_cs_prog_data *cs_prog_data,
int subgroup_id_index)
{
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data);
/* The thread ID should be stored in the last param dword */
assert(subgroup_id_index == -1 ||
subgroup_id_index == (int)prog_data->nr_params - 1);
unsigned cross_thread_dwords, per_thread_dwords;
if (subgroup_id_index >= 0) {
if (devinfo->verx10 < 125 && subgroup_id_index >= 0) {
/* Fill all but the last register with cross-thread payload */
cross_thread_dwords = 8 * (subgroup_id_index / 8);
per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
@@ -120,41 +116,6 @@ brw_nir_uses_sampler(nir_shader *shader)
NULL);
}
static inline uint32_t *
brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data,
unsigned nr_new_params)
{
unsigned old_nr_params = prog_data->nr_params;
prog_data->nr_params += nr_new_params;
prog_data->param = reralloc(ralloc_parent(prog_data->param),
prog_data->param, uint32_t,
prog_data->nr_params);
return prog_data->param + old_nr_params;
}
static void
brw_adjust_uniforms(brw_shader &s)
{
if (s.devinfo->verx10 >= 125)
return;
assert(mesa_shader_stage_is_compute(s.stage));
if (brw_get_subgroup_id_param_index(s.devinfo, s.prog_data) == -1) {
/* Add uniforms for builtins after regular NIR uniforms. */
assert(s.uniforms == s.prog_data->nr_params);
/* Subgroup ID must be the last uniform on the list. This will make
* easier later to split between cross thread and per thread
* uniforms.
*/
uint32_t *param = brw_stage_prog_data_add_params(s.prog_data, 1);
*param = BRW_PARAM_BUILTIN_SUBGROUP_ID;
}
s.uniforms = s.prog_data->nr_params;
}
const unsigned *
brw_compile_cs(const struct brw_compiler *compiler,
struct brw_compile_cs_params *params)
@@ -233,7 +194,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
.archiver = params->base.archiver,
};
v[simd] = std::make_unique<brw_shader>(&shader_params);
brw_adjust_uniforms(*v[simd]);
const bool allow_spilling = simd == 0 ||
(!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)) ||
@@ -245,8 +205,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
}
if (run_cs(*v[simd], allow_spilling)) {
cs_fill_push_const_info(compiler->devinfo, prog_data);
brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers &&
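To make the cross/per-thread split concrete, a worked example mirroring the
code in this file's first hunk: the dwords in front of subgroup_id that fill
whole 8-dword GRFs are pushed once, cross-thread; the remainder, ending with
subgroup_id, is replicated per thread.

/* nr_params = 9, subgroup_id_index = 8 (last dword, as asserted above):
 *   cross_thread_dwords = 8 * (8 / 8) = 8 -> one GRF shared by all threads
 *   per_thread_dwords   = 9 - 8       = 1 -> just subgroup_id, per thread
 *
 * nr_params = 1, subgroup_id_index = 0 (the iris/anv case in this commit):
 *   cross_thread_dwords = 8 * (0 / 8) = 0
 *   per_thread_dwords   = 1 - 0       = 1
 */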

@@ -543,7 +543,6 @@ enum brw_param_builtin {
BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X,
BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y,
BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z,
BRW_PARAM_BUILTIN_SUBGROUP_ID,
BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X,
BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Y,
BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z,
@@ -616,14 +615,6 @@ struct brw_stage_prog_data {
uint32_t source_hash;
/* 32-bit identifiers for all push/pull parameters. These can be anything
* the driver wishes them to be; the core of the back-end compiler simply
* re-arranges them. The one restriction is that the bottom 2^16 values
* are reserved for builtins defined in the brw_param_builtin enum defined
* above.
*/
uint32_t *param;
/* Whether shader uses atomic operations. */
bool uses_atomic_load_store;
};
@@ -1672,6 +1663,11 @@ unsigned
brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
unsigned threads);
void
brw_cs_fill_push_const_info(const struct intel_device_info *devinfo,
struct brw_cs_prog_data *cs_prog_data,
int subgroup_id_index);
void
brw_write_shader_relocs(const struct brw_isa_info *isa,
void *program,

@@ -179,6 +179,10 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
bool brw_nir_lower_cs_intrinsics(nir_shader *nir,
const struct intel_device_info *devinfo,
struct brw_cs_prog_data *prog_data);
bool brw_nir_lower_cs_subgroup_id(nir_shader *nir,
const struct intel_device_info *devinfo,
unsigned subgroup_id_offset);
bool brw_nir_lower_alpha_to_coverage(nir_shader *shader);
bool brw_needs_vertex_attributes_bypass(const nir_shader *shader);
void brw_nir_lower_fs_barycentrics(nir_shader *shader);

@@ -387,3 +387,36 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,
return state.progress;
}
static bool
lower_cs_subgroup_id_instr(nir_builder *b,
nir_intrinsic_instr *intrin,
void *data)
{
if (intrin->intrinsic != nir_intrinsic_load_subgroup_id)
return false;
const unsigned *subgroup_id_offset_ptr = data;
b->cursor = nir_before_instr(&intrin->instr);
nir_def_replace(&intrin->def,
nir_load_uniform(
b, 1, 32, nir_imm_int(b, 0),
.base = *subgroup_id_offset_ptr,
.range = 4));
return true;
}
bool
brw_nir_lower_cs_subgroup_id(nir_shader *nir,
const struct intel_device_info *devinfo,
unsigned subgroup_id_offset)
{
if (devinfo->verx10 >= 125)
return false;
return nir_shader_intrinsics_pass(nir, lower_cs_subgroup_id_instr,
nir_metadata_control_flow,
&subgroup_id_offset);
}
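Both callers added in this commit pass a byte offset into their own
push-constant layout; the calls below are lifted from the iris and blorp
hunks in this commit (surrounding code elided):

/* iris: subgroup_id is the sole push constant, at offset 0. */
NIR_PASS(subgroup_id_lowered, nir, brw_nir_lower_cs_subgroup_id, devinfo, 0);

/* blorp: subgroup_id lives inside the blorp inputs struct. */
NIR_PASS(_, nir, brw_nir_lower_cs_subgroup_id, compiler->devinfo,
         offsetof(struct blorp_wm_inputs, subgroup_id));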

@@ -969,24 +969,6 @@ brw_shader::convert_attr_sources_to_hw_regs(brw_inst *inst)
}
}
int
brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
const brw_stage_prog_data *prog_data)
{
if (prog_data->nr_params == 0)
return -1;
if (devinfo->verx10 >= 125)
return -1;
/* The local thread id is always the last parameter in the list */
uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
if (last_param == BRW_PARAM_BUILTIN_SUBGROUP_ID)
return prog_data->nr_params - 1;
return -1;
}
uint32_t
brw_fb_write_msg_control(const brw_inst *inst,
const struct brw_wm_prog_data *prog_data)

@@ -302,9 +302,6 @@ uint32_t brw_fb_write_msg_control(const brw_inst *inst,
void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
const brw_stage_prog_data *prog_data);
void brw_from_nir(brw_shader *s);
void brw_shader_phase_update(brw_shader &s, enum brw_shader_phase phase);

@@ -380,19 +380,9 @@ void
brw_cs_thread_payload::load_subgroup_id(const brw_builder &bld,
brw_reg &dest) const
{
auto devinfo = bld.shader->devinfo;
assert(bld.shader->devinfo->verx10 >= 125);
dest = retype(dest, BRW_TYPE_UD);
if (subgroup_id_.file != BAD_FILE) {
assert(devinfo->verx10 >= 125);
bld.AND(dest, subgroup_id_, brw_imm_ud(INTEL_MASK(7, 0)));
} else {
assert(devinfo->verx10 < 125);
assert(mesa_shader_stage_is_compute(bld.shader->stage));
int index = brw_get_subgroup_id_param_index(devinfo,
bld.shader->prog_data);
bld.MOV(dest, brw_uniform_reg(index, BRW_TYPE_UD));
}
bld.AND(dest, subgroup_id_, brw_imm_ud(INTEL_MASK(7, 0)));
}
brw_task_mesh_thread_payload::brw_task_mesh_thread_payload(brw_shader &v)

@@ -157,7 +157,6 @@ compile_shader(struct anv_device *device,
void *temp_ctx = ralloc_context(NULL);
prog_data.base.nr_params = nir->num_uniforms / 4;
prog_data.base.param = rzalloc_array(temp_ctx, uint32_t, prog_data.base.nr_params);
brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges);
@@ -191,6 +190,8 @@
}
}
} else {
brw_cs_fill_push_const_info(device->info, &prog_data.cs, -1);
struct genisa_stats stats;
struct brw_compile_cs_params params = {
.base = {

@@ -143,18 +143,6 @@ anv_nir_compute_push_layout(nir_shader *nir,
push_end = MAX2(push_end, tess_config_end);
}
if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
/* For compute shaders, we always have to have the subgroup ID. The
* back-end compiler will "helpfully" add it for us in the last push
* constant slot. Yes, there is an off-by-one error here but that's
* because the back-end will add it so we want to claim the number of
* push constants one dword less than the full amount including
* gl_SubgroupId.
*/
assert(push_end <= anv_drv_const_offset(cs.subgroup_id));
push_end = anv_drv_const_offset(cs.subgroup_id);
}
/* Align push_start down to a 32B (for 3DSTATE_CONSTANT) and make it no
* larger than push_end (no push constants is indicated by push_start =
* UINT_MAX).
@@ -188,7 +176,18 @@ anv_nir_compute_push_layout(nir_shader *nir,
const unsigned alignment = 4;
nir->num_uniforms = align(push_end - push_start, alignment);
prog_data->nr_params = nir->num_uniforms / 4;
prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
/* Fill the compute push constant layout (cross/per thread constants) for
* platforms pre Gfx12.5.
*/
if (nir->info.stage == MESA_SHADER_COMPUTE) {
const int subgroup_id_index =
push_end == (anv_drv_const_offset(cs.subgroup_id) +
anv_drv_const_size(cs.subgroup_id)) ?
(anv_drv_const_offset(cs.subgroup_id) - push_start) / 4 : -1;
struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
brw_cs_fill_push_const_info(devinfo, cs_prog_data, subgroup_id_index);
}
const struct anv_push_range push_constant_range = {
.set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
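A hedged numeric illustration of the subgroup_id_index computation above
(the offsets are made up; anv_drv_const_offset()/anv_drv_const_size() are
the existing anv helpers):

/* Suppose push_start = 0, anv_drv_const_offset(cs.subgroup_id) = 32 and
 * anv_drv_const_size(cs.subgroup_id) = 4.  If push_end == 32 + 4, the
 * subgroup_id dword is the last thing pushed, so
 *   subgroup_id_index = (32 - 0) / 4 = 8
 * and, pre-Gfx12.5, brw_cs_fill_push_const_info() makes dwords 0..7
 * cross-thread and dword 8 per-thread.  Any other push_end means the
 * shader never loads subgroup_id and the index is -1. */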

@@ -53,6 +53,20 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin)
return true;
}
static bool
lower_subgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
const struct anv_physical_device *pdevice)
{
if (pdevice->info.verx10 >= 125)
return false;
b->cursor = nir_before_instr(&intrin->instr);
nir_def_replace(&intrin->def,
anv_load_driver_uniform(b, 1, cs.subgroup_id));
return true;
}
static bool
lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin)
{
@@ -72,6 +86,8 @@ lower_driver_values(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
return lower_load_constant(b, intrin);
case nir_intrinsic_load_base_workgroup_id:
return lower_base_workgroup_id(b, intrin);
case nir_intrinsic_load_subgroup_id:
return lower_subgroup_id(b, intrin, data);
case nir_intrinsic_load_ray_query_global_intel:
return lower_ray_query_globals(b, intrin);
default:

@@ -94,7 +94,6 @@ anv_shader_internal_create(struct anv_device *device,
prog_data_size);
VK_MULTIALLOC_DECL(&ma, struct intel_shader_reloc, prog_data_relocs,
prog_data_in->num_relocs);
VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
VK_MULTIALLOC_DECL(&ma, void, code, kernel_size);
VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
@@ -151,7 +150,6 @@ anv_shader_internal_create(struct anv_device *device,
typed_memcpy(prog_data_relocs, prog_data_in->relocs,
prog_data_in->num_relocs);
prog_data->relocs = prog_data_relocs;
prog_data->param = prog_data_param;
shader->prog_data = prog_data;
shader->prog_data_size = prog_data_size;
@@ -210,7 +208,6 @@ anv_shader_internal_serialize(struct vk_pipeline_cache_object *object,
assert(shader->prog_data_size <= sizeof(prog_data));
memcpy(&prog_data, shader->prog_data, shader->prog_data_size);
prog_data.base.relocs = NULL;
prog_data.base.param = NULL;
blob_write_bytes(blob, &prog_data, shader->prog_data_size);
blob_write_bytes(blob, shader->prog_data->relocs,

@@ -111,7 +111,6 @@ anv_shader_serialize(struct vk_device *device,
union brw_any_prog_data prog_data;
memcpy(&prog_data, shader->prog_data, brw_prog_data_size(vk_shader->stage));
prog_data.base.relocs = NULL;
prog_data.base.param = NULL;
blob_write_bytes(blob, &prog_data, brw_prog_data_size(vk_shader->stage));
@@ -584,9 +583,6 @@ anv_shader_create(struct anv_device *device,
const uint32_t cmd_data_dwords = anv_genX(device->info, shader_cmd_size)(
device, stage);
/* We never need this at runtime */
shader_data->prog_data.base.param = NULL;
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct anv_shader, shader, 1);
VK_MULTIALLOC_DECL(&ma, uint32_t, cmd_data, cmd_data_dwords);

@@ -1473,8 +1473,6 @@ anv_shader_lower_nir(struct anv_device *device,
dynamic_descriptors_offsets,
&shader_data->bind_map, &shader_data->push_map, mem_ctx);
NIR_PASS(_, nir, anv_nir_lower_driver_values, pdevice);
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
anv_nir_ubo_addr_format(pdevice, shader_data->key.base.robust_flags));
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
@@ -1545,28 +1543,6 @@ anv_shader_lower_nir(struct anv_device *device,
NIR_PASS(_, nir, nir_opt_dce);
}
NIR_PASS(_, nir, anv_nir_update_resource_intel_block);
NIR_PASS(_, nir, anv_nir_compute_push_layout,
pdevice, shader_data->key.base.robust_flags,
&(struct anv_nir_push_layout_info) {
.separate_tessellation = (nir->info.stage == MESA_SHADER_TESS_CTRL &&
shader_data->key.tcs.separate_tess_vue_layout) ||
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
shader_data->key.tes.separate_tess_vue_layout),
.fragment_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
brw_wm_prog_key_is_dynamic(&shader_data->key.wm),
.mesh_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
shader_data->key.wm.mesh_input == INTEL_SOMETIMES,
},
&shader_data->key.base,
&shader_data->prog_data.base,
&shader_data->bind_map, &shader_data->push_map,
mem_ctx);
NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
shader_data->bind_map.layout_type);
if (mesa_shader_stage_uses_workgroup(nir->info.stage)) {
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
nir_var_mem_shared, shared_type_info);
@@ -1597,6 +1573,30 @@ anv_shader_lower_nir(struct anv_device *device,
&shader_data->prog_data.cs);
}
NIR_PASS(_, nir, anv_nir_lower_driver_values, pdevice);
NIR_PASS(_, nir, anv_nir_update_resource_intel_block);
NIR_PASS(_, nir, anv_nir_compute_push_layout,
pdevice, shader_data->key.base.robust_flags,
&(struct anv_nir_push_layout_info) {
.separate_tessellation = (nir->info.stage == MESA_SHADER_TESS_CTRL &&
shader_data->key.tcs.separate_tess_vue_layout) ||
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
shader_data->key.tes.separate_tess_vue_layout),
.fragment_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
brw_wm_prog_key_is_dynamic(&shader_data->key.wm),
.mesh_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
shader_data->key.wm.mesh_input == INTEL_SOMETIMES,
},
&shader_data->key.base,
&shader_data->prog_data.base,
&shader_data->bind_map, &shader_data->push_map,
mem_ctx);
NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
shader_data->bind_map.layout_type);
shader_data->push_desc_info.push_set_buffer =
anv_nir_loads_push_desc_buffer(
nir, set_layouts, set_layout_count, &shader_data->bind_map);