mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-27 14:10:25 +01:00
anv/brw/iris: get rid of param array on prog_data
Drivers can do all the lowering to push constants to find the only value useful in that array (subgroup_id). Then drivers call into brw_cs_fill_push_const_info() to get the cross/per thread constant layout computed in the prog_data. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38975>
This commit is contained in:
parent
05c3d427ba
commit
f4a0e05970
17 changed files with 145 additions and 174 deletions
|
|
@ -128,7 +128,6 @@ iris_disk_cache_store(struct disk_cache *cache,
|
|||
union brw_any_prog_data serializable;
|
||||
assert(prog_data_s <= sizeof(serializable));
|
||||
memcpy(&serializable, shader->brw_prog_data, prog_data_s);
|
||||
serializable.base.param = NULL;
|
||||
serializable.base.relocs = NULL;
|
||||
blob_write_bytes(&blob, &serializable, prog_data_s);
|
||||
} else {
|
||||
|
|
@ -152,8 +151,6 @@ iris_disk_cache_store(struct disk_cache *cache,
|
|||
if (brw) {
|
||||
blob_write_bytes(&blob, brw->relocs,
|
||||
brw->num_relocs * sizeof(struct intel_shader_reloc));
|
||||
blob_write_bytes(&blob, brw->param,
|
||||
brw->nr_params * sizeof(uint32_t));
|
||||
} else {
|
||||
#ifdef INTEL_USE_ELK
|
||||
blob_write_bytes(&blob, elk->relocs,
|
||||
|
|
@ -265,12 +262,6 @@ iris_disk_cache_retrieve(struct iris_screen *screen,
|
|||
brw->num_relocs * sizeof(struct intel_shader_reloc));
|
||||
brw->relocs = relocs;
|
||||
}
|
||||
|
||||
brw->param = NULL;
|
||||
if (brw->nr_params) {
|
||||
brw->param = ralloc_array(NULL, uint32_t, brw->nr_params);
|
||||
blob_copy_bytes(&blob, brw->param, brw->nr_params * sizeof(uint32_t));
|
||||
}
|
||||
} else {
|
||||
#ifdef INTEL_USE_ELK
|
||||
elk->relocs = NULL;
|
||||
|
|
|
|||
|
|
@ -165,9 +165,8 @@ iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
|
|||
iris->uses_sampler = brw->uses_sampler;
|
||||
iris->prog_mask = brw->prog_mask;
|
||||
|
||||
iris->first_param_is_builtin_subgroup_id =
|
||||
brw->base.nr_params > 0 &&
|
||||
brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
|
||||
/* The pushed constants only contain the subgroup_id */
|
||||
iris->first_param_is_builtin_subgroup_id = brw->base.nr_params > 0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -294,7 +293,6 @@ iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
|
|||
|
||||
ralloc_steal(shader, shader->brw_prog_data);
|
||||
ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
|
||||
ralloc_steal(shader->brw_prog_data, brw->param);
|
||||
}
|
||||
|
||||
#ifdef INTEL_USE_ELK
|
||||
|
|
@ -1213,13 +1211,6 @@ iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
|
|||
assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
|
||||
nir_validate_shader(nir, "after remap");
|
||||
|
||||
/* We don't use params[] but gallium leaves num_uniforms set. We use this
|
||||
* to detect when cbuf0 exists but we don't need it anymore when we get
|
||||
* here. Instead, zero it out so that the back-end doesn't get confused
|
||||
* when num_uniforms != nr_params * 4.
|
||||
*/
|
||||
nir->num_uniforms = 0;
|
||||
|
||||
*out_system_values = system_values;
|
||||
*out_num_system_values = num_system_values;
|
||||
*out_num_cbufs = num_cbufs;
|
||||
|
|
@ -3111,6 +3102,15 @@ iris_compile_cs(struct iris_screen *screen,
|
|||
struct brw_cs_prog_data *brw_prog_data =
|
||||
rzalloc(mem_ctx, struct brw_cs_prog_data);
|
||||
|
||||
bool subgroup_id_lowered = false;
|
||||
NIR_PASS(subgroup_id_lowered, nir, brw_nir_lower_cs_subgroup_id, devinfo, 0);
|
||||
if (subgroup_id_lowered) {
|
||||
brw_prog_data->base.nr_params = 1;
|
||||
brw_cs_fill_push_const_info(devinfo, brw_prog_data, 0);
|
||||
} else {
|
||||
brw_cs_fill_push_const_info(devinfo, brw_prog_data, -1);
|
||||
}
|
||||
|
||||
struct brw_compile_cs_params params = {
|
||||
.base = {
|
||||
.mem_ctx = mem_ctx,
|
||||
|
|
|
|||
|
|
@ -9410,21 +9410,34 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
|
|||
if ((stage_dirty & IRIS_STAGE_DIRTY_CS) ||
|
||||
(GFX_VER == 12 && !batch->contains_draw) ||
|
||||
cs_data->local_size[0] == 0 /* Variable local group size */) {
|
||||
uint32_t curbe_data_offset = 0;
|
||||
assert(cs_data->push.cross_thread.dwords == 0 &&
|
||||
cs_data->push.per_thread.dwords == 1 &&
|
||||
cs_data->first_param_is_builtin_subgroup_id);
|
||||
const unsigned push_const_size =
|
||||
iris_cs_push_const_total_size(shader, dispatch.threads);
|
||||
uint32_t *curbe_data_map =
|
||||
stream_state(batch, ice->state.dynamic_uploader,
|
||||
&ice->state.last_res.cs_thread_ids,
|
||||
align(push_const_size, 64), 64,
|
||||
&curbe_data_offset);
|
||||
assert(curbe_data_map);
|
||||
memset(curbe_data_map, 0x5a, align(push_const_size, 64));
|
||||
iris_fill_cs_push_const_buffer(screen, shader, dispatch.threads,
|
||||
curbe_data_map);
|
||||
uint32_t curbe_data_offset, push_const_size;
|
||||
uint32_t *curbe_data_map;
|
||||
if (cs_data->push.cross_thread.dwords == 0 &&
|
||||
cs_data->push.per_thread.dwords == 0) {
|
||||
push_const_size = 64;
|
||||
curbe_data_map =
|
||||
stream_state(batch, ice->state.dynamic_uploader,
|
||||
&ice->state.last_res.cs_thread_ids,
|
||||
align(push_const_size, 64), 64,
|
||||
&curbe_data_offset);
|
||||
assert(curbe_data_map);
|
||||
memset(curbe_data_map, 0x5a, align(push_const_size, 64));
|
||||
} else {
|
||||
assert(cs_data->push.cross_thread.dwords == 0 &&
|
||||
cs_data->push.per_thread.dwords == 1 &&
|
||||
cs_data->first_param_is_builtin_subgroup_id);
|
||||
push_const_size =
|
||||
iris_cs_push_const_total_size(shader, dispatch.threads);
|
||||
curbe_data_map =
|
||||
stream_state(batch, ice->state.dynamic_uploader,
|
||||
&ice->state.last_res.cs_thread_ids,
|
||||
align(push_const_size, 64), 64,
|
||||
&curbe_data_offset);
|
||||
assert(curbe_data_map);
|
||||
memset(curbe_data_map, 0x5a, align(push_const_size, 64));
|
||||
iris_fill_cs_push_const_buffer(screen, shader, dispatch.threads,
|
||||
curbe_data_map);
|
||||
}
|
||||
|
||||
iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
|
||||
curbe.CURBETotalDataLength = align(push_const_size, 64);
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ blorp_compile_fs_brw(struct blorp_context *blorp, void *mem_ctx,
|
|||
|
||||
struct brw_wm_prog_data *wm_prog_data = rzalloc(mem_ctx, struct brw_wm_prog_data);
|
||||
wm_prog_data->base.nr_params = 0;
|
||||
wm_prog_data->base.param = NULL;
|
||||
|
||||
struct brw_nir_compiler_opts opts = {
|
||||
.softfp64 = blorp->get_fp64_nir ? blorp->get_fp64_nir(blorp) : NULL,
|
||||
|
|
@ -147,10 +146,12 @@ blorp_compile_cs_brw(struct blorp_context *blorp, void *mem_ctx,
|
|||
|
||||
struct brw_cs_prog_data *cs_prog_data = rzalloc(mem_ctx, struct brw_cs_prog_data);
|
||||
cs_prog_data->base.nr_params = nr_params;
|
||||
cs_prog_data->base.param = rzalloc_array(NULL, uint32_t, nr_params);
|
||||
brw_cs_fill_push_const_info(compiler->devinfo, cs_prog_data, nr_params);
|
||||
|
||||
NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo,
|
||||
cs_prog_data);
|
||||
NIR_PASS(_, nir, brw_nir_lower_cs_subgroup_id, compiler->devinfo,
|
||||
offsetof(struct blorp_wm_inputs, subgroup_id));
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_base_workgroup_id,
|
||||
nir_metadata_control_flow, NULL);
|
||||
|
||||
|
|
@ -170,9 +171,6 @@ blorp_compile_cs_brw(struct blorp_context *blorp, void *mem_ctx,
|
|||
|
||||
const unsigned *kernel = brw_compile_cs(compiler, &params);
|
||||
|
||||
ralloc_free(cs_prog_data->base.param);
|
||||
cs_prog_data->base.param = NULL;
|
||||
|
||||
return (struct blorp_program) {
|
||||
.kernel = kernel,
|
||||
.kernel_size = cs_prog_data->base.program_size,
|
||||
|
|
|
|||
|
|
@ -25,19 +25,15 @@ fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
|
|||
block->size = block->regs * 32;
|
||||
}
|
||||
|
||||
static void
|
||||
cs_fill_push_const_info(const struct intel_device_info *devinfo,
|
||||
struct brw_cs_prog_data *cs_prog_data)
|
||||
extern "C" void
|
||||
brw_cs_fill_push_const_info(const struct intel_device_info *devinfo,
|
||||
struct brw_cs_prog_data *cs_prog_data,
|
||||
int subgroup_id_index)
|
||||
{
|
||||
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
||||
int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data);
|
||||
|
||||
/* The thread ID should be stored in the last param dword */
|
||||
assert(subgroup_id_index == -1 ||
|
||||
subgroup_id_index == (int)prog_data->nr_params - 1);
|
||||
|
||||
unsigned cross_thread_dwords, per_thread_dwords;
|
||||
if (subgroup_id_index >= 0) {
|
||||
if (devinfo->verx10 < 125 && subgroup_id_index >= 0) {
|
||||
/* Fill all but the last register with cross-thread payload */
|
||||
cross_thread_dwords = 8 * (subgroup_id_index / 8);
|
||||
per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
|
||||
|
|
@ -120,41 +116,6 @@ brw_nir_uses_sampler(nir_shader *shader)
|
|||
NULL);
|
||||
}
|
||||
|
||||
/**
 * Grow prog_data->param by nr_new_params entries.
 *
 * prog_data->nr_params is increased by nr_new_params and the param array is
 * reallocated to the new count, keeping whatever ralloc parent the array
 * currently has. This function does not write the new entries itself; the
 * caller is expected to fill them in.
 *
 * \param prog_data      stage prog_data whose param array is extended
 * \param nr_new_params  number of entries to append
 * \return pointer to the first appended entry (the old end of the array)
 */
static inline uint32_t *
|
||||
brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data,
|
||||
unsigned nr_new_params)
|
||||
{
|
||||
unsigned old_nr_params = prog_data->nr_params;
|
||||
prog_data->nr_params += nr_new_params;
|
||||
prog_data->param = reralloc(ralloc_parent(prog_data->param),
|
||||
prog_data->param, uint32_t,
|
||||
prog_data->nr_params);
|
||||
return prog_data->param + old_nr_params;
|
||||
}
|
||||
|
||||
/**
 * Make sure the subgroup ID builtin is the last entry of the param array on
 * platforms with verx10 < 125, and resync s.uniforms with the param count.
 *
 * Does nothing when s.devinfo->verx10 >= 125. Otherwise the shader stage must
 * be a compute stage (asserted). If brw_get_subgroup_id_param_index() does
 * not find the builtin, one param is appended and set to
 * BRW_PARAM_BUILTIN_SUBGROUP_ID. In both cases s.uniforms ends up equal to
 * s.prog_data->nr_params.
 */
static void
|
||||
brw_adjust_uniforms(brw_shader &s)
|
||||
{
|
||||
if (s.devinfo->verx10 >= 125)
|
||||
return;
|
||||

||||
assert(mesa_shader_stage_is_compute(s.stage));
|
||||

||||
if (brw_get_subgroup_id_param_index(s.devinfo, s.prog_data) == -1) {
|
||||
/* Add uniforms for builtins after regular NIR uniforms. */
|
||||
assert(s.uniforms == s.prog_data->nr_params);
|
||||

||||
/* Subgroup ID must be the last uniform on the list. This will make
|
||||
* easier later to split between cross thread and per thread
|
||||
* uniforms.
|
||||
*/
|
||||
uint32_t *param = brw_stage_prog_data_add_params(s.prog_data, 1);
|
||||
*param = BRW_PARAM_BUILTIN_SUBGROUP_ID;
|
||||
}
|
||||

||||
s.uniforms = s.prog_data->nr_params;
|
||||
}
|
||||
|
||||
const unsigned *
|
||||
brw_compile_cs(const struct brw_compiler *compiler,
|
||||
struct brw_compile_cs_params *params)
|
||||
|
|
@ -233,7 +194,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
|
|||
.archiver = params->base.archiver,
|
||||
};
|
||||
v[simd] = std::make_unique<brw_shader>(&shader_params);
|
||||
brw_adjust_uniforms(*v[simd]);
|
||||
|
||||
const bool allow_spilling = simd == 0 ||
|
||||
(!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)) ||
|
||||
|
|
@ -245,8 +205,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
|
|||
}
|
||||
|
||||
if (run_cs(*v[simd], allow_spilling)) {
|
||||
cs_fill_push_const_info(compiler->devinfo, prog_data);
|
||||
|
||||
brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
|
||||
|
||||
if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers &&
|
||||
|
|
|
|||
|
|
@ -543,7 +543,6 @@ enum brw_param_builtin {
|
|||
BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X,
|
||||
BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y,
|
||||
BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z,
|
||||
BRW_PARAM_BUILTIN_SUBGROUP_ID,
|
||||
BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X,
|
||||
BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Y,
|
||||
BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z,
|
||||
|
|
@ -616,14 +615,6 @@ struct brw_stage_prog_data {
|
|||
|
||||
uint32_t source_hash;
|
||||
|
||||
/* 32-bit identifiers for all push/pull parameters. These can be anything
|
||||
* the driver wishes them to be; the core of the back-end compiler simply
|
||||
* re-arranges them. The one restriction is that the bottom 2^16 values
|
||||
* are reserved for builtins defined in the brw_param_builtin enum defined
|
||||
* above.
|
||||
*/
|
||||
uint32_t *param;
|
||||
|
||||
/* Whether shader uses atomic operations. */
|
||||
bool uses_atomic_load_store;
|
||||
};
|
||||
|
|
@ -1672,6 +1663,11 @@ unsigned
|
|||
brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
|
||||
unsigned threads);
|
||||
|
||||
void
|
||||
brw_cs_fill_push_const_info(const struct intel_device_info *devinfo,
|
||||
struct brw_cs_prog_data *cs_prog_data,
|
||||
int subgroup_id_index);
|
||||
|
||||
void
|
||||
brw_write_shader_relocs(const struct brw_isa_info *isa,
|
||||
void *program,
|
||||
|
|
|
|||
|
|
@ -179,6 +179,10 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
|
|||
bool brw_nir_lower_cs_intrinsics(nir_shader *nir,
|
||||
const struct intel_device_info *devinfo,
|
||||
struct brw_cs_prog_data *prog_data);
|
||||
bool brw_nir_lower_cs_subgroup_id(nir_shader *nir,
|
||||
const struct intel_device_info *devinfo,
|
||||
unsigned subgroup_id_offset);
|
||||
|
||||
bool brw_nir_lower_alpha_to_coverage(nir_shader *shader);
|
||||
bool brw_needs_vertex_attributes_bypass(const nir_shader *shader);
|
||||
void brw_nir_lower_fs_barycentrics(nir_shader *shader);
|
||||
|
|
|
|||
|
|
@ -387,3 +387,36 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,
|
|||
|
||||
return state.progress;
|
||||
}
|
||||
|
||||
/**
 * Per-intrinsic callback used with nir_shader_intrinsics_pass().
 *
 * Replaces a load_subgroup_id intrinsic with a single-component, 32-bit
 * load_uniform built right before it. The load uses a constant offset of 0,
 * a base equal to the unsigned value that data points to, and a range of 4.
 *
 * \param b       builder used to emit the replacement
 * \param intrin  intrinsic being visited
 * \param data    pointer to an unsigned holding the uniform base to load from
 * \return true when the intrinsic was replaced, false for any other intrinsic
 */
static bool
|
||||
lower_cs_subgroup_id_instr(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_load_subgroup_id)
|
||||
return false;
|
||||

||||
const unsigned *subgroup_id_offset_ptr = data;
|
||||

||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def_replace(&intrin->def,
|
||||
nir_load_uniform(
|
||||
b, 1, 32, nir_imm_int(b, 0),
|
||||
.base = *subgroup_id_offset_ptr,
|
||||
.range = 4));
|
||||

||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Lower load_subgroup_id intrinsics to uniform loads on platforms with
 * verx10 < 125.
 *
 * When devinfo->verx10 >= 125 the shader is left untouched and false is
 * returned. Otherwise lower_cs_subgroup_id_instr() is run over the shader's
 * intrinsics with subgroup_id_offset as the uniform base.
 *
 * \param nir                 shader to process
 * \param devinfo             device info, only verx10 is read here
 * \param subgroup_id_offset  base used for the generated load_uniform
 *                            NOTE(review): presumably a byte offset into the
 *                            push constant space -- confirm against callers
 * \return the result of nir_shader_intrinsics_pass(), or false when skipped
 */
bool
|
||||
brw_nir_lower_cs_subgroup_id(nir_shader *nir,
|
||||
const struct intel_device_info *devinfo,
|
||||
unsigned subgroup_id_offset)
|
||||
{
|
||||
if (devinfo->verx10 >= 125)
|
||||
return false;
|
||||

||||
/* The callback data is the address of our by-value parameter, so it is
 * only valid for the duration of this call.
 */
return nir_shader_intrinsics_pass(nir, lower_cs_subgroup_id_instr,
|
||||
nir_metadata_control_flow,
|
||||
&subgroup_id_offset);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -969,24 +969,6 @@ brw_shader::convert_attr_sources_to_hw_regs(brw_inst *inst)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Find the index of the subgroup ID builtin in prog_data->param.
 *
 * Only the last entry of the array is inspected.
 *
 * \param devinfo    device info, only verx10 is read here
 * \param prog_data  stage prog_data whose param array is examined
 * \return nr_params - 1 when the last param is BRW_PARAM_BUILTIN_SUBGROUP_ID;
 *         -1 when there are no params, when verx10 >= 125, or when the last
 *         param is something else
 */
int
|
||||
brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
|
||||
const brw_stage_prog_data *prog_data)
|
||||
{
|
||||
if (prog_data->nr_params == 0)
|
||||
return -1;
|
||||

||||
if (devinfo->verx10 >= 125)
|
||||
return -1;
|
||||

||||
/* The local thread id is always the last parameter in the list */
|
||||
uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
|
||||
if (last_param == BRW_PARAM_BUILTIN_SUBGROUP_ID)
|
||||
return prog_data->nr_params - 1;
|
||||

||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
brw_fb_write_msg_control(const brw_inst *inst,
|
||||
const struct brw_wm_prog_data *prog_data)
|
||||
|
|
|
|||
|
|
@ -302,9 +302,6 @@ uint32_t brw_fb_write_msg_control(const brw_inst *inst,
|
|||
|
||||
void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
|
||||
|
||||
int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
|
||||
const brw_stage_prog_data *prog_data);
|
||||
|
||||
void brw_from_nir(brw_shader *s);
|
||||
|
||||
void brw_shader_phase_update(brw_shader &s, enum brw_shader_phase phase);
|
||||
|
|
|
|||
|
|
@ -380,19 +380,9 @@ void
|
|||
brw_cs_thread_payload::load_subgroup_id(const brw_builder &bld,
|
||||
brw_reg &dest) const
|
||||
{
|
||||
auto devinfo = bld.shader->devinfo;
|
||||
assert(bld.shader->devinfo->verx10 >= 125);
|
||||
dest = retype(dest, BRW_TYPE_UD);
|
||||
|
||||
if (subgroup_id_.file != BAD_FILE) {
|
||||
assert(devinfo->verx10 >= 125);
|
||||
bld.AND(dest, subgroup_id_, brw_imm_ud(INTEL_MASK(7, 0)));
|
||||
} else {
|
||||
assert(devinfo->verx10 < 125);
|
||||
assert(mesa_shader_stage_is_compute(bld.shader->stage));
|
||||
int index = brw_get_subgroup_id_param_index(devinfo,
|
||||
bld.shader->prog_data);
|
||||
bld.MOV(dest, brw_uniform_reg(index, BRW_TYPE_UD));
|
||||
}
|
||||
bld.AND(dest, subgroup_id_, brw_imm_ud(INTEL_MASK(7, 0)));
|
||||
}
|
||||
|
||||
brw_task_mesh_thread_payload::brw_task_mesh_thread_payload(brw_shader &v)
|
||||
|
|
|
|||
|
|
@ -157,7 +157,6 @@ compile_shader(struct anv_device *device,
|
|||
void *temp_ctx = ralloc_context(NULL);
|
||||
|
||||
prog_data.base.nr_params = nir->num_uniforms / 4;
|
||||
prog_data.base.param = rzalloc_array(temp_ctx, uint32_t, prog_data.base.nr_params);
|
||||
|
||||
brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges);
|
||||
|
||||
|
|
@ -191,6 +190,8 @@ compile_shader(struct anv_device *device,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
brw_cs_fill_push_const_info(device->info, &prog_data.cs, -1);
|
||||
|
||||
struct genisa_stats stats;
|
||||
struct brw_compile_cs_params params = {
|
||||
.base = {
|
||||
|
|
|
|||
|
|
@ -143,18 +143,6 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
push_end = MAX2(push_end, tess_config_end);
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
|
||||
/* For compute shaders, we always have to have the subgroup ID. The
|
||||
* back-end compiler will "helpfully" add it for us in the last push
|
||||
* constant slot. Yes, there is an off-by-one error here but that's
|
||||
* because the back-end will add it so we want to claim the number of
|
||||
* push constants one dword less than the full amount including
|
||||
* gl_SubgroupId.
|
||||
*/
|
||||
assert(push_end <= anv_drv_const_offset(cs.subgroup_id));
|
||||
push_end = anv_drv_const_offset(cs.subgroup_id);
|
||||
}
|
||||
|
||||
/* Align push_start down to a 32B (for 3DSTATE_CONSTANT) and make it no
|
||||
* larger than push_end (no push constants is indicated by push_start =
|
||||
* UINT_MAX).
|
||||
|
|
@ -188,7 +176,18 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
const unsigned alignment = 4;
|
||||
nir->num_uniforms = align(push_end - push_start, alignment);
|
||||
prog_data->nr_params = nir->num_uniforms / 4;
|
||||
prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
|
||||
|
||||
/* Fill the compute push constant layout (cross/per thread constants) for
|
||||
* platforms pre Gfx12.5.
|
||||
*/
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE) {
|
||||
const int subgroup_id_index =
|
||||
push_end == (anv_drv_const_offset(cs.subgroup_id) +
|
||||
anv_drv_const_size(cs.subgroup_id)) ?
|
||||
(anv_drv_const_offset(cs.subgroup_id) - push_start) / 4 : -1;
|
||||
struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
|
||||
brw_cs_fill_push_const_info(devinfo, cs_prog_data, subgroup_id_index);
|
||||
}
|
||||
|
||||
const struct anv_push_range push_constant_range = {
|
||||
.set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
|
||||
|
|
|
|||
|
|
@ -53,6 +53,20 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin)
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
 * Replace a subgroup ID intrinsic with a load of the driver uniform
 * cs.subgroup_id on platforms with verx10 < 125.
 *
 * When pdevice->info.verx10 >= 125 the intrinsic is left alone and false is
 * returned. Otherwise the replacement load is built right before the
 * intrinsic and substituted for its result.
 *
 * \param b        builder used to emit the replacement
 * \param intrin   intrinsic to replace
 * \param pdevice  physical device, only info.verx10 is read here
 * \return true when the intrinsic was replaced, false when skipped
 */
static bool
|
||||
lower_subgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
const struct anv_physical_device *pdevice)
|
||||
{
|
||||
if (pdevice->info.verx10 >= 125)
|
||||
return false;
|
||||

||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def_replace(&intrin->def,
|
||||
anv_load_driver_uniform(b, 1, cs.subgroup_id));
|
||||

||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin)
|
||||
{
|
||||
|
|
@ -72,6 +86,8 @@ lower_driver_values(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
|||
return lower_load_constant(b, intrin);
|
||||
case nir_intrinsic_load_base_workgroup_id:
|
||||
return lower_base_workgroup_id(b, intrin);
|
||||
case nir_intrinsic_load_subgroup_id:
|
||||
return lower_subgroup_id(b, intrin, data);
|
||||
case nir_intrinsic_load_ray_query_global_intel:
|
||||
return lower_ray_query_globals(b, intrin);
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -94,7 +94,6 @@ anv_shader_internal_create(struct anv_device *device,
|
|||
prog_data_size);
|
||||
VK_MULTIALLOC_DECL(&ma, struct intel_shader_reloc, prog_data_relocs,
|
||||
prog_data_in->num_relocs);
|
||||
VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
|
||||
VK_MULTIALLOC_DECL(&ma, void, code, kernel_size);
|
||||
|
||||
VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
|
||||
|
|
@ -151,7 +150,6 @@ anv_shader_internal_create(struct anv_device *device,
|
|||
typed_memcpy(prog_data_relocs, prog_data_in->relocs,
|
||||
prog_data_in->num_relocs);
|
||||
prog_data->relocs = prog_data_relocs;
|
||||
prog_data->param = prog_data_param;
|
||||
shader->prog_data = prog_data;
|
||||
shader->prog_data_size = prog_data_size;
|
||||
|
||||
|
|
@ -210,7 +208,6 @@ anv_shader_internal_serialize(struct vk_pipeline_cache_object *object,
|
|||
assert(shader->prog_data_size <= sizeof(prog_data));
|
||||
memcpy(&prog_data, shader->prog_data, shader->prog_data_size);
|
||||
prog_data.base.relocs = NULL;
|
||||
prog_data.base.param = NULL;
|
||||
blob_write_bytes(blob, &prog_data, shader->prog_data_size);
|
||||
|
||||
blob_write_bytes(blob, shader->prog_data->relocs,
|
||||
|
|
|
|||
|
|
@ -111,7 +111,6 @@ anv_shader_serialize(struct vk_device *device,
|
|||
union brw_any_prog_data prog_data;
|
||||
memcpy(&prog_data, shader->prog_data, brw_prog_data_size(vk_shader->stage));
|
||||
prog_data.base.relocs = NULL;
|
||||
prog_data.base.param = NULL;
|
||||
|
||||
blob_write_bytes(blob, &prog_data, brw_prog_data_size(vk_shader->stage));
|
||||
|
||||
|
|
@ -584,9 +583,6 @@ anv_shader_create(struct anv_device *device,
|
|||
const uint32_t cmd_data_dwords = anv_genX(device->info, shader_cmd_size)(
|
||||
device, stage);
|
||||
|
||||
/* We never need this at runtime */
|
||||
shader_data->prog_data.base.param = NULL;
|
||||
|
||||
VK_MULTIALLOC(ma);
|
||||
VK_MULTIALLOC_DECL(&ma, struct anv_shader, shader, 1);
|
||||
VK_MULTIALLOC_DECL(&ma, uint32_t, cmd_data, cmd_data_dwords);
|
||||
|
|
|
|||
|
|
@ -1473,8 +1473,6 @@ anv_shader_lower_nir(struct anv_device *device,
|
|||
dynamic_descriptors_offsets,
|
||||
&shader_data->bind_map, &shader_data->push_map, mem_ctx);
|
||||
|
||||
NIR_PASS(_, nir, anv_nir_lower_driver_values, pdevice);
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
|
||||
anv_nir_ubo_addr_format(pdevice, shader_data->key.base.robust_flags));
|
||||
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
|
||||
|
|
@ -1545,28 +1543,6 @@ anv_shader_lower_nir(struct anv_device *device,
|
|||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, anv_nir_update_resource_intel_block);
|
||||
|
||||
NIR_PASS(_, nir, anv_nir_compute_push_layout,
|
||||
pdevice, shader_data->key.base.robust_flags,
|
||||
&(struct anv_nir_push_layout_info) {
|
||||
.separate_tessellation = (nir->info.stage == MESA_SHADER_TESS_CTRL &&
|
||||
shader_data->key.tcs.separate_tess_vue_layout) ||
|
||||
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
|
||||
shader_data->key.tes.separate_tess_vue_layout),
|
||||
.fragment_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
brw_wm_prog_key_is_dynamic(&shader_data->key.wm),
|
||||
.mesh_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
shader_data->key.wm.mesh_input == INTEL_SOMETIMES,
|
||||
},
|
||||
&shader_data->key.base,
|
||||
&shader_data->prog_data.base,
|
||||
&shader_data->bind_map, &shader_data->push_map,
|
||||
mem_ctx);
|
||||
|
||||
NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
|
||||
shader_data->bind_map.layout_type);
|
||||
|
||||
if (mesa_shader_stage_uses_workgroup(nir->info.stage)) {
|
||||
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
|
||||
nir_var_mem_shared, shared_type_info);
|
||||
|
|
@ -1597,6 +1573,30 @@ anv_shader_lower_nir(struct anv_device *device,
|
|||
&shader_data->prog_data.cs);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, anv_nir_lower_driver_values, pdevice);
|
||||
|
||||
NIR_PASS(_, nir, anv_nir_update_resource_intel_block);
|
||||
|
||||
NIR_PASS(_, nir, anv_nir_compute_push_layout,
|
||||
pdevice, shader_data->key.base.robust_flags,
|
||||
&(struct anv_nir_push_layout_info) {
|
||||
.separate_tessellation = (nir->info.stage == MESA_SHADER_TESS_CTRL &&
|
||||
shader_data->key.tcs.separate_tess_vue_layout) ||
|
||||
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
|
||||
shader_data->key.tes.separate_tess_vue_layout),
|
||||
.fragment_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
brw_wm_prog_key_is_dynamic(&shader_data->key.wm),
|
||||
.mesh_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
shader_data->key.wm.mesh_input == INTEL_SOMETIMES,
|
||||
},
|
||||
&shader_data->key.base,
|
||||
&shader_data->prog_data.base,
|
||||
&shader_data->bind_map, &shader_data->push_map,
|
||||
mem_ctx);
|
||||
|
||||
NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
|
||||
shader_data->bind_map.layout_type);
|
||||
|
||||
shader_data->push_desc_info.push_set_buffer =
|
||||
anv_nir_loads_push_desc_buffer(
|
||||
nir, set_layouts, set_layout_count, &shader_data->bind_map);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue