mirror of https://gitlab.freedesktop.org/mesa/mesa.git
intel: rework CL pre-compile
Stolen from asahi_clc :)

We drop the nasty LLVM17+ workaround code (Thanks Alyssa!)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Tested-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Dylan Baker <None>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33014>
parent 5adac011b8
commit 6768eb31e5

7 changed files with 233 additions and 601 deletions
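The heart of the rework is an API change: the SPIR-V-to-NIR helpers lose their gfx_version and llvm17_wa parameters. A minimal sketch of the prototype change, assembled from the header hunks below (the *_old name is made up here purely to show both shapes side by side):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef struct nir_shader nir_shader;

/* Before: callers passed the gfx version and opted into the LLVM 17
 * scratch workaround (hypothetical name, real old signature). */
nir_shader *brw_nir_from_spirv_old(void *mem_ctx, unsigned gfx_version,
                                   const uint32_t *spirv, size_t spirv_size,
                                   bool llvm17_wa);

/* After: only the SPIR-V words and their size remain. */
nir_shader *brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv,
                               size_t spirv_size);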
@@ -325,11 +325,9 @@ iris_destroy_program_cache(struct iris_context *ice)
 
 static void
 link_libintel_shaders(nir_shader *nir,
-                      const struct intel_device_info *devinfo,
                       const uint32_t *spv_code, uint32_t spv_size)
 {
-   nir_shader *libintel = brw_nir_from_spirv(nir, devinfo->ver,
-                                             spv_code, spv_size, true);
+   nir_shader *libintel = brw_nir_from_spirv(nir, spv_code, spv_size);
 
    nir_link_shader_functions(nir, libintel);
    NIR_PASS_V(nir, nir_inline_functions);
@@ -342,6 +340,7 @@ link_libintel_shaders(nir_shader *nir,
               nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
               nir_var_mem_global,
               nir_address_format_62bit_generic);
+   NIR_PASS_V(nir, nir_lower_scratch_to_var);
 }
 
 void
@@ -378,7 +377,7 @@ iris_ensure_indirect_generation_shader(struct iris_batch *batch)
 
    nir_shader *nir = b.shader;
 
-   link_libintel_shaders(nir, screen->devinfo, spv_code, spv_size);
+   link_libintel_shaders(nir, spv_code, spv_size);
 
    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
    NIR_PASS_V(nir, nir_opt_cse);
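Pieced together from the two iris hunks above, the link helper now reads roughly as follows. This is a sketch, not the exact function body: the passes between the two hunks are elided.

static void
link_libintel_shaders(nir_shader *nir,
                      const uint32_t *spv_code, uint32_t spv_size)
{
   /* Translate the precompiled SPIR-V library; no gfx version or LLVM 17
    * workaround flag is needed anymore. */
   nir_shader *libintel = brw_nir_from_spirv(nir, spv_code, spv_size);

   /* Resolve the shader's calls into the library and inline them. */
   nir_link_shader_functions(nir, libintel);
   NIR_PASS_V(nir, nir_inline_functions);

   /* (passes elided between the two hunks) */

   /* Generic pointers are lowered here, at link time, because the library
    * translation now keeps derefs intact for cross-shader function calls. */
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
              nir_var_mem_global,
              nir_address_format_62bit_generic);
   NIR_PASS_V(nir, nir_lower_scratch_to_var);
}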
@@ -66,10 +66,6 @@ brw_kernel_from_spirv(struct brw_compiler *compiler,
                       const char *entrypoint_name,
                       char **error_str);
 
-nir_shader *
-brw_nir_from_spirv(void *mem_ctx, unsigned gfx_version,
-                   const uint32_t *spirv, size_t spirv_size, bool llvm17_wa);
-
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
@@ -294,8 +294,7 @@ brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
 bool brw_nir_uses_inline_data(nir_shader *shader);
 
 nir_shader *
-brw_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv,
-                   size_t spirv_size, bool llvm17_wa);
+brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size);
 
 #ifdef __cplusplus
 }
@@ -13,167 +13,70 @@
 #include "dev/intel_debug.h"
-#include "util/u_dynarray.h"
-
-static nir_def *
-rebuild_value_from_store(struct util_dynarray *stores,
-                         nir_def *value, unsigned read_offset)
-{
-   unsigned read_size = value->num_components * value->bit_size / 8;
-
-   util_dynarray_foreach(stores, nir_intrinsic_instr *, _store) {
-      nir_intrinsic_instr *store = *_store;
-
-      unsigned write_offset = nir_src_as_uint(store->src[1]);
-      unsigned write_size = nir_src_num_components(store->src[0]) *
-                            nir_src_bit_size(store->src[0]) / 8;
-      if (write_offset <= read_offset &&
-          (write_offset + write_size) >= (read_offset + read_size)) {
-         assert(nir_block_dominates(store->instr.block, value->parent_instr->block));
-         assert(write_size == read_size);
-         return store->src[0].ssa;
-      }
-   }
-   unreachable("Matching scratch store not found");
-}
-
-/**
- * Remove temporary variables stored to scratch to be then reloaded
- * immediately. Remap the load to the store SSA value.
- *
- * This workaround is only meant to be applied to shaders in src/intel/shaders
- * were we know there should be no issue. More complex cases might not work
- * with this approach.
- */
-static bool
-nir_remove_llvm17_scratch(nir_shader *nir)
-{
-   struct util_dynarray scratch_stores;
-   void *mem_ctx = ralloc_context(NULL);
-
-   util_dynarray_init(&scratch_stores, mem_ctx);
-
-   nir_foreach_function_impl(func, nir) {
-      nir_foreach_block(block, func) {
-         nir_foreach_instr(instr, block) {
-            if (instr->type != nir_instr_type_intrinsic)
-               continue;
-
-            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
-            if (intrin->intrinsic != nir_intrinsic_store_scratch)
-               continue;
-
-            nir_const_value *offset = nir_src_as_const_value(intrin->src[1]);
-            if (offset != NULL) {
-               util_dynarray_append(&scratch_stores, nir_intrinsic_instr *, intrin);
-            }
-         }
-      }
-   }
-
-   bool progress = false;
-   if (util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) > 0) {
-      nir_foreach_function_impl(func, nir) {
-         nir_foreach_block(block, func) {
-            nir_foreach_instr_safe(instr, block) {
-               if (instr->type != nir_instr_type_intrinsic)
-                  continue;
-
-               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
-               if (intrin->intrinsic != nir_intrinsic_load_scratch)
-                  continue;
-
-               nir_const_value *offset = nir_src_as_const_value(intrin->src[0]);
-               if (offset == NULL)
-                  continue;
-
-               nir_def_replace(&intrin->def,
-                               rebuild_value_from_store(&scratch_stores, &intrin->def, nir_src_as_uint(intrin->src[0])));
-
-               progress = true;
-            }
-         }
-      }
-   }
-
-   util_dynarray_foreach(&scratch_stores, nir_intrinsic_instr *, _store) {
-      nir_intrinsic_instr *store = *_store;
-      nir_instr_remove(&store->instr);
-   }
-
-   /* Quick sanity check */
-   assert(util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) == 0 ||
-          progress);
-
-   ralloc_free(mem_ctx);
-
-   return progress;
-}
 
 static void
-cleanup_llvm17_scratch(nir_shader *nir)
+optimize(nir_shader *nir)
 {
-   {
-      bool progress;
-      do {
-         progress = false;
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-         NIR_PASS(progress, nir, nir_opt_algebraic);
-      } while (progress);
-   }
+   bool progress;
+   do {
+      progress = false;
 
-   nir_remove_llvm17_scratch(nir);
+      NIR_PASS(progress, nir, nir_split_var_copies);
+      NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
+      NIR_PASS(progress, nir, nir_lower_var_copies);
+      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
 
-   {
-      bool progress;
-      do {
-         progress = false;
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-         NIR_PASS(progress, nir, nir_opt_algebraic);
-      } while (progress);
-   }
+      NIR_PASS(progress, nir, nir_copy_prop);
+      NIR_PASS(progress, nir, nir_opt_remove_phis);
+      NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
+      NIR_PASS(progress, nir, nir_opt_dce);
+      NIR_PASS(progress, nir, nir_opt_dead_cf);
+      NIR_PASS(progress, nir, nir_opt_cse);
+      NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
+      NIR_PASS(progress, nir, nir_opt_phi_precision);
+      NIR_PASS(progress, nir, nir_opt_algebraic);
+      NIR_PASS(progress, nir, nir_opt_constant_folding);
+
+      NIR_PASS(progress, nir, nir_opt_deref);
+      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
+      NIR_PASS(progress, nir, nir_opt_undef);
+      NIR_PASS(progress, nir, nir_lower_undef_to_zero);
+
+      NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
+      NIR_PASS(progress, nir, nir_opt_loop_unroll);
+
+   } while (progress);
 }
 
+static const struct spirv_capabilities spirv_caps = {
+   .Addresses = true,
+   .Float16 = true,
+   .Float64 = true,
+   .Groups = true,
+   .StorageImageWriteWithoutFormat = true,
+   .Int8 = true,
+   .Int16 = true,
+   .Int64 = true,
+   .Int64Atomics = true,
+   .Kernel = true,
+   .Linkage = true, /* We receive linked kernel from clc */
+   .DenormFlushToZero = true,
+   .DenormPreserve = true,
+   .SignedZeroInfNanPreserve = true,
+   .RoundingModeRTE = true,
+   .RoundingModeRTZ = true,
+   .GenericPointer = true,
+   .GroupNonUniform = true,
+   .GroupNonUniformArithmetic = true,
+   .GroupNonUniformClustered = true,
+   .GroupNonUniformBallot = true,
+   .GroupNonUniformQuad = true,
+   .GroupNonUniformShuffle = true,
+   .GroupNonUniformVote = true,
+   .SubgroupDispatch = true,
+};
+
 nir_shader *
-brw_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv,
-                   size_t spirv_size, bool llvm17_wa)
+brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size)
 {
-   assert(gfx_version >= 9);
-
-   static const struct spirv_capabilities spirv_caps = {
-      .Addresses = true,
-      .Float16 = true,
-      .Float64 = true,
-      .Groups = true,
-      .StorageImageWriteWithoutFormat = true,
-      .Int8 = true,
-      .Int16 = true,
-      .Int64 = true,
-      .Int64Atomics = true,
-      .Kernel = true,
-      .Linkage = true, /* We receive linked kernel from clc */
-      .DenormFlushToZero = true,
-      .DenormPreserve = true,
-      .SignedZeroInfNanPreserve = true,
-      .RoundingModeRTE = true,
-      .RoundingModeRTZ = true,
-      .GenericPointer = true,
-      .GroupNonUniform = true,
-      .GroupNonUniformArithmetic = true,
-      .GroupNonUniformClustered = true,
-      .GroupNonUniformBallot = true,
-      .GroupNonUniformQuad = true,
-      .GroupNonUniformShuffle = true,
-      .GroupNonUniformVote = true,
-      .SubgroupDispatch = true,
-   };
    struct spirv_to_nir_options spirv_options = {
       .environment = NIR_SPIRV_OPENCL,
       .capabilities = &spirv_caps,
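For readers unfamiliar with the idiom in the new optimize() helper above: NIR_PASS(progress, ...) sets progress to true whenever the pass changes the shader, so the loop reruns the whole pass list until a full sweep is a no-op. A minimal sketch of the same fixed-point pattern, using real NIR passes:

/* Minimal sketch of the NIR fixed-point loop used by optimize() above. */
static void
run_until_fixed_point(nir_shader *nir)
{
   bool progress;
   do {
      progress = false;                        /* assume this sweep changes nothing */
      NIR_PASS(progress, nir, nir_copy_prop);  /* flags progress on any change */
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_cse);
   } while (progress);                         /* stop once a sweep is a no-op */
}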
@@ -197,163 +100,79 @@ brw_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv,
    ralloc_steal(mem_ctx, nir);
    nir->info.name = ralloc_strdup(nir, "library");
 
-   if (INTEL_DEBUG(DEBUG_CS)) {
-      /* Re-index SSA defs so we print more sensible numbers. */
-      nir_foreach_function_impl(impl, nir) {
-         nir_index_ssa_defs(impl);
-      }
-
-      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
-      nir_print_shader(nir, stderr);
-   }
+   nir_fixup_is_exported(nir);
 
+   NIR_PASS(_, nir, nir_lower_system_values);
+   NIR_PASS(_, nir, nir_lower_calls_to_builtins);
 
-   nir_lower_printf_options printf_opts = {
-      .ptr_bit_size = 64,
-      .use_printf_base_identifier = true,
-   };
-   NIR_PASS_V(nir, nir_lower_printf, &printf_opts);
+   NIR_PASS_V(nir, nir_lower_printf, &(const struct nir_lower_printf_options) {
+      .ptr_bit_size = 64,
+      .use_printf_base_identifier = true,
+   });
 
    NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);
+   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
+   NIR_PASS(_, nir, nir_lower_returns);
+   NIR_PASS(_, nir, nir_inline_functions);
+   //nir_remove_non_exported(nir);
+   NIR_PASS(_, nir, nir_copy_prop);
+   NIR_PASS(_, nir, nir_opt_deref);
 
-   /* We have to lower away local constant initializers right before we
-    * inline functions. That way they get properly initialized at the top
-    * of the function and not at the top of its caller.
-    */
+   /* We can't deal with constant data, get rid of it */
+   nir_lower_constant_to_temp(nir);
 
-   /* We can go ahead and lower the rest of the constant initializers. We do
-    * this here so that nir_remove_dead_variables and split_per_member_structs
-    * below see the corresponding stores.
-    */
-   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp |
-                                                      nir_var_function_temp));
-   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
-              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
-   {
-      bool progress;
-      do
-      {
-         progress = false;
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
-         NIR_PASS(progress, nir, nir_opt_deref);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_opt_undef);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
-         NIR_PASS(progress, nir, nir_opt_algebraic);
-      } while (progress);
-   }
+   NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);
 
-   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
-   NIR_PASS_V(nir, nir_lower_returns);
-   NIR_PASS_V(nir, nir_inline_functions);
-
-   assert(nir->scratch_size == 0);
-   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);
-
-   {
-      bool progress;
-      do
-      {
-         progress = false;
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
-         NIR_PASS(progress, nir, nir_opt_deref);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_opt_undef);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-         NIR_PASS(progress, nir, nir_split_var_copies);
-         NIR_PASS(progress, nir, nir_lower_var_copies);
-         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
-         NIR_PASS(progress, nir, nir_opt_algebraic);
-         NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
-         NIR_PASS(progress, nir, nir_opt_dead_cf);
-         NIR_PASS(progress, nir, nir_opt_remove_phis);
-         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
-         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
-         NIR_PASS(progress, nir, nir_opt_memcpy);
-      } while (progress);
-   }
-
-   NIR_PASS_V(nir, nir_scale_fdiv);
-
-   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
-              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
-
-   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);
-
-   nir->scratch_size = 0;
-   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
-              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
-              nir_var_mem_global | nir_var_mem_constant,
-              glsl_get_cl_type_size_align);
-
-   // Lower memcpy - needs to wait until types are sized
-   {
-      bool progress;
-      do {
-         progress = false;
-         NIR_PASS(progress, nir, nir_opt_memcpy);
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
-         NIR_PASS(progress, nir, nir_opt_deref);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_split_var_copies);
-         NIR_PASS(progress, nir, nir_lower_var_copies);
-         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-      } while (progress);
-   }
-   NIR_PASS_V(nir, nir_lower_memcpy);
-
-   NIR_PASS_V(nir, nir_lower_explicit_io,
-              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp | nir_var_uniform,
-              nir_address_format_32bit_offset_as_64bit);
-
-   NIR_PASS_V(nir, nir_lower_system_values);
-
-   /* Hopefully we can drop this once lower_vars_to_ssa has improved to not
-    * lower everything to scratch.
+   /* LLVM loves take advantage of the fact that vec3s in OpenCL are 16B
+    * aligned and so it can just read/write them as vec4s. This results in a
+    * LOT of vec4->vec3 casts on loads and stores. One solution to this
+    * problem is to get rid of all vec3 variables.
     */
-   if (llvm17_wa)
-      cleanup_llvm17_scratch(nir);
+   NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
+            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
+            nir_var_mem_global | nir_var_mem_constant);
 
+   /* We assign explicit types early so that the optimizer can take advantage
+    * of that information and hopefully get rid of some of our memcpys.
+    */
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
+            nir_var_mem_shared | nir_var_mem_global,
+            glsl_get_cl_type_size_align);
+
+   optimize(nir);
+
+   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);
+
+   /* Lower again, this time after dead-variables to get more compact variable
+    * layouts.
+    */
+   nir->global_mem_size = 0;
+   nir->scratch_size = 0;
+   nir->info.shared_size = 0;
-   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
-              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
-              glsl_get_cl_type_size_align);
-   if (nir->constant_data_size > 0) {
-      assert(nir->constant_data == NULL);
-      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
-      nir_gather_explicit_io_initializers(nir, nir->constant_data,
-                                          nir->constant_data_size,
-                                          nir_var_mem_constant);
-   }
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
+            nir_var_mem_global | nir_var_mem_constant,
+            glsl_get_cl_type_size_align);
+   assert(nir->constant_data_size == 0);
 
-   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
-              nir_address_format_64bit_global);
+   NIR_PASS(_, nir, nir_lower_memcpy);
 
-   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
-              nir_address_format_32bit_offset_as_64bit);
+   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
+            nir_address_format_64bit_global);
 
-   NIR_PASS_V(nir, nir_lower_explicit_io,
-              nir_var_shader_temp | nir_var_function_temp |
-              nir_var_mem_shared | nir_var_mem_global,
-              nir_address_format_62bit_generic);
+   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
+            nir_address_format_64bit_global);
+
+   /* Note: we cannot lower explicit I/O here, because we need derefs in tact
+    * for function calls into the library to work.
+    */
 
    if (INTEL_DEBUG(DEBUG_CS)) {
       /* Re-index SSA defs so we print more sensible numbers. */
       nir_foreach_function_impl(impl, nir) {
          nir_index_ssa_defs(impl);
       }
 
      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }
+   NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
+   NIR_PASS(_, nir, nir_opt_if, 0);
+   NIR_PASS(_, nir, nir_opt_idiv_const, 16);
 
+   optimize(nir);
 
    return nir;
 }
@@ -275,8 +275,7 @@ const struct glsl_type *elk_nir_get_var_type(const struct nir_shader *nir,
 void elk_nir_adjust_payload(nir_shader *shader);
 
 nir_shader *
-elk_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv,
-                   size_t spirv_size, bool llvm17_wa);
+elk_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size);
 
 #ifdef __cplusplus
 }
@@ -13,167 +13,70 @@
 #include "dev/intel_debug.h"
-#include "util/u_dynarray.h"
-
-static nir_def *
-rebuild_value_from_store(struct util_dynarray *stores,
-                         nir_def *value, unsigned read_offset)
-{
-   unsigned read_size = value->num_components * value->bit_size / 8;
-
-   util_dynarray_foreach(stores, nir_intrinsic_instr *, _store) {
-      nir_intrinsic_instr *store = *_store;
-
-      unsigned write_offset = nir_src_as_uint(store->src[1]);
-      unsigned write_size = nir_src_num_components(store->src[0]) *
-                            nir_src_bit_size(store->src[0]) / 8;
-      if (write_offset <= read_offset &&
-          (write_offset + write_size) >= (read_offset + read_size)) {
-         assert(nir_block_dominates(store->instr.block, value->parent_instr->block));
-         assert(write_size == read_size);
-         return store->src[0].ssa;
-      }
-   }
-   unreachable("Matching scratch store not found");
-}
-
-/**
- * Remove temporary variables stored to scratch to be then reloaded
- * immediately. Remap the load to the store SSA value.
- *
- * This workaround is only meant to be applied to shaders in src/intel/shaders
- * were we know there should be no issue. More complex cases might not work
- * with this approach.
- */
-static bool
-nir_remove_llvm17_scratch(nir_shader *nir)
-{
-   struct util_dynarray scratch_stores;
-   void *mem_ctx = ralloc_context(NULL);
-
-   util_dynarray_init(&scratch_stores, mem_ctx);
-
-   nir_foreach_function_impl(func, nir) {
-      nir_foreach_block(block, func) {
-         nir_foreach_instr(instr, block) {
-            if (instr->type != nir_instr_type_intrinsic)
-               continue;
-
-            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
-            if (intrin->intrinsic != nir_intrinsic_store_scratch)
-               continue;
-
-            nir_const_value *offset = nir_src_as_const_value(intrin->src[1]);
-            if (offset != NULL) {
-               util_dynarray_append(&scratch_stores, nir_intrinsic_instr *, intrin);
-            }
-         }
-      }
-   }
-
-   bool progress = false;
-   if (util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) > 0) {
-      nir_foreach_function_impl(func, nir) {
-         nir_foreach_block(block, func) {
-            nir_foreach_instr_safe(instr, block) {
-               if (instr->type != nir_instr_type_intrinsic)
-                  continue;
-
-               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
-               if (intrin->intrinsic != nir_intrinsic_load_scratch)
-                  continue;
-
-               nir_const_value *offset = nir_src_as_const_value(intrin->src[0]);
-               if (offset == NULL)
-                  continue;
-
-               nir_def_replace(&intrin->def,
-                               rebuild_value_from_store(&scratch_stores, &intrin->def, nir_src_as_uint(intrin->src[0])));
-
-               progress = true;
-            }
-         }
-      }
-   }
-
-   util_dynarray_foreach(&scratch_stores, nir_intrinsic_instr *, _store) {
-      nir_intrinsic_instr *store = *_store;
-      nir_instr_remove(&store->instr);
-   }
-
-   /* Quick sanity check */
-   assert(util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) == 0 ||
-          progress);
-
-   ralloc_free(mem_ctx);
-
-   return progress;
-}
 
 static void
-cleanup_llvm17_scratch(nir_shader *nir)
+optimize(nir_shader *nir)
 {
-   {
-      bool progress;
-      do {
-         progress = false;
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-         NIR_PASS(progress, nir, nir_opt_algebraic);
-      } while (progress);
-   }
+   bool progress;
+   do {
+      progress = false;
 
-   nir_remove_llvm17_scratch(nir);
+      NIR_PASS(progress, nir, nir_split_var_copies);
+      NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
+      NIR_PASS(progress, nir, nir_lower_var_copies);
+      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
 
-   {
-      bool progress;
-      do {
-         progress = false;
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-         NIR_PASS(progress, nir, nir_opt_algebraic);
-      } while (progress);
-   }
+      NIR_PASS(progress, nir, nir_copy_prop);
+      NIR_PASS(progress, nir, nir_opt_remove_phis);
+      NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
+      NIR_PASS(progress, nir, nir_opt_dce);
+      NIR_PASS(progress, nir, nir_opt_dead_cf);
+      NIR_PASS(progress, nir, nir_opt_cse);
+      NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
+      NIR_PASS(progress, nir, nir_opt_phi_precision);
+      NIR_PASS(progress, nir, nir_opt_algebraic);
+      NIR_PASS(progress, nir, nir_opt_constant_folding);
+
+      NIR_PASS(progress, nir, nir_opt_deref);
+      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
+      NIR_PASS(progress, nir, nir_opt_undef);
+      NIR_PASS(progress, nir, nir_lower_undef_to_zero);
+
+      NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
+      NIR_PASS(progress, nir, nir_opt_loop_unroll);
+
+   } while (progress);
 }
 
+static const struct spirv_capabilities spirv_caps = {
+   .Addresses = true,
+   .Float16 = true,
+   .Float64 = true,
+   .Groups = true,
+   .StorageImageWriteWithoutFormat = true,
+   .Int8 = true,
+   .Int16 = true,
+   .Int64 = true,
+   .Int64Atomics = true,
+   .Kernel = true,
+   .Linkage = true, /* We receive linked kernel from clc */
+   .DenormFlushToZero = true,
+   .DenormPreserve = true,
+   .SignedZeroInfNanPreserve = true,
+   .RoundingModeRTE = true,
+   .RoundingModeRTZ = true,
+   .GenericPointer = true,
+   .GroupNonUniform = true,
+   .GroupNonUniformArithmetic = true,
+   .GroupNonUniformClustered = true,
+   .GroupNonUniformBallot = true,
+   .GroupNonUniformQuad = true,
+   .GroupNonUniformShuffle = true,
+   .GroupNonUniformVote = true,
+   .SubgroupDispatch = true,
+};
+
 nir_shader *
-elk_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv,
-                   size_t spirv_size, bool llvm17_wa)
+elk_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size)
 {
-   assert(gfx_version < 9);
-
-   static const struct spirv_capabilities spirv_caps = {
-      .Addresses = true,
-      .Float16 = true,
-      .Float64 = true,
-      .Groups = true,
-      .StorageImageWriteWithoutFormat = true,
-      .Int8 = true,
-      .Int16 = true,
-      .Int64 = true,
-      .Int64Atomics = true,
-      .Kernel = true,
-      .Linkage = true, /* We receive linked kernel from clc */
-      .DenormFlushToZero = true,
-      .DenormPreserve = true,
-      .SignedZeroInfNanPreserve = true,
-      .RoundingModeRTE = true,
-      .RoundingModeRTZ = true,
-      .GenericPointer = true,
-      .GroupNonUniform = true,
-      .GroupNonUniformArithmetic = true,
-      .GroupNonUniformClustered = true,
-      .GroupNonUniformBallot = true,
-      .GroupNonUniformQuad = true,
-      .GroupNonUniformShuffle = true,
-      .GroupNonUniformVote = true,
-      .SubgroupDispatch = true,
-   };
    struct spirv_to_nir_options spirv_options = {
       .environment = NIR_SPIRV_OPENCL,
       .capabilities = &spirv_caps,
@@ -197,163 +100,79 @@ elk_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv,
    ralloc_steal(mem_ctx, nir);
    nir->info.name = ralloc_strdup(nir, "library");
 
-   if (INTEL_DEBUG(DEBUG_CS)) {
-      /* Re-index SSA defs so we print more sensible numbers. */
-      nir_foreach_function_impl(impl, nir) {
-         nir_index_ssa_defs(impl);
-      }
-
-      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
-      nir_print_shader(nir, stderr);
-   }
+   nir_fixup_is_exported(nir);
 
+   NIR_PASS(_, nir, nir_lower_system_values);
+   NIR_PASS(_, nir, nir_lower_calls_to_builtins);
 
-   nir_lower_printf_options printf_opts = {
-      .ptr_bit_size = 64,
-      .use_printf_base_identifier = true,
-   };
-   NIR_PASS_V(nir, nir_lower_printf, &printf_opts);
+   NIR_PASS_V(nir, nir_lower_printf, &(const struct nir_lower_printf_options) {
+      .ptr_bit_size = 64,
+      .use_printf_base_identifier = true,
+   });
 
    NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);
+   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
+   NIR_PASS(_, nir, nir_lower_returns);
+   NIR_PASS(_, nir, nir_inline_functions);
+   //nir_remove_non_exported(nir);
+   NIR_PASS(_, nir, nir_copy_prop);
+   NIR_PASS(_, nir, nir_opt_deref);
 
-   /* We have to lower away local constant initializers right before we
-    * inline functions. That way they get properly initialized at the top
-    * of the function and not at the top of its caller.
-    */
+   /* We can't deal with constant data, get rid of it */
+   nir_lower_constant_to_temp(nir);
 
-   /* We can go ahead and lower the rest of the constant initializers. We do
-    * this here so that nir_remove_dead_variables and split_per_member_structs
-    * below see the corresponding stores.
-    */
-   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp |
-                                                      nir_var_function_temp));
-   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
-              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
-   {
-      bool progress;
-      do
-      {
-         progress = false;
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
-         NIR_PASS(progress, nir, nir_opt_deref);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_opt_undef);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
-         NIR_PASS(progress, nir, nir_opt_algebraic);
-      } while (progress);
-   }
+   NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);
 
-   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
-   NIR_PASS_V(nir, nir_lower_returns);
-   NIR_PASS_V(nir, nir_inline_functions);
-
-   assert(nir->scratch_size == 0);
-   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);
-
-   {
-      bool progress;
-      do
-      {
-         progress = false;
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
-         NIR_PASS(progress, nir, nir_opt_deref);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_opt_undef);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-         NIR_PASS(progress, nir, nir_split_var_copies);
-         NIR_PASS(progress, nir, nir_lower_var_copies);
-         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
-         NIR_PASS(progress, nir, nir_opt_algebraic);
-         NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
-         NIR_PASS(progress, nir, nir_opt_dead_cf);
-         NIR_PASS(progress, nir, nir_opt_remove_phis);
-         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
-         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
-         NIR_PASS(progress, nir, nir_opt_memcpy);
-      } while (progress);
-   }
-
-   NIR_PASS_V(nir, nir_scale_fdiv);
-
-   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
-              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
-
-   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);
-
-   nir->scratch_size = 0;
-   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
-              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
-              nir_var_mem_global | nir_var_mem_constant,
-              glsl_get_cl_type_size_align);
-
-   // Lower memcpy - needs to wait until types are sized
-   {
-      bool progress;
-      do {
-         progress = false;
-         NIR_PASS(progress, nir, nir_opt_memcpy);
-         NIR_PASS(progress, nir, nir_copy_prop);
-         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
-         NIR_PASS(progress, nir, nir_opt_deref);
-         NIR_PASS(progress, nir, nir_opt_dce);
-         NIR_PASS(progress, nir, nir_split_var_copies);
-         NIR_PASS(progress, nir, nir_lower_var_copies);
-         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
-         NIR_PASS(progress, nir, nir_opt_constant_folding);
-         NIR_PASS(progress, nir, nir_opt_cse);
-      } while (progress);
-   }
-   NIR_PASS_V(nir, nir_lower_memcpy);
-
-   NIR_PASS_V(nir, nir_lower_explicit_io,
-              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp | nir_var_uniform,
-              nir_address_format_32bit_offset_as_64bit);
-
-   NIR_PASS_V(nir, nir_lower_system_values);
-
-   /* Hopefully we can drop this once lower_vars_to_ssa has improved to not
-    * lower everything to scratch.
+   /* LLVM loves take advantage of the fact that vec3s in OpenCL are 16B
+    * aligned and so it can just read/write them as vec4s. This results in a
+    * LOT of vec4->vec3 casts on loads and stores. One solution to this
+    * problem is to get rid of all vec3 variables.
     */
-   if (llvm17_wa)
-      cleanup_llvm17_scratch(nir);
+   NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
+            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
+            nir_var_mem_global | nir_var_mem_constant);
 
+   /* We assign explicit types early so that the optimizer can take advantage
+    * of that information and hopefully get rid of some of our memcpys.
+    */
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
+            nir_var_mem_shared | nir_var_mem_global,
+            glsl_get_cl_type_size_align);
+
+   optimize(nir);
+
+   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);
+
+   /* Lower again, this time after dead-variables to get more compact variable
+    * layouts.
+    */
+   nir->global_mem_size = 0;
+   nir->scratch_size = 0;
+   nir->info.shared_size = 0;
-   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
-              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
-              glsl_get_cl_type_size_align);
-   if (nir->constant_data_size > 0) {
-      assert(nir->constant_data == NULL);
-      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
-      nir_gather_explicit_io_initializers(nir, nir->constant_data,
-                                          nir->constant_data_size,
-                                          nir_var_mem_constant);
-   }
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
+            nir_var_mem_global | nir_var_mem_constant,
+            glsl_get_cl_type_size_align);
+   assert(nir->constant_data_size == 0);
 
-   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
-              nir_address_format_64bit_global);
+   NIR_PASS(_, nir, nir_lower_memcpy);
 
-   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
-              nir_address_format_32bit_offset_as_64bit);
+   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
+            nir_address_format_64bit_global);
 
-   NIR_PASS_V(nir, nir_lower_explicit_io,
-              nir_var_shader_temp | nir_var_function_temp |
-              nir_var_mem_shared | nir_var_mem_global,
-              nir_address_format_62bit_generic);
+   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
+            nir_address_format_64bit_global);
+
+   /* Note: we cannot lower explicit I/O here, because we need derefs in tact
+    * for function calls into the library to work.
+    */
 
    if (INTEL_DEBUG(DEBUG_CS)) {
       /* Re-index SSA defs so we print more sensible numbers. */
       nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }
 
      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }
+   NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
+   NIR_PASS(_, nir, nir_opt_if, 0);
+   NIR_PASS(_, nir, nir_opt_idiv_const, 16);
 
+   optimize(nir);
 
    return nir;
 }
@@ -56,7 +56,7 @@ load_libanv(struct anv_device *device)
 
    void *mem_ctx = ralloc_context(NULL);
 
-   return brw_nir_from_spirv(mem_ctx, device->info->ver, spv_code, spv_size, true);
+   return brw_nir_from_spirv(mem_ctx, spv_code, spv_size);
 }
 
 static void
@@ -73,6 +73,7 @@ link_libanv(nir_shader *nir, const nir_shader *libanv)
               nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
               nir_var_mem_global,
               nir_address_format_62bit_generic);
+   NIR_PASS_V(nir, nir_lower_scratch_to_var);
 }
 
 static struct anv_shader_bin *
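The anv hunks mirror the iris ones: load_libanv() translates the SPIR-V library once, and link_libanv() finishes with generic-pointer lowering plus the new nir_lower_scratch_to_var pass. A hypothetical end-to-end flow, stitched from the two hunks above (build_internal_kernel_nir() is a made-up stand-in; error handling omitted):

static void
compile_internal_kernel(struct anv_device *device)
{
   /* Translate the precompiled SPIR-V library into NIR once. */
   nir_shader *libanv = load_libanv(device);   /* wraps brw_nir_from_spirv() */

   /* build_internal_kernel_nir() stands in for whatever builds the kernel
    * being compiled (hypothetical helper). */
   nir_shader *nir = build_internal_kernel_nir(device);

   /* Link the library in; per the hunk above, this ends by lowering generic
    * I/O and running nir_lower_scratch_to_var. */
   link_libanv(nir, libanv);
}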