mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 18:00:13 +01:00
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33590>
178 lines
6 KiB
C
178 lines
6 KiB
C
/*
|
|
* Copyright © 2025 Intel Corporation
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "brw_nir.h"
|
|
#include "intel_nir.h"
|
|
|
|
#include "intel_nir.h"
|
|
#include "compiler/nir/nir_builder.h"
|
|
#include "compiler/spirv/nir_spirv.h"
|
|
#include "compiler/spirv/spirv_info.h"
|
|
#include "dev/intel_debug.h"
|
|
#include "util/u_dynarray.h"
|
|
|
|
static void
|
|
optimize(nir_shader *nir)
|
|
{
|
|
bool progress;
|
|
do {
|
|
progress = false;
|
|
|
|
NIR_PASS(progress, nir, nir_split_var_copies);
|
|
NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
|
|
NIR_PASS(progress, nir, nir_lower_var_copies);
|
|
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
|
|
|
|
NIR_PASS(progress, nir, nir_copy_prop);
|
|
NIR_PASS(progress, nir, nir_opt_remove_phis);
|
|
NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
|
|
NIR_PASS(progress, nir, nir_opt_dce);
|
|
NIR_PASS(progress, nir, nir_opt_dead_cf);
|
|
NIR_PASS(progress, nir, nir_opt_cse);
|
|
|
|
nir_opt_peephole_select_options peephole_select_options = {
|
|
.limit = 64,
|
|
.expensive_alu_ok = true,
|
|
};
|
|
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
|
|
NIR_PASS(progress, nir, nir_opt_phi_precision);
|
|
NIR_PASS(progress, nir, nir_opt_algebraic);
|
|
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
|
|
|
NIR_PASS(progress, nir, nir_opt_deref);
|
|
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
|
|
NIR_PASS(progress, nir, nir_opt_undef);
|
|
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
|
|
|
|
NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
|
|
NIR_PASS(progress, nir, nir_opt_loop_unroll);
|
|
|
|
} while (progress);
|
|
}
|
|
|
|
nir_shader *
|
|
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size)
|
|
{
|
|
static const struct spirv_capabilities spirv_caps = {
|
|
.Addresses = true,
|
|
.Float16 = true,
|
|
.Float64 = true,
|
|
.Groups = true,
|
|
.StorageImageWriteWithoutFormat = true,
|
|
.Int8 = true,
|
|
.Int16 = true,
|
|
.Int64 = true,
|
|
.Int64Atomics = true,
|
|
.Kernel = true,
|
|
.Linkage = true, /* We receive linked kernel from clc */
|
|
.DenormFlushToZero = true,
|
|
.DenormPreserve = true,
|
|
.SignedZeroInfNanPreserve = true,
|
|
.RoundingModeRTE = true,
|
|
.RoundingModeRTZ = true,
|
|
.GenericPointer = true,
|
|
.GroupNonUniform = true,
|
|
.GroupNonUniformArithmetic = true,
|
|
.GroupNonUniformClustered = true,
|
|
.GroupNonUniformBallot = true,
|
|
.GroupNonUniformQuad = true,
|
|
.GroupNonUniformShuffle = true,
|
|
.GroupNonUniformVote = true,
|
|
.SubgroupDispatch = true,
|
|
};
|
|
struct spirv_to_nir_options spirv_options = {
|
|
.environment = NIR_SPIRV_OPENCL,
|
|
.capabilities = &spirv_caps,
|
|
.printf = true,
|
|
.shared_addr_format = nir_address_format_62bit_generic,
|
|
.global_addr_format = nir_address_format_62bit_generic,
|
|
.temp_addr_format = nir_address_format_62bit_generic,
|
|
.constant_addr_format = nir_address_format_64bit_global,
|
|
.create_library = true,
|
|
};
|
|
|
|
assert(spirv_size % 4 == 0);
|
|
|
|
const nir_shader_compiler_options *nir_options = &brw_scalar_nir_options;
|
|
|
|
nir_shader *nir =
|
|
spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
|
|
"library", &spirv_options, nir_options);
|
|
nir_validate_shader(nir, "after spirv_to_nir");
|
|
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
|
|
ralloc_steal(mem_ctx, nir);
|
|
nir->info.name = ralloc_strdup(nir, "library");
|
|
|
|
nir_fixup_is_exported(nir);
|
|
|
|
NIR_PASS(_, nir, nir_lower_system_values);
|
|
NIR_PASS(_, nir, nir_lower_calls_to_builtins);
|
|
|
|
NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
|
|
NIR_PASS(_, nir, nir_lower_returns);
|
|
NIR_PASS(_, nir, nir_inline_functions);
|
|
//nir_remove_non_exported(nir);
|
|
NIR_PASS(_, nir, nir_copy_prop);
|
|
NIR_PASS(_, nir, nir_opt_deref);
|
|
|
|
/* We can't deal with constant data, get rid of it */
|
|
nir_lower_constant_to_temp(nir);
|
|
|
|
/* We can go ahead and lower the rest of the constant initializers. We do
|
|
* this here so that nir_remove_dead_variables and split_per_member_structs
|
|
* below see the corresponding stores.
|
|
*/
|
|
NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);
|
|
|
|
/* LLVM loves take advantage of the fact that vec3s in OpenCL are 16B
|
|
* aligned and so it can just read/write them as vec4s. This results in a
|
|
* LOT of vec4->vec3 casts on loads and stores. One solution to this
|
|
* problem is to get rid of all vec3 variables.
|
|
*/
|
|
NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
|
|
nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
|
|
nir_var_mem_global | nir_var_mem_constant);
|
|
|
|
/* We assign explicit types early so that the optimizer can take advantage
|
|
* of that information and hopefully get rid of some of our memcpys.
|
|
*/
|
|
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
|
|
nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
|
|
nir_var_mem_shared | nir_var_mem_global,
|
|
glsl_get_cl_type_size_align);
|
|
|
|
optimize(nir);
|
|
|
|
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);
|
|
|
|
/* Lower again, this time after dead-variables to get more compact variable
|
|
* layouts.
|
|
*/
|
|
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
|
|
nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
|
|
nir_var_mem_global | nir_var_mem_constant,
|
|
glsl_get_cl_type_size_align);
|
|
assert(nir->constant_data_size == 0);
|
|
|
|
NIR_PASS(_, nir, nir_lower_memcpy);
|
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
|
|
nir_address_format_64bit_global);
|
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
|
|
nir_address_format_64bit_global);
|
|
|
|
/* Note: we cannot lower explicit I/O here, because we need derefs in tact
|
|
* for function calls into the library to work.
|
|
*/
|
|
|
|
NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
|
|
NIR_PASS(_, nir, nir_opt_if, 0);
|
|
NIR_PASS(_, nir, nir_opt_idiv_const, 16);
|
|
|
|
optimize(nir);
|
|
|
|
return nir;
|
|
}
|