/*
 * Copyright © 2014 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#ifndef NIR_H
#define NIR_H

#include <stdint.h>
#include "compiler/glsl_types.h"
#include "compiler/glsl/list.h"
#include "compiler/shader_enums.h"
#include "compiler/shader_info.h"
#include "util/bitscan.h"
#include "util/bitset.h"
#include "util/compiler.h"
#include "util/enum_operators.h"
#include "util/format/u_format.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "util/u_math.h"
#include "util/u_printf.h"
#include "nir_defines.h"

#define XXH_INLINE_ALL
#include <stdio.h>
#include "util/xxhash.h"

#ifndef NDEBUG
#include "util/u_debug.h"
#endif /* NDEBUG */

#include "nir_opcodes.h"

#ifdef __cplusplus
extern "C" {
#endif

extern uint32_t nir_debug;
extern bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1];

#ifndef NDEBUG
#define NIR_DEBUG(flag) unlikely(nir_debug &(NIR_DEBUG_##flag))
#else
#define NIR_DEBUG(flag) false
#endif

#define NIR_DEBUG_CLONE                  (1u << 0)
#define NIR_DEBUG_SERIALIZE              (1u << 1)
#define NIR_DEBUG_NOVALIDATE             (1u << 2)
#define NIR_DEBUG_VALIDATE_SSA_DOMINANCE (1u << 3)
#define NIR_DEBUG_TGSI                   (1u << 4)
#define NIR_DEBUG_PRINT_VS               (1u << 5)
#define NIR_DEBUG_PRINT_TCS              (1u << 6)
#define NIR_DEBUG_PRINT_TES              (1u << 7)
#define NIR_DEBUG_PRINT_GS               (1u << 8)
#define NIR_DEBUG_PRINT_FS               (1u << 9)
#define NIR_DEBUG_PRINT_CS               (1u << 10)
#define NIR_DEBUG_PRINT_TS               (1u << 11)
#define NIR_DEBUG_PRINT_MS               (1u << 12)
#define NIR_DEBUG_PRINT_RGS              (1u << 13)
#define NIR_DEBUG_PRINT_AHS              (1u << 14)
#define NIR_DEBUG_PRINT_CHS              (1u << 15)
#define NIR_DEBUG_PRINT_MHS              (1u << 16)
#define NIR_DEBUG_PRINT_IS               (1u << 17)
#define NIR_DEBUG_PRINT_CBS              (1u << 18)
#define NIR_DEBUG_PRINT_KS               (1u << 19)
#define NIR_DEBUG_PRINT_NO_INLINE_CONSTS (1u << 20)
#define NIR_DEBUG_PRINT_INTERNAL         (1u << 21)
#define NIR_DEBUG_PRINT_PASS_FLAGS       (1u << 22)

#define NIR_DEBUG_PRINT (NIR_DEBUG_PRINT_VS |  \
                         NIR_DEBUG_PRINT_TCS | \
                         NIR_DEBUG_PRINT_TES | \
                         NIR_DEBUG_PRINT_GS |  \
                         NIR_DEBUG_PRINT_FS |  \
                         NIR_DEBUG_PRINT_CS |  \
                         NIR_DEBUG_PRINT_TS |  \
                         NIR_DEBUG_PRINT_MS |  \
                         NIR_DEBUG_PRINT_RGS | \
                         NIR_DEBUG_PRINT_AHS | \
                         NIR_DEBUG_PRINT_CHS | \
                         NIR_DEBUG_PRINT_MHS | \
                         NIR_DEBUG_PRINT_IS |  \
                         NIR_DEBUG_PRINT_CBS | \
                         NIR_DEBUG_PRINT_KS)
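
/* Illustrative sketch (not part of the original header): the NIR_DEBUG()
 * macro above is the intended way to test these flags, e.g.
 *
 *    if (NIR_DEBUG(PRINT_VS) && shader->info.stage == MESA_SHADER_VERTEX)
 *       nir_print_shader(shader, stderr);
 *
 * nir_print_shader() is assumed here; it is declared elsewhere in NIR, not
 * in this excerpt.
 */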

#define NIR_FALSE 0u
#define NIR_TRUE (~0u)
#define NIR_MAX_VEC_COMPONENTS 16
#define NIR_MAX_MATRIX_COLUMNS 4
#define NIR_STREAM_PACKED (1 << 8)

typedef uint16_t nir_component_mask_t;

static inline bool
nir_num_components_valid(unsigned num_components)
{
   return (num_components >= 1 &&
           num_components <= 5) ||
          num_components == 8 ||
          num_components == 16;
}

/*
 * Round up a vector size to a vector size that's valid in NIR. At present, NIR
 * supports only vec2-5, vec8, and vec16. Attempting to generate other sizes
 * will fail validation.
 */
static inline unsigned
nir_round_up_components(unsigned n)
{
   return (n > 5) ? util_next_power_of_two(n) : n;
}
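
/* For example (illustrative, not in the original header): passing 3 returns 3
 * since vec3 is already valid, 6 or 7 round up to 8, and anything in 9-16
 * rounds up to 16, matching the vec2-5/vec8/vec16 set accepted by
 * nir_num_components_valid().
 */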

static inline nir_component_mask_t
nir_component_mask(unsigned num_components)
{
   assert(nir_num_components_valid(num_components));
   return (1u << num_components) - 1;
}
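
/* Illustrative values (not part of the original header): nir_component_mask(1)
 * is 0x1, nir_component_mask(4) is 0xf, and nir_component_mask(16) fills every
 * bit of the uint16_t nir_component_mask_t.
 */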

void
nir_process_debug_variable(void);

bool nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                        unsigned old_bit_size,
                                        unsigned new_bit_size);
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size);
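
/* A hedged sketch of the intent (these helpers are defined elsewhere in NIR):
 * reinterpreting a write mask between bit sizes rescales it, so a mask of 0x3
 * covering two 64-bit channels would become 0xf over the equivalent four
 * 32-bit channels, and nir_component_mask_can_reinterpret() reports whether
 * such a conversion is representable at all.
 */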

/** Defines a cast function
 *
 * This macro defines a cast function from in_type to out_type where
 * out_type is some structure type that contains a field of type in_type.
 *
 * Note that you have to be a bit careful as the generated cast function
 * destroys constness.
 */
#define NIR_DEFINE_CAST(name, in_type, out_type, field,   \
                        type_field, type_value)           \
   static inline out_type *                               \
   name(const in_type *parent)                            \
   {                                                      \
      assert(parent && parent->type_field == type_value); \
      return exec_node_data(out_type, parent, field);     \
   }
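
/* As an illustration (hedged; based on how NIR uses this macro elsewhere,
 * not on anything in this excerpt):
 *
 *    NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
 *                    type, nir_instr_type_alu)
 *
 * expands to a nir_instr_as_alu() helper that asserts the instruction really
 * is an ALU instruction and then returns the containing nir_alu_instr via
 * exec_node_data().
 */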

struct nir_function;
struct nir_shader;
struct nir_instr;
struct nir_builder;
struct nir_xfb_info;

/**
 * Description of built-in state associated with a uniform
 *
 * :c:member:`nir_variable.state_slots`
 */
typedef struct {
   gl_state_index16 tokens[STATE_LENGTH];
} nir_state_slot;

/* clang-format off */
typedef enum {
   nir_var_system_value        = (1 << 0),
   nir_var_uniform             = (1 << 1),
   nir_var_shader_in           = (1 << 2),
   nir_var_shader_out          = (1 << 3),
   nir_var_image               = (1 << 4),
   /** Incoming call or ray payload data for ray-tracing shaders */
   nir_var_shader_call_data    = (1 << 5),
   /** Ray hit attributes */
   nir_var_ray_hit_attrib      = (1 << 6),

   /* Modes named nir_var_mem_* have explicit data layout */
   nir_var_mem_ubo             = (1 << 7),
   nir_var_mem_push_const      = (1 << 8),
   nir_var_mem_ssbo            = (1 << 9),
   nir_var_mem_constant        = (1 << 10),
   nir_var_mem_task_payload    = (1 << 11),
   nir_var_mem_node_payload    = (1 << 12),
   nir_var_mem_node_payload_in = (1 << 13),

   nir_var_function_in         = (1 << 14),
   nir_var_function_out        = (1 << 15),
   nir_var_function_inout      = (1 << 16),

   /* Generic modes intentionally come last. See encode_deref_modes() in
    * nir_serialize.c for more details.
    */
   nir_var_shader_temp         = (1 << 17),
   nir_var_function_temp       = (1 << 18),
   nir_var_mem_shared          = (1 << 19),
   nir_var_mem_global          = (1 << 20),

   nir_var_mem_generic         = (nir_var_shader_temp |
                                  nir_var_function_temp |
                                  nir_var_mem_shared |
                                  nir_var_mem_global),

   nir_var_read_only_modes     = nir_var_shader_in | nir_var_uniform |
                                 nir_var_system_value | nir_var_mem_constant |
                                 nir_var_mem_ubo,
   /* Modes where vector derefs can be indexed as arrays. nir_var_shader_out
    * is only for mesh stages. nir_var_system_value is only for kernel stages.
    */
   nir_var_vec_indexable_modes = nir_var_shader_temp | nir_var_function_temp |
                                 nir_var_mem_ubo | nir_var_mem_ssbo |
                                 nir_var_mem_shared | nir_var_mem_global |
                                 nir_var_mem_push_const | nir_var_mem_task_payload |
                                 nir_var_shader_out | nir_var_system_value,
   nir_num_variable_modes      = 21,
   nir_var_all                 = (1 << nir_num_variable_modes) - 1,
} nir_variable_mode;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)
/* clang-format on */
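
/* Rough usage sketch (not from the original header): because the enum is a
 * bitfield, modes combine and test with bitwise operators, e.g.
 *
 *    nir_variable_mode io_modes = nir_var_shader_in | nir_var_shader_out;
 *    bool is_io = (var->data.mode & io_modes) != 0;
 *
 * where `var` is assumed to be a pointer to the nir_variable type defined
 * later in this header.
 */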

/**
 * Rounding modes.
 */
typedef enum {
   nir_rounding_mode_undef = 0,
   nir_rounding_mode_rtne = 1, /* round to nearest even */
   nir_rounding_mode_ru = 2,   /* round up */
   nir_rounding_mode_rd = 3,   /* round down */
   nir_rounding_mode_rtz = 4,  /* round towards zero */
} nir_rounding_mode;

/**
 * Ray query values that can read from a RayQueryKHR object.
 */
typedef enum {
   nir_ray_query_value_intersection_type,
   nir_ray_query_value_intersection_t,
   nir_ray_query_value_intersection_instance_custom_index,
   nir_ray_query_value_intersection_instance_id,
   nir_ray_query_value_intersection_instance_sbt_index,
   nir_ray_query_value_intersection_geometry_index,
   nir_ray_query_value_intersection_primitive_index,
   nir_ray_query_value_intersection_barycentrics,
   nir_ray_query_value_intersection_front_face,
   nir_ray_query_value_intersection_object_ray_direction,
   nir_ray_query_value_intersection_object_ray_origin,
   nir_ray_query_value_intersection_object_to_world,
   nir_ray_query_value_intersection_world_to_object,
   nir_ray_query_value_intersection_candidate_aabb_opaque,
   nir_ray_query_value_tmin,
   nir_ray_query_value_flags,
   nir_ray_query_value_world_ray_direction,
   nir_ray_query_value_world_ray_origin,
   nir_ray_query_value_intersection_triangle_vertex_positions
} nir_ray_query_value;

/**
 * Intel resource flags
 */
typedef enum {
   nir_resource_intel_bindless = 1u << 0,
   nir_resource_intel_pushable = 1u << 1,
   nir_resource_intel_sampler = 1u << 2,
   nir_resource_intel_non_uniform = 1u << 3,
   nir_resource_intel_sampler_embedded = 1u << 4,
} nir_resource_data_intel;

/**
 * Which components to interpret as signed in cmat_muladd.
 * See 'Cooperative Matrix Operands' in SPV_KHR_cooperative_matrix.
 */
typedef enum {
   NIR_CMAT_A_SIGNED = 1u << 0,
   NIR_CMAT_B_SIGNED = 1u << 1,
   NIR_CMAT_C_SIGNED = 1u << 2,
   NIR_CMAT_RESULT_SIGNED = 1u << 3,
} nir_cmat_signed;

typedef union {
   bool b;
   float f32;
   double f64;
   int8_t i8;
   uint8_t u8;
   int16_t i16;
   uint16_t u16;
   int32_t i32;
   uint32_t u32;
   int64_t i64;
   uint64_t u64;
} nir_const_value;

#define nir_const_value_to_array(arr, c, components, m) \
   do {                                                 \
      for (unsigned i = 0; i < components; ++i)         \
         arr[i] = c[i].m;                               \
   } while (false)
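
/* Hedged usage sketch (not part of the original header): copy the 32-bit
 * unsigned payload of a vec4 constant into a plain array by naming the union
 * member to read, e.g.
 *
 *    uint32_t vals[4];
 *    nir_const_value_to_array(vals, value, 4, u32);
 *
 * where `value` is assumed to point at four nir_const_value entries.
 */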

static inline nir_const_value
nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 1:  v.b   = x;  break;
   case 8:  v.u8  = x;  break;
   case 16: v.u16 = x;  break;
   case 32: v.u32 = x;  break;
   case 64: v.u64 = x;  break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

static inline nir_const_value
nir_const_value_for_int(int64_t i, unsigned bit_size)
{
   assert(bit_size <= 64);
   if (bit_size < 64) {
      assert(i >= (-(1ll << (bit_size - 1))));
      assert(i < (1ll << (bit_size - 1)));
   }

   return nir_const_value_for_raw_uint(i, bit_size);
}

static inline nir_const_value
nir_const_value_for_uint(uint64_t u, unsigned bit_size)
{
   assert(bit_size <= 64);
   if (bit_size < 64)
      assert(u < (1ull << bit_size));

   return nir_const_value_for_raw_uint(u, bit_size);
}

static inline nir_const_value
nir_const_value_for_bool(bool b, unsigned bit_size)
{
   /* Booleans use a 0/-1 convention */
   return nir_const_value_for_int(-(int)b, bit_size);
}
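
/* Illustrative behaviour (not in the original header): given the 0/-1 boolean
 * convention above, nir_const_value_for_bool(true, 32) yields a value whose
 * u32 member reads back as 0xffffffff, while nir_const_value_for_uint(7, 16)
 * stores 7 in the u16 member and leaves the rest of the union zeroed.
 */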

/* This one isn't inline because it requires half-float conversion */
nir_const_value nir_const_value_for_float(double b, unsigned bit_size);

static inline int64_t
nir_const_value_as_int(nir_const_value value, unsigned bit_size)
{
   /* clang-format off */
   switch (bit_size) {
   /* int1_t uses 0/-1 convention */
   case 1:  return -(int)value.b;
   case 8:  return value.i8;
   case 16: return value.i16;
   case 32: return value.i32;
   case 64: return value.i64;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */
}

static inline uint64_t
nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
{
   /* clang-format off */
   switch (bit_size) {
   case 1:  return value.b;
   case 8:  return value.u8;
   case 16: return value.u16;
   case 32: return value.u32;
   case 64: return value.u64;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */
}

static inline bool
nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
{
   int64_t i = nir_const_value_as_int(value, bit_size);

   /* Booleans of any size use 0/-1 convention */
   assert(i == 0 || i == -1);

   return i;
}

/* This one isn't inline because it requires half-float conversion */
double nir_const_value_as_float(nir_const_value value, unsigned bit_size);

typedef struct nir_constant {
   /**
    * Value of the constant.
    *
    * The field used to back the values supplied by the constant is determined
    * by the type associated with the ``nir_variable``. Constants may be
    * scalars, vectors, or matrices.
    */
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];

   /* Indicates all the values are 0s which can enable some optimizations */
   bool is_null_constant;

   /* We could get this from var->type, but keeping it here makes clone *much*
    * easier since it doesn't have to care about the type.
    */
   unsigned num_elements;

   /* Array elements / Structure Fields */
   struct nir_constant **elements;
} nir_constant;

/**
 * Layout qualifiers for gl_FragDepth.
 *
 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
 * with a layout qualifier.
 */
typedef enum {
   /** No depth layout is specified. */
   nir_depth_layout_none,
   nir_depth_layout_any,
   nir_depth_layout_greater,
   nir_depth_layout_less,
   nir_depth_layout_unchanged
} nir_depth_layout;

/**
 * Enum keeping track of how a variable was declared.
 */
typedef enum {
   /**
    * Normal declaration.
    */
   nir_var_declared_normally = 0,

   /**
    * Variable is an implicitly declared built-in that has not been explicitly
    * re-declared by the shader.
    */
   nir_var_declared_implicitly,

   /**
    * Variable is implicitly generated by the compiler and should not be
    * visible via the API.
    */
   nir_var_hidden,
} nir_var_declaration_type;

/**
 * Either a uniform, global variable, shader input, or shader output. Based on
 * ir_variable - it should be easy to translate between the two.
 */
typedef struct nir_variable {
   struct exec_node node;

   /**
    * Declared type of the variable
    */
   const struct glsl_type *type;

   /**
    * Declared name of the variable
    */
   char *name;

   struct nir_variable_data {
      /**
       * Storage class of the variable.
       *
       * :c:struct:`nir_variable_mode`
       */
      unsigned mode : 21;

      /**
       * Is the variable read-only?
       *
       * This is set for variables declared as ``const``, shader inputs,
       * and uniforms.
       */
      unsigned read_only : 1;
      unsigned centroid : 1;
      unsigned sample : 1;
      unsigned patch : 1;
      unsigned invariant : 1;

      /**
       * Was an 'invariant' qualifier explicitly set in the shader?
       *
       * This is used to cross-validate GLSL qualifiers.
       */
      unsigned explicit_invariant : 1;

      /**
       * Is the variable a ray query?
       */
      unsigned ray_query : 1;

      /**
       * Precision qualifier.
       *
       * In desktop GLSL we do not care about precision qualifiers at all; in
       * fact, the spec says that precision qualifiers are ignored.
       *
       * To make things easy, we make it so that this field is always
       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
       * have the same precision value and the checks we add in the compiler
       * for this field will never break a desktop shader compile.
       */
      unsigned precision : 2;

      /**
       * Has this variable been statically assigned?
       *
       * This answers whether the variable was assigned in any path of
       * the shader during ast_to_hir. This doesn't answer whether it is
       * still written after dead code removal, nor is it maintained in
       * non-ast_to_hir.cpp (GLSL parsing) paths.
       */
      unsigned assigned : 1;

      /**
       * Can this variable be coalesced with another?
       *
       * This is set by nir_lower_io_to_temporaries to say that any
       * copies involving this variable should stay put. Propagating it can
       * duplicate the resulting load/store, which is not wanted, and may
       * result in a load/store of the variable with an indirect offset which
       * the backend may not be able to handle.
       */
      unsigned cannot_coalesce : 1;

      /**
       * When separate shader programs are enabled, only input/outputs between
       * the stages of a multi-stage separate program can be safely removed
       * from the shader interface. Other input/outputs must remain active.
       *
       * This is also used to make sure xfb varyings that are unused by the
       * fragment shader are not removed.
       */
      unsigned always_active_io : 1;

      /**
       * Interpolation mode for shader inputs / outputs
       *
       * :c:enum:`glsl_interp_mode`
       */
      unsigned interpolation : 3;

      /**
       * If non-zero, then this variable may be packed along with other variables
       * into a single varying slot, so this offset should be applied when
       * accessing components. For example, an offset of 1 means that the x
       * component of this variable is actually stored in component y of the
       * location specified by ``location``.
       */
      unsigned location_frac : 2;

      /**
       * If true, this variable represents an array of scalars that should
       * be tightly packed. In other words, consecutive array elements
       * should be stored one component apart, rather than one slot apart.
       */
      unsigned compact : 1;

      /**
       * Whether this is a fragment shader output implicitly initialized with
       * the previous contents of the specified render target at the
       * framebuffer location corresponding to this shader invocation.
       */
      unsigned fb_fetch_output : 1;

      /**
       * Non-zero if this variable is considered bindless as defined by
       * ARB_bindless_texture.
       */
      unsigned bindless : 1;

      /**
       * Was an explicit binding set in the shader?
       */
      unsigned explicit_binding : 1;

      /**
       * Was the location explicitly set in the shader?
       *
       * If the location is explicitly set in the shader, it **cannot** be changed
       * by the linker or by the API (e.g., calls to ``glBindAttribLocation`` have
       * no effect).
       */
      unsigned explicit_location : 1;

      /* Was the array implicitly sized during linking */
      unsigned implicit_sized_array : 1;

      /**
       * Highest element accessed with a constant array index
       *
       * Not used for non-array variables. -1 means the array has never been
       * accessed.
       */
      int max_array_access;

      /**
       * Does this variable have an initializer?
       *
       * This is used by the linker to cross-validate initializers of global
       * variables.
       */
      unsigned has_initializer : 1;

      /**
       * Is the initializer created by the compiler (glsl_zero_init)
       */
      unsigned is_implicit_initializer : 1;

      /**
       * Is this varying used by transform feedback?
       *
       * This is used by the linker to decide if it's safe to pack the varying.
       */
      unsigned is_xfb : 1;

      /**
       * Is this varying used only by transform feedback?
       *
       * This is used by the linker to decide if it's safe to pack the varying.
       */
      unsigned is_xfb_only : 1;

      /**
       * Was a transform feedback buffer set in the shader?
       */
      unsigned explicit_xfb_buffer : 1;

      /**
       * Was a transform feedback stride set in the shader?
       */
      unsigned explicit_xfb_stride : 1;

      /**
       * Was an explicit offset set in the shader?
       */
      unsigned explicit_offset : 1;

      /**
       * Layout of the matrix. Uses glsl_matrix_layout values.
       */
      unsigned matrix_layout : 2;

      /**
       * Non-zero if this variable was created by lowering a named interface
       * block.
       */
      unsigned from_named_ifc_block : 1;

      /**
       * Unsized array buffer variable.
       */
      unsigned from_ssbo_unsized_array : 1;

      /**
       * Non-zero if the variable must be a shader input. This is useful for
       * constraints on function parameters.
       */
      unsigned must_be_shader_input : 1;

      /**
       * Has this variable been used for reading or writing?
       *
       * Several GLSL semantic checks require knowledge of whether or not a
       * variable has been used. For example, it is an error to redeclare a
       * variable as invariant after it has been used.
       */
      unsigned used : 1;

      /**
       * How the variable was declared. See nir_var_declaration_type.
       *
       * This is used to detect variables generated by the compiler, so they
       * should not be visible via the API.
       */
      unsigned how_declared : 2;

      /**
       * Is this variable per-view? If so, we know it must be an array with
       * size corresponding to the number of views.
       */
      unsigned per_view : 1;

      /**
       * Whether the variable is per-primitive.
       * Can be used by Mesh Shader outputs and corresponding Fragment Shader inputs.
       */
      unsigned per_primitive : 1;

      /**
       * Whether the variable is declared to indicate that a fragment shader
       * input will not have interpolated values.
       */
      unsigned per_vertex : 1;

      /**
       * Layout qualifier for gl_FragDepth. See nir_depth_layout.
       *
       * This is not equal to ``ir_depth_layout_none`` if and only if this
       * variable is ``gl_FragDepth`` and a layout qualifier is specified.
       */
      unsigned depth_layout : 3;

      /**
       * Vertex stream output identifier.
       *
       * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
       * indicate the stream of the i-th component.
       */
      unsigned stream : 9;

      /**
       * See gl_access_qualifier.
       *
       * Access flags for memory variables (SSBO/global), image uniforms, and
       * bindless images in uniforms/inputs/outputs.
       */
      unsigned access : 9;

      /**
       * Descriptor set binding for sampler or UBO.
       */
      unsigned descriptor_set : 5;

#define NIR_VARIABLE_NO_INDEX ~0

      /**
       * Output index for dual source blending or input attachment index. If
       * it is not declared it is NIR_VARIABLE_NO_INDEX.
       */
      unsigned index;

      /**
       * Initial binding point for a sampler or UBO.
       *
       * For array types, this represents the binding point for the first element.
       */
      unsigned binding;

      /**
       * Storage location of the base of this variable
       *
       * The precise meaning of this field depends on the nature of the variable.
       *
       *   - Vertex shader input: one of the values from ``gl_vert_attrib``.
       *   - Vertex shader output: one of the values from ``gl_varying_slot``.
       *   - Geometry shader input: one of the values from ``gl_varying_slot``.
       *   - Geometry shader output: one of the values from ``gl_varying_slot``.
       *   - Fragment shader input: one of the values from ``gl_varying_slot``.
       *   - Fragment shader output: one of the values from ``gl_frag_result``.
       *   - Task shader output: one of the values from ``gl_varying_slot``.
       *   - Mesh shader input: one of the values from ``gl_varying_slot``.
       *   - Mesh shader output: one of the values from ``gl_varying_slot``.
       *   - Uniforms: Per-stage uniform slot number for default uniform block.
       *   - Uniforms: Index within the uniform block definition for UBO members.
       *   - Non-UBO Uniforms: uniform slot number.
       *   - Other: This field is not currently used.
       *
       * If the variable is a uniform, shader input, or shader output, and the
       * slot has not been assigned, the value will be -1.
       */
      int location;

      /** Required alignment of this variable */
      unsigned alignment;

      /**
       * The actual location of the variable in the IR. Only valid for inputs,
       * outputs, uniforms (including samplers and images), and for UBO and SSBO
       * variables in GLSL.
       */
      unsigned driver_location;

      /**
       * Location an atomic counter or transform feedback is stored at.
       */
      unsigned offset;

      union {
         struct {
            /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
            enum pipe_format format;
         } image;

         struct {
            /**
             * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode
             */
            unsigned is_inline_sampler : 1;
            unsigned addressing_mode : 3;
            unsigned normalized_coordinates : 1;
            unsigned filter_mode : 1;
         } sampler;

         struct {
            /**
             * Transform feedback buffer.
             */
            uint16_t buffer : 2;

            /**
             * Transform feedback stride.
             */
            uint16_t stride;
         } xfb;
      };

      /** Name of the node this payload will be enqueued to. */
      const char *node_name;
   } data;

   /**
    * Identifier for this variable generated by nir_index_vars() that is unique
    * among other variables in the same exec_list.
    */
   unsigned index;

   /* Number of nir_variable_data members */
   uint16_t num_members;

   /**
    * For variables with non NULL interface_type, this points to an array of
    * integers such that if the ith member of the interface block is an array,
    * max_ifc_array_access[i] is the maximum array element of that member that
    * has been accessed. If the ith member of the interface block is not an
    * array, max_ifc_array_access[i] is unused.
    *
    * For variables whose type is not an interface block, this pointer is
    * NULL.
    */
   int *max_ifc_array_access;

   /**
    * Built-in state that backs this uniform
    *
    * Once set at variable creation, ``state_slots`` must remain invariant.
    * This is because, ideally, this array would be shared by all clones of
    * this variable in the IR tree. In other words, we'd really like for it
    * to be a fly-weight.
    *
    * If the variable is not a uniform, ``num_state_slots`` will be zero and
    * ``state_slots`` will be ``NULL``.
    */
   /** Number of state slots used. */
   uint16_t num_state_slots;
   /** State descriptors. */
   nir_state_slot *state_slots;

   /**
    * Constant expression assigned in the initializer of the variable
    *
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   nir_constant *constant_initializer;

   /**
    * Global variable assigned in the initializer of the variable
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   struct nir_variable *pointer_initializer;

   /**
    * For variables that are in an interface block or are an instance of an
    * interface block, this is the ``GLSL_TYPE_INTERFACE`` type for that block.
    *
    * ``ir_variable.location``
    */
   const struct glsl_type *interface_type;

   /**
    * Description of per-member data for per-member struct variables
    *
    * This is used for variables which are actually an amalgamation of
    * multiple entities such as a struct of built-in values or a struct of
    * inputs each with their own layout specifier. This is only allowed on
    * variables with a struct or array of array of struct type.
    */
   struct nir_variable_data *members;
} nir_variable;

static inline bool
_nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
{
   /* This isn't a shader variable */
   assert(!(modes & nir_var_function_temp));
   return var->data.mode & modes;
}

#define nir_foreach_variable_in_list(var, var_list) \
   foreach_list_typed(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_list_safe(var, var_list) \
   foreach_list_typed_safe(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_shader(var, shader) \
   nir_foreach_variable_in_list(var, &(shader)->variables)

#define nir_foreach_variable_in_shader_safe(var, shader) \
   nir_foreach_variable_in_list_safe(var, &(shader)->variables)

#define nir_foreach_variable_with_modes(var, shader, modes) \
   nir_foreach_variable_in_shader(var, shader)              \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_variable_with_modes_safe(var, shader, modes) \
   nir_foreach_variable_in_shader_safe(var, shader)              \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_shader_in_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)

#define nir_foreach_shader_in_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)

#define nir_foreach_shader_out_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)

#define nir_foreach_shader_out_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)

#define nir_foreach_uniform_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform)

#define nir_foreach_uniform_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)

#define nir_foreach_image_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_image)

#define nir_foreach_image_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_image)
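
/* Usage sketch (illustrative, not from the original header): walk all
 * shader inputs of a shader, skipping every other variable mode:
 *
 *    nir_foreach_shader_in_variable(var, shader) {
 *       printf("input %s at location %d\n", var->name, var->data.location);
 *    }
 *
 * The *_safe variants additionally allow removing `var` from the list while
 * iterating.
 */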

static inline bool
nir_variable_is_global(const nir_variable *var)
{
   return var->data.mode != nir_var_function_temp;
}

typedef enum ENUM_PACKED {
   nir_instr_type_alu,
   nir_instr_type_deref,
   nir_instr_type_call,
   nir_instr_type_tex,
   nir_instr_type_intrinsic,
   nir_instr_type_load_const,
   nir_instr_type_jump,
   nir_instr_type_undef,
   nir_instr_type_phi,
   nir_instr_type_parallel_copy,
   nir_instr_type_debug_info,
} nir_instr_type;

typedef struct nir_instr {
   struct exec_node node;
   struct nir_block *block;
   nir_instr_type type;

   /* A temporary for optimization and analysis passes to use for storing
    * flags. For instance, DCE uses this to store the "dead/live" info.
    */
   uint8_t pass_flags;

   /** generic instruction index. */
   uint32_t index;
} nir_instr;
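
/* Illustrative pattern (not part of the original header): code typically
 * dispatches on nir_instr::type and then downcasts with the NIR_DEFINE_CAST
 * helpers that the rest of nir.h generates, e.g.
 *
 *    if (instr->type == nir_instr_type_alu) {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       ... inspect alu ...
 *    }
 *
 * nir_alu_instr and nir_instr_as_alu() are assumed here; they are defined
 * later in NIR, outside this excerpt.
 */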

static inline nir_instr *
nir_instr_next(nir_instr *instr)
{
   struct exec_node *next = exec_node_get_next(&instr->node);
   if (exec_node_is_tail_sentinel(next))
      return NULL;
   else
      return exec_node_data(nir_instr, next, node);
}

static inline nir_instr *
nir_instr_prev(nir_instr *instr)
{
   struct exec_node *prev = exec_node_get_prev(&instr->node);
   if (exec_node_is_head_sentinel(prev))
      return NULL;
   else
      return exec_node_data(nir_instr, prev, node);
}
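
/* Minimal walking sketch (illustrative only, using just the helpers above):
 *
 *    for (nir_instr *it = instr; it != NULL; it = nir_instr_next(it)) {
 *       ... visit `it` and every following instruction in the same block ...
 *    }
 *
 * NIR also provides block-level iteration macros elsewhere; this is only
 * meant to show how the sentinel-terminated exec_list is exposed here.
 */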

static inline bool
nir_instr_is_first(const nir_instr *instr)
{
   return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node));
}

static inline bool
nir_instr_is_last(const nir_instr *instr)
{
   return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
}

typedef struct nir_def {
   /** Instruction which produces this SSA value. */
   nir_instr *parent_instr;

   /** set of nir_instrs where this register is used (read from) */
   struct list_head uses;

   /** generic SSA definition index. */
   unsigned index;

   uint8_t num_components;

   /* The bit-size of each channel; must be one of 1, 8, 16, 32, or 64 */
   uint8_t bit_size;

   /**
    * True if this SSA value may have different values in different SIMD
    * invocations of the shader. This is set by nir_divergence_analysis.
    */
   bool divergent;

   /**
    * True if this SSA value is loop invariant w.r.t. the innermost parent
    * loop. This is set by nir_divergence_analysis and used to determine
    * the divergence of a nir_src.
    */
   bool loop_invariant;
} nir_def;

struct nir_src;
struct nir_if;
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct nir_src {
|
2023-08-14 10:38:03 -04:00
|
|
|
/* Instruction or if-statement that consumes this value as a source. This
|
|
|
|
|
* should only be accessed through nir_src_* helpers.
|
|
|
|
|
*
|
|
|
|
|
* Internally, it is a tagged pointer to a nir_instr or nir_if.
|
|
|
|
|
*/
|
|
|
|
|
uintptr_t _parent;
|
2015-04-24 10:16:27 -07:00
|
|
|
|
|
|
|
|
struct list_head use_link;
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *ssa;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_src;
|
|
|
|
|
|
2023-08-14 10:38:03 -04:00
|
|
|
/* Layout of the _parent pointer. Bottom bit is set for nir_if parents (clear
|
|
|
|
|
* for nir_instr parents). Remaining bits are the pointer.
|
|
|
|
|
*/
|
|
|
|
|
#define NIR_SRC_PARENT_IS_IF (0x1)
|
|
|
|
|
#define NIR_SRC_PARENT_MASK (~((uintptr_t) NIR_SRC_PARENT_IS_IF))
|
|
|
|
|
|
2023-08-14 10:33:18 -04:00
|
|
|
static inline bool
|
|
|
|
|
nir_src_is_if(const nir_src *src)
|
|
|
|
|
{
|
2023-08-14 10:38:03 -04:00
|
|
|
return src->_parent & NIR_SRC_PARENT_IS_IF;
|
2023-08-14 10:33:18 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_instr *
|
|
|
|
|
nir_src_parent_instr(const nir_src *src)
|
|
|
|
|
{
|
2023-08-14 10:41:23 -04:00
|
|
|
assert(!nir_src_is_if(src));
|
2023-08-14 10:38:03 -04:00
|
|
|
|
|
|
|
|
/* Because it is not an if, the tag is 0, therefore we do not need to mask */
|
|
|
|
|
return (nir_instr *)(src->_parent);
|
2023-08-14 10:33:18 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline struct nir_if *
|
|
|
|
|
nir_src_parent_if(const nir_src *src)
|
|
|
|
|
{
|
2023-08-14 10:41:23 -04:00
|
|
|
assert(nir_src_is_if(src));
|
2023-08-14 10:38:03 -04:00
|
|
|
|
|
|
|
|
/* Because it is an if, the tag is 1, so we need to mask */
|
|
|
|
|
return (struct nir_if *)(src->_parent & NIR_SRC_PARENT_MASK);
|
|
|
|
|
}
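/* Illustrative usage sketch (not part of the API): dispatching on the tagged
 * parent of a nir_src through the helpers above, never touching _parent
 * directly.
 *
 *    if (nir_src_is_if(src)) {
 *       struct nir_if *nif = nir_src_parent_if(src);
 *       // src is the condition of an if-statement
 *    } else {
 *       nir_instr *parent = nir_src_parent_instr(src);
 *       // src feeds an ordinary instruction
 *    }
 */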
|
|
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
|
_nir_src_set_parent(nir_src *src, void *parent, bool is_if)
|
|
|
|
|
{
|
|
|
|
|
uintptr_t ptr = (uintptr_t) parent;
|
|
|
|
|
assert((ptr & ~NIR_SRC_PARENT_MASK) == 0 && "pointer must be aligned");
|
|
|
|
|
|
|
|
|
|
if (is_if)
|
|
|
|
|
ptr |= NIR_SRC_PARENT_IS_IF;
|
|
|
|
|
|
|
|
|
|
src->_parent = ptr;
|
2023-08-14 10:33:18 -04:00
|
|
|
}
|
|
|
|
|
|
2023-04-06 13:19:31 -04:00
|
|
|
static inline void
|
|
|
|
|
nir_src_set_parent_instr(nir_src *src, nir_instr *parent_instr)
|
|
|
|
|
{
|
2023-08-14 10:38:03 -04:00
|
|
|
_nir_src_set_parent(src, parent_instr, false);
|
2023-04-06 13:19:31 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
|
nir_src_set_parent_if(nir_src *src, struct nir_if *parent_if)
|
|
|
|
|
{
|
2023-08-14 10:38:03 -04:00
|
|
|
_nir_src_set_parent(src, parent_if, true);
|
2023-04-06 13:19:31 -04:00
|
|
|
}
|
|
|
|
|
|
2016-04-13 18:19:50 +01:00
|
|
|
static inline nir_src
|
|
|
|
|
nir_src_init(void)
|
|
|
|
|
{
|
2023-08-14 10:38:03 -04:00
|
|
|
nir_src src = { 0 };
|
2016-04-13 18:19:50 +01:00
|
|
|
return src;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define NIR_SRC_INIT nir_src_init()
|
2015-04-24 10:16:27 -07:00
|
|
|
|
2023-04-06 13:19:31 -04:00
|
|
|
#define nir_foreach_use_including_if(src, reg_or_ssa_def) \
|
2015-04-24 10:16:27 -07:00
|
|
|
list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
|
|
|
|
|
|
2023-04-06 13:19:31 -04:00
|
|
|
#define nir_foreach_use_including_if_safe(src, reg_or_ssa_def) \
|
2015-04-24 10:16:27 -07:00
|
|
|
list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_use(src, reg_or_ssa_def) \
|
2023-04-06 13:19:31 -04:00
|
|
|
nir_foreach_use_including_if(src, reg_or_ssa_def) \
|
2023-08-14 09:58:47 -04:00
|
|
|
if (!nir_src_is_if(src))
|
2023-04-06 13:19:31 -04:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_use_safe(src, reg_or_ssa_def) \
|
2023-04-06 13:19:31 -04:00
|
|
|
nir_foreach_use_including_if_safe(src, reg_or_ssa_def) \
|
2023-08-14 09:58:47 -04:00
|
|
|
if (!nir_src_is_if(src))
|
2023-04-06 13:19:31 -04:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_if_use(src, reg_or_ssa_def) \
|
2023-04-06 13:19:31 -04:00
|
|
|
nir_foreach_use_including_if(src, reg_or_ssa_def) \
|
2023-08-14 09:58:47 -04:00
|
|
|
if (nir_src_is_if(src))
|
2015-04-24 10:16:27 -07:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \
|
2023-04-06 13:19:31 -04:00
|
|
|
nir_foreach_use_including_if_safe(src, reg_or_ssa_def) \
|
2023-08-14 09:58:47 -04:00
|
|
|
if (nir_src_is_if(src))
|
2023-04-06 13:19:31 -04:00
|
|
|
|
|
|
|
|
static inline bool
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def_used_by_if(const nir_def *def)
|
2023-04-06 13:19:31 -04:00
|
|
|
{
|
|
|
|
|
nir_foreach_if_use(_, def)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2015-04-21 18:00:21 -07:00
|
|
|
|
2024-02-01 14:51:24 +01:00
|
|
|
static inline bool
|
|
|
|
|
nir_def_only_used_by_if(const nir_def *def)
|
|
|
|
|
{
|
|
|
|
|
nir_foreach_use(_, def)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
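/* Illustrative sketch (not part of the API): counting the instruction uses of
 * a value with the iterators above; nir_foreach_use() already filters out
 * if-condition uses.
 *
 *    static unsigned
 *    count_instr_uses(const nir_def *def)
 *    {
 *       unsigned count = 0;
 *       nir_foreach_use(use, def)
 *          count++;
 *       return count;
 *    }
 */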
|
|
|
|
|
|
2015-01-21 11:10:11 -08:00
|
|
|
static inline nir_src
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_src_for_ssa(nir_def *def)
|
2015-01-21 11:10:11 -08:00
|
|
|
{
|
2015-04-21 18:00:21 -07:00
|
|
|
nir_src src = NIR_SRC_INIT;
|
2015-01-21 11:10:11 -08:00
|
|
|
|
|
|
|
|
src.ssa = def;
|
|
|
|
|
|
|
|
|
|
return src;
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-14 10:18:39 -07:00
|
|
|
static inline unsigned
|
|
|
|
|
nir_src_bit_size(nir_src src)
|
|
|
|
|
{
|
2023-08-01 11:29:43 -04:00
|
|
|
return src.ssa->bit_size;
|
2015-08-14 10:18:39 -07:00
|
|
|
}
|
|
|
|
|
|
2018-03-14 21:44:51 -07:00
|
|
|
static inline unsigned
|
|
|
|
|
nir_src_num_components(nir_src src)
|
|
|
|
|
{
|
2023-08-01 11:29:43 -04:00
|
|
|
return src.ssa->num_components;
|
2018-03-14 21:44:51 -07:00
|
|
|
}
|
|
|
|
|
|
2018-10-20 08:36:21 -05:00
|
|
|
static inline bool
|
|
|
|
|
nir_src_is_const(nir_src src)
|
|
|
|
|
{
|
2023-08-01 11:29:43 -04:00
|
|
|
return src.ssa->parent_instr->type == nir_instr_type_load_const;
|
2018-10-20 08:36:21 -05:00
|
|
|
}
|
|
|
|
|
|
2021-03-01 15:04:25 -08:00
|
|
|
static inline bool
|
|
|
|
|
nir_src_is_undef(nir_src src)
|
|
|
|
|
{
|
2023-08-15 09:59:06 -05:00
|
|
|
return src.ssa->parent_instr->type == nir_instr_type_undef;
|
2021-03-01 15:04:25 -08:00
|
|
|
}
|
|
|
|
|
|
2024-09-10 12:31:27 +02:00
|
|
|
bool nir_src_is_divergent(nir_src *src);
|
2019-10-15 14:48:10 -05:00
|
|
|
|
2020-03-10 14:38:12 -07:00
|
|
|
/* Are all components the same, i.e. .xxxx */
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
|
|
|
|
|
{
|
|
|
|
|
for (unsigned i = 1; i < nr_comp; i++)
|
|
|
|
|
if (swiz[i] != swiz[0])
|
|
|
|
|
return false;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Are all components sequential, i.e. .yzw */
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
|
|
|
|
|
{
|
|
|
|
|
for (unsigned i = 1; i < nr_comp; i++)
|
|
|
|
|
if (swiz[i] != (swiz[0] + i))
|
|
|
|
|
return false;
|
|
|
|
|
return true;
|
|
|
|
|
}
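/* Illustrative sketch: how the two swizzle classifiers above behave.
 *
 *    uint8_t splat[4] = { 2, 2, 2, 2 };   // .zzzz
 *    uint8_t seq[3]   = { 1, 2, 3 };      // .yzw
 *
 *    assert(nir_is_same_comp_swizzle(splat, 4));
 *    assert(nir_is_sequential_comp_swizzle(seq, 3));
 */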
|
|
|
|
|
|
2023-08-20 20:42:34 +03:00
|
|
|
/***/
|
2023-08-17 20:56:26 +03:00
|
|
|
typedef struct nir_alu_src {
|
2021-06-18 08:16:18 -05:00
|
|
|
/** Base source */
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_src src;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* For each input component, says which component of the register it is
|
2021-06-18 08:16:18 -05:00
|
|
|
* chosen from.
|
|
|
|
|
*
|
|
|
|
|
* Note that which elements of the swizzle are used and which are ignored
|
|
|
|
|
* are based on the write mask for most opcodes - for example, a statement
|
|
|
|
|
* like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle
|
|
|
|
|
* of {2, 1, x, 0} where x means "don't care."
|
2014-07-31 16:14:51 -07:00
|
|
|
*/
|
2018-07-12 03:40:23 +02:00
|
|
|
uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_alu_src;
|
|
|
|
|
|
2021-11-22 11:11:16 -08:00
|
|
|
nir_alu_type
|
|
|
|
|
nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type);
|
2016-12-07 08:34:02 +01:00
|
|
|
|
2017-03-07 16:46:17 -08:00
|
|
|
static inline nir_alu_type
|
|
|
|
|
nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
|
|
|
|
|
{
|
|
|
|
|
return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
|
|
|
|
|
}
|
|
|
|
|
|
2021-11-22 11:11:16 -08:00
|
|
|
enum glsl_base_type
|
|
|
|
|
nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type);
|
2020-08-20 10:20:57 -07:00
|
|
|
|
2017-07-01 07:58:26 +02:00
|
|
|
nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
|
|
|
|
|
nir_rounding_mode rnd);
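/* Illustrative sketch (assumes the usual nir_type_* and nir_rounding_mode_*
 * enums): asking for the opcode that converts a 32-bit float to a 32-bit
 * signed integer.
 *
 *    nir_op cvt = nir_type_conversion_op(nir_type_float32, nir_type_int32,
 *                                        nir_rounding_mode_undef);
 *    // with no explicit rounding mode this is expected to be nir_op_f2i32
 */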
|
2016-12-07 08:34:42 +01:00
|
|
|
|
nir: Add unified atomics
Currently, we have an atomic intrinsic for each combination of memory type
(global, shared, image, etc) and atomic operation (add, sub, etc). So for m
types of memory supported by the driver and n atomic opcodes, the driver has to
handle O(mn) intrinsics. This makes a total mess in every single backend I've
looked at, without fail.
It would be a lot nicer to unify the intrinsics. There are two obvious ways:
1. Make the memory type a constant index, keep different intrinsics for
different operations. The problem with this is that different memory types
imply different intrinsic signatures (number of sources, etc). As an
example, it doesn't make sense to unify global_atomic_amd with
global_atomic_2x32, as an example. The first takes 3 scalar sources, the
second takes 1 vector and 1 scalar. Also, in any single backend, there are a
lot more operations than there are memory types.
2. Make the opcode a constant index, keep different intrinsics for different
operations. This works well, with one exception: compswap and fcompswap
take an extra argument that other atomics don't, so there's an extra axis of
variation for the intrinsic signatures.
So, the solution is to have 2 intrinsics for each memory type -- for atomics
taking 1 argument and atomics taking 2 respectively. Both of these intrinsics
take an nir_atomic_op enum to describe its operation. We don't use a nir_op for
this purpose, as there are some atomics (cmpxchg, inc_wrap, etc) that don't
cleanly map to any ALU op and it would be weird to force it.
The plan is to transition to these new opcodes gradually. This series adds a
lowering pass producing these opcodes from the existing opcodes, so that
backends can opt-in to the new forms one-by-one. Then we can convert backends
separately without any cross-tree flag day. Once everything is converted, we can
convert the producers and core NIR as a flag day, but we have far fewer
producers than backends so this should be fine. Finally we can drop the old
stuff.
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22914>
2023-05-08 15:29:31 -04:00
|
|
|
/**
|
|
|
|
|
* Atomic intrinsics perform different operations depending on the value of
|
|
|
|
|
* their atomic_op constant index. nir_atomic_op defines the operations.
|
|
|
|
|
*/
|
|
|
|
|
typedef enum {
|
|
|
|
|
nir_atomic_op_iadd,
|
|
|
|
|
nir_atomic_op_imin,
|
|
|
|
|
nir_atomic_op_umin,
|
|
|
|
|
nir_atomic_op_imax,
|
|
|
|
|
nir_atomic_op_umax,
|
|
|
|
|
nir_atomic_op_iand,
|
|
|
|
|
nir_atomic_op_ior,
|
|
|
|
|
nir_atomic_op_ixor,
|
|
|
|
|
nir_atomic_op_xchg,
|
|
|
|
|
nir_atomic_op_fadd,
|
|
|
|
|
nir_atomic_op_fmin,
|
|
|
|
|
nir_atomic_op_fmax,
|
|
|
|
|
nir_atomic_op_cmpxchg,
|
|
|
|
|
nir_atomic_op_fcmpxchg,
|
|
|
|
|
nir_atomic_op_inc_wrap,
|
|
|
|
|
nir_atomic_op_dec_wrap,
|
2024-03-04 05:33:56 -05:00
|
|
|
nir_atomic_op_ordered_add_gfx12_amd,
|
2023-05-08 15:29:31 -04:00
|
|
|
} nir_atomic_op;
|
|
|
|
|
|
|
|
|
|
static inline nir_alu_type
|
|
|
|
|
nir_atomic_op_type(nir_atomic_op op)
|
|
|
|
|
{
|
|
|
|
|
switch (op) {
|
|
|
|
|
case nir_atomic_op_imin:
|
|
|
|
|
case nir_atomic_op_imax:
|
|
|
|
|
return nir_type_int;
|
|
|
|
|
|
|
|
|
|
case nir_atomic_op_fadd:
|
|
|
|
|
case nir_atomic_op_fmin:
|
|
|
|
|
case nir_atomic_op_fmax:
|
|
|
|
|
case nir_atomic_op_fcmpxchg:
|
|
|
|
|
return nir_type_float;
|
|
|
|
|
|
|
|
|
|
case nir_atomic_op_iadd:
|
|
|
|
|
case nir_atomic_op_iand:
|
|
|
|
|
case nir_atomic_op_ior:
|
|
|
|
|
case nir_atomic_op_ixor:
|
|
|
|
|
case nir_atomic_op_xchg:
|
|
|
|
|
case nir_atomic_op_cmpxchg:
|
|
|
|
|
case nir_atomic_op_umin:
|
|
|
|
|
case nir_atomic_op_umax:
|
|
|
|
|
case nir_atomic_op_inc_wrap:
|
|
|
|
|
case nir_atomic_op_dec_wrap:
|
2024-03-04 05:33:56 -05:00
|
|
|
case nir_atomic_op_ordered_add_gfx12_amd:
|
2023-05-08 15:29:31 -04:00
|
|
|
return nir_type_uint;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unreachable("Invalid nir_atomic_op");
|
|
|
|
|
}
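/* Illustrative sketch: nir_atomic_op_type() reports the base ALU type an
 * atomic operates on, matching the switch above.
 *
 *    assert(nir_atomic_op_type(nir_atomic_op_fadd) == nir_type_float);
 *    assert(nir_atomic_op_type(nir_atomic_op_imax) == nir_type_int);
 *    assert(nir_atomic_op_type(nir_atomic_op_ior)  == nir_type_uint);
 */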
|
|
|
|
|
|
2024-01-24 09:13:33 +01:00
|
|
|
nir_op
|
|
|
|
|
nir_atomic_op_to_alu(nir_atomic_op op);
|
|
|
|
|
|
2023-08-15 10:59:11 -05:00
|
|
|
/** Returns nir_op_vec<num_components> or nir_op_mov if num_components == 1
|
|
|
|
|
*
|
|
|
|
|
* This is subtly different from nir_op_is_vec() which returns false for
|
|
|
|
|
* nir_op_mov. Returning nir_op_mov from nir_op_vec() when num_components == 1
|
|
|
|
|
* makes sense under the assumption that the num_components of the resulting
|
|
|
|
|
* nir_def will be the same as what is passed in here because a single-component mov
|
|
|
|
|
* is effectively a vec1. However, if alu->def.num_components > 1, nir_op_mov
|
|
|
|
|
* has different semantics from nir_op_vec*, so code which detects "is this
|
|
|
|
|
* a vec?" typically needs to handle nir_op_mov separate from nir_op_vecN.
|
|
|
|
|
*
|
|
|
|
|
* In the unlikely case where you can handle nir_op_vecN and nir_op_mov
|
|
|
|
|
* together, use nir_op_is_vec_or_mov().
|
|
|
|
|
*/
|
2021-11-22 11:11:16 -08:00
|
|
|
nir_op
|
2023-08-15 10:59:11 -05:00
|
|
|
nir_op_vec(unsigned num_components);
|
2019-03-20 18:11:20 +01:00
|
|
|
|
2023-08-15 10:59:11 -05:00
|
|
|
/** Returns true if this op is one of nir_op_vec*
|
|
|
|
|
*
|
|
|
|
|
* Returns false for nir_op_mov. See nir_op_vec() for more details.
|
|
|
|
|
*/
|
2021-11-22 11:11:16 -08:00
|
|
|
bool
|
|
|
|
|
nir_op_is_vec(nir_op op);
|
2020-03-30 12:06:22 -05:00
|
|
|
|
2023-08-15 10:59:11 -05:00
|
|
|
static inline bool
|
|
|
|
|
nir_op_is_vec_or_mov(nir_op op)
|
|
|
|
|
{
|
|
|
|
|
return op == nir_op_mov || nir_op_is_vec(op);
|
|
|
|
|
}
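/* Illustrative sketch: building a vecN opcode for a component count and
 * classifying it with the helpers above.
 *
 *    nir_op op1 = nir_op_vec(1);   // nir_op_mov
 *    nir_op op4 = nir_op_vec(4);   // nir_op_vec4
 *
 *    assert(!nir_op_is_vec(op1) && nir_op_is_vec_or_mov(op1));
 *    assert(nir_op_is_vec(op4) && nir_op_is_vec_or_mov(op4));
 */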
|
|
|
|
|
|
2023-09-20 14:44:28 -04:00
|
|
|
static inline bool
|
|
|
|
|
nir_is_float_control_signed_zero_preserve(unsigned execution_mode, unsigned bit_size)
|
|
|
|
|
{
|
|
|
|
|
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16) ||
|
|
|
|
|
(32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32) ||
|
|
|
|
|
(64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_is_float_control_inf_preserve(unsigned execution_mode, unsigned bit_size)
|
|
|
|
|
{
|
|
|
|
|
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP16) ||
|
|
|
|
|
(32 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP32) ||
|
|
|
|
|
(64 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP64);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_is_float_control_nan_preserve(unsigned execution_mode, unsigned bit_size)
|
|
|
|
|
{
|
|
|
|
|
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16) ||
|
|
|
|
|
(32 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32) ||
|
|
|
|
|
(64 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-01 11:23:28 +01:00
|
|
|
static inline bool
|
|
|
|
|
nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
|
|
|
|
|
(32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
|
|
|
|
|
(64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
|
2019-02-01 11:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
|
|
|
|
|
(32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
|
|
|
|
|
(64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
|
2019-02-01 11:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
|
|
|
|
|
(32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) ||
|
|
|
|
|
(64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
|
2019-02-01 11:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
|
|
|
|
|
(32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
|
|
|
|
|
(64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
|
2019-02-01 11:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
|
|
|
|
|
(32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
|
|
|
|
|
(64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
|
2019-02-01 11:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_has_any_rounding_mode_rtz(unsigned execution_mode)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
|
|
|
|
|
(execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
|
|
|
|
|
(execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
|
2019-02-01 11:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_has_any_rounding_mode_rtne(unsigned execution_mode)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
|
|
|
|
|
(execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
|
|
|
|
|
(execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
|
2019-02-01 11:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_rounding_mode
|
|
|
|
|
nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
|
|
|
|
|
nir_alu_type type)
|
|
|
|
|
{
|
|
|
|
|
if (nir_alu_type_get_base_type(type) != nir_type_float)
|
|
|
|
|
return nir_rounding_mode_undef;
|
|
|
|
|
|
|
|
|
|
unsigned bit_size = nir_alu_type_get_type_size(type);
|
|
|
|
|
|
|
|
|
|
if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
|
|
|
|
|
return nir_rounding_mode_rtz;
|
|
|
|
|
if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
|
|
|
|
|
return nir_rounding_mode_rtne;
|
|
|
|
|
return nir_rounding_mode_undef;
|
|
|
|
|
}
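/* Illustrative sketch: deriving the rounding mode for an fp16 float operation
 * from a shader's float_controls execution mode.
 *
 *    unsigned em = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16;
 *    nir_rounding_mode rnd =
 *       nir_get_rounding_mode_from_float_controls(em, nir_type_float16);
 *    // rnd == nir_rounding_mode_rtz
 */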
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_has_any_rounding_mode_enabled(unsigned execution_mode)
|
|
|
|
|
{
|
|
|
|
|
bool result =
|
|
|
|
|
nir_has_any_rounding_mode_rtne(execution_mode) ||
|
|
|
|
|
nir_has_any_rounding_mode_rtz(execution_mode);
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2014-12-16 12:22:01 -08:00
|
|
|
typedef enum {
|
2019-05-09 15:27:14 -07:00
|
|
|
/**
|
|
|
|
|
* Operation where the first two sources are commutative.
|
|
|
|
|
*
|
|
|
|
|
* For 2-source operations, this is just mathematical commutativity. Some
|
|
|
|
|
* 3-source operations, like ffma, are only commutative in the first two
|
|
|
|
|
* sources.
|
|
|
|
|
*/
|
|
|
|
|
NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),
|
2021-06-18 08:16:18 -05:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Operation is associative
|
|
|
|
|
*/
|
2014-12-16 12:22:01 -08:00
|
|
|
NIR_OP_IS_ASSOCIATIVE = (1 << 1),
|
2019-06-18 18:08:45 -07:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Operation where src[0] is used to select src[1] on true or src[2] on false.
|
|
|
|
|
* src[0] may be Boolean, or it may be another type used in an implicit
|
|
|
|
|
* comparison.
|
|
|
|
|
*/
|
|
|
|
|
NIR_OP_IS_SELECTION = (1 << 2),
|
2014-12-16 12:22:01 -08:00
|
|
|
} nir_op_algebraic_property;
|
|
|
|
|
|
2021-06-04 02:48:59 +08:00
|
|
|
/* vec16 is the widest ALU op in NIR, so the maximum number of inputs of an ALU
|
|
|
|
|
* instruction is the same as NIR_MAX_VEC_COMPONENTS.
|
|
|
|
|
*/
|
|
|
|
|
#define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS
|
|
|
|
|
|
2023-08-20 20:42:34 +03:00
|
|
|
/***/
|
2021-06-18 08:16:18 -05:00
|
|
|
typedef struct nir_op_info {
|
|
|
|
|
/** Name of the NIR ALU opcode */
|
2014-07-31 16:14:51 -07:00
|
|
|
const char *name;
|
|
|
|
|
|
2021-06-18 08:16:18 -05:00
|
|
|
/** Number of inputs (sources) */
|
2020-05-14 14:50:52 -05:00
|
|
|
uint8_t num_inputs;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
|
|
|
|
/**
|
2014-12-16 14:43:26 -08:00
|
|
|
* The number of components in the output
|
2014-07-31 16:14:51 -07:00
|
|
|
*
|
2014-12-16 14:43:26 -08:00
|
|
|
* If non-zero, this is the size of the output and input sizes are
|
|
|
|
|
* explicitly given; swizzle and writemask are still in effect, but if
|
|
|
|
|
* the output component is masked out, then the input component may
|
|
|
|
|
* still be in use.
|
2014-07-31 16:14:51 -07:00
|
|
|
*
|
2014-12-16 14:43:26 -08:00
|
|
|
* If zero, the opcode acts in the standard, per-component manner; the
|
|
|
|
|
* operation is performed on each component (except the ones that are
|
|
|
|
|
* masked out) with the input being taken from the input swizzle for
|
|
|
|
|
* that component.
|
|
|
|
|
*
|
|
|
|
|
* The size of some of the inputs may be given (i.e. non-zero) even
|
|
|
|
|
* though output_size is zero; in that case, the inputs with a zero
|
|
|
|
|
* size act per-component, while the inputs with non-zero size don't.
|
2014-07-31 16:14:51 -07:00
|
|
|
*/
|
2020-05-14 14:50:52 -05:00
|
|
|
uint8_t output_size;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
|
|
|
|
/**
|
2024-10-25 19:24:20 -04:00
|
|
|
* The type of vector that the instruction outputs.
|
2014-07-31 16:14:51 -07:00
|
|
|
*/
|
|
|
|
|
nir_alu_type output_type;
|
|
|
|
|
|
|
|
|
|
/**
|
2014-12-16 14:43:26 -08:00
|
|
|
* The number of components in each input
|
2021-06-18 08:16:18 -05:00
|
|
|
*
|
|
|
|
|
* See nir_op_infos::output_size for more detail about the relationship
|
|
|
|
|
* between input and output sizes.
|
2014-07-31 16:14:51 -07:00
|
|
|
*/
|
2021-06-04 02:48:59 +08:00
|
|
|
uint8_t input_sizes[NIR_ALU_MAX_INPUTS];
|
2014-07-31 16:14:51 -07:00
|
|
|
|
|
|
|
|
/**
|
2023-08-01 11:55:12 -04:00
|
|
|
* The type of vector that each input takes.
|
2014-07-31 16:14:51 -07:00
|
|
|
*/
|
2021-06-04 02:48:59 +08:00
|
|
|
nir_alu_type input_types[NIR_ALU_MAX_INPUTS];
|
2014-12-16 12:22:01 -08:00
|
|
|
|
2021-06-18 08:16:18 -05:00
|
|
|
/** Algebraic properties of this opcode */
|
2014-12-16 12:22:01 -08:00
|
|
|
nir_op_algebraic_property algebraic_properties;
|
2019-02-12 12:55:28 +01:00
|
|
|
|
2021-06-18 08:16:18 -05:00
|
|
|
/** Whether this represents a numeric conversion opcode */
|
2019-02-12 12:55:28 +01:00
|
|
|
bool is_conversion;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_op_info;
|
|
|
|
|
|
2021-06-18 08:16:18 -05:00
|
|
|
/** Metadata for each nir_op, indexed by opcode */
|
2014-07-31 16:14:51 -07:00
|
|
|
extern const nir_op_info nir_op_infos[nir_num_opcodes];
|
|
|
|
|
|
2019-06-18 18:08:45 -07:00
|
|
|
static inline bool
|
|
|
|
|
nir_op_is_selection(nir_op op)
|
|
|
|
|
{
|
|
|
|
|
return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_SELECTION) != 0;
|
|
|
|
|
}
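/* Illustrative sketch: querying per-opcode metadata from the table above.
 *
 *    const nir_op_info *info = &nir_op_infos[nir_op_ffma];
 *    // info->num_inputs == 3 and info->output_size == 0 (per-component op);
 *    // ffma's first two sources commute:
 *    assert(info->algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE);
 */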
|
|
|
|
|
|
2023-08-20 20:42:34 +03:00
|
|
|
/***/
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct nir_alu_instr {
|
2021-06-18 08:16:18 -05:00
|
|
|
/** Base instruction */
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_instr instr;
|
2021-06-18 08:16:18 -05:00
|
|
|
|
|
|
|
|
/** Opcode */
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_op op;
|
2016-03-17 10:50:27 -07:00
|
|
|
|
|
|
|
|
/** Indicates that this ALU instruction generates an exact value
|
|
|
|
|
*
|
|
|
|
|
* This is kind of a mixture of GLSL "precise" and "invariant" and not
|
|
|
|
|
* really equivalent to either. This indicates that the value generated by
|
|
|
|
|
* this operation is high-precision and any code transformations that touch
|
|
|
|
|
* it must ensure that the resulting value is bit-for-bit identical to the
|
|
|
|
|
* original.
|
|
|
|
|
*/
|
2023-08-08 12:00:35 -05:00
|
|
|
bool exact : 1;
|
2019-05-17 13:46:38 -07:00
|
|
|
|
|
|
|
|
/**
|
2021-06-18 08:16:18 -05:00
|
|
|
* Indicates that this instruction does not cause signed integer wrapping
|
|
|
|
|
* to occur, in the form of overflow or underflow.
|
2019-05-17 13:46:38 -07:00
|
|
|
*/
|
2023-08-08 12:00:35 -05:00
|
|
|
bool no_signed_wrap : 1;
|
2021-06-18 08:16:18 -05:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Indicates that this instruction does not cause unsigned integer wrapping
|
|
|
|
|
* to occur, in the form of overflow or underflow.
|
|
|
|
|
*/
|
2023-08-08 12:00:35 -05:00
|
|
|
bool no_unsigned_wrap : 1;
|
2016-03-17 10:50:27 -07:00
|
|
|
|
2024-02-13 16:24:56 -08:00
|
|
|
/**
|
|
|
|
|
* The float controls bits that float_controls2 cares about. That is,
|
|
|
|
|
* NAN/INF/SIGNED_ZERO_PRESERVE only. Allow{Contract,Reassoc,Transform} are
|
|
|
|
|
* still handled through the exact bit, and the other float controls bits
|
|
|
|
|
* (rounding mode and denorm handling) remain in the execution mode only.
|
|
|
|
|
*/
|
|
|
|
|
uint32_t fp_fast_math : 9;
|
|
|
|
|
|
2021-06-18 08:16:18 -05:00
|
|
|
/** Destination */
|
2023-08-14 11:43:35 -05:00
|
|
|
nir_def def;
|
2021-06-18 08:16:18 -05:00
|
|
|
|
|
|
|
|
/** Sources
|
|
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* The size of the array is given by :c:member:`nir_op_info.num_inputs`.
|
2021-06-18 08:16:18 -05:00
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_alu_src src[];
|
|
|
|
|
} nir_alu_instr;
|
|
|
|
|
|
2024-07-09 17:17:05 -04:00
|
|
|
static inline bool
|
|
|
|
|
nir_alu_instr_is_signed_zero_preserve(nir_alu_instr *alu)
|
|
|
|
|
{
|
|
|
|
|
return nir_is_float_control_signed_zero_preserve(alu->fp_fast_math, alu->def.bit_size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_alu_instr_is_inf_preserve(nir_alu_instr *alu)
|
|
|
|
|
{
|
|
|
|
|
return nir_is_float_control_inf_preserve(alu->fp_fast_math, alu->def.bit_size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_alu_instr_is_nan_preserve(nir_alu_instr *alu)
|
|
|
|
|
{
|
|
|
|
|
return nir_is_float_control_nan_preserve(alu->fp_fast_math, alu->def.bit_size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_alu_instr_is_signed_zero_inf_nan_preserve(nir_alu_instr *alu)
|
|
|
|
|
{
|
|
|
|
|
return nir_is_float_control_signed_zero_inf_nan_preserve(alu->fp_fast_math, alu->def.bit_size);
|
|
|
|
|
}
|
|
|
|
|
|
2023-09-06 14:04:39 +10:00
|
|
|
void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src);
|
2015-09-09 13:18:29 -07:00
|
|
|
|
2021-11-22 11:11:16 -08:00
|
|
|
nir_component_mask_t
|
|
|
|
|
nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src);
|
2019-06-07 08:35:51 -07:00
|
|
|
/**
|
|
|
|
|
* Get the number of channels used for a source
|
2015-01-25 11:42:34 -05:00
|
|
|
*/
|
2021-11-22 11:11:16 -08:00
|
|
|
unsigned
|
|
|
|
|
nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src);
|
2015-01-25 11:42:34 -05:00
|
|
|
|
2023-11-06 16:50:11 -04:00
|
|
|
/* is this source channel used? */
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
|
|
|
|
|
unsigned channel)
|
|
|
|
|
{
|
|
|
|
|
return channel < nir_ssa_alu_instr_src_components(instr, src);
|
|
|
|
|
}
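/* Illustrative sketch (not part of the API): visiting only the channels of a
 * source that an ALU instruction actually reads.
 *
 *    static void
 *    visit_read_channels(const nir_alu_instr *alu, unsigned src)
 *    {
 *       unsigned num = nir_ssa_alu_instr_src_components(alu, src);
 *       for (unsigned c = 0; c < num; c++) {
 *          assert(nir_alu_instr_channel_used(alu, src, c));
 *          // alu->src[src].swizzle[c] names the component being read
 *       }
 *    }
 */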
|
|
|
|
|
|
2021-11-22 11:11:16 -08:00
|
|
|
bool
|
|
|
|
|
nir_alu_instr_is_comparison(const nir_alu_instr *instr);
|
2019-06-07 17:57:35 -05:00
|
|
|
|
2019-06-13 14:06:55 -07:00
|
|
|
bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
|
2019-06-13 12:59:29 -07:00
|
|
|
nir_alu_type full_type);
|
2018-05-24 11:37:51 -07:00
|
|
|
|
2016-07-29 01:29:11 -07:00
|
|
|
bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
|
|
|
|
|
unsigned src1, unsigned src2);
|
|
|
|
|
|
2018-05-22 18:18:07 -07:00
|
|
|
bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
|
|
|
|
|
const nir_alu_instr *alu2,
|
|
|
|
|
unsigned src1, unsigned src2);
|
|
|
|
|
|
2020-11-03 13:17:22 +00:00
|
|
|
bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn);
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef enum {
|
|
|
|
|
nir_deref_type_var,
|
|
|
|
|
nir_deref_type_array,
|
2018-03-14 21:45:38 -07:00
|
|
|
nir_deref_type_array_wildcard,
|
2018-11-28 12:26:52 -06:00
|
|
|
nir_deref_type_ptr_as_array,
|
2018-03-14 21:45:38 -07:00
|
|
|
nir_deref_type_struct,
|
|
|
|
|
nir_deref_type_cast,
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_deref_type;
|
|
|
|
|
|
2018-03-14 21:45:38 -07:00
|
|
|
typedef struct {
|
|
|
|
|
nir_instr instr;
|
|
|
|
|
|
|
|
|
|
/** The type of this deref instruction */
|
|
|
|
|
nir_deref_type deref_type;
|
|
|
|
|
|
2020-10-30 12:14:05 -05:00
|
|
|
/** Bitmask of the modes the underlying variable might have
|
|
|
|
|
*
|
|
|
|
|
* For OpenCL-style generic pointers, we may not know exactly what mode it
|
|
|
|
|
* is at any given point in time in the compile process. This bitfield
|
|
|
|
|
* contains the set of modes which it MAY be.
|
2020-10-30 12:19:25 -05:00
|
|
|
*
|
|
|
|
|
* Generally, this field should not be accessed directly. Use one of the
|
|
|
|
|
* nir_deref_mode_ helpers instead.
|
|
|
|
|
*/
|
2020-10-30 12:14:05 -05:00
|
|
|
nir_variable_mode modes;
|
2018-03-14 21:45:38 -07:00
|
|
|
|
|
|
|
|
/** The dereferenced type of the resulting pointer value */
|
|
|
|
|
const struct glsl_type *type;
|
|
|
|
|
|
|
|
|
|
union {
|
|
|
|
|
/** Variable being dereferenced if deref_type is a deref_var */
|
|
|
|
|
nir_variable *var;
|
|
|
|
|
|
|
|
|
|
/** Parent deref if deref_type is not deref_var */
|
|
|
|
|
nir_src parent;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/** Additional deref parameters */
|
|
|
|
|
union {
|
|
|
|
|
struct {
|
|
|
|
|
nir_src index;
|
2022-05-26 21:12:33 +02:00
|
|
|
bool in_bounds;
|
2018-03-14 21:45:38 -07:00
|
|
|
} arr;
|
|
|
|
|
|
|
|
|
|
struct {
|
|
|
|
|
unsigned index;
|
|
|
|
|
} strct;
|
2018-11-28 12:26:52 -06:00
|
|
|
|
|
|
|
|
struct {
|
|
|
|
|
unsigned ptr_stride;
|
2020-08-24 09:51:04 -05:00
|
|
|
unsigned align_mul;
|
|
|
|
|
unsigned align_offset;
|
2018-11-28 12:26:52 -06:00
|
|
|
} cast;
|
2018-03-14 21:45:38 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/** Destination to store the resulting "pointer" */
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def def;
|
2018-03-14 21:45:38 -07:00
|
|
|
} nir_deref_instr;
|
|
|
|
|
|
2024-01-22 18:06:44 +01:00
|
|
|
/**
|
|
|
|
|
* Returns true if the cast is trivial, i.e. the source and destination types are
|
|
|
|
|
* the same.
|
|
|
|
|
*/
|
|
|
|
|
bool nir_deref_cast_is_trivial(nir_deref_instr *cast);
|
|
|
|
|
|
2020-10-30 12:19:25 -05:00
|
|
|
/** Returns true if deref might have one of the given modes
|
|
|
|
|
*
|
|
|
|
|
* For multi-mode derefs, this returns true if any of the possible modes for
|
|
|
|
|
* the deref is one of the specified modes. This function returning true
|
|
|
|
|
* does NOT mean that the deref definitely has one of those modes. It simply
|
|
|
|
|
* means that, with the best information we have at the time, it might.
|
|
|
|
|
*/
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes)
|
|
|
|
|
{
|
|
|
|
|
assert(!(modes & ~nir_var_all));
|
2020-10-30 12:14:05 -05:00
|
|
|
assert(deref->modes != 0);
|
|
|
|
|
return deref->modes & modes;
|
2020-10-30 12:19:25 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Returns true if deref must have one of the given modes
|
|
|
|
|
*
|
|
|
|
|
* For multi-mode derefs, this returns true if NIR can prove that the given
|
|
|
|
|
* deref has one of the specified modes. This function returning false does
|
|
|
|
|
* NOT mean that the deref doesn't have one of the given modes. It very well may
|
|
|
|
|
* have one of those modes, we just don't have enough information to prove
|
|
|
|
|
* that it does for sure.
|
|
|
|
|
*/
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes)
|
|
|
|
|
{
|
|
|
|
|
assert(!(modes & ~nir_var_all));
|
2020-10-30 12:14:05 -05:00
|
|
|
assert(deref->modes != 0);
|
|
|
|
|
return !(deref->modes & ~modes);
|
2020-10-30 12:19:25 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Returns true if deref has the given mode
|
|
|
|
|
*
|
|
|
|
|
* This returns true if the deref has exactly the mode specified. If the
|
|
|
|
|
* deref may have that mode but may also have a different mode (i.e. modes has
|
|
|
|
|
* multiple bits set), this will assert-fail.
|
|
|
|
|
*
|
|
|
|
|
* If you're confused about which nir_deref_mode_ helper to use, use this one
|
|
|
|
|
* or nir_deref_mode_is_one_of below.
|
|
|
|
|
*/
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode)
|
|
|
|
|
{
|
|
|
|
|
assert(util_bitcount(mode) == 1 && (mode & nir_var_all));
|
2020-10-30 12:14:05 -05:00
|
|
|
assert(deref->modes != 0);
|
2020-10-30 12:19:25 -05:00
|
|
|
|
|
|
|
|
/* This is only for "simple" cases, so if modes might interact with this
|
|
|
|
|
* deref then the deref has to have a single mode.
|
|
|
|
|
*/
|
|
|
|
|
if (nir_deref_mode_may_be(deref, mode)) {
|
2020-10-30 12:14:05 -05:00
|
|
|
assert(util_bitcount(deref->modes) == 1);
|
|
|
|
|
assert(deref->modes == mode);
|
2020-10-30 12:19:25 -05:00
|
|
|
}
|
|
|
|
|
|
2020-10-30 12:14:05 -05:00
|
|
|
return deref->modes == mode;
|
2020-10-30 12:19:25 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Returns true if deref has one of the given modes
|
|
|
|
|
*
|
|
|
|
|
* This returns true if the deref has exactly one possible mode and that mode
|
|
|
|
|
* is one of the modes specified. If the deref may have one of those modes
|
|
|
|
|
* but may also have a different mode (i.e. modes has multiple bits set), this
|
|
|
|
|
* will assert-fail.
|
|
|
|
|
*/
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes)
|
|
|
|
|
{
|
|
|
|
|
/* This is only for "simple" cases, so if modes might interact with this
|
|
|
|
|
* deref then the deref has to have a single mode.
|
|
|
|
|
*/
|
|
|
|
|
if (nir_deref_mode_may_be(deref, modes)) {
|
2020-10-30 12:14:05 -05:00
|
|
|
assert(util_bitcount(deref->modes) == 1);
|
2020-10-30 12:19:25 -05:00
|
|
|
assert(nir_deref_mode_must_be(deref, modes));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nir_deref_mode_may_be(deref, modes);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Returns true if deref's possible modes lie in the given set of modes
|
|
|
|
|
*
|
|
|
|
|
* This returns true if the deref's modes lie in the given set of modes. If
|
|
|
|
|
* the deref's modes overlap with the specified modes but aren't entirely
|
|
|
|
|
* contained in the specified set of modes, this will assert-fail. In
|
|
|
|
|
* particular, if this is used in a generic pointers scenario, the specified
|
|
|
|
|
* modes has to contain all or none of the possible generic pointer modes.
|
|
|
|
|
*
|
|
|
|
|
* This is intended mostly for mass-lowering of derefs which might have
|
|
|
|
|
* generic pointers.
|
|
|
|
|
*/
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes)
|
|
|
|
|
{
|
|
|
|
|
if (nir_deref_mode_may_be(deref, modes))
|
|
|
|
|
assert(nir_deref_mode_must_be(deref, modes));
|
|
|
|
|
|
|
|
|
|
return nir_deref_mode_may_be(deref, modes);
|
|
|
|
|
}
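/* Illustrative sketch: choosing among the mode helpers above. "deref" and
 * "other_deref" are hypothetical nir_deref_instr pointers, not part of this
 * header.
 *
 *    // Single-mode deref (the common case):
 *    if (nir_deref_mode_is(deref, nir_var_mem_shared)) {
 *       // definitely shared memory
 *    }
 *
 *    // Possibly-generic pointer: query without asserting a single mode.
 *    if (nir_deref_mode_may_be(other_deref, nir_var_mem_global) &&
 *        !nir_deref_mode_must_be(other_deref, nir_var_mem_global)) {
 *       // might be global memory, but it is not proven yet
 *    }
 */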
|
|
|
|
|
|
2019-04-19 15:09:04 -05:00
|
|
|
static inline nir_deref_instr *nir_src_as_deref(nir_src src);
|
2018-03-14 21:45:38 -07:00
|
|
|
|
2018-03-16 01:15:47 -07:00
|
|
|
static inline nir_deref_instr *
|
|
|
|
|
nir_deref_instr_parent(const nir_deref_instr *instr)
|
|
|
|
|
{
|
|
|
|
|
if (instr->deref_type == nir_deref_type_var)
|
|
|
|
|
return NULL;
|
|
|
|
|
else
|
|
|
|
|
return nir_src_as_deref(instr->parent);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_variable *
|
|
|
|
|
nir_deref_instr_get_variable(const nir_deref_instr *instr)
|
|
|
|
|
{
|
|
|
|
|
while (instr->deref_type != nir_deref_type_var) {
|
|
|
|
|
if (instr->deref_type == nir_deref_type_cast)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
instr = nir_deref_instr_parent(instr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return instr->var;
|
|
|
|
|
}
|
|
|
|
|
|
2018-06-28 19:46:01 -07:00
|
|
|
bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
|
2019-06-18 12:12:49 +02:00
|
|
|
bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
|
2021-03-01 21:22:06 -06:00
|
|
|
|
|
|
|
|
typedef enum {
|
|
|
|
|
nir_deref_instr_has_complex_use_allow_memcpy_src = (1 << 0),
|
|
|
|
|
nir_deref_instr_has_complex_use_allow_memcpy_dst = (1 << 1),
|
2023-05-19 14:40:17 -07:00
|
|
|
nir_deref_instr_has_complex_use_allow_atomics = (1 << 2),
|
2021-03-01 21:22:06 -06:00
|
|
|
} nir_deref_instr_has_complex_use_options;
|
|
|
|
|
|
|
|
|
|
bool nir_deref_instr_has_complex_use(nir_deref_instr *instr,
|
|
|
|
|
nir_deref_instr_has_complex_use_options opts);
|
2018-06-28 19:46:01 -07:00
|
|
|
|
2018-03-20 17:32:07 -07:00
|
|
|
bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);
|
|
|
|
|
|
2020-08-27 11:59:54 -05:00
|
|
|
unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);
|
2018-11-28 12:26:52 -06:00
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct {
|
|
|
|
|
nir_instr instr;
|
|
|
|
|
|
2019-03-19 10:18:49 -05:00
|
|
|
struct nir_function *callee;
|
2018-03-22 16:41:18 -07:00
|
|
|
|
|
|
|
|
unsigned num_params;
|
|
|
|
|
nir_src params[];
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_call_instr;
|
|
|
|
|
|
|
|
|
|
#include "nir_intrinsics.h"
|
|
|
|
|
|
2023-09-26 15:01:03 -05:00
|
|
|
#define NIR_INTRINSIC_MAX_CONST_INDEX 8
|
2016-01-13 18:43:14 -05:00
|
|
|
|
2014-12-18 17:13:22 -08:00
|
|
|
/** Represents an intrinsic
|
|
|
|
|
*
|
|
|
|
|
* An intrinsic is an instruction type for handling things that are
|
|
|
|
|
* more-or-less regular operations but don't just consume and produce SSA
|
|
|
|
|
* values like ALU operations do. Intrinsics are not for things that have
|
|
|
|
|
* special semantic meaning such as phi nodes and parallel copies.
|
|
|
|
|
* Examples of intrinsics include variable load/store operations, system
|
|
|
|
|
* value loads, and the like. Even though texturing more-or-less falls
|
|
|
|
|
* under this category, texturing is its own instruction type because
|
|
|
|
|
* trying to represent texturing with intrinsics would lead to a
|
|
|
|
|
* combinatorial explosion of intrinsic opcodes.
|
|
|
|
|
*
|
|
|
|
|
* By having a single instruction type for handling a lot of different
|
|
|
|
|
* cases, optimization passes can look for intrinsics and, for the most
|
|
|
|
|
* part, completely ignore them. Each intrinsic type also has a few
|
|
|
|
|
* possible flags that govern whether or not they can be reordered or
|
|
|
|
|
* eliminated. That way passes like dead code elimination can still work
|
|
|
|
|
* on intrinsics without understanding the meaning of each.
|
|
|
|
|
*
|
|
|
|
|
* Each intrinsic has some number of constant indices, some number of
|
|
|
|
|
* variables, and some number of sources. What these sources, variables,
|
|
|
|
|
* and indices mean depends on the intrinsic and is documented with the
|
|
|
|
|
* intrinsic declaration in nir_intrinsics.h. Intrinsics and texture
|
|
|
|
|
* instructions are the only types of instruction that can operate on
|
|
|
|
|
* variables.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct {
|
|
|
|
|
nir_instr instr;
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_op intrinsic;
|
|
|
|
|
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def def;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2014-12-18 17:13:22 -08:00
|
|
|
/** number of components if this is a vectorized intrinsic
|
|
|
|
|
*
|
|
|
|
|
* Similarly to ALU operations, some intrinsics are vectorized.
|
|
|
|
|
* An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
|
|
|
|
|
* For vectorized intrinsics, the num_components field specifies the
|
|
|
|
|
* number of destination components and the number of source components
|
|
|
|
|
* for all sources with nir_intrinsic_infos.src_components[i] == 0.
|
|
|
|
|
*/
|
2014-12-03 17:03:19 -08:00
|
|
|
uint8_t num_components;
|
|
|
|
|
|
2016-01-13 18:43:14 -05:00
|
|
|
int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2024-04-18 10:54:02 -04:00
|
|
|
/* a variable name associated with this instr; cannot be modified or freed */
|
|
|
|
|
const char *name;
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_src src[];
|
|
|
|
|
} nir_intrinsic_instr;
|
|
|
|
|
|
2018-04-04 20:40:33 -04:00
|
|
|
static inline nir_variable *
|
2023-07-06 14:07:31 +02:00
|
|
|
nir_intrinsic_get_var(const nir_intrinsic_instr *intrin, unsigned i)
|
2018-04-04 20:40:33 -04:00
|
|
|
{
|
|
|
|
|
return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
|
|
|
|
|
}
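/* Illustrative sketch (assumed variable names, not part of the API): for a
 * load_deref/store_deref style intrinsic, the underlying variable can be
 * chased through the deref chain like this; NULL comes back if the chain
 * starts at a cast rather than a variable:
 *
 *    nir_variable *var = nir_intrinsic_get_var(intrin, 0);
 *    if (var && var->data.mode == nir_var_shader_in) {
 *       // operate on the input variable
 *    }
 */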
|
|
|
|
|
|
2019-07-18 16:14:03 -07:00
|
|
|
typedef enum {
|
|
|
|
|
/* Memory ordering. */
|
2023-08-08 12:00:35 -05:00
|
|
|
NIR_MEMORY_ACQUIRE = 1 << 0,
|
|
|
|
|
NIR_MEMORY_RELEASE = 1 << 1,
|
|
|
|
|
NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,
|
2019-07-18 16:14:03 -07:00
|
|
|
|
|
|
|
|
/* Memory visibility operations. */
|
2020-02-20 09:47:06 -08:00
|
|
|
NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
|
2023-08-08 12:00:35 -05:00
|
|
|
NIR_MEMORY_MAKE_VISIBLE = 1 << 3,
|
2019-07-18 16:14:03 -07:00
|
|
|
} nir_memory_semantics;
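/* Illustrative sketch (assumed names; the setter is one of the accessors
 * generated in nir_intrinsics_indices.h): a barrier that releases prior
 * writes, acquires later reads, and makes its writes visible to other
 * invocations would carry:
 *
 *    nir_intrinsic_set_memory_semantics(barrier,
 *                                       NIR_MEMORY_ACQ_REL |
 *                                       NIR_MEMORY_MAKE_AVAILABLE |
 *                                       NIR_MEMORY_MAKE_VISIBLE);
 */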
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
/**
|
2023-08-20 20:42:34 +03:00
|
|
|
* NIR intrinsics semantic flags
|
2014-07-31 16:14:51 -07:00
|
|
|
*
|
|
|
|
|
* information about what the compiler can do with the intrinsics.
|
|
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* :c:member:`nir_intrinsic_info.flags`
|
2014-07-31 16:14:51 -07:00
|
|
|
*/
|
2014-12-19 15:56:55 -08:00
|
|
|
typedef enum {
|
|
|
|
|
/**
|
|
|
|
|
* whether the intrinsic can be safely eliminated if none of its output
|
|
|
|
|
* values are used.
|
|
|
|
|
*/
|
|
|
|
|
NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2014-12-19 15:56:55 -08:00
|
|
|
/**
|
|
|
|
|
* Whether the intrinsic can be reordered with respect to any other
|
|
|
|
|
* intrinsic, i.e. whether the only reordering dependencies of the
|
|
|
|
|
* intrinsic are due to the register reads/writes.
|
|
|
|
|
*/
|
|
|
|
|
NIR_INTRINSIC_CAN_REORDER = (1 << 1),
|
|
|
|
|
} nir_intrinsic_semantic_flag;
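/* Illustrative sketch (assumed names): passes consult these flags through
 * nir_intrinsic_infos (declared further down) without having to know the
 * specific opcode, e.g. dead-code elimination only removes intrinsics that
 * advertise CAN_ELIMINATE:
 *
 *    const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
 *    bool removable = (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) != 0;
 *    bool movable   = (info->flags & NIR_INTRINSIC_CAN_REORDER) != 0;
 */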
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2020-09-08 13:07:56 -07:00
|
|
|
/**
|
|
|
|
|
* Maximum valid value for a nir align_mul value (in intrinsics or derefs).
|
|
|
|
|
*
|
|
|
|
|
* Offsets can be signed, so this is the largest power of two in int32_t.
|
|
|
|
|
*/
|
|
|
|
|
#define NIR_ALIGN_MUL_MAX 0x40000000
|
|
|
|
|
|
2020-11-24 12:51:59 +00:00
|
|
|
typedef struct nir_io_semantics {
|
2023-08-08 12:00:35 -05:00
|
|
|
unsigned location : 7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */
|
|
|
|
|
unsigned num_slots : 6; /* max 32, may be pessimistic with const indexing */
|
|
|
|
|
unsigned dual_source_blend_index : 1;
|
|
|
|
|
unsigned fb_fetch_output : 1; /* for GL_KHR_blend_equation_advanced */
|
2024-11-12 19:18:28 -05:00
|
|
|
unsigned fb_fetch_output_coherent : 1;
|
2023-08-08 12:00:35 -05:00
|
|
|
unsigned gs_streams : 8; /* xxyyzzww: 2-bit stream index for each component */
|
|
|
|
|
unsigned medium_precision : 1; /* GLSL mediump qualifier */
|
|
|
|
|
unsigned per_view : 1;
|
|
|
|
|
unsigned high_16bits : 1; /* whether accessing low or high half of the slot */
|
|
|
|
|
unsigned invariant : 1; /* The variable has the invariant flag set */
|
2023-03-13 00:18:47 -04:00
|
|
|
unsigned high_dvec2 : 1; /* whether accessing the high half of dvec3/dvec4 */
|
2022-01-02 19:46:45 -05:00
|
|
|
/* CLIP_DISTn, LAYER, VIEWPORT, and TESS_LEVEL_* have up to 3 uses:
|
|
|
|
|
* - an output consumed by the next stage
|
|
|
|
|
* - a system value output affecting fixed-func hardware, e.g. the clipper
|
|
|
|
|
* - a transform feedback output written to memory
|
|
|
|
|
* The following fields disable the first two. Transform feedback is disabled
|
|
|
|
|
* by transform feedback info.
|
|
|
|
|
*/
|
2023-08-08 12:00:35 -05:00
|
|
|
unsigned no_varying : 1; /* whether this output isn't consumed by the next stage */
|
|
|
|
|
unsigned no_sysval_output : 1; /* whether this system value output has no
|
|
|
|
|
effect due to current pipeline states */
|
2024-03-18 12:15:41 -04:00
|
|
|
unsigned interp_explicit_strict : 1; /* preserve original vertex order */
|
2020-08-11 23:48:12 -04:00
|
|
|
} nir_io_semantics;
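/* Illustrative sketch (assumed names; the accessor is generated in
 * nir_intrinsics_indices.h): once I/O has been lowered to intrinsics,
 * backends typically read these bits off the load/store instruction rather
 * than off a variable:
 *
 *    nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
 *    if (sem.location == VARYING_SLOT_POS && !sem.no_varying) {
 *       // position output still consumed by the next stage
 *    }
 */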
|
|
|
|
|
|
2021-12-19 14:20:52 -05:00
|
|
|
/* Transform feedback info for 2 outputs. nir_intrinsic_store_output contains
|
|
|
|
|
* this structure twice to support up to 4 outputs. The structure is limited
|
|
|
|
|
* to 32 bits because it's stored in nir_intrinsic_instr::const_index[].
|
|
|
|
|
*/
|
|
|
|
|
typedef struct nir_io_xfb {
|
|
|
|
|
struct {
|
|
|
|
|
/* start_component is equal to the index of out[]; add 2 for io_xfb2 */
|
|
|
|
|
/* start_component is not relative to nir_intrinsic_component */
|
|
|
|
|
/* get the stream index from nir_io_semantics */
|
2023-08-08 12:00:35 -05:00
|
|
|
uint8_t num_components : 4; /* max 4; if this is 0, xfb is disabled */
|
|
|
|
|
uint8_t buffer : 4; /* buffer index, max 3 */
|
|
|
|
|
uint8_t offset; /* transform feedback buffer offset in dwords,
|
|
|
|
|
max (1K - 4) bytes */
|
2021-12-19 14:20:52 -05:00
|
|
|
} out[2];
|
|
|
|
|
} nir_io_xfb;
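/* Illustrative sketch (assumed names; the accessors are generated in
 * nir_intrinsics_indices.h): components 0-1 of a store_output live in io_xfb
 * and components 2-3 in io_xfb2, so a gathering pass reads both halves:
 *
 *    nir_io_xfb xfb  = nir_intrinsic_io_xfb(intrin);   // components 0..1
 *    nir_io_xfb xfb2 = nir_intrinsic_io_xfb2(intrin);  // components 2..3
 *    if (xfb.out[0].num_components != 0) {
 *       // component 0 goes to buffer xfb.out[0].buffer at dword offset
 *       // xfb.out[0].offset
 *    }
 */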
|
|
|
|
|
|
|
|
|
|
unsigned
|
|
|
|
|
nir_instr_xfb_write_mask(nir_intrinsic_instr *instr);
|
|
|
|
|
|
2020-05-14 14:40:48 -05:00
|
|
|
#define NIR_INTRINSIC_MAX_INPUTS 11
|
2014-07-31 16:14:51 -07:00
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
const char *name;
|
|
|
|
|
|
2023-08-17 21:02:41 +03:00
|
|
|
/** number of register/SSA inputs */
|
|
|
|
|
uint8_t num_srcs;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2014-12-03 17:03:19 -08:00
|
|
|
/** number of components of each input register
|
|
|
|
|
*
|
|
|
|
|
* If this value is 0, the number of components is given by the
|
2018-11-27 21:30:22 -06:00
|
|
|
* num_components field of nir_intrinsic_instr. If this value is -1, the
|
|
|
|
|
* intrinsic consumes however many components are provided and it is not
|
|
|
|
|
* validated at all.
|
2014-12-03 17:03:19 -08:00
|
|
|
*/
|
2020-05-14 14:50:52 -05:00
|
|
|
int8_t src_components[NIR_INTRINSIC_MAX_INPUTS];
|
2014-07-31 16:14:51 -07:00
|
|
|
|
|
|
|
|
bool has_dest;
|
|
|
|
|
|
2014-12-03 17:03:19 -08:00
|
|
|
/** number of components of the output register
|
|
|
|
|
*
|
|
|
|
|
* If this value is 0, the number of components is given by the
|
|
|
|
|
* num_components field of nir_intrinsic_instr.
|
|
|
|
|
*/
|
2020-05-14 14:50:52 -05:00
|
|
|
uint8_t dest_components;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2018-07-19 13:04:43 +02:00
|
|
|
/** bitfield of legal bit sizes */
|
2020-05-14 14:50:52 -05:00
|
|
|
uint8_t dest_bit_sizes;
|
2018-07-19 13:04:43 +02:00
|
|
|
|
2020-09-07 13:25:59 +01:00
|
|
|
/** source which the destination bit size must match
|
|
|
|
|
*
|
|
|
|
|
* Some intrinsics, such as subgroup intrinsics, are data manipulation
|
|
|
|
|
* intrinsics and they have similar bit-size rules to ALU ops. This enables
|
|
|
|
|
* validation to validate a bit more and enables auto-generated builder code
|
|
|
|
|
* to properly determine destination bit sizes automatically.
|
|
|
|
|
*/
|
|
|
|
|
int8_t bit_size_src;
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
/** the number of constant indices used by the intrinsic */
|
2020-05-14 14:50:52 -05:00
|
|
|
uint8_t num_indices;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2020-09-14 19:51:41 +01:00
|
|
|
/** list of indices */
|
|
|
|
|
uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX];
|
|
|
|
|
|
2016-01-13 18:43:14 -05:00
|
|
|
/** indicates the usage of intr->const_index[n] */
|
2020-05-14 14:50:52 -05:00
|
|
|
uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
|
2016-01-13 18:43:14 -05:00
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
/** semantic flags for calls to this intrinsic */
|
2014-12-19 15:56:55 -08:00
|
|
|
nir_intrinsic_semantic_flag flags;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_intrinsic_info;
|
|
|
|
|
|
|
|
|
|
extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
|
|
|
|
|
|
2021-11-22 11:11:16 -08:00
|
|
|
unsigned
|
|
|
|
|
nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn);
|
2018-03-28 08:32:10 -04:00
|
|
|
|
2021-11-22 11:11:16 -08:00
|
|
|
unsigned
|
|
|
|
|
nir_intrinsic_dest_components(nir_intrinsic_instr *intr);
|
2016-01-13 18:43:14 -05:00
|
|
|
|
2023-06-12 21:58:19 -07:00
|
|
|
nir_alu_type
|
|
|
|
|
nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src);
|
|
|
|
|
|
|
|
|
|
nir_alu_type
|
|
|
|
|
nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin);
|
|
|
|
|
|
2020-05-06 13:35:51 -07:00
|
|
|
/**
|
|
|
|
|
* Helper to copy const_index[] from src to dst, without assuming they
|
|
|
|
|
* match in order.
|
|
|
|
|
*/
|
2021-11-22 11:11:16 -08:00
|
|
|
void nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src);
|
2020-05-06 13:35:51 -07:00
|
|
|
|
2020-11-24 12:51:59 +00:00
|
|
|
#include "nir_intrinsics_indices.h"
|
2018-11-13 09:45:03 -06:00
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
|
nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
|
|
|
|
|
unsigned align_mul, unsigned align_offset)
|
|
|
|
|
{
|
|
|
|
|
assert(util_is_power_of_two_nonzero(align_mul));
|
|
|
|
|
assert(align_offset < align_mul);
|
|
|
|
|
nir_intrinsic_set_align_mul(intrin, align_mul);
|
|
|
|
|
nir_intrinsic_set_align_offset(intrin, align_offset);
|
|
|
|
|
}
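/* Illustrative sketch (assumed names): a lowering pass emitting a load of a
 * struct member from a 16-byte aligned base would record the alignment as:
 *
 *    nir_intrinsic_set_align(load, 16, member_offset % 16);
 *
 * which satisfies the align_offset < align_mul requirement above.
 */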
|
|
|
|
|
|
2023-02-27 08:33:02 -06:00
|
|
|
/** Returns a simple alignment for an align_mul/offset pair
|
|
|
|
|
*
|
|
|
|
|
* This helper converts from the full mul+offset alignment scheme used by
|
|
|
|
|
* most NIR intrinsics to a simple alignment. The returned value is the
|
|
|
|
|
* largest power of two which divides both align_mul and align_offset.
|
|
|
|
|
* For any offset X which satisfies the complex alignment described by
|
|
|
|
|
* align_mul/offset, X % align == 0.
|
|
|
|
|
*/
|
|
|
|
|
static inline uint32_t
|
|
|
|
|
nir_combined_align(uint32_t align_mul, uint32_t align_offset)
|
|
|
|
|
{
|
|
|
|
|
assert(util_is_power_of_two_nonzero(align_mul));
|
|
|
|
|
assert(align_offset < align_mul);
|
|
|
|
|
return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
|
|
|
|
|
}
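/* Worked example: align_mul = 16 with align_offset = 4 describes the offsets
 * 4, 20, 36, ... Each of these is divisible by 4 but not by 8, so
 * nir_combined_align(16, 4) == 4. With align_offset == 0, the full align_mul
 * (16) is returned.
 */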
|
|
|
|
|
|
2018-11-13 09:45:03 -06:00
|
|
|
/** Returns a simple alignment for a load/store intrinsic offset
|
|
|
|
|
*
|
|
|
|
|
* Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL
|
|
|
|
|
* and ALIGN_OFFSET parameters, this helper takes both into account and
|
|
|
|
|
* provides a single simple alignment parameter. The offset X is guaranteed
|
|
|
|
|
* to satisfy X % align == 0.
|
|
|
|
|
*/
|
|
|
|
|
static inline unsigned
|
2018-12-14 11:08:51 +00:00
|
|
|
nir_intrinsic_align(const nir_intrinsic_instr *intrin)
|
2018-11-13 09:45:03 -06:00
|
|
|
{
|
2023-02-27 08:33:02 -06:00
|
|
|
return nir_combined_align(nir_intrinsic_align_mul(intrin),
|
|
|
|
|
nir_intrinsic_align_offset(intrin));
|
2018-11-13 09:45:03 -06:00
|
|
|
}
|
2016-01-13 18:43:14 -05:00
|
|
|
|
2020-05-27 17:08:28 -05:00
|
|
|
static inline bool
|
|
|
|
|
nir_intrinsic_has_align(const nir_intrinsic_instr *intrin)
|
|
|
|
|
{
|
|
|
|
|
return nir_intrinsic_has_align_mul(intrin) &&
|
|
|
|
|
nir_intrinsic_has_align_offset(intrin);
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-05 15:46:40 -08:00
|
|
|
unsigned
|
|
|
|
|
nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
|
|
|
|
|
|
2019-03-28 22:21:46 +01:00
|
|
|
/* Converts a image_deref_* intrinsic into a image_* one */
|
|
|
|
|
void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *handle, bool bindless);
|
2019-03-28 22:21:46 +01:00
|
|
|
|
2019-06-04 13:02:31 +02:00
|
|
|
/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
|
2024-11-25 14:58:20 +00:00
|
|
|
bool nir_intrinsic_can_reorder(nir_intrinsic_instr *instr);
|
2019-06-04 13:02:31 +02:00
|
|
|
|
2020-11-13 00:14:04 -06:00
|
|
|
bool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr);
|
|
|
|
|
|
2022-05-18 20:06:31 +02:00
|
|
|
static inline bool
|
|
|
|
|
nir_intrinsic_is_ray_query(nir_intrinsic_op intrinsic)
|
|
|
|
|
{
|
|
|
|
|
switch (intrinsic) {
|
|
|
|
|
case nir_intrinsic_rq_confirm_intersection:
|
|
|
|
|
case nir_intrinsic_rq_generate_intersection:
|
|
|
|
|
case nir_intrinsic_rq_initialize:
|
|
|
|
|
case nir_intrinsic_rq_load:
|
|
|
|
|
case nir_intrinsic_rq_proceed:
|
|
|
|
|
case nir_intrinsic_rq_terminate:
|
|
|
|
|
return true;
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Texture instruction source type */
|
2023-08-17 20:56:26 +03:00
|
|
|
typedef enum nir_tex_src_type {
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Texture coordinate
|
|
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* Must have :c:member:`nir_tex_instr.coord_components` components.
|
2021-07-07 17:44:27 -05:00
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_tex_src_coord,
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Projector
|
|
|
|
|
*
|
|
|
|
|
* The texture coordinate (except for the array component, if any) is
|
|
|
|
|
* divided by this value before LOD computation and sampling.
|
|
|
|
|
*
|
|
|
|
|
* Must be a float scalar.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_tex_src_projector,
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Shadow comparator
|
|
|
|
|
*
|
|
|
|
|
* For shadow sampling, the fetched texel values are compared against the
|
|
|
|
|
* shadow comparator using the compare op specified by the sampler object
|
|
|
|
|
* and converted to 1.0 if the comparison succeeds and 0.0 if it fails.
|
|
|
|
|
* Interpolation happens after this conversion so the actual result may be
|
|
|
|
|
* anywhere in the range [0.0, 1.0].
|
|
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* Only valid if :c:member:`nir_tex_instr.is_shadow` and must be a float
|
|
|
|
|
* scalar.
|
2021-07-07 17:44:27 -05:00
|
|
|
*/
|
|
|
|
|
nir_tex_src_comparator,
|
|
|
|
|
|
|
|
|
|
/** Coordinate offset
|
|
|
|
|
*
|
|
|
|
|
* An integer value that is added to the texel address before sampling.
|
|
|
|
|
* This is only allowed with operations that take an explicit LOD as it is
|
|
|
|
|
* applied in integer texel space after LOD selection and not normalized
|
|
|
|
|
* coordinate space.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_tex_src_offset,
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** LOD bias
|
|
|
|
|
*
|
|
|
|
|
* This value is added to the computed LOD before mip-mapping.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_tex_src_bias,
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Explicit LOD */
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_tex_src_lod,
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Min LOD
|
|
|
|
|
*
|
|
|
|
|
* The computed LOD is clamped to be at least as large as min_lod before
|
|
|
|
|
* mip-mapping.
|
|
|
|
|
*/
|
2018-10-02 21:15:47 -05:00
|
|
|
nir_tex_src_min_lod,
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** MSAA sample index */
|
|
|
|
|
nir_tex_src_ms_index,
|
|
|
|
|
|
|
|
|
|
/** Intel-specific MSAA compression data */
|
|
|
|
|
nir_tex_src_ms_mcs_intel,
|
|
|
|
|
|
|
|
|
|
/** Explicit horizontal (X-major) coordinate derivative */
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_tex_src_ddx,
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Explicit vertical (Y-major) coordinate derivative */
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_tex_src_ddy,
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Texture variable dereference */
|
|
|
|
|
nir_tex_src_texture_deref,
|
|
|
|
|
|
|
|
|
|
/** Sampler variable dereference */
|
|
|
|
|
nir_tex_src_sampler_deref,
|
|
|
|
|
|
|
|
|
|
/** Texture index offset
|
|
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* This is added to :c:member:`nir_tex_instr.texture_index`. Unless
|
|
|
|
|
* :c:member:`nir_tex_instr.texture_non_uniform` is set, this is guaranteed
|
|
|
|
|
* to be dynamically uniform.
|
2021-07-07 17:44:27 -05:00
|
|
|
*/
|
|
|
|
|
nir_tex_src_texture_offset,
|
|
|
|
|
|
|
|
|
|
/** Dynamically uniform sampler index offset
|
|
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* This is added to :c:member:`nir_tex_instr.sampler_index`. Unless
|
|
|
|
|
* :c:member:`nir_tex_instr.sampler_non_uniform` is set, this is guaranteed to be
|
2021-11-01 09:32:03 -07:00
|
|
|
* dynamically uniform. This should not be present until GLSL ES 3.20, GLSL
|
|
|
|
|
* 4.00, or ARB_gpu_shader5, because the ES 3.10 and GL 3.30 specs say
|
|
|
|
|
* "When aggregated into arrays within a shader, samplers can only be indexed
|
|
|
|
|
* with a constant integral expression."
|
2021-07-07 17:44:27 -05:00
|
|
|
*/
|
|
|
|
|
nir_tex_src_sampler_offset,
|
|
|
|
|
|
|
|
|
|
/** Bindless texture handle
|
|
|
|
|
*
|
|
|
|
|
* This is, unfortunately, a bit overloaded at the moment. There are
|
|
|
|
|
* generally two types of bindless handles:
|
|
|
|
|
*
|
|
|
|
|
* 1. GL_ARB_bindless_texture handles. These are part of the
|
|
|
|
|
* GL/Gallium-level API and are always a 64-bit integer.
|
|
|
|
|
*
|
|
|
|
|
* 2. HW-specific handles. GL_ARB_bindless handles may be lowered to
|
|
|
|
|
* these. Also, these are used by many Vulkan drivers to implement
|
|
|
|
|
* descriptor sets, especially for UPDATE_AFTER_BIND descriptors.
|
|
|
|
|
* The details of hardware handles (bit size, format, etc.) is
|
|
|
|
|
* HW-specific.
|
|
|
|
|
*
|
|
|
|
|
* Because of this overloading and the resulting ambiguity, we currently
|
|
|
|
|
* don't validate anything for these.
|
|
|
|
|
*/
|
|
|
|
|
nir_tex_src_texture_handle,
|
|
|
|
|
|
|
|
|
|
/** Bindless sampler handle
|
|
|
|
|
*
|
|
|
|
|
* See nir_tex_src_texture_handle.
|
|
|
|
|
*/
|
|
|
|
|
nir_tex_src_sampler_handle,
|
|
|
|
|
|
2024-07-23 13:04:10 +10:00
|
|
|
/** Tex src intrinsic
|
|
|
|
|
*
|
|
|
|
|
* This is an intrinsic used before function inlining i.e. before we know
|
|
|
|
|
* if a bindless value has been given as function param for use as a tex
|
|
|
|
|
* src.
|
|
|
|
|
*/
|
|
|
|
|
nir_tex_src_sampler_deref_intrinsic,
|
|
|
|
|
nir_tex_src_texture_deref_intrinsic,
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Plane index for multi-plane YCbCr textures */
|
|
|
|
|
nir_tex_src_plane,
|
|
|
|
|
|
2021-07-14 16:41:58 -07:00
|
|
|
/**
|
|
|
|
|
* Backend-specific vec4 tex src argument.
|
|
|
|
|
*
|
2023-08-01 10:33:11 -04:00
|
|
|
* Can be used to have NIR optimization (copy propagation, lower_vec_to_regs)
|
2021-07-14 16:41:58 -07:00
|
|
|
* apply to the packing of the tex srcs. This lowering must only happen
|
|
|
|
|
* after nir_lower_tex().
|
|
|
|
|
*
|
|
|
|
|
* The nir_tex_instr_src_type() of this argument is float, so no lowering
|
|
|
|
|
* will happen if nir_lower_int_to_float is used.
|
|
|
|
|
*/
|
|
|
|
|
nir_tex_src_backend1,
|
|
|
|
|
|
|
|
|
|
/** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. */
|
|
|
|
|
nir_tex_src_backend2,
|
|
|
|
|
|
2015-01-09 20:01:13 -08:00
|
|
|
nir_num_tex_src_types
|
|
|
|
|
} nir_tex_src_type;
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** A texture instruction source */
|
2023-08-17 20:56:26 +03:00
|
|
|
typedef struct nir_tex_src {
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Base source */
|
2015-01-09 20:01:13 -08:00
|
|
|
nir_src src;
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Type of this source */
|
2015-01-09 20:01:13 -08:00
|
|
|
nir_tex_src_type src_type;
|
|
|
|
|
} nir_tex_src;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Texture instruction opcode */
|
2023-08-17 20:56:26 +03:00
|
|
|
typedef enum nir_texop {
|
2023-08-17 21:02:41 +03:00
|
|
|
/** Regular texture look-up */
|
|
|
|
|
nir_texop_tex,
|
|
|
|
|
/** Texture look-up with LOD bias */
|
|
|
|
|
nir_texop_txb,
|
|
|
|
|
/** Texture look-up with explicit LOD */
|
|
|
|
|
nir_texop_txl,
|
|
|
|
|
/** Texture look-up with partial derivatives */
|
|
|
|
|
nir_texop_txd,
|
|
|
|
|
/** Texel fetch with explicit LOD */
|
|
|
|
|
nir_texop_txf,
|
|
|
|
|
/** Multisample texture fetch */
|
|
|
|
|
nir_texop_txf_ms,
|
|
|
|
|
/** Multisample texture fetch from framebuffer */
|
|
|
|
|
nir_texop_txf_ms_fb,
|
|
|
|
|
/** Multisample compression value fetch */
|
|
|
|
|
nir_texop_txf_ms_mcs_intel,
|
|
|
|
|
/** Texture size */
|
|
|
|
|
nir_texop_txs,
|
|
|
|
|
/** Texture lod query */
|
|
|
|
|
nir_texop_lod,
|
|
|
|
|
/** Texture gather */
|
|
|
|
|
nir_texop_tg4,
|
|
|
|
|
/** Texture levels query */
|
|
|
|
|
nir_texop_query_levels,
|
|
|
|
|
/** Texture samples query */
|
|
|
|
|
nir_texop_texture_samples,
|
|
|
|
|
/** Query whether all samples are definitely identical. */
|
|
|
|
|
nir_texop_samples_identical,
|
|
|
|
|
/** Regular texture look-up, eligible for pre-dispatch */
|
|
|
|
|
nir_texop_tex_prefetch,
|
|
|
|
|
/** Multisample fragment color texture fetch */
|
|
|
|
|
nir_texop_fragment_fetch_amd,
|
|
|
|
|
/** Multisample fragment mask texture fetch */
|
|
|
|
|
nir_texop_fragment_mask_fetch_amd,
|
|
|
|
|
/** Returns a buffer or image descriptor. */
|
|
|
|
|
nir_texop_descriptor_amd,
|
|
|
|
|
/** Returns a sampler descriptor. */
|
|
|
|
|
nir_texop_sampler_descriptor_amd,
|
|
|
|
|
/** Returns the sampler's LOD bias */
|
|
|
|
|
nir_texop_lod_bias_agx,
|
2024-04-27 11:34:19 -04:00
|
|
|
/** Returns a bool indicating that the sampler uses a custom border colour */
|
|
|
|
|
nir_texop_has_custom_border_color_agx,
|
|
|
|
|
/** Returns the sampler's custom border colour (if has_custom_border_agx) */
|
|
|
|
|
nir_texop_custom_border_color_agx,
|
2023-08-17 21:02:41 +03:00
|
|
|
/** Maps to TXQ.DIMENSION */
|
|
|
|
|
nir_texop_hdr_dim_nv,
|
|
|
|
|
/** Maps to TXQ.TEXTURE_TYPE */
|
|
|
|
|
nir_texop_tex_type_nv,
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_texop;
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Represents a texture instruction */
|
2023-08-17 20:56:26 +03:00
|
|
|
typedef struct nir_tex_instr {
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Base instruction */
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_instr instr;
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Dimensionality of the texture operation
|
|
|
|
|
*
|
|
|
|
|
* This will typically match the dimensionality of the texture deref type
|
|
|
|
|
* if a nir_tex_src_texture_deref is present. However, it may not if
|
|
|
|
|
* texture lowering has occurred.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
enum glsl_sampler_dim sampler_dim;
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** ALU type of the destination
|
|
|
|
|
*
|
|
|
|
|
* This is the canonical sampled type for this texture operation and may
|
|
|
|
|
* not exactly match the sampled type of the deref type when a
|
|
|
|
|
* nir_tex_src_texture_deref is present. For OpenCL, the sampled type of
|
|
|
|
|
* the texture deref will be GLSL_TYPE_VOID and this is allowed to be
|
|
|
|
|
* anything. With SPIR-V, the signedness of integer types is allowed to
|
|
|
|
|
* differ. For all APIs, the bit size may differ if the driver has done
|
|
|
|
|
* any sort of mediump or similar lowering since texture types always have
|
|
|
|
|
* 32-bit sampled types.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_alu_type dest_type;
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Texture opcode */
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_texop op;
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Destination */
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def def;
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Array of sources
|
|
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* This array has :c:member:`nir_tex_instr.num_srcs` elements
|
2021-07-07 17:44:27 -05:00
|
|
|
*/
|
2015-01-09 20:01:13 -08:00
|
|
|
nir_tex_src *src;
|
2021-07-07 17:44:27 -05:00
|
|
|
|
|
|
|
|
/** Number of sources */
|
|
|
|
|
unsigned num_srcs;
|
|
|
|
|
|
|
|
|
|
/** Number of components in the coordinate, if any */
|
|
|
|
|
unsigned coord_components;
|
|
|
|
|
|
|
|
|
|
/** True if the texture instruction acts on an array texture */
|
|
|
|
|
bool is_array;
|
|
|
|
|
|
|
|
|
|
/** True if the texture instruction performs a shadow comparison
|
|
|
|
|
*
|
|
|
|
|
* If this is true, the texture instruction must have a
|
|
|
|
|
* nir_tex_src_comparator.
|
|
|
|
|
*/
|
|
|
|
|
bool is_shadow;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
|
|
|
|
/**
|
2021-07-07 17:44:27 -05:00
|
|
|
* If is_shadow is true, whether this is the old-style shadow that outputs
|
|
|
|
|
* 4 components or the new-style shadow that outputs 1 component.
|
2014-07-31 16:14:51 -07:00
|
|
|
*/
|
|
|
|
|
bool is_new_style_shadow;
|
|
|
|
|
|
2020-11-20 15:10:42 +00:00
|
|
|
/**
|
2021-07-07 17:44:27 -05:00
|
|
|
* True if this texture instruction should return a sparse residency code.
|
|
|
|
|
* The code is in the last component of the result.
|
2020-11-20 15:10:42 +00:00
|
|
|
*/
|
|
|
|
|
bool is_sparse;
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** nir_texop_tg4 component selector
|
|
|
|
|
*
|
|
|
|
|
* This determines which RGBA component is gathered.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
unsigned component : 2;
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Validation needs to know this for gradient component count */
|
2021-01-13 10:28:59 +01:00
|
|
|
unsigned array_is_lowered_cube : 1;
|
|
|
|
|
|
2023-04-13 14:13:35 +01:00
|
|
|
/** True if this tg4 instruction has an implicit LOD or LOD bias, instead of using level 0 */
|
|
|
|
|
unsigned is_gather_implicit_lod : 1;
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Gather offsets */
|
2019-03-18 21:23:59 +01:00
|
|
|
int8_t tg4_offsets[4][2];
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** True if the texture index or handle is not dynamically uniform */
|
2019-02-27 14:36:44 -06:00
|
|
|
bool texture_non_uniform;
|
|
|
|
|
|
2021-11-01 09:32:03 -07:00
|
|
|
/** True if the sampler index or handle is not dynamically uniform.
|
|
|
|
|
*
|
|
|
|
|
* This may be set when VK_EXT_descriptor_indexing is supported and the
|
|
|
|
|
* appropriate capability is enabled.
|
|
|
|
|
*
|
|
|
|
|
* This should always be false in GLSL (GLSL ES 3.20 says "When aggregated
|
|
|
|
|
* into arrays within a shader, opaque types can only be indexed with a
|
|
|
|
|
* dynamically uniform integral expression", and GLSL 4.60 says "When
|
|
|
|
|
* aggregated into arrays within a shader, [texture, sampler, and
|
|
|
|
|
* samplerShadow] types can only be indexed with a dynamically uniform
|
|
|
|
|
* expression, or texture lookup will result in undefined values.").
|
|
|
|
|
*/
|
2019-02-27 14:36:44 -06:00
|
|
|
bool sampler_non_uniform;
|
|
|
|
|
|
2016-02-06 09:05:10 -08:00
|
|
|
/** The texture index
|
2014-12-05 14:46:24 -08:00
|
|
|
*
|
2016-02-06 09:05:10 -08:00
|
|
|
* If this texture instruction has a nir_tex_src_texture_offset source,
|
|
|
|
|
* then the texture index is given by texture_index + texture_offset.
|
2014-12-05 14:46:24 -08:00
|
|
|
*/
|
2016-02-06 09:05:10 -08:00
|
|
|
unsigned texture_index;
|
2014-12-05 14:46:24 -08:00
|
|
|
|
2015-11-02 17:58:29 -08:00
|
|
|
/** The sampler index
|
|
|
|
|
*
|
|
|
|
|
* The following operations do not require a sampler and, as such, this
|
|
|
|
|
* field should be ignored:
|
2023-08-20 20:42:34 +03:00
|
|
|
*
|
2015-11-02 17:58:29 -08:00
|
|
|
* - nir_texop_txf
|
|
|
|
|
* - nir_texop_txf_ms
|
|
|
|
|
* - nir_texop_txs
|
|
|
|
|
* - nir_texop_query_levels
|
|
|
|
|
* - nir_texop_texture_samples
|
|
|
|
|
* - nir_texop_samples_identical
|
|
|
|
|
*
|
|
|
|
|
* If this texture instruction has a nir_tex_src_sampler_offset source,
|
|
|
|
|
* then the sampler index is given by sampler_index + sampler_offset.
|
|
|
|
|
*/
|
|
|
|
|
unsigned sampler_index;
|
2023-04-04 15:32:00 -05:00
|
|
|
|
|
|
|
|
/* Back-end specific flags, intended to be used in combination with
|
|
|
|
|
* nir_tex_src_backend1/2 to provide additional hw-specific information
|
|
|
|
|
* to the back-end compiler.
|
|
|
|
|
*/
|
|
|
|
|
uint32_t backend_flags;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_tex_instr;
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/**
|
|
|
|
|
* Returns true if the texture operation requires a sampler as a general rule
|
2020-04-22 23:32:47 +02:00
|
|
|
*
|
|
|
|
|
* Note that the specific hw/driver backend could still require a sampler
|
|
|
|
|
* object/configuration packet in any case, for some other reason.
|
2021-07-07 17:44:27 -05:00
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* See also :c:member:`nir_tex_instr.sampler_index`.
|
2020-04-22 23:32:47 +02:00
|
|
|
*/
|
2021-11-22 11:11:16 -08:00
|
|
|
bool nir_tex_instr_need_sampler(const nir_tex_instr *instr);
|
2020-04-22 23:32:47 +02:00
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Returns the number of components returned by this nir_tex_instr
|
|
|
|
|
*
|
|
|
|
|
* Useful for code building texture instructions when you don't want to think
|
|
|
|
|
* about how many components a particular texture op returns. This does not
|
|
|
|
|
* include the sparse residency code.
|
|
|
|
|
*/
|
2021-11-22 11:11:16 -08:00
|
|
|
unsigned
|
|
|
|
|
nir_tex_instr_result_size(const nir_tex_instr *instr);
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/**
|
|
|
|
|
* Returns the destination size of this nir_tex_instr including the sparse
|
|
|
|
|
* residency code, if any.
|
|
|
|
|
*/
|
2020-11-20 15:10:42 +00:00
|
|
|
static inline unsigned
|
|
|
|
|
nir_tex_instr_dest_size(const nir_tex_instr *instr)
|
|
|
|
|
{
|
|
|
|
|
/* One more component is needed for the residency code. */
|
|
|
|
|
return nir_tex_instr_result_size(instr) + instr->is_sparse;
|
|
|
|
|
}
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/**
|
|
|
|
|
* Returns true if this texture operation queries something about the texture
|
2015-11-11 18:30:09 -08:00
|
|
|
* rather than actually sampling it.
|
|
|
|
|
*/
|
2021-11-22 11:11:16 -08:00
|
|
|
bool
|
|
|
|
|
nir_tex_instr_is_query(const nir_tex_instr *instr);
|
2015-11-11 18:30:09 -08:00
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Returns true if this texture instruction does implicit derivatives
|
|
|
|
|
*
|
|
|
|
|
* This is important as there are extra control-flow rules around derivatives
|
|
|
|
|
* and texture instructions which perform them implicitly.
|
|
|
|
|
*/
|
2021-11-22 11:11:16 -08:00
|
|
|
bool
|
|
|
|
|
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr);
|
2019-06-07 17:58:15 -05:00
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Returns the ALU type of the given texture instruction source */
|
2021-11-22 11:11:16 -08:00
|
|
|
nir_alu_type
|
|
|
|
|
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src);
|
2016-05-03 20:18:50 -07:00
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/**
|
|
|
|
|
* Returns the number of components required by the given texture instruction
|
|
|
|
|
* source
|
|
|
|
|
*/
|
2021-11-22 11:11:16 -08:00
|
|
|
unsigned
|
|
|
|
|
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src);
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/**
|
|
|
|
|
* Returns the index of the texture instruction source with the given
|
|
|
|
|
* nir_tex_src_type or -1 if no such source exists.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
static inline int
|
2017-06-07 02:19:15 +03:00
|
|
|
nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
|
2014-07-31 16:14:51 -07:00
|
|
|
{
|
|
|
|
|
for (unsigned i = 0; i < instr->num_srcs; i++)
|
2015-01-09 20:01:13 -08:00
|
|
|
if (instr->src[i].src_type == type)
|
2023-08-08 12:00:35 -05:00
|
|
|
return (int)i;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
|
}
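/* Illustrative sketch (assumed names): fetching a particular source, e.g. the
 * coordinate, from a texture instruction:
 *
 *    int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
 *    if (coord_idx >= 0) {
 *       nir_def *coord = tex->src[coord_idx].src.ssa;
 *       // coord has tex->coord_components components
 *    }
 */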
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Adds a source to a texture instruction */
|
2017-10-16 08:50:23 -07:00
|
|
|
void nir_tex_instr_add_src(nir_tex_instr *tex,
|
|
|
|
|
nir_tex_src_type src_type,
|
2023-08-16 10:44:46 -05:00
|
|
|
nir_def *src);
|
2017-10-16 08:50:23 -07:00
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Removes a source from a texture instruction */
|
2016-09-08 14:07:06 -04:00
|
|
|
void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);
|
|
|
|
|
|
2019-03-18 21:23:59 +01:00
|
|
|
bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct {
|
|
|
|
|
nir_instr instr;
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def def;
|
2019-03-27 00:59:03 +01:00
|
|
|
|
|
|
|
|
nir_const_value value[];
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_load_const_instr;
|
|
|
|
|
|
|
|
|
|
typedef enum {
|
2020-05-18 14:26:30 -05:00
|
|
|
/** Return from a function
|
|
|
|
|
*
|
|
|
|
|
* This instruction is a classic function return. It jumps to
|
|
|
|
|
* nir_function_impl::end_block. No return value is provided in this
|
|
|
|
|
* instruction. Instead, the function is expected to write any return
|
|
|
|
|
* data to a deref passed in from the caller.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_jump_return,
|
2020-05-18 14:26:30 -05:00
|
|
|
|
2020-05-15 15:46:08 -05:00
|
|
|
/** Immediately exit the current shader
|
|
|
|
|
*
|
|
|
|
|
* This instruction is roughly the equivalent of C's "exit()" in that it
|
|
|
|
|
* immediately terminates the current shader invocation. From a CFG
|
|
|
|
|
* perspective, it looks like a jump to nir_function_impl::end_block but
|
|
|
|
|
* it actually jumps to the end block of the shader entrypoint. A halt
|
|
|
|
|
* instruction in the shader entrypoint itself is semantically identical
|
|
|
|
|
* to a return.
|
|
|
|
|
*
|
|
|
|
|
* For shaders with built-in I/O, any outputs written prior to a halt
|
|
|
|
|
* instruction remain written and any outputs not written prior to the
|
|
|
|
|
* halt have undefined values. It does NOT cause an implicit discard of
|
|
|
|
|
* written results. If one wants to discard results in a fragment shader,
|
|
|
|
|
* for instance, a discard or demote intrinsic is required.
|
|
|
|
|
*/
|
|
|
|
|
nir_jump_halt,
|
|
|
|
|
|
2020-05-18 14:26:30 -05:00
|
|
|
/** Break out of the inner-most loop
|
|
|
|
|
*
|
|
|
|
|
* This has the same semantics as C's "break" statement.
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_jump_break,
|
2020-05-18 14:26:30 -05:00
|
|
|
|
|
|
|
|
/** Jump back to the top of the inner-most loop
|
|
|
|
|
*
|
|
|
|
|
* This has the same semantics as C's "continue" statement assuming that a
|
|
|
|
|
* NIR loop is implemented as "while (1) { body }".
|
|
|
|
|
*/
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_jump_continue,
|
2020-07-02 14:32:04 +02:00
|
|
|
|
|
|
|
|
/** Jumps for unstructured CFG.
|
|
|
|
|
*
|
|
|
|
|
* As within an unstructured CFG we can't rely on block ordering, we need to
|
|
|
|
|
* place explicit jumps at the end of every block.
|
|
|
|
|
*/
|
|
|
|
|
nir_jump_goto,
|
|
|
|
|
nir_jump_goto_if,
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_jump_type;
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
nir_instr instr;
|
|
|
|
|
nir_jump_type type;
|
2020-07-02 14:32:04 +02:00
|
|
|
nir_src condition;
|
|
|
|
|
struct nir_block *target;
|
|
|
|
|
struct nir_block *else_target;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_jump_instr;
|
|
|
|
|
|
|
|
|
|
/* creates a new SSA variable in an undefined state */
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
nir_instr instr;
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def def;
|
|
|
|
|
} nir_undef_instr;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
struct exec_node node;
|
2014-12-18 17:13:22 -08:00
|
|
|
|
|
|
|
|
/* The predecessor block corresponding to this source */
|
2014-07-31 16:14:51 -07:00
|
|
|
struct nir_block *pred;
|
2014-12-18 17:13:22 -08:00
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_src src;
|
|
|
|
|
} nir_phi_src;
|
|
|
|
|
|
2016-04-26 20:16:21 -07:00
|
|
|
#define nir_foreach_phi_src(phi_src, phi) \
|
|
|
|
|
foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs)
|
|
|
|
|
#define nir_foreach_phi_src_safe(phi_src, phi) \
|
|
|
|
|
foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs)
|
2015-01-20 16:30:14 -08:00
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct {
|
|
|
|
|
nir_instr instr;
|
|
|
|
|
|
2023-08-17 21:02:41 +03:00
|
|
|
/** list of nir_phi_src */
|
|
|
|
|
struct exec_list srcs;
|
2014-12-18 17:13:22 -08:00
|
|
|
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def def;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_phi_instr;
|
|
|
|
|
|
2020-11-11 16:34:06 +01:00
|
|
|
static inline nir_phi_src *
|
|
|
|
|
nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
nir_foreach_phi_src(src, phi) {
|
|
|
|
|
if (src->pred == block)
|
|
|
|
|
return src;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(!"Block is not a predecessor of phi.");
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-10-30 21:04:15 -07:00
|
|
|
typedef struct {
|
|
|
|
|
struct exec_node node;
|
2023-06-09 09:36:22 -04:00
|
|
|
bool src_is_reg;
|
|
|
|
|
bool dest_is_reg;
|
2014-10-30 21:04:15 -07:00
|
|
|
nir_src src;
|
2023-06-09 09:36:22 -04:00
|
|
|
union {
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def def;
|
2023-06-09 09:36:22 -04:00
|
|
|
nir_src reg;
|
|
|
|
|
} dest;
|
2014-12-17 16:53:04 -08:00
|
|
|
} nir_parallel_copy_entry;
|
|
|
|
|
|
2016-04-26 20:21:27 -07:00
|
|
|
#define nir_foreach_parallel_copy_entry(entry, pcopy) \
|
2014-12-17 16:53:04 -08:00
|
|
|
foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
|
2014-10-30 21:04:15 -07:00
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
nir_instr instr;
|
2014-12-17 16:53:04 -08:00
|
|
|
|
2017-02-27 17:21:42 -08:00
|
|
|
/* A list of nir_parallel_copy_entrys. The sources of all of the
|
2014-12-17 16:53:04 -08:00
|
|
|
* entries are copied to the corresponding destinations "in parallel".
|
|
|
|
|
* In other words, if we have two entries: a -> b and b -> a, the values
|
|
|
|
|
* get swapped.
|
|
|
|
|
*/
|
|
|
|
|
struct exec_list entries;
|
2014-10-30 21:04:15 -07:00
|
|
|
} nir_parallel_copy_instr;
|
|
|
|
|
|
2024-05-19 17:29:21 +02:00
|
|
|
typedef enum nir_debug_info_type {
|
|
|
|
|
nir_debug_info_src_loc,
|
|
|
|
|
nir_debug_info_string,
|
|
|
|
|
} nir_debug_info_type;
|
|
|
|
|
|
|
|
|
|
typedef enum nir_debug_info_source {
|
|
|
|
|
nir_debug_info_spirv,
|
|
|
|
|
nir_debug_info_nir,
|
|
|
|
|
} nir_debug_info_source;
|
|
|
|
|
|
|
|
|
|
typedef struct nir_debug_info_instr {
|
|
|
|
|
nir_instr instr;
|
|
|
|
|
|
|
|
|
|
nir_debug_info_type type;
|
|
|
|
|
|
|
|
|
|
union {
|
|
|
|
|
struct {
|
|
|
|
|
nir_src filename;
|
|
|
|
|
/* 0 if only the spirv_offset is available. */
|
|
|
|
|
uint32_t line;
|
|
|
|
|
uint32_t column;
|
|
|
|
|
|
|
|
|
|
uint32_t spirv_offset;
|
|
|
|
|
|
|
|
|
|
nir_debug_info_source source;
|
|
|
|
|
} src_loc;
|
|
|
|
|
|
|
|
|
|
uint16_t string_length;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
nir_def def;
|
|
|
|
|
|
|
|
|
|
char string[];
|
|
|
|
|
} nir_debug_info_instr;
|
|
|
|
|
|
2016-10-05 18:09:25 -07:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
|
|
|
|
|
type, nir_instr_type_alu)
|
2019-04-19 15:09:04 -05:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
|
|
|
|
|
type, nir_instr_type_deref)
|
2016-10-05 18:09:25 -07:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr,
|
|
|
|
|
type, nir_instr_type_call)
|
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr,
|
|
|
|
|
type, nir_instr_type_jump)
|
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr,
|
|
|
|
|
type, nir_instr_type_tex)
|
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
|
|
|
|
|
type, nir_instr_type_intrinsic)
|
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
|
|
|
|
|
type, nir_instr_type_load_const)
|
2023-08-15 09:59:06 -05:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_undef, nir_instr, nir_undef_instr, instr,
|
|
|
|
|
type, nir_instr_type_undef)
|
2016-10-05 18:09:25 -07:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
|
|
|
|
|
type, nir_instr_type_phi)
|
2014-12-05 11:00:05 -08:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
|
2016-10-05 18:09:25 -07:00
|
|
|
nir_parallel_copy_instr, instr,
|
|
|
|
|
type, nir_instr_type_parallel_copy)
|
2024-05-19 17:29:21 +02:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_debug_info, nir_instr,
|
|
|
|
|
nir_debug_info_instr, instr,
|
|
|
|
|
type, nir_instr_type_debug_info)
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define NIR_DEFINE_SRC_AS_CONST(type, suffix) \
|
|
|
|
|
static inline type \
|
|
|
|
|
nir_src_comp_as_##suffix(nir_src src, unsigned comp) \
|
|
|
|
|
{ \
|
|
|
|
|
assert(nir_src_is_const(src)); \
|
|
|
|
|
nir_load_const_instr *load = \
|
|
|
|
|
nir_instr_as_load_const(src.ssa->parent_instr); \
|
|
|
|
|
assert(comp < load->def.num_components); \
|
|
|
|
|
return nir_const_value_as_##suffix(load->value[comp], \
|
|
|
|
|
load->def.bit_size); \
|
|
|
|
|
} \
|
|
|
|
|
\
|
|
|
|
|
static inline type \
|
|
|
|
|
nir_src_as_##suffix(nir_src src) \
|
|
|
|
|
{ \
|
|
|
|
|
assert(nir_src_num_components(src) == 1); \
|
|
|
|
|
return nir_src_comp_as_##suffix(src, 0); \
|
|
|
|
|
}
|
2019-06-25 20:33:46 -05:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
NIR_DEFINE_SRC_AS_CONST(int64_t, int)
|
|
|
|
|
NIR_DEFINE_SRC_AS_CONST(uint64_t, uint)
|
|
|
|
|
NIR_DEFINE_SRC_AS_CONST(bool, bool)
|
|
|
|
|
NIR_DEFINE_SRC_AS_CONST(double, float)
|
2019-06-25 20:33:46 -05:00
|
|
|
|
|
|
|
|
#undef NIR_DEFINE_SRC_AS_CONST
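/* Illustrative sketch (assumed names): these helpers are normally guarded by
 * nir_src_is_const(), e.g. when folding a constant offset source:
 *
 *    if (nir_src_is_const(intrin->src[1])) {
 *       uint64_t offset = nir_src_as_uint(intrin->src[1]);
 *       // fold offset into the base
 *    }
 */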
|
|
|
|
|
|
2019-06-20 11:12:54 -05:00
|
|
|
typedef struct {
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *def;
|
2019-06-20 11:12:54 -05:00
|
|
|
unsigned comp;
|
2023-08-12 16:17:15 -04:00
|
|
|
} nir_scalar;
|
2019-06-20 11:12:54 -05:00
|
|
|
|
|
|
|
|
static inline bool
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar_is_const(nir_scalar s)
|
2019-06-20 11:12:54 -05:00
|
|
|
{
|
|
|
|
|
return s.def->parent_instr->type == nir_instr_type_load_const;
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-22 16:32:36 +02:00
|
|
|
static inline bool
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar_is_undef(nir_scalar s)
|
2022-08-22 16:32:36 +02:00
|
|
|
{
|
2023-08-15 09:59:06 -05:00
|
|
|
return s.def->parent_instr->type == nir_instr_type_undef;
|
2022-08-22 16:32:36 +02:00
|
|
|
}
|
|
|
|
|
|
2019-06-20 11:12:54 -05:00
|
|
|
static inline nir_const_value
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar_as_const_value(nir_scalar s)
|
2019-06-20 11:12:54 -05:00
|
|
|
{
|
|
|
|
|
assert(s.comp < s.def->num_components);
|
|
|
|
|
nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
|
|
|
|
|
return load->value[s.comp];
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \
|
|
|
|
|
static inline type \
|
|
|
|
|
nir_scalar_as_##suffix(nir_scalar s) \
|
|
|
|
|
{ \
|
|
|
|
|
return nir_const_value_as_##suffix( \
|
|
|
|
|
nir_scalar_as_const_value(s), s.def->bit_size); \
|
2023-08-08 12:00:35 -05:00
|
|
|
}
|
2019-06-20 11:12:54 -05:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
NIR_DEFINE_SCALAR_AS_CONST(int64_t, int)
|
|
|
|
|
NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint)
|
|
|
|
|
NIR_DEFINE_SCALAR_AS_CONST(bool, bool)
|
|
|
|
|
NIR_DEFINE_SCALAR_AS_CONST(double, float)
|
2019-06-20 11:12:54 -05:00
|
|
|
|
|
|
|
|
#undef NIR_DEFINE_SCALAR_AS_CONST
|
|
|
|
|
|
|
|
|
|
static inline bool
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar_is_alu(nir_scalar s)
|
2019-06-20 11:12:54 -05:00
|
|
|
{
|
|
|
|
|
return s.def->parent_instr->type == nir_instr_type_alu;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_op
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar_alu_op(nir_scalar s)
|
2019-06-20 11:12:54 -05:00
|
|
|
{
|
|
|
|
|
return nir_instr_as_alu(s.def->parent_instr)->op;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-13 00:03:03 +02:00
|
|
|
static inline bool
|
|
|
|
|
nir_scalar_is_intrinsic(nir_scalar s)
|
|
|
|
|
{
|
|
|
|
|
return s.def->parent_instr->type == nir_instr_type_intrinsic;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_intrinsic_op
|
|
|
|
|
nir_scalar_intrinsic_op(nir_scalar s)
|
|
|
|
|
{
|
|
|
|
|
return nir_instr_as_intrinsic(s.def->parent_instr)->intrinsic;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
static inline nir_scalar
|
|
|
|
|
nir_scalar_chase_alu_src(nir_scalar s, unsigned alu_src_idx)
|
2019-06-20 11:12:54 -05:00
|
|
|
{
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar out = { NULL, 0 };
|
2019-06-20 11:12:54 -05:00
|
|
|
|
|
|
|
|
nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
|
|
|
|
|
assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);
|
|
|
|
|
|
|
|
|
|
/* Our component must be written */
|
|
|
|
|
assert(s.comp < s.def->num_components);
|
|
|
|
|
|
|
|
|
|
out.def = alu->src[alu_src_idx].src.ssa;
|
|
|
|
|
|
|
|
|
|
if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) {
|
|
|
|
|
/* The ALU src is unsized so the source component follows the
|
|
|
|
|
* destination component.
|
|
|
|
|
*/
|
|
|
|
|
out.comp = alu->src[alu_src_idx].swizzle[s.comp];
|
|
|
|
|
} else {
|
|
|
|
|
/* This is a sized source so all source components work together to
|
|
|
|
|
* produce all the destination components. Since we need to return a
|
|
|
|
|
* scalar, this only works if the source is a scalar.
|
|
|
|
|
*/
|
|
|
|
|
assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1);
|
|
|
|
|
out.comp = alu->src[alu_src_idx].swizzle[0];
|
|
|
|
|
}
|
|
|
|
|
assert(out.comp < out.def->num_components);
|
|
|
|
|
|
|
|
|
|
return out;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar nir_scalar_chase_movs(nir_scalar s);
|
2021-06-25 13:58:04 -07:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
static inline nir_scalar
|
2023-08-15 10:07:24 -05:00
|
|
|
nir_get_scalar(nir_def *def, unsigned channel)
|
2022-02-03 11:15:59 -08:00
|
|
|
{
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar s = { def, channel };
|
2022-02-03 11:15:59 -08:00
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
/** Returns a nir_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
|
|
|
|
|
static inline nir_scalar
|
|
|
|
|
nir_scalar_resolved(nir_def *def, unsigned channel)
|
2021-06-25 13:58:04 -07:00
|
|
|
{
|
2023-08-15 10:07:24 -05:00
|
|
|
return nir_scalar_chase_movs(nir_get_scalar(def, channel));
|
2021-06-25 13:58:04 -07:00
|
|
|
}
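/* Illustrative sketch (assumed names): chasing a value through mov/vec copies
 * before testing it for constness:
 *
 *    nir_scalar s = nir_scalar_resolved(alu->src[0].src.ssa,
 *                                       alu->src[0].swizzle[0]);
 *    if (nir_scalar_is_const(s)) {
 *       uint64_t value = nir_scalar_as_uint(s);
 *       // ...
 *    }
 */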
|
|
|
|
|
|
2023-08-13 00:14:29 +02:00
|
|
|
static inline bool
|
|
|
|
|
nir_scalar_equal(nir_scalar s1, nir_scalar s2)
|
|
|
|
|
{
|
|
|
|
|
return s1.def == s2.def && s1.comp == s2.comp;
|
|
|
|
|
}
|
|
|
|
|
|
2023-04-25 14:39:23 -04:00
|
|
|
static inline uint64_t
|
|
|
|
|
nir_alu_src_as_uint(nir_alu_src src)
|
|
|
|
|
{
|
2023-08-15 10:07:24 -05:00
|
|
|
nir_scalar scalar = nir_get_scalar(src.src.ssa, src.swizzle[0]);
|
2023-08-12 16:17:15 -04:00
|
|
|
return nir_scalar_as_uint(scalar);
|
2023-04-25 14:39:23 -04:00
|
|
|
}
|
2019-06-20 11:12:54 -05:00
|
|
|
|
2020-10-23 11:22:48 +01:00
|
|
|
typedef struct {
|
|
|
|
|
bool success;
|
|
|
|
|
|
|
|
|
|
nir_variable *var;
|
|
|
|
|
unsigned desc_set;
|
|
|
|
|
unsigned binding;
|
|
|
|
|
unsigned num_indices;
|
|
|
|
|
nir_src indices[4];
|
|
|
|
|
bool read_first_invocation;
|
|
|
|
|
} nir_binding;
|
|
|
|
|
|
|
|
|
|
nir_binding nir_chase_binding(nir_src rsrc);
|
|
|
|
|
nir_variable *nir_get_binding_variable(struct nir_shader *shader, nir_binding binding);
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
/*
|
|
|
|
|
* Control flow
|
|
|
|
|
*
|
|
|
|
|
* Control flow consists of a tree of control flow nodes, which include
|
|
|
|
|
* if-statements and loops. The leaves of the tree are basic blocks, lists of
|
|
|
|
|
* instructions that always run start-to-finish. Each basic block also keeps
|
|
|
|
|
* track of its successors (blocks which may run immediately after the current
|
|
|
|
|
* block) and predecessors (blocks which could have run immediately before the
|
|
|
|
|
* current block). Each function also has a start block and an end block which
|
|
|
|
|
* all return statements point to (which is always empty). Together, all the
|
|
|
|
|
* blocks with their predecessors and successors make up the control flow
|
|
|
|
|
* graph (CFG) of the function. There are helpers that modify the tree of
|
|
|
|
|
* control flow nodes while modifying the CFG appropriately; these should be
|
|
|
|
|
* used instead of modifying the tree directly.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
typedef enum {
|
|
|
|
|
nir_cf_node_block,
|
|
|
|
|
nir_cf_node_if,
|
|
|
|
|
nir_cf_node_loop,
|
|
|
|
|
nir_cf_node_function
|
|
|
|
|
} nir_cf_node_type;
|
|
|
|
|
|
|
|
|
|
typedef struct nir_cf_node {
|
|
|
|
|
struct exec_node node;
|
|
|
|
|
nir_cf_node_type type;
|
|
|
|
|
struct nir_cf_node *parent;
|
|
|
|
|
} nir_cf_node;
|
|
|
|
|
|
|
|
|
|
typedef struct nir_block {
|
|
|
|
|
nir_cf_node cf_node;
|
|
|
|
|
|
2023-08-17 21:02:41 +03:00
|
|
|
/** list of nir_instr */
|
|
|
|
|
struct exec_list instr_list;
|
2014-12-18 17:13:22 -08:00
|
|
|
|
|
|
|
|
/** generic block index; generated by nir_index_blocks */
|
2014-07-31 16:14:51 -07:00
|
|
|
unsigned index;
|
|
|
|
|
|
2024-03-09 12:59:21 -05:00
|
|
|
/* This indicates whether the block or any parent block is executed
|
|
|
|
|
* conditionally and whether the condition uses a divergent value.
|
|
|
|
|
*/
|
|
|
|
|
bool divergent;
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
/*
|
|
|
|
|
* Each block can only have up to 2 successors, so we put them in a simple
|
|
|
|
|
* array - no need for anything more complicated.
|
|
|
|
|
*/
|
|
|
|
|
struct nir_block *successors[2];
|
|
|
|
|
|
2014-12-18 17:13:22 -08:00
|
|
|
/* Set of nir_block predecessors in the CFG */
|
2014-07-31 16:14:51 -07:00
|
|
|
struct set *predecessors;
|
2014-07-18 16:13:11 -07:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* this node's immediate dominator in the dominance tree - set to NULL for
|
2024-03-20 17:23:22 -05:00
|
|
|
* the start block and any unreachable blocks.
|
2014-07-18 16:13:11 -07:00
|
|
|
*/
|
|
|
|
|
struct nir_block *imm_dom;
|
|
|
|
|
|
|
|
|
|
/* This node's children in the dominance tree */
|
|
|
|
|
unsigned num_dom_children;
|
|
|
|
|
struct nir_block **dom_children;
|
|
|
|
|
|
2017-02-27 17:21:42 -08:00
|
|
|
/* Set of nir_blocks on the dominance frontier of this block */
|
2014-07-18 16:13:11 -07:00
|
|
|
struct set *dom_frontier;
|
2014-10-29 14:17:17 -07:00
|
|
|
|
2015-02-06 12:45:43 -08:00
|
|
|
/*
|
|
|
|
|
* These two indices have the property that dom_{pre,post}_index for each
|
|
|
|
|
* child of this block in the dominance tree will always be between
|
|
|
|
|
* dom_pre_index and dom_post_index for this block, which makes testing if
|
|
|
|
|
* a given block is dominated by another block an O(1) operation.
|
|
|
|
|
*/
|
2020-09-08 17:50:23 -07:00
|
|
|
uint32_t dom_pre_index, dom_post_index;
|
2015-02-06 12:45:43 -08:00
|
|
|
|
2020-10-14 12:11:20 -07:00
|
|
|
/**
|
2020-12-11 09:48:15 -08:00
|
|
|
* Value just before the first nir_instr->index in the block, but after
|
|
|
|
|
* the end_ip of any predecessor block.
|
2020-10-14 12:11:20 -07:00
|
|
|
*/
|
|
|
|
|
uint32_t start_ip;
|
|
|
|
|
/**
|
2020-12-11 09:48:15 -08:00
|
|
|
* Value just after the last nir_instr->index in the block, but before the
|
|
|
|
|
* start_ip of any successor block.
|
2020-10-14 12:11:20 -07:00
|
|
|
*/
|
|
|
|
|
uint32_t end_ip;
|
|
|
|
|
|
2020-07-23 12:29:02 -07:00
|
|
|
/* SSA def live in and out for this block; used for liveness analysis.
|
|
|
|
|
* Indexed by ssa_def->index
|
|
|
|
|
*/
|
2014-10-29 14:17:17 -07:00
|
|
|
BITSET_WORD *live_in;
|
|
|
|
|
BITSET_WORD *live_out;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_block;
|
|
|
|
|
|
2017-01-17 18:38:35 -08:00
|
|
|
static inline bool
|
|
|
|
|
nir_block_is_reachable(nir_block *b)
|
|
|
|
|
{
|
|
|
|
|
/* See also nir_block_dominates */
|
2020-09-08 17:50:23 -07:00
|
|
|
return b->dom_post_index != 0;
|
2017-01-17 18:38:35 -08:00
|
|
|
}
|
|
|
|
|
|
2014-12-19 15:30:15 -08:00
|
|
|
static inline nir_instr *
|
|
|
|
|
nir_block_first_instr(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
struct exec_node *head = exec_list_get_head(&block->instr_list);
|
|
|
|
|
return exec_node_data(nir_instr, head, node);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_instr *
|
|
|
|
|
nir_block_last_instr(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
struct exec_node *tail = exec_list_get_tail(&block->instr_list);
|
|
|
|
|
return exec_node_data(nir_instr, tail, node);
|
|
|
|
|
}
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2018-08-24 09:34:05 -05:00
|
|
|
static inline bool
|
|
|
|
|
nir_block_ends_in_jump(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
return !exec_list_is_empty(&block->instr_list) &&
|
|
|
|
|
nir_block_last_instr(block)->type == nir_instr_type_jump;
|
|
|
|
|
}
|
|
|
|
|
|
2021-08-23 16:45:20 +03:00
|
|
|
static inline bool
|
|
|
|
|
nir_block_ends_in_return_or_halt(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
if (exec_list_is_empty(&block->instr_list))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
nir_instr *instr = nir_block_last_instr(block);
|
|
|
|
|
if (instr->type != nir_instr_type_jump)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
|
|
|
|
|
return jump_instr->type == nir_jump_return ||
|
|
|
|
|
jump_instr->type == nir_jump_halt;
|
|
|
|
|
}
|
|
|
|
|
|
2021-07-29 15:43:26 +10:00
|
|
|
static inline bool
|
|
|
|
|
nir_block_ends_in_break(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
if (exec_list_is_empty(&block->instr_list))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
nir_instr *instr = nir_block_last_instr(block);
|
|
|
|
|
return instr->type == nir_instr_type_jump &&
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_instr_as_jump(instr)->type == nir_jump_break;
|
2021-07-29 15:43:26 +10:00
|
|
|
}
|
|
|
|
|
|
2024-09-02 11:58:22 +01:00
|
|
|
bool nir_block_contains_work(nir_block *block);
|
|
|
|
|
|
2016-04-26 18:34:19 -07:00
|
|
|
#define nir_foreach_instr(instr, block) \
|
2014-07-31 16:14:51 -07:00
|
|
|
foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
|
2016-04-26 18:34:19 -07:00
|
|
|
#define nir_foreach_instr_reverse(instr, block) \
|
2014-07-31 16:14:51 -07:00
|
|
|
foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
|
2016-04-26 18:34:19 -07:00
|
|
|
#define nir_foreach_instr_safe(instr, block) \
|
2014-07-31 16:14:51 -07:00
|
|
|
foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
|
2016-04-26 18:34:19 -07:00
|
|
|
#define nir_foreach_instr_reverse_safe(instr, block) \
|
2015-11-30 09:24:23 -08:00
|
|
|
foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2023-05-11 13:11:57 -04:00
|
|
|
/* Phis come first in the block */
|
2023-06-30 17:29:51 -04:00
|
|
|
static inline nir_phi_instr *
|
|
|
|
|
nir_first_phi_in_block(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
nir_foreach_instr(instr, block) {
|
|
|
|
|
if (instr->type == nir_instr_type_phi)
|
|
|
|
|
return nir_instr_as_phi(instr);
|
|
|
|
|
else
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_phi_instr *
|
|
|
|
|
nir_next_phi(nir_phi_instr *phi)
|
|
|
|
|
{
|
|
|
|
|
nir_instr *next = nir_instr_next(&phi->instr);
|
|
|
|
|
|
|
|
|
|
if (next && next->type == nir_instr_type_phi)
|
|
|
|
|
return nir_instr_as_phi(next);
|
|
|
|
|
else
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_phi(instr, block) \
|
|
|
|
|
for (nir_phi_instr *instr = nir_first_phi_in_block(block); instr != NULL; \
|
2023-06-30 17:29:51 -04:00
|
|
|
instr = nir_next_phi(instr))
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_phi_safe(instr, block) \
|
|
|
|
|
for (nir_phi_instr *instr = nir_first_phi_in_block(block), \
|
|
|
|
|
*__next = instr ? nir_next_phi(instr) : NULL; \
|
|
|
|
|
instr != NULL; \
|
2023-06-30 17:29:51 -04:00
|
|
|
instr = __next, __next = instr ? nir_next_phi(instr) : NULL)
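/*
 * Usage sketch (illustrative): iterating only the phis at the top of a block
 * without open-coding the "stop at the first non-phi" check. The inner
 * nir_foreach_phi_src macro is assumed to be declared elsewhere in this
 * header.
 *
 *    nir_foreach_phi(phi, block) {
 *       nir_foreach_phi_src(src, phi) {
 *          ...
 *       }
 *    }
 */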
|
2023-05-11 13:11:57 -04:00
|
|
|
|
2021-02-04 10:38:21 -06:00
|
|
|
static inline nir_phi_instr *
|
|
|
|
|
nir_block_last_phi_instr(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
nir_phi_instr *last_phi = NULL;
|
2023-05-11 13:20:43 -04:00
|
|
|
nir_foreach_phi(instr, block)
|
|
|
|
|
last_phi = instr;
|
|
|
|
|
|
2021-02-04 10:38:21 -06:00
|
|
|
return last_phi;
|
|
|
|
|
}
|
|
|
|
|
|
2019-03-20 15:42:56 +11:00
|
|
|
typedef enum {
|
|
|
|
|
nir_selection_control_none = 0x0,
|
2022-09-07 13:58:00 +02:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Defined by SPIR-V spec 3.22 "Selection Control".
|
|
|
|
|
* The application prefers to remove control flow.
|
|
|
|
|
*/
|
2019-03-20 15:42:56 +11:00
|
|
|
nir_selection_control_flatten = 0x1,
|
2022-09-07 13:58:00 +02:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Defined by SPIR-V spec 3.22 "Selection Control".
|
|
|
|
|
* The application prefers to keep control flow.
|
|
|
|
|
*/
|
2019-03-20 15:42:56 +11:00
|
|
|
nir_selection_control_dont_flatten = 0x2,
|
2022-09-07 13:58:30 +02:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* May be applied by the compiler stack when it knows
|
|
|
|
|
* that a branch is divergent, and:
|
|
|
|
|
* - either both the if and else are always taken
|
|
|
|
|
* - the if or else is empty and the other is always taken
|
|
|
|
|
*/
|
|
|
|
|
nir_selection_control_divergent_always_taken = 0x3,
|
2019-03-20 15:42:56 +11:00
|
|
|
} nir_selection_control;
|
|
|
|
|
|
2015-04-24 10:16:27 -07:00
|
|
|
typedef struct nir_if {
|
2014-07-31 16:14:51 -07:00
|
|
|
nir_cf_node cf_node;
|
|
|
|
|
nir_src condition;
|
2019-03-20 15:42:56 +11:00
|
|
|
nir_selection_control control;
|
2014-12-18 17:13:22 -08:00
|
|
|
|
2023-08-17 21:02:41 +03:00
|
|
|
/** list of nir_cf_node */
|
|
|
|
|
struct exec_list then_list;
|
|
|
|
|
|
|
|
|
|
/** list of nir_cf_node */
|
|
|
|
|
struct exec_list else_list;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_if;
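/*
 * Usage sketch (illustrative, hedged): a front end can record a flattening
 * preference on a freshly built if. The builder helpers are assumed to come
 * from nir_builder.h; whether a backend honours the hint is target-specific.
 *
 *    nir_if *nif = nir_push_if(&b, cond);
 *    nif->control = nir_selection_control_flatten;
 *    ...
 *    nir_pop_if(&b, nif);
 */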
|
|
|
|
|
|
2016-12-13 14:39:51 +11:00
|
|
|
typedef struct {
|
|
|
|
|
nir_if *nif;
|
|
|
|
|
|
2024-04-24 14:46:25 +10:00
|
|
|
/** Condition instruction that contains the induction variable */
|
2016-12-13 14:39:51 +11:00
|
|
|
nir_instr *conditional_instr;
|
|
|
|
|
|
2019-01-16 11:34:35 -08:00
|
|
|
/** Block within ::nif that has the break instruction. */
|
2016-12-13 14:39:51 +11:00
|
|
|
nir_block *break_block;
|
2019-01-16 11:34:35 -08:00
|
|
|
|
|
|
|
|
/** Last block for the then- or else-path that does not contain the break. */
|
2016-12-13 14:39:51 +11:00
|
|
|
nir_block *continue_from_block;
|
|
|
|
|
|
2019-01-16 11:34:35 -08:00
|
|
|
/** True when ::break_block is in the else-path of ::nif. */
|
2016-12-13 14:39:51 +11:00
|
|
|
bool continue_from_then;
|
2018-11-15 23:23:09 +11:00
|
|
|
bool induction_rhs;
|
2016-12-13 14:39:51 +11:00
|
|
|
|
2018-11-20 13:45:58 +11:00
|
|
|
/* This is true if the terminator's exact trip count is unknown. For
|
|
|
|
|
* example:
|
|
|
|
|
*
|
|
|
|
|
* for (int i = 0; i < imin(x, 4); i++)
|
|
|
|
|
* ...
|
|
|
|
|
*
|
|
|
|
|
* Here loop analysis would have set a max_trip_count of 4, however we don't
|
|
|
|
|
* know for sure that this is the exact trip count.
|
|
|
|
|
*/
|
|
|
|
|
bool exact_trip_count_unknown;
|
|
|
|
|
|
2016-12-13 14:39:51 +11:00
|
|
|
struct list_head loop_terminator_link;
|
|
|
|
|
} nir_loop_terminator;
|
|
|
|
|
|
2021-07-15 17:40:40 +08:00
|
|
|
typedef struct {
|
|
|
|
|
/* Induction variable. */
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *def;
|
2021-07-15 17:40:40 +08:00
|
|
|
|
|
|
|
|
/* Init statement with only uniform. */
|
|
|
|
|
nir_src *init_src;
|
|
|
|
|
|
|
|
|
|
/* Update statement with only uniform. */
|
|
|
|
|
nir_alu_src *update_src;
|
|
|
|
|
} nir_loop_induction_variable;
|
|
|
|
|
|
2016-12-13 14:39:51 +11:00
|
|
|
typedef struct {
|
2019-03-03 09:24:12 -06:00
|
|
|
/* Estimated cost (in number of instructions) of the loop */
|
|
|
|
|
unsigned instr_cost;
|
2016-12-13 14:39:51 +11:00
|
|
|
|
2022-09-29 14:59:06 +01:00
|
|
|
/* Contains fp64 ops that will be lowered */
|
|
|
|
|
bool has_soft_fp64;
|
|
|
|
|
|
2018-11-15 23:23:09 +11:00
|
|
|
/* Guessed trip count based on array indexing */
|
|
|
|
|
unsigned guessed_trip_count;
|
|
|
|
|
|
2018-11-20 11:35:37 +11:00
|
|
|
/* Maximum number of times the loop is run (if known) */
|
|
|
|
|
unsigned max_trip_count;
|
|
|
|
|
|
|
|
|
|
/* Do we know the exact number of times the loop will be run */
|
|
|
|
|
bool exact_trip_count_known;
|
2016-12-13 14:39:51 +11:00
|
|
|
|
|
|
|
|
/* Unroll the loop regardless of its size */
|
|
|
|
|
bool force_unroll;
|
|
|
|
|
|
2018-07-07 12:09:26 +10:00
|
|
|
/* Does the loop contain complex loop terminators, continues or other
|
|
|
|
|
* complex behaviours? If this is true we can't rely on
|
|
|
|
|
* loop_terminator_list to be complete or accurate.
|
|
|
|
|
*/
|
|
|
|
|
bool complex_loop;
|
|
|
|
|
|
2016-12-13 14:39:51 +11:00
|
|
|
nir_loop_terminator *limiting_terminator;
|
|
|
|
|
|
|
|
|
|
/* A list of loop_terminators terminating this loop. */
|
|
|
|
|
struct list_head loop_terminator_list;
|
2021-07-15 17:40:40 +08:00
|
|
|
|
|
|
|
|
/* array of induction variables for this loop */
|
|
|
|
|
nir_loop_induction_variable *induction_vars;
|
|
|
|
|
unsigned num_induction_vars;
|
2016-12-13 14:39:51 +11:00
|
|
|
} nir_loop_info;
|
|
|
|
|
|
2019-03-20 13:39:36 +11:00
|
|
|
typedef enum {
|
|
|
|
|
nir_loop_control_none = 0x0,
|
|
|
|
|
nir_loop_control_unroll = 0x1,
|
|
|
|
|
nir_loop_control_dont_unroll = 0x2,
|
|
|
|
|
} nir_loop_control;
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct {
|
|
|
|
|
nir_cf_node cf_node;
|
2014-12-18 17:13:22 -08:00
|
|
|
|
2023-08-17 21:02:41 +03:00
|
|
|
/** list of nir_cf_node */
|
|
|
|
|
struct exec_list body;
|
|
|
|
|
|
|
|
|
|
/** (optional) list of nir_cf_node */
|
|
|
|
|
struct exec_list continue_list;
|
2016-12-13 14:39:51 +11:00
|
|
|
|
|
|
|
|
nir_loop_info *info;
|
2019-03-20 13:39:36 +11:00
|
|
|
nir_loop_control control;
|
2018-11-19 17:01:52 +11:00
|
|
|
bool partially_unrolled;
|
2024-04-05 17:00:33 +02:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Whether some loop-active invocations might take a different control-flow path:
|
|
|
|
|
* divergent_continue indicates that a continue statement might be taken by
|
|
|
|
|
* only some of the loop-active invocations. A subsequent break is always
|
|
|
|
|
* considered divergent.
|
|
|
|
|
*/
|
|
|
|
|
bool divergent_continue;
|
|
|
|
|
bool divergent_break;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_loop;
|
|
|
|
|
|
2024-04-05 17:00:33 +02:00
|
|
|
static inline bool
|
|
|
|
|
nir_loop_is_divergent(nir_loop *loop)
|
|
|
|
|
{
|
|
|
|
|
return loop->divergent_continue || loop->divergent_break;
|
|
|
|
|
}
|
|
|
|
|
|
2014-10-29 12:42:54 -07:00
|
|
|
/**
|
|
|
|
|
* Various bits of metadata that may be created or required by
|
|
|
|
|
* optimization and analysis passes
|
|
|
|
|
*/
|
|
|
|
|
typedef enum {
|
|
|
|
|
nir_metadata_none = 0x0,
|
2020-05-13 13:29:43 -05:00
|
|
|
|
|
|
|
|
/** Indicates that nir_block::index values are valid.
|
|
|
|
|
*
|
|
|
|
|
* The start block has index 0 and they increase through a natural walk of
|
|
|
|
|
* the CFG. nir_function_impl::num_blocks is the number of blocks and
|
|
|
|
|
* every block index is in the range [0, nir_function_impl::num_blocks].
|
|
|
|
|
*
|
|
|
|
|
* A pass can preserve this metadata type if it doesn't touch the CFG.
|
|
|
|
|
*/
|
2014-10-29 12:42:54 -07:00
|
|
|
nir_metadata_block_index = 0x1,
|
2020-05-13 13:29:43 -05:00
|
|
|
|
|
|
|
|
/** Indicates that block dominance information is valid
|
|
|
|
|
*
|
|
|
|
|
* This includes:
|
|
|
|
|
*
|
|
|
|
|
* - nir_block::num_dom_children
|
|
|
|
|
* - nir_block::dom_children
|
|
|
|
|
* - nir_block::dom_frontier
|
|
|
|
|
* - nir_block::dom_pre_index
|
|
|
|
|
* - nir_block::dom_post_index
|
|
|
|
|
*
|
|
|
|
|
* A pass can preserve this metadata type if it doesn't touch the CFG.
|
|
|
|
|
*/
|
2014-10-29 12:42:54 -07:00
|
|
|
nir_metadata_dominance = 0x2,
|
2020-05-13 13:29:43 -05:00
|
|
|
|
|
|
|
|
/** Indicates that SSA def data-flow liveness information is valid
|
|
|
|
|
*
|
|
|
|
|
* This includes:
|
|
|
|
|
*
|
|
|
|
|
* - nir_block::live_in
|
|
|
|
|
* - nir_block::live_out
|
|
|
|
|
*
|
|
|
|
|
* A pass can preserve this metadata type if it never adds or removes any
|
2021-08-03 10:16:58 -05:00
|
|
|
* SSA defs or uses of SSA defs (most passes shouldn't preserve this
|
|
|
|
|
* metadata type).
|
2020-05-13 13:29:43 -05:00
|
|
|
*/
|
2023-08-15 10:11:43 -05:00
|
|
|
nir_metadata_live_defs = 0x4,
|
2020-05-13 13:29:43 -05:00
|
|
|
|
|
|
|
|
/** A dummy metadata value to track when a pass forgot to call
|
|
|
|
|
* nir_metadata_preserve.
|
|
|
|
|
*
|
|
|
|
|
* A pass should always clear this value even if it doesn't make any
|
|
|
|
|
* progress to indicate that it thought about preserving metadata.
|
|
|
|
|
*/
|
2015-11-03 00:31:22 -08:00
|
|
|
nir_metadata_not_properly_reset = 0x8,
|
2020-05-13 13:29:43 -05:00
|
|
|
|
|
|
|
|
/** Indicates that loop analysis information is valid.
|
|
|
|
|
*
|
|
|
|
|
* This includes everything pointed to by nir_loop::info.
|
|
|
|
|
*
|
|
|
|
|
* A pass can preserve this metadata type if it is guaranteed to not affect
|
|
|
|
|
* any loop metadata. However, since loop metadata includes things like
|
|
|
|
|
* loop counts which depend on arithmetic in the loop, this is very hard to
|
|
|
|
|
* determine. Most passes shouldn't preserve this metadata type.
|
|
|
|
|
*/
|
2016-12-13 14:39:51 +11:00
|
|
|
nir_metadata_loop_analysis = 0x10,
|
2020-05-21 20:39:30 -05:00
|
|
|
|
2020-09-29 15:33:39 -07:00
|
|
|
/** Indicates that nir_instr::index values are valid.
|
|
|
|
|
*
|
|
|
|
|
* The start instruction has index 0 and they increase through a natural
|
|
|
|
|
* walk of instructions in blocks in the CFG. The indices may have holes
|
|
|
|
|
* after passes such as DCE.
|
|
|
|
|
*
|
|
|
|
|
* A pass can preserve this metadata type if it never adds or moves any
|
|
|
|
|
* instructions (most passes shouldn't preserve this metadata type), but
|
|
|
|
|
* can preserve it if it only removes instructions.
|
|
|
|
|
*/
|
|
|
|
|
nir_metadata_instr_index = 0x20,
|
|
|
|
|
|
2024-06-16 16:26:19 -04:00
|
|
|
/** All control flow metadata
|
|
|
|
|
*
|
|
|
|
|
* This includes all metadata preserved by a pass that preserves control flow
|
|
|
|
|
* but modifies instructions. For example, a pass using
|
|
|
|
|
* nir_shader_instructions_pass will typically preserve this if it does not
|
|
|
|
|
* insert control flow.
|
|
|
|
|
*
|
|
|
|
|
* This is the most common metadata set to preserve, so it has its own alias.
|
|
|
|
|
*/
|
|
|
|
|
nir_metadata_control_flow = nir_metadata_block_index |
|
|
|
|
|
nir_metadata_dominance,
|
|
|
|
|
|
2020-05-21 20:39:30 -05:00
|
|
|
/** All metadata
|
|
|
|
|
*
|
|
|
|
|
* This includes all nir_metadata flags except not_properly_reset. Passes
|
|
|
|
|
* which do not change the shader in any way should call
|
|
|
|
|
*
|
|
|
|
|
* nir_metadata_preserve(impl, nir_metadata_all);
|
|
|
|
|
*/
|
|
|
|
|
nir_metadata_all = ~nir_metadata_not_properly_reset,
|
2014-10-29 12:42:54 -07:00
|
|
|
} nir_metadata;
|
2020-08-31 18:08:49 +02:00
|
|
|
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)
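/*
 * Usage sketch (illustrative): the usual pattern at the end of a pass is to
 * declare which metadata survived, or to preserve everything when nothing
 * changed. nir_metadata_preserve() is declared elsewhere in this header.
 *
 *    if (progress)
 *       nir_metadata_preserve(impl, nir_metadata_control_flow);
 *    else
 *       nir_metadata_preserve(impl, nir_metadata_all);
 */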
|
2014-10-29 12:42:54 -07:00
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct {
|
|
|
|
|
nir_cf_node cf_node;
|
|
|
|
|
|
2015-12-26 10:00:47 -08:00
|
|
|
/** pointer to the function of which this is an implementation */
|
|
|
|
|
struct nir_function *function;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2021-09-24 18:12:24 +02:00
|
|
|
/**
|
|
|
|
|
* For entrypoints, a pointer to a nir_function_impl which runs before
|
|
|
|
|
* it, once per draw or dispatch, communicating via store_preamble and
|
|
|
|
|
* load_preamble intrinsics. If NULL then there is no preamble.
|
|
|
|
|
*/
|
|
|
|
|
struct nir_function *preamble;
|
|
|
|
|
|
2023-08-17 21:02:41 +03:00
|
|
|
/** list of nir_cf_node */
|
|
|
|
|
struct exec_list body;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2015-08-06 18:18:40 -07:00
|
|
|
nir_block *end_block;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
|
|
|
|
/** list for all local variables in the function */
|
|
|
|
|
struct exec_list locals;
|
|
|
|
|
|
|
|
|
|
/** next available SSA value index */
|
|
|
|
|
unsigned ssa_alloc;
|
|
|
|
|
|
|
|
|
|
/* Total number of basic blocks; only valid when nir_metadata_block_index is set. */
|
|
|
|
|
unsigned num_blocks;
|
|
|
|
|
|
2019-10-23 20:42:40 +02:00
|
|
|
/** True if this nir_function_impl uses structured control-flow
|
|
|
|
|
*
|
|
|
|
|
* Structured nir_function_impls have different validation rules.
|
|
|
|
|
*/
|
|
|
|
|
bool structured;
|
|
|
|
|
|
2014-10-29 12:42:54 -07:00
|
|
|
nir_metadata valid_metadata;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_function_impl;
|
|
|
|
|
|
2020-05-20 10:18:14 -05:00
|
|
|
#define nir_foreach_function_temp_variable(var, impl) \
|
|
|
|
|
foreach_list_typed(nir_variable, var, node, &(impl)->locals)
|
|
|
|
|
|
|
|
|
|
#define nir_foreach_function_temp_variable_safe(var, impl) \
|
|
|
|
|
foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals)
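/*
 * Usage sketch (illustrative): counting the function-local variables of an
 * impl with the iterator above.
 *
 *    unsigned num_locals = 0;
 *    nir_foreach_function_temp_variable(var, impl)
 *       num_locals++;
 */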
|
|
|
|
|
|
2016-05-13 13:17:34 -07:00
|
|
|
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
|
2015-08-06 18:18:40 -07:00
|
|
|
nir_start_block(nir_function_impl *impl)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return (nir_block *)impl->body.head_sentinel.next;
|
2015-08-06 18:18:40 -07:00
|
|
|
}
|
|
|
|
|
|
2016-05-13 13:17:34 -07:00
|
|
|
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
|
2016-04-08 02:11:44 -04:00
|
|
|
nir_impl_last_block(nir_function_impl *impl)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return (nir_block *)impl->body.tail_sentinel.prev;
|
2016-04-08 02:11:44 -04:00
|
|
|
}
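/*
 * Usage sketch (illustrative): the start block is where code that must
 * dominate the whole function is typically inserted, and the last block is
 * the final block in source order (not the end_block sentinel).
 *
 *    nir_block *entry = nir_start_block(impl);
 *    nir_block *last = nir_impl_last_block(impl);
 */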
|
|
|
|
|
|
2014-12-19 15:30:15 -08:00
|
|
|
static inline nir_cf_node *
|
|
|
|
|
nir_cf_node_next(nir_cf_node *node)
|
|
|
|
|
{
|
2015-02-04 21:22:45 -08:00
|
|
|
struct exec_node *next = exec_node_get_next(&node->node);
|
|
|
|
|
if (exec_node_is_tail_sentinel(next))
|
|
|
|
|
return NULL;
|
|
|
|
|
else
|
|
|
|
|
return exec_node_data(nir_cf_node, next, node);
|
2014-12-19 15:30:15 -08:00
|
|
|
}
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2014-12-19 15:30:15 -08:00
|
|
|
static inline nir_cf_node *
|
|
|
|
|
nir_cf_node_prev(nir_cf_node *node)
|
|
|
|
|
{
|
2015-02-04 21:22:45 -08:00
|
|
|
struct exec_node *prev = exec_node_get_prev(&node->node);
|
|
|
|
|
if (exec_node_is_head_sentinel(prev))
|
|
|
|
|
return NULL;
|
|
|
|
|
else
|
|
|
|
|
return exec_node_data(nir_cf_node, prev, node);
|
2014-12-19 15:30:15 -08:00
|
|
|
}
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2014-12-19 15:30:15 -08:00
|
|
|
static inline bool
|
|
|
|
|
nir_cf_node_is_first(const nir_cf_node *node)
|
|
|
|
|
{
|
|
|
|
|
return exec_node_is_head_sentinel(node->node.prev);
|
|
|
|
|
}
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2014-12-19 15:30:15 -08:00
|
|
|
static inline bool
|
|
|
|
|
nir_cf_node_is_last(const nir_cf_node *node)
|
|
|
|
|
{
|
|
|
|
|
return exec_node_is_tail_sentinel(node->node.next);
|
|
|
|
|
}
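/*
 * Usage sketch (illustrative, hedged): walking the sibling control-flow nodes
 * that follow a block within its enclosing list, using the helpers above.
 * nir_cf_node_if is assumed to be declared in nir_defines.h.
 *
 *    for (nir_cf_node *node = &block->cf_node; node != NULL;
 *         node = nir_cf_node_next(node)) {
 *       if (node->type == nir_cf_node_if)
 *          ...
 *    }
 */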
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2016-10-05 18:09:25 -07:00
|
|
|
NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node,
|
|
|
|
|
type, nir_cf_node_block)
|
|
|
|
|
NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node,
|
|
|
|
|
type, nir_cf_node_if)
|
|
|
|
|
NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node,
|
|
|
|
|
type, nir_cf_node_loop)
|
|
|
|
|
NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node,
|
|
|
|
|
nir_function_impl, cf_node, type, nir_cf_node_function)
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2016-10-05 19:08:57 -07:00
|
|
|
static inline nir_block *
|
|
|
|
|
nir_if_first_then_block(nir_if *if_stmt)
|
|
|
|
|
{
|
|
|
|
|
struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
|
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
|
nir_if_last_then_block(nir_if *if_stmt)
|
|
|
|
|
{
|
|
|
|
|
struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
|
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
|
nir_if_first_else_block(nir_if *if_stmt)
|
|
|
|
|
{
|
|
|
|
|
struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
|
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
|
nir_if_last_else_block(nir_if *if_stmt)
|
|
|
|
|
{
|
|
|
|
|
struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
|
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
|
nir_loop_first_block(nir_loop *loop)
|
|
|
|
|
{
|
|
|
|
|
struct exec_node *head = exec_list_get_head(&loop->body);
|
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
|
nir_loop_last_block(nir_loop *loop)
|
|
|
|
|
{
|
|
|
|
|
struct exec_node *tail = exec_list_get_tail(&loop->body);
|
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-01 17:34:48 +01:00
|
|
|
static inline bool
|
|
|
|
|
nir_loop_has_continue_construct(const nir_loop *loop)
|
|
|
|
|
{
|
|
|
|
|
return !exec_list_is_empty(&loop->continue_list);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
|
nir_loop_first_continue_block(nir_loop *loop)
|
|
|
|
|
{
|
|
|
|
|
assert(nir_loop_has_continue_construct(loop));
|
|
|
|
|
struct exec_node *head = exec_list_get_head(&loop->continue_list);
|
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
|
nir_loop_last_continue_block(nir_loop *loop)
|
|
|
|
|
{
|
|
|
|
|
assert(nir_loop_has_continue_construct(loop));
|
|
|
|
|
struct exec_node *tail = exec_list_get_tail(&loop->continue_list);
|
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return the target block of a nir_jump_continue statement
|
|
|
|
|
*/
|
|
|
|
|
static inline nir_block *
|
|
|
|
|
nir_loop_continue_target(nir_loop *loop)
|
|
|
|
|
{
|
|
|
|
|
if (nir_loop_has_continue_construct(loop))
|
|
|
|
|
return nir_loop_first_continue_block(loop);
|
|
|
|
|
else
|
|
|
|
|
return nir_loop_first_block(loop);
|
|
|
|
|
}
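/*
 * Usage sketch (illustrative): a pass rewriting continue jumps needs to know
 * where they land, which differs when the loop has a continue construct.
 *
 *    nir_block *target = nir_loop_continue_target(loop);
 *    bool has_construct = nir_loop_has_continue_construct(loop);
 */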
|
|
|
|
|
|
2019-04-08 11:22:20 -07:00
|
|
|
/**
|
|
|
|
|
* Return true if this list of cf_nodes contains a single empty block.
|
|
|
|
|
*/
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_cf_list_is_empty_block(struct exec_list *cf_list)
|
|
|
|
|
{
|
|
|
|
|
if (exec_list_is_singular(cf_list)) {
|
|
|
|
|
struct exec_node *head = exec_list_get_head(cf_list);
|
|
|
|
|
nir_block *block =
|
|
|
|
|
nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
|
|
|
|
|
return exec_list_is_empty(&block->instr_list);
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
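/*
 * Usage sketch (illustrative): a peephole that drops ifs whose branches are
 * both trivially empty could be gated like this.
 *
 *    if (nir_cf_list_is_empty_block(&nif->then_list) &&
 *        nir_cf_list_is_empty_block(&nif->else_list)) {
 *       ... remove the if ...
 *    }
 */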
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct {
|
2018-03-22 16:41:18 -07:00
|
|
|
uint8_t num_components;
|
|
|
|
|
uint8_t bit_size;
|
2024-02-27 11:26:29 +11:00
|
|
|
|
|
|
|
|
/* True if this parameter is actually the function return variable */
|
|
|
|
|
bool is_return;
|
|
|
|
|
|
2024-08-15 16:02:10 +10:00
|
|
|
bool implicit_conversion_prohibited;
|
|
|
|
|
|
2024-08-15 15:02:09 +10:00
|
|
|
nir_variable_mode mode;
|
|
|
|
|
|
2024-02-27 11:26:29 +11:00
|
|
|
/* The type of the function param */
|
|
|
|
|
const struct glsl_type *type;
|
2024-11-17 14:17:13 -04:00
|
|
|
|
|
|
|
|
/* Name if known, null if unknown */
|
|
|
|
|
const char *name;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_parameter;
|
|
|
|
|
|
2015-12-26 10:00:47 -08:00
|
|
|
typedef struct nir_function {
|
2014-07-31 16:14:51 -07:00
|
|
|
struct exec_node node;
|
|
|
|
|
|
2015-12-26 10:00:47 -08:00
|
|
|
const char *name;
|
|
|
|
|
struct nir_shader *shader;
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
unsigned num_params;
|
|
|
|
|
nir_parameter *params;
|
|
|
|
|
|
2015-12-26 10:00:47 -08:00
|
|
|
/** The implementation of this function.
|
|
|
|
|
*
|
|
|
|
|
* If the function is only declared and not implemented, this is NULL.
|
2023-06-23 11:57:47 +08:00
|
|
|
*
|
|
|
|
|
* Unless setting to NULL or NIR_SERIALIZE_FUNC_HAS_IMPL, set with
|
|
|
|
|
* nir_function_set_impl to maintain IR invariants.
|
2015-12-26 10:00:47 -08:00
|
|
|
*/
|
|
|
|
|
nir_function_impl *impl;
|
2018-09-06 11:12:24 -07:00
|
|
|
|
|
|
|
|
bool is_entrypoint;
|
2023-10-12 12:03:11 -04:00
|
|
|
/* from SPIR-V linkage, only for libraries */
|
|
|
|
|
bool is_exported;
|
2021-09-24 18:12:24 +02:00
|
|
|
bool is_preamble;
|
2020-10-26 14:35:55 +10:00
|
|
|
/* from SPIR-V function control */
|
|
|
|
|
bool should_inline;
|
|
|
|
|
bool dont_inline; /* from SPIR-V */
|
2024-03-25 17:09:23 +11:00
|
|
|
|
2024-11-17 18:15:19 -04:00
|
|
|
/* Static workgroup size, if this is a kernel function in a library of OpenCL
|
|
|
|
|
* kernels. Normally, the size in the shader info is used instead.
|
|
|
|
|
*/
|
|
|
|
|
unsigned workgroup_size[3];
|
|
|
|
|
|
2024-03-25 17:09:23 +11:00
|
|
|
/**
|
|
|
|
|
* Is this function a subroutine type declaration
|
|
|
|
|
* e.g. subroutine void type1(float arg1);
|
|
|
|
|
*/
|
|
|
|
|
bool is_subroutine;
|
|
|
|
|
|
2024-05-22 15:32:02 +10:00
|
|
|
/* Temporary function created to wrap global instructions before they can
|
|
|
|
|
* be inlined into the main function.
|
|
|
|
|
*/
|
|
|
|
|
bool is_tmp_globals_wrapper;
|
|
|
|
|
|
2024-03-25 17:09:23 +11:00
|
|
|
/**
|
|
|
|
|
* Is this function associated to a subroutine type
|
|
|
|
|
* e.g. subroutine (type1, type2) function_name { function_body };
|
|
|
|
|
* would have num_subroutine_types 2,
|
|
|
|
|
* and pointers to the type1 and type2 types.
|
|
|
|
|
*/
|
|
|
|
|
int num_subroutine_types;
|
|
|
|
|
const struct glsl_type **subroutine_types;
|
|
|
|
|
|
|
|
|
|
int subroutine_index;
|
2024-11-17 20:50:34 -04:00
|
|
|
|
|
|
|
|
/* A temporary for passes to use for storing flags. */
|
|
|
|
|
uint32_t pass_flags;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_function;
|
|
|
|
|
|
2019-02-25 17:13:48 -08:00
|
|
|
typedef enum {
|
|
|
|
|
nir_lower_imul64 = (1 << 0),
|
|
|
|
|
nir_lower_isign64 = (1 << 1),
|
|
|
|
|
/** Lower all int64 modulus and division opcodes */
|
|
|
|
|
nir_lower_divmod64 = (1 << 2),
|
|
|
|
|
/** Lower all 64-bit umul_high and imul_high opcodes */
|
|
|
|
|
nir_lower_imul_high64 = (1 << 3),
|
2023-06-27 19:19:39 +01:00
|
|
|
nir_lower_bcsel64 = (1 << 4),
|
2019-02-25 17:13:48 -08:00
|
|
|
nir_lower_icmp64 = (1 << 5),
|
|
|
|
|
nir_lower_iadd64 = (1 << 6),
|
|
|
|
|
nir_lower_iabs64 = (1 << 7),
|
|
|
|
|
nir_lower_ineg64 = (1 << 8),
|
|
|
|
|
nir_lower_logic64 = (1 << 9),
|
|
|
|
|
nir_lower_minmax64 = (1 << 10),
|
|
|
|
|
nir_lower_shift64 = (1 << 11),
|
2019-02-14 23:08:39 -08:00
|
|
|
nir_lower_imul_2x32_64 = (1 << 12),
|
2019-07-15 10:31:49 -05:00
|
|
|
nir_lower_extract64 = (1 << 13),
|
2019-11-20 09:23:14 +10:00
|
|
|
nir_lower_ufind_msb64 = (1 << 14),
|
2020-06-23 05:47:20 -07:00
|
|
|
nir_lower_bit_count64 = (1 << 15),
|
2020-10-26 10:50:35 -05:00
|
|
|
nir_lower_subgroup_shuffle64 = (1 << 16),
|
|
|
|
|
nir_lower_scan_reduce_bitwise64 = (1 << 17),
|
2020-10-26 12:41:08 -05:00
|
|
|
nir_lower_scan_reduce_iadd64 = (1 << 18),
|
2020-10-26 23:08:26 -05:00
|
|
|
nir_lower_vote_ieq64 = (1 << 19),
|
2022-01-17 18:11:27 +01:00
|
|
|
nir_lower_usub_sat64 = (1 << 20),
|
2021-11-20 14:23:20 +01:00
|
|
|
nir_lower_iadd_sat64 = (1 << 21),
|
2023-05-19 03:53:07 -07:00
|
|
|
nir_lower_find_lsb64 = (1 << 22),
|
2023-06-27 19:19:39 +01:00
|
|
|
nir_lower_conv64 = (1 << 23),
|
2023-01-04 12:51:39 -08:00
|
|
|
nir_lower_uadd_sat64 = (1 << 24),
|
2023-08-14 12:58:51 -07:00
|
|
|
nir_lower_iadd3_64 = (1 << 25),
|
2019-02-25 17:13:48 -08:00
|
|
|
} nir_lower_int64_options;
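/*
 * Usage sketch (illustrative, hedged): a backend without native 64-bit
 * multiply or divide would typically OR the relevant bits into the int64
 * lowering mask of its compiler options. The field name lower_int64_options
 * is assumed here; it lives outside the portion of the options struct shown
 * below and is consumed by nir_lower_int64().
 *
 *    .lower_int64_options = nir_lower_imul64 | nir_lower_divmod64 |
 *                           nir_lower_imul_high64,
 */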
|
|
|
|
|
|
|
|
|
|
typedef enum {
|
|
|
|
|
nir_lower_drcp = (1 << 0),
|
|
|
|
|
nir_lower_dsqrt = (1 << 1),
|
|
|
|
|
nir_lower_drsq = (1 << 2),
|
|
|
|
|
nir_lower_dtrunc = (1 << 3),
|
|
|
|
|
nir_lower_dfloor = (1 << 4),
|
|
|
|
|
nir_lower_dceil = (1 << 5),
|
|
|
|
|
nir_lower_dfract = (1 << 6),
|
|
|
|
|
nir_lower_dround_even = (1 << 7),
|
|
|
|
|
nir_lower_dmod = (1 << 8),
|
2019-07-11 17:06:31 -05:00
|
|
|
nir_lower_dsub = (1 << 9),
|
|
|
|
|
nir_lower_ddiv = (1 << 10),
|
2023-10-18 09:49:32 +02:00
|
|
|
nir_lower_dsign = (1 << 11),
|
2023-12-08 11:38:37 -06:00
|
|
|
nir_lower_dminmax = (1 << 12),
|
|
|
|
|
nir_lower_dsat = (1 << 13),
|
|
|
|
|
nir_lower_fp64_full_software = (1 << 14),
|
2019-02-25 17:13:48 -08:00
|
|
|
} nir_lower_doubles_options;
|
|
|
|
|
|
2019-05-20 14:58:23 +02:00
|
|
|
typedef enum {
|
|
|
|
|
nir_divergence_single_prim_per_subgroup = (1 << 0),
|
|
|
|
|
nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
|
|
|
|
|
nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
|
|
|
|
|
nir_divergence_view_index_uniform = (1 << 3),
|
2020-10-20 10:41:00 +03:00
|
|
|
nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4),
|
2021-04-29 11:10:32 +02:00
|
|
|
nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
|
2022-12-22 01:32:44 +02:00
|
|
|
nir_divergence_shader_record_ptr_uniform = (1 << 6),
|
2023-03-14 19:08:13 +01:00
|
|
|
nir_divergence_uniform_load_tears = (1 << 7),
|
2024-04-09 18:14:12 +01:00
|
|
|
/* If used, this allows phis for divergent merges with undef and a uniform source to be considered uniform */
|
|
|
|
|
nir_divergence_ignore_undef_if_phi_srcs = (1 << 8),
|
2019-05-20 14:58:23 +02:00
|
|
|
} nir_divergence_options;
|
|
|
|
|
|
2021-08-25 14:07:50 +08:00
|
|
|
typedef enum {
|
2023-12-30 16:01:50 -05:00
|
|
|
/**
|
|
|
|
|
* Whether a fragment shader can interpolate the same input multiple times
|
|
|
|
|
* with different modes (smooth, noperspective) and locations (pixel,
|
|
|
|
|
* centroid, sample, at_offset, at_sample), excluding the flat mode.
|
|
|
|
|
*
|
|
|
|
|
* This matches AMD GPU flexibility and limitations and is a superset of
|
|
|
|
|
* the GL4 requirement that each input can be interpolated at its specified
|
|
|
|
|
* location, and then also as centroid, at_offset, and at_sample.
|
|
|
|
|
*/
|
|
|
|
|
nir_io_has_flexible_input_interpolation_except_flat = BITFIELD_BIT(0),
|
2023-12-30 16:27:07 -05:00
|
|
|
|
2023-04-06 04:21:00 -04:00
|
|
|
/**
|
|
|
|
|
* nir_opt_varyings compacts (relocates) components of varyings by
|
|
|
|
|
* rewriting their locations completely, effectively moving components of
|
|
|
|
|
* varyings between slots. This option forces nir_opt_varyings to make
|
|
|
|
|
* VARYING_SLOT_POS unused by moving its contents to VARn if the consumer
|
|
|
|
|
* is not FS. If this option is not set and POS is unused, it moves
|
|
|
|
|
* components of VARn to POS until it's fully used.
|
|
|
|
|
*/
|
|
|
|
|
nir_io_dont_use_pos_for_non_fs_varyings = BITFIELD_BIT(1),
|
|
|
|
|
|
2024-03-27 11:53:26 -04:00
|
|
|
nir_io_16bit_input_output_support = BITFIELD_BIT(2),
|
|
|
|
|
|
2024-05-28 16:14:46 +02:00
|
|
|
/**
|
|
|
|
|
* Implement mediump inputs and outputs as normal 32-bit IO.
|
|
|
|
|
* Causes the mediump flag to be not set for IO semantics, essentially
|
|
|
|
|
* destroying any mediump-related IO information in the shader.
|
|
|
|
|
*/
|
|
|
|
|
nir_io_mediump_is_32bit = BITFIELD_BIT(3),
|
|
|
|
|
|
2024-06-14 18:28:52 -04:00
|
|
|
/**
|
|
|
|
|
* Whether nir_opt_vectorize_io should ignore FS inputs.
|
|
|
|
|
*/
|
|
|
|
|
nir_io_prefer_scalar_fs_inputs = BITFIELD_BIT(4),
|
|
|
|
|
|
2024-06-25 05:52:32 -04:00
|
|
|
/**
|
|
|
|
|
* Whether interpolated fragment shader vec4 slots can use load_input for
|
|
|
|
|
* a subset of its components to skip interpolation for those components.
|
|
|
|
|
* The result of such load_input is a value from a random (not necessarily
|
|
|
|
|
* provoking) vertex. If a value from the provoking vertex is required,
|
|
|
|
|
* the vec4 slot should have no load_interpolated_input instructions.
|
|
|
|
|
*
|
|
|
|
|
* This exposes the AMD capability that allows packing flat inputs with
|
|
|
|
|
* interpolated inputs in a limited number of cases. Normally, flat
|
|
|
|
|
* components must be in a separate vec4 slot to get the value from
|
|
|
|
|
* the provoking vertex. If the compiler can prove that all per-vertex
|
|
|
|
|
* values are equal (convergent, i.e. the provoking vertex doesn't matter),
|
|
|
|
|
* it can put such flat components into any interpolated vec4 slot.
|
|
|
|
|
*
|
|
|
|
|
* It should also be set if the hw can mix flat and interpolated components
|
|
|
|
|
* in the same vec4 slot.
|
|
|
|
|
*
|
|
|
|
|
* This causes nir_opt_varyings to skip interpolation for all varyings
|
|
|
|
|
* that are convergent, and enables better compaction and inter-shader code
|
|
|
|
|
* motion for convergent varyings.
|
|
|
|
|
*/
|
|
|
|
|
nir_io_mix_convergent_flat_with_interpolated = BITFIELD_BIT(5),
|
|
|
|
|
|
2024-07-07 07:50:38 -04:00
|
|
|
/**
|
|
|
|
|
* Whether src_type and dest_type of IO intrinsics are irrelevant and
|
|
|
|
|
* should be ignored by nir_opt_vectorize_io. All drivers that always treat
|
|
|
|
|
* load_input and store_output as untyped and load_interpolated_input as
|
|
|
|
|
* float##bit_size should set this.
|
|
|
|
|
*/
|
|
|
|
|
nir_io_vectorizer_ignores_types = BITFIELD_BIT(6),
|
|
|
|
|
|
2024-11-01 23:06:03 -04:00
|
|
|
/**
|
|
|
|
|
* Whether nir_opt_varyings should never promote convergent FS inputs
|
|
|
|
|
* to flat.
|
|
|
|
|
*/
|
|
|
|
|
nir_io_always_interpolate_convergent_fs_inputs = BITFIELD_BIT(7),
|
|
|
|
|
|
2024-11-15 12:39:32 -05:00
|
|
|
/**
|
|
|
|
|
* Whether the first assigned color channel component should be equal to
|
|
|
|
|
* the first unused VARn component.
|
|
|
|
|
*
|
|
|
|
|
* For example, if the first unused VARn channel is VAR0.z, color channels
|
|
|
|
|
* are assigned in this order:
|
|
|
|
|
* COL0.z, COL0.w, COL0.x, COL0.y, COL1.z, COL1.w, COL1.x, COL1.y
|
|
|
|
|
*
|
|
|
|
|
* This allows certain drivers to merge outputs if each output sets
|
|
|
|
|
* different components, for example 2 outputs writing VAR0.xy and COL0.z
|
|
|
|
|
* will only use 1 HW output.
|
|
|
|
|
*/
|
|
|
|
|
nir_io_compaction_rotates_color_channels = BITFIELD_BIT(8),
|
|
|
|
|
|
2024-10-27 16:26:28 -04:00
|
|
|
/* Options affecting the GLSL compiler or Gallium are below. */
|
2023-12-30 16:27:07 -05:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Lower load_deref/store_deref to load_input/store_output/etc. intrinsics.
|
2024-10-27 16:26:28 -04:00
|
|
|
* This only affects GLSL compilation and Gallium.
|
2023-12-30 16:27:07 -05:00
|
|
|
*/
|
2024-10-27 16:26:28 -04:00
|
|
|
nir_io_has_intrinsics = BITFIELD_BIT(16),
|
2023-04-06 04:21:00 -04:00
|
|
|
|
|
|
|
|
/**
|
2024-10-27 14:46:26 -04:00
|
|
|
* Don't run nir_opt_varyings and nir_opt_vectorize_io.
|
|
|
|
|
*
|
|
|
|
|
* This option is deprecated and is a hack. DO NOT USE.
|
|
|
|
|
* Use MESA_GLSL_DISABLE_IO_OPT=1 instead.
|
2023-04-06 04:21:00 -04:00
|
|
|
*/
|
2024-10-27 14:46:26 -04:00
|
|
|
nir_io_dont_optimize = BITFIELD_BIT(17),
|
2024-11-13 15:33:43 -05:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Whether clip and cull distance arrays should be separate. If this is not
|
|
|
|
|
* set, cull distances will be moved into VARYING_SLOT_CLIP_DISTn after clip
|
|
|
|
|
* distances, and shader_info::clip_distance_array_size will be the index
|
|
|
|
|
* of the first cull distance. nir_lower_clip_cull_distance_arrays does
|
|
|
|
|
* that.
|
|
|
|
|
*/
|
|
|
|
|
nir_io_separate_clip_cull_distance_arrays = BITFIELD_BIT(18),
|
2023-12-30 16:01:50 -05:00
|
|
|
} nir_io_options;
|
2021-08-25 14:07:50 +08:00
|
|
|
|
2024-08-28 11:35:07 +08:00
|
|
|
typedef enum {
|
|
|
|
|
nir_lower_packing_op_pack_64_2x32,
|
|
|
|
|
nir_lower_packing_op_unpack_64_2x32,
|
|
|
|
|
nir_lower_packing_op_pack_64_4x16,
|
|
|
|
|
nir_lower_packing_op_unpack_64_4x16,
|
|
|
|
|
nir_lower_packing_op_pack_32_2x16,
|
|
|
|
|
nir_lower_packing_op_unpack_32_2x16,
|
|
|
|
|
nir_lower_packing_op_pack_32_4x8,
|
|
|
|
|
nir_lower_packing_op_unpack_32_4x8,
|
|
|
|
|
nir_lower_packing_num_ops,
|
|
|
|
|
} nir_lower_packing_op;
|
|
|
|
|
|
2021-03-31 10:54:47 +02:00
|
|
|
/** An instruction filtering callback
|
|
|
|
|
*
|
|
|
|
|
* Returns true if the instruction should be processed and false otherwise.
|
|
|
|
|
*/
|
|
|
|
|
typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
|
|
|
|
|
|
2020-12-18 19:05:47 +01:00
|
|
|
/** A vectorization width callback
|
|
|
|
|
*
|
|
|
|
|
* Returns the maximum vectorization width per instruction.
|
|
|
|
|
* 0, if the instruction must not be modified.
|
|
|
|
|
*
|
|
|
|
|
* The vectorization width must be a power of 2.
|
|
|
|
|
*/
|
|
|
|
|
typedef uint8_t (*nir_vectorize_cb)(const nir_instr *, const void *);
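/*
 * Usage sketch (illustrative, hedged): a width callback for nir_opt_vectorize()
 * that only allows 2-wide vectorization of 16-bit ALU instructions. The use of
 * nir_instr_as_alu() and the alu->def field follows their declarations
 * elsewhere in NIR; treat the exact pass wiring as an assumption.
 *
 *    static uint8_t
 *    vectorize_width(const nir_instr *instr, const void *data)
 *    {
 *       if (instr->type != nir_instr_type_alu)
 *          return 0;
 *       const nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       return alu->def.bit_size == 16 ? 2 : 1;
 *    }
 */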
|
|
|
|
|
|
2015-02-02 16:13:49 -08:00
|
|
|
typedef struct nir_shader_compiler_options {
|
2016-01-05 05:09:46 -08:00
|
|
|
bool lower_fdiv;
|
2020-09-24 08:46:31 -07:00
|
|
|
bool lower_ffma16;
|
|
|
|
|
bool lower_ffma32;
|
|
|
|
|
bool lower_ffma64;
|
|
|
|
|
bool fuse_ffma16;
|
|
|
|
|
bool fuse_ffma32;
|
|
|
|
|
bool fuse_ffma64;
|
2018-04-18 11:02:51 +02:00
|
|
|
bool lower_flrp16;
|
2016-04-28 07:13:10 +02:00
|
|
|
bool lower_flrp32;
|
2016-04-26 09:35:30 +02:00
|
|
|
/** Lowers flrp when it does not support doubles */
|
|
|
|
|
bool lower_flrp64;
|
2015-01-27 16:22:54 -08:00
|
|
|
bool lower_fpow;
|
2015-01-30 13:53:39 -08:00
|
|
|
bool lower_fsat;
|
2015-01-28 10:39:29 -08:00
|
|
|
bool lower_fsqrt;
|
2019-05-08 10:26:49 -04:00
|
|
|
bool lower_sincos;
|
2019-06-03 13:18:55 -07:00
|
|
|
bool lower_fmod;
|
2023-08-14 19:21:52 +02:00
|
|
|
/** Lowers ibitfield_extract/ubitfield_extract. */
|
2016-01-13 11:09:11 -08:00
|
|
|
bool lower_bitfield_extract;
|
2023-08-14 19:11:51 +02:00
|
|
|
/** Lowers bitfield_insert. */
|
2016-01-06 15:30:38 -08:00
|
|
|
bool lower_bitfield_insert;
|
2018-05-08 12:47:48 -07:00
|
|
|
/** Lowers bitfield_reverse to shifts. */
|
|
|
|
|
bool lower_bitfield_reverse;
|
2018-05-08 13:04:37 -07:00
|
|
|
/** Lowers bit_count to shifts. */
|
|
|
|
|
bool lower_bit_count;
|
2023-08-14 19:34:08 +02:00
|
|
|
/** Lowers ifind_msb. */
|
2018-05-04 13:33:47 -07:00
|
|
|
bool lower_ifind_msb;
|
2023-08-14 19:34:08 +02:00
|
|
|
/** Lowers ufind_msb. */
|
|
|
|
|
bool lower_ufind_msb;
|
2018-05-04 14:02:55 -07:00
|
|
|
/** Lowers find_lsb to ufind_msb and logic ops */
|
|
|
|
|
bool lower_find_lsb;
|
2016-01-06 15:30:38 -08:00
|
|
|
bool lower_uadd_carry;
|
|
|
|
|
bool lower_usub_borrow;
|
2018-05-08 11:24:40 -07:00
|
|
|
/** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
|
|
|
|
|
bool lower_mul_high;
|
2020-08-27 14:35:04 +01:00
|
|
|
/** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */
|
|
|
|
|
bool lower_fneg;
|
|
|
|
|
/** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */
|
|
|
|
|
bool lower_ineg;
|
2021-08-06 07:19:27 +10:00
|
|
|
/** lowers fisnormal to alu ops. */
|
|
|
|
|
bool lower_fisnormal;
|
2015-03-06 01:17:22 -08:00
|
|
|
|
2020-08-18 19:51:57 +02:00
|
|
|
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
|
2015-03-31 11:25:19 -04:00
|
|
|
bool lower_scmp;
|
|
|
|
|
|
2020-01-03 16:33:54 -08:00
|
|
|
/* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */
|
2019-06-02 18:44:49 -04:00
|
|
|
bool lower_vector_cmp;
|
|
|
|
|
|
2019-07-18 20:56:27 +02:00
|
|
|
/** enable rules to avoid bit ops */
|
|
|
|
|
bool lower_bitops;
|
2019-05-31 13:54:12 -04:00
|
|
|
|
2019-02-06 13:12:25 -08:00
|
|
|
/** enables rules to lower isign to imin+imax */
|
|
|
|
|
bool lower_isign;
|
|
|
|
|
|
2019-04-16 22:49:41 +02:00
|
|
|
/** enables rules to lower fsign to fsub and flt */
|
|
|
|
|
bool lower_fsign;
|
|
|
|
|
|
2020-01-10 22:59:54 +01:00
|
|
|
/** enables rules to lower iabs to ineg+imax */
|
|
|
|
|
bool lower_iabs;
|
|
|
|
|
|
2020-10-09 18:33:26 +02:00
|
|
|
/** enable rules that avoid generating umax from signed integer ops */
|
|
|
|
|
bool lower_umax;
|
|
|
|
|
|
|
|
|
|
/** enable rules that avoid generating umin from signed integer ops */
|
|
|
|
|
bool lower_umin;
|
|
|
|
|
|
2024-07-08 14:21:22 -04:00
|
|
|
/* lower fmin/fmax with signed zero preserve to fmin/fmax with
|
|
|
|
|
* no_signed_zero, for backends whose fmin/fmax implementations do not
|
|
|
|
|
* implement IEEE-754-2019 semantics for signed zero.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_fminmax_signed_zero;
|
|
|
|
|
|
2019-06-20 21:47:16 -04:00
|
|
|
/* lower fdph to fdot4 */
|
|
|
|
|
bool lower_fdph;
|
|
|
|
|
|
2019-07-27 17:58:53 +02:00
|
|
|
/** lower fdot to fmul and fsum/fadd. */
|
|
|
|
|
bool lower_fdot;
|
|
|
|
|
|
2015-09-10 10:51:46 -07:00
|
|
|
/* Does the native fdot instruction replicate its result for four
|
|
|
|
|
* components? If so, then opt_algebraic_late will turn all fdotN
|
2020-06-20 14:33:57 -07:00
|
|
|
* instructions into fdotN_replicated instructions.
|
2015-09-10 10:51:46 -07:00
|
|
|
*/
|
|
|
|
|
bool fdot_replicates;
|
|
|
|
|
|
2018-09-01 21:15:27 +02:00
|
|
|
/** lowers ffloor to fsub+ffract: */
|
|
|
|
|
bool lower_ffloor;
|
|
|
|
|
|
2015-09-14 11:13:19 -04:00
|
|
|
/** lowers ffract to fsub+ffloor: */
|
|
|
|
|
bool lower_ffract;
|
|
|
|
|
|
2018-11-12 12:49:32 -05:00
|
|
|
/** lowers fceil to fneg+ffloor+fneg: */
|
|
|
|
|
bool lower_fceil;
|
|
|
|
|
|
2019-04-12 10:12:27 +02:00
|
|
|
bool lower_ftrunc;
|
|
|
|
|
|
2022-04-11 16:28:05 -07:00
|
|
|
/** Lowers fround_even to ffract+feq+csel.
|
|
|
|
|
*
|
|
|
|
|
* Not correct in that it doesn't correctly handle the "_even" part of the
|
|
|
|
|
* rounding, but good enough for DX9 array indexing handling on DX9-class
|
|
|
|
|
* hardware.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_fround_even;
|
|
|
|
|
|
2018-02-27 19:19:21 +11:00
|
|
|
bool lower_ldexp;
|
|
|
|
|
|
2016-01-21 15:46:47 -08:00
|
|
|
bool lower_pack_half_2x16;
|
2016-01-25 11:05:52 -08:00
|
|
|
bool lower_pack_unorm_2x16;
|
|
|
|
|
bool lower_pack_snorm_2x16;
|
|
|
|
|
bool lower_pack_unorm_4x8;
|
|
|
|
|
bool lower_pack_snorm_4x8;
|
2020-09-21 14:01:24 +01:00
|
|
|
bool lower_pack_64_2x32;
|
|
|
|
|
bool lower_pack_64_4x16;
|
|
|
|
|
bool lower_pack_32_2x16;
|
2020-04-21 04:41:41 -07:00
|
|
|
bool lower_pack_64_2x32_split;
|
|
|
|
|
bool lower_pack_32_2x16_split;
|
2016-01-21 15:46:47 -08:00
|
|
|
bool lower_unpack_half_2x16;
|
2016-01-25 11:07:02 -08:00
|
|
|
bool lower_unpack_unorm_2x16;
|
|
|
|
|
bool lower_unpack_snorm_2x16;
|
|
|
|
|
bool lower_unpack_unorm_4x8;
|
|
|
|
|
bool lower_unpack_snorm_4x8;
|
2020-04-21 04:41:41 -07:00
|
|
|
bool lower_unpack_64_2x32_split;
|
|
|
|
|
bool lower_unpack_32_2x16_split;
|
2016-01-21 15:46:47 -08:00
|
|
|
|
2020-04-24 14:27:33 -04:00
|
|
|
bool lower_pack_split;
|
|
|
|
|
|
2016-01-21 09:09:29 -08:00
|
|
|
bool lower_extract_byte;
|
|
|
|
|
bool lower_extract_word;
|
2020-03-25 15:38:06 +00:00
|
|
|
bool lower_insert_byte;
|
|
|
|
|
bool lower_insert_word;
|
2016-01-21 09:09:29 -08:00
|
|
|
|
2018-01-30 10:55:19 +11:00
|
|
|
bool lower_all_io_to_temps;
|
2019-03-28 10:57:31 -04:00
|
|
|
bool lower_all_io_to_elements;
|
2018-01-30 10:55:19 +11:00
|
|
|
|
2016-03-25 10:54:27 -07:00
|
|
|
/* Indicates that the driver only has zero-based vertex id */
|
|
|
|
|
bool vertex_id_zero_based;
|
2016-05-22 15:54:48 -07:00
|
|
|
|
2018-04-28 14:09:21 +02:00
|
|
|
/**
|
|
|
|
|
* If enabled, gl_BaseVertex will be lowered as:
|
|
|
|
|
* is_indexed_draw (~0/0) & firstvertex
|
|
|
|
|
*/
|
|
|
|
|
bool lower_base_vertex;
|
|
|
|
|
|
2018-06-01 14:07:15 -04:00
|
|
|
/**
|
|
|
|
|
* If enabled, gl_HelperInvocation will be lowered as:
|
|
|
|
|
*
|
|
|
|
|
* !((1 << sample_id) & sample_mask_in))
|
|
|
|
|
*
|
|
|
|
|
* This depends on some possible hw implementation details, which may
|
|
|
|
|
* not be true for all hw, in particular that the FS is only executed
|
|
|
|
|
* for covered samples or for helper invocations. So, do not blindly
|
|
|
|
|
* enable this option.
|
|
|
|
|
*
|
|
|
|
|
* Note: See also issue #22 in ARB_shader_image_load_store
|
|
|
|
|
*/
|
|
|
|
|
bool lower_helper_invocation;
|
|
|
|
|
|
2019-04-09 21:40:33 -04:00
|
|
|
/**
|
|
|
|
|
* Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
|
|
|
|
|
*
|
|
|
|
|
* gl_SampleMaskIn == 0 ---> gl_HelperInvocation
|
|
|
|
|
* gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
|
|
|
|
|
*/
|
|
|
|
|
bool optimize_sample_mask_in;
|
|
|
|
|
|
2023-06-13 14:07:53 +01:00
|
|
|
/**
|
|
|
|
|
* Optimize boolean reductions of quad broadcasts. This should only be enabled if
|
|
|
|
|
* nir_intrinsic_reduce supports INCLUDE_HELPERS.
|
|
|
|
|
*/
|
|
|
|
|
bool optimize_quad_vote_to_reduce;
|
|
|
|
|
|
2022-02-24 10:14:08 +01:00
|
|
|
bool lower_cs_local_index_to_id;
|
|
|
|
|
bool lower_cs_local_id_to_index;
|
2016-07-12 01:46:53 -07:00
|
|
|
|
2021-06-04 12:04:15 -07:00
|
|
|
/* Prevents lowering global_invocation_id to be in terms of workgroup_id */
|
2020-01-13 10:35:40 +01:00
|
|
|
bool has_cs_global_id;
|
|
|
|
|
|
2017-09-21 15:51:55 -07:00
|
|
|
bool lower_device_index_to_zero;
|
|
|
|
|
|
2020-12-22 14:37:45 +02:00
|
|
|
/* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord.
|
|
|
|
|
* Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN
|
|
|
|
|
* is GL_LOWER_LEFT.
|
|
|
|
|
*/
|
2018-07-06 13:43:06 -07:00
|
|
|
bool lower_wpos_pntc;
|
|
|
|
|
|
2018-09-19 01:17:09 -07:00
|
|
|
/**
|
|
|
|
|
* Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
|
|
|
|
|
* lowered to simple arithmetic.
|
|
|
|
|
*
|
|
|
|
|
* If this flag is set, the lowering will be applied to all bit-sizes of
|
|
|
|
|
* these instructions.
|
|
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* :c:member:`lower_hadd64`
|
2018-09-19 01:17:09 -07:00
|
|
|
*/
|
2018-07-12 15:02:27 +02:00
|
|
|
bool lower_hadd;
|
2018-09-19 01:17:09 -07:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
|
|
|
|
|
* should be lowered to simple arithmetic.
|
|
|
|
|
*
|
|
|
|
|
* If this flag is set, the lowering will be applied to only 64-bit
|
|
|
|
|
* versions of these instructions.
|
|
|
|
|
*
|
2023-08-20 20:42:34 +03:00
|
|
|
* :c:member:`lower_hadd`
|
2018-09-19 01:17:09 -07:00
|
|
|
*/
|
|
|
|
|
bool lower_hadd64;
|
|
|
|
|
|
2018-09-19 01:17:31 -07:00
|
|
|
/**
|
2022-07-19 12:34:03 -07:00
|
|
|
* Set if nir_op_uadd_sat should be lowered to simple arithmetic.
|
2018-09-19 01:17:31 -07:00
|
|
|
*
|
|
|
|
|
* If this flag is set, the lowering will be applied to all bit-sizes of
|
|
|
|
|
* these instructions.
|
|
|
|
|
*/
|
2021-08-30 11:35:36 +01:00
|
|
|
bool lower_uadd_sat;
|
2018-07-12 15:02:27 +02:00
|
|
|
|
2022-07-19 12:34:03 -07:00
|
|
|
/**
|
|
|
|
|
* Set if nir_op_usub_sat should be lowered to simple arithmetic.
|
|
|
|
|
*
|
|
|
|
|
* If this flag is set, the lowering will be applied to all bit-sizes of
|
|
|
|
|
* these instructions.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_usub_sat;
|
|
|
|
|
|
2021-08-30 11:35:36 +01:00
|
|
|
/**
|
|
|
|
|
* Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple
|
|
|
|
|
* arithmetic.
|
|
|
|
|
*
|
|
|
|
|
* If this flag is set, the lowering will be applied to all bit-sizes of
|
|
|
|
|
* these instructions.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_iadd_sat;
|
|
|
|
|
|
2021-11-19 22:38:30 +01:00
|
|
|
/**
|
|
|
|
|
* Set if imul_32x16 and umul_32x16 should be lowered to simple
|
|
|
|
|
* arithmetic.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_mul_32x16;
|
|
|
|
|
|
2019-04-11 12:28:48 -07:00
|
|
|
/**
|
|
|
|
|
* Should IO be re-vectorized? Some scalar ISAs still operate on vec4's
|
|
|
|
|
* for IO purposes and would prefer loads/stores be vectorized.
|
|
|
|
|
*/
|
|
|
|
|
bool vectorize_io;
|
2022-08-03 20:37:20 -07:00
|
|
|
bool vectorize_tess_levels;
|
2019-10-07 22:46:00 -04:00
|
|
|
bool lower_to_scalar;
|
2021-03-31 10:54:47 +02:00
|
|
|
nir_instr_filter_cb lower_to_scalar_filter;
|
2019-04-11 12:28:48 -07:00
|
|
|
|
2020-05-03 20:10:57 -04:00
|
|
|
/**
|
2020-12-18 19:05:47 +01:00
|
|
|
* Disables potentially harmful algebraic transformations for architectures
|
|
|
|
|
* with SIMD-within-a-register semantics.
|
|
|
|
|
*
|
|
|
|
|
* Note: to actually vectorize 16-bit instructions, use nir_opt_vectorize()
|
|
|
|
|
* with a suitable callback function.
|
2020-05-03 20:10:57 -04:00
|
|
|
*/
|
|
|
|
|
bool vectorize_vec2_16bit;
|
|
|
|
|
|
2019-06-17 17:10:06 -05:00
|
|
|
/**
|
|
|
|
|
* Should the linker unify inputs_read/outputs_written between adjacent
|
|
|
|
|
* shader stages which are linked into a single program?
|
|
|
|
|
*/
|
|
|
|
|
bool unify_interfaces;
|
|
|
|
|
|
2020-08-07 10:34:30 +02:00
|
|
|
/**
|
|
|
|
|
* Whether nir_lower_io() will lower interpolateAt functions to
|
|
|
|
|
* load_interpolated_input intrinsics.
|
|
|
|
|
*
|
2024-11-18 12:43:22 -05:00
|
|
|
* Unlike nir_lower_io_use_interpolated_input_intrinsics this will only
|
|
|
|
|
* lower these functions and leave input load intrinsics untouched.
|
2020-08-07 10:34:30 +02:00
|
|
|
*/
|
|
|
|
|
bool lower_interpolate_at;
|
|
|
|
|
|
2019-02-14 23:08:39 -08:00
|
|
|
/* Lowers when 32x32->64 bit multiplication is not supported */
|
|
|
|
|
bool lower_mul_2x32_64;
|
|
|
|
|
|
2024-01-16 12:55:10 +01:00
|
|
|
/* Indicates that urol and uror are supported */
|
|
|
|
|
bool has_rotate8;
|
|
|
|
|
bool has_rotate16;
|
|
|
|
|
bool has_rotate32;
|
2019-05-30 14:15:51 -07:00
|
|
|
|
2024-01-05 17:37:33 +00:00
|
|
|
/** Backend supports shfr */
|
|
|
|
|
bool has_shfr32;
|
|
|
|
|
|
2021-06-28 17:41:20 -07:00
|
|
|
/** Backend supports ternary addition */
|
|
|
|
|
bool has_iadd3;
|
|
|
|
|
|
2024-11-04 10:20:31 -04:00
|
|
|
/**
|
|
|
|
|
* Backend supports amul and would like it generated whenever
|
|
|
|
|
* possible. This is stronger than has_imul24 for amul, but does not imply
|
|
|
|
|
* support for imul24.
|
|
|
|
|
*/
|
|
|
|
|
bool has_amul;
|
|
|
|
|
|
2019-09-27 10:15:02 -07:00
|
|
|
/**
|
|
|
|
|
* Backend supports imul24, and would like to use it (when possible)
|
|
|
|
|
* for address/offset calculation. If true, driver should call
|
|
|
|
|
* nir_lower_amul(). (If not set, amul will automatically be lowered
|
|
|
|
|
* to imul.)
|
|
|
|
|
*/
|
|
|
|
|
bool has_imul24;
|
|
|
|
|
|
2020-04-12 16:36:20 +02:00
|
|
|
/** Backend supports umul24; if not set, umul24 will automatically be lowered
|
|
|
|
|
* to imul with masked inputs */
|
|
|
|
|
bool has_umul24;
|
|
|
|
|
|
2024-01-18 16:48:41 -06:00
|
|
|
/** Backend supports 32-bit imad */
|
|
|
|
|
bool has_imad32;
|
|
|
|
|
|
2020-04-12 16:36:20 +02:00
|
|
|
/** Backend supports umad24; if not set, umad24 will automatically be lowered
|
|
|
|
|
* to imul with masked inputs and iadd */
|
|
|
|
|
bool has_umad24;
|
|
|
|
|
|
2024-11-19 14:14:12 +01:00
|
|
|
/* Backend supports fused compare against zero and csel */
|
2021-03-10 09:42:22 +01:00
|
|
|
bool has_fused_comp_and_csel;
|
2024-11-19 13:10:13 +01:00
|
|
|
/* Backend supports fused int eq/ne against zero and csel. */
|
|
|
|
|
bool has_icsel_eqz64;
|
|
|
|
|
bool has_icsel_eqz32;
|
|
|
|
|
bool has_icsel_eqz16;
|
2021-03-10 09:42:22 +01:00
|
|
|
|
2024-05-29 14:56:17 +02:00
|
|
|
/* Backend supports fneo, fequ, fltu, fgeu. */
|
|
|
|
|
bool has_fneo_fcmpu;
|
|
|
|
|
|
|
|
|
|
/* Backend supports ford and funord. */
|
|
|
|
|
bool has_ford_funord;
|
|
|
|
|
|
2020-09-04 11:24:26 +01:00
|
|
|
/** Backend supports fsub; if not set, fsub will automatically be lowered to
|
|
|
|
|
* fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
|
|
|
|
|
bool has_fsub;
|
|
|
|
|
|
|
|
|
|
/** Backend supports isub; if not set, isub will automatically be lowered to
|
|
|
|
|
* iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */
|
|
|
|
|
bool has_isub;
|
|
|
|
|
|
2021-01-25 16:31:17 -08:00
|
|
|
/** Backend supports pack_32_4x8 or pack_32_4x8_split. */
|
|
|
|
|
bool has_pack_32_4x8;
|
|
|
|
|
|
2023-07-10 22:24:46 +02:00
|
|
|
/** Backend supports nir_load_texture_scale and prefers it over txs for nir
|
|
|
|
|
* lowerings. */
|
|
|
|
|
bool has_texture_scaling;
|
2021-02-01 11:05:48 +01:00
|
|
|
|
2023-12-05 19:58:20 -06:00
|
|
|
/** Backend supports sdot_4x8_iadd. */
|
2021-11-26 19:27:03 +02:00
|
|
|
bool has_sdot_4x8;
|
|
|
|
|
|
2023-12-05 19:58:20 -06:00
|
|
|
/** Backend supports udot_4x8_uadd. */
|
2021-11-26 19:27:03 +02:00
|
|
|
bool has_udot_4x8;
|
2021-02-23 17:33:04 -08:00
|
|
|
|
2023-12-05 19:58:20 -06:00
|
|
|
/** Backend supports sudot_4x8_iadd. */
|
2021-02-23 17:33:04 -08:00
|
|
|
bool has_sudot_4x8;
|
|
|
|
|
|
2023-12-05 19:58:20 -06:00
|
|
|
/** Backend supports sdot_4x8_iadd_sat. */
|
|
|
|
|
bool has_sdot_4x8_sat;
|
|
|
|
|
|
|
|
|
|
/** Backend supports udot_4x8_uadd_sat. */
|
|
|
|
|
bool has_udot_4x8_sat;
|
|
|
|
|
|
|
|
|
|
/** Backend supports sudot_4x8_iadd_sat. */
|
|
|
|
|
bool has_sudot_4x8_sat;
|
|
|
|
|
|
2021-08-30 13:56:01 +01:00
|
|
|
/** Backend supports sdot_2x16 and udot_2x16 opcodes. */
|
|
|
|
|
bool has_dot_2x16;
|
|
|
|
|
|
2021-04-28 17:48:54 +01:00
|
|
|
/** Backend supports fmulz (and ffmaz if lower_ffma32=false) */
|
|
|
|
|
bool has_fmulz;
|
|
|
|
|
|
2023-12-07 10:10:34 -06:00
|
|
|
/**
|
|
|
|
|
* Backend supports fmulz (and ffmaz if lower_ffma32=false) but only if
|
|
|
|
|
* FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set
|
|
|
|
|
*/
|
|
|
|
|
bool has_fmulz_no_denorms;
|
|
|
|
|
|
2022-10-04 16:02:28 +02:00
|
|
|
/** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */
|
|
|
|
|
bool has_find_msb_rev;
|
|
|
|
|
|
2023-01-24 18:53:52 +01:00
|
|
|
/** Backend supports pack_half_2x16_rtz_split. */
|
|
|
|
|
bool has_pack_half_2x16_rtz;
|
|
|
|
|
|
2023-05-27 15:28:10 +02:00
|
|
|
/** Backend supports bitz/bitnz. */
|
|
|
|
|
bool has_bit_test;
|
|
|
|
|
|
2023-08-14 19:21:52 +02:00
|
|
|
/** Backend supports ubfe/ibfe. */
|
|
|
|
|
bool has_bfe;
|
|
|
|
|
|
2023-08-14 19:11:51 +02:00
|
|
|
/** Backend supports bfm. */
|
|
|
|
|
bool has_bfm;
|
|
|
|
|
|
|
|
|
|
/** Backend supports bfi. */
|
|
|
|
|
bool has_bfi;
|
|
|
|
|
|
|
|
|
|
/** Backend supports bitfield_select. */
|
|
|
|
|
bool has_bitfield_select;
|
|
|
|
|
|
2023-08-14 19:34:08 +02:00
|
|
|
/** Backend supports uclz. */
|
|
|
|
|
bool has_uclz;
|
|
|
|
|
|
2023-11-17 11:21:19 +00:00
|
|
|
/** Backend supports msad_u4x8. */
|
|
|
|
|
bool has_msad;
|
|
|
|
|
|
2019-06-03 15:22:15 -07:00
|
|
|
/**
|
|
|
|
|
* Is this the Intel vec4 backend?
|
|
|
|
|
*
|
|
|
|
|
* Used to inhibit algebraic optimizations that are known to be harmful on
|
|
|
|
|
* the Intel vec4 backend. This is generally applicable to any
|
|
|
|
|
* optimization that might cause more immediate values to be used in
|
|
|
|
|
* 3-source (e.g., ffma and flrp) instructions.
|
|
|
|
|
*/
|
|
|
|
|
bool intel_vec4;
|
|
|
|
|
|
2021-06-02 15:14:41 +01:00
|
|
|
/**
|
|
|
|
|
* For most Intel GPUs, all ternary operations such as FMA and BFE cannot
|
|
|
|
|
* have immediates, so two to three instructions may eventually be needed.
|
|
|
|
|
*/
|
|
|
|
|
bool avoid_ternary_with_two_constants;
|
2020-03-06 13:22:45 -08:00
|
|
|
|
2020-03-31 13:57:42 +01:00
|
|
|
/** Whether 8-bit ALU is supported. */
|
|
|
|
|
bool support_8bit_alu;
|
|
|
|
|
|
|
|
|
|
/** Whether 16-bit ALU is supported. */
|
|
|
|
|
bool support_16bit_alu;
|
|
|
|
|
|
2016-12-13 14:39:51 +11:00
|
|
|
unsigned max_unroll_iterations;
|
nir/loop_unroll: unroll more aggressively if it can improve load scheduling
Significantly improves performance of a Control compute shader. Also seems
to increase FPS at the very start of the game by ~5% (RX 580, 1080p,
medium settings, no MSAA).
fossil-db (Sienna):
Totals from 81 (0.06% of 139391) affected shaders:
SGPRs: 3848 -> 4362 (+13.36%); split: -0.99%, +14.35%
VGPRs: 4132 -> 4648 (+12.49%)
CodeSize: 275532 -> 659188 (+139.24%)
MaxWaves: 986 -> 906 (-8.11%)
Instrs: 54422 -> 126865 (+133.11%)
Cycles: 1057240 -> 750464 (-29.02%); split: -42.61%, +13.60%
VMEM: 26507 -> 61829 (+133.26%); split: +135.56%, -2.30%
SMEM: 4748 -> 5895 (+24.16%); split: +31.47%, -7.31%
VClause: 1933 -> 6802 (+251.89%); split: -0.72%, +252.61%
SClause: 1179 -> 1810 (+53.52%); split: -3.14%, +56.66%
Branches: 1174 -> 1157 (-1.45%); split: -23.94%, +22.49%
PreVGPRs: 3219 -> 3387 (+5.22%); split: -0.96%, +6.18%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6538>
2020-09-01 11:55:58 +01:00
|
|
|
unsigned max_unroll_iterations_aggressive;
|
2022-09-29 15:04:21 +01:00
|
|
|
unsigned max_unroll_iterations_fp64;
|
2019-02-25 17:13:48 -08:00
|
|
|
|
2020-09-13 21:01:55 +02:00
|
|
|
bool lower_uniforms_to_ubo;
|
|
|
|
|
|
2021-05-10 08:54:50 +02:00
|
|
|
/* If the precision is ignored, backends that don't handle
|
|
|
|
|
* different precisions when passing data between stages and use
|
|
|
|
|
* vectorized IO can pack more varyings when linking. */
|
|
|
|
|
bool linker_ignore_precision;
|
|
|
|
|
|
2022-05-11 20:12:56 +10:00
|
|
|
/* Specifies if indirect sampler array access will trigger forced loop
|
|
|
|
|
* unrolling.
|
|
|
|
|
*/
|
|
|
|
|
bool force_indirect_unrolling_sampler;
|
|
|
|
|
|
2022-05-18 12:00:30 +10:00
|
|
|
/* Some older drivers don't support GLSL versions with the concept of flat
|
|
|
|
|
* varyings and also don't support integers. This setting helps us avoid
|
|
|
|
|
* marking varyings as flat and potentially having them changed to ints via
|
|
|
|
|
* varying packing.
|
|
|
|
|
*/
|
|
|
|
|
bool no_integers;
|
|
|
|
|
|
2021-07-29 19:34:26 +10:00
|
|
|
/**
|
|
|
|
|
* Specifies which type of indirectly accessed variables should force
|
|
|
|
|
* loop unrolling.
|
|
|
|
|
*/
|
|
|
|
|
nir_variable_mode force_indirect_unrolling;
|
|
|
|
|
|
2020-10-26 15:17:30 +10:00
|
|
|
bool driver_functions;
|
|
|
|
|
|
2024-11-03 21:57:28 -05:00
|
|
|
/**
|
|
|
|
|
* If true, the driver will call nir_lower_int64 itself and the frontend
|
|
|
|
|
* should not do so. This may enable better optimization around address
|
|
|
|
|
* modes.
|
|
|
|
|
*/
|
|
|
|
|
bool late_lower_int64;
|
2019-02-25 17:13:48 -08:00
|
|
|
nir_lower_int64_options lower_int64_options;
|
|
|
|
|
nir_lower_doubles_options lower_doubles_options;
|
2020-09-02 11:45:46 +01:00
|
|
|
nir_divergence_options divergence_analysis_options;
|
2021-08-25 14:07:50 +08:00
|
|
|
|
2022-01-01 05:32:14 -05:00
|
|
|
/**
|
|
|
|
|
* The masks of shader stages that support indirect indexing with
|
2023-12-30 16:27:07 -05:00
|
|
|
* load_input and store_output intrinsics. It's used by
|
|
|
|
|
* nir_lower_io_passes.
|
2022-01-01 05:32:14 -05:00
|
|
|
*/
|
|
|
|
|
uint8_t support_indirect_inputs;
|
|
|
|
|
uint8_t support_indirect_outputs;
|
2022-03-11 13:55:02 +08:00
|
|
|
|
2022-11-24 13:23:06 +01:00
|
|
|
/** Store the variable offset into the intrinsic range_base instead
|
|
|
|
|
* of adding it to the image index.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_image_offset_to_range_base;
|
2022-11-24 17:03:38 +01:00
|
|
|
|
|
|
|
|
/** Store the variable offset into the intrinsic range_base instead
|
|
|
|
|
* of adding it to the atomic source
|
|
|
|
|
*/
|
|
|
|
|
bool lower_atomic_offset_to_range_base;
|
2023-05-31 13:10:47 -07:00
|
|
|
|
|
|
|
|
/** Don't convert medium-precision casts (e.g. f2fmp) into concrete
|
|
|
|
|
* type casts (e.g. f2f16).
|
|
|
|
|
*/
|
|
|
|
|
bool preserve_mediump;
|
2023-08-10 14:12:37 -04:00
|
|
|
|
|
|
|
|
/** lowers fquantize2f16 to alu ops. */
|
|
|
|
|
bool lower_fquantize2f16;
|
2023-11-08 10:44:22 +08:00
|
|
|
|
|
|
|
|
/** Lower f2f16 to f2f16_rtz when execution mode is not rtne. */
|
|
|
|
|
bool force_f2f16_rtz;
|
2023-11-18 22:44:56 -05:00
|
|
|
|
|
|
|
|
/** Lower VARYING_SLOT_LAYER in FS to SYSTEM_VALUE_LAYER_ID. */
|
|
|
|
|
bool lower_layer_fs_input_to_sysval;
|
2023-12-30 16:01:50 -05:00
|
|
|
|
2024-04-03 13:46:38 -04:00
|
|
|
/** clip/cull distance and tess level arrays use compact semantics */
|
|
|
|
|
bool compact_arrays;
|
|
|
|
|
|
nir: introduce discard_is_demote compiler option
This new option indicates that the driver emits the same
code for nir_intrinsic_discard and nir_intrinsic_demote.
Otherwise, it is assumed that discard is implemented as
terminate.
spirv_to_nir uses this option in order to directly emit
nir_demote in case of OpKill.
RADV GFX11:
Totals from 3965 (4.99% of 79439) affected shaders:
MaxWaves: 119418 -> 119424 (+0.01%); split: +0.03%, -0.03%
Instrs: 1608753 -> 1620830 (+0.75%); split: -0.18%, +0.93%
CodeSize: 8759152 -> 8785152 (+0.30%); split: -0.18%, +0.48%
VGPRs: 152292 -> 149232 (-2.01%); split: -2.37%, +0.36%
Latency: 9162314 -> 10033923 (+9.51%); split: -0.46%, +9.97%
InvThroughput: 1491656 -> 1493408 (+0.12%); split: -0.10%, +0.22%
VClause: 21424 -> 21452 (+0.13%); split: -0.31%, +0.44%
SClause: 53598 -> 55871 (+4.24%); split: -2.15%, +6.39%
Copies: 90553 -> 90462 (-0.10%); split: -2.91%, +2.81%
Branches: 16283 -> 16311 (+0.17%)
PreSGPRs: 113993 -> 113254 (-0.65%); split: -1.84%, +1.19%
PreVGPRs: 110951 -> 108914 (-1.84%); split: -2.08%, +0.24%
VALU: 963192 -> 963167 (-0.00%); split: -0.01%, +0.01%
SALU: 87926 -> 90795 (+3.26%); split: -2.92%, +6.18%
VMEM: 25937 -> 25936 (-0.00%)
SMEM: 110012 -> 109799 (-0.19%); split: -0.20%, +0.01%
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27617>
2024-02-14 12:38:40 +01:00
|
|
|
/**
|
|
|
|
|
* Whether discard gets emitted as nir_intrinsic_demote.
|
|
|
|
|
* Otherwise, nir_intrinsic_terminate is being used.
|
|
|
|
|
*/
|
|
|
|
|
bool discard_is_demote;
|
|
|
|
|
|
2024-07-23 12:12:15 -04:00
|
|
|
/**
|
|
|
|
|
* Whether the new-style derivative intrinsics are supported. If false,
|
|
|
|
|
* legacy ALU derivative ops will be emitted. This transitional option will
|
|
|
|
|
* be removed once all drivers are converted to derivative intrinsics.
|
|
|
|
|
*/
|
|
|
|
|
bool has_ddx_intrinsics;
|
|
|
|
|
|
|
|
|
|
/** Whether derivative intrinsics must be scalarized. */
|
|
|
|
|
bool scalarize_ddx;
|
|
|
|
|
|
2024-10-19 21:03:13 -07:00
|
|
|
/**
|
|
|
|
|
* Assign a range of driver locations to per-view outputs, with unique
|
|
|
|
|
* slots for each view. If unset, per-view outputs will be treated
|
|
|
|
|
* similarly to other arrayed IO, and only slots for one view will be
|
|
|
|
|
* assigned. Regardless of this setting, per-view outputs are only assigned
|
|
|
|
|
* slots for one value in var->data.location.
|
|
|
|
|
*/
|
|
|
|
|
bool per_view_unique_driver_locations;
|
|
|
|
|
|
2024-10-29 14:49:02 -07:00
|
|
|
/**
|
|
|
|
|
* Emit nir_intrinsic_store_per_view_output with compacted view indices
|
|
|
|
|
* rather than absolute view indices. When using compacted indices, the Nth
|
|
|
|
|
* index refers to the Nth enabled view, not the Nth absolute view. For
|
|
|
|
|
* example, with view mask 0b1010, compacted index 0 is absolute index 1,
|
|
|
|
|
* and compacted index 1 is absolute index 3. Note that compacted view
|
|
|
|
|
* indices do not correspond directly to gl_ViewIndex.
|
|
|
|
|
*
|
|
|
|
|
* If compact_view_index is unset, per-view indices must be constant before
|
|
|
|
|
* nir_lower_io. This can be guaranteed by calling nir_lower_io_temporaries
|
|
|
|
|
* first.
|
|
|
|
|
*/
|
|
|
|
|
bool compact_view_index;
|
|
|
|
|
|
2023-12-30 16:01:50 -05:00
|
|
|
/** Options determining lowering and behavior of inputs and outputs. */
|
|
|
|
|
nir_io_options io_options;
|
2023-12-30 16:43:35 -05:00
|
|
|
|
2024-08-28 11:35:07 +08:00
|
|
|
/**
|
|
|
|
|
* Bit mask of nir_lower_packing_op to skip lowering some nir ops in
|
|
|
|
|
* nir_lower_packing().
|
|
|
|
|
*/
|
|
|
|
|
unsigned skip_lower_packing_ops;
|
|
|
|
|
|
2023-12-30 16:43:35 -05:00
|
|
|
/** Driver callback allowing the driver to define how mediump I/O is lowered.
|
|
|
|
|
* Used by nir_lower_io_passes.
|
|
|
|
|
*/
|
|
|
|
|
void (*lower_mediump_io)(struct nir_shader *nir);
|
nir: add nir_opt_varyings, new pass optimizing and compacting varyings
Highlights:
- all shader stages and all input/output types are handled, including
inputs and outputs with multiple vertices
- the optimizations performed are: unused input/output removal, constant
and uniform propagation, output deduplication, inter-shader code motion,
and compaction
- constant and uniform propagation and output deduplication work even
if a shader contains multiple stores of the same output, e.g. in GS
- the same optimizations are also performed between output stores and
output loads (for TCS)
- FS inputs are packed agressively. Only flat, interp FP32, and interp
FP16 can't be in the same vec4. Also, if an output value is
non-divergent within a primitive, the corresponding FS input is
opportunistically promoted to flat.
The big comment at the beginning of nir_opt_varyings.c has a detailed
explanation, which is the same as:
https://gitlab.freedesktop.org/mesa/mesa/-/issues/8841
dEQP and GLCTS have incorrect tests that fail with this, see:
https://gitlab.freedesktop.org/mesa/mesa/-/issues/10361
Acked-by: Timothy Arceri <tarceri@itsqueeze.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26819>
2023-04-06 04:21:00 -04:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return the maximum cost of an expression that's written to a shader
|
|
|
|
|
* output that can be moved into the next shader to remove that output.
|
|
|
|
|
*
|
|
|
|
|
* Currently only uniform expressions are moved. A uniform expression is
|
|
|
|
|
* any ALU expression sourcing only constants, uniforms, and UBO loads.
|
|
|
|
|
*
|
|
|
|
|
* Set to NULL or return 0 if you only want to propagate constants from
|
|
|
|
|
* outputs to inputs.
|
|
|
|
|
*
|
|
|
|
|
* Drivers can set the maximum cost based on the types of consecutive
|
|
|
|
|
* shaders or shader SHA1s.
|
|
|
|
|
*
|
|
|
|
|
* Drivers should also set "varying_estimate_instr_cost".
|
|
|
|
|
*/
|
|
|
|
|
unsigned (*varying_expression_max_cost)(struct nir_shader *consumer,
|
|
|
|
|
struct nir_shader *producer);
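/*
 * Illustrative sketch (not from the original header): a driver callback of
 * this shape could cap the cost of uniform expressions that are allowed to
 * move into the consumer. The function name and the constant 4 are
 * hypothetical; a real driver would pick a value based on its hardware.
 *
 *    static unsigned
 *    my_varying_expression_max_cost(struct nir_shader *consumer,
 *                                   struct nir_shader *producer)
 *    {
 *       return 4;
 *    }
 */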
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return the cost of an instruction that could be moved into the next
|
|
|
|
|
* shader. If the cost of all instructions in an expression is <=
|
|
|
|
|
* varying_expression_max_cost(), the instruction is moved.
|
2024-11-27 22:24:09 -05:00
|
|
|
*
|
|
|
|
|
* When this callback isn't set, nir_opt_varyings uses its own version.
|
2023-04-06 04:21:00 -04:00
|
|
|
*/
|
|
|
|
|
unsigned (*varying_estimate_instr_cost)(struct nir_instr *instr);
|
2024-11-27 22:34:40 -05:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* When the varying_expression_max_cost callback isn't set, this specifies
|
|
|
|
|
* the maximum cost of a uniform expression that is allowed to be moved
|
|
|
|
|
* from output stores into the next shader stage to eliminate those output
|
|
|
|
|
* stores and corresponding inputs.
|
|
|
|
|
*
|
|
|
|
|
* 0 only allows propagating constants written to output stores to
|
|
|
|
|
* the next shader.
|
|
|
|
|
*
|
|
|
|
|
* At least 2 is required for moving a uniform stored in an output into
|
|
|
|
|
* the next shader according to default_varying_estimate_instr_cost.
|
|
|
|
|
*/
|
|
|
|
|
unsigned max_varying_expression_cost;
|
2015-02-02 16:13:49 -08:00
|
|
|
} nir_shader_compiler_options;
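/*
 * Illustrative sketch (not part of the original header): drivers typically
 * keep a single static copy of these options and point every shader at it.
 * The particular flag values below are hypothetical; real drivers choose
 * them to match their hardware.
 *
 *    static const nir_shader_compiler_options my_driver_nir_options = {
 *       .lower_to_scalar = true,
 *       .vectorize_io = false,
 *       .has_imul24 = true,
 *       .max_unroll_iterations = 32,
 *    };
 */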
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
typedef struct nir_shader {
|
2021-09-08 15:24:10 +01:00
|
|
|
gc_ctx *gctx;
|
|
|
|
|
|
2015-09-17 18:18:19 -04:00
|
|
|
/** list of uniforms (nir_variable) */
|
2020-07-20 16:30:37 -05:00
|
|
|
struct exec_list variables;
|
2016-01-08 17:16:29 -08:00
|
|
|
|
2015-02-02 16:13:49 -08:00
|
|
|
/** Set of driver-specific options for the shader.
|
|
|
|
|
*
|
|
|
|
|
* The memory for the options is expected to be kept in a single static
|
|
|
|
|
* copy by the driver.
|
|
|
|
|
*/
|
|
|
|
|
const struct nir_shader_compiler_options *options;
|
|
|
|
|
|
2015-08-05 17:14:59 -07:00
|
|
|
/** Various bits of compile-time information about a given shader */
|
2017-05-08 09:20:21 -07:00
|
|
|
struct shader_info info;
|
2015-08-05 17:14:59 -07:00
|
|
|
|
2023-08-17 21:02:41 +03:00
|
|
|
/** list of nir_function */
|
|
|
|
|
struct exec_list functions;
|
2014-07-31 16:14:51 -07:00
|
|
|
|
2014-08-05 10:54:27 -07:00
|
|
|
/**
|
2020-08-31 13:06:04 -05:00
|
|
|
* The size of the variable space for load_input_*, load_uniform_*, etc.
|
|
|
|
|
* intrinsics. This is in back-end specific units which is likely one of
|
|
|
|
|
* bytes, dwords, or vec4s depending on context and back-end.
|
2014-08-05 10:54:27 -07:00
|
|
|
*/
|
2020-08-31 13:04:50 -05:00
|
|
|
unsigned num_inputs, num_uniforms, num_outputs;
|
|
|
|
|
|
2020-11-02 17:58:42 -06:00
|
|
|
/** Size in bytes of required implicitly bound global memory */
|
|
|
|
|
unsigned global_mem_size;
|
|
|
|
|
|
2016-12-02 11:36:42 -08:00
|
|
|
/** Size in bytes of required scratch space */
|
|
|
|
|
unsigned scratch_size;
|
|
|
|
|
|
2018-06-28 19:16:19 -07:00
|
|
|
/** Constant data associated with this shader.
|
|
|
|
|
*
|
2020-07-07 12:25:13 -07:00
|
|
|
* Constant data is loaded through load_constant intrinsics (as compared to
|
|
|
|
|
* the NIR load_const instructions which have the constant value inlined
|
|
|
|
|
* into them). This is usually generated by nir_opt_large_constants (so
|
|
|
|
|
* shaders don't have to load_const into a temporary array when they want
|
|
|
|
|
* to indirect on a const array).
|
2018-06-28 19:16:19 -07:00
|
|
|
*/
|
|
|
|
|
void *constant_data;
|
2020-07-07 12:25:13 -07:00
|
|
|
/** Size of the constant data associated with the shader, in bytes */
|
2018-06-28 19:16:19 -07:00
|
|
|
unsigned constant_data_size;
|
2020-06-21 14:35:29 -07:00
|
|
|
|
2022-05-17 10:16:55 -05:00
|
|
|
struct nir_xfb_info *xfb_info;
|
|
|
|
|
|
2020-06-21 14:35:29 -07:00
|
|
|
unsigned printf_info_count;
|
2022-04-16 10:48:08 +02:00
|
|
|
u_printf_info *printf_info;
|
2014-07-31 16:14:51 -07:00
|
|
|
} nir_shader;
|
|
|
|
|
|
2018-09-06 11:12:24 -07:00
|
|
|
#define nir_foreach_function(func, shader) \
|
|
|
|
|
foreach_list_typed(nir_function, func, node, &(shader)->functions)
|
|
|
|
|
|
2023-06-29 01:05:45 +08:00
|
|
|
#define nir_foreach_function_safe(func, shader) \
|
|
|
|
|
foreach_list_typed_safe(nir_function, func, node, &(shader)->functions)
|
|
|
|
|
|
2024-11-17 19:42:02 -04:00
|
|
|
#define nir_foreach_entrypoint(func, lib) \
|
|
|
|
|
nir_foreach_function(func, lib) \
|
|
|
|
|
if (func->is_entrypoint)
|
|
|
|
|
|
|
|
|
|
#define nir_foreach_entrypoint_safe(func, lib) \
|
|
|
|
|
nir_foreach_function_safe(func, lib) \
|
|
|
|
|
if (func->is_entrypoint)
|
|
|
|
|
|
2023-06-22 12:38:16 -04:00
|
|
|
static inline nir_function *
|
2023-06-30 04:09:33 +08:00
|
|
|
nir_foreach_function_with_impl_first(const nir_shader *shader)
|
2023-06-22 12:38:16 -04:00
|
|
|
{
|
|
|
|
|
foreach_list_typed(nir_function, func, node, &shader->functions) {
|
|
|
|
|
if (func->impl != NULL)
|
|
|
|
|
return func;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_function_impl *
|
2023-06-30 04:09:33 +08:00
|
|
|
nir_foreach_function_with_impl_next(nir_function **it)
|
2023-06-22 12:38:16 -04:00
|
|
|
{
|
|
|
|
|
foreach_list_typed_from(nir_function, func, node, _, (*it)->node.next) {
|
|
|
|
|
if (func->impl != NULL) {
|
|
|
|
|
*it = func;
|
|
|
|
|
return func->impl;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_function_with_impl(it, impl_it, shader) \
|
|
|
|
|
for (nir_function *it = nir_foreach_function_with_impl_first(shader); \
|
|
|
|
|
it != NULL; \
|
|
|
|
|
it = NULL) \
|
|
|
|
|
\
|
|
|
|
|
for (nir_function_impl *impl_it = it->impl; \
|
|
|
|
|
impl_it != NULL; \
|
2023-06-30 04:09:33 +08:00
|
|
|
impl_it = nir_foreach_function_with_impl_next(&it))
|
2023-06-28 18:20:40 +08:00
|
|
|
|
2023-06-22 12:38:16 -04:00
|
|
|
/* Equivalent to
|
|
|
|
|
*
|
|
|
|
|
* nir_foreach_function(func, shader) {
|
|
|
|
|
* if (func->impl != NULL) {
|
|
|
|
|
* ...
|
|
|
|
|
* }
|
|
|
|
|
* }
|
|
|
|
|
*
|
|
|
|
|
* Carefully written to ensure break/continue work in the user code.
|
|
|
|
|
*/
|
|
|
|
|
|
2023-06-28 18:20:40 +08:00
|
|
|
#define nir_foreach_function_impl(it, shader) \
|
|
|
|
|
nir_foreach_function_with_impl(_func_##it, it, shader)
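/*
 * Usage sketch (illustrative, not from the original header): iterate every
 * function that has an implementation, e.g. to count them. break/continue
 * behave as expected inside the loop body.
 *
 *    unsigned num_impls = 0;
 *    nir_foreach_function_impl(impl, shader) {
 *       num_impls++;
 *    }
 */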
|
2023-06-22 12:38:16 -04:00
|
|
|
|
2016-08-24 19:09:57 -07:00
|
|
|
static inline nir_function_impl *
|
2022-03-13 12:40:24 +01:00
|
|
|
nir_shader_get_entrypoint(const nir_shader *shader)
|
2016-03-25 14:07:41 -07:00
|
|
|
{
|
2018-09-06 11:12:24 -07:00
|
|
|
nir_function *func = NULL;
|
|
|
|
|
|
|
|
|
|
nir_foreach_function(function, shader) {
|
|
|
|
|
assert(func == NULL);
|
|
|
|
|
if (function->is_entrypoint) {
|
|
|
|
|
func = function;
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
|
break;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!func)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
2016-03-25 14:07:41 -07:00
|
|
|
assert(func->num_params == 0);
|
2016-08-24 19:09:57 -07:00
|
|
|
assert(func->impl);
|
|
|
|
|
return func->impl;
|
2016-03-25 14:07:41 -07:00
|
|
|
}
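/*
 * Usage sketch (illustrative): fetch the single entrypoint implementation,
 * e.g. to obtain a cursor at the start of the shader. nir_before_impl() is
 * declared further below in this header.
 *
 *    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
 *    if (impl != NULL) {
 *       nir_cursor start = nir_before_impl(impl);
 *       ...
 *    }
 */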
|
|
|
|
|
|
2022-04-07 16:44:08 -05:00
|
|
|
static inline nir_function *
|
|
|
|
|
nir_shader_get_function_for_name(const nir_shader *shader, const char *name)
|
|
|
|
|
{
|
|
|
|
|
nir_foreach_function(func, shader) {
|
2024-05-06 14:05:14 +02:00
|
|
|
if (func->name && strcmp(func->name, name) == 0)
|
2022-04-07 16:44:08 -05:00
|
|
|
return func;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-12 12:03:42 -04:00
|
|
|
/*
|
|
|
|
|
* After all functions are forcibly inlined, these passes remove redundant
|
|
|
|
|
* functions from a shader and library respectively.
|
|
|
|
|
*/
|
2022-04-28 22:08:00 +02:00
|
|
|
void nir_remove_non_entrypoints(nir_shader *shader);
|
2023-10-12 12:03:42 -04:00
|
|
|
void nir_remove_non_exported(nir_shader *shader);
|
2024-11-25 07:52:27 -05:00
|
|
|
void nir_remove_entrypoints(nir_shader *shader);
|
2024-11-19 13:16:54 -04:00
|
|
|
void nir_fixup_is_exported(nir_shader *shader);
|
2022-04-28 22:08:00 +02:00
|
|
|
|
2015-02-02 16:13:49 -08:00
|
|
|
nir_shader *nir_shader_create(void *mem_ctx,
|
2015-08-18 01:48:34 -07:00
|
|
|
gl_shader_stage stage,
|
2016-10-13 11:41:23 +11:00
|
|
|
const nir_shader_compiler_options *options,
|
|
|
|
|
shader_info *si);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2017-02-27 16:28:53 -08:00
|
|
|
/** Adds a variable to the appropriate list in nir_shader */
|
2015-10-09 07:05:11 -07:00
|
|
|
void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
|
|
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
|
nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
|
|
|
|
|
{
|
2019-01-16 00:05:04 +01:00
|
|
|
assert(var->data.mode == nir_var_function_temp);
|
2015-10-09 07:05:11 -07:00
|
|
|
exec_list_push_tail(&impl->locals, &var->node);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** creates a variable, sets a few defaults, and adds it to the list */
|
|
|
|
|
nir_variable *nir_variable_create(nir_shader *shader,
|
|
|
|
|
nir_variable_mode mode,
|
|
|
|
|
const struct glsl_type *type,
|
|
|
|
|
const char *name);
|
|
|
|
|
/** creates a local variable and adds it to the list */
|
|
|
|
|
nir_variable *nir_local_variable_create(nir_function_impl *impl,
|
|
|
|
|
const struct glsl_type *type,
|
|
|
|
|
const char *name);
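/*
 * Usage sketch (illustrative, names and location hypothetical): create a
 * vec4 shader output and place it at a specific varying slot.
 *
 *    nir_variable *pos =
 *       nir_variable_create(shader, nir_var_shader_out,
 *                           glsl_vec4_type(), "out_pos");
 *    pos->data.location = VARYING_SLOT_POS;
 */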
|
2023-05-02 15:40:51 -07:00
|
|
|
|
2023-05-16 13:37:53 -07:00
|
|
|
/** Creates a uniform builtin state variable. */
|
|
|
|
|
nir_variable *
|
|
|
|
|
nir_state_variable_create(nir_shader *shader,
|
|
|
|
|
const struct glsl_type *type,
|
|
|
|
|
const char *name,
|
|
|
|
|
const gl_state_index16 tokens[STATE_LENGTH]);
|
|
|
|
|
|
2023-05-02 15:40:51 -07:00
|
|
|
/* Gets the variable for the given mode and location, creating it (with the given
|
|
|
|
|
* type) if necessary.
|
|
|
|
|
*/
|
|
|
|
|
nir_variable *
|
|
|
|
|
nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
|
|
|
|
|
const struct glsl_type *type);
|
|
|
|
|
|
|
|
|
|
/* Creates a variable for the given mode and location.
|
|
|
|
|
*/
|
|
|
|
|
nir_variable *
|
|
|
|
|
nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
|
|
|
|
|
const struct glsl_type *type);
|
2015-10-09 07:05:11 -07:00
|
|
|
|
2020-07-22 23:37:27 -05:00
|
|
|
nir_variable *nir_find_variable_with_location(nir_shader *shader,
|
|
|
|
|
nir_variable_mode mode,
|
|
|
|
|
unsigned location);
|
|
|
|
|
|
|
|
|
|
nir_variable *nir_find_variable_with_driver_location(nir_shader *shader,
|
|
|
|
|
nir_variable_mode mode,
|
|
|
|
|
unsigned location);
|
|
|
|
|
|
2023-05-04 08:20:41 +02:00
|
|
|
nir_variable *nir_find_state_variable(nir_shader *s,
|
|
|
|
|
gl_state_index16 tokens[STATE_LENGTH]);
|
|
|
|
|
|
2023-09-11 16:08:31 +03:00
|
|
|
nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
|
|
|
|
|
unsigned texture_index);
|
|
|
|
|
|
2021-06-10 13:46:15 -07:00
|
|
|
void nir_sort_variables_with_modes(nir_shader *shader,
|
|
|
|
|
int (*compar)(const nir_variable *,
|
|
|
|
|
const nir_variable *),
|
|
|
|
|
nir_variable_mode modes);
|
2020-11-06 15:19:53 -08:00
|
|
|
|
2014-07-31 16:16:23 -07:00
|
|
|
/** creates a function and adds it to the shader's list of functions */
|
|
|
|
|
nir_function *nir_function_create(nir_shader *shader, const char *name);
|
|
|
|
|
|
2023-06-23 11:57:47 +08:00
|
|
|
static inline void
|
|
|
|
|
nir_function_set_impl(nir_function *func, nir_function_impl *impl)
|
|
|
|
|
{
|
|
|
|
|
func->impl = impl;
|
|
|
|
|
impl->function = func;
|
|
|
|
|
}
|
|
|
|
|
|
2015-12-26 10:00:47 -08:00
|
|
|
nir_function_impl *nir_function_impl_create(nir_function *func);
|
2015-10-27 21:34:56 -07:00
|
|
|
/** creates a function_impl that isn't tied to any particular function */
|
|
|
|
|
nir_function_impl *nir_function_impl_create_bare(nir_shader *shader);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2015-10-21 10:57:15 -04:00
|
|
|
nir_block *nir_block_create(nir_shader *shader);
|
|
|
|
|
nir_if *nir_if_create(nir_shader *shader);
|
|
|
|
|
nir_loop *nir_loop_create(nir_shader *shader);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
|
|
|
|
nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
|
|
|
|
|
|
2014-10-29 12:42:54 -07:00
|
|
|
/** requests that the given pieces of metadata be generated */
|
2016-12-13 14:39:51 +11:00
|
|
|
void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...);
|
2014-10-29 12:42:54 -07:00
|
|
|
/** dirties all but the preserved metadata */
|
2014-12-12 16:22:46 -08:00
|
|
|
void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
|
2020-05-21 21:37:33 -05:00
|
|
|
/** Preserves all metadata for the given shader */
|
|
|
|
|
void nir_shader_preserve_all_metadata(nir_shader *shader);
|
2014-10-29 12:42:54 -07:00
|
|
|
|
2014-07-31 16:16:23 -07:00
|
|
|
/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
|
2015-04-07 12:33:17 -07:00
|
|
|
nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2018-03-14 21:45:38 -07:00
|
|
|
nir_deref_instr *nir_deref_instr_create(nir_shader *shader,
|
|
|
|
|
nir_deref_type deref_type);
|
|
|
|
|
|
2015-04-07 12:33:17 -07:00
|
|
|
nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2015-04-07 12:33:17 -07:00
|
|
|
nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
|
2016-03-23 08:04:18 +01:00
|
|
|
unsigned num_components,
|
|
|
|
|
unsigned bit_size);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2015-04-07 12:33:17 -07:00
|
|
|
nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
|
2014-07-31 16:16:23 -07:00
|
|
|
nir_intrinsic_op op);
|
|
|
|
|
|
2015-04-07 12:33:17 -07:00
|
|
|
nir_call_instr *nir_call_instr_create(nir_shader *shader,
|
2019-03-19 10:18:49 -05:00
|
|
|
nir_function *callee);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Creates a NIR texture instruction */
|
2015-04-07 12:33:17 -07:00
|
|
|
nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2015-04-07 12:33:17 -07:00
|
|
|
nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
|
2023-08-17 15:26:32 -05:00
|
|
|
nir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr,
|
|
|
|
|
nir_block *pred, nir_def *src);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2015-04-07 12:33:17 -07:00
|
|
|
nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
|
2014-10-30 21:04:15 -07:00
|
|
|
|
2024-05-19 17:29:21 +02:00
|
|
|
nir_debug_info_instr *nir_debug_info_instr_create(nir_shader *shader,
|
|
|
|
|
nir_debug_info_type type,
|
|
|
|
|
uint32_t string_length);
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_undef_instr *nir_undef_instr_create(nir_shader *shader,
|
|
|
|
|
unsigned num_components,
|
|
|
|
|
unsigned bit_size);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2017-08-29 20:36:55 -07:00
|
|
|
nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);
|
|
|
|
|
|
2015-08-25 10:01:31 -07:00
|
|
|
/**
|
|
|
|
|
* NIR Cursors and Instruction Insertion API
|
|
|
|
|
* @{
|
|
|
|
|
*
|
|
|
|
|
* A tiny struct representing a point to insert/extract instructions or
|
|
|
|
|
* control flow nodes. Helps reduce the combinatorial explosion of possible
|
|
|
|
|
* points to insert/extract.
|
|
|
|
|
*
|
|
|
|
|
* \sa nir_control_flow.h
|
|
|
|
|
*/
|
|
|
|
|
typedef enum {
|
|
|
|
|
nir_cursor_before_block,
|
|
|
|
|
nir_cursor_after_block,
|
|
|
|
|
nir_cursor_before_instr,
|
|
|
|
|
nir_cursor_after_instr,
|
|
|
|
|
} nir_cursor_option;
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
nir_cursor_option option;
|
|
|
|
|
union {
|
|
|
|
|
nir_block *block;
|
|
|
|
|
nir_instr *instr;
|
|
|
|
|
};
|
|
|
|
|
} nir_cursor;
|
|
|
|
|
|
2016-03-25 14:16:47 -07:00
|
|
|
static inline nir_block *
|
|
|
|
|
nir_cursor_current_block(nir_cursor cursor)
|
|
|
|
|
{
|
|
|
|
|
if (cursor.option == nir_cursor_before_instr ||
|
|
|
|
|
cursor.option == nir_cursor_after_instr) {
|
|
|
|
|
return cursor.instr->block;
|
|
|
|
|
} else {
|
|
|
|
|
return cursor.block;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-12-26 10:32:10 -08:00
|
|
|
bool nir_cursors_equal(nir_cursor a, nir_cursor b);
|
|
|
|
|
|
2015-08-25 10:01:31 -07:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_before_block(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
nir_cursor cursor;
|
|
|
|
|
cursor.option = nir_cursor_before_block;
|
|
|
|
|
cursor.block = block;
|
|
|
|
|
return cursor;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_block(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
nir_cursor cursor;
|
|
|
|
|
cursor.option = nir_cursor_after_block;
|
|
|
|
|
cursor.block = block;
|
|
|
|
|
return cursor;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_before_instr(nir_instr *instr)
|
|
|
|
|
{
|
|
|
|
|
nir_cursor cursor;
|
|
|
|
|
cursor.option = nir_cursor_before_instr;
|
|
|
|
|
cursor.instr = instr;
|
|
|
|
|
return cursor;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_instr(nir_instr *instr)
|
|
|
|
|
{
|
|
|
|
|
nir_cursor cursor;
|
|
|
|
|
cursor.option = nir_cursor_after_instr;
|
|
|
|
|
cursor.instr = instr;
|
|
|
|
|
return cursor;
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-04 10:38:21 -06:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_before_block_after_phis(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
nir_phi_instr *last_phi = nir_block_last_phi_instr(block);
|
|
|
|
|
if (last_phi)
|
|
|
|
|
return nir_after_instr(&last_phi->instr);
|
|
|
|
|
else
|
|
|
|
|
return nir_before_block(block);
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-28 17:17:39 -07:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_block_before_jump(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
nir_instr *last_instr = nir_block_last_instr(block);
|
|
|
|
|
if (last_instr && last_instr->type == nir_instr_type_jump) {
|
|
|
|
|
return nir_before_instr(last_instr);
|
|
|
|
|
} else {
|
|
|
|
|
return nir_after_block(block);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-29 10:24:43 -05:00
|
|
|
static inline nir_cursor
|
2023-04-06 15:35:15 -04:00
|
|
|
nir_before_src(nir_src *src)
|
2018-08-29 10:24:43 -05:00
|
|
|
{
|
2023-08-14 09:58:47 -04:00
|
|
|
if (nir_src_is_if(src)) {
|
2018-08-29 10:24:43 -05:00
|
|
|
nir_block *prev_block =
|
2023-08-14 09:58:47 -04:00
|
|
|
nir_cf_node_as_block(nir_cf_node_prev(&nir_src_parent_if(src)->cf_node));
|
2018-08-29 10:24:43 -05:00
|
|
|
return nir_after_block(prev_block);
|
2023-08-14 09:58:47 -04:00
|
|
|
} else if (nir_src_parent_instr(src)->type == nir_instr_type_phi) {
|
2018-08-29 10:24:43 -05:00
|
|
|
#ifndef NDEBUG
|
2023-08-14 09:58:47 -04:00
|
|
|
nir_phi_instr *cond_phi = nir_instr_as_phi(nir_src_parent_instr(src));
|
2018-08-29 10:24:43 -05:00
|
|
|
bool found = false;
|
|
|
|
|
nir_foreach_phi_src(phi_src, cond_phi) {
|
|
|
|
|
if (phi_src->src.ssa == src->ssa) {
|
|
|
|
|
found = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert(found);
|
|
|
|
|
#endif
|
2022-07-27 16:48:11 +01:00
|
|
|
/* The list_entry() macro is a generic container-of macro; it just happens
|
2018-08-29 10:24:43 -05:00
|
|
|
* to have a more specific name.
|
|
|
|
|
*/
|
2022-07-27 16:48:11 +01:00
|
|
|
nir_phi_src *phi_src = list_entry(src, nir_phi_src, src);
|
2018-08-29 10:24:43 -05:00
|
|
|
return nir_after_block_before_jump(phi_src->pred);
|
|
|
|
|
} else {
|
2023-08-14 09:58:47 -04:00
|
|
|
return nir_before_instr(nir_src_parent_instr(src));
|
2018-08-29 10:24:43 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-25 10:01:31 -07:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_before_cf_node(nir_cf_node *node)
|
|
|
|
|
{
|
|
|
|
|
if (node->type == nir_cf_node_block)
|
|
|
|
|
return nir_before_block(nir_cf_node_as_block(node));
|
|
|
|
|
|
|
|
|
|
return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_cf_node(nir_cf_node *node)
|
|
|
|
|
{
|
|
|
|
|
if (node->type == nir_cf_node_block)
|
|
|
|
|
return nir_after_block(nir_cf_node_as_block(node));
|
|
|
|
|
|
|
|
|
|
return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-02 19:06:52 -04:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_phis(nir_block *block)
|
|
|
|
|
{
|
|
|
|
|
nir_foreach_instr(instr, block) {
|
|
|
|
|
if (instr->type != nir_instr_type_phi)
|
|
|
|
|
return nir_before_instr(instr);
|
|
|
|
|
}
|
|
|
|
|
return nir_after_block(block);
|
|
|
|
|
}
|
|
|
|
|
|
2021-03-08 14:15:05 -08:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_instr_and_phis(nir_instr *instr)
|
|
|
|
|
{
|
|
|
|
|
if (instr->type == nir_instr_type_phi)
|
|
|
|
|
return nir_after_phis(instr->block);
|
|
|
|
|
else
|
|
|
|
|
return nir_after_instr(instr);
|
|
|
|
|
}
|
|
|
|
|
|
2015-12-27 22:50:14 -08:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_cf_node_and_phis(nir_cf_node *node)
|
|
|
|
|
{
|
|
|
|
|
if (node->type == nir_cf_node_block)
|
|
|
|
|
return nir_after_block(nir_cf_node_as_block(node));
|
|
|
|
|
|
|
|
|
|
nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));
|
|
|
|
|
|
2016-09-02 19:06:52 -04:00
|
|
|
return nir_after_phis(block);
|
2015-12-27 22:50:14 -08:00
|
|
|
}
|
|
|
|
|
|
2015-08-25 10:01:31 -07:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_before_cf_list(struct exec_list *cf_list)
|
|
|
|
|
{
|
|
|
|
|
nir_cf_node *first_node = exec_node_data(nir_cf_node,
|
|
|
|
|
exec_list_get_head(cf_list), node);
|
|
|
|
|
return nir_before_cf_node(first_node);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_cf_list(struct exec_list *cf_list)
|
|
|
|
|
{
|
|
|
|
|
nir_cf_node *last_node = exec_node_data(nir_cf_node,
|
|
|
|
|
exec_list_get_tail(cf_list), node);
|
|
|
|
|
return nir_after_cf_node(last_node);
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-28 13:53:06 -04:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_before_impl(nir_function_impl *impl)
|
|
|
|
|
{
|
|
|
|
|
return nir_before_cf_list(&impl->body);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_impl(nir_function_impl *impl)
|
|
|
|
|
{
|
|
|
|
|
return nir_after_cf_list(&impl->body);
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-09 18:30:33 -07:00
|
|
|
/**
|
|
|
|
|
* Insert a NIR instruction at the given cursor.
|
|
|
|
|
*
|
|
|
|
|
* Note: This does not update the cursor.
|
|
|
|
|
*/
|
|
|
|
|
void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
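/*
 * Usage sketch (illustrative; some_instr, new_instr, and block are
 * hypothetical): a cursor names an insertion point, and the same
 * nir_instr_insert() call works with any of the cursor constructors above.
 *
 *    nir_cursor c = nir_after_instr(some_instr);
 *    nir_instr_insert(c, new_instr);
 *
 *    or, at the end of a block but before its trailing jump, if any:
 *
 *    nir_instr_insert(nir_after_block_before_jump(block), new_instr);
 */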
|
|
|
|
|
|
2018-07-04 09:10:28 -07:00
|
|
|
bool nir_instr_move(nir_cursor cursor, nir_instr *instr);
|
|
|
|
|
|
2015-08-09 18:30:33 -07:00
|
|
|
static inline void
|
|
|
|
|
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
|
|
|
|
|
{
|
|
|
|
|
nir_instr_insert(nir_before_instr(instr), before);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
|
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
|
|
|
|
|
{
|
|
|
|
|
nir_instr_insert(nir_after_instr(instr), after);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
|
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
|
|
|
|
|
{
|
|
|
|
|
nir_instr_insert(nir_before_block(block), before);
|
|
|
|
|
}
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2015-08-09 18:30:33 -07:00
|
|
|
static inline void
|
|
|
|
|
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
|
|
|
|
|
{
|
|
|
|
|
nir_instr_insert(nir_after_block(block), after);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
|
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
|
|
|
|
|
{
|
|
|
|
|
nir_instr_insert(nir_before_cf_node(node), before);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
|
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
|
|
|
|
|
{
|
|
|
|
|
nir_instr_insert(nir_after_cf_node(node), after);
|
|
|
|
|
}
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2015-08-09 18:30:33 -07:00
|
|
|
static inline void
|
|
|
|
|
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
|
|
|
|
|
{
|
|
|
|
|
nir_instr_insert(nir_before_cf_list(list), before);
|
|
|
|
|
}
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2015-08-09 18:30:33 -07:00
|
|
|
static inline void
|
|
|
|
|
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
|
|
|
|
|
{
|
|
|
|
|
nir_instr_insert(nir_after_cf_list(list), after);
|
|
|
|
|
}
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2018-03-16 09:52:04 -07:00
|
|
|
void nir_instr_remove_v(nir_instr *instr);
|
2021-07-07 10:07:46 -07:00
|
|
|
void nir_instr_free(nir_instr *instr);
|
2021-07-07 12:46:49 -07:00
|
|
|
void nir_instr_free_list(struct exec_list *list);
|
2018-03-16 09:52:04 -07:00
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_instr_remove(nir_instr *instr)
|
|
|
|
|
{
|
|
|
|
|
nir_cursor cursor;
|
|
|
|
|
nir_instr *prev = nir_instr_prev(instr);
|
|
|
|
|
if (prev) {
|
|
|
|
|
cursor = nir_after_instr(prev);
|
|
|
|
|
} else {
|
|
|
|
|
cursor = nir_before_block(instr->block);
|
|
|
|
|
}
|
|
|
|
|
nir_instr_remove_v(instr);
|
|
|
|
|
return cursor;
|
|
|
|
|
}
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2021-06-28 14:42:21 -07:00
|
|
|
nir_cursor nir_instr_free_and_dce(nir_instr *instr);
|
2021-06-25 16:53:55 -07:00
|
|
|
|
2015-08-25 10:01:31 -07:00
|
|
|
/** @} */
|
|
|
|
|
|
2023-08-15 12:05:54 -05:00
|
|
|
nir_def *nir_instr_def(nir_instr *instr);
|
2019-07-11 15:05:27 -05:00
|
|
|
|
2023-08-12 19:10:24 -05:00
|
|
|
typedef bool (*nir_foreach_def_cb)(nir_def *def, void *state);
|
2014-07-31 16:16:23 -07:00
|
|
|
typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
|
2021-01-18 14:43:15 +00:00
|
|
|
static inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
|
2020-05-18 16:49:29 -05:00
|
|
|
bool nir_foreach_phi_src_leaving_block(nir_block *instr,
|
|
|
|
|
nir_foreach_src_cb cb,
|
|
|
|
|
void *state);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
2014-12-08 17:34:23 -08:00
|
|
|
nir_const_value *nir_src_as_const_value(nir_src src);
|
2018-06-05 19:19:39 -07:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \
|
|
|
|
|
static inline c_type * \
|
|
|
|
|
nir_src_as_##name(nir_src src) \
|
|
|
|
|
{ \
|
|
|
|
|
return src.ssa->parent_instr->type == type_enum \
|
|
|
|
|
? cast_macro(src.ssa->parent_instr) \
|
|
|
|
|
: NULL; \
|
|
|
|
|
}
|
2018-06-05 19:19:39 -07:00
|
|
|
|
|
|
|
|
NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
|
2019-04-17 17:18:19 -05:00
|
|
|
NIR_SRC_AS_(intrinsic, nir_intrinsic_instr,
|
|
|
|
|
nir_instr_type_intrinsic, nir_instr_as_intrinsic)
|
2019-04-19 15:09:04 -05:00
|
|
|
NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref)
|
2024-05-19 17:29:21 +02:00
|
|
|
NIR_SRC_AS_(debug_info, nir_debug_info_instr, nir_instr_type_debug_info, nir_instr_as_debug_info)
|
|
|
|
|
|
|
|
|
|
const char *nir_src_as_string(nir_src src);
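/*
 * Usage sketch (illustrative, names hypothetical): the helpers generated
 * above return NULL when the source does not come from the requested
 * instruction type, so they can be used directly in a condition.
 *
 *    nir_alu_instr *parent = nir_src_as_alu_instr(alu->src[0].src);
 *    if (parent != NULL && parent->op == nir_op_fneg) {
 *       ...
 *    }
 */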
|
2018-06-05 19:19:39 -07:00
|
|
|
|
2022-02-11 11:18:51 +01:00
|
|
|
bool nir_src_is_always_uniform(nir_src src);
|
2014-12-12 12:52:11 -08:00
|
|
|
bool nir_srcs_equal(nir_src src1, nir_src src2);
|
2019-06-20 13:47:30 -05:00
|
|
|
bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);
|
2024-08-27 09:21:41 +02:00
|
|
|
nir_block *nir_src_get_block(nir_src *src);
|
2021-01-29 19:33:19 -06:00
|
|
|
|
|
|
|
|
static inline void
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_src_rewrite(nir_src *src, nir_def *new_ssa)
|
2021-01-29 19:33:19 -06:00
|
|
|
{
|
2023-08-01 11:29:43 -04:00
|
|
|
assert(src->ssa);
|
2023-08-14 09:58:47 -04:00
|
|
|
assert(nir_src_is_if(src) ? (nir_src_parent_if(src) != NULL) : (nir_src_parent_instr(src) != NULL));
|
2021-01-29 19:33:19 -06:00
|
|
|
list_del(&src->use_link);
|
|
|
|
|
src->ssa = new_ssa;
|
|
|
|
|
list_addtail(&src->use_link, &new_ssa->uses);
|
|
|
|
|
}
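/*
 * Usage sketch (illustrative, names hypothetical): point an existing source
 * at a different SSA def; the use lists are updated for you.
 *
 *    nir_src_rewrite(&alu->src[1].src, replacement_def);
 */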
|
|
|
|
|
|
2023-08-17 16:16:10 -05:00
|
|
|
/** Initialize a nir_src
|
|
|
|
|
*
|
|
|
|
|
* This is almost never the helper you want to use. This helper assumes that
|
|
|
|
|
* the source is uninitialized garbage and blasts over it without doing any
|
|
|
|
|
* tear-down of the existing source, including removing it from uses lists.
|
|
|
|
|
* Using this helper on a source that currently exists in any uses list will
|
|
|
|
|
* result in linked list corruption. It also assumes that the instruction is
|
|
|
|
|
* currently live in the IR and adds the source to the uses list for the given
|
|
|
|
|
* nir_def as part of setup.
|
|
|
|
|
*
|
|
|
|
|
* This is pretty much only useful for adding sources to extant instructions
|
|
|
|
|
* or manipulating parallel copy instructions as part of out-of-SSA.
|
|
|
|
|
*
|
|
|
|
|
* When in doubt, use nir_src_rewrite() instead.
|
|
|
|
|
*/
|
|
|
|
|
void nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def);
|
|
|
|
|
|
2023-08-16 11:16:00 -05:00
|
|
|
/** Clear a nir_src
|
|
|
|
|
*
|
|
|
|
|
* This helper clears a nir_src by removing it from any uses lists and
|
|
|
|
|
* resetting its contents to NIR_SRC_INIT. This is typically used as a
|
|
|
|
|
* precursor to removing the source from the instruction by adjusting a
|
|
|
|
|
* num_srcs parameter somewhere or overwriting it with nir_instr_move_src().
|
|
|
|
|
*/
|
|
|
|
|
void nir_instr_clear_src(nir_instr *instr, nir_src *src);
|
|
|
|
|
|
nir/nir: Use a linked list instead of a hash set for use/def sets
This commit switches us from the current setup of using hash sets for
use/def sets to using linked lists. Doing so should save us quite a bit of
memory because we aren't carrying around 3 hash sets per register and 2 per
SSA value. It should also save us CPU time because adding/removing things
from use/def sets is 4 pointer manipulations instead of a hash lookup.
Running shader-db 50 times with USE_NIR=0, NIR, and NIR + use/def lists:
GLSL IR Only: 586.4 +/- 1.653833
NIR with hash sets: 675.4 +/- 2.502108
NIR + use/def lists: 641.2 +/- 1.557043
I also ran a memory usage experiment with Ken's patch to delete GLSL IR and
keep NIR. This patch cuts an aditional 42.9 MiB of ralloc'd memory over
and above what we gained by deleting the GLSL IR on the same dota trace.
On the code complexity side of things, some things are now much easier and
others are a bit harder. One of the operations we perform constantly in
optimization passes is to replace one source with another. Due to the fact
that an instruction can use the same SSA value multiple times, we had to
iterate through the sources of the instruction and determine if the use we
were replacing was the only one before removing it from the set of uses.
With this patch, uses are per-source not per-instruction so we can just
remove it safely. On the other hand, trying to iterate over all of the
instructions that use a given value is more difficult. Fortunately, the
two places we do that are the ffma peephole where it doesn't matter and GCM
where we already gracefully handle duplicates visits to an instruction.
Another aspect here is that using linked lists in this way can be tricky to
get right. With sets, things were quite forgiving and the worst that
happened if you didn't properly remove a use was that it would get caught
in the validator. With linked lists, it can lead to linked list corruption
which can be harder to track. However, we do just as much validation of
the linked lists as we did of the sets so the validator should still catch
these problems. While working on this series, the vast majority of the
bugs I had to fix were caught by assertions. I don't think the lists are
going to be that much worse than the sets.
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
2015-04-24 10:16:27 -07:00
|
|
|
void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
|
2021-01-29 19:33:19 -06:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
void nir_def_init(nir_instr *instr, nir_def *def,
|
|
|
|
|
unsigned num_components, unsigned bit_size);
|
2017-08-22 14:08:32 -07:00
|
|
|
static inline void
|
2023-08-12 18:59:27 -05:00
|
|
|
nir_def_init_for_type(nir_instr *instr, nir_def *def,
|
|
|
|
|
const struct glsl_type *type)
|
2017-08-22 14:08:32 -07:00
|
|
|
{
|
|
|
|
|
assert(glsl_type_is_vector_or_scalar(type));
|
2023-08-12 18:59:27 -05:00
|
|
|
nir_def_init(instr, def, glsl_get_components(type),
|
2023-08-12 18:55:58 -05:00
|
|
|
glsl_get_bit_size(type));
|
2017-08-22 14:08:32 -07:00
|
|
|
}
|
2023-08-12 16:17:15 -04:00
|
|
|
void nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa);
|
|
|
|
|
void nir_def_rewrite_uses_src(nir_def *def, nir_src new_src);
|
|
|
|
|
void nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
|
|
|
|
|
nir_instr *after_me);
|
2014-11-04 10:40:48 -08:00
|
|
|
|
2024-06-20 11:57:37 -04:00
|
|
|
static inline void
|
|
|
|
|
nir_def_replace(nir_def *def, nir_def *new_ssa)
|
|
|
|
|
{
|
|
|
|
|
nir_def_rewrite_uses(def, new_ssa);
|
|
|
|
|
nir_instr_remove(def->parent_instr);
|
|
|
|
|
}
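/*
 * Usage sketch (illustrative): a minimal fneg(fneg(x)) -> x fold that
 * ignores swizzles for brevity. This is a simplified sketch, not an actual
 * NIR optimization pass.
 *
 *    if (alu->op == nir_op_fneg) {
 *       nir_alu_instr *inner = nir_src_as_alu_instr(alu->src[0].src);
 *       if (inner != NULL && inner->op == nir_op_fneg)
 *          nir_def_replace(&alu->def, inner->src[0].src.ssa);
 *    }
 */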
|
|
|
|
|
|
2021-08-19 14:33:02 +01:00
|
|
|
nir_component_mask_t nir_src_components_read(const nir_src *src);
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_component_mask_t nir_def_components_read(const nir_def *def);
|
2023-04-28 18:44:55 -05:00
|
|
|
bool nir_def_all_uses_are_fsat(const nir_def *def);
|
2024-10-25 15:39:30 +02:00
|
|
|
bool nir_def_all_uses_ignore_sign_bit(const nir_def *def);
|
2015-10-09 08:13:43 -07:00
|
|
|
|
2024-06-15 00:16:08 -04:00
|
|
|
static inline int
|
|
|
|
|
nir_def_last_component_read(nir_def *def)
|
|
|
|
|
{
|
|
|
|
|
return (int)util_last_bit(nir_def_components_read(def)) - 1;
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-02 16:00:53 +00:00
|
|
|
static inline bool
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def_is_unused(nir_def *ssa)
|
2021-02-02 16:00:53 +00:00
|
|
|
{
|
2023-04-06 13:19:31 -04:00
|
|
|
return list_is_empty(&ssa->uses);
|
2021-02-02 16:00:53 +00:00
|
|
|
}
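/*
 * Usage sketch (illustrative): a trivial dead-code check for an instruction
 * that has no side effects other than producing a value.
 *
 *    if (nir_def_is_unused(&intrin->def))
 *       nir_instr_remove(&intrin->instr);
 */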
|
|
|
|
|
|
2024-03-19 12:45:06 -05:00
|
|
|
/** Sorts unstructured blocks
|
|
|
|
|
*
|
|
|
|
|
* NIR requires that unstructured blocks be sorted in reverse post
|
|
|
|
|
* depth-first-search order. This is the standard ordering used in the
|
|
|
|
|
* compiler literature which guarantees dominance. In particular, reverse
|
|
|
|
|
* post-DFS order guarantees that dominators occur in the list before the
|
|
|
|
|
* blocks they dominate.
|
|
|
|
|
*
|
|
|
|
|
* NOTE: This function also implicitly deletes any unreachable blocks.
|
|
|
|
|
*/
|
|
|
|
|
void nir_sort_unstructured_blocks(nir_function_impl *impl);
|
|
|
|
|
|
2024-03-19 12:59:30 -05:00
|
|
|
/** Returns the next block
|
2020-08-11 14:13:36 -05:00
|
|
|
*
|
2024-03-19 12:59:30 -05:00
|
|
|
* For structured control-flow, this follows the same order as
|
|
|
|
|
* nir_block_cf_tree_next(). For unstructured control-flow the blocks are in
|
|
|
|
|
* reverse post-DFS order. (See nir_sort_unstructured_blocks() above.)
|
2020-08-11 14:13:36 -05:00
|
|
|
*/
|
|
|
|
|
nir_block *nir_block_unstructured_next(nir_block *block);
|
|
|
|
|
nir_block *nir_unstructured_start_block(nir_function_impl *impl);
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_block_unstructured(block, impl) \
|
2020-08-11 14:13:36 -05:00
|
|
|
for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \
|
|
|
|
|
block = nir_block_unstructured_next(block))
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_block_unstructured_safe(block, impl) \
|
2020-08-11 14:13:36 -05:00
|
|
|
for (nir_block *block = nir_unstructured_start_block(impl), \
|
2023-08-08 12:00:35 -05:00
|
|
|
*next = nir_block_unstructured_next(block); \
|
|
|
|
|
block != NULL; \
|
2020-08-11 14:13:36 -05:00
|
|
|
block = next, next = nir_block_unstructured_next(block))
|
|
|
|
|
|
nir: rewrite nir_foreach_block and friends
Previously, these were functions which took a callback. This meant that
the per-block code had to be in a separate function, and all the data
that you wanted to pass in had to be a single void *. They walked the
control flow tree recursively, doing a depth-first search, and called
the callback in a preorder, matching the order of the original source
code. But since each node in the control flow tree has a pointer to its
parent, we can implement a "get-next" and "get-previous" method that
does the same thing that the recursive function did with no state at
all. This lets us rewrite nir_foreach_block() as a simple for loop,
which lets us greatly simplify its users in some cases. This does
require us to rewrite every user, although the transformation from the
old nir_foreach_block() to the new nir_foreach_block() is mostly
trivial.
One subtlety, though, is that the new nir_foreach_block() won't handle
the case where the current block is deleted, which the old one could.
There's a new nir_foreach_block_safe() which implements the standard
trick for solving this. Most users don't modify control flow, though, so
they won't need it. Right now, only opt_select_peephole needs it.
The old functions are reimplemented in terms of the new macros, although
they'll go away after everything is converted.
v2: keep an implementation of the old functions around
v3 (Jason Ekstrand): A small cosmetic change and a bugfix in the loop
handling of nir_cf_node_cf_tree_last().
v4 (Jason Ekstrand): Use the _safe macro in foreach_block_reverse_call
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2016-04-08 02:11:44 -04:00
|
|
|
/*
|
|
|
|
|
 * Finds the next basic block in source-code order; returns NULL if there is
|
|
|
|
|
* none
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
nir_block *nir_block_cf_tree_next(nir_block *block);
|
|
|
|
|
|
|
|
|
|
/* Performs the opposite of nir_block_cf_tree_next() */
|
|
|
|
|
|
|
|
|
|
nir_block *nir_block_cf_tree_prev(nir_block *block);
|
|
|
|
|
|
|
|
|
|
/* Gets the first block in a CF node in source-code order */
|
|
|
|
|
|
|
|
|
|
nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node);
|
|
|
|
|
|
|
|
|
|
/* Gets the last block in a CF node in source-code order */
|
|
|
|
|
|
|
|
|
|
nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node);
|
|
|
|
|
|
|
|
|
|
/* Gets the next block after a CF node in source-code order */
|
|
|
|
|
|
|
|
|
|
nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node);
|
|
|
|
|
|
2023-09-20 16:31:05 +02:00
|
|
|
/* Gets the block before a CF node in source-code order */
|
|
|
|
|
|
|
|
|
|
nir_block *nir_cf_node_cf_tree_prev(nir_cf_node *node);
|
|
|
|
|
|
2016-04-08 02:11:44 -04:00
|
|
|
/* Macros for loops that visit blocks in source-code order */
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_block(block, impl) \
|
2016-04-08 02:11:44 -04:00
|
|
|
for (nir_block *block = nir_start_block(impl); block != NULL; \
|
|
|
|
|
block = nir_block_cf_tree_next(block))
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_block_safe(block, impl) \
|
|
|
|
|
for (nir_block *block = nir_start_block(impl), \
|
|
|
|
|
*next = nir_block_cf_tree_next(block); \
|
|
|
|
|
block != NULL; \
|
2016-04-08 02:11:44 -04:00
|
|
|
block = next, next = nir_block_cf_tree_next(block))
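/* Typical iteration pattern (a sketch): walk every block and every
 * instruction of an impl. nir_foreach_instr()/nir_foreach_instr_safe() are
 * defined elsewhere in the NIR headers; use the _safe variants whenever you
 * remove what you are visiting. is_dead() is a hypothetical predicate.
 *
 *    nir_foreach_block(block, impl) {
 *       nir_foreach_instr_safe(instr, block) {
 *          if (is_dead(instr))
 *             nir_instr_remove(instr);
 *       }
 *    }
 */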
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_block_reverse(block, impl) \
|
2016-04-08 02:11:44 -04:00
|
|
|
for (nir_block *block = nir_impl_last_block(impl); block != NULL; \
|
|
|
|
|
block = nir_block_cf_tree_prev(block))
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_block_reverse_safe(block, impl) \
|
|
|
|
|
for (nir_block *block = nir_impl_last_block(impl), \
|
|
|
|
|
*prev = nir_block_cf_tree_prev(block); \
|
|
|
|
|
block != NULL; \
|
2016-04-08 02:11:44 -04:00
|
|
|
block = prev, prev = nir_block_cf_tree_prev(block))
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_block_in_cf_node(block, node) \
|
2016-04-08 02:11:44 -04:00
|
|
|
for (nir_block *block = nir_cf_node_cf_tree_first(node); \
|
2023-08-08 12:00:35 -05:00
|
|
|
block != nir_cf_node_cf_tree_next(node); \
|
2016-04-08 02:11:44 -04:00
|
|
|
block = nir_block_cf_tree_next(block))
|
|
|
|
|
|
2024-05-25 08:09:43 -05:00
|
|
|
#define nir_foreach_block_in_cf_node_safe(block, node) \
|
|
|
|
|
for (nir_block *block = nir_cf_node_cf_tree_first(node), \
|
|
|
|
|
*next = nir_block_cf_tree_next(block); \
|
|
|
|
|
block != nir_cf_node_cf_tree_next(node); \
|
|
|
|
|
block = next, next = nir_block_cf_tree_next(block))
|
|
|
|
|
|
2023-09-20 16:32:07 +02:00
|
|
|
#define nir_foreach_block_in_cf_node_reverse(block, node) \
|
|
|
|
|
for (nir_block *block = nir_cf_node_cf_tree_last(node); \
|
|
|
|
|
block != nir_cf_node_cf_tree_prev(node); \
|
|
|
|
|
block = nir_block_cf_tree_prev(block))
|
|
|
|
|
|
2024-05-25 08:09:43 -05:00
|
|
|
#define nir_foreach_block_in_cf_node_reverse_safe(block, node) \
|
|
|
|
|
for (nir_block *block = nir_cf_node_cf_tree_last(node), \
|
|
|
|
|
*prev = nir_block_cf_tree_prev(block); \
|
|
|
|
|
block != nir_cf_node_cf_tree_prev(node); \
|
|
|
|
|
block = prev, prev = nir_block_cf_tree_prev(block))
|
|
|
|
|
|
2014-10-29 16:25:51 -07:00
|
|
|
/* If the following CF node is an if, this function returns that if.
|
|
|
|
|
* Otherwise, it returns NULL.
|
|
|
|
|
*/
|
2014-12-17 14:49:24 -08:00
|
|
|
nir_if *nir_block_get_following_if(nir_block *block);
|
2014-10-29 16:25:51 -07:00
|
|
|
|
2015-05-08 13:17:10 -04:00
|
|
|
nir_loop *nir_block_get_following_loop(nir_block *block);
|
|
|
|
|
|
2021-04-08 16:26:38 +01:00
|
|
|
nir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx);
|
|
|
|
|
|
2014-07-31 16:16:23 -07:00
|
|
|
void nir_index_ssa_defs(nir_function_impl *impl);
|
2015-09-08 16:43:51 -07:00
|
|
|
unsigned nir_index_instrs(nir_function_impl *impl);
|
2014-07-31 16:16:23 -07:00
|
|
|
|
|
|
|
|
void nir_index_blocks(nir_function_impl *impl);
|
|
|
|
|
|
2023-06-21 14:02:57 +02:00
|
|
|
void nir_shader_clear_pass_flags(nir_shader *shader);
|
|
|
|
|
|
2020-07-20 16:49:46 -05:00
|
|
|
unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes);
|
|
|
|
|
unsigned nir_function_impl_index_vars(nir_function_impl *impl);
|
2019-11-15 15:15:14 +00:00
|
|
|
|
2014-07-30 15:29:27 -07:00
|
|
|
void nir_print_shader(nir_shader *shader, FILE *fp);
|
2016-05-14 15:37:32 -04:00
|
|
|
void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors);
|
2015-02-09 13:42:16 -08:00
|
|
|
void nir_print_instr(const nir_instr *instr, FILE *fp);
|
2018-12-19 12:13:46 -08:00
|
|
|
void nir_print_deref(const nir_deref_instr *deref, FILE *fp);
|
2021-06-16 11:13:54 -07:00
|
|
|
void nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations);
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL)
|
|
|
|
|
#define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL)
|
|
|
|
|
#define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL)
|
2021-06-16 11:13:54 -07:00
|
|
|
#define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations)
|
2014-07-30 15:29:27 -07:00
|
|
|
|
2021-02-05 13:25:03 +02:00
|
|
|
char *nir_shader_as_str(nir_shader *nir, void *mem_ctx);
|
2021-06-16 11:13:54 -07:00
|
|
|
char *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx);
|
2021-10-05 15:32:33 -07:00
|
|
|
char *nir_instr_as_str(const nir_instr *instr, void *mem_ctx);
|
2021-02-05 13:25:03 +02:00
|
|
|
|
2024-08-25 14:17:24 +02:00
|
|
|
/** Adds debug information to the shader. The line numbers point to
|
|
|
|
|
 * the corresponding lines in the printed NIR, starting at first_line.
|
|
|
|
|
*/
|
|
|
|
|
char *nir_shader_gather_debug_info(nir_shader *shader, const char *filename, uint32_t first_line);
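/* Usage sketch; the filename and starting line are illustrative, and the
 * returned string is assumed here to be the printed NIR text whose line
 * numbers the generated debug info references:
 *
 *    char *printed =
 *       nir_shader_gather_debug_info(shader, "shader.nir", 1);
 */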
|
2024-04-06 10:05:41 +02:00
|
|
|
|
2020-05-18 10:39:43 -05:00
|
|
|
/** Shallow clone of a single instruction. */
|
|
|
|
|
nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig);
|
|
|
|
|
|
2021-09-24 18:38:27 +02:00
|
|
|
/** Clone a single instruction, including a remap table to rewrite sources. */
|
|
|
|
|
nir_instr *nir_instr_clone_deep(nir_shader *s, const nir_instr *orig,
|
|
|
|
|
struct hash_table *remap_table);
|
|
|
|
|
|
2019-05-29 16:48:17 -07:00
|
|
|
/** Shallow clone of a single ALU instruction. */
|
|
|
|
|
nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig);
|
|
|
|
|
|
2016-03-23 14:57:57 -07:00
|
|
|
nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
|
2023-08-15 16:01:51 +10:00
|
|
|
nir_function *nir_function_clone(nir_shader *ns, const nir_function *fxn);
|
2019-03-04 15:32:36 -06:00
|
|
|
nir_function_impl *nir_function_impl_clone(nir_shader *shader,
|
|
|
|
|
const nir_function_impl *fi);
|
2024-05-29 11:15:07 +10:00
|
|
|
nir_function_impl *
|
|
|
|
|
nir_function_impl_clone_remap_globals(nir_shader *shader,
|
|
|
|
|
const nir_function_impl *fi,
|
|
|
|
|
struct hash_table *remap_table);
|
2015-12-30 18:44:19 -08:00
|
|
|
nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
|
2016-03-23 15:05:55 -07:00
|
|
|
nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
|
2015-11-11 08:31:29 -08:00
|
|
|
|
2019-06-04 17:48:33 -05:00
|
|
|
void nir_shader_replace(nir_shader *dest, nir_shader *src);
|
|
|
|
|
|
2019-06-04 17:50:22 -05:00
|
|
|
void nir_shader_serialize_deserialize(nir_shader *s);
|
2017-09-14 16:49:53 -07:00
|
|
|
|
2017-11-23 13:16:43 +00:00
|
|
|
#ifndef NDEBUG
|
2018-10-18 15:18:30 -05:00
|
|
|
void nir_validate_shader(nir_shader *shader, const char *when);
|
2020-06-01 15:39:31 -05:00
|
|
|
void nir_validate_ssa_dominance(nir_shader *shader, const char *when);
|
2015-11-03 00:31:22 -08:00
|
|
|
void nir_metadata_set_validation_flag(nir_shader *shader);
|
|
|
|
|
void nir_metadata_check_validation_flag(nir_shader *shader);
|
2015-11-18 16:33:41 -05:00
|
|
|
|
2019-01-17 13:06:04 -08:00
|
|
|
static inline bool
|
|
|
|
|
should_skip_nir(const char *name)
|
|
|
|
|
{
|
2020-11-24 23:34:59 +01:00
|
|
|
static const char *list = NULL;
|
|
|
|
|
if (!list) {
|
|
|
|
|
/* Comma separated list of names to skip. */
|
|
|
|
|
list = getenv("NIR_SKIP");
|
|
|
|
|
if (!list)
|
|
|
|
|
list = "";
|
|
|
|
|
}
|
2019-01-17 13:06:04 -08:00
|
|
|
|
2020-11-24 23:34:59 +01:00
|
|
|
if (!list[0])
|
2019-01-17 13:06:04 -08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return comma_separated_list_contains(list, name);
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-20 17:19:06 +02:00
|
|
|
static inline bool
|
2020-08-26 14:22:07 -07:00
|
|
|
should_print_nir(nir_shader *shader)
|
2017-05-20 17:19:06 +02:00
|
|
|
{
|
2023-03-03 11:25:04 +01:00
|
|
|
if ((shader->info.internal && !NIR_DEBUG(PRINT_INTERNAL)) ||
|
2021-11-17 16:10:52 +01:00
|
|
|
shader->info.stage < 0 ||
|
|
|
|
|
shader->info.stage > MESA_SHADER_KERNEL)
|
|
|
|
|
return false;
|
2017-05-20 17:19:06 +02:00
|
|
|
|
2021-11-17 16:10:52 +01:00
|
|
|
return unlikely(nir_debug_print_shader[shader->info.stage]);
|
2017-05-20 17:19:06 +02:00
|
|
|
}
|
2015-01-06 16:11:57 -08:00
|
|
|
#else
|
2023-08-08 12:00:35 -05:00
|
|
|
static inline void
|
|
|
|
|
nir_validate_shader(nir_shader *shader, const char *when)
|
|
|
|
|
{
|
|
|
|
|
(void)shader;
|
|
|
|
|
(void)when;
|
|
|
|
|
}
|
|
|
|
|
static inline void
|
|
|
|
|
nir_validate_ssa_dominance(nir_shader *shader, const char *when)
|
|
|
|
|
{
|
|
|
|
|
(void)shader;
|
|
|
|
|
(void)when;
|
|
|
|
|
}
|
|
|
|
|
static inline void
|
|
|
|
|
nir_metadata_set_validation_flag(nir_shader *shader)
|
|
|
|
|
{
|
|
|
|
|
(void)shader;
|
|
|
|
|
}
|
|
|
|
|
static inline void
|
|
|
|
|
nir_metadata_check_validation_flag(nir_shader *shader)
|
|
|
|
|
{
|
|
|
|
|
(void)shader;
|
|
|
|
|
}
|
|
|
|
|
static inline bool
|
|
|
|
|
should_skip_nir(UNUSED const char *pass_name)
|
|
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
static inline bool
|
|
|
|
|
should_print_nir(UNUSED nir_shader *shader)
|
|
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2017-11-23 13:16:43 +00:00
|
|
|
#endif /* NDEBUG */
|
2014-07-30 15:20:53 -07:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define _PASS(pass, nir, do_pass) \
|
|
|
|
|
do { \
|
|
|
|
|
if (should_skip_nir(#pass)) { \
|
|
|
|
|
printf("skipping %s\n", #pass); \
|
|
|
|
|
break; \
|
|
|
|
|
} \
|
|
|
|
|
do_pass if (NIR_DEBUG(CLONE)) \
|
|
|
|
|
{ \
|
2023-09-06 16:23:36 +10:00
|
|
|
nir_shader *_clone = nir_shader_clone(ralloc_parent(nir), nir);\
|
|
|
|
|
nir_shader_replace(nir, _clone); \
|
2023-08-08 12:00:35 -05:00
|
|
|
} \
|
|
|
|
|
if (NIR_DEBUG(SERIALIZE)) { \
|
|
|
|
|
nir_shader_serialize_deserialize(nir); \
|
|
|
|
|
} \
|
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
|
|
#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, { \
|
|
|
|
|
nir_metadata_set_validation_flag(nir); \
|
|
|
|
|
if (should_print_nir(nir)) \
|
|
|
|
|
printf("%s\n", #pass); \
|
|
|
|
|
if (pass(nir, ##__VA_ARGS__)) { \
|
|
|
|
|
nir_validate_shader(nir, "after " #pass " in " __FILE__); \
|
|
|
|
|
UNUSED bool _; \
|
|
|
|
|
progress = true; \
|
|
|
|
|
if (should_print_nir(nir)) \
|
|
|
|
|
nir_print_shader(nir, stdout); \
|
|
|
|
|
nir_metadata_check_validation_flag(nir); \
|
|
|
|
|
} \
|
2023-08-08 11:53:39 -05:00
|
|
|
})
|
2015-11-18 16:33:41 -05:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir, { \
|
|
|
|
|
if (should_print_nir(nir)) \
|
|
|
|
|
printf("%s\n", #pass); \
|
|
|
|
|
pass(nir, ##__VA_ARGS__); \
|
|
|
|
|
nir_validate_shader(nir, "after " #pass " in " __FILE__); \
|
|
|
|
|
if (should_print_nir(nir)) \
|
|
|
|
|
nir_print_shader(nir, stdout); \
|
2023-08-08 11:53:39 -05:00
|
|
|
})
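/* Usage sketch: NIR_PASS accumulates progress into a caller-provided bool and
 * validates after each pass; passing the literal `_` discards the progress
 * bit (the macro declares a local `_` for exactly that purpose). The passes
 * named below are only examples.
 *
 *    bool progress = false;
 *    NIR_PASS(progress, nir, nir_copy_prop);
 *    NIR_PASS(progress, nir, nir_opt_dce);
 *    NIR_PASS(_, nir, nir_lower_var_copies);
 */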
|
2015-11-18 16:33:41 -05:00
|
|
|
|
nir: add helpers to skip idempotent passes
For example, in the loop:
while (more_late_algebraic) {
more_late_algebraic = false;
NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late);
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_copy_prop);
NIR_PASS(_, nir, nir_opt_dce);
NIR_PASS(_, nir, nir_opt_cse);
}
if nir_opt_algebraic_late makes no progress, later passes might be
skippable depending on which ones made progress in the previous iteration.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24197>
2023-07-17 17:49:04 +01:00
|
|
|
#define _NIR_LOOP_PASS(progress, idempotent, skip, nir, pass, ...) \
|
|
|
|
|
do { \
|
|
|
|
|
bool nir_loop_pass_progress = false; \
|
|
|
|
|
if (!_mesa_set_search(skip, (void (*)())&pass)) \
|
|
|
|
|
NIR_PASS(nir_loop_pass_progress, nir, pass, ##__VA_ARGS__); \
|
|
|
|
|
if (nir_loop_pass_progress) \
|
|
|
|
|
_mesa_set_clear(skip, NULL); \
|
|
|
|
|
if (idempotent || !nir_loop_pass_progress) \
|
|
|
|
|
_mesa_set_add(skip, (void (*)())&pass); \
|
|
|
|
|
UNUSED bool _ = false; \
|
|
|
|
|
progress |= nir_loop_pass_progress; \
|
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
|
|
/* Helper to skip a pass if no different passes have made progress since it was
|
|
|
|
|
* previously run. Note that two passes are considered the same if they have
|
|
|
|
|
* the same function pointer, even if they used different options.
|
|
|
|
|
*
|
|
|
|
|
* The usage of this is mostly identical to NIR_PASS. "skip" is a "struct set *"
|
|
|
|
|
* (created by _mesa_pointer_set_create) which the macro uses to keep track of
|
|
|
|
|
* already run passes.
|
|
|
|
|
*
|
|
|
|
|
* Example:
|
|
|
|
|
* bool progress = true;
|
|
|
|
|
* struct set *skip = _mesa_pointer_set_create(NULL);
|
|
|
|
|
* while (progress) {
|
|
|
|
|
* progress = false;
|
|
|
|
|
* NIR_LOOP_PASS(progress, skip, nir, pass1);
|
|
|
|
|
* NIR_LOOP_PASS_NOT_IDEMPOTENT(progress, skip, nir, nir_opt_algebraic);
|
|
|
|
|
* NIR_LOOP_PASS(progress, skip, nir, pass2);
|
|
|
|
|
* ...
|
|
|
|
|
* }
|
|
|
|
|
* _mesa_set_destroy(skip, NULL);
|
|
|
|
|
*
|
|
|
|
|
* You shouldn't mix usage of this with the NIR_PASS set of helpers, without
|
|
|
|
|
* using a new "skip" in-between.
|
|
|
|
|
*/
|
|
|
|
|
#define NIR_LOOP_PASS(progress, skip, nir, pass, ...) \
|
|
|
|
|
_NIR_LOOP_PASS(progress, true, skip, nir, pass, ##__VA_ARGS__)
|
|
|
|
|
|
|
|
|
|
/* Like NIR_LOOP_PASS, but use this for passes which may make further progress
|
|
|
|
|
* when repeated.
|
|
|
|
|
*/
|
|
|
|
|
#define NIR_LOOP_PASS_NOT_IDEMPOTENT(progress, skip, nir, pass, ...) \
|
|
|
|
|
_NIR_LOOP_PASS(progress, false, skip, nir, pass, ##__VA_ARGS__)
|
|
|
|
|
|
2019-01-17 13:06:04 -08:00
|
|
|
#define NIR_SKIP(name) should_skip_nir(#name)
|
|
|
|
|
|
2020-08-30 15:07:23 +02:00
|
|
|
/** An instruction filtering callback with writemask
|
|
|
|
|
*
|
|
|
|
|
* Returns true if the instruction should be processed with the associated
|
|
|
|
|
* writemask and false otherwise.
|
|
|
|
|
*/
|
|
|
|
|
typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
|
|
|
|
|
unsigned writemask, const void *);
|
|
|
|
|
|
2019-07-11 13:00:42 -05:00
|
|
|
/** A simple instruction lowering callback
|
|
|
|
|
*
|
|
|
|
|
* Many instruction lowering passes can be written as a simple function which
|
|
|
|
|
* takes an instruction as its input and returns a sequence of instructions
|
|
|
|
|
* that implement the consumed instruction. This function type represents
|
|
|
|
|
* such a lowering function. When called, a function with this prototype
|
|
|
|
|
* should either return NULL indicating that no lowering needs to be done or
|
|
|
|
|
* emit a sequence of instructions using the provided builder (whose cursor
|
|
|
|
|
* will already be placed after the instruction to be lowered) and return the
|
2023-08-12 16:17:15 -04:00
|
|
|
* resulting nir_def.
|
2019-07-11 13:00:42 -05:00
|
|
|
*/
|
2023-08-12 16:17:15 -04:00
|
|
|
typedef nir_def *(*nir_lower_instr_cb)(struct nir_builder *,
|
|
|
|
|
nir_instr *, void *);
|
2019-07-11 13:00:42 -05:00
|
|
|
|
2019-07-16 10:52:25 -07:00
|
|
|
/**
|
|
|
|
|
* Special return value for nir_lower_instr_cb when some progress occurred
|
|
|
|
|
* (like changing an input to the instr) that didn't result in a replacement
|
|
|
|
|
* SSA def being generated.
|
|
|
|
|
*/
|
2023-08-12 16:17:15 -04:00
|
|
|
#define NIR_LOWER_INSTR_PROGRESS ((nir_def *)(uintptr_t)1)
|
2019-07-16 10:52:25 -07:00
|
|
|
|
2020-12-09 09:50:11 +01:00
|
|
|
/**
|
|
|
|
|
* Special return value for nir_lower_instr_cb when some progress occurred
|
|
|
|
|
* that should remove the current instruction that doesn't create an output
|
|
|
|
|
* (like a store)
|
|
|
|
|
*/
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_def *)(uintptr_t)2)
|
2020-12-09 09:50:11 +01:00
|
|
|
|
2019-07-11 13:00:42 -05:00
|
|
|
/** Iterate over all the instructions in a nir_function_impl and lower them
|
|
|
|
|
* using the provided callbacks
|
|
|
|
|
*
|
|
|
|
|
* This function implements the guts of a standard lowering pass for you. It
|
|
|
|
|
* iterates over all of the instructions in a nir_function_impl and calls the
|
|
|
|
|
* filter callback on each one. If the filter callback returns true, it then
|
|
|
|
|
* calls the lowering call back on the instruction. (Splitting it this way
|
|
|
|
|
* allows us to avoid some save/restore work for instructions we know won't be
|
|
|
|
|
* lowered.) If the instruction is dead after the lowering is complete, it
|
|
|
|
|
* will be removed. If new instructions are added, the lowering callback will
|
|
|
|
|
* also be called on them in case multiple lowerings are required.
|
|
|
|
|
*
|
2021-06-28 10:51:06 -07:00
|
|
|
* If the callback indicates that the original instruction is replaced (either
|
|
|
|
|
* through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the
|
|
|
|
|
* instruction is removed along with any now-dead SSA defs it used.
|
|
|
|
|
*
|
2019-07-11 13:00:42 -05:00
|
|
|
* The metadata for the nir_function_impl will also be updated. If any blocks
|
|
|
|
|
* are added (they cannot be removed), dominance and block indices will be
|
|
|
|
|
* invalidated.
|
|
|
|
|
*/
|
|
|
|
|
bool nir_function_impl_lower_instructions(nir_function_impl *impl,
|
|
|
|
|
nir_instr_filter_cb filter,
|
|
|
|
|
nir_lower_instr_cb lower,
|
|
|
|
|
void *cb_data);
|
|
|
|
|
bool nir_shader_lower_instructions(nir_shader *shader,
|
|
|
|
|
nir_instr_filter_cb filter,
|
|
|
|
|
nir_lower_instr_cb lower,
|
|
|
|
|
void *cb_data);
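/* Sketch of the usual filter/lower callback pair (assumes the nir_builder.h
 * helpers; the fneg lowering ignores source swizzles and modifiers and is
 * purely illustrative):
 *
 *    static bool
 *    is_fneg(const nir_instr *instr, UNUSED const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_fneg;
 *    }
 *
 *    static nir_def *
 *    lower_fneg(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       return nir_fmul_imm(b, alu->src[0].src.ssa, -1.0);
 *    }
 *
 *    ...
 *    nir_shader_lower_instructions(shader, is_fneg, lower_fneg, NULL);
 */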
|
|
|
|
|
|
2014-07-18 16:13:11 -07:00
|
|
|
void nir_calc_dominance_impl(nir_function_impl *impl);
|
|
|
|
|
void nir_calc_dominance(nir_shader *shader);
|
|
|
|
|
|
2015-02-06 12:06:04 -08:00
|
|
|
nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
|
2015-02-06 12:45:43 -08:00
|
|
|
bool nir_block_dominates(nir_block *parent, nir_block *child);
|
2019-09-02 12:53:16 -05:00
|
|
|
bool nir_block_is_unreachable(nir_block *block);
|
2015-02-06 12:06:04 -08:00
|
|
|
|
2014-07-18 16:13:11 -07:00
|
|
|
void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
|
|
|
|
|
void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
|
|
|
|
|
|
|
|
|
|
void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
|
|
|
|
|
void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
|
|
|
|
|
|
|
|
|
|
void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
|
|
|
|
|
void nir_dump_cfg(nir_shader *shader, FILE *fp);
|
|
|
|
|
|
2020-10-02 16:10:38 +02:00
|
|
|
void nir_gs_count_vertices_and_primitives(const nir_shader *shader,
|
|
|
|
|
int *out_vtxcnt,
|
|
|
|
|
int *out_prmcnt,
|
2023-08-14 16:55:21 -04:00
|
|
|
int *out_decomposed_prmcnt,
|
2020-10-02 16:10:38 +02:00
|
|
|
unsigned num_streams);
|
2015-09-24 17:01:23 -07:00
|
|
|
|
2021-10-26 20:00:58 -04:00
|
|
|
typedef enum {
|
|
|
|
|
nir_group_all,
|
|
|
|
|
nir_group_same_resource_only,
|
|
|
|
|
} nir_load_grouping;
|
|
|
|
|
|
|
|
|
|
void nir_group_loads(nir_shader *shader, nir_load_grouping grouping,
|
|
|
|
|
unsigned max_distance);
|
|
|
|
|
|
2018-07-24 19:32:27 -07:00
|
|
|
bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes);
|
2018-07-24 12:33:46 -07:00
|
|
|
bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
|
2015-09-17 12:33:36 -07:00
|
|
|
bool nir_split_var_copies(nir_shader *shader);
|
2018-03-21 16:48:35 -07:00
|
|
|
bool nir_split_per_member_structs(nir_shader *shader);
|
2018-07-24 10:08:06 -07:00
|
|
|
bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);
|
2014-11-19 14:52:30 -08:00
|
|
|
|
2016-02-13 17:08:57 -08:00
|
|
|
bool nir_lower_returns_impl(nir_function_impl *impl);
|
|
|
|
|
bool nir_lower_returns(nir_shader *shader);
|
|
|
|
|
|
2019-03-04 15:32:36 -06:00
|
|
|
void nir_inline_function_impl(struct nir_builder *b,
|
|
|
|
|
const nir_function_impl *impl,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def **params,
|
2020-08-20 15:00:15 -05:00
|
|
|
struct hash_table *shader_var_remap);
|
2016-02-13 17:31:05 -08:00
|
|
|
bool nir_inline_functions(nir_shader *shader);
|
2020-10-27 07:28:04 +10:00
|
|
|
void nir_cleanup_functions(nir_shader *shader);
|
2023-09-04 11:41:03 +10:00
|
|
|
bool nir_link_shader_functions(nir_shader *shader,
|
|
|
|
|
const nir_shader *link_shader);
|
2024-11-19 00:10:47 -04:00
|
|
|
bool nir_lower_calls_to_builtins(nir_shader *s);
|
2016-02-13 17:31:05 -08:00
|
|
|
|
2020-09-29 17:34:28 -04:00
|
|
|
void nir_find_inlinable_uniforms(nir_shader *shader);
|
|
|
|
|
void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
|
|
|
|
|
const uint32_t *uniform_values,
|
|
|
|
|
const uint16_t *uniform_dw_offsets);
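/* Sketch of the expected flow (the shader_info field names used here are an
 * assumption): the analysis records inlinable dword offsets in the shader,
 * the driver reads the corresponding constant-buffer words at draw time and
 * then inlines them.
 *
 *    nir_find_inlinable_uniforms(nir);
 *    ... fetch values for nir->info.inlinable_uniform_dw_offsets ...
 *    nir_inline_uniforms(nir, nir->info.num_inlinable_uniforms,
 *                        values, nir->info.inlinable_uniform_dw_offsets);
 */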
|
2023-02-06 12:17:45 -08:00
|
|
|
bool nir_collect_src_uniforms(const nir_src *src, int component,
|
|
|
|
|
uint32_t *uni_offsets, uint8_t *num_offsets,
|
|
|
|
|
unsigned max_num_bo, unsigned max_offset);
|
2023-02-06 12:32:48 -08:00
|
|
|
void nir_add_inlinable_uniforms(const nir_src *cond, nir_loop_info *info,
|
|
|
|
|
uint32_t *uni_offsets, uint8_t *num_offsets,
|
|
|
|
|
unsigned max_num_bo, unsigned max_offset);
|
2020-09-29 17:34:28 -04:00
|
|
|
|
2021-01-13 15:50:52 +00:00
|
|
|
bool nir_propagate_invariant(nir_shader *shader, bool invariant_prim);
|
2016-06-13 12:47:19 -07:00
|
|
|
|
2016-12-24 10:34:33 -08:00
|
|
|
void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
|
2018-03-16 00:20:57 -07:00
|
|
|
void nir_lower_deref_copy_instr(struct nir_builder *b,
|
|
|
|
|
nir_intrinsic_instr *copy);
|
2017-02-24 15:34:40 -08:00
|
|
|
bool nir_lower_var_copies(nir_shader *shader);
|
2015-01-14 15:19:49 -08:00
|
|
|
|
2020-09-25 13:02:25 -05:00
|
|
|
bool nir_opt_memcpy(nir_shader *shader);
|
2020-09-14 15:20:37 -05:00
|
|
|
bool nir_lower_memcpy(nir_shader *shader);
|
|
|
|
|
|
2018-03-15 15:37:50 -07:00
|
|
|
void nir_fixup_deref_modes(nir_shader *shader);
|
2023-11-24 13:49:10 +11:00
|
|
|
void nir_fixup_deref_types(nir_shader *shader);
|
2018-03-15 15:37:50 -07:00
|
|
|
|
2015-09-17 08:38:10 -07:00
|
|
|
bool nir_lower_global_vars_to_local(nir_shader *shader);
|
2024-11-25 07:56:57 -05:00
|
|
|
void nir_lower_constant_to_temp(nir_shader *shader);
|
2014-12-02 12:48:38 -08:00
|
|
|
|
2019-03-11 18:47:39 -05:00
|
|
|
typedef enum {
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_lower_direct_array_deref_of_vec_load = (1 << 0),
|
|
|
|
|
nir_lower_indirect_array_deref_of_vec_load = (1 << 1),
|
|
|
|
|
nir_lower_direct_array_deref_of_vec_store = (1 << 2),
|
|
|
|
|
nir_lower_indirect_array_deref_of_vec_store = (1 << 3),
|
2019-03-11 18:47:39 -05:00
|
|
|
} nir_lower_array_deref_of_vec_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
|
2024-06-21 16:26:37 +08:00
|
|
|
bool (*filter)(nir_variable *),
|
2019-03-11 18:47:39 -05:00
|
|
|
nir_lower_array_deref_of_vec_options options);
|
|
|
|
|
|
2020-07-14 13:55:19 -05:00
|
|
|
bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
|
|
|
|
|
uint32_t max_lower_array_len);
|
2016-02-13 17:45:37 -08:00
|
|
|
|
2022-05-12 13:51:31 -05:00
|
|
|
bool nir_lower_indirect_var_derefs(nir_shader *shader,
|
|
|
|
|
const struct set *vars);
|
|
|
|
|
|
2023-07-20 08:14:09 -04:00
|
|
|
bool nir_lower_locals_to_regs(nir_shader *shader, uint8_t bool_bitsize);
|
2014-12-01 20:29:35 -08:00
|
|
|
|
2024-01-10 09:41:02 -04:00
|
|
|
bool nir_lower_io_to_temporaries(nir_shader *shader,
|
2016-08-24 19:09:57 -07:00
|
|
|
nir_function_impl *entrypoint,
|
2016-03-25 15:10:50 -04:00
|
|
|
bool outputs, bool inputs);
|
2016-03-25 13:52:26 -04:00
|
|
|
|
2016-12-02 11:36:42 -08:00
|
|
|
bool nir_lower_vars_to_scratch(nir_shader *shader,
|
|
|
|
|
nir_variable_mode modes,
|
|
|
|
|
int size_threshold,
|
2024-07-14 08:59:27 +02:00
|
|
|
glsl_type_size_align_func variable_size_align,
|
|
|
|
|
glsl_type_size_align_func scratch_layout_size_align);
|
2016-12-02 11:36:42 -08:00
|
|
|
|
2024-12-09 17:05:54 -05:00
|
|
|
bool nir_lower_scratch_to_var(nir_shader *nir);
|
|
|
|
|
|
2019-12-15 18:03:49 +01:00
|
|
|
void nir_lower_clip_halfz(nir_shader *shader);
|
|
|
|
|
|
2016-03-25 10:23:25 -07:00
|
|
|
void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
|
2015-08-28 17:09:02 -07:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
void nir_gather_types(nir_function_impl *impl,
|
|
|
|
|
BITSET_WORD *float_types,
|
|
|
|
|
BITSET_WORD *int_types);
|
2019-01-05 09:13:44 -06:00
|
|
|
|
2024-09-29 21:18:41 -04:00
|
|
|
typedef struct {
|
|
|
|
|
/* Whether all invocations write tess level outputs.
|
|
|
|
|
*
|
|
|
|
|
* This is useful when a pass wants to read tess level values at the end
|
|
|
|
|
* of the shader. If this is true, the pass doesn't have to insert a barrier
|
|
|
|
|
* and use output loads, it can just use the SSA defs that are being stored
|
|
|
|
|
* (or phis thereof) to get the tess level output values.
|
|
|
|
|
*/
|
|
|
|
|
bool all_invocations_define_tess_levels;
|
|
|
|
|
|
|
|
|
|
/* Whether any of the outer tess level components is effectively 0, meaning
|
|
|
|
|
* that the shader discards the patch. NaNs and negative values are included
|
|
|
|
|
* in this. If the patch is discarded, inner tess levels have no effect.
|
|
|
|
|
*/
|
|
|
|
|
bool all_tess_levels_are_effectively_zero;
|
|
|
|
|
|
|
|
|
|
/* Whether all tess levels are effectively 1, meaning that the tessellator
|
|
|
|
|
* behaves as if they were 1. There is a range of values that lead to that
|
|
|
|
|
* behavior depending on the tessellation spacing.
|
|
|
|
|
*/
|
|
|
|
|
bool all_tess_levels_are_effectively_one;
|
|
|
|
|
|
|
|
|
|
/* Whether the shader uses a barrier synchronizing TCS output stores.
|
|
|
|
|
* For example, passes that write an output at the beginning of the shader
|
|
|
|
|
* and load it at the end can use this to determine whether they have to
|
|
|
|
|
* insert a barrier or whether the shader already contains a barrier.
|
|
|
|
|
*/
|
|
|
|
|
bool always_executes_barrier;
|
|
|
|
|
|
|
|
|
|
/* Whether outer tess levels <= 0 are written anywhere in the shader. */
|
|
|
|
|
bool discards_patches;
|
|
|
|
|
} nir_tcs_info;
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nir_gather_tcs_info(const nir_shader *nir, nir_tcs_info *info,
|
|
|
|
|
enum tess_primitive_mode prim,
|
|
|
|
|
enum gl_tess_spacing spacing);
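/* Usage sketch; the primitive mode and spacing are assumed here to come from
 * the TCS's own shader_info, though a driver may use the linked TES's values
 * instead:
 *
 *    nir_tcs_info tcs_info;
 *    nir_gather_tcs_info(tcs, &tcs_info, tcs->info.tess._primitive_mode,
 *                        tcs->info.tess.spacing);
 */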
|
|
|
|
|
|
2020-07-20 14:51:04 -05:00
|
|
|
void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
|
|
|
|
|
unsigned *size,
|
2019-03-29 12:39:48 +11:00
|
|
|
int (*type_size)(const struct glsl_type *, bool));
|
2014-12-01 22:01:05 -08:00
|
|
|
|
2017-09-07 23:27:59 +10:00
|
|
|
/* Some helpers to do very simple linking */
|
|
|
|
|
bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
|
2020-05-20 10:09:05 -05:00
|
|
|
bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode,
|
2018-09-26 09:13:13 -07:00
|
|
|
uint64_t *used_by_other_stage,
|
|
|
|
|
uint64_t *used_by_other_stage_patches);
|
2017-10-18 19:40:06 +11:00
|
|
|
void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
|
|
|
|
|
bool default_to_smooth_interp);
|
2018-10-23 21:56:29 +11:00
|
|
|
void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
|
2018-12-11 11:49:00 +11:00
|
|
|
bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
|
2020-08-03 15:21:16 -04:00
|
|
|
void nir_link_varying_precision(nir_shader *producer, nir_shader *consumer);
|
2023-04-05 06:40:59 -04:00
|
|
|
nir_variable *nir_clone_uniform_variable(nir_shader *nir,
|
2023-04-06 23:34:47 -04:00
|
|
|
nir_variable *uniform, bool spirv);
|
2023-04-05 06:40:59 -04:00
|
|
|
nir_deref_instr *nir_clone_deref_instr(struct nir_builder *b,
|
|
|
|
|
nir_variable *var,
|
|
|
|
|
nir_deref_instr *deref);
|
2017-09-07 23:27:59 +10:00
|
|
|
|
nir: add nir_opt_varyings, new pass optimizing and compacting varyings
Highlights:
- all shader stages and all input/output types are handled, including
inputs and outputs with multiple vertices
- the optimizations performed are: unused input/output removal, constant
and uniform propagation, output deduplication, inter-shader code motion,
and compaction
- constant and uniform propagation and output deduplication work even
if a shader contains multiple stores of the same output, e.g. in GS
- the same optimizations are also performed between output stores and
output loads (for TCS)
- FS inputs are packed aggressively. Only flat, interp FP32, and interp
FP16 can't be in the same vec4. Also, if an output value is
non-divergent within a primitive, the corresponding FS input is
opportunistically promoted to flat.
The big comment at the beginning of nir_opt_varyings.c has a detailed
explanation, which is the same as:
https://gitlab.freedesktop.org/mesa/mesa/-/issues/8841
dEQP and GLCTS have incorrect tests that fail with this, see:
https://gitlab.freedesktop.org/mesa/mesa/-/issues/10361
Acked-by: Timothy Arceri <tarceri@itsqueeze.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26819>
2023-04-06 04:21:00 -04:00
|
|
|
|
|
|
|
|
/* Return status from nir_opt_varyings. */
|
|
|
|
|
typedef enum {
|
|
|
|
|
/* Whether the IR changed such that NIR optimizations should be run, such
|
|
|
|
|
* as due to removal of loads and stores. IO semantic changes such as
|
|
|
|
|
* compaction don't count as IR changes because they don't affect NIR
|
|
|
|
|
* optimizations.
|
|
|
|
|
*/
|
|
|
|
|
nir_progress_producer = BITFIELD_BIT(0),
|
|
|
|
|
nir_progress_consumer = BITFIELD_BIT(1),
|
|
|
|
|
} nir_opt_varyings_progress;
|
|
|
|
|
|
|
|
|
|
nir_opt_varyings_progress
|
|
|
|
|
nir_opt_varyings(nir_shader *producer, nir_shader *consumer, bool spirv,
|
|
|
|
|
unsigned max_uniform_components, unsigned max_ubos_per_stage);
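/* Usage sketch (the uniform-component and UBO limits are illustrative,
 * driver-specific numbers):
 *
 *    nir_opt_varyings_progress p =
 *       nir_opt_varyings(producer, consumer, false, 128, 16);
 *    if (p & nir_progress_producer)
 *       ... re-run the optimization loop on the producer ...
 *    if (p & nir_progress_consumer)
 *       ... re-run the optimization loop on the consumer ...
 */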
|
|
|
|
|
|
2023-03-12 03:16:18 -04:00
|
|
|
bool nir_slot_is_sysval_output(gl_varying_slot slot,
|
|
|
|
|
gl_shader_stage next_shader);
|
2022-01-02 19:46:45 -05:00
|
|
|
bool nir_slot_is_varying(gl_varying_slot slot);
|
2023-03-12 03:16:18 -04:00
|
|
|
bool nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
|
|
|
|
|
gl_shader_stage next_shader);
|
2023-03-12 03:18:38 -04:00
|
|
|
bool nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader);
|
2023-11-18 22:37:27 -05:00
|
|
|
bool nir_remove_sysval_output(nir_intrinsic_instr *intr);
|
2022-01-02 19:46:45 -05:00
|
|
|
|
2019-09-27 10:15:02 -07:00
|
|
|
bool nir_lower_amul(nir_shader *shader,
|
|
|
|
|
int (*type_size)(const struct glsl_type *, bool));
|
2019-05-10 10:18:12 +02:00
|
|
|
|
2020-08-18 11:38:41 -07:00
|
|
|
bool nir_lower_ubo_vec4(nir_shader *shader);
|
|
|
|
|
|
2023-09-01 18:56:10 -04:00
|
|
|
void nir_sort_variables_by_location(nir_shader *shader, nir_variable_mode mode);
|
2020-07-20 14:42:53 -05:00
|
|
|
void nir_assign_io_var_locations(nir_shader *shader,
|
|
|
|
|
nir_variable_mode mode,
|
2019-05-10 10:18:12 +02:00
|
|
|
unsigned *size,
|
|
|
|
|
gl_shader_stage stage);
|
|
|
|
|
|
2024-12-05 19:59:02 +01:00
|
|
|
bool nir_opt_clip_cull_const(nir_shader *shader);
|
|
|
|
|
|
2016-09-14 10:29:38 -07:00
|
|
|
typedef enum {
|
2019-07-19 17:10:07 -05:00
|
|
|
/* If set, this causes all 64-bit IO operations to be lowered on-the-fly
|
|
|
|
|
* to 32-bit operations. This is only valid for nir_var_shader_in/out
|
|
|
|
|
* modes.
|
2023-03-13 00:18:47 -04:00
|
|
|
*
|
|
|
|
|
    * Note that this destroys dual-slot information, i.e. whether an input
|
|
|
|
|
* occupies the low or high half of dvec4. Instead, it adds an offset of 1
|
|
|
|
|
* to the load (which is ambiguous) and expects driver locations of inputs
|
|
|
|
|
* to be final, which prevents any further optimizations.
|
|
|
|
|
*
|
|
|
|
|
* TODO: remove this in favor of nir_lower_io_lower_64bit_to_32_new.
|
2019-07-19 17:10:07 -05:00
|
|
|
*/
|
|
|
|
|
nir_lower_io_lower_64bit_to_32 = (1 << 0),
|
2023-03-13 00:18:47 -04:00
|
|
|
|
2023-08-04 14:59:14 -04:00
|
|
|
/* If set, this causes the subset of 64-bit IO operations involving floats to be lowered on-the-fly
|
|
|
|
|
* to 32-bit operations. This is only valid for nir_var_shader_in/out
|
|
|
|
|
* modes.
|
|
|
|
|
*/
|
|
|
|
|
nir_lower_io_lower_64bit_float_to_32 = (1 << 1),
|
2023-03-13 00:18:47 -04:00
|
|
|
|
|
|
|
|
/* This causes all 64-bit IO operations to be lowered to 32-bit operations.
|
|
|
|
|
* This is only valid for nir_var_shader_in/out modes.
|
|
|
|
|
*
|
|
|
|
|
* Only VS inputs: Dual slot information is preserved as nir_io_semantics::
|
|
|
|
|
* high_dvec2 and gathered into shader_info::dual_slot_inputs, so that
|
|
|
|
|
* the shader can be arbitrarily optimized and the low or high half of
|
|
|
|
|
* dvec4 can be DCE'd independently without affecting the other half.
|
|
|
|
|
*/
|
|
|
|
|
nir_lower_io_lower_64bit_to_32_new = (1 << 2),
|
2024-11-18 12:43:22 -05:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Should nir_lower_io() create load_interpolated_input intrinsics?
|
|
|
|
|
*
|
|
|
|
|
* If not, it generates regular load_input intrinsics and interpolation
|
|
|
|
|
* information must be inferred from the list of input nir_variables.
|
|
|
|
|
*/
|
|
|
|
|
nir_lower_io_use_interpolated_input_intrinsics = (1 << 3),
|
2016-09-14 10:29:38 -07:00
|
|
|
} nir_lower_io_options;
|
2017-03-09 11:01:22 -08:00
|
|
|
bool nir_lower_io(nir_shader *shader,
|
2016-04-11 13:43:27 -07:00
|
|
|
nir_variable_mode modes,
|
2019-03-29 12:39:48 +11:00
|
|
|
int (*type_size)(const struct glsl_type *, bool),
|
2016-09-14 10:29:38 -07:00
|
|
|
nir_lower_io_options);
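/* Sketch of a typical call for vertex/fragment IO, counting vec4 slots with
 * the glsl_count_attribute_slots() helper (the io_type_size wrapper below is
 * hypothetical):
 *
 *    static int
 *    io_type_size(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *    ...
 *    nir_lower_io(nir, nir_var_shader_in | nir_var_shader_out,
 *                 io_type_size, nir_lower_io_lower_64bit_to_32);
 */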
|
2018-11-27 21:31:42 -06:00
|
|
|
|
2020-09-28 13:13:49 +02:00
|
|
|
bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes);
|
2023-03-09 21:30:41 -05:00
|
|
|
bool nir_lower_color_inputs(nir_shader *nir);
|
2023-05-04 15:45:49 -04:00
|
|
|
void nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs);
|
2022-05-30 11:43:03 -04:00
|
|
|
bool nir_io_add_intrinsic_xfb_info(nir_shader *nir);
|
|
|
|
|
|
2019-04-09 20:18:11 +01:00
|
|
|
bool
|
|
|
|
|
nir_lower_vars_to_explicit_types(nir_shader *shader,
|
|
|
|
|
nir_variable_mode modes,
|
|
|
|
|
glsl_type_size_align_func type_info);
|
2020-11-03 11:33:19 -06:00
|
|
|
void
|
|
|
|
|
nir_gather_explicit_io_initializers(nir_shader *shader,
|
|
|
|
|
void *dst, size_t dst_size,
|
|
|
|
|
nir_variable_mode mode);
|
2019-04-09 20:18:11 +01:00
|
|
|
|
2020-09-25 17:43:33 -05:00
|
|
|
bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes);
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
typedef enum {
|
2019-01-07 17:17:46 -06:00
|
|
|
/**
|
|
|
|
|
* An address format which is a simple 32-bit global GPU address.
|
|
|
|
|
*/
|
|
|
|
|
nir_address_format_32bit_global,
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* An address format which is a simple 64-bit global GPU address.
|
|
|
|
|
*/
|
|
|
|
|
nir_address_format_64bit_global,
|
|
|
|
|
|
2022-06-22 11:28:53 +02:00
|
|
|
/**
|
|
|
|
|
* An address format which is a 64-bit global GPU address encoded as a
|
|
|
|
|
* 2x32-bit vector.
|
|
|
|
|
*/
|
|
|
|
|
nir_address_format_2x32bit_global,
|
|
|
|
|
|
2021-01-20 14:27:32 -06:00
|
|
|
/**
|
|
|
|
|
* An address format which is a 64-bit global base address and a 32-bit
|
|
|
|
|
* offset.
|
|
|
|
|
*
|
2023-02-21 07:37:28 -06:00
|
|
|
* This is identical to 64bit_bounded_global except that bounds checking
|
|
|
|
|
* is not applied when lowering to global access. Even though the size is
|
|
|
|
|
* never used for an actual bounds check, it needs to be valid so we can
|
|
|
|
|
* lower deref_buffer_array_length properly.
|
2021-01-20 14:27:32 -06:00
|
|
|
*/
|
|
|
|
|
nir_address_format_64bit_global_32bit_offset,
|
|
|
|
|
|
2019-01-09 14:56:02 -06:00
|
|
|
/**
|
|
|
|
|
* An address format which is a bounds-checked 64-bit global GPU address.
|
|
|
|
|
*
|
|
|
|
|
* The address is comprised as a 32-bit vec4 where .xy are a uint64_t base
|
|
|
|
|
* address stored with the low bits in .x and high bits in .y, .z is a
|
|
|
|
|
* size, and .w is an offset. When the final I/O operation is lowered, .w
|
|
|
|
|
* is checked against .z and the operation is predicated on the result.
|
|
|
|
|
*/
|
|
|
|
|
nir_address_format_64bit_bounded_global,
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
/**
|
|
|
|
|
* An address format which is comprised of a vec2 where the first
|
2019-03-09 10:10:37 -06:00
|
|
|
* component is a buffer index and the second is an offset.
|
2018-11-27 21:31:42 -06:00
|
|
|
*/
|
2019-03-09 10:10:37 -06:00
|
|
|
nir_address_format_32bit_index_offset,
|
2019-05-01 13:24:45 -07:00
|
|
|
|
2020-05-25 08:53:00 -07:00
|
|
|
/**
|
|
|
|
|
* An address format which is a 64-bit value, where the high 32 bits
|
|
|
|
|
* are a buffer index, and the low 32 bits are an offset.
|
|
|
|
|
*/
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_address_format_32bit_index_offset_pack64,
|
2020-05-25 08:53:00 -07:00
|
|
|
|
nir: add vec2_index_32bit_offset address format
For turnip, we use the "bindless" model on a6xx. Loads and stores with
the bindless model require a bindless base, which is an immediate field
in the instruction that selects between 5 different 64-bit "bindless
base registers", a 32-bit descriptor index that's added to the base, and
the usual 32-bit offset. The bindless base usually, but not always,
corresponds to the Vulkan descriptor set. We can handle the case where
the base is non-constant by using a bunch of if-statements, to make it a
little easier in core NIR, and this seems to be what Qualcomm's driver
does too. Therefore, the pointer format we need to use in NIR has a vec2
index, for the bindless base and descriptor index. Plumb this format
through core NIR.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5683>
2020-06-29 19:47:57 +02:00
|
|
|
/**
|
|
|
|
|
* An address format which is comprised of a vec3 where the first two
|
|
|
|
|
* components specify the buffer and the third is an offset.
|
|
|
|
|
*/
|
|
|
|
|
nir_address_format_vec2_index_32bit_offset,
|
|
|
|
|
|
2020-08-15 00:57:14 -05:00
|
|
|
/**
|
|
|
|
|
* An address format which represents generic pointers with a 62-bit
|
|
|
|
|
* pointer and a 2-bit enum in the top two bits. The top two bits have
|
|
|
|
|
* the following meanings:
|
|
|
|
|
*
|
|
|
|
|
* - 0x0: Global memory
|
|
|
|
|
* - 0x1: Shared memory
|
|
|
|
|
* - 0x2: Scratch memory
|
|
|
|
|
* - 0x3: Global memory
|
|
|
|
|
*
|
|
|
|
|
* The redundancy between 0x0 and 0x3 is because of Intel sign-extension of
|
|
|
|
|
* addresses. Valid global memory addresses may naturally have either 0 or
|
|
|
|
|
* ~0 as their high bits.
|
|
|
|
|
*
|
|
|
|
|
* Shared and scratch pointers are represented as 32-bit offsets with the
|
|
|
|
|
* top 32 bits only being used for the enum. This allows us to avoid
|
|
|
|
|
* 64-bit address calculations in a bunch of cases.
|
|
|
|
|
*/
|
|
|
|
|
nir_address_format_62bit_generic,
|
|
|
|
|
|
2019-05-03 14:34:55 -07:00
|
|
|
/**
|
|
|
|
|
* An address format which is a simple 32-bit offset.
|
|
|
|
|
*/
|
|
|
|
|
nir_address_format_32bit_offset,
|
|
|
|
|
|
2020-05-25 08:57:14 -07:00
|
|
|
/**
|
|
|
|
|
* An address format which is a simple 32-bit offset cast to 64-bit.
|
|
|
|
|
*/
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_address_format_32bit_offset_as_64bit,
|
2020-05-25 08:57:14 -07:00
|
|
|
|
2019-05-01 13:24:45 -07:00
|
|
|
/**
|
|
|
|
|
* An address format representing a purely logical addressing model. In
|
|
|
|
|
* this model, all deref chains must be complete from the dereference
|
|
|
|
|
* operation to the variable. Cast derefs are not allowed. These
|
|
|
|
|
* addresses will be 32-bit scalars but the format is immaterial because
|
|
|
|
|
* you can always chase the chain.
|
|
|
|
|
*/
|
|
|
|
|
nir_address_format_logical,
|
2018-11-27 21:31:42 -06:00
|
|
|
} nir_address_format;
|
2019-01-07 18:00:22 -06:00
|
|
|
|
2021-11-22 11:11:16 -08:00
|
|
|
unsigned
|
|
|
|
|
nir_address_format_bit_size(nir_address_format addr_format);
|
2019-04-18 12:08:34 -05:00
|
|
|
|
2021-11-22 11:11:16 -08:00
|
|
|
unsigned
|
|
|
|
|
nir_address_format_num_components(nir_address_format addr_format);
|
2019-04-18 12:08:34 -05:00
|
|
|
|
|
|
|
|
static inline const struct glsl_type *
|
|
|
|
|
nir_address_format_to_glsl_type(nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
unsigned bit_size = nir_address_format_bit_size(addr_format);
|
|
|
|
|
assert(bit_size == 32 || bit_size == 64);
|
|
|
|
|
return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64,
|
|
|
|
|
nir_address_format_num_components(addr_format));
|
|
|
|
|
}
|
|
|
|
|
|
2019-05-01 14:44:15 -07:00
|
|
|
const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *nir_build_addr_iadd(struct nir_builder *b, nir_def *addr,
|
|
|
|
|
nir_address_format addr_format,
|
|
|
|
|
nir_variable_mode modes,
|
|
|
|
|
nir_def *offset);
|
|
|
|
|
|
|
|
|
|
nir_def *nir_build_addr_iadd_imm(struct nir_builder *b, nir_def *addr,
|
2023-01-09 21:15:08 +02:00
|
|
|
nir_address_format addr_format,
|
|
|
|
|
nir_variable_mode modes,
|
2023-08-12 16:17:15 -04:00
|
|
|
int64_t offset);
|
2023-01-09 21:15:08 +02:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *nir_build_addr_ieq(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
|
|
|
|
|
nir_address_format addr_format);
|
2023-01-09 21:15:08 +02:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *nir_build_addr_isub(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
|
|
|
|
|
nir_address_format addr_format);
|
2019-05-16 15:11:07 -07:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *nir_explicit_io_address_from_deref(struct nir_builder *b,
|
|
|
|
|
nir_deref_instr *deref,
|
|
|
|
|
nir_def *base_addr,
|
|
|
|
|
nir_address_format addr_format);
|
2020-08-24 10:48:51 -05:00
|
|
|
|
|
|
|
|
bool nir_get_explicit_deref_align(nir_deref_instr *deref,
|
|
|
|
|
bool default_to_type_align,
|
|
|
|
|
uint32_t *align_mul,
|
|
|
|
|
uint32_t *align_offset);
|
|
|
|
|
|
2019-01-07 18:00:22 -06:00
|
|
|
void nir_lower_explicit_io_instr(struct nir_builder *b,
|
|
|
|
|
nir_intrinsic_instr *io_instr,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *addr,
|
2019-01-07 18:00:22 -06:00
|
|
|
nir_address_format addr_format);
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
bool nir_lower_explicit_io(nir_shader *shader,
|
|
|
|
|
nir_variable_mode modes,
|
|
|
|
|
nir_address_format);
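/*
 * Usage sketch (illustrative, not part of the original header): a driver
 * typically picks one nir_address_format per variable mode and lowers
 * explicit I/O for those modes in one or more calls. The formats below are
 * only an example of a plausible configuration; nir_address_format_64bit_global
 * is assumed to be among the formats declared earlier in this header.
 *
 *    static bool
 *    example_lower_buffer_io(nir_shader *s)
 *    {
 *       bool progress = false;
 *       progress |= nir_lower_explicit_io(s, nir_var_mem_global,
 *                                         nir_address_format_64bit_global);
 *       progress |= nir_lower_explicit_io(s, nir_var_mem_shared | nir_var_mem_scratch,
 *                                         nir_address_format_32bit_offset);
 *       return progress;
 *    }
 */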
|
|
|
|
|
|
2024-10-30 14:41:25 +00:00
|
|
|
typedef enum {
|
|
|
|
|
/* Use open-coded funnel shifts for each component. */
|
|
|
|
|
nir_mem_access_shift_method_scalar,
|
|
|
|
|
* Prefer to use 64-bit shifts to do the same with fewer instructions. Useful
|
|
|
|
|
* if 64-bit shifts are cheap.
|
|
|
|
|
*/
|
|
|
|
|
nir_mem_access_shift_method_shift64,
|
|
|
|
|
/* If nir_op_alignbyte_amd can be used, this is the best option with just a
|
|
|
|
|
* single nir_op_alignbyte_amd for each 32-bit component.
|
|
|
|
|
*/
|
|
|
|
|
nir_mem_access_shift_method_bytealign_amd,
|
|
|
|
|
} nir_mem_access_shift_method;
|
|
|
|
|
|
2023-02-09 18:01:16 -06:00
|
|
|
typedef struct {
|
|
|
|
|
uint8_t num_components;
|
|
|
|
|
uint8_t bit_size;
|
2023-03-01 09:10:20 -06:00
|
|
|
uint16_t align;
|
2024-10-30 14:41:25 +00:00
|
|
|
/* If a load's alignment is increased, this specifies how the data should be
|
|
|
|
|
* shifted before converting to the original bit size.
|
|
|
|
|
*/
|
|
|
|
|
nir_mem_access_shift_method shift;
|
2023-02-09 18:01:16 -06:00
|
|
|
} nir_mem_access_size_align;
|
|
|
|
|
|
2023-07-29 11:37:16 -05:00
|
|
|
/* clang-format off */
|
2023-02-09 18:01:16 -06:00
|
|
|
typedef nir_mem_access_size_align
|
|
|
|
|
(*nir_lower_mem_access_bit_sizes_cb)(nir_intrinsic_op intrin,
|
|
|
|
|
uint8_t bytes,
|
2023-05-22 09:49:23 -07:00
|
|
|
uint8_t bit_size,
|
2023-02-09 18:01:16 -06:00
|
|
|
uint32_t align_mul,
|
|
|
|
|
uint32_t align_offset,
|
|
|
|
|
bool offset_is_const,
|
2024-10-22 12:21:09 +01:00
|
|
|
enum gl_access_qualifier,
|
2023-02-09 18:01:16 -06:00
|
|
|
const void *cb_data);
|
2023-07-29 11:37:16 -05:00
|
|
|
/* clang-format on */
|
2023-02-09 18:01:16 -06:00
|
|
|
|
2023-06-12 14:26:24 -07:00
|
|
|
typedef struct {
|
|
|
|
|
nir_lower_mem_access_bit_sizes_cb callback;
|
|
|
|
|
nir_variable_mode modes;
|
2023-05-22 15:09:30 -07:00
|
|
|
bool may_lower_unaligned_stores_to_atomics;
|
2023-06-12 14:26:24 -07:00
|
|
|
void *cb_data;
|
|
|
|
|
} nir_lower_mem_access_bit_sizes_options;
|
|
|
|
|
|
2023-02-09 18:01:16 -06:00
|
|
|
bool nir_lower_mem_access_bit_sizes(nir_shader *shader,
|
2023-06-12 14:26:24 -07:00
|
|
|
const nir_lower_mem_access_bit_sizes_options *options);
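/*
 * Callback sketch (illustrative, not from the original header): the callback
 * reports the access size and alignment the backend can actually perform.
 * This trivial version splits every access into 32-bit scalars and ignores
 * most of its arguments; a real driver would consult bytes, the alignment
 * and the access qualifiers instead.
 *
 *    static nir_mem_access_size_align
 *    example_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes,
 *                          uint8_t bit_size, uint32_t align_mul,
 *                          uint32_t align_offset, bool offset_is_const,
 *                          enum gl_access_qualifier access, const void *cb_data)
 *    {
 *       return (nir_mem_access_size_align){
 *          .num_components = 1,
 *          .bit_size = 32,
 *          .align = 4,
 *          .shift = nir_mem_access_shift_method_scalar,
 *       };
 *    }
 *
 *    static bool
 *    example_lower_bit_sizes(nir_shader *s)
 *    {
 *       const nir_lower_mem_access_bit_sizes_options opts = {
 *          .callback = example_mem_access_cb,
 *          .modes = nir_var_mem_ssbo | nir_var_mem_global,
 *       };
 *       return nir_lower_mem_access_bit_sizes(s, &opts);
 *    }
 */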
|
2023-02-09 18:01:16 -06:00
|
|
|
|
2023-06-27 16:29:55 -04:00
|
|
|
typedef struct {
|
|
|
|
|
/* Lower load_ubo to be robust. Out-of-bounds loads will return UNDEFINED
|
|
|
|
|
* values (not necessarily zero).
|
|
|
|
|
*/
|
|
|
|
|
bool lower_ubo;
|
|
|
|
|
|
|
|
|
|
/* Lower load_ssbo/store_ssbo/ssbo_atomic(_swap) to be robust. Out-of-bounds
|
|
|
|
|
* loads and atomics will return UNDEFINED values (not necessarily zero).
|
|
|
|
|
* Out-of-bounds stores and atomics CORRUPT the contents of the SSBO.
|
|
|
|
|
*
|
|
|
|
|
* This suffices for robustBufferAccess but not robustBufferAccess2.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_ssbo;
|
|
|
|
|
|
|
|
|
|
/* Lower all image_load/image_store/image_atomic(_swap) instructions to be
|
|
|
|
|
* robust. Out-of-bounds loads will return ZERO.
|
|
|
|
|
*
|
|
|
|
|
* This suffices for robustImageAccess but not robustImageAccess2.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_image;
|
|
|
|
|
|
|
|
|
|
/* Lower all buffer image instructions as above. Implied by lower_image. */
|
|
|
|
|
bool lower_buffer_image;
|
|
|
|
|
|
|
|
|
|
/* Lower image_atomic(_swap) for all dimensions. Implied by lower_image. */
|
|
|
|
|
bool lower_image_atomic;
|
|
|
|
|
|
|
|
|
|
/* Vulkan's robustBufferAccess feature is only concerned with buffers that
|
|
|
|
|
* are bound through descriptor sets, so shared memory is not included, but
|
|
|
|
|
* it may be useful to enable this for debugging.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_shared;
|
|
|
|
|
} nir_lower_robust_access_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_robust_access(nir_shader *s,
|
|
|
|
|
const nir_lower_robust_access_options *opts);
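/*
 * Usage sketch (illustrative): enabling robustBufferAccess-style behaviour
 * for UBOs and SSBOs only, leaving image access untouched.
 *
 *    static bool
 *    example_make_buffers_robust(nir_shader *s)
 *    {
 *       const nir_lower_robust_access_options opts = {
 *          .lower_ubo = true,
 *          .lower_ssbo = true,
 *       };
 *       return nir_lower_robust_access(s, &opts);
 *    }
 */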
|
|
|
|
|
|
2023-07-29 11:37:16 -05:00
|
|
|
/* clang-format off */
|
2022-11-29 12:36:44 +02:00
|
|
|
typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
|
|
|
|
|
unsigned align_offset,
|
|
|
|
|
unsigned bit_size,
|
|
|
|
|
unsigned num_components,
|
2024-06-15 00:36:12 -04:00
|
|
|
/* The hole between low and
|
|
|
|
|
* high if they are not adjacent. */
|
|
|
|
|
unsigned hole_size,
|
2023-07-29 11:37:16 -05:00
|
|
|
nir_intrinsic_instr *low,
|
|
|
|
|
nir_intrinsic_instr *high,
|
2022-11-29 12:36:44 +02:00
|
|
|
void *data);
|
2023-07-29 11:37:16 -05:00
|
|
|
/* clang-format on */
|
2022-11-29 12:36:44 +02:00
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
nir_should_vectorize_mem_func callback;
|
|
|
|
|
nir_variable_mode modes;
|
|
|
|
|
nir_variable_mode robust_modes;
|
|
|
|
|
void *cb_data;
|
|
|
|
|
bool has_shared2_amd;
|
|
|
|
|
} nir_load_store_vectorize_options;
|
|
|
|
|
|
|
|
|
|
bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options);
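/*
 * Callback sketch (illustrative): a nir_should_vectorize_mem_func that only
 * allows hole-free merges up to 128 bits. The 128-bit limit is an assumption
 * made for the example; real backends derive it from their load/store
 * capabilities.
 *
 *    static bool
 *    example_should_vectorize(unsigned align_mul, unsigned align_offset,
 *                             unsigned bit_size, unsigned num_components,
 *                             unsigned hole_size, nir_intrinsic_instr *low,
 *                             nir_intrinsic_instr *high, void *data)
 *    {
 *       return hole_size == 0 && bit_size * num_components <= 128;
 *    }
 *
 *    static bool
 *    example_vectorize(nir_shader *s)
 *    {
 *       const nir_load_store_vectorize_options opts = {
 *          .callback = example_should_vectorize,
 *          .modes = nir_var_mem_ssbo | nir_var_mem_shared,
 *       };
 *       return nir_opt_load_store_vectorize(s, &opts);
 *    }
 */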
|
2024-05-17 00:35:32 +02:00
|
|
|
bool nir_opt_load_store_update_alignments(nir_shader *shader);
|
2022-11-29 12:36:44 +02:00
|
|
|
|
2023-01-19 11:54:10 +02:00
|
|
|
typedef bool (*nir_lower_shader_calls_should_remat_func)(nir_instr *instr, void *data);
|
|
|
|
|
|
2022-10-19 16:33:20 +03:00
|
|
|
typedef struct nir_lower_shader_calls_options {
|
|
|
|
|
/* Address format used for load/store operations on the call stack. */
|
|
|
|
|
nir_address_format address_format;
|
|
|
|
|
|
|
|
|
|
/* Stack alignment */
|
|
|
|
|
unsigned stack_alignment;
|
2022-05-18 18:31:27 +03:00
|
|
|
|
|
|
|
|
/* Put loads from the stack as close as possible to where they're needed.
|
|
|
|
|
* You might want to disable combined_loads for best effects.
|
|
|
|
|
*/
|
|
|
|
|
bool localized_loads;
|
2022-11-29 12:36:44 +02:00
|
|
|
|
|
|
|
|
/* If this function pointer is not NULL, lower_shader_calls will run
|
|
|
|
|
* nir_opt_load_store_vectorize for stack load/store operations. Otherwise
|
|
|
|
|
* the optimization is not run.
|
|
|
|
|
*/
|
|
|
|
|
nir_should_vectorize_mem_func vectorizer_callback;
|
|
|
|
|
|
|
|
|
|
/* Data passed to vectorizer_callback */
|
|
|
|
|
void *vectorizer_data;
|
2023-01-19 11:54:10 +02:00
|
|
|
|
|
|
|
|
/* If this function pointer is not NULL, lower_shader_calls will call this
|
|
|
|
|
* function on instructions that require spill/fill/rematerialization of
|
|
|
|
|
* their value. If this function returns true, lower_shader_calls will
|
|
|
|
|
* ensure that the instruction is rematerialized, adding the sources of the
|
|
|
|
|
* instruction to be spilled/filled.
|
|
|
|
|
*/
|
|
|
|
|
nir_lower_shader_calls_should_remat_func should_remat_callback;
|
|
|
|
|
|
|
|
|
|
/* Data passed to should_remat_callback */
|
|
|
|
|
void *should_remat_data;
|
2022-10-19 16:33:20 +03:00
|
|
|
} nir_lower_shader_calls_options;
|
|
|
|
|
|
2021-02-16 02:37:40 +01:00
|
|
|
bool
|
|
|
|
|
nir_lower_shader_calls(nir_shader *shader,
|
2022-10-19 16:33:20 +03:00
|
|
|
const nir_lower_shader_calls_options *options,
|
2021-02-16 02:37:40 +01:00
|
|
|
nir_shader ***resume_shaders_out,
|
|
|
|
|
uint32_t *num_resume_shaders_out,
|
|
|
|
|
void *mem_ctx);
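/*
 * Usage sketch (illustrative): lowering shader calls for a ray-tracing style
 * pipeline with a global call stack. The address format, alignment and the
 * handling of the resume shaders are assumptions for the example and are
 * driver-specific in practice.
 *
 *    static void
 *    example_lower_calls(nir_shader *s, void *mem_ctx)
 *    {
 *       const nir_lower_shader_calls_options opts = {
 *          .address_format = nir_address_format_64bit_global,
 *          .stack_alignment = 16,
 *       };
 *       nir_shader **resume_shaders = NULL;
 *       uint32_t num_resume_shaders = 0;
 *       nir_lower_shader_calls(s, &opts, &resume_shaders,
 *                              &num_resume_shaders, mem_ctx);
 *       // Each resume shader is then compiled like a regular shader.
 *    }
 */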
|
|
|
|
|
|
2023-06-12 21:58:43 -07:00
|
|
|
int nir_get_io_offset_src_number(const nir_intrinsic_instr *instr);
|
|
|
|
|
int nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr);
|
|
|
|
|
|
2015-11-25 14:14:05 -08:00
|
|
|
nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
|
2021-10-14 18:14:12 +02:00
|
|
|
nir_src *nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr);
|
2020-07-10 15:32:43 -05:00
|
|
|
nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call);
|
2015-11-07 22:35:33 -08:00
|
|
|
|
2021-04-27 14:49:40 -07:00
|
|
|
bool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage);
|
2016-09-25 22:19:07 -07:00
|
|
|
|
2023-05-22 14:44:52 -04:00
|
|
|
bool nir_lower_reg_intrinsics_to_ssa_impl(nir_function_impl *impl);
|
|
|
|
|
bool nir_lower_reg_intrinsics_to_ssa(nir_shader *shader);
|
2017-02-24 15:22:54 -08:00
|
|
|
bool nir_lower_vars_to_ssa(nir_shader *shader);
|
2014-11-13 17:16:31 -08:00
|
|
|
|
2018-03-20 17:32:07 -07:00
|
|
|
bool nir_remove_dead_derefs(nir_shader *shader);
|
|
|
|
|
bool nir_remove_dead_derefs_impl(nir_function_impl *impl);
|
2021-01-21 08:41:15 -08:00
|
|
|
|
|
|
|
|
typedef struct nir_remove_dead_variables_options {
|
|
|
|
|
bool (*can_remove_var)(nir_variable *var, void *data);
|
|
|
|
|
void *can_remove_var_data;
|
|
|
|
|
} nir_remove_dead_variables_options;
|
|
|
|
|
|
2020-05-28 10:59:28 +10:00
|
|
|
bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes,
|
2021-01-21 08:41:15 -08:00
|
|
|
const nir_remove_dead_variables_options *options);
|
|
|
|
|
|
2020-02-07 14:18:49 -06:00
|
|
|
bool nir_lower_variable_initializers(nir_shader *shader,
|
2016-07-15 16:44:53 -07:00
|
|
|
nir_variable_mode modes);
|
2020-09-18 14:19:21 -07:00
|
|
|
bool nir_zero_initialize_shared_memory(nir_shader *shader,
|
|
|
|
|
const unsigned shared_size,
|
|
|
|
|
const unsigned chunk_size);
|
2023-11-01 15:32:14 +01:00
|
|
|
bool nir_clear_shared_memory(nir_shader *shader,
|
|
|
|
|
const unsigned shared_size,
|
|
|
|
|
const unsigned chunk_size);
|
2014-07-30 11:56:52 -07:00
|
|
|
|
2023-08-29 08:46:17 +02:00
|
|
|
bool nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs);
|
2023-06-09 09:41:21 -04:00
|
|
|
bool nir_lower_vec_to_regs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
|
|
|
|
|
const void *_data);
|
2024-01-10 10:14:16 -04:00
|
|
|
bool nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
|
2019-10-03 16:22:58 -04:00
|
|
|
bool alpha_to_one,
|
|
|
|
|
const gl_state_index16 *alpha_ref_state_tokens);
|
2018-05-08 11:24:40 -07:00
|
|
|
bool nir_lower_alu(nir_shader *shader);
|
2018-08-18 11:46:46 -07:00
|
|
|
|
|
|
|
|
bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask,
|
2020-07-22 22:13:16 -04:00
|
|
|
bool always_precise);
|
2018-08-18 11:46:46 -07:00
|
|
|
|
2022-04-15 15:06:40 -05:00
|
|
|
bool nir_scale_fdiv(nir_shader *shader);
|
|
|
|
|
|
2019-08-29 21:14:54 -07:00
|
|
|
bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
|
2021-07-06 19:08:04 +02:00
|
|
|
bool nir_lower_alu_width(nir_shader *shader, nir_vectorize_cb cb, const void *data);
|
2023-09-21 14:03:55 +02:00
|
|
|
bool nir_lower_alu_vec8_16_srcs(nir_shader *shader);
|
2018-10-24 09:25:29 +02:00
|
|
|
bool nir_lower_bool_to_bitsize(nir_shader *shader);
|
2022-11-24 09:24:35 +01:00
|
|
|
bool nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne);
|
2018-10-18 12:04:09 -05:00
|
|
|
bool nir_lower_bool_to_int32(nir_shader *shader);
|
2020-09-30 16:15:02 -05:00
|
|
|
bool nir_opt_simplify_convert_alu_types(nir_shader *shader);
|
2021-10-10 20:33:15 +11:00
|
|
|
bool nir_lower_const_arrays_to_uniforms(nir_shader *shader,
|
|
|
|
|
unsigned max_uniform_components);
|
2020-09-30 16:15:02 -05:00
|
|
|
bool nir_lower_convert_alu_types(nir_shader *shader,
|
|
|
|
|
bool (*should_lower)(nir_intrinsic_instr *));
|
|
|
|
|
bool nir_lower_constant_convert_alu_types(nir_shader *shader);
|
2020-10-01 18:39:12 -05:00
|
|
|
bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader);
|
2019-04-30 22:25:05 -07:00
|
|
|
bool nir_lower_int_to_float(nir_shader *shader);
|
2017-02-24 15:32:11 -08:00
|
|
|
bool nir_lower_load_const_to_scalar(nir_shader *shader);
|
2017-07-05 21:57:44 -07:00
|
|
|
bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
|
2021-02-23 11:31:41 +01:00
|
|
|
bool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all);
|
2017-10-23 15:51:29 +11:00
|
|
|
void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer);
|
2024-01-10 10:19:00 -04:00
|
|
|
bool nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
|
2018-01-19 13:05:35 +11:00
|
|
|
bool outputs_only);
|
2023-07-21 12:53:49 -04:00
|
|
|
bool nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask, nir_instr_filter_cb filter, void *filter_data);
|
2020-09-21 15:56:25 +01:00
|
|
|
bool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
|
2019-03-06 15:21:51 -06:00
|
|
|
bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask);
|
2021-01-06 14:50:57 +00:00
|
|
|
bool nir_vectorize_tess_levels(nir_shader *shader);
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_shader *nir_create_passthrough_tcs_impl(const nir_shader_compiler_options *options,
|
|
|
|
|
unsigned *locations, unsigned num_locations,
|
|
|
|
|
uint8_t patch_vertices);
|
|
|
|
|
nir_shader *nir_create_passthrough_tcs(const nir_shader_compiler_options *options,
|
|
|
|
|
const nir_shader *vs, uint8_t patch_vertices);
|
|
|
|
|
nir_shader *nir_create_passthrough_gs(const nir_shader_compiler_options *options,
|
|
|
|
|
const nir_shader *prev_stage,
|
|
|
|
|
enum mesa_prim primitive_type,
|
2023-09-21 19:24:49 +02:00
|
|
|
enum mesa_prim output_primitive_type,
|
2023-08-08 12:00:35 -05:00
|
|
|
bool emulate_edgeflags,
|
2024-11-29 00:22:37 +01:00
|
|
|
bool force_line_strip_out,
|
|
|
|
|
bool passthrough_prim_id);
|
2015-01-21 15:23:32 -08:00
|
|
|
|
2021-04-22 11:28:42 -04:00
|
|
|
bool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs);
|
2020-07-06 09:38:01 +02:00
|
|
|
bool nir_lower_fragcoord_wtrans(nir_shader *shader);
|
2024-10-27 13:50:16 +01:00
|
|
|
bool nir_opt_frag_coord_to_pixel_coord(nir_shader *shader);
|
2023-06-23 10:23:43 -04:00
|
|
|
bool nir_lower_frag_coord_to_pixel_coord(nir_shader *shader);
|
2021-07-02 01:16:13 +02:00
|
|
|
bool nir_lower_viewport_transform(nir_shader *shader);
|
2021-04-09 16:10:30 -07:00
|
|
|
bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);
|
2019-02-08 22:36:37 +01:00
|
|
|
|
2021-03-05 19:58:25 +02:00
|
|
|
bool nir_lower_is_helper_invocation(nir_shader *shader);
|
|
|
|
|
|
2021-12-01 16:30:08 -06:00
|
|
|
bool nir_lower_single_sampled(nir_shader *shader);
|
|
|
|
|
|
2024-05-22 09:21:58 +02:00
|
|
|
bool nir_lower_atomics(nir_shader *shader, nir_instr_filter_cb filter);
|
|
|
|
|
|
2017-08-22 13:23:59 -07:00
|
|
|
typedef struct nir_lower_subgroups_options {
|
2024-05-06 22:31:14 -04:00
|
|
|
/* In addition to the boolean lowering options below, this optional callback
|
|
|
|
|
* will filter instructions for lowering if non-NULL. The data passed will be
|
2024-11-29 17:09:16 +01:00
|
|
|
* filter_data.
|
2024-05-06 22:31:14 -04:00
|
|
|
*/
|
|
|
|
|
nir_instr_filter_cb filter;
|
|
|
|
|
|
2024-11-29 17:09:16 +01:00
|
|
|
/* Extra data passed to the filter. */
|
|
|
|
|
const void *filter_data;
|
|
|
|
|
|
2024-11-29 17:09:15 +01:00
|
|
|
/* In case the exact subgroup size is not known, subgroup_size should be
|
|
|
|
|
* set to 0. In that case, the maximum subgroup size will be calculated by
|
|
|
|
|
* ballot_components * ballot_bit_size.
|
|
|
|
|
*/
|
2017-08-22 18:57:56 -07:00
|
|
|
uint8_t subgroup_size;
|
2017-08-22 18:44:51 -07:00
|
|
|
uint8_t ballot_bit_size;
|
2020-09-10 18:48:04 +02:00
|
|
|
uint8_t ballot_components;
|
2023-08-08 12:00:35 -05:00
|
|
|
bool lower_to_scalar : 1;
|
|
|
|
|
bool lower_vote_trivial : 1;
|
|
|
|
|
bool lower_vote_eq : 1;
|
2023-10-24 13:40:47 -05:00
|
|
|
bool lower_vote_bool_eq : 1;
|
2023-10-23 10:28:30 -05:00
|
|
|
bool lower_first_invocation_to_ballot : 1;
|
2023-10-23 10:33:14 -05:00
|
|
|
bool lower_read_first_invocation : 1;
|
2023-08-08 12:00:35 -05:00
|
|
|
bool lower_subgroup_masks : 1;
|
|
|
|
|
bool lower_relative_shuffle : 1;
|
|
|
|
|
bool lower_shuffle_to_32bit : 1;
|
|
|
|
|
bool lower_shuffle_to_swizzle_amd : 1;
|
|
|
|
|
bool lower_shuffle : 1;
|
|
|
|
|
bool lower_quad : 1;
|
|
|
|
|
bool lower_quad_broadcast_dynamic : 1;
|
|
|
|
|
bool lower_quad_broadcast_dynamic_to_const : 1;
|
2024-09-30 17:56:26 -05:00
|
|
|
bool lower_quad_vote : 1;
|
2023-08-08 12:00:35 -05:00
|
|
|
bool lower_elect : 1;
|
|
|
|
|
bool lower_read_invocation_to_cond : 1;
|
|
|
|
|
bool lower_rotate_to_shuffle : 1;
|
2024-11-29 17:09:16 +01:00
|
|
|
bool lower_rotate_clustered_to_shuffle : 1;
|
2023-08-08 12:00:35 -05:00
|
|
|
bool lower_ballot_bit_count_to_mbcnt_amd : 1;
|
2019-02-01 11:37:50 +01:00
|
|
|
bool lower_inverse_ballot : 1;
|
2024-05-06 22:31:53 -04:00
|
|
|
bool lower_reduce : 1;
|
2019-02-04 12:55:32 +01:00
|
|
|
bool lower_boolean_reduce : 1;
|
2024-01-17 14:12:43 +01:00
|
|
|
bool lower_boolean_shuffle : 1;
|
2017-08-22 13:23:59 -07:00
|
|
|
} nir_lower_subgroups_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_subgroups(nir_shader *shader,
|
|
|
|
|
const nir_lower_subgroups_options *options);
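/*
 * Usage sketch (illustrative): a wave32 GPU that wants scalar ballots and
 * shuffle-based relative operations might configure the pass like this. The
 * exact set of flags is an example, not a recommendation.
 *
 *    static bool
 *    example_lower_subgroups(nir_shader *s)
 *    {
 *       const nir_lower_subgroups_options opts = {
 *          .subgroup_size = 32,
 *          .ballot_bit_size = 32,
 *          .ballot_components = 1,
 *          .lower_to_scalar = true,
 *          .lower_relative_shuffle = true,
 *          .lower_quad = true,
 *       };
 *       return nir_lower_subgroups(s, &opts);
 *    }
 */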
|
|
|
|
|
|
2015-09-17 13:00:58 -07:00
|
|
|
bool nir_lower_system_values(nir_shader *shader);
|
2015-09-16 12:56:58 -04:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *
|
2022-11-29 12:41:08 +01:00
|
|
|
nir_build_lowered_load_helper_invocation(struct nir_builder *b);
|
|
|
|
|
|
2020-08-21 10:40:45 -07:00
|
|
|
typedef struct nir_lower_compute_system_values_options {
|
2023-08-08 12:00:35 -05:00
|
|
|
bool has_base_global_invocation_id : 1;
|
|
|
|
|
bool has_base_workgroup_id : 1;
|
2024-08-01 12:42:12 +02:00
|
|
|
bool has_global_size : 1;
|
2023-08-08 12:00:35 -05:00
|
|
|
bool shuffle_local_ids_for_quad_derivatives : 1;
|
|
|
|
|
bool lower_local_invocation_index : 1;
|
|
|
|
|
bool lower_cs_local_id_to_index : 1;
|
|
|
|
|
bool lower_workgroup_id_to_index : 1;
|
2024-11-19 01:15:44 -04:00
|
|
|
bool global_id_is_32bit : 1;
|
2023-03-31 12:19:43 +02:00
|
|
|
/* At shader execution time, check if WorkGroupId should be 1D
|
|
|
|
|
* and compute it quickly. Fall back to slow computation if not.
|
|
|
|
|
*/
|
2023-08-08 12:00:35 -05:00
|
|
|
bool shortcut_1d_workgroup_id : 1;
|
2023-09-08 15:52:21 +01:00
|
|
|
uint32_t num_workgroups[3]; /* Compile-time-known dispatch sizes, or 0 if unknown. */
|
2020-08-21 10:40:45 -07:00
|
|
|
} nir_lower_compute_system_values_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_compute_system_values(nir_shader *shader,
|
|
|
|
|
const nir_lower_compute_system_values_options *options);
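/*
 * Usage sketch (illustrative): a driver that supplies a base workgroup ID
 * system value but wants the local invocation index computed in the shader
 * could configure the pass as below; the chosen flags are example values.
 *
 *    static bool
 *    example_lower_cs_sysvals(nir_shader *s)
 *    {
 *       const nir_lower_compute_system_values_options opts = {
 *          .has_base_workgroup_id = true,
 *          .lower_local_invocation_index = true,
 *       };
 *       return nir_lower_compute_system_values(s, &opts);
 *    }
 */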
|
2020-08-21 10:18:14 -07:00
|
|
|
|
2021-09-27 14:20:20 +02:00
|
|
|
struct nir_lower_sysvals_to_varyings_options {
|
2023-08-08 12:00:35 -05:00
|
|
|
bool frag_coord : 1;
|
|
|
|
|
bool front_face : 1;
|
|
|
|
|
bool point_coord : 1;
|
2021-09-27 14:20:20 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
nir_lower_sysvals_to_varyings(nir_shader *shader,
|
|
|
|
|
const struct nir_lower_sysvals_to_varyings_options *options);
|
|
|
|
|
|
2023-08-20 20:42:34 +03:00
|
|
|
/***/
|
2023-06-06 21:18:17 +02:00
|
|
|
enum ENUM_PACKED nir_lower_tex_packing {
|
2021-07-07 17:44:27 -05:00
|
|
|
/** No packing */
|
2018-12-19 13:53:39 -08:00
|
|
|
nir_lower_tex_packing_none = 0,
|
2021-07-07 17:44:27 -05:00
|
|
|
/**
|
|
|
|
|
* The sampler returns up to 2 32-bit words of half floats or 16-bit signed
|
2018-12-19 13:53:39 -08:00
|
|
|
* or unsigned ints based on the sampler type
|
|
|
|
|
*/
|
|
|
|
|
nir_lower_tex_packing_16,
|
2021-07-07 17:44:27 -05:00
|
|
|
/** The sampler returns 1 32-bit word of 4x8 unorm */
|
2018-12-19 13:53:39 -08:00
|
|
|
nir_lower_tex_packing_8,
|
|
|
|
|
};
|
|
|
|
|
|
2023-08-20 20:42:34 +03:00
|
|
|
/***/
|
2015-09-16 12:56:58 -04:00
|
|
|
typedef struct nir_lower_tex_options {
|
|
|
|
|
/**
|
|
|
|
|
* bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
|
|
|
|
|
* sampler types a texture projector is lowered.
|
|
|
|
|
*/
|
|
|
|
|
unsigned lower_txp;
|
2015-09-16 16:49:14 -04:00
|
|
|
|
2022-03-22 10:56:43 -04:00
|
|
|
/**
|
|
|
|
|
* If true, lower texture projector for any array sampler dims
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txp_array;
|
|
|
|
|
|
2016-07-20 20:32:31 -07:00
|
|
|
/**
|
|
|
|
|
* If true, lower away nir_tex_src_offset for all texelfetch instructions.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txf_offset;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* If true, lower away nir_tex_src_offset for all rect textures.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_rect_offset;
|
|
|
|
|
|
2021-12-09 12:55:21 -08:00
|
|
|
/**
|
|
|
|
|
* If not NULL, this filter will return true for tex instructions that
|
|
|
|
|
* should lower away nir_tex_src_offset.
|
|
|
|
|
*/
|
|
|
|
|
nir_instr_filter_cb lower_offset_filter;
|
|
|
|
|
|
2015-09-16 16:49:14 -04:00
|
|
|
/**
|
|
|
|
|
* If true, lower rect textures to 2D, using txs to fetch the
|
|
|
|
|
* texture dimensions and dividing the texture coords by the
|
|
|
|
|
* texture dims to normalize.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_rect;
|
2015-09-18 10:44:27 -04:00
|
|
|
|
2023-11-27 09:40:50 -04:00
|
|
|
/**
|
|
|
|
|
* If true, lower 1D textures to 2D. This requires the GL/VK driver to map 1D
|
|
|
|
|
* textures to 2D textures with height=1.
|
|
|
|
|
*
|
|
|
|
|
* lower_1d_shadow does this lowering for shadow textures only.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_1d;
|
|
|
|
|
bool lower_1d_shadow;
|
|
|
|
|
|
2016-05-01 21:13:37 -07:00
|
|
|
/**
|
|
|
|
|
* If true, convert yuv to rgb.
|
|
|
|
|
*/
|
|
|
|
|
unsigned lower_y_uv_external;
|
2023-02-09 12:39:33 +00:00
|
|
|
unsigned lower_y_vu_external;
|
2016-05-01 21:13:37 -07:00
|
|
|
unsigned lower_y_u_v_external;
|
|
|
|
|
unsigned lower_yx_xuxv_external;
|
2023-02-09 12:39:33 +00:00
|
|
|
unsigned lower_yx_xvxu_external;
|
2017-06-16 13:40:31 +08:00
|
|
|
unsigned lower_xy_uxvx_external;
|
2023-02-09 12:39:33 +00:00
|
|
|
unsigned lower_xy_vxux_external;
|
2018-11-08 16:28:20 +00:00
|
|
|
unsigned lower_ayuv_external;
|
2019-02-12 16:02:20 -08:00
|
|
|
unsigned lower_xyuv_external;
|
2020-09-28 20:11:18 +00:00
|
|
|
unsigned lower_yuv_external;
|
2021-03-12 23:07:09 -08:00
|
|
|
unsigned lower_yu_yv_external;
|
2023-02-09 12:39:33 +00:00
|
|
|
unsigned lower_yv_yu_external;
|
2021-03-10 19:49:05 -08:00
|
|
|
unsigned lower_y41x_external;
|
2020-07-08 13:23:22 +09:00
|
|
|
unsigned bt709_external;
|
|
|
|
|
unsigned bt2020_external;
|
2022-05-21 23:25:02 +00:00
|
|
|
unsigned yuv_full_range_external;
|
2016-05-01 21:13:37 -07:00
|
|
|
|
2015-09-18 10:44:27 -04:00
|
|
|
/**
|
|
|
|
|
* To emulate certain texture wrap modes, this can be used
|
|
|
|
|
* to saturate the specified tex coord to [0.0, 1.0]. The
|
|
|
|
|
* bits are according to sampler #, i.e. if, for example:
|
|
|
|
|
*
|
|
|
|
|
* (conf->saturate_s & (1 << n))
|
|
|
|
|
*
|
|
|
|
|
* is true, then the s coord for sampler n is saturated.
|
|
|
|
|
*
|
|
|
|
|
* Note that clamping must happen *after* projector lowering
|
|
|
|
|
* so any projected texture sample instruction with a clamped
|
|
|
|
|
* coordinate gets automatically lowered, regardless of the
|
|
|
|
|
* 'lower_txp' setting.
|
|
|
|
|
*/
|
|
|
|
|
unsigned saturate_s;
|
|
|
|
|
unsigned saturate_t;
|
|
|
|
|
unsigned saturate_r;
|
2015-11-11 18:30:31 -08:00
|
|
|
|
2016-02-06 09:05:10 -08:00
|
|
|
/* Bitmask of textures that need swizzling.
|
2015-11-11 18:30:31 -08:00
|
|
|
*
|
2016-02-06 09:05:10 -08:00
|
|
|
* If (swizzle_result & (1 << texture_index)), then the swizzle in
|
|
|
|
|
* swizzles[texture_index] is applied to the result of the texturing
|
2015-11-11 18:30:31 -08:00
|
|
|
* operation.
|
|
|
|
|
*/
|
|
|
|
|
unsigned swizzle_result;
|
|
|
|
|
|
2016-02-06 09:05:10 -08:00
|
|
|
/* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles
|
2015-11-11 18:30:31 -08:00
|
|
|
* while 4 and 5 represent 0 and 1 respectively.
|
2020-10-28 12:59:24 +01:00
|
|
|
*
|
|
|
|
|
* Indexed by texture-id.
|
2015-11-11 18:30:31 -08:00
|
|
|
*/
|
|
|
|
|
uint8_t swizzles[32][4];
|
2016-04-19 08:28:22 -04:00
|
|
|
|
2020-10-28 12:59:24 +01:00
|
|
|
/* Can be used to scale sampled values in range required by the
|
|
|
|
|
* format.
|
|
|
|
|
*
|
|
|
|
|
* Indexed by texture-id.
|
|
|
|
|
*/
|
2019-02-11 09:25:18 +02:00
|
|
|
float scale_factors[32];
|
|
|
|
|
|
2016-04-19 08:28:22 -04:00
|
|
|
/**
|
|
|
|
|
* Bitmap of textures that need srgb to linear conversion. If
|
|
|
|
|
* (lower_srgb & (1 << texture_index)) then the rgb (xyz) components
|
|
|
|
|
* of the texture are lowered to linear.
|
|
|
|
|
*/
|
|
|
|
|
unsigned lower_srgb;
|
2016-11-30 09:40:43 +01:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd on cube maps with nir_texop_txl.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd_cube_map;
|
2016-11-30 11:31:01 +01:00
|
|
|
|
2018-12-13 11:40:58 -08:00
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd_3d;
|
|
|
|
|
|
2022-03-30 16:36:06 -07:00
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd on any array surface with nir_texop_txl.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd_array;
|
|
|
|
|
|
2016-11-30 11:31:01 +01:00
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd on shadow samplers (except cube maps)
|
|
|
|
|
* with nir_texop_txl. Notice that cube map shadow samplers are lowered
|
|
|
|
|
* with lower_txd_cube_map.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd_shadow;
|
2017-11-21 16:21:36 -08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd on all samplers to a nir_texop_txl.
|
|
|
|
|
* Implies lower_txd_cube_map and lower_txd_shadow.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd;
|
2018-10-11 14:14:29 -05:00
|
|
|
|
2023-04-10 17:23:32 -05:00
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd when it uses min_lod.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd_clamp;
|
|
|
|
|
|
2018-10-11 14:14:29 -05:00
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txb instructions that try to use shadow compare and min_lod
|
|
|
|
|
* at the same time to a nir_texop_lod, some math, and nir_texop_tex.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txb_shadow_clamp;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd on shadow samplers when it uses min_lod
|
|
|
|
|
* with nir_texop_txl. This includes cube maps.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd_shadow_clamp;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd when it uses both offset and min_lod
|
|
|
|
|
* with nir_texop_txl. This includes cube maps.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd_offset_clamp;
|
2018-12-19 13:53:39 -08:00
|
|
|
|
2019-02-08 17:56:52 -06:00
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
|
|
|
|
|
* sampler is bindless.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd_clamp_bindless_sampler;
|
|
|
|
|
|
2019-02-08 17:51:24 -06:00
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
|
|
|
|
|
* sampler index is not statically determinable to be less than 16.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txd_clamp_if_sampler_index_not_lt_16;
|
|
|
|
|
|
2019-06-17 11:43:13 +02:00
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
|
|
|
|
|
* 0-lod followed by a nir_ishr.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txs_lod;
|
|
|
|
|
|
2021-07-21 16:39:15 -05:00
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_txs for cube arrays to a nir_texop_txs with a
|
|
|
|
|
* 2D array type followed by a nir_idiv by 6.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_txs_cube_array;
|
|
|
|
|
|
2018-12-26 22:45:04 -08:00
|
|
|
/**
|
|
|
|
|
* If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
|
|
|
|
|
* mixed-up tg4 locations.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_tg4_broadcom_swizzle;
|
|
|
|
|
|
2019-03-19 18:47:20 +01:00
|
|
|
/**
|
|
|
|
|
* If true, lowers tg4 with 4 constant offsets to 4 tg4 calls
|
|
|
|
|
*/
|
|
|
|
|
bool lower_tg4_offsets;
|
|
|
|
|
|
2021-08-04 16:17:39 +01:00
|
|
|
/**
|
|
|
|
|
* Lower txf_ms to fragment_mask_fetch and fragment_fetch, and samples_identical to
|
|
|
|
|
* fragment_mask_fetch.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_to_fragment_fetch_amd;
|
|
|
|
|
|
2020-10-28 12:59:24 +01:00
|
|
|
/**
|
2023-08-07 10:48:45 +02:00
|
|
|
* To lower packed sampler return formats. This will be called for all
|
|
|
|
|
* tex instructions.
|
2020-10-28 12:59:24 +01:00
|
|
|
*/
|
2023-08-07 10:48:45 +02:00
|
|
|
enum nir_lower_tex_packing (*lower_tex_packing_cb)(const nir_tex_instr *tex, const void *data);
|
|
|
|
|
const void *lower_tex_packing_data;
|
2021-12-10 13:45:36 +01:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* If true, lower nir_texop_lod to return -FLT_MAX if the sum of the
|
|
|
|
|
* absolute values of derivatives is 0 for all coordinates.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_lod_zero_width;
|
2021-12-09 12:55:21 -08:00
|
|
|
|
2022-04-25 16:55:45 -07:00
|
|
|
/* Turns nir_texop_tex and other ops with an implicit derivative, in stages
|
|
|
|
|
* without implicit derivatives (like the vertex shader), into ones with an explicit
|
|
|
|
|
* LOD with a value of 0.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_invalid_implicit_lod;
|
|
|
|
|
|
2023-02-25 22:59:07 -05:00
|
|
|
/* If true, texture_index (sampler_index) will be zero if a texture_offset
|
|
|
|
|
* (sampler_offset) source is present. This is convenient for backends that
|
|
|
|
|
* support indirect indexing of textures (samplers) but not offsetting it.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_index_to_offset;
|
|
|
|
|
|
2021-12-09 12:55:21 -08:00
|
|
|
/**
|
|
|
|
|
* Payload data to be sent to callback / filter functions.
|
|
|
|
|
*/
|
|
|
|
|
void *callback_data;
|
2015-09-16 12:56:58 -04:00
|
|
|
} nir_lower_tex_options;
|
|
|
|
|
|
2021-07-07 17:44:27 -05:00
|
|
|
/** Lowers complex texture instructions to simpler ones */
|
2015-11-11 10:46:09 -08:00
|
|
|
bool nir_lower_tex(nir_shader *shader,
|
2015-09-16 12:56:58 -04:00
|
|
|
const nir_lower_tex_options *options);
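/*
 * Usage sketch (illustrative): lowering texture projectors and rectangle
 * textures on hardware without native support. The lower_txp mask shown is
 * just an example value covering all sampler dims.
 *
 *    static bool
 *    example_lower_tex(nir_shader *s)
 *    {
 *       const nir_lower_tex_options opts = {
 *          .lower_txp = ~0u,
 *          .lower_rect = true,
 *          .lower_txd_cube_map = true,
 *       };
 *       return nir_lower_tex(s, &opts);
 *    }
 */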
|
|
|
|
|
|
2021-12-23 13:25:53 +01:00
|
|
|
typedef struct nir_lower_tex_shadow_swizzle {
|
2023-08-08 12:00:35 -05:00
|
|
|
unsigned swizzle_r : 3;
|
|
|
|
|
unsigned swizzle_g : 3;
|
|
|
|
|
unsigned swizzle_b : 3;
|
|
|
|
|
unsigned swizzle_a : 3;
|
2021-12-23 13:25:53 +01:00
|
|
|
} nir_lower_tex_shadow_swizzle;
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
nir_lower_tex_shadow(nir_shader *s,
|
|
|
|
|
unsigned n_states,
|
|
|
|
|
enum compare_func *compare_func,
|
|
|
|
|
nir_lower_tex_shadow_swizzle *tex_swizzles);
|
|
|
|
|
|
2021-07-15 01:14:17 -07:00
|
|
|
typedef struct nir_lower_image_options {
|
|
|
|
|
/**
|
|
|
|
|
* If true, lower cube size operations.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_cube_size;
|
2022-09-08 14:43:39 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Lower multisample image load and samples_identical to use fragment_mask_load.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_to_fragment_mask_load_amd;
|
2023-01-24 11:35:43 +01:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
/**
|
|
|
|
|
* Lower image_samples to a constant in case the driver doesn't support multisampled
|
|
|
|
|
* images.
|
|
|
|
|
*/
|
|
|
|
|
bool lower_image_samples_to_one;
|
2021-07-15 01:14:17 -07:00
|
|
|
} nir_lower_image_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_image(nir_shader *nir,
|
|
|
|
|
const nir_lower_image_options *options);
|
|
|
|
|
|
2023-05-18 23:20:15 -04:00
|
|
|
bool
|
|
|
|
|
nir_lower_image_atomics_to_global(nir_shader *s);
|
|
|
|
|
|
2021-04-20 08:08:32 -07:00
|
|
|
bool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable);
|
2020-09-02 16:51:51 -05:00
|
|
|
|
2019-02-27 14:36:44 -06:00
|
|
|
enum nir_lower_non_uniform_access_type {
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_lower_non_uniform_ubo_access = (1 << 0),
|
|
|
|
|
nir_lower_non_uniform_ssbo_access = (1 << 1),
|
2019-02-27 14:36:44 -06:00
|
|
|
nir_lower_non_uniform_texture_access = (1 << 2),
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_lower_non_uniform_image_access = (1 << 3),
|
|
|
|
|
nir_lower_non_uniform_get_ssbo_size = (1 << 4),
|
2024-08-04 16:49:43 +02:00
|
|
|
nir_lower_non_uniform_access_type_count = 5,
|
2019-02-27 14:36:44 -06:00
|
|
|
};
|
|
|
|
|
|
2021-03-22 11:23:29 +00:00
|
|
|
/* Given the nir_src used for the resource, return the channels which might be non-uniform. */
|
|
|
|
|
typedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *);
|
|
|
|
|
|
|
|
|
|
typedef struct nir_lower_non_uniform_access_options {
|
|
|
|
|
enum nir_lower_non_uniform_access_type types;
|
|
|
|
|
nir_lower_non_uniform_access_callback callback;
|
|
|
|
|
void *callback_data;
|
|
|
|
|
} nir_lower_non_uniform_access_options;
|
|
|
|
|
|
2022-07-15 17:00:22 +03:00
|
|
|
bool nir_has_non_uniform_access(nir_shader *shader, enum nir_lower_non_uniform_access_type types);
|
|
|
|
|
bool nir_opt_non_uniform_access(nir_shader *shader);
|
2019-02-27 14:36:44 -06:00
|
|
|
bool nir_lower_non_uniform_access(nir_shader *shader,
|
2021-03-22 11:23:29 +00:00
|
|
|
const nir_lower_non_uniform_access_options *options);
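/*
 * Usage sketch (illustrative): lowering divergent texture and image handles
 * while leaving UBO/SSBO indexing alone.
 *
 *    static bool
 *    example_lower_non_uniform(nir_shader *s)
 *    {
 *       const nir_lower_non_uniform_access_options opts = {
 *          .types = nir_lower_non_uniform_texture_access |
 *                   nir_lower_non_uniform_image_access,
 *       };
 *       return nir_lower_non_uniform_access(s, &opts);
 *    }
 */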
|
2019-02-27 14:36:44 -06:00
|
|
|
|
2021-04-07 19:17:46 +01:00
|
|
|
typedef struct {
|
|
|
|
|
/* Whether 16-bit floating point arithmetic should be allowed in 8-bit
|
|
|
|
|
* division lowering
|
|
|
|
|
*/
|
|
|
|
|
bool allow_fp16;
|
|
|
|
|
} nir_lower_idiv_options;
|
2019-02-05 15:56:24 +00:00
|
|
|
|
2021-04-07 19:17:46 +01:00
|
|
|
bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);
|
2014-07-30 12:07:45 -07:00
|
|
|
|
2020-07-01 16:55:46 +02:00
|
|
|
typedef struct nir_input_attachment_options {
|
|
|
|
|
bool use_fragcoord_sysval;
|
2020-07-01 17:16:01 +02:00
|
|
|
bool use_layer_id_sysval;
|
2020-07-01 17:29:45 +02:00
|
|
|
bool use_view_id_for_layer;
|
2024-09-19 08:17:05 -04:00
|
|
|
bool unscaled_depth_stencil_ir3;
|
2022-12-13 18:09:28 +01:00
|
|
|
uint32_t unscaled_input_attachment_ir3;
|
2020-07-01 16:55:46 +02:00
|
|
|
} nir_input_attachment_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_input_attachments(nir_shader *shader,
|
|
|
|
|
const nir_input_attachment_options *options);
|
2019-04-03 17:29:20 +02:00
|
|
|
|
2019-10-02 16:19:08 -04:00
|
|
|
bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
|
|
|
|
|
bool use_vars,
|
2019-10-02 16:30:45 -04:00
|
|
|
bool use_clipdist_array,
|
|
|
|
|
const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
|
2019-10-02 16:19:08 -04:00
|
|
|
bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
|
2019-10-02 16:30:45 -04:00
|
|
|
bool use_clipdist_array,
|
|
|
|
|
const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
|
2019-10-02 16:19:08 -04:00
|
|
|
bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
|
2024-11-18 12:43:22 -05:00
|
|
|
bool use_clipdist_array, bool use_load_interp);
|
2023-10-17 09:42:27 +11:00
|
|
|
|
2024-01-10 09:45:01 -04:00
|
|
|
bool nir_lower_clip_cull_distance_to_vec4s(nir_shader *shader);
|
2017-02-24 15:38:28 -08:00
|
|
|
bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
|
2020-06-17 09:35:46 -04:00
|
|
|
bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);
|
2015-09-09 14:57:15 -04:00
|
|
|
|
2024-01-10 10:15:45 -04:00
|
|
|
bool nir_lower_point_size_mov(nir_shader *shader,
|
2019-10-03 16:44:29 -04:00
|
|
|
const gl_state_index16 *pointsize_state_tokens);
|
|
|
|
|
|
2019-03-22 09:24:57 +01:00
|
|
|
bool nir_lower_frexp(nir_shader *nir);
|
|
|
|
|
|
2021-07-04 11:25:50 +02:00
|
|
|
bool nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);
|
2016-02-01 17:29:22 -05:00
|
|
|
|
2017-03-02 11:18:04 -08:00
|
|
|
bool nir_lower_clamp_color_outputs(nir_shader *shader);
|
2016-02-01 17:34:12 -05:00
|
|
|
|
2019-01-24 13:07:42 +10:00
|
|
|
bool nir_lower_flatshade(nir_shader *shader);
|
|
|
|
|
|
2024-01-10 10:17:27 -04:00
|
|
|
bool nir_lower_passthrough_edgeflags(nir_shader *shader);
|
2018-07-18 16:42:03 -07:00
|
|
|
bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
|
|
|
|
|
const gl_state_index16 *uniform_state_tokens);
|
2015-09-17 13:17:08 -04:00
|
|
|
|
2015-11-07 10:59:09 -05:00
|
|
|
typedef struct nir_lower_wpos_ytransform_options {
|
2017-11-16 16:19:22 +01:00
|
|
|
gl_state_index16 state_tokens[STATE_LENGTH];
|
2023-08-08 12:00:35 -05:00
|
|
|
bool fs_coord_origin_upper_left : 1;
|
|
|
|
|
bool fs_coord_origin_lower_left : 1;
|
|
|
|
|
bool fs_coord_pixel_center_integer : 1;
|
|
|
|
|
bool fs_coord_pixel_center_half_integer : 1;
|
2015-11-07 10:59:09 -05:00
|
|
|
} nir_lower_wpos_ytransform_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_wpos_ytransform(nir_shader *shader,
|
|
|
|
|
const nir_lower_wpos_ytransform_options *options);
|
2021-12-02 14:41:41 -06:00
|
|
|
bool nir_lower_wpos_center(nir_shader *shader);
|
2015-11-07 10:59:09 -05:00
|
|
|
|
2020-12-22 14:37:45 +02:00
|
|
|
bool nir_lower_pntc_ytransform(nir_shader *shader,
|
|
|
|
|
const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
|
|
|
|
|
|
2020-05-06 14:20:09 -07:00
|
|
|
bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
|
|
|
|
|
|
2019-04-26 10:05:08 -07:00
|
|
|
bool nir_lower_fb_read(nir_shader *shader);
|
|
|
|
|
|
2015-12-21 21:27:25 -05:00
|
|
|
typedef struct nir_lower_drawpixels_options {
|
2017-11-16 16:19:22 +01:00
|
|
|
gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
|
|
|
|
|
gl_state_index16 scale_state_tokens[STATE_LENGTH];
|
|
|
|
|
gl_state_index16 bias_state_tokens[STATE_LENGTH];
|
2015-12-21 21:27:25 -05:00
|
|
|
unsigned drawpix_sampler;
|
|
|
|
|
unsigned pixelmap_sampler;
|
2023-08-08 12:00:35 -05:00
|
|
|
bool pixel_maps : 1;
|
|
|
|
|
bool scale_and_bias : 1;
|
2015-12-21 21:27:25 -05:00
|
|
|
} nir_lower_drawpixels_options;
|
|
|
|
|
|
2024-01-10 10:06:16 -04:00
|
|
|
bool nir_lower_drawpixels(nir_shader *shader,
|
2015-12-21 21:27:25 -05:00
|
|
|
const nir_lower_drawpixels_options *options);
|
|
|
|
|
|
2015-12-21 21:54:00 -05:00
|
|
|
typedef struct nir_lower_bitmap_options {
|
|
|
|
|
unsigned sampler;
|
|
|
|
|
bool swizzle_xxxx;
|
|
|
|
|
} nir_lower_bitmap_options;
|
|
|
|
|
|
2024-01-10 10:08:11 -04:00
|
|
|
bool nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);
|
2015-12-21 21:54:00 -05:00
|
|
|
|
2022-05-27 13:34:09 -04:00
|
|
|
bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned offset_align_state);
|
2018-11-12 09:17:34 +01:00
|
|
|
|
2020-06-08 12:16:13 +02:00
|
|
|
typedef enum {
|
|
|
|
|
nir_lower_gs_intrinsics_per_stream = 1 << 0,
|
|
|
|
|
nir_lower_gs_intrinsics_count_primitives = 1 << 1,
|
2020-06-16 18:58:39 +02:00
|
|
|
nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2,
|
2020-07-15 13:44:39 +02:00
|
|
|
nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3,
|
2023-07-16 11:05:35 -04:00
|
|
|
nir_lower_gs_intrinsics_always_end_primitive = 1 << 4,
|
2023-08-14 16:45:44 -04:00
|
|
|
nir_lower_gs_intrinsics_count_decomposed_primitives = 1 << 5,
|
2020-06-08 12:16:13 +02:00
|
|
|
} nir_lower_gs_intrinsics_flags;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options);
|
2015-05-12 01:05:29 -07:00
|
|
|
|
2023-07-14 10:28:37 -04:00
|
|
|
bool nir_lower_tess_coord_z(nir_shader *shader, bool triangles);
|
|
|
|
|
|
2022-05-26 13:20:36 +02:00
|
|
|
typedef struct {
|
|
|
|
|
bool payload_to_shared_for_atomics : 1;
|
2022-09-07 12:43:10 +02:00
|
|
|
bool payload_to_shared_for_small_types : 1;
|
2022-10-24 14:55:38 +02:00
|
|
|
uint32_t payload_offset_in_bytes;
|
2022-05-26 13:20:36 +02:00
|
|
|
} nir_lower_task_shader_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_task_shader(nir_shader *shader, nir_lower_task_shader_options options);
|
|
|
|
|
|
2020-11-05 22:53:52 -06:00
|
|
|
typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *);
|
2018-04-26 10:02:04 +02:00
|
|
|
|
|
|
|
|
bool nir_lower_bit_size(nir_shader *shader,
|
|
|
|
|
nir_lower_bit_size_callback callback,
|
|
|
|
|
void *callback_data);
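/*
 * Callback sketch (illustrative): widen all sub-32-bit integer/float ALU
 * instructions to 32 bits. The callback returns the bit size to lower to, or
 * 0 to leave the instruction alone. The check below assumes the nir_alu_instr
 * layout of current NIR (a `def` field holding the destination).
 *
 *    static unsigned
 *    example_bit_size_cb(const nir_instr *instr, void *data)
 *    {
 *       if (instr->type != nir_instr_type_alu)
 *          return 0;
 *       const nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       if (alu->def.bit_size != 1 && alu->def.bit_size < 32)
 *          return 32;
 *       return 0;
 *    }
 *
 *    static bool
 *    example_widen_alu(nir_shader *s)
 *    {
 *       return nir_lower_bit_size(s, example_bit_size_cb, NULL);
 *    }
 */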
|
2020-06-05 15:43:26 -07:00
|
|
|
bool nir_lower_64bit_phis(nir_shader *shader);
|
2018-04-26 10:02:04 +02:00
|
|
|
|
2022-04-13 17:57:06 +02:00
|
|
|
bool nir_split_64bit_vec3_and_vec4(nir_shader *shader);
|
|
|
|
|
|
2019-03-01 17:39:54 -06:00
|
|
|
nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
|
2020-07-13 20:28:16 +02:00
|
|
|
bool nir_lower_int64(nir_shader *shader);
|
2023-05-12 02:20:39 -07:00
|
|
|
bool nir_lower_int64_float_conversions(nir_shader *shader);
|
2017-02-23 13:56:15 -08:00
|
|
|
|
2019-03-01 17:39:54 -06:00
|
|
|
nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
|
2019-03-04 15:55:19 -06:00
|
|
|
bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
|
|
|
|
|
nir_lower_doubles_options options);
|
2018-04-27 09:28:48 +02:00
|
|
|
bool nir_lower_pack(nir_shader *shader);
|
2015-11-12 11:40:34 +01:00
|
|
|
|
2022-01-04 12:34:23 -05:00
|
|
|
bool nir_recompute_io_bases(nir_shader *nir, nir_variable_mode modes);
|
2022-08-23 14:54:37 -07:00
|
|
|
bool nir_lower_mediump_vars(nir_shader *nir, nir_variable_mode modes);
|
2021-02-07 21:10:08 -05:00
|
|
|
bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
|
|
|
|
|
uint64_t varying_mask, bool use_16bit_slots);
|
|
|
|
|
bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
|
|
|
|
|
nir_alu_type types);
|
|
|
|
|
bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);
|
2022-06-04 15:56:31 +02:00
|
|
|
|
2024-04-10 09:59:50 +02:00
|
|
|
struct nir_opt_tex_srcs_options {
|
2022-06-04 15:56:31 +02:00
|
|
|
unsigned sampler_dims;
|
|
|
|
|
unsigned src_types;
|
|
|
|
|
};
|
|
|
|
|
|
2024-04-10 09:59:50 +02:00
|
|
|
struct nir_opt_16bit_tex_image_options {
|
2022-06-04 15:56:31 +02:00
|
|
|
nir_rounding_mode rounding_mode;
|
2024-04-10 09:59:50 +02:00
|
|
|
nir_alu_type opt_tex_dest_types;
|
|
|
|
|
nir_alu_type opt_image_dest_types;
|
2024-04-13 22:16:15 +02:00
|
|
|
bool integer_dest_saturates;
|
2024-04-10 09:59:50 +02:00
|
|
|
bool opt_image_store_data;
|
|
|
|
|
bool opt_image_srcs;
|
|
|
|
|
unsigned opt_srcs_options_count;
|
|
|
|
|
struct nir_opt_tex_srcs_options *opt_srcs_options;
|
2022-06-04 15:56:31 +02:00
|
|
|
};
|
|
|
|
|
|
2024-04-10 09:59:50 +02:00
|
|
|
bool nir_opt_16bit_tex_image(nir_shader *nir,
|
|
|
|
|
struct nir_opt_16bit_tex_image_options *options);
|
2021-02-07 21:10:08 -05:00
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
bool legalize_type; /* whether this src should be legalized */
|
|
|
|
|
uint8_t bit_size; /* bit_size to enforce */
|
|
|
|
|
nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
|
|
|
|
|
} nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];
|
|
|
|
|
|
|
|
|
|
bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
|
|
|
|
|
nir_tex_src_type_constraints constraints);
|
2020-04-23 19:10:43 -04:00
|
|
|
|
2019-07-31 09:47:08 +02:00
|
|
|
bool nir_lower_point_size(nir_shader *shader, float min, float max);
|
|
|
|
|
|
2020-08-27 13:25:15 +02:00
|
|
|
void nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace,
|
2021-02-17 09:48:34 -05:00
|
|
|
bool point_coord_is_sysval, bool yinvert);
|
2020-08-27 13:25:15 +02:00
|
|
|
|
2024-12-04 11:14:06 +01:00
|
|
|
bool nir_lower_texcoord_replace_late(nir_shader *s, unsigned coord_replace,
|
2022-12-17 23:02:32 -05:00
|
|
|
bool point_coord_is_sysval);
|
|
|
|
|
|
2019-04-03 19:29:36 -04:00
|
|
|
typedef enum {
|
|
|
|
|
nir_lower_interpolation_at_sample = (1 << 1),
|
|
|
|
|
nir_lower_interpolation_at_offset = (1 << 2),
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_lower_interpolation_centroid = (1 << 3),
|
|
|
|
|
nir_lower_interpolation_pixel = (1 << 4),
|
|
|
|
|
nir_lower_interpolation_sample = (1 << 5),
|
2019-04-03 19:29:36 -04:00
|
|
|
} nir_lower_interpolation_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_interpolation(nir_shader *shader,
|
|
|
|
|
nir_lower_interpolation_options options);
|
|
|
|
|
|
2022-04-13 16:22:47 -07:00
|
|
|
typedef enum {
|
|
|
|
|
nir_lower_discard_if_to_cf = (1 << 0),
|
|
|
|
|
nir_lower_demote_if_to_cf = (1 << 1),
|
|
|
|
|
nir_lower_terminate_if_to_cf = (1 << 2),
|
|
|
|
|
} nir_lower_discard_if_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_discard_if(nir_shader *shader, nir_lower_discard_if_options options);
|
2021-12-14 15:59:45 -05:00
|
|
|
|
2024-03-20 19:17:55 -05:00
|
|
|
bool nir_lower_terminate_to_demote(nir_shader *nir);
|
|
|
|
|
|
2020-05-01 14:32:31 +01:00
|
|
|
bool nir_lower_memory_model(nir_shader *shader);
|
|
|
|
|
|
2020-04-06 12:52:06 +02:00
|
|
|
bool nir_lower_goto_ifs(nir_shader *shader);
|
2021-12-01 17:46:16 +01:00
|
|
|
bool nir_lower_continue_constructs(nir_shader *shader);
|
2020-04-06 12:52:06 +02:00
|
|
|
|
2024-10-09 17:58:35 -07:00
|
|
|
typedef struct nir_lower_multiview_options {
|
|
|
|
|
uint32_t view_mask;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Bitfield of output locations that may be converted to a per-view array.
|
|
|
|
|
*
|
|
|
|
|
* If a variable exists in an allowed location, it will be converted to an
|
|
|
|
|
* array even if its value does not depend on the view index.
|
|
|
|
|
*/
|
|
|
|
|
uint64_t allowed_per_view_outputs;
|
|
|
|
|
} nir_lower_multiview_options;
|
|
|
|
|
|
2020-07-06 15:25:03 +02:00
|
|
|
bool nir_shader_uses_view_index(nir_shader *shader);
|
2024-10-09 17:58:35 -07:00
|
|
|
bool nir_can_lower_multiview(nir_shader *shader, nir_lower_multiview_options options);
|
|
|
|
|
bool nir_lower_multiview(nir_shader *shader, nir_lower_multiview_options options);
|
2020-07-06 15:25:03 +02:00
|
|
|
|
2024-07-23 10:30:06 -07:00
|
|
|
bool nir_lower_view_index_to_device_index(nir_shader *shader);
|
|
|
|
|
|
2023-01-30 11:10:03 -08:00
|
|
|
typedef enum {
|
|
|
|
|
nir_lower_fp16_rtz = (1 << 0),
|
|
|
|
|
nir_lower_fp16_rtne = (1 << 1),
|
|
|
|
|
nir_lower_fp16_ru = (1 << 2),
|
|
|
|
|
nir_lower_fp16_rd = (1 << 3),
|
|
|
|
|
nir_lower_fp16_all = 0xf,
|
2023-10-04 14:23:59 +01:00
|
|
|
nir_lower_fp16_split_fp64 = (1 << 4),
|
2023-01-30 11:10:03 -08:00
|
|
|
} nir_lower_fp16_cast_options;
|
|
|
|
|
bool nir_lower_fp16_casts(nir_shader *shader, nir_lower_fp16_cast_options options);
|
2015-09-17 13:18:41 -07:00
|
|
|
bool nir_normalize_cubemap_coords(nir_shader *shader);
|
2015-04-02 16:38:30 -07:00
|
|
|
|
2021-07-29 14:59:11 -07:00
|
|
|
bool nir_shader_supports_implicit_lod(nir_shader *shader);
|
|
|
|
|
|
2023-08-15 10:11:43 -05:00
|
|
|
void nir_live_defs_impl(nir_function_impl *impl);
|
2016-12-13 14:39:51 +11:00
|
|
|
|
2023-08-15 10:11:43 -05:00
|
|
|
const BITSET_WORD *nir_get_live_defs(nir_cursor cursor, void *mem_ctx);
|
2020-05-15 12:07:22 -05:00
|
|
|
|
2016-12-13 14:39:51 +11:00
|
|
|
void nir_loop_analyze_impl(nir_function_impl *impl,
|
2022-05-11 20:12:56 +10:00
|
|
|
nir_variable_mode indirect_mask,
|
|
|
|
|
bool force_unroll_sampler_indirect);
|
2016-12-13 14:39:51 +11:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
bool nir_defs_interfere(nir_def *a, nir_def *b);
|
2014-10-29 14:17:17 -07:00
|
|
|
|
2016-02-12 21:52:46 -08:00
|
|
|
bool nir_repair_ssa_impl(nir_function_impl *impl);
|
|
|
|
|
bool nir_repair_ssa(nir_shader *shader);
|
2015-06-24 05:28:34 -07:00
|
|
|
|
2016-08-29 10:02:34 +10:00
|
|
|
void nir_convert_loop_to_lcssa(nir_loop *loop);
|
2019-08-05 15:24:18 +01:00
|
|
|
bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
|
2024-04-09 18:14:12 +01:00
|
|
|
void nir_divergence_analysis_impl(nir_function_impl *impl, nir_divergence_options options);
|
2020-09-02 11:45:46 +01:00
|
|
|
void nir_divergence_analysis(nir_shader *shader);
|
2023-10-01 00:21:16 -04:00
|
|
|
void nir_vertex_divergence_analysis(nir_shader *shader);
|
2021-11-18 23:14:26 -05:00
|
|
|
bool nir_has_divergent_loop(nir_shader *shader);
|
2024-10-31 21:34:50 -04:00
|
|
|
void nir_clear_divergence_info(nir_shader *nir);
|
2016-08-29 10:02:34 +10:00
|
|
|
|
2023-05-22 13:04:05 -04:00
|
|
|
void
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_rewrite_uses_to_load_reg(struct nir_builder *b, nir_def *old,
|
|
|
|
|
nir_def *reg);
|
2023-05-22 13:04:05 -04:00
|
|
|
|
2015-07-08 01:57:00 -07:00
|
|
|
/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
|
|
|
|
|
* registers. If false, convert all values (even those not involved in a phi
|
2023-08-01 10:35:21 -04:00
|
|
|
* node) to registers.
|
2015-06-24 05:28:34 -07:00
|
|
|
*/
|
2023-05-31 13:26:53 -05:00
|
|
|
bool nir_convert_from_ssa(nir_shader *shader,
|
2023-08-01 10:35:21 -04:00
|
|
|
bool phi_webs_only);
|
2014-07-22 14:05:06 -07:00
|
|
|
|
2016-12-19 20:11:47 -08:00
|
|
|
bool nir_lower_phis_to_regs_block(nir_block *block);
|
|
|
|
|
bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
|
2023-07-28 17:07:49 +02:00
|
|
|
|
|
|
|
|
bool nir_rematerialize_deref_in_use_blocks(nir_deref_instr *instr);
|
2018-09-11 12:15:22 -05:00
|
|
|
bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);
|
2016-12-19 20:11:47 -08:00
|
|
|
|
2019-08-28 22:34:14 +02:00
|
|
|
bool nir_lower_samplers(nir_shader *shader);
|
2022-09-26 00:03:35 +02:00
|
|
|
bool nir_lower_cl_images(nir_shader *shader, bool lower_image_derefs, bool lower_sampler_derefs);
|
2022-07-01 14:38:11 +02:00
|
|
|
bool nir_dedup_inline_samplers(nir_shader *shader);
|
2024-09-17 16:36:19 +02:00
|
|
|
|
|
|
|
|
typedef struct nir_lower_ssbo_options {
|
|
|
|
|
bool native_loads;
|
2024-09-17 16:40:48 +02:00
|
|
|
bool native_offset;
|
2024-09-17 16:36:19 +02:00
|
|
|
} nir_lower_ssbo_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_ssbo(nir_shader *shader, const nir_lower_ssbo_options *opts);
|
|
|
|
|
|
2023-02-20 14:16:05 -05:00
|
|
|
bool nir_lower_helper_writes(nir_shader *shader, bool lower_plain_stores);
|
2019-08-28 22:34:14 +02:00
|
|
|
|
2020-08-13 17:21:18 -07:00
|
|
|
typedef struct nir_lower_printf_options {
|
|
|
|
|
unsigned max_buffer_size;
|
2024-02-28 09:34:58 +02:00
|
|
|
unsigned ptr_bit_size;
|
2024-02-27 18:44:25 +02:00
|
|
|
bool use_printf_base_identifier;
|
2024-12-04 12:18:57 -05:00
|
|
|
|
|
|
|
|
/* Some drivers may know the address of the printf buffer at compile-time. If
|
|
|
|
|
* buffer_address is nonzero, it will be used instead of intrinsics.
|
|
|
|
|
*/
|
|
|
|
|
uint64_t buffer_address;
|
2020-08-13 17:21:18 -07:00
|
|
|
} nir_lower_printf_options;
|
|
|
|
|
|
|
|
|
|
bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);
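/*
 * Usage sketch (illustrative): lowering printf for an OpenCL-style driver
 * with a 1 MiB buffer addressed through 64-bit pointers. The sizes are
 * example values only.
 *
 *    static bool
 *    example_lower_printf(nir_shader *s)
 *    {
 *       const nir_lower_printf_options opts = {
 *          .max_buffer_size = 1024 * 1024,
 *          .ptr_bit_size = 64,
 *       };
 *       return nir_lower_printf(s, &opts);
 *    }
 */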
|
|
|
|
|
|
2019-06-17 16:27:37 -07:00
|
|
|
/* This is here for unit tests. */
|
|
|
|
|
bool nir_opt_comparison_pre_impl(nir_function_impl *impl);
|
|
|
|
|
|
2018-05-22 18:19:16 -07:00
|
|
|
bool nir_opt_comparison_pre(nir_shader *shader);
|
|
|
|
|
|
2020-08-25 17:36:14 +01:00
|
|
|
typedef struct nir_opt_access_options {
|
|
|
|
|
bool is_vulkan;
|
|
|
|
|
} nir_opt_access_options;
|
|
|
|
|
|
|
|
|
|
bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);
|
2014-12-12 11:13:10 -08:00
|
|
|
bool nir_opt_algebraic(nir_shader *shader);
|
2017-01-13 17:25:11 +11:00
|
|
|
bool nir_opt_algebraic_before_ffma(nir_shader *shader);
|
2023-08-12 16:17:46 -07:00
|
|
|
bool nir_opt_algebraic_before_lower_int64(nir_shader *shader);
|
2015-03-23 17:11:49 -07:00
|
|
|
bool nir_opt_algebraic_late(nir_shader *shader);
|
Cycles are helped.
total fills in shared programs: 24905 -> 24901 (-0.02%)
fills in affected programs: 117 -> 113 (-3.42%)
helped: 4
HURT: 0
LOST: 0
GAINED: 16
Haswell
total instructions in shared programs: 13148927 -> 13131528 (-0.13%)
instructions in affected programs: 2220941 -> 2203542 (-0.78%)
helped: 8017
HURT: 4
helped stats (abs) min: 1 max: 12 x̄: 2.17 x̃: 1
helped stats (rel) min: 0.07% max: 15.25% x̄: 1.40% x̃: 0.93%
HURT stats (abs) min: 1 max: 7 x̄: 2.50 x̃: 1
HURT stats (rel) min: 0.33% max: 4.76% x̄: 2.73% x̃: 2.91%
95% mean confidence interval for instructions value: -2.21 -2.13
95% mean confidence interval for instructions %-change: -1.43% -1.37%
Instructions are helped.
total cycles in shared programs: 321221791 -> 321079870 (-0.04%)
cycles in affected programs: 126886055 -> 126744134 (-0.11%)
helped: 4674
HURT: 1729
helped stats (abs) min: 1 max: 23654 x̄: 56.47 x̃: 16
helped stats (rel) min: <.01% max: 53.22% x̄: 2.13% x̃: 1.05%
HURT stats (abs) min: 1 max: 3694 x̄: 70.58 x̃: 18
HURT stats (rel) min: <.01% max: 63.06% x̄: 2.48% x̃: 0.90%
95% mean confidence interval for cycles value: -33.31 -11.02
95% mean confidence interval for cycles %-change: -0.99% -0.78%
Cycles are helped.
total spills in shared programs: 19872 -> 19874 (0.01%)
spills in affected programs: 21 -> 23 (9.52%)
helped: 0
HURT: 1
total fills in shared programs: 20941 -> 20941 (0.00%)
fills in affected programs: 62 -> 62 (0.00%)
helped: 1
HURT: 1
LOST: 0
GAINED: 8
Ivy Bridge
total instructions in shared programs: 11875553 -> 11853839 (-0.18%)
instructions in affected programs: 1553112 -> 1531398 (-1.40%)
helped: 7304
HURT: 3
helped stats (abs) min: 1 max: 16 x̄: 2.97 x̃: 2
helped stats (rel) min: 0.07% max: 15.25% x̄: 1.62% x̃: 1.15%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 1.05% max: 3.33% x̄: 2.44% x̃: 2.94%
95% mean confidence interval for instructions value: -3.04 -2.90
95% mean confidence interval for instructions %-change: -1.65% -1.59%
Instructions are helped.
total cycles in shared programs: 178246425 -> 178184484 (-0.03%)
cycles in affected programs: 13702146 -> 13640205 (-0.45%)
helped: 4409
HURT: 1566
helped stats (abs) min: 1 max: 531 x̄: 24.52 x̃: 13
helped stats (rel) min: <.01% max: 38.67% x̄: 2.14% x̃: 1.02%
HURT stats (abs) min: 1 max: 356 x̄: 29.48 x̃: 10
HURT stats (rel) min: <.01% max: 64.73% x̄: 1.87% x̃: 0.70%
95% mean confidence interval for cycles value: -11.60 -9.14
95% mean confidence interval for cycles %-change: -1.19% -0.99%
Cycles are helped.
LOST: 0
GAINED: 10
Sandy Bridge
total instructions in shared programs: 10695740 -> 10667483 (-0.26%)
instructions in affected programs: 2337607 -> 2309350 (-1.21%)
helped: 10720
HURT: 1
helped stats (abs) min: 1 max: 49 x̄: 2.64 x̃: 2
helped stats (rel) min: 0.07% max: 20.00% x̄: 1.54% x̃: 1.13%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 1.04% max: 1.04% x̄: 1.04% x̃: 1.04%
95% mean confidence interval for instructions value: -2.69 -2.58
95% mean confidence interval for instructions %-change: -1.57% -1.51%
Instructions are helped.
total cycles in shared programs: 153478839 -> 153416223 (-0.04%)
cycles in affected programs: 22050900 -> 21988284 (-0.28%)
helped: 5342
HURT: 2200
helped stats (abs) min: 1 max: 1020 x̄: 20.34 x̃: 16
helped stats (rel) min: <.01% max: 24.05% x̄: 1.51% x̃: 0.86%
HURT stats (abs) min: 1 max: 335 x̄: 20.93 x̃: 6
HURT stats (rel) min: <.01% max: 20.18% x̄: 1.03% x̃: 0.30%
95% mean confidence interval for cycles value: -9.18 -7.42
95% mean confidence interval for cycles %-change: -0.82% -0.71%
Cycles are helped.
Iron Lake
total instructions in shared programs: 8114882 -> 8105574 (-0.11%)
instructions in affected programs: 1232504 -> 1223196 (-0.76%)
helped: 4109
HURT: 2
helped stats (abs) min: 1 max: 6 x̄: 2.27 x̃: 1
helped stats (rel) min: 0.05% max: 8.33% x̄: 0.99% x̃: 0.66%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 0.94% max: 4.35% x̄: 2.65% x̃: 2.65%
95% mean confidence interval for instructions value: -2.31 -2.21
95% mean confidence interval for instructions %-change: -1.01% -0.96%
Instructions are helped.
total cycles in shared programs: 188504036 -> 188466296 (-0.02%)
cycles in affected programs: 31203798 -> 31166058 (-0.12%)
helped: 3447
HURT: 36
helped stats (abs) min: 2 max: 92 x̄: 11.03 x̃: 8
helped stats (rel) min: <.01% max: 5.41% x̄: 0.21% x̃: 0.13%
HURT stats (abs) min: 2 max: 30 x̄: 7.33 x̃: 6
HURT stats (rel) min: 0.01% max: 1.65% x̄: 0.18% x̃: 0.10%
95% mean confidence interval for cycles value: -11.16 -10.51
95% mean confidence interval for cycles %-change: -0.22% -0.20%
Cycles are helped.
LOST: 0
GAINED: 1
GM45
total instructions in shared programs: 4989697 -> 4984531 (-0.10%)
instructions in affected programs: 703952 -> 698786 (-0.73%)
helped: 2493
HURT: 2
helped stats (abs) min: 1 max: 6 x̄: 2.07 x̃: 1
helped stats (rel) min: 0.05% max: 8.33% x̄: 1.03% x̃: 0.66%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 0.95% max: 4.35% x̄: 2.65% x̃: 2.65%
95% mean confidence interval for instructions value: -2.13 -2.01
95% mean confidence interval for instructions %-change: -1.07% -0.99%
Instructions are helped.
total cycles in shared programs: 128929136 -> 128903886 (-0.02%)
cycles in affected programs: 21583096 -> 21557846 (-0.12%)
helped: 2214
HURT: 17
helped stats (abs) min: 2 max: 92 x̄: 11.44 x̃: 8
helped stats (rel) min: <.01% max: 5.41% x̄: 0.24% x̃: 0.13%
HURT stats (abs) min: 2 max: 8 x̄: 4.24 x̃: 4
HURT stats (rel) min: 0.01% max: 1.65% x̄: 0.20% x̃: 0.09%
95% mean confidence interval for cycles value: -11.75 -10.88
95% mean confidence interval for cycles %-change: -0.25% -0.22%
Cycles are helped.
LOST: 1
GAINED: 1
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/1359>
2019-07-15 15:55:00 -07:00
|
|
|
bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
|
2014-11-14 21:35:25 -08:00
|
|
|
bool nir_opt_constant_folding(nir_shader *shader);
|
2014-12-12 11:13:10 -08:00
|
|
|
|
2020-02-21 10:53:05 -08:00
|
|
|
/* Try to combine a and b into a. Return true if combination was possible,
|
|
|
|
|
* which will result in b being removed by the pass. Return false if
|
|
|
|
|
* combination wasn't possible.
|
|
|
|
|
*/
|
2023-03-02 13:30:47 -05:00
|
|
|
typedef bool (*nir_combine_barrier_cb)(
|
2020-02-21 10:53:05 -08:00
|
|
|
nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);
|
|
|
|
|
|
2023-03-02 13:30:47 -05:00
|
|
|
bool nir_opt_combine_barriers(nir_shader *shader,
|
|
|
|
|
nir_combine_barrier_cb combine_cb,
|
|
|
|
|
void *data);
|
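/* A hedged sketch of a combine callback that merges any two barriers by
 * taking the union of their memory modes/semantics and the wider of their
 * scopes.  It assumes the intrinsic index accessors used below apply to the
 * barriers the driver emits and that such a merge is legal for the target;
 * real drivers encode their own rules here.
 */
static inline bool
nir_combine_all_barriers_example(nir_intrinsic_instr *a,
                                 nir_intrinsic_instr *b, void *data)
{
   nir_intrinsic_set_memory_modes(
      a, nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b));
   nir_intrinsic_set_memory_semantics(
      a, nir_intrinsic_memory_semantics(a) | nir_intrinsic_memory_semantics(b));
   nir_intrinsic_set_memory_scope(
      a, MAX2(nir_intrinsic_memory_scope(a), nir_intrinsic_memory_scope(b)));
   nir_intrinsic_set_execution_scope(
      a, MAX2(nir_intrinsic_execution_scope(a), nir_intrinsic_execution_scope(b)));
   return true;
}
/* Typical call: nir_opt_combine_barriers(shader, nir_combine_all_barriers_example, NULL); */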
2023-08-21 18:53:20 -07:00
|
|
|
bool nir_opt_barrier_modes(nir_shader *shader);
|
2020-02-21 10:53:05 -08:00
|
|
|
|
2019-03-08 10:08:20 -08:00
|
|
|
bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
|
|
|
|
|
|
2021-11-23 13:27:14 +00:00
|
|
|
bool nir_copy_prop_impl(nir_function_impl *impl);
|
2014-07-23 11:19:50 -07:00
|
|
|
bool nir_copy_prop(nir_shader *shader);
|
|
|
|
|
|
2016-12-09 22:31:26 -08:00
|
|
|
bool nir_opt_copy_prop_vars(nir_shader *shader);
|
|
|
|
|
|
2014-11-11 16:11:34 -08:00
|
|
|
bool nir_opt_cse(nir_shader *shader);
|
|
|
|
|
|
2014-07-24 15:51:58 -07:00
|
|
|
bool nir_opt_dce(nir_shader *shader);
|
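/* A hedged sketch of the classic NIR cleanup loop built from the passes
 * declared above: run them until nothing makes progress.  Pass selection and
 * ordering are per-driver tuning decisions; this is not a recommendation.
 */
static inline void
nir_optimize_loop_example(nir_shader *shader)
{
   bool progress;
   do {
      progress = false;
      progress |= nir_copy_prop(shader);
      progress |= nir_opt_dce(shader);
      progress |= nir_opt_cse(shader);
      progress |= nir_opt_constant_folding(shader);
      progress |= nir_opt_algebraic(shader);
   } while (progress);
}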
|
|
|
|
|
2015-05-01 02:38:17 -04:00
|
|
|
bool nir_opt_dead_cf(nir_shader *shader);
|
|
|
|
|
|
2018-07-27 13:56:35 -07:00
|
|
|
bool nir_opt_dead_write_vars(nir_shader *shader);
|
|
|
|
|
|
2019-03-04 16:17:02 -06:00
|
|
|
bool nir_opt_deref_impl(nir_function_impl *impl);
|
2018-12-13 11:08:13 -06:00
|
|
|
bool nir_opt_deref(nir_shader *shader);
|
|
|
|
|
|
2018-07-23 19:16:56 -07:00
|
|
|
bool nir_opt_find_array_copies(nir_shader *shader);
|
|
|
|
|
|
2021-05-07 16:36:47 +02:00
|
|
|
bool nir_opt_fragdepth(nir_shader *shader);
|
|
|
|
|
|
2016-08-10 14:34:49 -07:00
|
|
|
bool nir_opt_gcm(nir_shader *shader, bool value_number);
|
2015-02-03 10:11:23 -08:00
|
|
|
|
2024-09-03 11:41:37 -07:00
|
|
|
bool nir_opt_generate_bfi(nir_shader *shader);
|
|
|
|
|
|
2017-12-28 13:06:28 -08:00
|
|
|
bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
|
|
|
|
|
|
2024-01-05 17:37:33 +00:00
|
|
|
bool nir_opt_mqsad(nir_shader *shader);
|
|
|
|
|
|
2022-08-08 17:12:14 +02:00
|
|
|
typedef enum {
|
2023-08-08 16:48:49 +02:00
|
|
|
nir_opt_if_optimize_phi_true_false = (1 << 0),
|
|
|
|
|
nir_opt_if_avoid_64bit_phis = (1 << 1),
|
2022-08-08 17:12:14 +02:00
|
|
|
} nir_opt_if_options;
|
|
|
|
|
|
|
|
|
|
bool nir_opt_if(nir_shader *shader, nir_opt_if_options options);
|
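/* Illustrative only: running nir_opt_if with both option bits set.  Whether
 * either bit is appropriate depends on the backend (e.g. 64-bit phi support).
 */
static inline bool
nir_opt_if_example(nir_shader *shader)
{
   return nir_opt_if(shader, nir_opt_if_optimize_phi_true_false |
                                nir_opt_if_avoid_64bit_phis);
}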
2016-12-19 13:11:43 -08:00
|
|
|
|
2017-06-22 12:13:25 -07:00
|
|
|
bool nir_opt_intrinsics(nir_shader *shader);
|
|
|
|
|
|
2018-06-28 19:16:58 -07:00
|
|
|
bool nir_opt_large_constants(nir_shader *shader,
|
|
|
|
|
glsl_type_size_align_func size_align,
|
|
|
|
|
unsigned threshold);
|
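/* A sketch of a typical invocation.  glsl_get_natural_size_align_bytes() is
 * one glsl_type_size_align_func drivers commonly pass; the 1024-byte
 * threshold is an arbitrary example value, not a tuned recommendation.
 */
static inline bool
nir_opt_large_constants_example(nir_shader *shader)
{
   return nir_opt_large_constants(shader, glsl_get_natural_size_align_bytes,
                                  1024);
}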
|
|
|
|
|
2024-04-16 11:32:46 +02:00
|
|
|
bool nir_opt_licm(nir_shader *shader);
|
2023-07-31 22:26:50 +02:00
|
|
|
bool nir_opt_loop(nir_shader *shader);
|
|
|
|
|
|
2021-07-29 19:34:26 +10:00
|
|
|
bool nir_opt_loop_unroll(nir_shader *shader);
|
2016-09-15 15:49:57 +10:00
|
|
|
|
2019-05-22 20:23:03 +01:00
|
|
|
typedef enum {
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_move_const_undef = (1 << 0),
|
|
|
|
|
nir_move_load_ubo = (1 << 1),
|
|
|
|
|
nir_move_load_input = (1 << 2),
|
|
|
|
|
nir_move_comparisons = (1 << 3),
|
|
|
|
|
nir_move_copies = (1 << 4),
|
|
|
|
|
nir_move_load_ssbo = (1 << 5),
|
|
|
|
|
nir_move_load_uniform = (1 << 6),
|
nir/opt_sink: Move ALU with constant sources
In general, sinking ALU instructions can negatively impact register pressure,
since it extends the live ranges of the sources, although it does shrink the live range
of the destination.
However, constants do not usually contribute to register pressure. This is not a
totally true assumption, but it's pretty good in practice, since...
* constants can be rematerialized (backend-dependent)
* constants can often be inlined (ISA-dependent)
* constants can sometimes be promoted to free uniform registers (ISA-dependent)
* constants can live in scalar registers although the ALU destination might need
a vector register (and vector registers are assumed to be much more expensive
than scalar registers, again ISA-dependent)
So, assume that constants have zero effect on register pressure. Now consider an
ALU instruction where all but one source is a constant. Then there are two
cases:
1. The ALU instruction is moved past when its source was otherwise killed. Then
there is no effect on register pressure, since the source live range is
extended exactly as much as the destination live range shrinks.
2. The ALU instruction is moved down but its source is still alive where it's
moved to. Then register pressure is improved, since the source live range is
unchanged while the destination live range shrinks.
So, as a heuristic, we always move ALU instructions where n-1 sources are
constant. As an inevitable special case, this also (necessarily) moves unary ALU
ops, which should be beneficial by the same justification. This is not 100%
perfect but it is well-motivated. Results on AGX are decent:
total instructions in shared programs: 1796101 -> 1795652 (-0.02%)
instructions in affected programs: 326822 -> 326373 (-0.14%)
helped: 800
HURT: 371
Inconclusive result (%-change mean confidence interval includes 0).
total bytes in shared programs: 11805004 -> 11801424 (-0.03%)
bytes in affected programs: 2610630 -> 2607050 (-0.14%)
helped: 912
HURT: 462
Inconclusive result (%-change mean confidence interval includes 0).
total halfregs in shared programs: 525818 -> 515399 (-1.98%)
halfregs in affected programs: 118197 -> 107778 (-8.81%)
helped: 2095
HURT: 804
Halfregs are helped.
total threads in shared programs: 18916608 -> 18917056 (<.01%)
threads in affected programs: 4800 -> 5248 (9.33%)
helped: 7
HURT: 0
Threads are helped.
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24833>
2023-08-20 12:19:55 -04:00
|
|
|
nir_move_alu = (1 << 7),
|
2019-05-22 20:23:03 +01:00
|
|
|
} nir_move_options;
|
|
|
|
|
|
|
|
|
|
bool nir_can_move_instr(nir_instr *instr, nir_move_options options);
|
|
|
|
|
|
|
|
|
|
bool nir_opt_sink(nir_shader *shader, nir_move_options options);
|
|
|
|
|
|
2019-07-24 19:23:21 +01:00
|
|
|
bool nir_opt_move(nir_shader *shader, nir_move_options options);
|
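/* Illustrative flag set for the sink/move passes: sink constants and UBO
 * loads toward their uses, then hoist comparisons.  Which bits actually help
 * is a per-backend register-pressure question; this is a sketch, not guidance.
 */
static inline bool
nir_sink_and_move_example(nir_shader *shader)
{
   const nir_move_options opts =
      nir_move_const_undef | nir_move_load_ubo | nir_move_comparisons;

   bool progress = false;
   progress |= nir_opt_sink(shader, opts);
   progress |= nir_opt_move(shader, opts);
   return progress;
}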
2018-01-26 12:38:57 +01:00
|
|
|
|
2022-01-10 14:49:09 -08:00
|
|
|
typedef struct {
|
|
|
|
|
/** nir_load_uniform max base offset */
|
|
|
|
|
uint32_t uniform_max;
|
|
|
|
|
|
2021-12-23 15:21:30 -08:00
|
|
|
/** nir_load_ubo_vec4 max base offset */
|
|
|
|
|
uint32_t ubo_vec4_max;
|
|
|
|
|
|
2022-01-10 14:49:09 -08:00
|
|
|
/** nir_var_mem_shared max base offset */
|
|
|
|
|
uint32_t shared_max;
|
|
|
|
|
|
2024-09-08 12:26:18 +02:00
|
|
|
/** nir_var_mem_shared atomic max base offset */
|
|
|
|
|
uint32_t shared_atomic_max;
|
|
|
|
|
|
2022-01-10 14:49:09 -08:00
|
|
|
/** nir_load/store_buffer_amd max base offset */
|
|
|
|
|
uint32_t buffer_max;
|
2024-04-10 10:43:38 +02:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Callback to get the max base offset for instructions for which the
|
|
|
|
|
* corresponding value above is zero.
|
|
|
|
|
*/
|
|
|
|
|
uint32_t (*max_offset_cb)(nir_intrinsic_instr *intr, const void *data);
|
|
|
|
|
|
|
|
|
|
/** Data to pass to max_offset_cb. */
|
|
|
|
|
const void *max_offset_data;
|
2024-04-10 10:43:38 +02:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Allow the offset calculation to wrap. If false, constant additions that
|
|
|
|
|
* might wrap will not be folded into the offset.
|
|
|
|
|
*/
|
|
|
|
|
bool allow_offset_wrap;
|
2022-01-10 14:49:09 -08:00
|
|
|
} nir_opt_offsets_options;
|
|
|
|
|
|
|
|
|
|
bool nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options);
|
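/* A minimal sketch: fold constant address additions into uniform and
 * shared-memory offsets.  The limits below are hypothetical and must match
 * what the target's load/store encodings can actually hold; classes whose
 * field is left at zero would be handled by max_offset_cb, which this sketch
 * leaves unset.
 */
static inline bool
nir_opt_offsets_example(nir_shader *shader)
{
   const nir_opt_offsets_options opts = {
      .uniform_max = UINT32_MAX,
      .shared_max = UINT16_MAX,
      .allow_offset_wrap = false,
   };
   return nir_opt_offsets(shader, &opts);
}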
2021-03-11 12:45:31 +01:00
|
|
|
|
2018-06-27 11:41:19 -07:00
|
|
|
bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
|
2018-06-18 16:11:55 -07:00
|
|
|
bool indirect_load_ok, bool expensive_alu_ok);
|
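/* Example call: flatten if/else constructs containing at most 8 instructions
 * into selects, disallowing indirect loads and expensive ALU ops in the
 * flattened blocks.  The limit of 8 is an arbitrary illustration.
 */
static inline bool
nir_peephole_select_example(nir_shader *shader)
{
   return nir_opt_peephole_select(shader, 8, false, false);
}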
2014-11-04 10:12:14 -08:00
|
|
|
|
2022-07-26 08:49:32 -07:00
|
|
|
bool nir_opt_reassociate_bfi(nir_shader *shader);
|
|
|
|
|
|
nir: Rematerialize compare instructions
On some architectures, Boolean values used to control conditional
branches or conditional selection must be propagated into a flag. This
generally means that a stored Boolean value must be compared with zero.
Rather than force the generation of extra compares with zero, re-emit
the original comparison instruction. This can save register pressure by
not needing to store the Boolean value.
There are several possible areas for future improvement to this pass:
1. Be more conservative. If both sources to the comparison instruction
are non-constants, it may be better for register pressure to emit the
extra compare. The current shader-db results on Intel GPUs (next
commit) lead me to believe that this is not currently a problem.
2. Be less conservative. Currently the pass requires that all users of
the comparison match the pattern. The idea is that after the pass is
complete, no instruction will use the resulting Boolean value. The only
uses will be of the flag value. It may be beneficial to relax this
requirement in some cases.
3. Be less conservative. Also try to rematerialize comparisons used for
discard_if intrinsics. After changing the way the Intel compiler
generates code for discard_if (see MR!935), I tried implementing this
already. The changes were pretty small. Instructions were helped in 19
shaders, but, overall, cycles were hurt. A commit "nir: Rematerialize
comparisons for nir_intrinsic_discard_if too" is on my fd.o cgit.
4. Copy the preceding ALU instruction. If the comparison is a
comparison with zero, and it is the only user of a particular ALU
instruction (e.g., (a+b) != 0.0), it may be a further improvement to also
copy the preceding ALU instruction. On Intel GPUs, this may enable
cmod propagation to make additional progress.
v2: Use much simpler method to get the prev_block for an if-statement.
Suggested by Tim.
Reviewed-by: Matt Turner <mattst88@gmail.com>
2019-05-20 11:22:12 -07:00
|
|
|
bool nir_opt_rematerialize_compares(nir_shader *shader);
|
|
|
|
|
|
2015-02-03 01:49:44 -05:00
|
|
|
bool nir_opt_remove_phis(nir_shader *shader);
|
2024-09-06 14:01:30 +02:00
|
|
|
bool nir_remove_single_src_phis_block(nir_block *block);
|
2015-02-03 01:49:44 -05:00
|
|
|
|
2021-06-21 14:19:51 -07:00
|
|
|
bool nir_opt_phi_precision(nir_shader *shader);
|
|
|
|
|
|
2022-01-10 13:34:20 +00:00
|
|
|
bool nir_opt_shrink_stores(nir_shader *shader, bool shrink_image_store);
|
|
|
|
|
|
2024-03-07 13:47:14 -04:00
|
|
|
bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_start);
|
2018-01-29 17:19:00 +01:00
|
|
|
|
2015-08-04 16:25:24 -07:00
|
|
|
bool nir_opt_undef(nir_shader *shader);
|
|
|
|
|
|
2020-06-02 08:30:35 -07:00
|
|
|
bool nir_lower_undef_to_zero(nir_shader *shader);
|
|
|
|
|
|
2024-08-01 22:00:46 -04:00
|
|
|
bool nir_opt_uniform_atomics(nir_shader *shader, bool fs_atomics_predicated);
|
2020-09-01 16:31:37 +01:00
|
|
|
|
nir: Optimize uniform iadd, fadd, and ixor reduction operations
This adds optimizations for iadd, fadd, and ixor with reduce,
inclusive scan, and exclusive scan.
NOTE: The fadd and ixor optimizations had no shader-db or fossil-db
changes on any Intel platform.
NOTE 2: This change "fixes" arb_compute_variable_group_size-local-size
and base-local-size.shader_test on DG2 and MTL. This is just changing
the code path taken to not use whatever path was not working properly
before.
This is a subset of the things optimized by ACO. See also
https://gitlab.freedesktop.org/mesa/mesa/-/issues/3731#note_682802. The
min, max, iand, and ior exclusive_scan optimizations are not
implemented.
Broadwell on shader-db is not happy. I have not investigated.
v2: Silence some warnings about discarding const.
v3: Rename mbcnt to count_active_invocations. Add a big comment
explaining the differences between the two paths. Suggested by Rhys.
shader-db:
All Gfx9 and newer platforms had similar results. (Ice Lake shown)
total instructions in shared programs: 20300384 -> 20299545 (<.01%)
instructions in affected programs: 19167 -> 18328 (-4.38%)
helped: 35 / HURT: 0
total cycles in shared programs: 842809750 -> 842766381 (<.01%)
cycles in affected programs: 2160249 -> 2116880 (-2.01%)
helped: 33 / HURT: 2
total spills in shared programs: 4632 -> 4626 (-0.13%)
spills in affected programs: 206 -> 200 (-2.91%)
helped: 3 / HURT: 0
total fills in shared programs: 5594 -> 5581 (-0.23%)
fills in affected programs: 664 -> 651 (-1.96%)
helped: 3 / HURT: 1
fossil-db results:
All Intel platforms had similar results. (Ice Lake shown)
Totals:
Instrs: 165551893 -> 165513303 (-0.02%)
Cycles: 15132539132 -> 15125314947 (-0.05%); split: -0.05%, +0.00%
Spill count: 45258 -> 45204 (-0.12%)
Fill count: 74286 -> 74157 (-0.17%)
Scratch Memory Size: 2467840 -> 2451456 (-0.66%)
Totals from 712 (0.11% of 656120) affected shaders:
Instrs: 598931 -> 560341 (-6.44%)
Cycles: 184650167 -> 177425982 (-3.91%); split: -3.95%, +0.04%
Spill count: 983 -> 929 (-5.49%)
Fill count: 2274 -> 2145 (-5.67%)
Scratch Memory Size: 52224 -> 35840 (-31.37%)
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27044>
2023-12-19 18:40:41 -08:00
|
|
|
bool nir_opt_uniform_subgroup(nir_shader *shader,
|
|
|
|
|
const nir_lower_subgroups_options *);
|
2023-12-19 16:13:52 -08:00
|
|
|
|
2020-12-18 19:05:47 +01:00
|
|
|
bool nir_opt_vectorize(nir_shader *shader, nir_vectorize_cb filter,
|
2020-08-27 12:49:13 -07:00
|
|
|
void *data);
|
2024-05-26 23:02:42 -04:00
|
|
|
bool nir_opt_vectorize_io(nir_shader *shader, nir_variable_mode modes);
|
2015-11-14 20:26:47 -05:00
|
|
|
|
2016-11-02 01:22:07 +00:00
|
|
|
bool nir_opt_conditional_discard(nir_shader *shader);
|
2018-07-03 17:39:15 -07:00
|
|
|
bool nir_opt_move_discards_to_top(nir_shader *shader);
|
2016-11-02 01:22:07 +00:00
|
|
|
|
2021-10-19 14:14:20 +03:00
|
|
|
bool nir_opt_ray_queries(nir_shader *shader);
|
|
|
|
|
|
2022-08-18 20:18:39 +02:00
|
|
|
bool nir_opt_ray_query_ranges(nir_shader *shader);
|
|
|
|
|
|
2015-03-27 19:50:29 -07:00
|
|
|
void nir_sweep(nir_shader *shader);
|
|
|
|
|
|
2018-08-30 15:02:25 -05:00
|
|
|
void nir_remap_dual_slot_attributes(nir_shader *shader,
|
2018-08-31 07:35:17 -05:00
|
|
|
uint64_t *dual_slot_inputs);
|
2018-08-30 15:02:25 -05:00
|
|
|
uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);
|
2018-03-21 09:39:32 +01:00
|
|
|
|
2015-09-10 16:53:08 -07:00
|
|
|
nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
|
2015-08-03 16:02:16 -07:00
|
|
|
gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
|
|
|
|
|
|
2018-09-11 12:40:08 +02:00
|
|
|
static inline bool
|
|
|
|
|
nir_variable_is_in_ubo(const nir_variable *var)
|
|
|
|
|
{
|
|
|
|
|
return (var->data.mode == nir_var_mem_ubo &&
|
|
|
|
|
var->interface_type != NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_variable_is_in_ssbo(const nir_variable *var)
|
|
|
|
|
{
|
|
|
|
|
return (var->data.mode == nir_var_mem_ssbo &&
|
|
|
|
|
var->interface_type != NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_variable_is_in_block(const nir_variable *var)
|
|
|
|
|
{
|
|
|
|
|
return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var);
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-14 12:24:51 -04:00
|
|
|
static inline unsigned
|
|
|
|
|
nir_variable_count_slots(const nir_variable *var, const struct glsl_type *type)
|
|
|
|
|
{
|
2023-08-08 12:00:35 -05:00
|
|
|
return var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4) : glsl_count_attribute_slots(type, false);
|
2023-07-14 12:24:51 -04:00
|
|
|
}
|
|
|
|
|
|
2023-08-03 11:16:47 +10:00
|
|
|
static inline unsigned
|
|
|
|
|
nir_deref_count_slots(nir_deref_instr *deref, nir_variable *var)
|
|
|
|
|
{
|
|
|
|
|
if (var->data.compact) {
|
|
|
|
|
switch (deref->deref_type) {
|
|
|
|
|
case nir_deref_type_array:
|
|
|
|
|
return 1;
|
|
|
|
|
case nir_deref_type_var:
|
|
|
|
|
return nir_variable_count_slots(var, deref->type);
|
|
|
|
|
default:
|
|
|
|
|
unreachable("illegal deref type");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return glsl_count_attribute_slots(deref->type, false);
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-17 12:47:59 -08:00
|
|
|
/* See default_ub_config in nir_range_analysis.c for documentation. */
|
2019-11-12 17:51:19 +00:00
|
|
|
typedef struct nir_unsigned_upper_bound_config {
|
|
|
|
|
unsigned min_subgroup_size;
|
|
|
|
|
unsigned max_subgroup_size;
|
2021-06-04 12:04:15 -07:00
|
|
|
unsigned max_workgroup_invocations;
|
|
|
|
|
unsigned max_workgroup_count[3];
|
|
|
|
|
unsigned max_workgroup_size[3];
|
2019-11-12 17:51:19 +00:00
|
|
|
|
|
|
|
|
uint32_t vertex_attrib_max[32];
|
|
|
|
|
} nir_unsigned_upper_bound_config;
|
|
|
|
|
|
|
|
|
|
uint32_t
|
|
|
|
|
nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar scalar,
|
2019-11-12 17:51:19 +00:00
|
|
|
const nir_unsigned_upper_bound_config *config);
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_scalar ssa, unsigned const_val,
|
2019-11-12 17:51:19 +00:00
|
|
|
const nir_unsigned_upper_bound_config *config);
|
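/* A hedged sketch of querying the range analysis.  The config numbers are
 * placeholders, the hash table caches results between queries and is owned
 * by the caller, and nir_scalar is assumed to be the usual {def, comp} pair.
 */
static inline uint32_t
nir_upper_bound_example(nir_shader *shader, nir_def *def)
{
   const nir_unsigned_upper_bound_config config = {
      .min_subgroup_size = 8,
      .max_subgroup_size = 64,
      .max_workgroup_invocations = 1024,
      .max_workgroup_count = { 65535, 65535, 65535 },
      .max_workgroup_size = { 1024, 1024, 64 },
   };

   struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
   nir_scalar scalar = { .def = def, .comp = 0 };
   uint32_t bound = nir_unsigned_upper_bound(shader, range_ht, scalar, &config);
   _mesa_hash_table_destroy(range_ht, NULL);
   return bound;
}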
|
|
|
|
|
2021-09-24 18:41:29 +02:00
|
|
|
typedef struct {
|
|
|
|
|
/* True if gl_DrawID is considered uniform, i.e. if the preamble is run
|
|
|
|
|
* at least once per "internal" draw rather than per user-visible draw.
|
|
|
|
|
*/
|
|
|
|
|
bool drawid_uniform;
|
|
|
|
|
|
|
|
|
|
/* True if the subgroup size is uniform. */
|
|
|
|
|
bool subgroup_size_uniform;
|
|
|
|
|
|
2023-08-29 20:54:14 +02:00
|
|
|
/* True if load_workgroup_size is supported in the preamble. */
|
|
|
|
|
bool load_workgroup_size_allowed;
|
|
|
|
|
|
2021-09-24 18:41:29 +02:00
|
|
|
/* size/align for load/store_preamble. */
|
2023-08-12 16:17:15 -04:00
|
|
|
void (*def_size)(nir_def *def, unsigned *size, unsigned *align);
|
2021-09-24 18:41:29 +02:00
|
|
|
|
|
|
|
|
/* Total available size for load/store_preamble storage, in units
|
|
|
|
|
* determined by def_size.
|
|
|
|
|
*/
|
|
|
|
|
unsigned preamble_storage_size;
|
|
|
|
|
|
|
|
|
|
/* Give the cost for an instruction. nir_opt_preamble will prioritize
|
|
|
|
|
* instructions with higher costs. Instructions with cost 0 may still be
|
|
|
|
|
* lifted, but only when required to lift other instructions with non-0
|
|
|
|
|
* cost (e.g. a load_const source of an expression).
|
|
|
|
|
*/
|
|
|
|
|
float (*instr_cost_cb)(nir_instr *instr, const void *data);
|
|
|
|
|
|
|
|
|
|
/* Give the cost of rewriting the instruction to use load_preamble. This
|
|
|
|
|
* may happen from inserting move instructions, etc. If the benefit doesn't
|
|
|
|
|
* exceed the cost here then we won't rewrite it.
|
|
|
|
|
*/
|
2023-08-12 16:17:15 -04:00
|
|
|
float (*rewrite_cost_cb)(nir_def *def, const void *data);
|
2021-09-24 18:41:29 +02:00
|
|
|
|
|
|
|
|
/* Instructions whose definitions should not be rewritten. These could
|
|
|
|
|
* still be moved to the preamble, but they shouldn't be the root of a
|
|
|
|
|
* replacement expression. Instructions with cost 0 and derefs are
|
|
|
|
|
* automatically included by the pass.
|
|
|
|
|
*/
|
|
|
|
|
nir_instr_filter_cb avoid_instr_cb;
|
|
|
|
|
|
|
|
|
|
const void *cb_data;
|
|
|
|
|
} nir_opt_preamble_options;
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
nir_opt_preamble(nir_shader *shader,
|
|
|
|
|
const nir_opt_preamble_options *options,
|
|
|
|
|
unsigned *size);
|
|
|
|
|
|
|
|
|
|
nir_function_impl *nir_shader_get_preamble(nir_shader *shader);
|
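/* A hedged sketch of nir_opt_preamble usage.  The def_size callback measures
 * defs in 32-bit slots and every cost below is a made-up constant; real
 * backends derive these numbers from their ISA and preamble storage.
 */
static void
example_preamble_def_size(nir_def *def, unsigned *size, unsigned *align)
{
   *size = def->num_components * DIV_ROUND_UP(def->bit_size, 32);
   *align = 1;
}

static float
example_preamble_instr_cost(nir_instr *instr, const void *data)
{
   return 1.0f;
}

static float
example_preamble_rewrite_cost(nir_def *def, const void *data)
{
   return 1.0f;
}

static inline bool
nir_opt_preamble_example(nir_shader *shader)
{
   const nir_opt_preamble_options opts = {
      .def_size = example_preamble_def_size,
      .preamble_storage_size = 64, /* arbitrary example budget, in slots */
      .instr_cost_cb = example_preamble_instr_cost,
      .rewrite_cost_cb = example_preamble_rewrite_cost,
   };
   unsigned size = 0;
   return nir_opt_preamble(shader, &opts, &size);
}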
|
|
|
|
|
2022-02-22 21:32:42 +05:30
|
|
|
bool nir_lower_point_smooth(nir_shader *shader);
|
|
|
|
|
bool nir_lower_poly_line_smooth(nir_shader *shader, unsigned num_smooth_aa_sample);
|
2022-04-29 17:58:02 +05:30
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
bool nir_mod_analysis(nir_scalar val, nir_alu_type val_type, unsigned div, unsigned *mod);
|
2022-12-07 15:31:58 +01:00
|
|
|
|
2023-04-21 10:45:47 +03:00
|
|
|
bool
|
|
|
|
|
nir_remove_tex_shadow(nir_shader *shader, unsigned textures_bitmask);
|
|
|
|
|
|
2023-05-18 11:00:50 -04:00
|
|
|
void
|
|
|
|
|
nir_trivialize_registers(nir_shader *s);
|
|
|
|
|
|
2023-12-14 16:43:23 +01:00
|
|
|
unsigned
|
|
|
|
|
nir_static_workgroup_size(const nir_shader *s);
|
|
|
|
|
|
nir: Add intrinsics for register access
Note the writemask handling is chosen for consistency with the rest of NIR. In
every other instance, writemask=w requires a vec4 source. This is hardcoded into
nir_validate and nir_print as what it means to have a writemask.
More importantly, consistency with how register writemasks currently work.
nir_print hides it, but r0.w = fneg ssa_1.x is actually a vec4 instruction with
source ssa_1.xxxx. As a silly example nir_dest_num_components(that) = 4 in the
old model. I realize this is quite strange coming from a scalar ISA, but it's
perfectly natural for the class of vec4 hardware for which this was designed. In
that hardware, conceptually all instructions are vec4, so the sequence "fneg
ssa_1 and write to channel w" is implemented as "fneg a vec4 with ssa_1.x in the
last component and write that vec4 out but mask to write only the w channel".
Isn't this inefficient? It can be. To save power, Midgard has scalar ALUs in
addition to vec4 ALUs. Those details are confined to the backend VLIW scheduler;
the instruction selection is still done as vec4. This mechanism has little in
common with AMD's SALUs. Midgard has a wave size of 1, with special hacks for
derivatives.
As a result, all backends consuming register writemasks are expecting this
pattern of code. Changing the store to take a vec1 instead of a vec4 would
require changing every backend to reswizzle the sources to resurrect the vec4. I
started typing a branch to do this yesterday, but it made a mess of both Midgard
and nir-to-tgsi. Without any good reason to think it'd actually help
performance, I abandoned the idea. Getting all 15 backends converted to the
helpers is enough of a challenge without forcing 10 backends to reswizzle their
sources too.
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23089>
2023-05-16 11:19:49 -04:00
|
|
|
static inline nir_intrinsic_instr *
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_reg_get_decl(nir_def *reg)
|
2023-05-16 11:19:49 -04:00
|
|
|
{
|
|
|
|
|
assert(reg->parent_instr->type == nir_instr_type_intrinsic);
|
|
|
|
|
nir_intrinsic_instr *decl = nir_instr_as_intrinsic(reg->parent_instr);
|
|
|
|
|
assert(decl->intrinsic == nir_intrinsic_decl_reg);
|
|
|
|
|
|
|
|
|
|
return decl;
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-22 14:39:52 -04:00
|
|
|
static inline nir_intrinsic_instr *
|
|
|
|
|
nir_next_decl_reg(nir_intrinsic_instr *prev, nir_function_impl *impl)
|
|
|
|
|
{
|
|
|
|
|
nir_instr *start;
|
|
|
|
|
if (prev != NULL)
|
|
|
|
|
start = nir_instr_next(&prev->instr);
|
|
|
|
|
else if (impl != NULL)
|
|
|
|
|
start = nir_block_first_instr(nir_start_block(impl));
|
|
|
|
|
else
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
for (nir_instr *instr = start; instr; instr = nir_instr_next(instr)) {
|
|
|
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
|
if (intrin->intrinsic == nir_intrinsic_decl_reg)
|
|
|
|
|
return intrin;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_reg_decl(reg, impl) \
|
|
|
|
|
for (nir_intrinsic_instr *reg = nir_next_decl_reg(NULL, impl); \
|
2023-05-22 14:39:52 -04:00
|
|
|
reg; reg = nir_next_decl_reg(reg, NULL))
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_reg_decl_safe(reg, impl) \
|
|
|
|
|
for (nir_intrinsic_instr *reg = nir_next_decl_reg(NULL, impl), \
|
|
|
|
|
*next_ = nir_next_decl_reg(reg, NULL); \
|
2023-05-22 14:39:52 -04:00
|
|
|
reg; reg = next_, next_ = nir_next_decl_reg(next_, NULL))
|
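/* Illustrative helper: count the register declarations in an impl with the
 * iterator above.
 */
static inline unsigned
nir_count_reg_decls_example(nir_function_impl *impl)
{
   unsigned count = 0;
   nir_foreach_reg_decl(reg, impl)
      count++;
   return count;
}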
|
|
|
|
|
2023-12-15 11:42:41 +01:00
|
|
|
static inline nir_cursor
|
|
|
|
|
nir_after_reg_decls(nir_function_impl *impl)
|
|
|
|
|
{
|
|
|
|
|
nir_intrinsic_instr *last_reg_decl = NULL;
|
|
|
|
|
nir_foreach_reg_decl(reg_decl, impl)
|
|
|
|
|
last_reg_decl = reg_decl;
|
|
|
|
|
|
|
|
|
|
if (last_reg_decl != NULL)
|
|
|
|
|
return nir_after_instr(&last_reg_decl->instr);
|
|
|
|
|
return nir_before_impl(impl);
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-22 14:39:52 -04:00
|
|
|
static inline bool
|
|
|
|
|
nir_is_load_reg(nir_intrinsic_instr *intr)
|
|
|
|
|
{
|
|
|
|
|
return intr->intrinsic == nir_intrinsic_load_reg ||
|
|
|
|
|
intr->intrinsic == nir_intrinsic_load_reg_indirect;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
nir_is_store_reg(nir_intrinsic_instr *intr)
|
|
|
|
|
{
|
|
|
|
|
return intr->intrinsic == nir_intrinsic_store_reg ||
|
|
|
|
|
intr->intrinsic == nir_intrinsic_store_reg_indirect;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_reg_load(load, reg) \
|
|
|
|
|
assert(reg->intrinsic == nir_intrinsic_decl_reg); \
|
|
|
|
|
\
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_foreach_use(load, ®->def) \
|
2023-08-14 09:58:47 -04:00
|
|
|
if (nir_is_load_reg(nir_instr_as_intrinsic(nir_src_parent_instr(load))))
|
2023-05-22 14:39:52 -04:00
|
|
|
|
2023-11-12 20:18:12 +01:00
|
|
|
#define nir_foreach_reg_load_safe(load, reg) \
|
|
|
|
|
assert(reg->intrinsic == nir_intrinsic_decl_reg); \
|
|
|
|
|
\
|
|
|
|
|
nir_foreach_use_safe(load, ®->def) \
|
|
|
|
|
if (nir_is_load_reg(nir_instr_as_intrinsic(nir_src_parent_instr(load))))
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#define nir_foreach_reg_store(store, reg) \
|
|
|
|
|
assert(reg->intrinsic == nir_intrinsic_decl_reg); \
|
|
|
|
|
\
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_foreach_use(store, ®->def) \
|
2023-08-14 09:58:47 -04:00
|
|
|
if (nir_is_store_reg(nir_instr_as_intrinsic(nir_src_parent_instr(store))))
|
2023-05-22 14:39:52 -04:00
|
|
|
|
2023-11-12 20:18:12 +01:00
|
|
|
#define nir_foreach_reg_store_safe(store, reg) \
|
|
|
|
|
assert(reg->intrinsic == nir_intrinsic_decl_reg); \
|
|
|
|
|
\
|
|
|
|
|
nir_foreach_use_safe(store, ®->def) \
|
|
|
|
|
if (nir_is_store_reg(nir_instr_as_intrinsic(nir_src_parent_instr(store))))
|
|
|
|
|
|
2023-05-22 14:39:52 -04:00
|
|
|
static inline nir_intrinsic_instr *
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_load_reg_for_def(const nir_def *def)
|
2023-05-22 14:39:52 -04:00
|
|
|
{
|
|
|
|
|
if (def->parent_instr->type != nir_instr_type_intrinsic)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(def->parent_instr);
|
|
|
|
|
if (!nir_is_load_reg(intr))
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
return intr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline nir_intrinsic_instr *
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_store_reg_for_def(const nir_def *def)
|
2023-05-22 14:39:52 -04:00
|
|
|
{
|
|
|
|
|
/* Look for the trivial store: single use of our destination by a
|
|
|
|
|
* store_register intrinsic.
|
|
|
|
|
*/
|
|
|
|
|
if (!list_is_singular(&def->uses))
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
nir_src *src = list_first_entry(&def->uses, nir_src, use_link);
|
2023-08-14 09:58:47 -04:00
|
|
|
if (nir_src_is_if(src))
|
2023-05-22 14:39:52 -04:00
|
|
|
return NULL;
|
|
|
|
|
|
2023-08-14 09:58:47 -04:00
|
|
|
nir_instr *parent = nir_src_parent_instr(src);
|
2023-05-22 14:39:52 -04:00
|
|
|
if (parent->type != nir_instr_type_intrinsic)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
|
|
|
|
|
if (!nir_is_store_reg(intr))
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
/* The first value is data. Third is indirect index, ignore that one. */
|
|
|
|
|
if (&intr->src[0] != src)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
return intr;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-10 03:01:21 -04:00
|
|
|
struct nir_use_dominance_state;
|
|
|
|
|
|
|
|
|
|
struct nir_use_dominance_state *
|
|
|
|
|
nir_calc_use_dominance_impl(nir_function_impl *impl, bool post_dominance);
|
|
|
|
|
|
|
|
|
|
nir_instr *
|
|
|
|
|
nir_get_immediate_use_dominator(struct nir_use_dominance_state *state,
|
|
|
|
|
nir_instr *instr);
|
|
|
|
|
nir_instr *nir_use_dominance_lca(struct nir_use_dominance_state *state,
|
|
|
|
|
nir_instr *i1, nir_instr *i2);
|
|
|
|
|
bool nir_instr_dominates_use(struct nir_use_dominance_state *state,
|
|
|
|
|
nir_instr *parent, nir_instr *child);
|
|
|
|
|
void nir_print_use_dominators(struct nir_use_dominance_state *state,
|
|
|
|
|
nir_instr **instructions,
|
|
|
|
|
unsigned num_instructions);
|
|
|
|
|
|
2021-01-18 14:43:15 +00:00
|
|
|
#include "nir_inline_helpers.h"
|
|
|
|
|
|
2014-07-31 16:14:51 -07:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
} /* extern "C" */
|
|
|
|
|
#endif
|
2017-03-20 16:04:16 +00:00
|
|
|
|
|
|
|
|
#endif /* NIR_H */
|