mesa/src/compiler/nir/nir_shader_compiler_options.h

/*
 * Copyright © 2014 Connor Abbott
 * SPDX-License-Identifier: MIT
 */

#ifndef NIR_SHADER_COMPILER_OPTIONS_H
#define NIR_SHADER_COMPILER_OPTIONS_H

#include "util/macros.h"
#include "nir_defines.h"
#include <stdbool.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

typedef enum {
   nir_lower_imul64 = (1 << 0),
   nir_lower_isign64 = (1 << 1),
   /** Lower all int64 modulus and division opcodes */
   nir_lower_divmod64 = (1 << 2),
   /** Lower all 64-bit umul_high and imul_high opcodes */
   nir_lower_imul_high64 = (1 << 3),
   nir_lower_bcsel64 = (1 << 4),
   nir_lower_icmp64 = (1 << 5),
   nir_lower_iadd64 = (1 << 6),
   nir_lower_iabs64 = (1 << 7),
   nir_lower_ineg64 = (1 << 8),
   nir_lower_logic64 = (1 << 9),
   nir_lower_minmax64 = (1 << 10),
   nir_lower_shift64 = (1 << 11),
   nir_lower_imul_2x32_64 = (1 << 12),
   nir_lower_extract64 = (1 << 13),
   nir_lower_ufind_msb64 = (1 << 14),
   nir_lower_bit_count64 = (1 << 15),
   nir_lower_subgroup_shuffle64 = (1 << 16),
   nir_lower_scan_reduce_bitwise64 = (1 << 17),
   nir_lower_scan_reduce_iadd64 = (1 << 18),
   nir_lower_vote_ieq64 = (1 << 19),
   nir_lower_usub_sat64 = (1 << 20),
   nir_lower_iadd_sat64 = (1 << 21),
   nir_lower_find_lsb64 = (1 << 22),
   nir_lower_conv64 = (1 << 23),
   nir_lower_uadd_sat64 = (1 << 24),
   nir_lower_iadd3_64 = (1 << 25),
   nir_lower_bitfield_reverse64 = (1 << 26),
   nir_lower_bitfield_extract64 = (1 << 27),
} nir_lower_int64_options;

typedef enum {
   nir_lower_drcp = (1 << 0),
   nir_lower_dsqrt = (1 << 1),
   nir_lower_drsq = (1 << 2),
   nir_lower_dtrunc = (1 << 3),
   nir_lower_dfloor = (1 << 4),
   nir_lower_dceil = (1 << 5),
   nir_lower_dfract = (1 << 6),
   nir_lower_dround_even = (1 << 7),
   nir_lower_dmod = (1 << 8),
   nir_lower_dsub = (1 << 9),
   nir_lower_ddiv = (1 << 10),
   nir_lower_dsign = (1 << 11),
   nir_lower_dminmax = (1 << 12),
   nir_lower_dsat = (1 << 13),
   nir_lower_fp64_full_software = (1 << 14),
} nir_lower_doubles_options;

typedef enum {
   nir_divergence_single_prim_per_subgroup = (1 << 0),
   nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
   nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
   nir_divergence_view_index_uniform = (1 << 3),
   nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4),
   nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
   nir_divergence_shader_record_ptr_uniform = (1 << 6),
   nir_divergence_uniform_load_tears = (1 << 7),
   /* If used, this allows phis for divergent merges with undef and a uniform source to be considered uniform */
   nir_divergence_ignore_undef_if_phi_srcs = (1 << 8),
   /* Whether to compute vertex divergence (meaning between vertices
    * of the same primitive) instead of subgroup invocation divergence
    * (between invocations of the same subgroup). For example, patch input
    * loads are always convergent, while subgroup intrinsics are divergent.
    */
   nir_divergence_vertex = (1 << 11),
} nir_divergence_options;

/** An instruction filtering callback
 *
 * Returns true if the instruction should be processed and false otherwise.
 */
typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);

typedef enum {
   /**
    * Whether a fragment shader can interpolate the same input multiple times
    * with different modes (smooth, noperspective) and locations (pixel,
    * centroid, sample, at_offset, at_sample), excluding the flat mode.
    *
    * This matches AMD GPU flexibility and limitations and is a superset of
    * the GL4 requirement that each input can be interpolated at its specified
    * location, and then also as centroid, at_offset, and at_sample.
    */
   nir_io_has_flexible_input_interpolation_except_flat = BITFIELD_BIT(0),

   /**
    * nir_opt_varyings compacts (relocates) components of varyings by
    * rewriting their locations completely, effectively moving components of
    * varyings between slots. This option forces nir_opt_varyings to make
    * VARYING_SLOT_POS unused by moving its contents to VARn if the consumer
    * is not FS. If this option is not set and POS is unused, it moves
    * components of VARn to POS until it's fully used.
    */
   nir_io_dont_use_pos_for_non_fs_varyings = BITFIELD_BIT(1),

   nir_io_16bit_input_output_support = BITFIELD_BIT(2),

   /**
    * Implement mediump inputs and outputs as normal 32-bit IO.
    * Causes the mediump flag to be not set for IO semantics, essentially
    * destroying any mediump-related IO information in the shader.
    */
   nir_io_mediump_is_32bit = BITFIELD_BIT(3),

   /**
    * Whether nir_opt_vectorize_io should ignore FS inputs.
    */
   nir_io_prefer_scalar_fs_inputs = BITFIELD_BIT(4),

   /**
    * Whether interpolated fragment shader vec4 slots can use load_input for
    * a subset of its components to skip interpolation for those components.
    * The result of such load_input is a value from a random (not necessarily
    * provoking) vertex. If a value from the provoking vertex is required,
    * the vec4 slot should have no load_interpolated_input instructions.
    *
    * This exposes the AMD capability that allows packing flat inputs with
    * interpolated inputs in a limited number of cases. Normally, flat
    * components must be in a separate vec4 slot to get the value from
    * the provoking vertex. If the compiler can prove that all per-vertex
    * values are equal (convergent, i.e. the provoking vertex doesn't matter),
    * it can put such flat components into any interpolated vec4 slot.
    *
    * It should also be set if the hw can mix flat and interpolated components
    * in the same vec4 slot.
    *
    * This causes nir_opt_varyings to skip interpolation for all varyings
    * that are convergent, and enables better compaction and inter-shader code
    * motion for convergent varyings.
    */
   nir_io_mix_convergent_flat_with_interpolated = BITFIELD_BIT(5),

   /**
    * Whether src_type and dest_type of IO intrinsics are irrelevant and
    * should be ignored by nir_opt_vectorize_io. All drivers that always treat
    * load_input and store_output as untyped and load_interpolated_input as
    * float##bit_size should set this.
    */
   nir_io_vectorizer_ignores_types = BITFIELD_BIT(6),

   /**
    * Whether nir_opt_varyings should never promote convergent FS inputs
    * to flat.
    */
   nir_io_always_interpolate_convergent_fs_inputs = BITFIELD_BIT(7),

   /**
    * Whether the first assigned color channel component should be equal to
    * the first unused VARn component.
    *
    * For example, if the first unused VARn channel is VAR0.z, color channels
    * are assigned in this order:
    *       COL0.z, COL0.w, COL0.x, COL0.y, COL1.z, COL1.w, COL1.x, COL1.y
    *
    * This allows certain drivers to merge outputs if each output sets
    * different components, for example 2 outputs writing VAR0.xy and COL0.z
    * will only use 1 HW output.
    */
   nir_io_compaction_rotates_color_channels = BITFIELD_BIT(8),

   /**
    * Whether to group TES inputs as follows:
    * - inputs used to compute only POS/CLIP outputs are first
    * - inputs used to compute both POS/CLIP outputs and other outputs are next
    * - inputs used to compute only other outputs are last
    */
   nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups = BITFIELD_BIT(9),

   /**
    * RADV expects that high 16 bits of outputs set component >= 4. That's not
    * legal in NIR, but RADV unfortunately relies on it because it's not
    * validated.
    */
   nir_io_radv_intrinsic_component_workaround = BITFIELD_BIT(10),

   /* Options affecting the GLSL compiler or Gallium are below. */

   /**
    * Lower load_deref/store_deref to load_input/store_output/etc. intrinsics.
    * This is only affects GLSL compilation and Gallium.
    */
   nir_io_has_intrinsics = BITFIELD_BIT(16),

   /**
    * Whether clip and cull distance arrays should be separate. If this is not
    * set, cull distances will be moved into VARYING_SLOT_CLIP_DISTn after clip
    * distances, and shader_info::clip_distance_array_size will be the index
    * of the first cull distance. nir_lower_clip_cull_distance_array_vars does
    * that.
    */
   nir_io_separate_clip_cull_distance_arrays = BITFIELD_BIT(17),
} nir_io_options;

typedef enum {
   nir_lower_packing_op_pack_64_2x32,
   nir_lower_packing_op_unpack_64_2x32,
   nir_lower_packing_op_pack_64_4x16,
   nir_lower_packing_op_unpack_64_4x16,
   nir_lower_packing_op_pack_32_2x16,
   nir_lower_packing_op_unpack_32_2x16,
   nir_lower_packing_op_pack_32_4x8,
   nir_lower_packing_op_unpack_32_4x8,
   nir_lower_packing_num_ops,
} nir_lower_packing_op;

typedef struct nir_shader_compiler_options {
   bool lower_fdiv;
   bool lower_ffma16;
   bool lower_ffma32;
   bool lower_ffma64;
   bool fuse_ffma16;
   bool fuse_ffma32;
   bool fuse_ffma64;
   bool lower_flrp16;
   bool lower_flrp32;
   /** Lowers flrp when it does not support doubles */
   bool lower_flrp64;
   bool lower_fpow;
   bool lower_fsat;
   bool lower_fsqrt;
   bool lower_sincos;
   bool lower_fmod;
   /** Lowers ibitfield_extract/ubitfield_extract for 8, 16 & 32 bits. */
   bool lower_bitfield_extract8;
   bool lower_bitfield_extract16;
   bool lower_bitfield_extract;
   /** Lowers bitfield_insert. */
   bool lower_bitfield_insert;
   /** Lowers bitfield_reverse to shifts. */
   bool lower_bitfield_reverse;
   /** Lowers bit_count to shifts. */
   bool lower_bit_count;
   /** Lowers ifind_msb. */
   bool lower_ifind_msb;
   /** Lowers ufind_msb. */
   bool lower_ufind_msb;
   /** Lowers find_lsb to ufind_msb and logic ops */
   bool lower_find_lsb;
   bool lower_uadd_carry;
   bool lower_usub_borrow;
   /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
   bool lower_mul_high;
   bool lower_mul_high16;
   /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */
   bool lower_fneg;
   /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */
   bool lower_ineg;
   /** lowers fisnormal to alu ops. */
   bool lower_fisnormal;

   /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
   bool lower_scmp;

   /* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */
   bool lower_vector_cmp;

   /** enable rules to avoid bit ops */
   bool lower_bitops;

   /** enables rules to lower isign to imin+imax */
   bool lower_isign;

   /** enables rules to lower fsign to fsub and flt */
   bool lower_fsign;

   /** enables rules to lower iabs to ineg+imax */
   bool lower_iabs;

   /** enable rules that avoid generating umax from signed integer ops */
   bool lower_umax;

   /** enable rules that avoid generating umin from signed integer ops */
   bool lower_umin;

   /* lower fmin/fmax with signed zero preserve to fmin/fmax with
    * no_signed_zero, for backends whose fmin/fmax implementations do not
    * implement IEEE-754-2019 semantics for signed zero.
    */
   bool lower_fminmax_signed_zero;

   /* lower fdph to fdot4 */
   bool lower_fdph;

   /* Does the native fdot instruction replicate its result for four
    * components?  If so, then opt_algebraic_late will turn all fdotN
    * instructions into fdotN_replicated instructions.
    */
   bool fdot_replicates;

   /** lowers ffloor to fsub+ffract: */
   bool lower_ffloor;

   /** lowers ffract to fsub+ffloor: */
   bool lower_ffract;

   /** lowers fceil to fneg+ffloor+fneg: */
   bool lower_fceil;

   bool lower_ftrunc;

   /** Lowers fround_even to ffract+feq+csel.
    *
    * Not correct in that it doesn't correctly handle the "_even" part of the
    * rounding, but good enough for DX9 array indexing handling on DX9-class
    * hardware.
    */
   bool lower_fround_even;

   bool lower_ldexp;

   bool lower_pack_half_2x16;
   bool lower_pack_unorm_2x16;
   bool lower_pack_snorm_2x16;
   bool lower_pack_unorm_4x8;
   bool lower_pack_snorm_4x8;
   bool lower_pack_64_2x32;
   bool lower_pack_64_4x16;
   bool lower_pack_32_2x16;
   bool lower_pack_64_2x32_split;
   bool lower_pack_32_2x16_split;
   bool lower_unpack_half_2x16;
   bool lower_unpack_unorm_2x16;
   bool lower_unpack_snorm_2x16;
   bool lower_unpack_unorm_4x8;
   bool lower_unpack_snorm_4x8;
   bool lower_unpack_64_2x32_split;
   bool lower_unpack_32_2x16_split;

   bool lower_pack_split;

   bool lower_extract_byte;
   bool lower_extract_word;
   bool lower_insert_byte;
   bool lower_insert_word;

   /* Indicates that the driver only has zero-based vertex id */
   bool vertex_id_zero_based;

   /**
    * If enabled, gl_BaseVertex will be lowered as:
    * is_indexed_draw (~0/0) & firstvertex
    */
   bool lower_base_vertex;

   /* Indicates that gl_InstanceIndex already includes base index
    * and doesn't require further lowering.
    */
   bool instance_id_includes_base_index;

   /**
    * If enabled, gl_HelperInvocation will be lowered as:
    *
    * - non-sample-shading: sample_mask_in == 0.
    * - sample shading:     !((1 << sample_id) & sample_mask_in))
    *
    * For this to be correct, it requires that fs.uses_sample_shading is set to
    * true when sample shading is enabled.  This means that you need shader
    * variants to set the flag when Vulkan's
    * VkPipelineMultisampleStateCreateInfo->sampleShadingEnable or GL's
    * glMinSampleshading() are enabled.
    *
    * This depends on some possibly hw implementation details, which may
    * not be true for all hw.  In particular that the FS is only executed
    * for covered samples or for helper invocations.  So, do not blindly
    * enable this option.
    *
    * Note: See also issue #22 in ARB_shader_image_load_store
    */
   bool lower_helper_invocation;

   /**
    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
    *
    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
    */
   bool optimize_sample_mask_in;

   /**
    * Optimize load_front_face ? a : -a to load_front_face_fsign * a
    */
   bool optimize_load_front_face_fsign;

   /**
    * Optimize boolean reductions of quad broadcasts. This should only be enabled if
    * nir_intrinsic_reduce supports INCLUDE_HELPERS.
    */
   bool optimize_quad_vote_to_reduce;

   bool lower_cs_local_index_to_id;
   bool lower_cs_local_id_to_index;

   /* Prevents lowering global_invocation_id to be in terms of workgroup_id */
   bool has_cs_global_id;

   bool lower_device_index_to_zero;

   /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord.
    * Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN
    * is GL_LOWER_LEFT.
    */
   bool lower_wpos_pntc;

   /**
    * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
    * lowered to simple arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    *
    * :c:member:`lower_hadd64`
    */
   bool lower_hadd;

   /**
    * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
    * should be lowered to simple arithmetic.
    *
    * If this flag is set, the lowering will be applied to only 64-bit
    * versions of these instructions.
    *
    * :c:member:`lower_hadd`
    */
   bool lower_hadd64;

   /**
    * Set if nir_op_uadd_sat should be lowered to simple arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    */
   bool lower_uadd_sat;

   /**
    * Set if nir_op_usub_sat should be lowered to simple arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    */
   bool lower_usub_sat;

   /**
    * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple
    * arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    */
   bool lower_iadd_sat;

   /**
    * Set if imul_32x16 and umul_32x16 should be lowered to simple
    * arithmetic.
    */
   bool lower_mul_32x16;

   /**
    * Set if bf2f and f2bf should be lowered to arithmetic.
    */
   bool lower_bfloat16_conversions;

   bool vectorize_tess_levels;
   bool lower_to_scalar;
   nir_instr_filter_cb lower_to_scalar_filter;

   /**
    * Disables potentially harmful algebraic transformations for architectures
    * with SIMD-within-a-register semantics.
    *
    * Note, to actually vectorize 16bit instructions, use nir_opt_vectorize()
    * with a suitable callback function.
    */
   bool vectorize_vec2_16bit;

   /**
    * Should the linker unify inputs_read/outputs_written between adjacent
    * shader stages which are linked into a single program?
    */
   bool unify_interfaces;

   /**
    * Whether nir_lower_io() will lower interpolateAt functions to
    * load_interpolated_input intrinsics.
    *
    * Unlike nir_lower_io_use_interpolated_input_intrinsics this will only
    * lower these functions and leave input load intrinsics untouched.
    */
   bool lower_interpolate_at;

   /* Lowers when 32x32->64 bit multiplication is not supported */
   bool lower_mul_2x32_64;

   /* Indicates that urol and uror are supported */
   bool has_rotate8;
   bool has_rotate16;
   bool has_rotate32;

   /** Backend supports shfr */
   bool has_shfr32;

   /** Backend supports ternary addition */
   bool has_iadd3;

   /**
    * Backend supports amul and would like them generated whenever
    * possible. This is stronger than has_imul24 for amul, but does not imply
    * support for imul24.
    */
   bool has_amul;

   /**
    * Backend supports imul24, and would like to use it (when possible)
    * for address/offset calculation.  If true, driver should call
    * nir_lower_amul().  (If not set, amul will automatically be lowered
    * to imul.)
    */
   bool has_imul24;

   /** Backend supports umul24, if not set  umul24 will automatically be lowered
    * to imul with masked inputs */
   bool has_umul24;

   /** Backend supports imul24_relaxed and umul24_relaxed, if not set they will be lowered
    * to imul24, umul24 or imul.
    */
   bool has_mul24_relaxed;

   /** Backend supports 32-bit imad */
   bool has_imad32;

   /** Backend supports umad24, if not set  umad24 will automatically be lowered
    * to imul with masked inputs and iadd */
   bool has_umad24;

   /* Backend supports fused compare against zero and csel */
   bool has_fused_comp_and_csel;
   /* Backend supports fused int eq/ne against zero and csel. */
   bool has_icsel_eqz64;
   bool has_icsel_eqz32;
   bool has_icsel_eqz16;

   /* Backend supports fneo, fequ, fltu, fgeu. */
   bool has_fneo_fcmpu;

   /* Backend supports ford and funord. */
   bool has_ford_funord;

   /** Backend supports fsub, if not set fsub will automatically be lowered to
    * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
   bool has_fsub;

   /** Backend supports isub, if not set isub will automatically be lowered to
    * iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */
   bool has_isub;

   /** Backend supports pack_32_4x8 or pack_32_4x8_split. */
   bool has_pack_32_4x8;

   /** Backend supports nir_load_texture_scale and prefers it over txs for nir
    * lowerings. */
   bool has_texture_scaling;

   /** Backend supports sdot_4x8_iadd. */
   bool has_sdot_4x8;

   /** Backend supports udot_4x8_uadd. */
   bool has_udot_4x8;

   /** Backend supports sudot_4x8_iadd. */
   bool has_sudot_4x8;

   /** Backend supports sdot_4x8_iadd_sat. */
   bool has_sdot_4x8_sat;

   /** Backend supports udot_4x8_uadd_sat. */
   bool has_udot_4x8_sat;

   /** Backend supports sudot_4x8_iadd_sat. */
   bool has_sudot_4x8_sat;

   /** Backend supports sdot_2x16 and udot_2x16 opcodes. */
   bool has_dot_2x16;

   /** Backend supports bfdot2_bfadd opcode. */
   bool has_bfdot2_bfadd;

   /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */
   bool has_fmulz;

   /**
    * Backend supports fmulz (and ffmaz if lower_ffma32=false) but only if
    * FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set
    */
   bool has_fmulz_no_denorms;

   /** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */
   bool has_find_msb_rev;

   /** Backend supports pack_half_2x16_rtz_split. */
   bool has_pack_half_2x16_rtz;

   /** Backend supports bitz/bitnz. */
   bool has_bit_test;

   /** Backend supports ubfe/ibfe. */
   bool has_bfe;

   /** Backend supports bfm. */
   bool has_bfm;

   /** Backend supports bfi. */
   bool has_bfi;

   /** Backend supports bitfield_select. */
   bool has_bitfield_select;

   /** Backend supports uclz. */
   bool has_uclz;

   /** Backend support msad_u4x8. */
   bool has_msad;

   /** Backend supports f2e4m3fn_satfn */
   bool has_f2e4m3fn_satfn;

   /** Backend supports load_global_bounded intrinsics. */
   bool has_load_global_bounded;

   /**
    * Is this the Intel vec4 backend?
    *
    * Used to inhibit algebraic optimizations that are known to be harmful on
    * the Intel vec4 backend.  This is generally applicable to any
    * optimization that might cause more immediate values to be used in
    * 3-source (e.g., ffma and flrp) instructions.
    */
   bool intel_vec4;

   /**
    * For most Intel GPUs, all ternary operations such as FMA and BFE cannot
    * have immediates, so two to three instructions may eventually be needed.
    */
   bool avoid_ternary_with_two_constants;

   /** Whether 8-bit ALU is supported. */
   bool support_8bit_alu;

   /** Whether 16-bit ALU is supported. */
   bool support_16bit_alu;

   unsigned max_unroll_iterations;
   unsigned max_unroll_iterations_aggressive;
   unsigned max_unroll_iterations_fp64;

   bool lower_uniforms_to_ubo;

   /* Specifies if indirect sampler array access will trigger forced loop
    * unrolling.
    */
   bool force_indirect_unrolling_sampler;

   /* Some older drivers don't support GLSL versions with the concept of flat
    * varyings and also don't support integers. This setting helps us avoid
    * marking varyings as flat and potentially having them changed to ints via
    * varying packing.
    */
   bool no_integers;

   /**
    * Specifies which type of indirectly accessed variables should force
    * loop unrolling.
    */
   nir_variable_mode force_indirect_unrolling;

   bool driver_functions;

   /**
    * If true, the driver will call nir_lower_int64 itself and the frontend
    * should not do so. This may enable better optimization around address
    * modes.
    */
   bool late_lower_int64;
   nir_lower_int64_options lower_int64_options;
   nir_lower_doubles_options lower_doubles_options;
   nir_divergence_options divergence_analysis_options;

   /**
    * The masks of shader stages that support indirect indexing with
    * load_input and store_output intrinsics. It's used by
    * nir_lower_io_passes.
    */
   uint8_t support_indirect_inputs;
   uint8_t support_indirect_outputs;

   /** store the variable offset into the instrinsic range_base instead
    *  of adding it to the image index.
    */
   bool lower_image_offset_to_range_base;

   /** store the variable offset into the instrinsic range_base instead
    *  of adding it to the atomic source
    */
   bool lower_atomic_offset_to_range_base;

   /** Don't convert medium-precision casts (e.g. f2fmp) into concrete
    *  type casts (e.g. f2f16).
    */
   bool preserve_mediump;

   /** lowers fquantize2f16 to alu ops. */
   bool lower_fquantize2f16;

   /** Lower f2f16 to f2f16_rtz when execution mode is not rtne. */
   bool force_f2f16_rtz;

   /** Lower VARYING_SLOT_LAYER in FS to SYSTEM_VALUE_LAYER_ID. */
   bool lower_layer_fs_input_to_sysval;

   /** clip/cull distance and tess level arrays use compact semantics */
   bool compact_arrays;

   /**
    * Whether discard gets emitted as nir_intrinsic_demote.
    * Otherwise, nir_intrinsic_terminate is being used.
    */
   bool discard_is_demote;

   /**
    * Whether the new-style derivative intrinsics are supported. If false,
    * legacy ALU derivative ops will be emitted. This transitional option will
    * be removed once all drivers are converted to derivative intrinsics.
    */
   bool has_ddx_intrinsics;

   /** Whether derivative intrinsics must be scalarized. */
   bool scalarize_ddx;

   /**
    * Assign a range of driver locations to per-view outputs, with unique
    * slots for each view. If unset, per-view outputs will be treated
    * similarly to other arrayed IO, and only slots for one view will be
    * assigned. Regardless of this setting, per-view outputs are only assigned
    * slots for one value in var->data.location.
    */
   bool per_view_unique_driver_locations;

   /**
    * Emit nir_intrinsic_store_per_view_output with compacted view indices
    * rather than absolute view indices. When using compacted indices, the Nth
    * index refers to the Nth enabled view, not the Nth absolute view. For
    * example, with view mask 0b1010, compacted index 0 is absolute index 1,
    * and compacted index 1 is absolute index 3. Note that compacted view
    * indices do not correspond directly to gl_ViewIndex.
    *
    * If compact_view_index is unset, per-view indices must be constant before
    * nir_lower_io. This can be guaranteed by calling nir_lower_io_temporaries
    * first.
    */
   bool compact_view_index;

   /** Options determining lowering and behavior of inputs and outputs. */
   nir_io_options io_options;

   /**
    * Bit mask of nir_lower_packing_op to skip lowering some nir ops in
    * nir_lower_packing().
    */
   unsigned skip_lower_packing_ops;

   /* In case the exact subgroup size is not known, subgroup_size should be
    * set to 0. In that case, the maximum subgroup size will be calculated by
    * ballot_components * ballot_bit_size.
    */
   uint8_t subgroup_size;
   uint8_t ballot_bit_size;
   uint8_t ballot_components;

   /** Driver callback where drivers can define how to lower mediump.
    *  Used by nir_lower_io_passes.
    */
   void (*lower_mediump_io)(struct nir_shader *nir);

   /**
    * Return the maximum cost of an expression that's written to a shader
    * output that can be moved into the next shader to remove that output.
    *
    * Currently only uniform expressions are moved. A uniform expression is
    * any ALU expression sourcing only constants, uniforms, and UBO loads.
    *
    * Set to NULL or return 0 if you only want to propagate constants from
    * outputs to inputs.
    *
    * Drivers can set the maximum cost based on the types of consecutive
    * shaders or shader SHA1s.
    *
    * Drivers should also set "varying_estimate_instr_cost".
    */
   unsigned (*varying_expression_max_cost)(struct nir_shader *consumer,
                                           struct nir_shader *producer);

   /**
    * Return the cost of an instruction that could be moved into the next
    * shader. If the cost of all instructions in an expression is <=
    * varying_expression_max_cost(), the instruction is moved.
    *
    * When this callback isn't set, nir_opt_varyings uses its own version.
    */
   unsigned (*varying_estimate_instr_cost)(struct nir_instr *instr);

   /**
    * When the varying_expression_max_cost callback isn't set, this specifies
    * the maximum cost of a uniform expression that is allowed to be moved
    * from output stores into the next shader stage to eliminate those output
    * stores and corresponding inputs.
    *
    * 0 only allows propagating constants written to output stores to
    * the next shader.
    *
    * At least 2 is required for moving a uniform stored in an output into
    * the next shader according to default_varying_estimate_instr_cost.
    */
   unsigned max_varying_expression_cost;

   /**
    * Used by nir_lower_explicit_io to determine the maximum offset_shift to
    * use when lowering the deref address of the given intrinsic.
    */
   unsigned (*max_offset_shift)(nir_intrinsic_instr *, const void *);

   /**
    * Passed to the callbacks that accept a data pointer.
    */
   const void *cb_data;
} nir_shader_compiler_options;

#ifdef __cplusplus
}
#endif

#endif /* NIR_SHADER_COMPILER_OPTIONS_H */