mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
nir/lower_io: explain properly how nir_lower_io_lower_64bit_to_32* options work
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38916>
This commit is contained in:
parent
41d127b9e8
commit
9ac8e643d6
1 changed files with 56 additions and 18 deletions
|
|
@ -5266,36 +5266,74 @@ void nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode);
|
|||
bool nir_opt_clip_cull_const(nir_shader *shader);
|
||||
|
||||
typedef enum {
|
||||
/* If set, this causes all 64-bit IO operations to be lowered on-the-fly
|
||||
* to 32-bit operations. This is only valid for nir_var_shader_in/out
|
||||
* modes.
|
||||
/* If set, this causes all 64-bit IO loads and stores to be lowered to 32
|
||||
* bits + pack/unpack. This is only valid for nir_var_shader_in/out.
|
||||
*
|
||||
* Note that this destroys dual-slot information i.e. whether an input
|
||||
* occupies the low or high half of dvec4. Instead, it adds an offset of 1
|
||||
* to the load (which is ambiguous) and expects driver locations of inputs
|
||||
* to be final, which prevents any further optimizations.
|
||||
* Only VS inputs:
|
||||
*
|
||||
* TODO: remove this in favor of nir_lower_io_lower_64bit_to_32_new.
|
||||
* This option should be used when VS input locations (nir_intrinsic_base
|
||||
* values) are final. With this option, dual-slot VS inputs must always
|
||||
* occupy 2 VS input locations. For example, VS inputs only consisting
|
||||
* of dual-slot inputs should have VS input locations set to 0,2,4,6 and
|
||||
* loading .xy from (base) and .zw from (base + 1). nir_lower_io will lower
|
||||
* 64 bits into 32 bits like this:
|
||||
*
|
||||
* 0(previously 0.xy), 1(previously 0.zw), ...
|
||||
*
|
||||
* After that's done, the information about whether load_input loads
|
||||
* the low or high portion of dual-slot inputs is not saved
|
||||
* in the intrinsics. This is why the location must be final or the driver
|
||||
* must keep the original VS input semantics info in a separate structure
|
||||
* since it can't be gathered from NIR anymore.
|
||||
*
|
||||
* TODO: This option should be removed because it offers no benefit over
|
||||
* the next one.
|
||||
*/
|
||||
nir_lower_io_lower_64bit_to_32 = (1 << 0),
|
||||
|
||||
/* This causes all 64-bit IO operations to be lowered to 32-bit operations.
|
||||
* This is only valid for nir_var_shader_in/out modes.
|
||||
/* If set, this causes all 64-bit IO loads and stores to be lowered to 32
|
||||
* bits + pack/unpack. This is only valid for nir_var_shader_in/out.
|
||||
*
|
||||
* Only VS inputs: Dual slot information is preserved as nir_io_semantics::
|
||||
* high_dvec2 and gathered into shader_info::dual_slot_inputs, so that
|
||||
* the shader can be arbitrarily optimized and the low or high half of
|
||||
* dvec4 can be DCE'd independently without affecting the other half.
|
||||
* Only VS inputs:
|
||||
*
|
||||
* This option allows eliminating VS inputs and thus bound vertex buffers
|
||||
* after nir_opt_varyings makes them dead. The information whether
|
||||
* load_input loads from the low or high portion of dual-slot inputs is
|
||||
* saved in nir_io_semantics::high_dvec2. shader_info::dual_slot_inputs is
|
||||
* set by nir_shader_gather_info for all inputs that have high_dvec2 == 1.
|
||||
* Since all information about dual-slot VS inputs is preserved in NIR, it
|
||||
* allows linking optimizations to run after nir_lower_io, eliminate dead VS
|
||||
* outputs, which then eliminates dead VS inputs, and the driver can gather
|
||||
* shader_info and stop binding eliminated vertex buffers. The GLSL linker
|
||||
* and st/mesa use this to reduce the number of bound vertex buffers for
|
||||
* gallium when nir_opt_varyings makes them dead. It also naturally demotes
|
||||
* dual-slot VS inputs to single-slot VS inputs when the high half of
|
||||
* dual-slot VS inputs is DCE'd.
|
||||
*
|
||||
* nir_recompute_io_bases assigns the following VS input locations for dual
|
||||
* slot VS inputs:
|
||||
* 0(low), 0(high_dvec2=1), 1(low), 1(high_dvec2=1)
|
||||
*
|
||||
* Alternatively, a prefix bitmask of shader_info::inputs_read can be used
|
||||
* to get the input locations instead of calling nir_recompute_io_bases.
|
||||
* That numbering is for OpenGL where a dual-slot VS input occupies exactly
|
||||
* 1 input location.
|
||||
*
|
||||
* However, any other numbering is possible. For example, Vulkan drivers may
|
||||
* assign these VS input locations after DCE, which can be done by computing
|
||||
* a prefix bitmask from shader_info::inputs_read + a prefix bitmask of
|
||||
* shader_info::dual_slot_inputs + high_dvec2 (the Intel driver does this),
|
||||
* resulting in:
|
||||
* 0(low), 1(high_dvec2=1), 2(low), 3(high_dvec2=1)
|
||||
*/
|
||||
nir_lower_io_lower_64bit_to_32_new = (1 << 2),
|
||||
nir_lower_io_lower_64bit_to_32_new = (1 << 1),
|
||||
|
||||
/**
|
||||
* Should nir_lower_io() create load_interpolated_input intrinsics?
|
||||
/* Should nir_lower_io() create load_interpolated_input intrinsics?
|
||||
*
|
||||
* If not, it generates regular load_input intrinsics and interpolation
|
||||
* information must be inferred from the list of input nir_variables.
|
||||
*/
|
||||
nir_lower_io_use_interpolated_input_intrinsics = (1 << 3),
|
||||
nir_lower_io_use_interpolated_input_intrinsics = (1 << 2),
|
||||
} nir_lower_io_options;
|
||||
bool nir_lower_io(nir_shader *shader,
|
||||
nir_variable_mode modes,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue