diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 6a25fd54f2c..29d4d8b9928 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -5266,36 +5266,74 @@ void nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode); bool nir_opt_clip_cull_const(nir_shader *shader); typedef enum { - /* If set, this causes all 64-bit IO operations to be lowered on-the-fly - * to 32-bit operations. This is only valid for nir_var_shader_in/out - * modes. + /* If set, this causes all 64-bit IO loads and stores to be lowered to 32 + * bits + pack/unpack. This is only valid for nir_var_shader_in/out. * - * Note that this destroys dual-slot information i.e. whether an input - * occupies the low or high half of dvec4. Instead, it adds an offset of 1 - * to the load (which is ambiguous) and expects driver locations of inputs - * to be final, which prevents any further optimizations. + * Only VS inputs: * - * TODO: remove this in favor of nir_lower_io_lower_64bit_to_32_new. + * This option should be used when VS input locations (nir_intrinsic_base + * values) are final. With this option, dual-slot VS inputs must always + * occupy 2 VS input locations. For example, VS inputs only consisting + * of dual-slot inputs should have VS input locations set to 0,2,4,6 and + * loading .xy from (base) and .zw from (base + 1). nir_lower_io will lower + * 64 bits into 32 bits like this: + * + * 0(previously 0.xy), 1(previously 0.zw), ... + * + * After that's done, the information about whether load_input loads + * the low or high portion of dual-slot inputs is not saved + * in the intrinsics. This is why the location must be final or the driver + * must keep the original VS input semantics info in a separate structure + * since it can't be gathered from NIR anymore. + * + * TODO: This option should be removed because it offers no benefit over + * the next one. */ nir_lower_io_lower_64bit_to_32 = (1 << 0), - /* This causes all 64-bit IO operations to be lowered to 32-bit operations. - * This is only valid for nir_var_shader_in/out modes. + /* If set, this causes all 64-bit IO loads and stores to be lowered to 32 + * bits + pack/unpack. This is only valid for nir_var_shader_in/out. * - * Only VS inputs: Dual slot information is preserved as nir_io_semantics:: - * high_dvec2 and gathered into shader_info::dual_slot_inputs, so that - * the shader can be arbitrarily optimized and the low or high half of - * dvec4 can be DCE'd independently without affecting the other half. + * Only VS inputs: + * + * This option allows eliminating VS inputs and thus bound vertex buffers + * after nir_opt_varyings makes them dead. The information whether + * load_input loads from the low or high portion of dual-slot inputs is + * saved in nir_io_semantics::high_dvec2. shader_info::dual_slot_inputs is + * set by nir_shader_gather_info for all inputs that have high_dvec2 == 1. + * Since all information about dual-slot VS inputs is preserved in NIR, it + * allows linking optimizations to run after nir_lower_io, eliminate dead VS + * outputs, which then eliminates dead VS inputs, and the driver can gather + * shader_info and stop binding eliminated vertex buffers. The GLSL linker + * and st/mesa use this to reduce the number of bound vertex buffers for + * gallium when nir_opt_varyings makes them dead. It also naturally demotes + * dual-slot VS inputs to single-slot VS inputs when the high half of + * dual-slot VS inputs is DCE'd. + * + * nir_recompute_io_bases assigns the following VS input locations for dual + * slot VS inputs: + * 0(low), 0(high_dvec2=1), 1(low), 1(high_dvec2=1) + * + * Alternatively, a prefix bitmask of shader_info::inputs_read can be used + * to get the input locations instead of calling nir_recompute_io_bases. + * That numbering is for OpenGL where a dual-slot VS input occupies exactly + * 1 input location. + * + * However, any other numbering is possible. For example, Vulkan drivers may + * assign these VS input locations after DCE, which can be done by computing + * a prefix bitmask from shader_info::inputs_read + a prefix bitmask of + * shader_info::dual_slot_inputs + high_dvec2 (the Intel driver does this), + * resulting in: + * 0(low), 1(high_dvec2=1), 2(low), 3(high_dvec2=1) */ - nir_lower_io_lower_64bit_to_32_new = (1 << 2), + nir_lower_io_lower_64bit_to_32_new = (1 << 1), - /** - * Should nir_lower_io() create load_interpolated_input intrinsics? + /* Should nir_lower_io() create load_interpolated_input intrinsics? * * If not, it generates regular load_input intrinsics and interpolation * information must be inferred from the list of input nir_variables. */ - nir_lower_io_use_interpolated_input_intrinsics = (1 << 3), + nir_lower_io_use_interpolated_input_intrinsics = (1 << 2), } nir_lower_io_options; bool nir_lower_io(nir_shader *shader, nir_variable_mode modes,