diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 64fefb57ea5..a4b5110cb62 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -292,6 +292,7 @@ files_libnir = files(
   'nir_serialize.h',
   'nir_shader_compiler_options.h',
   'nir_split_64bit_vec3_and_vec4.c',
+  'nir_split_conversions.c',
   'nir_split_per_member_structs.c',
   'nir_split_var_copies.c',
   'nir_split_vars.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 254c0553f0f..9f898346cf3 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5791,6 +5791,16 @@ bool nir_lower_bit_size(nir_shader *shader,
                         void *callback_data);
 bool nir_lower_64bit_phis(nir_shader *shader);
 
+typedef struct nir_split_conversions_options {
+   nir_lower_bit_size_callback callback;
+   void *callback_data;
+   /* True if the implementation supports nir_intrinsic_convert_alu_types */
+   bool has_convert_alu_types;
+} nir_split_conversions_options;
+
+bool nir_split_conversions(nir_shader *shader,
+                           const nir_split_conversions_options *options);
+
 bool nir_split_64bit_vec3_and_vec4(nir_shader *shader);
 nir_lower_int64_options
 nir_lower_int64_op_to_options_mask(nir_op opcode);
diff --git a/src/nouveau/compiler/nak_nir_split_64bit_conversions.c b/src/compiler/nir/nir_split_conversions.c
similarity index 86%
rename from src/nouveau/compiler/nak_nir_split_64bit_conversions.c
rename to src/compiler/nir/nir_split_conversions.c
index 57db8533d3e..1fa6d995f2b 100644
--- a/src/nouveau/compiler/nak_nir_split_64bit_conversions.c
+++ b/src/compiler/nir/nir_split_conversions.c
@@ -23,7 +23,7 @@
 
 /* Adapted from intel_nir_lower_conversions.c */
 
-#include "nak_private.h"
+#include "nir.h"
 #include "nir_builder.h"
 
 static nir_rounding_mode
@@ -37,8 +37,10 @@ op_rounding_mode(nir_op op)
 }
 
 static bool
-split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data)
+split_conversion_instr(nir_builder *b, nir_instr *instr, UNUSED void *_data)
 {
+   const nir_split_conversions_options *opts = _data;
+
    if (instr->type != nir_instr_type_alu)
       return false;
 
@@ -47,21 +49,25 @@ split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data)
    if (!nir_op_infos[alu->op].is_conversion)
      return false;
 
+   unsigned tmp_bit_size = opts->callback(instr, opts->callback_data);
+   if (tmp_bit_size == 0)
+      return false;
+
    unsigned src_bit_size = nir_src_bit_size(alu->src[0].src);
+   unsigned dst_bit_size = alu->def.bit_size;
+   if (src_bit_size < dst_bit_size)
+      assert(src_bit_size < tmp_bit_size && tmp_bit_size < dst_bit_size);
+   else
+      assert(dst_bit_size < tmp_bit_size && tmp_bit_size < src_bit_size);
+
    nir_alu_type src_type = nir_op_infos[alu->op].input_types[0];
    nir_alu_type src_full_type = (nir_alu_type) (src_type | src_bit_size);
 
-   unsigned dst_bit_size = alu->def.bit_size;
    nir_alu_type dst_full_type = nir_op_infos[alu->op].output_type;
    assert(nir_alu_type_get_type_size(dst_full_type) == dst_bit_size);
    nir_alu_type dst_type = nir_alu_type_get_base_type(dst_full_type);
    const nir_rounding_mode rounding_mode = op_rounding_mode(alu->op);
 
-   /* We can't cross the 64-bit boundary in one conversion */
-   if ((src_bit_size <= 32 && dst_bit_size <= 32) ||
-       (src_bit_size >= 32 && dst_bit_size >= 32))
-      return false;
-
    nir_alu_type tmp_type;
    if ((src_full_type == nir_type_float16 && dst_bit_size == 64) ||
       (src_bit_size == 64 && dst_full_type == nir_type_float16)) {
@@ -69,6 +75,7 @@ split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data)
       * 32-bit float type so we don't lose range when we convert to/from
       * a 64-bit integer.
       */
+      assert(tmp_bit_size == 32);
      tmp_type = nir_type_float32;
   } else {
      /* For fp64 to integer conversions, using an integer intermediate type
@@ -83,7 +90,7 @@ split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data)
       * For all other conversions, the conversion from int to int is either
       * lossless or just as lossy as the final conversion.
       */
-      tmp_type = dst_type | 32;
+      tmp_type = dst_type | tmp_bit_size;
   }
 
   b->cursor = nir_before_instr(&alu->instr);
@@ -95,7 +102,8 @@ split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data)
       */
      assert(tmp_type == nir_type_float32);
      if (rounding_mode == nir_rounding_mode_rtne ||
-         rounding_mode == nir_rounding_mode_undef) {
+         rounding_mode == nir_rounding_mode_undef ||
+         !opts->has_convert_alu_types) {
         nir_def *src_lo = nir_unpack_64_2x32_split_x(b, src);
         nir_def *src_hi = nir_unpack_64_2x32_split_y(b, src);
 
@@ -148,10 +156,14 @@ split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data)
          * sufficiently negative exponent that it will flush to zero when
          * converted to fp16, regardless of what we do here.
          *
+         * This same trick works for all the rounding modes. Even though the
+         * actual rounding logic is a bit different, they all treat the F and
+         * D bits together based on "all F and D bits are zero" or not.
+         *
          * There are many operations we could choose for combining the low
          * dword bits for ORing into the high dword. We choose umin because
-         * it nicely translates to a single fixed-latency instruction on
-         * everything except Volta.
+         * it nicely translates to a single fixed-latency instruction on a
+         * lot of hardware.
          */
         src_hi = nir_ior(b, src_hi, nir_umin_imm(b, src_lo, 1));
         src_lo = nir_imm_int(b, 0);
@@ -160,7 +172,8 @@ split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data)
      } else {
         /* For round-up, round-down, and round-towards-zero, the rounding
          * accumulates properly as long as we use the same rounding mode for
-         * both operations.
+         * both operations. This is more efficient if the back-end supports
+         * nir_intrinsic_convert_alu_types.
          */
         tmp = nir_convert_alu_types(b, 32, src,
                                     .src_type = nir_type_float64,
@@ -183,9 +196,10 @@ split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data)
 }
 
 bool
-nak_nir_split_64bit_conversions(nir_shader *nir)
+nir_split_conversions(nir_shader *shader,
+                      const nir_split_conversions_options *options)
 {
-   return nir_shader_instructions_pass(nir, split_64bit_conversion,
+   return nir_shader_instructions_pass(shader, split_conversion_instr,
                                        nir_metadata_control_flow,
-                                       NULL);
+                                       (void *)options);
 }
diff --git a/src/nouveau/compiler/meson.build b/src/nouveau/compiler/meson.build
index 6e7d728be7c..142a75bdaa6 100644
--- a/src/nouveau/compiler/meson.build
+++ b/src/nouveau/compiler/meson.build
@@ -34,7 +34,6 @@ libnak_c_files = files(
   'nak_nir_lower_vtg_io.c',
   'nak_nir_mark_lcssa_invariants.c',
   'nak_nir_rematerialize_load_const.c',
-  'nak_nir_split_64bit_conversions.c',
 )
 
 _libacorn_rs = static_library(
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index 66b5ce824e2..6ff52683db3 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -915,6 +915,21 @@ atomic_supported(const nir_instr *instr, const void *data)
           intr->def.bit_size == 64);
 }
 
+static unsigned
+split_conversions_cb(const nir_instr *instr, void *data)
+{
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   unsigned src_bit_size = nir_src_bit_size(alu->src[0].src);
+   unsigned dst_bit_size = alu->def.bit_size;
+
+   /* We can't cross the 64-bit boundary in one conversion */
+   if ((src_bit_size <= 32 && dst_bit_size <= 32) ||
+       (src_bit_size >= 32 && dst_bit_size >= 32))
+      return 0;
+
+   return 32;
+}
+
 void
 nak_postprocess_nir(nir_shader *nir,
                     const struct nak_compiler *nak,
@@ -1059,8 +1074,13 @@ nak_postprocess_nir(nir_shader *nir,
       }
    } while (progress);
 
-   if (nak->sm < 70)
-      OPT(nir, nak_nir_split_64bit_conversions);
+   if (nak->sm < 70) {
+      const nir_split_conversions_options split_conv_opts = {
+         .callback = split_conversions_cb,
+         .has_convert_alu_types = true,
+      };
+      OPT(nir, nir_split_conversions, &split_conv_opts);
+   }
 
    /* Re-materialize load_const instructions in the blocks that use them.
    * This is both a register pressure optimization and a ensures correctness
diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h
index 710f3022dab..75e300c7fc9 100644
--- a/src/nouveau/compiler/nak_private.h
+++ b/src/nouveau/compiler/nak_private.h
@@ -252,7 +252,6 @@ enum nak_fs_out {
 
 bool nak_nir_rematerialize_load_const(nir_shader *nir);
 bool nak_nir_mark_lcssa_invariants(nir_shader *nir);
-bool nak_nir_split_64bit_conversions(nir_shader *nir);
bool nak_nir_lower_non_uniform_ldcx(nir_shader *nir);
 bool nak_nir_add_barriers(nir_shader *nir, const struct nak_compiler *nak);
 bool nak_nir_lower_cf(nir_shader *nir);
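
For a back-end other than nouveau, wiring the generalized pass up would look roughly like the sketch below. Only nir_split_conversions() and nir_split_conversions_options come from the patch above; the callback name, the 16 <-> 64 split rule, and the helper function are hypothetical. The pass only invokes the callback on ALU instructions whose opcode is a conversion, and a return value of 0 leaves the instruction alone, so the callback just returns an intermediate bit size strictly between the source and destination sizes for the cases it wants split.

#include "nir.h"

/* Hypothetical callback: split 16 <-> 64-bit conversions through a 32-bit
 * intermediate.  nir_split_conversions() only calls this for conversion ALU
 * instructions, so treating instr as an ALU instruction is safe here.
 */
static unsigned
example_split_conversions_cb(const nir_instr *instr, void *data)
{
   const nir_alu_instr *alu = nir_instr_as_alu((nir_instr *)instr);
   unsigned src_bit_size = nir_src_bit_size(alu->src[0].src);
   unsigned dst_bit_size = alu->def.bit_size;

   if ((src_bit_size == 16 && dst_bit_size == 64) ||
       (src_bit_size == 64 && dst_bit_size == 16))
      return 32;

   return 0; /* 0 means "do not split this conversion" */
}

static bool
example_run_split_conversions(nir_shader *shader)
{
   const nir_split_conversions_options opts = {
      .callback = example_split_conversions_cb,
      /* Leave this false if the back-end never handles
       * nir_intrinsic_convert_alu_types; the pass then lowers the
       * fp64 -> fp16 cases with the unpack/umin trick instead.
       */
      .has_convert_alu_types = false,
   };
   return nir_split_conversions(shader, &opts);
}

Because the options struct reuses the existing nir_lower_bit_size_callback signature, the callback receives the instruction plus the driver-supplied callback_data pointer and returns a bit size, mirroring how nir_lower_bit_size is driven.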