diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 229dacdb59d..9102307f388 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -234,6 +234,7 @@ files_libnir = files(
   'nir_opt_move_discards_to_top.c',
   'nir_opt_offsets.c',
   'nir_opt_peephole_select.c',
+  'nir_opt_phi_precision.c',
   'nir_opt_rematerialize_compares.c',
   'nir_opt_remove_phis.c',
   'nir_opt_shrink_vectors.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 99d0f8039c3..4623492dd9c 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5256,6 +5256,8 @@ bool nir_opt_rematerialize_compares(nir_shader *shader);
 bool nir_opt_remove_phis(nir_shader *shader);
 bool nir_opt_remove_phis_block(nir_block *block);
 
+bool nir_opt_phi_precision(nir_shader *shader);
+
 bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_image_store);
 
 bool nir_opt_trivial_continues(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_phi_precision.c b/src/compiler/nir/nir_opt_phi_precision.c
new file mode 100644
index 00000000000..449e8913d16
--- /dev/null
+++ b/src/compiler/nir/nir_opt_phi_precision.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/*
+ * This pass tries to reduce the bitsize of phi instructions, either by
+ * moving narrowing conversions from the phi's consumers to the phi's
+ * sources, when all the uses of the phi are equivalent narrowing
+ * instructions.  In other words, convert:
+ *
+ *    vec1 32 ssa_124 = load_const (0x00000000)
+ *    ...
+ *    loop {
+ *        ...
+ *        vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
+ *        vec1 16 ssa_8 = i2imp ssa_155
+ *        ...
+ *        vec1 32 ssa_53 = i2i32 ssa_52
+ *    }
+ *
+ * into:
+ *
+ *    vec1 32 ssa_124 = load_const (0x00000000)
+ *    vec1 16 ssa_156 = i2imp ssa_124
+ *    ...
+ *    loop {
+ *        ...
+ *        vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
+ *        ...
+ *        vec1 32 ssa_53 = i2i32 ssa_52
+ *        vec1 16 ssa_157 = i2i16 ssa_53
+ *    }
+ *
+ * Or, failing that, tries to push widening conversions of the phi's
+ * sources to the phi's def.  In this case, since a load_const is
+ * frequently one of the phi sources, the pass checks whether the
+ * constant can be narrowed without a loss of precision:
+ *
+ *    vec1 32 ssa_0 = load_const (0x00000000)
+ *    ...
+ *    loop {
+ *        ...
+ *        vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
+ *        ...
+ *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
+ *        vec1 32 ssa_19 = i2i32 ssa_18
+ *    }
+ *
+ * into:
+ *
+ *    vec1 32 ssa_0 = load_const (0x00000000)
+ *    vec1 16 ssa_22 = i2i16 ssa_0
+ *    ...
+ *    loop {
+ *        ...
+ *        vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
+ *        vec1 32 ssa_23 = i2i32 ssa_8
+ *        ...
+ *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
+ *    }
+ *
+ * Note that either transformation can convert x2ymp into x2y16, which
+ * is normally done later in nir_opt_algebraic_late().  That loses the
+ * option to fold away sequences like (i2i32 (i2imp (x))), but algebraic
+ * opts cannot see through phis anyway.
+ */
+
+#define INVALID_OP nir_num_opcodes
+
+/**
+ * Get the corresponding exact conversion for an x2ymp conversion
+ */
+static nir_op
+concrete_conversion(nir_op op)
+{
+   switch (op) {
+   case nir_op_i2imp: return nir_op_i2i16;
+   case nir_op_i2fmp: return nir_op_i2f16;
+   case nir_op_u2fmp: return nir_op_u2f16;
+   case nir_op_f2fmp: return nir_op_f2f16;
+   case nir_op_f2imp: return nir_op_f2i16;
+   case nir_op_f2ump: return nir_op_f2u16;
+   default:           return op;
+   }
+}
+
+static nir_op
+narrowing_conversion_op(nir_instr *instr, nir_op current_op)
+{
+   if (instr->type != nir_instr_type_alu)
+      return INVALID_OP;
+
+   nir_op op = nir_instr_as_alu(instr)->op;
+   switch (op) {
+   case nir_op_i2imp:
+   case nir_op_i2i16:
+   case nir_op_i2fmp:
+   case nir_op_i2f16:
+   case nir_op_u2fmp:
+   case nir_op_u2f16:
+   case nir_op_f2fmp:
+   case nir_op_f2f16:
+   case nir_op_f2imp:
+   case nir_op_f2i16:
+   case nir_op_f2ump:
+   case nir_op_f2u16:
+   case nir_op_f2f16_rtne:
+   case nir_op_f2f16_rtz:
+      break;
+   default:
+      return INVALID_OP;
+   }
+
+   /* If we've already picked a conversion op from a previous phi use,
+    * make sure it is compatible with the current use:
+    */
+   if (current_op != INVALID_OP) {
+      if (current_op != op) {
+         /* If we have different conversions, but one can be converted
+          * to the other, then let's do that:
+          */
+         if (concrete_conversion(current_op) == concrete_conversion(op)) {
+            op = concrete_conversion(op);
+         } else {
+            return INVALID_OP;
+         }
+      }
+   }
+
+   return op;
+}
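(Editorial aside, not part of the patch: the reconciliation rule above in miniature. The snippet assumes this file's static `concrete_conversion()` helper and the `nir_op` enum from Mesa's `nir.h`; `reconciliation_examples()` is a hypothetical name.)

```c
#include <assert.h>

/* Two phi uses that differ only in "mediump-ness" map to the same
 * concrete op, so they are compatible and the phi can still be
 * narrowed; ops with different concrete forms cannot be reconciled. */
static void
reconciliation_examples(void)
{
   /* i2imp vs. i2i16: both concretely i2i16 -> compatible. */
   assert(concrete_conversion(nir_op_i2imp) == nir_op_i2i16);
   assert(concrete_conversion(nir_op_i2imp) ==
          concrete_conversion(nir_op_i2i16));

   /* i2i16 vs. f2f16: different concrete ops -> the phi keeps its
    * 32-bit destination. */
   assert(concrete_conversion(nir_op_i2i16) !=
          concrete_conversion(nir_op_f2f16));
}
```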
+
+static nir_op
+widening_conversion_op(nir_instr *instr, unsigned *bit_size)
+{
+   if (instr->type != nir_instr_type_alu)
+      return INVALID_OP;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   switch (alu->op) {
+   case nir_op_i2i32:
+   case nir_op_i2f32:
+   case nir_op_u2f32:
+   case nir_op_f2f32:
+   case nir_op_f2i32:
+   case nir_op_f2u32:
+      break;
+   default:
+      return INVALID_OP;
+   }
+
+   *bit_size = nir_src_bit_size(alu->src[0].src);
+
+   /* We also need to check that the conversion's dest was actually
+    * wider:
+    */
+   if (nir_dest_bit_size(alu->dest.dest) <= *bit_size)
+      return INVALID_OP;
+
+   return alu->op;
+}
+
+static nir_alu_type
+op_to_type(nir_op op)
+{
+   return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
+}
+
+/* Try to move narrowing instructions consuming the phi into the phi's
+ * sources, to reduce the phi's precision:
+ */
+static bool
+try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
+{
+   nir_op op = INVALID_OP;
+
+   assert(phi->dest.is_ssa);
+
+   /* If the phi has already been narrowed, there is nothing more to do: */
+   if (phi->dest.ssa.bit_size != 32)
+      return false;
+
+   /* Are the only uses of the phi conversion instructions, and
+    * are they all the same conversion?
+    */
+   nir_foreach_use (use, &phi->dest.ssa) {
+      op = narrowing_conversion_op(use->parent_instr, op);
+
+      /* Not a (compatible) narrowing conversion: */
+      if (op == INVALID_OP)
+         return false;
+   }
+
+   /* An if_use means the phi is used directly in a conditional, i.e.
+    * without a conversion:
+    */
+   if (!list_is_empty(&phi->dest.ssa.if_uses))
+      return false;
+
+   /* If the phi has no uses, then there is nothing to do: */
+   if (op == INVALID_OP)
+      return false;
+
+   /* Construct the replacement phi instruction: */
+   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
+   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
+                     phi->dest.ssa.num_components,
+                     nir_alu_type_get_type_size(nir_op_infos[op].output_type),
+                     NULL);
+
+   /* Push the conversion into the new phi sources: */
+   nir_foreach_phi_src (src, phi) {
+      assert(src->src.is_ssa);
+
+      /* Insert the new conversion instr in the block of the original
+       * phi src:
+       */
+      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
+      nir_ssa_def *old_src = src->src.ssa;
+      nir_ssa_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);
+
+      /* And add the corresponding phi_src to the new_phi: */
+      nir_phi_src *phi_src = ralloc(new_phi, nir_phi_src);
+      phi_src->pred = src->pred;
+      phi_src->src = nir_src_for_ssa(new_src);
+      exec_list_push_tail(&new_phi->srcs, &phi_src->node);
+   }
+
+   /* Rewrite the uses of the original phi to use the new phi directly,
+    * skipping the conversion out of the original phi:
+    */
+   nir_foreach_use (use, &phi->dest.ssa) {
+      /* We've previously established that all the uses are alu
+       * conversion ops:
+       */
+      nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
+
+      assert(alu->dest.dest.is_ssa);
+
+      nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, &new_phi->dest.ssa);
+   }
+
+   /* And finally insert the new phi after all sources are in place: */
+   b->cursor = nir_after_instr(&phi->instr);
+   nir_builder_instr_insert(b, &new_phi->instr);
+
+   return true;
+}
+
+static bool
+can_convert_load_const(nir_load_const_instr *lc, nir_op op)
+{
+   nir_alu_type type = op_to_type(op);
+
+   /* Note that we only handle phis with bit_size == 32: */
+   assert(lc->def.bit_size == 32);
+
+   for (unsigned i = 0; i < lc->def.num_components; i++) {
+      switch (type) {
+      case nir_type_int:
+         if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
+            return false;
+         break;
+      case nir_type_uint:
+         if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
+            return false;
+         break;
+      case nir_type_float:
+         if (lc->value[i].f32 != _mesa_half_to_float(
+                                    _mesa_float_to_half(lc->value[i].f32)))
+            return false;
+         break;
+      default:
+         unreachable("bad type");
+         return false;
+      }
+   }
+
+   return true;
+}
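(Editorial aside: the representability rule in `can_convert_load_const()` above, as a self-contained program. The helper names `fits_int16`/`fits_uint16` are mine; the float case would additionally need Mesa's `_mesa_float_to_half()`/`_mesa_half_to_float()` round trip, so only the integer checks are shown.)

```c
#include <stdint.h>
#include <stdio.h>

/* A 32-bit constant may only feed a narrowed 16-bit phi if the
 * 16-bit round trip reproduces the original value exactly. */
static int fits_int16(int32_t v)   { return v == (int32_t)(int16_t)v; }
static int fits_uint16(uint32_t v) { return v == (uint32_t)(uint16_t)v; }

int main(void)
{
   printf("%d\n", fits_int16(0));         /* 1: trivially representable */
   printf("%d\n", fits_int16(-32768));    /* 1: INT16_MIN survives */
   printf("%d\n", fits_int16(40000));     /* 0: overflows int16 */
   printf("%d\n", fits_uint16(0xffffu));  /* 1: UINT16_MAX fits */
   printf("%d\n", fits_uint16(0x10000u)); /* 0: needs 17 bits */
   return 0;
}
```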
+
+/* Check all the phi sources to see if they are the same widening op, in
+ * which case we can push the widening op to the other side of the phi:
+ */
+static nir_op
+find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
+{
+   nir_op op = INVALID_OP;
+
+   bool has_load_const = false;
+   *bit_size = 0;
+
+   nir_foreach_phi_src (src, phi) {
+      assert(src->src.is_ssa);
+
+      nir_instr *instr = src->src.ssa->parent_instr;
+      if (instr->type == nir_instr_type_load_const) {
+         has_load_const = true;
+         continue;
+      }
+
+      unsigned src_bit_size;
+      nir_op src_op = widening_conversion_op(instr, &src_bit_size);
+
+      /* Not a widening conversion: */
+      if (src_op == INVALID_OP)
+         return INVALID_OP;
+
+      /* If it is a widening conversion, it needs to be the same op as
+       * the other phi sources:
+       */
+      if ((op != INVALID_OP) && (op != src_op))
+         return INVALID_OP;
+
+      if (*bit_size && (*bit_size != src_bit_size))
+         return INVALID_OP;
+
+      op = src_op;
+      *bit_size = src_bit_size;
+   }
+
+   if ((op == INVALID_OP) || !has_load_const)
+      return op;
+
+   /* If we could otherwise move the widening sources, but a load_const
+    * is one of the phi sources (and does not have a widening conversion,
+    * but could have a narrowing->widening sequence inserted without a
+    * loss of precision), then we can insert such a sequence to make the
+    * rest of the transformation possible:
+    */
+   nir_foreach_phi_src (src, phi) {
+      assert(src->src.is_ssa);
+
+      nir_instr *instr = src->src.ssa->parent_instr;
+      if (instr->type != nir_instr_type_load_const)
+         continue;
+
+      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
+         return INVALID_OP;
+   }
+
+   return op;
+}
+
+/* Try to move widening conversions of the phi's sources to the phi's
+ * output, to reduce the phi's precision:
+ */
+static bool
+try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
+{
+   assert(phi->dest.is_ssa);
+
+   /* If the phi has already been narrowed, there is nothing more to do: */
+   if (phi->dest.ssa.bit_size != 32)
+      return false;
+
+   unsigned bit_size;
+   nir_op op = find_widening_op(phi, &bit_size);
+
+   if (op == INVALID_OP)
+      return false;
+
+   /* Construct the replacement phi instruction: */
+   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
+   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
+                     phi->dest.ssa.num_components,
+                     bit_size, NULL);
+
+   /* Remove the widening conversions from the phi sources: */
+   nir_foreach_phi_src (src, phi) {
+      assert(src->src.is_ssa);
+
+      nir_instr *instr = src->src.ssa->parent_instr;
+      nir_ssa_def *new_src;
+
+      b->cursor = nir_after_instr(instr);
+
+      if (instr->type == nir_instr_type_load_const) {
+         /* If the src is a load_const, we've already verified that it
+          * is safe to insert a narrowing conversion to make the rest
+          * of this transformation legal:
+          */
+         nir_load_const_instr *lc = nir_instr_as_load_const(instr);
+
+         if (op_to_type(op) == nir_type_float) {
+            new_src = nir_f2f16(b, &lc->def);
+         } else {
+            new_src = nir_i2i16(b, &lc->def);
+         }
+      } else {
+         /* At this point we know the source is produced by a
+          * conversion:
+          */
+         nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+         /* The conversion we are stripping off could have had a swizzle,
+          * so replace it with a mov if necessary:
+          */
+         unsigned num_comp = nir_dest_num_components(alu->dest.dest);
+         new_src = nir_mov_alu(b, alu->src[0], num_comp);
+      }
+
+      /* And add the corresponding phi_src to the new_phi: */
+      nir_phi_src *phi_src = ralloc(new_phi, nir_phi_src);
+      phi_src->pred = src->pred;
+      phi_src->src = nir_src_for_ssa(new_src);
+      exec_list_push_tail(&new_phi->srcs, &phi_src->node);
+   }
+
+   /* And insert the new phi after all sources are in place: */
+   b->cursor = nir_after_instr(&phi->instr);
+   nir_builder_instr_insert(b, &new_phi->instr);
+
+   /* And finally add back the widening conversion after the phi,
+    * and rewrite the original phi's uses:
+    */
+   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
+   nir_ssa_def *def = nir_build_alu(b, op, &new_phi->dest.ssa, NULL, NULL, NULL);
+
+   nir_ssa_def_rewrite_uses(&phi->dest.ssa, def);
+
+   return true;
+}
+
+static bool
+lower_phi(nir_builder *b, nir_phi_instr *phi)
+{
+   bool progress = try_move_narrowing_dst(b, phi);
+   if (!progress)
+      progress = try_move_widening_src(b, phi);
+   return progress;
+}
+
+bool
+nir_opt_phi_precision(nir_shader *shader)
+{
+   bool progress = false;
+
+   /* If neither 8b nor 16b bit sizes are used, there is no point in
+    * running this pass:
+    */
+   unsigned bit_sizes_used = shader->info.bit_sizes_float |
+                             shader->info.bit_sizes_int;
+
+   if (!bit_sizes_used) {
+      nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
+      bit_sizes_used = shader->info.bit_sizes_float |
+                       shader->info.bit_sizes_int;
+   }
+
+   if (!(bit_sizes_used & (8 | 16)))
+      return false;
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_builder b;
+      nir_builder_init(&b, function->impl);
+
+      nir_foreach_block (block, function->impl) {
+         nir_foreach_instr_safe (instr, block) {
+            /* Phis are always at the start of a block, so stop at the
+             * first non-phi instruction:
+             */
+            if (instr->type != nir_instr_type_phi)
+               break;
+
+            progress |= lower_phi(&b, nir_instr_as_phi(instr));
+         }
+      }
+
+      if (progress) {
+         nir_metadata_preserve(function->impl,
+                               nir_metadata_block_index |
+                               nir_metadata_dominance);
+      } else {
+         nir_metadata_preserve(function->impl, nir_metadata_all);
+      }
+   }
+
+   return progress;
+}
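(Editorial aside before the final hunk: a hedged sketch of how a driver consumes the new pass. `nir` is assumed to be a `nir_shader *`; the fixpoint-loop shape is the usual Mesa idiom, and running `nir_opt_phi_precision` alongside algebraic and constant folding lets them clean up the conversion chains it introduces — which is what the st_glsl_to_nir.cpp hunk below does.)

```c
/* Hypothetical caller sketch: iterate the optimization loop until no
 * pass reports progress.  NIR_PASS sets `progress` to true when the
 * pass it wraps changes the shader. */
bool progress;
do {
   progress = false;
   NIR_PASS(progress, nir, nir_opt_phi_precision);
   NIR_PASS(progress, nir, nir_opt_algebraic);
   NIR_PASS(progress, nir, nir_opt_constant_folding);
} while (progress);
```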
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index e9ea019a498..303dc084639 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -292,6 +292,7 @@ st_nir_opts(nir_shader *nir)
       NIR_PASS(progress, nir, nir_opt_cse);
       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
 
+      NIR_PASS(progress, nir, nir_opt_phi_precision);
       NIR_PASS(progress, nir, nir_opt_algebraic);
       NIR_PASS(progress, nir, nir_opt_constant_folding);