diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index c6a85525473..372000f67d2 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -1130,6 +1130,113 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
    return progress;
 }
 
+static bool
+is_divergent_phi(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_phi)
+      return false;
+
+   nir_phi_instr *phi = nir_instr_as_phi(instr);
+   return nak_nir_phi_is_divergent(phi);
+}
+
+static bool
+nak_nir_opt_uniform_address_impl(struct nir_builder *b,
+                                 nir_intrinsic_instr *intr, void *cb_data)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_cmat_load_shared_nv:
+   case nir_intrinsic_global_atomic_nv:
+   case nir_intrinsic_load_global_nv:
+   case nir_intrinsic_load_scratch_nv:
+   case nir_intrinsic_load_shared_nv:
+   case nir_intrinsic_shared_atomic_nv:
+   case nir_intrinsic_store_global_nv:
+   case nir_intrinsic_store_scratch_nv:
+   case nir_intrinsic_store_shared_nv: {
+      nir_src *offset_src = nir_get_io_offset_src(intr);
+      nir_def *offset = offset_src->ssa;
+      nir_src *uniform_offset_src = nir_get_io_uniform_offset_src(intr);
+      nir_def *uniform_offset = uniform_offset_src->ssa;
+      nir_block *use_block = intr->instr.block;
+
+      assert(nir_src_as_uint(*uniform_offset_src) == 0);
+
+      /* NAK can't collect vectors in non-uniform control flow, so don't
+       * even try. */
+      if (offset->bit_size == 64 && nak_block_is_divergent(use_block))
+         return false;
+
+      /* We ignore any constant offset. */
+      if (nir_src_is_const(*offset_src))
+         return false;
+
+      /* If the source is already uniform, just swap the two sources, as
+       * the uniform slot should be 0. */
+      if (!nir_def_is_divergent_at_use_block(offset, use_block)) {
+         if (is_divergent_phi(nir_def_instr(offset)))
+            return false;
+
+         nir_src_rewrite(uniform_offset_src, offset);
+         nir_src_rewrite(offset_src, uniform_offset);
+         return true;
+      }
+
+      nir_alu_instr *iadd = nir_def_as_alu_or_null(offset_src->ssa);
+      if (!iadd || iadd->op != nir_op_iadd)
+         return false;
+
+      bool src0_div = nir_def_is_divergent_at_use_block(iadd->src[0].src.ssa,
+                                                        use_block);
+      bool src1_div = nir_def_is_divergent_at_use_block(iadd->src[1].src.ssa,
+                                                        use_block);
+      if (src0_div && src1_div)
+         return false;
+
+      b->cursor = nir_before_instr(&intr->instr);
+
+      nir_def *addr, *uaddr;
+      if (src0_div) {
+         assert(!src1_div);
+         addr = nir_ssa_for_alu_src(b, iadd, 0);
+         uaddr = nir_ssa_for_alu_src(b, iadd, 1);
+      } else {
+         assert(src1_div);
+         addr = nir_ssa_for_alu_src(b, iadd, 1);
+         uaddr = nir_ssa_for_alu_src(b, iadd, 0);
+      }
+
+      if (is_divergent_phi(nir_def_instr(uaddr)))
+         return false;
+
+      /* We can remove a u2u64 on the non-uniform source. */
+      if (addr->bit_size == 64) {
+         nir_alu_instr *u2u64 = nir_def_as_alu_or_null(addr);
+         if (u2u64 && u2u64->op == nir_op_u2u64)
+            addr = nir_ssa_for_alu_src(b, u2u64, 0);
+      }
+
+      nir_src_rewrite(offset_src, addr);
+      nir_src_rewrite(uniform_offset_src, uaddr);
+      return true;
+   }
+   default:
+      return false;
+   }
+}
+
+/** This pass assumes it is run after nir_opt_offsets. */
+static bool
+nak_nir_opt_uniform_address(nir_shader *nir)
+{
+   if (nak_debug_no_ugpr())
+      return false;
+
+   nir_divergence_analysis(nir);
+
+   return nir_shader_intrinsics_pass(nir, nak_nir_opt_uniform_address_impl,
+                                     nir_metadata_control_flow, NULL);
+}
+
 static bool
 nak_nir_opt_offset_shift_nv_impl(struct nir_builder *b,
                                  nir_intrinsic_instr *intrin, void *data)
@@ -1333,6 +1440,12 @@ nak_postprocess_nir(nir_shader *nir,
       .cb_data = nak,
    };
    OPT(nir, nir_opt_offsets, &nak_offset_options);
+   if (nak->sm >= 73) {
+      OPT(nir, nak_nir_opt_uniform_address);
+      /* TODO: As we eliminate u2u64s, we could fold more offsets in. However,
+       * this would require verifying the add doesn't overflow, which we can't. */
+      /* OPT(nir, nir_opt_offsets, &nak_offset_options); */
+   }
 
    /* Should run after nir_opt_offsets, because nir_opt_algebraic will move
    * iadds down the chain */
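
A rough sketch of the rewrite the new pass performs, for illustration only: the
SSA names %base and %tid_off are hypothetical, and the intrinsic signature is
assumed from the sources the code manipulates (a regular offset plus a separate
uniform-offset source that starts out as the constant 0).

   Before:
      %off = iadd %base (uniform), %tid_off (divergent)
      %val = @load_global_nv(..., offset=%off, uniform_offset=0)

   After nak_nir_opt_uniform_address:
      %val = @load_global_nv(..., offset=%tid_off, uniform_offset=%base)

The divergent addend stays in the regular offset slot while the uniform addend
moves to the uniform-offset slot, where the backend can keep it in a uniform
register. If the divergent addend was widened with a u2u64, the pass also peels
that conversion off, and if the whole offset is already uniform, it simply
swaps the two sources.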