diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index b810d0b6552..be72578386e 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -9118,6 +9118,9 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       break;
    }
    case nir_intrinsic_lane_permute_16_amd: {
+      /* NOTE: If we use divergence analysis information here instead of the src regclass,
+       * skip_uniformize_merge_phi() should be updated.
+       */
       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
       Temp dst = get_ssa_temp(ctx, &instr->def);
       assert(ctx->program->gfx_level >= GFX10);
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 4cc292f98be..8e3ce3122d1 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -261,6 +261,89 @@ setup_nir(isel_context* ctx, nir_shader* nir)
    nir_index_ssa_defs(func);
 }
 
+/* Returns true if we can skip uniformization of a merge phi. This makes the destination divergent,
+ * and so is only safe if the inconsistency it introduces into the divergence analysis won't break
+ * code generation. If we unsafely skip uniformization, later instructions (such as SSBO loads,
+ * some subgroup intrinsics and certain conversions) can use divergence analysis information which
+ * is no longer correct.
+ */
+bool
+skip_uniformize_merge_phi(nir_def* ssa, unsigned depth)
+{
+   if (depth >= 16)
+      return false;
+
+   nir_foreach_use (src, ssa) {
+      switch (nir_src_parent_instr(src)->type) {
+      case nir_instr_type_alu: {
+         nir_alu_instr* alu = nir_instr_as_alu(nir_src_parent_instr(src));
+         if (alu->def.divergent)
+            break;
+
+         switch (alu->op) {
+         case nir_op_f2i16:
+         case nir_op_f2u16:
+         case nir_op_f2i32:
+         case nir_op_f2u32:
+         case nir_op_b2i8:
+         case nir_op_b2i16:
+         case nir_op_b2i32:
+         case nir_op_b2b32:
+         case nir_op_b2f16:
+         case nir_op_b2f32:
+         case nir_op_b2f64:
+         case nir_op_mov:
+            /* These opcodes p_as_uniform or vote_any() the source, so fail immediately. We don't
+             * need to do this for non-nir_op_b2 if we know we'll move it back into a VGPR,
+             * in which case the p_as_uniform would be eliminated. This would be way too fragile,
+             * though.
+             */
+            return false;
+         default:
+            if (!skip_uniformize_merge_phi(&alu->def, depth + 1))
+               return false;
+            break;
+         }
+         break;
+      }
+      case nir_instr_type_intrinsic: {
+         nir_intrinsic_instr* intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
+         unsigned src_idx = src - intrin->src;
+         /* nir_intrinsic_lane_permute_16_amd is only safe because we don't use divergence analysis
+          * for its instruction selection. We use that intrinsic for NGG culling. All others are
+          * stores with VGPR sources.
+          */
+         if (intrin->intrinsic == nir_intrinsic_lane_permute_16_amd ||
+             intrin->intrinsic == nir_intrinsic_export_amd ||
+             intrin->intrinsic == nir_intrinsic_export_dual_src_blend_amd ||
+             (intrin->intrinsic == nir_intrinsic_export_row_amd && src_idx == 0) ||
+             (intrin->intrinsic == nir_intrinsic_store_buffer_amd && src_idx == 0) ||
+             (intrin->intrinsic == nir_intrinsic_store_ssbo && src_idx == 0) ||
+             (intrin->intrinsic == nir_intrinsic_store_global && src_idx == 0) ||
+             (intrin->intrinsic == nir_intrinsic_store_scratch && src_idx == 0) ||
+             (intrin->intrinsic == nir_intrinsic_store_shared && src_idx == 0))
+            break;
+         return false;
+      }
+      case nir_instr_type_phi: {
+         nir_phi_instr* phi = nir_instr_as_phi(nir_src_parent_instr(src));
+         if (phi->def.divergent || skip_uniformize_merge_phi(&phi->def, depth + 1))
+            break;
+         return false;
+      }
+      case nir_instr_type_tex: {
+         /* This is either used as a VGPR source or it's a (potentially undef) descriptor. */
+         break;
+      }
+      default: {
+         return false;
+      }
+      }
+   }
+
+   return true;
+}
+
 } /* end namespace */
 
 void
@@ -603,7 +686,7 @@ init_context(isel_context* ctx, nir_shader* shader)
                /* In case of uniform phis after divergent merges, ensure that the dst is an
                 * SGPR and does not contain undefined values for some invocations. */
-               if (divergent_merge)
+               if (divergent_merge && !skip_uniformize_merge_phi(&phi->def, 0))
                   type = RegType::sgpr;
             }
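
For readers less familiar with the approach, below is a small self-contained toy model of the check the new helper performs. Def, UseKind and can_skip_uniformize() are invented names for illustration only and are not part of the patch or of NIR; the real code walks nir_def/nir_src structures and tests concrete opcodes and intrinsics. The idea it mirrors: a merge phi may stay divergent only if every transitive use can consume the value as a VGPR, with a recursion cap corresponding to the patch's "depth >= 16" bail-out.

#include <cstdio>
#include <utility>
#include <vector>

enum class UseKind {
   VgprSink,    /* store/export data operand or texture source: a VGPR is fine */
   ScalarUser,  /* would need p_as_uniform/vote_any() to become uniform: must bail out */
   PassThrough, /* phi/ALU producing a new def whose own uses must be checked too */
};

struct Def {
   std::vector<std::pair<UseKind, const Def*>> uses;
};

/* Returns true if every transitive use can take the value as a VGPR, so the
 * merge phi could be left divergent. The depth cap bounds the recursion,
 * like the patch's "depth >= 16" cut-off. */
static bool
can_skip_uniformize(const Def* def, unsigned depth = 0)
{
   if (depth >= 16)
      return false;

   for (const auto& [kind, next] : def->uses) {
      switch (kind) {
      case UseKind::VgprSink: break;
      case UseKind::ScalarUser: return false;
      case UseKind::PassThrough:
         if (!next || !can_skip_uniformize(next, depth + 1))
            return false;
         break;
      }
   }
   return true;
}

int main()
{
   Def store_only{{{UseKind::VgprSink, nullptr}}};       /* e.g. a store_ssbo data operand */
   Def merge_phi{{{UseKind::PassThrough, &store_only}}}; /* phi whose only user feeds that store */
   std::printf("skip uniformize: %s\n", can_skip_uniformize(&merge_phi) ? "yes" : "no");
   return 0;
}

In the actual patch, the VgprSink role is played by the listed stores, exports and texture sources, while the ScalarUser role corresponds to the b2*/f2* conversions and nir_op_mov that would force a p_as_uniform or vote on the value.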