diff --git a/.pick_status.json b/.pick_status.json
index a9158c3fb39..b229a2a03cb 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -706,7 +706,7 @@
         "description": "spirv: Don't emit RMW for vector indexing in shared or global",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index 93c4e004ab1..77cdcd56a58 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -131,6 +131,18 @@ vtn_mode_uses_ssa_offset(struct vtn_builder *b,
           mode == vtn_variable_mode_push_constant;
 }
 
+static bool
+vtn_mode_is_cross_invocation(struct vtn_builder *b,
+                             enum vtn_variable_mode mode)
+{
+   return mode == vtn_variable_mode_ssbo ||
+          mode == vtn_variable_mode_ubo ||
+          mode == vtn_variable_mode_phys_ssbo ||
+          mode == vtn_variable_mode_push_constant ||
+          mode == vtn_variable_mode_workgroup ||
+          mode == vtn_variable_mode_cross_workgroup;
+}
+
 static bool
 vtn_pointer_is_external_block(struct vtn_builder *b,
                               struct vtn_pointer *ptr)
@@ -1093,11 +1105,11 @@ _vtn_variable_load_store(struct vtn_builder *b, bool load,
       if (glsl_type_is_vector_or_scalar(ptr->type->type)) {
          /* We hit a vector or scalar; go ahead and emit the load[s] */
          nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
-         if (vtn_pointer_is_external_block(b, ptr)) {
-            /* If it's external, we call nir_load/store_deref directly.  The
-             * vtn_local_load/store helpers are too clever and do magic to
-             * avoid array derefs of vectors.  That magic is both less
-             * efficient than the direct load/store and, in the case of
+         if (vtn_mode_is_cross_invocation(b, ptr->mode)) {
+            /* If it's cross-invocation, we call nir_load/store_deref
+             * directly.  The vtn_local_load/store helpers are too clever and
+             * do magic to avoid array derefs of vectors.  That magic is both
+             * less efficient than the direct load/store and, in the case of
              * stores, is broken because it creates a race condition if two
              * threads are writing to different components of the same vector
              * due to the load+insert+store it uses to emulate the array