diff --git a/.pick_status.json b/.pick_status.json index a9158c3fb39..b229a2a03cb 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -706,7 +706,7 @@ "description": "spirv: Don't emit RMW for vector indexing in shared or global", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 93c4e004ab1..77cdcd56a58 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -131,6 +131,18 @@ vtn_mode_uses_ssa_offset(struct vtn_builder *b, mode == vtn_variable_mode_push_constant; } +static bool +vtn_mode_is_cross_invocation(struct vtn_builder *b, + enum vtn_variable_mode mode) +{ + return mode == vtn_variable_mode_ssbo || + mode == vtn_variable_mode_ubo || + mode == vtn_variable_mode_phys_ssbo || + mode == vtn_variable_mode_push_constant || + mode == vtn_variable_mode_workgroup || + mode == vtn_variable_mode_cross_workgroup; +} + static bool vtn_pointer_is_external_block(struct vtn_builder *b, struct vtn_pointer *ptr) @@ -1093,11 +1105,11 @@ _vtn_variable_load_store(struct vtn_builder *b, bool load, if (glsl_type_is_vector_or_scalar(ptr->type->type)) { /* We hit a vector or scalar; go ahead and emit the load[s] */ nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr); - if (vtn_pointer_is_external_block(b, ptr)) { - /* If it's external, we call nir_load/store_deref directly. The - * vtn_local_load/store helpers are too clever and do magic to - * avoid array derefs of vectors. That magic is both less - * efficient than the direct load/store and, in the case of + if (vtn_mode_is_cross_invocation(b, ptr->mode)) { + /* If it's cross-invocation, we call nir_load/store_deref + * directly. The vtn_local_load/store helpers are too clever and + * do magic to avoid array derefs of vectors. That magic is both + * less efficient than the direct load/store and, in the case of * stores, is broken because it creates a race condition if two * threads are writing to different components of the same vector * due to the load+insert+store it uses to emulate the array