nir: Add read_invocation_cond_ir3 intrinsic

On Qualcomm, we have shared registers similar to SGPRs on AMD. However, there is no readlane or readfirstlane primitive. Shared registers can only be written to when just one lane is active. This means that we have to lower readInvocation(val, id) to something like: if (gl_SubgroupInvocationID == id) { scalar_reg = val; } return scalar_reg; However, it's a bit difficult to actually get the value of gl_SubgroupInvocationID in the backend, because for compute it requires some calculations and we don't have any CSE support in the backend. This intrinsic lets us turn it into "readInvocationCond(val, id == gl_SubgroupInvocationID)" in NIR, at which point the backend code generation is a lot easier. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6752>
2026-05-06 11:38:05 +02:00 · 2020-09-14 10:14:55 +02:00 · 2020-09-14 10:14:55 +02:00 · cc514bfa0e
commit cc514bfa0e
parent e4e79de2a4
4 changed files with 22 additions and 0 deletions
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@ -4738,6 +4738,7 @@ typedef struct nir_lower_subgroups_options {
   bool lower_quad_broadcast_dynamic:1;
   bool lower_quad_broadcast_dynamic_to_const:1;
   bool lower_elect:1;
+   bool lower_read_invocation_to_cond:1;
 } nir_lower_subgroups_options;

 bool nir_lower_subgroups(nir_shader *shader,
--- a/src/compiler/nir/nir_control_flow
+++ b/src/compiler/nir/nir_control_flow
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@ -362,6 +362,10 @@ intrinsic("ballot", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE])
 intrinsic("read_invocation", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
 intrinsic("read_first_invocation", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])

+# Returns the value of the first source for the lane where the second source is
+# true. The second source must be true for exactly one lane.
+intrinsic("read_invocation_cond_ir3", src_comp=[0, 1], dest_comp=0, flags=[CAN_ELIMINATE])
+
 # Additional SPIR-V ballot intrinsics
 #
 # These correspond to the SPIR-V opcodes
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@ -493,6 +493,15 @@ lower_dynamic_quad_broadcast(nir_builder *b, nir_intrinsic_instr *intrin,
   return dst;
 }

+static nir_ssa_def *
+lower_read_invocation_to_cond(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   nir_ssa_def *lane = nir_load_subgroup_invocation(b);
+   nir_ssa_def *cond = nir_ieq(b, intrin->src[1].ssa, lane); /* id == lane */
+   return nir_read_invocation_cond_ir3(b, intrin->dest.ssa.bit_size,
+                                       intrin->src[0].ssa, cond);
+}
+
 static nir_ssa_def *
 lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
 {
@ -524,6 +533,14 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
      break;

   case nir_intrinsic_read_invocation:
+      if (options->lower_to_scalar && intrin->num_components > 1)
+         return lower_subgroup_op_to_scalar(b, intrin, false);
+
+      if (options->lower_read_invocation_to_cond)
+         return lower_read_invocation_to_cond(b, intrin);
+
+      break;
+
   case nir_intrinsic_read_first_invocation:
      if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_subgroup_op_to_scalar(b, intrin, false);