diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 8167676674a..4e0cf8566a2 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1979,6 +1979,18 @@ typedef enum { * intrinsic are due to the register reads/writes. */ NIR_INTRINSIC_CAN_REORDER = BITFIELD_BIT(1), + + /** + * Identifies any subgroup-like operation whose behaviour depends on other + * logical threads. This is incompatible with CAN_REORDER. + */ + NIR_INTRINSIC_SUBGROUP = BITFIELD_BIT(2), + + /** + * Identifies an operation whose behaviour depends (only) on the local quad. + * Any QUADGROUP intrinsic is also SUBGROUP. + */ + NIR_INTRINSIC_QUADGROUP = BITFIELD_BIT(3), } nir_intrinsic_semantic_flag; /** diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 7258b7722b9..afda3186020 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -95,6 +95,11 @@ class Intrinsic(object): CAN_ELIMINATE = "NIR_INTRINSIC_CAN_ELIMINATE" CAN_REORDER = "NIR_INTRINSIC_CAN_REORDER" +SUBGROUP = "NIR_INTRINSIC_SUBGROUP" +QUADGROUP = "NIR_INTRINSIC_QUADGROUP | " + SUBGROUP + +SUBGROUP_FLAGS = [CAN_ELIMINATE, SUBGROUP] +QUADGROUP_FLAGS = [CAN_ELIMINATE, QUADGROUP] INTR_INDICES = [] INTR_OPCODES = {} @@ -432,7 +437,7 @@ intrinsic("is_sparse_resident_zink", dest_comp=1, src_comp=[0], bit_sizes=[1], for suffix in ["", "_fine", "_coarse"]: for axis in ["x", "y"]: intrinsic(f"dd{axis}{suffix}", dest_comp=0, src_comp=[0], - bit_sizes=[16, 32], flags=[CAN_ELIMINATE]) + bit_sizes=[16, 32], flags=QUADGROUP_FLAGS) # a barrier is an intrinsic with no inputs/outputs but which can't be moved # around/optimized in general @@ -476,9 +481,9 @@ intrinsic("shader_clock", dest_comp=2, bit_sizes=[32], flags=[CAN_ELIMINATE], # readFirstInvocationARB() # # GLSL functions from ARB_shader_ballot. 
-intrinsic("ballot", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE]) -intrinsic("read_invocation", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) -intrinsic("read_first_invocation", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) +intrinsic("ballot", src_comp=[1], dest_comp=0, flags=SUBGROUP_FLAGS) +intrinsic("read_invocation", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS) +intrinsic("read_first_invocation", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS) # Same as ballot, but inactive invocations contribute undefined bits. intrinsic("ballot_relaxed", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE]) @@ -504,9 +509,9 @@ intrinsic("read_getlast_ir3", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[ # OpGroupNonUniformElect # OpSubgroupFirstInvocationKHR # OpGroupNonUniformInverseBallot -intrinsic("elect", dest_comp=1, flags=[CAN_ELIMINATE]) -intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE]) -intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE]) +intrinsic("elect", dest_comp=1, flags=SUBGROUP_FLAGS) +intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS) +intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS) intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE]) barrier("begin_invocation_interlock") @@ -517,10 +522,10 @@ intrinsic("demote_if", src_comp=[1]) intrinsic("terminate_if", src_comp=[1]) # ARB_shader_group_vote intrinsics -intrinsic("vote_any", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE]) -intrinsic("vote_all", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE]) -intrinsic("vote_feq", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE]) -intrinsic("vote_ieq", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE]) +intrinsic("vote_any", src_comp=[1], dest_comp=1, flags=SUBGROUP_FLAGS) +intrinsic("vote_all", src_comp=[1], dest_comp=1, 
flags=SUBGROUP_FLAGS) +intrinsic("vote_feq", src_comp=[0], dest_comp=1, flags=SUBGROUP_FLAGS) +intrinsic("vote_ieq", src_comp=[0], dest_comp=1, flags=SUBGROUP_FLAGS) # Ballot ALU operations from SPIR-V. # @@ -535,39 +540,41 @@ intrinsic("ballot_find_lsb", src_comp=[4], dest_comp=1, flags=[CAN_REORDER, CAN_ intrinsic("ballot_find_msb", src_comp=[4], dest_comp=1, flags=[CAN_REORDER, CAN_ELIMINATE]) # Shuffle operations from SPIR-V. -intrinsic("shuffle", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) -intrinsic("shuffle_xor", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) -intrinsic("shuffle_up", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) -intrinsic("shuffle_down", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) +intrinsic("shuffle", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS) +intrinsic("shuffle_xor", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS) +intrinsic("shuffle_up", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS) +intrinsic("shuffle_down", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS) # Quad operations from SPIR-V. 
-intrinsic("quad_broadcast", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) -intrinsic("quad_swap_horizontal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) -intrinsic("quad_swap_vertical", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) -intrinsic("quad_swap_diagonal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) +intrinsic("quad_broadcast", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS) +intrinsic("quad_swap_horizontal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS) +intrinsic("quad_swap_vertical", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS) +intrinsic("quad_swap_diagonal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS) # Similar to vote_any and vote_all, but per-quad instead of per-wavefront. # Equivalent to subgroupOr(val, 4) and subgroupAnd(val, 4) assuming val is # boolean. -intrinsic("quad_vote_any", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE]) -intrinsic("quad_vote_all", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE]) +intrinsic("quad_vote_any", src_comp=[1], dest_comp=1, flags=QUADGROUP_FLAGS) +intrinsic("quad_vote_all", src_comp=[1], dest_comp=1, flags=QUADGROUP_FLAGS) # Rotate operation from SPIR-V: SpvOpGroupNonUniformRotateKHR. 
intrinsic("rotate", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, - indices=[CLUSTER_SIZE], flags=[CAN_ELIMINATE]); + indices=[CLUSTER_SIZE], flags=SUBGROUP_FLAGS) intrinsic("reduce", src_comp=[0], dest_comp=0, bit_sizes=src0, - indices=[REDUCTION_OP, CLUSTER_SIZE], flags=[CAN_ELIMINATE]) + indices=[REDUCTION_OP, CLUSTER_SIZE], flags=SUBGROUP_FLAGS) intrinsic("inclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0, - indices=[REDUCTION_OP], flags=[CAN_ELIMINATE]) + indices=[REDUCTION_OP], flags=SUBGROUP_FLAGS) intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0, - indices=[REDUCTION_OP], flags=[CAN_ELIMINATE]) + indices=[REDUCTION_OP], flags=SUBGROUP_FLAGS) # AMD shader ballot operations intrinsic("quad_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0, - indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE]) + indices=[SWIZZLE_MASK, FETCH_INACTIVE], + flags=QUADGROUP_FLAGS) intrinsic("masked_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0, - indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE]) + indices=[SWIZZLE_MASK, FETCH_INACTIVE], + flags=SUBGROUP_FLAGS) intrinsic("write_invocation_amd", src_comp=[0, 0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) # src = [ mask, addition ]