nir: mark subgroup/quadgroup ops

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33410>
This commit is contained in:
Alyssa Rosenzweig 2025-02-05 12:52:48 -05:00 committed by Marge Bot
parent 7168f9a4f3
commit b9ce851b6c
2 changed files with 46 additions and 27 deletions

View file

@ -1979,6 +1979,18 @@ typedef enum {
* intrinsic are due to the register reads/writes.
*/
NIR_INTRINSIC_CAN_REORDER = BITFIELD_BIT(1),
/**
* Identifies any subgroup-like operation whose behaviour depends on other
* logical threads. This is incompatible with CAN_REORDER.
*/
NIR_INTRINSIC_SUBGROUP = BITFIELD_BIT(2),
/**
* Identifies an operation whose behaviour depends (only) on the local quad.
* Any QUADGROUP intrinsic is also SUBGROUP.
*/
NIR_INTRINSIC_QUADGROUP = BITFIELD_BIT(3),
} nir_intrinsic_semantic_flag;
/**

View file

@ -95,6 +95,11 @@ class Intrinsic(object):
CAN_ELIMINATE = "NIR_INTRINSIC_CAN_ELIMINATE"
CAN_REORDER = "NIR_INTRINSIC_CAN_REORDER"
SUBGROUP = "NIR_INTRINSIC_SUBGROUP"
QUADGROUP = "NIR_INTRINSIC_QUADGROUP | " + SUBGROUP
SUBGROUP_FLAGS = [CAN_ELIMINATE, SUBGROUP]
QUADGROUP_FLAGS = [CAN_ELIMINATE, QUADGROUP]
INTR_INDICES = []
INTR_OPCODES = {}
@ -432,7 +437,7 @@ intrinsic("is_sparse_resident_zink", dest_comp=1, src_comp=[0], bit_sizes=[1],
for suffix in ["", "_fine", "_coarse"]:
for axis in ["x", "y"]:
intrinsic(f"dd{axis}{suffix}", dest_comp=0, src_comp=[0],
bit_sizes=[16, 32], flags=[CAN_ELIMINATE])
bit_sizes=[16, 32], flags=[CAN_ELIMINATE, QUADGROUP])
# a barrier is an intrinsic with no inputs/outputs but which can't be moved
# around/optimized in general
@ -476,9 +481,9 @@ intrinsic("shader_clock", dest_comp=2, bit_sizes=[32], flags=[CAN_ELIMINATE],
# readFirstInvocationARB()
#
# GLSL functions from ARB_shader_ballot.
intrinsic("ballot", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE])
intrinsic("read_invocation", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("read_first_invocation", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("ballot", src_comp=[1], dest_comp=0, flags=SUBGROUP_FLAGS)
intrinsic("read_invocation", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
intrinsic("read_first_invocation", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
# Same as ballot, but inactive invocations contribute undefined bits.
intrinsic("ballot_relaxed", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE])
@ -504,9 +509,9 @@ intrinsic("read_getlast_ir3", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[
# OpGroupNonUniformElect
# OpSubgroupFirstInvocationKHR
# OpGroupNonUniformInverseBallot
intrinsic("elect", dest_comp=1, flags=[CAN_ELIMINATE])
intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
intrinsic("elect", dest_comp=1, flags=SUBGROUP_FLAGS)
intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS)
intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS)
intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE])
barrier("begin_invocation_interlock")
@ -517,10 +522,10 @@ intrinsic("demote_if", src_comp=[1])
intrinsic("terminate_if", src_comp=[1])
# ARB_shader_group_vote intrinsics
intrinsic("vote_any", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
intrinsic("vote_all", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
intrinsic("vote_feq", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE])
intrinsic("vote_ieq", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE])
intrinsic("vote_any", src_comp=[1], dest_comp=1, flags=SUBGROUP_FLAGS)
intrinsic("vote_all", src_comp=[1], dest_comp=1, flags=SUBGROUP_FLAGS)
intrinsic("vote_feq", src_comp=[0], dest_comp=1, flags=SUBGROUP_FLAGS)
intrinsic("vote_ieq", src_comp=[0], dest_comp=1, flags=SUBGROUP_FLAGS)
# Ballot ALU operations from SPIR-V.
#
@ -535,39 +540,41 @@ intrinsic("ballot_find_lsb", src_comp=[4], dest_comp=1, flags=[CAN_REORDER, CAN_
intrinsic("ballot_find_msb", src_comp=[4], dest_comp=1, flags=[CAN_REORDER, CAN_ELIMINATE])
# Shuffle operations from SPIR-V.
intrinsic("shuffle", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("shuffle_xor", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("shuffle_up", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("shuffle_down", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("shuffle", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
intrinsic("shuffle_xor", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
intrinsic("shuffle_up", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
intrinsic("shuffle_down", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
# Quad operations from SPIR-V.
intrinsic("quad_broadcast", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("quad_swap_horizontal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("quad_swap_vertical", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("quad_swap_diagonal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("quad_broadcast", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS)
intrinsic("quad_swap_horizontal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS)
intrinsic("quad_swap_vertical", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS)
intrinsic("quad_swap_diagonal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS)
# Similar to vote_any and vote_all, but per-quad instead of per-subgroup.
# Equivalent to subgroupClusteredOr(val, 4) and subgroupClusteredAnd(val, 4)
# assuming val is boolean.
intrinsic("quad_vote_any", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
intrinsic("quad_vote_all", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
intrinsic("quad_vote_any", src_comp=[1], dest_comp=1, flags=QUADGROUP_FLAGS)
intrinsic("quad_vote_all", src_comp=[1], dest_comp=1, flags=QUADGROUP_FLAGS)
# Rotate operation from SPIR-V: SpvOpGroupNonUniformRotateKHR.
intrinsic("rotate", src_comp=[0, 1], dest_comp=0, bit_sizes=src0,
indices=[CLUSTER_SIZE], flags=[CAN_ELIMINATE]);
indices=[CLUSTER_SIZE], flags=SUBGROUP_FLAGS);
intrinsic("reduce", src_comp=[0], dest_comp=0, bit_sizes=src0,
indices=[REDUCTION_OP, CLUSTER_SIZE], flags=[CAN_ELIMINATE])
indices=[REDUCTION_OP, CLUSTER_SIZE], flags=SUBGROUP_FLAGS)
intrinsic("inclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
indices=[REDUCTION_OP], flags=[CAN_ELIMINATE])
indices=[REDUCTION_OP], flags=SUBGROUP_FLAGS)
intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
indices=[REDUCTION_OP], flags=[CAN_ELIMINATE])
indices=[REDUCTION_OP], flags=SUBGROUP_FLAGS)
# AMD shader ballot operations
intrinsic("quad_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0,
indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE])
indices=[SWIZZLE_MASK, FETCH_INACTIVE],
flags=QUADGROUP_FLAGS)
intrinsic("masked_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0,
indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE])
indices=[SWIZZLE_MASK, FETCH_INACTIVE],
flags=SUBGROUP_FLAGS)
intrinsic("write_invocation_amd", src_comp=[0, 0, 1], dest_comp=0, bit_sizes=src0,
flags=[CAN_ELIMINATE])
# src = [ mask, addition ]