Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (last synced 2026-05-08 15:38:09 +02:00)
nir: mark subgroup/quadgroup ops
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33410>
This commit is contained in:
parent
7168f9a4f3
commit
b9ce851b6c
2 changed files with 46 additions and 27 deletions
|
|
@ -1979,6 +1979,18 @@ typedef enum {
|
|||
* intrinsic are due to the register reads/writes.
|
||||
*/
|
||||
NIR_INTRINSIC_CAN_REORDER = BITFIELD_BIT(1),
|
||||
|
||||
/**
|
||||
* Identifies any subgroup-like operation whose behaviour depends on other
|
||||
* logical threads. This is incompatible with CAN_REORDER.
|
||||
*/
|
||||
NIR_INTRINSIC_SUBGROUP = BITFIELD_BIT(2),
|
||||
|
||||
/**
|
||||
* Identifies an operation whose behaviour depends (only) on the local quad.
|
||||
* Any QUADGROUP intrinsic is also SUBGROUP.
|
||||
*/
|
||||
NIR_INTRINSIC_QUADGROUP = BITFIELD_BIT(3),
|
||||
} nir_intrinsic_semantic_flag;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -95,6 +95,11 @@ class Intrinsic(object):
|
|||
|
||||
CAN_ELIMINATE = "NIR_INTRINSIC_CAN_ELIMINATE"
|
||||
CAN_REORDER = "NIR_INTRINSIC_CAN_REORDER"
|
||||
SUBGROUP = "NIR_INTRINSIC_SUBGROUP"
|
||||
QUADGROUP = "NIR_INTRINSIC_QUADGROUP | " + SUBGROUP
|
||||
|
||||
SUBGROUP_FLAGS = [CAN_ELIMINATE, SUBGROUP]
|
||||
QUADGROUP_FLAGS = [CAN_ELIMINATE, QUADGROUP]
|
||||
|
||||
INTR_INDICES = []
|
||||
INTR_OPCODES = {}
|
||||
|
|
@ -432,7 +437,7 @@ intrinsic("is_sparse_resident_zink", dest_comp=1, src_comp=[0], bit_sizes=[1],
|
|||
for suffix in ["", "_fine", "_coarse"]:
|
||||
for axis in ["x", "y"]:
|
||||
intrinsic(f"dd{axis}{suffix}", dest_comp=0, src_comp=[0],
|
||||
bit_sizes=[16, 32], flags=[CAN_ELIMINATE])
|
||||
bit_sizes=[16, 32], flags=[CAN_ELIMINATE, QUADGROUP])
|
||||
|
||||
# a barrier is an intrinsic with no inputs/outputs but which can't be moved
|
||||
# around/optimized in general
|
||||
|
|
@ -476,9 +481,9 @@ intrinsic("shader_clock", dest_comp=2, bit_sizes=[32], flags=[CAN_ELIMINATE],
|
|||
# readFirstInvocationARB()
|
||||
#
|
||||
# GLSL functions from ARB_shader_ballot.
|
||||
intrinsic("ballot", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("read_invocation", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("read_first_invocation", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("ballot", src_comp=[1], dest_comp=0, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("read_invocation", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("read_first_invocation", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
|
||||
|
||||
# Same as ballot, but inactive invocations contribute undefined bits.
|
||||
intrinsic("ballot_relaxed", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE])
|
||||
|
|
@ -504,9 +509,9 @@ intrinsic("read_getlast_ir3", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[
|
|||
# OpGroupNonUniformElect
|
||||
# OpSubgroupFirstInvocationKHR
|
||||
# OpGroupNonUniformInverseBallot
|
||||
intrinsic("elect", dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
|
||||
intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
|
||||
intrinsic("elect", dest_comp=1, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS)
|
||||
intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS)
|
||||
intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
|
||||
barrier("begin_invocation_interlock")
|
||||
|
|
@ -517,10 +522,10 @@ intrinsic("demote_if", src_comp=[1])
|
|||
intrinsic("terminate_if", src_comp=[1])
|
||||
|
||||
# ARB_shader_group_vote intrinsics
|
||||
intrinsic("vote_any", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
intrinsic("vote_all", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
intrinsic("vote_feq", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
intrinsic("vote_ieq", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
intrinsic("vote_any", src_comp=[1], dest_comp=1, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("vote_all", src_comp=[1], dest_comp=1, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("vote_feq", src_comp=[0], dest_comp=1, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("vote_ieq", src_comp=[0], dest_comp=1, flags=SUBGROUP_FLAGS)
|
||||
|
||||
# Ballot ALU operations from SPIR-V.
|
||||
#
|
||||
|
|
@ -535,39 +540,41 @@ intrinsic("ballot_find_lsb", src_comp=[4], dest_comp=1, flags=[CAN_REORDER, CAN_
|
|||
intrinsic("ballot_find_msb", src_comp=[4], dest_comp=1, flags=[CAN_REORDER, CAN_ELIMINATE])
|
||||
|
||||
# Shuffle operations from SPIR-V.
|
||||
intrinsic("shuffle", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("shuffle_xor", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("shuffle_up", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("shuffle_down", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("shuffle", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("shuffle_xor", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("shuffle_up", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("shuffle_down", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=SUBGROUP_FLAGS)
|
||||
|
||||
# Quad operations from SPIR-V.
|
||||
intrinsic("quad_broadcast", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("quad_swap_horizontal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("quad_swap_vertical", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("quad_swap_diagonal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("quad_broadcast", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS)
|
||||
intrinsic("quad_swap_horizontal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS)
|
||||
intrinsic("quad_swap_vertical", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS)
|
||||
intrinsic("quad_swap_diagonal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=QUADGROUP_FLAGS)
|
||||
|
||||
# Similar to vote_any and vote_all, but per-quad instead of per-wavefront.
|
||||
# Equivalent to subgroupClusteredOr(val, 4) and subgroupClusteredAnd(val, 4) assuming val is
|
||||
# boolean.
|
||||
intrinsic("quad_vote_any", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
intrinsic("quad_vote_all", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
intrinsic("quad_vote_any", src_comp=[1], dest_comp=1, flags=QUADGROUP_FLAGS)
|
||||
intrinsic("quad_vote_all", src_comp=[1], dest_comp=1, flags=QUADGROUP_FLAGS)
|
||||
|
||||
# Rotate operation from SPIR-V: SpvOpGroupNonUniformRotateKHR.
|
||||
intrinsic("rotate", src_comp=[0, 1], dest_comp=0, bit_sizes=src0,
|
||||
indices=[CLUSTER_SIZE], flags=[CAN_ELIMINATE]);
|
||||
indices=[CLUSTER_SIZE], flags=SUBGROUP_FLAGS);
|
||||
|
||||
intrinsic("reduce", src_comp=[0], dest_comp=0, bit_sizes=src0,
|
||||
indices=[REDUCTION_OP, CLUSTER_SIZE], flags=[CAN_ELIMINATE])
|
||||
indices=[REDUCTION_OP, CLUSTER_SIZE], flags=SUBGROUP_FLAGS)
|
||||
intrinsic("inclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
|
||||
indices=[REDUCTION_OP], flags=[CAN_ELIMINATE])
|
||||
indices=[REDUCTION_OP], flags=SUBGROUP_FLAGS)
|
||||
intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
|
||||
indices=[REDUCTION_OP], flags=[CAN_ELIMINATE])
|
||||
indices=[REDUCTION_OP], flags=SUBGROUP_FLAGS)
|
||||
|
||||
# AMD shader ballot operations
|
||||
intrinsic("quad_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0,
|
||||
indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE])
|
||||
indices=[SWIZZLE_MASK, FETCH_INACTIVE],
|
||||
flags=QUADGROUP_FLAGS)
|
||||
intrinsic("masked_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0,
|
||||
indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE])
|
||||
indices=[SWIZZLE_MASK, FETCH_INACTIVE],
|
||||
flags=SUBGROUP_FLAGS)
|
||||
intrinsic("write_invocation_amd", src_comp=[0, 0, 1], dest_comp=0, bit_sizes=src0,
|
||||
flags=[CAN_ELIMINATE])
|
||||
# src = [ mask, addition ]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue