mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
nir: Add quad vote intrinsics
Both Intel and AMD have special hardware support for these.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/218>
This commit is contained in:
parent
27734c52eb
commit
1cfb0ae92c
3 changed files with 10 additions and 0 deletions
|
|
@@ -420,6 +420,8 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_quad_swap_horizontal:
|
||||
case nir_intrinsic_quad_swap_vertical:
|
||||
case nir_intrinsic_quad_swap_diagonal:
|
||||
case nir_intrinsic_quad_vote_any:
|
||||
case nir_intrinsic_quad_vote_all:
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_load_shared2_amd:
|
||||
|
|
|
|||
|
|
@@ -724,6 +724,8 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
|
|||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_quad_vote_any:
|
||||
case nir_intrinsic_quad_vote_all:
|
||||
case nir_intrinsic_quad_broadcast:
|
||||
case nir_intrinsic_quad_swap_horizontal:
|
||||
case nir_intrinsic_quad_swap_vertical:
|
||||
|
|
|
|||
|
|
@@ -499,6 +499,12 @@ intrinsic("quad_swap_horizontal", src_comp=[0], dest_comp=0, bit_sizes=src0, fla
|
|||
intrinsic("quad_swap_vertical", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
intrinsic("quad_swap_diagonal", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE])
|
||||
|
||||
# Similar to vote_any and vote_all, but per-quad instead of per-wavefront.
|
||||
# Equivalent to subgroupClusteredOr(val, 4) and subgroupClusteredAnd(val, 4) assuming val is
|
||||
# boolean.
|
||||
intrinsic("quad_vote_any", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
intrinsic("quad_vote_all", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
|
||||
# Rotate operation from SPIR-V: SpvOpGroupNonUniformRotateKHR.
|
||||
intrinsic("rotate", src_comp=[0, 1], dest_comp=0, bit_sizes=src0,
|
||||
indices=[EXECUTION_SCOPE, CLUSTER_SIZE], flags=[CAN_ELIMINATE]);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue