nir: add shuffle_{xor,up,down}_uniform_ir3 intrinsics

These are like shuffle_{xor,up,down} except they expect a dynamically
uniform index. This is necessary since the ir3 shfl instruction does not
work with a divergent index.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31501>
This commit is contained in:
Job Noorman 2024-10-16 20:25:32 +02:00 committed by Marge Bot
parent 9f58d0b0e7
commit 4556b18f51
3 changed files with 18 additions and 0 deletions

View file

@ -542,6 +542,9 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_shuffle_xor:
case nir_intrinsic_shuffle_up:
case nir_intrinsic_shuffle_down:
case nir_intrinsic_shuffle_xor_uniform_ir3:
case nir_intrinsic_shuffle_up_uniform_ir3:
case nir_intrinsic_shuffle_down_uniform_ir3:
case nir_intrinsic_quad_broadcast:
case nir_intrinsic_quad_swap_horizontal:
case nir_intrinsic_quad_swap_vertical:

View file

@ -1436,6 +1436,15 @@ intrinsic("inclusive_scan_clusters_ir3", dest_comp=1, src_comp=[1],
intrinsic("exclusive_scan_clusters_ir3", dest_comp=1, src_comp=[1, 1],
bit_sizes=src0, indices=[REDUCTION_OP])
# Variants of shuffle_{xor,up,down} that expect a dynamically uniform index.
# They exist because the ir3 shfl instruction doesn't work with divergent
# indices.
# Each takes two sources; the bit size follows src0.
# NOTE(review): presumably src0 is the value being shuffled and src1 is the
# single-component index, mirroring the generic shuffle_{xor,up,down}
# definitions -- confirm against those.
intrinsic("shuffle_xor_uniform_ir3", src_comp=[0, 1], dest_comp=0,
bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("shuffle_up_uniform_ir3", src_comp=[0, 1], dest_comp=0,
bit_sizes=src0, flags=[CAN_ELIMINATE])
intrinsic("shuffle_down_uniform_ir3", src_comp=[0, 1], dest_comp=0,
bit_sizes=src0, flags=[CAN_ELIMINATE])
# IR3-specific intrinsics for prefetching descriptors in preambles.
intrinsic("prefetch_sam_ir3", [1, 1], flags=[CAN_REORDER])
intrinsic("prefetch_tex_ir3", [1], flags=[CAN_REORDER])

View file

@ -1261,6 +1261,9 @@ should_lower_int64_intrinsic(const nir_intrinsic_instr *intrin,
case nir_intrinsic_shuffle_xor:
case nir_intrinsic_shuffle_up:
case nir_intrinsic_shuffle_down:
case nir_intrinsic_shuffle_xor_uniform_ir3:
case nir_intrinsic_shuffle_up_uniform_ir3:
case nir_intrinsic_shuffle_down_uniform_ir3:
case nir_intrinsic_rotate:
case nir_intrinsic_quad_broadcast:
case nir_intrinsic_quad_swap_horizontal:
@ -1307,6 +1310,9 @@ lower_int64_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
case nir_intrinsic_shuffle_xor:
case nir_intrinsic_shuffle_up:
case nir_intrinsic_shuffle_down:
case nir_intrinsic_shuffle_xor_uniform_ir3:
case nir_intrinsic_shuffle_up_uniform_ir3:
case nir_intrinsic_shuffle_down_uniform_ir3:
case nir_intrinsic_rotate:
case nir_intrinsic_quad_broadcast:
case nir_intrinsic_quad_swap_horizontal: