From 1cb5bf700907a0b862fc4e340c0803bd9411b5d0 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 17 Jan 2024 12:03:41 +0100 Subject: [PATCH] nir: add ballot_relaxed and as_uniform intrinsics Reviewed-by: Rhys Perry Part-of: --- src/compiler/nir/nir_divergence_analysis.c | 2 ++ src/compiler/nir/nir_intrinsics.py | 8 ++++++++ src/compiler/nir/nir_opt_constant_folding.c | 1 + 3 files changed, 11 insertions(+) diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 4cb456bc747..d0a453125a6 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -93,6 +93,8 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) /* Intrinsics which are always uniform */ case nir_intrinsic_shader_clock: case nir_intrinsic_ballot: + case nir_intrinsic_ballot_relaxed: + case nir_intrinsic_as_uniform: case nir_intrinsic_read_invocation: case nir_intrinsic_read_first_invocation: case nir_intrinsic_vote_any: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 99cc58c4dae..fe86600ce30 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -446,6 +446,14 @@ intrinsic("ballot", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE]) intrinsic("read_invocation", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) intrinsic("read_first_invocation", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) +# Same as ballot, but inactive invocations contribute undefined bits. +intrinsic("ballot_relaxed", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE]) + +# Allows the backend compiler to move this value to a uniform register. +# Result is undefined if src is not uniform. +# Unlike read_first_invocation, it may be replaced by a divergent move or CSE'd. 
+intrinsic("as_uniform", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) + # Returns the value of the first source for the lane where the second source is # true. The second source must be true for exactly one lane. intrinsic("read_invocation_cond_ir3", src_comp=[0, 1], dest_comp=0, flags=[CAN_ELIMINATE]) diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c index 2e5987cd515..2b6079cdfc6 100644 --- a/src/compiler/nir/nir_opt_constant_folding.c +++ b/src/compiler/nir/nir_opt_constant_folding.c @@ -258,6 +258,7 @@ try_fold_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, case nir_intrinsic_vote_all: case nir_intrinsic_read_invocation: case nir_intrinsic_read_first_invocation: + case nir_intrinsic_as_uniform: case nir_intrinsic_shuffle: case nir_intrinsic_shuffle_xor: case nir_intrinsic_shuffle_up: