nir: add fetch inactive index to quad_swizzle_amd/masked_swizzle_amd

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25525>
This commit is contained in:
Rhys Perry 2023-10-02 16:13:35 +01:00 committed by Marge Bot
parent 0e79f76aa5
commit ad5be40303
3 changed files with 9 additions and 5 deletions

View file

@@ -688,7 +688,7 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
nir_def *arg1 = nir_channel(b, mrt1_arg, i);
/* swap odd,even lanes of arg0 */
-arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001);
+arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001, .fetch_inactive = true);
/* swap even lanes between arg0 and arg1 */
nir_def *tid = nir_load_subgroup_invocation(b);
@@ -699,7 +699,7 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
arg1 = nir_bcsel(b, is_even, tmp, arg1);
/* swap odd,even lanes again for arg0 */
-arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001);
+arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001, .fetch_inactive = true);
arg0_vec[i] = arg0;
arg1_vec[i] = arg1;

View file

@@ -224,6 +224,9 @@ index("nir_alu_type", "dest_type")
# The swizzle mask for quad_swizzle_amd & masked_swizzle_amd
index("unsigned", "swizzle_mask")
+# Allow FI=1 for quad_swizzle_amd & masked_swizzle_amd
+index("bool", "fetch_inactive")
# Offsets for load_shared2_amd/store_shared2_amd
index("uint8_t", "offset0")
index("uint8_t", "offset1")
@@ -509,9 +512,9 @@ intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
# AMD shader ballot operations
intrinsic("quad_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0,
-indices=[SWIZZLE_MASK], flags=[CAN_ELIMINATE])
+indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE])
intrinsic("masked_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0,
-indices=[SWIZZLE_MASK], flags=[CAN_ELIMINATE])
+indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE])
intrinsic("write_invocation_amd", src_comp=[0, 0, 1], dest_comp=0, bit_sizes=src0,
flags=[CAN_ELIMINATE])
# src = [ mask, addition ]

View file

@@ -198,7 +198,8 @@ lower_shuffle_to_swizzle(nir_builder *b, nir_intrinsic_instr *intrin)
return NULL;
return nir_masked_swizzle_amd(b, intrin->src[0].ssa,
-.swizzle_mask = (mask << 10) | 0x1f);
+.swizzle_mask = (mask << 10) | 0x1f,
+.fetch_inactive = true);
}
/* Lowers "specialized" shuffles to a generic nir_intrinsic_shuffle. */