diff --git a/src/amd/common/ac_nir_lower_ps.c b/src/amd/common/ac_nir_lower_ps.c index dc345946fa3..41450aaa30d 100644 --- a/src/amd/common/ac_nir_lower_ps.c +++ b/src/amd/common/ac_nir_lower_ps.c @@ -688,7 +688,7 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first nir_def *arg1 = nir_channel(b, mrt1_arg, i); /* swap odd,even lanes of arg0 */ - arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001); + arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001, .fetch_inactive = true); /* swap even lanes between arg0 and arg1 */ nir_def *tid = nir_load_subgroup_invocation(b); @@ -699,7 +699,7 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first arg1 = nir_bcsel(b, is_even, tmp, arg1); /* swap odd,even lanes again for arg0 */ - arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001); + arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001, .fetch_inactive = true); arg0_vec[i] = arg0; arg1_vec[i] = arg1; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 4bc57a7613a..fb31c13e41b 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -224,6 +224,9 @@ index("nir_alu_type", "dest_type") # The swizzle mask for quad_swizzle_amd & masked_swizzle_amd index("unsigned", "swizzle_mask") +# Allow FI=1 for quad_swizzle_amd & masked_swizzle_amd +index("bool", "fetch_inactive") + # Offsets for load_shared2_amd/store_shared2_amd index("uint8_t", "offset0") index("uint8_t", "offset1") @@ -509,9 +512,9 @@ intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0, # AMD shader ballot operations intrinsic("quad_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0, - indices=[SWIZZLE_MASK], flags=[CAN_ELIMINATE]) + indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE]) intrinsic("masked_swizzle_amd", src_comp=[0], dest_comp=0, bit_sizes=src0, - indices=[SWIZZLE_MASK], flags=[CAN_ELIMINATE]) + indices=[SWIZZLE_MASK, FETCH_INACTIVE], flags=[CAN_ELIMINATE]) intrinsic("write_invocation_amd", src_comp=[0, 0, 1], dest_comp=0, bit_sizes=src0, flags=[CAN_ELIMINATE]) # src = [ mask, addition ] diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c index 7c8aacf9318..e679a7a7e9a 100644 --- a/src/compiler/nir/nir_lower_subgroups.c +++ b/src/compiler/nir/nir_lower_subgroups.c @@ -198,7 +198,8 @@ lower_shuffle_to_swizzle(nir_builder *b, nir_intrinsic_instr *intrin) return NULL; return nir_masked_swizzle_amd(b, intrin->src[0].ssa, - .swizzle_mask = (mask << 10) | 0x1f); + .swizzle_mask = (mask << 10) | 0x1f, + .fetch_inactive = true); } /* Lowers "specialized" shuffles to a generic nir_intrinsic_shuffle. */