diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 380c153f0dd..8ac4ee3f5e6 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1589,8 +1589,12 @@ intrinsic("store_hit_attrib_amd", src_comp=[1], indices=[BASE]) # Load forced VRS rates. intrinsic("load_force_vrs_rates_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER]) -intrinsic("load_scalar_arg_amd", dest_comp=0, bit_sizes=[32], indices=[BASE, ARG_UPPER_BOUND_U32_AMD], flags=[CAN_ELIMINATE, CAN_REORDER]) -intrinsic("load_vector_arg_amd", dest_comp=0, bit_sizes=[32], indices=[BASE, ARG_UPPER_BOUND_U32_AMD], flags=[CAN_ELIMINATE, CAN_REORDER]) +intrinsic("load_scalar_arg_amd", dest_comp=0, bit_sizes=[32], + indices=[BASE, ARG_UPPER_BOUND_U32_AMD], + flags=[CAN_ELIMINATE, CAN_REORDER]) +intrinsic("load_vector_arg_amd", dest_comp=0, bit_sizes=[32], + indices=[BASE, ARG_UPPER_BOUND_U32_AMD, FLAGS], + flags=[CAN_ELIMINATE, CAN_REORDER]) store("scalar_arg_amd", [], [BASE]) store("vector_arg_amd", [], [BASE]) diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index e57d9590be6..c96572bd5a9 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -616,9 +616,15 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s nir_def *color[4]; for (int i = 0; i < 4; i++) { - color[i] = colors_read & BITFIELD_BIT(start + i) ? - ac_nir_load_arg_at_offset(b, &args->ac, args->color_start, offset++) : - nir_undef(b, 1, 32); + if (colors_read & BITFIELD_BIT(start + i)) { + color[i] = ac_nir_load_arg_at_offset(b, &args->ac, args->color_start, offset++); + + nir_intrinsic_set_flags(nir_instr_as_intrinsic(color[i]->parent_instr), + SI_VECTOR_ARG_IS_COLOR | + SI_VECTOR_ARG_COLOR_COMPONENT(start + i)); + } else { + color[i] = nir_undef(b, 1, 32); + } } replacement = nir_vec(b, color, 4); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 047ffc5c0a6..64f478b06d6 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -137,6 +137,10 @@ struct nir_lower_subgroups_options; /* D3D9 behaviour for COLOR0 requires 0001. GL is undefined. */ #define SI_PS_INPUT_CNTL_UNUSED_COLOR0 SI_PS_INPUT_CNTL_0001 +#define SI_VECTOR_ARG_IS_COLOR BITFIELD_BIT(0) +#define SI_VECTOR_ARG_COLOR_COMPONENT(x) (((x) & 0x7) << 1) +#define SI_GET_VECTOR_ARG_COLOR_COMPONENT(x) (((x) >> 1) & 0x7) + /* SGPR user data indices */ enum { diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index b6af2badffc..66fd766be97 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -526,6 +526,14 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info } break; } + case nir_intrinsic_load_vector_arg_amd: + /* Non-monolithic lowered PS can have this. We need to record color usage. */ + if (nir_intrinsic_flags(intr) & SI_VECTOR_ARG_IS_COLOR) { + /* The channel can be between 0 and 7. */ + unsigned chan = SI_GET_VECTOR_ARG_COLOR_COMPONENT(nir_intrinsic_flags(intr)); + info->colors_read |= BITFIELD_BIT(chan); + } + break; case nir_intrinsic_load_barycentric_at_offset: /* uses center */ case nir_intrinsic_load_barycentric_at_sample: /* uses center */ if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT)