diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 9deac4f66c6..248cc92f9f1 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -96,6 +96,9 @@ void ac_set_nir_options(struct radeon_info *info, bool use_llvm, options->support_16bit_alu = info->gfx_level >= GFX8; options->vectorize_vec2_16bit = info->has_packed_math_16bit; options->discard_is_demote = true; + options->io_options = nir_io_has_flexible_input_interpolation_except_flat | + nir_io_prefer_scalar_fs_inputs | + nir_io_mix_convergent_flat_with_interpolated; } bool diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 46aa72f2e3e..e98fbe4be5a 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2548,9 +2548,7 @@ input_mask_to_ps_inputs(const struct radv_vs_output_info *outinfo, const struct enum radv_ps_in_type type = default_type; - if (ps->info.ps.flat_shaded_mask & BITFIELD_BIT(*ps_offset)) - type = radv_ps_in_flat; - else if (ps->info.ps.explicit_shaded_mask & BITFIELD_BIT(*ps_offset)) + if (ps->info.ps.explicit_shaded_mask & BITFIELD_BIT(*ps_offset)) type = radv_ps_in_explicit; else if (ps->info.ps.explicit_strict_shaded_mask & BITFIELD_BIT(*ps_offset)) type = radv_ps_in_explicit_strict; @@ -2558,6 +2556,8 @@ input_mask_to_ps_inputs(const struct radv_vs_output_info *outinfo, const struct type = radv_ps_in_interpolated_fp16_hi; else if (ps->info.ps.float16_shaded_mask & BITFIELD_BIT(*ps_offset)) type = radv_ps_in_interpolated_fp16; + else if (ps->info.ps.float32_shaded_mask & BITFIELD_BIT(*ps_offset)) + type = radv_ps_in_interpolated; ps_input_cntl[*ps_offset] = offset_to_ps_input(vs_offset, type); ++(*ps_offset); @@ -2599,7 +2599,7 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer) if (ps->info.ps.input_clips_culls_mask & 0xf0) slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST1, ps_input_cntl, &ps_offset, false, radv_ps_in_interpolated); - input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_mask, ps_input_cntl, &ps_offset, radv_ps_in_interpolated); + input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_mask, ps_input_cntl, &ps_offset, radv_ps_in_flat); /* Per-primitive PS inputs: the HW needs these to be last. */ if (mesh) { diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 742e6f3dc4f..d3c6d0dea69 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -88,9 +88,7 @@ gather_load_fs_input_info(const nir_shader *nir, const nir_intrinsic_instr *intr const bool per_primitive = nir->info.per_primitive_inputs & BITFIELD64_BIT(location); if (!per_primitive) { - if (intrin->intrinsic == nir_intrinsic_load_input) { - info->ps.flat_shaded_mask |= mapped_mask; - } else if (intrin->intrinsic == nir_intrinsic_load_input_vertex) { + if (intrin->intrinsic == nir_intrinsic_load_input_vertex) { if (io_sem.interp_explicit_strict) info->ps.explicit_strict_shaded_mask |= mapped_mask; else @@ -100,6 +98,8 @@ gather_load_fs_input_info(const nir_shader *nir, const nir_intrinsic_instr *intr info->ps.float16_hi_shaded_mask |= mapped_mask; else info->ps.float16_shaded_mask |= mapped_mask; + } else if (intrin->intrinsic == nir_intrinsic_load_interpolated_input) { + info->ps.float32_shaded_mask |= mapped_mask; } } diff --git a/src/amd/vulkan/radv_shader_info.h b/src/amd/vulkan/radv_shader_info.h index e072c9ffd5b..3b2448fd0bc 100644 --- a/src/amd/vulkan/radv_shader_info.h +++ b/src/amd/vulkan/radv_shader_info.h @@ -175,7 +175,7 @@ struct radv_shader_info { uint8_t input_clips_culls_mask; uint32_t input_mask; uint32_t input_per_primitive_mask; - uint32_t flat_shaded_mask; + uint32_t float32_shaded_mask; uint32_t explicit_shaded_mask; uint32_t explicit_strict_shaded_mask; uint32_t float16_shaded_mask; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 72a96d6fc4a..39257f9c0e9 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1655,10 +1655,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen) * when execution mode is rtz instead of rtne. */ options->force_f2f16_rtz = true; - options->io_options = nir_io_has_flexible_input_interpolation_except_flat | - nir_io_prefer_scalar_fs_inputs | - nir_io_glsl_lower_derefs | - (sscreen->options.optimize_io ? nir_io_glsl_opt_varyings : 0); + options->io_options |= nir_io_glsl_lower_derefs | + (sscreen->options.optimize_io ? nir_io_glsl_opt_varyings : 0); options->lower_mediump_io = sscreen->info.gfx_level >= GFX8 && sscreen->options.fp16 ? si_lower_mediump_io : NULL; /* HW supports indirect indexing for: | Enabled in driver diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 61885fc2294..2953e0a9b98 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -298,9 +298,9 @@ static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info, info->input[loc].semantic = semantic + i; - if (semantic == VARYING_SLOT_PRIMITIVE_ID) - info->input[loc].interpolate = INTERP_MODE_FLAT; - else + /* "interpolate" starts out as FLAT. The first seen load_interpolated_input overwrites it. */ + if (semantic != VARYING_SLOT_PRIMITIVE_ID && + info->input[loc].interpolate == INTERP_MODE_FLAT) info->input[loc].interpolate = interp; if (mask) { @@ -660,6 +660,12 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir, * conditions are met. */ info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff; + + /* Initialize all FS inputs to flat. If we see load_interpolated_input for any component, + * it will be changed to its interp mode. + */ + for (unsigned i = 0; i < ARRAY_SIZE(info->input); i++) + info->input[i].interpolate = INTERP_MODE_FLAT; } info->constbuf0_num_slots = nir->num_uniforms;