diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index db67fdfdaae..acf4fc509f1 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -275,7 +275,21 @@ typedef struct { bool load_sample_positions_always_loads_current_ones; bool dynamic_rasterization_samples; int force_front_face; /* 0 -> keep, 1 -> set to true, -1 -> set to false */ - unsigned ps_iter_samples; /* >= 2 forces sample interpolation, affects sample_mask_in lowering */ + + /* barycentrics: + * ps_iter_samples >= 2: + * * All barycentrics are changed to per-sample interpolation except at_offset/at_sample. + * * barycentric_at_sample(sample_id) is replaced by barycentric_sample. + * + * sample_mask_in: + * ps_iter_samples == 2, 4: + * * sample_mask_in is changed to (sample_mask_in & (ps_iter_mask << sample_id)) + * ps_iter_samples == 8: + * * sample_mask_in is replaced by 1 << sample_id. + * + * When ps_iter_samples is equal to rasterization samples, set ps_iter_samples = 8 for this pass. + */ + unsigned ps_iter_samples; /* fbfetch_output */ bool fbfetch_is_1D; diff --git a/src/amd/common/nir/ac_nir_lower_ps_early.c b/src/amd/common/nir/ac_nir_lower_ps_early.c index f716df0dae3..0aab96a94ac 100644 --- a/src/amd/common/nir/ac_nir_lower_ps_early.c +++ b/src/amd/common/nir/ac_nir_lower_ps_early.c @@ -383,19 +383,26 @@ lower_ps_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *state) return true; case nir_intrinsic_load_barycentric_at_sample: { unsigned mode = nir_intrinsic_interp_mode(intrin); + nir_def *sample_id = intrin->src[0].ssa; if (s->options->force_center_interp_no_msaa) { nir_def_replace(&intrin->def, nir_load_barycentric_pixel(b, 32, .interp_mode = mode)); return true; } + if (s->options->ps_iter_samples >= 2 && + sample_id->parent_instr->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(sample_id->parent_instr)->intrinsic == nir_intrinsic_load_sample_id) { + nir_def_replace(&intrin->def, nir_load_barycentric_sample(b, 32, .interp_mode = mode)); + return true; + } + /* If load_sample_positions_always_loads_current_ones is true, load_sample_positions_amd * always loads the sample positions that are currently set in the rasterizer state * even if MSAA is disabled. */ nir_def *num_samples = s->options->load_sample_positions_always_loads_current_ones ? nir_undef(b, 1, 32) : nir_load_rasterization_samples_amd(b); - nir_def *sample_id = intrin->src[0].ssa; nir_def *sample_pos = nir_load_sample_positions_amd(b, 32, sample_id, num_samples); sample_pos = nir_fadd_imm(b, sample_pos, -0.5f); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e13b35b82ed..856e3141338 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2487,11 +2487,11 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ key->ps.mono.interpolate_at_sample_force_center, .load_sample_positions_always_loads_current_ones = true, .force_front_face = key->ps.opt.force_front_face_input, - /* This forces per-sample interpolation (if at least 2) and lowers sample_mask_in. */ + /* This does a lot of things. See the description in ac_nir_lower_ps_early_options. */ .ps_iter_samples = key->ps.part.prolog.samplemask_log_ps_iter ? (1 << key->ps.part.prolog.samplemask_log_ps_iter) : (key->ps.part.prolog.force_persp_sample_interp || - key->ps.part.prolog.force_linear_sample_interp ? 2 : 1), + key->ps.part.prolog.force_linear_sample_interp ? 2 : 0), .fbfetch_is_1D = key->ps.mono.fbfetch_is_1D, .fbfetch_layered = key->ps.mono.fbfetch_layered,