diff --git a/src/compiler/nir/nir_recompute_io_bases.c b/src/compiler/nir/nir_recompute_io_bases.c
index 38ef4492d2a..d7fc129b410 100644
--- a/src/compiler/nir/nir_recompute_io_bases.c
+++ b/src/compiler/nir/nir_recompute_io_bases.c
@@ -93,13 +93,27 @@ nir_recompute_io_bases(nir_shader *nir, nir_variable_mode modes)
 
          if (mode == nir_var_shader_in) {
             for (unsigned i = 0; i < num_slots; i++) {
-               if (intr->intrinsic == nir_intrinsic_load_per_primitive_input)
-                  BITSET_SET(per_prim_inputs, sem.location + i);
+               unsigned location = sem.location + i;
+               /* GPUs like AMD require per-primitive inputs to come after
+                * per-vertex inputs.
+                */
+               if (intr->intrinsic == nir_intrinsic_load_per_primitive_input ||
+                   /* Some fragment shader input varyings are per-vertex when
+                    * fed by the vertex pipeline but per-primitive when fed by
+                    * the mesh pipeline. To share the same fragment shader
+                    * code, move these varyings after the other per-vertex
+                    * varyings by treating them as per-primitive here.
+                    */
+                   (nir->info.stage == MESA_SHADER_FRAGMENT &&
+                    (location == VARYING_SLOT_PRIMITIVE_ID ||
+                     location == VARYING_SLOT_VIEWPORT ||
+                     location == VARYING_SLOT_LAYER)))
+                  BITSET_SET(per_prim_inputs, location);
                else
-                  BITSET_SET(inputs, sem.location + i);
+                  BITSET_SET(inputs, location);
 
                if (sem.high_dvec2)
-                  BITSET_SET(dual_slot_inputs, sem.location + i);
+                  BITSET_SET(dual_slot_inputs, location);
             }
          } else if (!sem.dual_source_blend_index) {
             for (unsigned i = 0; i < num_slots; i++)
@@ -126,7 +140,7 @@ nir_recompute_io_bases(nir_shader *nir, nir_variable_mode modes)
             num_slots = (num_slots + sem.high_16bits + 1) / 2;
 
          if (mode == nir_var_shader_in) {
-            if (intr->intrinsic == nir_intrinsic_load_per_primitive_input) {
+            if (BITSET_TEST(per_prim_inputs, sem.location)) {
                nir_intrinsic_set_base(intr,
                                       num_normal_inputs +
                                       BITSET_PREFIX_SUM(per_prim_inputs, sem.location));
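
The base assignment this patch relies on boils down to a prefix sum over two bitmasks: per-vertex inputs are numbered first, and per-primitive inputs (including the reclassified FS varyings) are appended after them. Below is a minimal standalone sketch of that numbering scheme; it is not Mesa code, the slot numbers and masks are made-up example values, and the prefix_sum helper merely stands in for BITSET_PREFIX_SUM.

/* Standalone illustration (not Mesa code) of the base numbering:
 * per-vertex inputs get bases 0..N-1, per-primitive inputs get
 * bases N..N+M-1, each via a prefix sum over its own bitmask. */
#include <stdio.h>
#include <stdint.h>

/* Count the set bits below 'slot': the prefix sum that turns a mask of
 * used slots into densely packed bases. */
static unsigned prefix_sum(uint64_t mask, unsigned slot)
{
   return __builtin_popcountll(mask & ((1ull << slot) - 1));
}

int main(void)
{
   /* Example: slots 2, 5, 7 are per-vertex; slots 3 and 6 are
    * per-primitive (e.g. a reclassified LAYER-style FS input). */
   uint64_t vert_mask = (1ull << 2) | (1ull << 5) | (1ull << 7);
   uint64_t prim_mask = (1ull << 3) | (1ull << 6);

   unsigned num_vert = __builtin_popcountll(vert_mask);

   for (unsigned slot = 0; slot < 8; slot++) {
      if (vert_mask & (1ull << slot)) {
         printf("slot %u: per-vertex,    base %u\n",
                slot, prefix_sum(vert_mask, slot));
      } else if (prim_mask & (1ull << slot)) {
         /* Per-primitive inputs start after all per-vertex inputs,
          * mirroring num_normal_inputs + BITSET_PREFIX_SUM(...) above. */
         printf("slot %u: per-primitive, base %u\n",
                slot, num_vert + prefix_sum(prim_mask, slot));
      }
   }
   return 0;
}

With the example masks this prints bases 0, 1, 2 for the per-vertex slots and 3, 4 for the per-primitive slots, which is why the second hunk can key the base computation off BITSET_TEST(per_prim_inputs, ...) rather than off the intrinsic opcode alone.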