diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 2628746a302..1ee29568790 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -433,17 +433,17 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
 			break;
 		case MESA_SHADER_TESS_CTRL:
 			NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so, so->key.tessellation);
-			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler);
+			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so);
 			progress = true;
 			break;
 		case MESA_SHADER_TESS_EVAL:
-			NIR_PASS_V(s, ir3_nir_lower_tess_eval, so->key.tessellation);
+			NIR_PASS_V(s, ir3_nir_lower_tess_eval, so, so->key.tessellation);
 			if (so->key.has_gs)
 				NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation);
 			progress = true;
 			break;
 		case MESA_SHADER_GEOMETRY:
-			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler);
+			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so);
 			progress = true;
 			break;
 		default:
@@ -694,12 +694,12 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
 		constoff = align(constoff - 1, 4) + 3;
 		const_state->offsets.primitive_param = constoff;
 		const_state->offsets.primitive_map = constoff + 5;
-		constoff += 5 + DIV_ROUND_UP(nir->num_inputs, 4);
+		constoff += 5 + DIV_ROUND_UP(v->input_size, 4);
 		break;
 	case MESA_SHADER_GEOMETRY:
 		const_state->offsets.primitive_param = constoff;
 		const_state->offsets.primitive_map = constoff + 1;
-		constoff += 1 + DIV_ROUND_UP(nir->num_inputs, 4);
+		constoff += 1 + DIV_ROUND_UP(v->input_size, 4);
 		break;
 	default:
 		break;
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index e9fe495767b..a6ec1440e9b 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -46,9 +46,9 @@ bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
 
 void ir3_nir_lower_to_explicit_output(nir_shader *shader,
 		struct ir3_shader_variant *v, unsigned topology);
-void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler);
+void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_shader_variant *v);
 void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology);
-void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology);
+void ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology);
 void ir3_nir_lower_gs(nir_shader *shader);
 
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
@@ -65,10 +65,6 @@ bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);
 nir_ssa_def *
 ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
 
-uint32_t ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
-		const struct ir3_shader_variant *consumer,
-		uint32_t *locs);
-
 static inline nir_intrinsic_instr *
 ir3_bindless_resource(nir_src src)
 {
diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c
index 44b2921dcb7..d54f9a4c928 100644
--- a/src/freedreno/ir3/ir3_nir_lower_tess.c
+++ b/src/freedreno/ir3/ir3_nir_lower_tess.c
@@ -30,7 +30,6 @@ struct state {
 
 	struct primitive_map {
 		unsigned loc[32];
-		unsigned size[32];
 		unsigned stride;
 	} map;
 
@@ -73,45 +72,65 @@ build_local_primitive_id(nir_builder *b, struct state *state)
 	return bitfield_extract(b, state->header, state->local_primitive_id_start, 63);
 }
 
-static nir_variable *
-get_var(nir_shader *shader, nir_variable_mode mode, int driver_location)
+static bool
+is_tess_levels(gl_varying_slot slot)
 {
-	nir_foreach_variable_with_modes (v, shader, mode) {
-		if (v->data.driver_location == driver_location) {
-			return v;
-		}
-	}
-
-	return NULL;
+	return (slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
+			slot == VARYING_SLOT_TESS_LEVEL_INNER);
 }
 
-static bool
-is_tess_levels(nir_variable *var)
+/* Return a deterministic index for varyings. We can't rely on driver_location
+ * to be correct without linking the different stages first, so we create
+ * "primitive maps" where the producer decides on the location of each varying
+ * slot and then exports a per-slot array to the consumer. This compacts the
+ * gl_varying_slot space down a bit so that the primitive maps aren't too
+ * large.
+ *
+ * Note: per-patch varyings are currently handled separately, without any
+ * compacting.
+ *
+ * TODO: We could probably use the driver_location's directly in the non-SSO
+ * (Vulkan) case.
+ */
+
+static unsigned
+shader_io_get_unique_index(gl_varying_slot slot)
 {
-	return (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
-			var->data.location == VARYING_SLOT_TESS_LEVEL_INNER);
+	if (slot == VARYING_SLOT_POS)
+		return 0;
+	if (slot == VARYING_SLOT_PSIZ)
+		return 1;
+	if (slot == VARYING_SLOT_CLIP_DIST0)
+		return 2;
+	if (slot == VARYING_SLOT_CLIP_DIST1)
+		return 3;
+	if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
+		return 4 + (slot - VARYING_SLOT_VAR0);
+	unreachable("illegal slot in get unique index\n");
 }
 
 static nir_ssa_def *
 build_local_offset(nir_builder *b, struct state *state,
-		nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
+		nir_ssa_def *vertex, uint32_t location, uint32_t comp, nir_ssa_def *offset)
 {
 	nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
 	nir_ssa_def *primitive_offset =
 		nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
 	nir_ssa_def *attr_offset;
 	nir_ssa_def *vertex_stride;
+	unsigned index = shader_io_get_unique_index(location);
 
 	switch (b->shader->info.stage) {
 	case MESA_SHADER_VERTEX:
 	case MESA_SHADER_TESS_EVAL:
 		vertex_stride = nir_imm_int(b, state->map.stride * 4);
-		attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
+		attr_offset = nir_imm_int(b, state->map.loc[index] + 4 * comp);
 		break;
 	case MESA_SHADER_TESS_CTRL:
 	case MESA_SHADER_GEOMETRY:
 		vertex_stride = nir_load_vs_vertex_stride_ir3(b);
-		attr_offset = nir_load_primitive_location_ir3(b, base);
+		attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),
+							   nir_imm_int(b, comp * 4));
 		break;
 	default:
 		unreachable("bad shader stage");
@@ -120,7 +139,7 @@ build_local_offset(nir_builder *b, struct state *state,
 	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);
 
 	return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
-			nir_iadd(b, attr_offset, offset));
+			nir_iadd(b, attr_offset, nir_ishl(b, offset, nir_imm_int(b, 4))));
 }
 
 static nir_intrinsic_instr *
@@ -153,37 +172,58 @@ replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
 }
 
 static void
-build_primitive_map(nir_shader *shader, nir_variable_mode mode, struct primitive_map *map)
+build_primitive_map(nir_shader *shader, struct primitive_map *map)
 {
-	nir_foreach_variable_with_modes (var, shader, mode) {
-		switch (var->data.location) {
-		case VARYING_SLOT_TESS_LEVEL_OUTER:
-		case VARYING_SLOT_TESS_LEVEL_INNER:
-			continue;
-		}
-
-		unsigned size = glsl_count_attribute_slots(var->type, false) * 4;
-
-		assert(var->data.driver_location < ARRAY_SIZE(map->size));
-		map->size[var->data.driver_location] =
-			MAX2(map->size[var->data.driver_location], size);
+	/* All interfaces except the TCS <-> TES interface use ldlw, which takes
+	 * an offset in bytes, so each vec4 slot is 16 bytes. TCS <-> TES uses
+	 * ldg, which takes an offset in dwords, but each per-vertex slot has
+	 * space for every vertex, and there's space at the beginning for
+	 * per-patch varyings.
+	 */
+	unsigned slot_size = 16, start = 0;
+	if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
+		slot_size = shader->info.tess.tcs_vertices_out * 4;
+		start = util_last_bit(shader->info.patch_outputs_written) * 4;
 	}
 
-	unsigned loc = 0;
-	for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
-		if (map->size[i] == 0)
-				continue;
-		nir_variable *var = get_var(shader, mode, i);
-		map->loc[i] = loc;
-		loc += map->size[i];
+	uint64_t mask = shader->info.outputs_written;
+	unsigned loc = start;
+	while (mask) {
+		int location = u_bit_scan64(&mask);
+		if (is_tess_levels(location))
+			continue;
 
-		if (var->data.patch)
-			map->size[i] = 0;
-		else
-			map->size[i] = map->size[i] / glsl_get_length(var->type);
+		unsigned index = shader_io_get_unique_index(location);
+		map->loc[index] = loc;
+		loc += slot_size;
 	}
 
 	map->stride = loc;
+	/* Use units of dwords for the stride. */
+	if (shader->info.stage != MESA_SHADER_TESS_CTRL)
+		map->stride /= 4;
+}
+
+/* For shader stages that receive a primitive map, calculate how big it should
+ * be.
+ */
+
+static unsigned
+calc_primitive_map_size(nir_shader *shader)
+{
+	uint64_t mask = shader->info.inputs_read;
+	unsigned max_index = 0;
+	while (mask) {
+		int location = u_bit_scan64(&mask);
+
+		if (is_tess_levels(location))
+			continue;
+
+		unsigned index = shader_io_get_unique_index(location);
+		max_index = MAX2(max_index, index + 1);
+	}
+	
+	return max_index;
 }
 
 static void
@@ -209,7 +249,9 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
 			b->cursor = nir_instr_remove(&intr->instr);
 
 			nir_ssa_def *vertex_id = build_vertex_id(b, state);
-			nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
+			nir_ssa_def *offset = build_local_offset(b, state, vertex_id,
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
 					intr->src[1].ssa);
 			nir_intrinsic_instr *store =
 				nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
@@ -240,7 +282,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *
 {
 	struct state state = { };
 
-	build_primitive_map(shader, nir_var_shader_out, &state.map);
+	build_primitive_map(shader, &state.map);
 	memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
 
 	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
@@ -282,7 +324,8 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *st
 
 			nir_ssa_def *offset = build_local_offset(b, state,
 					intr->src[0].ssa, // this is typically gl_InvocationID
-					nir_intrinsic_base(intr),
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
 					intr->src[1].ssa);
 
 			replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
@@ -305,14 +348,14 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *st
 }
 
 void
-ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler)
+ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_shader_variant *v)
 {
  	struct state state = { };
 
 	/* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
 	 * HS uses a different primitive id, which starts at bit 16 in the header
 	 */
-	if (shader->info.stage == MESA_SHADER_TESS_CTRL && compiler->tess_use_shared)
+	if (shader->info.stage == MESA_SHADER_TESS_CTRL && v->shader->compiler->tess_use_shared)
 		state.local_primitive_id_start = 16;
 
 	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
@@ -329,43 +372,74 @@ ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compile
 
 	nir_foreach_block_safe (block, impl)
 		lower_block_to_explicit_input(block, &b, &state);
+
+	v->input_size = calc_primitive_map_size(shader);
 }
 
+static nir_ssa_def *
+build_tcs_out_vertices(nir_builder *b)
+{
+	if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
+		return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
+	else
+		return nir_load_patch_vertices_in(b);
+}
 
 static nir_ssa_def *
 build_per_vertex_offset(nir_builder *b, struct state *state,
-		nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
+		nir_ssa_def *vertex, uint32_t location, uint32_t comp, nir_ssa_def *offset)
 {
 	nir_ssa_def *primitive_id = nir_load_primitive_id(b);
 	nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
 	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
 	nir_ssa_def *attr_offset;
-	int loc = var->data.driver_location;
 
-	switch (b->shader->info.stage) {
-	case MESA_SHADER_TESS_CTRL:
-		attr_offset = nir_imm_int(b, state->map.loc[loc]);
-		break;
-	case MESA_SHADER_TESS_EVAL:
-		attr_offset = nir_load_primitive_location_ir3(b, loc);
-		break;
-	default:
-		unreachable("bad shader state");
+	if (nir_src_is_const(nir_src_for_ssa(offset))) {
+		location += nir_src_as_uint(nir_src_for_ssa(offset));
+		offset = nir_imm_int(b, 0);
+	} else {
+		/* Offset is in vec4's, but we need it in unit of components for the
+		 * load/store_global_ir3 offset.
+		 */
+		offset = nir_ishl(b, offset, nir_imm_int(b, 2));
 	}
 
-	nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
-	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);
+	nir_ssa_def *vertex_offset;
+	if (vertex) {
+		unsigned index = shader_io_get_unique_index(location);
+		switch (b->shader->info.stage) {
+		case MESA_SHADER_TESS_CTRL:
+			attr_offset = nir_imm_int(b, state->map.loc[index] + comp);
+			break;
+		case MESA_SHADER_TESS_EVAL:
+			attr_offset =
+				nir_iadd(b, nir_load_primitive_location_ir3(b, index),
+						 nir_imm_int(b, comp));
+			break;
+		default:
+			unreachable("bad shader state");
+		}
 
-	return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
-			nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
+		attr_offset = nir_iadd(b, attr_offset,
+							   nir_imul24(b, offset,
+										  build_tcs_out_vertices(b)));
+		vertex_offset = nir_ishl(b, vertex, nir_imm_int(b, 2));
+	} else {
+		assert(location >= VARYING_SLOT_PATCH0 &&
+			   location <= VARYING_SLOT_TESS_MAX);
+		unsigned index = location - VARYING_SLOT_PATCH0;
+		attr_offset = nir_iadd(b, nir_imm_int(b, index * 4 + comp), offset);
+		vertex_offset = nir_imm_int(b, 0);
+	}
+
+	return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset), vertex_offset);
 }
 
 static nir_ssa_def *
-build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
+build_patch_offset(nir_builder *b, struct state *state,
+		uint32_t base, uint32_t comp, nir_ssa_def *offset)
 {
-	debug_assert(var && var->data.patch);
-
-	return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
+	return build_per_vertex_offset(b, state, NULL, base, comp, offset);
 }
 
 static void
@@ -444,9 +518,11 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 			b->cursor = nir_before_instr(&intr->instr);
 
 			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
 			nir_ssa_def *offset = build_per_vertex_offset(b, state,
-					intr->src[0].ssa, intr->src[1].ssa, var);
+					intr->src[0].ssa,
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
+				   	intr->src[1].ssa);
 
 			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
 			break;
@@ -462,12 +538,13 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
 			nir_ssa_def *value = intr->src[0].ssa;
 			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
 			nir_ssa_def *offset = build_per_vertex_offset(b, state,
-					intr->src[1].ssa, intr->src[2].ssa, var);
+					intr->src[1].ssa,
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
+					intr->src[2].ssa);
 
-			replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
-					nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
+			replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address, offset);
 
 			break;
 		}
@@ -475,8 +552,6 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 		case nir_intrinsic_load_output: {
 			// src[] = { offset }.
 
-			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
-
 			b->cursor = nir_before_instr(&intr->instr);
 
 			nir_ssa_def *address, *offset;
@@ -486,13 +561,17 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 			 * are never used. most likely some issue with (sy) not properly
 			 * syncing with values coming from a second memory transaction.
 			 */
-			if (is_tess_levels(var)) {
+			gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
+			if (is_tess_levels(location)) {
 				assert(intr->dest.ssa.num_components == 1);
 				address = nir_load_tess_factor_base_ir3(b);
-				offset = build_tessfactor_base(b, var->data.location, state);
+				offset = build_tessfactor_base(b, location, state);
 			} else {
 				address = nir_load_tess_param_base_ir3(b);
-				offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+				offset = build_patch_offset(b, state,
+											location,
+											nir_intrinsic_component(intr),
+											intr->src[0].ssa);
 			}
 
 			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
@@ -504,14 +583,13 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
 			/* write patch output to bo */
 
-			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
-
 			b->cursor = nir_before_instr(&intr->instr);
 
 			/* sparse writemask not supported */
 			assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
 
-			if (is_tess_levels(var)) {
+			gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
+			if (is_tess_levels(location)) {
 				/* with tess levels are defined as float[4] and float[2],
 				 * but tess factor BO has smaller sizes for tris/isolines,
 				 * so we have to discard any writes beyond the number of
@@ -519,7 +597,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 				uint32_t inner_levels, outer_levels, levels;
 				tess_level_components(state, &inner_levels, &outer_levels);
 
-				if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+				if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
 					levels = outer_levels;
 				else
 					levels = inner_levels;
@@ -534,12 +612,15 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 				replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
 						intr->src[0].ssa,
 						nir_load_tess_factor_base_ir3(b),
-						nir_iadd(b, offset, build_tessfactor_base(b, var->data.location, state)));
+						nir_iadd(b, offset, build_tessfactor_base(b, location, state)));
 
 				nir_pop_if(b, nif);
 			} else {
 				nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-				nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);
+				nir_ssa_def *offset = build_patch_offset(b, state, 
+														 location,
+														 nir_intrinsic_component(intr),
+														 intr->src[1].ssa);
 
 				debug_assert(nir_intrinsic_component(intr) == 0);
 
@@ -580,7 +661,7 @@ ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
 		nir_print_shader(shader, stderr);
 	}
 
-	build_primitive_map(shader, nir_var_shader_out, &state.map);
+	build_primitive_map(shader, &state.map);
 	memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
 	v->output_size = state.map.stride;
 
@@ -672,9 +753,11 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 			b->cursor = nir_before_instr(&intr->instr);
 
 			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-			nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));
 			nir_ssa_def *offset = build_per_vertex_offset(b, state,
-					intr->src[0].ssa, intr->src[1].ssa, var);
+					intr->src[0].ssa,
+					nir_intrinsic_io_semantics(intr).location,
+					nir_intrinsic_component(intr),
+				   	intr->src[1].ssa);
 
 			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
 			break;
@@ -683,10 +766,6 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 		case nir_intrinsic_load_input: {
 			// src[] = { offset }.
 
-			nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));
-
-			debug_assert(var->data.patch);
-
 			b->cursor = nir_before_instr(&intr->instr);
 
 			nir_ssa_def *address, *offset;
@@ -696,13 +775,17 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 			 * are never used. most likely some issue with (sy) not properly
 			 * syncing with values coming from a second memory transaction.
 			 */
-			if (is_tess_levels(var)) {
+			gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
+			if (is_tess_levels(location)) {
 				assert(intr->dest.ssa.num_components == 1);
 				address = nir_load_tess_factor_base_ir3(b);
-				offset = build_tessfactor_base(b, var->data.location, state);
+				offset = build_tessfactor_base(b, location, state);
 			} else {
 				address = nir_load_tess_param_base_ir3(b);
-				offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+				offset = build_patch_offset(b, state,
+											location,
+											nir_intrinsic_component(intr),
+											intr->src[0].ssa);
 			}
 
 			offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));
@@ -718,7 +801,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 }
 
 void
-ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
+ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology)
 {
 	struct state state = { .topology = topology };
 
@@ -728,9 +811,6 @@ ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
 		nir_print_shader(shader, stderr);
 	}
 
-	/* Build map of inputs so we have the sizes. */
-	build_primitive_map(shader, nir_var_shader_in, &state.map);
-
 	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
 	assert(impl);
 
@@ -740,6 +820,8 @@ ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
 	nir_foreach_block_safe (block, impl)
 		lower_tess_eval_block(block, &b, &state);
 
+	v->input_size = calc_primitive_map_size(shader);
+
 	nir_metadata_preserve(impl, 0);
 }
 
@@ -804,8 +886,6 @@ ir3_nir_lower_gs(nir_shader *shader)
 		nir_print_shader(shader, stderr);
 	}
 
-	build_primitive_map(shader, nir_var_shader_in, &state.map);
-
 	/* Create an output var for vertex_flags. This will be shadowed below,
 	 * same way regular outputs get shadowed, and this variable will become a
 	 * temporary.
@@ -914,38 +994,3 @@ ir3_nir_lower_gs(nir_shader *shader)
 	}
 }
 
-uint32_t
-ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
-		const struct ir3_shader_variant *consumer,
-		uint32_t *locs)
-{
-	uint32_t num_loc = 0, factor;
-
-	switch (consumer->type) {
-	case MESA_SHADER_TESS_CTRL:
-	case MESA_SHADER_GEOMETRY:
-		/* These stages load with ldlw, which expects byte offsets. */
-		factor = 4;
-		break;
-	case MESA_SHADER_TESS_EVAL:
-		/* The tess eval shader uses ldg, which takes dword offsets. */
-		factor = 1;
-		break;
-	default:
-		unreachable("bad shader stage");
-	}
-
-	nir_foreach_shader_in_variable(in_var, consumer->shader->nir) {
-		nir_foreach_shader_out_variable(out_var, producer->shader->nir) {
-			if (in_var->data.location == out_var->data.location) {
-				locs[in_var->data.driver_location] =
-					producer->output_loc[out_var->data.driver_location] * factor;
-
-				debug_assert(num_loc <= in_var->data.driver_location + 1);
-				num_loc = in_var->data.driver_location + 1;
-			}
-		}
-	}
-
-	return num_loc;
-}
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 39870066956..f78d8026e2a 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -570,7 +570,13 @@ struct ir3_shader_variant {
 	/* Size in dwords of all outputs for VS, size of entire patch for HS. */
 	uint32_t output_size;
 
-	/* Map from driver_location to byte offset in per-primitive storage */
+	/* Expected size of incoming output_loc for HS, DS, and GS */
+	uint32_t input_size;
+
+	/* Map from location to offset in per-primitive storage. In dwords for
+	 * HS, where varyings are read in the next stage via ldg with a dword
+	 * offset, and in bytes for all other stages.
+	 */
 	unsigned output_loc[32];
 
 	/* attributes (VS) / varyings (FS):
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index c2cc4dbd009..c1e6000c138 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -686,7 +686,7 @@ tu6_setup_streamout(struct tu_cs *cs,
 static void
 tu6_emit_const(struct tu_cs *cs, uint32_t opcode, uint32_t base,
                enum a6xx_state_block block, uint32_t offset,
-               uint32_t size, uint32_t *dwords) {
+               uint32_t size, const uint32_t *dwords) {
    assert(size % 4 == 0);
 
    tu_cs_emit_pkt7(cs, opcode, 3 + size);
@@ -711,16 +711,14 @@ tu6_emit_link_map(struct tu_cs *cs,
 {
    const struct ir3_const_state *const_state = ir3_const_state(consumer);
    uint32_t base = const_state->offsets.primitive_map;
-   uint32_t patch_locs[MAX_VARYING] = { }, num_loc;
-   num_loc = ir3_link_geometry_stages(producer, consumer, patch_locs);
-   int size = DIV_ROUND_UP(num_loc, 4);
+   int size = DIV_ROUND_UP(consumer->input_size, 4);
 
    size = (MIN2(size + base, consumer->constlen) - base) * 4;
    if (size <= 0)
       return;
 
    tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, sb, 0, size,
-                         patch_locs);
+                         producer->output_loc);
 }
 
 static uint16_t
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h
index 5e79661c5bf..4784ac673bc 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_const.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h
@@ -308,11 +308,7 @@ ir3_emit_link_map(struct fd_screen *screen,
 {
 	const struct ir3_const_state *const_state = ir3_const_state(v);
 	uint32_t base = const_state->offsets.primitive_map;
-	uint32_t patch_locs[MAX_VARYING] = { }, num_loc;
-
-	num_loc = ir3_link_geometry_stages(producer, v, patch_locs);
-
-	int size = DIV_ROUND_UP(num_loc, 4);
+	int size = DIV_ROUND_UP(v->input_size, 4);
 
 	/* truncate size to avoid writing constants that shader
 	 * does not use:
@@ -324,7 +320,7 @@ ir3_emit_link_map(struct fd_screen *screen,
 	size *= 4;
 
 	if (size > 0)
-		emit_const_user(ring, v, base, size, patch_locs);
+		emit_const_user(ring, v, base, size, producer->output_loc);
 }
 
 /* emit stream-out buffers: */