diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index ac870459981..01541e04314 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -51,6 +51,45 @@ int agx_debug = 0;
fprintf(stderr, "%s:%d: "fmt, \
__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
+static agx_index
+agx_get_cf(agx_context *ctx, bool smooth, bool perspective,
+ gl_varying_slot slot, unsigned offset, unsigned count)
+{
+ struct agx_varyings_fs *varyings = &ctx->out->varyings.fs;
+ unsigned cf_base = varyings->nr_cf;
+
+ if (slot == VARYING_SLOT_POS) {
+ assert(offset == 2 || (cf_base == 0 && offset == 3));
+ varyings->reads_z |= (offset == 2);
+ }
+
+ /* First, search for an appropriate binding. This is O(n) to the number of
+ * bindings, which isn't great, but n should be small in practice.
+ */
+ for (unsigned b = 0; b < varyings->nr_bindings; ++b) {
+ if ((varyings->bindings[b].slot == slot) &&
+ (varyings->bindings[b].offset == offset) &&
+ (varyings->bindings[b].count == count) &&
+ (varyings->bindings[b].smooth == smooth) &&
+ (varyings->bindings[b].perspective == perspective)) {
+
+ return agx_immediate(varyings->bindings[b].cf_base);
+ }
+ }
+
+ /* If we didn't find one, make one */
+ unsigned b = varyings->nr_bindings++;
+ varyings->bindings[b].cf_base = varyings->nr_cf;
+ varyings->bindings[b].slot = slot;
+ varyings->bindings[b].offset = offset;
+ varyings->bindings[b].count = count;
+ varyings->bindings[b].smooth = smooth;
+ varyings->bindings[b].perspective = perspective;
+ varyings->nr_cf += count;
+
+ return agx_immediate(cf_base);
+}
+
/* Builds a 64-bit hash table key for an index */
static uint64_t
agx_index_to_key(agx_index idx)
@@ -278,17 +317,25 @@ agx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *i
unsigned components = instr->num_components;
assert(components >= 1 && components <= 4);
+ nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
nir_src *offset = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset) && "no indirects");
- unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)];
- imm_index += nir_src_as_uint(*offset);
-
assert(nir_dest_bit_size(instr->dest) == 32 && "no 16-bit flat shading");
+ /* Get all coefficient registers up front. This ensures the driver emits a
+ * single vectorized binding.
+ */
+ agx_index cf = agx_get_cf(b->shader, false, false,
+ sem.location + nir_src_as_uint(*offset), 0,
+ components);
+
for (unsigned i = 0; i < components; ++i) {
/* vec3 for each vertex, unknown what first 2 channels are for */
- agx_index values = agx_ld_vary_flat(b, agx_immediate(imm_index + i), 1);
+ agx_index values = agx_ld_vary_flat(b, cf, 1);
dests[i] = agx_p_extract(b, values, 2);
+
+ /* Each component accesses a sequential coefficient register */
+ cf.value++;
}
}
@@ -304,22 +351,29 @@ agx_emit_load_vary(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
/* TODO: Interpolation modes */
assert(parent->intrinsic == nir_intrinsic_load_barycentric_pixel);
+ nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
nir_src *offset = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset) && "no indirects");
- unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)];
- imm_index += nir_src_as_uint(*offset) * 4;
+
+   /* TODO: Make use of w explicitly in the IR */
+ agx_index I = agx_get_cf(b->shader, true, true,
+ sem.location + nir_src_as_uint(*offset), 0,
+ components);
agx_index vec = agx_vec_for_intr(b->shader, instr);
- agx_ld_vary_to(b, vec, agx_immediate(imm_index), components, true);
+ agx_ld_vary_to(b, vec, I, components, true);
agx_emit_split(b, dests, vec, components);
}
static agx_instr *
agx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr)
{
+ nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
nir_src *offset = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset) && "todo: indirects");
- unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)];
+
+ unsigned imm_index = b->shader->out->varyings.vs.slots[sem.location];
+ assert(imm_index < ~0);
imm_index += nir_intrinsic_component(instr);
imm_index += nir_src_as_uint(*offset);
@@ -447,8 +501,10 @@ agx_emit_load_frag_coord(agx_builder *b, agx_index *dests, nir_intrinsic_instr *
AGX_ROUND_RTE), agx_immediate_f(0.5f));
}
- dests[2] = agx_ld_vary(b, agx_immediate(1), 1, false); /* z */
- dests[3] = agx_ld_vary(b, agx_immediate(0), 1, false); /* w */
+ agx_index z = agx_get_cf(b->shader, true, false, VARYING_SLOT_POS, 2, 1);
+
+ dests[2] = agx_ld_vary(b, z, 1, false);
+ dests[3] = agx_ld_vary(b, agx_immediate(0), 1, false); /* cf0 is w */
}
static agx_instr *
@@ -1500,118 +1556,38 @@ agx_optimize_nir(nir_shader *nir)
/* ABI: position first, then user, then psiz */
static void
-agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings *varyings,
- unsigned *remap)
+agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings)
{
unsigned base = 0;
- nir_variable *pos = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_POS);
- if (pos) {
- assert(pos->data.driver_location < AGX_MAX_VARYINGS);
- remap[pos->data.driver_location] = base;
- base += 4;
- }
+   /* Initialize to "nothing is written" */
+ for (unsigned i = 0; i < ARRAY_SIZE(varyings->slots); ++i)
+ varyings->slots[i] = ~0;
+
+ assert(nir->info.outputs_written & VARYING_BIT_POS);
+ varyings->slots[VARYING_SLOT_POS] = base;
+ base += 4;
nir_foreach_shader_out_variable(var, nir) {
unsigned loc = var->data.location;
- if(loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ) {
+ if(loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
continue;
- }
- assert(var->data.driver_location < AGX_MAX_VARYINGS);
- remap[var->data.driver_location] = base;
+ varyings->slots[loc] = base;
base += 4;
}
- nir_variable *psiz = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_PSIZ);
- if (psiz) {
- assert(psiz->data.driver_location < AGX_MAX_VARYINGS);
- remap[psiz->data.driver_location] = base;
+ /* TODO: Link FP16 varyings */
+ varyings->base_index_fp16 = base;
+
+ if (nir->info.outputs_written & VARYING_BIT_PSIZ) {
+ varyings->slots[VARYING_SLOT_PSIZ] = base;
base += 1;
}
- varyings->nr_slots = base;
-}
-
-static void
-agx_remap_varyings_fs(nir_shader *nir, struct agx_varyings *varyings,
- unsigned *remap)
-{
- struct agx_cf_binding_packed *packed = varyings->packed;
- unsigned base = 0;
-
- agx_pack(packed, CF_BINDING, cfg) {
- /* W component */
- cfg.shade_model = AGX_SHADE_MODEL_GOURAUD;
- cfg.components = 1;
- cfg.base_slot = base;
- cfg.base_coefficient_register = base;
- }
-
- base++;
- packed++;
-
- agx_pack(packed, CF_BINDING, cfg) {
- /* Z component */
- cfg.shade_model = AGX_SHADE_MODEL_GOURAUD;
- cfg.perspective = true;
- cfg.fragcoord_z = true;
- cfg.components = 1;
- cfg.base_slot = base;
- cfg.base_coefficient_register = base;
- }
-
- base++;
- packed++;
-
- unsigned comps[MAX_VARYING] = { 0 };
-
- nir_foreach_shader_in_variable(var, nir) {
- unsigned loc = var->data.driver_location;
- const struct glsl_type *column =
- glsl_without_array_or_matrix(var->type);
- unsigned chan = glsl_get_components(column);
-
- /* If we have a fractional location added, we need to increase the size
- * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
- * We could do better but this is an edge case as it is, normally
- * packed varyings will be aligned.
- */
- chan += var->data.location_frac;
- comps[loc] = MAX2(comps[loc], chan);
- }
-
- nir_foreach_shader_in_variable(var, nir) {
- unsigned loc = var->data.driver_location;
- unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
- unsigned channels = comps[loc];
-
- assert(var->data.driver_location <= AGX_MAX_VARYINGS);
- remap[var->data.driver_location] = base;
-
- for (int c = 0; c < sz; ++c) {
- agx_pack(packed, CF_BINDING, cfg) {
- cfg.shade_model =
- (var->data.interpolation == INTERP_MODE_FLAT) ?
- AGX_SHADE_MODEL_FLAT_VERTEX_2 :
- AGX_SHADE_MODEL_GOURAUD;
-
- cfg.perspective = (var->data.interpolation != INTERP_MODE_FLAT);
- cfg.point_sprite = (var->data.location == VARYING_SLOT_PNTC);
-
- cfg.components = channels;
- cfg.base_slot = base;
- cfg.base_coefficient_register = base;
- }
-
- base += channels;
- packed++;
- }
- }
-
- varyings->nr_descs = (packed - varyings->packed);
- varyings->nr_slots = base;
+ /* All varyings linked now */
+ varyings->nr_index = base;
}
/*
@@ -1648,6 +1624,8 @@ agx_compile_shader_nir(nir_shader *nir,
ctx->stage = nir->info.stage;
list_inithead(&ctx->blocks);
+ memset(out, 0, sizeof *out);
+
if (ctx->stage == MESA_SHADER_VERTEX) {
out->writes_psiz = nir->info.outputs_written &
BITFIELD_BIT(VARYING_SLOT_PSIZ);
@@ -1714,9 +1692,13 @@ agx_compile_shader_nir(nir_shader *nir,
/* Must be last since NIR passes can remap driver_location freely */
if (ctx->stage == MESA_SHADER_VERTEX) {
- agx_remap_varyings_vs(nir, &out->varyings, ctx->varyings);
+ agx_remap_varyings_vs(nir, &out->varyings.vs);
} else if (ctx->stage == MESA_SHADER_FRAGMENT) {
- agx_remap_varyings_fs(nir, &out->varyings, ctx->varyings);
+ /* Ensure cf0 is W */
+ ASSERTED agx_index w =
+ agx_get_cf(ctx, true, false, VARYING_SLOT_POS, 3, 1);
+
+ assert(w.value == 0);
}
bool skip_internal = nir->info.internal;
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index f877d04dea7..978806d0dc7 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -84,9 +84,71 @@ struct agx_push {
#define AGX_MAX_PUSH_RANGES (16)
#define AGX_MAX_VARYINGS (32)
+struct agx_varyings_vs {
+ /* The first index used for FP16 varyings. Indices less than this are treated
+ * as FP32. This may require remapping slots to guarantee.
+ */
+ unsigned base_index_fp16;
+
+ /* The total number of vertex shader indices output. Must be at least
+ * base_index_fp16.
+ */
+ unsigned nr_index;
+
+ /* If the slot is written, this is the base index that the first component
+ * of the slot is written to. The next components are found in the next
+ * indices. If less than base_index_fp16, this is a 32-bit slot (with 4
+ * indices for the 4 components), else this is a 16-bit slot (with 2
+ * indices for the 4 components). This must be less than nr_index.
+ *
+ * If the slot is not written, this must be ~0.
+ */
+ unsigned slots[VARYING_SLOT_MAX];
+};
+
+/* Conservative bound */
+#define AGX_MAX_CF_BINDINGS (VARYING_SLOT_MAX)
+
+struct agx_varyings_fs {
+ /* Number of coefficient registers used */
+ unsigned nr_cf;
+
+ /* Number of coefficient register bindings */
+ unsigned nr_bindings;
+
+ /* Whether gl_FragCoord.z is read */
+ bool reads_z;
+
+ /* Coefficient register bindings */
+ struct {
+ /* Base coefficient register */
+ unsigned cf_base;
+
+ /* Slot being bound */
+ gl_varying_slot slot;
+
+ /* First component bound.
+ *
+ * Must be 2 (Z) or 3 (W) if slot == VARYING_SLOT_POS.
+ */
+ unsigned offset : 2;
+
+ /* Number of components bound */
+ unsigned count : 3;
+
+ /* Is smooth shading enabled? If false, flat shading is used */
+ bool smooth : 1;
+
+ /* Perspective correct interpolation */
+ bool perspective : 1;
+ } bindings[AGX_MAX_CF_BINDINGS];
+};
+
struct agx_varyings {
- unsigned nr_descs, nr_slots;
- struct agx_cf_binding_packed packed[AGX_MAX_VARYINGS];
+ union {
+ struct agx_varyings_vs vs;
+ struct agx_varyings_fs fs;
+ };
};
struct agx_shader_info {
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index 91bff122940..b5a2c0fecde 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -375,9 +375,6 @@ typedef struct {
struct agx_shader_info *out;
struct agx_shader_key *key;
- /* Remapping table for varyings indexed by driver_location */
- unsigned varyings[AGX_MAX_VARYINGS];
-
/* Place to start pushing new values */
unsigned push_base;
diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml
index d4b141112c9..20b2170600c 100644
--- a/src/asahi/lib/cmdbuf.xml
+++ b/src/asahi/lib/cmdbuf.xml
@@ -494,7 +494,6 @@
-
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 17c3606b89d..5c2d9ca5762 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -897,6 +897,122 @@ agx_create_shader_state(struct pipe_context *pctx,
return so;
}
+static unsigned
+agx_find_linked_slot(struct agx_varyings_vs *vs, struct agx_varyings_fs *fs,
+ gl_varying_slot slot, unsigned offset)
+{
+ assert(offset < 4);
+ assert(slot != VARYING_SLOT_PNTC && "point coords aren't linked");
+
+ if (slot == VARYING_SLOT_POS) {
+ if (offset == 3) {
+ return 0; /* W */
+ } else if (offset == 2) {
+ assert(fs->reads_z);
+ return 1; /* Z */
+ } else {
+ unreachable("gl_Position.xy are not varyings");
+ }
+ }
+
+ unsigned vs_index = vs->slots[slot];
+
+ assert(vs_index >= 4 && "gl_Position should have been the first 4 slots");
+ assert(vs_index < vs->nr_index &&
+ "varyings not written by vertex shader are undefined");
+ assert((vs_index < vs->base_index_fp16) ==
+ ((vs_index + offset) < vs->base_index_fp16) &&
+ "a given varying must have a consistent type");
+
+ unsigned vs_user_index = (vs_index + offset) - 4;
+
+ if (fs->reads_z)
+ return vs_user_index + 2;
+ else
+ return vs_user_index + 1;
+}
+
+static unsigned
+agx_num_general_outputs(struct agx_varyings_vs *vs)
+{
+ unsigned nr_vs = vs->nr_index;
+ bool writes_psiz = vs->slots[VARYING_SLOT_PSIZ] < nr_vs;
+
+ assert(nr_vs >= 4 && "gl_Position must be written");
+ if (writes_psiz)
+ assert(nr_vs >= 5 && "gl_PointSize is written");
+
+ return nr_vs - (writes_psiz ? 5 : 4);
+}
+
+static uint32_t
+agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
+                        struct agx_varyings_fs *fs, bool first_provoking_vertex)
+{
+   /* If there are no bindings, there's nothing to emit */
+   if (fs->nr_bindings == 0)
+      return 0;
+
+   size_t linkage_size = AGX_CF_BINDING_HEADER_LENGTH +
+                         (fs->nr_bindings * AGX_CF_BINDING_LENGTH);
+
+   void *tmp = alloca(linkage_size);
+   struct agx_cf_binding_header_packed *header = tmp;
+   struct agx_cf_binding_packed *bindings = (void *) (header + 1);
+
+   unsigned nr_slots = agx_num_general_outputs(vs) + 1 + (fs->reads_z ? 1 : 0);
+
+   agx_pack(header, CF_BINDING_HEADER, cfg) {
+      cfg.number_of_32_bit_slots = nr_slots;
+      cfg.number_of_coefficient_registers = fs->nr_cf;
+   }
+
+   for (unsigned i = 0; i < fs->nr_bindings; ++i) {
+      agx_pack(bindings + i, CF_BINDING, cfg) {
+         cfg.base_coefficient_register = fs->bindings[i].cf_base;
+         cfg.components = fs->bindings[i].count;
+         cfg.perspective = fs->bindings[i].perspective;
+
+         cfg.shade_model = fs->bindings[i].smooth ? AGX_SHADE_MODEL_GOURAUD :
+                           first_provoking_vertex ? AGX_SHADE_MODEL_FLAT_VERTEX_0 :
+                                                    AGX_SHADE_MODEL_FLAT_VERTEX_2;
+
+         if (fs->bindings[i].slot == VARYING_SLOT_PNTC) {
+            assert(fs->bindings[i].offset == 0);
+            cfg.point_sprite = true;
+         } else {
+            cfg.base_slot = agx_find_linked_slot(vs, fs, fs->bindings[i].slot,
+                                                 fs->bindings[i].offset);
+
+            assert(cfg.base_slot + cfg.components <= nr_slots &&
+                   "overflow slots");
+         }
+
+         if (fs->bindings[i].slot == VARYING_SLOT_POS) {
+            if (fs->bindings[i].offset == 2)
+               cfg.fragcoord_z = true;
+            else
+               assert(!cfg.perspective && "W must not be perspective divided");
+         }
+
+         assert(cfg.base_coefficient_register + cfg.components <= fs->nr_cf &&
+                "overflowed coefficient registers");
+      }
+   }
+
+   struct agx_ptr ptr = agx_pool_alloc_aligned(pool, (3 * linkage_size), 256);
+   assert(ptr.gpu < (1ull << 32) && "varyings must be in low memory");
+
+   /* The hardware expects the linkage repeated thrice (reason unknown). tmp
+    * holds a single copy, so replicate that one copy into all three slots. */
+   for (unsigned i = 0; i < 3; ++i) {
+      memcpy(((uint8_t *) ptr.cpu) + (i * linkage_size),
+             tmp, linkage_size);
+   }
+
+   return ptr.gpu;
+}
+
/* Does not take ownership of key. Clones if necessary. */
static bool
agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
@@ -942,35 +1058,10 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
agx_compile_shader_nir(nir, &key->base, &binary, &compiled->info);
- struct agx_varyings *varyings = &compiled->info.varyings;
- unsigned packed_varying_sz = (AGX_CF_BINDING_HEADER_LENGTH +
- varyings->nr_descs * AGX_CF_BINDING_LENGTH);
- uint8_t *packed_varyings = alloca(packed_varying_sz);
-
- agx_pack(packed_varyings, CF_BINDING_HEADER, cfg) {
- cfg.number_of_32_bit_slots = varyings->nr_slots;
- cfg.number_of_coefficient_registers = varyings->nr_slots;
- }
-
- memcpy(packed_varyings + AGX_CF_BINDING_HEADER_LENGTH,
- varyings->packed, varyings->nr_descs * AGX_CF_BINDING_LENGTH);
-
if (binary.size) {
struct agx_device *dev = agx_device(ctx->base.screen);
- compiled->bo = agx_bo_create(dev,
- ALIGN_POT(binary.size, 256) + (3 * packed_varying_sz),
- AGX_MEMORY_TYPE_SHADER);
+ compiled->bo = agx_bo_create(dev, binary.size, AGX_MEMORY_TYPE_SHADER);
memcpy(compiled->bo->ptr.cpu, binary.data, binary.size);
-
-
- /* TODO: Why is the varying descriptor duplicated 3x? */
- unsigned offs = ALIGN_POT(binary.size, 256);
- for (unsigned copy = 0; copy < 3; ++copy) {
- memcpy(((uint8_t *) compiled->bo->ptr.cpu) + offs, packed_varyings, packed_varying_sz);
- offs += packed_varying_sz;
- }
-
- compiled->varyings = compiled->bo->ptr.gpu + ALIGN_POT(binary.size, 256);
}
ralloc_free(nir);
@@ -1161,7 +1252,7 @@ agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum
agx_pack(record, SET_SHADER, cfg) {
cfg.code = cs->bo->ptr.gpu;
cfg.register_quadwords = 0;
- cfg.unk_2b = cs->info.varyings.nr_slots;
+ cfg.unk_2b = cs->info.varyings.vs.nr_index;
cfg.unk_2 = 0x0d;
}
@@ -1377,27 +1468,27 @@ demo_launch_fragment(struct agx_context *ctx, struct agx_pool *pool, uint32_t pi
}
static uint64_t
-demo_interpolation(struct agx_compiled_shader *fs, struct agx_pool *pool)
+demo_interpolation(struct agx_varyings_vs *vs, struct agx_pool *pool)
{
struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_INTERPOLATION_LENGTH, 64);
agx_pack(t.cpu, INTERPOLATION, cfg) {
- cfg.varying_count = fs->info.varyings.nr_slots;
+ cfg.varying_count = agx_num_general_outputs(vs);
};
return t.gpu;
}
static uint64_t
-demo_linkage(struct agx_compiled_shader *vs, struct agx_pool *pool)
+demo_linkage(struct agx_compiled_shader *vs, struct agx_compiled_shader *fs, struct agx_pool *pool)
{
struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_LINKAGE_LENGTH, 64);
agx_pack(t.cpu, LINKAGE, cfg) {
- cfg.varying_count = vs->info.varyings.nr_slots;
- cfg.any_varyings = !!cfg.varying_count;
+ cfg.varying_count = vs->info.varyings.vs.nr_index;
+ cfg.any_varyings = !!fs->info.varyings.fs.nr_bindings;
cfg.has_point_size = vs->info.writes_psiz;
- cfg.has_frag_coord_z = 1;
+ cfg.has_frag_coord_z = fs->info.varyings.fs.reads_z;
};
return t.gpu;
@@ -1505,8 +1596,8 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
unsigned tex_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count;
agx_pack(out, BIND_VERTEX_PIPELINE, cfg) {
cfg.pipeline = pipeline_vertex;
- cfg.output_count_1 = ctx->vs->info.varyings.nr_slots;
- cfg.output_count_2 = ctx->vs->info.varyings.nr_slots;
+ cfg.output_count_1 = ctx->vs->info.varyings.vs.nr_index;
+ cfg.output_count_2 = cfg.output_count_1;
cfg.groups_of_8_immediate_textures = DIV_ROUND_UP(tex_count, 8);
cfg.groups_of_4_samplers = DIV_ROUND_UP(tex_count, 4);
@@ -1519,9 +1610,10 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
bool reads_tib = ctx->fs->info.reads_tib;
bool sample_mask_from_shader = ctx->fs->info.writes_sample_mask;
- agx_push_record(&out, 5, demo_interpolation(ctx->fs, pool));
- agx_push_record(&out, 5, demo_launch_fragment(ctx, pool, pipeline_fragment, varyings, ctx->fs->info.varyings.nr_descs));
- agx_push_record(&out, 4, demo_linkage(ctx->vs, pool));
+ agx_push_record(&out, 5, demo_interpolation(&ctx->vs->info.varyings.vs, pool));
+ agx_push_record(&out, 5, demo_launch_fragment(ctx, pool, pipeline_fragment,
+ varyings, ctx->fs->info.varyings.fs.nr_bindings));
+ agx_push_record(&out, 4, demo_linkage(ctx->vs, ctx->fs, pool));
agx_push_record(&out, 7, demo_rasterizer(ctx, pool, is_points));
agx_push_record(&out, 5, demo_unk11(pool, is_lines, is_points, reads_tib, sample_mask_from_shader));
@@ -1620,6 +1712,12 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
agx_update_vs(ctx);
agx_update_fs(ctx);
+ /* TODO: Cache or dirty track */
+ uint32_t varyings = agx_link_varyings_vs_fs(&ctx->batch->pipeline_pool,
+ &ctx->vs->info.varyings.vs,
+ &ctx->fs->info.varyings.fs,
+ ctx->rast->base.flatshade_first);
+
agx_batch_add_bo(batch, ctx->vs->bo);
agx_batch_add_bo(batch, ctx->fs->bo);
@@ -1634,7 +1732,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
uint8_t *out = agx_encode_state(ctx, batch->encoder_current,
agx_build_pipeline(ctx, ctx->vs, PIPE_SHADER_VERTEX),
agx_build_pipeline(ctx, ctx->fs, PIPE_SHADER_FRAGMENT),
- ctx->fs->varyings, is_lines, info->mode == PIPE_PRIM_POINTS);
+ varyings, is_lines, info->mode == PIPE_PRIM_POINTS);
enum agx_primitive prim = agx_primitive_for_pipe(info->mode);
unsigned idx_size = info->index_size;
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index 05a89103f16..94f4b2796b1 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -58,9 +58,6 @@ struct agx_compiled_shader {
/* Mapped executable memory */
struct agx_bo *bo;
- /* Varying descriptor (TODO: is this the right place?) */
- uint64_t varyings;
-
/* Metadata returned from the compiler */
struct agx_shader_info info;
};