asahi: rewrite varying linking

Lower store_output to store_uvs_agx + math. Link UVS indices at draw-time
instead of compile-time to get efficient separate shaders. Also picks up varying
compaction along the way.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
This commit is contained in:
Alyssa Rosenzweig 2024-03-03 10:09:08 -04:00 committed by Marge Bot
parent 3764adbef1
commit f0e1ccc8d4
13 changed files with 436 additions and 474 deletions

View file

@ -578,39 +578,6 @@ agx_emit_load_vary(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
agx_emit_cached_split(b, dest, components);
}
/* Emit an st_vary for a NIR store_output in a vertex shader. The output index
 * is resolved at compile-time from the precomputed varying slot table, so only
 * constant offsets are supported here.
 */
static agx_instr *
agx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr)
{
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
nir_src *offset = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset) && "todo: indirects");
/* Base index for this location, assigned by agx_remap_varyings_vs */
unsigned imm_index = b->shader->out->varyings.vs.slots[sem.location];
if (sem.location == VARYING_SLOT_LAYER ||
sem.location == VARYING_SLOT_CLIP_DIST0) {
/* Separate slots used for the sysval vs the varying. The default slot
 * above is for the varying. Change for the sysval.
 */
assert(sem.no_sysval_output || sem.no_varying);
if (sem.no_varying) {
imm_index = sem.location == VARYING_SLOT_LAYER
? b->shader->out->varyings.vs.layer_viewport_slot
: b->shader->out->varyings.vs.clip_dist_slot;
}
}
/* ~0 marks "not written" in the slot table; the store must target a real slot */
assert(imm_index < ~0);
/* Each 32-bit slot spans 4 indices, one per component */
imm_index += (nir_src_as_uint(*offset) * 4) + nir_intrinsic_component(instr);
/* nir_lower_io_to_scalar */
assert(nir_intrinsic_write_mask(instr) == 0x1);
return agx_st_vary(b, agx_immediate(imm_index),
agx_src_index(&instr->src[0]));
}
static agx_instr *
agx_emit_local_store_pixel(agx_builder *b, nir_intrinsic_instr *instr)
{
@ -1210,9 +1177,10 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
agx_emit_load(b, dst, instr);
return NULL;
case nir_intrinsic_store_output:
case nir_intrinsic_store_uvs_agx:
assert(stage == MESA_SHADER_VERTEX);
return agx_emit_store_vary(b, instr);
return agx_st_vary(b, agx_src_index(&instr->src[1]),
agx_src_index(&instr->src[0]));
case nir_intrinsic_store_agx:
agx_emit_store(b, instr);
@ -2667,96 +2635,6 @@ agx_optimize_nir(nir_shader *nir, unsigned *preamble_size)
NIR_PASS(_, nir, nir_lower_phis_to_scalar, true);
}
/* ABI: position first, then user, then psiz */
/* Assign a hardware output index to every written vertex-shader varying.
 * The GPU ABI mandates the ordering position, smooth user varyings, flat user
 * varyings, linear user varyings, then point size / layer-viewport / clip
 * distance sysvals. Results are written into varyings->slots and the per-type
 * counts; unwritten slots remain ~0.
 */
static void
agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings,
struct agx_shader_key *key)
{
unsigned base = 0;
/* Initialize to "nothing is written" */
for (unsigned i = 0; i < ARRAY_SIZE(varyings->slots); ++i)
varyings->slots[i] = ~0;
/* gl_Position is implicitly written, although it may validly be absent in
 * vertex programs run only for transform feedback. Those ignore their
 * varyings so it doesn't matter what we do here as long as we don't fail.
 */
varyings->slots[VARYING_SLOT_POS] = base;
base += 4;
/* These are always flat-shaded from the FS perspective */
key->vs.outputs_flat_shaded |= VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
/* The internal cull distance slots are always linearly-interpolated */
key->vs.outputs_linear_shaded |=
BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, 2);
/* A varying must not be both flat and linear shaded */
assert(!(key->vs.outputs_flat_shaded & key->vs.outputs_linear_shaded));
/* Smooth 32-bit user bindings go next */
u_foreach_bit64(loc, nir->info.outputs_written &
~key->vs.outputs_flat_shaded &
~key->vs.outputs_linear_shaded) {
/* Position/point size are handled separately, not as user varyings */
if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
continue;
assert(loc < ARRAY_SIZE(varyings->slots));
varyings->slots[loc] = base;
base += 4;
varyings->num_32_smooth += 4;
}
/* Flat 32-bit user bindings go next */
u_foreach_bit64(loc,
nir->info.outputs_written & key->vs.outputs_flat_shaded) {
if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
continue;
assert(loc < ARRAY_SIZE(varyings->slots));
varyings->slots[loc] = base;
base += 4;
varyings->num_32_flat += 4;
}
/* Linear 32-bit user bindings go next */
u_foreach_bit64(loc,
nir->info.outputs_written & key->vs.outputs_linear_shaded) {
if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
continue;
assert(loc < ARRAY_SIZE(varyings->slots));
varyings->slots[loc] = base;
base += 4;
varyings->num_32_linear += 4;
}
/* TODO: Link FP16 varyings */
varyings->base_index_fp16 = base;
varyings->num_16_smooth = 0;
varyings->num_16_flat = 0;
varyings->num_16_linear = 0;
/* Point size is a single scalar index, not a 4-component slot */
if (nir->info.outputs_written & VARYING_BIT_PSIZ) {
varyings->slots[VARYING_SLOT_PSIZ] = base;
base += 1;
}
/* Layer and viewport share one packed 32-bit sysval index */
if (nir->info.outputs_written & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)) {
varyings->layer_viewport_slot = base;
base += 1;
}
/* One scalar index per clip distance */
if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0) {
varyings->clip_dist_slot = base;
varyings->nr_clip_dists = nir->info.clip_distance_array_size;
base += varyings->nr_clip_dists;
}
/* All varyings linked now */
varyings->nr_index = base;
}
/*
* Varyings that are used as texture coordinates should be kept at fp32, because
* fp16 does not have enough precision for large textures. It's technically
@ -3188,10 +3066,6 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
if (nir->info.stage == MESA_SHADER_FRAGMENT)
out->tag_write_disable = !nir->info.writes_memory;
if (nir->info.stage == MESA_SHADER_VERTEX &&
(nir->info.outputs_written & VARYING_BIT_CLIP_DIST0))
NIR_PASS(_, nir, agx_nir_lower_clip_distance);
bool needs_libagx = true /* TODO: Optimize */;
if (nir->info.stage == MESA_SHADER_FRAGMENT)
@ -3238,19 +3112,6 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
/* Late VBO lowering creates constant udiv instructions */
NIR_PASS(_, nir, nir_opt_idiv_const, 16);
/* Varying output is scalar, other I/O is vector. Lowered late because
* transform feedback programs will use vector output.
*/
if (nir->info.stage == MESA_SHADER_VERTEX) {
NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
if (nir->info.outputs_written &
(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)) {
NIR_PASS(_, nir, agx_nir_lower_layer);
}
}
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_load_from_texture_handle,
nir_metadata_block_index | nir_metadata_dominance, NULL);
@ -3258,10 +3119,7 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
out->push_count = key->reserved_preamble;
agx_optimize_nir(nir, &out->push_count);
/* Must be last since NIR passes can remap driver_location freely */
if (nir->info.stage == MESA_SHADER_VERTEX)
agx_remap_varyings_vs(nir, &out->varyings.vs, key);
else if (nir->info.stage == MESA_SHADER_FRAGMENT)
if (nir->info.stage == MESA_SHADER_FRAGMENT)
assign_coefficient_regs(nir, &out->varyings.fs);
if (agx_should_dump(nir, AGX_DBG_SHADERS))
@ -3284,9 +3142,6 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
}
if (nir->info.stage == MESA_SHADER_VERTEX) {
out->writes_psiz =
nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ);
out->nonzero_viewport = nir->info.outputs_written & VARYING_BIT_VIEWPORT;
out->writes_layer_viewport =

View file

@ -9,50 +9,6 @@
#include "util/u_dynarray.h"
#include "shader_enums.h"
struct agx_varyings_vs {
/* The number of user varyings of each type. The varyings must be allocated
* in this order ({smooth, flat, linear} × {32, 16}), which may require
* remapping.
*/
unsigned num_32_smooth;
unsigned num_32_flat;
unsigned num_32_linear;
unsigned num_16_smooth;
unsigned num_16_flat;
unsigned num_16_linear;
/* The first index used for FP16 varyings. Indices less than this are treated
* as FP32. This may require remapping slots to guarantee.
*/
unsigned base_index_fp16;
/* The total number of vertex shader indices output. Must be at least
* base_index_fp16.
*/
unsigned nr_index;
/* If the slot is written, this is the base index that the first component
* of the slot is written to. The next components are found in the next
* indices. If less than base_index_fp16, this is a 32-bit slot (with 4
* indices for the 4 components), else this is a 16-bit slot (with 2
* indices for the 4 components). This must be less than nr_index.
*
* If the slot is not written, this must be ~0.
*/
unsigned slots[VARYING_SLOT_MAX];
/* Slot for the combined layer/viewport 32-bit sysval output, or ~0 if none
* is written. What's at slots[VARYING_SLOT_LAYER] is the varying output.
*/
unsigned layer_viewport_slot;
/* Base slot for the clip distance sysval outputs, or ~0 if none is written.
* What's at slots[VARYING_SLOT_CLIP_DIST0] is the varying output.
*/
unsigned clip_dist_slot;
unsigned nr_clip_dists;
};
struct agx_cf_binding {
/* Base coefficient register */
unsigned cf_base;
@ -96,7 +52,6 @@ struct agx_varyings_fs {
};
union agx_varyings {
struct agx_varyings_vs vs;
struct agx_varyings_fs fs;
};
@ -127,9 +82,6 @@ struct agx_shader_info {
/* Does the shader read the tilebuffer? */
bool reads_tib;
/* Does the shader write point size? */
bool writes_psiz;
/* Does the shader potentially draw to a nonzero viewport? */
bool nonzero_viewport;
@ -195,17 +147,6 @@ enum agx_format {
AGX_NUM_FORMATS,
};
struct agx_vs_shader_key {
/* The GPU ABI requires all smooth shaded varyings to come first, then all
* flat shaded varyings, then all linear shaded varyings, as written by the
* VS. In order to correctly remap the varyings into the right order in the
* VS, we need to propagate the mask of flat/linear shaded varyings into the
* compiler.
*/
uint64_t outputs_flat_shaded;
uint64_t outputs_linear_shaded;
};
struct agx_fs_shader_key {
/* Normally, access to the tilebuffer must be guarded by appropriate fencing
* instructions to ensure correct results in the presence of out-of-order
@ -246,7 +187,6 @@ struct agx_shader_key {
bool promote_constants;
union {
struct agx_vs_shader_key vs;
struct agx_fs_shader_key fs;
};
};

View file

@ -1,42 +0,0 @@
/*
* Copyright 2023 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "agx_nir.h"
/* Duplicate each clip-distance store_output so there is one store feeding the
 * sysval (hardware clip distance) and one feeding the varying (FS input).
 * The clone becomes sysval-only and the original becomes varying-only.
 */
static bool
lower(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
if (intr->intrinsic != nir_intrinsic_store_output)
return false;
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
if (sem.location != VARYING_SLOT_CLIP_DIST0)
return false;
/* Clone the store and insert it right after the original */
nir_instr *clone = nir_instr_clone(b->shader, &intr->instr);
nir_intrinsic_instr *lowered = nir_instr_as_intrinsic(clone);
b->cursor = nir_after_instr(&intr->instr);
nir_builder_instr_insert(b, clone);
/* The clone only writes the sysval output */
nir_io_semantics new_sem = sem;
new_sem.no_varying = true;
nir_intrinsic_set_io_semantics(lowered, new_sem);
/* The original only writes the varying output */
sem.no_sysval_output = true;
nir_intrinsic_set_io_semantics(intr, sem);
return true;
}
/* Split clip-distance stores into separate sysval and varying stores.
 * Requires that the shader actually writes clip distances.
 */
bool
agx_nir_lower_clip_distance(nir_shader *s)
{
   assert(s->info.outputs_written & VARYING_BIT_CLIP_DIST0);

   bool progress = nir_shader_intrinsics_pass(
      s, lower, nir_metadata_block_index | nir_metadata_dominance, NULL);

   return progress;
}

View file

@ -1,70 +0,0 @@
/*
* Copyright 2023 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "agx_nir.h"
/* Lower gl_Layer/gl_ViewportIndex sysval writes: gather the values stored in
 * the last block, pack them into one 32-bit word (layer in the low half,
 * viewport in the high half), and emit a single sysval-only store. The
 * original stores are demoted to varying-only.
 */
bool
agx_nir_lower_layer(nir_shader *s)
{
assert(s->info.stage == MESA_SHADER_VERTEX);
assert(s->info.outputs_written & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT));
/* Writes are in the last block, search */
nir_function_impl *impl = nir_shader_get_entrypoint(s);
nir_block *last = nir_impl_last_block(impl);
nir_def *layer = NULL, *viewport = NULL;
/* Set whenever a layer/viewport store is found, so the packed store lands
 * after the last such store */
nir_cursor last_cursor;
nir_foreach_instr(instr, last) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
if (store->intrinsic != nir_intrinsic_store_output)
continue;
nir_io_semantics sem = nir_intrinsic_io_semantics(store);
nir_def *value = store->src[0].ssa;
if (sem.location == VARYING_SLOT_LAYER) {
assert(layer == NULL && "only written once");
layer = value;
} else if (sem.location == VARYING_SLOT_VIEWPORT) {
assert(viewport == NULL && "only written once");
viewport = value;
} else {
continue;
}
last_cursor = nir_after_instr(&store->instr);
/* Leave the store as a varying-only, no sysval output */
sem.no_sysval_output = true;
nir_intrinsic_set_io_semantics(store, sem);
}
assert((layer || viewport) && "metadata inconsistent with program");
/* Pack together and write out */
nir_builder b = nir_builder_at(last_cursor);
/* Unwritten halves default to zero */
nir_def *zero = nir_imm_intN_t(&b, 0, 16);
nir_def *packed =
nir_pack_32_2x16_split(&b, layer ? nir_u2u16(&b, layer) : zero,
viewport ? nir_u2u16(&b, viewport) : zero);
/* Written with a sysval-only store, no varying output */
nir_store_output(&b, packed, nir_imm_int(&b, 0),
.io_semantics.location = VARYING_SLOT_LAYER,
.io_semantics.num_slots = 1,
.io_semantics.no_varying = true);
nir_metadata_preserve(impl,
nir_metadata_dominance | nir_metadata_block_index);
return true;
}

View file

@ -8,13 +8,11 @@ libasahi_agx_files = files(
'agx_liveness.c',
'agx_insert_waits.c',
'agx_nir_lower_address.c',
'agx_nir_lower_clip_distance.c',
'agx_nir_lower_cull_distance.c',
'agx_nir_lower_frag_sidefx.c',
'agx_nir_lower_sample_mask.c',
'agx_nir_lower_discard_zs_emit.c',
'agx_nir_lower_interpolation.c',
'agx_nir_lower_layer.c',
'agx_nir_lower_shared_bitsize.c',
'agx_nir_lower_subgroups.c',
'agx_nir_opt_preamble.c',

View file

@ -0,0 +1,251 @@
/*
* Copyright 2024 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "util/bitscan.h"
#include "util/macros.h"
#include "agx_compile.h"
#include "agx_pack.h"
#include "agx_uvs.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "shader_enums.h"
struct ctx {
nir_def *layer, *viewport;
nir_cursor after_layer_viewport;
struct agx_unlinked_uvs_layout *layout;
};
/* Map a varying slot to its UVS hardware group. Position and point size get
 * dedicated groups; all other slots land in the shared user-varying group.
 */
static enum uvs_group
group_for_varying(gl_varying_slot loc)
{
   if (loc == VARYING_SLOT_POS)
      return UVS_POSITION;
   else if (loc == VARYING_SLOT_PSIZ)
      return UVS_PSIZ;
   else
      return UVS_VARYINGS;
}
/* Replace each store_output with a store_uvs_agx plus index math. For user
 * varyings the base index is looked up at draw time (load_uvs_index_agx)
 * so separately-compiled vertex and fragment shaders can be linked cheaply;
 * fixed groups (position, point size) use compile-time offsets.
 */
static bool
lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
struct ctx *ctx = data;
if (intr->intrinsic != nir_intrinsic_store_output)
return false;
/* Remove the old store and build its replacement in place */
b->cursor = nir_instr_remove(&intr->instr);
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
unsigned component = nir_intrinsic_component(intr);
nir_def *value = intr->src[0].ssa;
nir_def *offset = intr->src[1].ssa;
/* If there is only 1 user varying, it is at the base of the varying section.
 * This saves us an indirection on simple separate shaders.
 */
bool single_vary = util_is_power_of_two_nonzero64(ctx->layout->written);
enum uvs_group group = group_for_varying(sem.location);
nir_def *base;
if ((group == UVS_VARYINGS) && !single_vary)
base = nir_load_uvs_index_agx(b, .io_semantics = sem);
else
base = nir_imm_intN_t(b, ctx->layout->group_offs[group], 16);
/* index = base + component + 4 * array_offset (4 words per slot) */
nir_def *index = nir_iadd(b, nir_iadd_imm(b, base, component),
nir_imul_imm(b, nir_u2u16(b, offset), 4));
nir_intrinsic_instr *new_store = nir_store_uvs_agx(b, value, index);
/* Insert clip distance sysval writes, and gather layer/viewport writes so we
 * can accumulate their system value. These are still lowered like normal to
 * write them for the varying FS input.
 */
if (sem.location == VARYING_SLOT_LAYER) {
assert(ctx->layer == NULL && "only written once");
ctx->layer = value;
ctx->after_layer_viewport = nir_after_instr(&new_store->instr);
} else if (sem.location == VARYING_SLOT_VIEWPORT) {
assert(ctx->viewport == NULL && "only written once");
ctx->viewport = value;
ctx->after_layer_viewport = nir_after_instr(&new_store->instr);
} else if (sem.location == VARYING_SLOT_CLIP_DIST0) {
/* Extra sysval copy into the fixed clip-distance group */
unsigned clip_base = ctx->layout->group_offs[UVS_CLIP_DIST];
nir_def *index = nir_iadd_imm(b, nir_imul_imm(b, nir_u2u16(b, offset), 4),
clip_base + component);
nir_store_uvs_agx(b, value, index);
}
return true;
}
/* Emit the packed layer/viewport sysval store after the last layer or
 * viewport varying store. The two 16-bit values share one 32-bit UVS word;
 * whichever half was not written defaults to zero.
 */
static void
write_layer_viewport_sysval(struct ctx *ctx)
{
   nir_builder b = nir_builder_at(ctx->after_layer_viewport);

   nir_def *zero = nir_imm_intN_t(&b, 0, 16);
   nir_def *lay = zero, *vp = zero;

   if (ctx->layer)
      lay = nir_u2u16(&b, ctx->layer);
   if (ctx->viewport)
      vp = nir_u2u16(&b, ctx->viewport);

   nir_def *packed = nir_pack_32_2x16_split(&b, lay, vp);
   nir_def *index = nir_imm_int(&b, ctx->layout->group_offs[UVS_LAYER_VIEWPORT]);

   nir_store_uvs_agx(&b, packed, index);
}
/* Record how many 32-bit components each output slot writes, building the
 * per-slot component counts used to size the unlinked UVS layout. Analysis
 * only: the shader is never modified, so progress is never reported.
 */
static bool
gather_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   struct agx_unlinked_uvs_layout *layout = data;

   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   unsigned component = nir_intrinsic_component(intr);

   if (!nir_src_is_const(intr->src[1])) {
      /* Indirect offset: conservatively assume every addressable slot uses
       * all 4 components.
       */
      for (unsigned i = 0; i < sem.num_slots; ++i)
         layout->components[sem.location + i] = 4;
   } else {
      unsigned loc = sem.location + nir_src_as_uint(intr->src[1]);
      layout->components[loc] = MAX2(layout->components[loc], component + 1);
   }

   return false;
}
/* Lower vertex-shader outputs to the unified vertex shader (UVS) model:
 * scalarize outputs, compute the unlinked UVS layout (group offsets and
 * per-slot component counts), rewrite stores to store_uvs_agx, and pre-pack
 * the hardware structures that don't depend on the fragment shader.
 */
bool
agx_nir_lower_uvs(nir_shader *s, struct agx_unlinked_uvs_layout *layout)
{
bool progress = false;
/* Scalarize up front so we can ignore vectors later */
NIR_PASS(progress, s, nir_lower_io_to_scalar, nir_var_shader_out, NULL,
NULL);
/* Determine the unlinked UVS layout */
NIR_PASS(progress, s, nir_shader_intrinsics_pass, gather_components,
nir_metadata_block_index | nir_metadata_dominance, layout);
/* Fixed group sizes in 32-bit words; position is always 4 words */
unsigned sizes[UVS_NUM_GROUP] = {
[UVS_POSITION] = 4,
[UVS_PSIZ] = !!(s->info.outputs_written & VARYING_BIT_PSIZ),
[UVS_LAYER_VIEWPORT] = !!(s->info.outputs_written &
(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)),
[UVS_CLIP_DIST] = s->info.clip_distance_array_size,
};
/* User varyings: everything with components written except pos/psiz */
for (unsigned i = 0; i < ARRAY_SIZE(layout->components); ++i) {
if (i != VARYING_SLOT_POS && i != VARYING_SLOT_PSIZ &&
layout->components[i]) {
layout->written |= BITFIELD64_BIT(i);
sizes[UVS_VARYINGS] += layout->components[i];
}
}
/* Lay the groups out consecutively in hardware order */
unsigned offs = 0;
for (enum uvs_group g = 0; g < UVS_NUM_GROUP; ++g) {
layout->group_offs[g] = offs;
offs += sizes[g];
}
layout->size = offs;
layout->user_size = sizes[UVS_VARYINGS];
/* Now lower in terms of the unlinked layout */
struct ctx ctx = {.layout = layout};
NIR_PASS(progress, s, nir_shader_intrinsics_pass, lower,
nir_metadata_block_index | nir_metadata_dominance, &ctx);
/* If layer/viewport were written, emit their combined sysval store */
if (ctx.layer || ctx.viewport) {
write_layer_viewport_sysval(&ctx);
}
/* Finally, pack what we can. It's much cheaper to do this at compile-time
 * than draw-time.
 */
agx_pack(&layout->osel, OUTPUT_SELECT, cfg) {
cfg.point_size = sizes[UVS_PSIZ];
cfg.viewport_target = sizes[UVS_LAYER_VIEWPORT];
cfg.render_target = cfg.viewport_target;
/* One enable bit per clip distance plane */
cfg.clip_distance_plane_0 = sizes[UVS_CLIP_DIST] > 0;
cfg.clip_distance_plane_1 = sizes[UVS_CLIP_DIST] > 1;
cfg.clip_distance_plane_2 = sizes[UVS_CLIP_DIST] > 2;
cfg.clip_distance_plane_3 = sizes[UVS_CLIP_DIST] > 3;
cfg.clip_distance_plane_4 = sizes[UVS_CLIP_DIST] > 4;
cfg.clip_distance_plane_5 = sizes[UVS_CLIP_DIST] > 5;
cfg.clip_distance_plane_6 = sizes[UVS_CLIP_DIST] > 6;
cfg.clip_distance_plane_7 = sizes[UVS_CLIP_DIST] > 7;
}
agx_pack(&layout->vdm, VDM_STATE_VERTEX_OUTPUTS, cfg) {
cfg.output_count_1 = offs;
cfg.output_count_2 = offs;
}
return progress;
}
/* Link a vertex shader's unlinked UVS layout against the fragment shader's
 * interpolation qualifiers at draw time. Assigns each written user varying a
 * final index in ABI order (smooth, then flat, then linear) and packs the
 * hardware varying-count words.
 */
void
agx_assign_uvs(struct agx_varyings_vs *varyings,
struct agx_unlinked_uvs_layout *layout, uint64_t flat_mask,
uint64_t linear_mask)
{
*varyings = (struct agx_varyings_vs){0};
/* These are always flat-shaded from the FS perspective */
flat_mask |= VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
/* The internal cull distance slots are always linearly-interpolated */
linear_mask |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, 2);
/* A varying must not be both flat and linear shaded */
assert(!(flat_mask & linear_mask));
/* TODO: Link FP16 varyings */
unsigned num_32_smooth = 0, num_32_flat = 0, num_32_linear = 0;
/* Process the three interpolation classes in the required ABI order */
struct {
uint32_t *num;
uint64_t mask;
} parts[] = {
{&num_32_smooth, ~flat_mask & ~linear_mask},
{&num_32_flat, flat_mask},
{&num_32_linear, linear_mask},
};
unsigned base = layout->group_offs[UVS_VARYINGS];
for (unsigned p = 0; p < ARRAY_SIZE(parts); ++p) {
u_foreach_bit64(loc, parts[p].mask & layout->written) {
assert(loc < ARRAY_SIZE(varyings->slots));
varyings->slots[loc] = base;
/* Only the components actually written are allocated (compaction) */
base += layout->components[loc];
(*parts[p].num) += layout->components[loc];
}
}
agx_pack(&varyings->counts_32, VARYING_COUNTS, cfg) {
cfg.smooth = num_32_smooth;
cfg.flat = num_32_flat;
cfg.linear = num_32_linear;
}
/* No FP16 varyings linked yet, see TODO above */
agx_pack(&varyings->counts_16, VARYING_COUNTS, cfg) {
cfg.smooth = 0;
cfg.flat = 0;
cfg.linear = 0;
}
}

View file

@ -6,6 +6,9 @@
#pragma once
#include <stdbool.h>
#include <stdint.h>
#include "agx_pack.h"
#include "shader_enums.h"
struct nir_shader;
struct nir_instr;

79
src/asahi/lib/agx_uvs.h Normal file
View file

@ -0,0 +1,79 @@
/*
* Copyright 2024 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <stdint.h>
#include "agx_pack.h"
#include "shader_enums.h"
struct nir_shader;
/* Matches the hardware order */
enum uvs_group {
UVS_POSITION,       /* gl_Position, always 4 words */
UVS_VARYINGS,       /* compacted user varyings */
UVS_PSIZ,           /* gl_PointSize, 0 or 1 word */
UVS_LAYER_VIEWPORT, /* packed layer/viewport word, 0 or 1 word */
UVS_CLIP_DIST,      /* one word per clip distance */
UVS_NUM_GROUP,      /* sentinel: number of groups */
};
/**
* Represents an "unlinked" UVS layout. This is computable from an unlinked
* vertex shader without knowing the associated fragment shader. The various UVS
* groups have fixed offsets, but the varyings within the varying group have
* indeterminate order since we don't yet know the fragment shader interpolation
* qualifiers.
*/
struct agx_unlinked_uvs_layout {
/* Offset of each group in the UVS in words. */
uint8_t group_offs[UVS_NUM_GROUP];
/* Size of the UVS allocation in words. >= last group_offs element */
uint8_t size;
/* Size of the UVS_VARYINGS group in words (sum of user varying components) */
uint8_t user_size;
/* Number of 32-bit components written for each slot. TODO: Model 16-bit.
 *
 * Invariant: sum_{slot} (components[slot]) =
 * group_offs[PSIZ] - group_offs[VARYINGS]
 */
uint8_t components[VARYING_SLOT_MAX];
/* Bit i set <===> components[i] != 0 && i != POS && i != PSIZ. For fast
 * iteration of user varyings.
 */
uint64_t written;
/* Fully packed data structure */
struct agx_vdm_state_vertex_outputs_packed vdm;
/* Partial data structure, must be merged with FS selects */
struct agx_output_select_packed osel;
};
bool agx_nir_lower_uvs(struct nir_shader *s,
struct agx_unlinked_uvs_layout *layout);
/**
* Represents a linked UVS layout.
*/
struct agx_varyings_vs {
/* Associated linked hardware data structures */
struct agx_varying_counts_packed counts_32, counts_16;
/* If the user varying slot is written, this is the base index that the first
 * component of the slot is written to. The next components are found in the
 * next indices. Otherwise 0, aliasing position.
 */
unsigned slots[VARYING_SLOT_MAX];
};
void agx_assign_uvs(struct agx_varyings_vs *varyings,
struct agx_unlinked_uvs_layout *layout, uint64_t flat_mask,
uint64_t linear_mask);

View file

@ -20,6 +20,7 @@ libasahi_lib_files = files(
'agx_nir_lower_tess.c',
'agx_nir_lower_texture.c',
'agx_nir_lower_tilebuffer.c',
'agx_nir_lower_uvs.c',
'agx_nir_lower_vbo.c',
'agx_nir_predicate_layer_id.c',
'agx_ppp.h',

View file

@ -63,6 +63,7 @@ write_shader(struct blob *blob, const struct agx_compiled_shader *binary,
blob_write_uint32(blob, shader_size);
blob_write_bytes(blob, binary->bo->ptr.cpu, shader_size);
blob_write_bytes(blob, &binary->info, sizeof(binary->info));
blob_write_bytes(blob, &binary->uvs, sizeof(binary->uvs));
blob_write_uint32(blob, binary->push_range_count);
blob_write_bytes(blob, binary->push,
sizeof(binary->push[0]) * binary->push_range_count);
@ -96,6 +97,7 @@ read_shader(struct agx_screen *screen, struct blob_reader *blob,
blob_copy_bytes(blob, binary->bo->ptr.cpu, binary_size);
blob_copy_bytes(blob, &binary->info, sizeof(binary->info));
blob_copy_bytes(blob, &binary->uvs, sizeof(binary->uvs));
binary->push_range_count = blob_read_uint32(blob);
blob_copy_bytes(blob, binary->push,
sizeof(binary->push[0]) * binary->push_range_count);

View file

@ -189,6 +189,9 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
return load_sysval_root(b, 1, 16, &u->sprite_mask);
case nir_intrinsic_load_clip_z_coeff_agx:
return nir_f2f32(b, load_sysval_root(b, 1, 16, &u->clip_z_coeff));
case nir_intrinsic_load_uvs_index_agx:
return load_sysval_root(
b, 1, 16, &u->uvs_index[nir_intrinsic_io_semantics(intr).location]);
case nir_intrinsic_load_polygon_stipple_agx: {
nir_def *base = load_sysval_root(b, 1, 64, &u->polygon_stipple);
nir_def *row = intr->src[0].ssa;

View file

@ -1521,59 +1521,10 @@ asahi_cs_shader_key_equal(const void *a, const void *b)
return true;
}
/* Translate a fragment-shader varying (slot + component offset) into the
 * coefficient-register slot index produced by the linked vertex shader.
 * Slot 0 is gl_Position.w, slot 1 is Z when the FS reads it, and user
 * varyings follow.
 */
static unsigned
agx_find_linked_slot(struct agx_varyings_vs *vs, struct agx_varyings_fs *fs,
gl_varying_slot slot, unsigned offset)
{
assert(offset < 4);
assert(slot != VARYING_SLOT_PNTC && "point coords aren't linked");
if (slot == VARYING_SLOT_POS) {
if (offset == 3) {
return 0; /* W */
} else if (offset == 2) {
assert(fs->reads_z);
return 1; /* Z */
} else {
unreachable("gl_Position.xy are not varyings");
}
}
unsigned vs_index = vs->slots[slot];
/* Varyings not written by vertex shader are undefined but we can't crash */
if (!(vs_index < vs->nr_index))
return 0;
assert(vs_index >= 4 && "gl_Position should have been the first 4 slots");
assert((vs_index < vs->base_index_fp16) ==
((vs_index + offset) < vs->base_index_fp16) &&
"a given varying must have a consistent type");
/* Subtract the 4 position indices, then shift past W (and Z if read) */
unsigned vs_user_index = (vs_index + offset) - 4;
if (fs->reads_z)
return vs_user_index + 2;
else
return vs_user_index + 1;
}
/* Count the vertex shader's general (user varying) output indices, excluding
 * the 4 fixed gl_Position indices and, when written, the gl_PointSize index.
 */
static unsigned
agx_num_general_outputs(struct agx_varyings_vs *vs)
{
   unsigned total = vs->nr_index;

   assert(total >= 4 && "gl_Position must be written");

   /* Point size, when present, occupies one index past the user varyings */
   if (vs->slots[VARYING_SLOT_PSIZ] < total) {
      assert(total >= 5 && "gl_PointSize is written");
      return total - 5;
   }

   return total - 4;
}
static uint32_t
agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
struct agx_varyings_fs *fs, bool first_provoking_vertex,
unsigned nr_user_indices, struct agx_varyings_fs *fs,
bool first_provoking_vertex,
uint8_t sprite_coord_enable,
bool *generate_primitive_id)
{
@ -1586,11 +1537,14 @@ agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
size_t linkage_size =
AGX_CF_BINDING_HEADER_LENGTH + (fs->nr_bindings * AGX_CF_BINDING_LENGTH);
void *tmp = alloca(linkage_size);
struct agx_cf_binding_header_packed *header = tmp;
struct agx_ptr t = agx_pool_alloc_aligned(pool, linkage_size, 256);
assert(t.gpu < (1ull << 32) && "varyings must be in low memory");
struct agx_cf_binding_header_packed *header = t.cpu;
struct agx_cf_binding_packed *bindings = (void *)(header + 1);
unsigned nr_slots = agx_num_general_outputs(vs) + 1 + (fs->reads_z ? 1 : 0);
unsigned user_base = 1 + (fs->reads_z ? 1 : 0);
unsigned nr_slots = user_base + nr_user_indices;
agx_pack(header, CF_BINDING_HEADER, cfg) {
cfg.number_of_32_bit_slots = nr_slots;
@ -1598,35 +1552,45 @@ agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
}
for (unsigned i = 0; i < fs->nr_bindings; ++i) {
struct agx_cf_binding b = fs->bindings[i];
agx_pack(bindings + i, CF_BINDING, cfg) {
cfg.base_coefficient_register = fs->bindings[i].cf_base;
cfg.components = fs->bindings[i].count;
cfg.base_coefficient_register = b.cf_base;
cfg.components = b.count;
cfg.shade_model =
agx_translate_shade_model(fs, i, first_provoking_vertex);
if (util_varying_is_point_coord(fs->bindings[i].slot,
sprite_coord_enable)) {
assert(fs->bindings[i].offset == 0);
if (util_varying_is_point_coord(b.slot, sprite_coord_enable)) {
assert(b.offset == 0);
cfg.source = AGX_COEFFICIENT_SOURCE_POINT_COORD;
} else if (fs->bindings[i].slot == VARYING_SLOT_PRIMITIVE_ID &&
vs->slots[VARYING_SLOT_PRIMITIVE_ID] == ~0) {
} else if (b.slot == VARYING_SLOT_PRIMITIVE_ID &&
!vs->slots[VARYING_SLOT_PRIMITIVE_ID]) {
cfg.source = AGX_COEFFICIENT_SOURCE_PRIMITIVE_ID;
*generate_primitive_id = true;
} else {
cfg.base_slot = agx_find_linked_slot(vs, fs, fs->bindings[i].slot,
fs->bindings[i].offset);
} else if (b.slot == VARYING_SLOT_POS) {
assert(b.offset >= 2 && "gl_Position.xy are not varyings");
assert(fs->reads_z || b.offset != 2);
assert(cfg.base_slot + cfg.components <=
MAX2(nr_slots, cfg.components) &&
"overflow slots");
}
if (fs->bindings[i].slot == VARYING_SLOT_POS) {
if (fs->bindings[i].offset == 2) {
if (b.offset == 2) {
cfg.source = AGX_COEFFICIENT_SOURCE_FRAGCOORD_Z;
cfg.base_slot = 1;
} else {
assert(!fs->bindings[i].perspective &&
"W must not be perspective divided");
assert(!b.perspective && "W must not be perspective divided");
}
} else {
unsigned vs_index = vs->slots[b.slot];
assert(b.offset < 4);
/* Varyings not written by vertex shader are undefined but we can't
* crash */
if (vs_index) {
assert(vs_index >= 4 &&
"gl_Position should have been the first 4 slots");
cfg.base_slot = user_base + (vs_index - 4) + b.offset;
assert(cfg.base_slot + cfg.components <= nr_slots &&
"overflow slots");
}
}
@ -1635,16 +1599,7 @@ agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
}
}
struct agx_ptr ptr = agx_pool_alloc_aligned(pool, (3 * linkage_size), 256);
assert(ptr.gpu < (1ull << 32) && "varyings must be in low memory");
/* I don't understand why the data structures are repeated thrice */
for (unsigned i = 0; i < 3; ++i) {
memcpy(((uint8_t *)ptr.cpu) + (i * linkage_size), (uint8_t *)tmp,
linkage_size);
}
return ptr.gpu;
return t.gpu;
}
/* Dynamic lowered I/O version of nir_lower_clip_halfz */
@ -1859,6 +1814,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
perf_debug(dev, "Compiling shader variant #%u",
_mesa_hash_table_num_entries(so->variants));
struct agx_unlinked_uvs_layout uvs = {0};
bool force_translucent = false;
if (nir->info.stage == MESA_SHADER_VERTEX) {
@ -1871,6 +1827,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
key->next.hw.fixed_point_size);
NIR_PASS(_, nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1,
nir_metadata_block_index | nir_metadata_dominance, NULL);
NIR_PASS(_, nir, agx_nir_lower_uvs, &uvs);
} else {
NIR_PASS(_, nir, agx_nir_lower_sysvals, PIPE_SHADER_VERTEX, false);
NIR_PASS(_, nir, agx_nir_lower_vs_before_gs, dev->libagx,
@ -1993,21 +1950,11 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
struct agx_shader_key base_key = {0};
if (nir->info.stage == MESA_SHADER_VERTEX) {
struct asahi_vs_shader_key *key = &key_->vs;
if (key->hw) {
base_key.vs.outputs_flat_shaded = key_->vs.next.hw.outputs_flat_shaded;
base_key.vs.outputs_linear_shaded =
key_->vs.next.hw.outputs_linear_shaded;
}
}
struct agx_compiled_shader *compiled =
agx_compile_nir(dev, nir, &base_key, debug, so->type);
compiled->so = so;
compiled->uvs = uvs;
/* reads_tib => Translucent pass type */
compiled->info.reads_tib |= force_translucent;
@ -2039,13 +1986,14 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
NIR_PASS(_, gs_copy, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1,
nir_metadata_block_index | nir_metadata_dominance, NULL);
base_key.vs.outputs_flat_shaded = key->outputs_flat_shaded;
base_key.vs.outputs_linear_shaded = key->outputs_linear_shaded;
struct agx_unlinked_uvs_layout uvs = {0};
NIR_PASS(_, gs_copy, agx_nir_lower_uvs, &uvs);
compiled->gs_copy =
agx_compile_nir(dev, gs_copy, &base_key, debug, PIPE_SHADER_GEOMETRY);
compiled->gs_copy->so = so;
compiled->gs_copy->stage = so->type;
compiled->gs_copy->uvs = uvs;
}
compiled->gs_output_mode = gs_out_prim;
@ -2427,10 +2375,9 @@ agx_update_vs(struct agx_context *ctx, unsigned index_size_B)
*
* vb_mask, attributes, vertex_buffers: VERTEX
* point_size_per_vertex: RS
* outputs_{flat,linear}_shaded: FS_PROG
*/
if (!((ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB |
AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_PRIM)) ||
AGX_DIRTY_RS | AGX_DIRTY_PRIM)) ||
ctx->stage[PIPE_SHADER_TESS_EVAL].dirty ||
ctx->stage[PIPE_SHADER_GEOMETRY].dirty ||
ctx->stage[PIPE_SHADER_TESS_EVAL].shader ||
@ -2451,11 +2398,6 @@ agx_update_vs(struct agx_context *ctx, unsigned index_size_B)
*/
key.next.hw.fixed_point_size = !ctx->rast->base.point_size_per_vertex &&
rasterized_prim == MESA_PRIM_POINTS;
key.next.hw.outputs_flat_shaded =
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded;
key.next.hw.outputs_linear_shaded =
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded;
} else {
key.next.sw.index_size_B = index_size_B;
}
@ -2511,10 +2453,6 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info,
/* TODO: Deduplicate */
.fixed_point_size = !ctx->rast->base.point_size_per_vertex &&
rasterized_prim == MESA_PRIM_POINTS,
.outputs_flat_shaded =
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded,
.outputs_linear_shaded =
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded,
};
return agx_update_shader(ctx, &ctx->gs, PIPE_SHADER_GEOMETRY,
@ -3564,8 +3502,9 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
if (IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS) ||
IS_DIRTY(PRIM)) {
batch->varyings = agx_link_varyings_vs_fs(
&batch->pipeline_pool, &vs->info.varyings.vs,
&batch->pipeline_pool, &batch->linked_varyings, vs->uvs.user_size,
&ctx->fs->info.varyings.fs, ctx->rast->base.flatshade_first,
(batch->reduced_prim == MESA_PRIM_POINTS)
? ctx->rast->base.sprite_coord_enable
@ -3596,10 +3535,7 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
cfg.pipeline = agx_build_pipeline(batch, vs, PIPE_SHADER_VERTEX, 0, 0);
}
agx_push(out, VDM_STATE_VERTEX_OUTPUTS, cfg) {
cfg.output_count_1 = vs->info.varyings.vs.nr_index;
cfg.output_count_2 = cfg.output_count_1;
}
agx_push_packed(out, vs->uvs.vdm, VDM_STATE_VERTEX_OUTPUTS);
agx_push(out, VDM_STATE_VERTEX_UNKNOWN, cfg) {
cfg.flat_shading_control = ctx->rast->base.flatshade_first
@ -3654,9 +3590,9 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
.fragment_back_face = fragment_face_dirty,
.fragment_back_face_2 = object_type_dirty || IS_DIRTY(FS_PROG),
.fragment_back_stencil = IS_DIRTY(ZS),
.output_select = IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG),
.varying_counts_32 = IS_DIRTY(VS_PROG),
.varying_counts_16 = IS_DIRTY(VS_PROG),
.output_select = varyings_dirty,
.varying_counts_32 = varyings_dirty,
.varying_counts_16 = varyings_dirty,
.cull = IS_DIRTY(RS),
.cull_2 = varyings_dirty,
.fragment_shader =
@ -3742,40 +3678,24 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
if (dirty.fragment_back_stencil)
agx_ppp_push_packed(&ppp, ctx->zs->back_stencil.opaque, FRAGMENT_STENCIL);
if (dirty.output_select) {
agx_ppp_push(&ppp, OUTPUT_SELECT, cfg) {
cfg.varyings = !!fs->info.varyings.fs.nr_bindings;
cfg.point_size = vs->info.writes_psiz;
cfg.viewport_target = vs->info.writes_layer_viewport;
cfg.render_target = vs->info.writes_layer_viewport;
cfg.frag_coord_z = fs->info.varyings.fs.reads_z;
cfg.clip_distance_plane_0 = vs->info.varyings.vs.nr_clip_dists > 0;
cfg.clip_distance_plane_1 = vs->info.varyings.vs.nr_clip_dists > 1;
cfg.clip_distance_plane_2 = vs->info.varyings.vs.nr_clip_dists > 2;
cfg.clip_distance_plane_3 = vs->info.varyings.vs.nr_clip_dists > 3;
cfg.clip_distance_plane_4 = vs->info.varyings.vs.nr_clip_dists > 4;
cfg.clip_distance_plane_5 = vs->info.varyings.vs.nr_clip_dists > 5;
cfg.clip_distance_plane_6 = vs->info.varyings.vs.nr_clip_dists > 6;
cfg.clip_distance_plane_7 = vs->info.varyings.vs.nr_clip_dists > 7;
assert(cfg.point_size || !is_points);
}
}
assert(dirty.varying_counts_32 == dirty.varying_counts_16);
assert(dirty.varying_counts_32 == dirty.output_select);
if (dirty.varying_counts_32) {
agx_ppp_push(&ppp, VARYING_COUNTS, cfg) {
cfg.smooth = vs->info.varyings.vs.num_32_smooth;
cfg.flat = vs->info.varyings.vs.num_32_flat;
cfg.linear = vs->info.varyings.vs.num_32_linear;
if (dirty.output_select) {
struct agx_output_select_packed osel;
agx_pack(&osel, OUTPUT_SELECT, cfg) {
cfg.varyings = !!fs->info.varyings.fs.nr_bindings;
cfg.frag_coord_z = fs->info.varyings.fs.reads_z;
}
agx_ppp_push(&ppp, VARYING_COUNTS, cfg) {
cfg.smooth = vs->info.varyings.vs.num_16_smooth;
cfg.flat = vs->info.varyings.vs.num_16_flat;
cfg.linear = vs->info.varyings.vs.num_16_linear;
}
agx_merge(osel, vs->uvs.osel, OUTPUT_SELECT);
agx_ppp_push_packed(&ppp, &osel, OUTPUT_SELECT);
agx_ppp_push_packed(&ppp, &batch->linked_varyings.counts_32,
VARYING_COUNTS);
agx_ppp_push_packed(&ppp, &batch->linked_varyings.counts_16,
VARYING_COUNTS);
}
if (dirty.cull)
@ -3817,7 +3737,7 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
if (dirty.output_size) {
agx_ppp_push(&ppp, OUTPUT_SIZE, cfg)
cfg.count = vs->info.varyings.vs.nr_index;
cfg.count = vs->uvs.size;
}
agx_ppp_fini(&out, &ppp);
@ -5061,6 +4981,21 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
agx_batch_add_bo(batch, ctx->gs->gs_copy->bo);
}
if (ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_FS_PROG)) {
struct agx_compiled_shader *vs = ctx->vs;
if (ctx->gs)
vs = ctx->gs->gs_copy;
agx_assign_uvs(
&batch->linked_varyings, &vs->uvs,
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded,
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded);
for (unsigned i = 0; i < VARYING_SLOT_MAX; ++i) {
batch->uniforms.uvs_index[i] = batch->linked_varyings.slots[i];
}
}
/* Set draw ID */
if (ctx->vs->info.uses_draw_id) {
batch->uniforms.draw_id = drawid_offset;

View file

@ -14,6 +14,7 @@
#include "asahi/lib/agx_nir_lower_vbo.h"
#include "asahi/lib/agx_scratch.h"
#include "asahi/lib/agx_tilebuffer.h"
#include "asahi/lib/agx_uvs.h"
#include "asahi/lib/pool.h"
#include "asahi/lib/shaders/geometry.h"
#include "compiler/nir/nir_lower_blend.h"
@ -29,6 +30,7 @@
#include "util/u_range.h"
#include "agx_helpers.h"
#include "agx_meta.h"
#include "agx_nir_passes.h"
#ifdef __GLIBC__
#include <errno.h>
@ -162,6 +164,11 @@ struct PACKED agx_draw_uniforms {
/* Zero for [0, 1] clipping, 0.5 for [-1, 1] clipping. */
uint16_t clip_z_coeff;
/* Mapping from varying slots written by the last vertex stage to UVS
* indices. This mapping must be compatible with the fragment shader.
*/
uint16_t uvs_index[VARYING_SLOT_MAX];
};
struct PACKED agx_stage_uniforms {
@ -221,6 +228,9 @@ struct agx_compiled_shader {
unsigned push_range_count;
struct agx_push_range push[AGX_MAX_PUSH_RANGES];
/* UVS layout for the last vertex stage */
struct agx_unlinked_uvs_layout uvs;
/* Auxiliary programs, or NULL if not used */
struct agx_compiled_shader *gs_count, *pre_gs;
struct agx_compiled_shader *gs_copy;
@ -366,6 +376,7 @@ struct agx_batch {
/* Current varyings linkage structures */
uint32_t varyings;
struct agx_varyings_vs linked_varyings;
struct agx_draw_uniforms uniforms;
struct agx_stage_uniforms stage_uniforms[PIPE_SHADER_TYPES];
@ -478,8 +489,6 @@ struct asahi_vs_shader_key {
struct {
bool fixed_point_size;
uint64_t outputs_flat_shaded;
uint64_t outputs_linear_shaded;
} hw;
} next;
};
@ -512,15 +521,13 @@ struct asahi_fs_shader_key {
struct asahi_gs_shader_key {
/* Rasterizer shader key */
uint64_t outputs_flat_shaded;
uint64_t outputs_linear_shaded;
bool fixed_point_size;
/* If true, this GS is run only for its side effects (including XFB) */
bool rasterizer_discard;
bool padding[6];
};
static_assert(sizeof(struct asahi_gs_shader_key) == 24, "no holes");
static_assert(sizeof(struct asahi_gs_shader_key) == 8, "no holes");
union asahi_shader_key {
struct asahi_vs_shader_key vs;