mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-17 16:50:34 +01:00
asahi: rewrite varying linking
Lower store_output to store_uvs_agx + math. Link UVS indices at draw-time instead of compile-time to get efficient separate shaders. Also picks up varying compaction along the way. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
This commit is contained in:
parent
3764adbef1
commit
f0e1ccc8d4
13 changed files with 436 additions and 474 deletions
|
|
@ -578,39 +578,6 @@ agx_emit_load_vary(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
|
|||
agx_emit_cached_split(b, dest, components);
|
||||
}
|
||||
|
||||
static agx_instr *
|
||||
agx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||
nir_src *offset = nir_get_io_offset_src(instr);
|
||||
assert(nir_src_is_const(*offset) && "todo: indirects");
|
||||
|
||||
unsigned imm_index = b->shader->out->varyings.vs.slots[sem.location];
|
||||
|
||||
if (sem.location == VARYING_SLOT_LAYER ||
|
||||
sem.location == VARYING_SLOT_CLIP_DIST0) {
|
||||
/* Separate slots used for the sysval vs the varying. The default slot
|
||||
* above is for the varying. Change for the sysval.
|
||||
*/
|
||||
assert(sem.no_sysval_output || sem.no_varying);
|
||||
|
||||
if (sem.no_varying) {
|
||||
imm_index = sem.location == VARYING_SLOT_LAYER
|
||||
? b->shader->out->varyings.vs.layer_viewport_slot
|
||||
: b->shader->out->varyings.vs.clip_dist_slot;
|
||||
}
|
||||
}
|
||||
|
||||
assert(imm_index < ~0);
|
||||
imm_index += (nir_src_as_uint(*offset) * 4) + nir_intrinsic_component(instr);
|
||||
|
||||
/* nir_lower_io_to_scalar */
|
||||
assert(nir_intrinsic_write_mask(instr) == 0x1);
|
||||
|
||||
return agx_st_vary(b, agx_immediate(imm_index),
|
||||
agx_src_index(&instr->src[0]));
|
||||
}
|
||||
|
||||
static agx_instr *
|
||||
agx_emit_local_store_pixel(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
|
|
@ -1210,9 +1177,10 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
|
|||
agx_emit_load(b, dst, instr);
|
||||
return NULL;
|
||||
|
||||
case nir_intrinsic_store_output:
|
||||
case nir_intrinsic_store_uvs_agx:
|
||||
assert(stage == MESA_SHADER_VERTEX);
|
||||
return agx_emit_store_vary(b, instr);
|
||||
return agx_st_vary(b, agx_src_index(&instr->src[1]),
|
||||
agx_src_index(&instr->src[0]));
|
||||
|
||||
case nir_intrinsic_store_agx:
|
||||
agx_emit_store(b, instr);
|
||||
|
|
@ -2667,96 +2635,6 @@ agx_optimize_nir(nir_shader *nir, unsigned *preamble_size)
|
|||
NIR_PASS(_, nir, nir_lower_phis_to_scalar, true);
|
||||
}
|
||||
|
||||
/* ABI: position first, then user, then psiz */
|
||||
static void
|
||||
agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings,
|
||||
struct agx_shader_key *key)
|
||||
{
|
||||
unsigned base = 0;
|
||||
|
||||
/* Initialize to "nothing is written" */
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(varyings->slots); ++i)
|
||||
varyings->slots[i] = ~0;
|
||||
|
||||
/* gl_Position is implicitly written, although it may validly be absent in
|
||||
* vertex programs run only for transform feedback. Those ignore their
|
||||
* varyings so it doesn't matter what we do here as long as we don't fail.
|
||||
*/
|
||||
varyings->slots[VARYING_SLOT_POS] = base;
|
||||
base += 4;
|
||||
|
||||
/* These are always flat-shaded from the FS perspective */
|
||||
key->vs.outputs_flat_shaded |= VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
|
||||
|
||||
/* The internal cull distance slots are always linearly-interpolated */
|
||||
key->vs.outputs_linear_shaded |=
|
||||
BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, 2);
|
||||
|
||||
assert(!(key->vs.outputs_flat_shaded & key->vs.outputs_linear_shaded));
|
||||
|
||||
/* Smooth 32-bit user bindings go next */
|
||||
u_foreach_bit64(loc, nir->info.outputs_written &
|
||||
~key->vs.outputs_flat_shaded &
|
||||
~key->vs.outputs_linear_shaded) {
|
||||
if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
|
||||
continue;
|
||||
|
||||
assert(loc < ARRAY_SIZE(varyings->slots));
|
||||
varyings->slots[loc] = base;
|
||||
base += 4;
|
||||
varyings->num_32_smooth += 4;
|
||||
}
|
||||
|
||||
/* Flat 32-bit user bindings go next */
|
||||
u_foreach_bit64(loc,
|
||||
nir->info.outputs_written & key->vs.outputs_flat_shaded) {
|
||||
if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
|
||||
continue;
|
||||
|
||||
assert(loc < ARRAY_SIZE(varyings->slots));
|
||||
varyings->slots[loc] = base;
|
||||
base += 4;
|
||||
varyings->num_32_flat += 4;
|
||||
}
|
||||
|
||||
/* Linear 32-bit user bindings go next */
|
||||
u_foreach_bit64(loc,
|
||||
nir->info.outputs_written & key->vs.outputs_linear_shaded) {
|
||||
if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
|
||||
continue;
|
||||
|
||||
assert(loc < ARRAY_SIZE(varyings->slots));
|
||||
varyings->slots[loc] = base;
|
||||
base += 4;
|
||||
varyings->num_32_linear += 4;
|
||||
}
|
||||
|
||||
/* TODO: Link FP16 varyings */
|
||||
varyings->base_index_fp16 = base;
|
||||
varyings->num_16_smooth = 0;
|
||||
varyings->num_16_flat = 0;
|
||||
varyings->num_16_linear = 0;
|
||||
|
||||
if (nir->info.outputs_written & VARYING_BIT_PSIZ) {
|
||||
varyings->slots[VARYING_SLOT_PSIZ] = base;
|
||||
base += 1;
|
||||
}
|
||||
|
||||
if (nir->info.outputs_written & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)) {
|
||||
varyings->layer_viewport_slot = base;
|
||||
base += 1;
|
||||
}
|
||||
|
||||
if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0) {
|
||||
varyings->clip_dist_slot = base;
|
||||
varyings->nr_clip_dists = nir->info.clip_distance_array_size;
|
||||
base += varyings->nr_clip_dists;
|
||||
}
|
||||
|
||||
/* All varyings linked now */
|
||||
varyings->nr_index = base;
|
||||
}
|
||||
|
||||
/*
|
||||
* Varyings that are used as texture coordinates should be kept at fp32, because
|
||||
* fp16 does not have enough precision for large textures. It's technically
|
||||
|
|
@ -3188,10 +3066,6 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
|
|||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
out->tag_write_disable = !nir->info.writes_memory;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX &&
|
||||
(nir->info.outputs_written & VARYING_BIT_CLIP_DIST0))
|
||||
NIR_PASS(_, nir, agx_nir_lower_clip_distance);
|
||||
|
||||
bool needs_libagx = true /* TODO: Optimize */;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
|
|
@ -3238,19 +3112,6 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
|
|||
/* Late VBO lowering creates constant udiv instructions */
|
||||
NIR_PASS(_, nir, nir_opt_idiv_const, 16);
|
||||
|
||||
/* Varying output is scalar, other I/O is vector. Lowered late because
|
||||
* transform feedback programs will use vector output.
|
||||
*/
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
|
||||
if (nir->info.outputs_written &
|
||||
(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)) {
|
||||
|
||||
NIR_PASS(_, nir, agx_nir_lower_layer);
|
||||
}
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_load_from_texture_handle,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
|
@ -3258,10 +3119,7 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
|
|||
out->push_count = key->reserved_preamble;
|
||||
agx_optimize_nir(nir, &out->push_count);
|
||||
|
||||
/* Must be last since NIR passes can remap driver_location freely */
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX)
|
||||
agx_remap_varyings_vs(nir, &out->varyings.vs, key);
|
||||
else if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
assign_coefficient_regs(nir, &out->varyings.fs);
|
||||
|
||||
if (agx_should_dump(nir, AGX_DBG_SHADERS))
|
||||
|
|
@ -3284,9 +3142,6 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
|
|||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
out->writes_psiz =
|
||||
nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ);
|
||||
|
||||
out->nonzero_viewport = nir->info.outputs_written & VARYING_BIT_VIEWPORT;
|
||||
|
||||
out->writes_layer_viewport =
|
||||
|
|
|
|||
|
|
@ -9,50 +9,6 @@
|
|||
#include "util/u_dynarray.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
struct agx_varyings_vs {
|
||||
/* The number of user varyings of each type. The varyings must be allocated
|
||||
* in this order ({smooth, flat, linear} × {32, 16}), which may require
|
||||
* remapping.
|
||||
*/
|
||||
unsigned num_32_smooth;
|
||||
unsigned num_32_flat;
|
||||
unsigned num_32_linear;
|
||||
unsigned num_16_smooth;
|
||||
unsigned num_16_flat;
|
||||
unsigned num_16_linear;
|
||||
|
||||
/* The first index used for FP16 varyings. Indices less than this are treated
|
||||
* as FP32. This may require remapping slots to guarantee.
|
||||
*/
|
||||
unsigned base_index_fp16;
|
||||
|
||||
/* The total number of vertex shader indices output. Must be at least
|
||||
* base_index_fp16.
|
||||
*/
|
||||
unsigned nr_index;
|
||||
|
||||
/* If the slot is written, this is the base index that the first component
|
||||
* of the slot is written to. The next components are found in the next
|
||||
* indices. If less than base_index_fp16, this is a 32-bit slot (with 4
|
||||
* indices for the 4 components), else this is a 16-bit slot (with 2
|
||||
* indices for the 4 components). This must be less than nr_index.
|
||||
*
|
||||
* If the slot is not written, this must be ~0.
|
||||
*/
|
||||
unsigned slots[VARYING_SLOT_MAX];
|
||||
|
||||
/* Slot for the combined layer/viewport 32-bit sysval output, or ~0 if none
|
||||
* is written. What's at slots[VARYING_SLOT_LAYER] is the varying output.
|
||||
*/
|
||||
unsigned layer_viewport_slot;
|
||||
|
||||
/* Base slot for the clip distance sysval outputs, or ~0 if none is written.
|
||||
* What's at slots[VARYING_SLOT_CLIP_DIST0] is the varying output.
|
||||
*/
|
||||
unsigned clip_dist_slot;
|
||||
unsigned nr_clip_dists;
|
||||
};
|
||||
|
||||
struct agx_cf_binding {
|
||||
/* Base coefficient register */
|
||||
unsigned cf_base;
|
||||
|
|
@ -96,7 +52,6 @@ struct agx_varyings_fs {
|
|||
};
|
||||
|
||||
union agx_varyings {
|
||||
struct agx_varyings_vs vs;
|
||||
struct agx_varyings_fs fs;
|
||||
};
|
||||
|
||||
|
|
@ -127,9 +82,6 @@ struct agx_shader_info {
|
|||
/* Does the shader read the tilebuffer? */
|
||||
bool reads_tib;
|
||||
|
||||
/* Does the shader write point size? */
|
||||
bool writes_psiz;
|
||||
|
||||
/* Does the shader potentially draw to a nonzero viewport? */
|
||||
bool nonzero_viewport;
|
||||
|
||||
|
|
@ -195,17 +147,6 @@ enum agx_format {
|
|||
AGX_NUM_FORMATS,
|
||||
};
|
||||
|
||||
/* Vertex shader portion of the shader key */
struct agx_vs_shader_key {
   /* The GPU ABI orders the varyings written by the VS as: all smooth shaded,
    * then all flat shaded, then all linear shaded. To remap the VS outputs
    * into that order, the compiler needs the masks of flat and linear shaded
    * varyings.
    */
   uint64_t outputs_flat_shaded;
   uint64_t outputs_linear_shaded;
};
|
||||
|
||||
struct agx_fs_shader_key {
|
||||
/* Normally, access to the tilebuffer must be guarded by appropriate fencing
|
||||
* instructions to ensure correct results in the presence of out-of-order
|
||||
|
|
@ -246,7 +187,6 @@ struct agx_shader_key {
|
|||
bool promote_constants;
|
||||
|
||||
union {
|
||||
struct agx_vs_shader_key vs;
|
||||
struct agx_fs_shader_key fs;
|
||||
};
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,42 +0,0 @@
|
|||
/*
|
||||
* Copyright 2023 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "agx_nir.h"
|
||||
|
||||
static bool
|
||||
lower(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
if (sem.location != VARYING_SLOT_CLIP_DIST0)
|
||||
return false;
|
||||
|
||||
nir_instr *clone = nir_instr_clone(b->shader, &intr->instr);
|
||||
nir_intrinsic_instr *lowered = nir_instr_as_intrinsic(clone);
|
||||
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
nir_builder_instr_insert(b, clone);
|
||||
|
||||
nir_io_semantics new_sem = sem;
|
||||
new_sem.no_varying = true;
|
||||
nir_intrinsic_set_io_semantics(lowered, new_sem);
|
||||
|
||||
sem.no_sysval_output = true;
|
||||
nir_intrinsic_set_io_semantics(intr, sem);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_clip_distance(nir_shader *s)
|
||||
{
|
||||
assert(s->info.outputs_written & VARYING_BIT_CLIP_DIST0);
|
||||
|
||||
return nir_shader_intrinsics_pass(
|
||||
s, lower, nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
}
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
/*
|
||||
* Copyright 2023 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "agx_nir.h"
|
||||
|
||||
bool
|
||||
agx_nir_lower_layer(nir_shader *s)
|
||||
{
|
||||
assert(s->info.stage == MESA_SHADER_VERTEX);
|
||||
assert(s->info.outputs_written & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT));
|
||||
|
||||
/* Writes are in the last block, search */
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(s);
|
||||
nir_block *last = nir_impl_last_block(impl);
|
||||
|
||||
nir_def *layer = NULL, *viewport = NULL;
|
||||
nir_cursor last_cursor;
|
||||
|
||||
nir_foreach_instr(instr, last) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
|
||||
if (store->intrinsic != nir_intrinsic_store_output)
|
||||
continue;
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(store);
|
||||
nir_def *value = store->src[0].ssa;
|
||||
|
||||
if (sem.location == VARYING_SLOT_LAYER) {
|
||||
assert(layer == NULL && "only written once");
|
||||
layer = value;
|
||||
} else if (sem.location == VARYING_SLOT_VIEWPORT) {
|
||||
assert(viewport == NULL && "only written once");
|
||||
viewport = value;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
last_cursor = nir_after_instr(&store->instr);
|
||||
|
||||
/* Leave the store as a varying-only, no sysval output */
|
||||
sem.no_sysval_output = true;
|
||||
nir_intrinsic_set_io_semantics(store, sem);
|
||||
}
|
||||
|
||||
assert((layer || viewport) && "metadata inconsistent with program");
|
||||
|
||||
/* Pack together and write out */
|
||||
nir_builder b = nir_builder_at(last_cursor);
|
||||
|
||||
nir_def *zero = nir_imm_intN_t(&b, 0, 16);
|
||||
nir_def *packed =
|
||||
nir_pack_32_2x16_split(&b, layer ? nir_u2u16(&b, layer) : zero,
|
||||
viewport ? nir_u2u16(&b, viewport) : zero);
|
||||
|
||||
/* Written with a sysval-only store, no varying output */
|
||||
nir_store_output(&b, packed, nir_imm_int(&b, 0),
|
||||
.io_semantics.location = VARYING_SLOT_LAYER,
|
||||
.io_semantics.num_slots = 1,
|
||||
.io_semantics.no_varying = true);
|
||||
|
||||
nir_metadata_preserve(impl,
|
||||
nir_metadata_dominance | nir_metadata_block_index);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -8,13 +8,11 @@ libasahi_agx_files = files(
|
|||
'agx_liveness.c',
|
||||
'agx_insert_waits.c',
|
||||
'agx_nir_lower_address.c',
|
||||
'agx_nir_lower_clip_distance.c',
|
||||
'agx_nir_lower_cull_distance.c',
|
||||
'agx_nir_lower_frag_sidefx.c',
|
||||
'agx_nir_lower_sample_mask.c',
|
||||
'agx_nir_lower_discard_zs_emit.c',
|
||||
'agx_nir_lower_interpolation.c',
|
||||
'agx_nir_lower_layer.c',
|
||||
'agx_nir_lower_shared_bitsize.c',
|
||||
'agx_nir_lower_subgroups.c',
|
||||
'agx_nir_opt_preamble.c',
|
||||
|
|
|
|||
251
src/asahi/lib/agx_nir_lower_uvs.c
Normal file
251
src/asahi/lib/agx_nir_lower_uvs.c
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/macros.h"
|
||||
#include "agx_compile.h"
|
||||
#include "agx_pack.h"
|
||||
#include "agx_uvs.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
#include "nir_intrinsics.h"
|
||||
#include "nir_intrinsics_indices.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
struct ctx {
|
||||
nir_def *layer, *viewport;
|
||||
nir_cursor after_layer_viewport;
|
||||
struct agx_unlinked_uvs_layout *layout;
|
||||
};
|
||||
|
||||
static enum uvs_group
|
||||
group_for_varying(gl_varying_slot loc)
|
||||
{
|
||||
switch (loc) {
|
||||
case VARYING_SLOT_POS:
|
||||
return UVS_POSITION;
|
||||
case VARYING_SLOT_PSIZ:
|
||||
return UVS_PSIZ;
|
||||
default:
|
||||
return UVS_VARYINGS;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
struct ctx *ctx = data;
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
|
||||
nir_def *value = intr->src[0].ssa;
|
||||
nir_def *offset = intr->src[1].ssa;
|
||||
|
||||
/* If there is only 1 user varying, it is at the base of the varying section.
|
||||
* This saves us an indirection on simple separate shaders.
|
||||
*/
|
||||
bool single_vary = util_is_power_of_two_nonzero64(ctx->layout->written);
|
||||
enum uvs_group group = group_for_varying(sem.location);
|
||||
|
||||
nir_def *base;
|
||||
if ((group == UVS_VARYINGS) && !single_vary)
|
||||
base = nir_load_uvs_index_agx(b, .io_semantics = sem);
|
||||
else
|
||||
base = nir_imm_intN_t(b, ctx->layout->group_offs[group], 16);
|
||||
|
||||
nir_def *index = nir_iadd(b, nir_iadd_imm(b, base, component),
|
||||
nir_imul_imm(b, nir_u2u16(b, offset), 4));
|
||||
|
||||
nir_intrinsic_instr *new_store = nir_store_uvs_agx(b, value, index);
|
||||
|
||||
/* Insert clip distance sysval writes, and gather layer/viewport writes so we
|
||||
* can accumulate their system value. These are still lowered like normal to
|
||||
* write them for the varying FS input.
|
||||
*/
|
||||
if (sem.location == VARYING_SLOT_LAYER) {
|
||||
assert(ctx->layer == NULL && "only written once");
|
||||
ctx->layer = value;
|
||||
ctx->after_layer_viewport = nir_after_instr(&new_store->instr);
|
||||
} else if (sem.location == VARYING_SLOT_VIEWPORT) {
|
||||
assert(ctx->viewport == NULL && "only written once");
|
||||
ctx->viewport = value;
|
||||
ctx->after_layer_viewport = nir_after_instr(&new_store->instr);
|
||||
} else if (sem.location == VARYING_SLOT_CLIP_DIST0) {
|
||||
unsigned clip_base = ctx->layout->group_offs[UVS_CLIP_DIST];
|
||||
nir_def *index = nir_iadd_imm(b, nir_imul_imm(b, nir_u2u16(b, offset), 4),
|
||||
clip_base + component);
|
||||
|
||||
nir_store_uvs_agx(b, value, index);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
write_layer_viewport_sysval(struct ctx *ctx)
|
||||
{
|
||||
nir_builder b = nir_builder_at(ctx->after_layer_viewport);
|
||||
|
||||
nir_def *zero = nir_imm_intN_t(&b, 0, 16);
|
||||
nir_def *layer = ctx->layer ? nir_u2u16(&b, ctx->layer) : zero;
|
||||
nir_def *viewport = ctx->viewport ? nir_u2u16(&b, ctx->viewport) : zero;
|
||||
|
||||
nir_store_uvs_agx(
|
||||
&b, nir_pack_32_2x16_split(&b, layer, viewport),
|
||||
nir_imm_int(&b, ctx->layout->group_offs[UVS_LAYER_VIEWPORT]));
|
||||
}
|
||||
|
||||
static bool
|
||||
gather_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
struct agx_unlinked_uvs_layout *layout = data;
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
|
||||
if (nir_src_is_const(intr->src[1])) {
|
||||
unsigned loc = sem.location + nir_src_as_uint(intr->src[1]);
|
||||
layout->components[loc] = MAX2(layout->components[loc], component + 1);
|
||||
} else {
|
||||
for (unsigned i = 0; i < sem.num_slots; ++i) {
|
||||
layout->components[sem.location + i] = 4;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_uvs(nir_shader *s, struct agx_unlinked_uvs_layout *layout)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
/* Scalarize up front so we can ignore vectors later */
|
||||
NIR_PASS(progress, s, nir_lower_io_to_scalar, nir_var_shader_out, NULL,
|
||||
NULL);
|
||||
|
||||
/* Determine the unlinked UVS layout */
|
||||
NIR_PASS(progress, s, nir_shader_intrinsics_pass, gather_components,
|
||||
nir_metadata_block_index | nir_metadata_dominance, layout);
|
||||
|
||||
unsigned sizes[UVS_NUM_GROUP] = {
|
||||
[UVS_POSITION] = 4,
|
||||
[UVS_PSIZ] = !!(s->info.outputs_written & VARYING_BIT_PSIZ),
|
||||
[UVS_LAYER_VIEWPORT] = !!(s->info.outputs_written &
|
||||
(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)),
|
||||
[UVS_CLIP_DIST] = s->info.clip_distance_array_size,
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(layout->components); ++i) {
|
||||
if (i != VARYING_SLOT_POS && i != VARYING_SLOT_PSIZ &&
|
||||
layout->components[i]) {
|
||||
|
||||
layout->written |= BITFIELD64_BIT(i);
|
||||
sizes[UVS_VARYINGS] += layout->components[i];
|
||||
}
|
||||
}
|
||||
|
||||
unsigned offs = 0;
|
||||
for (enum uvs_group g = 0; g < UVS_NUM_GROUP; ++g) {
|
||||
layout->group_offs[g] = offs;
|
||||
offs += sizes[g];
|
||||
}
|
||||
|
||||
layout->size = offs;
|
||||
layout->user_size = sizes[UVS_VARYINGS];
|
||||
|
||||
/* Now lower in terms of the unlinked layout */
|
||||
struct ctx ctx = {.layout = layout};
|
||||
NIR_PASS(progress, s, nir_shader_intrinsics_pass, lower,
|
||||
nir_metadata_block_index | nir_metadata_dominance, &ctx);
|
||||
|
||||
if (ctx.layer || ctx.viewport) {
|
||||
write_layer_viewport_sysval(&ctx);
|
||||
}
|
||||
|
||||
/* Finally, pack what we can. It's much cheaper to do this at compile-time
|
||||
* than draw-time.
|
||||
*/
|
||||
agx_pack(&layout->osel, OUTPUT_SELECT, cfg) {
|
||||
cfg.point_size = sizes[UVS_PSIZ];
|
||||
cfg.viewport_target = sizes[UVS_LAYER_VIEWPORT];
|
||||
cfg.render_target = cfg.viewport_target;
|
||||
|
||||
cfg.clip_distance_plane_0 = sizes[UVS_CLIP_DIST] > 0;
|
||||
cfg.clip_distance_plane_1 = sizes[UVS_CLIP_DIST] > 1;
|
||||
cfg.clip_distance_plane_2 = sizes[UVS_CLIP_DIST] > 2;
|
||||
cfg.clip_distance_plane_3 = sizes[UVS_CLIP_DIST] > 3;
|
||||
cfg.clip_distance_plane_4 = sizes[UVS_CLIP_DIST] > 4;
|
||||
cfg.clip_distance_plane_5 = sizes[UVS_CLIP_DIST] > 5;
|
||||
cfg.clip_distance_plane_6 = sizes[UVS_CLIP_DIST] > 6;
|
||||
cfg.clip_distance_plane_7 = sizes[UVS_CLIP_DIST] > 7;
|
||||
}
|
||||
|
||||
agx_pack(&layout->vdm, VDM_STATE_VERTEX_OUTPUTS, cfg) {
|
||||
cfg.output_count_1 = offs;
|
||||
cfg.output_count_2 = offs;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
void
|
||||
agx_assign_uvs(struct agx_varyings_vs *varyings,
|
||||
struct agx_unlinked_uvs_layout *layout, uint64_t flat_mask,
|
||||
uint64_t linear_mask)
|
||||
{
|
||||
*varyings = (struct agx_varyings_vs){0};
|
||||
|
||||
/* These are always flat-shaded from the FS perspective */
|
||||
flat_mask |= VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
|
||||
|
||||
/* The internal cull distance slots are always linearly-interpolated */
|
||||
linear_mask |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, 2);
|
||||
|
||||
assert(!(flat_mask & linear_mask));
|
||||
|
||||
/* TODO: Link FP16 varyings */
|
||||
unsigned num_32_smooth = 0, num_32_flat = 0, num_32_linear = 0;
|
||||
struct {
|
||||
uint32_t *num;
|
||||
uint64_t mask;
|
||||
} parts[] = {
|
||||
{&num_32_smooth, ~flat_mask & ~linear_mask},
|
||||
{&num_32_flat, flat_mask},
|
||||
{&num_32_linear, linear_mask},
|
||||
};
|
||||
|
||||
unsigned base = layout->group_offs[UVS_VARYINGS];
|
||||
|
||||
for (unsigned p = 0; p < ARRAY_SIZE(parts); ++p) {
|
||||
u_foreach_bit64(loc, parts[p].mask & layout->written) {
|
||||
assert(loc < ARRAY_SIZE(varyings->slots));
|
||||
varyings->slots[loc] = base;
|
||||
|
||||
base += layout->components[loc];
|
||||
(*parts[p].num) += layout->components[loc];
|
||||
}
|
||||
}
|
||||
|
||||
agx_pack(&varyings->counts_32, VARYING_COUNTS, cfg) {
|
||||
cfg.smooth = num_32_smooth;
|
||||
cfg.flat = num_32_flat;
|
||||
cfg.linear = num_32_linear;
|
||||
}
|
||||
|
||||
agx_pack(&varyings->counts_16, VARYING_COUNTS, cfg) {
|
||||
cfg.smooth = 0;
|
||||
cfg.flat = 0;
|
||||
cfg.linear = 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -6,6 +6,9 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "agx_pack.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
struct nir_shader;
|
||||
struct nir_instr;
|
||||
|
|
|
|||
79
src/asahi/lib/agx_uvs.h
Normal file
79
src/asahi/lib/agx_uvs.h
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "agx_pack.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
struct nir_shader;
|
||||
|
||||
/* Groups of the UVS, listed in the order the hardware expects them */
enum uvs_group {
   UVS_POSITION = 0,
   UVS_VARYINGS = 1,
   UVS_PSIZ = 2,
   UVS_LAYER_VIEWPORT = 3,
   UVS_CLIP_DIST = 4,

   /* Number of groups, not a group itself */
   UVS_NUM_GROUP = 5,
};
|
||||
|
||||
/**
|
||||
* Represents an "unlinked" UVS layout. This is computable from an unlinked
|
||||
* vertex shader without knowing the associated fragment shader. The various UVS
|
||||
* groups have fixed offsets, but the varyings within the varying group have
|
||||
* indeterminate order since we don't yet know the fragment shader interpolation
|
||||
* qualifiers.
|
||||
*/
|
||||
struct agx_unlinked_uvs_layout {
|
||||
/* Offset of each group in the UVS in words. */
|
||||
uint8_t group_offs[UVS_NUM_GROUP];
|
||||
|
||||
/* Size of the UVS allocation in words. >= last group_offs element */
|
||||
uint8_t size;
|
||||
|
||||
/* Size of the UVS_VARYINGS */
|
||||
uint8_t user_size;
|
||||
|
||||
/* Number of 32-bit components written for each slot. TODO: Model 16-bit.
|
||||
*
|
||||
* Invariant: sum_{slot} (components[slot]) =
|
||||
* group_offs[PSIZ] - group_offs[VARYINGS]
|
||||
*/
|
||||
uint8_t components[VARYING_SLOT_MAX];
|
||||
|
||||
/* Bit i set <===> components[i] != 0 && i != POS && i != PSIZ. For fast
|
||||
* iteration of user varyings.
|
||||
*/
|
||||
uint64_t written;
|
||||
|
||||
/* Fully packed data structure */
|
||||
struct agx_vdm_state_vertex_outputs_packed vdm;
|
||||
|
||||
/* Partial data structure, must be merged with FS selects */
|
||||
struct agx_output_select_packed osel;
|
||||
};
|
||||
|
||||
/* Lower the output stores of a shader to UVS stores and fill in its unlinked
 * layout. Returns the progress reported by the NIR passes it runs.
 */
bool
agx_nir_lower_uvs(struct nir_shader *s, struct agx_unlinked_uvs_layout *layout);
|
||||
|
||||
/**
|
||||
* Represents a linked UVS layout.
|
||||
*/
|
||||
struct agx_varyings_vs {
|
||||
/* Associated linked hardware data structures */
|
||||
struct agx_varying_counts_packed counts_32, counts_16;
|
||||
|
||||
/* If the user varying slot is written, this is the base index that the first
|
||||
* component of the slot is written to. The next components are found in the
|
||||
* next indices. Otherwise 0, aliasing position.
|
||||
*/
|
||||
unsigned slots[VARYING_SLOT_MAX];
|
||||
};
|
||||
|
||||
/* Link an unlinked layout given the masks of flat and linear shaded slots,
 * writing the linked result to *varyings.
 */
void
agx_assign_uvs(struct agx_varyings_vs *varyings,
               struct agx_unlinked_uvs_layout *layout, uint64_t flat_mask,
               uint64_t linear_mask);
|
||||
|
|
@ -20,6 +20,7 @@ libasahi_lib_files = files(
|
|||
'agx_nir_lower_tess.c',
|
||||
'agx_nir_lower_texture.c',
|
||||
'agx_nir_lower_tilebuffer.c',
|
||||
'agx_nir_lower_uvs.c',
|
||||
'agx_nir_lower_vbo.c',
|
||||
'agx_nir_predicate_layer_id.c',
|
||||
'agx_ppp.h',
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ write_shader(struct blob *blob, const struct agx_compiled_shader *binary,
|
|||
blob_write_uint32(blob, shader_size);
|
||||
blob_write_bytes(blob, binary->bo->ptr.cpu, shader_size);
|
||||
blob_write_bytes(blob, &binary->info, sizeof(binary->info));
|
||||
blob_write_bytes(blob, &binary->uvs, sizeof(binary->uvs));
|
||||
blob_write_uint32(blob, binary->push_range_count);
|
||||
blob_write_bytes(blob, binary->push,
|
||||
sizeof(binary->push[0]) * binary->push_range_count);
|
||||
|
|
@ -96,6 +97,7 @@ read_shader(struct agx_screen *screen, struct blob_reader *blob,
|
|||
blob_copy_bytes(blob, binary->bo->ptr.cpu, binary_size);
|
||||
|
||||
blob_copy_bytes(blob, &binary->info, sizeof(binary->info));
|
||||
blob_copy_bytes(blob, &binary->uvs, sizeof(binary->uvs));
|
||||
binary->push_range_count = blob_read_uint32(blob);
|
||||
blob_copy_bytes(blob, binary->push,
|
||||
sizeof(binary->push[0]) * binary->push_range_count);
|
||||
|
|
|
|||
|
|
@ -189,6 +189,9 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
return load_sysval_root(b, 1, 16, &u->sprite_mask);
|
||||
case nir_intrinsic_load_clip_z_coeff_agx:
|
||||
return nir_f2f32(b, load_sysval_root(b, 1, 16, &u->clip_z_coeff));
|
||||
case nir_intrinsic_load_uvs_index_agx:
|
||||
return load_sysval_root(
|
||||
b, 1, 16, &u->uvs_index[nir_intrinsic_io_semantics(intr).location]);
|
||||
case nir_intrinsic_load_polygon_stipple_agx: {
|
||||
nir_def *base = load_sysval_root(b, 1, 64, &u->polygon_stipple);
|
||||
nir_def *row = intr->src[0].ssa;
|
||||
|
|
|
|||
|
|
@ -1521,59 +1521,10 @@ asahi_cs_shader_key_equal(const void *a, const void *b)
|
|||
return true;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
agx_find_linked_slot(struct agx_varyings_vs *vs, struct agx_varyings_fs *fs,
|
||||
gl_varying_slot slot, unsigned offset)
|
||||
{
|
||||
assert(offset < 4);
|
||||
assert(slot != VARYING_SLOT_PNTC && "point coords aren't linked");
|
||||
|
||||
if (slot == VARYING_SLOT_POS) {
|
||||
if (offset == 3) {
|
||||
return 0; /* W */
|
||||
} else if (offset == 2) {
|
||||
assert(fs->reads_z);
|
||||
return 1; /* Z */
|
||||
} else {
|
||||
unreachable("gl_Position.xy are not varyings");
|
||||
}
|
||||
}
|
||||
|
||||
unsigned vs_index = vs->slots[slot];
|
||||
|
||||
/* Varyings not written by vertex shader are undefined but we can't crash */
|
||||
if (!(vs_index < vs->nr_index))
|
||||
return 0;
|
||||
|
||||
assert(vs_index >= 4 && "gl_Position should have been the first 4 slots");
|
||||
assert((vs_index < vs->base_index_fp16) ==
|
||||
((vs_index + offset) < vs->base_index_fp16) &&
|
||||
"a given varying must have a consistent type");
|
||||
|
||||
unsigned vs_user_index = (vs_index + offset) - 4;
|
||||
|
||||
if (fs->reads_z)
|
||||
return vs_user_index + 2;
|
||||
else
|
||||
return vs_user_index + 1;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
agx_num_general_outputs(struct agx_varyings_vs *vs)
|
||||
{
|
||||
unsigned nr_vs = vs->nr_index;
|
||||
bool writes_psiz = vs->slots[VARYING_SLOT_PSIZ] < nr_vs;
|
||||
|
||||
assert(nr_vs >= 4 && "gl_Position must be written");
|
||||
if (writes_psiz)
|
||||
assert(nr_vs >= 5 && "gl_PointSize is written");
|
||||
|
||||
return nr_vs - (writes_psiz ? 5 : 4);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
|
||||
struct agx_varyings_fs *fs, bool first_provoking_vertex,
|
||||
unsigned nr_user_indices, struct agx_varyings_fs *fs,
|
||||
bool first_provoking_vertex,
|
||||
uint8_t sprite_coord_enable,
|
||||
bool *generate_primitive_id)
|
||||
{
|
||||
|
|
@ -1586,11 +1537,14 @@ agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
|
|||
size_t linkage_size =
|
||||
AGX_CF_BINDING_HEADER_LENGTH + (fs->nr_bindings * AGX_CF_BINDING_LENGTH);
|
||||
|
||||
void *tmp = alloca(linkage_size);
|
||||
struct agx_cf_binding_header_packed *header = tmp;
|
||||
struct agx_ptr t = agx_pool_alloc_aligned(pool, linkage_size, 256);
|
||||
assert(t.gpu < (1ull << 32) && "varyings must be in low memory");
|
||||
|
||||
struct agx_cf_binding_header_packed *header = t.cpu;
|
||||
struct agx_cf_binding_packed *bindings = (void *)(header + 1);
|
||||
|
||||
unsigned nr_slots = agx_num_general_outputs(vs) + 1 + (fs->reads_z ? 1 : 0);
|
||||
unsigned user_base = 1 + (fs->reads_z ? 1 : 0);
|
||||
unsigned nr_slots = user_base + nr_user_indices;
|
||||
|
||||
agx_pack(header, CF_BINDING_HEADER, cfg) {
|
||||
cfg.number_of_32_bit_slots = nr_slots;
|
||||
|
|
@ -1598,35 +1552,45 @@ agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
|
|||
}
|
||||
|
||||
for (unsigned i = 0; i < fs->nr_bindings; ++i) {
|
||||
struct agx_cf_binding b = fs->bindings[i];
|
||||
|
||||
agx_pack(bindings + i, CF_BINDING, cfg) {
|
||||
cfg.base_coefficient_register = fs->bindings[i].cf_base;
|
||||
cfg.components = fs->bindings[i].count;
|
||||
cfg.base_coefficient_register = b.cf_base;
|
||||
cfg.components = b.count;
|
||||
cfg.shade_model =
|
||||
agx_translate_shade_model(fs, i, first_provoking_vertex);
|
||||
|
||||
if (util_varying_is_point_coord(fs->bindings[i].slot,
|
||||
sprite_coord_enable)) {
|
||||
assert(fs->bindings[i].offset == 0);
|
||||
if (util_varying_is_point_coord(b.slot, sprite_coord_enable)) {
|
||||
assert(b.offset == 0);
|
||||
cfg.source = AGX_COEFFICIENT_SOURCE_POINT_COORD;
|
||||
} else if (fs->bindings[i].slot == VARYING_SLOT_PRIMITIVE_ID &&
|
||||
vs->slots[VARYING_SLOT_PRIMITIVE_ID] == ~0) {
|
||||
} else if (b.slot == VARYING_SLOT_PRIMITIVE_ID &&
|
||||
!vs->slots[VARYING_SLOT_PRIMITIVE_ID]) {
|
||||
cfg.source = AGX_COEFFICIENT_SOURCE_PRIMITIVE_ID;
|
||||
*generate_primitive_id = true;
|
||||
} else {
|
||||
cfg.base_slot = agx_find_linked_slot(vs, fs, fs->bindings[i].slot,
|
||||
fs->bindings[i].offset);
|
||||
} else if (b.slot == VARYING_SLOT_POS) {
|
||||
assert(b.offset >= 2 && "gl_Position.xy are not varyings");
|
||||
assert(fs->reads_z || b.offset != 2);
|
||||
|
||||
assert(cfg.base_slot + cfg.components <=
|
||||
MAX2(nr_slots, cfg.components) &&
|
||||
"overflow slots");
|
||||
}
|
||||
|
||||
if (fs->bindings[i].slot == VARYING_SLOT_POS) {
|
||||
if (fs->bindings[i].offset == 2) {
|
||||
if (b.offset == 2) {
|
||||
cfg.source = AGX_COEFFICIENT_SOURCE_FRAGCOORD_Z;
|
||||
cfg.base_slot = 1;
|
||||
} else {
|
||||
assert(!fs->bindings[i].perspective &&
|
||||
"W must not be perspective divided");
|
||||
assert(!b.perspective && "W must not be perspective divided");
|
||||
}
|
||||
} else {
|
||||
unsigned vs_index = vs->slots[b.slot];
|
||||
assert(b.offset < 4);
|
||||
|
||||
/* Varyings not written by vertex shader are undefined but we can't
|
||||
* crash */
|
||||
if (vs_index) {
|
||||
assert(vs_index >= 4 &&
|
||||
"gl_Position should have been the first 4 slots");
|
||||
|
||||
cfg.base_slot = user_base + (vs_index - 4) + b.offset;
|
||||
|
||||
assert(cfg.base_slot + cfg.components <= nr_slots &&
|
||||
"overflow slots");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1635,16 +1599,7 @@ agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
|
|||
}
|
||||
}
|
||||
|
||||
struct agx_ptr ptr = agx_pool_alloc_aligned(pool, (3 * linkage_size), 256);
|
||||
assert(ptr.gpu < (1ull << 32) && "varyings must be in low memory");
|
||||
|
||||
/* I don't understand why the data structures are repeated thrice */
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
memcpy(((uint8_t *)ptr.cpu) + (i * linkage_size), (uint8_t *)tmp,
|
||||
linkage_size);
|
||||
}
|
||||
|
||||
return ptr.gpu;
|
||||
return t.gpu;
|
||||
}
|
||||
|
||||
/* Dynamic lowered I/O version of nir_lower_clip_halfz */
|
||||
|
|
@ -1859,6 +1814,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
|||
perf_debug(dev, "Compiling shader variant #%u",
|
||||
_mesa_hash_table_num_entries(so->variants));
|
||||
|
||||
struct agx_unlinked_uvs_layout uvs = {0};
|
||||
bool force_translucent = false;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
|
|
@ -1871,6 +1827,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
|||
key->next.hw.fixed_point_size);
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
NIR_PASS(_, nir, agx_nir_lower_uvs, &uvs);
|
||||
} else {
|
||||
NIR_PASS(_, nir, agx_nir_lower_sysvals, PIPE_SHADER_VERTEX, false);
|
||||
NIR_PASS(_, nir, agx_nir_lower_vs_before_gs, dev->libagx,
|
||||
|
|
@ -1993,21 +1950,11 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
|||
|
||||
struct agx_shader_key base_key = {0};
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
struct asahi_vs_shader_key *key = &key_->vs;
|
||||
|
||||
if (key->hw) {
|
||||
base_key.vs.outputs_flat_shaded = key_->vs.next.hw.outputs_flat_shaded;
|
||||
|
||||
base_key.vs.outputs_linear_shaded =
|
||||
key_->vs.next.hw.outputs_linear_shaded;
|
||||
}
|
||||
}
|
||||
|
||||
struct agx_compiled_shader *compiled =
|
||||
agx_compile_nir(dev, nir, &base_key, debug, so->type);
|
||||
|
||||
compiled->so = so;
|
||||
compiled->uvs = uvs;
|
||||
|
||||
/* reads_tib => Translucent pass type */
|
||||
compiled->info.reads_tib |= force_translucent;
|
||||
|
|
@ -2039,13 +1986,14 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
|||
NIR_PASS(_, gs_copy, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
||||
base_key.vs.outputs_flat_shaded = key->outputs_flat_shaded;
|
||||
base_key.vs.outputs_linear_shaded = key->outputs_linear_shaded;
|
||||
struct agx_unlinked_uvs_layout uvs = {0};
|
||||
NIR_PASS(_, gs_copy, agx_nir_lower_uvs, &uvs);
|
||||
|
||||
compiled->gs_copy =
|
||||
agx_compile_nir(dev, gs_copy, &base_key, debug, PIPE_SHADER_GEOMETRY);
|
||||
compiled->gs_copy->so = so;
|
||||
compiled->gs_copy->stage = so->type;
|
||||
compiled->gs_copy->uvs = uvs;
|
||||
}
|
||||
|
||||
compiled->gs_output_mode = gs_out_prim;
|
||||
|
|
@ -2427,10 +2375,9 @@ agx_update_vs(struct agx_context *ctx, unsigned index_size_B)
|
|||
*
|
||||
* vb_mask, attributes, vertex_buffers: VERTEX
|
||||
* point_size_per_vertex: RS
|
||||
* outputs_{flat,linear}_shaded: FS_PROG
|
||||
*/
|
||||
if (!((ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB |
|
||||
AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_PRIM)) ||
|
||||
AGX_DIRTY_RS | AGX_DIRTY_PRIM)) ||
|
||||
ctx->stage[PIPE_SHADER_TESS_EVAL].dirty ||
|
||||
ctx->stage[PIPE_SHADER_GEOMETRY].dirty ||
|
||||
ctx->stage[PIPE_SHADER_TESS_EVAL].shader ||
|
||||
|
|
@ -2451,11 +2398,6 @@ agx_update_vs(struct agx_context *ctx, unsigned index_size_B)
|
|||
*/
|
||||
key.next.hw.fixed_point_size = !ctx->rast->base.point_size_per_vertex &&
|
||||
rasterized_prim == MESA_PRIM_POINTS;
|
||||
|
||||
key.next.hw.outputs_flat_shaded =
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded;
|
||||
key.next.hw.outputs_linear_shaded =
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded;
|
||||
} else {
|
||||
key.next.sw.index_size_B = index_size_B;
|
||||
}
|
||||
|
|
@ -2511,10 +2453,6 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info,
|
|||
/* TODO: Deduplicate */
|
||||
.fixed_point_size = !ctx->rast->base.point_size_per_vertex &&
|
||||
rasterized_prim == MESA_PRIM_POINTS,
|
||||
.outputs_flat_shaded =
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded,
|
||||
.outputs_linear_shaded =
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded,
|
||||
};
|
||||
|
||||
return agx_update_shader(ctx, &ctx->gs, PIPE_SHADER_GEOMETRY,
|
||||
|
|
@ -3564,8 +3502,9 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
|
|||
|
||||
if (IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS) ||
|
||||
IS_DIRTY(PRIM)) {
|
||||
|
||||
batch->varyings = agx_link_varyings_vs_fs(
|
||||
&batch->pipeline_pool, &vs->info.varyings.vs,
|
||||
&batch->pipeline_pool, &batch->linked_varyings, vs->uvs.user_size,
|
||||
&ctx->fs->info.varyings.fs, ctx->rast->base.flatshade_first,
|
||||
(batch->reduced_prim == MESA_PRIM_POINTS)
|
||||
? ctx->rast->base.sprite_coord_enable
|
||||
|
|
@ -3596,10 +3535,7 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
|
|||
cfg.pipeline = agx_build_pipeline(batch, vs, PIPE_SHADER_VERTEX, 0, 0);
|
||||
}
|
||||
|
||||
agx_push(out, VDM_STATE_VERTEX_OUTPUTS, cfg) {
|
||||
cfg.output_count_1 = vs->info.varyings.vs.nr_index;
|
||||
cfg.output_count_2 = cfg.output_count_1;
|
||||
}
|
||||
agx_push_packed(out, vs->uvs.vdm, VDM_STATE_VERTEX_OUTPUTS);
|
||||
|
||||
agx_push(out, VDM_STATE_VERTEX_UNKNOWN, cfg) {
|
||||
cfg.flat_shading_control = ctx->rast->base.flatshade_first
|
||||
|
|
@ -3654,9 +3590,9 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
|
|||
.fragment_back_face = fragment_face_dirty,
|
||||
.fragment_back_face_2 = object_type_dirty || IS_DIRTY(FS_PROG),
|
||||
.fragment_back_stencil = IS_DIRTY(ZS),
|
||||
.output_select = IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG),
|
||||
.varying_counts_32 = IS_DIRTY(VS_PROG),
|
||||
.varying_counts_16 = IS_DIRTY(VS_PROG),
|
||||
.output_select = varyings_dirty,
|
||||
.varying_counts_32 = varyings_dirty,
|
||||
.varying_counts_16 = varyings_dirty,
|
||||
.cull = IS_DIRTY(RS),
|
||||
.cull_2 = varyings_dirty,
|
||||
.fragment_shader =
|
||||
|
|
@ -3742,40 +3678,24 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
|
|||
if (dirty.fragment_back_stencil)
|
||||
agx_ppp_push_packed(&ppp, ctx->zs->back_stencil.opaque, FRAGMENT_STENCIL);
|
||||
|
||||
if (dirty.output_select) {
|
||||
agx_ppp_push(&ppp, OUTPUT_SELECT, cfg) {
|
||||
cfg.varyings = !!fs->info.varyings.fs.nr_bindings;
|
||||
cfg.point_size = vs->info.writes_psiz;
|
||||
cfg.viewport_target = vs->info.writes_layer_viewport;
|
||||
cfg.render_target = vs->info.writes_layer_viewport;
|
||||
cfg.frag_coord_z = fs->info.varyings.fs.reads_z;
|
||||
cfg.clip_distance_plane_0 = vs->info.varyings.vs.nr_clip_dists > 0;
|
||||
cfg.clip_distance_plane_1 = vs->info.varyings.vs.nr_clip_dists > 1;
|
||||
cfg.clip_distance_plane_2 = vs->info.varyings.vs.nr_clip_dists > 2;
|
||||
cfg.clip_distance_plane_3 = vs->info.varyings.vs.nr_clip_dists > 3;
|
||||
cfg.clip_distance_plane_4 = vs->info.varyings.vs.nr_clip_dists > 4;
|
||||
cfg.clip_distance_plane_5 = vs->info.varyings.vs.nr_clip_dists > 5;
|
||||
cfg.clip_distance_plane_6 = vs->info.varyings.vs.nr_clip_dists > 6;
|
||||
cfg.clip_distance_plane_7 = vs->info.varyings.vs.nr_clip_dists > 7;
|
||||
|
||||
assert(cfg.point_size || !is_points);
|
||||
}
|
||||
}
|
||||
|
||||
assert(dirty.varying_counts_32 == dirty.varying_counts_16);
|
||||
assert(dirty.varying_counts_32 == dirty.output_select);
|
||||
|
||||
if (dirty.varying_counts_32) {
|
||||
agx_ppp_push(&ppp, VARYING_COUNTS, cfg) {
|
||||
cfg.smooth = vs->info.varyings.vs.num_32_smooth;
|
||||
cfg.flat = vs->info.varyings.vs.num_32_flat;
|
||||
cfg.linear = vs->info.varyings.vs.num_32_linear;
|
||||
if (dirty.output_select) {
|
||||
struct agx_output_select_packed osel;
|
||||
agx_pack(&osel, OUTPUT_SELECT, cfg) {
|
||||
cfg.varyings = !!fs->info.varyings.fs.nr_bindings;
|
||||
cfg.frag_coord_z = fs->info.varyings.fs.reads_z;
|
||||
}
|
||||
|
||||
agx_ppp_push(&ppp, VARYING_COUNTS, cfg) {
|
||||
cfg.smooth = vs->info.varyings.vs.num_16_smooth;
|
||||
cfg.flat = vs->info.varyings.vs.num_16_flat;
|
||||
cfg.linear = vs->info.varyings.vs.num_16_linear;
|
||||
}
|
||||
agx_merge(osel, vs->uvs.osel, OUTPUT_SELECT);
|
||||
agx_ppp_push_packed(&ppp, &osel, OUTPUT_SELECT);
|
||||
|
||||
agx_ppp_push_packed(&ppp, &batch->linked_varyings.counts_32,
|
||||
VARYING_COUNTS);
|
||||
|
||||
agx_ppp_push_packed(&ppp, &batch->linked_varyings.counts_16,
|
||||
VARYING_COUNTS);
|
||||
}
|
||||
|
||||
if (dirty.cull)
|
||||
|
|
@ -3817,7 +3737,7 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
|
|||
|
||||
if (dirty.output_size) {
|
||||
agx_ppp_push(&ppp, OUTPUT_SIZE, cfg)
|
||||
cfg.count = vs->info.varyings.vs.nr_index;
|
||||
cfg.count = vs->uvs.size;
|
||||
}
|
||||
|
||||
agx_ppp_fini(&out, &ppp);
|
||||
|
|
@ -5061,6 +4981,21 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
agx_batch_add_bo(batch, ctx->gs->gs_copy->bo);
|
||||
}
|
||||
|
||||
if (ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_FS_PROG)) {
|
||||
struct agx_compiled_shader *vs = ctx->vs;
|
||||
if (ctx->gs)
|
||||
vs = ctx->gs->gs_copy;
|
||||
|
||||
agx_assign_uvs(
|
||||
&batch->linked_varyings, &vs->uvs,
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded,
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded);
|
||||
|
||||
for (unsigned i = 0; i < VARYING_SLOT_MAX; ++i) {
|
||||
batch->uniforms.uvs_index[i] = batch->linked_varyings.slots[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Set draw ID */
|
||||
if (ctx->vs->info.uses_draw_id) {
|
||||
batch->uniforms.draw_id = drawid_offset;
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
#include "asahi/lib/agx_nir_lower_vbo.h"
|
||||
#include "asahi/lib/agx_scratch.h"
|
||||
#include "asahi/lib/agx_tilebuffer.h"
|
||||
#include "asahi/lib/agx_uvs.h"
|
||||
#include "asahi/lib/pool.h"
|
||||
#include "asahi/lib/shaders/geometry.h"
|
||||
#include "compiler/nir/nir_lower_blend.h"
|
||||
|
|
@ -29,6 +30,7 @@
|
|||
#include "util/u_range.h"
|
||||
#include "agx_helpers.h"
|
||||
#include "agx_meta.h"
|
||||
#include "agx_nir_passes.h"
|
||||
|
||||
#ifdef __GLIBC__
|
||||
#include <errno.h>
|
||||
|
|
@ -162,6 +164,11 @@ struct PACKED agx_draw_uniforms {
|
|||
|
||||
/* Zero for [0, 1] clipping, 0.5 for [-1, 1] clipping. */
|
||||
uint16_t clip_z_coeff;
|
||||
|
||||
/* Mapping from varying slots written by the last vertex stage to UVS
|
||||
* indices. This mapping must be compatible with the fragment shader.
|
||||
*/
|
||||
uint16_t uvs_index[VARYING_SLOT_MAX];
|
||||
};
|
||||
|
||||
struct PACKED agx_stage_uniforms {
|
||||
|
|
@ -221,6 +228,9 @@ struct agx_compiled_shader {
|
|||
unsigned push_range_count;
|
||||
struct agx_push_range push[AGX_MAX_PUSH_RANGES];
|
||||
|
||||
/* UVS layout for the last vertex stage */
|
||||
struct agx_unlinked_uvs_layout uvs;
|
||||
|
||||
/* Auxiliary programs, or NULL if not used */
|
||||
struct agx_compiled_shader *gs_count, *pre_gs;
|
||||
struct agx_compiled_shader *gs_copy;
|
||||
|
|
@ -366,6 +376,7 @@ struct agx_batch {
|
|||
|
||||
/* Current varyings linkage structures */
|
||||
uint32_t varyings;
|
||||
struct agx_varyings_vs linked_varyings;
|
||||
|
||||
struct agx_draw_uniforms uniforms;
|
||||
struct agx_stage_uniforms stage_uniforms[PIPE_SHADER_TYPES];
|
||||
|
|
@ -478,8 +489,6 @@ struct asahi_vs_shader_key {
|
|||
|
||||
struct {
|
||||
bool fixed_point_size;
|
||||
uint64_t outputs_flat_shaded;
|
||||
uint64_t outputs_linear_shaded;
|
||||
} hw;
|
||||
} next;
|
||||
};
|
||||
|
|
@ -512,15 +521,13 @@ struct asahi_fs_shader_key {
|
|||
|
||||
struct asahi_gs_shader_key {
|
||||
/* Rasterizer shader key */
|
||||
uint64_t outputs_flat_shaded;
|
||||
uint64_t outputs_linear_shaded;
|
||||
bool fixed_point_size;
|
||||
|
||||
/* If true, this GS is run only for its side effects (including XFB) */
|
||||
bool rasterizer_discard;
|
||||
bool padding[6];
|
||||
};
|
||||
static_assert(sizeof(struct asahi_gs_shader_key) == 24, "no holes");
|
||||
static_assert(sizeof(struct asahi_gs_shader_key) == 8, "no holes");
|
||||
|
||||
union asahi_shader_key {
|
||||
struct asahi_vs_shader_key vs;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue