From beabd93c1306a01b6059c8b8bc53514f01f85f5e Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 27 Jan 2024 15:45:56 -0400 Subject: [PATCH] asahi,agx: use hw clip distance rather than lowering in the fragment shader. this is what we're "supposed" to do, what apple does, and it saves a byte of the fs key Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_compile.c | 20 +++++++-- src/asahi/compiler/agx_compile.h | 6 +++ src/asahi/compiler/agx_nir.h | 1 + .../compiler/agx_nir_lower_clip_distance.c | 42 +++++++++++++++++++ src/asahi/compiler/meson.build | 1 + src/gallium/drivers/asahi/agx_state.c | 18 ++++---- src/gallium/drivers/asahi/agx_state.h | 1 - 7 files changed, 75 insertions(+), 14 deletions(-) create mode 100644 src/asahi/compiler/agx_nir_lower_clip_distance.c diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index f35bc7535d1..e52fea05b0e 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -449,14 +449,18 @@ agx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr) unsigned imm_index = b->shader->out->varyings.vs.slots[sem.location]; - if (sem.location == VARYING_SLOT_LAYER) { + if (sem.location == VARYING_SLOT_LAYER || + sem.location == VARYING_SLOT_CLIP_DIST0) { /* Separate slots used for the sysval vs the varying. The default slot * above is for the varying. Change for the sysval. */ assert(sem.no_sysval_output || sem.no_varying); - if (sem.no_varying) - imm_index = b->shader->out->varyings.vs.layer_viewport_slot; + if (sem.no_varying) { + imm_index = sem.location == VARYING_SLOT_LAYER + ? 
b->shader->out->varyings.vs.layer_viewport_slot + : b->shader->out->varyings.vs.clip_dist_slot; + } } assert(imm_index < ~0); @@ -2589,6 +2593,12 @@ agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings, base += 1; } + if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0) { + varyings->clip_dist_slot = base; + varyings->nr_clip_dists = nir->info.clip_distance_array_size; + base += varyings->nr_clip_dists; + } + /* All varyings linked now */ varyings->nr_index = base; } @@ -3071,6 +3081,10 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key, if (nir->info.stage == MESA_SHADER_FRAGMENT) out->tag_write_disable = !nir->info.writes_memory; + if (nir->info.stage == MESA_SHADER_VERTEX && + (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)) + NIR_PASS(_, nir, agx_nir_lower_clip_distance); + bool needs_libagx = nir->info.stage == MESA_SHADER_GEOMETRY; /* Late tilebuffer lowering creates multisampled image stores */ diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index fae10dba8f8..6e1a525360b 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -46,6 +46,12 @@ struct agx_varyings_vs { * is written. What's at slots[VARYING_SLOT_LAYER] is the varying output. */ unsigned layer_viewport_slot; + + /* Base slot for the clip distance sysval outputs, or ~0 if none is written. + * What's at slots[VARYING_SLOT_CLIP_DIST0] is the varying output. 
+ */ + unsigned clip_dist_slot; + unsigned nr_clip_dists; }; /* Conservative bound, * 4 due to offsets (TODO: maybe worth eliminating diff --git a/src/asahi/compiler/agx_nir.h b/src/asahi/compiler/agx_nir.h index b52a77c01fe..2d9f5e34052 100644 --- a/src/asahi/compiler/agx_nir.h +++ b/src/asahi/compiler/agx_nir.h @@ -16,6 +16,7 @@ bool agx_nir_fuse_algebraic_late(struct nir_shader *shader); bool agx_nir_fence_images(struct nir_shader *shader); bool agx_nir_lower_multisampled_image_store(struct nir_shader *s); bool agx_nir_lower_layer(struct nir_shader *s); +bool agx_nir_lower_clip_distance(struct nir_shader *s); bool agx_nir_lower_cull_distance_vs(struct nir_shader *s); bool agx_nir_lower_subgroups(struct nir_shader *s); diff --git a/src/asahi/compiler/agx_nir_lower_clip_distance.c b/src/asahi/compiler/agx_nir_lower_clip_distance.c new file mode 100644 index 00000000000..589a1b48ed9 --- /dev/null +++ b/src/asahi/compiler/agx_nir_lower_clip_distance.c @@ -0,0 +1,42 @@ +/* + * Copyright 2023 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "agx_nir.h" +/* Split one store_output to VARYING_SLOT_CLIP_DIST0 in two: the original is marked no_sysval_output, a clone placed right after it is marked no_varying. Returns true iff intr was split. */ +static bool +lower(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data) +{ + if (intr->intrinsic != nir_intrinsic_store_output) + return false; + /* Only stores whose I/O semantics name VARYING_SLOT_CLIP_DIST0 are handled; everything else is left untouched. */ + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + if (sem.location != VARYING_SLOT_CLIP_DIST0) + return false; + /* Duplicate the store; the copy is inserted directly after the original below. */ + nir_instr *clone = nir_instr_clone(b->shader, &intr->instr); + nir_intrinsic_instr *lowered = nir_instr_as_intrinsic(clone); + + b->cursor = nir_after_instr(&intr->instr); + nir_builder_instr_insert(b, clone); + /* The copy gets the original semantics plus no_varying. */ + nir_io_semantics new_sem = sem; + new_sem.no_varying = true; + nir_intrinsic_set_io_semantics(lowered, new_sem); + /* The original store gets no_sysval_output instead. */ + sem.no_sysval_output = true; + nir_intrinsic_set_io_semantics(intr, sem); + return true; +} +/* Hands lower() to nir_shader_intrinsics_pass for shader s and returns that call's result; asserts that s writes VARYING_BIT_CLIP_DIST0. */ +bool +agx_nir_lower_clip_distance(nir_shader *s) +{ + assert(s->info.outputs_written & VARYING_BIT_CLIP_DIST0); + + return 
nir_shader_intrinsics_pass( + s, lower, nir_metadata_block_index | nir_metadata_dominance, NULL); +} diff --git a/src/asahi/compiler/meson.build b/src/asahi/compiler/meson.build index 2f8a732f315..fe7358d452c 100644 --- a/src/asahi/compiler/meson.build +++ b/src/asahi/compiler/meson.build @@ -9,6 +9,7 @@ libasahi_agx_files = files( 'agx_insert_waits.c', 'agx_nir_fence_images.c', 'agx_nir_lower_address.c', + 'agx_nir_lower_clip_distance.c', 'agx_nir_lower_cull_distance.c', 'agx_nir_lower_frag_sidefx.c', 'agx_nir_lower_sample_mask.c', diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 31c3ce8d860..0d4ece8e296 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -2016,15 +2016,6 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, NIR_PASS(_, nir, agx_nir_lower_stats_fs); } - /* Clip plane lowering creates discard instructions, so run that before - * lowering discards. Note: this introduces extra loads from the clip - * plane outputs, but they use smooth interpolation so it does not affect - * the flat/linear masks that get propagated back to the VS. 
- */ - if (key->clip_plane_enable) { - NIR_PASS(_, nir, nir_lower_clip_fs, key->clip_plane_enable, false); - } - /* Similarly for cull distancing lowering */ if (key->cull_distance_size) { NIR_PASS(_, nir, agx_nir_lower_cull_distance_fs, @@ -2587,7 +2578,6 @@ agx_update_fs(struct agx_batch *batch) .cull_distance_size = ctx->stage[MESA_SHADER_VERTEX].shader->info.cull_distance_size, - .clip_plane_enable = ctx->rast->base.clip_plane_enable, .polygon_stipple = ctx->rast->base.poly_stipple_enable && @@ -3772,6 +3762,14 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines, cfg.viewport_target = vs->info.writes_layer_viewport; cfg.render_target = vs->info.writes_layer_viewport; cfg.frag_coord_z = fs->info.varyings.fs.reads_z; + cfg.clip_distance_plane_0 = vs->info.varyings.vs.nr_clip_dists > 0; + cfg.clip_distance_plane_1 = vs->info.varyings.vs.nr_clip_dists > 1; + cfg.clip_distance_plane_2 = vs->info.varyings.vs.nr_clip_dists > 2; + cfg.clip_distance_plane_3 = vs->info.varyings.vs.nr_clip_dists > 3; + cfg.clip_distance_plane_4 = vs->info.varyings.vs.nr_clip_dists > 4; + cfg.clip_distance_plane_5 = vs->info.varyings.vs.nr_clip_dists > 5; + cfg.clip_distance_plane_6 = vs->info.varyings.vs.nr_clip_dists > 6; + cfg.clip_distance_plane_7 = vs->info.varyings.vs.nr_clip_dists > 7; assert(cfg.point_size || !is_points); } diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 7aa6e130c35..7943c14938b 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -460,7 +460,6 @@ struct asahi_fs_shader_key { bool polygon_stipple; uint8_t cull_distance_size; - uint8_t clip_plane_enable; uint8_t nr_samples; enum pipe_format rt_formats[PIPE_MAX_COLOR_BUFS]; };