diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index 047e6294ea7..a920b7b888b 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -15,3 +15,4 @@ GL_ARB_shader_texture_image_samples on Asahi GL_ARB_indirect_parameters on Asahi GL_ARB_viewport_array on Asahi GL_ARB_fragment_layer_viewport on Asahi +GL_ARB_cull_distance on Asahi diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 396753f82b9..421379cfadc 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -2502,6 +2502,10 @@ agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings, /* These are always flat-shaded from the FS perspective */ key->vs.outputs_flat_shaded |= VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT; + /* The internal cull distance slots are always linearly-interpolated */ + key->vs.outputs_linear_shaded |= + BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, 2); + assert(!(key->vs.outputs_flat_shaded & key->vs.outputs_linear_shaded)); /* Smooth 32-bit user bindings go next */ @@ -2941,6 +2945,10 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx, } } else if (nir->info.stage == MESA_SHADER_VERTEX) { out->has_edgeflags = nir->info.outputs_written & VARYING_BIT_EDGE; + out->cull_distance_size = nir->info.cull_distance_array_size; + + if (out->cull_distance_size) + NIR_PASS_V(nir, agx_nir_lower_cull_distance_vs); } /* Clean up deref gunk after lowering I/O */ diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index d2e5c52e911..6a9172bf152 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -98,6 +98,7 @@ union agx_varyings { struct agx_uncompiled_shader_info { uint64_t inputs_flat_shaded; uint64_t inputs_linear_shaded; + uint8_t cull_distance_size; bool has_edgeflags; }; @@ -243,6 +244,9 @@ void agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx, bool 
agx_nir_lower_discard_zs_emit(nir_shader *s); +void agx_nir_lower_cull_distance_fs(struct nir_shader *s, + unsigned nr_distances); + bool agx_nir_needs_texture_crawl(nir_instr *instr); void agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key, diff --git a/src/asahi/compiler/agx_nir.h b/src/asahi/compiler/agx_nir.h index 4ef3a326299..5da37949ecc 100644 --- a/src/asahi/compiler/agx_nir.h +++ b/src/asahi/compiler/agx_nir.h @@ -16,5 +16,6 @@ bool agx_nir_fuse_algebraic_late(struct nir_shader *shader); bool agx_nir_fence_images(struct nir_shader *shader); bool agx_nir_lower_multisampled_image_store(struct nir_shader *s); void agx_nir_lower_layer(struct nir_shader *s); +void agx_nir_lower_cull_distance_vs(struct nir_shader *s); #endif diff --git a/src/asahi/compiler/agx_nir_lower_cull_distance.c b/src/asahi/compiler/agx_nir_lower_cull_distance.c new file mode 100644 index 00000000000..c53f420e960 --- /dev/null +++ b/src/asahi/compiler/agx_nir_lower_cull_distance.c @@ -0,0 +1,112 @@ +/* + * Copyright 2023 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "util/macros.h" +#include "agx_compile.h" +#include "agx_nir.h" +#include "glsl_types.h" + +/* + * Lower cull distance to discard. From the spec: + * + * If the cull distance for any enabled cull half-space is negative for all + * of the vertices of the primitive under consideration, the primitive is + * discarded. + * + * We don't have a direct way to read the cull distance at non-provoking + * vertices in the fragment shader. Instead, we interpolate the quantity: + * + * cull distance >= 0.0 ? 1.0 : 0.0 + * + * Then, the discard condition is equivalent to: + * + * "quantity is zero for all vertices of the primitive" + * + * which by linearity is equivalent to: + * + * quantity is zero somewhere in the primitive and quantity has zero + * first-order screen space derivatives. 
+ * + * which we can determine with ease in the fragment shader. + */ + +static bool +lower_write(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data) +{ + if (intr->intrinsic != nir_intrinsic_store_output) + return false; + + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + if (sem.location != VARYING_SLOT_CULL_DIST0) + return false; + + nir_instr *clone = nir_instr_clone(b->shader, &intr->instr); + nir_intrinsic_instr *lowered = nir_instr_as_intrinsic(clone); + + b->cursor = nir_after_instr(&intr->instr); + nir_def *v = nir_b2f32(b, nir_fge_imm(b, intr->src[0].ssa, 0.0)); + + nir_builder_instr_insert(b, clone); + nir_src_rewrite(&lowered->src[0], v); + + sem.location = VARYING_SLOT_CULL_PRIMITIVE; + nir_intrinsic_set_io_semantics(lowered, sem); + return true; +} + +void +agx_nir_lower_cull_distance_vs(nir_shader *s) +{ + assert(s->info.stage == MESA_SHADER_VERTEX); + assert(s->info.outputs_written & VARYING_BIT_CULL_DIST0); + + nir_shader_intrinsics_pass( + s, lower_write, nir_metadata_block_index | nir_metadata_dominance, NULL); + + s->info.outputs_written |= + BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, + DIV_ROUND_UP(s->info.cull_distance_array_size, 4)); +} + +void +agx_nir_lower_cull_distance_fs(nir_shader *s, unsigned nr_distances) +{ + assert(s->info.stage == MESA_SHADER_FRAGMENT); + assert(nr_distances > 0); + + nir_builder b_ = + nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s))); + nir_builder *b = &b_; + + /* Test each half-space */ + nir_def *culled = nir_imm_false(b); + + for (unsigned i = 0; i < nr_distances; ++i) { + /* Load the coefficient vector for this half-space. Imaginapple + * partial derivatives and the value somewhere. 
+ */ + nir_def *cf = nir_load_coefficients_agx( + b, .component = i & 3, + .io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE + (i / 4), + .io_semantics.num_slots = nr_distances / 4, + .interp_mode = INTERP_MODE_NOPERSPECTIVE); + + /* If the coefficients are identically zero, then the quantity is + * zero across the primitive <==> cull distance is negative across the + * primitive <==> the primitive is culled. + */ + culled = nir_ior(b, culled, nir_ball(b, nir_feq_imm(b, cf, 0))); + } + + /* Emulate primitive culling by discarding fragments */ + nir_discard_if(b, culled); + + s->info.inputs_read |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, + DIV_ROUND_UP(nr_distances, 4)); + + s->info.fs.uses_discard = true; +} diff --git a/src/asahi/compiler/meson.build b/src/asahi/compiler/meson.build index f83b572c16a..2a285d55d4a 100644 --- a/src/asahi/compiler/meson.build +++ b/src/asahi/compiler/meson.build @@ -9,6 +9,7 @@ libasahi_agx_files = files( 'agx_insert_waits.c', 'agx_nir_fence_images.c', 'agx_nir_lower_address.c', + 'agx_nir_lower_cull_distance.c', 'agx_nir_lower_frag_sidefx.c', 'agx_nir_lower_sample_mask.c', 'agx_nir_lower_discard_zs_emit.c', diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index a9b8207cfc1..2a81893b7f0 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -1589,6 +1589,8 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: case PIPE_CAP_SHADER_PACK_HALF_FLOAT: case PIPE_CAP_FS_FINE_DERIVATIVE: + case PIPE_CAP_CULL_DISTANCE_NOCOMBINE: + case PIPE_CAP_NIR_COMPACT_ARRAYS: return 1; case PIPE_CAP_CLIP_HALFZ: @@ -1647,6 +1649,7 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DRAW_PARAMETERS: case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: + case PIPE_CAP_CULL_DISTANCE: return 1; case PIPE_CAP_SURFACE_SAMPLE_COUNT: /* TODO: MSRTT */ diff --git 
a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 1110f9ca23a..4e2fb95e063 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1817,6 +1817,12 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, NIR_PASS_V(nir, nir_lower_clip_fs, key->clip_plane_enable, false); } + /* Similarly for cull distance lowering */ + if (key->cull_distance_size) { + NIR_PASS_V(nir, agx_nir_lower_cull_distance_fs, + key->cull_distance_size); + } + /* Discards must be lowering before lowering MSAA to handle discards */ NIR_PASS_V(nir, agx_nir_lower_discard_zs_emit); @@ -2280,8 +2286,9 @@ agx_update_fs(struct agx_batch *batch) * sample_mask: SAMPLE_MASK * reduced_prim: PRIM */ - if (!(ctx->dirty & (AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_BLEND | - AGX_DIRTY_SAMPLE_MASK | AGX_DIRTY_PRIM))) + if (!(ctx->dirty & + (AGX_DIRTY_VS_PROG | AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | + AGX_DIRTY_BLEND | AGX_DIRTY_SAMPLE_MASK | AGX_DIRTY_PRIM))) return false; unsigned nr_samples = util_framebuffer_get_num_samples(&batch->key); @@ -2289,6 +2296,8 @@ agx_update_fs(struct agx_batch *batch) struct asahi_fs_shader_key key = { .nr_cbufs = batch->key.nr_cbufs, + .cull_distance_size = + ctx->stage[MESA_SHADER_VERTEX].shader->info.cull_distance_size, .clip_plane_enable = ctx->rast->base.clip_plane_enable, .nr_samples = nr_samples, .layered = util_framebuffer_get_num_layers(&batch->key) > 1, diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 093aed42f6c..f2b60e39ccf 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -385,6 +385,7 @@ struct asahi_fs_shader_key { */ bool api_sample_mask; + uint8_t cull_distance_size; uint8_t clip_plane_enable; uint8_t nr_samples; bool multisample;