From 2f63d09270880fc790b6c5f065e53937c11e398b Mon Sep 17 00:00:00 2001 From: squidbus <1249084-squidbus@users.noreply.gitlab.freedesktop.org> Date: Sat, 9 May 2026 23:21:25 -0700 Subject: [PATCH] kk: Support shaderCullDistance Uses an approach based on HoneyKrisp. In the vertex shader, each cull distance gets an extra output that is written as 1 if the cull distance is >= 0 and as 0 otherwise. In the fragment shader, if any of these extra outputs interpolates to zero, that cull distance was < 0 at every vertex, so the primitive is culled by discarding its fragments. Reviewed-by: Arcady Goldmints-Orlov Part-of: --- src/kosmickrisp/clc/kk_clc.c | 1 - src/kosmickrisp/compiler/msl_iomap.c | 8 +- .../compiler/msl_nir_lower_common.c | 137 ++++++++++++++++-- src/kosmickrisp/compiler/msl_private.h | 2 - src/kosmickrisp/compiler/nir_to_msl.c | 20 --- src/kosmickrisp/compiler/nir_to_msl.h | 3 +- src/kosmickrisp/vulkan/kk_physical_device.c | 1 + src/kosmickrisp/vulkan/kk_shader.c | 16 +- src/kosmickrisp/vulkan/kk_shader.h | 3 + 9 files changed, 150 insertions(+), 41 deletions(-) diff --git a/src/kosmickrisp/clc/kk_clc.c b/src/kosmickrisp/clc/kk_clc.c index 28b45a14d52..f17403f718e 100644 --- a/src/kosmickrisp/clc/kk_clc.c +++ b/src/kosmickrisp/clc/kk_clc.c @@ -163,7 +163,6 @@ compile(void *memctx, const uint32_t *spirv, size_t spirv_size) NIR_PASS(_, nir, nir_opt_idiv_const, 16); msl_lower_textures(nir); - msl_lower_nir_late(nir); optimize(nir); diff --git a/src/kosmickrisp/compiler/msl_iomap.c b/src/kosmickrisp/compiler/msl_iomap.c index 9885c3157da..e2b4bab6c9d 100644 --- a/src/kosmickrisp/compiler/msl_iomap.c +++ b/src/kosmickrisp/compiler/msl_iomap.c @@ -68,6 +68,12 @@ static const struct { [VARYING_SLOT_VIEWPORT] = {"viewport_array_index"}, [VARYING_SLOT_CLIP_DIST0] = {"clip_0", .user = true, .scalarized = true}, [VARYING_SLOT_CLIP_DIST1] = {"clip_1", .user = true, .scalarized = true}, + [VARYING_SLOT_CULL_DIST0] = {"cull_0", .user = true, .scalarized = true}, + [VARYING_SLOT_CULL_DIST1] = {"cull_1", .user = 
true, .scalarized = true}, + [VARYING_SLOT_CULL_PRIMITIVE] = {"cull_primitive_0", .user = true, .scalarized = true}, + /* The cull primitive slots are used to emulate cull distances in the + * fragment shader; they may spill into one extra (otherwise unused) varying */ + [VARYING_SLOT_CULL_PRIMITIVE + 1] = {"cull_primitive_1", .user = true, .scalarized = true}, [VARYING_SLOT_VAR0] = {"vary_00", .user = true}, [VARYING_SLOT_VAR1] = {"vary_01", .user = true}, [VARYING_SLOT_VAR2] = {"vary_02", .user = true}, @@ -183,7 +189,7 @@ vs_output_block(nir_shader *shader, struct nir_to_msl_ctx *ctx) } if (shader->info.clip_distance_array_size) - P_IND(ctx, "float gl_ClipDistance [[clip_distance]] [%d];", + P_IND(ctx, "float gl_ClipDistance [[clip_distance]] [%d];\n", shader->info.clip_distance_array_size); ctx->indentlevel--; P(ctx, "};\n"); diff --git a/src/kosmickrisp/compiler/msl_nir_lower_common.c b/src/kosmickrisp/compiler/msl_nir_lower_common.c index 98686418f5f..043eaac8b1d 100644 --- a/src/kosmickrisp/compiler/msl_nir_lower_common.c +++ b/src/kosmickrisp/compiler/msl_nir_lower_common.c @@ -413,28 +413,141 @@ msl_nir_lower_sample_shading(nir_shader *nir) } static bool -lower_clip_distance(nir_builder *b, nir_intrinsic_instr *intr, void *data) +lower_clip_cull_distance_write(nir_builder *b, nir_intrinsic_instr *intr, + UNUSED void *data) { if (intr->intrinsic != nir_intrinsic_store_output) return false; - nir_io_semantics io = nir_intrinsic_io_semantics(intr); - unsigned component = nir_intrinsic_component(intr); - if (io.location != VARYING_SLOT_CLIP_DIST0 && - io.location != VARYING_SLOT_CLIP_DIST1) + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + if (sem.location != VARYING_SLOT_CLIP_DIST0 && + sem.location != VARYING_SLOT_CLIP_DIST1 && + sem.location != VARYING_SLOT_CULL_DIST0 && + sem.location != VARYING_SLOT_CULL_DIST1) return false; - unsigned base = (io.location - VARYING_SLOT_CLIP_DIST0) * 4 + component; - if (intr->intrinsic == 
nir_intrinsic_store_output) { + assert(nir_src_num_components(intr->src[0]) == 1 && "must be scalarized"); + + signed location = sem.location + nir_src_as_uint(intr->src[1]); + + if (sem.location == VARYING_SLOT_CLIP_DIST0 || + sem.location == VARYING_SLOT_CLIP_DIST1) { + /* Clip distance, add write to MSL clip_distance output */ + unsigned component = + (location - VARYING_SLOT_CLIP_DIST0) * 4 + + nir_intrinsic_component(intr); + b->cursor = nir_after_instr(&intr->instr); - nir_store_clip_distance_kk(b, intr->src[0].ssa, .base = base); + nir_store_clip_distance_kk(b, intr->src[0].ssa, .base = component); + return true; } + + if (sem.location == VARYING_SLOT_CULL_DIST0 || + sem.location == VARYING_SLOT_CULL_DIST1) { + /* Cull distance, add write to cull primitive output */ + unsigned component = + (location - VARYING_SLOT_CULL_DIST0) * 4 + + nir_intrinsic_component(intr); + + b->cursor = nir_before_instr(&intr->instr); + nir_def *offs = nir_imm_int(b, component / 4); + nir_def *v = nir_b2f32(b, nir_fge_imm(b, intr->src[0].ssa, 0.0)); + + nir_store_output(b, v, offs, .component = component % 4, + .src_type = nir_type_float32, + .io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE, + .io_semantics.num_slots = 2); + return true; + } + + return false; +} + +static bool +msl_nir_lower_clip_cull_distance_vs(nir_shader *s) +{ + if (s->info.clip_distance_array_size == 0 && + s->info.cull_distance_array_size == 0) + return false; + + nir_shader_intrinsics_pass(s, lower_clip_cull_distance_write, + nir_metadata_control_flow, NULL); + + if (s->info.cull_distance_array_size > 0) + s->info.outputs_written |= + BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, + DIV_ROUND_UP(s->info.cull_distance_array_size, 4)); + return true; } -bool -msl_nir_lower_clip_distance(nir_shader *nir) +static bool +msl_nir_lower_cull_distance_fs(nir_shader *s, unsigned nr_distances) { - return nir_shader_intrinsics_pass(nir, lower_clip_distance, nir_metadata_all, - NULL); + assert(s->info.stage == 
MESA_SHADER_FRAGMENT); + + if (nr_distances == 0) + return false; + + nir_builder b_ = + nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s))); + nir_builder *b = &b_; + + /* Test each half-space */ + nir_def *culled = nir_imm_false(b); + + for (unsigned i = 0; i < nr_distances; ++i) { + /* Load the cull primitive input for this cull distance */ + nir_def *baryc = nir_load_barycentric_pixel( + b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE); + nir_def *cull = nir_load_interpolated_input( + b, 1, 32, baryc, nir_imm_int(b, 0), + .component = i & 3, + .io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE + (i / 4), + .io_semantics.num_slots = nr_distances / 4); + + /* When the cull distance is negative in the vertex shader, the resulting + * cull primitive output is zero, otherwise it is one. Thus, the + * interpolated value will be zero only if all of the primitive's vertices had + * negative cull distances, indicating the primitive should be culled. + * Note that, since the value is interpolated at the pixel center, we + * don't have to worry about corner values. 
*/ + culled = nir_ior(b, culled, nir_ball(b, nir_feq_imm(b, cull, 0))); + + } + + /* Emulate primitive culling by discarding fragments */ + nir_demote_if(b, culled); + + s->info.inputs_read |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, + DIV_ROUND_UP(nr_distances, 4)); + + s->info.fs.uses_discard = true; + return nir_progress(true, b->impl, nir_metadata_control_flow); +} + +/* Scalarize stores to CLIP_DIST* and CULL_DIST* varyings */ +static bool +scalarize_clip_cull_distance_filter(const nir_intrinsic_instr *intrin, + UNUSED const void *_data) +{ + if (intrin->intrinsic != nir_intrinsic_store_output) + return false; + nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin); + return semantics.location == VARYING_SLOT_CLIP_DIST0 || + semantics.location == VARYING_SLOT_CLIP_DIST1 || + semantics.location == VARYING_SLOT_CULL_DIST0 || + semantics.location == VARYING_SLOT_CULL_DIST1; +} + +void +msl_nir_lower_clip_cull_distance(nir_shader *nir, unsigned num_cull_distances) +{ + NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out, + scalarize_clip_cull_distance_filter, NULL); + NIR_PASS(_, nir, nir_separate_merged_clip_cull_io); + if (nir->info.stage == MESA_SHADER_FRAGMENT) + NIR_PASS(_, nir, msl_nir_lower_cull_distance_fs, num_cull_distances); + else + NIR_PASS(_, nir, msl_nir_lower_clip_cull_distance_vs); } diff --git a/src/kosmickrisp/compiler/msl_private.h b/src/kosmickrisp/compiler/msl_private.h index 67eee6eca67..8aeab07014f 100644 --- a/src/kosmickrisp/compiler/msl_private.h +++ b/src/kosmickrisp/compiler/msl_private.h @@ -86,5 +86,3 @@ bool msl_def_is_sampler(struct nir_to_msl_ctx *ctx, nir_def *def); void msl_nir_lower_subgroups(nir_shader *nir); bool msl_nir_lower_algebraic_late(nir_shader *shader); - -bool msl_nir_lower_clip_distance(nir_shader *nir); diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c index 4f603ad8d88..b7fb0851848 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.c +++ 
b/src/kosmickrisp/compiler/nir_to_msl.c @@ -2125,26 +2125,6 @@ void msl_preprocess_nir_workarounds(struct nir_shader *nir, } } -/* Scalarize stores to CLIP_DIST* varyings */ -static bool -scalarize_clip_distance_filter(const nir_intrinsic_instr *intrin, - UNUSED const void *_data) -{ - if (intrin->intrinsic != nir_intrinsic_store_output) - return false; - nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin); - return semantics.location == VARYING_SLOT_CLIP_DIST0 || - semantics.location == VARYING_SLOT_CLIP_DIST1; -} - -void -msl_lower_nir_late(nir_shader *nir) -{ - NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out, - scalarize_clip_distance_filter, NULL); - NIR_PASS(_, nir, msl_nir_lower_clip_distance); -} - static void msl_gather_info(struct nir_to_msl_ctx *ctx, struct nir_to_msl_options *options) { diff --git a/src/kosmickrisp/compiler/nir_to_msl.h b/src/kosmickrisp/compiler/nir_to_msl.h index e7ee56c4c0b..66759875043 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.h +++ b/src/kosmickrisp/compiler/nir_to_msl.h @@ -76,7 +76,8 @@ bool msl_nir_fs_io_types(nir_shader *nir); bool msl_nir_vs_io_types(nir_shader *nir); bool msl_nir_fake_guard_for_discards(struct nir_shader *nir); bool msl_nir_lower_sample_shading(nir_shader *nir); -void msl_lower_nir_late(nir_shader *nir); +void msl_nir_lower_clip_cull_distance(nir_shader *nir, + unsigned num_cull_distances); bool msl_gather_uses_per_draw_data(nir_shader *nir); diff --git a/src/kosmickrisp/vulkan/kk_physical_device.c b/src/kosmickrisp/vulkan/kk_physical_device.c index 07af93abcc3..f3a3de15c2d 100644 --- a/src/kosmickrisp/vulkan/kk_physical_device.c +++ b/src/kosmickrisp/vulkan/kk_physical_device.c @@ -193,6 +193,7 @@ kk_get_device_features( .samplerAnisotropy = true, .sampleRateShading = true, .shaderClipDistance = true, + .shaderCullDistance = true, .shaderImageGatherExtended = true, .shaderInt16 = true, .shaderInt64 = true, diff --git a/src/kosmickrisp/vulkan/kk_shader.c 
b/src/kosmickrisp/vulkan/kk_shader.c index 91bada8acd9..80565403f00 100644 --- a/src/kosmickrisp/vulkan/kk_shader.c +++ b/src/kosmickrisp/vulkan/kk_shader.c @@ -643,6 +643,7 @@ gather_shader_info(struct kk_shader *shader, nir_shader *nir, if (nir->info.stage == MESA_SHADER_VERTEX) { nir_shader_intrinsics_pass(nir, gather_vs_inputs, nir_metadata_all, &shader->info.vs.attribs_read); + shader->info.vs.num_cull_distances = nir->info.cull_distance_array_size; } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { /* Some meta shaders like vk-meta-resolve will have depth_layout as NONE * which is not a valid Metal layout */ @@ -687,6 +688,7 @@ modify_nir_info(nir_shader *nir) static VkResult kk_compile_shader(struct kk_device *dev, struct vk_shader_compile_info *info, + struct kk_shader *prev_stage, const struct vk_graphics_pipeline_state *state, const VkAllocationCallbacks *pAllocator, struct vk_shader **shader_out) @@ -715,7 +717,9 @@ kk_compile_shader(struct kk_device *dev, struct vk_shader_compile_info *info, if (info->stage == MESA_SHADER_VERTEX) { kk_lower_vs_vbo(nir, state, info->robustness); } - msl_lower_nir_late(nir); + unsigned num_cull_distances = + prev_stage ? prev_stage->info.vs.num_cull_distances : 0; + msl_nir_lower_clip_cull_distance(nir, num_cull_distances); msl_optimize_nir(nir); modify_nir_info(nir); @@ -780,7 +784,7 @@ kk_compile_nir_shader(struct kk_device *dev, nir_shader *nir, struct vk_shader *shader = NULL; nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - VkResult result = kk_compile_shader(dev, &info, NULL, alloc, &shader); + VkResult result = kk_compile_shader(dev, &info, NULL, NULL, alloc, &shader); if (result != VK_SUCCESS) return result; @@ -1162,8 +1166,11 @@ kk_compile_shaders(struct vk_device *device, uint32_t shader_count, nir_opts, NULL); for (uint32_t i = 0; i < shader_count; i++) { + struct kk_shader *prev_stage = i > 0 ? 
+ container_of(shaders_out[i - 1], struct kk_shader, vk) : NULL; result = - kk_compile_shader(dev, &infos[i], state, pAllocator, &shaders_out[i]); + kk_compile_shader(dev, &infos[i], prev_stage, state, pAllocator, + &shaders_out[i]); if (result != VK_SUCCESS) { /* Clean up all the shaders before this point */ for (uint32_t j = 0; j < i; j++) @@ -1203,7 +1210,8 @@ kk_compile_shaders(struct vk_device *device, uint32_t shader_count, }; struct vk_shader *frag_shader; result = - kk_compile_shader(dev, &info, state, &dev->vk.alloc, &frag_shader); + kk_compile_shader(dev, &info, fs, state, &dev->vk.alloc, + &frag_shader); if (result != VK_SUCCESS) { for (uint32_t i = 0; i < shader_count; i++) diff --git a/src/kosmickrisp/vulkan/kk_shader.h b/src/kosmickrisp/vulkan/kk_shader.h index 693526a9157..e4ac8d48c2c 100644 --- a/src/kosmickrisp/vulkan/kk_shader.h +++ b/src/kosmickrisp/vulkan/kk_shader.h @@ -33,6 +33,9 @@ struct kk_shader_info { uint32_t attribs_read; uint32_t sample_count; + /* Required for fragment shader cull distance discards. */ + uint8_t num_cull_distances; + /* Data needed for serialization. */ enum mtl_primitive_topology_class topology; enum mtl_pixel_format rt_formats[MAX_DRAW_BUFFERS];