diff --git a/src/kosmickrisp/libkk/kk_geometry.cl b/src/kosmickrisp/libkk/kk_geometry.cl index c54e0059e0e..b240bf2fb5e 100644 --- a/src/kosmickrisp/libkk/kk_geometry.cl +++ b/src/kosmickrisp/libkk/kk_geometry.cl @@ -10,21 +10,22 @@ #include "poly/geometry.h" #include "poly/tessellator.h" -KERNEL(1) +KERNEL(1024) libkk_prefix_sum_tess(global struct poly_tess_params *p) { + local uint scratch[32]; + poly_prefix_sum(scratch, p->counts, p->nr_patches, 1 /* words */, + 0 /* word */, 1024); + + /* After prefix summing, we know the total # of indices, so allocate the + * index buffer now. Elect a thread for the allocation. + */ + barrier(CLK_LOCAL_MEM_FENCE); if (cl_local_id.x != 0) return; /* The last element of an inclusive prefix sum is the total sum */ - uint total = 0; - - if (p->nr_patches > 0) { - for (uint32_t i = 0u; i < p->nr_patches; ++i) { - total += p->counts[i]; - p->counts[i] = total; - } - } + uint total = p->nr_patches > 0 ? p->counts[p->nr_patches - 1] : 0; /* Allocate 4-byte indices */ uint32_t elsize_B = sizeof(uint32_t); diff --git a/src/kosmickrisp/vulkan/kk_cmd_draw.c b/src/kosmickrisp/vulkan/kk_cmd_draw.c index b94fb5cbd1d..201a145a7ab 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_draw.c +++ b/src/kosmickrisp/vulkan/kk_cmd_draw.c @@ -1768,7 +1768,7 @@ kk_launch_tess(struct kk_cmd_buffer *cmd, struct kk_draw_data draw, state); mtl_memory_barrier_with_scope(enc, MTL_BARRIER_SCOPE_BUFFERS); - libkk_prefix_sum_tess(cmd, kk_grid_1d(1u), true, state); + libkk_prefix_sum_tess(cmd, kk_grid_1d(1024u), true, state); mtl_memory_barrier_with_scope(enc, MTL_BARRIER_SCOPE_BUFFERS); libkk_tessellate(cmd, grid_tess, true, info.mode, POLY_TESS_MODE_WITH_COUNTS,