kk: Reuse as many poly utilities as possible for unrolling
We cannot use poly_unroll_restart since we may require a promoted index
size when we want to disable primitive restart for 16-bit indices. For
32-bit indices we cannot do much...

Signed-off-by: Aitor Camacho <aitor@lunarg.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40864>
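Reviewer note on the message: Metal (the backend here) effectively has no switch to disable primitive restart — the all-ones index value always cuts strips — so a 16-bit index buffer that uses 0xFFFF as a real vertex index has to be widened to 32 bits during unrolling, while a 32-bit 0xFFFFFFFF has no wider type to escape to. A hypothetical host-side illustration (not driver code):

#include <stddef.h>
#include <stdint.h>

/* Widen a 16-bit index stream to 32 bits so that 0xFFFF stops matching
 * the restart sentinel and draws as the ordinary vertex index 65535. */
static void
promote_indices_u16_to_u32(const uint16_t *in, uint32_t *out, size_t count)
{
   for (size_t i = 0; i < count; i++)
      out[i] = in[i];
}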
parent 1d57784308
commit 0c6019e265

8 changed files with 140 additions and 242 deletions
@@ -32,7 +32,7 @@ static const struct spirv_to_nir_options spirv_options = {
    .temp_addr_format = nir_address_format_62bit_generic,
    .constant_addr_format = nir_address_format_64bit_global,
    .create_library = true,
-   .printf = true,
+   .printf = false, /* TODO_KOSMICKRISP Enable */
 };
 
 /* Standard optimization loop */
@@ -1287,6 +1287,12 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
       ctx->indentlevel--;
       P_IND(ctx, "}\n");
       break;
+   /* This is only used by OpenCL shaders (because poly uses printf_abort even
+    * if we don't expose printf; need to actually fix this or implement printf
+    * in KK). Kinda hacky, but we need to get things going. TODO_KOSMICKRISP */
+   case nir_intrinsic_printf_abort:
+      P_IND(ctx, "return;\n");
+      break;
    case nir_intrinsic_load_shared:
       assert(nir_intrinsic_base(instr) == 0);
       P(ctx, "*(threadgroup %s*)&shared_data[",
@@ -8,214 +8,59 @@
 #include "compiler/libcl/libcl_vk.h"
 #include "compiler/shader_enums.h"
-
-static uint
-libkk_vertex_id_for_line_loop(uint prim, uint vert, uint num_prims)
-{
-   /* (0, 1), (1, 2), (2, 0) */
-   if (prim == (num_prims - 1) && vert == 1)
-      return 0;
-   else
-      return prim + vert;
-}
-
-/* Swap the two non-provoking vertices in odd triangles. This generates a
- * vertex ID list with a consistent winding order.
- *
- * For fixed prim and flatshade_first, the map: [0, 1, 2] -> [0, 1, 2] is its
- * own inverse. This lets us reuse it for both vertex fetch and transform
- * feedback.
- */
-static uint
-libagx_map_vertex_in_tri_strip(uint prim, uint vert, bool flatshade_first)
-{
-   unsigned pv = flatshade_first ? 0 : 2;
-
-   bool even = (prim & 1) == 0;
-   bool provoking = vert == pv;
-
-   return (provoking || even) ? vert : ((3 - pv) - vert);
-}
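Reviewer note: the deleted comment claims this map is its own inverse. A standalone check (illustrative C, not part of the patch) mirroring the helper confirms that for both provoking-vertex conventions:

#include <assert.h>
#include <stdbool.h>

/* Mirror of the deleted libagx_map_vertex_in_tri_strip, for checking only. */
static unsigned
map_vertex_in_tri_strip(unsigned prim, unsigned vert, bool flatshade_first)
{
   unsigned pv = flatshade_first ? 0 : 2;
   bool even = (prim & 1) == 0;
   bool provoking = vert == pv;

   return (provoking || even) ? vert : ((3 - pv) - vert);
}

int
main(void)
{
   for (unsigned prim = 0; prim < 4; prim++)
      for (unsigned vert = 0; vert < 3; vert++)
         for (int ff = 0; ff <= 1; ff++) {
            /* Applying the map twice must give back the original vert. */
            unsigned once = map_vertex_in_tri_strip(prim, vert, ff);
            assert(map_vertex_in_tri_strip(prim, once, ff) == vert);
         }
   return 0;
}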
+#include "poly/cl/restart.h"
+#include "poly/geometry.h"
 
 static uint
-libkk_vertex_id_for_tri_fan(uint prim, uint vert, bool flatshade_first)
+load_index(uintptr_t index_buffer, uint32_t index_buffer_range_el, uint id,
+           uint index_size)
 {
-   /* Vulkan spec section 20.1.7 gives (i + 1, i + 2, 0) for a provoking
-    * first. OpenGL instead wants (0, i + 1, i + 2) with a provoking last.
-    * Piglit clipflat expects us to switch between these orders depending on
-    * provoking vertex, to avoid trivializing the fan.
-    *
-    * Rotate accordingly.
-    */
-   if (flatshade_first) {
-      vert = (vert == 2) ? 0 : (vert + 1);
-   }
-
-   /* The simpler form assuming last is provoking. */
-   return (vert == 0) ? 0 : prim + vert;
-}
-
-static uint
-libkk_vertex_id_for_tri_strip_adj(uint prim, uint vert, uint num_prims,
-                                  bool flatshade_first)
-{
-   /* See Vulkan spec section 20.1.11 "Triangle Strips With Adjacency".
-    *
-    * There are different cases for first/middle/last/only primitives and for
-    * odd/even primitives. Determine which case we're in.
-    */
-   bool last = prim == (num_prims - 1);
-   bool first = prim == 0;
-   bool even = (prim & 1) == 0;
-   bool even_or_first = even || first;
-
-   /* When the last vertex is provoking, we rotate the primitives
-    * accordingly. This seems required for OpenGL.
-    */
-   if (!flatshade_first && !even_or_first) {
-      vert = (vert + 4u) % 6u;
-   }
-
-   /* Offsets per the spec. The spec lists 6 cases with 6 offsets. Luckily,
-    * there are lots of patterns we can exploit, avoiding a full 6x6 LUT.
-    *
-    * Here we assume the first vertex is provoking, the Vulkan default.
-    */
-   const uint offsets[6] = {
-      0,
-      first ? 1 : (even ? -2 : 3),
-      even_or_first ? 2 : 4,
-      last ? 5 : 6,
-      even_or_first ? 4 : 2,
-      even_or_first ? 3 : -2,
-   };
-
-   /* Ensure NIR can see thru the local array */
-   uint offset = 0;
-   for (uint i = 1; i < 6; ++i) {
-      if (i == vert)
-         offset = offsets[i];
-   }
-
-   /* Finally add to the base of the primitive */
-   return (prim * 2) + offset;
-}
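Reviewer note: the offset table is easier to trust with one case worked out. For an even, middle primitive (prim == 2, first vertex provoking), the flags are last = false, first = false, even = true, so the table evaluates to {0, -2, 2, 6, 4, 3}; adding the base prim * 2 == 4 reproduces the (2i, 2i-2, 2i+2, 2i+6, 2i+4, 2i+3) pattern of Vulkan spec section 20.1.11. A standalone check (illustrative, not part of the patch):

#include <assert.h>

int
main(void)
{
   const unsigned prim = 2;                          /* even, middle prim */
   const int offsets[6] = {0, -2, 2, 6, 4, 3};       /* table, evaluated */
   const unsigned expected[6] = {4, 2, 6, 10, 8, 7}; /* spec values, i = 2 */

   for (unsigned vert = 0; vert < 6; vert++)
      assert((int)(prim * 2) + offsets[vert] == (int)expected[vert]);
   return 0;
}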
-
-static uint
-vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, uint prim,
-                       uint vert, uint num_prims)
-{
-   switch (mode) {
-   case MESA_PRIM_POINTS:
-   case MESA_PRIM_LINES:
-   case MESA_PRIM_TRIANGLES:
-   case MESA_PRIM_LINES_ADJACENCY:
-   case MESA_PRIM_TRIANGLES_ADJACENCY:
-      /* Regular primitive: every N vertices defines a primitive */
-      return (prim * mesa_vertices_per_prim(mode)) + vert;
-
-   case MESA_PRIM_LINE_LOOP:
-      return libkk_vertex_id_for_line_loop(prim, vert, num_prims);
-
-   case MESA_PRIM_LINE_STRIP:
-   case MESA_PRIM_LINE_STRIP_ADJACENCY:
-      /* (i, i + 1) or (i, ..., i + 3) */
-      return prim + vert;
-
-   case MESA_PRIM_TRIANGLE_STRIP: {
-      /* Order depends on the provoking vert.
-       *
-       * First: (0, 1, 2), (1, 3, 2), (2, 3, 4).
-       * Last: (0, 1, 2), (2, 1, 3), (2, 3, 4).
-       *
-       * Pull the (maybe swapped) vert from the corresponding primitive.
-       */
-      return prim + libagx_map_vertex_in_tri_strip(prim, vert, flatshade_first);
-   }
-
-   case MESA_PRIM_TRIANGLE_FAN:
-      return libkk_vertex_id_for_tri_fan(prim, vert, flatshade_first);
-
-   case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
-      return libkk_vertex_id_for_tri_strip_adj(prim, vert, num_prims,
-                                               flatshade_first);
-
-   default:
-      return 0;
-   }
-}
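Reviewer note: to see the strip mapping in action, a standalone sketch (not part of the patch) decomposing a 5-vertex triangle strip with the provoking-last convention reproduces the "Last:" sequence from the comment above, (0,1,2) (2,1,3) (2,3,4):

#include <stdbool.h>
#include <stdio.h>

/* Provoking-last (pv == 2) specialization of the strip map above. */
static unsigned
map_vertex_in_tri_strip_last(unsigned prim, unsigned vert)
{
   bool even = (prim & 1) == 0;

   return (vert == 2 || even) ? vert : (1 - vert); /* swap verts 0 and 1 */
}

int
main(void)
{
   unsigned num_verts = 5, num_prims = num_verts - 2;

   for (unsigned prim = 0; prim < num_prims; prim++)
      printf("(%u,%u,%u)\n",
             prim + map_vertex_in_tri_strip_last(prim, 0),
             prim + map_vertex_in_tri_strip_last(prim, 1),
             prim + map_vertex_in_tri_strip_last(prim, 2));
   return 0;
}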
-
-static void
-store_index(global uint8_t *index_buffer, uint index_size_B, uint id,
-            uint value)
-{
-   global uint32_t *out_32 = (global uint32_t *)index_buffer;
-   global uint16_t *out_16 = (global uint16_t *)index_buffer;
-   global uint8_t *out_8 = (global uint8_t *)index_buffer;
-
-   if (index_size_B == 4)
-      out_32[id] = value;
-   else if (index_size_B == 2)
-      out_16[id] = value;
-   else
-      out_8[id] = value;
-}
-
-static uint
-load_index(constant uint8_t *index_buffer, uint32_t index_buffer_range_el,
-           uint id, uint index_size)
-{
-   /* We have no index buffer, index is the id */
+   /* We have no index buffer, index is the id. Required for index promotion. */
    if (index_buffer == 0u)
       return id;
 
-   /* When no index_buffer is present, index_buffer_range_el is vtx count */
-   bool oob = id >= index_buffer_range_el;
-
-   /* If the load would be out-of-bounds, load the first element which is
-    * assumed valid. If the application index buffer is empty with robustness2,
-    * index_buffer will point to a zero sink where only the first is valid.
-    */
-   if (oob) {
-      id = 0u;
-   }
-
-   uint el;
-   if (index_size == 1) {
-      el = ((constant uint8_t *)index_buffer)[id];
-   } else if (index_size == 2) {
-      el = ((constant uint16_t *)index_buffer)[id];
-   } else {
-      el = ((constant uint32_t *)index_buffer)[id];
-   }
-
-   /* D3D robustness semantics. TODO: Optimize? */
-   if (oob) {
-      el = 0;
-   }
-
-   return el;
+   return poly_load_index(index_buffer, index_buffer_range_el, id, index_size);
 }
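Reviewer note: the wrapper now delegates the bounds handling to poly_load_index; the semantics the deleted code implemented (and which poly is presumably expected to preserve) can be sketched standalone, simplified here to 32-bit indices:

#include <stdint.h>

/* Illustrative only: an out-of-bounds index load is redirected to element 0
 * (assumed readable, e.g. a zero sink) so it cannot fault, yet still returns
 * 0, matching D3D-style robustBufferAccess2 behaviour. */
static uint32_t
load_index_robust(const uint32_t *indices, uint32_t count_el, uint32_t id)
{
   int oob = id >= count_el;

   /* Clamp the address so the load itself is always in bounds... */
   uint32_t el = indices[oob ? 0 : id];

   /* ...but still report 0 for out-of-bounds reads. */
   return oob ? 0 : el;
}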
 
 /*
- * Return the ID of the first thread in the workgroup where cond is true, or
- * 1024 if cond is false across the workgroup.
+ * Same as poly_setup_unroll_for_draw but for non-indexed. Only changes how the
+ * out_draw is built.
  */
-static uint
-first_true_thread_in_workgroup(bool cond, local uint *scratch)
+static inline global void *
+kk_setup_unroll_for_non_indexed_draw(global struct poly_heap *heap,
+                                     constant uint *in_draw,
+                                     global uint *out_draw, enum mesa_prim mode,
+                                     uint index_size_B)
 {
-   barrier(CLK_LOCAL_MEM_FENCE);
-   scratch[get_sub_group_id()] = sub_group_ballot(cond)[0];
-   barrier(CLK_LOCAL_MEM_FENCE);
+   /* Determine an upper bound on the memory required for the index buffer.
+    * Restarts only decrease the unrolled index buffer size, so the maximum size
+    * is the unrolled size when the input has no restarts.
+    */
+   uint max_prims = u_decomposed_prims_for_vertices(mode, in_draw[0]);
+   uint max_verts = max_prims * mesa_vertices_per_prim(mode);
+   uint alloc_size = max_verts * index_size_B;
 
-   uint first_group =
-      ctz(sub_group_ballot(scratch[get_sub_group_local_id()])[0]);
-   uint off = ctz(first_group < 32 ? scratch[first_group] : 0);
-   return (first_group * 32) + off;
+   /* Allocate unrolled index buffer.
+    *
+    * TODO: For multidraw, should be atomic. But multidraw+unroll isn't
+    * currently wired up in any driver.
+    */
+   uint old_heap_bottom_B = poly_heap_alloc_offs(heap, alloc_size);
+
+   /* Setup most of the descriptor. Count will be determined after unroll. */
+   out_draw[1] = in_draw[1]; /* instance count */
+   out_draw[2] = old_heap_bottom_B / index_size_B; /* index offset */
+   out_draw[3] = in_draw[2]; /* index bias */
+   out_draw[4] = in_draw[3]; /* base instance */
+
+   /* Return the index buffer we allocated */
+   return (global uchar *)heap->base + old_heap_bottom_B;
 }
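Reviewer note: the sizing logic can be sanity-checked with a worked example (illustrative, not part of the patch): a TRIANGLE_STRIP draw of 128 input elements unrolled to 4-byte indices needs at most 126 * 3 * 4 = 1512 bytes, and any restart in the stream only shrinks the real requirement.

#include <assert.h>

int
main(void)
{
   unsigned vertex_count = 128;
   /* u_decomposed_prims_for_vertices for a triangle strip: n - 2 */
   unsigned max_prims = vertex_count - 2;
   unsigned alloc_size = max_prims * 3 /* verts per tri */ * 4 /* bytes */;

   assert(alloc_size == 1512);
   return 0;
}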
 
-// KERNEL(1024)
+/* TODO_KOSMICKRISP KERNEL(1024) */
 KERNEL(1)
 libkk_unroll_geometry_and_restart(
-   constant uint8_t *index_buffer, global uint8_t *out_ptr,
+   uint64_t index_buffer, global struct poly_heap *heap,
    constant uint32_t *in_draw, global uint32_t *out_draw,
    uint32_t restart_index, uint32_t index_buffer_size_el, uint32_t in_el_size_B,
    uint32_t out_el_size_B, uint32_t flatshade_first, uint32_t mode)
@@ -223,10 +68,21 @@ libkk_unroll_geometry_and_restart(
    uint tid = cl_local_id.x;
    uint count = in_draw[0];
 
-   constant uint8_t *in_ptr =
-      index_buffer ? index_buffer + (in_draw[2] * in_el_size_B) : index_buffer;
+   uintptr_t out_ptr;
+   if (tid == 0) {
+      if (index_buffer)
+         out_ptr = (uintptr_t)poly_setup_unroll_for_draw(
+            heap, in_draw, out_draw, mode, out_el_size_B);
+      else
+         out_ptr = (uintptr_t)kk_setup_unroll_for_non_indexed_draw(
+            heap, in_draw, out_draw, mode, out_el_size_B);
+   }
 
-   // local uint scratch[32];
+   uintptr_t in_ptr = index_buffer
+                         ? (uintptr_t)index_buffer + (in_draw[2] * in_el_size_B)
+                         : (uintptr_t)index_buffer;
+
+   /* TODO_KOSMICKRISP local uint scratch[32]; */
 
    uint out_prims = 0;
    uint needle = 0;
@@ -240,11 +96,13 @@ libkk_unroll_geometry_and_restart(
          idx >= count || load_index(in_ptr, index_buffer_size_el, idx,
                                     in_el_size_B) == restart_index;
 
-      // uint next_offs = first_true_thread_in_workgroup(restart, scratch);
+      /* TODO_KOSMICKRISP Uncomment this when subgroups are reliable
+      uint next_offs = poly_work_group_first_true(restart, scratch);
 
-      // next_restart += next_offs;
-      // if (next_offs < 1024)
-      //    break;
+      next_restart += next_offs;
+      if (next_offs < cl_local_size.x)
+         break;
+      */
       if (restart)
         break;
      next_restart++;
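Reviewer note: with the workgroup ballot path still disabled, the loop above degrades to a one-element-per-iteration scan. What it computes is simply the position of the next restart index, sketched host-side here (illustrative only); the commented-out path finds the same position a workgroup-width at a time with a ballot.

#include <stdint.h>

/* Advance to the first position at or past `needle` whose index equals the
 * restart value, or to `count` if none remains. */
static uint32_t
find_next_restart(const uint32_t *indices, uint32_t count, uint32_t needle,
                  uint32_t restart_index)
{
   uint32_t next_restart = needle;

   while (next_restart < count && indices[next_restart] != restart_index)
      next_restart++;
   return next_restart;
}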
@@ -254,17 +112,17 @@ libkk_unroll_geometry_and_restart(
       uint subcount = next_restart - needle;
       uint subprims = u_decomposed_prims_for_vertices(mode, subcount);
       uint out_prims_base = out_prims;
-      for (uint i = tid; i < subprims; /*i += 1024*/ ++i) {
+      for (uint i = tid; i < subprims; /*i += cl_local_size.x*/ ++i) {
          for (uint vtx = 0; vtx < per_prim; ++vtx) {
-            uint id =
-               vertex_id_for_topology(mode, flatshade_first, i, vtx, subprims);
+            uint id = poly_vertex_id_for_topology(mode, flatshade_first, i, vtx,
+                                                  subprims);
             uint offset = needle + id;
 
             uint x = ((out_prims_base + i) * per_prim) + vtx;
             uint y =
                load_index(in_ptr, index_buffer_size_el, offset, in_el_size_B);
 
-            store_index(out_ptr, out_el_size_B, x, y);
+            poly_store_index(out_ptr, out_el_size_B, x, y);
          }
       }
 
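Reviewer note: a worked example of the emission loop (illustrative, not part of the patch). Unrolling the 16-bit TRIANGLE_STRIP stream {0, 1, 2, 3, 0xFFFF, 4, 5, 6} with flatshade_first = true produces (0,1,2) and (1,3,2) from the first sub-draw and (4,5,6) from the second, i.e. nine 32-bit output indices:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Provoking-first (pv == 0) specialization of the strip map: odd prims swap
 * their two non-provoking verts. */
static unsigned
strip_vertex_id(unsigned prim, unsigned vert)
{
   bool swap = (prim & 1) && vert != 0;
   return prim + (swap ? 3 - vert : vert);
}

int
main(void)
{
   const uint16_t in[] = {0, 1, 2, 3, 0xFFFF, 4, 5, 6};
   uint32_t out[9];
   unsigned out_n = 0;

   /* Sub-draw 1: elements [0,4) -> 2 triangles. */
   for (unsigned prim = 0; prim < 2; prim++)
      for (unsigned vert = 0; vert < 3; vert++)
         out[out_n++] = in[0 + strip_vertex_id(prim, vert)];
   /* Sub-draw 2: elements [5,8) -> 1 triangle. */
   for (unsigned vert = 0; vert < 3; vert++)
      out[out_n++] = in[5 + strip_vertex_id(0, vert)];

   const uint32_t expected[9] = {0, 1, 2, 1, 3, 2, 4, 5, 6};
   for (unsigned i = 0; i < 9; i++)
      assert(out[i] == expected[i]);
   return 0;
}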
@@ -273,10 +131,6 @@ libkk_unroll_geometry_and_restart(
    }
 
    if (tid == 0) {
-      out_draw[0] = out_prims * per_prim; /* indexCount */
-      out_draw[1] = in_draw[1]; /* instanceCount */
-      out_draw[2] = 0u; /* firstIndex */
-      out_draw[3] = index_buffer ? in_draw[3] : in_draw[2]; /* vertexOffset */
-      out_draw[4] = index_buffer ? in_draw[4] : in_draw[3]; /* firstInstance */
+      out_draw[0] = out_prims * per_prim;
    }
 }
@@ -105,6 +105,7 @@ kk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
    kk_cmd_release_resources(dev, cmd);
 
    memset(&cmd->state, 0, sizeof(cmd->state));
+   cmd->uses_heap = false;
 }
 
 const struct vk_command_buffer_ops kk_cmd_buffer_ops = {
@@ -138,9 +138,6 @@ struct kk_graphics_state {
    struct {
       struct kk_addr_range addr_range[KK_MAX_VBUFS];
      mtl_buffer *handles[KK_MAX_VBUFS];
-      /* Required to understand maximum size of index buffer if primitive is
-       * triangle fans */
-      uint32_t max_vertices;
    } vb;
 
    /* Needed by vk_command_buffer::dynamic_graphics_state */
@@ -170,6 +167,9 @@ struct kk_cmd_buffer {
 
    /* Owned large BOs */
    struct util_dynarray large_bos;
+
+   /* Does the command buffer use the geometry heap? */
+   bool uses_heap;
 };
 
 VK_DEFINE_HANDLE_CASTS(kk_cmd_buffer, vk.base, VkCommandBuffer,
@@ -213,6 +213,11 @@ kk_cmd_buffer_dirty_all_gfx(struct kk_cmd_buffer *cmd)
    cmd->state.dirty_shaders = ~0u;
    cmd->state.gfx.dirty = ~0u;
    cmd->state.gfx.descriptors.root_dirty = true;
+
+   /* We just flushed out the heap use. If we want to use it again, we'll need
+    * to queue a free for it again.
+    */
+   cmd->uses_heap = false;
 }
 
 void kk_cmd_release_dynamic_ds_state(struct kk_cmd_buffer *cmd);
@@ -19,6 +19,8 @@
 #include "kosmickrisp/bridge/mtl_bridge.h"
 #include "kosmickrisp/bridge/vk_to_mtl_map.h"
 
+#include "poly/geometry.h"
+
 #include "vulkan/util/vk_format.h"
 
 static void
@@ -766,7 +768,6 @@ kk_flush_dynamic_state(struct kk_cmd_buffer *cmd)
        IS_DIRTY(VI_BINDING_STRIDES) || gfx->dirty & KK_DIRTY_VB) {
       struct kk_shader *vs = cmd->state.shaders[MESA_SHADER_VERTEX];
       unsigned slot = 0;
-      cmd->state.gfx.vb.max_vertices = 0u;
       u_foreach_bit(i, vs->info.vs.attribs_read) {
          if (dyn->vi->attributes_valid & BITFIELD_BIT(i)) {
            struct vk_vertex_attribute_state attr = dyn->vi->attributes[i];
@@ -778,10 +779,6 @@ kk_flush_dynamic_state(struct kk_cmd_buffer *cmd)
                                    &desc->root.draw.attrib_base[slot]);
             desc->root.draw.buffer_strides[attr.binding] =
                dyn->vi_binding_strides[attr.binding];
-
-            cmd->state.gfx.vb.max_vertices =
-               MAX2(vb.range / dyn->vi_binding_strides[attr.binding],
-                    cmd->state.gfx.vb.max_vertices);
          }
          slot++;
       }
@@ -843,12 +840,52 @@ struct kk_draw_data {
    bool restart;
 };
 
+static void
+kk_init_heap(const void *data)
+{
+   struct kk_cmd_buffer *cmd = (struct kk_cmd_buffer *)data;
+   struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+   size_t size = 128 * 1024 * 1024;
+   kk_alloc_bo(dev, &dev->vk.base, size, 0, &dev->heap);
+
+   struct poly_heap *map = (struct poly_heap *)dev->heap->cpu;
+
+   /* TODO_KOSMICKRISP Self-contained until we have rodata at the device. */
+   *map = (struct poly_heap){
+      .base = dev->heap->gpu + sizeof(struct poly_heap),
+      .size = size - sizeof(struct poly_heap),
+   };
+}
+
+static uint64_t
+kk_heap(struct kk_cmd_buffer *cmd)
+{
+   struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+   util_call_once_data(&dev->heap_init_once, kk_init_heap, cmd);
+
+   /* We need to free all allocations after each command buffer execution */
+   if (!cmd->uses_heap) {
+      uint64_t addr = dev->heap->gpu;
+
+      /* Zeroing the allocated index frees everything */
+      kk_cmd_write(cmd, (struct libkk_imm_write){
+                           addr + offsetof(struct poly_heap, bottom), 0});
+
+      cmd->uses_heap = true;
+   }
+
+   return dev->heap->gpu;
+}
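Reviewer note: kk_heap() relies on poly_heap being a simple bump allocator, so a single GPU-timeline write of zero resets it. A minimal host-side sketch of that scheme (the field layout is inferred from this patch, so treat it as an assumption about poly_heap):

#include <stdint.h>

struct heap {
   uint64_t base;   /* GPU address of the backing storage */
   uint32_t size;   /* bytes available */
   uint32_t bottom; /* bytes already handed out */
};

static uint32_t
heap_alloc_offs(struct heap *h, uint32_t size_B)
{
   /* On the GPU this bump would be an atomic fetch-add. */
   uint32_t offs = h->bottom;
   h->bottom += size_B;
   return offs;
}

static void
heap_free_all(struct heap *h)
{
   /* Equivalent of the imm_write of 0 that kk_heap() queues above. */
   h->bottom = 0;
}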
 
 /* Unrolling will always be done through indirect rendering, so if this is
  * called from non-indirect calls, we will fake it. */
 static struct kk_draw_data
 kk_unroll_geometry(struct kk_cmd_buffer *cmd, struct kk_draw_data data,
                    bool promote_index_type)
 {
+   struct kk_device *dev = kk_cmd_buffer_device(cmd);
    if (!data.indirect) {
       if (data.indexed) {
          VkDrawIndexedIndirectCommand draw = {
@@ -875,38 +912,24 @@ kk_unroll_geometry(struct kk_cmd_buffer *cmd, struct kk_draw_data data,
       }
    }
 
-   uint32_t el_count = cmd->state.gfx.vb.max_vertices;
-   if (data.indexed) {
-      el_count =
-         (mtl_buffer_get_length(data.index_buffer) - data.index_buffer_offset) /
-         data.index_size;
-   }
+   struct kk_bo *out_draw =
+      kk_cmd_allocate_buffer(cmd, sizeof(VkDrawIndexedIndirectCommand), 4u);
 
-   uint32_t decomposed_index_count =
-      u_decomposed_prims_for_vertices(data.prim, el_count) *
-      mesa_vertices_per_prim(data.prim);
-   uint32_t el_size_B = 4u;
-   uint32_t index_buffer_size_B = decomposed_index_count * el_size_B;
-   uint32_t buffer_size_B =
-      sizeof(VkDrawIndexedIndirectCommand) + index_buffer_size_B;
-   struct kk_bo *index_buffer =
-      kk_cmd_allocate_buffer(cmd, buffer_size_B, el_size_B);
-
-   if (!index_buffer)
+   if (!out_draw)
       return data;
 
    struct libkk_unroll_geometry_and_restart_args info = {
       .index_buffer = mtl_buffer_get_gpu_address(data.index_buffer) +
                       data.index_buffer_offset,
-      .out_ptr = index_buffer->gpu + sizeof(VkDrawIndexedIndirectCommand),
+      .heap = kk_heap(cmd),
       .in_draw = mtl_buffer_get_gpu_address(data.indirect_buffer) +
                  data.indirect_buffer_offset,
-      .out_draw = index_buffer->gpu,
+      .out_draw = out_draw->gpu,
       .restart_index =
          promote_index_type ? UINT32_MAX : cmd->state.gfx.index.restart,
       .index_buffer_size_el = data.index_buffer_range_B,
       .in_el_size_B = data.index_size,
-      .out_el_size_B = el_size_B,
+      .out_el_size_B = 4u,
       .flatshade_first = true,
       .mode = data.prim,
    };
@@ -914,14 +937,15 @@ kk_unroll_geometry(struct kk_cmd_buffer *cmd, struct kk_draw_data data,
    struct mtl_size grid = {1, 1, 1};
    libkk_unroll_geometry_and_restart_struct(cmd, grid, true, info);
 
-   data.indirect_buffer = index_buffer->map;
-   data.index_buffer = index_buffer->map;
-   data.index_buffer_offset = sizeof(VkDrawIndexedIndirectCommand);
+   data.indirect_buffer = out_draw->map;
+   data.index_buffer = dev->heap->map;
+   /* TODO_KOSMICKRISP Self-contained until we have rodata at the device. */
+   data.index_buffer_offset = sizeof(struct poly_heap);
    data.indirect_buffer_offset = 0u;
-   data.index_buffer_range_B = index_buffer_size_B;
+   data.index_buffer_range_B = dev->heap->size_B - sizeof(struct poly_heap);
    data.first_index = 0u;
    data.prim = u_decomposed_prim(data.prim);
-   data.index_size = el_size_B;
+   data.index_size = 4u;
    data.indirect = true;
    data.indexed = true;
    data.restart = false;
@@ -287,6 +287,10 @@ kk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    kk_query_table_finish(dev, &dev->occlusion_queries);
    kk_destroy_sampler_heap(dev, &dev->samplers);
 
+   /* Geometry heap */
+   if (dev->heap)
+      kk_destroy_bo(dev, dev->heap);
+
    /* Release the residency set last once all BOs are released. */
    mtl_release(dev->residency_set.handle);
    simple_mtx_destroy(&dev->residency_set.mutex);
@@ -97,6 +97,10 @@ struct kk_device {
 
    struct vk_meta_device meta;
 
+   /* Geometry heap */
+   struct kk_bo *heap;
+   util_once_flag heap_init_once;
+
    uint64_t disabled_workarounds;
    bool gpu_capture_enabled;
 };