nvk: Implement VK_KHR_draw_indirect_count on Turing+

v2: handle maxDrawCount parameter
v3: assert on pre-Turing. Free registers after use.
v4: less register pressure. Update to pass new tests

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>
This commit is contained in:
Thomas H.P. Andersen 2022-11-03 23:30:53 +01:00 committed by Marge Bot
parent d0cf79b28f
commit 0e5fcf912f
5 changed files with 179 additions and 11 deletions

View file

@ -110,6 +110,13 @@ mme_free_reg(struct mme_builder *b, struct mme_value val)
mme_reg_alloc_free(&b->reg_alloc, val);
}
/* Free a 64-bit MME value by handing its two components, val.lo and then
 * val.hi, to mme_reg_alloc_free() on the builder's reg_alloc.
 *
 * The value must not be used after this call (presumably the registers
 * become available for reuse by later allocations -- confirm against
 * mme_reg_alloc_free()).
 */
static inline void
mme_free_reg64(struct mme_builder *b, struct mme_value64 val)
{
mme_reg_alloc_free(&b->reg_alloc, val.lo);
mme_reg_alloc_free(&b->reg_alloc, val.hi);
}
static inline void
mme_alu_to(struct mme_builder *b,
struct mme_value dst,

View file

@ -1973,3 +1973,155 @@ nvk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
}
}
}
void
nvk_mme_draw_indirect_count(struct mme_builder *b)
{
if (b->devinfo->cls_eng3d < TURING_A)
return;
struct mme_value begin = mme_load(b);
struct mme_value64 draw_addr = mme_load_addr64(b);
struct mme_value64 draw_count_addr = mme_load_addr64(b);
struct mme_value draw_max = mme_load(b);
struct mme_value stride = mme_load(b);
mme_tu104_read_fifoed(b, draw_count_addr, mme_imm(1));
mme_free_reg64(b, draw_count_addr);
struct mme_value draw_count_buf = mme_load(b);
mme_if(b, ule, draw_count_buf, draw_max) {
mme_mov_to(b, draw_max, draw_count_buf);
}
mme_free_reg(b, draw_count_buf);
struct mme_value draw = mme_mov(b, mme_zero());
mme_while(b, ult, draw, draw_max) {
mme_tu104_read_fifoed(b, draw_addr, mme_imm(4));
nvk_mme_build_draw(b, begin, draw);
mme_add_to(b, draw, draw, mme_imm(1));
mme_add64_to(b, draw_addr, draw_addr, mme_value64(stride, mme_zero()));
}
}
/* vkCmdDrawIndirectCount entry point.
 *
 * Flushes pending graphics state, then calls the
 * NVK_MME_DRAW_INDIRECT_COUNT macro with, in order: the BEGIN method value,
 * the draw-record address (high dword, then low dword), the count address
 * (high dword, then low dword), maxDrawCount and stride.
 * nvk_mme_draw_indirect_count() loads its parameters in that same order.
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
                         VkBuffer _buffer,
                         VkDeviceSize offset,
                         VkBuffer countBuffer,
                         VkDeviceSize countBufferOffset,
                         uint32_t maxDrawCount,
                         uint32_t stride)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, draws_buf, _buffer);
   VK_FROM_HANDLE(nvk_buffer, count_buf, countBuffer);

   /* TODO: Indirect count draw pre-Turing */
   assert(nvk_cmd_buffer_3d_cls(cmd) >= TURING_A);

   nvk_flush_gfx_state(cmd);

   /* The BEGIN value carries the currently bound primitive topology. */
   const struct vk_dynamic_graphics_state *dyn_state =
      &cmd->vk.dynamic_graphics_state;

   uint32_t begin_val;
   V_NV9097_BEGIN(begin_val, {
      .op = vk_to_nv9097_primitive_topology(dyn_state->ia.primitive_topology),
      .primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
      .instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
      .split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
   });

   const uint64_t draws_va = nvk_buffer_address(draws_buf, offset);
   const uint64_t count_va = nvk_buffer_address(count_buf, countBufferOffset);

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

   P_IMMD(p, NVC597, SET_MME_DATA_FIFO_CONFIG, FIFO_SIZE_SIZE_4KB);

   P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDIRECT_COUNT));
   P_INLINE_DATA(p, begin_val);
   P_INLINE_DATA(p, draws_va >> 32);
   P_INLINE_DATA(p, draws_va);
   P_INLINE_DATA(p, count_va >> 32);
   P_INLINE_DATA(p, count_va);
   P_INLINE_DATA(p, maxDrawCount);
   P_INLINE_DATA(p, stride);
}
void
nvk_mme_draw_indexed_indirect_count(struct mme_builder *b)
{
if (b->devinfo->cls_eng3d < TURING_A)
return;
struct mme_value begin = mme_load(b);
struct mme_value64 draw_addr = mme_load_addr64(b);
struct mme_value64 draw_count_addr = mme_load_addr64(b);
struct mme_value draw_max = mme_load(b);
struct mme_value stride = mme_load(b);
mme_tu104_read_fifoed(b, draw_count_addr, mme_imm(1));
mme_free_reg64(b, draw_count_addr);
struct mme_value draw_count_buf = mme_load(b);
mme_if(b, ule, draw_count_buf, draw_max) {
mme_mov_to(b, draw_max, draw_count_buf);
}
mme_free_reg(b, draw_count_buf);
struct mme_value draw = mme_mov(b, mme_zero());
mme_while(b, ult, draw, draw_max) {
mme_tu104_read_fifoed(b, draw_addr, mme_imm(5));
nvk_mme_build_draw_indexed(b, begin, draw);
mme_add_to(b, draw, draw, mme_imm(1));
mme_add64_to(b, draw_addr, draw_addr, mme_value64(stride, mme_zero()));
}
}
/* vkCmdDrawIndexedIndirectCount entry point.
 *
 * Flushes pending graphics state, then calls the
 * NVK_MME_DRAW_INDEXED_INDIRECT_COUNT macro with, in order: the BEGIN method
 * value, the draw-record address (high dword, then low dword), the count
 * address (high dword, then low dword), maxDrawCount and stride.
 * nvk_mme_draw_indexed_indirect_count() loads its parameters in that same
 * order.
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
                                VkBuffer _buffer,
                                VkDeviceSize offset,
                                VkBuffer countBuffer,
                                VkDeviceSize countBufferOffset,
                                uint32_t maxDrawCount,
                                uint32_t stride)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, draws_buf, _buffer);
   VK_FROM_HANDLE(nvk_buffer, count_buf, countBuffer);

   /* TODO: Indexed indirect count draw pre-Turing */
   assert(nvk_cmd_buffer_3d_cls(cmd) >= TURING_A);

   nvk_flush_gfx_state(cmd);

   /* The BEGIN value carries the currently bound primitive topology. */
   const struct vk_dynamic_graphics_state *dyn_state =
      &cmd->vk.dynamic_graphics_state;

   uint32_t begin_val;
   V_NV9097_BEGIN(begin_val, {
      .op = vk_to_nv9097_primitive_topology(dyn_state->ia.primitive_topology),
      .primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
      .instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
      .split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
   });

   const uint64_t draws_va = nvk_buffer_address(draws_buf, offset);
   const uint64_t count_va = nvk_buffer_address(count_buf, countBufferOffset);

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

   P_IMMD(p, NVC597, SET_MME_DATA_FIFO_CONFIG, FIFO_SIZE_SIZE_4KB);

   P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDEXED_INDIRECT_COUNT));
   P_INLINE_DATA(p, begin_val);
   P_INLINE_DATA(p, draws_va >> 32);
   P_INLINE_DATA(p, draws_va);
   P_INLINE_DATA(p, count_va >> 32);
   P_INLINE_DATA(p, count_va);
   P_INLINE_DATA(p, maxDrawCount);
   P_INLINE_DATA(p, stride);
}

View file

@ -3,16 +3,18 @@
#include "nvk_private.h"
/* Table of MME macro builder callbacks, indexed by the NVK_MME_* enumerant
 * and sized by NVK_MME_COUNT.  Each slot holds the nvk_mme_*() function
 * that builds the corresponding macro.
 */
static const nvk_mme_builder_func mme_builders[NVK_MME_COUNT] = {
/* NOTE(review): the ten designators in this first group are all repeated in
 * the group that follows.  With repeated designators the later initializer
 * is the one that takes effect, so this group looks redundant -- presumably
 * it is the pre-change side of the hunk shown next to the post-change side.
 * Confirm against the real file before relying on it.
 */
[NVK_MME_CLEAR_VIEWS] = nvk_mme_clear_views,
[NVK_MME_CLEAR_LAYERS] = nvk_mme_clear_layers,
[NVK_MME_DRAW] = nvk_mme_draw,
[NVK_MME_DRAW_INDEXED] = nvk_mme_draw_indexed,
[NVK_MME_DRAW_INDIRECT] = nvk_mme_draw_indirect,
[NVK_MME_DRAW_INDEXED_INDIRECT] = nvk_mme_draw_indexed_indirect,
[NVK_MME_ADD_CS_INVOCATIONS] = nvk_mme_add_cs_invocations,
[NVK_MME_DISPATCH_INDIRECT] = nvk_mme_dispatch_indirect,
[NVK_MME_WRITE_CS_INVOCATIONS] = nvk_mme_write_cs_invocations,
[NVK_MME_COPY_QUERIES] = nvk_mme_copy_queries,
/* Full set of entries, including the two indirect-count draw macros. */
[NVK_MME_CLEAR_VIEWS] = nvk_mme_clear_views,
[NVK_MME_CLEAR_LAYERS] = nvk_mme_clear_layers,
[NVK_MME_DRAW] = nvk_mme_draw,
[NVK_MME_DRAW_INDEXED] = nvk_mme_draw_indexed,
[NVK_MME_DRAW_INDIRECT] = nvk_mme_draw_indirect,
[NVK_MME_DRAW_INDEXED_INDIRECT] = nvk_mme_draw_indexed_indirect,
[NVK_MME_DRAW_INDIRECT_COUNT] = nvk_mme_draw_indirect_count,
[NVK_MME_DRAW_INDEXED_INDIRECT_COUNT] = nvk_mme_draw_indexed_indirect_count,
[NVK_MME_ADD_CS_INVOCATIONS] = nvk_mme_add_cs_invocations,
[NVK_MME_DISPATCH_INDIRECT] = nvk_mme_dispatch_indirect,
[NVK_MME_WRITE_CS_INVOCATIONS] = nvk_mme_write_cs_invocations,
[NVK_MME_COPY_QUERIES] = nvk_mme_copy_queries,
};
uint32_t *

View file

@ -12,6 +12,8 @@ enum nvk_mme {
NVK_MME_DRAW_INDEXED,
NVK_MME_DRAW_INDIRECT,
NVK_MME_DRAW_INDEXED_INDIRECT,
NVK_MME_DRAW_INDIRECT_COUNT,
NVK_MME_DRAW_INDEXED_INDIRECT_COUNT,
NVK_MME_ADD_CS_INVOCATIONS,
NVK_MME_DISPATCH_INDIRECT,
NVK_MME_WRITE_CS_INVOCATIONS,
@ -40,6 +42,8 @@ void nvk_mme_draw(struct mme_builder *b);
void nvk_mme_draw_indexed(struct mme_builder *b);
void nvk_mme_draw_indirect(struct mme_builder *b);
void nvk_mme_draw_indexed_indirect(struct mme_builder *b);
void nvk_mme_draw_indirect_count(struct mme_builder *b);
void nvk_mme_draw_indexed_indirect_count(struct mme_builder *b);
void nvk_mme_add_cs_invocations(struct mme_builder *b);
void nvk_mme_dispatch_indirect(struct mme_builder *b);
void nvk_mme_write_cs_invocations(struct mme_builder *b);

View file

@ -22,6 +22,7 @@
#include "clc0c0.h"
#include "clc1c0.h"
#include "clc3c0.h"
#include "clc597.h"
#include "clc5c0.h"
@ -29,7 +30,7 @@ VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures2 *pFeatures)
{
// VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
pFeatures->features = (VkPhysicalDeviceFeatures) {
.robustBufferAccess = true,
@ -103,6 +104,7 @@ nvk_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
.bufferDeviceAddress = true,
.bufferDeviceAddressCaptureReplay = false,
.bufferDeviceAddressMultiDevice = false,
.drawIndirectCount = pdev->info.cls_eng3d >= TURING_A,
};
VkPhysicalDeviceVulkan13Features core_1_3 = {
@ -483,6 +485,7 @@ nvk_get_device_extensions(const struct nvk_physical_device *pdev,
.KHR_depth_stencil_resolve = true,
.KHR_descriptor_update_template = true,
.KHR_device_group = true,
.KHR_draw_indirect_count = pdev->info.cls_eng3d >= TURING_A,
.KHR_driver_properties = true,
.KHR_dynamic_rendering = true,
.KHR_format_feature_flags2 = true,