mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
nvk: Implement VK_KHR_draw_indirect_count on Turing+
v2: Handle the maxDrawCount parameter. v3: Assert on pre-Turing. Free registers after use. v4: Less register pressure. Update to pass new tests. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>
This commit is contained in:
parent
d0cf79b28f
commit
0e5fcf912f
5 changed files with 179 additions and 11 deletions
|
|
@ -110,6 +110,13 @@ mme_free_reg(struct mme_builder *b, struct mme_value val)
|
|||
mme_reg_alloc_free(&b->reg_alloc, val);
|
||||
}
|
||||
|
||||
static inline void
|
||||
mme_free_reg64(struct mme_builder *b, struct mme_value64 val)
|
||||
{
|
||||
mme_reg_alloc_free(&b->reg_alloc, val.lo);
|
||||
mme_reg_alloc_free(&b->reg_alloc, val.hi);
|
||||
}
|
||||
|
||||
static inline void
|
||||
mme_alu_to(struct mme_builder *b,
|
||||
struct mme_value dst,
|
||||
|
|
|
|||
|
|
@ -1973,3 +1973,155 @@ nvk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nvk_mme_draw_indirect_count(struct mme_builder *b)
|
||||
{
|
||||
if (b->devinfo->cls_eng3d < TURING_A)
|
||||
return;
|
||||
|
||||
struct mme_value begin = mme_load(b);
|
||||
struct mme_value64 draw_addr = mme_load_addr64(b);
|
||||
struct mme_value64 draw_count_addr = mme_load_addr64(b);
|
||||
struct mme_value draw_max = mme_load(b);
|
||||
struct mme_value stride = mme_load(b);
|
||||
|
||||
mme_tu104_read_fifoed(b, draw_count_addr, mme_imm(1));
|
||||
mme_free_reg64(b, draw_count_addr);
|
||||
struct mme_value draw_count_buf = mme_load(b);
|
||||
|
||||
mme_if(b, ule, draw_count_buf, draw_max) {
|
||||
mme_mov_to(b, draw_max, draw_count_buf);
|
||||
}
|
||||
mme_free_reg(b, draw_count_buf);
|
||||
|
||||
struct mme_value draw = mme_mov(b, mme_zero());
|
||||
mme_while(b, ult, draw, draw_max) {
|
||||
mme_tu104_read_fifoed(b, draw_addr, mme_imm(4));
|
||||
|
||||
nvk_mme_build_draw(b, begin, draw);
|
||||
|
||||
mme_add_to(b, draw, draw, mme_imm(1));
|
||||
mme_add64_to(b, draw_addr, draw_addr, mme_value64(stride, mme_zero()));
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
nvk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
|
||||
VkBuffer _buffer,
|
||||
VkDeviceSize offset,
|
||||
VkBuffer countBuffer,
|
||||
VkDeviceSize countBufferOffset,
|
||||
uint32_t maxDrawCount,
|
||||
uint32_t stride)
|
||||
{
|
||||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(nvk_buffer, buffer, _buffer);
|
||||
VK_FROM_HANDLE(nvk_buffer, count_buffer, countBuffer);
|
||||
|
||||
const struct vk_dynamic_graphics_state *dyn =
|
||||
&cmd->vk.dynamic_graphics_state;
|
||||
|
||||
/* TODO: Indirect count draw pre-Turing */
|
||||
assert(nvk_cmd_buffer_3d_cls(cmd) >= TURING_A);
|
||||
|
||||
nvk_flush_gfx_state(cmd);
|
||||
|
||||
uint32_t begin;
|
||||
V_NV9097_BEGIN(begin, {
|
||||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||||
});
|
||||
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);
|
||||
P_IMMD(p, NVC597, SET_MME_DATA_FIFO_CONFIG, FIFO_SIZE_SIZE_4KB);
|
||||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDIRECT_COUNT));
|
||||
P_INLINE_DATA(p, begin);
|
||||
uint64_t draw_addr = nvk_buffer_address(buffer, offset);
|
||||
P_INLINE_DATA(p, draw_addr >> 32);
|
||||
P_INLINE_DATA(p, draw_addr);
|
||||
uint64_t draw_count_addr = nvk_buffer_address(count_buffer,
|
||||
countBufferOffset);
|
||||
P_INLINE_DATA(p, draw_count_addr >> 32);
|
||||
P_INLINE_DATA(p, draw_count_addr);
|
||||
P_INLINE_DATA(p, maxDrawCount);
|
||||
P_INLINE_DATA(p, stride);
|
||||
}
|
||||
|
||||
void
|
||||
nvk_mme_draw_indexed_indirect_count(struct mme_builder *b)
|
||||
{
|
||||
if (b->devinfo->cls_eng3d < TURING_A)
|
||||
return;
|
||||
|
||||
struct mme_value begin = mme_load(b);
|
||||
struct mme_value64 draw_addr = mme_load_addr64(b);
|
||||
struct mme_value64 draw_count_addr = mme_load_addr64(b);
|
||||
struct mme_value draw_max = mme_load(b);
|
||||
struct mme_value stride = mme_load(b);
|
||||
|
||||
mme_tu104_read_fifoed(b, draw_count_addr, mme_imm(1));
|
||||
mme_free_reg64(b, draw_count_addr);
|
||||
struct mme_value draw_count_buf = mme_load(b);
|
||||
|
||||
mme_if(b, ule, draw_count_buf, draw_max) {
|
||||
mme_mov_to(b, draw_max, draw_count_buf);
|
||||
}
|
||||
mme_free_reg(b, draw_count_buf);
|
||||
|
||||
struct mme_value draw = mme_mov(b, mme_zero());
|
||||
mme_while(b, ult, draw, draw_max) {
|
||||
mme_tu104_read_fifoed(b, draw_addr, mme_imm(5));
|
||||
|
||||
nvk_mme_build_draw_indexed(b, begin, draw);
|
||||
|
||||
mme_add_to(b, draw, draw, mme_imm(1));
|
||||
mme_add64_to(b, draw_addr, draw_addr, mme_value64(stride, mme_zero()));
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
nvk_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
|
||||
VkBuffer _buffer,
|
||||
VkDeviceSize offset,
|
||||
VkBuffer countBuffer,
|
||||
VkDeviceSize countBufferOffset,
|
||||
uint32_t maxDrawCount,
|
||||
uint32_t stride)
|
||||
{
|
||||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(nvk_buffer, buffer, _buffer);
|
||||
VK_FROM_HANDLE(nvk_buffer, count_buffer, countBuffer);
|
||||
|
||||
const struct vk_dynamic_graphics_state *dyn =
|
||||
&cmd->vk.dynamic_graphics_state;
|
||||
|
||||
/* TODO: Indexed indirect count draw pre-Turing */
|
||||
assert(nvk_cmd_buffer_3d_cls(cmd) >= TURING_A);
|
||||
|
||||
nvk_flush_gfx_state(cmd);
|
||||
|
||||
uint32_t begin;
|
||||
V_NV9097_BEGIN(begin, {
|
||||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||||
});
|
||||
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);
|
||||
P_IMMD(p, NVC597, SET_MME_DATA_FIFO_CONFIG, FIFO_SIZE_SIZE_4KB);
|
||||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDEXED_INDIRECT_COUNT));
|
||||
P_INLINE_DATA(p, begin);
|
||||
uint64_t draw_addr = nvk_buffer_address(buffer, offset);
|
||||
P_INLINE_DATA(p, draw_addr >> 32);
|
||||
P_INLINE_DATA(p, draw_addr);
|
||||
uint64_t draw_count_addr = nvk_buffer_address(count_buffer,
|
||||
countBufferOffset);
|
||||
P_INLINE_DATA(p, draw_count_addr >> 32);
|
||||
P_INLINE_DATA(p, draw_count_addr);
|
||||
P_INLINE_DATA(p, maxDrawCount);
|
||||
P_INLINE_DATA(p, stride);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,16 +3,18 @@
|
|||
#include "nvk_private.h"
|
||||
|
||||
static const nvk_mme_builder_func mme_builders[NVK_MME_COUNT] = {
|
||||
[NVK_MME_CLEAR_VIEWS] = nvk_mme_clear_views,
|
||||
[NVK_MME_CLEAR_LAYERS] = nvk_mme_clear_layers,
|
||||
[NVK_MME_DRAW] = nvk_mme_draw,
|
||||
[NVK_MME_DRAW_INDEXED] = nvk_mme_draw_indexed,
|
||||
[NVK_MME_DRAW_INDIRECT] = nvk_mme_draw_indirect,
|
||||
[NVK_MME_DRAW_INDEXED_INDIRECT] = nvk_mme_draw_indexed_indirect,
|
||||
[NVK_MME_ADD_CS_INVOCATIONS] = nvk_mme_add_cs_invocations,
|
||||
[NVK_MME_DISPATCH_INDIRECT] = nvk_mme_dispatch_indirect,
|
||||
[NVK_MME_WRITE_CS_INVOCATIONS] = nvk_mme_write_cs_invocations,
|
||||
[NVK_MME_COPY_QUERIES] = nvk_mme_copy_queries,
|
||||
[NVK_MME_CLEAR_VIEWS] = nvk_mme_clear_views,
|
||||
[NVK_MME_CLEAR_LAYERS] = nvk_mme_clear_layers,
|
||||
[NVK_MME_DRAW] = nvk_mme_draw,
|
||||
[NVK_MME_DRAW_INDEXED] = nvk_mme_draw_indexed,
|
||||
[NVK_MME_DRAW_INDIRECT] = nvk_mme_draw_indirect,
|
||||
[NVK_MME_DRAW_INDEXED_INDIRECT] = nvk_mme_draw_indexed_indirect,
|
||||
[NVK_MME_DRAW_INDIRECT_COUNT] = nvk_mme_draw_indirect_count,
|
||||
[NVK_MME_DRAW_INDEXED_INDIRECT_COUNT] = nvk_mme_draw_indexed_indirect_count,
|
||||
[NVK_MME_ADD_CS_INVOCATIONS] = nvk_mme_add_cs_invocations,
|
||||
[NVK_MME_DISPATCH_INDIRECT] = nvk_mme_dispatch_indirect,
|
||||
[NVK_MME_WRITE_CS_INVOCATIONS] = nvk_mme_write_cs_invocations,
|
||||
[NVK_MME_COPY_QUERIES] = nvk_mme_copy_queries,
|
||||
};
|
||||
|
||||
uint32_t *
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ enum nvk_mme {
|
|||
NVK_MME_DRAW_INDEXED,
|
||||
NVK_MME_DRAW_INDIRECT,
|
||||
NVK_MME_DRAW_INDEXED_INDIRECT,
|
||||
NVK_MME_DRAW_INDIRECT_COUNT,
|
||||
NVK_MME_DRAW_INDEXED_INDIRECT_COUNT,
|
||||
NVK_MME_ADD_CS_INVOCATIONS,
|
||||
NVK_MME_DISPATCH_INDIRECT,
|
||||
NVK_MME_WRITE_CS_INVOCATIONS,
|
||||
|
|
@ -40,6 +42,8 @@ void nvk_mme_draw(struct mme_builder *b);
|
|||
void nvk_mme_draw_indexed(struct mme_builder *b);
|
||||
void nvk_mme_draw_indirect(struct mme_builder *b);
|
||||
void nvk_mme_draw_indexed_indirect(struct mme_builder *b);
|
||||
void nvk_mme_draw_indirect_count(struct mme_builder *b);
|
||||
void nvk_mme_draw_indexed_indirect_count(struct mme_builder *b);
|
||||
void nvk_mme_add_cs_invocations(struct mme_builder *b);
|
||||
void nvk_mme_dispatch_indirect(struct mme_builder *b);
|
||||
void nvk_mme_write_cs_invocations(struct mme_builder *b);
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
#include "clc0c0.h"
|
||||
#include "clc1c0.h"
|
||||
#include "clc3c0.h"
|
||||
#include "clc597.h"
|
||||
#include "clc5c0.h"
|
||||
|
||||
|
||||
|
|
@ -29,7 +30,7 @@ VKAPI_ATTR void VKAPI_CALL
|
|||
nvk_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceFeatures2 *pFeatures)
|
||||
{
|
||||
// VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
|
||||
VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
|
||||
|
||||
pFeatures->features = (VkPhysicalDeviceFeatures) {
|
||||
.robustBufferAccess = true,
|
||||
|
|
@ -103,6 +104,7 @@ nvk_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
.bufferDeviceAddress = true,
|
||||
.bufferDeviceAddressCaptureReplay = false,
|
||||
.bufferDeviceAddressMultiDevice = false,
|
||||
.drawIndirectCount = pdev->info.cls_eng3d >= TURING_A,
|
||||
};
|
||||
|
||||
VkPhysicalDeviceVulkan13Features core_1_3 = {
|
||||
|
|
@ -483,6 +485,7 @@ nvk_get_device_extensions(const struct nvk_physical_device *pdev,
|
|||
.KHR_depth_stencil_resolve = true,
|
||||
.KHR_descriptor_update_template = true,
|
||||
.KHR_device_group = true,
|
||||
.KHR_draw_indirect_count = pdev->info.cls_eng3d >= TURING_A,
|
||||
.KHR_driver_properties = true,
|
||||
.KHR_dynamic_rendering = true,
|
||||
.KHR_format_feature_flags2 = true,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue