mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
tu: Implement unaligned dispatches
These will be used for BVH building. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28447>
This commit is contained in:
parent
1bee1a9301
commit
80649e148d
4 changed files with 230 additions and 46 deletions
|
|
@ -6478,8 +6478,7 @@ struct tu_dispatch_info
|
|||
/**
|
||||
* Indirect compute parameters resource.
|
||||
*/
|
||||
struct tu_buffer *indirect;
|
||||
uint64_t indirect_offset;
|
||||
VkDeviceAddress indirect;
|
||||
};
|
||||
|
||||
static inline struct ir3_driver_params_cs
|
||||
|
|
@ -6524,7 +6523,7 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
return;
|
||||
|
||||
bool direct_indirect_load =
|
||||
!(info->indirect_offset & 0xf) &&
|
||||
!(info->indirect & 0xf) &&
|
||||
!(info->indirect && num_consts > IR3_DP_CS(base_group_x));
|
||||
|
||||
uint64_t iova = 0;
|
||||
|
|
@ -6545,13 +6544,13 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
memcpy(consts.map, &driver_params, num_consts * sizeof(uint32_t));
|
||||
iova = consts.iova;
|
||||
} else if (direct_indirect_load) {
|
||||
iova = info->indirect->iova + info->indirect_offset;
|
||||
iova = info->indirect;
|
||||
} else {
|
||||
/* Vulkan guarantees only 4 byte alignment for indirect_offset.
|
||||
* However, CP_LOAD_STATE.EXT_SRC_ADDR needs 16 byte alignment.
|
||||
*/
|
||||
|
||||
uint64_t indirect_iova = info->indirect->iova + info->indirect_offset;
|
||||
uint64_t indirect_iova = info->indirect;
|
||||
|
||||
/* Wait for any previous uses to finish. */
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
|
@ -6626,21 +6625,19 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit_array(cs, (uint32_t *)&driver_params, num_consts);
|
||||
} else if (!(info->indirect_offset & 0xf)) {
|
||||
} else if (!(info->indirect & 0xf)) {
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(1));
|
||||
tu_cs_emit_qw(cs, info->indirect->iova + info->indirect_offset);
|
||||
tu_cs_emit_qw(cs, info->indirect);
|
||||
} else {
|
||||
/* Vulkan guarantees only 4 byte alignment for indirect_offset.
|
||||
* However, CP_LOAD_STATE.EXT_SRC_ADDR needs 16 byte alignment.
|
||||
*/
|
||||
|
||||
uint64_t indirect_iova = info->indirect->iova + info->indirect_offset;
|
||||
|
||||
/* Wait for any previous uses to finish. */
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
|
|
@ -6648,7 +6645,7 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5);
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit_qw(cs, global_iova_arr(cmd, cs_indirect_xyz, i));
|
||||
tu_cs_emit_qw(cs, indirect_iova + i * 4);
|
||||
tu_cs_emit_qw(cs, info->indirect + i * 4);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
|
||||
|
|
@ -6756,53 +6753,205 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
|
||||
const uint16_t *local_size = shader->variant->local_size;
|
||||
const uint32_t *num_groups = info->blocks;
|
||||
tu_cs_emit_regs(cs,
|
||||
HLSQ_CS_NDRANGE_0(CHIP, .kerneldim = 3,
|
||||
.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1),
|
||||
HLSQ_CS_NDRANGE_1(CHIP, .globalsize_x = local_size[0] * num_groups[0]),
|
||||
HLSQ_CS_NDRANGE_2(CHIP, .globaloff_x = 0),
|
||||
HLSQ_CS_NDRANGE_3(CHIP, .globalsize_y = local_size[1] * num_groups[1]),
|
||||
HLSQ_CS_NDRANGE_4(CHIP, .globaloff_y = 0),
|
||||
HLSQ_CS_NDRANGE_5(CHIP, .globalsize_z = local_size[2] * num_groups[2]),
|
||||
HLSQ_CS_NDRANGE_6(CHIP, .globaloff_z = 0));
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
if (info->unaligned) {
|
||||
assert(CHIP >= A7XX);
|
||||
|
||||
if (info->indirect) {
|
||||
/* This path is tailored for BVH building and currently only supports
|
||||
* 1-dimensional dispatches with a power-of-two local size. We use
|
||||
* CP_RUN_OPENCL instead of CP_EXEC_CS in order to dynamically set
|
||||
* HLSQ_CS_KERNEL_GROUP_X, which is usually set implicitly by the
|
||||
* packet, to the number of workgroups. The registers for Y and Z
|
||||
* dimensions should be unused because we set the kernel dimension to
|
||||
* 1.
|
||||
*/
|
||||
assert(local_size[1] == 1 && local_size[2] == 1);
|
||||
assert(util_is_power_of_two_nonzero(local_size[0]));
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
HLSQ_CS_NDRANGE_0(CHIP, .kerneldim = 1,
|
||||
.localsizex = local_size[0] - 1));
|
||||
|
||||
tu_cs_emit_regs(cs, HLSQ_CS_NDRANGE_2(CHIP, .globaloff_x = 0));
|
||||
|
||||
/* This does:
|
||||
* - waits for pending cache flushes to finish
|
||||
* - CP_WAIT_FOR_ME
|
||||
*
|
||||
* In a sequence of indirect dispatches this shouldn't wait for the
|
||||
* previous dispatches to finish.
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
|
||||
tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_HLSQ_CS_NDRANGE_1));
|
||||
tu_cs_emit_qw(cs, info->indirect);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2);
|
||||
tu_cs_emit(cs, CP_SCRATCH_WRITE_0_SCRATCH(0));
|
||||
tu_cs_emit(cs, ~0u);
|
||||
|
||||
/* CP_REG_RMW and CP_REG_TO_SCRATCH implicitly do a CP_WAIT_FOR_IDLE
|
||||
* *and* CP_WAIT_FOR_ME, which is a full pipeline stall that we don't
|
||||
* want, so manually wait for the CP_MEM_TO_REG write to land and
|
||||
* then skip waiting below with SKIP_WAIT_FOR_ME.
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
|
||||
|
||||
/* scratch0 = (scratch0 & CS_NDRANGE_1) + -1
|
||||
* = (~0 & CS_NDRANGE_1) + -1
|
||||
* = CS_NDRANGE_1 - 1
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_REG_RMW, 3);
|
||||
tu_cs_emit(cs,
|
||||
CP_REG_RMW_0_DST_REG(0) |
|
||||
CP_REG_RMW_0_DST_SCRATCH |
|
||||
CP_REG_RMW_0_SKIP_WAIT_FOR_ME |
|
||||
CP_REG_RMW_0_SRC0_IS_REG |
|
||||
CP_REG_RMW_0_SRC1_ADD);
|
||||
tu_cs_emit(cs, REG_A7XX_HLSQ_CS_NDRANGE_1); /* SRC0 */
|
||||
tu_cs_emit(cs, -1); /* SRC1 */
|
||||
|
||||
/* scratch0 = (scratch0 & (local_size - 1)) rot 2
|
||||
* = (scratch0 & (local_size - 1)) << 2
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_REG_RMW, 3);
|
||||
tu_cs_emit(cs,
|
||||
CP_REG_RMW_0_DST_REG(0) |
|
||||
CP_REG_RMW_0_DST_SCRATCH |
|
||||
CP_REG_RMW_0_SKIP_WAIT_FOR_ME |
|
||||
CP_REG_RMW_0_ROTATE(A7XX_HLSQ_CS_LAST_LOCAL_SIZE_LOCALSIZEX__SHIFT));
|
||||
tu_cs_emit(cs, local_size[0] - 1); /* SRC0 */
|
||||
tu_cs_emit(cs, 0); /* SRC1 */
|
||||
|
||||
/* write scratch0 to HLSQ_CS_LAST_LOCAL_SIZE */
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
|
||||
tu_cs_emit(cs,
|
||||
CP_SCRATCH_TO_REG_0_REG(REG_A7XX_HLSQ_CS_LAST_LOCAL_SIZE) |
|
||||
CP_SCRATCH_TO_REG_0_SCRATCH(0));
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2);
|
||||
tu_cs_emit(cs, CP_SCRATCH_WRITE_0_SCRATCH(0));
|
||||
tu_cs_emit(cs, ~0u);
|
||||
|
||||
/* scratch0 = (scratch0 & CS_NDRANGE_1) + local_size - 1
|
||||
* = (~0u & CS_NDRANGE_1) + local_size - 1
|
||||
* = CS_NDRANGE_1 + local_size - 1
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_REG_RMW, 3);
|
||||
tu_cs_emit(cs,
|
||||
CP_REG_RMW_0_DST_REG(0) |
|
||||
CP_REG_RMW_0_DST_SCRATCH |
|
||||
CP_REG_RMW_0_SKIP_WAIT_FOR_ME |
|
||||
CP_REG_RMW_0_SRC0_IS_REG |
|
||||
CP_REG_RMW_0_SRC1_ADD);
|
||||
tu_cs_emit(cs, REG_A7XX_HLSQ_CS_NDRANGE_1); /* SRC0 */
|
||||
tu_cs_emit(cs, local_size[0] - 1); /* SRC1 */
|
||||
|
||||
unsigned local_size_log2 = util_logbase2(local_size[0]);
|
||||
|
||||
/* scratch0 = (scratch0 & ~(local_size - 1)) rot (32 - log2(local_size))
|
||||
* = scratch0 >> log2(local_size)
|
||||
* = scratch0 / local_size
|
||||
* = (CS_NDRANGE_1 + local_size - 1) / local_size
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_REG_RMW, 3);
|
||||
tu_cs_emit(cs,
|
||||
CP_REG_RMW_0_DST_REG(0) |
|
||||
CP_REG_RMW_0_DST_SCRATCH |
|
||||
CP_REG_RMW_0_SKIP_WAIT_FOR_ME |
|
||||
CP_REG_RMW_0_ROTATE(32 - local_size_log2));
|
||||
tu_cs_emit(cs, ~(local_size[0] - 1)); /* SRC0 */
|
||||
tu_cs_emit(cs, 0); /* SRC1 */
|
||||
|
||||
/* write scratch0 to HLSQ_CS_KERNEL_GROUP_X */
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
|
||||
tu_cs_emit(cs,
|
||||
CP_SCRATCH_TO_REG_0_REG(REG_A7XX_HLSQ_CS_KERNEL_GROUP_X) |
|
||||
CP_SCRATCH_TO_REG_0_SCRATCH(0));
|
||||
} else {
|
||||
tu_cs_emit_regs(cs,
|
||||
HLSQ_CS_NDRANGE_0(CHIP, .kerneldim = 3,
|
||||
.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1),
|
||||
HLSQ_CS_NDRANGE_1(CHIP, .globalsize_x = num_groups[0]),
|
||||
HLSQ_CS_NDRANGE_2(CHIP, .globaloff_x = 0),
|
||||
HLSQ_CS_NDRANGE_3(CHIP, .globalsize_y = num_groups[1]),
|
||||
HLSQ_CS_NDRANGE_4(CHIP, .globaloff_y = 0),
|
||||
HLSQ_CS_NDRANGE_5(CHIP, .globalsize_z = num_groups[2]),
|
||||
HLSQ_CS_NDRANGE_6(CHIP, .globaloff_z = 0));
|
||||
uint32_t last_local_size[3];
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
last_local_size[i] = ((num_groups[i] - 1) % local_size[i]) + 1;
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_HLSQ_CS_LAST_LOCAL_SIZE(.localsizex = last_local_size[0] - 1,
|
||||
.localsizey = last_local_size[1] - 1,
|
||||
.localsizez = last_local_size[2] - 1));
|
||||
}
|
||||
} else {
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_HLSQ_CS_LAST_LOCAL_SIZE(.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1));
|
||||
HLSQ_CS_NDRANGE_0(CHIP, .kerneldim = 3,
|
||||
.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1),
|
||||
HLSQ_CS_NDRANGE_1(CHIP, .globalsize_x = local_size[0] * num_groups[0]),
|
||||
HLSQ_CS_NDRANGE_2(CHIP, .globaloff_x = 0),
|
||||
HLSQ_CS_NDRANGE_3(CHIP, .globalsize_y = local_size[1] * num_groups[1]),
|
||||
HLSQ_CS_NDRANGE_4(CHIP, .globaloff_y = 0),
|
||||
HLSQ_CS_NDRANGE_5(CHIP, .globalsize_z = local_size[2] * num_groups[2]),
|
||||
HLSQ_CS_NDRANGE_6(CHIP, .globaloff_z = 0));
|
||||
if (CHIP >= A7XX) {
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_HLSQ_CS_LAST_LOCAL_SIZE(.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1));
|
||||
}
|
||||
}
|
||||
|
||||
if (info->indirect) {
|
||||
uint64_t iova = info->indirect->iova + info->indirect_offset;
|
||||
trace_start_compute_indirect(&cmd->trace, cs, info->unaligned);
|
||||
|
||||
trace_start_compute_indirect(&cmd->trace, cs);
|
||||
if (info->unaligned) {
|
||||
tu_cs_emit_pkt7(cs, CP_RUN_OPENCL, 1);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
} else {
|
||||
tu_cs_emit_pkt7(cs, CP_EXEC_CS_INDIRECT, 4);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit_qw(cs, info->indirect);
|
||||
tu_cs_emit(cs,
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_EXEC_CS_INDIRECT, 4);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit_qw(cs, iova);
|
||||
tu_cs_emit(cs,
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
|
||||
}
|
||||
|
||||
trace_end_compute_indirect(&cmd->trace, cs,
|
||||
(struct u_trace_address) {
|
||||
.bo = info->indirect->bo,
|
||||
.offset = info->indirect_offset,
|
||||
.bo = NULL,
|
||||
.offset = info->indirect,
|
||||
});
|
||||
} else {
|
||||
trace_start_compute(&cmd->trace, cs, info->indirect != NULL,
|
||||
local_size[0], local_size[1], local_size[2],
|
||||
info->blocks[0], info->blocks[1], info->blocks[2]);
|
||||
trace_start_compute(&cmd->trace, cs, info->indirect != 0,
|
||||
info->unaligned, local_size[0], local_size[1],
|
||||
local_size[2], info->blocks[0], info->blocks[1],
|
||||
info->blocks[2]);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_EXEC_CS, 4);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, CP_EXEC_CS_1_NGROUPS_X(info->blocks[0]));
|
||||
tu_cs_emit(cs, CP_EXEC_CS_2_NGROUPS_Y(info->blocks[1]));
|
||||
tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(info->blocks[2]));
|
||||
if (info->unaligned) {
|
||||
tu_cs_emit_pkt7(cs, CP_EXEC_CS, 4);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, CP_EXEC_CS_1_NGROUPS_X(DIV_ROUND_UP(info->blocks[0],
|
||||
local_size[0])));
|
||||
tu_cs_emit(cs, CP_EXEC_CS_2_NGROUPS_Y(DIV_ROUND_UP(info->blocks[1],
|
||||
local_size[1])));
|
||||
tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(DIV_ROUND_UP(info->blocks[2],
|
||||
local_size[2])));
|
||||
} else {
|
||||
tu_cs_emit_pkt7(cs, CP_EXEC_CS, 4);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, CP_EXEC_CS_1_NGROUPS_X(info->blocks[0]));
|
||||
tu_cs_emit(cs, CP_EXEC_CS_2_NGROUPS_Y(info->blocks[1]));
|
||||
tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(info->blocks[2]));
|
||||
}
|
||||
|
||||
trace_end_compute(&cmd->trace, cs);
|
||||
}
|
||||
|
|
@ -6852,13 +7001,39 @@ tu_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
|
|||
VK_FROM_HANDLE(tu_buffer, buffer, _buffer);
|
||||
struct tu_dispatch_info info = {};
|
||||
|
||||
info.indirect = buffer;
|
||||
info.indirect_offset = offset;
|
||||
info.indirect = buffer->iova + offset;
|
||||
|
||||
tu_dispatch<CHIP>(cmd_buffer, &info);
|
||||
}
|
||||
TU_GENX(tu_CmdDispatchIndirect);
|
||||
|
||||
/*
 * Record a direct "unaligned" compute dispatch on commandBuffer.
 *
 * Builds a zero-initialized tu_dispatch_info with .unaligned set and
 * x/y/z stored in .blocks[], then forwards it to the tu_dispatch variant
 * selected by TU_CALLX for the command buffer's device.
 *
 * In the unaligned, non-indirect path of tu_dispatch, blocks[i] is
 * programmed directly as the global size of dimension i and the workgroup
 * count is DIV_ROUND_UP(blocks[i], local_size[i]); i.e. x/y/z are sizes
 * that need not be multiples of the local size. That path asserts
 * CHIP >= A7XX.
 * NOTE(review): behaviour for x, y or z == 0 is not visible here - confirm
 * callers never pass zero.
 */
void
|
||||
tu_dispatch_unaligned(VkCommandBuffer commandBuffer,
|
||||
uint32_t x, uint32_t y, uint32_t z)
|
||||
{
|
||||
VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
struct tu_dispatch_info info = {};
|
||||
|
||||
info.unaligned = true;
|
||||
info.blocks[0] = x;
|
||||
info.blocks[1] = y;
|
||||
info.blocks[2] = z;
|
||||
TU_CALLX(cmd_buffer->device, tu_dispatch)(cmd_buffer, &info); /* info.indirect stays 0: direct path */
|
||||
}
|
||||
|
||||
/*
 * Record an indirect "unaligned" compute dispatch on commandBuffer.
 *
 * Builds a zero-initialized tu_dispatch_info with .unaligned set and
 * .indirect = size_addr, then forwards it to the tu_dispatch variant
 * selected by TU_CALLX for the command buffer's device.
 *
 * In the unaligned + indirect path of tu_dispatch, the value at
 * info->indirect is loaded with CP_MEM_TO_REG into HLSQ_CS_NDRANGE_1 (the
 * X global size). That path asserts local_size[1] == 1, local_size[2] == 1
 * and a power-of-two local_size[0], so only 1-dimensional dispatches are
 * supported.
 * NOTE(review): tu_dispatch tests "if (info->indirect)", so a size_addr of
 * 0 would be treated as a direct dispatch - presumably never passed; confirm.
 */
void
|
||||
tu_dispatch_unaligned_indirect(VkCommandBuffer commandBuffer,
|
||||
VkDeviceAddress size_addr)
|
||||
{
|
||||
VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
struct tu_dispatch_info info = {};
|
||||
|
||||
info.unaligned = true;
|
||||
info.indirect = size_addr; /* GPU address the dispatch size is read from */
|
||||
|
||||
TU_CALLX(cmd_buffer->device, tu_dispatch)(cmd_buffer, &info);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
|
||||
const VkSubpassEndInfo *pSubpassEndInfo)
|
||||
|
|
|
|||
|
|
@ -683,6 +683,12 @@ tu_restore_suspended_pass(struct tu_cmd_buffer *cmd,
|
|||
template <chip CHIP>
|
||||
void tu_cmd_render(struct tu_cmd_buffer *cmd);
|
||||
|
||||
/* Direct unaligned compute dispatch; x/y/z are stored in
 * tu_dispatch_info::blocks[] with the unaligned flag set.
 */
void tu_dispatch_unaligned(VkCommandBuffer commandBuffer,
|
||||
uint32_t x, uint32_t y, uint32_t z);
|
||||
|
||||
/* Indirect unaligned compute dispatch; size_addr is the device address the
 * dispatch size is read from (stored in tu_dispatch_info::indirect).
 */
void tu_dispatch_unaligned_indirect(VkCommandBuffer commandBuffer,
|
||||
VkDeviceAddress size_addr);
|
||||
|
||||
void tu_write_buffer_cp(VkCommandBuffer commandBuffer,
|
||||
VkDeviceAddress addr,
|
||||
void *data, uint32_t size);
|
||||
|
|
|
|||
|
|
@ -2707,6 +2707,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
fd_rd_output_init(&device->rd_output, output_name);
|
||||
}
|
||||
|
||||
device->vk.cmd_dispatch_unaligned = tu_dispatch_unaligned;
|
||||
device->vk.write_buffer_cp = tu_write_buffer_cp;
|
||||
device->vk.flush_buffer_write_cp = tu_flush_buffer_write_cp;
|
||||
device->vk.cmd_fill_buffer_addr = tu_cmd_fill_buffer_addr;
|
||||
|
|
|
|||
|
|
@ -142,6 +142,7 @@ begin_end_tp('blit',
|
|||
|
||||
begin_end_tp('compute',
|
||||
args=[Arg(type='uint8_t', var='indirect', c_format='%u'),
|
||||
Arg(type='uint8_t', var='unaligned', c_format='%u'),
|
||||
Arg(type='uint16_t', var='local_size_x', c_format='%u'),
|
||||
Arg(type='uint16_t', var='local_size_y', c_format='%u'),
|
||||
Arg(type='uint16_t', var='local_size_z', c_format='%u'),
|
||||
|
|
@ -150,6 +151,7 @@ begin_end_tp('compute',
|
|||
Arg(type='uint16_t', var='num_groups_z', c_format='%u')])
|
||||
|
||||
begin_end_tp('compute_indirect',
|
||||
args=[Arg(type='uint8_t', var='unaligned', c_format='%u')],
|
||||
end_args=[ArgStruct(type='VkDispatchIndirectCommand', var='size',
|
||||
is_indirect=True, c_format="%ux%ux%u",
|
||||
fields=['x', 'y', 'z'])])
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue