anv: implement VK_KHR_device_address_commands

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40387>
Lionel Landwerlin, 2025-02-27 08:05:26 +02:00, committed by Marge Bot
parent 7adece7ce0
commit f123030dcd
8 changed files with 1035 additions and 0 deletions


@@ -700,6 +700,41 @@ anv_blorp_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
return false;
}
static bool
anv_blorp_blitter_execute_on_companion2(struct anv_cmd_buffer *cmd_buffer,
struct anv_image *image,
uint32_t region_count,
const VkDeviceMemoryImageCopyKHR* regions)
{
if (!anv_cmd_buffer_is_blitter_queue(cmd_buffer))
return false;
bool blorp_execute_on_companion = false;
for (unsigned r = 0; r < region_count && !blorp_execute_on_companion; r++) {
VkImageAspectFlags aspect_mask = regions[r].imageSubresource.aspectMask;
enum isl_format linear_format =
anv_get_isl_format(cmd_buffer->device->physical, image->vk.format,
aspect_mask, VK_IMAGE_TILING_LINEAR);
const struct isl_format_layout *linear_fmtl =
isl_format_get_layout(linear_format);
switch (linear_fmtl->bpb) {
case 96:
/* We can only support linear tiling for 96 bpb formats on the blitter engine. */
blorp_execute_on_companion |=
image->vk.tiling != VK_IMAGE_TILING_LINEAR;
break;
default:
blorp_execute_on_companion |= linear_fmtl->bpb % 3 == 0;
break;
}
}
return blorp_execute_on_companion;
}
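
The routing rule above, reduced to a standalone sketch (function and variable names here are illustrative, not driver code; only the bpb arithmetic is taken from the diff):

#include <stdbool.h>
#include <stdio.h>

/* Model of the per-region test above: the blitter cannot handle
 * 3-component formats natively, except 96 bpb images in linear tiling. */
static bool needs_companion_rcs(unsigned bpb, bool image_is_linear)
{
   if (bpb == 96)
      return !image_is_linear; /* 96 bpb is blitter-safe only when linear */
   return bpb % 3 == 0;        /* other 3-component widths (24, 48) never are */
}

int main(void)
{
   printf("%d\n", needs_companion_rcs(96, false)); /* 1: tiled RGB32 -> RCS */
   printf("%d\n", needs_companion_rcs(96, true));  /* 0: linear RGB32 stays on blitter */
   printf("%d\n", needs_companion_rcs(48, true));  /* 1: RGB16 -> RCS */
   printf("%d\n", needs_companion_rcs(32, false)); /* 0: RGBA8 stays on blitter */
   return 0;
}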
void anv_CmdCopyImage2(
VkCommandBuffer commandBuffer,
const VkCopyImageInfo2* pCopyImageInfo)
@@ -915,6 +950,77 @@ void anv_CmdCopyBufferToImage2(
}
}
void anv_CmdCopyMemoryToImageKHR(
VkCommandBuffer commandBuffer,
const VkCopyDeviceMemoryImageInfoKHR* pCopyMemoryInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_image, dst_image, pCopyMemoryInfo->image);
bool blorp_execute_on_companion =
anv_blorp_execute_on_companion(cmd_buffer, NULL, dst_image);
/* Check whether any of the aspects is incompatible with the blitter engine;
* if so, use the companion RCS command buffer for the blit, since
* 3-component formats are not supported natively on the blitter except at
* 96 bpb.
*/
blorp_execute_on_companion |=
anv_blorp_blitter_execute_on_companion2(cmd_buffer, dst_image,
pCopyMemoryInfo->regionCount,
pCopyMemoryInfo->pRegions);
anv_cmd_require_rcs(cmd_buffer, blorp_execute_on_companion) {
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, BLORP_BATCH_SRC_UNPADDED);
for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) {
const VkDeviceMemoryImageCopyKHR *region = &pCopyMemoryInfo->pRegions[r];
const struct vk_image_buffer_layout buffer_layout =
vk_image_memory_copy_layout(&dst_image->vk, region);
copy_buffer_to_image(cmd_buffer, &batch,
anv_address_from_range_flags(
region->addressRange,
region->addressFlags),
&buffer_layout,
dst_image, region->imageLayout,
region->imageSubresource,
region->imageOffset, region->imageExtent,
true);
}
anv_blorp_batch_finish(&batch);
if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
const enum anv_pipe_bits pipe_bits =
anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
anv_add_pending_pipe_bits(cmd_buffer,
(batch.flags & BLORP_BATCH_USE_COMPUTE) ?
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT :
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
pipe_bits,
"Copy flush before astc emu");
for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) {
const VkDeviceMemoryImageCopyKHR *region =
&pCopyMemoryInfo->pRegions[r];
const VkOffset3D block_offset = vk_image_offset_to_elements(
&dst_image->vk, region->imageOffset);
const VkExtent3D block_extent = vk_image_extent_to_elements(
&dst_image->vk, region->imageExtent);
anv_astc_emu_process(cmd_buffer, dst_image,
region->imageLayout,
&region->imageSubresource,
block_offset, block_extent);
}
}
}
}
static void
anv_add_buffer_write_pending_bits(struct anv_cmd_buffer *cmd_buffer,
const char *reason)
@@ -976,6 +1082,50 @@ void anv_CmdCopyImageToBuffer2(
}
}
void anv_CmdCopyImageToMemoryKHR(
VkCommandBuffer commandBuffer,
const VkCopyDeviceMemoryImageInfoKHR* pCopyMemoryInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_image, src_image, pCopyMemoryInfo->image);
bool blorp_execute_on_companion =
anv_blorp_execute_on_companion(cmd_buffer, src_image, NULL);
/* Check whether any of the aspects is incompatible with the blitter engine;
* if so, use the companion RCS command buffer for the blit, since
* 3-component formats are not supported natively on the blitter except at
* 96 bpb.
*/
blorp_execute_on_companion |=
anv_blorp_blitter_execute_on_companion2(cmd_buffer, src_image,
pCopyMemoryInfo->regionCount,
pCopyMemoryInfo->pRegions);
anv_cmd_require_rcs(cmd_buffer, blorp_execute_on_companion) {
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0);
for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) {
const VkDeviceMemoryImageCopyKHR *region = &pCopyMemoryInfo->pRegions[r];
const struct vk_image_buffer_layout memory_layout =
vk_image_memory_copy_layout(&src_image->vk, region);
copy_buffer_to_image(cmd_buffer, &batch,
anv_address_from_range_flags(region->addressRange,
region->addressFlags),
&memory_layout,
src_image, region->imageLayout,
region->imageSubresource,
region->imageOffset, region->imageExtent,
false);
}
anv_add_buffer_write_pending_bits(cmd_buffer, "after copy image to buffer");
anv_blorp_batch_finish(&batch);
}
}
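
Both new entry points funnel into the same copy_buffer_to_image() helper; only the trailing boolean differs, selecting the copy direction. A minimal stand-in to make that visible (hypothetical names; the real helper takes far more state):

#include <stdbool.h>
#include <stdio.h>

/* The last parameter mirrors the driver helper's direction flag: true for
 * memory -> image (CmdCopyMemoryToImageKHR), false for image -> memory
 * (CmdCopyImageToMemoryKHR). */
static void copy_between_memory_and_image(const char *image, bool mem_to_img)
{
   printf(mem_to_img ? "memory -> %s\n" : "%s -> memory\n", image);
}

int main(void)
{
   copy_between_memory_and_image("dst_image", true);  /* CmdCopyMemoryToImageKHR */
   copy_between_memory_and_image("src_image", false); /* CmdCopyImageToMemoryKHR */
   return 0;
}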
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
@@ -1235,6 +1385,34 @@ void anv_CmdCopyBuffer2(
anv_blorp_batch_finish(&batch);
}
void anv_CmdCopyMemoryKHR(
VkCommandBuffer commandBuffer,
const VkCopyDeviceMemoryInfoKHR* pCopyMemoryInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch,
BLORP_BATCH_SRC_UNPADDED |
(cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0));
for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) {
const VkDeviceMemoryCopyKHR *region = &pCopyMemoryInfo->pRegions[r];
copy_memory(cmd_buffer->device, &batch,
anv_address_from_range_flags(region->srcRange,
region->srcFlags),
anv_address_from_range_flags(region->dstRange,
region->dstFlags),
region->srcRange.size);
}
anv_add_buffer_write_pending_bits(cmd_buffer, "after copy buffer");
anv_blorp_batch_finish(&batch);
}
void
anv_cmd_buffer_update_addr(
struct anv_cmd_buffer* cmd_buffer,
@@ -1323,6 +1501,20 @@ void anv_CmdUpdateBuffer(
dataSize, pData);
}
void anv_CmdUpdateMemoryKHR(
VkCommandBuffer commandBuffer,
const VkDeviceAddressRangeKHR* pDstRange,
VkAddressCommandFlagsKHR dstFlags,
VkDeviceSize dataSize,
const void* pData)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
anv_cmd_buffer_update_addr(cmd_buffer,
anv_address_from_range_flags(*pDstRange, dstFlags),
pDstRange->size, pData);
}
void
anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
struct anv_address address,
@@ -1413,6 +1605,31 @@ void anv_CmdFillBuffer(
anv_add_buffer_write_pending_bits(cmd_buffer, "after fill buffer");
}
void anv_CmdFillMemoryKHR(
VkCommandBuffer commandBuffer,
const VkDeviceAddressRangeKHR* pDstRange,
VkAddressCommandFlagsKHR dstFlags,
uint32_t data)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
/* From the Vulkan spec for vkCmdFillBuffer:
*
* "size is the number of bytes to fill, and must be either a multiple
* of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
* the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
* buffer is not a multiple of 4, then the nearest smaller multiple is
* used."
*/
const VkDeviceSize size = pDstRange->size & ~3ull;
anv_cmd_buffer_fill_area(cmd_buffer,
anv_address_from_range_flags(*pDstRange, dstFlags),
size, data);
anv_add_buffer_write_pending_bits(cmd_buffer, "after fill buffer");
}
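
A worked example of the size & ~3ull clamp above, rounding the fill size down to a multiple of 4 as the quoted spec language requires:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   const uint64_t sizes[] = { 16, 17, 18, 19, 20 };
   for (int i = 0; i < 5; i++) {
      /* Prints 16->16, 17->16, 18->16, 19->16, 20->20. */
      printf("%llu -> %llu\n", (unsigned long long)sizes[i],
             (unsigned long long)(sizes[i] & ~3ull));
   }
   return 0;
}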
static void
exec_ccs_op(struct anv_cmd_buffer *cmd_buffer,
struct blorp_batch *batch,


@@ -1007,6 +1007,39 @@ void anv_CmdBindVertexBuffers2(
}
}
void anv_CmdBindVertexBuffers3KHR(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBindVertexBuffer3InfoKHR* pBindingInfos)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
/* We have to defer setting up the vertex buffers since we need the buffer
* strides from the pipeline. */
assert(firstBinding + bindingCount <= get_max_vbs(cmd_buffer->device->info));
for (uint32_t i = 0; i < bindingCount; i++) {
if (vb[firstBinding + i].addr != pBindingInfos[i].addressRange.address ||
vb[firstBinding + i].size != pBindingInfos[i].addressRange.size) {
vb[firstBinding + i] = (struct anv_vertex_binding) {
.addr = pBindingInfos[i].addressRange.address,
.size = pBindingInfos[i].addressRange.size,
.mocs = anv_mocs(cmd_buffer->device, NULL,
((pBindingInfos[i].addressFlags &
VK_ADDRESS_COMMAND_PROTECTED_BIT_KHR) ?
ISL_SURF_USAGE_PROTECTED_BIT : 0) |
ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
};
cmd_buffer->state.gfx.vb_dirty |= 1 << (firstBinding + i);
}
}
vk_cmd_set_vertex_binding_strides2(&cmd_buffer->vk, firstBinding,
bindingCount, pBindingInfos);
}
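
The MOCS choice above depends on translating VK_ADDRESS_COMMAND_PROTECTED_BIT_KHR into an ISL usage bit. A standalone model with placeholder bit values (the real enum values live in the isl and Vulkan headers):

#include <stdint.h>
#include <stdio.h>

enum {
   FAKE_ISL_SURF_USAGE_VERTEX_BUFFER_BIT = 1 << 0,
   FAKE_ISL_SURF_USAGE_PROTECTED_BIT     = 1 << 1,
};
#define FAKE_VK_ADDRESS_COMMAND_PROTECTED_BIT_KHR (1u << 0)

/* Same shape as the expression passed to anv_mocs() above: the protected
 * address flag adds the protected usage bit on top of the buffer usage. */
static uint32_t vb_usage(uint32_t address_flags)
{
   return ((address_flags & FAKE_VK_ADDRESS_COMMAND_PROTECTED_BIT_KHR) ?
           FAKE_ISL_SURF_USAGE_PROTECTED_BIT : 0) |
          FAKE_ISL_SURF_USAGE_VERTEX_BUFFER_BIT;
}

int main(void)
{
   printf("0x%x\n", vb_usage(0));                                         /* 0x1 */
   printf("0x%x\n", vb_usage(FAKE_VK_ADDRESS_COMMAND_PROTECTED_BIT_KHR)); /* 0x3 */
   return 0;
}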
void anv_CmdBindIndexBuffer2(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
@@ -1038,6 +1071,32 @@ void anv_CmdBindIndexBuffer2(
}
}
void anv_CmdBindIndexBuffer3KHR(
VkCommandBuffer commandBuffer,
const VkBindIndexBuffer3InfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
if (cmd_buffer->state.gfx.index_type != pInfo->indexType) {
cmd_buffer->state.gfx.index_type = pInfo->indexType;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_TYPE;
}
vk_cmd_set_index_buffer_type(&cmd_buffer->vk, pInfo->indexType);
if (cmd_buffer->state.gfx.index_addr != pInfo->addressRange.address ||
cmd_buffer->state.gfx.index_size != pInfo->addressRange.size) {
cmd_buffer->state.gfx.index_addr = pInfo->addressRange.address;
cmd_buffer->state.gfx.index_size = pInfo->addressRange.size;
cmd_buffer->state.gfx.index_mocs =
anv_mocs(cmd_buffer->device, NULL,
((pInfo->addressFlags &
VK_ADDRESS_COMMAND_PROTECTED_BIT_KHR) ?
ISL_SURF_USAGE_PROTECTED_BIT : 0) |
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
}
}
void anv_CmdBindTransformFeedbackBuffersEXT(
VkCommandBuffer commandBuffer,
@@ -1071,6 +1130,36 @@ void anv_CmdBindTransformFeedbackBuffersEXT(
}
}
void anv_CmdBindTransformFeedbackBuffers2EXT(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBindTransformFeedbackBuffer2InfoEXT* pBindingInfos)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings;
/* The actual 3DSTATE_SO_BUFFER setup is deferred until transform feedback
* is enabled. */
assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS);
for (uint32_t i = 0; i < bindingCount; i++) {
if (pBindingInfos[i].addressRange.size == 0) {
xfb[firstBinding + i] = (struct anv_xfb_binding) { 0 };
} else {
xfb[firstBinding + i] = (struct anv_xfb_binding) {
.addr = pBindingInfos[i].addressRange.address,
.size = pBindingInfos[i].addressRange.size,
.mocs = anv_mocs(cmd_buffer->device, NULL,
((pBindingInfos[i].addressFlags &
VK_ADDRESS_COMMAND_PROTECTED_BIT_KHR) ?
ISL_SURF_USAGE_PROTECTED_BIT : 0) |
ISL_SURF_USAGE_STREAM_OUT_BIT),
};
}
}
}
enum isl_format
anv_isl_format_for_descriptor_type(const struct anv_device *device,
VkDescriptorType type)


@@ -159,6 +159,7 @@ get_device_extensions(const struct anv_physical_device *device,
.KHR_depth_stencil_resolve = true,
.KHR_descriptor_update_template = true,
.KHR_device_address_commands = true,
.KHR_device_group = true,
.KHR_draw_indirect_count = true,
.KHR_driver_properties = true,
.KHR_dynamic_rendering = true,
@@ -1051,6 +1052,9 @@ get_features(const struct anv_physical_device *pdevice,
/* VK_KHR_maintenance11 */
.maintenance11 = true,
/* VK_KHR_device_address_commands */
.deviceAddressCommands = true,
};
/* The new DOOM and Wolfenstein games require depthBounds without


@@ -530,6 +530,28 @@ anv_address_from_u64(uint64_t addr_u64)
};
}
static inline struct anv_address
anv_address_from_range_flags(VkDeviceAddressRangeKHR range,
VkAddressCommandFlagsKHR flags)
{
return (struct anv_address) {
.bo = NULL,
.offset = range.address,
.protected = (flags & VK_ADDRESS_COMMAND_PROTECTED_BIT_KHR) != 0,
};
}
static inline struct anv_address
anv_address_from_strided_range_flags(VkStridedDeviceAddressRangeKHR range,
VkAddressCommandFlagsKHR flags)
{
return (struct anv_address) {
.bo = NULL,
.offset = range.address,
.protected = (flags & VK_ADDRESS_COMMAND_PROTECTED_BIT_KHR) != 0,
};
}
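
What these helpers construct, modeled standalone: a bo-less anv_address whose offset carries the raw device address, plus a protected flag derived from the address-command flags. The struct layout below is simplified for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_range   { uint64_t address, size; };
struct fake_address { void *bo; uint64_t offset; bool is_protected; };

static struct fake_address from_range(struct fake_range r, bool is_protected)
{
   return (struct fake_address) {
      .bo = NULL,          /* no buffer object: the address is already absolute */
      .offset = r.address, /* the device address goes straight into the offset */
      .is_protected = is_protected,
   };
}

int main(void)
{
   struct fake_address a =
      from_range((struct fake_range) { 0x1000, 256 }, true);
   printf("bo=%p offset=0x%llx protected=%d\n",
          a.bo, (unsigned long long)a.offset, a.is_protected);
   return 0;
}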
static inline bool
anv_address_is_null(struct anv_address addr)
{


@@ -4936,6 +4936,27 @@ cmd_buffer_barrier_video(struct anv_cmd_buffer *cmd_buffer,
break;
}
const VkMemoryRangeBarriersInfoKHR *mem_range_barriers =
vk_find_struct_const(dep_infos->pNext, MEMORY_RANGE_BARRIERS_INFO_KHR);
for (uint32_t i = 0; mem_range_barriers && i < mem_range_barriers->memoryRangeBarrierCount; i++) {
const VkMemoryRangeBarrierKHR *mem_barrier =
&mem_range_barriers->pMemoryRangeBarriers[i];
const VkMemoryBarrierAccessFlags3KHR *barrier3 =
vk_find_struct_const(mem_barrier->pNext,
MEMORY_BARRIER_ACCESS_FLAGS_3_KHR);
/* Flush the cache if something is written by the video operations and
* used by any other stages except video encode/decode stage.
*/
if (stage_is_video(mem_barrier->srcStageMask) &&
mask_is_write(mem_barrier->srcAccessMask,
barrier3 ? barrier3->srcAccessMask3 : 0) &&
!stage_is_video(mem_barrier->dstStageMask)) {
flush_llc = true;
break;
}
}
if (flush_ccs || flush_llc || !anv_address_is_null(signal_addr)) {
anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), fd) {
#if GFX_VERx10 >= 125
@@ -5051,6 +5072,26 @@ cmd_buffer_barrier_blitter(struct anv_cmd_buffer *cmd_buffer,
}
}
const VkMemoryRangeBarriersInfoKHR *mem_range_barriers =
vk_find_struct_const(dep_info->pNext, MEMORY_RANGE_BARRIERS_INFO_KHR);
for (uint32_t i = 0; mem_range_barriers && i < mem_range_barriers->memoryRangeBarrierCount; i++) {
const VkMemoryRangeBarrierKHR *mem_barrier =
&mem_range_barriers->pMemoryRangeBarriers[i];
const VkMemoryBarrierAccessFlags3KHR *barrier3 =
vk_find_struct_const(mem_barrier->pNext,
MEMORY_BARRIER_ACCESS_FLAGS_3_KHR);
/* Flush the cache if something is written by the transfer command
* and used by any other stages except transfer stage.
*/
if (stage_is_transfer(mem_barrier->srcStageMask) &&
mask_is_write(mem_barrier->srcAccessMask,
barrier3 ? barrier3->srcAccessMask3 : 0)) {
flush_llc = true;
break;
}
}
/* We cannot gather more information than that. */
if (flush_ccs && flush_llc)
break;
@@ -5366,6 +5407,53 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer,
if (anv_image_is_sparse(image) &&
mask_is_write(src_flags, barrier3 ? barrier3->srcAccessMask3 : 0))
apply_sparse_flushes = true;
#endif
}
const VkMemoryRangeBarriersInfoKHR *mem_range_barriers =
vk_find_struct_const(dep_info->pNext, MEMORY_RANGE_BARRIERS_INFO_KHR);
for (uint32_t i = 0; mem_range_barriers && i < mem_range_barriers->memoryRangeBarrierCount; i++) {
const VkMemoryRangeBarrierKHR *mem_barrier =
&mem_range_barriers->pMemoryRangeBarriers[i];
const VkMemoryBarrierAccessFlags3KHR *barrier3 =
vk_find_struct_const(mem_barrier->pNext,
MEMORY_BARRIER_ACCESS_FLAGS_3_KHR);
if (barrier3) {
src_flags3 |= barrier3->srcAccessMask3;
dst_flags3 |= barrier3->dstAccessMask3;
}
src_flags |= mem_barrier->srcAccessMask;
dst_flags |= mem_barrier->dstAccessMask;
src_stages |= mem_barrier->srcStageMask;
dst_stages |= mem_barrier->dstStageMask;
/* Shader writes to buffers that could then be written by a transfer
* command (including queries).
*/
if (stage_is_shader(mem_barrier->srcStageMask) &&
mask_is_shader_write(mem_barrier->srcAccessMask,
barrier3 ? barrier3->srcAccessMask3 : 0) &&
stage_is_transfer(mem_barrier->dstStageMask)) {
cmd_buffer->state.queries.buffer_write_bits |=
ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
}
if (stage_is_transfer(mem_barrier->srcStageMask) &&
mask_is_transfer_write(mem_barrier->srcAccessMask) &&
cmd_buffer_has_pending_copy_query(cmd_buffer))
flush_query_copies = true;
#if GFX_VER < 20
/* There's no way of knowing if this memory barrier is related to
* sparse buffers! This is pretty horrible.
*/
if (mask_is_write(src_flags,
barrier3 ? barrier3->srcAccessMask3 : 0) &&
p_atomic_read(&device->num_sparse_resources) > 0)
apply_sparse_flushes = true;
#endif
}
}
@@ -6958,6 +7046,49 @@ void genX(CmdBeginConditionalRenderingEXT)(
mi_ult(&b, mi_imm(0), value));
}
void genX(CmdBeginConditionalRendering2EXT)(
VkCommandBuffer commandBuffer,
const VkConditionalRenderingBeginInfo2EXT* pConditionalRenderingBegin)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_state *cmd_state = &cmd_buffer->state;
struct anv_address value_address =
anv_address_from_u64(pConditionalRenderingBegin->addressRange.address);
const bool isInverted = pConditionalRenderingBegin->flags &
VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
cmd_state->conditional_render_enabled = true;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &value_address);
mi_builder_set_mocs(&b, mocs);
/* Section 19.4 of the Vulkan 1.1.85 spec says:
*
* If the value of the predicate in buffer memory changes
* while conditional rendering is active, the rendering commands
* may be discarded in an implementation-dependent way.
* Some implementations may latch the value of the predicate
* upon beginning conditional rendering while others
* may read it before every rendering command.
*
* So it's perfectly fine to read a value from the buffer once.
*/
struct mi_value value = mi_mem32(value_address);
/* Precompute predicate result, it is necessary to support secondary
* command buffers since it is unknown if conditional rendering is
* inverted when populating them.
*/
mi_store(&b, mi_reg64(ANV_PREDICATE_RESULT_REG),
isInverted ? mi_uge(&b, mi_imm(0), value) :
mi_ult(&b, mi_imm(0), value));
}
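
A CPU model of the predicate stored above: mi_ult(0, value) is true exactly when value != 0, and mi_uge(0, value) exactly when value == 0, which is how the inverted flag flips the test:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool predicate_result(uint32_t value, bool inverted)
{
   return inverted ? (0u >= value)  /* mi_uge(0, value): pass when value == 0 */
                   : (0u <  value); /* mi_ult(0, value): pass when value != 0 */
}

int main(void)
{
   printf("%d %d\n", predicate_result(0, false), predicate_result(7, false)); /* 0 1 */
   printf("%d %d\n", predicate_result(0, true),  predicate_result(7, true));  /* 1 0 */
   return 0;
}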
void genX(CmdEndConditionalRenderingEXT)(
VkCommandBuffer commandBuffer)
{
@@ -7673,6 +7804,45 @@ genX(CmdWriteBufferMarker2AMD)(VkCommandBuffer commandBuffer,
trace_intel_end_write_buffer_marker(&cmd_buffer->trace);
}
void genX(CmdWriteMarkerToMemoryAMD)(
VkCommandBuffer commandBuffer,
const VkMemoryMarkerInfoAMD* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
/* The barriers inserted by the application to make dstBuffer writable
* should already have the L1/L2 cache flushes. On platforms where the
* command streamer is not coherent with L3, we need an additional set of
* cache flushes.
*/
enum anv_pipe_bits bits =
(ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info) ? 0 :
(ANV_PIPE_DATA_CACHE_FLUSH_BIT | ANV_PIPE_TILE_CACHE_FLUSH_BIT)) |
ANV_PIPE_END_OF_PIPE_SYNC_BIT;
trace_intel_begin_write_buffer_marker(&cmd_buffer->trace);
anv_add_pending_pipe_bits(cmd_buffer, pInfo->stage,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
bits, "write buffer marker");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
/* Emitting a PIPE_CONTROL with Post-Sync Op = Write Immediate Data
* would be the logical way to implement this extension, as it could
* do a pipelined marker write. Unfortunately, it requires writing
* whole 64-bit QWords, and VK_AMD_buffer_marker requires writing a
* 32-bit value. MI_STORE_DATA_IMM is the only good way to do that,
* and unfortunately it requires stalling.
*/
mi_store(&b, mi_mem32(anv_address_from_u64(pInfo->dstRange.address)),
mi_imm(pInfo->marker));
trace_intel_end_write_buffer_marker(&cmd_buffer->trace);
}
void
genX(cmd_write_buffer_cp)(struct anv_cmd_buffer *cmd_buffer,
VkDeviceAddress dstAddr,


@@ -886,6 +886,16 @@ void genX(CmdDispatchIndirect)(
genX(cmd_buffer_dispatch_indirect)(cmd_buffer, addr, false);
}
void genX(CmdDispatchIndirect2KHR)(
VkCommandBuffer commandBuffer,
const VkDispatchIndirect2InfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
genX(cmd_buffer_dispatch_indirect)(
cmd_buffer, anv_address_from_u64(pInfo->addressRange.address), false);
}
struct anv_address
genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer)
{


@@ -1774,6 +1774,97 @@ void genX(CmdDrawIndirectByteCountEXT)(
gfx->fs_source_hash);
}
void genX(CmdDrawIndirectByteCount2EXT)(
VkCommandBuffer commandBuffer,
uint32_t instanceCount,
uint32_t firstInstance,
const VkBindTransformFeedbackBuffer2InfoEXT* pCounterInfo,
uint32_t counterOffset,
uint32_t vertexStride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
/* firstVertex is always zero for this draw function */
const uint32_t firstVertex = 0;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indirect byte count",
instanceCount * gfx->instance_multiplier);
trace_intel_begin_draw_indirect_byte_count(&cmd_buffer->trace);
/* Select pipeline here to allow
* cmd_buffer_emit_vertex_constants_and_flush() without flushing before
* emit_base_vertex_instance() & emit_draw_index().
*/
genX(flush_pipeline_select_3d)(cmd_buffer);
#if GFX_VER < 11
const struct brw_vs_prog_data *vs_prog_data = get_gfx_vs_prog_data(gfx);
if (vs_prog_data->uses_firstvertex ||
vs_prog_data->uses_baseinstance)
emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance);
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, 0);
#endif
cmd_buffer_flush_gfx(cmd_buffer);
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
struct anv_address counter_addr =
anv_address_from_u64(pCounterInfo->addressRange.address);
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &counter_addr);
mi_builder_set_mocs(&b, mocs);
struct mi_value count = mi_mem32(counter_addr);
if (counterOffset)
count = mi_isub(&b, count, mi_imm(counterOffset));
count = mi_udiv32_imm(&b, count, vertexStride);
mi_store(&b, mi_reg32(GFX7_3DPRIM_VERTEX_COUNT), count);
mi_store(&b, mi_reg32(GFX7_3DPRIM_START_VERTEX), mi_imm(firstVertex));
mi_store(&b, mi_reg32(GFX7_3DPRIM_INSTANCE_COUNT),
mi_imm(instanceCount * gfx->instance_multiplier));
mi_store(&b, mi_reg32(GFX7_3DPRIM_START_INSTANCE), mi_imm(firstInstance));
mi_store(&b, mi_reg32(GFX7_3DPRIM_BASE_VERTEX), mi_imm(0));
#if GFX_VER >= 11
mi_store(&b, mi_reg32(GEN11_3DPRIM_XP_BASE_VERTEX),
mi_imm(firstVertex));
/* GEN11_3DPRIM_XP_BASE_INSTANCE is implicit */
mi_store(&b, mi_reg32(GEN11_3DPRIM_XP_DRAW_ID), mi_imm(0));
#endif
cmd_buffer_pre_draw_wa(cmd_buffer);
anv_batch_emit(&cmd_buffer->batch, _3DPRIMITIVE_DIRECT, prim) {
#if GFX_VERx10 >= 125
prim.TBIMREnable = cmd_buffer->state.gfx.dyn_state.use_tbimr;
#endif
prim.IndirectParameterEnable = true;
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
prim.VertexAccessType = SEQUENTIAL;
#if GFX_VER >= 11
prim.ExtendedParametersPresent = true;
#endif
}
cmd_buffer_post_draw_wa(cmd_buffer, 1, SEQUENTIAL);
trace_intel_end_draw_indirect_byte_count(&cmd_buffer->trace,
instanceCount * gfx->instance_multiplier,
gfx->vs_source_hash,
gfx->fs_source_hash);
}
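
The MI math above computes vertexCount = (counter - counterOffset) / vertexStride. A worked example with illustrative numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   const uint32_t counter = 1440;     /* bytes written by the streamout stage */
   const uint32_t counter_offset = 0; /* counterOffset argument */
   const uint32_t vertex_stride = 48; /* bytes per captured vertex */
   printf("%u vertices\n", (counter - counter_offset) / vertex_stride); /* 30 */
   return 0;
}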
static void
load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer,
struct anv_address addr,
@@ -2106,6 +2197,48 @@ void genX(CmdDrawIndirect)(
gfx->fs_source_hash);
}
void genX(CmdDrawIndirect2KHR)(
VkCommandBuffer commandBuffer,
const VkDrawIndirect2InfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indirect",
pInfo->drawCount);
trace_intel_begin_draw_indirect(&cmd_buffer->trace);
struct anv_address indirect_data_addr =
anv_address_from_u64(pInfo->addressRange.address);
uint64_t stride =
MAX2(pInfo->addressRange.stride, sizeof(VkDrawIndirectCommand));
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer, indirect_data_addr, stride,
ANV_NULL_ADDRESS /* count_addr */, pInfo->drawCount,
VK_CMD_DRAW_INDIRECT);
} else if (anv_use_generated_draws(cmd_buffer, pInfo->drawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer, indirect_data_addr, stride,
ANV_NULL_ADDRESS /* count_addr */, pInfo->drawCount,
false /* indexed */);
} else {
emit_indirect_draws(cmd_buffer,
indirect_data_addr, stride,
pInfo->drawCount, false /* indexed */);
}
trace_intel_end_draw_indirect(&cmd_buffer->trace, pInfo->drawCount,
gfx->vs_source_hash,
gfx->fs_source_hash);
}
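
The MAX2 clamp above guards against a zero or undersized stride in the address range by falling back to tightly packed VkDrawIndirectCommand records (16 bytes: four uint32_t members). Sketch:

#include <stdint.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   const uint64_t cmd_size = 16; /* sizeof(VkDrawIndirectCommand) */
   printf("%llu\n", (unsigned long long)MAX2((uint64_t)0, cmd_size));  /* 16: packed */
   printf("%llu\n", (unsigned long long)MAX2((uint64_t)32, cmd_size)); /* 32: app stride */
   return 0;
}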
void genX(CmdDrawIndexedIndirect)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
@@ -2158,6 +2291,47 @@ void genX(CmdDrawIndexedIndirect)(
gfx->fs_source_hash);
}
void genX(CmdDrawIndexedIndirect2KHR)(
VkCommandBuffer commandBuffer,
const VkDrawIndirect2InfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indexed indirect",
pInfo->drawCount);
trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace);
struct anv_address indirect_data_addr =
anv_address_from_u64(pInfo->addressRange.address);
uint64_t stride =
MAX2(pInfo->addressRange.stride, sizeof(VkDrawIndexedIndirectCommand));
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer, indirect_data_addr, stride,
ANV_NULL_ADDRESS /* count_addr */, pInfo->drawCount,
VK_CMD_DRAW_INDEXED_INDIRECT);
} else if (anv_use_generated_draws(cmd_buffer, pInfo->drawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer, indirect_data_addr, stride,
ANV_NULL_ADDRESS /* count_addr */, pInfo->drawCount,
true /* indexed */);
} else {
emit_indirect_draws(cmd_buffer, indirect_data_addr, stride,
pInfo->drawCount, true /* indexed */);
}
trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, pInfo->drawCount,
gfx->vs_source_hash,
gfx->fs_source_hash);
}
#define MI_PREDICATE_SRC0 0x2400
#define MI_PREDICATE_SRC1 0x2408
#define MI_PREDICATE_RESULT 0x2418
@@ -2365,6 +2539,50 @@ void genX(CmdDrawIndirectCount)(
gfx->fs_source_hash);
}
void genX(CmdDrawIndirectCount2KHR)(
VkCommandBuffer commandBuffer,
const VkDrawIndirectCount2InfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indirect count",
0);
trace_intel_begin_draw_indirect_count(&cmd_buffer->trace);
struct anv_address indirect_data_address =
anv_address_from_u64(pInfo->addressRange.address);
uint64_t stride =
MAX2(pInfo->addressRange.stride, sizeof(VkDrawIndirectCommand));
struct anv_address count_address =
anv_address_from_u64(pInfo->countAddressRange.address);
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer, indirect_data_address, stride,
count_address, pInfo->maxDrawCount,
VK_CMD_DRAW_INDIRECT_COUNT);
} else if (anv_use_generated_draws(cmd_buffer, pInfo->maxDrawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer, indirect_data_address, stride,
count_address, pInfo->maxDrawCount, false /* indexed */);
} else {
emit_indirect_count_draws(
cmd_buffer, indirect_data_address, stride,
count_address, pInfo->maxDrawCount, false /* indexed */);
}
trace_intel_end_draw_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_address),
gfx->vs_source_hash,
gfx->fs_source_hash);
}
void genX(CmdDrawIndexedIndirectCount)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
@@ -2425,6 +2643,51 @@ void genX(CmdDrawIndexedIndirectCount)(
gfx->fs_source_hash);
}
void genX(CmdDrawIndexedIndirectCount2KHR)(
VkCommandBuffer commandBuffer,
const VkDrawIndirectCount2InfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indexed indirect count",
0);
trace_intel_begin_draw_indexed_indirect_count(&cmd_buffer->trace);
struct anv_address indirect_data_address =
anv_address_from_u64(pInfo->addressRange.address);
uint64_t stride =
MAX2(pInfo->addressRange.stride, sizeof(VkDrawIndexedIndirectCommand));
struct anv_address count_address =
anv_address_from_u64(pInfo->countAddressRange.address);
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer, indirect_data_address, stride,
count_address, pInfo->maxDrawCount,
VK_CMD_DRAW_INDEXED_INDIRECT_COUNT);
} else if (anv_use_generated_draws(cmd_buffer, pInfo->maxDrawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer, indirect_data_address, stride,
count_address, pInfo->maxDrawCount,
true /* indexed */);
} else {
emit_indirect_count_draws(
cmd_buffer, indirect_data_address, stride,
count_address, pInfo->maxDrawCount, true /* indexed */);
}
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_address),
gfx->vs_source_hash,
gfx->fs_source_hash);
}
void genX(CmdBeginTransformFeedbackEXT)(
VkCommandBuffer commandBuffer,
uint32_t firstCounterBuffer,
@@ -2481,6 +2744,59 @@ void genX(CmdBeginTransformFeedbackEXT)(
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
void genX(CmdBeginTransformFeedback2EXT)(
VkCommandBuffer commandBuffer,
uint32_t firstCounterRange,
uint32_t counterRangeCount,
const VkBindTransformFeedbackBuffer2InfoEXT* pCounterInfos)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
assert(firstCounterRange < MAX_XFB_BUFFERS);
assert(counterRangeCount <= MAX_XFB_BUFFERS);
assert(firstCounterRange + counterRangeCount <= MAX_XFB_BUFFERS);
trace_intel_begin_xfb(&cmd_buffer->trace);
/* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET:
*
* "Ssoftware must ensure that no HW stream output operations can be in
* process or otherwise pending at the point that the MI_LOAD/STORE
* commands are processed. This will likely require a pipeline flush."
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT,
"begin transform feedback");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
for (uint32_t idx = 0; idx < MAX_XFB_BUFFERS; idx++) {
uint32_t cb_idx = idx - firstCounterRange;
/* If we have a counter buffer, this is a resume so we need to load the
* value into the streamout offset register. Otherwise, this is a begin
* and we need to reset it to zero.
*/
if (pCounterInfos &&
idx >= firstCounterRange &&
idx - firstCounterRange < counterRangeCount &&
pCounterInfos[cb_idx].addressRange.size != 0) {
mi_store(&b, mi_reg32(GENX(SO_WRITE_OFFSET0_num) + idx * 4),
mi_mem32(anv_address_from_u64(
pCounterInfos[cb_idx].addressRange.address)));
} else {
mi_store(&b, mi_reg32(GENX(SO_WRITE_OFFSET0_num) + idx * 4),
mi_imm(0));
}
}
cmd_buffer->state.xfb_enabled = true;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
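
The SO_WRITE_OFFSET0_num + idx * 4 addressing above works because the per-stream SO_WRITE_OFFSET registers sit at consecutive 4-byte MMIO offsets. A sketch, assuming four XFB buffers (MAX_XFB_BUFFERS) and a placeholder base address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   const uint32_t so_write_offset0 = 0x5280; /* placeholder register base */
   for (uint32_t idx = 0; idx < 4; idx++)
      printf("SO_WRITE_OFFSET%u at 0x%x\n", idx, so_write_offset0 + idx * 4);
   return 0;
}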
void genX(CmdEndTransformFeedbackEXT)(
VkCommandBuffer commandBuffer,
uint32_t firstCounterBuffer,
@@ -2535,6 +2851,54 @@ void genX(CmdEndTransformFeedbackEXT)(
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
void genX(CmdEndTransformFeedback2EXT)(
VkCommandBuffer commandBuffer,
uint32_t firstCounterRange,
uint32_t counterRangeCount,
const VkBindTransformFeedbackBuffer2InfoEXT* pCounterInfos)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
assert(firstCounterRange < MAX_XFB_BUFFERS);
assert(counterRangeCount <= MAX_XFB_BUFFERS);
assert(firstCounterRange + counterRangeCount <= MAX_XFB_BUFFERS);
/* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET:
*
* "Ssoftware must ensure that no HW stream output operations can be in
* process or otherwise pending at the point that the MI_LOAD/STORE
* commands are processed. This will likely require a pipeline flush."
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT,
"end transform feedback");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
for (uint32_t cb_idx = 0; cb_idx < counterRangeCount; cb_idx++) {
unsigned idx = firstCounterRange + cb_idx;
/* If we have a counter range, this is a pause, so we need to save the
* current value of the streamout offset register into it. Otherwise there
* is nothing to save.
*/
if (pCounterInfos &&
pCounterInfos[cb_idx].addressRange.size != 0) {
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
srm.MemoryAddress = anv_address_from_u64(
pCounterInfos[cb_idx].addressRange.address);
srm.RegisterAddress = GENX(SO_WRITE_OFFSET0_num) + idx * 4;
}
}
}
trace_intel_end_xfb(&cmd_buffer->trace);
cmd_buffer->state.xfb_enabled = false;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
#if GFX_VERx10 >= 125
void
@@ -2673,6 +3037,65 @@ genX(CmdDrawMeshTasksIndirectEXT)(
trace_intel_end_draw_mesh_indirect(&cmd_buffer->trace, drawCount);
}
void genX(CmdDrawMeshTasksIndirect2EXT)(
VkCommandBuffer commandBuffer,
const VkDrawIndirect2InfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
const struct brw_task_prog_data *task_prog_data = get_gfx_task_prog_data(gfx);
const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
struct anv_cmd_state *cmd_state = &cmd_buffer->state;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw mesh indirect", pInfo->drawCount);
struct anv_address indirect_data_addr =
anv_address_from_u64(pInfo->addressRange.address);
uint64_t stride =
MAX2(pInfo->addressRange.stride, sizeof(VkDrawMeshTasksIndirectCommandEXT));
trace_intel_begin_draw_mesh_indirect(&cmd_buffer->trace);
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer, indirect_data_addr, stride,
ANV_NULL_ADDRESS /* count_addr */, pInfo->drawCount,
VK_CMD_DRAW_MESH_TASKS_INDIRECT_EXT);
trace_intel_end_draw_mesh_indirect(&cmd_buffer->trace, pInfo->drawCount);
return;
}
cmd_buffer_flush_gfx(cmd_buffer);
if (cmd_state->conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) ||
mesh_prog_data->uses_drawid;
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
uint64_t offset = 0;
for (uint32_t i = 0; i < pInfo->drawCount; i++) {
struct anv_address draw = anv_address_add(indirect_data_addr, offset);
mesh_load_indirect_parameters_3dmesh_3d(cmd_buffer, &b, draw, uses_drawid, i);
emit_indirect_3dmesh_3d(&cmd_buffer->batch,
cmd_state->conditional_render_enabled, uses_drawid);
offset += stride;
}
trace_intel_end_draw_mesh_indirect(&cmd_buffer->trace, pInfo->drawCount);
}
void
genX(CmdDrawMeshTasksIndirectCountEXT)(
VkCommandBuffer commandBuffer,
@@ -2746,4 +3169,70 @@ genX(CmdDrawMeshTasksIndirectCountEXT)(
anv_address_utrace(count_addr));
}
void genX(CmdDrawMeshTasksIndirectCount2EXT)(
VkCommandBuffer commandBuffer,
const VkDrawIndirectCount2InfoKHR* pInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
const struct brw_task_prog_data *task_prog_data = get_gfx_task_prog_data(gfx);
const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw mesh indirect count", 0);
trace_intel_begin_draw_mesh_indirect_count(&cmd_buffer->trace);
struct anv_address indirect_data_addr =
anv_address_from_u64(pInfo->addressRange.address);
uint64_t stride =
MAX2(pInfo->addressRange.stride, sizeof(VkDrawMeshTasksIndirectCommandEXT));
struct anv_address count_addr =
anv_address_from_u64(pInfo->countAddressRange.address);
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer, indirect_data_addr, stride,
count_addr, pInfo->maxDrawCount,
VK_CMD_DRAW_MESH_TASKS_INDIRECT_COUNT_EXT);
trace_intel_end_draw_mesh_indirect(&cmd_buffer->trace, pInfo->maxDrawCount);
return;
}
cmd_buffer_flush_gfx(cmd_buffer);
bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) ||
mesh_prog_data->uses_drawid;
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &count_addr);
mi_builder_set_mocs(&b, mocs);
struct mi_value max =
prepare_for_draw_count_predicate(cmd_buffer, &b, count_addr);
uint64_t offset = 0;
for (uint32_t i = 0; i < pInfo->maxDrawCount; i++) {
struct anv_address draw = anv_address_add(indirect_data_addr, offset);
emit_draw_count_predicate_cond(cmd_buffer, &b, i, max);
mesh_load_indirect_parameters_3dmesh_3d(cmd_buffer, &b, draw, uses_drawid, i);
emit_indirect_3dmesh_3d(&cmd_buffer->batch, true, uses_drawid);
offset += stride;
}
trace_intel_end_draw_mesh_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_addr));
}
#endif /* GFX_VERx10 >= 125 */


@@ -2062,6 +2062,40 @@ void genX(CmdCopyQueryPoolResults)(
}
}
void genX(CmdCopyQueryPoolResultsToMemoryKHR)(
VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t firstQuery,
uint32_t queryCount,
const VkStridedDeviceAddressRangeKHR* pDstRange,
VkAddressCommandFlagsKHR dstFlags,
VkQueryResultFlags queryResultFlags)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
struct anv_device *device = cmd_buffer->device;
struct anv_physical_device *pdevice = device->physical;
struct anv_address dst_addr =
anv_address_from_strided_range_flags(*pDstRange, dstFlags);
if (queryCount > pdevice->instance->query_copy_with_shader_threshold) {
copy_query_results_with_shader(cmd_buffer, pool,
dst_addr,
pDstRange->stride,
firstQuery,
queryCount,
queryResultFlags);
} else {
copy_query_results_with_cs(cmd_buffer, pool,
dst_addr,
pDstRange->stride,
firstQuery,
queryCount,
queryResultFlags);
}
}
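
The branch above selects between two copy paths at a per-instance threshold: command-streamer (MI) copies for small query counts, a compute shader for large ones, where the shader's fixed setup cost amortizes. A sketch of the decision (threshold value illustrative):

#include <stdio.h>

static const char *copy_path(unsigned query_count, unsigned threshold)
{
   /* Mirrors the comparison against query_copy_with_shader_threshold. */
   return query_count > threshold ? "compute shader" : "command streamer";
}

int main(void)
{
   const unsigned threshold = 16; /* illustrative value */
   printf("%s\n", copy_path(4, threshold));   /* command streamer */
   printf("%s\n", copy_path(256, threshold)); /* compute shader */
   return 0;
}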
#if GFX_VERx10 >= 125 && ANV_SUPPORT_RT
#include "bvh/anv_bvh.h"