anv: remove old entrypoints

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40387>
Lionel Landwerlin 2025-03-05 17:40:49 +02:00 committed by Marge Bot
parent f123030dcd
commit 62b890046f
6 changed files with 22 additions and 996 deletions


@@ -532,41 +532,6 @@ copy_image(struct anv_cmd_buffer *cmd_buffer,
tex_cache_flush_hack(cmd_buffer, true);
}
static bool
anv_blorp_blitter_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
uint32_t region_count,
const VkBufferImageCopy2* regions)
{
if (!anv_cmd_buffer_is_blitter_queue(cmd_buffer))
return false;
bool blorp_execute_on_companion = false;
for (unsigned r = 0; r < region_count && !blorp_execute_on_companion; r++) {
VkImageAspectFlags aspect_mask = regions[r].imageSubresource.aspectMask;
enum isl_format linear_format =
anv_get_isl_format(cmd_buffer->device->physical, image->vk.format,
aspect_mask, VK_IMAGE_TILING_LINEAR);
const struct isl_format_layout *linear_fmtl =
isl_format_get_layout(linear_format);
switch (linear_fmtl->bpb) {
case 96:
/* The blitter engine can only handle 96 bpb in linear mode. */
blorp_execute_on_companion |=
image->vk.tiling != VK_IMAGE_TILING_LINEAR;
break;
default:
blorp_execute_on_companion |= linear_fmtl->bpb % 3 == 0;
break;
}
}
return blorp_execute_on_companion;
}
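
The fallback rule encoded by the switch above can be restated as a small standalone predicate. A minimal sketch assuming ISL's bits-per-block (bpb) values; the helper name is made up and this is not anv code:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical restatement of the removed check: formats whose
 * bits-per-block divide by 3 (24/48/96 bpb) are 3-component and cannot
 * be blitted natively, except that the blitter does handle 96 bpb when
 * the image is linear. */
static bool
needs_companion_rcs(uint32_t bpb, bool linear_tiling)
{
   if (bpb == 96)
      return !linear_tiling; /* 96 bpb works on the blitter only if linear */
   return bpb % 3 == 0;      /* 24/48 bpb always fall back to the RCS */
}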
static bool
is_image_multisampled(const struct anv_image *image)
{
@@ -878,78 +843,6 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
tex_cache_flush_hack(cmd_buffer, true);
}
void anv_CmdCopyBufferToImage2(
VkCommandBuffer commandBuffer,
const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);
bool blorp_execute_on_companion =
anv_blorp_execute_on_companion(cmd_buffer, NULL, dst_image);
/* Check whether any of the aspects is incompatible with the blitter
* engine; if so, run the blit on the companion RCS command buffer,
* since 3-component formats are not natively supported on the blitter
* except at 96 bpb.
*/
blorp_execute_on_companion |=
anv_blorp_blitter_execute_on_companion(cmd_buffer, dst_image,
pCopyBufferToImageInfo->regionCount,
pCopyBufferToImageInfo->pRegions);
anv_cmd_require_rcs(cmd_buffer, blorp_execute_on_companion) {
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, BLORP_BATCH_SRC_UNPADDED);
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
const struct vk_image_buffer_layout buffer_layout =
vk_image_buffer_copy_layout(&dst_image->vk, region);
copy_buffer_to_image(cmd_buffer, &batch,
anv_address_add(src_buffer->address,
region->bufferOffset),
&buffer_layout,
dst_image, pCopyBufferToImageInfo->dstImageLayout,
region->imageSubresource,
region->imageOffset, region->imageExtent,
true);
}
anv_blorp_batch_finish(&batch);
if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
const enum anv_pipe_bits pipe_bits =
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
((batch.flags & BLORP_BATCH_USE_COMPUTE) ?
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
anv_add_pending_pipe_bits(cmd_buffer,
(batch.flags & BLORP_BATCH_USE_COMPUTE) ?
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT :
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
pipe_bits,
"Copy flush before astc emu");
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
const VkBufferImageCopy2 *region =
&pCopyBufferToImageInfo->pRegions[r];
const VkOffset3D block_offset = vk_image_offset_to_elements(
&dst_image->vk, region->imageOffset);
const VkExtent3D block_extent = vk_image_extent_to_elements(
&dst_image->vk, region->imageExtent);
anv_astc_emu_process(cmd_buffer, dst_image,
pCopyBufferToImageInfo->dstImageLayout,
&region->imageSubresource,
block_offset, block_extent);
}
}
}
}
void anv_CmdCopyMemoryToImageKHR(
VkCommandBuffer commandBuffer,
const VkCopyDeviceMemoryImageInfoKHR* pCopyMemoryInfo)
@@ -1037,51 +930,6 @@ anv_add_buffer_write_pending_bits(struct anv_cmd_buffer *cmd_buffer,
ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo);
}
void anv_CmdCopyImageToBuffer2(
VkCommandBuffer commandBuffer,
const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
bool blorp_execute_on_companion =
anv_blorp_execute_on_companion(cmd_buffer, src_image, NULL);
/* Check whether any of the aspects is incompatible with the blitter
* engine; if so, run the blit on the companion RCS command buffer,
* since 3-component formats are not natively supported on the blitter
* except at 96 bpb.
*/
blorp_execute_on_companion |=
anv_blorp_blitter_execute_on_companion(cmd_buffer, src_image,
pCopyImageToBufferInfo->regionCount,
pCopyImageToBufferInfo->pRegions);
anv_cmd_require_rcs(cmd_buffer, blorp_execute_on_companion) {
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0);
for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
const struct vk_image_buffer_layout buffer_layout =
vk_image_buffer_copy_layout(&src_image->vk, region);
copy_buffer_to_image(cmd_buffer, &batch,
anv_address_add(dst_buffer->address,
region->bufferOffset),
&buffer_layout,
src_image, pCopyImageToBufferInfo->srcImageLayout,
region->imageSubresource,
region->imageOffset, region->imageExtent,
false);
}
anv_add_buffer_write_pending_bits(cmd_buffer, "after copy image to buffer");
anv_blorp_batch_finish(&batch);
}
}
void anv_CmdCopyImageToMemoryKHR(
VkCommandBuffer commandBuffer,
const VkCopyDeviceMemoryImageInfoKHR* pCopyMemoryInfo)
@@ -1356,35 +1204,6 @@ anv_cmd_copy_addr(struct anv_cmd_buffer *cmd_buffer,
anv_blorp_batch_finish(&batch);
}
void anv_CmdCopyBuffer2(
VkCommandBuffer commandBuffer,
const VkCopyBufferInfo2* pCopyBufferInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch,
BLORP_BATCH_SRC_UNPADDED |
(cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0));
for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];
copy_memory(cmd_buffer->device, &batch,
anv_address_add(src_buffer->address, region->srcOffset),
anv_address_add(dst_buffer->address, region->dstOffset),
region->size);
}
anv_add_buffer_write_pending_bits(cmd_buffer, "after copy buffer");
anv_blorp_batch_finish(&batch);
}
void anv_CmdCopyMemoryKHR(
VkCommandBuffer commandBuffer,
const VkCopyDeviceMemoryInfoKHR* pCopyMemoryInfo)
@@ -1486,21 +1305,6 @@ anv_cmd_buffer_update_addr(
anv_blorp_batch_finish(&batch);
}
void anv_CmdUpdateBuffer(
VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const void* pData)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
anv_cmd_buffer_update_addr(cmd_buffer,
anv_address_add(dst_buffer->address, dstOffset),
dataSize, pData);
}
void anv_CmdUpdateMemoryKHR(
VkCommandBuffer commandBuffer,
const VkDeviceAddressRangeKHR* pDstRange,
@@ -1576,35 +1380,6 @@ anv_cmd_fill_buffer_addr(VkCommandBuffer commandBuffer,
anv_add_buffer_write_pending_bits(cmd_buffer, "after fill buffer");
}
void anv_CmdFillBuffer(
VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize fillSize,
uint32_t data)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize);
/* From the Vulkan spec:
*
* "size is the number of bytes to fill, and must be either a multiple
* of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
* the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
* buffer is not a multiple of 4, then the nearest smaller multiple is
* used."
*/
fillSize &= ~3ull;
anv_cmd_buffer_fill_area(cmd_buffer,
anv_address_add(dst_buffer->address, dstOffset),
fillSize, data);
anv_add_buffer_write_pending_bits(cmd_buffer, "after fill buffer");
}
void anv_CmdFillMemoryKHR(
VkCommandBuffer commandBuffer,
const VkDeviceAddressRangeKHR* pDstRange,

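The VK_WHOLE_SIZE rounding quoted in the removed anv_CmdFillBuffer is easy to get wrong, so here is the arithmetic on its own. A minimal sketch with plain integers standing in for vk_buffer_range(); the helper and the WHOLE_SIZE macro are illustrative:

#include <stdint.h>

#define WHOLE_SIZE UINT64_MAX /* stand-in for VK_WHOLE_SIZE */

/* Resolve the requested range against the buffer, then round down to a
 * multiple of 4 as the spec quote above requires. */
static uint64_t
resolved_fill_size(uint64_t buffer_size, uint64_t offset, uint64_t size)
{
   uint64_t range = (size == WHOLE_SIZE) ? buffer_size - offset : size;
   return range & ~3ull; /* e.g. a 1023-byte remainder fills 1020 bytes */
}
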

@@ -967,46 +967,6 @@ void anv_CmdPushDataEXT(
}
}
void anv_CmdBindVertexBuffers2(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets,
const VkDeviceSize* pSizes,
const VkDeviceSize* pStrides)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
/* We have to defer setting up the vertex buffers since we need the
* buffer strides from the pipeline. */
assert(firstBinding + bindingCount <= get_max_vbs(cmd_buffer->device->info));
for (uint32_t i = 0; i < bindingCount; i++) {
ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
if (buffer == NULL) {
vb[firstBinding + i] = (struct anv_vertex_binding) { 0 };
} else {
vb[firstBinding + i] = (struct anv_vertex_binding) {
.addr = anv_address_physical(
anv_address_add(buffer->address, pOffsets[i])),
.size = vk_buffer_range(&buffer->vk, pOffsets[i],
pSizes ? pSizes[i] : VK_WHOLE_SIZE),
.mocs = anv_mocs(cmd_buffer->device, buffer->address.bo,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
};
}
cmd_buffer->state.gfx.vb_dirty |= 1 << (firstBinding + i);
}
if (pStrides != NULL) {
vk_cmd_set_vertex_binding_strides(&cmd_buffer->vk, firstBinding,
bindingCount, pStrides);
}
}
void anv_CmdBindVertexBuffers3KHR(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
@@ -1040,37 +1000,6 @@ void anv_CmdBindVertexBuffers3KHR(
bindingCount, pBindingInfos);
}
void anv_CmdBindIndexBuffer2(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
VkDeviceSize size,
VkIndexType indexType)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
if (cmd_buffer->state.gfx.index_type != indexType) {
cmd_buffer->state.gfx.index_type = indexType;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_TYPE;
}
vk_cmd_set_index_buffer_type(&cmd_buffer->vk, indexType);
uint64_t index_addr = buffer ?
anv_address_physical(anv_address_add(buffer->address, offset)) : 0;
uint32_t index_size = buffer ? vk_buffer_range(&buffer->vk, offset, size) : 0;
if (cmd_buffer->state.gfx.index_addr != index_addr ||
cmd_buffer->state.gfx.index_size != index_size) {
cmd_buffer->state.gfx.index_addr = index_addr;
cmd_buffer->state.gfx.index_size = index_size;
cmd_buffer->state.gfx.index_mocs =
anv_mocs(cmd_buffer->device, buffer->address.bo,
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
}
}
void anv_CmdBindIndexBuffer3KHR(
VkCommandBuffer commandBuffer,
const VkBindIndexBuffer3InfoKHR* pInfo)
@@ -1098,38 +1027,6 @@ void anv_CmdBindIndexBuffer3KHR(
}
}
void anv_CmdBindTransformFeedbackBuffersEXT(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets,
const VkDeviceSize* pSizes)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings;
/* We only stash the bindings here; the actual SO_BUFFER setup is
* deferred until transform feedback is enabled. */
assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS);
for (uint32_t i = 0; i < bindingCount; i++) {
if (pBuffers[i] == VK_NULL_HANDLE) {
xfb[firstBinding + i] = (struct anv_xfb_binding) { 0 };
} else {
ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
xfb[firstBinding + i] = (struct anv_xfb_binding) {
.addr = anv_address_physical(
anv_address_add(buffer->address, pOffsets[i])),
.size = vk_buffer_range(&buffer->vk, pOffsets[i],
pSizes ? pSizes[i] : VK_WHOLE_SIZE),
.mocs = anv_mocs(cmd_buffer->device, buffer->address.bo,
ISL_SURF_USAGE_STREAM_OUT_BIT),
};
}
}
}
void anv_CmdBindTransformFeedbackBuffers2EXT(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,

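The removed binding entrypoints above all share the same bookkeeping idea: stash the binding in command-buffer state and set a per-slot dirty bit so a later flush only re-emits what changed. A toy sketch of that pattern, not anv's actual structures (MAX_VBS and the names are made up):

#include <stdbool.h>
#include <stdint.h>

#define MAX_VBS 32 /* illustrative; anv derives the real limit per device */

struct vb_state {
   uint32_t vb_dirty; /* bit i set => binding slot i needs re-emission */
};

static void
bind_vertex_buffers(struct vb_state *state, uint32_t first, uint32_t count)
{
   for (uint32_t i = 0; i < count; i++)
      state->vb_dirty |= 1u << (first + i);
}

static bool
slot_needs_flush(const struct vb_state *state, uint32_t slot)
{
   return (state->vb_dirty & (1u << slot)) != 0;
}
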

@@ -663,10 +663,17 @@ genX(CmdCopyAccelerationStructureKHR)(
"bvh size read for dispatch");
}
-   anv_genX(cmd_buffer->device->info, CmdDispatchIndirect)(
-      commandBuffer, vk_buffer_to_handle(src->buffer),
-      src->offset + offsetof(struct anv_accel_struct_header,
-                             copy_dispatch_size));
+   anv_genX(cmd_buffer->device->info, CmdDispatchIndirect2KHR)(
+      commandBuffer,
+      &(VkDispatchIndirect2InfoKHR) {
+         .sType = VK_STRUCTURE_TYPE_DISPATCH_INDIRECT_2_INFO_KHR,
+         .addressRange = {
+            .address = vk_acceleration_structure_get_va(src) +
+                       offsetof(struct anv_accel_struct_header,
+                                copy_dispatch_size),
+            .size = src->size,
+         },
+      });
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
@@ -715,10 +722,17 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
"bvh size read for dispatch");
}
-   anv_genX(device->info, CmdDispatchIndirect)(
-      commandBuffer, vk_buffer_to_handle(src->buffer),
-      src->offset + offsetof(struct anv_accel_struct_header,
-                             copy_dispatch_size));
+   anv_genX(device->info, CmdDispatchIndirect2KHR)(
+      commandBuffer,
+      &(VkDispatchIndirect2InfoKHR) {
+         .sType = VK_STRUCTURE_TYPE_DISPATCH_INDIRECT_2_INFO_KHR,
+         .addressRange = {
+            .address = vk_acceleration_structure_get_va(src) +
+                       offsetof(struct anv_accel_struct_header,
+                                copy_dispatch_size),
+            .size = src->size,
+         },
+      });
anv_cmd_buffer_restore_state(cmd_buffer, &saved);

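Both hunks above compute the indirect dispatch address the same way: the dispatch size lives inside the acceleration structure's header, so its GPU address is the structure's VA plus a compile-time field offset. A self-contained sketch with a toy header layout (the real anv_accel_struct_header layout is not shown in this commit):

#include <stddef.h>
#include <stdint.h>

typedef uint64_t VkDeviceAddress;

/* Toy stand-in for anv_accel_struct_header; only the idea matters. */
struct accel_header {
   uint64_t reserved[4];
   uint32_t copy_dispatch_size[3]; /* x/y/z workgroup counts */
};

static VkDeviceAddress
copy_dispatch_size_addr(VkDeviceAddress accel_va)
{
   return accel_va + offsetof(struct accel_header, copy_dispatch_size);
}
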

@@ -7002,50 +7002,6 @@ genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer)
}
}
void genX(CmdBeginConditionalRenderingEXT)(
VkCommandBuffer commandBuffer,
const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, pConditionalRenderingBegin->buffer);
struct anv_cmd_state *cmd_state = &cmd_buffer->state;
struct anv_address value_address =
anv_address_add(buffer->address, pConditionalRenderingBegin->offset);
const bool isInverted = pConditionalRenderingBegin->flags &
VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
cmd_state->conditional_render_enabled = true;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &value_address);
mi_builder_set_mocs(&b, mocs);
/* Section 19.4 of the Vulkan 1.1.85 spec says:
*
* If the value of the predicate in buffer memory changes
* while conditional rendering is active, the rendering commands
* may be discarded in an implementation-dependent way.
* Some implementations may latch the value of the predicate
* upon beginning conditional rendering while others
* may read it before every rendering command.
*
* So it's perfectly fine to read a value from the buffer once.
*/
struct mi_value value = mi_mem32(value_address);
/* Precompute predicate result, it is necessary to support secondary
* command buffers since it is unknown if conditional rendering is
* inverted when populating them.
*/
mi_store(&b, mi_reg64(ANV_PREDICATE_RESULT_REG),
isInverted ? mi_uge(&b, mi_imm(0), value) :
mi_ult(&b, mi_imm(0), value));
}
void genX(CmdBeginConditionalRendering2EXT)(
VkCommandBuffer commandBuffer,
const VkConditionalRenderingBeginInfo2EXT* pConditionalRenderingBegin)
@@ -7760,50 +7716,6 @@ genX(async_submit_end)(struct anv_async_submit *submit)
anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_END), bbe);
}
void
genX(CmdWriteBufferMarker2AMD)(VkCommandBuffer commandBuffer,
VkPipelineStageFlags2 stage,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
uint32_t marker)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, dstBuffer);
/* The barriers inserted by the application to make dstBuffer writable
* should already have the L1/L2 cache flushes. On platforms where the
* command streamer is not coherent with L3, we need an additional set of
* cache flushes.
*/
enum anv_pipe_bits bits =
(ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info) ? 0 :
(ANV_PIPE_DATA_CACHE_FLUSH_BIT | ANV_PIPE_TILE_CACHE_FLUSH_BIT)) |
ANV_PIPE_END_OF_PIPE_SYNC_BIT;
trace_intel_begin_write_buffer_marker(&cmd_buffer->trace);
anv_add_pending_pipe_bits(cmd_buffer,
stage,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
bits, "write buffer marker");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
/* Emitting a PIPE_CONTROL with Post-Sync Op = Write Immediate Data
* would be the logical way to implement this extension, as it could
* do a pipelined marker write. Unfortunately, it requires writing
* whole 64-bit QWords, and VK_AMD_buffer_marker requires writing a
* 32-bit value. MI_STORE_DATA_IMM is the only good way to do that,
* and unfortunately it requires stalling.
*/
mi_store(&b, mi_mem32(anv_address_add(buffer->address, dstOffset)),
mi_imm(marker));
trace_intel_end_write_buffer_marker(&cmd_buffer->trace);
}
void genX(CmdWriteMarkerToMemoryAMD)(
VkCommandBuffer commandBuffer,
const VkMemoryMarkerInfoAMD* pInfo)

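For reference, the predicate that the removed CmdBeginConditionalRenderingEXT stores to ANV_PREDICATE_RESULT_REG reduces to one boolean: mi_ult(0, value) is "0 < value", i.e. render when the value is non-zero, and mi_uge(0, value) is "0 >= value", i.e. render when it is zero, the inverted mode. A CPU-side restatement (hypothetical helper, not driver code):

#include <stdbool.h>
#include <stdint.h>

static bool
predicate_result(uint32_t value, bool inverted)
{
   /* Matches mi_uge(0, value) / mi_ult(0, value) in the hunk above. */
   return inverted ? value == 0 : value != 0;
}
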

@@ -874,18 +874,6 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
prog_data->base.source_hash);
}
void genX(CmdDispatchIndirect)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
struct anv_address addr = anv_address_add(buffer->address, offset);
genX(cmd_buffer_dispatch_indirect)(cmd_buffer, addr, false);
}
void genX(CmdDispatchIndirect2KHR)(
VkCommandBuffer commandBuffer,
const VkDispatchIndirect2InfoKHR* pInfo)

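The parameter change is mechanical: the removed entrypoint located its arguments with a VkBuffer handle plus byte offset, while the kept 2KHR form takes a raw device-address range. A toy sketch of the mapping; the struct is a stand-in whose field names are taken from the hunks in this commit, not from a released Vulkan header:

#include <stdint.h>

typedef uint64_t VkDeviceAddress;

typedef struct {
   VkDeviceAddress address;
   uint64_t        size;
} AddressRange; /* stand-in for the addressRange member used above */

static AddressRange
range_from_buffer(VkDeviceAddress buffer_va, uint64_t buffer_size,
                  uint64_t offset)
{
   /* Same location, expressed without a VkBuffer handle. */
   return (AddressRange) {
      .address = buffer_va + offset,
      .size    = buffer_size - offset,
   };
}
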

@@ -1681,99 +1681,6 @@ void genX(CmdDrawMultiIndexedEXT)(
#define GEN11_3DPRIM_XP_BASE_INSTANCE GEN11_3DPRIM_XP1
#define GEN11_3DPRIM_XP_DRAW_ID GEN11_3DPRIM_XP2
void genX(CmdDrawIndirectByteCountEXT)(
VkCommandBuffer commandBuffer,
uint32_t instanceCount,
uint32_t firstInstance,
VkBuffer counterBuffer,
VkDeviceSize counterBufferOffset,
uint32_t counterOffset,
uint32_t vertexStride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, counter_buffer, counterBuffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
/* firstVertex is always zero for this draw function */
const uint32_t firstVertex = 0;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indirect byte count",
instanceCount * gfx->instance_multiplier);
trace_intel_begin_draw_indirect_byte_count(&cmd_buffer->trace);
/* Select pipeline here to allow
* cmd_buffer_emit_vertex_constants_and_flush() without flushing before
* emit_base_vertex_instance() & emit_draw_index().
*/
genX(flush_pipeline_select_3d)(cmd_buffer);
#if GFX_VER < 11
const struct brw_vs_prog_data *vs_prog_data = get_gfx_vs_prog_data(gfx);
if (vs_prog_data->uses_firstvertex ||
vs_prog_data->uses_baseinstance)
emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance);
if (vs_prog_data->uses_drawid)
emit_draw_index(cmd_buffer, 0);
#endif
cmd_buffer_flush_gfx(cmd_buffer);
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &counter_buffer->address);
mi_builder_set_mocs(&b, mocs);
struct mi_value count =
mi_mem32(anv_address_add(counter_buffer->address,
counterBufferOffset));
if (counterOffset)
count = mi_isub(&b, count, mi_imm(counterOffset));
count = mi_udiv32_imm(&b, count, vertexStride);
mi_store(&b, mi_reg32(GFX7_3DPRIM_VERTEX_COUNT), count);
mi_store(&b, mi_reg32(GFX7_3DPRIM_START_VERTEX), mi_imm(firstVertex));
assert(((uint64_t)instanceCount * gfx->instance_multiplier <= UINT32_MAX));
mi_store(&b, mi_reg32(GFX7_3DPRIM_INSTANCE_COUNT),
mi_imm(instanceCount * gfx->instance_multiplier));
mi_store(&b, mi_reg32(GFX7_3DPRIM_START_INSTANCE), mi_imm(firstInstance));
mi_store(&b, mi_reg32(GFX7_3DPRIM_BASE_VERTEX), mi_imm(0));
#if GFX_VER >= 11
mi_store(&b, mi_reg32(GEN11_3DPRIM_XP_BASE_VERTEX),
mi_imm(firstVertex));
/* GEN11_3DPRIM_XP_BASE_INSTANCE is implicit */
mi_store(&b, mi_reg32(GEN11_3DPRIM_XP_DRAW_ID), mi_imm(0));
#endif
cmd_buffer_pre_draw_wa(cmd_buffer);
anv_batch_emit(&cmd_buffer->batch, _3DPRIMITIVE_DIRECT, prim) {
#if GFX_VERx10 >= 125
prim.TBIMREnable = cmd_buffer->state.gfx.dyn_state.use_tbimr;
#endif
prim.IndirectParameterEnable = true;
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
prim.VertexAccessType = SEQUENTIAL;
#if GFX_VER >= 11
prim.ExtendedParametersPresent = true;
#endif
}
cmd_buffer_post_draw_wa(cmd_buffer, 1, SEQUENTIAL);
trace_intel_end_draw_indirect_byte_count(&cmd_buffer->trace,
instanceCount * gfx->instance_multiplier,
gfx->vs_source_hash,
gfx->fs_source_hash);
}
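
The MI math in the removed function boils down to one division: the counter buffer holds a byte count written by transform feedback, and the draw's vertex count is the remaining bytes divided by the vertex stride. A CPU-side restatement of what mi_isub + mi_udiv32_imm compute on the GPU (illustrative helper only):

#include <stdint.h>

static uint32_t
xfb_vertex_count(uint32_t counter_bytes, uint32_t counter_offset,
                 uint32_t vertex_stride)
{
   return (counter_bytes - counter_offset) / vertex_stride;
}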
void genX(CmdDrawIndirectByteCount2EXT)(
VkCommandBuffer commandBuffer,
uint32_t instanceCount,
@@ -2145,57 +2052,6 @@ genX(cmd_buffer_emit_execute_indirect_draws)(struct anv_cmd_buffer *cmd_buffer,
}
#endif // GFX_VERx10 >= 125
}
void genX(CmdDrawIndirect)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indirect",
drawCount);
trace_intel_begin_draw_indirect(&cmd_buffer->trace);
struct anv_address indirect_data_addr =
anv_address_add(buffer->address, offset);
stride = MAX2(stride, sizeof(VkDrawIndirectCommand));
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer,
indirect_data_addr,
stride,
ANV_NULL_ADDRESS /* count_addr */,
drawCount,
VK_CMD_DRAW_INDIRECT);
} else if (anv_use_generated_draws(cmd_buffer, drawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer,
indirect_data_addr,
stride,
ANV_NULL_ADDRESS /* count_addr */,
drawCount,
false /* indexed */);
} else {
emit_indirect_draws(cmd_buffer,
indirect_data_addr,
stride, drawCount, false /* indexed */);
}
trace_intel_end_draw_indirect(&cmd_buffer->trace, drawCount,
gfx->vs_source_hash,
gfx->fs_source_hash);
}
void genX(CmdDrawIndirect2KHR)(
VkCommandBuffer commandBuffer,
@@ -2239,58 +2095,6 @@ void genX(CmdDrawIndirect2KHR)(
gfx->fs_source_hash);
}
void genX(CmdDrawIndexedIndirect)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indexed indirect",
drawCount);
trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace);
struct anv_address indirect_data_addr =
anv_address_add(buffer->address, offset);
stride = MAX2(stride, sizeof(VkDrawIndexedIndirectCommand));
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer,
indirect_data_addr,
stride,
ANV_NULL_ADDRESS /* count_addr */,
drawCount,
VK_CMD_DRAW_INDEXED_INDIRECT);
} else if (anv_use_generated_draws(cmd_buffer, drawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer,
indirect_data_addr,
stride,
ANV_NULL_ADDRESS /* count_addr */,
drawCount,
true /* indexed */);
} else {
emit_indirect_draws(cmd_buffer,
indirect_data_addr,
stride, drawCount, true /* indexed */);
}
trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, drawCount,
gfx->vs_source_hash,
gfx->fs_source_hash);
}
void genX(CmdDrawIndexedIndirect2KHR)(
VkCommandBuffer commandBuffer,
const VkDrawIndirect2InfoKHR* pInfo)
@@ -2479,66 +2283,6 @@ emit_indirect_count_draws(struct anv_cmd_buffer *cmd_buffer,
mi_value_unref(&b, max);
}
void genX(CmdDrawIndirectCount)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
VkBuffer _countBuffer,
VkDeviceSize countBufferOffset,
uint32_t maxDrawCount,
uint32_t stride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indirect count",
0);
trace_intel_begin_draw_indirect_count(&cmd_buffer->trace);
struct anv_address indirect_data_address =
anv_address_add(buffer->address, offset);
struct anv_address count_address =
anv_address_add(count_buffer->address, countBufferOffset);
stride = MAX2(stride, sizeof(VkDrawIndirectCommand));
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
VK_CMD_DRAW_INDIRECT_COUNT);
} else if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
false /* indexed */);
} else {
emit_indirect_count_draws(cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
false /* indexed */);
}
trace_intel_end_draw_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_address),
gfx->vs_source_hash,
gfx->fs_source_hash);
}
void genX(CmdDrawIndirectCount2KHR)(
VkCommandBuffer commandBuffer,
const VkDrawIndirectCount2InfoKHR* pInfo)
@@ -2583,66 +2327,6 @@ void genX(CmdDrawIndirectCount2KHR)(
gfx->fs_source_hash);
}
void genX(CmdDrawIndexedIndirectCount)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
VkBuffer _countBuffer,
VkDeviceSize countBufferOffset,
uint32_t maxDrawCount,
uint32_t stride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw indexed indirect count",
0);
trace_intel_begin_draw_indexed_indirect_count(&cmd_buffer->trace);
struct anv_address indirect_data_address =
anv_address_add(buffer->address, offset);
struct anv_address count_address =
anv_address_add(count_buffer->address, countBufferOffset);
stride = MAX2(stride, sizeof(VkDrawIndexedIndirectCommand));
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
VK_CMD_DRAW_INDEXED_INDIRECT_COUNT);
} else if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
true /* indexed */);
} else {
emit_indirect_count_draws(cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
true /* indexed */);
}
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_address),
gfx->vs_source_hash,
gfx->fs_source_hash);
}
void genX(CmdDrawIndexedIndirectCount2KHR)(
VkCommandBuffer commandBuffer,
const VkDrawIndirectCount2InfoKHR* pInfo)
@@ -2688,62 +2372,6 @@ void genX(CmdDrawIndexedIndirectCount2KHR)(
gfx->fs_source_hash);
}
void genX(CmdBeginTransformFeedbackEXT)(
VkCommandBuffer commandBuffer,
uint32_t firstCounterBuffer,
uint32_t counterBufferCount,
const VkBuffer* pCounterBuffers,
const VkDeviceSize* pCounterBufferOffsets)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
assert(firstCounterBuffer < MAX_XFB_BUFFERS);
assert(counterBufferCount <= MAX_XFB_BUFFERS);
assert(firstCounterBuffer + counterBufferCount <= MAX_XFB_BUFFERS);
trace_intel_begin_xfb(&cmd_buffer->trace);
/* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET:
*
* "Ssoftware must ensure that no HW stream output operations can be in
* process or otherwise pending at the point that the MI_LOAD/STORE
* commands are processed. This will likely require a pipeline flush."
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT,
"begin transform feedback");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
for (uint32_t idx = 0; idx < MAX_XFB_BUFFERS; idx++) {
/* If we have a counter buffer, this is a resume so we need to load the
* value into the streamout offset register. Otherwise, this is a begin
* and we need to reset it to zero.
*/
if (pCounterBuffers &&
idx >= firstCounterBuffer &&
idx - firstCounterBuffer < counterBufferCount &&
pCounterBuffers[idx - firstCounterBuffer] != VK_NULL_HANDLE) {
uint32_t cb_idx = idx - firstCounterBuffer;
ANV_FROM_HANDLE(anv_buffer, counter_buffer, pCounterBuffers[cb_idx]);
uint64_t offset = pCounterBufferOffsets ?
pCounterBufferOffsets[cb_idx] : 0;
mi_store(&b, mi_reg32(GENX(SO_WRITE_OFFSET0_num) + idx * 4),
mi_mem32(anv_address_add(counter_buffer->address, offset)));
} else {
mi_store(&b, mi_reg32(GENX(SO_WRITE_OFFSET0_num) + idx * 4),
mi_imm(0));
}
}
cmd_buffer->state.xfb_enabled = true;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
void genX(CmdBeginTransformFeedback2EXT)(
VkCommandBuffer commandBuffer,
uint32_t firstCounterRange,
@@ -2797,60 +2425,6 @@ void genX(CmdBeginTransformFeedback2EXT)(
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
void genX(CmdEndTransformFeedbackEXT)(
VkCommandBuffer commandBuffer,
uint32_t firstCounterBuffer,
uint32_t counterBufferCount,
const VkBuffer* pCounterBuffers,
const VkDeviceSize* pCounterBufferOffsets)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
assert(firstCounterBuffer < MAX_XFB_BUFFERS);
assert(counterBufferCount <= MAX_XFB_BUFFERS);
assert(firstCounterBuffer + counterBufferCount <= MAX_XFB_BUFFERS);
/* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET:
*
* "Ssoftware must ensure that no HW stream output operations can be in
* process or otherwise pending at the point that the MI_LOAD/STORE
* commands are processed. This will likely require a pipeline flush."
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT,
"end transform feedback");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
for (uint32_t cb_idx = 0; cb_idx < counterBufferCount; cb_idx++) {
unsigned idx = firstCounterBuffer + cb_idx;
/* If we have a counter buffer, store the current value of the streamout
* offset register into it so that a later resume can reload it. Without
* a counter buffer there is nothing to save.
*/
if (pCounterBuffers &&
cb_idx < counterBufferCount &&
pCounterBuffers[cb_idx] != VK_NULL_HANDLE) {
ANV_FROM_HANDLE(anv_buffer, counter_buffer, pCounterBuffers[cb_idx]);
uint64_t offset = pCounterBufferOffsets ?
pCounterBufferOffsets[cb_idx] : 0;
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
srm.MemoryAddress = anv_address_add(counter_buffer->address,
offset);
srm.RegisterAddress = GENX(SO_WRITE_OFFSET0_num) + idx * 4;
}
}
}
trace_intel_end_xfb(&cmd_buffer->trace);
cmd_buffer->state.xfb_enabled = false;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
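
Taken together, the removed Begin/End pair implements a save/restore of the SO_WRITE_OFFSETn registers through the counter buffers, which is what lets transform feedback pause and resume. A simplified sketch of that round-trip (toy state, all-or-nothing counters, not the anv structures):

#include <stdint.h>

#define XFB_BUFFERS 4 /* illustrative; anv uses MAX_XFB_BUFFERS */

struct xfb_regs {
   uint32_t so_write_offset[XFB_BUFFERS]; /* stands in for SO_WRITE_OFFSETn */
};

/* Begin: resume from counter buffers when bound, else start at zero. */
static void
xfb_begin(struct xfb_regs *regs, const uint32_t *counters /* or NULL */)
{
   for (uint32_t i = 0; i < XFB_BUFFERS; i++)
      regs->so_write_offset[i] = counters ? counters[i] : 0;
}

/* End: save the registers so a later Begin can resume where we stopped. */
static void
xfb_end(const struct xfb_regs *regs, uint32_t *counters)
{
   for (uint32_t i = 0; i < XFB_BUFFERS; i++)
      counters[i] = regs->so_write_offset[i];
}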
void genX(CmdEndTransformFeedback2EXT)(
VkCommandBuffer commandBuffer,
uint32_t firstCounterRange,
@@ -2976,67 +2550,6 @@ emit_indirect_3dmesh_3d(struct anv_batch *batch,
dw[len - 1] = 0;
}
void
genX(CmdDrawMeshTasksIndirectEXT)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
const struct brw_task_prog_data *task_prog_data = get_gfx_task_prog_data(gfx);
const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
struct anv_cmd_state *cmd_state = &cmd_buffer->state;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw mesh indirect", drawCount);
trace_intel_begin_draw_mesh_indirect(&cmd_buffer->trace);
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer,
anv_address_add(buffer->address, offset),
MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandEXT)),
ANV_NULL_ADDRESS /* count_addr */,
drawCount,
VK_CMD_DRAW_MESH_TASKS_INDIRECT_EXT);
trace_intel_end_draw_mesh_indirect(&cmd_buffer->trace, drawCount);
return;
}
cmd_buffer_flush_gfx(cmd_buffer);
if (cmd_state->conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) ||
mesh_prog_data->uses_drawid;
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
for (uint32_t i = 0; i < drawCount; i++) {
struct anv_address draw = anv_address_add(buffer->address, offset);
mesh_load_indirect_parameters_3dmesh_3d(cmd_buffer, &b, draw, uses_drawid, i);
emit_indirect_3dmesh_3d(&cmd_buffer->batch,
cmd_state->conditional_render_enabled, uses_drawid);
offset += stride;
}
trace_intel_end_draw_mesh_indirect(&cmd_buffer->trace, drawCount);
}
void genX(CmdDrawMeshTasksIndirect2EXT)(
VkCommandBuffer commandBuffer,
const VkDrawIndirect2InfoKHR* pInfo)
@@ -3096,79 +2609,6 @@ void genX(CmdDrawMeshTasksIndirect2EXT)(
trace_intel_end_draw_mesh_indirect(&cmd_buffer->trace, pInfo->drawCount);
}
void
genX(CmdDrawMeshTasksIndirectCountEXT)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
VkBuffer _countBuffer,
VkDeviceSize countBufferOffset,
uint32_t maxDrawCount,
uint32_t stride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
const struct brw_task_prog_data *task_prog_data = get_gfx_task_prog_data(gfx);
const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
if (anv_batch_has_error(&cmd_buffer->batch))
return;
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_DRAW,
"draw mesh indirect count", 0);
trace_intel_begin_draw_mesh_indirect_count(&cmd_buffer->trace);
struct anv_address count_addr =
anv_address_add(count_buffer->address, countBufferOffset);
if (execute_indirect_draw_supported(cmd_buffer)) {
genX(cmd_buffer_emit_execute_indirect_draws)(
cmd_buffer,
anv_address_add(buffer->address, offset),
MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandEXT)),
count_addr /* count_addr */,
maxDrawCount,
VK_CMD_DRAW_MESH_TASKS_INDIRECT_COUNT_EXT);
trace_intel_end_draw_mesh_indirect(&cmd_buffer->trace, maxDrawCount);
return;
}
cmd_buffer_flush_gfx(cmd_buffer);
bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) ||
mesh_prog_data->uses_drawid;
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &count_buffer->address);
mi_builder_set_mocs(&b, mocs);
struct mi_value max =
prepare_for_draw_count_predicate(
cmd_buffer, &b, count_addr);
for (uint32_t i = 0; i < maxDrawCount; i++) {
struct anv_address draw = anv_address_add(buffer->address, offset);
emit_draw_count_predicate_cond(cmd_buffer, &b, i, max);
mesh_load_indirect_parameters_3dmesh_3d(cmd_buffer, &b, draw, uses_drawid, i);
emit_indirect_3dmesh_3d(&cmd_buffer->batch, true, uses_drawid);
offset += stride;
}
trace_intel_end_draw_mesh_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_addr));
}
void genX(CmdDrawMeshTasksIndirectCount2EXT)(
VkCommandBuffer commandBuffer,
const VkDrawIndirectCount2InfoKHR* pInfo)