mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 09:38:07 +02:00
radv: re-introduce the compute vs CP DMA heuristic for copy/fill operations
This caused a -5% performance regression in Control because using compute always eats resources. This new approach introduces a flag called RADV_COPY_FLAGS_DEVICE_LOCAL which can be used to indicate if the underlying memory is device local. This should also help for future work. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12639 Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34556>
This commit is contained in:
parent
5e2508e7c4
commit
e616761fb2
6 changed files with 99 additions and 24 deletions
|
|
@ -65,6 +65,10 @@ struct radv_meta_saved_state {
|
|||
unsigned active_occlusion_queries;
|
||||
};
|
||||
|
||||
/* Flags describing the memory backing a copy/fill source or destination.
 * Used by the compute-vs-CP-DMA heuristic below.
 */
enum radv_copy_flags {
|
||||
/* Set when the underlying BO's initial domain is RADEON_DOMAIN_VRAM,
 * i.e. the memory is device-local.
 */
RADV_COPY_FLAGS_DEVICE_LOCAL = 1 << 0,
|
||||
};
|
||||
|
||||
enum radv_blit_ds_layout {
|
||||
RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
|
||||
RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
|
||||
|
|
@ -182,6 +186,7 @@ struct radv_meta_blit2d_buffer {
|
|||
uint32_t pitch;
|
||||
uint8_t bs;
|
||||
VkFormat format;
|
||||
enum radv_copy_flags copy_flags;
|
||||
};
|
||||
|
||||
struct radv_meta_blit2d_rect {
|
||||
|
|
@ -263,12 +268,14 @@ void radv_meta_decode_astc(struct radv_cmd_buffer *cmd_buffer, struct radv_image
|
|||
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size,
|
||||
uint32_t value);
|
||||
|
||||
uint32_t radv_fill_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, uint32_t value);
|
||||
uint32_t radv_fill_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, uint32_t value,
|
||||
enum radv_copy_flags copy_flags);
|
||||
|
||||
uint32_t radv_fill_image(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, uint64_t offset,
|
||||
uint64_t size, uint32_t value);
|
||||
|
||||
void radv_copy_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va, uint64_t size);
|
||||
void radv_copy_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va, uint64_t size,
|
||||
enum radv_copy_flags src_copy_flags, enum radv_copy_flags dst_copy_flags);
|
||||
|
||||
void radv_cmd_buffer_clear_attachment(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *attachment);
|
||||
|
||||
|
|
|
|||
|
|
@ -190,11 +190,29 @@ radv_compute_copy_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, ui
|
|||
radv_meta_restore(&saved_state, cmd_buffer);
|
||||
}
|
||||
|
||||
/* Heuristic: decide between a compute shader and CP DMA for buffer
 * copy/fill operations.
 *
 * Compute is preferred for large transfers (size >=
 * RADV_BUFFER_OPS_CS_THRESHOLD), EXCEPT on GFX10+ dGPUs when either
 * endpoint is not device-local: there, CP DMA is preferred because the
 * transfer is bounded by PCIe anyway and using compute always eats
 * shader resources (see the -5% regression in Control this fixes).
 *
 * src_copy_flags/dst_copy_flags: RADV_COPY_FLAGS_* describing the source
 * and destination memory; for fills, pass the same flags for both.
 * Returns true to use a compute shader, false to use CP DMA.
 */
static bool
|
||||
radv_prefer_compute_or_cp_dma(const struct radv_device *device, uint64_t size, enum radv_copy_flags src_copy_flags,
|
||||
enum radv_copy_flags dst_copy_flags)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10 && pdev->info.has_dedicated_vram) {
|
||||
if (!(src_copy_flags & RADV_COPY_FLAGS_DEVICE_LOCAL) || !(dst_copy_flags & RADV_COPY_FLAGS_DEVICE_LOCAL)) {
|
||||
/* Prefer CP DMA for GTT on dGPUs due to slow PCIe. */
|
||||
use_compute = false;
|
||||
}
|
||||
}
|
||||
|
||||
return use_compute;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
radv_fill_memory_internal(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, uint64_t va,
|
||||
uint64_t size, uint32_t value)
|
||||
uint64_t size, uint32_t value, enum radv_copy_flags copy_flags)
|
||||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
bool use_compute = radv_prefer_compute_or_cp_dma(device, size, copy_flags, copy_flags);
|
||||
uint32_t flush_bits = 0;
|
||||
|
||||
assert(!(va & 3));
|
||||
|
|
@ -202,7 +220,7 @@ radv_fill_memory_internal(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
|||
|
||||
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
|
||||
radv_sdma_fill_memory(device, cmd_buffer->cs, va, size, value);
|
||||
} else if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
|
||||
} else if (use_compute) {
|
||||
radv_compute_fill_memory(cmd_buffer, va, size, value);
|
||||
|
||||
flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
|
||||
|
|
@ -215,9 +233,10 @@ radv_fill_memory_internal(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
|||
}
|
||||
|
||||
uint32_t
|
||||
radv_fill_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, uint32_t value)
|
||||
radv_fill_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, uint32_t value,
|
||||
enum radv_copy_flags copy_flags)
|
||||
{
|
||||
return radv_fill_memory_internal(cmd_buffer, NULL, va, size, value);
|
||||
return radv_fill_memory_internal(cmd_buffer, NULL, va, size, value, copy_flags);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
|
|
@ -227,10 +246,14 @@ radv_fill_image(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *ima
|
|||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const uint64_t va = image->bindings[0].addr + offset;
|
||||
struct radeon_winsys_bo *bo = image->bindings[0].bo;
|
||||
enum radv_copy_flags copy_flags = 0;
|
||||
|
||||
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
|
||||
|
||||
return radv_fill_memory_internal(cmd_buffer, image, va, size, value);
|
||||
return radv_fill_memory_internal(cmd_buffer, image, va, size, value, copy_flags);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
|
|
@ -238,10 +261,14 @@ radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo
|
|||
uint32_t value)
|
||||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
enum radv_copy_flags copy_flags = 0;
|
||||
|
||||
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
|
||||
|
||||
return radv_fill_memory(cmd_buffer, va, size, value);
|
||||
return radv_fill_memory(cmd_buffer, va, size, value, copy_flags);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
@ -261,10 +288,12 @@ radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSi
|
|||
}
|
||||
|
||||
void
|
||||
radv_copy_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va, uint64_t size)
|
||||
radv_copy_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va, uint64_t size,
|
||||
enum radv_copy_flags src_copy_flags, enum radv_copy_flags dst_copy_flags)
|
||||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const bool use_compute = !(size & 3) && !(src_va & 3) && !(dst_va & 3) && size >= RADV_BUFFER_OPS_CS_THRESHOLD;
|
||||
const bool use_compute = !(size & 3) && !(src_va & 3) && !(dst_va & 3) &&
|
||||
radv_prefer_compute_or_cp_dma(device, size, src_copy_flags, dst_copy_flags);
|
||||
|
||||
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
|
||||
radv_sdma_copy_memory(device, cmd_buffer->cs, src_va, dst_va, size);
|
||||
|
|
@ -282,6 +311,12 @@ radv_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCop
|
|||
VK_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
|
||||
VK_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
enum radv_copy_flags src_copy_flags = 0, dst_copy_flags = 0;
|
||||
|
||||
if (src_buffer->bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
src_copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
if (dst_buffer->bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
dst_copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
|
||||
radv_suspend_conditional_rendering(cmd_buffer);
|
||||
|
||||
|
|
@ -293,7 +328,7 @@ radv_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCop
|
|||
const uint64_t src_va = vk_buffer_address(&src_buffer->vk, region->srcOffset);
|
||||
const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, region->dstOffset);
|
||||
|
||||
radv_copy_memory(cmd_buffer, src_va, dst_va, region->size);
|
||||
radv_copy_memory(cmd_buffer, src_va, dst_va, region->size, src_copy_flags, dst_copy_flags);
|
||||
}
|
||||
|
||||
radv_resume_conditional_rendering(cmd_buffer);
|
||||
|
|
@ -324,7 +359,8 @@ radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const voi
|
|||
}
|
||||
|
||||
static void
|
||||
radv_update_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, const void *data)
|
||||
radv_update_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, const void *data,
|
||||
enum radv_copy_flags dst_copy_flags)
|
||||
{
|
||||
assert(!(size & 3));
|
||||
assert(!(va & 3));
|
||||
|
|
@ -335,13 +371,17 @@ radv_update_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t siz
|
|||
if (size < RADV_BUFFER_UPDATE_THRESHOLD && cmd_buffer->qf != RADV_QUEUE_TRANSFER) {
|
||||
radv_update_buffer_cp(cmd_buffer, va, data, size);
|
||||
} else {
|
||||
enum radv_copy_flags src_copy_flags = 0;
|
||||
uint32_t buf_offset;
|
||||
|
||||
radv_cmd_buffer_upload_data(cmd_buffer, size, data, &buf_offset);
|
||||
|
||||
if (cmd_buffer->upload.upload_bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
src_copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
|
||||
const uint64_t src_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + buf_offset;
|
||||
|
||||
radv_copy_memory(cmd_buffer, src_va, va, size);
|
||||
radv_copy_memory(cmd_buffer, src_va, va, size, src_copy_flags, dst_copy_flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -353,12 +393,16 @@ radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDevice
|
|||
VK_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, dstOffset);
|
||||
enum radv_copy_flags dst_copy_flags = 0;
|
||||
|
||||
if (dst_buffer->bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
dst_copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
|
||||
radv_suspend_conditional_rendering(cmd_buffer);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
|
||||
|
||||
radv_update_memory(cmd_buffer, dst_va, dataSize, pData);
|
||||
radv_update_memory(cmd_buffer, dst_va, dataSize, pData, dst_copy_flags);
|
||||
|
||||
radv_resume_conditional_rendering(cmd_buffer);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -622,6 +622,7 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
|
|||
const struct radeon_surf *surf = &image->planes[0].surface;
|
||||
const struct radeon_info *gpu_info = &pdev->info;
|
||||
struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);
|
||||
enum radv_copy_flags img_copy_flags = 0, mem_copy_flags = 0;
|
||||
|
||||
/* GFX10 will use a different workaround unless this is not a 2D image */
|
||||
if (gpu_info->gfx_level < GFX9 || (gpu_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) ||
|
||||
|
|
@ -654,6 +655,10 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
|
|||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
|
||||
}
|
||||
|
||||
if (image->bindings[0].bo && (image->bindings[0].bo->initial_domain & RADEON_DOMAIN_VRAM))
|
||||
img_copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
mem_copy_flags |= buf_bsurf->copy_flags;
|
||||
|
||||
for (uint32_t y = 0; y < mip_extent.height; y++) {
|
||||
uint32_t coordY = y + mip_offset.y;
|
||||
/* If the default copy algorithm (done previously) has already seen this
|
||||
|
|
@ -670,9 +675,9 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
|
|||
/* buf_bsurf->offset already includes the layer offset */
|
||||
const uint64_t mem_va = buf_bsurf->addr + buf_bsurf->offset + y * buf_bsurf->pitch * surf->bpe + x * surf->bpe;
|
||||
if (to_image) {
|
||||
radv_copy_memory(cmd_buffer, mem_va, img_va, surf->bpe);
|
||||
radv_copy_memory(cmd_buffer, mem_va, img_va, surf->bpe, mem_copy_flags, img_copy_flags);
|
||||
} else {
|
||||
radv_copy_memory(cmd_buffer, img_va, mem_va, surf->bpe);
|
||||
radv_copy_memory(cmd_buffer, img_va, mem_va, surf->bpe, img_copy_flags, mem_copy_flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -98,7 +98,8 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_v
|
|||
|
||||
static void
|
||||
copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size,
|
||||
struct radv_image *image, VkImageLayout layout, const VkBufferImageCopy2 *region)
|
||||
enum radv_copy_flags src_copy_flags, struct radv_image *image, VkImageLayout layout,
|
||||
const VkBufferImageCopy2 *region)
|
||||
{
|
||||
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
|
||||
transfer_copy_memory_image(cmd_buffer, buffer_addr, image, region, true);
|
||||
|
|
@ -173,6 +174,7 @@ copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, u
|
|||
.format = img_bsurf.format,
|
||||
.offset = region->bufferOffset,
|
||||
.pitch = buf_layout.row_stride_B / buf_layout.element_size_B,
|
||||
.copy_flags = src_copy_flags,
|
||||
};
|
||||
|
||||
if (image->vk.image_type == VK_IMAGE_TYPE_3D)
|
||||
|
|
@ -218,6 +220,10 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
|
|||
VK_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
enum radv_copy_flags src_copy_flags = 0;
|
||||
|
||||
if (src_buffer->bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
src_copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
|
||||
radv_suspend_conditional_rendering(cmd_buffer);
|
||||
|
||||
|
|
@ -230,7 +236,7 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
|
|||
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_image->bindings[bind_idx].bo);
|
||||
|
||||
copy_memory_to_image(cmd_buffer, src_buffer->vk.device_address, src_buffer->vk.size, dst_image,
|
||||
copy_memory_to_image(cmd_buffer, src_buffer->vk.device_address, src_buffer->vk.size, src_copy_flags, dst_image,
|
||||
pCopyBufferToImageInfo->dstImageLayout, region);
|
||||
}
|
||||
|
||||
|
|
@ -262,7 +268,8 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
|
|||
|
||||
static void
|
||||
copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size,
|
||||
struct radv_image *image, VkImageLayout layout, const VkBufferImageCopy2 *region)
|
||||
enum radv_copy_flags dst_copy_flags, struct radv_image *image, VkImageLayout layout,
|
||||
const VkBufferImageCopy2 *region)
|
||||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
|
||||
|
|
@ -332,6 +339,7 @@ copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, u
|
|||
.format = img_info.format,
|
||||
.offset = region->bufferOffset,
|
||||
.pitch = buf_extent_el.width,
|
||||
.copy_flags = dst_copy_flags,
|
||||
};
|
||||
|
||||
if (image->vk.image_type == VK_IMAGE_TYPE_3D)
|
||||
|
|
@ -367,6 +375,10 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBuf
|
|||
VK_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
|
||||
VK_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
enum radv_copy_flags dst_copy_flags = 0;
|
||||
|
||||
if (dst_buffer->bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
dst_copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
|
||||
radv_suspend_conditional_rendering(cmd_buffer);
|
||||
|
||||
|
|
@ -379,7 +391,7 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBuf
|
|||
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_image->bindings[bind_idx].bo);
|
||||
|
||||
copy_image_to_memory(cmd_buffer, dst_buffer->vk.device_address, dst_buffer->vk.size, src_image,
|
||||
copy_image_to_memory(cmd_buffer, dst_buffer->vk.device_address, dst_buffer->vk.size, dst_copy_flags, src_image,
|
||||
pCopyImageToBufferInfo->srcImageLayout, region);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -93,6 +93,7 @@ static void
|
|||
radv_fixup_copy_dst_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
|
||||
const struct radv_image *dst_image)
|
||||
{
|
||||
enum radv_copy_flags src_copy_flags = 0, dst_copy_flags = 0;
|
||||
uint64_t src_va, dst_va, size;
|
||||
|
||||
assert(src_image->planes[0].surface.cmask_size == dst_image->planes[0].surface.cmask_size &&
|
||||
|
|
@ -102,12 +103,17 @@ radv_fixup_copy_dst_metadata(struct radv_cmd_buffer *cmd_buffer, const struct ra
|
|||
dst_image->planes[0].surface.fmask_offset + dst_image->planes[0].surface.fmask_size ==
|
||||
dst_image->planes[0].surface.cmask_offset);
|
||||
|
||||
if (src_image->bindings[0].bo && (src_image->bindings[0].bo->initial_domain & RADEON_DOMAIN_VRAM))
|
||||
src_copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
if (dst_image->bindings[0].bo && (dst_image->bindings[0].bo->initial_domain & RADEON_DOMAIN_VRAM))
|
||||
dst_copy_flags |= RADV_COPY_FLAGS_DEVICE_LOCAL;
|
||||
|
||||
/* Copy CMASK+FMASK. */
|
||||
size = src_image->planes[0].surface.cmask_size + src_image->planes[0].surface.fmask_size;
|
||||
src_va = src_image->bindings[0].addr + src_image->planes[0].surface.fmask_offset;
|
||||
dst_va = dst_image->bindings[0].addr + dst_image->planes[0].surface.fmask_offset;
|
||||
|
||||
radv_copy_memory(cmd_buffer, src_va, dst_va, size);
|
||||
radv_copy_memory(cmd_buffer, src_va, dst_va, size, src_copy_flags, dst_copy_flags);
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -581,7 +581,7 @@ radv_init_update_scratch(VkCommandBuffer commandBuffer, VkDeviceAddress scratch,
|
|||
|
||||
/* Prepare ready counts for internal nodes */
|
||||
radv_fill_memory(cmd_buffer, scratch + layout.internal_ready_count_offset,
|
||||
layout.update_size - layout.internal_ready_count_offset, 0x0);
|
||||
layout.update_size - layout.internal_ready_count_offset, 0x0, RADV_COPY_FLAGS_DEVICE_LOCAL);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -627,7 +627,8 @@ radv_update_as(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuild
|
|||
const uint64_t src_va = vk_acceleration_structure_get_va(src);
|
||||
const uint64_t dst_va = vk_acceleration_structure_get_va(dst);
|
||||
|
||||
radv_copy_memory(cmd_buffer, src_va, dst_va, layout.bvh_offset);
|
||||
radv_copy_memory(cmd_buffer, src_va, dst_va, layout.bvh_offset, RADV_COPY_FLAGS_DEVICE_LOCAL,
|
||||
RADV_COPY_FLAGS_DEVICE_LOCAL);
|
||||
}
|
||||
|
||||
struct scratch_layout layout;
|
||||
|
|
@ -721,7 +722,7 @@ static void
|
|||
radv_cmd_fill_buffer_addr(VkCommandBuffer commandBuffer, VkDeviceAddress addr, VkDeviceSize size, uint32_t data)
|
||||
{
|
||||
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
radv_fill_memory(cmd_buffer, addr, size, data);
|
||||
radv_fill_memory(cmd_buffer, addr, size, data, RADV_COPY_FLAGS_DEVICE_LOCAL);
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue