diff --git a/src/nouveau/vulkan/nvk_cmd_blit.c b/src/nouveau/vulkan/nvk_cmd_blit.c index 5da50b6309e..ff75488afca 100644 --- a/src/nouveau/vulkan/nvk_cmd_blit.c +++ b/src/nouveau/vulkan/nvk_cmd_blit.c @@ -72,18 +72,6 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer, struct nvk_image_level *src_level = &src->level[region->srcSubresource.mipLevel]; struct nvk_image_level *dst_level = &dst->level[region->dstSubresource.mipLevel]; - VkDeviceSize src_addr = nvk_image_base_address(src, region->srcSubresource.mipLevel); - VkDeviceSize dst_addr = nvk_image_base_address(dst, region->dstSubresource.mipLevel); - - /* we can't select the src layer, so we need to offset manually - * Also, this is completely safe as we don't tile over array layers contrary to the depth - * of a 3d image. - */ - src_addr += region->srcSubresource.baseArrayLayer * src_level->layer_stride; - - uint32_t src_depth = src_level->extent.depth * src->vk.array_layers; - uint32_t dst_depth = dst_level->extent.depth * dst->vk.array_layers; - unsigned x_i = region->dstOffsets[0].x < region->dstOffsets[1].x ? 0 : 1; unsigned y_i = region->dstOffsets[0].y < region->dstOffsets[1].y ? 
0 : 1; @@ -124,7 +112,7 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer, } P_MTHD(push, NV902D, SET_SRC_DEPTH); - P_NV902D_SET_SRC_DEPTH(push, src_depth); + P_NV902D_SET_SRC_DEPTH(push, src_level->extent.depth); P_MTHD(push, NV902D, SET_SRC_PITCH); P_NV902D_SET_SRC_PITCH(push, src_level->row_stride); @@ -143,14 +131,12 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer, } P_MTHD(push, NV902D, SET_DST_DEPTH); - P_NV902D_SET_DST_DEPTH(push, dst_depth); + P_NV902D_SET_DST_DEPTH(push, dst_level->extent.depth); P_MTHD(push, NV902D, SET_DST_PITCH); P_NV902D_SET_DST_PITCH(push, dst_level->row_stride); P_NV902D_SET_DST_WIDTH(push, dst_level->extent.width); P_NV902D_SET_DST_HEIGHT(push, dst_level->extent.height); - P_NV902D_SET_DST_OFFSET_UPPER(push, dst_addr >> 32); - P_NV902D_SET_DST_OFFSET_LOWER(push, dst_addr & 0xffffffff); P_MTHD(push, NV902D, SET_PIXELS_FROM_MEMORY_DST_X0); P_NV902D_SET_PIXELS_FROM_MEMORY_DST_X0(push, dst_start_x); @@ -165,21 +151,30 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer, P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_INT(push, src_start_x_fp >> 32); P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_Y0_FRAC(push, src_start_y_fp & 0xffffffff); - /* we can select the dst but not the src layer... 
*/ - for (unsigned z = 0; z < region->srcSubresource.layerCount; z++) { + assert(src->vk.image_type != VK_IMAGE_TYPE_3D); + assert(dst->vk.image_type != VK_IMAGE_TYPE_3D); + for (unsigned w = 0; w < region->srcSubresource.layerCount; w++) { + VkDeviceSize src_addr = nvk_image_base_address(src, region->srcSubresource.mipLevel); + VkDeviceSize dst_addr = nvk_image_base_address(dst, region->dstSubresource.mipLevel); + + src_addr += (w + region->srcSubresource.baseArrayLayer) * + src_level->layer_stride; + dst_addr += (w + region->dstSubresource.baseArrayLayer) * + dst_level->layer_stride; + P_MTHD(push, NV902D, SET_SRC_OFFSET_UPPER); P_NV902D_SET_SRC_OFFSET_UPPER(push, src_addr >> 32); P_NV902D_SET_SRC_OFFSET_LOWER(push, src_addr & 0xffffffff); + P_MTHD(push, NV902D, SET_DST_OFFSET_UPPER); + P_NV902D_SET_DST_OFFSET_UPPER(push, dst_addr >> 32); + P_NV902D_SET_DST_OFFSET_LOWER(push, dst_addr & 0xffffffff); + P_MTHD(push, NV902D, SET_DST_LAYER); - P_NV902D_SET_DST_LAYER(push, z + region->dstSubresource.baseArrayLayer); + P_NV902D_SET_DST_LAYER(push, 0); P_MTHD(push, NV902D, PIXELS_FROM_MEMORY_SRC_Y0_INT); P_NV902D_PIXELS_FROM_MEMORY_SRC_Y0_INT(push, src_start_y_fp >> 32); - - /* this works only if there is no tiling on z */ - assert(!src_level->tile.z); - src_addr += src_level->layer_stride; } } } diff --git a/src/nouveau/vulkan/nvk_cmd_copy.c b/src/nouveau/vulkan/nvk_cmd_copy.c index c98035fc828..d89868b776e 100644 --- a/src/nouveau/vulkan/nvk_cmd_copy.c +++ b/src/nouveau/vulkan/nvk_cmd_copy.c @@ -50,6 +50,7 @@ nouveau_copy_linear(struct nouveau_ws_push *push, struct nouveau_copy_buffer { uint64_t base_addr; VkOffset3D offset; + uint32_t base_array_layer; VkExtent3D extent; uint32_t row_stride; uint32_t layer_stride; @@ -61,6 +62,7 @@ struct nouveau_copy { struct nouveau_copy_buffer dst; uint32_t bpp; VkExtent3D extent; + uint32_t layer_count; }; static struct nouveau_copy_buffer @@ -88,122 +90,121 @@ nouveau_copy_rect_image( .base_addr = nvk_image_base_address(img, 
sub_res->mipLevel), .offset = vk_image_sanitize_offset(&img->vk, offset), .extent = level->extent, + .base_array_layer = sub_res->baseArrayLayer, .row_stride = level->row_stride, .layer_stride = level->layer_stride, .tile = level->tile, }; - buf.extent.depth *= img->vk.array_layers; - buf.offset.z += sub_res->baseArrayLayer; - return buf; } static void nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy) { - VkDeviceSize src_addr = copy->src.base_addr; - VkDeviceSize dst_addr = copy->dst.base_addr; - - if (!copy->src.tile.is_tiled) { - src_addr += - copy->src.offset.x * copy->bpp + - copy->src.offset.y * copy->src.row_stride + - copy->src.offset.z * copy->src.layer_stride; - } - - if (!copy->dst.tile.is_tiled) { - dst_addr += - copy->dst.offset.x * copy->bpp + - copy->dst.offset.y * copy->dst.row_stride + - copy->dst.offset.z * copy->dst.layer_stride; - } - struct nouveau_ws_push *push = cmd->push; - for (unsigned z = 0; z < copy->extent.depth; z++) { - P_MTHD(push, NV90B5, OFFSET_IN_UPPER); - P_NV90B5_OFFSET_IN_UPPER(push, src_addr >> 32); - P_NV90B5_OFFSET_IN_LOWER(push, src_addr & 0xffffffff); - P_NV90B5_OFFSET_OUT_UPPER(push, dst_addr >> 32); - P_NV90B5_OFFSET_OUT_LOWER(push, dst_addr & 0xfffffff); - P_NV90B5_PITCH_IN(push, copy->src.row_stride); - P_NV90B5_PITCH_OUT(push, copy->dst.row_stride); - P_NV90B5_LINE_LENGTH_IN(push, copy->extent.width * copy->bpp); - P_NV90B5_LINE_COUNT(push, copy->extent.height); + for (unsigned w = 0; w < copy->layer_count; w++) { + VkDeviceSize src_addr = copy->src.base_addr; + VkDeviceSize dst_addr = copy->dst.base_addr; - uint32_t src_layout = 0, dst_layout = 0; - if (copy->src.tile.is_tiled) { - assert(copy->src.tile.is_fermi); - P_MTHD(push, NV90B5, SET_SRC_BLOCK_SIZE); - P_NV90B5_SET_SRC_BLOCK_SIZE(push, { - .width = copy->src.tile.x, - .height = copy->src.tile.y, - .depth = copy->src.tile.z, - .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8, - }); - P_NV90B5_SET_SRC_WIDTH(push, copy->src.extent.width * 
copy->bpp); - P_NV90B5_SET_SRC_HEIGHT(push, copy->src.extent.height); - P_NV90B5_SET_SRC_DEPTH(push, copy->src.extent.depth); - P_NV90B5_SET_SRC_LAYER(push, z + copy->src.offset.z); + src_addr += (w + copy->src.base_array_layer) * copy->src.layer_stride; + dst_addr += (w + copy->dst.base_array_layer) * copy->dst.layer_stride; - if (cmd->pool->dev->pdev->dev->cls >= 0xc1) { - P_MTHD(push, NVC1B5, SRC_ORIGIN_X); - P_NVC1B5_SRC_ORIGIN_X(push, copy->src.offset.x * copy->bpp); - P_NVC1B5_SRC_ORIGIN_Y(push, copy->src.offset.y); - } else { - P_MTHD(push, NV90B5, SET_SRC_ORIGIN); - P_NV90B5_SET_SRC_ORIGIN(push, { - .x = copy->src.offset.x * copy->bpp, - .y = copy->src.offset.y - }); - } - - src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR; - } else { - src_addr += copy->src.layer_stride; - src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH; + if (!copy->src.tile.is_tiled) { + src_addr += copy->src.offset.x * copy->bpp + + copy->src.offset.y * copy->src.row_stride; } - if (copy->dst.tile.is_tiled) { - assert(copy->dst.tile.is_fermi); - P_MTHD(push, NV90B5, SET_DST_BLOCK_SIZE); - P_NV90B5_SET_DST_BLOCK_SIZE(push, { - .width = copy->dst.tile.x, - .height = copy->dst.tile.y, - .depth = copy->dst.tile.z, - .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8, - }); - P_NV90B5_SET_DST_WIDTH(push, copy->dst.extent.width * copy->bpp); - P_NV90B5_SET_DST_HEIGHT(push, copy->dst.extent.height); - P_NV90B5_SET_DST_DEPTH(push, copy->dst.extent.depth); - P_NV90B5_SET_DST_LAYER(push, z + copy->dst.offset.z); - - if (cmd->pool->dev->pdev->dev->cls >= 0xc1) { - P_MTHD(push, NVC1B5, DST_ORIGIN_X); - P_NVC1B5_DST_ORIGIN_X(push, copy->dst.offset.x * copy->bpp); - P_NVC1B5_DST_ORIGIN_Y(push, copy->dst.offset.y); - } else { - P_MTHD(push, NV90B5, SET_DST_ORIGIN); - P_NV90B5_SET_DST_ORIGIN(push, { - .x = copy->dst.offset.x * copy->bpp, - .y = copy->dst.offset.y - }); - } - - dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR; - } else { - dst_addr += copy->dst.layer_stride; - 
dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH; + if (!copy->dst.tile.is_tiled) { + dst_addr += copy->dst.offset.x * copy->bpp + + copy->dst.offset.y * copy->dst.row_stride; } - P_IMMD(push, NV90B5, LAUNCH_DMA, { - .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED, - .multi_line_enable = MULTI_LINE_ENABLE_TRUE, - .flush_enable = FLUSH_ENABLE_TRUE, - .src_memory_layout = src_layout, - .dst_memory_layout = dst_layout - }); + for (unsigned z = 0; z < copy->extent.depth; z++) { + P_MTHD(push, NV90B5, OFFSET_IN_UPPER); + P_NV90B5_OFFSET_IN_UPPER(push, src_addr >> 32); + P_NV90B5_OFFSET_IN_LOWER(push, src_addr & 0xffffffff); + P_NV90B5_OFFSET_OUT_UPPER(push, dst_addr >> 32); + P_NV90B5_OFFSET_OUT_LOWER(push, dst_addr & 0xfffffff); + P_NV90B5_PITCH_IN(push, copy->src.row_stride); + P_NV90B5_PITCH_OUT(push, copy->dst.row_stride); + P_NV90B5_LINE_LENGTH_IN(push, copy->extent.width * copy->bpp); + P_NV90B5_LINE_COUNT(push, copy->extent.height); + + uint32_t src_layout = 0, dst_layout = 0; + if (copy->src.tile.is_tiled) { + assert(copy->src.tile.is_fermi); + P_MTHD(push, NV90B5, SET_SRC_BLOCK_SIZE); + P_NV90B5_SET_SRC_BLOCK_SIZE(push, { + .width = copy->src.tile.x, + .height = copy->src.tile.y, + .depth = copy->src.tile.z, + .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8, + }); + P_NV90B5_SET_SRC_WIDTH(push, copy->src.extent.width * copy->bpp); + P_NV90B5_SET_SRC_HEIGHT(push, copy->src.extent.height); + P_NV90B5_SET_SRC_DEPTH(push, copy->src.extent.depth); + P_NV90B5_SET_SRC_LAYER(push, z + copy->src.offset.z); + + if (cmd->pool->dev->pdev->dev->cls >= 0xc1) { + P_MTHD(push, NVC1B5, SRC_ORIGIN_X); + P_NVC1B5_SRC_ORIGIN_X(push, copy->src.offset.x * copy->bpp); + P_NVC1B5_SRC_ORIGIN_Y(push, copy->src.offset.y); + } else { + P_MTHD(push, NV90B5, SET_SRC_ORIGIN); + P_NV90B5_SET_SRC_ORIGIN(push, { + .x = copy->src.offset.x * copy->bpp, + .y = copy->src.offset.y + }); + } + + src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR; + } else { + src_addr += 
copy->src.layer_stride; + src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH; + } + + if (copy->dst.tile.is_tiled) { + assert(copy->dst.tile.is_fermi); + P_MTHD(push, NV90B5, SET_DST_BLOCK_SIZE); + P_NV90B5_SET_DST_BLOCK_SIZE(push, { + .width = copy->dst.tile.x, + .height = copy->dst.tile.y, + .depth = copy->dst.tile.z, + .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8, + }); + P_NV90B5_SET_DST_WIDTH(push, copy->dst.extent.width * copy->bpp); + P_NV90B5_SET_DST_HEIGHT(push, copy->dst.extent.height); + P_NV90B5_SET_DST_DEPTH(push, copy->dst.extent.depth); + P_NV90B5_SET_DST_LAYER(push, z + copy->dst.offset.z); + + if (cmd->pool->dev->pdev->dev->cls >= 0xc1) { + P_MTHD(push, NVC1B5, DST_ORIGIN_X); + P_NVC1B5_DST_ORIGIN_X(push, copy->dst.offset.x * copy->bpp); + P_NVC1B5_DST_ORIGIN_Y(push, copy->dst.offset.y); + } else { + P_MTHD(push, NV90B5, SET_DST_ORIGIN); + P_NV90B5_SET_DST_ORIGIN(push, { + .x = copy->dst.offset.x * copy->bpp, + .y = copy->dst.offset.y + }); + } + + dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR; + } else { + dst_addr += copy->dst.layer_stride; + dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH; + } + + P_IMMD(push, NV90B5, LAUNCH_DMA, { + .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED, + .multi_line_enable = MULTI_LINE_ENABLE_TRUE, + .flush_enable = FLUSH_ENABLE_TRUE, + .src_memory_layout = src_layout, + .dst_memory_layout = dst_layout + }); + } } } @@ -248,8 +249,8 @@ nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, .dst = nouveau_copy_rect_image(dst, region->imageOffset, &region->imageSubresource), .bpp = buffer_layout.element_size_B, .extent = vk_image_sanitize_extent(&dst->vk, region->imageExtent), + .layer_count = region->imageSubresource.layerCount, }; - copy.extent.depth *= region->imageSubresource.layerCount; nouveau_copy_rect(cmd, &copy); @@ -291,8 +292,8 @@ nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, .dst = nouveau_copy_rect_buffer(dst, region->bufferOffset, buffer_layout), .bpp = 
buffer_layout.element_size_B, .extent = vk_image_sanitize_extent(&src->vk, region->imageExtent), + .layer_count = region->imageSubresource.layerCount, }; - copy.extent.depth *= region->imageSubresource.layerCount; nouveau_copy_rect(cmd, &copy); @@ -337,8 +338,8 @@ nvk_CmdCopyImage2(VkCommandBuffer commandBuffer, .dst = nouveau_copy_rect_image(dst, region->dstOffset, &region->dstSubresource), .bpp = bpp, .extent = vk_image_sanitize_extent(&src->vk, region->extent), + .layer_count = region->srcSubresource.layerCount, }; - copy.extent.depth *= region->srcSubresource.layerCount; nouveau_copy_rect(cmd, &copy); }