diff --git a/src/nouveau/vulkan/nvk_cmd_2d.c b/src/nouveau/vulkan/nvk_cmd_2d.c index 5a4f1040bee..46b7e5ee3f8 100644 --- a/src/nouveau/vulkan/nvk_cmd_2d.c +++ b/src/nouveau/vulkan/nvk_cmd_2d.c @@ -28,3 +28,304 @@ nvk_push_2d_state_init(struct nvk_queue *queue, struct nv_push *p) return VK_SUCCESS; } + +/** + * Divide x by y, rounding to the nearest integer + */ +static int64_t +div_round(int64_t x, int64_t y) { + assert(y > 0); + if (x >= 0) { + return (x + y / 2) / y; + } else { + return (x - y / 2) / y; + } +} + +/** + * Convert an integer to 32.32 fixed point + */ +static int64_t +int_to_fixed(int64_t x) +{ + assert(INT32_MIN <= x); + assert(x <= INT32_MAX); + return x << 32; +} + +/** + * Compute offset and scale for one dimension + * + * dst0_out, dst1_out are integer coordinates + * src0_out, scale_out are 32.32 fixed point + */ +static inline void +compute_off_scale(uint32_t src0, uint32_t src1, + uint32_t dst0, uint32_t dst1, + uint32_t *dst0_out, uint32_t *dst1_out, + int64_t *src0_out, int64_t *scale_out) +{ + if (dst0 < dst1) { + *dst0_out = dst0; + *dst1_out = dst1; + } else { + *dst0_out = dst1; + *dst1_out = dst0; + + /* Flip the source region */ + SWAP(src0, src1); + } + + int64_t src_region_size = (int64_t)src1 - (int64_t)src0; + assert(src_region_size != 0); + + int64_t dst_region_size = (int64_t)*dst1_out - (int64_t)*dst0_out; + assert(dst_region_size > 0); + + /* Divide with result in 32.32 fixed point */ + int64_t scale = div_round(int_to_fixed(src_region_size), dst_region_size); + + /* Based on the equations in the spec for vkCmdBlitImage, we set i = x_dst0 + * to get the starting texel, which gives us: + * i = x_dst0 + * u_base = x_dst0 + 1/2 + * u_offset = (x_dst0 + 1/2) - x_dst0 = 1/2 + * u_scaled = u_offset * scale_u = scale_u / 2 + * u = u_scaled + x_src0 = x_src0 + scale_u / 2 + * Thanks to maxImageDimension, this should be nowhere near overflow. 
+ */ + int64_t src_offset = int_to_fixed(src0) + div_round(scale, 2); + + *scale_out = scale; + *src0_out = src_offset; +} + +static void +nvk_2d_blit_rect(struct nvk_cmd_buffer *cmd, const VkImageBlit2 *region) +{ + struct nv_push *p = nvk_cmd_buffer_push(cmd, 13); + uint32_t dst_x0, dst_y0, dst_x1, dst_y1; + int64_t src_x0, src_y0, du_dx, dv_dy; + + compute_off_scale(region->srcOffsets[0].x, + region->srcOffsets[1].x, + region->dstOffsets[0].x, + region->dstOffsets[1].x, + &dst_x0, &dst_x1, + &src_x0, &du_dx); + compute_off_scale(region->srcOffsets[0].y, + region->srcOffsets[1].y, + region->dstOffsets[0].y, + region->dstOffsets[1].y, + &dst_y0, &dst_y1, + &src_y0, &dv_dy); + + P_MTHD(p, NV902D, SET_PIXELS_FROM_MEMORY_DST_X0); + P_NV902D_SET_PIXELS_FROM_MEMORY_DST_X0(p, dst_x0); + P_NV902D_SET_PIXELS_FROM_MEMORY_DST_Y0(p, dst_y0); + P_NV902D_SET_PIXELS_FROM_MEMORY_DST_WIDTH(p, dst_x1 - dst_x0); + P_NV902D_SET_PIXELS_FROM_MEMORY_DST_HEIGHT(p, dst_y1 - dst_y0); + + P_NV902D_SET_PIXELS_FROM_MEMORY_DU_DX_FRAC(p, du_dx); + P_NV902D_SET_PIXELS_FROM_MEMORY_DU_DX_INT(p, du_dx >> 32); + P_NV902D_SET_PIXELS_FROM_MEMORY_DV_DY_FRAC(p, dv_dy); + P_NV902D_SET_PIXELS_FROM_MEMORY_DV_DY_INT(p, dv_dy >> 32); + + P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_FRAC(p, src_x0); + P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_INT(p, src_x0 >> 32); + P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_Y0_FRAC(p, src_y0); + P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_Y0_INT(p, src_y0 >> 32); +} + +static bool +nvk_2d_can_set_target(const struct nvk_physical_device *pdev, + const struct nvk_image *image) +{ + if (image->plane_count != 1) + return false; + + const struct nvk_image_plane *plane = &image->planes[0]; + const struct nil_image *nil_image = &plane->nil; + + if (nil_image->dim == NIL_IMAGE_DIM_3D) + return false; + + enum pipe_format p_format = + nvk_format_to_pipe_format(image->vk.format); + if (!nil_format_supports_2d_engine(&pdev->info, p_format)) + return false; + + return true; +} + +static void
+nvk_2d_set_target(struct nvk_cmd_buffer *cmd, struct nvk_image *image, + const VkImageSubresourceLayers *subresource, bool is_src) +{ + assert(image->plane_count == 1); + const struct nvk_image_plane *plane = &image->planes[0]; + const struct nil_image *nil_image = &plane->nil; + const struct nil_image_level *level = + &nil_image->levels[subresource->mipLevel]; + + enum pipe_format p_format = + nvk_format_to_pipe_format(image->vk.format); + + struct nil_Extent4D_Samples level_extent_sa = + nil_image_level_extent_sa(nil_image, subresource->mipLevel); + + uint64_t addr = nvk_image_plane_base_address(plane) + level->offset_B; + + assert(nil_image->dim != NIL_IMAGE_DIM_3D); + assert(subresource->layerCount == 1); + addr += subresource->baseArrayLayer * + (uint64_t)nil_image->array_stride_B; + + struct nv_push *p = nvk_cmd_buffer_push(cmd, 11); + + if (is_src) { + P_MTHD(p, NV902D, SET_SRC_FORMAT); + } else { + P_MTHD(p, NV902D, SET_DST_FORMAT); + } + +#define SET(n, x...) do { \ + if (is_src) { \ + P_NV902D_SET_SRC_##n(p, x); \ + } else { \ + P_NV902D_SET_DST_##n(p, x); \ + } \ + } while (0) + + uint8_t ct_format = nil_format_to_color_target(p_format); + SET(FORMAT, ct_format); + + if (level->tiling.gob_type != NIL_GOB_TYPE_LINEAR) { + SET(MEMORY_LAYOUT, V_BLOCKLINEAR); + } else { + SET(MEMORY_LAYOUT, V_PITCH); + } + + SET(BLOCK_SIZE, { + .height = level->tiling.y_log2, + .depth = level->tiling.z_log2, + }); + SET(DEPTH, level_extent_sa.depth); + if (is_src) { + P_MTHD(p, NV902D, SET_SRC_PITCH); + } else { + P_NV902D_SET_DST_LAYER(p, 0); + } + + if (level->tiling.gob_type != NIL_GOB_TYPE_LINEAR) { + const uint32_t row_stride_el = + level->row_stride_B / util_format_get_blocksize(p_format); + SET(PITCH, 0); + SET(WIDTH, row_stride_el); + } else { + uint32_t pitch = level->row_stride_B; + assert(pitch % 32 == 0); + SET(PITCH, pitch); + SET(WIDTH, level_extent_sa.width); + } + SET(HEIGHT, level_extent_sa.height); + + assert(addr % 32 == 0); + SET(OFFSET_UPPER, addr >> 
32); + SET(OFFSET_LOWER, addr); +#undef SET +} + +static void +nvk_2d_blit(struct nvk_cmd_buffer *cmd, + const VkBlitImageInfo2 *pBlitImageInfo) +{ + VK_FROM_HANDLE(nvk_image, dst_image, pBlitImageInfo->dstImage); + VK_FROM_HANDLE(nvk_image, src_image, pBlitImageInfo->srcImage); + + { + assert(pBlitImageInfo->filter == VK_FILTER_NEAREST || + pBlitImageInfo->filter == VK_FILTER_LINEAR); + bool nearest = pBlitImageInfo->filter == VK_FILTER_NEAREST; + + struct nv_push *p = nvk_cmd_buffer_push(cmd, 6); + P_IMMD(p, NV902D, SET_OPERATION, V_SRCCOPY); + P_IMMD(p, NV902D, SET_PIXELS_FROM_MEMORY_SAMPLE_MODE, { + .filter = nearest ? FILTER_POINT : FILTER_BILINEAR, + .origin = ORIGIN_CORNER, + }); + P_IMMD(p, NV902D, SET_COMPRESSION, dst_image->is_compressed); + } + + for (uint32_t r = 0; r < pBlitImageInfo->regionCount; r++) { + const VkImageBlit2 *region = &pBlitImageInfo->pRegions[r]; + + assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + + nvk_2d_set_target(cmd, dst_image, &region->dstSubresource, false); + nvk_2d_set_target(cmd, src_image, &region->srcSubresource, true); + + nvk_2d_blit_rect(cmd, region); + } +} + +static bool +can_use_2d_blit(const struct nvk_physical_device *pdev, + const VkBlitImageInfo2 *pBlitImageInfo) +{ + VK_FROM_HANDLE(nvk_image, dst_image, pBlitImageInfo->dstImage); + VK_FROM_HANDLE(nvk_image, src_image, pBlitImageInfo->srcImage); + + if (!nvk_2d_can_set_target(pdev, dst_image) || + !nvk_2d_can_set_target(pdev, src_image)) { + return false; + } + + for (uint32_t r = 0; r < pBlitImageInfo->regionCount; r++) { + const VkImageBlit2 *region = &pBlitImageInfo->pRegions[r]; + if (region->dstSubresource.layerCount != 1 || + region->srcSubresource.layerCount != 1 || + region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT || + region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) { + return false; + } + } + + enum pipe_format src_p_format = + nvk_format_to_pipe_format(src_image->vk.format); + enum pipe_format
dst_p_format = + nvk_format_to_pipe_format(dst_image->vk.format); + + if (util_format_is_red(src_p_format) && + !util_format_is_red(dst_p_format)) { + /* The 2D engine always treats single component formats as + * luminance rather than red + */ + return false; + } + + if (util_format_is_alpha(src_p_format) && + !util_format_is_alpha(dst_p_format)) { + /* Alpha copies seem to leave other channels unchanged, which + * isn't what we want + */ + return false; + } + + return true; +} + +VKAPI_ATTR void VKAPI_CALL +nvk_CmdBlitImage2(VkCommandBuffer commandBuffer, + const VkBlitImageInfo2 *pBlitImageInfo) +{ + VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); + struct nvk_device *dev = nvk_cmd_buffer_device(cmd); + const struct nvk_physical_device *pdev = nvk_device_physical(dev); + + if (can_use_2d_blit(pdev, pBlitImageInfo)) { + nvk_2d_blit(cmd, pBlitImageInfo); + } else { + nvk_meta_blit(cmd, pBlitImageInfo); + } +} diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h index 47356580e31..dff8719082b 100644 --- a/src/nouveau/vulkan/nvk_cmd_buffer.h +++ b/src/nouveau/vulkan/nvk_cmd_buffer.h @@ -441,6 +441,8 @@ void nvk_cmd_fill_memory(struct nvk_cmd_buffer *cmd, uint64_t dst_addr, uint64_t size, uint32_t data); +void nvk_meta_blit(struct nvk_cmd_buffer *cmd, + const VkBlitImageInfo2 *pBlitImageInfo); void nvk_meta_resolve_rendering(struct nvk_cmd_buffer *cmd, const VkRenderingInfo *pRenderingInfo); diff --git a/src/nouveau/vulkan/nvk_cmd_meta.c b/src/nouveau/vulkan/nvk_cmd_meta.c index d58f2ca4df2..241ae96ffca 100644 --- a/src/nouveau/vulkan/nvk_cmd_meta.c +++ b/src/nouveau/vulkan/nvk_cmd_meta.c @@ -7,6 +7,7 @@ #include "nvk_descriptor_set.h" #include "nvk_device.h" #include "nvk_entrypoints.h" +#include "nvk_format.h" #include "nvk_image.h" #include "nvk_physical_device.h" @@ -214,11 +215,10 @@ nvk_meta_end(struct nvk_cmd_buffer *cmd, P_IMMD(p, NV9097, SET_RENDER_ENABLE_OVERRIDE, MODE_USE_RENDER_ENABLE); } -VKAPI_ATTR void VKAPI_CALL 
-nvk_CmdBlitImage2(VkCommandBuffer commandBuffer, - const VkBlitImageInfo2 *pBlitImageInfo) +void +nvk_meta_blit(struct nvk_cmd_buffer *cmd, + const VkBlitImageInfo2 *pBlitImageInfo) { - VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); struct nvk_device *dev = nvk_cmd_buffer_device(cmd); struct nvk_meta_save save;