nvk: Use the 2D engine for simple blits

This commit is contained in:
Mel Henning 2025-11-23 00:39:06 -05:00
parent 7f299331ab
commit 88b1ca44cb
3 changed files with 307 additions and 4 deletions

View file

@ -28,3 +28,304 @@ nvk_push_2d_state_init(struct nvk_queue *queue, struct nv_push *p)
return VK_SUCCESS;
}
/**
 * Divide x by y, rounding to the nearest integer
 *
 * Ties are rounded away from zero.  y must be strictly positive.
 *
 * The rounding decision is made from the quotient and remainder instead of
 * biasing x by y / 2 before dividing, so the computation cannot overflow
 * even when x is close to INT64_MIN or INT64_MAX.
 */
static int64_t
div_round(int64_t x, int64_t y)
{
   assert(y > 0);

   /* C division truncates toward zero, so rem has the sign of x (or is 0)
    * and |rem| < y.
    */
   const int64_t quot = x / y;
   const int64_t rem = x % y;

   /* ceil(y / 2): the smallest |rem| that rounds away from zero */
   const int64_t threshold = y - y / 2;

   if (rem >= threshold)
      return quot + 1;
   if (rem <= -threshold)
      return quot - 1;
   return quot;
}
/**
 * Convert an integer to 32.32 fixed point
 *
 * x must fit in a signed 32-bit integer so that the result fits in the
 * signed 64-bit return value.
 */
static int64_t
int_to_fixed(int64_t x)
{
   assert(INT32_MIN <= x);
   assert(x <= INT32_MAX);

   /* Scale by 2^32 with a multiply instead of a shift: left-shifting a
    * negative signed value is undefined behavior in C, and negative inputs
    * do occur (e.g. the size of a flipped source region).
    */
   return x * (INT64_C(1) << 32);
}
/**
 * Derive the destination span, source start and source step for one axis
 * of a blit.
 *
 * The destination span is returned in ascending order through dst0_out and
 * dst1_out (integer coordinates).  When the caller's destination range is
 * not ascending, the source range is reversed to compensate.
 *
 * src0_out receives the source coordinate sampled for the first destination
 * texel and scale_out the source step per destination texel, both in 32.32
 * fixed point.
 */
static inline void
compute_off_scale(uint32_t src0, uint32_t src1,
                  uint32_t dst0, uint32_t dst1,
                  uint32_t *dst0_out, uint32_t *dst1_out,
                  int64_t *src0_out, int64_t *scale_out)
{
   if (dst0 >= dst1) {
      /* Destination is reversed: emit it in ascending order and walk the
       * source in the opposite direction instead.
       */
      *dst0_out = dst1;
      *dst1_out = dst0;

      const uint32_t old_src0 = src0;
      src0 = src1;
      src1 = old_src0;
   } else {
      *dst0_out = dst0;
      *dst1_out = dst1;
   }

   const int64_t src_span = (int64_t)src1 - (int64_t)src0;
   assert(src_span != 0);

   const int64_t dst_span = (int64_t)*dst1_out - (int64_t)*dst0_out;
   assert(dst_span > 0);

   /* Source texels advanced per destination texel, as a 32.32 quotient */
   const int64_t step = div_round(int_to_fixed(src_span), dst_span);

   /* Following the vkCmdBlitImage equations in the spec, evaluated at the
    * first destination texel (i = x_dst0):
    *
    *    u_base   = i + 1/2
    *    u_offset = u_base - x_dst0      = 1/2
    *    u_scaled = u_offset * scale_u   = scale_u / 2
    *    u        = u_scaled + x_src0    = x_src0 + scale_u / 2
    *
    * maxImageDimension keeps all of this far away from overflow.
    */
   const int64_t first_sample = int_to_fixed(src0) + div_round(step, 2);

   *scale_out = step;
   *src0_out = first_sample;
}
/**
 * Push the rectangle parameters for a single blit region to the 2D engine.
 *
 * The destination rectangle is pushed as origin plus size in integer
 * texels.  The source origin and the per-texel source steps are pushed as
 * 32.32 fixed point values, split into a FRAC word and an INT word.
 */
static void
nvk_2d_blit_rect(struct nvk_cmd_buffer *cmd, const VkImageBlit2 *region)
{
   /* One method header followed by twelve data words */
   struct nv_push *p = nvk_cmd_buffer_push(cmd, 13);

   uint32_t x_lo, x_hi;
   int64_t u_start, u_step;
   compute_off_scale(region->srcOffsets[0].x, region->srcOffsets[1].x,
                     region->dstOffsets[0].x, region->dstOffsets[1].x,
                     &x_lo, &x_hi, &u_start, &u_step);

   uint32_t y_lo, y_hi;
   int64_t v_start, v_step;
   compute_off_scale(region->srcOffsets[0].y, region->srcOffsets[1].y,
                     region->dstOffsets[0].y, region->dstOffsets[1].y,
                     &y_lo, &y_hi, &v_start, &v_step);

   P_MTHD(p, NV902D, SET_PIXELS_FROM_MEMORY_DST_X0);

   /* Destination rectangle */
   P_NV902D_SET_PIXELS_FROM_MEMORY_DST_X0(p, x_lo);
   P_NV902D_SET_PIXELS_FROM_MEMORY_DST_Y0(p, y_lo);
   P_NV902D_SET_PIXELS_FROM_MEMORY_DST_WIDTH(p, x_hi - x_lo);
   P_NV902D_SET_PIXELS_FROM_MEMORY_DST_HEIGHT(p, y_hi - y_lo);

   /* Source step per destination texel */
   P_NV902D_SET_PIXELS_FROM_MEMORY_DU_DX_FRAC(p, u_step);
   P_NV902D_SET_PIXELS_FROM_MEMORY_DU_DX_INT(p, u_step >> 32);
   P_NV902D_SET_PIXELS_FROM_MEMORY_DV_DY_FRAC(p, v_step);
   P_NV902D_SET_PIXELS_FROM_MEMORY_DV_DY_INT(p, v_step >> 32);

   /* Source origin.  The last method has no SET_ prefix.
    * NOTE(review): presumably writing it is what launches the blit --
    * confirm against the class documentation.
    */
   P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_FRAC(p, u_start);
   P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_INT(p, u_start >> 32);
   P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_Y0_FRAC(p, v_start);
   P_NV902D_PIXELS_FROM_MEMORY_SRC_Y0_INT(p, v_start >> 32);
}
static bool
nvk_2d_can_set_target(const struct nvk_physical_device *pdev,
const struct nvk_image *image)
{
if (image->plane_count != 1)
return false;
const struct nvk_image_plane *plane = &image->planes[0];
const struct nil_image *nil_image = &plane->nil;
if (nil_image->dim == NIL_IMAGE_DIM_3D)
return false;
enum pipe_format p_format =
nvk_format_to_pipe_format(image->vk.format);
if (!nil_format_supports_2d_engine(&pdev->info, p_format))
return false;
return true;
}
/**
 * Program the 2D engine's source or destination surface state.
 *
 * Describes one mip level and one array layer of a single-plane, non-3D
 * image: format, memory layout, block size, depth, pitch/width, height and
 * GPU address.
 *
 * image        image to bind; must have exactly one plane
 * subresource  selects mipLevel and baseArrayLayer; layerCount must be 1
 * is_src       true to write the SET_SRC_* methods, false for SET_DST_*
 */
static void
nvk_2d_set_target(struct nvk_cmd_buffer *cmd, struct nvk_image *image,
const VkImageSubresourceLayers *subresource, bool is_src)
{
assert(image->plane_count == 1);
const struct nvk_image_plane *plane = &image->planes[0];
const struct nil_image *nil_image = &plane->nil;
const struct nil_image_level *level =
&nil_image->levels[subresource->mipLevel];
enum pipe_format p_format =
nvk_format_to_pipe_format(image->vk.format);
struct nil_Extent4D_Samples level_extent_sa =
nil_image_level_extent_sa(nil_image, subresource->mipLevel);
/* Address of the selected mip level within the plane */
uint64_t addr = nvk_image_plane_base_address(plane) + level->offset_B;
assert(nil_image->dim != NIL_IMAGE_DIM_3D);
assert(subresource->layerCount == 1);
/* Step to the selected array layer; widen before multiplying so the
 * product is computed in 64 bits
 */
addr += subresource->baseArrayLayer *
(uint64_t)nil_image->array_stride_B;
/* 11 dwords on both paths:
 *   dst: 1 header + 10 data words (including LAYER)
 *   src: 2 headers + 9 data words (no LAYER, second header at SRC_PITCH)
 */
struct nv_push *p = nvk_cmd_buffer_push(cmd, 11);
if (is_src) {
P_MTHD(p, NV902D, SET_SRC_FORMAT);
} else {
P_MTHD(p, NV902D, SET_DST_FORMAT);
}
/* SET(n, value) writes SET_SRC_<n> or SET_DST_<n> depending on is_src */
#define SET(n, x...) do { \
if (is_src) { \
P_NV902D_SET_SRC_##n(p, x); \
} else { \
P_NV902D_SET_DST_##n(p, x); \
} \
} while (0)
uint8_t ct_format = nil_format_to_color_target(p_format);
SET(FORMAT, ct_format);
if (level->tiling.gob_type != NIL_GOB_TYPE_LINEAR) {
SET(MEMORY_LAYOUT, V_BLOCKLINEAR);
} else {
SET(MEMORY_LAYOUT, V_PITCH);
}
SET(BLOCK_SIZE, {
.height = level->tiling.y_log2,
.depth = level->tiling.z_log2,
});
SET(DEPTH, level_extent_sa.depth);
/* The two method sequences differ here: the source path starts a new
 * method run at SET_SRC_PITCH, while the destination path continues the
 * current run with SET_DST_LAYER.
 * NOTE(review): presumably the source state has no LAYER method in this
 * slot -- confirm against the class header.
 */
if (is_src) {
P_MTHD(p, NV902D, SET_SRC_PITCH);
} else {
P_NV902D_SET_DST_LAYER(p, 0);
}
if (level->tiling.gob_type != NIL_GOB_TYPE_LINEAR) {
/* Tiled: PITCH is written as 0 and WIDTH is the row stride in elements */
const uint32_t row_stride_el =
level->row_stride_B / util_format_get_blocksize(p_format);
SET(PITCH, 0);
SET(WIDTH, row_stride_el);
} else {
/* Linear: PITCH is the row stride in bytes and WIDTH is the level's
 * width taken from its extent in samples
 */
uint32_t pitch = level->row_stride_B;
assert(pitch % 32 == 0);
SET(PITCH, pitch);
SET(WIDTH, level_extent_sa.width);
}
SET(HEIGHT, level_extent_sa.height);
assert(addr % 32 == 0);
/* 64-bit address split into its upper and lower 32-bit halves */
SET(OFFSET_UPPER, addr >> 32);
SET(OFFSET_LOWER, addr);
#undef SET
}
/**
 * Execute every region of a blit on the 2D engine.
 *
 * State shared by all regions (operation, sample mode, compression) is
 * pushed once.  Each region then binds its destination and source surfaces
 * and pushes its rectangle.  Callers are expected to have checked the blit
 * with can_use_2d_blit() first.
 */
static void
nvk_2d_blit(struct nvk_cmd_buffer *cmd,
            const VkBlitImageInfo2 *pBlitImageInfo)
{
   VK_FROM_HANDLE(nvk_image, dst_image, pBlitImageInfo->dstImage);
   VK_FROM_HANDLE(nvk_image, src_image, pBlitImageInfo->srcImage);

   /* Region-independent state */
   assert(pBlitImageInfo->filter == VK_FILTER_NEAREST ||
          pBlitImageInfo->filter == VK_FILTER_LINEAR);
   const bool use_point_filter = pBlitImageInfo->filter == VK_FILTER_NEAREST;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);

   P_IMMD(p, NV902D, SET_OPERATION, V_SRCCOPY);
   P_IMMD(p, NV902D, SET_PIXELS_FROM_MEMORY_SAMPLE_MODE, {
      .filter = use_point_filter ? FILTER_POINT : FILTER_BILINEAR,
      .origin = ORIGIN_CORNER,
   });
   P_IMMD(p, NV902D, SET_COMPRESSION, dst_image->is_compressed);

   /* Per-region surfaces and rectangle */
   const uint32_t region_count = pBlitImageInfo->regionCount;
   for (uint32_t i = 0; i < region_count; i++) {
      const VkImageBlit2 *blit = &pBlitImageInfo->pRegions[i];

      assert(blit->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);

      nvk_2d_set_target(cmd, dst_image, &blit->dstSubresource, false);
      nvk_2d_set_target(cmd, src_image, &blit->srcSubresource, true);
      nvk_2d_blit_rect(cmd, blit);
   }
}
static bool
can_use_2d_blit(const struct nvk_physical_device *pdev,
const VkBlitImageInfo2 *pBlitImageInfo)
{
VK_FROM_HANDLE(nvk_image, dst_image, pBlitImageInfo->dstImage);
VK_FROM_HANDLE(nvk_image, src_image, pBlitImageInfo->srcImage);
if (!nvk_2d_can_set_target(pdev, dst_image) ||
!nvk_2d_can_set_target(pdev, src_image)) {
return false;
}
for (uint32_t r = 0; r < pBlitImageInfo->regionCount; r++) {
const VkImageBlit2 *region = &pBlitImageInfo->pRegions[r];
if (region->dstSubresource.layerCount != 1 ||
region->srcSubresource.layerCount != 1 ||
region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT ||
region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) {
return false;
}
}
enum pipe_format src_p_format =
nvk_format_to_pipe_format(src_image->vk.format);
enum pipe_format dst_p_format =
nvk_format_to_pipe_format(dst_image->vk.format);
if (util_format_is_red(src_p_format) &&
!util_format_is_red(dst_p_format)) {
/* The 2D engine always treats single component formats as
* luminance rather than red
*/
return false;
}
if (util_format_is_alpha(src_p_format) &&
!util_format_is_alpha(dst_p_format)) {
/* Alpha copies seem to leave other channels unchanged, which
* isn't what we want
*/
return false;
}
return true;
}
/**
 * vkCmdBlitImage2 entry point.
 *
 * Uses the 2D engine when can_use_2d_blit() accepts the blit and falls
 * back to nvk_meta_blit() otherwise.
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
                  const VkBlitImageInfo2 *pBlitImageInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   const struct nvk_physical_device *pdev =
      nvk_device_physical(nvk_cmd_buffer_device(cmd));

   if (!can_use_2d_blit(pdev, pBlitImageInfo)) {
      nvk_meta_blit(cmd, pBlitImageInfo);
      return;
   }

   nvk_2d_blit(cmd, pBlitImageInfo);
}

View file

@ -441,6 +441,8 @@ void nvk_cmd_fill_memory(struct nvk_cmd_buffer *cmd,
uint64_t dst_addr, uint64_t size,
uint32_t data);
/* Meta-path implementation of image blits.  nvk_CmdBlitImage2() calls this
 * as the fallback when the blit cannot be done on the 2D engine.
 */
void nvk_meta_blit(struct nvk_cmd_buffer *cmd,
const VkBlitImageInfo2 *pBlitImageInfo);
void nvk_meta_resolve_rendering(struct nvk_cmd_buffer *cmd,
const VkRenderingInfo *pRenderingInfo);

View file

@ -7,6 +7,7 @@
#include "nvk_descriptor_set.h"
#include "nvk_device.h"
#include "nvk_entrypoints.h"
#include "nvk_format.h"
#include "nvk_image.h"
#include "nvk_physical_device.h"
@ -214,11 +215,10 @@ nvk_meta_end(struct nvk_cmd_buffer *cmd,
P_IMMD(p, NV9097, SET_RENDER_ENABLE_OVERRIDE, MODE_USE_RENDER_ENABLE);
}
VKAPI_ATTR void VKAPI_CALL
nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
const VkBlitImageInfo2 *pBlitImageInfo)
void
nvk_meta_blit(struct nvk_cmd_buffer *cmd,
const VkBlitImageInfo2 *pBlitImageInfo)
{
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
struct nvk_meta_save save;