From 872c4bcd27db7b7ca26abe9fc090ae26d502156f Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 25 Nov 2020 20:54:32 -0500 Subject: [PATCH] turnip: implement z-scaling and z-mirroring BlitImage Z scaling case without nearest filter needs a 3D texture, so add a 3D texture path and use it to cover all scaling/mirroring cases. The "rotation" argument for the clear/blit "setup" function is replaced with a more generic "blit_param", which has a different meaning for the 3D blit path. (to avoid having too many arguments) Signed-off-by: Jonathan Marek Part-of: --- .gitlab-ci/deqp-freedreno-a630-fails.txt | 52 -------- src/freedreno/vulkan/tu_clear_blit.c | 158 ++++++++++++++++------- src/freedreno/vulkan/tu_private.h | 1 + 3 files changed, 113 insertions(+), 98 deletions(-) diff --git a/.gitlab-ci/deqp-freedreno-a630-fails.txt b/.gitlab-ci/deqp-freedreno-a630-fails.txt index aff2411ad4b..545f9b54ea4 100644 --- a/.gitlab-ci/deqp-freedreno-a630-fails.txt +++ b/.gitlab-ci/deqp-freedreno-a630-fails.txt @@ -1,56 +1,4 @@ -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a1r5g5b5_unorm_pack16.a1r5g5b5_unorm_pack16.optimal_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2r10g10b10_unorm_pack32.a2r10g10b10_unorm_pack32.optimal_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_snorm_pack32.a8b8g8r8_snorm_pack32.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_srgb_pack32.a8b8g8r8_srgb_pack32.optimal_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_optimal_nearest,Fail 
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_unorm_pack32.a8b8g8r8_unorm_pack32.optimal_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.b10g11r11_ufloat_pack32.b10g11r11_ufloat_pack32.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.b4g4r4a4_unorm_pack16.b4g4r4a4_unorm_pack16.optimal_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.b5g5r5a1_unorm_pack16.b5g5r5a1_unorm_pack16.optimal_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.b5g6r5_unorm_pack16.b5g6r5_unorm_pack16.general_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.b8g8r8a8_snorm.b8g8r8a8_snorm.general_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.b8g8r8a8_unorm.b8g8r8a8_unorm.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r16g16b16a16_sfloat.r16g16b16a16_sfloat.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r16g16b16a16_unorm.r16g16b16a16_unorm.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r16g16b16a16_unorm.r16g16b16a16_unorm.linear_linear_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r16_sfloat.r16_sfloat.optimal_linear_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r16_sint.r16_sint.optimal_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r16_unorm.r16_unorm.general_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r32g32b32a32_sfloat.r32g32b32a32_sfloat.optimal_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r32g32b32a32_sint.r32g32b32a32_sint.general_optimal_nearest,Fail 
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r32g32b32a32_uint.r32g32b32a32_uint.general_linear_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r32g32_sint.r32g32_sint.optimal_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r32_sfloat.r32_sfloat.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r32_sfloat.r32_sfloat.linear_linear_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r32_uint.r32_uint.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r4g4b4a4_unorm_pack16.r4g4b4a4_unorm_pack16.general_linear_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r4g4b4a4_unorm_pack16.r4g4b4a4_unorm_pack16.optimal_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r5g6b5_unorm_pack16.r5g6b5_unorm_pack16.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8g8b8a8_sint.r8g8b8a8_sint.optimal_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8g8b8a8_srgb.r8g8b8a8_srgb.general_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8g8b8a8_unorm.r8g8b8a8_unorm.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8g8b8a8_unorm.r8g8b8a8_unorm.linear_linear_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8g8_sint.r8g8_sint.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8g8_snorm.r8g8_snorm.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8g8_srgb.r8g8_srgb.optimal_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8g8_uint.r8g8_uint.optimal_optimal_nearest,Fail 
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8g8_unorm.r8g8_unorm.optimal_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8_snorm.r8_snorm.general_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8_srgb.r8_srgb.general_general_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.r8_unorm.r8_unorm.general_optimal_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.simple_tests.mirror_z_3d.nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.simple_tests.scaling_and_offset_3d.r32_sfloat_linear,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.simple_tests.scaling_whole1_3d.b8g8r8a8_unorm_nearest,Fail -dEQP-VK.api.copy_and_blit.core.blit_image.simple_tests.scaling_whole2_3d.linear,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.blit_image.all_formats.color.3d.a8b8g8r8_srgb_pack32.a8b8g8r8_srgb_pack32.optimal_optimal_linear,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.blit_image.all_formats.color.3d.r8_srgb.r8_srgb.general_general_linear,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.blit_image.all_formats.color.3d.r8_uint.r8_uint.general_general_nearest,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.blit_image.simple_tests.scaling_and_offset_3d.linear,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.blit_image.simple_tests.scaling_whole1_3d.r32_sfloat_nearest,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.blit_image.simple_tests.scaling_whole2_3d.b8g8r8a8_unorm_nearest,Fail dEQP-VK.api.image_clearing.core.clear_color_attachment.single_layer.a8b8g8r8_srgb_pack32_1x33,Fail dEQP-VK.api.image_clearing.core.partial_clear_color_attachment.single_layer.a8b8g8r8_srgb_pack32_200x180,Fail dEQP-VK.api.image_clearing.dedicated_allocation.clear_color_attachment.single_layer.b8g8r8a8_srgb_33x128,Fail diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index aa19255a153..2a7e63693a8 100644 
--- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -233,7 +233,7 @@ r2d_setup_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, VkFormat vk_format, VkImageAspectFlags aspect_mask, - enum a6xx_rotation rotation, + unsigned blit_param, bool clear, bool ubwc, bool scissor) @@ -262,7 +262,7 @@ r2d_setup_common(struct tu_cmd_buffer *cmd, uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL( .scissor = scissor, - .rotate = rotation, + .rotate = blit_param, .solid_color = clear, .d24s8 = format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, .color_format = format, @@ -292,13 +292,13 @@ r2d_setup(struct tu_cmd_buffer *cmd, struct tu_cs *cs, VkFormat vk_format, VkImageAspectFlags aspect_mask, - enum a6xx_rotation rotation, + unsigned blit_param, bool clear, bool ubwc) { tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); - r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, ubwc, false); + r2d_setup_common(cmd, cs, vk_format, aspect_mask, blit_param, clear, ubwc, false); } static void @@ -327,6 +327,7 @@ tu_init_clear_blit_shaders(struct tu6_global *global) static const instr_t vs_code[] = { /* r0.xyz = r0.w ? c1.xyz : c0.xyz * r1.xy = r0.w ? 
c1.zw : c0.zw + * r1.z = c2.x (for z_scale path) * r0.w = 1.0f */ CAT3(OPC_SEL_B32, .repeat = 2, .dst = 0, @@ -337,6 +338,7 @@ tu_init_clear_blit_shaders(struct tu6_global *global) .c1 = {.src1_c = 1, .src1 = 6}, .src1_r = 1, .src2 = 3, .c2 = {.src3_c = 1, .dummy = 1, .src3 = 2}), + MOV(.dst = 6, .src_c = 1, .src = 8 ), MOV(.dst = 3, .src_im = 1, .fim_val = 1.0f ), { .cat0 = { .opc = OPC_END } }, }; @@ -349,8 +351,21 @@ tu_init_clear_blit_shaders(struct tu6_global *global) { .cat0 = { .opc = OPC_END } }, }; + static const instr_t fs_blit_zscale[] = { + /* (rpt2)bary.f (ei)r0.x, (r)0, r0.x + * (rpt5)nop + * sam.3d (s32)(xyzw)r0.x, r0.x, s#0, t#0 + */ + CAT2(OPC_BARY_F, .ei = 1, .full = 1, .dst = 0, .src1_im = 1, .src1 = 0, .repeat = 2, .src1_r = 1), + { .cat0 = { .repeat = 5 } }, + { .cat5 = { .opc_cat = 5, .opc = OPC_SAM & 31, .dst = 0, .wrmask = 0xf, .type = TYPE_S32, + .is_3d = 1, .norm = { .full = 1, .src1 = 0 } } }, + { .cat0 = { .opc = OPC_END } }, + }; + memcpy(&global->shaders[GLOBAL_SH_VS], vs_code, sizeof(vs_code)); memcpy(&global->shaders[GLOBAL_SH_FS_BLIT], fs_blit, sizeof(fs_blit)); + memcpy(&global->shaders[GLOBAL_SH_FS_BLIT_ZSCALE], fs_blit_zscale, sizeof(fs_blit_zscale)); for (uint32_t num_rts = 0; num_rts <= MAX_RTS; num_rts++) { instr_t *code = global->shaders[GLOBAL_SH_FS_CLEAR0 + num_rts]; @@ -364,7 +379,7 @@ tu_init_clear_blit_shaders(struct tu6_global *global) static void r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_rts, - bool layered_clear) + bool layered_clear, bool z_scale) { struct ir3_const_state dummy_const_state = {}; struct ir3_shader dummy_shader = {}; @@ -427,6 +442,20 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_ .const_state = &dummy_const_state, }; + enum global_shader fs_id = GLOBAL_SH_FS_BLIT; + + if (!blit) + fs_id = GLOBAL_SH_FS_CLEAR0 + num_rts; + + /* z_scale blit path has an extra varying and doesn't use prefetch */ + if (z_scale) { + assert(blit); + 
fs.total_in = 3; + fs.num_sampler_prefetch = 0; + fs.inputs[0].compmask = 7; + fs_id = GLOBAL_SH_FS_BLIT_ZSCALE; + } + tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( .vs_state = true, .hs_state = true, @@ -445,8 +474,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_ tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, &pvtmem, 0); tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL, &pvtmem, 0); tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL, &pvtmem, 0); - tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs, &pvtmem, - global_iova(cmd, shaders[blit ? GLOBAL_SH_FS_BLIT : (GLOBAL_SH_FS_CLEAR0 + num_rts)])); + tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs, &pvtmem, global_iova(cmd, shaders[fs_id])); tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0()); tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0()); @@ -506,6 +534,24 @@ r3d_coords_raw(struct tu_cs *cs, const float *coords) tu_cs_emit_array(cs, (const uint32_t *) coords, 8); } +/* z coordinate for "z scale" blit path which uses a 3d texture */ +static void +r3d_coord_z(struct tu_cs *cs, float z) +{ + tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 4); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(2) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) | + CP_LOAD_STATE6_0_NUM_UNIT(1)); + tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + tu_cs_emit(cs, fui(z)); + tu_cs_emit(cs, 0); + tu_cs_emit(cs, 0); + tu_cs_emit(cs, 0); +} + static void r3d_coords(struct tu_cs *cs, const VkOffset2D *dst, @@ -743,7 +789,7 @@ r3d_setup(struct tu_cmd_buffer *cmd, struct tu_cs *cs, VkFormat vk_format, VkImageAspectFlags aspect_mask, - enum a6xx_rotation rotation, + unsigned blit_param, bool clear, bool ubwc) { @@ -762,7 +808,7 @@ r3d_setup(struct tu_cmd_buffer *cmd, tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000)); tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 
0xc00000)); - r3d_common(cmd, cs, !clear, clear ? 1 : 0, false); + r3d_common(cmd, cs, !clear, clear ? 1 : 0, false, blit_param); tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) | @@ -851,7 +897,7 @@ struct blit_ops { struct tu_cs *cs, VkFormat vk_format, VkImageAspectFlags aspect_mask, - enum a6xx_rotation rotation, + unsigned blit_param, /* CmdBlitImage: rotation in 2D path and z scaling in 3D path */ bool clear, bool ubwc); void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs); @@ -940,7 +986,7 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd, { const struct blit_ops *ops = &r2d_ops; - ops->setup(cmd, cs, VK_FORMAT_D16_UNORM, VK_IMAGE_ASPECT_DEPTH_BIT, ROTATE_0, true, false); + ops->setup(cmd, cs, VK_FORMAT_D16_UNORM, VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false); ops->clear_value(cs, VK_FORMAT_D16_UNORM, value); ops->dst_buffer(cs, VK_FORMAT_D16_UNORM, image->bo->iova + image->bo_offset + image->lrz_offset, @@ -956,7 +1002,8 @@ tu_image_view_copy_blit(struct tu_image_view *iview, VkFormat format, const VkImageSubresourceLayers *subres, uint32_t layer, - bool stencil_read) + bool stencil_read, + bool z_scale) { VkImageAspectFlags aspect_mask = subres->aspectMask; @@ -968,7 +1015,7 @@ tu_image_view_copy_blit(struct tu_image_view *iview, tu_image_view_init(iview, &(VkImageViewCreateInfo) { .image = tu_image_to_handle(image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, + .viewType = z_scale ? VK_IMAGE_VIEW_TYPE_3D : VK_IMAGE_VIEW_TYPE_2D, .format = format, /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */ .components.r = stencil_read ? 
VK_COMPONENT_SWIZZLE_A : VK_COMPONENT_SWIZZLE_R, @@ -991,7 +1038,7 @@ tu_image_view_copy(struct tu_image_view *iview, bool stencil_read) { format = copy_format(format, subres->aspectMask, false); - tu_image_view_copy_blit(iview, image, format, subres, layer, stencil_read); + tu_image_view_copy_blit(iview, image, format, subres, layer, stencil_read, false); } static void @@ -1000,7 +1047,7 @@ tu_image_view_blit(struct tu_image_view *iview, const VkImageSubresourceLayers *subres, uint32_t layer) { - tu_image_view_copy_blit(iview, image, image->vk_format, subres, layer, false); + tu_image_view_copy_blit(iview, image, image->vk_format, subres, layer, false, false); } static void @@ -1010,9 +1057,10 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, const VkImageBlit *info, VkFilter filter) { const struct blit_ops *ops = &r2d_ops; struct tu_cs *cs = &cmd->cs; - uint32_t layers; + bool z_scale = false; + uint32_t layers = info->dstOffsets[1].z - info->dstOffsets[0].z; /* 2D blit can't do rotation mirroring from just coordinates */ static const enum a6xx_rotation rotate[2][2] = { @@ -1024,21 +1072,22 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, (info->dstOffsets[1].x < info->dstOffsets[0].x); bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) != (info->dstOffsets[1].y < info->dstOffsets[0].y); - bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) != - (info->dstOffsets[1].z < info->dstOffsets[0].z); - if (mirror_z) { - tu_finishme("blit z mirror\n"); - return; + int32_t src0_z = info->srcOffsets[0].z; + int32_t src1_z = info->srcOffsets[1].z; + + if ((info->srcOffsets[1].z - info->srcOffsets[0].z != + info->dstOffsets[1].z - info->dstOffsets[0].z) || + info->srcOffsets[1].z < info->srcOffsets[0].z) { + z_scale = true; } - if (info->srcOffsets[1].z - info->srcOffsets[0].z != - info->dstOffsets[1].z - info->dstOffsets[0].z) { - tu_finishme("blit z filter\n"); - return; + if (info->dstOffsets[1].z < 
info->dstOffsets[0].z) { + layers = info->dstOffsets[0].z - info->dstOffsets[1].z; + src0_z = info->srcOffsets[1].z; + src1_z = info->srcOffsets[0].z; } - layers = info->srcOffsets[1].z - info->srcOffsets[0].z; if (info->dstSubresource.layerCount > 1) { assert(layers <= 1); layers = info->dstSubresource.layerCount; @@ -1052,11 +1101,15 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, * the 2d path. */ + unsigned blit_param = rotate[mirror_y][mirror_x]; if (dst_image->layout[0].nr_samples > 1 || src_image->vk_format == VK_FORMAT_BC1_RGB_UNORM_BLOCK || src_image->vk_format == VK_FORMAT_BC1_RGB_SRGB_BLOCK || - filter == VK_FILTER_CUBIC_EXT) + filter == VK_FILTER_CUBIC_EXT || + z_scale) { ops = &r3d_ops; + blit_param = z_scale; + } /* use the right format in setup() for D32_S8 * TODO: this probably should use a helper @@ -1072,7 +1125,7 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, } ops->setup(cmd, cs, format, info->dstSubresource.aspectMask, - rotate[mirror_y][mirror_x], false, dst_image->layout[0].ubwc); + blit_param, false, dst_image->layout[0].ubwc); if (ops == &r3d_ops) { r3d_coords_raw(cs, (float[]) { @@ -1095,12 +1148,25 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, } struct tu_image_view dst, src; - tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffsets[0].z); - tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z); + tu_image_view_blit(&dst, dst_image, &info->dstSubresource, + MIN2(info->dstOffsets[0].z, info->dstOffsets[1].z)); + + if (z_scale) { + tu_image_view_copy_blit(&src, src_image, src_image->vk_format, + &info->srcSubresource, 0, false, true); + ops->src(cmd, cs, &src, 0, filter); + } else { + tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z); + } for (uint32_t i = 0; i < layers; i++) { + if (z_scale) { + float t = ((float) i + 0.5f) / (float) layers; + r3d_coord_z(cs, t * (src1_z - src0_z) + src0_z); + } else { + ops->src(cmd, cs, &src, i, filter); + } ops->dst(cs, &dst, i); 
- ops->src(cmd, cs, &src, i, filter); ops->run(cmd, cs); } @@ -1201,7 +1267,7 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd, ops->setup(cmd, cs, copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false), - info->imageSubresource.aspectMask, ROTATE_0, false, dst_image->layout[0].ubwc); + info->imageSubresource.aspectMask, 0, false, dst_image->layout[0].ubwc); struct tu_image_view dst; tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false); @@ -1274,7 +1340,7 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd, uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format); uint32_t layer_size = pitch * dst_height; - ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false); + ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false); struct tu_image_view src; tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read); @@ -1470,7 +1536,7 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, tu_image_view_copy(&staging, &staging_image, src_format, &staging_subresource, 0, false); - ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false); + ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false); coords(ops, cs, &staging_offset, &src_offset, &extent); for (uint32_t i = 0; i < info->extent.depth; i++) { @@ -1489,7 +1555,7 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, &staging_subresource, 0, false); ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask, - ROTATE_0, false, dst_image->layout[0].ubwc); + 0, false, dst_image->layout[0].ubwc); coords(ops, cs, &dst_offset, &staging_offset, &extent); for (uint32_t i = 0; i < info->extent.depth; i++) { @@ -1502,7 +1568,7 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z, false); ops->setup(cmd, cs, format, 
info->dstSubresource.aspectMask, - ROTATE_0, false, dst_image->layout[0].ubwc); + 0, false, dst_image->layout[0].ubwc); coords(ops, cs, &dst_offset, &src_offset, &extent); for (uint32_t i = 0; i < info->extent.depth; i++) { @@ -1544,7 +1610,7 @@ copy_buffer(struct tu_cmd_buffer *cmd, VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM; uint64_t blocks = size / block_size; - ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false); + ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false); while (blocks) { uint32_t src_x = (src_va & 63) / block_size; @@ -1622,7 +1688,7 @@ tu_CmdFillBuffer(VkCommandBuffer commandBuffer, uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset; uint32_t blocks = fillSize / 4; - ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, true, false); + ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false); ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}}); while (blocks) { @@ -1656,7 +1722,7 @@ tu_CmdResolveImage(VkCommandBuffer commandBuffer, struct tu_cs *cs = &cmd->cs; ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, - ROTATE_0, false, dst_image->layout[0].ubwc); + 0, false, dst_image->layout[0].ubwc); for (uint32_t i = 0; i < regionCount; ++i) { const VkImageResolve *info = &pRegions[i]; @@ -1701,7 +1767,7 @@ resolve_sysmem(struct tu_cmd_buffer *cmd, const struct blit_ops *ops = &r2d_ops; ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, - ROTATE_0, false, dst->ubwc_enabled); + 0, false, dst->ubwc_enabled); ops->coords(cs, &rect->offset, &rect->offset, &rect->extent); for_each_layer(i, layer_mask, layers) { @@ -1761,7 +1827,7 @@ clear_image(struct tu_cmd_buffer *cmd, const struct blit_ops *ops = image->layout[0].nr_samples > 1 ? 
&r3d_ops : &r2d_ops; - ops->setup(cmd, cs, format, aspect_mask, ROTATE_0, true, image->layout[0].ubwc); + ops->setup(cmd, cs, format, aspect_mask, 0, true, image->layout[0].ubwc); if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) ops->clear_value(cs, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, clear_value); else @@ -1782,7 +1848,7 @@ clear_image(struct tu_cmd_buffer *cmd, .mipLevel = range->baseMipLevel + j, .baseArrayLayer = range->baseArrayLayer, .layerCount = 1, - }, 0, false); + }, 0, false, false); for (uint32_t i = 0; i < layer_count; i++) { ops->dst(cs, &dst, i); @@ -1930,7 +1996,7 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, layered_clear = true; } - r3d_common(cmd, cs, false, num_rts, layered_clear); + r3d_common(cmd, cs, false, num_rts, layered_clear, false); tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.dword = clear_components)); @@ -2226,7 +2292,7 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd, if (cmd->state.pass->attachments[a].samples > 1) ops = &r3d_ops; - ops->setup(cmd, cs, format, clear_mask, ROTATE_0, true, iview->ubwc_enabled); + ops->setup(cmd, cs, format, clear_mask, 0, true, iview->ubwc_enabled); ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent); ops->clear_value(cs, format, &info->pClearValues[a]); @@ -2412,7 +2478,7 @@ store_cp_blit(struct tu_cmd_buffer *cmd, uint32_t gmem_offset, uint32_t cpp) { - r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, + r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, iview->ubwc_enabled, true); if (separate_stencil) r2d_dst_stencil(cs, iview, 0); diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index d46785a1531..e48f88baeac 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -310,6 +310,7 @@ struct tu_bo enum global_shader { GLOBAL_SH_VS, GLOBAL_SH_FS_BLIT, + GLOBAL_SH_FS_BLIT_ZSCALE, GLOBAL_SH_FS_CLEAR0, GLOBAL_SH_FS_CLEAR_MAX = 
GLOBAL_SH_FS_CLEAR0 + MAX_RTS, GLOBAL_SH_COUNT,