From 0fcce6c319f676ee31a0bcbbb11cb7a080764417 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 3 Dec 2019 12:54:30 +0100 Subject: [PATCH] v3dv: implement vkCreateImage This relies heavily on infrastructure taken from the v3d driver. We should probably look for ways to share the code between both drivers by creating a surface layout library that we can use from both, or at least moving parts of the v3d driver to broadcom/common. Specifically: We take v3d_tiling.c, which requires gallium's pipe_box type for some helper functions that we don't quite need yet. We copied and adapted bits of v3d_resource.c into v3dv_image.c, however, it should be possible to look for ways to reuse the code instead of duplicating it. Pre-compute UIF padding into the slice setup. This is different from what we do in v3d (we do this at create_surface time), but it is more convenient for us to pre-calculate it here for all mipmap slices. Part-of: --- src/broadcom/vulkan/meson.build | 2 + src/broadcom/vulkan/v3d_tiling.c | 498 +++++++++++++++++++++++++++ src/broadcom/vulkan/v3dv_formats.c | 2 +- src/broadcom/vulkan/v3dv_image.c | 299 ++++++++++++++++ src/broadcom/vulkan/v3dv_private.h | 103 ++++++ src/broadcom/vulkan/vk_format_info.h | 21 ++ 6 files changed, 924 insertions(+), 1 deletion(-) create mode 100644 src/broadcom/vulkan/v3d_tiling.c create mode 100644 src/broadcom/vulkan/v3dv_image.c diff --git a/src/broadcom/vulkan/meson.build b/src/broadcom/vulkan/meson.build index 1c6eb1daca8..00057fd2afb 100644 --- a/src/broadcom/vulkan/meson.build +++ b/src/broadcom/vulkan/meson.build @@ -54,9 +54,11 @@ v3dv_extensions_h = custom_target( libv3dv_files = files( 'v3dv_device.c', 'v3dv_formats.c', + 'v3dv_image.c', 'v3dv_pipeline.c', 'v3dv_private.h', 'v3dv_util.c', + 'v3d_tiling.c', ) # The vulkan driver only supports version >= 42, which is the version present in diff --git a/src/broadcom/vulkan/v3d_tiling.c b/src/broadcom/vulkan/v3d_tiling.c new file mode 100644 index
00000000000..08660e38fba --- /dev/null +++ b/src/broadcom/vulkan/v3d_tiling.c @@ -0,0 +1,498 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file v3d_tiling.c + * + * Handles information about the VC5 tiling formats, and loading and storing + * from them. + */ + +#include +#include + +#include "v3dv_private.h" + +#include "util/u_math.h" +#include "util/u_box.h" + +#include "broadcom/common/v3d_cpu_tiling.h" + +/** Return the width in pixels of a 64-byte microtile. */ +uint32_t +v3d_utile_width(int cpp) +{ + switch (cpp) { + case 1: + case 2: + return 8; + case 4: + case 8: + return 4; + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** Return the height in pixels of a 64-byte microtile. 
*/ +uint32_t +v3d_utile_height(int cpp) +{ + switch (cpp) { + case 1: + return 8; + case 2: + case 4: + return 4; + case 8: + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** + * Returns the byte address for a given pixel within a utile. + * + * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4 + * arrangement. + */ +static inline uint32_t +v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) +{ + uint32_t utile_w = v3d_utile_width(cpp); + + assert(x < utile_w && y < v3d_utile_height(cpp)); + + return x * cpp + y * utile_w * cpp; +} + +/** + * Returns the byte offset for a given pixel in a LINEARTILE layout. + * + * LINEARTILE is a single line of utiles in either the X or Y direction. + */ +static inline uint32_t +v3d_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t utile_index_x = x / utile_w; + uint32_t utile_index_y = y / utile_h; + + assert(utile_index_x == 0 || utile_index_y == 0); + + return (64 * (utile_index_x + utile_index_y) + + v3d_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +/** + * Returns the byte offset for a given pixel in a UBLINEAR layout. + * + * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2 + * utiles), and the UIF blocks are in 1 or 2 columns in raster order. + */ +static inline uint32_t +v3d_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y, + int ublinear_number) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t ub_w = utile_w * 2; + uint32_t ub_h = utile_h * 2; + uint32_t ub_x = x / ub_w; + uint32_t ub_y = y / ub_h; + + return (256 * (ub_y * ublinear_number + + ub_x) + + ((x & utile_w) ? 64 : 0) + + ((y & utile_h) ? 
128 : 0) + + + v3d_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +static inline uint32_t +v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return v3d_get_ublinear_pixel_offset(cpp, x, y, 2); +} + +static inline uint32_t +v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return v3d_get_ublinear_pixel_offset(cpp, x, y, 1); +} + +/** + * Returns the byte offset for a given pixel in a UIF layout. + * + * UIF is the general VC5 tiling layout shared across 3D, media, and scanout. + * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in + * 4x4 groups, and those 4x4 groups are then stored in raster order. + */ +static inline uint32_t +v3d_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y, + bool do_xor) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t mb_width = utile_w * 2; + uint32_t mb_height = utile_h * 2; + uint32_t log2_mb_width = ffs(mb_width) - 1; + uint32_t log2_mb_height = ffs(mb_height) - 1; + + /* Macroblock X, y */ + uint32_t mb_x = x >> log2_mb_width; + uint32_t mb_y = y >> log2_mb_height; + /* X, y within the macroblock */ + uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); + uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); + + if (do_xor && (mb_x / 4) & 1) + mb_y ^= 0x10; + + uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; + uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; + + uint32_t mb_base_addr = mb_id * 256; + + bool top = mb_pixel_y < utile_h; + bool left = mb_pixel_x < utile_w; + + /* Docs have this in pixels, we do bytes here. 
*/ + uint32_t mb_tile_offset = (!top * 128 + !left * 64); + + uint32_t utile_x = mb_pixel_x & (utile_w - 1); + uint32_t utile_y = mb_pixel_y & (utile_h - 1); + + uint32_t mb_pixel_address = (mb_base_addr + + mb_tile_offset + + v3d_get_utile_pixel_offset(cpp, + utile_x, + utile_y)); + + return mb_pixel_address; +} + +static inline uint32_t +v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return v3d_get_uif_pixel_offset(cpp, image_h, x, y, true); +} + +static inline uint32_t +v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return v3d_get_uif_pixel_offset(cpp, image_h, x, y, false); +} + +/* Loads/stores non-utile-aligned boxes by walking over the destination + * rectangle, computing the address on the GPU, and storing/loading a pixel at + * a time. + */ +static inline void +v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + for (uint32_t y = 0; y < box->height; y++) { + void *cpu_row = cpu + y * cpu_stride; + + for (int x = 0; x < box->width; x++) { + uint32_t pixel_offset = get_pixel_offset(cpp, image_h, + box->x + x, + box->y + y); + + if (false) { + fprintf(stderr, "%3d,%3d -> %d\n", + box->x + x, box->y + y, + pixel_offset); + } + + if (is_load) { + memcpy(cpu_row + x * cpp, + gpu + pixel_offset, + cpp); + } else { + memcpy(gpu + pixel_offset, + cpu_row + x * cpp, + cpp); + } + } + } +} + +/* Breaks the image down into utiles and calls either the fast whole-utile + * load/store functions, or the unaligned fallback case. 
+ */ +static inline void +v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t utile_gpu_stride = utile_w * cpp; + uint32_t x1 = box->x; + uint32_t y1 = box->y; + uint32_t x2 = box->x + box->width; + uint32_t y2 = box->y + box->height; + uint32_t align_x1 = align(x1, utile_w); + uint32_t align_y1 = align(y1, utile_h); + uint32_t align_x2 = x2 & ~(utile_w - 1); + uint32_t align_y2 = y2 & ~(utile_h - 1); + + /* Load/store all the whole utiles first. */ + for (uint32_t y = align_y1; y < align_y2; y += utile_h) { + void *cpu_row = cpu + (y - box->y) * cpu_stride; + + for (uint32_t x = align_x1; x < align_x2; x += utile_w) { + void *utile_gpu = (gpu + + get_pixel_offset(cpp, image_h, x, y)); + void *utile_cpu = cpu_row + (x - box->x) * cpp; + + if (is_load) { + v3d_load_utile(utile_cpu, cpu_stride, + utile_gpu, utile_gpu_stride); + } else { + v3d_store_utile(utile_gpu, utile_gpu_stride, + utile_cpu, cpu_stride); + } + } + } + + /* If there were no aligned utiles in the middle, load/store the whole + * thing unaligned. + */ + if (align_y2 <= align_y1 || + align_x2 <= align_x1) { + v3d_move_pixels_unaligned(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, + box, + get_pixel_offset, is_load); + return; + } + + /* Load/store the partial utiles. 
*/ + struct pipe_box partial_boxes[4] = { + /* Top */ + { + .x = x1, + .width = x2 - x1, + .y = y1, + .height = align_y1 - y1, + }, + /* Bottom */ + { + .x = x1, + .width = x2 - x1, + .y = align_y2, + .height = y2 - align_y2, + }, + /* Left */ + { + .x = x1, + .width = align_x1 - x1, + .y = align_y1, + .height = align_y2 - align_y1, + }, + /* Right */ + { + .x = align_x2, + .width = x2 - align_x2, + .y = align_y1, + .height = align_y2 - align_y1, + }, + }; + for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) { + void *partial_cpu = (cpu + + (partial_boxes[i].y - y1) * cpu_stride + + (partial_boxes[i].x - x1) * cpp); + + v3d_move_pixels_unaligned(gpu, gpu_stride, + partial_cpu, cpu_stride, + cpp, image_h, + &partial_boxes[i], + get_pixel_offset, is_load); + } +} + +static inline void +v3d_move_pixels_general(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + switch (cpp) { + case 1: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 1, image_h, box, + get_pixel_offset, + is_load); + break; + case 2: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 2, image_h, box, + get_pixel_offset, + is_load); + break; + case 4: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 4, image_h, box, + get_pixel_offset, + is_load); + break; + case 8: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 8, image_h, box, + get_pixel_offset, + is_load); + break; + case 16: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 16, image_h, box, + get_pixel_offset, + is_load); + break; + } +} + +static inline void +v3d_move_tiled_image(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + enum v3d_tiling_mode tiling_format, + int cpp, + uint32_t image_h, + const struct pipe_box *box, + bool 
is_load) +{ + switch (tiling_format) { + case VC5_TILING_UIF_XOR: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_uif_xor_pixel_offset, + is_load); + break; + case VC5_TILING_UIF_NO_XOR: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_uif_no_xor_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_2_COLUMN: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_ublinear_2_column_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_1_COLUMN: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_ublinear_1_column_pixel_offset, + is_load); + break; + case VC5_TILING_LINEARTILE: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_lt_pixel_offset, + is_load); + break; + default: + unreachable("Unsupported tiling format"); + break; + } +} + +/** + * Loads pixel data from the start (microtile-aligned) box in \p src to the + * start of \p dst according to the given tiling format. + */ +void +v3d_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum v3d_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + v3d_move_tiled_image(src, src_stride, + dst, dst_stride, + tiling_format, + cpp, + image_h, + box, + true); +} + +/** + * Stores pixel data from the start of \p src into a (microtile-aligned) box in + * \p dst according to the given tiling format. 
+ */ +void +v3d_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum v3d_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + v3d_move_tiled_image(dst, dst_stride, + src, src_stride, + tiling_format, + cpp, + image_h, + box, + false); +} diff --git a/src/broadcom/vulkan/v3dv_formats.c b/src/broadcom/vulkan/v3dv_formats.c index 7dfbb317b6e..5f6a20a1096 100644 --- a/src/broadcom/vulkan/v3dv_formats.c +++ b/src/broadcom/vulkan/v3dv_formats.c @@ -62,7 +62,7 @@ static const struct v3dv_format format_table[] = { FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16), }; -static inline const struct v3dv_format * +const struct v3dv_format * v3dv_get_format(VkFormat format) { if (format < ARRAY_SIZE(format_table) && format_table[format].supported) diff --git a/src/broadcom/vulkan/v3dv_image.c b/src/broadcom/vulkan/v3dv_image.c new file mode 100644 index 00000000000..14b8b12e38b --- /dev/null +++ b/src/broadcom/vulkan/v3dv_image.c @@ -0,0 +1,299 @@ +/* + * Copyright © 2019 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "v3dv_private.h" + +#include "drm-uapi/drm_fourcc.h" +#include "util/format/u_format.h" +#include "util/u_math.h" +#include "vk_format_info.h" + +/* These are tunable parameters in the HW design, but all the V3D + * implementations agree. + */ +#define VC5_UIFCFG_BANKS 8 +#define VC5_UIFCFG_PAGE_SIZE 4096 +#define VC5_UIFCFG_XOR_VALUE (1 << 4) +#define VC5_PAGE_CACHE_SIZE (VC5_UIFCFG_PAGE_SIZE * VC5_UIFCFG_BANKS) +#define VC5_UBLOCK_SIZE 64 +#define VC5_UIFBLOCK_SIZE (4 * VC5_UBLOCK_SIZE) +#define VC5_UIFBLOCK_ROW_SIZE (4 * VC5_UIFBLOCK_SIZE) + +#define PAGE_UB_ROWS (VC5_UIFCFG_PAGE_SIZE / VC5_UIFBLOCK_ROW_SIZE) +#define PAGE_UB_ROWS_TIMES_1_5 ((PAGE_UB_ROWS * 3) >> 1) +#define PAGE_CACHE_UB_ROWS (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE) +#define PAGE_CACHE_MINUS_1_5_UB_ROWS (PAGE_CACHE_UB_ROWS - PAGE_UB_ROWS_TIMES_1_5) + +/** + * Computes the HW's UIFblock padding for a given height/cpp. + * + * The goal of the padding is to keep pages of the same color (bank number) at + * least half a page away from each other vertically when crossing between + * columns of UIF blocks. + */ +static uint32_t +v3d_get_ub_pad(uint32_t cpp, uint32_t height) +{ + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t uif_block_h = utile_h * 2; + uint32_t height_ub = height / uif_block_h; + + uint32_t height_offset_in_pc = height_ub % PAGE_CACHE_UB_ROWS; + + /* For the perfectly-aligned-for-UIF-XOR case, don't add any pad. */ + if (height_offset_in_pc == 0) + return 0; + + /* Try padding up to where we're offset by at least half a page. */ + if (height_offset_in_pc < PAGE_UB_ROWS_TIMES_1_5) { + /* If we fit entirely in the page cache, don't pad. 
*/ + if (height_ub < PAGE_CACHE_UB_ROWS) + return 0; + else + return PAGE_UB_ROWS_TIMES_1_5 - height_offset_in_pc; + } + + /* If we're close to being aligned to page cache size, then round up + * and rely on XOR. + */ + if (height_offset_in_pc > PAGE_CACHE_MINUS_1_5_UB_ROWS) + return PAGE_CACHE_UB_ROWS - height_offset_in_pc; + + /* Otherwise, we're far enough away (top and bottom) to not need any + * padding. + */ + return 0; +} + +static void +v3d_setup_slices(struct v3dv_image *image) +{ + assert(image->cpp > 0); + + uint32_t width = image->extent.width; + uint32_t height = image->extent.height; + uint32_t depth = image->extent.depth; + + /* Note that power-of-two padding is based on level 1. These are not + * equivalent to just util_next_power_of_two(dimension), because at a + * level 0 dimension of 9, the level 1 power-of-two padded value is 4, + * not 8. + */ + uint32_t pot_width = 2 * util_next_power_of_two(u_minify(width, 1)); + uint32_t pot_height = 2 * util_next_power_of_two(u_minify(height, 1)); + uint32_t pot_depth = 2 * util_next_power_of_two(u_minify(depth, 1)); + + uint32_t utile_w = v3d_utile_width(image->cpp); + uint32_t utile_h = v3d_utile_height(image->cpp); + uint32_t uif_block_w = utile_w * 2; + uint32_t uif_block_h = utile_h * 2; + + uint32_t block_width = vk_format_get_blockwidth(image->vk_format); + uint32_t block_height = vk_format_get_blockheight(image->vk_format); + + bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT; + + bool uif_top = msaa; + + assert(image->array_size > 0); + assert(depth > 0); + assert(image->levels >= 1); + + uint32_t offset = 0; + for (int32_t i = image->levels - 1; i >= 0; i--) { + struct v3d_resource_slice *slice = &image->slices[i]; + + uint32_t level_width, level_height, level_depth; + if (i < 2) { + level_width = u_minify(width, i); + level_height = u_minify(height, i); + } else { + level_width = u_minify(pot_width, i); + level_height = u_minify(pot_height, i); + } + + if (i < 1) + level_depth = u_minify(depth, 
i); + else + level_depth = u_minify(pot_depth, i); + + if (msaa) { + level_width *= 2; + level_height *= 2; + } + + level_width = DIV_ROUND_UP(level_width, block_width); + level_height = DIV_ROUND_UP(level_height, block_height); + + if (!image->tiled) { + slice->tiling = VC5_TILING_RASTER; + if (image->type == VK_IMAGE_TYPE_1D) + level_width = align(level_width, 64 / image->cpp); + } else { + if ((i != 0 || !uif_top) && + (level_width <= utile_w || level_height <= utile_h)) { + slice->tiling = VC5_TILING_LINEARTILE; + level_width = align(level_width, utile_w); + level_height = align(level_height, utile_h); + } else if ((i != 0 || !uif_top) && level_width <= uif_block_w) { + slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN; + level_width = align(level_width, uif_block_w); + level_height = align(level_height, uif_block_h); + } else if ((i != 0 || !uif_top) && level_width <= 2 * uif_block_w) { + slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN; + level_width = align(level_width, 2 * uif_block_w); + level_height = align(level_height, uif_block_h); + } else { + /* We align the width to a 4-block column of UIF blocks, but we + * only align height to UIF blocks. + */ + level_width = align(level_width, 4 * uif_block_w); + level_height = align(level_height, uif_block_h); + + slice->ub_pad = v3d_get_ub_pad(image->cpp, level_height); + level_height += slice->ub_pad * uif_block_h; + + /* If the padding set us to be aligned to the page cache size, + * then the HW will use the XOR bit on odd columns to get us + * perfectly misaligned.
+ */ + if ((level_height / uif_block_h) % + (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE) == 0) { + slice->tiling = VC5_TILING_UIF_XOR; + } else { + slice->tiling = VC5_TILING_UIF_NO_XOR; + } + } + } + + slice->offset = offset; + slice->stride = level_width * image->cpp; + slice->padded_height = level_height; + if (slice->tiling == VC5_TILING_UIF_NO_XOR || + slice->tiling == VC5_TILING_UIF_XOR) { + slice->padded_height_of_output_image_in_uif_blocks = + slice->padded_height / (2 * v3d_utile_height(image->cpp)); + } + + slice->size = level_height * slice->stride; + uint32_t slice_total_size = slice->size * level_depth; + + /* The HW aligns level 1's base to a page if any of level 1 or + * below could be UIF XOR. The lower levels then inherit the + * alignment for as long as necessary, thanks to being power of + * two aligned. + */ + if (i == 1 && + level_width > 4 * uif_block_w && + level_height > PAGE_CACHE_MINUS_1_5_UB_ROWS * uif_block_h) { + slice_total_size = align(slice_total_size, VC5_UIFCFG_PAGE_SIZE); + } + + offset += slice_total_size; + } + + image->size = offset; + + /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only + * needs to be aligned to utile boundaries. Since tiles are laid out + * from small to big in memory, we need to align the later UIF slices + * to UIF blocks, if they were preceded by non-UIF-block-aligned LT + * slices. + * + * We additionally align to 4k, which improves UIF XOR performance. + */ + image->alignment = 4096; + uint32_t page_align_offset = + align(image->slices[0].offset, image->alignment) - image->slices[0].offset; + if (page_align_offset) { + image->size += page_align_offset; + for (int i = 0; i < image->levels; i++) + image->slices[i].offset += page_align_offset; + } + + /* Arrays and cube textures have a stride which is the distance from + * one full mipmap tree to the next (64b aligned). For 3D textures, + * we need to program the stride between slices of miplevel 0.
+ */ + if (image->type != VK_IMAGE_TYPE_3D) { + image->cube_map_stride = + align(image->slices[0].offset + image->slices[0].size, 64); + image->size += image->cube_map_stride * (image->array_size - 1); + } else { + image->cube_map_stride = image->slices[0].size; + } +} + +VkResult +v3dv_CreateImage(VkDevice _device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + struct v3dv_image *image = NULL; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + v3dv_assert(pCreateInfo->mipLevels > 0); + v3dv_assert(pCreateInfo->arrayLayers > 0); + v3dv_assert(pCreateInfo->samples > 0); + v3dv_assert(pCreateInfo->extent.width > 0); + v3dv_assert(pCreateInfo->extent.height > 0); + v3dv_assert(pCreateInfo->extent.depth > 0); + + const struct v3dv_format *format = v3dv_get_format(pCreateInfo->format); + v3dv_assert(format != NULL && format->supported); + + image = vk_zalloc2(&device->alloc, pAllocator, sizeof(*image), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!image) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + image->vk_format = pCreateInfo->format; + image->format = format; + image->aspects = vk_format_aspects(image->vk_format); + image->levels = pCreateInfo->mipLevels; + image->array_size = pCreateInfo->arrayLayers; + image->samples = pCreateInfo->samples; + image->usage = pCreateInfo->usage; + image->create_flags = pCreateInfo->flags; + image->tiling = pCreateInfo->tiling; + + image->drm_format_mod = DRM_FORMAT_MOD_INVALID; + image->tiled = true; + + /* 1D and 1D_ARRAY textures are always raster-order */ + if (image->type == VK_IMAGE_TYPE_1D) + image->tiled = false; + + image->cpp = vk_format_get_blocksize(image->vk_format); + + v3d_setup_slices(image); + + *pImage = v3dv_image_to_handle(image); + + return VK_SUCCESS; +} diff --git 
a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 2e7dc99a6fa..5a6d0480d43 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -46,6 +46,7 @@ #endif #include "common/v3d_device_info.h" +#include "common/v3d_limits.h" #include "vk_debug_report.h" #include "util/xmlconfig.h" @@ -56,6 +57,22 @@ #include "vk_alloc.h" #include "simulator/v3d_simulator.h" +/* FIXME: pipe_box from Gallium. Needed for some v3d_tiling.c functions. + * In the future we might want to drop that dependency, but for now it is + * good enough. + */ +#include "util/u_box.h" + +/* A non-fatal assert. Useful for debugging. */ +#ifdef DEBUG +#define v3dv_assert(x) ({ \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \ +}) +#else +#define v3dv_assert(x) +#endif + /* FIXME: hooks for the packet definition functions. */ static inline void pack_emit_reloc(void *cl, const void *reloc) {} @@ -193,6 +210,74 @@ struct v3dv_format { uint8_t return_size; }; +/** + * Tiling mode enum used for v3d_resource.c, which maps directly to the Memory + * Format field of render target and Z/Stencil config. + */ +enum v3d_tiling_mode { + /* Untiled resources. Not valid as texture inputs. */ + VC5_TILING_RASTER, + + /* Single line of u-tiles. */ + VC5_TILING_LINEARTILE, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_1_COLUMN, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_2_COLUMN, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. + */ + VC5_TILING_UIF_NO_XOR, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. + */ + VC5_TILING_UIF_XOR, +}; + +struct v3d_resource_slice { + uint32_t offset; + uint32_t stride; + uint32_t padded_height; + /* Size of a single pane of the slice.
For 3D textures, there will be + * a number of panes equal to the minified, power-of-two-aligned + * depth. + */ + uint32_t size; + uint8_t ub_pad; + enum v3d_tiling_mode tiling; + uint32_t padded_height_of_output_image_in_uif_blocks; +}; + +struct v3dv_image { + VkImageType type; + VkImageAspectFlags aspects; + + VkExtent3D extent; + uint32_t levels; + uint32_t array_size; + uint32_t samples; + VkImageUsageFlags usage; + VkImageCreateFlags create_flags; + VkImageTiling tiling; + + VkFormat vk_format; + const struct v3dv_format *format; + + uint32_t cpp; + + uint64_t drm_format_mod; + bool tiled; + + struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS]; + uint32_t size; /* Total size in bytes */ + uint32_t cube_map_stride; + uint32_t alignment; +}; + uint32_t v3dv_physical_device_api_version(struct v3dv_physical_device *dev); int v3dv_get_instance_entrypoint_index(const char *name); @@ -229,6 +314,23 @@ VkResult __vk_errorf(struct v3dv_instance *instance, VkResult error, void v3dv_loge(const char *format, ...) 
v3dv_printflike(1, 2); void v3dv_loge_v(const char *format, va_list va); +const struct v3dv_format *v3dv_get_format(VkFormat); + +uint32_t v3d_utile_width(int cpp); +uint32_t v3d_utile_height(int cpp); + +void v3d_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum v3d_tiling_mode tiling_format, + int cpp, uint32_t image_h, + const struct pipe_box *box); + +void v3d_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum v3d_tiling_mode tiling_format, + int cpp, uint32_t image_h, + const struct pipe_box *box); + #define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType) \ \ static inline struct __v3dv_type * \ @@ -267,6 +369,7 @@ V3DV_DEFINE_HANDLE_CASTS(v3dv_physical_device, VkPhysicalDevice) V3DV_DEFINE_HANDLE_CASTS(v3dv_queue, VkQueue) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, VkDeviceMemory) +V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, VkImage) static inline int v3dv_ioctl(int fd, unsigned long request, void *arg) diff --git a/src/broadcom/vulkan/vk_format_info.h b/src/broadcom/vulkan/vk_format_info.h index a1cc6952c8f..ac4e1744b4a 100644 --- a/src/broadcom/vulkan/vk_format_info.h +++ b/src/broadcom/vulkan/vk_format_info.h @@ -27,6 +27,9 @@ #include #include +#include "util/format/u_format.h" +#include "vulkan/util/vk_format.h" + static inline VkImageAspectFlags vk_format_aspects(VkFormat format) { @@ -99,4 +102,22 @@ vk_format_has_depth(VkFormat format) return aspects & VK_IMAGE_ASPECT_DEPTH_BIT; } +static inline unsigned +vk_format_get_blocksize(VkFormat format) +{ + return util_format_get_blocksize(vk_format_to_pipe_format(format)); +} + +static inline unsigned +vk_format_get_blockwidth(VkFormat format) +{ + return util_format_get_blockwidth(vk_format_to_pipe_format(format)); +} + +static inline unsigned +vk_format_get_blockheight(VkFormat format) +{ + return util_format_get_blockheight(vk_format_to_pipe_format(format)); +} + #endif /* VK_FORMAT_INFO_H */