diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build index 526a131ae9b..b2e748573b7 100644 --- a/src/gallium/drivers/v3d/meson.build +++ b/src/gallium/drivers/v3d/meson.build @@ -49,6 +49,7 @@ files_per_version = files( 'v3dx_job.c', 'v3dx_rcl.c', 'v3dx_state.c', + 'v3dx_tfu.c', ) v3d_args = ['-DV3D_BUILD_NEON'] diff --git a/src/gallium/drivers/v3d/v3d_blit.c b/src/gallium/drivers/v3d/v3d_blit.c index 8b47ca7729d..e313afdae5e 100644 --- a/src/gallium/drivers/v3d/v3d_blit.c +++ b/src/gallium/drivers/v3d/v3d_blit.c @@ -210,140 +210,6 @@ v3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info) info->mask &= ~PIPE_MASK_S; } -static bool -v3d_tfu(struct pipe_context *pctx, - struct pipe_resource *pdst, - struct pipe_resource *psrc, - unsigned int src_level, - unsigned int base_level, - unsigned int last_level, - unsigned int src_layer, - unsigned int dst_layer, - bool for_mipmap) -{ - struct v3d_context *v3d = v3d_context(pctx); - struct v3d_screen *screen = v3d->screen; - struct v3d_resource *src = v3d_resource(psrc); - struct v3d_resource *dst = v3d_resource(pdst); - struct v3d_resource_slice *src_base_slice = &src->slices[src_level]; - struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level]; - int msaa_scale = pdst->nr_samples > 1 ? 2 : 1; - int width = u_minify(pdst->width0, base_level) * msaa_scale; - int height = u_minify(pdst->height0, base_level) * msaa_scale; - enum pipe_format pformat; - - if (psrc->format != pdst->format) - return false; - if (psrc->nr_samples != pdst->nr_samples) - return false; - - /* Can't write to raster. */ - if (dst_base_slice->tiling == V3D_TILING_RASTER) - return false; - - /* When using TFU for blit, we are doing exact copies (both input and - * output format must be the same, no scaling, etc), so there is no - * pixel format conversions. Thus we can rewrite the format to use one - * that is TFU compatible based on its texel size. - */ - if (for_mipmap) { - pformat = pdst->format; - } else { - switch (dst->cpp) { - case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT; break; - case 8: pformat = PIPE_FORMAT_R16G16B16A16_FLOAT; break; - case 4: pformat = PIPE_FORMAT_R32_FLOAT; break; - case 2: pformat = PIPE_FORMAT_R16_FLOAT; break; - case 1: pformat = PIPE_FORMAT_R8_UNORM; break; - default: unreachable("unsupported format bit-size"); break; - }; - } - - uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat); - struct v3d_device_info *devinfo = &screen->devinfo; - - if (!v3d_X(devinfo, tfu_supports_tex_format)(tex_format, for_mipmap)) { - assert(for_mipmap); - return false; - } - - v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false); - v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false); - - struct drm_v3d_submit_tfu tfu = { - .ios = (height << 16) | width, - .bo_handles = { - dst->bo->handle, - src != dst ? src->bo->handle : 0 - }, - .in_sync = v3d->out_sync, - .out_sync = v3d->out_sync, - }; - uint32_t src_offset = (src->bo->offset + - v3d_layer_offset(psrc, src_level, src_layer)); - tfu.iia |= src_offset; - if (src_base_slice->tiling == V3D_TILING_RASTER) { - tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER << - V3D33_TFU_ICFG_FORMAT_SHIFT); - } else { - tfu.icfg |= ((V3D33_TFU_ICFG_FORMAT_LINEARTILE + - (src_base_slice->tiling - V3D_TILING_LINEARTILE)) << - V3D33_TFU_ICFG_FORMAT_SHIFT); - } - - uint32_t dst_offset = (dst->bo->offset + - v3d_layer_offset(pdst, base_level, dst_layer)); - tfu.ioa |= dst_offset; - if (last_level != base_level) - tfu.ioa |= V3D33_TFU_IOA_DIMTW; - tfu.ioa |= ((V3D33_TFU_IOA_FORMAT_LINEARTILE + - (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) << - V3D33_TFU_IOA_FORMAT_SHIFT); - - tfu.icfg |= tex_format << V3D33_TFU_ICFG_TTYPE_SHIFT; - tfu.icfg |= (last_level - base_level) << V3D33_TFU_ICFG_NUMMM_SHIFT; - - switch (src_base_slice->tiling) { - case V3D_TILING_UIF_NO_XOR: - case V3D_TILING_UIF_XOR: - tfu.iis |= (src_base_slice->padded_height / - (2 * v3d_utile_height(src->cpp))); - break; - case V3D_TILING_RASTER: - tfu.iis |= src_base_slice->stride / src->cpp; - break; - case V3D_TILING_LINEARTILE: - case V3D_TILING_UBLINEAR_1_COLUMN: - case V3D_TILING_UBLINEAR_2_COLUMN: - break; - } - - /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the - * OPAD field for the destination (how many extra UIF blocks beyond - * those necessary to cover the height). When filling mipmaps, the - * miplevel 1+ tiling state is inferred. - */ - if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR || - dst_base_slice->tiling == V3D_TILING_UIF_XOR) { - int uif_block_h = 2 * v3d_utile_height(dst->cpp); - int implicit_padded_height = align(height, uif_block_h); - - tfu.icfg |= (((dst_base_slice->padded_height - - implicit_padded_height) / uif_block_h) << - V3D33_TFU_ICFG_OPAD_SHIFT); - } - - int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu); - if (ret != 0) { - fprintf(stderr, "Failed to submit TFU job: %d\n", ret); - return false; - } - - dst->writes++; - - return true; -} - bool v3d_generate_mipmap(struct pipe_context *pctx, struct pipe_resource *prsc, @@ -362,12 +228,16 @@ v3d_generate_mipmap(struct pipe_context *pctx, if (first_layer != last_layer) return false; - return v3d_tfu(pctx, - prsc, prsc, - base_level, - base_level, last_level, - first_layer, first_layer, - true); + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_screen *screen = v3d->screen; + struct v3d_device_info *devinfo = &screen->devinfo; + + return v3d_X(devinfo, tfu)(pctx, + prsc, prsc, + base_level, + base_level, last_level, + first_layer, first_layer, + true); } static void @@ -396,11 +266,15 @@ v3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info) if (info->dst.format != info->src.format) return; - if (v3d_tfu(pctx, info->dst.resource, info->src.resource, - info->src.level, - info->dst.level, info->dst.level, - info->src.box.z, info->dst.box.z, - false)) { + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_screen *screen = v3d->screen; + struct v3d_device_info *devinfo = &screen->devinfo; + + if (v3d_X(devinfo, tfu)(pctx, info->dst.resource, info->src.resource, + info->src.level, + info->dst.level, info->dst.level, + info->src.box.z, info->dst.box.z, + false)) { info->mask &= ~PIPE_MASK_RGBA; } } diff --git a/src/gallium/drivers/v3d/v3dx_context.h b/src/gallium/drivers/v3d/v3dx_context.h index 03d7c244ea2..e0a5cbfb2f3 100644 --- a/src/gallium/drivers/v3d/v3dx_context.h +++ b/src/gallium/drivers/v3d/v3dx_context.h @@ -51,3 +51,13 @@ void v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, */ bool v3dX(tfu_supports_tex_format)(uint32_t tex_format, bool for_mipmap); + +bool v3dX(tfu)(struct pipe_context *pctx, + struct pipe_resource *pdst, + struct pipe_resource *psrc, + unsigned int src_level, + unsigned int base_level, + unsigned int last_level, + unsigned int src_layer, + unsigned int dst_layer, + bool for_mipmap); diff --git a/src/gallium/drivers/v3d/v3dx_tfu.c b/src/gallium/drivers/v3d/v3dx_tfu.c new file mode 100644 index 00000000000..f4dba0cfa48 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_tfu.c @@ -0,0 +1,202 @@ +/* + * Copyright © 2022 Raspberry Pi Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "v3d_context.h" +#include "broadcom/common/v3d_tfu.h" + +bool +v3dX(tfu)(struct pipe_context *pctx, + struct pipe_resource *pdst, + struct pipe_resource *psrc, + unsigned int src_level, + unsigned int base_level, + unsigned int last_level, + unsigned int src_layer, + unsigned int dst_layer, + bool for_mipmap) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_screen *screen = v3d->screen; + struct v3d_resource *src = v3d_resource(psrc); + struct v3d_resource *dst = v3d_resource(pdst); + struct v3d_resource_slice *src_base_slice = &src->slices[src_level]; + struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level]; + int msaa_scale = pdst->nr_samples > 1 ? 2 : 1; + int width = u_minify(pdst->width0, base_level) * msaa_scale; + int height = u_minify(pdst->height0, base_level) * msaa_scale; + enum pipe_format pformat; + + if (psrc->format != pdst->format) + return false; + if (psrc->nr_samples != pdst->nr_samples) + return false; + + if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D) + return false; + + /* Can't write to raster. */ + if (dst_base_slice->tiling == V3D_TILING_RASTER) + return false; + + /* When using TFU for blit, we are doing exact copies (both input and + * output format must be the same, no scaling, etc), so there is no + * pixel format conversions. Thus we can rewrite the format to use one + * that is TFU compatible based on its texel size. + */ + if (for_mipmap) { + pformat = pdst->format; + } else { + switch (dst->cpp) { + case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT; break; + case 8: pformat = PIPE_FORMAT_R16G16B16A16_FLOAT; break; + case 4: pformat = PIPE_FORMAT_R32_FLOAT; break; + case 2: pformat = PIPE_FORMAT_R16_FLOAT; break; + case 1: pformat = PIPE_FORMAT_R8_UNORM; break; + default: unreachable("unsupported format bit-size"); break; + }; + } + + uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat); + + if (!v3dX(tfu_supports_tex_format)(tex_format, for_mipmap)) { + assert(for_mipmap); + return false; + } + + v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false); + v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false); + + struct drm_v3d_submit_tfu tfu = { + .ios = (height << 16) | width, + .bo_handles = { + dst->bo->handle, + src != dst ? src->bo->handle : 0 + }, + .in_sync = v3d->out_sync, + .out_sync = v3d->out_sync, + }; + uint32_t src_offset = (src->bo->offset + + v3d_layer_offset(psrc, src_level, src_layer)); + tfu.iia |= src_offset; + + uint32_t dst_offset = (dst->bo->offset + + v3d_layer_offset(pdst, base_level, dst_layer)); + tfu.ioa |= dst_offset; + + switch (src_base_slice->tiling) { + case V3D_TILING_UIF_NO_XOR: + case V3D_TILING_UIF_XOR: + tfu.iis |= (src_base_slice->padded_height / + (2 * v3d_utile_height(src->cpp))); + break; + case V3D_TILING_RASTER: + tfu.iis |= src_base_slice->stride / src->cpp; + break; + case V3D_TILING_LINEARTILE: + case V3D_TILING_UBLINEAR_1_COLUMN: + case V3D_TILING_UBLINEAR_2_COLUMN: + break; + } + +#if V3D_VERSION <= 42 + if (src_base_slice->tiling == V3D_TILING_RASTER) { + tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER << + V3D33_TFU_ICFG_FORMAT_SHIFT); + } else { + tfu.icfg |= ((V3D33_TFU_ICFG_FORMAT_LINEARTILE + + (src_base_slice->tiling - V3D_TILING_LINEARTILE)) << + V3D33_TFU_ICFG_FORMAT_SHIFT); + } + tfu.icfg |= tex_format << V3D33_TFU_ICFG_TTYPE_SHIFT; + + if (last_level != base_level) + tfu.ioa |= V3D33_TFU_IOA_DIMTW; + + tfu.ioa |= ((V3D33_TFU_IOA_FORMAT_LINEARTILE + + (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) << + V3D33_TFU_IOA_FORMAT_SHIFT); + + tfu.icfg |= (last_level - base_level) << V3D33_TFU_ICFG_NUMMM_SHIFT; + + /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the + * OPAD field for the destination (how many extra UIF blocks beyond + * those necessary to cover the height). When filling mipmaps, the + * miplevel 1+ tiling state is inferred. + */ + if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR || + dst_base_slice->tiling == V3D_TILING_UIF_XOR) { + int uif_block_h = 2 * v3d_utile_height(dst->cpp); + int implicit_padded_height = align(height, uif_block_h); + + tfu.icfg |= (((dst_base_slice->padded_height - + implicit_padded_height) / uif_block_h) << + V3D33_TFU_ICFG_OPAD_SHIFT); + } +#endif /* V3D_VERSION <= 42 */ + +#if V3D_VERSION >= 71 + if (src_base_slice->tiling == V3D_TILING_RASTER) { + tfu.icfg = V3D71_TFU_ICFG_FORMAT_RASTER << V3D71_TFU_ICFG_IFORMAT_SHIFT; + } else { + tfu.icfg = (V3D71_TFU_ICFG_FORMAT_LINEARTILE + + (src_base_slice->tiling - V3D_TILING_LINEARTILE)) << + V3D71_TFU_ICFG_IFORMAT_SHIFT; + } + tfu.icfg |= tex_format << V3D71_TFU_ICFG_OTYPE_SHIFT; + + if (last_level != base_level) + tfu.v71.ioc |= V3D71_TFU_IOC_DIMTW; + + tfu.v71.ioc |= ((V3D71_TFU_IOC_FORMAT_LINEARTILE + + (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) << + V3D71_TFU_IOC_FORMAT_SHIFT); + + switch (dst_base_slice->tiling) { + case V3D_TILING_UIF_NO_XOR: + case V3D_TILING_UIF_XOR: + tfu.v71.ioc |= + (dst_base_slice->padded_height / (2 * v3d_utile_height(dst->cpp))) << + V3D71_TFU_IOC_STRIDE_SHIFT; + break; + case V3D_TILING_RASTER: + tfu.v71.ioc |= (dst_base_slice->padded_height / dst->cpp) << + V3D71_TFU_IOC_STRIDE_SHIFT; + break; + default: + break; + } + + tfu.v71.ioc |= (last_level - base_level) << V3D71_TFU_IOC_NUMMM_SHIFT; +#endif /* V3D_VERSION >= 71*/ + + int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu); + if (ret != 0) { + fprintf(stderr, "Failed to submit TFU job: %d\n", ret); + return false; + } + + dst->writes++; + + return true; +} +