diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c index 661f10a4866..dd2551eb5fa 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -235,7 +235,7 @@ single_desc_copy(nir_builder *b, nir_def *desc_copy_idx) nir_pop_if(b, NULL); } -static mali_ptr +static struct panvk_priv_mem panvk_meta_desc_copy_shader(struct panvk_device *dev, struct pan_shader_info *shader_info) { @@ -270,8 +270,8 @@ panvk_meta_desc_copy_shader(struct panvk_device *dev, shader_info->push.count = DIV_ROUND_UP(sizeof(struct pan_nir_desc_copy_info), 4); - mali_ptr shader = pan_pool_upload_aligned(&dev->meta.bin_pool.base, - binary.data, binary.size, 128); + struct panvk_priv_mem shader = panvk_pool_upload_aligned( + &dev->mempools.exec, binary.data, binary.size, 128); util_dynarray_fini(&binary); return shader; @@ -282,15 +282,24 @@ panvk_per_arch(meta_desc_copy_init)(struct panvk_device *dev) { struct pan_shader_info shader_info; - mali_ptr shader = panvk_meta_desc_copy_shader(dev, &shader_info); - struct panfrost_ptr rsd = - pan_pool_alloc_desc(&dev->meta.desc_pool.base, RENDERER_STATE); + dev->desc_copy.shader = panvk_meta_desc_copy_shader(dev, &shader_info); - pan_pack(rsd.cpu, RENDERER_STATE, cfg) { + mali_ptr shader = panvk_priv_mem_dev_addr(dev->desc_copy.shader); + struct panvk_priv_mem rsd = + panvk_pool_alloc_desc(&dev->mempools.rw, RENDERER_STATE); + + pan_pack(panvk_priv_mem_host_addr(rsd), RENDERER_STATE, cfg) { pan_shader_prepare_rsd(&shader_info, shader, &cfg); } - dev->meta.desc_copy.rsd = rsd.gpu; + dev->desc_copy.rsd = rsd; +} + +void +panvk_per_arch(meta_desc_copy_cleanup)(struct panvk_device *dev) +{ + panvk_pool_free_mem(&dev->mempools.rw, dev->desc_copy.rsd); + panvk_pool_free_mem(&dev->mempools.exec, dev->desc_copy.shader); } struct panfrost_ptr @@ -368,7 +377,7 @@ panvk_per_arch(meta_get_copy_desc_job)( 
GENX(pan_emit_tls)(&tlsinfo, tls.cpu); pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { - cfg.state = dev->meta.desc_copy.rsd; + cfg.state = panvk_priv_mem_dev_addr(dev->desc_copy.rsd); cfg.push_uniforms = push_uniforms; cfg.thread_storage = tls.gpu; } diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c index e6d1d86fac5..56b39b34417 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c @@ -154,7 +154,7 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf) cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds.gpu = 0; ASSERTED unsigned num_preload_jobs = GENX(pan_preload_fb)( - &dev->meta.blitter.cache, &cmdbuf->desc_pool.base, + &dev->blitter.cache, &cmdbuf->desc_pool.base, &cmdbuf->state.gfx.render.fb.info, i, batch->tls.gpu, NULL); /* Bifrost GPUs use pre frame DCDs to preload the FB content. We diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index 3b51cc612e7..a87879150d5 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -15,6 +15,7 @@ #include "panvk_cmd_desc_state.h" #include "panvk_device.h" #include "panvk_entrypoints.h" +#include "panvk_meta.h" #include "panvk_physical_device.h" #include "pan_desc.h" diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index cfd6ca6350d..38030c6378a 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -32,6 +32,7 @@ #include "pan_shader.h" #include "vk_format.h" +#include "vk_meta.h" #include "vk_pipeline_layout.h" struct panvk_draw_info { @@ -857,6 +858,12 @@ panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, static enum mali_draw_mode translate_prim_topology(VkPrimitiveTopology in) { + /* Test VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA separately, as it's not + 
* part of the VkPrimitiveTopology enum. + */ + if (in == VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA) + return MALI_DRAW_MODE_TRIANGLES; + switch (in) { case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return MALI_DRAW_MODE_POINTS; diff --git a/src/panfrost/vulkan/jm/panvk_vX_meta.c b/src/panfrost/vulkan/jm/panvk_vX_meta.c deleted file mode 100644 index 45afeacc2ba..00000000000 --- a/src/panfrost/vulkan/jm/panvk_vX_meta.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "genxml/gen_macros.h" - -#include "nir/nir_builder.h" -#include "pan_encoder.h" -#include "pan_shader.h" - -#include "panvk_device.h" - -#include "vk_format.h" - -mali_ptr -panvk_per_arch(meta_emit_viewport)(struct pan_pool *pool, uint16_t minx, - uint16_t miny, uint16_t maxx, uint16_t maxy) -{ - struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT); - - pan_pack(vp.cpu, VIEWPORT, cfg) { - cfg.scissor_minimum_x = minx; - cfg.scissor_minimum_y = miny; - cfg.scissor_maximum_x = maxx; - cfg.scissor_maximum_y = maxy; - } - - return vp.gpu; -} - -void -panvk_per_arch(meta_init)(struct panvk_device *dev) -{ - struct panvk_pool_properties bin_pool_props = { - .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, - .slab_size = 16 * 1024, - .label = "panvk_meta binary pool", - .prealloc = false, - .owns_bos = true, - .needs_locking = false, - }; - struct panvk_pool_properties desc_pool_props = { - .create_flags = 0, - .slab_size = 16 * 1024, - .label = "panvk_meta descriptor pool", - .prealloc = false, - .owns_bos = true, - .needs_locking = false, - }; - - panvk_pool_init(&dev->meta.bin_pool, dev, NULL, &bin_pool_props); - panvk_pool_init(&dev->meta.desc_pool, dev, NULL, &desc_pool_props); - panvk_per_arch(meta_blit_init)(dev); - panvk_per_arch(meta_copy_init)(dev); - panvk_per_arch(meta_clear_init)(dev); - panvk_per_arch(meta_desc_copy_init)(dev); -} - -void -panvk_per_arch(meta_cleanup)(struct panvk_device *dev) -{ - panvk_per_arch(meta_blit_cleanup)(dev); - panvk_pool_cleanup(&dev->meta.desc_pool); - panvk_pool_cleanup(&dev->meta.bin_pool); -} diff --git a/src/panfrost/vulkan/jm/panvk_vX_meta_blit.c b/src/panfrost/vulkan/jm/panvk_vX_meta_blit.c deleted file mode 100644 index 66ea66635a5..00000000000 --- a/src/panfrost/vulkan/jm/panvk_vX_meta_blit.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "gen_macros.h" - -#include "pan_blitter.h" -#include "pan_props.h" - -#include "panvk_cmd_buffer.h" -#include "panvk_device.h" -#include "panvk_entrypoints.h" -#include "panvk_image.h" -#include "panvk_physical_device.h" - -static void -panvk_meta_blit(struct panvk_cmd_buffer *cmdbuf, - const struct pan_blit_info *blitinfo, - const struct panvk_image *src_img, - const struct panvk_image *dst_img) -{ - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; - struct pan_blit_context ctx; - struct pan_image_view views[2] = { - { - .format = blitinfo->dst.planes[0].format, - .dim = MALI_TEXTURE_DIMENSION_2D, - .planes = - { - blitinfo->dst.planes[0].image, - blitinfo->dst.planes[1].image, - blitinfo->dst.planes[2].image, - }, - .nr_samples = blitinfo->dst.planes[0].image->layout.nr_samples, - .first_level = blitinfo->dst.level, - .last_level = blitinfo->dst.level, - .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, - PIPE_SWIZZLE_W}, - }, - }; - - *fbinfo = (struct pan_fb_info){ - .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model), - .width = u_minify(blitinfo->dst.planes[0].image->layout.width, - blitinfo->dst.level), - .height = u_minify(blitinfo->dst.planes[0].image->layout.height, - blitinfo->dst.level), - .extent = - { - .minx = MAX2(MIN2(blitinfo->dst.start.x, blitinfo->dst.end.x), 0), - .miny = MAX2(MIN2(blitinfo->dst.start.y, blitinfo->dst.end.y), 0), - .maxx = MAX2(blitinfo->dst.start.x, blitinfo->dst.end.x), - .maxy = MAX2(blitinfo->dst.start.y, blitinfo->dst.end.y), - }, - .nr_samples = blitinfo->dst.planes[0].image->layout.nr_samples, - }; - - fbinfo->extent.maxx = MIN2(fbinfo->extent.maxx, fbinfo->width - 1); - fbinfo->extent.maxy = MIN2(fbinfo->extent.maxy, fbinfo->height - 1); - - /* TODO: don't force preloads of dst resources if unneeded */ - - 
const struct util_format_description *fdesc = - util_format_description(blitinfo->dst.planes[0].image->layout.format); - - if (util_format_has_depth(fdesc)) { - /* We want the image format here, otherwise we might lose one of the - * component. - */ - views[0].format = blitinfo->dst.planes[0].image->layout.format; - fbinfo->zs.view.zs = &views[0]; - fbinfo->zs.preload.z = true; - fbinfo->zs.preload.s = util_format_has_stencil(fdesc); - } else if (util_format_has_stencil(fdesc)) { - fbinfo->zs.view.s = &views[0]; - fbinfo->zs.preload.s = true; - } else { - fbinfo->rt_count = 1; - fbinfo->rts[0].view = &views[0]; - fbinfo->rts[0].preload = true; - cmdbuf->state.gfx.render.fb.crc_valid[0] = false; - fbinfo->rts[0].crc_valid = &cmdbuf->state.gfx.render.fb.crc_valid[0]; - } - - if (blitinfo->dst.planes[1].format != PIPE_FORMAT_NONE) { - /* TODO: don't force preloads of dst resources if unneeded */ - views[1].format = blitinfo->dst.planes[1].format; - views[1].dim = MALI_TEXTURE_DIMENSION_2D; - views[1].planes[0] = blitinfo->dst.planes[1].image; - views[1].nr_samples = blitinfo->dst.planes[1].image->layout.nr_samples; - views[1].first_level = blitinfo->dst.level; - views[1].last_level = blitinfo->dst.level; - views[1].swizzle[0] = PIPE_SWIZZLE_X; - views[1].swizzle[1] = PIPE_SWIZZLE_Y; - views[1].swizzle[2] = PIPE_SWIZZLE_Z; - views[1].swizzle[3] = PIPE_SWIZZLE_W; - fbinfo->zs.view.s = &views[1]; - } - - panvk_per_arch(cmd_close_batch)(cmdbuf); - cmdbuf->state.gfx.render.layer_count = 1; - - GENX(pan_blit_ctx_init) - (&dev->meta.blitter.cache, blitinfo, &cmdbuf->desc_pool.base, &ctx); - do { - if (ctx.dst.cur_layer < 0) - continue; - - struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf); - mali_ptr tsd, tiler; - - views[0].first_layer = views[0].last_layer = ctx.dst.cur_layer; - views[1].first_layer = views[1].last_layer = views[0].first_layer; - batch->blit.src = src_img->bo; - batch->blit.dst = dst_img->bo; - panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, 
true); - panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); - panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, 0); - - tsd = batch->tls.gpu; - tiler = batch->tiler.ctx_descs.gpu; - - struct panfrost_ptr job = GENX(pan_blit)(&ctx, &cmdbuf->desc_pool.base, - &batch->vtc_jc, tsd, tiler); - util_dynarray_append(&batch->jobs, void *, job.cpu); - panvk_per_arch(cmd_close_batch)(cmdbuf); - } while (pan_blit_next_surface(&ctx)); -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdBlitImage2)(VkCommandBuffer commandBuffer, - const VkBlitImageInfo2 *pBlitImageInfo) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_image, src, pBlitImageInfo->srcImage); - VK_FROM_HANDLE(panvk_image, dst, pBlitImageInfo->dstImage); - - for (unsigned i = 0; i < pBlitImageInfo->regionCount; i++) { - const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i]; - struct pan_blit_info info = { - .src = - { - .planes[0].image = &src->pimage, - .planes[0].format = src->pimage.layout.format, - .level = region->srcSubresource.mipLevel, - .start = - { - region->srcOffsets[0].x, - region->srcOffsets[0].y, - region->srcOffsets[0].z, - region->srcSubresource.baseArrayLayer, - }, - .end = - { - region->srcOffsets[1].x, - region->srcOffsets[1].y, - region->srcOffsets[1].z, - region->srcSubresource.baseArrayLayer + - region->srcSubresource.layerCount - 1, - }, - }, - .dst = - { - .planes[0].image = &dst->pimage, - .planes[0].format = dst->pimage.layout.format, - .level = region->dstSubresource.mipLevel, - .start = - { - region->dstOffsets[0].x, - region->dstOffsets[0].y, - region->dstOffsets[0].z, - region->dstSubresource.baseArrayLayer, - }, - .end = - { - region->dstOffsets[1].x, - region->dstOffsets[1].y, - region->dstOffsets[1].z, - region->dstSubresource.baseArrayLayer + - region->dstSubresource.layerCount - 1, - }, - }, - .nearest = pBlitImageInfo->filter == VK_FILTER_NEAREST, - }; - - if (region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) - 
info.src.planes[0].format = - util_format_stencil_only(info.src.planes[0].format); - else if (region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) - info.src.planes[0].format = - util_format_get_depth_only(info.src.planes[0].format); - - if (region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) - info.dst.planes[0].format = - util_format_stencil_only(info.dst.planes[0].format); - else if (region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) - info.dst.planes[0].format = - util_format_get_depth_only(info.dst.planes[0].format); - - panvk_meta_blit(cmdbuf, &info, src, dst); - } -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdResolveImage2)(VkCommandBuffer commandBuffer, - const VkResolveImageInfo2 *pResolveImageInfo) -{ - panvk_stub(); -} - -void -panvk_per_arch(meta_blit_init)(struct panvk_device *dev) -{ - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct panvk_pool_properties bin_pool_props = { - .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, - .slab_size = 16 * 1024, - .label = "panvk_meta blitter binary pool", - .prealloc = false, - .owns_bos = true, - .needs_locking = false, - }; - struct panvk_pool_properties desc_pool_props = { - .create_flags = 0, - .slab_size = 16 * 1024, - .label = "panvk_meta blitter descriptor pool", - .prealloc = false, - .owns_bos = true, - .needs_locking = false, - }; - - panvk_pool_init(&dev->meta.blitter.bin_pool, dev, NULL, &bin_pool_props); - panvk_pool_init(&dev->meta.blitter.desc_pool, dev, NULL, &desc_pool_props); - pan_blend_shader_cache_init(&dev->meta.blend_shader_cache, - phys_dev->kmod.props.gpu_prod_id); - GENX(pan_blitter_cache_init) - (&dev->meta.blitter.cache, phys_dev->kmod.props.gpu_prod_id, - &dev->meta.blend_shader_cache, &dev->meta.blitter.bin_pool.base, - &dev->meta.blitter.desc_pool.base); -} - -void -panvk_per_arch(meta_blit_cleanup)(struct panvk_device *dev) -{ - GENX(pan_blitter_cache_cleanup)(&dev->meta.blitter.cache); - 
pan_blend_shader_cache_cleanup(&dev->meta.blend_shader_cache); - panvk_pool_cleanup(&dev->meta.blitter.desc_pool); - panvk_pool_cleanup(&dev->meta.blitter.bin_pool); -} diff --git a/src/panfrost/vulkan/jm/panvk_vX_meta_clear.c b/src/panfrost/vulkan/jm/panvk_vX_meta_clear.c deleted file mode 100644 index 67d402b13c5..00000000000 --- a/src/panfrost/vulkan/jm/panvk_vX_meta_clear.c +++ /dev/null @@ -1,529 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "nir/nir_builder.h" -#include "pan_blitter.h" -#include "pan_encoder.h" -#include "pan_props.h" -#include "pan_shader.h" - -#include "panvk_cmd_buffer.h" -#include "panvk_device.h" -#include "panvk_entrypoints.h" -#include "panvk_image.h" -#include "panvk_meta.h" -#include "panvk_physical_device.h" - -#include "vk_format.h" -#include "vk_render_pass.h" - -static mali_ptr -panvk_meta_clear_color_attachment_shader(struct panvk_device *dev, - enum glsl_base_type base_type, - struct pan_shader_info *shader_info) -{ - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_pool *bin_pool = &dev->meta.bin_pool.base; - - nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(), - "panvk_meta_clear_attachment(base_type=%d)", base_type); - - const struct glsl_type *out_type = glsl_vector_type(base_type, 4); - nir_variable *out = - nir_variable_create(b.shader, nir_var_shader_out, out_type, "out"); - out->data.location = FRAG_RESULT_DATA0; - - nir_def *clear_values = - nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = ~0); - nir_store_var(&b, out, clear_values, 0xff); - - struct panfrost_compile_inputs inputs = { - .gpu_id = phys_dev->kmod.props.gpu_prod_id, - .is_blit = true, - .no_ubo_to_push = true, - }; - - struct util_dynarray binary; - - util_dynarray_init(&binary, NULL); - pan_shader_preprocess(b.shader, inputs.gpu_id); - GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); - - shader_info->push.count = 4; - - mali_ptr shader = - pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); - - util_dynarray_fini(&binary); - ralloc_free(b.shader); - - return shader; -} - -static mali_ptr -panvk_meta_clear_color_attachment_emit_rsd(struct pan_pool *desc_pool, - enum pipe_format format, unsigned rt, - struct pan_shader_info *shader_info, - mali_ptr shader) -{ - struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc_aggregate( - 
desc_pool, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(rt + 1, BLEND)); - - pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(shader_info, shader, &cfg); - - cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; - cfg.multisample_misc.sample_mask = UINT16_MAX; - cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS; - cfg.properties.allow_forward_pixel_to_be_killed = true; - cfg.properties.allow_forward_pixel_to_kill = true; - cfg.properties.zs_update_operation = MALI_PIXEL_KILL_WEAK_EARLY; - cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_WEAK_EARLY; - } - - void *bd = rsd_ptr.cpu + pan_size(RENDERER_STATE); - - pan_pack(bd, BLEND, cfg) { - cfg.round_to_fb_precision = true; - cfg.load_destination = false; - cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; - cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; - cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; - cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; - cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.internal.mode = MALI_BLEND_MODE_OPAQUE; - cfg.equation.color_mask = 0xf; - cfg.internal.fixed_function.num_comps = 4; - cfg.internal.fixed_function.rt = rt; - cfg.internal.fixed_function.conversion.memory_format = - GENX(panfrost_dithered_format_from_pipe_format)(format, false); - cfg.internal.fixed_function.conversion.register_format = - shader_info->bifrost.blend[0].format; - } - - return rsd_ptr.gpu; -} - -static mali_ptr -panvk_meta_clear_zs_attachment_emit_rsd(struct pan_pool *desc_pool, - VkImageAspectFlags mask, - VkClearDepthStencilValue value) -{ - struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc(desc_pool, RENDERER_STATE); - - pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { - cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; - cfg.multisample_misc.sample_mask = UINT16_MAX; - - if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) { - cfg.multisample_misc.depth_write_mask = true; - cfg.multisample_misc.depth_function = 
MALI_FUNC_NOT_EQUAL; - - if (value.depth != 0.0) { - cfg.stencil_mask_misc.front_facing_depth_bias = true; - cfg.stencil_mask_misc.back_facing_depth_bias = true; - cfg.depth_units = INFINITY; - cfg.depth_bias_clamp = value.depth; - } - } - - if (mask & VK_IMAGE_ASPECT_STENCIL_BIT) { - cfg.stencil_mask_misc.stencil_enable = true; - cfg.stencil_mask_misc.stencil_mask_front = 0xFF; - cfg.stencil_mask_misc.stencil_mask_back = 0xFF; - - cfg.stencil_front.compare_function = (mask & VK_IMAGE_ASPECT_DEPTH_BIT) - ? MALI_FUNC_ALWAYS - : MALI_FUNC_NOT_EQUAL; - - cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_KEEP; - cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE; - cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE; - cfg.stencil_front.reference_value = value.stencil; - cfg.stencil_front.mask = 0xFF; - cfg.stencil_back = cfg.stencil_front; - } - - cfg.properties.allow_forward_pixel_to_be_killed = true; - cfg.properties.zs_update_operation = MALI_PIXEL_KILL_WEAK_EARLY; - cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_WEAK_EARLY; - } - - return rsd_ptr.gpu; -} - -static void -panvk_meta_clear_attachment_emit_dcd(struct pan_pool *pool, mali_ptr coords, - mali_ptr push_constants, mali_ptr vpd, - mali_ptr tsd, mali_ptr rsd, void *out) -{ - pan_pack(out, DRAW, cfg) { - cfg.thread_storage = tsd; - cfg.state = rsd; - cfg.push_uniforms = push_constants; - cfg.position = coords; - cfg.viewport = vpd; - } -} - -static struct panfrost_ptr -panvk_meta_clear_attachment_emit_tiler_job(struct pan_pool *desc_pool, - struct pan_jc *jc, mali_ptr coords, - mali_ptr push_constants, - mali_ptr vpd, mali_ptr rsd, - mali_ptr tsd, mali_ptr tiler) -{ - struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, TILER_JOB); - - panvk_meta_clear_attachment_emit_dcd( - desc_pool, coords, push_constants, vpd, tsd, rsd, - pan_section_ptr(job.cpu, TILER_JOB, DRAW)); - - pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) { - cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; - cfg.index_count 
= 4; - cfg.job_task_split = 6; - } - - pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { - cfg.constant = 1.0f; - } - - void *invoc = pan_section_ptr(job.cpu, TILER_JOB, INVOCATION); - panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false); - - pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg) - ; - pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) { - cfg.address = tiler; - } - - pan_jc_add_job(jc, MALI_JOB_TYPE_TILER, false, false, 0, 0, &job, false); - return job; -} - -static enum glsl_base_type -panvk_meta_get_format_type(enum pipe_format format) -{ - const struct util_format_description *desc = util_format_description(format); - int i; - - i = util_format_get_first_non_void_channel(format); - assert(i >= 0); - - if (desc->channel[i].normalized) - return GLSL_TYPE_FLOAT; - - switch (desc->channel[i].type) { - - case UTIL_FORMAT_TYPE_UNSIGNED: - return GLSL_TYPE_UINT; - - case UTIL_FORMAT_TYPE_SIGNED: - return GLSL_TYPE_INT; - - case UTIL_FORMAT_TYPE_FLOAT: - return GLSL_TYPE_FLOAT; - - default: - unreachable("Unhandled format"); - return GLSL_TYPE_FLOAT; - } -} - -static void -panvk_meta_clear_attachment(struct panvk_cmd_buffer *cmdbuf, unsigned rt, - VkImageAspectFlags mask, - const VkClearValue *clear_value, - const VkClearRect *clear_rect) -{ - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - struct panvk_meta *meta = &dev->meta; - struct panvk_batch *batch = cmdbuf->cur_batch; - enum pipe_format pfmt = - cmdbuf->state.gfx.render.fb.info.rts[rt].view->format; - unsigned minx = MAX2(clear_rect->rect.offset.x, 0); - unsigned miny = MAX2(clear_rect->rect.offset.y, 0); - unsigned maxx = - MAX2(clear_rect->rect.offset.x + clear_rect->rect.extent.width - 1, 0); - unsigned maxy = - MAX2(clear_rect->rect.offset.y + clear_rect->rect.extent.height - 1, 0); - - panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); - panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); - cmdbuf->state.gfx.render.layer_count = 1; - 
panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, 0); - - mali_ptr vpd = panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base, - minx, miny, maxx, maxy); - - float rect[] = { - minx, miny, 0.0, 1.0, maxx + 1, miny, 0.0, 1.0, - minx, maxy + 1, 0.0, 1.0, maxx + 1, maxy + 1, 0.0, 1.0, - }; - mali_ptr coordinates = - pan_pool_upload_aligned(&cmdbuf->desc_pool.base, rect, sizeof(rect), 64); - - enum glsl_base_type base_type = panvk_meta_get_format_type(pfmt); - - mali_ptr tiler = batch->tiler.ctx_descs.gpu; - mali_ptr tsd = batch->tls.gpu; - - mali_ptr pushconsts = 0, rsd = 0; - - if (mask & VK_IMAGE_ASPECT_COLOR_BIT) { - mali_ptr shader = meta->clear_attachment.color[base_type].shader; - struct pan_shader_info *shader_info = - &meta->clear_attachment.color[base_type].shader_info; - - pushconsts = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, clear_value, - sizeof(*clear_value), 16); - - rsd = panvk_meta_clear_color_attachment_emit_rsd( - &cmdbuf->desc_pool.base, pfmt, rt, shader_info, shader); - } else { - rsd = panvk_meta_clear_zs_attachment_emit_rsd( - &cmdbuf->desc_pool.base, mask, clear_value->depthStencil); - } - - struct panfrost_ptr job; - - job = panvk_meta_clear_attachment_emit_tiler_job( - &cmdbuf->desc_pool.base, &batch->vtc_jc, coordinates, pushconsts, vpd, - rsd, tsd, tiler); - - util_dynarray_append(&batch->jobs, void *, job.cpu); -} - -static void -panvk_meta_clear_color_img(struct panvk_cmd_buffer *cmdbuf, - struct panvk_image *img, - const VkClearColorValue *color, - const VkImageSubresourceRange *range) -{ - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; - struct pan_image_view view = { - .format = img->pimage.layout.format, - .dim = MALI_TEXTURE_DIMENSION_2D, - .planes[0] = &img->pimage, - .nr_samples = img->pimage.layout.nr_samples, - .swizzle = {PIPE_SWIZZLE_X, 
PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, - PIPE_SWIZZLE_W}, - }; - - cmdbuf->state.gfx.render.layer_count = 1; - cmdbuf->state.gfx.render.fb.crc_valid[0] = false; - *fbinfo = (struct pan_fb_info){ - .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model), - .nr_samples = img->pimage.layout.nr_samples, - .rt_count = 1, - .rts[0].view = &view, - .rts[0].clear = true, - .rts[0].crc_valid = &cmdbuf->state.gfx.render.fb.crc_valid[0], - }; - - uint32_t clearval[4]; - pan_pack_color(panfrost_blendable_formats_v7, clearval, - (union pipe_color_union *)color, img->pimage.layout.format, - false); - memcpy(fbinfo->rts[0].clear_value, clearval, - sizeof(fbinfo->rts[0].clear_value)); - - unsigned level_count = vk_image_subresource_level_count(&img->vk, range); - unsigned layer_count = vk_image_subresource_layer_count(&img->vk, range); - - for (unsigned level = range->baseMipLevel; - level < range->baseMipLevel + level_count; level++) { - view.first_level = view.last_level = level; - fbinfo->width = u_minify(img->pimage.layout.width, level); - fbinfo->height = u_minify(img->pimage.layout.height, level); - fbinfo->extent.maxx = fbinfo->width - 1; - fbinfo->extent.maxy = fbinfo->height - 1; - - for (unsigned layer = range->baseArrayLayer; - layer < range->baseArrayLayer + layer_count; layer++) { - view.first_layer = view.last_layer = layer; - panvk_per_arch(cmd_open_batch)(cmdbuf); - panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); - panvk_per_arch(cmd_close_batch)(cmdbuf); - } - } -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, - VkImageLayout imageLayout, - const VkClearColorValue *pColor, - uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_image, img, image); - - panvk_per_arch(cmd_close_batch)(cmdbuf); - - for (unsigned i = 0; i < rangeCount; i++) - panvk_meta_clear_color_img(cmdbuf, img, pColor, &pRanges[i]); -} - 
-static void -panvk_meta_clear_zs_img(struct panvk_cmd_buffer *cmdbuf, - struct panvk_image *img, - const VkClearDepthStencilValue *value, - const VkImageSubresourceRange *range) -{ - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; - struct pan_image_view view = { - .format = img->pimage.layout.format, - .dim = MALI_TEXTURE_DIMENSION_2D, - .planes[0] = &img->pimage, - .nr_samples = img->pimage.layout.nr_samples, - .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, - PIPE_SWIZZLE_W}, - }; - - cmdbuf->state.gfx.render.fb.crc_valid[0] = false; - *fbinfo = (struct pan_fb_info){ - .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model), - .nr_samples = img->pimage.layout.nr_samples, - .rt_count = 1, - .zs.clear_value.depth = value->depth, - .zs.clear_value.stencil = value->stencil, - .zs.clear.z = range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT, - .zs.clear.s = range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT}; - - const struct util_format_description *fdesc = - util_format_description(view.format); - - if (util_format_has_depth(fdesc)) { - fbinfo->zs.view.zs = &view; - if (util_format_has_stencil(fdesc)) { - fbinfo->zs.preload.z = !fbinfo->zs.clear.z; - fbinfo->zs.preload.s = !fbinfo->zs.clear.s; - } - } else { - fbinfo->zs.view.s = &view; - } - - unsigned level_count = vk_image_subresource_level_count(&img->vk, range); - unsigned layer_count = vk_image_subresource_layer_count(&img->vk, range); - - for (unsigned level = range->baseMipLevel; - level < range->baseMipLevel + level_count; level++) { - view.first_level = view.last_level = level; - fbinfo->width = u_minify(img->pimage.layout.width, level); - fbinfo->height = u_minify(img->pimage.layout.height, level); - fbinfo->extent.maxx = fbinfo->width - 1; - fbinfo->extent.maxy = fbinfo->height - 1; - - for (unsigned layer = 
range->baseArrayLayer; - layer < range->baseArrayLayer + layer_count; layer++) { - view.first_layer = view.last_layer = layer; - panvk_per_arch(cmd_open_batch)(cmdbuf); - panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); - panvk_per_arch(cmd_close_batch)(cmdbuf); - } - } - - memset(fbinfo, 0, sizeof(*fbinfo)); -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdClearDepthStencilImage)( - VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, - const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_image, img, image); - - panvk_per_arch(cmd_close_batch)(cmdbuf); - - for (unsigned i = 0; i < rangeCount; i++) - panvk_meta_clear_zs_img(cmdbuf, img, pDepthStencil, &pRanges[i]); -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdClearAttachments)(VkCommandBuffer commandBuffer, - uint32_t attachmentCount, - const VkClearAttachment *pAttachments, - uint32_t rectCount, - const VkClearRect *pRects) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - const struct vk_subpass *subpass = - &cmdbuf->vk.render_pass->subpasses[cmdbuf->vk.subpass_idx]; - - for (unsigned i = 0; i < attachmentCount; i++) { - for (unsigned j = 0; j < rectCount; j++) { - - uint32_t attachment = VK_ATTACHMENT_UNUSED, rt = 0; - if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - rt = pAttachments[i].colorAttachment; - attachment = subpass->color_attachments[rt].attachment; - } else if (subpass->depth_stencil_attachment) { - attachment = subpass->depth_stencil_attachment->attachment; - } - - if (attachment == VK_ATTACHMENT_UNUSED) - continue; - - panvk_meta_clear_attachment(cmdbuf, rt, pAttachments[i].aspectMask, - &pAttachments[i].clearValue, &pRects[j]); - } - } -} - -static void -panvk_meta_clear_attachment_init(struct panvk_device *dev) -{ - dev->meta.clear_attachment.color[GLSL_TYPE_UINT].shader = - 
panvk_meta_clear_color_attachment_shader( - dev, GLSL_TYPE_UINT, - &dev->meta.clear_attachment.color[GLSL_TYPE_UINT].shader_info); - - dev->meta.clear_attachment.color[GLSL_TYPE_INT].shader = - panvk_meta_clear_color_attachment_shader( - dev, GLSL_TYPE_INT, - &dev->meta.clear_attachment.color[GLSL_TYPE_INT].shader_info); - - dev->meta.clear_attachment.color[GLSL_TYPE_FLOAT].shader = - panvk_meta_clear_color_attachment_shader( - dev, GLSL_TYPE_FLOAT, - &dev->meta.clear_attachment.color[GLSL_TYPE_FLOAT].shader_info); -} - -void -panvk_per_arch(meta_clear_init)(struct panvk_device *dev) -{ - panvk_meta_clear_attachment_init(dev); -} diff --git a/src/panfrost/vulkan/jm/panvk_vX_meta_copy.c b/src/panfrost/vulkan/jm/panvk_vX_meta_copy.c deleted file mode 100644 index 3097e10b078..00000000000 --- a/src/panfrost/vulkan/jm/panvk_vX_meta_copy.c +++ /dev/null @@ -1,1959 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "gen_macros.h" - -#include "nir/nir_builder.h" -#include "pan_encoder.h" -#include "pan_props.h" -#include "pan_shader.h" - -#include "panvk_buffer.h" -#include "panvk_cmd_buffer.h" -#include "panvk_device.h" -#include "panvk_entrypoints.h" -#include "panvk_image.h" -#include "panvk_physical_device.h" - -static mali_ptr -panvk_meta_copy_img_emit_texture(struct pan_pool *desc_pool, - const struct pan_image_view *view) -{ - struct panfrost_ptr texture = pan_pool_alloc_desc(desc_pool, TEXTURE); - size_t payload_size = GENX(panfrost_estimate_texture_payload_size)(view); - struct panfrost_ptr surfaces = pan_pool_alloc_aligned( - desc_pool, payload_size, pan_alignment(SURFACE_WITH_STRIDE)); - - GENX(panfrost_new_texture)(view, texture.cpu, &surfaces); - - return texture.gpu; -} - -static mali_ptr -panvk_meta_copy_img_emit_sampler(struct pan_pool *desc_pool) -{ - struct panfrost_ptr sampler = pan_pool_alloc_desc(desc_pool, SAMPLER); - - pan_pack(sampler.cpu, SAMPLER, cfg) { - cfg.seamless_cube_map = false; - cfg.normalized_coordinates = false; - cfg.minify_nearest = true; - cfg.magnify_nearest = true; - } - - return sampler.gpu; -} - -static void -panvk_meta_copy_emit_varying(struct pan_pool *pool, mali_ptr coordinates, - mali_ptr *varying_bufs, mali_ptr *varyings) -{ - struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE); - struct panfrost_ptr varying_buffer = - pan_pool_alloc_desc_array(pool, 2, ATTRIBUTE_BUFFER); - - pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) { - cfg.pointer = coordinates; - cfg.stride = 4 * sizeof(uint32_t); - cfg.size = cfg.stride * 4; - } - - /* Bifrost needs an empty desc to mark end of prefetching */ - pan_pack(varying_buffer.cpu + 
pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, - cfg) - ; - - pan_pack(varying.cpu, ATTRIBUTE, cfg) { - enum pipe_format f = PIPE_FORMAT_R32G32B32_FLOAT; - - cfg.buffer_index = 0; - cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw; - cfg.offset_enable = false; - } - - *varyings = varying.gpu; - *varying_bufs = varying_buffer.gpu; -} - -static void -panvk_meta_copy_emit_dcd(struct pan_pool *pool, mali_ptr src_coords, - mali_ptr dst_coords, mali_ptr texture, - mali_ptr sampler, mali_ptr vpd, mali_ptr tsd, - mali_ptr rsd, mali_ptr push_constants, void *out) -{ - pan_pack(out, DRAW, cfg) { - cfg.thread_storage = tsd; - cfg.state = rsd; - cfg.push_uniforms = push_constants; - cfg.position = dst_coords; - if (src_coords) { - panvk_meta_copy_emit_varying(pool, src_coords, &cfg.varying_buffers, - &cfg.varyings); - } - cfg.viewport = vpd; - cfg.textures = texture; - cfg.samplers = sampler; - } -} - -static struct panfrost_ptr -panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool, struct pan_jc *jc, - mali_ptr src_coords, mali_ptr dst_coords, - mali_ptr texture, mali_ptr sampler, - mali_ptr push_constants, mali_ptr vpd, - mali_ptr rsd, mali_ptr tsd, mali_ptr tiler) -{ - struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, TILER_JOB); - - panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords, texture, sampler, - vpd, tsd, rsd, push_constants, - pan_section_ptr(job.cpu, TILER_JOB, DRAW)); - - pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) { - cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; - cfg.index_count = 4; - cfg.job_task_split = 6; - } - - pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { - cfg.constant = 1.0f; - } - - void *invoc = pan_section_ptr(job.cpu, TILER_JOB, INVOCATION); - panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false); - - pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg) - ; - pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) { - cfg.address = tiler; - } - - pan_jc_add_job(jc, MALI_JOB_TYPE_TILER, 
false, false, 0, 0, &job, false); - return job; -} - -static struct panfrost_ptr -panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool, struct pan_jc *jc, - const struct pan_compute_dim *num_wg, - const struct pan_compute_dim *wg_sz, - mali_ptr texture, mali_ptr sampler, - mali_ptr push_constants, mali_ptr rsd, - mali_ptr tsd) -{ - struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, COMPUTE_JOB); - - void *invoc = pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION); - panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z, - wg_sz->x, wg_sz->y, wg_sz->z, false, - false); - - pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = 8; - } - - panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler, 0, tsd, rsd, - push_constants, - pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW)); - - pan_jc_add_job(jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0, &job, false); - return job; -} - -static uint32_t -panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize) -{ - switch (texelsize) { - case 6: - return MALI_RGB16UI << 12; - case 8: - return MALI_RG32UI << 12; - case 12: - return MALI_RGB32UI << 12; - case 16: - return MALI_RGBA32UI << 12; - default: - unreachable("Invalid texel size\n"); - } -} - -static mali_ptr -panvk_meta_copy_to_img_emit_rsd(struct pan_pool *desc_pool, mali_ptr shader, - const struct pan_shader_info *shader_info, - enum pipe_format fmt, unsigned wrmask, - bool from_img) -{ - struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc_aggregate( - desc_pool, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(1, BLEND)); - - bool raw = util_format_get_blocksize(fmt) > 4; - unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1; - bool partialwrite = fullmask != wrmask && !raw; - bool readstb = fullmask != wrmask && raw; - - pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(shader_info, shader, &cfg); - if (from_img) { - cfg.shader.varying_count = 1; - cfg.shader.texture_count = 1; - 
cfg.shader.sampler_count = 1; - } - cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; - cfg.multisample_misc.sample_mask = UINT16_MAX; - cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS; - cfg.stencil_mask_misc.stencil_mask_front = 0xFF; - cfg.stencil_mask_misc.stencil_mask_back = 0xFF; - cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS; - cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE; - cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE; - cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE; - cfg.stencil_front.mask = 0xFF; - cfg.stencil_back = cfg.stencil_front; - - cfg.properties.allow_forward_pixel_to_be_killed = true; - cfg.properties.allow_forward_pixel_to_kill = !partialwrite && !readstb; - cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; - cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY; - } - - pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) { - cfg.round_to_fb_precision = true; - cfg.load_destination = partialwrite; - cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; - cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; - cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; - cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; - cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.internal.mode = - partialwrite ? MALI_BLEND_MODE_FIXED_FUNCTION : MALI_BLEND_MODE_OPAQUE; - cfg.equation.color_mask = partialwrite ? 
wrmask : 0xf; - cfg.internal.fixed_function.num_comps = 4; - if (!raw) { - cfg.internal.fixed_function.conversion.memory_format = - GENX(panfrost_dithered_format_from_pipe_format)(fmt, false); - cfg.internal.fixed_function.conversion.register_format = - MALI_REGISTER_FILE_FORMAT_F32; - } else { - unsigned imgtexelsz = util_format_get_blocksize(fmt); - - cfg.internal.fixed_function.conversion.memory_format = - panvk_meta_copy_img_bifrost_raw_format(imgtexelsz); - cfg.internal.fixed_function.conversion.register_format = - (imgtexelsz & 2) ? MALI_REGISTER_FILE_FORMAT_U16 - : MALI_REGISTER_FILE_FORMAT_U32; - } - } - - return rsd_ptr.gpu; -} - -static mali_ptr -panvk_meta_copy_to_buf_emit_rsd(struct pan_pool *desc_pool, mali_ptr shader, - const struct pan_shader_info *shader_info, - bool from_img) -{ - struct panfrost_ptr rsd_ptr = - pan_pool_alloc_desc_aggregate(desc_pool, PAN_DESC(RENDERER_STATE)); - - pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(shader_info, shader, &cfg); - if (from_img) { - cfg.shader.texture_count = 1; - cfg.shader.sampler_count = 1; - } - } - - return rsd_ptr.gpu; -} - -static mali_ptr -panvk_meta_copy_img2img_shader(struct panvk_device *dev, - enum pipe_format srcfmt, enum pipe_format dstfmt, - unsigned dstmask, unsigned texdim, - bool texisarray, bool is_ms, - struct pan_shader_info *shader_info) -{ - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_pool *bin_pool = &dev->meta.bin_pool.base; - - nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(), - "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)", - util_format_name(srcfmt), util_format_name(dstfmt), texdim, - texisarray ? "[]" : "", is_ms ? 
",ms" : ""); - - nir_variable *coord_var = nir_variable_create( - b.shader, nir_var_shader_in, - glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray), "coord"); - coord_var->data.location = VARYING_SLOT_VAR0; - nir_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var)); - - nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1); - tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf; - tex->texture_index = 0; - tex->is_array = texisarray; - tex->dest_type = - util_format_is_unorm(srcfmt) ? nir_type_float32 : nir_type_uint32; - - switch (texdim) { - case 1: - assert(!is_ms); - tex->sampler_dim = GLSL_SAMPLER_DIM_1D; - break; - case 2: - tex->sampler_dim = is_ms ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D; - break; - case 3: - assert(!is_ms); - tex->sampler_dim = GLSL_SAMPLER_DIM_3D; - break; - default: - unreachable("Invalid texture dimension"); - } - - tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord); - tex->coord_components = texdim + texisarray; - - if (is_ms) { - tex->src[1] = - nir_tex_src_for_ssa(nir_tex_src_ms_index, nir_load_sample_id(&b)); - } - - nir_def_init(&tex->instr, &tex->def, 4, - nir_alu_type_get_type_size(tex->dest_type)); - nir_builder_instr_insert(&b, &tex->instr); - - nir_def *texel = &tex->def; - - unsigned dstcompsz = - util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0); - unsigned ndstcomps = util_format_get_nr_components(dstfmt); - const struct glsl_type *outtype = NULL; - - if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) { - nir_def *rgb = nir_f2u32( - &b, nir_fmul(&b, texel, - nir_vec3(&b, nir_imm_float(&b, 31), nir_imm_float(&b, 63), - nir_imm_float(&b, 31)))); - nir_def *rg = nir_vec2( - &b, - nir_ior(&b, nir_channel(&b, rgb, 0), - nir_ishl(&b, nir_channel(&b, rgb, 1), nir_imm_int(&b, 5))), - nir_ior(&b, nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3), - nir_ishl(&b, nir_channel(&b, rgb, 2), nir_imm_int(&b, 3)))); - rg = nir_iand_imm(&b, rg, 255); - texel = 
nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255); - outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2); - } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && - dstfmt == PIPE_FORMAT_R5G6B5_UNORM) { - nir_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255)); - nir_def *rgb = nir_vec3( - &b, nir_channel(&b, rg, 0), - nir_ior(&b, nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5), - nir_ishl(&b, nir_channel(&b, rg, 1), nir_imm_int(&b, 3))), - nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3)); - rgb = nir_iand(&b, rgb, - nir_vec3(&b, nir_imm_int(&b, 31), nir_imm_int(&b, 63), - nir_imm_int(&b, 31))); - texel = nir_fmul( - &b, nir_u2f32(&b, rgb), - nir_vec3(&b, nir_imm_float(&b, 1.0 / 31), nir_imm_float(&b, 1.0 / 63), - nir_imm_float(&b, 1.0 / 31))); - outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3); - } else { - assert(srcfmt == dstfmt); - enum glsl_base_type basetype; - if (util_format_is_unorm(dstfmt)) { - basetype = GLSL_TYPE_FLOAT; - } else if (dstcompsz == 16) { - basetype = GLSL_TYPE_UINT16; - } else { - assert(dstcompsz == 32); - basetype = GLSL_TYPE_UINT; - } - - if (dstcompsz == 16) - texel = nir_u2u16(&b, texel); - - texel = nir_trim_vector(&b, texel, ndstcomps); - outtype = glsl_vector_type(basetype, ndstcomps); - } - - nir_variable *out = - nir_variable_create(b.shader, nir_var_shader_out, outtype, "out"); - out->data.location = FRAG_RESULT_DATA0; - - unsigned fullmask = (1 << ndstcomps) - 1; - if (dstcompsz > 8 && dstmask != fullmask) { - nir_def *oldtexel = nir_load_var(&b, out); - nir_def *dstcomps[4]; - - for (unsigned i = 0; i < ndstcomps; i++) { - if (dstmask & BITFIELD_BIT(i)) - dstcomps[i] = nir_channel(&b, texel, i); - else - dstcomps[i] = nir_channel(&b, oldtexel, i); - } - - texel = nir_vec(&b, dstcomps, ndstcomps); - } - - nir_store_var(&b, out, texel, 0xff); - - struct panfrost_compile_inputs inputs = { - .gpu_id = phys_dev->kmod.props.gpu_prod_id, - .is_blit = true, - .no_ubo_to_push = true, - }; - - struct util_dynarray binary; - - util_dynarray_init(&binary, NULL); 
- pan_shader_preprocess(b.shader, inputs.gpu_id); - NIR_PASS_V(b.shader, GENX(pan_inline_rt_conversion), &dstfmt); - GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); - - shader_info->fs.sample_shading = is_ms; - - mali_ptr shader = - pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); - - util_dynarray_fini(&binary); - ralloc_free(b.shader); - - return shader; -} - -static enum pipe_format -panvk_meta_copy_img_format(enum pipe_format fmt) -{ - /* We can't use a non-compressed format when handling a tiled/AFBC - * compressed format because the tile size differ (4x4 blocks for - * compressed formats and 16x16 texels for non-compressed ones). - */ - assert(!util_format_is_compressed(fmt)); - - /* Pick blendable formats when we can, otherwise pick the UINT variant - * matching the texel size. - */ - switch (util_format_get_blocksize(fmt)) { - case 16: - return PIPE_FORMAT_R32G32B32A32_UINT; - case 12: - return PIPE_FORMAT_R32G32B32_UINT; - case 8: - return PIPE_FORMAT_R32G32_UINT; - case 6: - return PIPE_FORMAT_R16G16B16_UINT; - case 4: - return PIPE_FORMAT_R8G8B8A8_UNORM; - case 2: - return (fmt == PIPE_FORMAT_R5G6B5_UNORM || - fmt == PIPE_FORMAT_B5G6R5_UNORM) - ? 
PIPE_FORMAT_R5G6B5_UNORM - : PIPE_FORMAT_R8G8_UNORM; - case 1: - return PIPE_FORMAT_R8_UNORM; - default: - unreachable("Unsupported format\n"); - } -} - -struct panvk_meta_copy_img2img_format_info { - enum pipe_format srcfmt; - enum pipe_format dstfmt; - unsigned dstmask; -} PACKED; - -static const struct panvk_meta_copy_img2img_format_info - panvk_meta_copy_img2img_fmts[] = { - {PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1}, - {PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7}, - {PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3}, - {PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7}, - {PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3}, - /* Z24S8(depth) */ - {PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7}, - /* Z24S8(stencil) */ - {PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8}, - {PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf}, - {PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7}, - {PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3}, - /* Z32S8X24(depth) */ - {PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1}, - /* Z32S8X24(stencil) */ - {PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2}, - {PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7}, - {PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf}, -}; - -static unsigned -panvk_meta_copy_img2img_format_idx( - struct panvk_meta_copy_img2img_format_info key) -{ - STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == - PANVK_META_COPY_IMG2IMG_NUM_FORMATS); - - for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) { - if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key))) - return i; - } - - unreachable("Invalid image format\n"); -} - -static unsigned -panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask) -{ - if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT && - aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) { - enum pipe_format outfmt = 
panvk_meta_copy_img_format(imgfmt); - - return (1 << util_format_get_nr_components(outfmt)) - 1; - } - - switch (imgfmt) { - case PIPE_FORMAT_S8_UINT: - return 1; - case PIPE_FORMAT_Z16_UNORM: - return 3; - case PIPE_FORMAT_Z16_UNORM_S8_UINT: - return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8; - case PIPE_FORMAT_Z24X8_UNORM: - assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT); - return 7; - case PIPE_FORMAT_Z32_FLOAT: - return 0xf; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2; - default: - unreachable("Invalid depth format\n"); - } -} - -static void -panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_image *src, - const struct panvk_image *dst, - const VkImageCopy2 *region) -{ - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; - struct panvk_meta_copy_img2img_format_info key = { - .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format), - .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format), - .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format, - region->dstSubresource.aspectMask), - }; - - assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples); - - unsigned texdimidx = panvk_meta_copy_tex_type( - src->pimage.layout.dim, src->pimage.layout.array_size > 1); - unsigned fmtidx = panvk_meta_copy_img2img_format_idx(key); - unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0; - - mali_ptr rsd = dev->meta.copy.img2img[ms][texdimidx][fmtidx].rsd; - - struct pan_image_view srcview = { - .format = key.srcfmt, - .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE - ? 
MALI_TEXTURE_DIMENSION_2D - : src->pimage.layout.dim, - .planes[0] = &src->pimage, - .nr_samples = src->pimage.layout.nr_samples, - .first_level = region->srcSubresource.mipLevel, - .last_level = region->srcSubresource.mipLevel, - .first_layer = region->srcSubresource.baseArrayLayer, - .last_layer = region->srcSubresource.baseArrayLayer + - region->srcSubresource.layerCount - 1, - .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, - PIPE_SWIZZLE_W}, - }; - - struct pan_image_view dstview = { - .format = key.dstfmt, - .dim = MALI_TEXTURE_DIMENSION_2D, - .planes[0] = &dst->pimage, - .nr_samples = dst->pimage.layout.nr_samples, - .first_level = region->dstSubresource.mipLevel, - .last_level = region->dstSubresource.mipLevel, - .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, - PIPE_SWIZZLE_W}, - }; - - unsigned minx = MAX2(region->dstOffset.x, 0); - unsigned miny = MAX2(region->dstOffset.y, 0); - unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0); - unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0); - - mali_ptr vpd = panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base, - minx, miny, maxx, maxy); - - float dst_rect[] = { - minx, miny, 0.0, 1.0, maxx + 1, miny, 0.0, 1.0, - minx, maxy + 1, 0.0, 1.0, maxx + 1, maxy + 1, 0.0, 1.0, - }; - - mali_ptr dst_coords = pan_pool_upload_aligned( - &cmdbuf->desc_pool.base, dst_rect, sizeof(dst_rect), 64); - - /* TODO: don't force preloads of dst resources if unneeded */ - - unsigned width = - u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel); - unsigned height = - u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel); - cmdbuf->state.gfx.render.layer_count = 1; - cmdbuf->state.gfx.render.fb.crc_valid[0] = false; - *fbinfo = (struct pan_fb_info){ - .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model), - .width = width, - .height = height, - .extent.minx = minx & ~31, - .extent.miny = miny & ~31, - .extent.maxx = 
MIN2(ALIGN_POT(maxx + 1, 32), width) - 1, - .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1, - .nr_samples = dst->pimage.layout.nr_samples, - .rt_count = 1, - .rts[0].view = &dstview, - .rts[0].preload = true, - .rts[0].crc_valid = &cmdbuf->state.gfx.render.fb.crc_valid[0], - }; - - mali_ptr texture = - panvk_meta_copy_img_emit_texture(&cmdbuf->desc_pool.base, &srcview); - mali_ptr sampler = panvk_meta_copy_img_emit_sampler(&cmdbuf->desc_pool.base); - - panvk_per_arch(cmd_close_batch)(cmdbuf); - - minx = MAX2(region->srcOffset.x, 0); - miny = MAX2(region->srcOffset.y, 0); - maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0); - maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0); - assert(region->dstOffset.z >= 0); - - unsigned first_src_layer = MAX2(0, region->srcOffset.z); - unsigned first_dst_layer = - MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z); - unsigned nlayers = - MAX2(region->dstSubresource.layerCount, region->extent.depth); - for (unsigned l = 0; l < nlayers; l++) { - unsigned src_l = l + first_src_layer; - float src_rect[] = { - minx, miny, src_l, 1.0, maxx + 1, miny, src_l, 1.0, - minx, maxy + 1, src_l, 1.0, maxx + 1, maxy + 1, src_l, 1.0, - }; - - mali_ptr src_coords = pan_pool_upload_aligned( - &cmdbuf->desc_pool.base, src_rect, sizeof(src_rect), 64); - - struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf); - - dstview.first_layer = dstview.last_layer = l + first_dst_layer; - batch->blit.src = src->bo; - batch->blit.dst = dst->bo; - panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); - panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); - panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, 0); - - mali_ptr tsd, tiler; - - tsd = batch->tls.gpu; - tiler = batch->tiler.ctx_descs.gpu; - - struct panfrost_ptr job; - - job = panvk_meta_copy_emit_tiler_job( - &cmdbuf->desc_pool.base, &batch->vtc_jc, src_coords, dst_coords, - texture, sampler, 0, vpd, rsd, tsd, tiler); - - 
util_dynarray_append(&batch->jobs, void *, job.cpu); - panvk_per_arch(cmd_close_batch)(cmdbuf); - } -} - -static void -panvk_meta_copy_img2img_init(struct panvk_device *dev, bool is_ms) -{ - STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == - PANVK_META_COPY_IMG2IMG_NUM_FORMATS); - - for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) { - for (unsigned texdim = 1; texdim <= 3; texdim++) { - unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false); - assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0])); - - /* No MSAA on 1D/3D textures */ - if (texdim != 2 && is_ms) - continue; - - struct pan_shader_info shader_info; - mali_ptr shader = panvk_meta_copy_img2img_shader( - dev, panvk_meta_copy_img2img_fmts[i].srcfmt, - panvk_meta_copy_img2img_fmts[i].dstfmt, - panvk_meta_copy_img2img_fmts[i].dstmask, texdim, false, is_ms, - &shader_info); - dev->meta.copy.img2img[is_ms][texdimidx][i].rsd = - panvk_meta_copy_to_img_emit_rsd( - &dev->meta.desc_pool.base, shader, &shader_info, - panvk_meta_copy_img2img_fmts[i].dstfmt, - panvk_meta_copy_img2img_fmts[i].dstmask, true); - if (texdim == 3) - continue; - - memset(&shader_info, 0, sizeof(shader_info)); - texdimidx = panvk_meta_copy_tex_type(texdim, true); - assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0])); - shader = panvk_meta_copy_img2img_shader( - dev, panvk_meta_copy_img2img_fmts[i].srcfmt, - panvk_meta_copy_img2img_fmts[i].dstfmt, - panvk_meta_copy_img2img_fmts[i].dstmask, texdim, true, is_ms, - &shader_info); - dev->meta.copy.img2img[is_ms][texdimidx][i].rsd = - panvk_meta_copy_to_img_emit_rsd( - &dev->meta.desc_pool.base, shader, &shader_info, - panvk_meta_copy_img2img_fmts[i].dstfmt, - panvk_meta_copy_img2img_fmts[i].dstmask, true); - } - } -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer, - const VkCopyImageInfo2 *pCopyImageInfo) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_image, dst, 
pCopyImageInfo->dstImage); - VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage); - - for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) { - panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]); - } -} - -static unsigned -panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask) -{ - unsigned imgtexelsz = util_format_get_blocksize(imgfmt); - unsigned nbufcomps = util_bitcount(mask); - - if (nbufcomps == util_format_get_nr_components(imgfmt)) - return imgtexelsz; - - /* Special case for Z24 buffers which are not tightly packed */ - if (mask == 7 && imgtexelsz == 4) - return 4; - - /* Special case for S8 extraction from Z32_S8X24 */ - if (mask == 2 && imgtexelsz == 8) - return 1; - - unsigned compsz = - util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0); - - assert(!(compsz % 8)); - - return nbufcomps * compsz / 8; -} - -static enum pipe_format -panvk_meta_copy_buf2img_format(enum pipe_format imgfmt) -{ - /* Pick blendable formats when we can, and the FLOAT variant matching the - * texelsize otherwise. - */ - switch (util_format_get_blocksize(imgfmt)) { - case 1: - return PIPE_FORMAT_R8_UNORM; - /* AFBC stores things differently for RGB565, - * we can't simply map to R8G8 in that case */ - case 2: - return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM || - imgfmt == PIPE_FORMAT_B5G6R5_UNORM) - ? 
PIPE_FORMAT_R5G6B5_UNORM - : PIPE_FORMAT_R8G8_UNORM; - case 4: - return PIPE_FORMAT_R8G8B8A8_UNORM; - case 6: - return PIPE_FORMAT_R16G16B16_UINT; - case 8: - return PIPE_FORMAT_R32G32_UINT; - case 12: - return PIPE_FORMAT_R32G32B32_UINT; - case 16: - return PIPE_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Invalid format\n"); - } -} - -struct panvk_meta_copy_format_info { - enum pipe_format imgfmt; - unsigned mask; -} PACKED; - -static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = - { - {PIPE_FORMAT_R8_UNORM, 0x1}, - {PIPE_FORMAT_R8G8_UNORM, 0x3}, - {PIPE_FORMAT_R5G6B5_UNORM, 0x7}, - {PIPE_FORMAT_R8G8B8A8_UNORM, 0xf}, - {PIPE_FORMAT_R16G16B16_UINT, 0x7}, - {PIPE_FORMAT_R32G32_UINT, 0x3}, - {PIPE_FORMAT_R32G32B32_UINT, 0x7}, - {PIPE_FORMAT_R32G32B32A32_UINT, 0xf}, - /* S8 -> Z24S8 */ - {PIPE_FORMAT_R8G8B8A8_UNORM, 0x8}, - /* S8 -> Z32_S8X24 */ - {PIPE_FORMAT_R32G32_UINT, 0x2}, - /* Z24X8 -> Z24S8 */ - {PIPE_FORMAT_R8G8B8A8_UNORM, 0x7}, - /* Z32 -> Z32_S8X24 */ - {PIPE_FORMAT_R32G32_UINT, 0x1}, -}; - -struct panvk_meta_copy_buf2img_info { - struct { - mali_ptr ptr; - struct { - unsigned line; - unsigned surf; - } stride; - } buf; -} PACKED; - -#define panvk_meta_copy_buf2img_get_info_field(b, field) \ - nir_load_push_constant( \ - (b), 1, sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \ - nir_imm_int(b, 0), \ - .base = offsetof(struct panvk_meta_copy_buf2img_info, field), \ - .range = ~0) - -static mali_ptr -panvk_meta_copy_buf2img_shader(struct panvk_device *dev, - struct panvk_meta_copy_format_info key, - struct pan_shader_info *shader_info) -{ - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_pool *bin_pool = &dev->meta.bin_pool.base; - - nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(), - "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)", - util_format_name(key.imgfmt), key.mask); - - nir_variable 
*coord_var = - nir_variable_create(b.shader, nir_var_shader_in, - glsl_vector_type(GLSL_TYPE_FLOAT, 3), "coord"); - coord_var->data.location = VARYING_SLOT_VAR0; - nir_def *coord = nir_load_var(&b, coord_var); - - coord = nir_f2u32(&b, coord); - - nir_def *bufptr = panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr); - nir_def *buflinestride = - panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line); - nir_def *bufsurfstride = - panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf); - - unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt); - unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); - unsigned writemask = key.mask; - - nir_def *offset = - nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz)); - offset = nir_iadd(&b, offset, - nir_imul(&b, nir_channel(&b, coord, 1), buflinestride)); - offset = nir_iadd(&b, offset, - nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride)); - bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset)); - - unsigned imgcompsz = - (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) - ? 
1 - : MIN2(1 << (ffs(imgtexelsz) - 1), 4); - - unsigned nimgcomps = imgtexelsz / imgcompsz; - unsigned bufcompsz = MIN2(buftexelsz, imgcompsz); - unsigned nbufcomps = buftexelsz / bufcompsz; - - assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4); - assert(nbufcomps <= 4 && nimgcomps <= 4); - - nir_def *texel = - nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8); - - enum glsl_base_type basetype; - if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) { - texel = nir_vec3( - &b, nir_iand_imm(&b, texel, BITFIELD_MASK(5)), - nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)), - nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5))); - texel = nir_fmul( - &b, nir_u2f32(&b, texel), - nir_vec3(&b, nir_imm_float(&b, 1.0f / 31), - nir_imm_float(&b, 1.0f / 63), nir_imm_float(&b, 1.0f / 31))); - nimgcomps = 3; - basetype = GLSL_TYPE_FLOAT; - } else if (imgcompsz == 1) { - assert(bufcompsz == 1); - /* Blendable formats are unorm and the fixed-function blend unit - * takes float values. - */ - texel = nir_fmul_imm(&b, nir_u2f32(&b, texel), 1.0f / 255); - basetype = GLSL_TYPE_FLOAT; - } else { - texel = nir_u2uN(&b, texel, imgcompsz * 8); - basetype = imgcompsz == 2 ? 
GLSL_TYPE_UINT16 : GLSL_TYPE_UINT; - } - - /* We always pass the texel using 32-bit regs for now */ - nir_variable *out = - nir_variable_create(b.shader, nir_var_shader_out, - glsl_vector_type(basetype, nimgcomps), "out"); - out->data.location = FRAG_RESULT_DATA0; - - uint16_t fullmask = (1 << nimgcomps) - 1; - - assert(fullmask >= writemask); - - if (fullmask != writemask) { - unsigned first_written_comp = ffs(writemask) - 1; - nir_def *oldtexel = NULL; - if (imgcompsz > 1) - oldtexel = nir_load_var(&b, out); - - nir_def *texel_comps[4]; - for (unsigned i = 0; i < nimgcomps; i++) { - if (writemask & BITFIELD_BIT(i)) - texel_comps[i] = nir_channel(&b, texel, i - first_written_comp); - else if (imgcompsz > 1) - texel_comps[i] = nir_channel(&b, oldtexel, i); - else - texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size); - } - - texel = nir_vec(&b, texel_comps, nimgcomps); - } - - nir_store_var(&b, out, texel, 0xff); - - struct panfrost_compile_inputs inputs = { - .gpu_id = phys_dev->kmod.props.gpu_prod_id, - .is_blit = true, - .no_ubo_to_push = true, - }; - - struct util_dynarray binary; - - util_dynarray_init(&binary, NULL); - pan_shader_preprocess(b.shader, inputs.gpu_id); - - enum pipe_format rt_formats[8] = {key.imgfmt}; - NIR_PASS_V(b.shader, GENX(pan_inline_rt_conversion), rt_formats); - - GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); - shader_info->push.count = - DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4); - - mali_ptr shader = - pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); - - util_dynarray_fini(&binary); - ralloc_free(b.shader); - - return shader; -} - -static unsigned -panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key) -{ - for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) { - if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key))) - return i; - } - - unreachable("Invalid image format\n"); -} - -static void -panvk_meta_copy_buf2img(struct 
panvk_cmd_buffer *cmdbuf, - const struct panvk_buffer *buf, - const struct panvk_image *img, - const VkBufferImageCopy2 *region) -{ - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; - unsigned minx = MAX2(region->imageOffset.x, 0); - unsigned miny = MAX2(region->imageOffset.y, 0); - unsigned maxx = - MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0); - unsigned maxy = - MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0); - - mali_ptr vpd = panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base, - minx, miny, maxx, maxy); - - float dst_rect[] = { - minx, miny, 0.0, 1.0, maxx + 1, miny, 0.0, 1.0, - minx, maxy + 1, 0.0, 1.0, maxx + 1, maxy + 1, 0.0, 1.0, - }; - mali_ptr dst_coords = pan_pool_upload_aligned( - &cmdbuf->desc_pool.base, dst_rect, sizeof(dst_rect), 64); - - struct panvk_meta_copy_format_info key = { - .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format), - .mask = panvk_meta_copy_img_mask(img->pimage.layout.format, - region->imageSubresource.aspectMask), - }; - - unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key); - - mali_ptr rsd = dev->meta.copy.buf2img[fmtidx].rsd; - - const struct vk_image_buffer_layout buflayout = - vk_image_buffer_copy_layout(&img->vk, region); - struct panvk_meta_copy_buf2img_info info = { - .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset), - .buf.stride.line = buflayout.row_stride_B, - .buf.stride.surf = buflayout.image_stride_B, - }; - - mali_ptr pushconsts = - pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); - - struct pan_image_view view = { - .format = key.imgfmt, - .dim = MALI_TEXTURE_DIMENSION_2D, - .planes[0] = &img->pimage, - .nr_samples = img->pimage.layout.nr_samples, - .first_level = region->imageSubresource.mipLevel, - .last_level = 
region->imageSubresource.mipLevel, - .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, - PIPE_SWIZZLE_W}, - }; - - /* TODO: don't force preloads of dst resources if unneeded */ - cmdbuf->state.gfx.render.layer_count = 1; - cmdbuf->state.gfx.render.fb.crc_valid[0] = false; - *fbinfo = (struct pan_fb_info){ - .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model), - .width = - u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel), - .height = - u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel), - .extent.minx = minx, - .extent.maxx = maxx, - .extent.miny = miny, - .extent.maxy = maxy, - .nr_samples = 1, - .rt_count = 1, - .rts[0].view = &view, - .rts[0].preload = true, - .rts[0].crc_valid = &cmdbuf->state.gfx.render.fb.crc_valid[0], - }; - - panvk_per_arch(cmd_close_batch)(cmdbuf); - - assert(region->imageSubresource.layerCount == 1 || - region->imageExtent.depth == 1); - assert(region->imageOffset.z >= 0); - unsigned first_layer = - MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z); - unsigned nlayers = - MAX2(region->imageSubresource.layerCount, region->imageExtent.depth); - for (unsigned l = 0; l < nlayers; l++) { - float src_rect[] = { - 0, - 0, - l, - 1.0, - region->imageExtent.width, - 0, - l, - 1.0, - 0, - region->imageExtent.height, - l, - 1.0, - region->imageExtent.width, - region->imageExtent.height, - l, - 1.0, - }; - - mali_ptr src_coords = pan_pool_upload_aligned( - &cmdbuf->desc_pool.base, src_rect, sizeof(src_rect), 64); - - struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf); - - view.first_layer = view.last_layer = l + first_layer; - batch->blit.src = buf->bo; - batch->blit.dst = img->bo; - panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); - panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); - panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, 0); - - mali_ptr tsd, tiler; - - tsd = batch->tls.gpu; - tiler = batch->tiler.ctx_descs.gpu; - - struct panfrost_ptr job; - - 
job = panvk_meta_copy_emit_tiler_job( - &cmdbuf->desc_pool.base, &batch->vtc_jc, src_coords, dst_coords, 0, 0, - pushconsts, vpd, rsd, tsd, tiler); - - util_dynarray_append(&batch->jobs, void *, job.cpu); - panvk_per_arch(cmd_close_batch)(cmdbuf); - } -} - -static void -panvk_meta_copy_buf2img_init(struct panvk_device *dev) -{ - STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == - PANVK_META_COPY_BUF2IMG_NUM_FORMATS); - - for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) { - struct pan_shader_info shader_info; - mali_ptr shader = panvk_meta_copy_buf2img_shader( - dev, panvk_meta_copy_buf2img_fmts[i], &shader_info); - dev->meta.copy.buf2img[i].rsd = panvk_meta_copy_to_img_emit_rsd( - &dev->meta.desc_pool.base, shader, &shader_info, - panvk_meta_copy_buf2img_fmts[i].imgfmt, - panvk_meta_copy_buf2img_fmts[i].mask, false); - } -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdCopyBufferToImage2)( - VkCommandBuffer commandBuffer, - const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer); - VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage); - - for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) { - panvk_meta_copy_buf2img(cmdbuf, buf, img, - &pCopyBufferToImageInfo->pRegions[i]); - } -} - -static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = - { - {PIPE_FORMAT_R8_UINT, 0x1}, - {PIPE_FORMAT_R8G8_UINT, 0x3}, - {PIPE_FORMAT_R5G6B5_UNORM, 0x7}, - {PIPE_FORMAT_R8G8B8A8_UINT, 0xf}, - {PIPE_FORMAT_R16G16B16_UINT, 0x7}, - {PIPE_FORMAT_R32G32_UINT, 0x3}, - {PIPE_FORMAT_R32G32B32_UINT, 0x7}, - {PIPE_FORMAT_R32G32B32A32_UINT, 0xf}, - /* S8 -> Z24S8 */ - {PIPE_FORMAT_R8G8B8A8_UINT, 0x8}, - /* S8 -> Z32_S8X24 */ - {PIPE_FORMAT_R32G32_UINT, 0x2}, - /* Z24X8 -> Z24S8 */ - {PIPE_FORMAT_R8G8B8A8_UINT, 0x7}, - /* Z32 -> Z32_S8X24 */ - {PIPE_FORMAT_R32G32_UINT, 0x1}, 
-}; - -static enum pipe_format -panvk_meta_copy_img2buf_format(enum pipe_format imgfmt) -{ - /* Pick blendable formats when we can, and the FLOAT variant matching the - * texelsize otherwise. - */ - switch (util_format_get_blocksize(imgfmt)) { - case 1: - return PIPE_FORMAT_R8_UINT; - /* AFBC stores things differently for RGB565, - * we can't simply map to R8G8 in that case */ - case 2: - return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM || - imgfmt == PIPE_FORMAT_B5G6R5_UNORM) - ? PIPE_FORMAT_R5G6B5_UNORM - : PIPE_FORMAT_R8G8_UINT; - case 4: - return PIPE_FORMAT_R8G8B8A8_UINT; - case 6: - return PIPE_FORMAT_R16G16B16_UINT; - case 8: - return PIPE_FORMAT_R32G32_UINT; - case 12: - return PIPE_FORMAT_R32G32B32_UINT; - case 16: - return PIPE_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Invalid format\n"); - } -} - -struct panvk_meta_copy_img2buf_info { - struct { - mali_ptr ptr; - struct { - unsigned line; - unsigned surf; - } stride; - } buf; - struct { - struct { - unsigned x, y, z; - } offset; - struct { - unsigned minx, miny, maxx, maxy; - } extent; - } img; -} PACKED; - -#define panvk_meta_copy_img2buf_get_info_field(b, field) \ - nir_load_push_constant( \ - (b), 1, sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \ - nir_imm_int(b, 0), \ - .base = offsetof(struct panvk_meta_copy_img2buf_info, field), \ - .range = ~0) - -static mali_ptr -panvk_meta_copy_img2buf_shader(struct panvk_device *dev, - struct panvk_meta_copy_format_info key, - unsigned texdim, unsigned texisarray, - struct pan_shader_info *shader_info) -{ - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt); - unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); - struct pan_pool *bin_pool = &dev->meta.bin_pool.base; - - /* FIXME: Won't work on compute queues, but we can't do that with - * a compute shader if the destination is an AFBC surface. 
- */ - nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(), - "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)", texdim, - texisarray ? "[]" : "", util_format_name(key.imgfmt), key.mask); - - nir_def *coord = nir_load_global_invocation_id(&b, 32); - nir_def *bufptr = panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr); - nir_def *buflinestride = - panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line); - nir_def *bufsurfstride = - panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf); - - nir_def *imgminx = - panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx); - nir_def *imgminy = - panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny); - nir_def *imgmaxx = - panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx); - nir_def *imgmaxy = - panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy); - - nir_def *imgcoords, *inbounds; - - switch (texdim + texisarray) { - case 1: - imgcoords = - nir_iadd(&b, nir_channel(&b, coord, 0), - panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)); - inbounds = - nir_iand(&b, nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)), - nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx)); - break; - case 2: - imgcoords = nir_vec2( - &b, - nir_iadd(&b, nir_channel(&b, coord, 0), - panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)), - nir_iadd(&b, nir_channel(&b, coord, 1), - panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y))); - inbounds = nir_iand( - &b, - nir_iand(&b, nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)), - nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))), - nir_iand(&b, nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx), - nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy))); - break; - case 3: - imgcoords = nir_vec3( - &b, - nir_iadd(&b, nir_channel(&b, coord, 0), - panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)), - nir_iadd(&b, nir_channel(&b, coord, 1), - 
panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)), - nir_iadd(&b, nir_channel(&b, coord, 2), - panvk_meta_copy_img2buf_get_info_field(&b, img.offset.z))); - inbounds = nir_iand( - &b, - nir_iand(&b, nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)), - nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))), - nir_iand(&b, nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx), - nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy))); - break; - default: - unreachable("Invalid texture dimension\n"); - } - - nir_push_if(&b, inbounds); - - /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4 - * blocks instead of 16x16 texels in that case, and there's nothing we can - * do to force the tile size to 4x4 in the render path. - * This being said, compressed textures are not compatible with AFBC, so we - * could use a compute shader arranging the blocks properly. - */ - nir_def *offset = - nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz)); - offset = nir_iadd(&b, offset, - nir_imul(&b, nir_channel(&b, coord, 1), buflinestride)); - offset = nir_iadd(&b, offset, - nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride)); - bufptr = nir_iadd(&b, bufptr, nir_i2i64(&b, offset)); - - unsigned imgcompsz = - imgtexelsz <= 4 ? 1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4); - unsigned nimgcomps = imgtexelsz / imgcompsz; - assert(nimgcomps <= 4); - - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); - tex->op = nir_texop_txf; - tex->texture_index = 0; - tex->is_array = texisarray; - tex->dest_type = - util_format_is_unorm(key.imgfmt) ? 
nir_type_float32 : nir_type_uint32; - - switch (texdim) { - case 1: - tex->sampler_dim = GLSL_SAMPLER_DIM_1D; - break; - case 2: - tex->sampler_dim = GLSL_SAMPLER_DIM_2D; - break; - case 3: - tex->sampler_dim = GLSL_SAMPLER_DIM_3D; - break; - default: - unreachable("Invalid texture dimension"); - } - - tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, imgcoords); - tex->coord_components = texdim + texisarray; - nir_def_init(&tex->instr, &tex->def, 4, - nir_alu_type_get_type_size(tex->dest_type)); - nir_builder_instr_insert(&b, &tex->instr); - - nir_def *texel = &tex->def; - - unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1; - unsigned nbufcomps = util_bitcount(fullmask); - if (key.mask != fullmask) { - nir_def *bufcomps[4]; - nbufcomps = 0; - for (unsigned i = 0; i < nimgcomps; i++) { - if (key.mask & BITFIELD_BIT(i)) - bufcomps[nbufcomps++] = nir_channel(&b, texel, i); - } - - texel = nir_vec(&b, bufcomps, nbufcomps); - } - - unsigned bufcompsz = buftexelsz / nbufcomps; - - if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) { - texel = nir_fmul(&b, texel, - nir_vec3(&b, nir_imm_float(&b, 31), - nir_imm_float(&b, 63), nir_imm_float(&b, 31))); - texel = nir_f2u16(&b, texel); - texel = nir_ior( - &b, nir_channel(&b, texel, 0), - nir_ior(&b, - nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)), - nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11)))); - imgcompsz = 2; - bufcompsz = 2; - nbufcomps = 1; - nimgcomps = 1; - } else if (imgcompsz == 1) { - nir_def *packed = nir_channel(&b, texel, 0); - for (unsigned i = 1; i < nbufcomps; i++) { - packed = nir_ior( - &b, packed, - nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff), - nir_imm_int(&b, i * 8))); - } - texel = packed; - - bufcompsz = nbufcomps == 3 ? 
4 : nbufcomps; - nbufcomps = 1; - } - - assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4); - assert(nbufcomps <= 4 && nimgcomps <= 4); - texel = nir_u2uN(&b, texel, bufcompsz * 8); - - nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1); - nir_pop_if(&b, NULL); - - struct panfrost_compile_inputs inputs = { - .gpu_id = phys_dev->kmod.props.gpu_prod_id, - .is_blit = true, - .no_ubo_to_push = true, - }; - - struct util_dynarray binary; - - util_dynarray_init(&binary, NULL); - pan_shader_preprocess(b.shader, inputs.gpu_id); - GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); - - shader_info->push.count = - DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4); - - mali_ptr shader = - pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); - - util_dynarray_fini(&binary); - ralloc_free(b.shader); - - return shader; -} - -static unsigned -panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key) -{ - for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) { - if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key))) - return i; - } - - unreachable("Invalid texel size\n"); -} - -static void -panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_buffer *buf, - const struct panvk_image *img, - const VkBufferImageCopy2 *region) -{ - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - unsigned blksz = util_format_get_blocksize(img->pimage.layout.format); - struct panvk_meta_copy_format_info key = { - .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format), - .mask = panvk_meta_copy_img_mask(img->pimage.layout.format, - region->imageSubresource.aspectMask), - }; - unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); - unsigned texdimidx = panvk_meta_copy_tex_type( - img->pimage.layout.dim, img->pimage.layout.array_size > 1); - unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key); - - mali_ptr rsd = 
dev->meta.copy.img2buf[texdimidx][fmtidx].rsd; - - struct panvk_meta_copy_img2buf_info info = { - .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset) - - (region->imageOffset.x & 15) * blksz, - .buf.stride.line = - (region->bufferRowLength ?: region->imageExtent.width) * buftexelsz, - .img.offset.x = MAX2(region->imageOffset.x & ~15, 0), - .img.extent.minx = MAX2(region->imageOffset.x, 0), - .img.extent.maxx = - MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0), - }; - - if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) { - info.img.extent.maxy = region->imageSubresource.layerCount - 1; - } else { - info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0); - info.buf.ptr -= (region->imageOffset.y & 15) * info.buf.stride.line; - info.img.offset.z = MAX2(region->imageOffset.z, 0); - info.img.extent.miny = MAX2(region->imageOffset.y, 0); - info.img.extent.maxy = - MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0); - } - - info.buf.stride.surf = - (region->bufferImageHeight ?: region->imageExtent.height) * - info.buf.stride.line; - - mali_ptr pushconsts = - pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); - - struct pan_image_view view = { - .format = key.imgfmt, - .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE - ? 
MALI_TEXTURE_DIMENSION_2D - : img->pimage.layout.dim, - .planes[0] = &img->pimage, - .nr_samples = img->pimage.layout.nr_samples, - .first_level = region->imageSubresource.mipLevel, - .last_level = region->imageSubresource.mipLevel, - .first_layer = region->imageSubresource.baseArrayLayer, - .last_layer = region->imageSubresource.baseArrayLayer + - region->imageSubresource.layerCount - 1, - .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, - PIPE_SWIZZLE_W}, - }; - - mali_ptr texture = - panvk_meta_copy_img_emit_texture(&cmdbuf->desc_pool.base, &view); - mali_ptr sampler = panvk_meta_copy_img_emit_sampler(&cmdbuf->desc_pool.base); - - panvk_per_arch(cmd_close_batch)(cmdbuf); - - struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf); - - struct pan_tls_info tlsinfo = {0}; - - batch->blit.src = img->bo; - batch->blit.dst = buf->bo; - batch->tls = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE); - GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu); - - mali_ptr tsd = batch->tls.gpu; - - struct pan_compute_dim wg_sz = { - 16, - img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16, - 1, - }; - - struct pan_compute_dim num_wg = { - (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16, - img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D - ? region->imageSubresource.layerCount - : (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16, - img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D - ? 
MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) - : 1, - }; - - struct panfrost_ptr job = panvk_meta_copy_emit_compute_job( - &cmdbuf->desc_pool.base, &batch->vtc_jc, &num_wg, &wg_sz, texture, - sampler, pushconsts, rsd, tsd); - - util_dynarray_append(&batch->jobs, void *, job.cpu); - - panvk_per_arch(cmd_close_batch)(cmdbuf); -} - -static void -panvk_meta_copy_img2buf_init(struct panvk_device *dev) -{ - STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == - PANVK_META_COPY_IMG2BUF_NUM_FORMATS); - - for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) { - for (unsigned texdim = 1; texdim <= 3; texdim++) { - unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false); - assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf)); - - struct pan_shader_info shader_info; - mali_ptr shader = panvk_meta_copy_img2buf_shader( - dev, panvk_meta_copy_img2buf_fmts[i], texdim, false, &shader_info); - dev->meta.copy.img2buf[texdimidx][i].rsd = - panvk_meta_copy_to_buf_emit_rsd(&dev->meta.desc_pool.base, shader, - &shader_info, true); - - if (texdim == 3) - continue; - - memset(&shader_info, 0, sizeof(shader_info)); - texdimidx = panvk_meta_copy_tex_type(texdim, true); - assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf)); - shader = panvk_meta_copy_img2buf_shader( - dev, panvk_meta_copy_img2buf_fmts[i], texdim, true, &shader_info); - dev->meta.copy.img2buf[texdimidx][i].rsd = - panvk_meta_copy_to_buf_emit_rsd(&dev->meta.desc_pool.base, shader, - &shader_info, true); - } - } -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdCopyImageToBuffer2)( - VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer); - VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage); - - for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) { - 
panvk_meta_copy_img2buf(cmdbuf, buf, img, - &pCopyImageToBufferInfo->pRegions[i]); - } -} - -struct panvk_meta_copy_buf2buf_info { - mali_ptr src; - mali_ptr dst; -} PACKED; - -#define panvk_meta_copy_buf2buf_get_info_field(b, field) \ - nir_load_push_constant( \ - (b), 1, sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \ - nir_imm_int(b, 0), \ - .base = offsetof(struct panvk_meta_copy_buf2buf_info, field), \ - .range = ~0) - -static mali_ptr -panvk_meta_copy_buf2buf_shader(struct panvk_device *dev, unsigned blksz, - struct pan_shader_info *shader_info) -{ - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_pool *bin_pool = &dev->meta.bin_pool.base; - - /* FIXME: Won't work on compute queues, but we can't do that with - * a compute shader if the destination is an AFBC surface. - */ - nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(), - "panvk_meta_copy_buf2buf(blksz=%d)", blksz); - - nir_def *coord = nir_load_global_invocation_id(&b, 32); - - nir_def *offset = nir_u2u64( - &b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz))); - nir_def *srcptr = - nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset); - nir_def *dstptr = - nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset); - - unsigned compsz = blksz < 4 ? 
blksz : 4; - unsigned ncomps = blksz / compsz; - nir_store_global(&b, dstptr, blksz, - nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8), - (1 << ncomps) - 1); - - struct panfrost_compile_inputs inputs = { - .gpu_id = phys_dev->kmod.props.gpu_prod_id, - .is_blit = true, - .no_ubo_to_push = true, - }; - - struct util_dynarray binary; - - util_dynarray_init(&binary, NULL); - pan_shader_preprocess(b.shader, inputs.gpu_id); - GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); - - shader_info->push.count = - DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4); - - mali_ptr shader = - pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); - - util_dynarray_fini(&binary); - ralloc_free(b.shader); - - return shader; -} - -static void -panvk_meta_copy_buf2buf_init(struct panvk_device *dev) -{ - for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) { - struct pan_shader_info shader_info; - mali_ptr shader = - panvk_meta_copy_buf2buf_shader(dev, 1 << i, &shader_info); - dev->meta.copy.buf2buf[i].rsd = panvk_meta_copy_to_buf_emit_rsd( - &dev->meta.desc_pool.base, shader, &shader_info, false); - } -} - -static void -panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_buffer *src, - const struct panvk_buffer *dst, - const VkBufferCopy2 *region) -{ - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - struct panvk_meta_copy_buf2buf_info info = { - .src = panvk_buffer_gpu_ptr(src, region->srcOffset), - .dst = panvk_buffer_gpu_ptr(dst, region->dstOffset), - }; - - unsigned alignment = ffs((info.src | info.dst | region->size) & 15); - unsigned log2blksz = alignment ? 
alignment - 1 : 4; - - assert(log2blksz < ARRAY_SIZE(dev->meta.copy.buf2buf)); - mali_ptr rsd = dev->meta.copy.buf2buf[log2blksz].rsd; - - mali_ptr pushconsts = - pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); - - panvk_per_arch(cmd_close_batch)(cmdbuf); - - struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf); - - panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); - - mali_ptr tsd = batch->tls.gpu; - - unsigned nblocks = region->size >> log2blksz; - struct pan_compute_dim num_wg = {nblocks, 1, 1}; - struct pan_compute_dim wg_sz = {1, 1, 1}; - struct panfrost_ptr job = panvk_meta_copy_emit_compute_job( - &cmdbuf->desc_pool.base, &batch->vtc_jc, &num_wg, &wg_sz, 0, 0, - pushconsts, rsd, tsd); - - util_dynarray_append(&batch->jobs, void *, job.cpu); - - batch->blit.src = src->bo; - batch->blit.dst = dst->bo; - panvk_per_arch(cmd_close_batch)(cmdbuf); -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer, - const VkCopyBufferInfo2 *pCopyBufferInfo) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer); - VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer); - - for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) { - panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]); - } -} - -struct panvk_meta_fill_buf_info { - mali_ptr start; - uint32_t val; -} PACKED; - -#define panvk_meta_fill_buf_get_info_field(b, field) \ - nir_load_push_constant( \ - (b), 1, sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \ - nir_imm_int(b, 0), \ - .base = offsetof(struct panvk_meta_fill_buf_info, field), .range = ~0) - -static mali_ptr -panvk_meta_fill_buf_shader(struct panvk_device *dev, - struct pan_shader_info *shader_info) -{ - struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - struct pan_pool *bin_pool = &dev->meta.bin_pool.base; - - /* FIXME: Won't 
work on compute queues, but we can't do that with - * a compute shader if the destination is an AFBC surface. - */ - nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(), - "panvk_meta_fill_buf()"); - - nir_def *coord = nir_load_global_invocation_id(&b, 32); - - nir_def *offset = nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), - nir_imm_int(&b, sizeof(uint32_t)))); - nir_def *ptr = - nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset); - nir_def *val = panvk_meta_fill_buf_get_info_field(&b, val); - - nir_store_global(&b, ptr, sizeof(uint32_t), val, 1); - - struct panfrost_compile_inputs inputs = { - .gpu_id = phys_dev->kmod.props.gpu_prod_id, - .is_blit = true, - .no_ubo_to_push = true, - }; - - struct util_dynarray binary; - - util_dynarray_init(&binary, NULL); - pan_shader_preprocess(b.shader, inputs.gpu_id); - GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); - - shader_info->push.count = - DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4); - - mali_ptr shader = - pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); - - util_dynarray_fini(&binary); - ralloc_free(b.shader); - - return shader; -} - -static mali_ptr -panvk_meta_fill_buf_emit_rsd(struct panvk_device *dev) -{ - struct pan_pool *desc_pool = &dev->meta.desc_pool.base; - struct pan_shader_info shader_info; - - mali_ptr shader = panvk_meta_fill_buf_shader(dev, &shader_info); - - struct panfrost_ptr rsd_ptr = - pan_pool_alloc_desc_aggregate(desc_pool, PAN_DESC(RENDERER_STATE)); - - pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(&shader_info, shader, &cfg); - } - - return rsd_ptr.gpu; -} - -static void -panvk_meta_fill_buf_init(struct panvk_device *dev) -{ - dev->meta.copy.fillbuf.rsd = panvk_meta_fill_buf_emit_rsd(dev); -} - -static void -panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_buffer *dst, VkDeviceSize size, - VkDeviceSize offset, 
uint32_t val) -{ - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - struct panvk_meta_fill_buf_info info = { - .start = panvk_buffer_gpu_ptr(dst, offset), - .val = val, - }; - size = panvk_buffer_range(dst, offset, size); - - /* From the Vulkan spec: - * - * "size is the number of bytes to fill, and must be either a multiple - * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of - * the buffer. If VK_WHOLE_SIZE is used and the remaining size of the - * buffer is not a multiple of 4, then the nearest smaller multiple is - * used." - */ - size &= ~3ull; - - assert(!(offset & 3) && !(size & 3)); - - unsigned nwords = size / sizeof(uint32_t); - mali_ptr rsd = dev->meta.copy.fillbuf.rsd; - - mali_ptr pushconsts = - pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); - - panvk_per_arch(cmd_close_batch)(cmdbuf); - - struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf); - - panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); - - mali_ptr tsd = batch->tls.gpu; - - struct pan_compute_dim num_wg = {nwords, 1, 1}; - struct pan_compute_dim wg_sz = {1, 1, 1}; - struct panfrost_ptr job = panvk_meta_copy_emit_compute_job( - &cmdbuf->desc_pool.base, &batch->vtc_jc, &num_wg, &wg_sz, 0, 0, - pushconsts, rsd, tsd); - - util_dynarray_append(&batch->jobs, void *, job.cpu); - - batch->blit.dst = dst->bo; - panvk_per_arch(cmd_close_batch)(cmdbuf); -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, - VkDeviceSize dstOffset, VkDeviceSize fillSize, - uint32_t data) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer); - - panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data); -} - -static void -panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_buffer *dst, VkDeviceSize offset, - VkDeviceSize size, const void *data) -{ - struct panvk_device *dev = 
to_panvk_device(cmdbuf->vk.base.device); - struct panvk_meta_copy_buf2buf_info info = { - .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4), - .dst = panvk_buffer_gpu_ptr(dst, offset), - }; - - unsigned log2blksz = ffs(sizeof(uint32_t)) - 1; - - mali_ptr rsd = dev->meta.copy.buf2buf[log2blksz].rsd; - - mali_ptr pushconsts = - pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); - - panvk_per_arch(cmd_close_batch)(cmdbuf); - - struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf); - - panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); - - mali_ptr tsd = batch->tls.gpu; - - unsigned nblocks = size >> log2blksz; - struct pan_compute_dim num_wg = {nblocks, 1, 1}; - struct pan_compute_dim wg_sz = {1, 1, 1}; - struct panfrost_ptr job = panvk_meta_copy_emit_compute_job( - &cmdbuf->desc_pool.base, &batch->vtc_jc, &num_wg, &wg_sz, 0, 0, - pushconsts, rsd, tsd); - - util_dynarray_append(&batch->jobs, void *, job.cpu); - - batch->blit.dst = dst->bo; - panvk_per_arch(cmd_close_batch)(cmdbuf); -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, VkDeviceSize dstOffset, - VkDeviceSize dataSize, const void *pData) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer); - - panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData); -} - -void -panvk_per_arch(meta_copy_init)(struct panvk_device *dev) -{ - panvk_meta_copy_img2img_init(dev, false); - panvk_meta_copy_img2img_init(dev, true); - panvk_meta_copy_buf2img_init(dev); - panvk_meta_copy_img2buf_init(dev); - panvk_meta_copy_buf2buf_init(dev); - panvk_meta_fill_buf_init(dev); -} diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build index 4be3ae6f76b..f775e752c39 100644 --- a/src/panfrost/vulkan/meson.build +++ b/src/panfrost/vulkan/meson.build @@ -56,10 +56,6 @@ jm_files = [ 'jm/panvk_vX_cmd_dispatch.c', 
'jm/panvk_vX_cmd_draw.c', 'jm/panvk_vX_cmd_event.c', - 'jm/panvk_vX_meta.c', - 'jm/panvk_vX_meta_blit.c', - 'jm/panvk_vX_meta_clear.c', - 'jm/panvk_vX_meta_copy.c', 'jm/panvk_vX_queue.c', ] diff --git a/src/panfrost/vulkan/panvk_device.h b/src/panfrost/vulkan/panvk_device.h index 882dbec4175..7c8dcce33aa 100644 --- a/src/panfrost/vulkan/panvk_device.h +++ b/src/panfrost/vulkan/panvk_device.h @@ -9,6 +9,7 @@ #include #include "vk_device.h" +#include "vk_meta.h" #include "panvk_blend.h" #include "panvk_instance.h" @@ -36,13 +37,31 @@ struct panvk_device { struct panvk_priv_bo *tiler_heap; struct panvk_priv_bo *sample_positions; - struct panvk_blend_shader_cache blend_shader_cache; - struct panvk_meta meta; - + /* Access to the blitter pools are protected by the blitter + * shader/rsd locks. They can't be merged with other binary/desc + * pools unless we patch pan_blitter.c to support external pool locks. + * + * FIXME: The blitter infrastructure is only needed for FB preload. + * We should probably consider getting rid of the dependency we have + * on pan_desc.c and implement preload ourselves so we don't have + * to duplicate caches. 
+ */ struct { - struct panvk_priv_bo *shader_bo; - struct panvk_priv_bo *rsd_bo; + struct panvk_pool bin_pool; + struct panvk_pool desc_pool; + struct pan_blitter_cache cache; + struct pan_blend_shader_cache blend_shader_cache; + } blitter; + + struct panvk_blend_shader_cache blend_shader_cache; + struct vk_meta_device meta; + +#if PAN_ARCH <= 7 + struct { + struct panvk_priv_mem shader; + struct panvk_priv_mem rsd; } desc_copy; +#endif struct { struct panvk_pool rw; diff --git a/src/panfrost/vulkan/panvk_image.c b/src/panfrost/vulkan/panvk_image.c index eb6042c3928..f0e7314176a 100644 --- a/src/panfrost/vulkan/panvk_image.c +++ b/src/panfrost/vulkan/panvk_image.c @@ -197,6 +197,61 @@ panvk_image_select_mod(struct panvk_image *image, panvk_image_select_mod_from_list(image, NULL, 0); } +static void +panvk_image_pre_mod_select_meta_adjustments(struct panvk_image *image) +{ + const VkImageAspectFlags aspects = vk_format_aspects(image->vk.format); + + /* We do image blit/resolve with vk_meta, so when an image is flagged as + * being a potential transfer source, we also need to add the sampled usage. + */ + if (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + image->vk.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + image->vk.stencil_usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* Similarly, image that can be a transfer destination can be attached + * as a color or depth-stencil attachment by vk_meta. */ + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + image->vk.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + image->vk.stencil_usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + image->vk.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + image->vk.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } + + /* vk_meta creates 2D array views of 3D images. 
*/ + if (image->vk.image_type == VK_IMAGE_TYPE_3D) + image->vk.create_flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; + } + + /* Needed for resolve operations. */ + if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + image->vk.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + + if (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + image->vk.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + image->vk.stencil_usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if ((image->vk.usage & + (VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) && + util_format_is_compressed(image->pimage.layout.format)) { + /* We need to be able to create RGBA views of compressed formats for + * vk_meta copies. */ + image->vk.create_flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | + VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT; + } +} + static uint64_t panvk_image_get_total_size(const struct panvk_image *image) { @@ -241,6 +296,13 @@ panvk_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo, .nr_slices = image->vk.mip_levels, }; + /* Add any create/usage flags that might be needed for meta operations. + * This is run before the modifier selection because some + * usage/create_flags influence the modifier selection logic. */ + panvk_image_pre_mod_select_meta_adjustments(image); + + /* Now that we've patched the create/usage flags, we can proceed with the + * modifier selection. 
*/ panvk_image_select_mod(image, pCreateInfo); *pImage = panvk_image_to_handle(image); diff --git a/src/panfrost/vulkan/panvk_meta.h b/src/panfrost/vulkan/panvk_meta.h index b54a39d0b16..2e8e182dabb 100644 --- a/src/panfrost/vulkan/panvk_meta.h +++ b/src/panfrost/vulkan/panvk_meta.h @@ -6,78 +6,140 @@ #ifndef PANVK_META_H #define PANVK_META_H -#include "panvk_macros.h" +#include "panvk_image.h" #include "panvk_mempool.h" -#include "pan_blend.h" -#include "pan_blitter.h" +#include "vk_format.h" +#include "vk_meta.h" -#define PANVK_META_COPY_BUF2IMG_NUM_FORMATS 12 -#define PANVK_META_COPY_IMG2BUF_NUM_FORMATS 12 -#define PANVK_META_COPY_IMG2IMG_NUM_FORMATS 14 -#define PANVK_META_COPY_NUM_TEX_TYPES 5 -#define PANVK_META_COPY_BUF2BUF_NUM_BLKSIZES 5 - -static inline unsigned -panvk_meta_copy_tex_type(unsigned dim, bool isarray) +static inline bool +panvk_meta_copy_to_image_use_gfx_pipeline(struct panvk_image *dst_img) { - assert(dim > 0 && dim <= 3); - assert(dim < 3 || !isarray); - return (((dim - 1) << 1) | (isarray ? 1 : 0)); + /* Writes to AFBC images must go through the graphics pipeline. */ + if (drm_is_afbc(dst_img->pimage.layout.modifier)) + return true; + + return false; } -struct panvk_meta { - struct panvk_pool bin_pool; - struct panvk_pool desc_pool; +static inline VkFormat +panvk_meta_get_uint_format_for_blk_size(unsigned blk_sz) +{ + switch (blk_sz) { + case 1: + return VK_FORMAT_R8_UINT; + case 2: + return VK_FORMAT_R16_UINT; + case 3: + return VK_FORMAT_R8G8B8_UINT; + case 4: + return VK_FORMAT_R32_UINT; + case 6: + return VK_FORMAT_R16G16B16_UINT; + case 8: + return VK_FORMAT_R32G32_UINT; + case 12: + return VK_FORMAT_R32G32B32_UINT; + case 16: + return VK_FORMAT_R32G32B32A32_UINT; + default: + return VK_FORMAT_UNDEFINED; + } +} - /* Access to the blitter pools are protected by the blitter - * shader/rsd locks. They can't be merged with other binary/desc - * pools unless we patch pan_blitter.c to external pool locks. 
- */ - struct { - struct panvk_pool bin_pool; - struct panvk_pool desc_pool; - struct pan_blitter_cache cache; - } blitter; +static inline struct vk_meta_copy_image_properties +panvk_meta_copy_get_image_properties(struct panvk_image *img) +{ + uint64_t mod = img->pimage.layout.modifier; + enum pipe_format pfmt = vk_format_to_pipe_format(img->vk.format); + unsigned blk_sz = util_format_get_blocksize(pfmt); + struct vk_meta_copy_image_properties props = {0}; - struct pan_blend_shader_cache blend_shader_cache; + if (drm_is_afbc(mod)) { + if (!vk_format_is_depth_or_stencil(img->vk.format)) { + props.color.view_format = img->vk.format; + } else { + switch (img->vk.format) { + case VK_FORMAT_D24_UNORM_S8_UINT: + props.depth.view_format = VK_FORMAT_R8G8B8A8_UNORM; + props.depth.component_mask = BITFIELD_MASK(3); + props.stencil.view_format = VK_FORMAT_R8G8B8A8_UNORM; + props.stencil.component_mask = BITFIELD_BIT(3); + break; + case VK_FORMAT_X8_D24_UNORM_PACK32: + props.depth.view_format = VK_FORMAT_R8G8B8A8_UNORM; + props.depth.component_mask = BITFIELD_MASK(3); + break; + case VK_FORMAT_D16_UNORM: + props.depth.view_format = VK_FORMAT_R8G8_UNORM; + props.depth.component_mask = BITFIELD_MASK(2); + break; + default: + assert(!"Invalid ZS format"); + break; + } + } + } else if (vk_format_is_depth_or_stencil(img->vk.format)) { + switch (img->vk.format) { + case VK_FORMAT_S8_UINT: + props.stencil.view_format = VK_FORMAT_R8_UINT; + props.stencil.component_mask = BITFIELD_MASK(1); + break; + case VK_FORMAT_D24_UNORM_S8_UINT: + props.depth.view_format = VK_FORMAT_R8G8B8A8_UINT; + props.depth.component_mask = BITFIELD_MASK(3); + props.stencil.view_format = VK_FORMAT_R8G8B8A8_UINT; + props.stencil.component_mask = BITFIELD_BIT(3); + break; + case VK_FORMAT_X8_D24_UNORM_PACK32: + props.depth.view_format = VK_FORMAT_R8G8B8A8_UINT; + props.depth.component_mask = BITFIELD_MASK(3); + break; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + props.depth.view_format = VK_FORMAT_R32G32_UINT; + 
props.depth.component_mask = BITFIELD_BIT(0); + props.stencil.view_format = VK_FORMAT_R32G32_UINT; + props.stencil.component_mask = BITFIELD_BIT(1); + break; + case VK_FORMAT_D16_UNORM: + props.depth.view_format = VK_FORMAT_R16_UINT; + props.depth.component_mask = BITFIELD_BIT(0); + break; + case VK_FORMAT_D32_SFLOAT: + props.depth.view_format = VK_FORMAT_R32_UINT; + props.depth.component_mask = BITFIELD_BIT(0); + break; + default: + assert(!"Invalid ZS format"); + break; + } + } else { + props.color.view_format = panvk_meta_get_uint_format_for_blk_size(blk_sz); + } - struct { - struct { - mali_ptr shader; - struct pan_shader_info shader_info; - } color[3]; /* 3 base types */ - } clear_attachment; + if (mod == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || + drm_is_afbc(mod)) { + props.tile_size.width = 16; + props.tile_size.height = 16; + props.tile_size.depth = 1; + } else { + /* When linear, pretend we have a 1D-tile so we end up with a <64,1,1> + * workgroup. */ + props.tile_size.width = 64; + props.tile_size.height = 1; + props.tile_size.depth = 1; + } - struct { - struct { - mali_ptr rsd; - } buf2img[PANVK_META_COPY_BUF2IMG_NUM_FORMATS]; - struct { - mali_ptr rsd; - } img2buf[PANVK_META_COPY_NUM_TEX_TYPES] - [PANVK_META_COPY_IMG2BUF_NUM_FORMATS]; - struct { - mali_ptr rsd; - } img2img[2][PANVK_META_COPY_NUM_TEX_TYPES] - [PANVK_META_COPY_IMG2IMG_NUM_FORMATS]; - struct { - mali_ptr rsd; - } buf2buf[PANVK_META_COPY_BUF2BUF_NUM_BLKSIZES]; - struct { - mali_ptr rsd; - } fillbuf; - } copy; + return props; +} - struct { - mali_ptr rsd; - } desc_copy; -}; +#if defined(PAN_ARCH) && PAN_ARCH <= 7 +void panvk_per_arch(meta_desc_copy_init)(struct panvk_device *dev); -#if PAN_ARCH +void panvk_per_arch(meta_desc_copy_cleanup)(struct panvk_device *dev); -#if PAN_ARCH <= 7 struct panvk_descriptor_state; +struct panvk_device; struct panvk_shader; struct panvk_shader_desc_state; @@ -89,23 +151,4 @@ struct panfrost_ptr panvk_per_arch(meta_get_copy_desc_job)( uint32_t 
attrib_buf_idx_offset); #endif -void panvk_per_arch(meta_init)(struct panvk_device *dev); - -void panvk_per_arch(meta_cleanup)(struct panvk_device *dev); - -mali_ptr panvk_per_arch(meta_emit_viewport)(struct pan_pool *pool, - uint16_t minx, uint16_t miny, - uint16_t maxx, uint16_t maxy); - -void panvk_per_arch(meta_clear_init)(struct panvk_device *dev); - -void panvk_per_arch(meta_blit_init)(struct panvk_device *dev); - -void panvk_per_arch(meta_blit_cleanup)(struct panvk_device *dev); - -void panvk_per_arch(meta_copy_init)(struct panvk_device *dev); - -void panvk_per_arch(meta_desc_copy_init)(struct panvk_device *dev); -#endif - #endif diff --git a/src/panfrost/vulkan/panvk_vX_cmd_meta.c b/src/panfrost/vulkan/panvk_vX_cmd_meta.c index bdd35de0c4f..55d4cc488e9 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_meta.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_meta.c @@ -4,6 +4,7 @@ */ #include "panvk_cmd_meta.h" +#include "panvk_entrypoints.h" void panvk_per_arch(cmd_meta_compute_start)( @@ -122,3 +123,236 @@ panvk_per_arch(cmd_meta_gfx_end)( vk_dynamic_graphics_state_copy(&cmdbuf->vk.dynamic_graphics_state, &save_ctx->dyn_state.all); } + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdBlitImage2)(VkCommandBuffer commandBuffer, + const VkBlitImageInfo2 *pBlitImageInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + struct panvk_cmd_meta_graphics_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save); + vk_meta_blit_image2(&cmdbuf->vk, &dev->meta, pBlitImageInfo); + panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save); +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdResolveImage2)(VkCommandBuffer commandBuffer, + const VkResolveImageInfo2 *pResolveImageInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + struct panvk_cmd_meta_graphics_save_ctx save = {0}; + + 
panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save); + vk_meta_resolve_image2(&cmdbuf->vk, &dev->meta, pResolveImageInfo); + panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save); +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdClearAttachments)(VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment *pAttachments, + uint32_t rectCount, + const VkClearRect *pRects) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + struct panvk_cmd_meta_graphics_save_ctx save = {0}; + struct vk_meta_rendering_info render = { + .view_mask = 0, + .samples = fbinfo->nr_samples, + .color_attachment_count = fbinfo->rt_count, + }; + + for (uint32_t i = 0; i < fbinfo->rt_count; i++) { + if (fbinfo->rts[i].view) { + render.color_attachment_formats[i] = + cmdbuf->state.gfx.render.color_attachments.fmts[i]; + render.color_attachment_write_masks[i] = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + } + } + + if (fbinfo->zs.view.zs) { + render.depth_attachment_format = + vk_format_from_pipe_format(fbinfo->zs.view.zs->format); + + if (vk_format_has_stencil(render.depth_attachment_format)) + render.stencil_attachment_format = render.depth_attachment_format; + } + + if (fbinfo->zs.view.s) { + render.stencil_attachment_format = + vk_format_from_pipe_format(fbinfo->zs.view.s->format); + } + + panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save); + vk_meta_clear_attachments(&cmdbuf->vk, &dev->meta, &render, attachmentCount, + pAttachments, rectCount, pRects); + panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save); +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdClearDepthStencilImage)( + VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, + const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, + const VkImageSubresourceRange 
*pRanges) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(panvk_image, img, image); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + struct panvk_cmd_meta_graphics_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save); + vk_meta_clear_depth_stencil_image(&cmdbuf->vk, &dev->meta, &img->vk, + imageLayout, pDepthStencil, rangeCount, + pRanges); + panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save); +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue *pColor, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(panvk_image, img, image); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + struct panvk_cmd_meta_graphics_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save); + vk_meta_clear_color_image(&cmdbuf->vk, &dev->meta, &img->vk, imageLayout, + img->vk.format, pColor, rangeCount, pRanges); + panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save); +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2 *pCopyBufferInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + struct panvk_cmd_meta_compute_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_compute_start)(cmdbuf, &save); + vk_meta_copy_buffer(&cmdbuf->vk, &dev->meta, pCopyBufferInfo); + panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save); +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdCopyBufferToImage2)( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + 
VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage); + struct vk_meta_copy_image_properties img_props = + panvk_meta_copy_get_image_properties(img); + bool use_gfx_pipeline = panvk_meta_copy_to_image_use_gfx_pipeline(img); + + if (use_gfx_pipeline) { + struct panvk_cmd_meta_graphics_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save); + vk_meta_copy_buffer_to_image(&cmdbuf->vk, &dev->meta, + pCopyBufferToImageInfo, &img_props, + VK_PIPELINE_BIND_POINT_GRAPHICS); + panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save); + } else { + struct panvk_cmd_meta_compute_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_compute_start)(cmdbuf, &save); + vk_meta_copy_buffer_to_image(&cmdbuf->vk, &dev->meta, + pCopyBufferToImageInfo, &img_props, + VK_PIPELINE_BIND_POINT_COMPUTE); + panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save); + } +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdCopyImageToBuffer2)( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage); + struct vk_meta_copy_image_properties img_props = + panvk_meta_copy_get_image_properties(img); + struct panvk_cmd_meta_compute_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_compute_start)(cmdbuf, &save); + vk_meta_copy_image_to_buffer(&cmdbuf->vk, &dev->meta, pCopyImageToBufferInfo, + &img_props); + panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save); +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, + VkDeviceSize dstOffset, VkDeviceSize fillSize, + uint32_t data) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + struct panvk_cmd_meta_compute_save_ctx save = {0}; + + 
panvk_per_arch(cmd_meta_compute_start)(cmdbuf, &save); + vk_meta_fill_buffer(&cmdbuf->vk, &dev->meta, dstBuffer, dstOffset, fillSize, + data); + panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save); +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, VkDeviceSize dstOffset, + VkDeviceSize dataSize, const void *pData) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + struct panvk_cmd_meta_compute_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_compute_start)(cmdbuf, &save); + vk_meta_update_buffer(&cmdbuf->vk, &dev->meta, dstBuffer, dstOffset, + dataSize, pData); + panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save); +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2 *pCopyImageInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + VK_FROM_HANDLE(panvk_image, src_img, pCopyImageInfo->srcImage); + VK_FROM_HANDLE(panvk_image, dst_img, pCopyImageInfo->dstImage); + struct vk_meta_copy_image_properties src_img_props = + panvk_meta_copy_get_image_properties(src_img); + struct vk_meta_copy_image_properties dst_img_props = + panvk_meta_copy_get_image_properties(dst_img); + bool use_gfx_pipeline = panvk_meta_copy_to_image_use_gfx_pipeline(dst_img); + + if (use_gfx_pipeline) { + struct panvk_cmd_meta_graphics_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save); + vk_meta_copy_image(&cmdbuf->vk, &dev->meta, pCopyImageInfo, + &src_img_props, &dst_img_props, + VK_PIPELINE_BIND_POINT_GRAPHICS); + panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save); + } else { + struct panvk_cmd_meta_compute_save_ctx save = {0}; + + panvk_per_arch(cmd_meta_compute_start)(cmdbuf, &save); + vk_meta_copy_image(&cmdbuf->vk, &dev->meta, pCopyImageInfo, + &src_img_props, 
&dst_img_props, + VK_PIPELINE_BIND_POINT_COMPUTE); + panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save); + } +} diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c index face860a654..961a918b344 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -12,6 +12,7 @@ #include "vk_cmd_enqueue_entrypoints.h" #include "vk_common_entrypoints.h" +#include "panvk_buffer.h" #include "panvk_cmd_buffer.h" #include "panvk_device.h" #include "panvk_entrypoints.h" @@ -86,6 +87,103 @@ panvk_device_cleanup_mempools(struct panvk_device *dev) panvk_pool_cleanup(&dev->mempools.exec); } +static VkResult +panvk_meta_cmd_bind_map_buffer(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, VkBuffer buf, + void **map_out) +{ + VK_FROM_HANDLE(panvk_buffer, buffer, buf); + struct panvk_cmd_buffer *cmdbuf = + container_of(cmd, struct panvk_cmd_buffer, vk); + struct panfrost_ptr mem = + pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, buffer->vk.size, 64); + + buffer->dev_addr = mem.gpu; + *map_out = mem.cpu; + return VK_SUCCESS; +} + +static VkResult +panvk_meta_init(struct panvk_device *device) +{ + const struct vk_physical_device *pdev = device->vk.physical; + + VkResult result = vk_meta_device_init(&device->vk, &device->meta); + if (result != VK_SUCCESS) + return result; + + device->meta.use_stencil_export = true; + device->meta.max_bind_map_buffer_size_B = 64 * 1024; + device->meta.cmd_bind_map_buffer = panvk_meta_cmd_bind_map_buffer; + + /* Assume a maximum of 1024 bytes per workgroup and choose the workgroup size + accordingly. 
*/ + for (uint32_t i = 0; + i < ARRAY_SIZE(device->meta.buffer_access.optimal_wg_size); i++) { + device->meta.buffer_access.optimal_wg_size[i] = + MIN2(1024 >> i, pdev->properties.maxComputeWorkGroupSize[0]); + } + +#if PAN_ARCH <= 7 + panvk_per_arch(meta_desc_copy_init)(device); +#endif + + return VK_SUCCESS; +} + +static void +panvk_meta_cleanup(struct panvk_device *device) +{ +#if PAN_ARCH <= 7 + panvk_per_arch(meta_desc_copy_cleanup)(device); +#endif + + vk_meta_device_finish(&device->vk, &device->meta); +} + +static void +panvk_preload_blitter_init(struct panvk_device *device) +{ + const struct panvk_physical_device *physical_device = + to_panvk_physical_device(device->vk.physical); + + struct panvk_pool_properties bin_pool_props = { + .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, + .slab_size = 16 * 1024, + .label = "panvk_meta blitter binary pool", + .owns_bos = true, + .needs_locking = false, + .prealloc = false, + }; + panvk_pool_init(&device->blitter.bin_pool, device, NULL, &bin_pool_props); + + struct panvk_pool_properties desc_pool_props = { + .create_flags = 0, + .slab_size = 16 * 1024, + .label = "panvk_meta blitter descriptor pool", + .owns_bos = true, + .needs_locking = false, + .prealloc = false, + }; + panvk_pool_init(&device->blitter.desc_pool, device, NULL, &desc_pool_props); + + pan_blend_shader_cache_init(&device->blitter.blend_shader_cache, + physical_device->kmod.props.gpu_prod_id); + GENX(pan_blitter_cache_init) + (&device->blitter.cache, physical_device->kmod.props.gpu_prod_id, + &device->blitter.blend_shader_cache, &device->blitter.bin_pool.base, + &device->blitter.desc_pool.base); +} + +static void +panvk_preload_blitter_cleanup(struct panvk_device *device) +{ + GENX(pan_blitter_cache_cleanup)(&device->blitter.cache); + pan_blend_shader_cache_cleanup(&device->blitter.blend_shader_cache); + panvk_pool_cleanup(&device->blitter.desc_pool); + panvk_pool_cleanup(&device->blitter.bin_pool); +} + /* Always reserve the lower 32MB. 
*/ #define PANVK_VA_RESERVE_BOTTOM 0x2000000ull @@ -206,7 +304,11 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device, if (result != VK_SUCCESS) goto err_free_priv_bos; - panvk_per_arch(meta_init)(device); + panvk_preload_blitter_init(device); + + result = panvk_meta_init(device); + if (result != VK_SUCCESS) + goto err_cleanup_blitter; for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { const VkDeviceQueueCreateInfo *queue_create = @@ -245,7 +347,10 @@ err_finish_queues: vk_object_free(&device->vk, NULL, device->queues[i]); } - panvk_per_arch(meta_cleanup)(device); + panvk_meta_cleanup(device); + +err_cleanup_blitter: + panvk_preload_blitter_cleanup(device); panvk_per_arch(blend_shader_cache_cleanup)(device); err_free_priv_bos: @@ -279,7 +384,8 @@ panvk_per_arch(destroy_device)(struct panvk_device *device, vk_object_free(&device->vk, NULL, device->queues[i]); } - panvk_per_arch(meta_cleanup)(device); + panvk_meta_cleanup(device); + panvk_preload_blitter_cleanup(device); panvk_per_arch(blend_shader_cache_cleanup)(device); panvk_priv_bo_unref(device->tiler_heap); panvk_priv_bo_unref(device->sample_positions); diff --git a/src/panfrost/vulkan/panvk_vX_image_view.c b/src/panfrost/vulkan/panvk_vX_image_view.c index 7f056b5cef6..7f86ad4420d 100644 --- a/src/panfrost/vulkan/panvk_vX_image_view.c +++ b/src/panfrost/vulkan/panvk_vX_image_view.c @@ -78,10 +78,12 @@ panvk_per_arch(CreateImageView)(VkDevice _device, { VK_FROM_HANDLE(panvk_device, device, _device); VK_FROM_HANDLE(panvk_image, image, pCreateInfo->image); + bool driver_internal = + (pCreateInfo->flags & VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA) != 0; struct panvk_image_view *view; - view = vk_image_view_create(&device->vk, false, pCreateInfo, pAllocator, - sizeof(*view)); + view = vk_image_view_create(&device->vk, driver_internal, pCreateInfo, + pAllocator, sizeof(*view)); if (view == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -101,19 +103,21 @@ 
panvk_per_arch(CreateImageView)(VkDevice _device, * depth and stencil but the view only contains one of these components, so * we can ignore the component we don't use. */ - if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && - view->vk.view_format != VK_FORMAT_D32_SFLOAT_S8_UINT) - view->pview.format = view->vk.view_format == VK_FORMAT_D32_SFLOAT - ? PIPE_FORMAT_Z32_FLOAT_S8X24_UINT - : PIPE_FORMAT_X32_S8X24_UINT; + if (vk_format_is_depth_or_stencil(view->vk.view_format)) { + if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && + view->vk.view_format != VK_FORMAT_D32_SFLOAT_S8_UINT) + view->pview.format = view->vk.view_format == VK_FORMAT_D32_SFLOAT + ? PIPE_FORMAT_Z32_FLOAT_S8X24_UINT + : PIPE_FORMAT_X32_S8X24_UINT; - if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT && - view->vk.view_format == VK_FORMAT_S8_UINT) - view->pview.format = PIPE_FORMAT_X24S8_UINT; + if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT && + view->vk.view_format == VK_FORMAT_S8_UINT) + view->pview.format = PIPE_FORMAT_X24S8_UINT; - if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && - view->vk.view_format == VK_FORMAT_S8_UINT) - view->pview.format = PIPE_FORMAT_X32_S8X24_UINT; + if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && + view->vk.view_format == VK_FORMAT_S8_UINT) + view->pview.format = PIPE_FORMAT_X32_S8X24_UINT; + } VkImageUsageFlags tex_usage_mask = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; @@ -193,12 +197,13 @@ panvk_per_arch(CreateImageView)(VkDevice _device, pan_pack(view->descs.img_attrib_buf[1].opaque, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { unsigned level = view->pview.first_level; + VkExtent3D extent = view->vk.extent; - cfg.s_dimension = u_minify(image->pimage.layout.width, level); - cfg.t_dimension = u_minify(image->pimage.layout.height, level); + cfg.s_dimension = extent.width; + cfg.t_dimension = extent.height; cfg.r_dimension = view->pview.dim == MALI_TEXTURE_DIMENSION_3D - ? 
u_minify(image->pimage.layout.depth, level) + ? extent.depth : (view->pview.last_layer - view->pview.first_layer + 1); cfg.row_stride = image->pimage.layout.slices[level].row_stride; if (cfg.r_dimension > 1) {