From 69525b7955a0597599ddcf351ef80ae7caefb1fc Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 31 Jan 2024 15:45:26 +0100 Subject: [PATCH] panvk: Kill panvk_[vX_]cs.{c,h} Dispatch the helpers where they are used, or inline them when they are simple. The only helper that's shared by the command buffer and pipeline logic is panvk_per_arch(emit_viewport)(), and we move it to the cmd_buffer file for now. If there's more to share, we might want to create panvk_graphics_state.{c,h} for those. Signed-off-by: Boris Brezillon Acked-by: Erik Faye-Lund Reviewed-by: Mary Guillemard Reviewed-by: Rebecca Mckeever Part-of: --- src/panfrost/vulkan/meson.build | 2 - src/panfrost/vulkan/panvk_cs.c | 67 -- src/panfrost/vulkan/panvk_cs.h | 82 -- src/panfrost/vulkan/panvk_device.c | 2 - src/panfrost/vulkan/panvk_pipeline.c | 1 - src/panfrost/vulkan/panvk_private.h | 3 - src/panfrost/vulkan/panvk_vX_cmd_buffer.c | 516 +++++++++++- src/panfrost/vulkan/panvk_vX_cmd_buffer.h | 3 + src/panfrost/vulkan/panvk_vX_cs.c | 785 ------------------ src/panfrost/vulkan/panvk_vX_cs.h | 107 --- src/panfrost/vulkan/panvk_vX_descriptor_set.c | 12 +- src/panfrost/vulkan/panvk_vX_device.c | 1 - src/panfrost/vulkan/panvk_vX_pipeline.c | 230 ++++- 13 files changed, 720 insertions(+), 1091 deletions(-) delete mode 100644 src/panfrost/vulkan/panvk_cs.c delete mode 100644 src/panfrost/vulkan/panvk_cs.h delete mode 100644 src/panfrost/vulkan/panvk_vX_cs.c delete mode 100644 src/panfrost/vulkan/panvk_vX_cs.h diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build index dea7fe89e37..4420bb13b59 100644 --- a/src/panfrost/vulkan/meson.build +++ b/src/panfrost/vulkan/meson.build @@ -37,7 +37,6 @@ panvk_entrypoints = custom_target( libpanvk_files = files( 'panvk_buffer.c', 'panvk_cmd_buffer.c', - 'panvk_cs.c', 'panvk_device.c', 'panvk_device_memory.c', 'panvk_descriptor_set.c', @@ -63,7 +62,6 @@ foreach arch : ['6', '7'] panvk_entrypoints[0], 'panvk_vX_buffer_view.c', 
'panvk_vX_cmd_buffer.c', - 'panvk_vX_cs.c', 'panvk_vX_descriptor_set.c', 'panvk_vX_descriptor_set_layout.c', 'panvk_vX_device.c', diff --git a/src/panfrost/vulkan/panvk_cs.c b/src/panfrost/vulkan/panvk_cs.c deleted file mode 100644 index eb998206f8e..00000000000 --- a/src/panfrost/vulkan/panvk_cs.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2021 Collabora Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "compiler/shader_enums.h" -#include "util/macros.h" - -#include "pan_desc.h" -#include "pan_pool.h" - -#include "panvk_cs.h" -#include "panvk_private.h" - -/* - * Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of - * section 24.5 ("Controlling the Viewport") of the Vulkan spec. 
At the end of - * the section, the spec defines: - * - * px = width - * py = height - * pz = maxDepth - minDepth - */ -void -panvk_sysval_upload_viewport_scale(const VkViewport *viewport, - union panvk_sysval_vec4 *data) -{ - data->f32[0] = 0.5f * viewport->width; - data->f32[1] = 0.5f * viewport->height; - data->f32[2] = (viewport->maxDepth - viewport->minDepth); -} - -/* - * Upload the viewport offset. Defined as (ox, oy, oz) at the start of section - * 24.5 ("Controlling the Viewport") of the Vulkan spec. At the end of the - * section, the spec defines: - * - * ox = x + width/2 - * oy = y + height/2 - * oz = minDepth - */ -void -panvk_sysval_upload_viewport_offset(const VkViewport *viewport, - union panvk_sysval_vec4 *data) -{ - data->f32[0] = (0.5f * viewport->width) + viewport->x; - data->f32[1] = (0.5f * viewport->height) + viewport->y; - data->f32[2] = viewport->minDepth; -} diff --git a/src/panfrost/vulkan/panvk_cs.h b/src/panfrost/vulkan/panvk_cs.h deleted file mode 100644 index 3260a666fe8..00000000000 --- a/src/panfrost/vulkan/panvk_cs.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (C) 2021 Collabora Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef PANVK_CS_H -#define PANVK_CS_H - -#include "pan_encoder.h" - -#include - -#include "compiler/shader_enums.h" -#include "pan_desc.h" -#include "panfrost-job.h" - -#include "vk_util.h" - -#include "panvk_private.h" - -struct pan_blend_state; -struct pan_shader_info; -struct panfrost_ptr; -struct pan_pool; - -union panvk_sysval_data; -struct panvk_cmd_state; -struct panvk_compute_dim; -struct panvk_device; -struct panvk_batch; -struct panvk_varyings_info; -struct panvk_attrib_buf; -struct panvk_attribs_info; -struct panvk_pipeline; -struct panvk_draw_info; -struct panvk_descriptor_state; -struct panvk_subpass; -struct panvk_clear_value; - -#ifdef PAN_ARCH -static inline enum mali_func -panvk_per_arch(translate_compare_func)(VkCompareOp comp) -{ - STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER); - STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS); - STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL); - STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL); - STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER); - STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL); - STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL == - (VkCompareOp)MALI_FUNC_GEQUAL); - STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS); - - return (enum mali_func)comp; -} -#endif - -void panvk_sysval_upload_viewport_scale(const VkViewport *viewport, - union panvk_sysval_vec4 *data); - -void panvk_sysval_upload_viewport_offset(const VkViewport *viewport, - union panvk_sysval_vec4 *data); - -#endif diff --git a/src/panfrost/vulkan/panvk_device.c b/src/panfrost/vulkan/panvk_device.c index 
1149b5646a5..65fa365b08a 100644 --- a/src/panfrost/vulkan/panvk_device.c +++ b/src/panfrost/vulkan/panvk_device.c @@ -63,8 +63,6 @@ #include #endif -#include "panvk_cs.h" - static int panvk_device_get_cache_uuid(uint16_t family, void *uuid) { diff --git a/src/panfrost/vulkan/panvk_pipeline.c b/src/panfrost/vulkan/panvk_pipeline.c index 08c32e7701c..b53b9099067 100644 --- a/src/panfrost/vulkan/panvk_pipeline.c +++ b/src/panfrost/vulkan/panvk_pipeline.c @@ -27,7 +27,6 @@ */ #include "panvk_pipeline.h" -#include "panvk_cs.h" #include "panvk_private.h" #include "nir/nir.h" diff --git a/src/panfrost/vulkan/panvk_private.h b/src/panfrost/vulkan/panvk_private.h index ca453ec8313..e228678dddd 100644 --- a/src/panfrost/vulkan/panvk_private.h +++ b/src/panfrost/vulkan/panvk_private.h @@ -534,14 +534,12 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_cmd_pool, vk.base, VkCommandPool, #ifdef PAN_ARCH #include "panvk_vX_cmd_buffer.h" -#include "panvk_vX_cs.h" #include "panvk_vX_device.h" #include "panvk_vX_meta.h" #else #define PAN_ARCH 6 #define panvk_per_arch(name) panvk_arch_name(name, v6) #include "panvk_vX_cmd_buffer.h" -#include "panvk_vX_cs.h" #include "panvk_vX_device.h" #include "panvk_vX_meta.h" #undef PAN_ARCH @@ -549,7 +547,6 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_cmd_pool, vk.base, VkCommandPool, #define PAN_ARCH 7 #define panvk_per_arch(name) panvk_arch_name(name, v7) #include "panvk_vX_cmd_buffer.h" -#include "panvk_vX_cs.h" #include "panvk_vX_device.h" #include "panvk_vX_meta.h" #undef PAN_ARCH diff --git a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c index 6b11c43c59a..e350595782d 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c @@ -29,7 +29,6 @@ #include "genxml/gen_macros.h" #include "panvk_buffer.h" -#include "panvk_cs.h" #include "panvk_event.h" #include "panvk_image.h" #include "panvk_image_view.h" @@ -198,6 +197,42 @@ panvk_per_arch(cmd_alloc_tls_desc)(struct 
panvk_cmd_buffer *cmdbuf, bool gfx) } } +/* + * Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of + * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the end of + * the section, the spec defines: + * + * px = width + * py = height + * pz = maxDepth - minDepth + */ +static void +panvk_sysval_upload_viewport_scale(const VkViewport *viewport, + union panvk_sysval_vec4 *data) +{ + data->f32[0] = 0.5f * viewport->width; + data->f32[1] = 0.5f * viewport->height; + data->f32[2] = (viewport->maxDepth - viewport->minDepth); +} + +/* + * Upload the viewport offset. Defined as (ox, oy, oz) at the start of section + * 24.5 ("Controlling the Viewport") of the Vulkan spec. At the end of the + * section, the spec defines: + * + * ox = x + width/2 + * oy = y + height/2 + * oz = minDepth + */ +static void +panvk_sysval_upload_viewport_offset(const VkViewport *viewport, + union panvk_sysval_vec4 *data) +{ + data->f32[0] = (0.5f * viewport->width) + viewport->x; + data->f32[1] = (0.5f * viewport->height) + viewport->y; + data->f32[2] = viewport->minDepth; +} + static void panvk_cmd_prepare_draw_sysvals( struct panvk_cmd_buffer *cmdbuf, @@ -281,8 +316,61 @@ panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf, struct panfrost_ptr ubos = pan_pool_alloc_desc_array( &cmdbuf->desc_pool.base, pipeline->num_ubos, UNIFORM_BUFFER); + struct mali_uniform_buffer_packed *ubo_descs = ubos.cpu; - panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu); + pan_pack(&ubo_descs[PANVK_SYSVAL_UBO_INDEX], UNIFORM_BUFFER, cfg) { + cfg.pointer = desc_state->sysvals_ptr; + cfg.entries = DIV_ROUND_UP(sizeof(desc_state->sysvals), 16); + } + + if (pipeline->layout->push_constants.size) { + pan_pack(&ubo_descs[PANVK_PUSH_CONST_UBO_INDEX], UNIFORM_BUFFER, cfg) { + cfg.pointer = desc_state->push_constants; + cfg.entries = ALIGN_POT(pipeline->layout->push_constants.size, 16); + } + } else { + memset(&ubo_descs[PANVK_PUSH_CONST_UBO_INDEX], 0, sizeof(*ubo_descs)); + } + + 
for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) { + const struct panvk_descriptor_set_layout *set_layout = + vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]); + const struct panvk_descriptor_set *set = desc_state->sets[s]; + + unsigned ubo_start = + panvk_per_arch(pipeline_layout_ubo_start)(pipeline->layout, s, false); + + if (!set) { + unsigned all_ubos = set_layout->num_ubos + set_layout->num_dyn_ubos; + memset(&ubo_descs[ubo_start], 0, all_ubos * sizeof(*ubo_descs)); + } else { + memcpy(&ubo_descs[ubo_start], set->ubos, + set_layout->num_ubos * sizeof(*ubo_descs)); + + unsigned dyn_ubo_start = panvk_per_arch(pipeline_layout_ubo_start)( + pipeline->layout, s, true); + + for (unsigned i = 0; i < set_layout->num_dyn_ubos; i++) { + const unsigned ubo_idx = + pipeline->layout->sets[s].dyn_ubo_offset + i; + const struct panvk_buffer_desc *bdesc = + &desc_state->dyn.ubos[ubo_idx]; + + mali_ptr address = + panvk_buffer_gpu_ptr(bdesc->buffer, bdesc->offset); + size_t size = + panvk_buffer_range(bdesc->buffer, bdesc->offset, bdesc->size); + if (size) { + pan_pack(&ubo_descs[dyn_ubo_start + i], UNIFORM_BUFFER, cfg) { + cfg.pointer = address; + cfg.entries = DIV_ROUND_UP(size, 16); + } + } else { + memset(&ubo_descs[dyn_ubo_start + i], 0, sizeof(*ubo_descs)); + } + } + } + } desc_state->ubos = ubos.gpu; } @@ -360,7 +448,6 @@ static void panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS); @@ -370,6 +457,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, } if (!cmdbuf->state.fs_rsd) { + const struct panvk_cmd_state *state = &cmdbuf->state; struct panfrost_ptr rsd = pan_pool_alloc_desc_aggregate( &cmdbuf->desc_pool.base, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(pipeline->blend.state.rt_count, BLEND)); @@ -380,7 +468,35 @@ 
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >= sizeof(*rsd_templ)); - panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn); + pan_pack(&rsd_dyn, RENDERER_STATE, cfg) { + if (pipeline->dynamic_state_mask & + (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f; + cfg.depth_factor = state->rast.depth_bias.slope_factor; + cfg.depth_bias_clamp = state->rast.depth_bias.clamp; + } + + if (pipeline->dynamic_state_mask & + (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + cfg.stencil_front.mask = state->zs.s_front.compare_mask; + cfg.stencil_back.mask = state->zs.s_back.compare_mask; + } + + if (pipeline->dynamic_state_mask & + (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + cfg.stencil_mask_misc.stencil_mask_front = + state->zs.s_front.write_mask; + cfg.stencil_mask_misc.stencil_mask_back = + state->zs.s_back.write_mask; + } + + if (pipeline->dynamic_state_mask & + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + cfg.stencil_front.reference_value = state->zs.s_front.ref; + cfg.stencil_back.reference_value = state->zs.s_back.ref; + } + } + pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE); memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn)); @@ -391,10 +507,15 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, struct mali_blend_packed *bd_templ = (struct mali_blend_packed *)&pipeline->blend.bd_template[i]; - STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >= - sizeof(*bd_templ)); - panvk_per_arch(emit_blend_constant)( - dev, pipeline, i, cmdbuf->state.blend.constants, &bd_dyn); + float constant = + cmdbuf->state.blend.constants[pipeline->blend.constant[i].index] * + pipeline->blend.constant[i].bifrost_factor; + + pan_pack(&bd_dyn, BLEND, cfg) { + cfg.enable = false; + cfg.constant = constant; + } + pan_merge(bd_dyn, (*bd_templ), BLEND); memcpy(bd, &bd_dyn, sizeof(bd_dyn)); } @@ -422,12 +543,21 @@ panvk_per_arch(cmd_get_tiler_context)(struct 
panvk_cmd_buffer *cmdbuf, STATIC_ASSERT(sizeof(batch->tiler.templ) >= pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP)); - struct panfrost_ptr desc = { - .gpu = batch->tiler.descs.gpu, - .cpu = batch->tiler.templ, - }; + pan_pack((void *)((uint8_t *)batch->tiler.templ + pan_size(TILER_CONTEXT)), + TILER_HEAP, cfg) { + cfg.size = pan_kmod_bo_size(dev->tiler_heap->bo); + cfg.base = dev->tiler_heap->addr.dev; + cfg.bottom = dev->tiler_heap->addr.dev; + cfg.top = cfg.base + cfg.size; + } + + pan_pack(batch->tiler.templ, TILER_CONTEXT, cfg) { + cfg.hierarchy_mask = 0x28; + cfg.fb_width = width; + cfg.fb_height = height; + cfg.heap = batch->tiler.descs.gpu + pan_size(TILER_CONTEXT); + } - panvk_per_arch(emit_tiler_context)(dev, width, height, &desc); memcpy(batch->tiler.descs.cpu, batch->tiler.templ, pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP)); batch->tiler.ctx.bifrost = batch->tiler.descs.gpu; @@ -451,11 +581,40 @@ panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf, draw->tiler_ctx = &batch->tiler.ctx; } +static mali_pixel_format +panvk_varying_hw_format(gl_shader_stage stage, gl_varying_slot loc, + enum pipe_format pfmt) +{ + switch (loc) { + case VARYING_SLOT_PNTC: + case VARYING_SLOT_PSIZ: +#if PAN_ARCH <= 6 + return (MALI_R16F << 12) | panfrost_get_default_swizzle(1); +#else + return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000; +#endif + case VARYING_SLOT_POS: +#if PAN_ARCH <= 6 + return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4); +#else + return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA; +#endif + default: + if (pfmt != PIPE_FORMAT_NONE) + return GENX(panfrost_format_from_pipe_format)(pfmt)->hw; + +#if PAN_ARCH >= 7 + return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000; +#else + return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0); +#endif + } +} + static void panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - struct panvk_device *dev = 
to_panvk_device(cmdbuf->vk.base.device); const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS); struct panvk_varyings_info *varyings = &cmdbuf->state.varyings; @@ -466,8 +625,21 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, unsigned buf_count = panvk_varyings_buf_count(varyings); struct panfrost_ptr bufs = pan_pool_alloc_desc_array( &cmdbuf->desc_pool.base, buf_count + 1, ATTRIBUTE_BUFFER); + struct mali_attribute_buffer_packed *buf_descs = bufs.cpu; - panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu); + for (unsigned i = 0, buf_idx = 0; i < PANVK_VARY_BUF_MAX; i++) { + if (varyings->buf_mask & (1 << i)) { + pan_pack(&buf_descs[buf_idx], ATTRIBUTE_BUFFER, cfg) { + unsigned offset = varyings->buf[buf_idx].address & 63; + + cfg.stride = varyings->buf[buf_idx].stride; + cfg.size = varyings->buf[buf_idx].size + offset; + cfg.pointer = varyings->buf[buf_idx].address & ~63ULL; + } + + buf_idx++; + } + } /* We need an empty entry to stop prefetching on Bifrost */ memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * buf_count), 0, @@ -500,9 +672,20 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, struct panfrost_ptr attribs = pan_pool_alloc_desc_array( &cmdbuf->desc_pool.base, varyings->stage[s].count, ATTRIBUTE); + struct mali_attribute_packed *attrib_descs = attribs.cpu; - panvk_per_arch(emit_varyings)(dev, varyings, s, attribs.cpu); draw->stages[s].varyings = attribs.gpu; + for (unsigned i = 0; i < varyings->stage[s].count; i++) { + gl_varying_slot loc = varyings->stage[s].loc[i]; + + pan_pack(&attrib_descs[i], ATTRIBUTE, cfg) { + cfg.buffer_index = varyings->varying[loc].buf; + cfg.offset = varyings->varying[loc].offset; + cfg.offset_enable = false; + cfg.format = + panvk_varying_hw_format(s, loc, varyings->varying[loc].format); + } + } } } @@ -563,11 +746,95 @@ panvk_prepare_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf, desc_state->non_vs_attribs = attribs.gpu; } +static void 
+panvk_draw_emit_attrib_buf(const struct panvk_draw_info *draw, + const struct panvk_attrib_buf_info *buf_info, + const struct panvk_attrib_buf *buf, void *desc) +{ + mali_ptr addr = buf->address & ~63ULL; + unsigned size = buf->size + (buf->address & 63); + unsigned divisor = draw->padded_vertex_count * buf_info->instance_divisor; + + /* TODO: support instanced arrays */ + if (draw->instance_count <= 1) { + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.stride = buf_info->per_instance ? 0 : buf_info->stride; + cfg.pointer = addr; + cfg.size = size; + } + } else if (!buf_info->per_instance) { + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS; + cfg.divisor = draw->padded_vertex_count; + cfg.stride = buf_info->stride; + cfg.pointer = addr; + cfg.size = size; + } + } else if (!divisor) { + /* instance_divisor == 0 means all instances share the same value. + * Make it a 1D array with a zero stride. + */ + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.stride = 0; + cfg.pointer = addr; + cfg.size = size; + } + } else if (util_is_power_of_two_or_zero(divisor)) { + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; + cfg.stride = buf_info->stride; + cfg.pointer = addr; + cfg.size = size; + cfg.divisor_r = __builtin_ctz(divisor); + } + } else { + unsigned divisor_r = 0, divisor_e = 0; + unsigned divisor_num = + panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e); + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; + cfg.stride = buf_info->stride; + cfg.pointer = addr; + cfg.size = size; + cfg.divisor_r = divisor_r; + cfg.divisor_e = divisor_e; + } + + desc += pan_size(ATTRIBUTE_BUFFER); + pan_pack(desc, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { + cfg.divisor_numerator = divisor_num; + cfg.divisor = buf_info->instance_divisor; + } + } +} + +static void 
+panvk_draw_emit_attrib(const struct panvk_draw_info *draw, + const struct panvk_attrib_info *attrib_info, + const struct panvk_attrib_buf_info *buf_info, + const struct panvk_attrib_buf *buf, void *desc) +{ + enum pipe_format f = attrib_info->format; + unsigned buf_idx = attrib_info->buf; + + pan_pack(desc, ATTRIBUTE, cfg) { + cfg.buffer_index = buf_idx * 2; + cfg.offset = attrib_info->offset + (buf->address & 63); + cfg.offset_enable = true; + + if (buf_info->per_instance) + cfg.offset += draw->first_instance * buf_info->stride; + + cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw; + } +} + static void panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct panvk_cmd_bind_point_state *bind_point_state = panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS); struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; @@ -591,14 +858,24 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, unsigned attrib_buf_count = pipeline->attribs.buf_count * 2; struct panfrost_ptr bufs = pan_pool_alloc_desc_array( &cmdbuf->desc_pool.base, attrib_buf_count + 1, ATTRIBUTE_BUFFER); + struct mali_attribute_buffer_packed *attrib_buf_descs = bufs.cpu; struct panfrost_ptr attribs = pan_pool_alloc_desc_array( &cmdbuf->desc_pool.base, attrib_count, ATTRIBUTE); + struct mali_attribute_packed *attrib_descs = attribs.cpu; - panvk_per_arch(emit_attrib_bufs)(&pipeline->attribs, cmdbuf->state.vb.bufs, - cmdbuf->state.vb.count, draw, bufs.cpu); - panvk_per_arch(emit_attribs)(dev, draw, &pipeline->attribs, - cmdbuf->state.vb.bufs, cmdbuf->state.vb.count, - attribs.cpu); + for (unsigned i = 0; i < pipeline->attribs.buf_count; i++) { + panvk_draw_emit_attrib_buf(draw, &pipeline->attribs.buf[i], + &cmdbuf->state.vb.bufs[i], + &attrib_buf_descs[i * 2]); + } + + for (unsigned i = 0; i < pipeline->attribs.attrib_count; i++) { + unsigned buf_idx = 
pipeline->attribs.attrib[i].buf; + + panvk_draw_emit_attrib(draw, &pipeline->attribs.attrib[i], + &pipeline->attribs.buf[buf_idx], + &cmdbuf->state.vb.bufs[buf_idx], &attrib_descs[i]); + } if (attrib_count > pipeline->attribs.buf_count) { unsigned bufs_offset = @@ -641,6 +918,42 @@ panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf, } } +void +panvk_per_arch(emit_viewport)(const VkViewport *viewport, + const VkRect2D *scissor, void *vpd) +{ + /* The spec says "width must be greater than 0.0" */ + assert(viewport->x >= 0); + int minx = (int)viewport->x; + int maxx = (int)(viewport->x + viewport->width); + + /* Viewport height can be negative */ + int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height)); + int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height)); + + assert(scissor->offset.x >= 0 && scissor->offset.y >= 0); + minx = MAX2(scissor->offset.x, minx); + miny = MAX2(scissor->offset.y, miny); + maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx); + maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy); + + /* Make sure we don't end up with a max < min when width/height is 0 */ + maxx = maxx > minx ? maxx - 1 : maxx; + maxy = maxy > miny ? 
maxy - 1 : maxy; + + assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f); + assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f); + + pan_pack(vpd, VIEWPORT, cfg) { + cfg.scissor_minimum_x = minx; + cfg.scissor_minimum_y = miny; + cfg.scissor_maximum_x = maxx; + cfg.scissor_maximum_y = maxy; + cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth); + cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth); + } +} + static void panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) @@ -682,7 +995,121 @@ panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.vertex = ptr; - panvk_per_arch(emit_vertex_job)(pipeline, draw, ptr.cpu); + + memcpy(pan_section_ptr(ptr.cpu, COMPUTE_JOB, INVOCATION), &draw->invocation, + pan_size(INVOCATION)); + + pan_section_pack(ptr.cpu, COMPUTE_JOB, PARAMETERS, cfg) { + cfg.job_task_split = 5; + } + + pan_section_pack(ptr.cpu, COMPUTE_JOB, DRAW, cfg) { + cfg.state = pipeline->rsds[MESA_SHADER_VERTEX]; + cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes; + cfg.attribute_buffers = draw->stages[MESA_SHADER_VERTEX].attribute_bufs; + cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings; + cfg.varying_buffers = draw->varying_bufs; + cfg.thread_storage = draw->tls; + cfg.offset_start = draw->offset_start; + cfg.instance_size = + draw->instance_count > 1 ? 
draw->padded_vertex_count : 1; + cfg.uniform_buffers = draw->ubos; + cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants; + cfg.textures = draw->textures; + cfg.samplers = draw->samplers; + } +} + +static void +panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline, + const struct panvk_draw_info *draw, void *prim) +{ + pan_pack(prim, PRIMITIVE, cfg) { + cfg.draw_mode = pipeline->ia.topology; + if (pipeline->ia.writes_point_size) + cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16; + + cfg.first_provoking_vertex = true; + if (pipeline->ia.primitive_restart) + cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT; + cfg.job_task_split = 6; + + if (draw->index_size) { + cfg.index_count = draw->index_count; + cfg.indices = draw->indices; + cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start; + + switch (draw->index_size) { + case 32: + cfg.index_type = MALI_INDEX_TYPE_UINT32; + break; + case 16: + cfg.index_type = MALI_INDEX_TYPE_UINT16; + break; + case 8: + cfg.index_type = MALI_INDEX_TYPE_UINT8; + break; + default: + unreachable("Invalid index size"); + } + } else { + cfg.index_count = draw->vertex_count; + cfg.index_type = MALI_INDEX_TYPE_NONE; + } + } +} + +static void +panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline, + const struct panvk_draw_info *draw, + void *primsz) +{ + pan_pack(primsz, PRIMITIVE_SIZE, cfg) { + if (pipeline->ia.writes_point_size) { + cfg.size_array = draw->psiz; + } else { + cfg.constant = draw->line_width; + } + } +} + +static void +panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline, + const struct panvk_draw_info *draw, void *dcd) +{ + pan_pack(dcd, DRAW, cfg) { + cfg.front_face_ccw = pipeline->rast.front_ccw; + cfg.cull_front_face = pipeline->rast.cull_front_face; + cfg.cull_back_face = pipeline->rast.cull_back_face; + cfg.position = draw->position; + cfg.state = draw->fs_rsd; + cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes; + 
cfg.attribute_buffers = draw->stages[MESA_SHADER_FRAGMENT].attribute_bufs; + cfg.viewport = draw->viewport; + cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings; + cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0; + cfg.thread_storage = draw->tls; + + /* For all primitives but lines DRAW.flat_shading_vertex must + * be set to 0 and the provoking vertex is selected with the + * PRIMITIVE.first_provoking_vertex field. + */ + if (pipeline->ia.topology == MALI_DRAW_MODE_LINES || + pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP || + pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) { + cfg.flat_shading_vertex = true; + } + + cfg.offset_start = draw->offset_start; + cfg.instance_size = + draw->instance_count > 1 ? draw->padded_vertex_count : 1; + cfg.uniform_buffers = draw->ubos; + cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants; + cfg.textures = draw->textures; + cfg.samplers = draw->samplers; + + /* TODO: occlusion queries */ + } } static void @@ -697,7 +1124,25 @@ panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.tiler = ptr; - panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu); + + memcpy(pan_section_ptr(ptr.cpu, TILER_JOB, INVOCATION), &draw->invocation, + pan_size(INVOCATION)); + + panvk_emit_tiler_primitive(pipeline, draw, + pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE)); + + panvk_emit_tiler_primitive_size( + pipeline, draw, pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE_SIZE)); + + panvk_emit_tiler_dcd(pipeline, draw, + pan_section_ptr(ptr.cpu, TILER_JOB, DRAW)); + + pan_section_pack(ptr.cpu, TILER_JOB, TILER, cfg) { + cfg.address = draw->tiler_ctx->bifrost; + } + + pan_section_pack(ptr.cpu, TILER_JOB, PADDING, padding) + ; } static void @@ -1177,7 +1622,28 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, panvk_cmd_prepare_samplers(cmdbuf, bind_point_state); dispatch.samplers = desc_state->samplers; - 
panvk_per_arch(emit_compute_job)(pipeline, &dispatch, job.cpu); + panfrost_pack_work_groups_compute( + pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), dispatch.wg_count.x, + dispatch.wg_count.y, dispatch.wg_count.z, pipeline->cs.local_size.x, + pipeline->cs.local_size.y, pipeline->cs.local_size.z, false, false); + + pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { + cfg.job_task_split = util_logbase2_ceil(pipeline->cs.local_size.x + 1) + + util_logbase2_ceil(pipeline->cs.local_size.y + 1) + + util_logbase2_ceil(pipeline->cs.local_size.z + 1); + } + + pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { + cfg.state = pipeline->rsds[MESA_SHADER_COMPUTE]; + cfg.attributes = dispatch.attributes; + cfg.attribute_buffers = dispatch.attribute_bufs; + cfg.thread_storage = dispatch.tsd; + cfg.uniform_buffers = dispatch.ubos; + cfg.push_uniforms = dispatch.push_uniforms; + cfg.textures = dispatch.textures; + cfg.samplers = dispatch.samplers; + } + pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0, &job, false); diff --git a/src/panfrost/vulkan/panvk_vX_cmd_buffer.h b/src/panfrost/vulkan/panvk_vX_cmd_buffer.h index 6da4977bf61..7974719104f 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_buffer.h +++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.h @@ -45,3 +45,6 @@ void panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx); void panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf); + +void panvk_per_arch(emit_viewport)(const VkViewport *viewport, + const VkRect2D *scissor, void *vpd); diff --git a/src/panfrost/vulkan/panvk_vX_cs.c b/src/panfrost/vulkan/panvk_vX_cs.c deleted file mode 100644 index a6fa027ab4a..00000000000 --- a/src/panfrost/vulkan/panvk_vX_cs.c +++ /dev/null @@ -1,785 +0,0 @@ -/* - * Copyright (C) 2021 Collabora Ltd. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "genxml/gen_macros.h" - -#include "compiler/shader_enums.h" -#include "util/macros.h" - -#include "vk_util.h" - -#include "pan_desc.h" -#include "pan_earlyzs.h" -#include "pan_encoder.h" -#include "pan_pool.h" -#include "pan_shader.h" - -#include "panvk_buffer.h" -#include "panvk_cs.h" -#include "panvk_pipeline.h" -#include "panvk_pipeline_layout.h" -#include "panvk_private.h" -#include "panvk_varyings.h" - -#include "vk_sampler.h" - -static mali_pixel_format -panvk_varying_hw_format(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - gl_shader_stage stage, unsigned idx) -{ - gl_varying_slot loc = varyings->stage[stage].loc[idx]; - - switch (loc) { - case VARYING_SLOT_PNTC: - case VARYING_SLOT_PSIZ: -#if PAN_ARCH <= 6 - return (MALI_R16F << 12) | panfrost_get_default_swizzle(1); -#else - return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000; -#endif - case VARYING_SLOT_POS: -#if PAN_ARCH <= 6 - return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4); -#else - return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA; -#endif - default: - if (varyings->varying[loc].format != PIPE_FORMAT_NONE) { - enum pipe_format f = varyings->varying[loc].format; - - return GENX(panfrost_format_from_pipe_format)(f)->hw; - } -#if PAN_ARCH >= 7 - return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000; -#else - return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0); -#endif - } -} - -static void -panvk_emit_varying(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - gl_shader_stage stage, unsigned idx, void *attrib) -{ - gl_varying_slot loc = varyings->stage[stage].loc[idx]; - - pan_pack(attrib, ATTRIBUTE, cfg) { - cfg.buffer_index = varyings->varying[loc].buf; - cfg.offset = varyings->varying[loc].offset; - cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx); - cfg.offset_enable = false; - } -} - -void -panvk_per_arch(emit_varyings)(const struct panvk_device *dev, - const struct 
panvk_varyings_info *varyings, - gl_shader_stage stage, void *descs) -{ - struct mali_attribute_packed *attrib = descs; - - for (unsigned i = 0; i < varyings->stage[stage].count; i++) - panvk_emit_varying(dev, varyings, stage, i, attrib++); -} - -static void -panvk_emit_varying_buf(const struct panvk_varyings_info *varyings, - enum panvk_varying_buf_id id, void *buf) -{ - unsigned buf_idx = panvk_varying_buf_index(varyings, id); - - pan_pack(buf, ATTRIBUTE_BUFFER, cfg) { - unsigned offset = varyings->buf[buf_idx].address & 63; - - cfg.stride = varyings->buf[buf_idx].stride; - cfg.size = varyings->buf[buf_idx].size + offset; - cfg.pointer = varyings->buf[buf_idx].address & ~63ULL; - } -} - -void -panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings, - void *descs) -{ - struct mali_attribute_buffer_packed *buf = descs; - - for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { - if (varyings->buf_mask & (1 << i)) - panvk_emit_varying_buf(varyings, i, buf++); - } -} - -static void -panvk_emit_attrib_buf(const struct panvk_attribs_info *info, - const struct panvk_draw_info *draw, - const struct panvk_attrib_buf *bufs, unsigned buf_count, - unsigned idx, void *desc) -{ - const struct panvk_attrib_buf_info *buf_info = &info->buf[idx]; - - assert(idx < buf_count); - const struct panvk_attrib_buf *buf = &bufs[idx]; - mali_ptr addr = buf->address & ~63ULL; - unsigned size = buf->size + (buf->address & 63); - unsigned divisor = draw->padded_vertex_count * buf_info->instance_divisor; - - /* TODO: support instanced arrays */ - if (draw->instance_count <= 1) { - pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { - cfg.type = MALI_ATTRIBUTE_TYPE_1D; - cfg.stride = buf_info->per_instance ? 
0 : buf_info->stride; - cfg.pointer = addr; - cfg.size = size; - } - } else if (!buf_info->per_instance) { - pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { - cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS; - cfg.divisor = draw->padded_vertex_count; - cfg.stride = buf_info->stride; - cfg.pointer = addr; - cfg.size = size; - } - } else if (!divisor) { - /* instance_divisor == 0 means all instances share the same value. - * Make it a 1D array with a zero stride. - */ - pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { - cfg.type = MALI_ATTRIBUTE_TYPE_1D; - cfg.stride = 0; - cfg.pointer = addr; - cfg.size = size; - } - } else if (util_is_power_of_two_or_zero(divisor)) { - pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { - cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; - cfg.stride = buf_info->stride; - cfg.pointer = addr; - cfg.size = size; - cfg.divisor_r = __builtin_ctz(divisor); - } - } else { - unsigned divisor_r = 0, divisor_e = 0; - unsigned divisor_num = - panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e); - pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { - cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; - cfg.stride = buf_info->stride; - cfg.pointer = addr; - cfg.size = size; - cfg.divisor_r = divisor_r; - cfg.divisor_e = divisor_e; - } - - desc += pan_size(ATTRIBUTE_BUFFER); - pan_pack(desc, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { - cfg.divisor_numerator = divisor_num; - cfg.divisor = buf_info->instance_divisor; - } - } -} - -void -panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, - const struct panvk_draw_info *draw, - void *descs) -{ - struct mali_attribute_buffer_packed *buf = descs; - - for (unsigned i = 0; i < info->buf_count; i++) { - panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf); - buf += 2; - } -} - -static void -panvk_emit_attrib(const struct panvk_device *dev, - const struct panvk_draw_info *draw, - const struct panvk_attribs_info *attribs, - const struct panvk_attrib_buf 
*bufs, unsigned buf_count, - unsigned idx, void *attrib) -{ - enum pipe_format f = attribs->attrib[idx].format; - unsigned buf_idx = attribs->attrib[idx].buf; - const struct panvk_attrib_buf_info *buf_info = &attribs->buf[buf_idx]; - - pan_pack(attrib, ATTRIBUTE, cfg) { - cfg.buffer_index = buf_idx * 2; - cfg.offset = attribs->attrib[idx].offset + (bufs[buf_idx].address & 63); - cfg.offset_enable = true; - - if (buf_info->per_instance) - cfg.offset += draw->first_instance * buf_info->stride; - - cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw; - } -} - -void -panvk_per_arch(emit_attribs)(const struct panvk_device *dev, - const struct panvk_draw_info *draw, - const struct panvk_attribs_info *attribs, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, void *descs) -{ - struct mali_attribute_packed *attrib = descs; - - for (unsigned i = 0; i < attribs->attrib_count; i++) - panvk_emit_attrib(dev, draw, attribs, bufs, buf_count, i, attrib++); -} - -void -panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc) -{ - pan_pack(desc, UNIFORM_BUFFER, cfg) { - cfg.pointer = address; - cfg.entries = DIV_ROUND_UP(size, 16); - } -} - -void -panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline, - const struct panvk_descriptor_state *state, - void *descs) -{ - struct mali_uniform_buffer_packed *ubos = descs; - - panvk_per_arch(emit_ubo)(state->sysvals_ptr, sizeof(state->sysvals), - &ubos[PANVK_SYSVAL_UBO_INDEX]); - - if (pipeline->layout->push_constants.size) { - panvk_per_arch(emit_ubo)( - state->push_constants, - ALIGN_POT(pipeline->layout->push_constants.size, 16), - &ubos[PANVK_PUSH_CONST_UBO_INDEX]); - } else { - memset(&ubos[PANVK_PUSH_CONST_UBO_INDEX], 0, sizeof(*ubos)); - } - - for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) { - const struct panvk_descriptor_set_layout *set_layout = - vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]); - const struct panvk_descriptor_set *set = state->sets[s]; - - 
unsigned ubo_start = - panvk_per_arch(pipeline_layout_ubo_start)(pipeline->layout, s, false); - - if (!set) { - unsigned all_ubos = set_layout->num_ubos + set_layout->num_dyn_ubos; - memset(&ubos[ubo_start], 0, all_ubos * sizeof(*ubos)); - } else { - memcpy(&ubos[ubo_start], set->ubos, - set_layout->num_ubos * sizeof(*ubos)); - - unsigned dyn_ubo_start = panvk_per_arch(pipeline_layout_ubo_start)( - pipeline->layout, s, true); - - for (unsigned i = 0; i < set_layout->num_dyn_ubos; i++) { - const struct panvk_buffer_desc *bdesc = - &state->dyn.ubos[pipeline->layout->sets[s].dyn_ubo_offset + i]; - - mali_ptr address = - panvk_buffer_gpu_ptr(bdesc->buffer, bdesc->offset); - size_t size = - panvk_buffer_range(bdesc->buffer, bdesc->offset, bdesc->size); - if (size) { - panvk_per_arch(emit_ubo)(address, size, - &ubos[dyn_ubo_start + i]); - } else { - memset(&ubos[dyn_ubo_start + i], 0, sizeof(*ubos)); - } - } - } - } -} - -void -panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, void *job) -{ - void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION); - - memcpy(section, &draw->invocation, pan_size(INVOCATION)); - - pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = 5; - } - - pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) { - cfg.state = pipeline->rsds[MESA_SHADER_VERTEX]; - cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes; - cfg.attribute_buffers = draw->stages[MESA_SHADER_VERTEX].attribute_bufs; - cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings; - cfg.varying_buffers = draw->varying_bufs; - cfg.thread_storage = draw->tls; - cfg.offset_start = draw->offset_start; - cfg.instance_size = - draw->instance_count > 1 ? 
draw->padded_vertex_count : 1; - cfg.uniform_buffers = draw->ubos; - cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants; - cfg.textures = draw->textures; - cfg.samplers = draw->samplers; - } -} - -void -panvk_per_arch(emit_compute_job)(const struct panvk_pipeline *pipeline, - const struct panvk_dispatch_info *dispatch, - void *job) -{ - panfrost_pack_work_groups_compute( - pan_section_ptr(job, COMPUTE_JOB, INVOCATION), dispatch->wg_count.x, - dispatch->wg_count.y, dispatch->wg_count.z, pipeline->cs.local_size.x, - pipeline->cs.local_size.y, pipeline->cs.local_size.z, false, false); - - pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = util_logbase2_ceil(pipeline->cs.local_size.x + 1) + - util_logbase2_ceil(pipeline->cs.local_size.y + 1) + - util_logbase2_ceil(pipeline->cs.local_size.z + 1); - } - - pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) { - cfg.state = pipeline->rsds[MESA_SHADER_COMPUTE]; - cfg.attributes = dispatch->attributes; - cfg.attribute_buffers = dispatch->attribute_bufs; - cfg.thread_storage = dispatch->tsd; - cfg.uniform_buffers = dispatch->ubos; - cfg.push_uniforms = dispatch->push_uniforms; - cfg.textures = dispatch->textures; - cfg.samplers = dispatch->samplers; - } -} - -static void -panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, void *prim) -{ - pan_pack(prim, PRIMITIVE, cfg) { - cfg.draw_mode = pipeline->ia.topology; - if (pipeline->ia.writes_point_size) - cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16; - - cfg.first_provoking_vertex = true; - if (pipeline->ia.primitive_restart) - cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT; - cfg.job_task_split = 6; - - if (draw->index_size) { - cfg.index_count = draw->index_count; - cfg.indices = draw->indices; - cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start; - - switch (draw->index_size) { - case 32: - cfg.index_type = MALI_INDEX_TYPE_UINT32; - break; - case 
16: - cfg.index_type = MALI_INDEX_TYPE_UINT16; - break; - case 8: - cfg.index_type = MALI_INDEX_TYPE_UINT8; - break; - default: - unreachable("Invalid index size"); - } - } else { - cfg.index_count = draw->vertex_count; - cfg.index_type = MALI_INDEX_TYPE_NONE; - } - } -} - -static void -panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, - void *primsz) -{ - pan_pack(primsz, PRIMITIVE_SIZE, cfg) { - if (pipeline->ia.writes_point_size) { - cfg.size_array = draw->psiz; - } else { - cfg.constant = draw->line_width; - } - } -} - -static void -panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, void *dcd) -{ - pan_pack(dcd, DRAW, cfg) { - cfg.front_face_ccw = pipeline->rast.front_ccw; - cfg.cull_front_face = pipeline->rast.cull_front_face; - cfg.cull_back_face = pipeline->rast.cull_back_face; - cfg.position = draw->position; - cfg.state = draw->fs_rsd; - cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes; - cfg.attribute_buffers = draw->stages[MESA_SHADER_FRAGMENT].attribute_bufs; - cfg.viewport = draw->viewport; - cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings; - cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0; - cfg.thread_storage = draw->tls; - - /* For all primitives but lines DRAW.flat_shading_vertex must - * be set to 0 and the provoking vertex is selected with the - * PRIMITIVE.first_provoking_vertex field. - */ - if (pipeline->ia.topology == MALI_DRAW_MODE_LINES || - pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP || - pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) { - cfg.flat_shading_vertex = true; - } - - cfg.offset_start = draw->offset_start; - cfg.instance_size = - draw->instance_count > 1 ? 
draw->padded_vertex_count : 1; - cfg.uniform_buffers = draw->ubos; - cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants; - cfg.textures = draw->textures; - cfg.samplers = draw->samplers; - - /* TODO: occlusion queries */ - } -} - -void -panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, void *job) -{ - void *section; - - section = pan_section_ptr(job, TILER_JOB, INVOCATION); - memcpy(section, &draw->invocation, pan_size(INVOCATION)); - - section = pan_section_ptr(job, TILER_JOB, PRIMITIVE); - panvk_emit_tiler_primitive(pipeline, draw, section); - - section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE); - panvk_emit_tiler_primitive_size(pipeline, draw, section); - - section = pan_section_ptr(job, TILER_JOB, DRAW); - panvk_emit_tiler_dcd(pipeline, draw, section); - - pan_section_pack(job, TILER_JOB, TILER, cfg) { - cfg.address = draw->tiler_ctx->bifrost; - } - pan_section_pack(job, TILER_JOB, PADDING, padding) - ; -} - -void -panvk_per_arch(emit_viewport)(const VkViewport *viewport, - const VkRect2D *scissor, void *vpd) -{ - /* The spec says "width must be greater than 0.0" */ - assert(viewport->x >= 0); - int minx = (int)viewport->x; - int maxx = (int)(viewport->x + viewport->width); - - /* Viewport height can be negative */ - int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height)); - int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height)); - - assert(scissor->offset.x >= 0 && scissor->offset.y >= 0); - miny = MAX2(scissor->offset.x, minx); - miny = MAX2(scissor->offset.y, miny); - maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx); - maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy); - - /* Make sure we don't end up with a max < min when width/height is 0 */ - maxx = maxx > minx ? maxx - 1 : maxx; - maxy = maxy > miny ? 
maxy - 1 : maxy; - - assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f); - assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f); - - pan_pack(vpd, VIEWPORT, cfg) { - cfg.scissor_minimum_x = minx; - cfg.scissor_minimum_y = miny; - cfg.scissor_maximum_x = maxx; - cfg.scissor_maximum_y = maxy; - cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth); - cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth); - } -} - -static enum mali_register_file_format -bifrost_blend_type_from_nir(nir_alu_type nir_type) -{ - switch (nir_type) { - case 0: /* Render target not in use */ - return 0; - case nir_type_float16: - return MALI_REGISTER_FILE_FORMAT_F16; - case nir_type_float32: - return MALI_REGISTER_FILE_FORMAT_F32; - case nir_type_int32: - return MALI_REGISTER_FILE_FORMAT_I32; - case nir_type_uint32: - return MALI_REGISTER_FILE_FORMAT_U32; - case nir_type_int16: - return MALI_REGISTER_FILE_FORMAT_I16; - case nir_type_uint16: - return MALI_REGISTER_FILE_FORMAT_U16; - default: - unreachable("Unsupported blend shader type for NIR alu type"); - } -} - -void -panvk_per_arch(emit_blend)(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, unsigned rt, - void *bd) -{ - const struct pan_blend_state *blend = &pipeline->blend.state; - const struct pan_blend_rt_state *rts = &blend->rts[rt]; - bool dithered = false; - - pan_pack(bd, BLEND, cfg) { - if (!blend->rt_count || !rts->equation.color_mask) { - cfg.enable = false; - cfg.internal.mode = MALI_BLEND_MODE_OFF; - continue; - } - - cfg.srgb = util_format_is_srgb(rts->format); - cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation); - cfg.round_to_fb_precision = !dithered; - - const struct util_format_description *format_desc = - util_format_description(rts->format); - unsigned chan_size = 0; - for (unsigned i = 0; i < format_desc->nr_channels; i++) - chan_size = MAX2(format_desc->channel[i].size, chan_size); - - 
pan_blend_to_fixed_function_equation(blend->rts[rt].equation, - &cfg.equation); - - /* Fixed point constant */ - float fconst = pan_blend_get_constant( - pan_blend_constant_mask(blend->rts[rt].equation), blend->constants); - u16 constant = fconst * ((1 << chan_size) - 1); - constant <<= 16 - chan_size; - cfg.constant = constant; - - if (pan_blend_is_opaque(blend->rts[rt].equation)) { - cfg.internal.mode = MALI_BLEND_MODE_OPAQUE; - } else { - cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION; - - cfg.internal.fixed_function.alpha_zero_nop = - pan_blend_alpha_zero_nop(blend->rts[rt].equation); - cfg.internal.fixed_function.alpha_one_store = - pan_blend_alpha_one_store(blend->rts[rt].equation); - } - - /* If we want the conversion to work properly, - * num_comps must be set to 4 - */ - cfg.internal.fixed_function.num_comps = 4; - cfg.internal.fixed_function.conversion.memory_format = - GENX(panfrost_dithered_format_from_pipe_format)(rts->format, dithered); - cfg.internal.fixed_function.conversion.register_format = - bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type); - cfg.internal.fixed_function.rt = rt; - } -} - -void -panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - unsigned rt, const float *constants, - void *bd) -{ - float constant = constants[pipeline->blend.constant[rt].index]; - - pan_pack(bd, BLEND, cfg) { - cfg.enable = false; - cfg.constant = constant * pipeline->blend.constant[rt].bifrost_factor; - } -} - -void -panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline, - const struct panvk_cmd_state *state, void *rsd) -{ - pan_pack(rsd, RENDERER_STATE, cfg) { - if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { - cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f; - cfg.depth_factor = state->rast.depth_bias.slope_factor; - cfg.depth_bias_clamp = state->rast.depth_bias.clamp; - } - - if (pipeline->dynamic_state_mask & - (1 << 
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { - cfg.stencil_front.mask = state->zs.s_front.compare_mask; - cfg.stencil_back.mask = state->zs.s_back.compare_mask; - } - - if (pipeline->dynamic_state_mask & - (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { - cfg.stencil_mask_misc.stencil_mask_front = - state->zs.s_front.write_mask; - cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask; - } - - if (pipeline->dynamic_state_mask & - (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { - cfg.stencil_front.reference_value = state->zs.s_front.ref; - cfg.stencil_back.reference_value = state->zs.s_back.ref; - } - } -} - -void -panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - void *rsd) -{ - const struct pan_shader_info *info = &pipeline->fs.info; - - pan_pack(rsd, RENDERER_STATE, cfg) { - if (pipeline->fs.required) { - pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg); - - uint8_t rt_written = - pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0; - uint8_t rt_mask = pipeline->fs.rt_mask; - cfg.properties.allow_forward_pixel_to_kill = - pipeline->fs.info.fs.can_fpk && !(rt_mask & ~rt_written) && - !pipeline->ms.alpha_to_coverage && !pipeline->blend.reads_dest; - - bool writes_zs = pipeline->zs.z_write || pipeline->zs.s_test; - bool zs_always_passes = !pipeline->zs.z_test && !pipeline->zs.s_test; - bool oq = false; /* TODO: Occlusion queries */ - - struct pan_earlyzs_state earlyzs = - pan_earlyzs_get(pan_earlyzs_analyze(info), writes_zs || oq, - pipeline->ms.alpha_to_coverage, zs_always_passes); - - cfg.properties.pixel_kill_operation = earlyzs.kill; - cfg.properties.zs_update_operation = earlyzs.update; - } else { - cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; - cfg.properties.allow_forward_pixel_to_kill = true; - cfg.properties.allow_forward_pixel_to_be_killed = true; - cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; - } - - bool msaa = pipeline->ms.rast_samples > 
1; - cfg.multisample_misc.multisample_enable = msaa; - cfg.multisample_misc.sample_mask = - msaa ? pipeline->ms.sample_mask : UINT16_MAX; - - cfg.multisample_misc.depth_function = - pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS; - - cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write; - cfg.multisample_misc.fixed_function_near_discard = - !pipeline->rast.clamp_depth; - cfg.multisample_misc.fixed_function_far_discard = - !pipeline->rast.clamp_depth; - cfg.multisample_misc.shader_depth_range_fixed = true; - - cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test; - cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage; - cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; - cfg.stencil_mask_misc.front_facing_depth_bias = - pipeline->rast.depth_bias.enable; - cfg.stencil_mask_misc.back_facing_depth_bias = - pipeline->rast.depth_bias.enable; - cfg.stencil_mask_misc.single_sampled_lines = - pipeline->ms.rast_samples <= 1; - - if (!(pipeline->dynamic_state_mask & - (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) { - cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f; - cfg.depth_factor = pipeline->rast.depth_bias.slope_factor; - cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp; - } - - if (!(pipeline->dynamic_state_mask & - (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) { - cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask; - cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask; - } - - if (!(pipeline->dynamic_state_mask & - (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) { - cfg.stencil_mask_misc.stencil_mask_front = - pipeline->zs.s_front.write_mask; - cfg.stencil_mask_misc.stencil_mask_back = - pipeline->zs.s_back.write_mask; - } - - if (!(pipeline->dynamic_state_mask & - (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) { - cfg.stencil_front.reference_value = pipeline->zs.s_front.ref; - cfg.stencil_back.reference_value = pipeline->zs.s_back.ref; - } - - 
cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func; - cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op; - cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op; - cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op; - cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func; - cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op; - cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op; - cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op; - } -} - -void -panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev, - const struct pan_shader_info *shader_info, - mali_ptr shader_ptr, void *rsd) -{ - assert(shader_info->stage != MESA_SHADER_FRAGMENT); - - pan_pack(rsd, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg); - } -} - -void -panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev, - unsigned width, unsigned height, - const struct panfrost_ptr *descs) -{ - pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) { - cfg.size = pan_kmod_bo_size(dev->tiler_heap->bo); - cfg.base = dev->tiler_heap->addr.dev; - cfg.bottom = dev->tiler_heap->addr.dev; - cfg.top = cfg.base + cfg.size; - } - - pan_pack(descs->cpu, TILER_CONTEXT, cfg) { - cfg.hierarchy_mask = 0x28; - cfg.fb_width = width; - cfg.fb_height = height; - cfg.heap = descs->gpu + pan_size(TILER_CONTEXT); - } -} diff --git a/src/panfrost/vulkan/panvk_vX_cs.h b/src/panfrost/vulkan/panvk_vX_cs.h deleted file mode 100644 index 434a6906408..00000000000 --- a/src/panfrost/vulkan/panvk_vX_cs.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (C) 2021 Collabora Ltd. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef PANVK_PRIVATE_H -#error "Must be included from panvk_private.h" -#endif - -#ifndef PAN_ARCH -#error "no arch" -#endif - -#include "compiler/shader_enums.h" -#include - -struct panvk_attribs_info; - -void panvk_per_arch(emit_varying)(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - gl_shader_stage stage, unsigned idx, - void *attrib); - -void panvk_per_arch(emit_varyings)(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - gl_shader_stage stage, void *descs); - -void - panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings, - void *descs); - -void panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, - const struct panvk_draw_info *draw, - void *descs); - -void panvk_per_arch(emit_attribs)(const struct panvk_device *dev, - const struct panvk_draw_info *draw, - const struct panvk_attribs_info *attribs, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, void *descs); - -void panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc); - -void panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline, - const struct panvk_descriptor_state *state, - void *descs); - -void panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, - void *job); - -void - panvk_per_arch(emit_compute_job)(const struct panvk_pipeline *pipeline, - const struct panvk_dispatch_info *dispatch, - void *job); - -void panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, - void *job); - -void panvk_per_arch(emit_viewport)(const VkViewport *viewport, - const VkRect2D *scissor, void *vpd); - -void panvk_per_arch(emit_blend)(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - unsigned rt, void *bd); - -void panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev, - 
const struct panvk_pipeline *pipeline, - unsigned rt, const float *constants, - void *bd); - -void panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline, - const struct panvk_cmd_state *state, - void *rsd); - -void panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - void *rsd); - -void panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev, - const struct pan_shader_info *shader_info, - mali_ptr shader_ptr, void *rsd); - -void panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev, - unsigned width, unsigned height, - const struct panfrost_ptr *descs); diff --git a/src/panfrost/vulkan/panvk_vX_descriptor_set.c b/src/panfrost/vulkan/panvk_vX_descriptor_set.c index 609435221d8..50b7ebe8e1d 100644 --- a/src/panfrost/vulkan/panvk_vX_descriptor_set.c +++ b/src/panfrost/vulkan/panvk_vX_descriptor_set.c @@ -45,7 +45,6 @@ #include "vk_util.h" #include "panvk_buffer.h" -#include "panvk_cs.h" #include "panvk_descriptor_set.h" #include "panvk_descriptor_set_layout.h" #include "panvk_sampler.h" @@ -150,8 +149,10 @@ panvk_per_arch(descriptor_set_create)( struct mali_uniform_buffer_packed *ubos = set->ubos; - panvk_per_arch(emit_ubo)(set->desc_bo->addr.dev, layout->desc_ubo_size, - &ubos[layout->desc_ubo_index]); + pan_pack(&ubos[layout->desc_ubo_index], UNIFORM_BUFFER, cfg) { + cfg.pointer = set->desc_bo->addr.dev; + cfg.entries = DIV_ROUND_UP(layout->desc_ubo_size, 16); + } } for (unsigned i = 0; i < layout->binding_count; i++) { @@ -406,7 +407,10 @@ panvk_write_ubo_desc(struct panvk_descriptor_set *set, uint32_t binding, size_t size = panvk_buffer_range(buffer, pBufferInfo->offset, pBufferInfo->range); - panvk_per_arch(emit_ubo)(ptr, size, panvk_ubo_desc(set, binding, elem)); + pan_pack(panvk_ubo_desc(set, binding, elem), UNIFORM_BUFFER, cfg) { + cfg.pointer = ptr; + cfg.entries = DIV_ROUND_UP(size, 16); + } } static void diff --git a/src/panfrost/vulkan/panvk_vX_device.c 
b/src/panfrost/vulkan/panvk_vX_device.c index eb57eb28984..095a0ddc6e3 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -30,7 +30,6 @@ #include "decode.h" -#include "panvk_cs.h" #include "panvk_event.h" #include "panvk_image.h" #include "panvk_image_view.h" diff --git a/src/panfrost/vulkan/panvk_vX_pipeline.c b/src/panfrost/vulkan/panvk_vX_pipeline.c index 155a6870a85..7673b823e44 100644 --- a/src/panfrost/vulkan/panvk_vX_pipeline.c +++ b/src/panfrost/vulkan/panvk_vX_pipeline.c @@ -26,7 +26,6 @@ * DEALINGS IN THE SOFTWARE. */ -#include "panvk_cs.h" #include "panvk_pipeline.h" #include "panvk_pipeline_layout.h" #include "panvk_private.h" @@ -47,6 +46,9 @@ #include "panfrost/util/pan_lower_framebuffer.h" +#include "pan_earlyzs.h" +#include "pan_shader.h" + struct panvk_pipeline_builder { struct panvk_device *device; struct vk_pipeline_cache *cache; @@ -227,6 +229,195 @@ panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder, pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo; } +static void +panvk_pipeline_builder_emit_non_fs_rsd( + const struct pan_shader_info *shader_info, mali_ptr shader_ptr, void *rsd) +{ + assert(shader_info->stage != MESA_SHADER_FRAGMENT); + + pan_pack(rsd, RENDERER_STATE, cfg) { + pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg); + } +} + +static void +panvk_pipeline_builder_emit_base_fs_rsd(const struct panvk_pipeline *pipeline, + void *rsd) +{ + const struct pan_shader_info *info = &pipeline->fs.info; + + pan_pack(rsd, RENDERER_STATE, cfg) { + if (pipeline->fs.required) { + pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg); + + uint8_t rt_written = + pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0; + uint8_t rt_mask = pipeline->fs.rt_mask; + cfg.properties.allow_forward_pixel_to_kill = + pipeline->fs.info.fs.can_fpk && !(rt_mask & ~rt_written) && + !pipeline->ms.alpha_to_coverage && !pipeline->blend.reads_dest; + + bool writes_zs = pipeline->zs.z_write 
|| pipeline->zs.s_test; + bool zs_always_passes = !pipeline->zs.z_test && !pipeline->zs.s_test; + bool oq = false; /* TODO: Occlusion queries */ + + struct pan_earlyzs_state earlyzs = + pan_earlyzs_get(pan_earlyzs_analyze(info), writes_zs || oq, + pipeline->ms.alpha_to_coverage, zs_always_passes); + + cfg.properties.pixel_kill_operation = earlyzs.kill; + cfg.properties.zs_update_operation = earlyzs.update; + } else { + cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; + cfg.properties.allow_forward_pixel_to_kill = true; + cfg.properties.allow_forward_pixel_to_be_killed = true; + cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; + } + + bool msaa = pipeline->ms.rast_samples > 1; + cfg.multisample_misc.multisample_enable = msaa; + cfg.multisample_misc.sample_mask = + msaa ? pipeline->ms.sample_mask : UINT16_MAX; + + cfg.multisample_misc.depth_function = + pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS; + + cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write; + cfg.multisample_misc.fixed_function_near_discard = + !pipeline->rast.clamp_depth; + cfg.multisample_misc.fixed_function_far_discard = + !pipeline->rast.clamp_depth; + cfg.multisample_misc.shader_depth_range_fixed = true; + + cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test; + cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage; + cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; + cfg.stencil_mask_misc.front_facing_depth_bias = + pipeline->rast.depth_bias.enable; + cfg.stencil_mask_misc.back_facing_depth_bias = + pipeline->rast.depth_bias.enable; + cfg.stencil_mask_misc.single_sampled_lines = + pipeline->ms.rast_samples <= 1; + + if (!(pipeline->dynamic_state_mask & + (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) { + cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f; + cfg.depth_factor = pipeline->rast.depth_bias.slope_factor; + cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp; + } + + 
if (!(pipeline->dynamic_state_mask & + (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) { + cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask; + cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask; + } + + if (!(pipeline->dynamic_state_mask & + (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) { + cfg.stencil_mask_misc.stencil_mask_front = + pipeline->zs.s_front.write_mask; + cfg.stencil_mask_misc.stencil_mask_back = + pipeline->zs.s_back.write_mask; + } + + if (!(pipeline->dynamic_state_mask & + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) { + cfg.stencil_front.reference_value = pipeline->zs.s_front.ref; + cfg.stencil_back.reference_value = pipeline->zs.s_back.ref; + } + + cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func; + cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op; + cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op; + cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op; + cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func; + cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op; + cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op; + cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op; + } +} + +static enum mali_register_file_format +blend_type_from_nir(nir_alu_type nir_type) +{ + switch (nir_type) { + case 0: /* Render target not in use */ + return 0; + case nir_type_float16: + return MALI_REGISTER_FILE_FORMAT_F16; + case nir_type_float32: + return MALI_REGISTER_FILE_FORMAT_F32; + case nir_type_int32: + return MALI_REGISTER_FILE_FORMAT_I32; + case nir_type_uint32: + return MALI_REGISTER_FILE_FORMAT_U32; + case nir_type_int16: + return MALI_REGISTER_FILE_FORMAT_I16; + case nir_type_uint16: + return MALI_REGISTER_FILE_FORMAT_U16; + default: + unreachable("Unsupported blend shader type for NIR alu type"); + } +} + +static void +panvk_pipeline_builder_emit_blend(const struct panvk_pipeline *pipeline, + unsigned rt, void *bd) +{ + const struct pan_blend_state 
*blend = &pipeline->blend.state; + const struct pan_blend_rt_state *rts = &blend->rts[rt]; + bool dithered = false; + + pan_pack(bd, BLEND, cfg) { + if (!blend->rt_count || !rts->equation.color_mask) { + cfg.enable = false; + cfg.internal.mode = MALI_BLEND_MODE_OFF; + continue; + } + + cfg.srgb = util_format_is_srgb(rts->format); + cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation); + cfg.round_to_fb_precision = !dithered; + + const struct util_format_description *format_desc = + util_format_description(rts->format); + unsigned chan_size = 0; + for (unsigned i = 0; i < format_desc->nr_channels; i++) + chan_size = MAX2(format_desc->channel[i].size, chan_size); + + pan_blend_to_fixed_function_equation(blend->rts[rt].equation, + &cfg.equation); + + /* Fixed point constant */ + float fconst = pan_blend_get_constant( + pan_blend_constant_mask(blend->rts[rt].equation), blend->constants); + u16 constant = fconst * ((1 << chan_size) - 1); + constant <<= 16 - chan_size; + cfg.constant = constant; + + if (pan_blend_is_opaque(blend->rts[rt].equation)) { + cfg.internal.mode = MALI_BLEND_MODE_OPAQUE; + } else { + cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION; + + cfg.internal.fixed_function.alpha_zero_nop = + pan_blend_alpha_zero_nop(blend->rts[rt].equation); + cfg.internal.fixed_function.alpha_one_store = + pan_blend_alpha_one_store(blend->rts[rt].equation); + } + + /* If we want the conversion to work properly, + * num_comps must be set to 4 + */ + cfg.internal.fixed_function.num_comps = 4; + cfg.internal.fixed_function.conversion.memory_format = + GENX(panfrost_dithered_format_from_pipe_format)(rts->format, dithered); + cfg.internal.fixed_function.conversion.register_format = + blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type); + cfg.internal.fixed_function.rt = rt; + } +} + static void panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder, struct panvk_pipeline *pipeline) @@ -268,8 +459,7 @@ 
panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder, mali_ptr gpu_rsd = pipeline->state_bo->addr.dev + builder->stages[i].rsd_offset; - panvk_per_arch(emit_non_fs_rsd)(builder->device, &shader->info, - shader_ptr, rsd); + panvk_pipeline_builder_emit_non_fs_rsd(&shader->info, shader_ptr, rsd); pipeline->rsds[i] = gpu_rsd; } @@ -286,20 +476,20 @@ panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder, builder->stages[MESA_SHADER_FRAGMENT].rsd_offset; void *bd = rsd + pan_size(RENDERER_STATE); - panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, rsd); + panvk_pipeline_builder_emit_base_fs_rsd(pipeline, rsd); for (unsigned rt = 0; rt < pipeline->blend.state.rt_count; rt++) { - panvk_per_arch(emit_blend)(builder->device, pipeline, rt, bd); + panvk_pipeline_builder_emit_blend(pipeline, rt, bd); bd += pan_size(BLEND); } pipeline->rsds[MESA_SHADER_FRAGMENT] = gpu_rsd; } else if (builder->create_info.gfx) { - panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, - &pipeline->fs.rsd_template); + panvk_pipeline_builder_emit_base_fs_rsd(pipeline, + pipeline->fs.rsd_template); for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) { - panvk_per_arch(emit_blend)(builder->device, pipeline, rt, - &pipeline->blend.bd_template[rt]); + panvk_pipeline_builder_emit_blend(pipeline, rt, + &pipeline->blend.bd_template[rt]); } } @@ -513,6 +703,22 @@ translate_stencil_op(VkStencilOp in) } } +static inline enum mali_func +translate_compare_func(VkCompareOp comp) +{ + STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER); + STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS); + STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL); + STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL); + STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER); + STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL); + 
STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL == + (VkCompareOp)MALI_FUNC_GEQUAL); + STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS); + + return (enum mali_func)comp; +} + static void panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder, struct panvk_pipeline *pipeline) @@ -536,7 +742,7 @@ panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder, pipeline->zs.z_test && builder->create_info.gfx->pDepthStencilState->depthWriteEnable; - pipeline->zs.z_compare_func = panvk_per_arch(translate_compare_func)( + pipeline->zs.z_compare_func = translate_compare_func( builder->create_info.gfx->pDepthStencilState->depthCompareOp); pipeline->zs.s_test = builder->create_info.gfx->pDepthStencilState->stencilTestEnable; @@ -546,7 +752,7 @@ panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder, builder->create_info.gfx->pDepthStencilState->front.passOp); pipeline->zs.s_front.z_fail_op = translate_stencil_op( builder->create_info.gfx->pDepthStencilState->front.depthFailOp); - pipeline->zs.s_front.compare_func = panvk_per_arch(translate_compare_func)( + pipeline->zs.s_front.compare_func = translate_compare_func( builder->create_info.gfx->pDepthStencilState->front.compareOp); pipeline->zs.s_front.compare_mask = builder->create_info.gfx->pDepthStencilState->front.compareMask; @@ -560,7 +766,7 @@ panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder, builder->create_info.gfx->pDepthStencilState->back.passOp); pipeline->zs.s_back.z_fail_op = translate_stencil_op( builder->create_info.gfx->pDepthStencilState->back.depthFailOp); - pipeline->zs.s_back.compare_func = panvk_per_arch(translate_compare_func)( + pipeline->zs.s_back.compare_func = translate_compare_func( builder->create_info.gfx->pDepthStencilState->back.compareOp); pipeline->zs.s_back.compare_mask = builder->create_info.gfx->pDepthStencilState->back.compareMask;