From 1527d88bc15b9838831e6e2b204349b9f7f859b1 Mon Sep 17 00:00:00 2001 From: Marc Alcala Prieto Date: Fri, 17 Apr 2026 14:33:42 +0200 Subject: [PATCH] pan/fb: Implement pan_emit_fb_desc for RUN_FRAGMENT2 Add a new structure that is used to store per-layer RUN_FRAGMENT2 state. Any other state will be emitted directly to registers. Also, modify pan_emit_fb_desc's signature to take a pan_ptr to the framebuffer memory instead of the CPU-mapped pointer. Reviewed-by: Lars-Ivar Hesselberg Simonsen --- src/panfrost/lib/pan_fb.c | 121 ++++++++++++++++++- src/panfrost/lib/pan_fb.h | 41 ++++++- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 7 +- src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c | 2 +- 4 files changed, 162 insertions(+), 9 deletions(-) diff --git a/src/panfrost/lib/pan_fb.c b/src/panfrost/lib/pan_fb.c index f9b6c85b2ce..3b3c6c86c5f 100644 --- a/src/panfrost/lib/pan_fb.c +++ b/src/panfrost/lib/pan_fb.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2026 Collabora, Ltd. + * Copyright (C) 2026 Arm Ltd. * SPDX-License-Identifier: MIT */ #include "pan_fb.h" @@ -669,9 +670,124 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, } #endif +#if PAN_ARCH >= 14 uint32_t -GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out) +GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, + const struct pan_ptr framebuffer) { + /* Emit the dynamic framebuffer state. That is, state that may change per-layer. 
*/ + + void *out = framebuffer.cpu; + const struct pan_fb_layout *fb = info->fb; + const struct pan_fb_load *load = info->load; + const struct pan_fb_store *store = info->store; + const struct pan_fb_clean_tile ct = pan_fb_get_clean_tile(info); + const bool has_zs_crc_ext = pan_fb_has_zs(fb); + + struct pan_fbd_layer fbd_data = {0}; + fbd_data.tiler = info->tiler_ctx->valhall.desc; + + /* internal_layer_index in flags0 is used to select the right primitive list in + * the tiler context, and frame_argument is the value that's passed to the + * fragment shader through r62-r63, which we use to pass gl_Layer. Since + * the layer index only takes 8 bits, we might use the extra 56 bits we + * have in frame_argument to pass other information to the fragment + * shader at some point. + */ + assert(info->layer >= info->tiler_ctx->valhall.layer_offset); + fbd_data.frame_argument = info->layer; + + pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) { + cfg.pre_frame_0 = pan_fix_frame_shader_mode(info->frame_shaders.modes[0], + ct.rts || ct.zs || ct.s); + cfg.pre_frame_1 = pan_fix_frame_shader_mode(info->frame_shaders.modes[1], + ct.rts || ct.zs || ct.s); + cfg.post_frame = info->frame_shaders.modes[2]; + + /* Enabling prepass without pipelining is generally not good for + * performance, so disable HSR in that case. + */ + cfg.hsr_prepass_enable = info->allow_hsr_prepass && + pan_fb_can_pipeline_zs(fb); + cfg.hsr_prepass_interleaving_enable = pan_fb_can_pipeline_zs(fb); + cfg.hsr_prepass_filter_enable = true; + cfg.hsr_hierarchical_optimizations_enable = true; + + cfg.internal_layer_index = + info->layer - info->tiler_ctx->valhall.layer_offset; + } + + pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) { + if (fb->s_format != PIPE_FORMAT_NONE) { + cfg.s_clear = load && target_has_clear(&load->s) ?
+ load->s.clear.stencil : 0; + cfg.s_write_enable = store && store->s.store; + } + + if (fb->z_format != PIPE_FORMAT_NONE) { + cfg.z_internal_format = pan_get_z_internal_format(fb->z_format); + cfg.z_write_enable = store && store->zs.store; + } else { + cfg.z_internal_format = MALI_Z_INTERNAL_FORMAT_D24; + assert(!store || !store->zs.store); + } + } + + fbd_data.z_clear = + util_bitpack_float(fb->z_format != PIPE_FORMAT_NONE && load && + target_has_clear(&load->z) + ? load->z.clear.depth + : 0); + + fbd_data.dcd_pointer = info->frame_shaders.dcd_pointer; + + { + /* Set the DBD and RTD pointers. Both must be 64-byte aligned. */ + uint64_t out_gpu_addr = + framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64); + + if (has_zs_crc_ext) { + fbd_data.dbd_pointer = out_gpu_addr; + assert(fbd_data.dbd_pointer % 64 == 0); + out_gpu_addr += pan_size(ZS_CRC_EXTENSION); + } + + fbd_data.rtd_pointer = out_gpu_addr; + assert(fbd_data.rtd_pointer % 64 == 0); + } + + memcpy(out, &fbd_data, sizeof(fbd_data)); + out += ALIGN_POT(sizeof(fbd_data), 64); + + if (has_zs_crc_ext) { + struct mali_zs_crc_extension_packed zs_crc; + emit_zs_crc_desc(info, ct, &zs_crc); + memcpy(out, &zs_crc, sizeof(zs_crc)); + out += sizeof(zs_crc); + } + + uint32_t tile_rt_offset_B = 0; + for (unsigned rt = 0; rt < fb->rt_count; rt++) { + struct mali_rgb_render_target_packed rgb_rt; + emit_rgb_rt_desc(info, ct, rt, tile_rt_offset_B, &rgb_rt); + memcpy(out, &rgb_rt, sizeof(rgb_rt)); + out += sizeof(rgb_rt); + + if (fb->rt_formats[rt] != PIPE_FORMAT_NONE) { + tile_rt_offset_B += pan_bytes_per_pixel_tib(fb->rt_formats[rt]) * + fb->tile_size_px * fb->sample_count; + } + } + assert(tile_rt_offset_B <= fb->tile_rt_alloc_B); + + return 0; +} +#else /* PAN_ARCH < 14 */ +uint32_t +GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, + const struct pan_ptr framebuffer) +{ + void *out = framebuffer.cpu; const struct pan_fb_layout *fb = info->fb; const struct pan_fb_load *load = info->load;
const struct pan_fb_store *store = info->store; @@ -823,4 +939,5 @@ GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out) } return tag.opaque[0]; } -#endif +#endif /* PAN_ARCH >= 14 */ +#endif /* PAN_ARCH >= 5 */ diff --git a/src/panfrost/lib/pan_fb.h b/src/panfrost/lib/pan_fb.h index c4635f3f4c2..48bfc888b1c 100644 --- a/src/panfrost/lib/pan_fb.h +++ b/src/panfrost/lib/pan_fb.h @@ -1,14 +1,20 @@ /* * Copyright (C) 2026 Collabora, Ltd. + * Copyright (C) 2026 Arm Ltd. * SPDX-License-Identifier: MIT */ #ifndef __PAN_FB_H #define __PAN_FB_H +#if PAN_ARCH >= 14 +#include "genxml/cs_builder.h" +#endif + +#include "compiler/shader_enums.h" #include "genxml/gen_macros.h" #include "util/format/u_formats.h" -#include "compiler/shader_enums.h" +#include "pan_pool.h" struct nir_shader; struct nir_shader_compiler_options; @@ -481,7 +487,7 @@ void GENX(pan_fill_fb_info)(const struct pan_fb_desc_info *info, struct pan_fb_info *fbinfo); uint32_t GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, - void *out); + const struct pan_ptr framebuffer); #endif enum ENUM_PACKED pan_fb_shader_op { @@ -620,4 +626,35 @@ GENX(pan_get_fb_shader)(const struct pan_fb_shader_key *key, const struct nir_shader_compiler_options *nir_options); #endif +#if PAN_ARCH >= 14 +/* Framebuffer per-layer state. Keep this structure 64-byte aligned, since + * we want the adjacent ZS_CRC_EXTENSION and RENDER_TARGET descriptors + * aligned. */ +struct pan_fbd_layer { + /** GPU address to the tiler descriptor. */ + uint64_t tiler; + + /** Frame argument. */ + uint64_t frame_argument; + + /** An instance of Fragment Flags 0. */ + struct mali_fragment_flags_0_packed flags0; + + /** An instance of Fragment Flags 2. */ + struct mali_fragment_flags_2_packed flags2; + + /** Z clear value. */ + uint32_t z_clear; + + /** GPU address to the draw call descriptors. It may be 0. */ + uint64_t dcd_pointer; + + /** GPU address to the ZS_CRC_EXTENSION descriptor. It may be 0. 
*/ + uint64_t dbd_pointer; + + /** GPU address to the RENDER_TARGET descriptors. */ + uint64_t rtd_pointer; +} __attribute__((aligned(64))); +#endif /* PAN_ARCH >= 14 */ + #endif /* __PAN_FB_H */ diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 87e7b647df7..55069924624 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -1316,7 +1316,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) tiler_ctx = get_tiler_context(cmdbuf, layer_idx); uint32_t new_fbd_flags = - GENX(pan_emit_fb_desc)(&fbd_info, fbds.cpu + fbd_sz * i); + GENX(pan_emit_fb_desc)(&fbd_info, pan_ptr_offset(fbds, fbd_sz * i)); /* Make sure all FBDs have the same flags. */ assert(i == 0 || new_fbd_flags == fbd_flags); @@ -1335,7 +1335,6 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) for (uint32_t i = 0; i < enabled_layer_count; i++) { uint32_t layer_idx = multiview ? u_bit_scan(&ir_view_mask_temp) : i; - void *ir_fbd = (void *)((uint8_t *)ir_fbds.cpu + (i * fbd_sz)); fbd_info.layer = layer_idx; tiler_ctx = get_tiler_context(cmdbuf, layer_idx); @@ -1353,8 +1352,8 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) if (result != VK_SUCCESS) return result; - ASSERTED uint32_t new_fbd_flags = - GENX(pan_emit_fb_desc)(&fbd_info, ir_fbd); + ASSERTED uint32_t new_fbd_flags = GENX(pan_emit_fb_desc)( + &fbd_info, pan_ptr_offset(ir_fbds, fbd_sz * i)); /* Make sure all FBDs have the same flags. 
*/ assert(new_fbd_flags == fbd_flags); diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c index 0579034aea2..9879ca8b112 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c @@ -181,7 +181,7 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf) fbd_info.layer = layer_id; fbd_info.frame_shaders = fs; fbd_info.frame_shaders.dcd_pointer += layer_id * 3 * pan_size(DRAW); - tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd.cpu); + tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd); result = panvk_cmd_prepare_fragment_job(cmdbuf, tagged_fbd_ptr); if (result != VK_SUCCESS)