pan/fb: Implement pan_emit_fb_desc for RUN_FRAGMENT2

Add a new structure that is used to store per-layer RUN_FRAGMENT2 state.
Any other state will be emitted directly to registers.

Also, modify pan_emit_fb_desc's signature to take a pan_ptr to the
framebuffer memory instead of the CPU-mapped pointer.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Author: Marc Alcala Prieto
Date:   2026-04-17 14:33:42 +02:00
parent 6bd5cb1728
commit 2b3f7b2361
4 changed files with 162 additions and 9 deletions
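
As a caller-side sketch of the new pan_ptr-based signature (illustration only, not part of the patch): pan_ptr_offset is expected to advance the CPU mapping and the GPU address together, since the emitter now needs both to write descriptors and to record their GPU pointers. The helper body below is an assumption; only the before/after call shapes come from the diff.

/* Hypothetical stand-in for pan_ptr_offset, assuming pan_ptr carries a CPU
 * mapping and a GPU address, as the diff uses them (framebuffer.cpu and
 * framebuffer.gpu). */
static inline struct pan_ptr
example_ptr_offset(struct pan_ptr ptr, unsigned offset_B)
{
   ptr.cpu = (uint8_t *)ptr.cpu + offset_B;
   ptr.gpu += offset_B;
   return ptr;
}

/* Before: GENX(pan_emit_fb_desc)(&fbd_info, fbds.cpu + fbd_sz * i);
 * After:  GENX(pan_emit_fb_desc)(&fbd_info, pan_ptr_offset(fbds, fbd_sz * i));
 */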

@@ -1,5 +1,6 @@
/*
* Copyright (C) 2026 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
#include "pan_fb.h"
@@ -669,9 +670,124 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
}
#endif
#if PAN_ARCH >= 14
uint32_t
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out)
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
const struct pan_ptr framebuffer)
{
/* Emit the dynamic framebuffer state. That is, state that may change per layer. */
void *out = framebuffer.cpu;
const struct pan_fb_layout *fb = info->fb;
const struct pan_fb_load *load = info->load;
const struct pan_fb_store *store = info->store;
const struct pan_fb_clean_tile ct = pan_fb_get_clean_tile(info);
const bool has_zs_crc_ext = pan_fb_has_zs(fb);
struct pan_fbd_layer fbd_data = {0};
fbd_data.tiler = info->tiler_ctx->valhall.desc;
/* layer_index in flags0 is used to select the right primitive list in
* the tiler context, and frame_arg is the value that's passed to the
* fragment shader through r62-r63, which we use to pass gl_Layer. Since
* the layer_idx only takes 8 bits, we might use the extra 56 bits we
* have in frame_argument to pass other information to the fragment
* shader at some point.
*/
assert(info->layer >= info->tiler_ctx->valhall.layer_offset);
fbd_data.frame_argument = info->layer;
pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
cfg.pre_frame_0 = pan_fix_frame_shader_mode(info->frame_shaders.modes[0],
ct.rts || ct.zs || ct.s);
cfg.pre_frame_1 = pan_fix_frame_shader_mode(info->frame_shaders.modes[1],
ct.rts || ct.zs || ct.s);
cfg.post_frame = info->frame_shaders.modes[2];
/* Enabling prepass without pipelining is generally not good for
* performance, so disable HSR in that case.
*/
cfg.hsr_prepass_enable = info->allow_hsr_prepass &&
pan_fb_can_pipeline_zs(fb);
cfg.hsr_prepass_interleaving_enable = pan_fb_can_pipeline_zs(fb);
cfg.hsr_prepass_filter_enable = true;
cfg.hsr_hierarchical_optimizations_enable = true;
cfg.internal_layer_index =
info->layer - info->tiler_ctx->valhall.layer_offset;
}
pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
if (fb->s_format != PIPE_FORMAT_NONE) {
cfg.s_clear = load && target_has_clear(&load->s) ?
load->s.clear.stencil : 0;
cfg.s_write_enable = store && store->s.store;
}
if (fb->z_format != PIPE_FORMAT_NONE) {
cfg.z_internal_format = pan_get_z_internal_format(fb->z_format);
cfg.z_write_enable = store && store->zs.store;
} else {
cfg.z_internal_format = MALI_Z_INTERNAL_FORMAT_D24;
assert(!store || !store->zs.store);
}
}
fbd_data.z_clear =
util_bitpack_float(fb->z_format != PIPE_FORMAT_NONE && load &&
target_has_clear(&load->z)
? load->z.clear.depth
: 0);
fbd_data.dcd_pointer = info->frame_shaders.dcd_pointer;
{
/* Set the DBD and RTD pointers. Both must be 64-byte aligned. */
uint64_t out_gpu_addr =
framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
if (has_zs_crc_ext) {
fbd_data.dbd_pointer = out_gpu_addr;
assert(fbd_data.dbd_pointer % 64 == 0);
out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
}
fbd_data.rtd_pointer = out_gpu_addr;
assert(fbd_data.rtd_pointer % 64 == 0);
}
memcpy(out, &fbd_data, sizeof(fbd_data));
out += ALIGN_POT(sizeof(fbd_data), 64);
if (has_zs_crc_ext) {
struct mali_zs_crc_extension_packed zs_crc;
emit_zs_crc_desc(info, ct, &zs_crc);
memcpy(out, &zs_crc, sizeof(zs_crc));
out += sizeof(zs_crc);
}
uint32_t tile_rt_offset_B = 0;
for (unsigned rt = 0; rt < fb->rt_count; rt++) {
struct mali_rgb_render_target_packed rgb_rt;
emit_rgb_rt_desc(info, ct, rt, tile_rt_offset_B, &rgb_rt);
memcpy(out, &rgb_rt, sizeof(rgb_rt));
out += sizeof(rgb_rt);
if (fb->rt_formats[rt] != PIPE_FORMAT_NONE) {
tile_rt_offset_B += pan_bytes_per_pixel_tib(fb->rt_formats[rt]) *
fb->tile_size_px * fb->sample_count;
}
}
assert(tile_rt_offset_B <= fb->tile_rt_alloc_B);
return 0;
}
#else /* PAN_ARCH < 14 */
uint32_t
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
const struct pan_ptr framebuffer)
{
void *out = framebuffer.cpu;
const struct pan_fb_layout *fb = info->fb;
const struct pan_fb_load *load = info->load;
const struct pan_fb_store *store = info->store;
@@ -823,4 +939,5 @@ GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out)
}
return tag.opaque[0];
}
#endif
#endif /* PAN_ARCH >= 14 */
#endif /* PAN_ARCH >= 5 */
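
For reference, a rough per-layer footprint sketch (not in the patch) of what the new emitter writes: the pan_fbd_layer header padded to 64 bytes, an optional ZS_CRC_EXTENSION descriptor, then one render-target descriptor per RT. The helper name is hypothetical; the types and the has-ZS condition come from the code above.

/* Hypothetical helper mirroring the layout pan_emit_fb_desc emits above.
 * The 64-byte padding of the header is what keeps dbd_pointer and
 * rtd_pointer aligned. */
static unsigned
example_fbd_layer_size_B(const struct pan_fb_layout *fb)
{
   unsigned size_B = ALIGN_POT(sizeof(struct pan_fbd_layer), 64);

   if (pan_fb_has_zs(fb))
      size_B += sizeof(struct mali_zs_crc_extension_packed);

   size_B += fb->rt_count * sizeof(struct mali_rgb_render_target_packed);
   return size_B;
}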

@@ -1,14 +1,20 @@
/*
* Copyright (C) 2026 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
#ifndef __PAN_FB_H
#define __PAN_FB_H
#if PAN_ARCH >= 14
#include "genxml/cs_builder.h"
#endif
#include "compiler/shader_enums.h"
#include "genxml/gen_macros.h"
#include "util/format/u_formats.h"
#include "compiler/shader_enums.h"
#include "pan_pool.h"
struct nir_shader;
struct nir_shader_compiler_options;
@@ -481,7 +487,7 @@ void GENX(pan_fill_fb_info)(const struct pan_fb_desc_info *info,
struct pan_fb_info *fbinfo);
uint32_t GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
void *out);
const struct pan_ptr framebuffer);
#endif
enum ENUM_PACKED pan_fb_shader_op {
@@ -620,4 +626,35 @@ GENX(pan_get_fb_shader)(const struct pan_fb_shader_key *key,
const struct nir_shader_compiler_options *nir_options);
#endif
#if PAN_ARCH >= 14
/* Framebuffer per-layer state. Keep this structure 64-byte aligned, since
* we want the adjacent ZS_CRC_EXTENSION and RENDER_TARGET descriptors
* aligned. */
struct pan_fbd_layer {
/** GPU address to the tiler descriptor. */
uint64_t tiler;
/** Frame argument. */
uint64_t frame_argument;
/** An instance of Fragment Flags 0. */
struct mali_fragment_flags_0_packed flags0;
/** An instance of Fragment Flags 2. */
struct mali_fragment_flags_2_packed flags2;
/** Z clear value. */
uint32_t z_clear;
/** GPU address to the draw call descriptors. It may be 0. */
uint64_t dcd_pointer;
/** GPU address to the ZS_CRC_EXTENSION descriptor. It may be 0. */
uint64_t dbd_pointer;
/** GPU address to the RENDER_TARGET descriptors. */
uint64_t rtd_pointer;
} __attribute__((aligned(64)));
#endif /* PAN_ARCH >= 14 */
#endif /* __PAN_FB_H */
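
The comment on pan_fbd_layer relies on aligned(64) also rounding sizeof up to a multiple of 64, so descriptors emitted right after the header stay aligned without extra padding logic. Illustrative compile-time checks (not part of the patch, assuming C11 _Static_assert/_Alignof) could look like:

_Static_assert(_Alignof(struct pan_fbd_layer) == 64,
               "pan_fbd_layer must be 64-byte aligned");
_Static_assert(sizeof(struct pan_fbd_layer) % 64 == 0,
               "descriptors placed right after pan_fbd_layer must stay "
               "64-byte aligned");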

@@ -1316,7 +1316,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
tiler_ctx = get_tiler_context(cmdbuf, layer_idx);
uint32_t new_fbd_flags =
GENX(pan_emit_fb_desc)(&fbd_info, fbds.cpu + fbd_sz * i);
GENX(pan_emit_fb_desc)(&fbd_info, pan_ptr_offset(fbds, fbd_sz * i));
/* Make sure all FBDs have the same flags. */
assert(i == 0 || new_fbd_flags == fbd_flags);
@@ -1335,7 +1335,6 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
for (uint32_t i = 0; i < enabled_layer_count; i++) {
uint32_t layer_idx = multiview ? u_bit_scan(&ir_view_mask_temp) : i;
void *ir_fbd = (void *)((uint8_t *)ir_fbds.cpu + (i * fbd_sz));
fbd_info.layer = layer_idx;
tiler_ctx = get_tiler_context(cmdbuf, layer_idx);
@@ -1353,8 +1352,8 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
if (result != VK_SUCCESS)
return result;
ASSERTED uint32_t new_fbd_flags =
GENX(pan_emit_fb_desc)(&fbd_info, ir_fbd);
ASSERTED uint32_t new_fbd_flags = GENX(pan_emit_fb_desc)(
&fbd_info, pan_ptr_offset(ir_fbds, fbd_sz * i));
/* Make sure all FBDs have the same flags. */
assert(new_fbd_flags == fbd_flags);

@@ -181,7 +181,7 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
fbd_info.layer = layer_id;
fbd_info.frame_shaders = fs;
fbd_info.frame_shaders.dcd_pointer += layer_id * 3 * pan_size(DRAW);
tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd.cpu);
tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd);
result = panvk_cmd_prepare_fragment_job(cmdbuf, tagged_fbd_ptr);
if (result != VK_SUCCESS)