pan/desc: Implement pan_emit_fbd for RUN_FRAGMENT2

Reuses the same structure used by pan_emit_fb_desc.

Also, modify pan_emit_fbd's signature to take a pan_ptr to the
framebuffer memory instead of the CPU-mapped pointer.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
This commit is contained in:
Marc Alcala Prieto 2026-04-17 15:28:51 +02:00
parent 2b3f7b2361
commit e16f6f5c4c
4 changed files with 158 additions and 7 deletions

View file

@@ -758,7 +758,7 @@ GENX(csf_preload_fb)(struct panfrost_batch *batch, struct pan_fb_info *fb)
(_ctx)->fbds[PAN_INCREMENTAL_RENDERING_##_pass##_PASS]
#define EMIT_FBD(_ctx, _pass, _fb, _tls, _tiler_ctx) \
GET_FBD(_ctx, _pass).gpu |= \
GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass).cpu)
GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass))
void
GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
@@ -771,7 +771,7 @@ GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
/* Default framebuffer descriptor */
batch->framebuffer.gpu |=
GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
if (batch->draw_count == 0)
return;

View file

@@ -257,8 +257,8 @@ GENX(jm_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
{
PAN_TRACE_FUNC(PAN_TRACE_GL_JM);
batch->framebuffer.gpu |= GENX(pan_emit_fbd)(
fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
batch->framebuffer.gpu |=
GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
}
void

View file

@@ -1,5 +1,6 @@
/*
* Copyright (C) 2021 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@@ -11,6 +12,7 @@
#include "pan_afrc.h"
#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_fb.h"
#include "pan_props.h"
#include "pan_texture.h"
#include "pan_trace.h"
@@ -1172,11 +1174,156 @@ check_fb_attachments(const struct pan_fb_info *fb)
#endif
}
#if PAN_ARCH >= 14
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
const struct pan_tiler_context *tiler_ctx, void *out)
const struct pan_tiler_context *tiler_ctx,
const struct pan_ptr framebuffer)
{
void *out = framebuffer.cpu;
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
check_fb_attachments(fb);
const int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
const bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
const struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb);
/* Emit to memory the state that might change per-layer. The static
* state is emitted directly to CSF registers by
* cs_emit_static_fragment_state().
*/
struct pan_fbd_layer fbd_data = {0};
fbd_data.tiler = tiler_ctx->valhall.desc;
/* internal_layer_index in flags0 is used to select the right
* primitive list in the tiler context, and frame_arg is the value
* that's passed to the fragment shader through r62-r63, which we use
* to pass gl_Layer. Since the layer_idx only takes 8-bits, we might
* use the extra 56-bits we have in frame_argument to pass other
* information to the fragment shader at some point.
*/
assert(layer_idx >= tiler_ctx->valhall.layer_offset);
fbd_data.frame_argument = layer_idx;
pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
cfg.pre_frame_0 =
pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
pan_clean_tile_write_any_set(clean_tile));
cfg.pre_frame_1 =
pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
pan_clean_tile_write_any_set(clean_tile));
cfg.post_frame = fb->bifrost.pre_post.modes[2];
const unsigned zs_bytes_per_pixel = pan_zsbuf_bytes_per_pixel(fb);
/* We can interleave HSR if we have space for two ZS tiles in
* the tile buffer. */
const unsigned max_zs_tile_size_interleave =
fb->z_tile_buf_budget >> util_logbase2_ceil(zs_bytes_per_pixel);
const bool hsr_can_interleave =
fb->tile_size <= max_zs_tile_size_interleave;
/* Enabling prepass without interleave is generally not good for
* performance, so disable HSR in that case. */
cfg.hsr_prepass_enable = fb->allow_hsr_prepass && hsr_can_interleave;
cfg.hsr_prepass_interleaving_enable = hsr_can_interleave;
cfg.hsr_prepass_filter_enable = true;
cfg.hsr_hierarchical_optimizations_enable = true;
cfg.internal_layer_index = layer_idx - tiler_ctx->valhall.layer_offset;
}
fbd_data.dcd_pointer = fb->bifrost.pre_post.dcds.gpu;
pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
cfg.s_clear = fb->zs.clear_value.stencil;
cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
/* Default to 24 bit depth if there's no surface. */
cfg.z_internal_format =
fb->zs.view.zs ? pan_get_z_internal_format(fb->zs.view.zs->format)
: MALI_Z_INTERNAL_FORMAT_D24;
cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
if (crc_rt >= 0) {
bool *valid = fb->rts[crc_rt].crc_valid;
bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
fb->draw_extent.maxx == (fb->width - 1) &&
fb->draw_extent.maxy == (fb->height - 1);
/* If the CRC was valid it stays valid, if it wasn't, we must
* ensure the render operation covers the full frame, and
* clean tiles are pushed to memory. */
bool new_valid = *valid | (full && pan_clean_tile_write_rt_enabled(
clean_tile, crc_rt));
cfg.crc_read_enable = *valid;
/* If the data is currently invalid, still write CRC
* data if we are doing a full write, so that it is
* valid for next time. */
cfg.crc_write_enable = new_valid;
*valid = new_valid;
}
}
fbd_data.z_clear = util_bitpack_float(fb->zs.clear_value.depth);
{
/* Set the DBD and RTD pointers. Both must be 64-bytes aligned. */
uint64_t out_gpu_addr =
framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
if (has_zs_crc_ext) {
fbd_data.dbd_pointer = out_gpu_addr;
assert(fbd_data.dbd_pointer % 64 == 0);
out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
}
fbd_data.rtd_pointer = out_gpu_addr;
assert(fbd_data.rtd_pointer % 64 == 0);
}
memcpy(out, &fbd_data, sizeof(fbd_data));
out += ALIGN_POT(sizeof(fbd_data), 64);
if (has_zs_crc_ext) {
struct mali_zs_crc_extension_packed *zs_crc_ext = out;
pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile);
out += pan_size(ZS_CRC_EXTENSION);
}
const unsigned rt_count = MAX2(fb->rt_count, 1);
unsigned cbuf_offset = 0;
for (unsigned i = 0; i < rt_count; i++) {
pan_emit_rt(fb, layer_idx, i, cbuf_offset, out, clean_tile);
out += pan_size(RENDER_TARGET);
if (!fb->rts[i].view)
continue;
cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
fb->tile_size *
pan_image_view_get_nr_samples(fb->rts[i].view);
if (i != crc_rt && fb->rts[i].crc_valid != NULL)
*(fb->rts[i].crc_valid) = false;
}
return 0;
}
#else
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
const struct pan_tiler_context *tiler_ctx,
const struct pan_ptr framebuffer)
{
void *out = framebuffer.cpu;
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
check_fb_attachments(fb);
@@ -1351,6 +1498,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
}
return tag.opaque[0];
}
#endif /* PAN_ARCH >= 14 */
#else /* PAN_ARCH == 4 */
static enum mali_color_format
pan_sfbd_raw_format(unsigned bits)
@@ -1378,8 +1526,11 @@ GENX(pan_select_tile_size)(struct pan_fb_info *fb)
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
const struct pan_tiler_context *tiler_ctx, void *fbd)
const struct pan_tiler_context *tiler_ctx,
const struct pan_ptr framebuffer)
{
void *fbd = framebuffer.cpu;
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
assert(fb->rt_count <= 1);

View file

@@ -341,7 +341,7 @@ void GENX(pan_emit_afrc_color_attachment)(const struct pan_attachment_info *att,
unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
const struct pan_tiler_context *tiler_ctx,
void *out);
const struct pan_ptr framebuffer);
#if PAN_ARCH >= 6
unsigned GENX(pan_select_tiler_hierarchy_mask)(uint32_t width, uint32_t height,