diff --git a/src/gallium/drivers/panfrost/pan_csf.c b/src/gallium/drivers/panfrost/pan_csf.c index 2246804b85c..357d4cfeff0 100644 --- a/src/gallium/drivers/panfrost/pan_csf.c +++ b/src/gallium/drivers/panfrost/pan_csf.c @@ -758,7 +758,7 @@ GENX(csf_preload_fb)(struct panfrost_batch *batch, struct pan_fb_info *fb) (_ctx)->fbds[PAN_INCREMENTAL_RENDERING_##_pass##_PASS] #define EMIT_FBD(_ctx, _pass, _fb, _tls, _tiler_ctx) \ GET_FBD(_ctx, _pass).gpu |= \ - GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass).cpu) + GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass)) void GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb, @@ -771,7 +771,7 @@ GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb, /* Default framebuffer descriptor */ batch->framebuffer.gpu |= - GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu); + GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer); if (batch->draw_count == 0) return; diff --git a/src/gallium/drivers/panfrost/pan_jm.c b/src/gallium/drivers/panfrost/pan_jm.c index 845c238853e..818846927fd 100644 --- a/src/gallium/drivers/panfrost/pan_jm.c +++ b/src/gallium/drivers/panfrost/pan_jm.c @@ -257,8 +257,8 @@ GENX(jm_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb, { PAN_TRACE_FUNC(PAN_TRACE_GL_JM); - batch->framebuffer.gpu |= GENX(pan_emit_fbd)( - fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu); + batch->framebuffer.gpu |= + GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer); } void diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index 3df01de0090..cf9f08aae5b 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2021 Collabora, Ltd. + * Copyright (C) 2026 Arm Ltd. 
* SPDX-License-Identifier: MIT */ @@ -11,6 +12,7 @@ #include "pan_afrc.h" #include "pan_desc.h" #include "pan_encoder.h" +#include "pan_fb.h" #include "pan_props.h" #include "pan_texture.h" #include "pan_trace.h" @@ -1172,11 +1174,156 @@ check_fb_attachments(const struct pan_fb_info *fb) #endif } +#if PAN_ARCH >= 14 /* v14+ variant: packs a struct pan_fbd_layer, then the optional ZS/CRC extension and MAX2(fb->rt_count, 1) render target descriptors, into framebuffer.cpu and always returns 0. The tls argument is not referenced in this variant. */ unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, const struct pan_tls_info *tls, - const struct pan_tiler_context *tiler_ctx, void *out) + const struct pan_tiler_context *tiler_ctx, + const struct pan_ptr framebuffer) { + void *out = framebuffer.cpu; + + PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC); + + check_fb_attachments(fb); + + const int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size); + const bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0); + const struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb); + + /* Emit to memory the state that might change per-layer. The static + * state is emitted directly to CSF registers by + * cs_emit_static_fragment_state(). + */ + + struct pan_fbd_layer fbd_data = {0}; + fbd_data.tiler = tiler_ctx->valhall.desc; + + /* internal_layer_index in flags0 is used to select the right + * primitive list in the tiler context, and frame_argument is the value + * that's passed to the fragment shader through r62-r63, which we use + * to pass gl_Layer. Since the layer_idx only takes 8 bits, we might + * use the extra 56 bits we have in frame_argument to pass other + * information to the fragment shader at some point. 
+ */ + assert(layer_idx >= tiler_ctx->valhall.layer_offset); + fbd_data.frame_argument = layer_idx; + + pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) { + cfg.pre_frame_0 = + pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], + pan_clean_tile_write_any_set(clean_tile)); + cfg.pre_frame_1 = + pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], + pan_clean_tile_write_any_set(clean_tile)); + cfg.post_frame = fb->bifrost.pre_post.modes[2]; + + const unsigned zs_bytes_per_pixel = pan_zsbuf_bytes_per_pixel(fb); + /* We can interleave HSR if we have space for two ZS tiles in + * the tile buffer. */ + const unsigned max_zs_tile_size_interleave = + fb->z_tile_buf_budget >> util_logbase2_ceil(zs_bytes_per_pixel); + const bool hsr_can_interleave = + fb->tile_size <= max_zs_tile_size_interleave; + + /* Enabling prepass without interleave is generally not good for + * performance, so disable HSR in that case. */ + cfg.hsr_prepass_enable = fb->allow_hsr_prepass && hsr_can_interleave; + cfg.hsr_prepass_interleaving_enable = hsr_can_interleave; + cfg.hsr_prepass_filter_enable = true; + cfg.hsr_hierarchical_optimizations_enable = true; + + cfg.internal_layer_index = layer_idx - tiler_ctx->valhall.layer_offset; + } + + fbd_data.dcd_pointer = fb->bifrost.pre_post.dcds.gpu; + + pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) { + cfg.s_clear = fb->zs.clear_value.stencil; + cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s); + + /* Default to 24 bit depth if there's no surface. */ + cfg.z_internal_format = + fb->zs.view.zs ? 
pan_get_z_internal_format(fb->zs.view.zs->format) + : MALI_Z_INTERNAL_FORMAT_D24; + cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z); + + if (crc_rt >= 0) { + bool *valid = fb->rts[crc_rt].crc_valid; + bool full = !fb->draw_extent.minx && !fb->draw_extent.miny && + fb->draw_extent.maxx == (fb->width - 1) && + fb->draw_extent.maxy == (fb->height - 1); + + /* If the CRC was valid it stays valid, if it wasn't, we must + * ensure the render operation covers the full frame, and + * clean tiles are pushed to memory. */ + bool new_valid = *valid | (full && pan_clean_tile_write_rt_enabled( + clean_tile, crc_rt)); + + cfg.crc_read_enable = *valid; + + /* If the data is currently invalid, still write CRC + * data if we are doing a full write, so that it is + * valid for next time. */ + cfg.crc_write_enable = new_valid; + + *valid = new_valid; + } + } + + fbd_data.z_clear = util_bitpack_float(fb->zs.clear_value.depth); + + { + /* Set the DBD and RTD pointers. Both must be 64-byte aligned. 
*/ + uint64_t out_gpu_addr = + framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64); + + if (has_zs_crc_ext) { + fbd_data.dbd_pointer = out_gpu_addr; + assert(fbd_data.dbd_pointer % 64 == 0); + out_gpu_addr += pan_size(ZS_CRC_EXTENSION); + } + + fbd_data.rtd_pointer = out_gpu_addr; + assert(fbd_data.rtd_pointer % 64 == 0); + } + + memcpy(out, &fbd_data, sizeof(fbd_data)); + out += ALIGN_POT(sizeof(fbd_data), 64); /* The CPU cursor advances by the same amounts as out_gpu_addr above, so the pointers stored in fbd_data match what is written next. */ + + if (has_zs_crc_ext) { + struct mali_zs_crc_extension_packed *zs_crc_ext = out; + pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile); + out += pan_size(ZS_CRC_EXTENSION); + } + + const unsigned rt_count = MAX2(fb->rt_count, 1); + unsigned cbuf_offset = 0; + for (unsigned i = 0; i < rt_count; i++) { + pan_emit_rt(fb, layer_idx, i, cbuf_offset, out, clean_tile); + out += pan_size(RENDER_TARGET); + if (!fb->rts[i].view) + continue; + + cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) * + fb->tile_size * + pan_image_view_get_nr_samples(fb->rts[i].view); + + if (i != crc_rt && fb->rts[i].crc_valid != NULL) + *(fb->rts[i].crc_valid) = false; /* Only the RT selected for CRC keeps its validity flag; every other RT that has a view gets it cleared. */ + } + + return 0; /* Callers visible in this patch OR the return value into the descriptor GPU address, so 0 leaves it unchanged; the #else variant returns tag.opaque[0] instead. */ +} +#else +unsigned +GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, + const struct pan_tls_info *tls, + const struct pan_tiler_context *tiler_ctx, + const struct pan_ptr framebuffer) +{ + void *out = framebuffer.cpu; + PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC); check_fb_attachments(fb); @@ -1351,6 +1498,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, } return tag.opaque[0]; } +#endif /* PAN_ARCH >= 14 */ #else /* PAN_ARCH == 4 */ static enum mali_color_format pan_sfbd_raw_format(unsigned bits) @@ -1378,8 +1526,11 @@ GENX(pan_select_tile_size)(struct pan_fb_info *fb) unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, const struct pan_tls_info *tls, - const struct pan_tiler_context *tiler_ctx, void *fbd) + const struct pan_tiler_context *tiler_ctx, + const struct pan_ptr framebuffer) { + void *fbd = 
framebuffer.cpu; + PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC); assert(fb->rt_count <= 1); diff --git a/src/panfrost/lib/pan_desc.h b/src/panfrost/lib/pan_desc.h index db5b6588ad3..7cc7639c897 100644 --- a/src/panfrost/lib/pan_desc.h +++ b/src/panfrost/lib/pan_desc.h @@ -341,7 +341,7 @@ void GENX(pan_emit_afrc_color_attachment)(const struct pan_attachment_info *att, unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, const struct pan_tls_info *tls, const struct pan_tiler_context *tiler_ctx, - void *out); + const struct pan_ptr framebuffer); #if PAN_ARCH >= 6 unsigned GENX(pan_select_tiler_hierarchy_mask)(uint32_t width, uint32_t height,