mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
pan/desc: Implement pan_emit_fbd for RUN_FRAGMENT2
Reuses the same structure that is used by pan_emit_fb_desc. Also modifies pan_emit_fbd's signature to take a pan_ptr to the framebuffer memory instead of the CPU-mapped pointer.
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
This commit is contained in:
parent
1527d88bc1
commit
589dedf2f2
4 changed files with 158 additions and 7 deletions
|
|
@ -758,7 +758,7 @@ GENX(csf_preload_fb)(struct panfrost_batch *batch, struct pan_fb_info *fb)
|
|||
(_ctx)->fbds[PAN_INCREMENTAL_RENDERING_##_pass##_PASS]
|
||||
#define EMIT_FBD(_ctx, _pass, _fb, _tls, _tiler_ctx) \
|
||||
GET_FBD(_ctx, _pass).gpu |= \
|
||||
GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass).cpu)
|
||||
GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass))
|
||||
|
||||
void
|
||||
GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
|
||||
|
|
@ -771,7 +771,7 @@ GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
|
|||
/* Default framebuffer descriptor */
|
||||
|
||||
batch->framebuffer.gpu |=
|
||||
GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
|
||||
GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
|
||||
|
||||
if (batch->draw_count == 0)
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -257,8 +257,8 @@ GENX(jm_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
|
|||
{
|
||||
PAN_TRACE_FUNC(PAN_TRACE_GL_JM);
|
||||
|
||||
batch->framebuffer.gpu |= GENX(pan_emit_fbd)(
|
||||
fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
|
||||
batch->framebuffer.gpu |=
|
||||
GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Collabora, Ltd.
|
||||
* Copyright (C) 2026 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
|
|
@ -11,6 +12,7 @@
|
|||
#include "pan_afrc.h"
|
||||
#include "pan_desc.h"
|
||||
#include "pan_encoder.h"
|
||||
#include "pan_fb.h"
|
||||
#include "pan_props.h"
|
||||
#include "pan_texture.h"
|
||||
#include "pan_trace.h"
|
||||
|
|
@ -1172,11 +1174,156 @@ check_fb_attachments(const struct pan_fb_info *fb)
|
|||
#endif
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
unsigned
|
||||
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
||||
const struct pan_tls_info *tls,
|
||||
const struct pan_tiler_context *tiler_ctx, void *out)
|
||||
const struct pan_tiler_context *tiler_ctx,
|
||||
const struct pan_ptr framebuffer)
|
||||
{
|
||||
void *out = framebuffer.cpu;
|
||||
|
||||
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
|
||||
|
||||
check_fb_attachments(fb);
|
||||
|
||||
const int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
|
||||
const bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
|
||||
const struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb);
|
||||
|
||||
/* Emit to memory the state that might change per-layer. The static
|
||||
* state is emitted directly to CSF registers by
|
||||
* cs_emit_static_fragment_state().
|
||||
*/
|
||||
|
||||
struct pan_fbd_layer fbd_data = {0};
|
||||
fbd_data.tiler = tiler_ctx->valhall.desc;
|
||||
|
||||
/* internal_layer_index in flags0 is used to select the right
|
||||
* primitive list in the tiler context, and frame_arg is the value
|
||||
* that's passed to the fragment shader through r62-r63, which we use
|
||||
* to pass gl_Layer. Since the layer_idx only takes 8 bits, we might
|
||||
* use the extra 56 bits we have in frame_argument to pass other
|
||||
* information to the fragment shader at some point.
|
||||
*/
|
||||
assert(layer_idx >= tiler_ctx->valhall.layer_offset);
|
||||
fbd_data.frame_argument = layer_idx;
|
||||
|
||||
pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
|
||||
cfg.pre_frame_0 =
|
||||
pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
|
||||
pan_clean_tile_write_any_set(clean_tile));
|
||||
cfg.pre_frame_1 =
|
||||
pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
|
||||
pan_clean_tile_write_any_set(clean_tile));
|
||||
cfg.post_frame = fb->bifrost.pre_post.modes[2];
|
||||
|
||||
const unsigned zs_bytes_per_pixel = pan_zsbuf_bytes_per_pixel(fb);
|
||||
/* We can interleave HSR if we have space for two ZS tiles in
|
||||
* the tile buffer. */
|
||||
const unsigned max_zs_tile_size_interleave =
|
||||
fb->z_tile_buf_budget >> util_logbase2_ceil(zs_bytes_per_pixel);
|
||||
const bool hsr_can_interleave =
|
||||
fb->tile_size <= max_zs_tile_size_interleave;
|
||||
|
||||
/* Enabling prepass without interleave is generally not good for
|
||||
* performance, so disable the HSR prepass in that case. */
|
||||
cfg.hsr_prepass_enable = fb->allow_hsr_prepass && hsr_can_interleave;
|
||||
cfg.hsr_prepass_interleaving_enable = hsr_can_interleave;
|
||||
cfg.hsr_prepass_filter_enable = true;
|
||||
cfg.hsr_hierarchical_optimizations_enable = true;
|
||||
|
||||
cfg.internal_layer_index = layer_idx - tiler_ctx->valhall.layer_offset;
|
||||
}
|
||||
|
||||
fbd_data.dcd_pointer = fb->bifrost.pre_post.dcds.gpu;
|
||||
|
||||
pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
|
||||
cfg.s_clear = fb->zs.clear_value.stencil;
|
||||
cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
|
||||
|
||||
/* Default to 24 bit depth if there's no surface. */
|
||||
cfg.z_internal_format =
|
||||
fb->zs.view.zs ? pan_get_z_internal_format(fb->zs.view.zs->format)
|
||||
: MALI_Z_INTERNAL_FORMAT_D24;
|
||||
cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
|
||||
|
||||
if (crc_rt >= 0) {
|
||||
bool *valid = fb->rts[crc_rt].crc_valid;
|
||||
bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
|
||||
fb->draw_extent.maxx == (fb->width - 1) &&
|
||||
fb->draw_extent.maxy == (fb->height - 1);
|
||||
|
||||
/* If the CRC was valid it stays valid, if it wasn't, we must
|
||||
* ensure the render operation covers the full frame, and
|
||||
* clean tiles are pushed to memory. */
|
||||
bool new_valid = *valid | (full && pan_clean_tile_write_rt_enabled(
|
||||
clean_tile, crc_rt));
|
||||
|
||||
cfg.crc_read_enable = *valid;
|
||||
|
||||
/* If the data is currently invalid, still write CRC
|
||||
* data if we are doing a full write, so that it is
|
||||
* valid for next time. */
|
||||
cfg.crc_write_enable = new_valid;
|
||||
|
||||
*valid = new_valid;
|
||||
}
|
||||
}
|
||||
|
||||
fbd_data.z_clear = util_bitpack_float(fb->zs.clear_value.depth);
|
||||
|
||||
{
|
||||
/* Set the DBD and RTD pointers. Both must be 64-byte aligned. */
|
||||
uint64_t out_gpu_addr =
|
||||
framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
|
||||
|
||||
if (has_zs_crc_ext) {
|
||||
fbd_data.dbd_pointer = out_gpu_addr;
|
||||
assert(fbd_data.dbd_pointer % 64 == 0);
|
||||
out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
|
||||
}
|
||||
|
||||
fbd_data.rtd_pointer = out_gpu_addr;
|
||||
assert(fbd_data.rtd_pointer % 64 == 0);
|
||||
}
|
||||
|
||||
memcpy(out, &fbd_data, sizeof(fbd_data));
|
||||
out += ALIGN_POT(sizeof(fbd_data), 64);
|
||||
|
||||
if (has_zs_crc_ext) {
|
||||
struct mali_zs_crc_extension_packed *zs_crc_ext = out;
|
||||
pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile);
|
||||
out += pan_size(ZS_CRC_EXTENSION);
|
||||
}
|
||||
|
||||
const unsigned rt_count = MAX2(fb->rt_count, 1);
|
||||
unsigned cbuf_offset = 0;
|
||||
for (unsigned i = 0; i < rt_count; i++) {
|
||||
pan_emit_rt(fb, layer_idx, i, cbuf_offset, out, clean_tile);
|
||||
out += pan_size(RENDER_TARGET);
|
||||
if (!fb->rts[i].view)
|
||||
continue;
|
||||
|
||||
cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
|
||||
fb->tile_size *
|
||||
pan_image_view_get_nr_samples(fb->rts[i].view);
|
||||
|
||||
if (i != crc_rt && fb->rts[i].crc_valid != NULL)
|
||||
*(fb->rts[i].crc_valid) = false;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
unsigned
|
||||
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
||||
const struct pan_tls_info *tls,
|
||||
const struct pan_tiler_context *tiler_ctx,
|
||||
const struct pan_ptr framebuffer)
|
||||
{
|
||||
void *out = framebuffer.cpu;
|
||||
|
||||
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
|
||||
|
||||
check_fb_attachments(fb);
|
||||
|
|
@ -1351,6 +1498,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
|||
}
|
||||
return tag.opaque[0];
|
||||
}
|
||||
#endif /* PAN_ARCH >= 14 */
|
||||
#else /* PAN_ARCH == 4 */
|
||||
static enum mali_color_format
|
||||
pan_sfbd_raw_format(unsigned bits)
|
||||
|
|
@ -1378,8 +1526,11 @@ GENX(pan_select_tile_size)(struct pan_fb_info *fb)
|
|||
unsigned
|
||||
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
||||
const struct pan_tls_info *tls,
|
||||
const struct pan_tiler_context *tiler_ctx, void *fbd)
|
||||
const struct pan_tiler_context *tiler_ctx,
|
||||
const struct pan_ptr framebuffer)
|
||||
{
|
||||
void *fbd = framebuffer.cpu;
|
||||
|
||||
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
|
||||
|
||||
assert(fb->rt_count <= 1);
|
||||
|
|
|
|||
|
|
@ -341,7 +341,7 @@ void GENX(pan_emit_afrc_color_attachment)(const struct pan_attachment_info *att,
|
|||
unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
||||
const struct pan_tls_info *tls,
|
||||
const struct pan_tiler_context *tiler_ctx,
|
||||
void *out);
|
||||
const struct pan_ptr framebuffer);
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
unsigned GENX(pan_select_tiler_hierarchy_mask)(uint32_t width, uint32_t height,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue