mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
pan/fb: Implement pan_emit_fb_desc for RUN_FRAGMENT2
Add a new structure that is used to store per-layer RUN_FRAGMENT2 state. Any other state will be emitted directly to registers. Also, modify pan_emit_fb_desc's signature to take a pan_ptr to the framebuffer memory instead of the CPU-mapped pointer. Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
This commit is contained in:
parent
6c89a14e1b
commit
1527d88bc1
4 changed files with 162 additions and 9 deletions
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2026 Collabora, Ltd.
|
||||
* Copyright (C) 2026 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "pan_fb.h"
|
||||
|
|
@ -669,9 +670,124 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 14
/*
 * Emit the dynamic (per-layer) RUN_FRAGMENT2 framebuffer state into the
 * framebuffer allocation pointed at by @framebuffer. Any state that is not
 * per-layer is emitted directly to registers by the caller.
 *
 * The layout written here is: one struct pan_fbd_layer (64-byte aligned),
 * optionally followed by a ZS_CRC_EXTENSION descriptor when the FB has
 * depth/stencil, followed by one RENDER_TARGET descriptor per RT.
 *
 * @info         Framebuffer description (layout, load/store ops, tiler ctx).
 * @framebuffer  CPU/GPU pointer pair to the framebuffer memory.
 *
 * Returns the FBD tag bits; always 0 on v14+ (there is no tagged FBD
 * pointer with RUN_FRAGMENT2).
 */
uint32_t
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
                       const struct pan_ptr framebuffer)
{
   void *out = framebuffer.cpu;
   const struct pan_fb_layout *fb = info->fb;
   const struct pan_fb_load *load = info->load;
   const struct pan_fb_store *store = info->store;
   const struct pan_fb_clean_tile ct = pan_fb_get_clean_tile(info);
   const bool has_zs_crc_ext = pan_fb_has_zs(fb);

   struct pan_fbd_layer fbd_data = {0};
   fbd_data.tiler = info->tiler_ctx->valhall.desc;

   /* layer_index in flags0 is used to select the right primitive list in
    * the tiler context, and frame_arg is the value that's passed to the
    * fragment shader through r62-r63, which we use to pass gl_Layer. Since
    * the layer_idx only takes 8-bits, we might use the extra 56-bits we
    * have in frame_argument to pass other information to the fragment
    * shader at some point.
    */
   assert(info->layer >= info->tiler_ctx->valhall.layer_offset);
   fbd_data.frame_argument = info->layer;

   pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
      cfg.pre_frame_0 = pan_fix_frame_shader_mode(info->frame_shaders.modes[0],
                                                  ct.rts || ct.zs || ct.s);
      cfg.pre_frame_1 = pan_fix_frame_shader_mode(info->frame_shaders.modes[1],
                                                  ct.rts || ct.zs || ct.s);
      cfg.post_frame = info->frame_shaders.modes[2];

      /* Enabling prepass without pipelining is generally not good for
       * performance, so disable HSR in that case.
       */
      cfg.hsr_prepass_enable = info->allow_hsr_prepass &&
                               pan_fb_can_pipeline_zs(fb);
      cfg.hsr_prepass_interleaving_enable = pan_fb_can_pipeline_zs(fb);
      cfg.hsr_prepass_filter_enable = true;
      cfg.hsr_hierarchical_optimizations_enable = true;

      cfg.internal_layer_index =
         info->layer - info->tiler_ctx->valhall.layer_offset;
   }

   pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
      if (fb->s_format != PIPE_FORMAT_NONE) {
         cfg.s_clear = load && target_has_clear(&load->s) ?
                       load->s.clear.stencil : 0;
         cfg.s_write_enable = store && store->s.store;
      }

      if (fb->z_format != PIPE_FORMAT_NONE) {
         cfg.z_internal_format = pan_get_z_internal_format(fb->z_format);
         cfg.z_write_enable = store && store->zs.store;
      } else {
         /* No depth attachment: pick an arbitrary valid internal format
          * and make sure nobody asked us to store depth. */
         cfg.z_internal_format = MALI_Z_INTERNAL_FORMAT_D24;
         assert(!store || !store->zs.store);
      }
   }

   /* Note: fixed a duplicated "load && load" in the original condition. */
   fbd_data.z_clear =
      util_bitpack_float(fb->z_format != PIPE_FORMAT_NONE && load &&
                         target_has_clear(&load->z)
                            ? load->z.clear.depth
                            : 0);

   fbd_data.dcd_pointer = info->frame_shaders.dcd_pointer;

   {
      /* Set the DBD and RTD pointers. Both must be 64-bytes aligned. */
      uint64_t out_gpu_addr =
         framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);

      if (has_zs_crc_ext) {
         fbd_data.dbd_pointer = out_gpu_addr;
         assert(fbd_data.dbd_pointer % 64 == 0);
         out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
      }

      fbd_data.rtd_pointer = out_gpu_addr;
      assert(fbd_data.rtd_pointer % 64 == 0);
   }

   /* Write the per-layer state, then the descriptors that follow it at the
    * GPU addresses computed above. */
   memcpy(out, &fbd_data, sizeof(fbd_data));
   out += ALIGN_POT(sizeof(fbd_data), 64);

   if (has_zs_crc_ext) {
      struct mali_zs_crc_extension_packed zs_crc;
      emit_zs_crc_desc(info, ct, &zs_crc);
      memcpy(out, &zs_crc, sizeof(zs_crc));
      out += sizeof(zs_crc);
   }

   /* Emit one RENDER_TARGET descriptor per RT, packing the tile-buffer
    * allocations back to back. */
   uint32_t tile_rt_offset_B = 0;
   for (unsigned rt = 0; rt < fb->rt_count; rt++) {
      struct mali_rgb_render_target_packed rgb_rt;
      emit_rgb_rt_desc(info, ct, rt, tile_rt_offset_B, &rgb_rt);
      memcpy(out, &rgb_rt, sizeof(rgb_rt));
      out += sizeof(rgb_rt);

      if (fb->rt_formats[rt] != PIPE_FORMAT_NONE) {
         tile_rt_offset_B += pan_bytes_per_pixel_tib(fb->rt_formats[rt]) *
                             fb->tile_size_px * fb->sample_count;
      }
   }
   assert(tile_rt_offset_B <= fb->tile_rt_alloc_B);

   /* There are no FBD tag bits with RUN_FRAGMENT2. */
   return 0;
}
|
||||
#else /* PAN_ARCH < 14 */
|
||||
uint32_t
|
||||
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
|
||||
const struct pan_ptr framebuffer)
|
||||
{
|
||||
void *out = framebuffer.cpu;
|
||||
const struct pan_fb_layout *fb = info->fb;
|
||||
const struct pan_fb_load *load = info->load;
|
||||
const struct pan_fb_store *store = info->store;
|
||||
|
|
@ -823,4 +939,5 @@ GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out)
|
|||
}
|
||||
return tag.opaque[0];
|
||||
}
|
||||
#endif
|
||||
#endif /* PAN_ARCH >= 14 */
|
||||
#endif /* PAN_ARCH >= 5 */
|
||||
|
|
|
|||
|
|
@ -1,14 +1,20 @@
|
|||
/*
|
||||
* Copyright (C) 2026 Collabora, Ltd.
|
||||
* Copyright (C) 2026 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef __PAN_FB_H
|
||||
#define __PAN_FB_H
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
#include "genxml/cs_builder.h"
|
||||
#endif
|
||||
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "util/format/u_formats.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "pan_pool.h"
|
||||
|
||||
struct nir_shader;
|
||||
struct nir_shader_compiler_options;
|
||||
|
|
@ -481,7 +487,7 @@ void GENX(pan_fill_fb_info)(const struct pan_fb_desc_info *info,
|
|||
struct pan_fb_info *fbinfo);
|
||||
|
||||
uint32_t GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
|
||||
void *out);
|
||||
const struct pan_ptr framebuffer);
|
||||
#endif
|
||||
|
||||
enum ENUM_PACKED pan_fb_shader_op {
|
||||
|
|
@ -620,4 +626,35 @@ GENX(pan_get_fb_shader)(const struct pan_fb_shader_key *key,
|
|||
const struct nir_shader_compiler_options *nir_options);
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 14
/* Per-layer framebuffer state consumed by RUN_FRAGMENT2.
 *
 * This structure must remain 64-byte aligned: the ZS_CRC_EXTENSION and
 * RENDER_TARGET descriptors are placed immediately after it in memory and
 * need that alignment too.
 */
struct pan_fbd_layer {
   /** GPU address to the tiler descriptor. */
   uint64_t tiler;

   /** Frame argument. */
   uint64_t frame_argument;

   /** An instance of Fragment Flags 0. */
   struct mali_fragment_flags_0_packed flags0;

   /** An instance of Fragment Flags 2. */
   struct mali_fragment_flags_2_packed flags2;

   /** Z clear value. */
   uint32_t z_clear;

   /** GPU address to the draw call descriptors. It may be 0. */
   uint64_t dcd_pointer;

   /** GPU address to the ZS_CRC_EXTENSION descriptor. It may be 0. */
   uint64_t dbd_pointer;

   /** GPU address to the RENDER_TARGET descriptors. */
   uint64_t rtd_pointer;
} __attribute__((aligned(64)));
#endif /* PAN_ARCH >= 14 */
|
||||
|
||||
#endif /* __PAN_FB_H */
|
||||
|
|
|
|||
|
|
@ -1316,7 +1316,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
tiler_ctx = get_tiler_context(cmdbuf, layer_idx);
|
||||
|
||||
uint32_t new_fbd_flags =
|
||||
GENX(pan_emit_fb_desc)(&fbd_info, fbds.cpu + fbd_sz * i);
|
||||
GENX(pan_emit_fb_desc)(&fbd_info, pan_ptr_offset(fbds, fbd_sz * i));
|
||||
|
||||
/* Make sure all FBDs have the same flags. */
|
||||
assert(i == 0 || new_fbd_flags == fbd_flags);
|
||||
|
|
@ -1335,7 +1335,6 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
|
||||
for (uint32_t i = 0; i < enabled_layer_count; i++) {
|
||||
uint32_t layer_idx = multiview ? u_bit_scan(&ir_view_mask_temp) : i;
|
||||
void *ir_fbd = (void *)((uint8_t *)ir_fbds.cpu + (i * fbd_sz));
|
||||
|
||||
fbd_info.layer = layer_idx;
|
||||
tiler_ctx = get_tiler_context(cmdbuf, layer_idx);
|
||||
|
|
@ -1353,8 +1352,8 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
ASSERTED uint32_t new_fbd_flags =
|
||||
GENX(pan_emit_fb_desc)(&fbd_info, ir_fbd);
|
||||
ASSERTED uint32_t new_fbd_flags = GENX(pan_emit_fb_desc)(
|
||||
&fbd_info, pan_ptr_offset(ir_fbds, fbd_sz * i));
|
||||
|
||||
/* Make sure all FBDs have the same flags. */
|
||||
assert(new_fbd_flags == fbd_flags);
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
|
|||
fbd_info.layer = layer_id;
|
||||
fbd_info.frame_shaders = fs;
|
||||
fbd_info.frame_shaders.dcd_pointer += layer_id * 3 * pan_size(DRAW);
|
||||
tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd.cpu);
|
||||
tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd);
|
||||
|
||||
result = panvk_cmd_prepare_fragment_job(cmdbuf, tagged_fbd_ptr);
|
||||
if (result != VK_SUCCESS)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue