mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-16 22:48:05 +02:00
panvk: Implement RUN_FRAGMENT2
Added structure panvk_fb_layer_state and related logic to store and emit per-layer fragment state. Also, move some temporary registers to non-conflicting ones. Incremental rendering is left as TODO for later. Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41081>
This commit is contained in:
parent
1f2edeb54a
commit
a7ae37656d
7 changed files with 381 additions and 80 deletions
|
|
@ -1482,6 +1482,7 @@
|
|||
<value name="FRAME_SHADER_DCD_POINTER" value="46"/>
|
||||
<value name="VRS_IMAGE" value="48"/>
|
||||
<!-- 50-51, 53 zero -->
|
||||
<value name="FBD_POINTER" value="54"/> <!-- SW ABI -->
|
||||
</enum>
|
||||
|
||||
<struct name="Descriptor Header" size="1" align="32">
|
||||
|
|
|
|||
|
|
@ -76,20 +76,6 @@ GENX(pan_select_fb_tile_size)(struct pan_fb_layout *fb)
|
|||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if there's enough space in the tile buffer for at least two
|
||||
* Z/S tiles.
|
||||
*/
|
||||
static inline bool
|
||||
pan_fb_can_pipeline_zs(const struct pan_fb_layout *fb)
|
||||
{
|
||||
const uint32_t z_B_per_px = sizeof(float) * fb->sample_count;
|
||||
const uint32_t z_B_per_tile = z_B_per_px * fb->tile_size_px;
|
||||
|
||||
/* The budget is already half the available Z space */
|
||||
return z_B_per_tile < fb->tile_z_budget_B;
|
||||
}
|
||||
|
||||
static void
|
||||
align_fb_tiling_area_for_image_plane(struct pan_fb_layout *fb,
|
||||
struct pan_image_plane_ref pref)
|
||||
|
|
@ -377,13 +363,6 @@ GENX(pan_fill_fb_info)(const struct pan_fb_desc_info *info,
|
|||
}
|
||||
|
||||
#if PAN_ARCH >= 5
|
||||
static bool
|
||||
target_has_clear(const struct pan_fb_load_target *target)
|
||||
{
|
||||
return target->in_bounds_load == PAN_FB_LOAD_CLEAR ||
|
||||
target->border_load == PAN_FB_LOAD_CLEAR;
|
||||
}
|
||||
|
||||
static enum mali_msaa
|
||||
translate_msaa_copy_op(const struct pan_fb_layout *fb,
|
||||
const struct pan_image_view *iview,
|
||||
|
|
@ -414,11 +393,6 @@ translate_msaa_copy_op(const struct pan_fb_layout *fb,
|
|||
}
|
||||
}
|
||||
|
||||
/* Clean-tile state returned by pan_fb_get_clean_tile(). */
struct pan_fb_clean_tile {
   /* Bitmask with one bit per render target, tested with BITFIELD_BIT(rt)
    * to set clean_pixel_write_enable on that target. */
   uint8_t rts;

   /* NOTE(review): presumably the depth/stencil buffer flag -- confirm
    * against pan_fb_get_clean_tile(). */
   bool zs;

   /* NOTE(review): presumably the separate stencil buffer flag -- confirm
    * against pan_fb_get_clean_tile(). */
   bool s;
};
|
||||
|
||||
static bool
|
||||
pan_fb_load_target_always(const struct pan_fb_load_target *target)
|
||||
{
|
||||
|
|
@ -435,8 +409,8 @@ pan_fb_store_target_always(const struct pan_fb_store_target *target)
|
|||
return target->store && target->always;
|
||||
}
|
||||
|
||||
static struct pan_fb_clean_tile
|
||||
pan_fb_get_clean_tile(const struct pan_fb_desc_info *info)
|
||||
struct pan_fb_clean_tile
|
||||
GENX(pan_fb_get_clean_tile)(const struct pan_fb_desc_info *info)
|
||||
{
|
||||
const struct pan_fb_layout *fb = info->fb;
|
||||
const struct pan_fb_load *load = info->load;
|
||||
|
|
@ -614,7 +588,7 @@ emit_rgb_rt_desc(const struct pan_fb_desc_info *info,
|
|||
cfg.clean_pixel_write_enable = !!(ct.rts & BITFIELD_BIT(rt));
|
||||
#endif
|
||||
|
||||
if (load && target_has_clear(&load->rts[rt])) {
|
||||
if (load && pan_target_has_clear(&load->rts[rt])) {
|
||||
uint32_t packed[4] = {};
|
||||
pan_pack_color(GENX(pan_blendable_formats), packed,
|
||||
&load->rts[rt].clear.color, fb->rt_formats[rt],
|
||||
|
|
@ -649,33 +623,12 @@ emit_rgb_rt_desc(const struct pan_fb_desc_info *info,
|
|||
pan_merge(rgb_rt, &desc, RGB_RENDER_TARGET);
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
/* All GPUs starting from Bifrost are affected by issue TSIX-2033:
|
||||
*
|
||||
* Forcing clean_tile_writes breaks INTERSECT readbacks
|
||||
*
|
||||
* To workaround, use the pre-frame shader mode ALWAYS instead of INTERSECT if
|
||||
* clean_tile_write_enable is set on either one of the color, depth or stencil
|
||||
* buffers. Since INTERSECT is a hint that the hardware may ignore, this
|
||||
* cannot affect correctness, only performance. */
|
||||
|
||||
static enum mali_pre_post_frame_shader_mode
|
||||
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
|
||||
bool force_clean_tile)
|
||||
{
|
||||
if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
|
||||
return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
|
||||
else
|
||||
return mode;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
emit_rts(const struct pan_fb_desc_info *info,
|
||||
struct mali_rgb_render_target_packed *rts)
|
||||
{
|
||||
const struct pan_fb_layout *fb = info->fb;
|
||||
const struct pan_fb_clean_tile ct = pan_fb_get_clean_tile(info);
|
||||
const struct pan_fb_clean_tile ct = GENX(pan_fb_get_clean_tile)(info);
|
||||
|
||||
uint32_t tile_rt_offset_B = 0;
|
||||
for (unsigned rt = 0; rt < fb->rt_count; rt++) {
|
||||
|
|
@ -696,7 +649,7 @@ GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
|
|||
const struct pan_fb_descs *out)
|
||||
{
|
||||
if (pan_fb_has_zs(info->fb)) {
|
||||
emit_zs_crc_desc(info, pan_fb_get_clean_tile(info), out->zs_crc);
|
||||
emit_zs_crc_desc(info, GENX(pan_fb_get_clean_tile)(info), out->zs_crc);
|
||||
}
|
||||
|
||||
emit_rts(info, out->rts);
|
||||
|
|
@ -711,7 +664,7 @@ GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
|
|||
const struct pan_fb_layout *fb = info->fb;
|
||||
const struct pan_fb_load *load = info->load;
|
||||
const struct pan_fb_store *store = info->store;
|
||||
const struct pan_fb_clean_tile ct = pan_fb_get_clean_tile(info);
|
||||
const struct pan_fb_clean_tile ct = GENX(pan_fb_get_clean_tile)(info);
|
||||
|
||||
const bool has_zs_crc_ext = pan_fb_has_zs(fb);
|
||||
|
||||
|
|
@ -793,14 +746,14 @@ GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
|
|||
|
||||
if (fb->s_format != PIPE_FORMAT_NONE) {
|
||||
cfg.s_clear =
|
||||
load && target_has_clear(&load->s) ? load->s.clear.stencil : 0;
|
||||
load && pan_target_has_clear(&load->s) ? load->s.clear.stencil : 0;
|
||||
cfg.s_write_enable = store && store->s.store;
|
||||
}
|
||||
|
||||
if (fb->z_format != PIPE_FORMAT_NONE) {
|
||||
cfg.z_internal_format = pan_get_z_internal_format(fb->z_format);
|
||||
cfg.z_clear =
|
||||
load && target_has_clear(&load->z) ? load->z.clear.depth : 0;
|
||||
load && pan_target_has_clear(&load->z) ? load->z.clear.depth : 0;
|
||||
cfg.z_write_enable = store && store->zs.store;
|
||||
} else {
|
||||
/* Default to 24 bit depth if there's no surface. */
|
||||
|
|
|
|||
|
|
@ -618,15 +618,67 @@ bool GENX(pan_fb_load_shader_key_fill)(struct pan_fb_shader_key *key,
|
|||
const struct pan_fb_load *load,
|
||||
bool zs_prepass);
|
||||
|
||||
#if PAN_ARCH >= 5
|
||||
/* Clean-tile state returned by GENX(pan_fb_get_clean_tile)(). */
struct pan_fb_clean_tile {
   /* Bitmask with one bit per render target, tested with BITFIELD_BIT(rt)
    * to set clean_pixel_write_enable on that target. */
   uint8_t rts;

   /* NOTE(review): presumably the depth/stencil buffer flag -- confirm
    * against GENX(pan_fb_get_clean_tile)(). */
   bool zs;

   /* NOTE(review): presumably the separate stencil buffer flag -- confirm
    * against GENX(pan_fb_get_clean_tile)(). */
   bool s;
};
|
||||
|
||||
struct pan_fb_clean_tile
|
||||
GENX(pan_fb_get_clean_tile)(const struct pan_fb_desc_info *info);
|
||||
|
||||
static inline bool
|
||||
pan_target_has_clear(const struct pan_fb_load_target *target)
|
||||
{
|
||||
return target->in_bounds_load == PAN_FB_LOAD_CLEAR ||
|
||||
target->border_load == PAN_FB_LOAD_CLEAR;
|
||||
}
|
||||
#endif /* PAN_ARCH >= 5 */
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
bool GENX(pan_fb_resolve_shader_key_fill)(struct pan_fb_shader_key *key,
|
||||
const struct pan_fb_layout *fb,
|
||||
const struct pan_fb_resolve *resolve);
|
||||
#endif
|
||||
|
||||
/* All GPUs starting from Bifrost are affected by issue TSIX-2033:
|
||||
*
|
||||
* Forcing clean_tile_writes breaks INTERSECT readbacks
|
||||
*
|
||||
* To workaround, use the pre-frame shader mode ALWAYS instead of INTERSECT if
|
||||
* clean_tile_write_enable is set on either one of the color, depth or stencil
|
||||
* buffers. Since INTERSECT is a hint that the hardware may ignore, this
|
||||
* cannot affect correctness, only performance. */
|
||||
|
||||
static inline enum mali_pre_post_frame_shader_mode
|
||||
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
|
||||
bool force_clean_tile)
|
||||
{
|
||||
if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
|
||||
return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
|
||||
else
|
||||
return mode;
|
||||
}
|
||||
#endif /* PAN_ARCH >= 6 */
|
||||
|
||||
struct nir_shader *
|
||||
GENX(pan_get_fb_shader)(const struct pan_fb_shader_key *key,
|
||||
const struct nir_shader_compiler_options *nir_options);
|
||||
|
||||
#if PAN_ARCH >= 13
|
||||
/**
|
||||
* Returns true if there's enough space in the tile buffer for at least two
|
||||
* Z/S tiles.
|
||||
*/
|
||||
static inline bool
|
||||
pan_fb_can_pipeline_zs(const struct pan_fb_layout *fb)
|
||||
{
|
||||
const uint32_t z_B_per_px = sizeof(float) * fb->sample_count;
|
||||
const uint32_t z_B_per_tile = z_B_per_px * fb->tile_size_px;
|
||||
|
||||
/* The budget is already half the available Z space */
|
||||
return z_B_per_tile < fb->tile_z_budget_B;
|
||||
}
|
||||
#endif
|
||||
#endif /* PAN_ARCH */
|
||||
|
||||
#endif /* __PAN_FB_H */
|
||||
|
|
|
|||
|
|
@ -61,6 +61,37 @@ enum panvk_incremental_rendering_pass {
|
|||
PANVK_IR_PASS_COUNT
|
||||
};
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
/* Framebuffer per-layer state. init_layer_fragment_state() writes one
 * instance at the start of a layer's framebuffer-descriptor slot, and
 * cs_emit_layer_fragment_state() loads every field into the RUN_FRAGMENT2
 * staging registers using its offsetof() position, so the field layout is
 * part of the SW ABI. Keep this structure 64-byte aligned, since we want
 * the adjacent ZS_CRC_EXTENSION and RENDER_TARGET descriptors aligned. */
|
||||
struct panvk_fb_layer_state {
|
||||
/** GPU address to the tiler descriptor (loaded into the
 * TILER_DESCRIPTOR_POINTER staging register). */
|
||||
uint64_t tiler;
|
||||
|
||||
/** Frame argument (loaded into FRAME_ARG); init_layer_fragment_state()
 * stores the layer index here. */
|
||||
uint64_t frame_argument;
|
||||
|
||||
/** An instance of Fragment Flags 0 (loaded into FLAGS_0). */
|
||||
struct mali_fragment_flags_0_packed flags0;
|
||||
|
||||
/** An instance of Fragment Flags 2 (loaded into FLAGS_2). */
|
||||
struct mali_fragment_flags_2_packed flags2;
|
||||
|
||||
/** Z clear value, stored as the bit pattern produced by
 * util_bitpack_float() (loaded into Z_CLEAR). */
|
||||
uint32_t z_clear;
|
||||
|
||||
/** GPU address to the draw call descriptors (loaded into
 * FRAME_SHADER_DCD_POINTER). It may be 0. */
|
||||
uint64_t dcd_pointer;
|
||||
|
||||
/** GPU address to the ZS_CRC_EXTENSION descriptor (loaded into
 * DBD_POINTER). It may be 0; init_layer_fragment_state() only sets it
 * when the framebuffer has a ZS/CRC extension. */
|
||||
uint64_t dbd_pointer;
|
||||
|
||||
/** GPU address to the RENDER_TARGET descriptors (loaded into
 * RTD_POINTER). */
|
||||
uint64_t rtd_pointer;
|
||||
} __attribute__((aligned(64)));
|
||||
#endif /* PAN_ARCH >= 14 */
|
||||
|
||||
static inline uint32_t
|
||||
get_tiler_oom_handler_idx(bool has_zs_ext, uint32_t rt_count)
|
||||
{
|
||||
|
|
@ -74,7 +105,11 @@ static inline uint32_t
|
|||
get_fbd_size(bool has_zs_ext, uint32_t rt_count)
|
||||
{
|
||||
assert(rt_count >= 1 && rt_count <= MAX_RTS);
|
||||
#if PAN_ARCH >= 14
|
||||
uint32_t fbd_size = ALIGN_POT(sizeof(struct panvk_fb_layer_state), 64);
|
||||
#else
|
||||
uint32_t fbd_size = pan_size(FRAMEBUFFER);
|
||||
#endif
|
||||
if (has_zs_ext)
|
||||
fbd_size += pan_size(ZS_CRC_EXTENSION);
|
||||
fbd_size += pan_size(RENDER_TARGET) * rt_count;
|
||||
|
|
@ -209,13 +244,25 @@ enum panvk_cs_regs {
|
|||
PANVK_CS_REG_RUN_IDVS_SR_END = 60,
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
/* RUN_FRAGMENT2 staging regs.
|
||||
* SW ABI:
|
||||
* - r58:59 contain the pointer to the first tiler descriptor. This is
|
||||
* needed to gather completed heap chunks after a run_fragment2.
|
||||
*/
|
||||
PANVK_CS_REG_RUN_FRAGMENT_SR_START = 0,
|
||||
PANVK_CS_REG_RUN_FRAGMENT_SR_END = 55,
|
||||
PANVK_CS_REG_TILER_DESC_PTR = 58,
|
||||
#else
|
||||
/* RUN_FRAGMENT staging regs.
|
||||
* SW ABI:
|
||||
* - r38:39 contain the pointer to the first tiler descriptor. This is
|
||||
* - r58:59 contain the pointer to the first tiler descriptor. This is
|
||||
* needed to gather completed heap chunks after a run_fragment.
|
||||
*/
|
||||
PANVK_CS_REG_RUN_FRAGMENT_SR_START = 38,
|
||||
PANVK_CS_REG_RUN_FRAGMENT_SR_END = 46,
|
||||
PANVK_CS_REG_TILER_DESC_PTR = 58,
|
||||
#endif
|
||||
|
||||
/* RUN_COMPUTE staging regs. */
|
||||
PANVK_CS_REG_RUN_COMPUTE_SR_START = 0,
|
||||
|
|
@ -870,4 +917,31 @@ vk_stages_to_subqueue_mask(VkPipelineStageFlags2 vk_stages,
|
|||
void panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
|
||||
struct panvk_cs_deps deps);
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
static inline void
|
||||
cs_emit_layer_fragment_state(struct cs_builder *b, struct cs_index fbd_ptr)
|
||||
{
|
||||
/* Emit the dynamic fragment state. This state may change per-layer. */
|
||||
|
||||
cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_0), fbd_ptr,
|
||||
offsetof(struct panvk_fb_layer_state, flags0));
|
||||
cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_2), fbd_ptr,
|
||||
offsetof(struct panvk_fb_layer_state, flags2));
|
||||
cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, Z_CLEAR), fbd_ptr,
|
||||
offsetof(struct panvk_fb_layer_state, z_clear));
|
||||
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, TILER_DESCRIPTOR_POINTER), fbd_ptr,
|
||||
offsetof(struct panvk_fb_layer_state, tiler));
|
||||
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, RTD_POINTER), fbd_ptr,
|
||||
offsetof(struct panvk_fb_layer_state, rtd_pointer));
|
||||
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, DBD_POINTER), fbd_ptr,
|
||||
offsetof(struct panvk_fb_layer_state, dbd_pointer));
|
||||
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_ARG), fbd_ptr,
|
||||
offsetof(struct panvk_fb_layer_state, frame_argument));
|
||||
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_SHADER_DCD_POINTER), fbd_ptr,
|
||||
offsetof(struct panvk_fb_layer_state, dcd_pointer));
|
||||
|
||||
cs_flush_loads(b);
|
||||
}
|
||||
#endif /* PAN_ARCH >= 14 */
|
||||
|
||||
#endif /* PANVK_CMD_BUFFER_H */
|
||||
|
|
|
|||
|
|
@ -1230,6 +1230,93 @@ get_tiler_context(struct panvk_cmd_buffer *cmdbuf, uint32_t layer)
|
|||
return tiler_ctx;
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
static void
|
||||
init_layer_fragment_state(const struct pan_fb_desc_info *info,
|
||||
const struct pan_ptr fbd)
|
||||
{
|
||||
const struct pan_fb_layout *fb = info->fb;
|
||||
const struct pan_fb_load *load = info->load;
|
||||
const struct pan_fb_store *store = info->store;
|
||||
const struct pan_fb_clean_tile ct = GENX(pan_fb_get_clean_tile)(info);
|
||||
const bool has_zs_crc_ext = pan_fb_has_zs(fb);
|
||||
|
||||
struct panvk_fb_layer_state fbd_data = {0};
|
||||
fbd_data.tiler = info->tiler_ctx->valhall.desc;
|
||||
|
||||
/* layer_index in flags0 is used to select the right primitive list in
|
||||
* the tiler context, and frame_arg is the value that's passed to the
|
||||
* fragment shader through r62-r63, which we use to pass gl_Layer. Since
|
||||
* the layer_idx only takes 8-bits, we might use the extra 56-bits we
|
||||
* have in frame_argument to pass other information to the fragment
|
||||
* shader at some point.
|
||||
*/
|
||||
assert(info->layer >= info->tiler_ctx->valhall.layer_offset);
|
||||
fbd_data.frame_argument = info->layer;
|
||||
|
||||
pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
|
||||
cfg.pre_frame_0 = pan_fix_frame_shader_mode(info->frame_shaders.modes[0],
|
||||
ct.rts || ct.zs || ct.s);
|
||||
cfg.pre_frame_1 = pan_fix_frame_shader_mode(info->frame_shaders.modes[1],
|
||||
ct.rts || ct.zs || ct.s);
|
||||
cfg.post_frame = info->frame_shaders.modes[2];
|
||||
|
||||
/* Enabling prepass without pipelineing is generally not good for
|
||||
* performance, so disable HSR in that case.
|
||||
*/
|
||||
cfg.hsr_prepass_enable =
|
||||
info->allow_hsr_prepass && pan_fb_can_pipeline_zs(fb);
|
||||
cfg.hsr_prepass_interleaving_enable = pan_fb_can_pipeline_zs(fb);
|
||||
cfg.hsr_prepass_filter_enable = true;
|
||||
cfg.hsr_hierarchical_optimizations_enable = true;
|
||||
|
||||
cfg.internal_layer_index =
|
||||
info->layer - info->tiler_ctx->valhall.layer_offset;
|
||||
}
|
||||
|
||||
pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
|
||||
if (fb->s_format != PIPE_FORMAT_NONE) {
|
||||
cfg.s_clear =
|
||||
load && pan_target_has_clear(&load->s) ? load->s.clear.stencil : 0;
|
||||
cfg.s_write_enable = store && store->s.store;
|
||||
}
|
||||
|
||||
if (fb->z_format != PIPE_FORMAT_NONE) {
|
||||
cfg.z_internal_format = pan_get_z_internal_format(fb->z_format);
|
||||
cfg.z_write_enable = store && store->zs.store;
|
||||
} else {
|
||||
cfg.z_internal_format = MALI_Z_INTERNAL_FORMAT_D24;
|
||||
assert(!store || !store->zs.store);
|
||||
}
|
||||
}
|
||||
|
||||
fbd_data.z_clear =
|
||||
util_bitpack_float(fb->z_format != PIPE_FORMAT_NONE && load && load &&
|
||||
pan_target_has_clear(&load->z)
|
||||
? load->z.clear.depth
|
||||
: 0);
|
||||
|
||||
fbd_data.dcd_pointer = info->frame_shaders.dcd_pointer;
|
||||
|
||||
/* Set the DBD and RTD pointers. Both must be 64-bytes aligned. */
|
||||
{
|
||||
uint64_t out_gpu_addr =
|
||||
fbd.gpu + ALIGN_POT(sizeof(struct panvk_fb_layer_state), 64);
|
||||
|
||||
if (has_zs_crc_ext) {
|
||||
fbd_data.dbd_pointer = out_gpu_addr;
|
||||
assert(fbd_data.dbd_pointer % 64 == 0);
|
||||
out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
|
||||
}
|
||||
|
||||
fbd_data.rtd_pointer = out_gpu_addr;
|
||||
assert(fbd_data.rtd_pointer % 64 == 0);
|
||||
}
|
||||
|
||||
memcpy(fbd.cpu, &fbd_data, sizeof(fbd_data));
|
||||
}
|
||||
#endif /* PAN_ARCH >= 14 */
|
||||
|
||||
static VkResult
|
||||
get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
|
|
@ -1245,8 +1332,13 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
uint32_t fbd_sz = calc_fbd_size(cmdbuf);
|
||||
uint32_t fbds_sz = enabled_layer_count * fbd_sz;
|
||||
|
||||
cmdbuf->state.gfx.render.fbds = panvk_cmd_alloc_dev_mem(
|
||||
cmdbuf, desc, fbds_sz, pan_alignment(FRAMEBUFFER));
|
||||
#if PAN_ARCH >= 14
|
||||
const unsigned fbds_alignment = alignof(struct panvk_fb_layer_state);
|
||||
#else
|
||||
const unsigned fbds_alignment = pan_alignment(FRAMEBUFFER);
|
||||
#endif
|
||||
cmdbuf->state.gfx.render.fbds =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbds_sz, fbds_alignment);
|
||||
if (!cmdbuf->state.gfx.render.fbds.gpu)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
|
||||
|
|
@ -1331,13 +1423,25 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
: fbd.cpu + fb_sz,
|
||||
};
|
||||
uint32_t new_fbd_flags = GENX(pan_emit_fb_desc)(&fbd_info, &fb_descs);
|
||||
#if PAN_ARCH >= 14
|
||||
init_layer_fragment_state(&fbd_info, fbd);
|
||||
#endif
|
||||
|
||||
/* Make sure all FBDs have the same flags. */
|
||||
assert(i == 0 || new_fbd_flags == fbd_flags);
|
||||
fbd_flags = new_fbd_flags;
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
/* fbd_flags is unused on v14+. */
|
||||
assert(!fbd_flags);
|
||||
#endif
|
||||
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
// TODO: Implement IR support for v14.
|
||||
#else
|
||||
for (uint32_t ir_pass = 0; ir_pass < PANVK_IR_PASS_COUNT; ir_pass++) {
|
||||
struct pan_ptr ir_fbds = panvk_cmd_alloc_dev_mem(
|
||||
cmdbuf, desc, fbds_sz, pan_alignment(FRAMEBUFFER));
|
||||
|
|
@ -1377,6 +1481,9 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
};
|
||||
ASSERTED uint32_t new_fbd_flags =
|
||||
GENX(pan_emit_fb_desc)(&fbd_info, &fb_descs);
|
||||
#if PAN_ARCH >= 14
|
||||
init_layer_fragment_state(&fbd_info, fbd);
|
||||
#endif
|
||||
|
||||
/* Make sure all FBDs have the same flags. */
|
||||
assert(new_fbd_flags == fbd_flags);
|
||||
|
|
@ -1389,16 +1496,17 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
|
||||
/* Wait for IR info push to complete */
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
#endif /* PAN_ARCH >= 14 */
|
||||
|
||||
bool unset_provoking_vertex =
|
||||
cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET;
|
||||
|
||||
if (copy_fbds) {
|
||||
struct cs_index cur_tiler = cs_reg64(b, 38);
|
||||
struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR);
|
||||
struct cs_index dst_fbd_ptr = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
|
||||
struct cs_index fbd_idx = cs_reg32(b, 47);
|
||||
struct cs_index src_fbd_ptr = cs_reg64(b, 48);
|
||||
struct cs_index remaining_layers_in_td = cs_reg32(b, 50);
|
||||
struct cs_index fbd_idx = cs_reg32(b, 60);
|
||||
struct cs_index src_fbd_ptr = cs_reg64(b, 64);
|
||||
struct cs_index remaining_layers_in_td = cs_reg32(b, 61);
|
||||
uint32_t td_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count,
|
||||
MAX_LAYERS_PER_TILER_DESC);
|
||||
|
||||
|
|
@ -1481,7 +1589,8 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_update_frag_ctx(b) {
|
||||
cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
|
||||
fbds.gpu | fbd_flags);
|
||||
cs_move64_to(b, cs_reg64(b, 38), cmdbuf->state.gfx.render.tiler);
|
||||
cs_move64_to(b, cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR),
|
||||
cmdbuf->state.gfx.render.tiler);
|
||||
}
|
||||
|
||||
/* If we don't know what provoking vertex mode the application wants yet,
|
||||
|
|
@ -3321,6 +3430,9 @@ calc_tiler_oom_handler_idx(struct panvk_cmd_buffer *cmdbuf)
|
|||
static void
|
||||
setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
#if PAN_ARCH >= 14
|
||||
// TODO: Implement IR support for v14.
|
||||
#else
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
|
||||
const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
|
||||
const bool has_zs_ext = pan_fb_has_zs(fb);
|
||||
|
|
@ -3365,6 +3477,7 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
|
|||
TILER_OOM_CTX_FIELD_OFFSET(layer_count));
|
||||
|
||||
cs_flush_stores(b);
|
||||
#endif /* PAN_ARCH >= 14 */
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
@ -3373,24 +3486,95 @@ pack_32_2x16(uint16_t lo, uint16_t hi)
|
|||
return (((uint32_t)hi) << 16) | (uint32_t)lo;
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
static void
|
||||
cs_emit_static_fragment_state(struct cs_builder *b,
|
||||
struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
/* Emit the static fragment staging registers. These don't change per-layer. */
|
||||
|
||||
const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
const struct panvk_rendering_state *render = &cmdbuf->state.gfx.render;
|
||||
const struct pan_fb_layout *fb = &render->fb.layout;
|
||||
|
||||
const uint8_t sample_count = render->fb.layout.sample_count;
|
||||
|
||||
const struct pan_fb_bbox fb_area_px =
|
||||
pan_fb_bbox_from_xywh(0, 0, fb->width_px, fb->height_px);
|
||||
const struct pan_fb_bbox bbox_px =
|
||||
pan_fb_bbox_clamp(fb->tiling_area_px, fb_area_px);
|
||||
|
||||
assert(pan_fb_bbox_is_valid(fb->tiling_area_px));
|
||||
|
||||
struct mali_fragment_bounding_box_packed bbox;
|
||||
pan_pack(&bbox, FRAGMENT_BOUNDING_BOX, cfg) {
|
||||
cfg.bound_min_x = bbox_px.min_x;
|
||||
cfg.bound_min_y = bbox_px.min_y;
|
||||
cfg.bound_max_x = bbox_px.max_x;
|
||||
cfg.bound_max_y = bbox_px.max_y;
|
||||
}
|
||||
|
||||
struct mali_frame_size_packed frame_size;
|
||||
pan_pack(&frame_size, FRAME_SIZE, cfg) {
|
||||
cfg.width = fb->width_px;
|
||||
cfg.height = fb->height_px;
|
||||
}
|
||||
|
||||
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
|
||||
bbox.opaque[0]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
|
||||
bbox.opaque[1]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FRAME_SIZE), frame_size.opaque[0]);
|
||||
cs_move64_to(
|
||||
b, cs_sr_reg64(b, FRAGMENT, SAMPLE_POSITION_ARRAY_POINTER),
|
||||
dev->sample_positions->addr.dev +
|
||||
pan_sample_positions_offset(pan_sample_pattern(sample_count)));
|
||||
|
||||
/* Flags 1 */
|
||||
struct mali_fragment_flags_1_packed flags1;
|
||||
pan_pack(&flags1, FRAGMENT_FLAGS_1, cfg) {
|
||||
cfg.sample_count = fb->sample_count;
|
||||
cfg.sample_pattern = pan_sample_pattern(fb->sample_count);
|
||||
cfg.effective_tile_size = fb->tile_size_px;
|
||||
cfg.point_sprite_coord_origin_max_y = false;
|
||||
cfg.first_provoking_vertex = get_first_provoking_vertex(cmdbuf);
|
||||
|
||||
assert(fb->rt_count > 0);
|
||||
cfg.render_target_count = fb->rt_count;
|
||||
cfg.color_buffer_allocation = fb->tile_rt_alloc_B;
|
||||
}
|
||||
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]);
|
||||
|
||||
/* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */
|
||||
}
|
||||
#endif /* PAN_ARCH >= 14 */
|
||||
|
||||
static VkResult
|
||||
issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
#if PAN_ARCH < 14
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
#endif
|
||||
const struct cs_tracing_ctx *tracing_ctx =
|
||||
&cmdbuf->state.cs[PANVK_SUBQUEUE_FRAGMENT].tracing;
|
||||
const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
|
||||
bool has_oq_chain = cmdbuf->state.gfx.render.oq.chain != 0;
|
||||
|
||||
/* Now initialize the fragment bits. */
|
||||
struct cs_index fbd_pointer = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
|
||||
cs_update_frag_ctx(b) {
|
||||
#if PAN_ARCH >= 14
|
||||
cs_emit_static_fragment_state(b, cmdbuf);
|
||||
cs_emit_layer_fragment_state(b, fbd_pointer);
|
||||
#else
|
||||
const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
|
||||
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
|
||||
pack_32_2x16(fb->tiling_area_px.min_x,
|
||||
fb->tiling_area_px.min_y));
|
||||
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
|
||||
pack_32_2x16(fb->tiling_area_px.max_x,
|
||||
fb->tiling_area_px.max_y));
|
||||
#endif
|
||||
}
|
||||
|
||||
bool simul_use =
|
||||
|
|
@ -3423,6 +3607,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
* state for this renderpass, so it's safe to enable. */
|
||||
struct cs_index addr_reg = cs_scratch_reg64(b, 0);
|
||||
struct cs_index length_reg = cs_scratch_reg32(b, 2);
|
||||
#if PAN_ARCH >= 14
|
||||
// TODO: Implement IR support for v14.
|
||||
#else
|
||||
uint32_t handler_idx = calc_tiler_oom_handler_idx(cmdbuf);
|
||||
uint64_t handler_addr = dev->tiler_oom.handlers_bo->addr.dev +
|
||||
handler_idx * dev->tiler_oom.handler_stride;
|
||||
|
|
@ -3430,6 +3617,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_move32_to(b, length_reg, dev->tiler_oom.handler_stride);
|
||||
cs_set_exception_handler(b, MALI_CS_EXCEPTION_TYPE_TILER_OOM, addr_reg,
|
||||
length_reg);
|
||||
#endif
|
||||
|
||||
/* Wait for the tiling to be done before submitting the fragment job. */
|
||||
wait_finish_tiling(cmdbuf);
|
||||
|
|
@ -3444,8 +3632,12 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
* up. */
|
||||
cs_move64_to(b, addr_reg, 0);
|
||||
cs_move32_to(b, length_reg, 0);
|
||||
#if PAN_ARCH >= 14
|
||||
// TODO: Implement IR support for v14.
|
||||
#else
|
||||
cs_set_exception_handler(b, MALI_CS_EXCEPTION_TYPE_TILER_OOM, addr_reg,
|
||||
length_reg);
|
||||
#endif
|
||||
|
||||
/* Applications tend to forget to describe subpass dependencies, especially
|
||||
* when it comes to write -> read dependencies on attachments. The
|
||||
|
|
@ -3461,8 +3653,13 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
}
|
||||
|
||||
if (cmdbuf->state.gfx.render.layer_count <= 1) {
|
||||
#if PAN_ARCH >= 14
|
||||
cs_trace_run_fragment2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
|
||||
false, MALI_TILE_RENDER_ORDER_Z_ORDER);
|
||||
#else
|
||||
cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
|
||||
false, MALI_TILE_RENDER_ORDER_Z_ORDER);
|
||||
#endif
|
||||
} else {
|
||||
struct cs_index run_fragment_regs = cs_scratch_reg_tuple(b, 0, 4);
|
||||
struct cs_index remaining_layers = cs_scratch_reg32(b, 4);
|
||||
|
|
@ -3471,12 +3668,17 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_while(b, MALI_CS_CONDITION_GREATER, remaining_layers) {
|
||||
cs_add32(b, remaining_layers, remaining_layers, -1);
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
cs_emit_layer_fragment_state(b, fbd_pointer);
|
||||
cs_trace_run_fragment2(b, tracing_ctx, run_fragment_regs, false,
|
||||
MALI_TILE_RENDER_ORDER_Z_ORDER);
|
||||
#else
|
||||
cs_trace_run_fragment(b, tracing_ctx, run_fragment_regs, false,
|
||||
MALI_TILE_RENDER_ORDER_Z_ORDER);
|
||||
#endif
|
||||
|
||||
cs_update_frag_ctx(b)
|
||||
cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
|
||||
cs_sr_reg64(b, FRAGMENT, FBD_POINTER), fbd_sz);
|
||||
cs_add64(b, fbd_pointer, fbd_pointer, fbd_sz);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3490,8 +3692,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
struct cs_index completed = cs_scratch_reg_tuple(b, 10, 4);
|
||||
struct cs_index completed_top = cs_scratch_reg64(b, 10);
|
||||
struct cs_index completed_bottom = cs_scratch_reg64(b, 12);
|
||||
struct cs_index cur_tiler = cs_reg64(b, 38);
|
||||
struct cs_index tiler_count = cs_reg32(b, 47);
|
||||
struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR);
|
||||
struct cs_index tiler_count = cs_reg32(b, 60);
|
||||
struct cs_index oq_chain = cs_scratch_reg64(b, 10);
|
||||
struct cs_index oq_chain_lo = cs_scratch_reg32(b, 10);
|
||||
struct cs_index oq_syncobj = cs_scratch_reg64(b, 12);
|
||||
|
|
|
|||
|
|
@ -13,8 +13,9 @@ tiler_oom_reg_perm_cb(struct cs_builder *b, unsigned reg)
|
|||
{
|
||||
switch (reg) {
|
||||
/* The bbox is set up by the fragment subqueue, we should not modify it. */
|
||||
case 42:
|
||||
case 43:
|
||||
case MALI_FRAGMENT_SR_BBOX_MIN:
|
||||
case MALI_FRAGMENT_SR_BBOX_MAX:
|
||||
|
||||
/* We should only load from the subqueue context. */
|
||||
case PANVK_CS_REG_SUBQUEUE_CTX_START:
|
||||
case PANVK_CS_REG_SUBQUEUE_CTX_END:
|
||||
|
|
@ -42,8 +43,14 @@ copy_fbd(struct cs_builder *b, bool has_zs_ext, uint32_t rt_count,
|
|||
cs_store(b, cs_scratch_reg_tuple(b, 0, 8), dst, BITFIELD_MASK(8),
|
||||
8 * sizeof(uint32_t));
|
||||
|
||||
#if PAN_ARCH >= 14
|
||||
const size_t fbd_size = ALIGN_POT(sizeof(struct panvk_fb_layer_state), 64);
|
||||
#else
|
||||
const size_t fbd_size = sizeof(struct mali_framebuffer_packed);
|
||||
#endif
|
||||
|
||||
if (has_zs_ext) {
|
||||
const uint16_t dbd_offset = sizeof(struct mali_framebuffer_packed);
|
||||
const uint16_t dbd_offset = fbd_size;
|
||||
|
||||
/* Copy the whole DBD. */
|
||||
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 8), src_other,
|
||||
|
|
@ -57,8 +64,7 @@ copy_fbd(struct cs_builder *b, bool has_zs_ext, uint32_t rt_count,
|
|||
}
|
||||
|
||||
const uint16_t rts_offset =
|
||||
sizeof(struct mali_framebuffer_packed) +
|
||||
(has_zs_ext ? sizeof(struct mali_zs_crc_extension_packed) : 0);
|
||||
fbd_size + (has_zs_ext ? sizeof(struct mali_zs_crc_extension_packed) : 0);
|
||||
|
||||
for (uint32_t rt = 0; rt < rt_count; rt++) {
|
||||
const uint16_t rt_offset =
|
||||
|
|
@ -110,12 +116,14 @@ generate_tiler_oom_handler(struct panvk_device *dev,
|
|||
.tracebuf_addr_offset =
|
||||
offsetof(struct panvk_cs_subqueue_context, debug.tracebuf.cs),
|
||||
};
|
||||
struct mali_framebuffer_pointer_packed fb_tag;
|
||||
|
||||
#if PAN_ARCH < 14
|
||||
struct mali_framebuffer_pointer_packed fb_tag;
|
||||
pan_pack(&fb_tag, FRAMEBUFFER_POINTER, cfg) {
|
||||
cfg.zs_crc_extension_present = has_zs_ext;
|
||||
cfg.render_target_count = rt_count;
|
||||
}
|
||||
#endif
|
||||
|
||||
cs_function_def(&b, &handler, handler_ctx) {
|
||||
struct cs_index subqueue_ctx = cs_subqueue_ctx_reg(&b);
|
||||
|
|
@ -140,7 +148,7 @@ generate_tiler_oom_handler(struct panvk_device *dev,
|
|||
struct cs_index run_fragment_regs = cs_scratch_reg_tuple(&b, 0, 4);
|
||||
|
||||
/* The tiler pointer is pre-filled. */
|
||||
struct cs_index tiler_ptr = cs_reg64(&b, 38);
|
||||
struct cs_index tiler_ptr = cs_reg64(&b, PANVK_CS_REG_TILER_DESC_PTR);
|
||||
|
||||
cs_load64_to(&b, scratch_fbd_ptr_reg, subqueue_ctx,
|
||||
TILER_OOM_CTX_FIELD_OFFSET(ir_scratch_fbd_ptr));
|
||||
|
|
@ -176,11 +184,17 @@ generate_tiler_oom_handler(struct panvk_device *dev,
|
|||
cs_wait_slot(&b, SB_ID(LS));
|
||||
|
||||
/* Set FBD pointer to the scratch fbd */
|
||||
cs_add64(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER),
|
||||
scratch_fbd_ptr_reg, fb_tag.opaque[0]);
|
||||
|
||||
struct cs_index fbd_pointer = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);
|
||||
#if PAN_ARCH >= 14
|
||||
cs_add64(&b, fbd_pointer, scratch_fbd_ptr_reg, 0);
|
||||
cs_emit_layer_fragment_state(&b, fbd_pointer);
|
||||
cs_trace_run_fragment2(&b, &tracing_ctx, run_fragment_regs, false,
|
||||
MALI_TILE_RENDER_ORDER_Z_ORDER);
|
||||
#else
|
||||
cs_add64(&b, fbd_pointer, scratch_fbd_ptr_reg, fb_tag.opaque[0]);
|
||||
cs_trace_run_fragment(&b, &tracing_ctx, run_fragment_regs, false,
|
||||
MALI_TILE_RENDER_ORDER_Z_ORDER);
|
||||
#endif
|
||||
|
||||
/* Serialize run fragments since we reuse FBD for the runs */
|
||||
cs_wait_slots(&b, dev->csf.sb.all_iters_mask);
|
||||
|
|
|
|||
|
|
@ -717,7 +717,12 @@ init_tiler(struct panvk_gpu_queue *queue)
|
|||
tiler_heap->chunk_size = phys_dev->csf.tiler.chunk_size;
|
||||
|
||||
alloc_info.size = get_fbd_size(true, MAX_RTS);
|
||||
alloc_info.alignment = pan_alignment(FRAMEBUFFER);
|
||||
#if PAN_ARCH >= 14
|
||||
const unsigned fbds_alignment = alignof(struct panvk_fb_layer_state);
|
||||
#else
|
||||
const unsigned fbds_alignment = pan_alignment(FRAMEBUFFER);
|
||||
#endif
|
||||
alloc_info.alignment = fbds_alignment;
|
||||
tiler_heap->oom_fbd = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info);
|
||||
if (!panvk_priv_mem_check_alloc(tiler_heap->oom_fbd)) {
|
||||
result = panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue