diff --git a/docs/drivers/panfrost.rst b/docs/drivers/panfrost.rst
index 2e214ded1e9..d9e3a618128 100644
--- a/docs/drivers/panfrost.rst
+++ b/docs/drivers/panfrost.rst
@@ -34,6 +34,8 @@ The following hardware is currently supported:
+--------------------+---------------+-----------+--------+--------+
| G725 | 5th Gen (v13) | 3.1 | 3.1 | 1.4 |
+--------------------+---------------+-----------+--------+--------+
+| G1-Pro | 5th Gen (v14) | 3.1 | 3.1 | 1.4 |
++--------------------+---------------+-----------+--------+--------+
Other Midgard and Bifrost chips (e.g. G71) are not yet supported.
diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build
index 5b3e5e41d97..ba243f5a4ed 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -41,7 +41,7 @@ compile_args_panfrost = [
'-Wno-pointer-arith'
]
-panfrost_versions = ['4', '5', '6', '7', '9', '10', '12', '13']
+panfrost_versions = ['4', '5', '6', '7', '9', '10', '12', '13', '14']
libpanfrost_versions = []
foreach ver : panfrost_versions
@@ -54,7 +54,7 @@ foreach ver : panfrost_versions
]
if ver in ['4', '5', '6', '7', '9']
files_panfrost_vx += ['pan_jm.c']
- elif ver in ['10', '12', '13']
+ elif ver in ['10', '12', '13', '14']
files_panfrost_vx += ['pan_csf.c']
endif
libpanfrost_versions += static_library(
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 87a3cbbe7ea..aa32944195f 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -49,7 +49,7 @@
* functions. */
#if PAN_ARCH <= 9
#define JOBX(__suffix) GENX(jm_##__suffix)
-#elif PAN_ARCH <= 13
+#elif PAN_ARCH <= 14
#define JOBX(__suffix) GENX(csf_##__suffix)
#else
#error "Unsupported arch"
diff --git a/src/gallium/drivers/panfrost/pan_csf.c b/src/gallium/drivers/panfrost/pan_csf.c
index 2246804b85c..d328c3647aa 100644
--- a/src/gallium/drivers/panfrost/pan_csf.c
+++ b/src/gallium/drivers/panfrost/pan_csf.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2023 Collabora Ltd.
+ * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@@ -13,6 +14,7 @@
#include "pan_cmdstream.h"
#include "pan_context.h"
#include "pan_csf.h"
+#include "pan_fb.h"
#include "pan_fb_preload.h"
#include "pan_job.h"
#include "pan_trace.h"
@@ -75,6 +77,87 @@ csf_update_tiler_oom_ctx(struct cs_builder *b, uint64_t addr)
(PAN_INCREMENTAL_RENDERING_##_pass##_PASS * sizeof(struct pan_ptr)) + \
offsetof(struct pan_ptr, gpu))
+#if PAN_ARCH >= 14
+static void
+cs_emit_static_fragment_state(struct cs_builder *b,
+ struct panfrost_batch *batch,
+ const struct pan_fb_info *fb)
+{
+ struct mali_frame_size_packed frame_size;
+ pan_pack(&frame_size, FRAME_SIZE, cfg) {
+ cfg.width = fb->width;
+ cfg.height = fb->height;
+ }
+
+ cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FRAME_SIZE), frame_size.opaque[0]);
+ cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, SAMPLE_POSITION_ARRAY_POINTER),
+ fb->sample_positions);
+
+ struct mali_fragment_flags_1_packed flags1;
+ pan_pack(&flags1, FRAGMENT_FLAGS_1, cfg) {
+ /* The force_samples setting dictates the sample-count that is used
+ * for rasterization, and works like D3D11's ForcedSampleCount
+ * feature:
+ *
+ * - If force_samples == 0: Let nr_samples dictate sample count
+ * - If force_samples == 1: force single-sampled rasterization
+ * - If force_samples >= 2: force multi-sampled rasterization
+ *
+ * This can be used to read SYSTEM_VALUE_SAMPLE_MASK_IN from the
+ * fragment shader, even when performing single-sampled rendering.
+ */
+ if (fb->pls_enabled) {
+ cfg.sample_count = 4;
+ cfg.sample_pattern = pan_sample_pattern(1);
+ } else if (!fb->force_samples) {
+ cfg.sample_count = fb->nr_samples;
+ cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
+ } else if (fb->force_samples == 1) {
+ cfg.sample_count = fb->nr_samples;
+ cfg.sample_pattern = pan_sample_pattern(1);
+ } else {
+ cfg.sample_count = 1;
+ cfg.sample_pattern = pan_sample_pattern(fb->force_samples);
+ }
+
+ cfg.effective_tile_size = fb->tile_size;
+ cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
+ cfg.first_provoking_vertex = fb->first_provoking_vertex;
+ cfg.render_target_count = MAX2(fb->rt_count, 1);
+ cfg.color_buffer_allocation = fb->cbuf_allocation;
+ }
+
+ cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]);
+
+ /* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */
+}
+
+static inline void
+cs_emit_layer_fragment_state(struct cs_builder *b, struct cs_index fbd_ptr)
+{
+ /* Emit the dynamic fragment state. This state may change per-layer. */
+
+ cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_0), fbd_ptr,
+ offsetof(struct pan_fbd_layer, flags0));
+ cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_2), fbd_ptr,
+ offsetof(struct pan_fbd_layer, flags2));
+ cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, Z_CLEAR), fbd_ptr,
+ offsetof(struct pan_fbd_layer, z_clear));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, TILER_DESCRIPTOR_POINTER), fbd_ptr,
+ offsetof(struct pan_fbd_layer, tiler));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, RTD_POINTER), fbd_ptr,
+ offsetof(struct pan_fbd_layer, rtd_pointer));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, DBD_POINTER), fbd_ptr,
+ offsetof(struct pan_fbd_layer, dbd_pointer));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_ARG), fbd_ptr,
+ offsetof(struct pan_fbd_layer, frame_argument));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_SHADER_DCD_POINTER), fbd_ptr,
+ offsetof(struct pan_fbd_layer, dcd_pointer));
+
+ cs_flush_loads(b);
+}
+#endif /* PAN_ARCH >= 14 */
+
static int
csf_oom_handler_init(struct panfrost_context *ctx)
{
@@ -113,13 +196,14 @@ csf_oom_handler_init(struct panfrost_context *ctx)
cs_function_def(&b, &handler, handler_ctx) {
struct cs_index tiler_oom_ctx = cs_reg64(&b, TILER_OOM_CTX_REG);
- struct cs_index counter = cs_reg32(&b, 47);
- struct cs_index zero = cs_reg64(&b, 48);
- struct cs_index flush_id = cs_reg32(&b, 48);
- struct cs_index tiler_ctx = cs_reg64(&b, 50);
- struct cs_index completed_top = cs_reg64(&b, 52);
- struct cs_index completed_bottom = cs_reg64(&b, 54);
- struct cs_index completed_chunks = cs_reg_tuple(&b, 52, 4);
+ struct cs_index counter = cs_reg32(&b, 31);
+ struct cs_index zero = cs_reg64(&b, 56);
+ struct cs_index flush_id = cs_reg32(&b, 58);
+ struct cs_index tiler_ctx = cs_reg64(&b, 60);
+ struct cs_index completed_top = cs_reg64(&b, 64);
+ struct cs_index completed_bottom = cs_reg64(&b, 66);
+ struct cs_index completed_chunks = cs_reg_tuple(&b, 64, 4);
+ struct cs_index fbd_pointer = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);
/* Ensure that the OTHER endpoint is valid */
#if PAN_ARCH >= 11
@@ -133,12 +217,10 @@ csf_oom_handler_init(struct panfrost_context *ctx)
cs_load32_to(&b, counter, tiler_oom_ctx, FIELD_OFFSET(counter));
cs_wait_slot(&b, 0);
cs_if(&b, MALI_CS_CONDITION_GREATER, counter) {
- cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
- FBD_OFFSET(MIDDLE));
+ cs_load64_to(&b, fbd_pointer, tiler_oom_ctx, FBD_OFFSET(MIDDLE));
}
cs_else(&b) {
- cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
- FBD_OFFSET(FIRST));
+ cs_load64_to(&b, fbd_pointer, tiler_oom_ctx, FBD_OFFSET(FIRST));
}
cs_load32_to(&b, cs_sr_reg32(&b, FRAGMENT, BBOX_MIN), tiler_oom_ctx,
@@ -147,11 +229,18 @@ csf_oom_handler_init(struct panfrost_context *ctx)
FIELD_OFFSET(bbox_max));
cs_move64_to(&b, cs_sr_reg64(&b, FRAGMENT, TEM_POINTER), 0);
cs_move32_to(&b, cs_sr_reg32(&b, FRAGMENT, TEM_ROW_STRIDE), 0);
+#if PAN_ARCH >= 14
+ cs_emit_layer_fragment_state(&b, fbd_pointer);
+#endif
cs_wait_slot(&b, 0);
/* Run the fragment job and wait */
cs_select_endpoint_sb(&b, 3);
+#if PAN_ARCH >= 14
+ cs_run_fragment2(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#else
cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#endif
cs_wait_slot(&b, 3);
/* Increment counter */
@@ -218,6 +307,21 @@ GENX(csf_cleanup_batch)(struct panfrost_batch *batch)
panfrost_pool_cleanup(&batch->csf.cs_chunk_pool);
}
+#if PAN_ARCH >= 14
+static inline struct pan_ptr
+alloc_fbd(struct panfrost_batch *batch)
+{
+ const struct pan_desc_alloc_info fbd_layer = {
+ .size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64),
+ .align = alignof(struct pan_fbd_layer),
+ .nelems = 1,
+ };
+
+ return pan_pool_alloc_desc_aggregate(
+ &batch->pool.base, fbd_layer, PAN_DESC(ZS_CRC_EXTENSION),
+ PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
+}
+#else
static inline struct pan_ptr
alloc_fbd(struct panfrost_batch *batch)
{
@@ -225,6 +329,7 @@ alloc_fbd(struct panfrost_batch *batch)
&batch->pool.base, PAN_DESC(FRAMEBUFFER), PAN_DESC(ZS_CRC_EXTENSION),
PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
}
+#endif /* PAN_ARCH >= 14 */
int
GENX(csf_init_batch)(struct panfrost_batch *batch)
@@ -758,7 +863,7 @@ GENX(csf_preload_fb)(struct panfrost_batch *batch, struct pan_fb_info *fb)
(_ctx)->fbds[PAN_INCREMENTAL_RENDERING_##_pass##_PASS]
#define EMIT_FBD(_ctx, _pass, _fb, _tls, _tiler_ctx) \
GET_FBD(_ctx, _pass).gpu |= \
- GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass).cpu)
+ GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass))
void
GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
@@ -771,7 +876,7 @@ GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
/* Default framebuffer descriptor */
batch->framebuffer.gpu |=
- GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
+ GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
if (batch->draw_count == 0)
return;
@@ -854,15 +959,21 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
cs_vt_end(b, cs_now());
}
+ struct cs_index fbd_pointer = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
+
/* Set up the fragment job */
- cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
- batch->framebuffer.gpu);
+ cs_move64_to(b, fbd_pointer, batch->framebuffer.gpu);
+
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
(batch->miny << 16) | batch->minx);
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
((batch->maxy - 1) << 16) | (batch->maxx - 1));
cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, TEM_POINTER), 0);
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, TEM_ROW_STRIDE), 0);
+#if PAN_ARCH >= 14
+ cs_emit_static_fragment_state(b, batch, pfb);
+ cs_emit_layer_fragment_state(b, fbd_pointer);
+#endif
/* Use different framebuffer descriptor if incremental rendering was
* triggered while tiling */
@@ -871,13 +982,19 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
cs_load32_to(b, counter, cs_reg64(b, TILER_OOM_CTX_REG), 0);
cs_wait_slot(b, 0);
cs_if(b, MALI_CS_CONDITION_GREATER, counter) {
- cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
- GET_FBD(oom_ctx, LAST).gpu);
+ cs_move64_to(b, fbd_pointer, GET_FBD(oom_ctx, LAST).gpu);
+#if PAN_ARCH >= 14
+ cs_emit_layer_fragment_state(b, fbd_pointer);
+#endif
}
}
/* Run the fragment job and wait */
+#if PAN_ARCH >= 14
+ cs_run_fragment2(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#else
cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#endif
cs_wait_slot(b, 2);
/* Gather freed heap chunks and add them to the heap context free list
diff --git a/src/gallium/drivers/panfrost/pan_csf.h b/src/gallium/drivers/panfrost/pan_csf.h
index b7be8be2339..2ad51a4a33a 100644
--- a/src/gallium/drivers/panfrost/pan_csf.h
+++ b/src/gallium/drivers/panfrost/pan_csf.h
@@ -29,7 +29,8 @@ struct pan_csf_tiler_oom_ctx {
/* Alternative framebuffer descriptors for incremental rendering */
struct pan_ptr fbds[PAN_INCREMENTAL_RENDERING_PASS_COUNT];
- /* Bounding Box (Register 42 and 43) */
+ /* Bounding Box (Register MALI_FRAGMENT_SR_BBOX_MIN and
+ * MALI_FRAGMENT_SR_BBOX_MAX) */
uint32_t bbox_min;
uint32_t bbox_max;
diff --git a/src/gallium/drivers/panfrost/pan_jm.c b/src/gallium/drivers/panfrost/pan_jm.c
index 845c238853e..818846927fd 100644
--- a/src/gallium/drivers/panfrost/pan_jm.c
+++ b/src/gallium/drivers/panfrost/pan_jm.c
@@ -257,8 +257,8 @@ GENX(jm_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
{
PAN_TRACE_FUNC(PAN_TRACE_GL_JM);
- batch->framebuffer.gpu |= GENX(pan_emit_fbd)(
- fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
+ batch->framebuffer.gpu |=
+ GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
}
void
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index 86d28d2de7a..ede056ba82f 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -1175,6 +1175,9 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
case 13:
panfrost_cmdstream_screen_init_v13(screen);
break;
+ case 14:
+ panfrost_cmdstream_screen_init_v14(screen);
+ break;
default:
debug_printf("panfrost: Unhandled architecture major %d", dev->arch);
panfrost_destroy_screen(&(screen->base));
diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h
index 14eb7ea59fd..9e6b95d008d 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -155,6 +155,7 @@ void panfrost_cmdstream_screen_init_v9(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v10(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v12(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v13(struct panfrost_screen *screen);
+void panfrost_cmdstream_screen_init_v14(struct panfrost_screen *screen);
#define perf_debug(ctx, ...) \
do { \
diff --git a/src/panfrost/clc/pan_compile.c b/src/panfrost/clc/pan_compile.c
index b2e25e7c53b..3a34897c21b 100644
--- a/src/panfrost/clc/pan_compile.c
+++ b/src/panfrost/clc/pan_compile.c
@@ -275,7 +275,7 @@ main(int argc, const char **argv)
unsigned target_arch = atoi(target_arch_str);
- if (target_arch < 4 || target_arch > 13) {
+ if (target_arch < 4 || target_arch > 14) {
fprintf(stderr, "Unsupported target arch %d\n", target_arch);
return 1;
}
diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c
index 2f08cddc49e..989a36b7046 100644
--- a/src/panfrost/compiler/bifrost/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.c
@@ -703,8 +703,10 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr)
assert(intr->intrinsic == nir_intrinsic_load_var_buf_pan ||
intr->intrinsic == nir_intrinsic_load_var_buf_flat_pan);
+ const unsigned arch = b->shader->arch;
+
/* These are only available on Valhall+ */
- assert(b->shader->arch >= 9);
+ assert(arch >= 9);
const bool flat = intr->intrinsic == nir_intrinsic_load_var_buf_flat_pan;
const nir_alu_type src_type = nir_intrinsic_src_type(intr);
@@ -757,19 +759,36 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr)
bool use_imm_form = false;
if (nir_src_is_const(intr->src[0])) {
imm_offset = nir_src_as_uint(intr->src[0]);
- assert(imm_offset < pan_ld_var_buf_off_size(b->shader->arch));
+ assert(imm_offset < pan_ld_var_buf_off_size(arch));
use_imm_form = true;
}
+ /* On v14+, flat source formats are removed from LD_VAR_BUF/LD_VAR_BUF_IMM,
+ * so flat buffer varyings must use the dedicated LD_VAR_BUF_FLAT*.
+ */
if (use_imm_form) {
- bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
+ if (arch >= 14 && flat) {
+ bi_ld_var_buf_flat_imm_to(b, dest, regfmt, vecsize, imm_offset);
+ } else {
+ bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
BI_UPDATE_STORE, vecsize, imm_offset);
+ }
} else {
bi_index offset = bi_src_index(&intr->src[0]);
- bi_ld_var_buf_to(b, sz, dest, src0, offset, regfmt, sample,
- source_format, BI_UPDATE_STORE, vecsize);
+ if (arch >= 14 && flat) {
+ bi_ld_var_buf_flat_to(b, dest, offset, regfmt, vecsize);
+ } else {
+ bi_ld_var_buf_to(b, sz, dest, src0, offset, regfmt, sample,
+ source_format, BI_UPDATE_STORE, vecsize);
+ }
}
+
+ /* LD_VAR_BUF_FLAT* only support register formats F16 and F32. */
+ assert(
+ arch < 14 || !flat ||
+ (regfmt == BI_REGISTER_FORMAT_F16 || regfmt == BI_REGISTER_FORMAT_F32));
+
bi_split_def(b, &intr->def);
}
diff --git a/src/panfrost/compiler/bifrost/valhall/ISA.xml b/src/panfrost/compiler/bifrost/valhall/ISA.xml
index 47ba6928e89..d25571ed34d 100644
--- a/src/panfrost/compiler/bifrost/valhall/ISA.xml
+++ b/src/panfrost/compiler/bifrost/valhall/ISA.xml
@@ -939,6 +939,32 @@
+
+
+
+
+ Fetches a given flat varying from hardware buffer
+
+
+
+
+
+
+
+
+
+
+
+
+ Fetches a given flat varying from hardware buffer
+
+
+
+
+
+
+
+
Interpolates a given varying from hardware buffer
diff --git a/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp b/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp
index 0b0a7654437..0ac71cc2f4f 100644
--- a/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp
+++ b/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2021 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@@ -9,9 +10,9 @@
#include
-#define CASE(instr, expected) \
+#define CASE_ARCH(instr, arch, expected) \
do { \
- uint64_t _value = va_pack_instr(instr, 10); \
+ uint64_t _value = va_pack_instr(instr, arch); \
if (_value != expected) { \
fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, \
(uint64_t)expected); \
@@ -21,6 +22,8 @@
} \
} while (0)
+#define CASE(instr, expected) CASE_ARCH(instr, 10, expected)
+
class ValhallPacking : public testing::Test {
protected:
ValhallPacking()
@@ -278,11 +281,41 @@ TEST_F(ValhallPacking, LdVarBufImmF16)
BI_VECSIZE_V4, 0),
0x005d80843300003d);
- CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
- BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID,
- BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
- BI_VECSIZE_V4, 8),
- 0x005d80443308003d);
+ CASE_ARCH(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
+ BI_REGISTER_FORMAT_F16,
+ BI_SAMPLE_CENTROID, BI_SOURCE_FORMAT_F16,
+ BI_UPDATE_STORE, BI_VECSIZE_V4, 8),
+ 10, 0x005d80443308003d);
+
+ CASE_ARCH(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
+ BI_REGISTER_FORMAT_F16,
+ BI_SAMPLE_CENTROID, BI_SOURCE_FORMAT_F16,
+ BI_UPDATE_STORE, BI_VECSIZE_V4, 8),
+ 11, 0x005d80443300083d);
+}
+
+TEST_F(ValhallPacking, LdVarBufFlatImmFormat)
+{
+ CASE_ARCH(bi_ld_var_buf_flat_imm_to(b, bi_register(0),
+ BI_REGISTER_FORMAT_F32,
+ BI_VECSIZE_V4, 0x12),
+ 14, 0x0040800832001200);
+
+ CASE_ARCH(bi_ld_var_buf_flat_imm_to(b, bi_register(0),
+ BI_REGISTER_FORMAT_F16,
+ BI_VECSIZE_V4, 0x12),
+ 14, 0x0040800433001200);
+}
+
+TEST_F(ValhallPacking, LdVarBufFlat)
+{
+ CASE_ARCH(bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
+ BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4),
+ 14, 0x005f80083200003d);
+
+ CASE_ARCH(bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
+ BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4),
+ 14, 0x005f80043300003d);
}
TEST_F(ValhallPacking, LeaBufImm)
diff --git a/src/panfrost/compiler/bifrost/valhall/va_gather_hsr_info.c b/src/panfrost/compiler/bifrost/valhall/va_gather_hsr_info.c
index 6fc81ebbb12..2d5ca159bd3 100644
--- a/src/panfrost/compiler/bifrost/valhall/va_gather_hsr_info.c
+++ b/src/panfrost/compiler/bifrost/valhall/va_gather_hsr_info.c
@@ -77,6 +77,8 @@ walk_bir_shader(bi_context *ctx, struct pan_shader_info *info)
if (instr->sample == BI_SAMPLE_CENTROID)
info->fs.hsr.centroid_interpolation = true;
FALLTHROUGH;
+ case BI_OPCODE_LD_VAR_BUF_FLAT:
+ case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
case BI_OPCODE_LD_VAR_FLAT:
case BI_OPCODE_LD_VAR_FLAT_IMM:
if (!found_atest)
diff --git a/src/panfrost/compiler/bifrost/valhall/va_pack.c b/src/panfrost/compiler/bifrost/valhall/va_pack.c
index 0790005e49a..d57a7119a37 100644
--- a/src/panfrost/compiler/bifrost/valhall/va_pack.c
+++ b/src/panfrost/compiler/bifrost/valhall/va_pack.c
@@ -568,6 +568,10 @@ va_pack_alu(const bi_instr *I, unsigned arch)
hex |= ((uint64_t)I->sample) << 38;
break;
+ case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
+ hex |= ((uint64_t)I->index) << 8;
+ break;
+
case BI_OPCODE_LD_ATTR_IMM:
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->attribute_index) << 20;
diff --git a/src/panfrost/compiler/pan_compiler.c b/src/panfrost/compiler/pan_compiler.c
index ef384514061..9d3a7a79be0 100644
--- a/src/panfrost/compiler/pan_compiler.c
+++ b/src/panfrost/compiler/pan_compiler.c
@@ -52,6 +52,7 @@ pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg)
case 11:
case 12:
case 13:
+ case 14:
return merge_wg ? &bifrost_nir_options_v11_merge_wg :
&bifrost_nir_options_v11;
default:
diff --git a/src/panfrost/genxml/cs_builder.h b/src/panfrost/genxml/cs_builder.h
index a109f4d113b..ae0653a1f84 100644
--- a/src/panfrost/genxml/cs_builder.h
+++ b/src/panfrost/genxml/cs_builder.h
@@ -824,7 +824,11 @@ cs_instr_is_asynchronous(enum mali_cs_opcode opcode, uint16_t wait_mask)
case MALI_CS_OPCODE_STORE_MULTIPLE:
case MALI_CS_OPCODE_RUN_COMPUTE:
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
+#if PAN_ARCH >= 14
+ case MALI_CS_OPCODE_RUN_FRAGMENT2:
+#else
case MALI_CS_OPCODE_RUN_FRAGMENT:
+#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN:
#if PAN_ARCH >= 12
case MALI_CS_OPCODE_RUN_IDVS2:
@@ -1614,6 +1618,22 @@ cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool malloc_enable,
}
#endif
+#if PAN_ARCH >= 14
+static inline void
+cs_run_fragment2(struct cs_builder *b, bool enable_tem,
+ enum mali_tile_render_order tile_order)
+{
+ /* Staging regs */
+ cs_flush_loads(b);
+
+ b->req_resource_mask |= CS_FRAG_RES;
+
+ cs_emit(b, RUN_FRAGMENT2, I) {
+ I.enable_tem = enable_tem;
+ I.tile_order = tile_order;
+ }
+}
+#else
static inline void
cs_run_fragment(struct cs_builder *b, bool enable_tem,
enum mali_tile_render_order tile_order)
@@ -1628,6 +1648,7 @@ cs_run_fragment(struct cs_builder *b, bool enable_tem,
I.tile_order = tile_order;
}
}
+#endif
static inline void
cs_run_fullscreen(struct cs_builder *b, uint32_t flags_override,
@@ -2469,6 +2490,53 @@ cs_trace_preamble(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
(int16_t)(offsetof(struct cs_##__type##_trace, __field) - \
sizeof(struct cs_##__type##_trace))
+#if PAN_ARCH >= 14
+#define CS_RUN_FRAGMENT2_SR_COUNT 56
+#define CS_RUN_FRAGMENT2_SR_MASK BITFIELD64_RANGE(0, CS_RUN_FRAGMENT2_SR_COUNT)
+struct cs_run_fragment2_trace {
+ uint64_t ip;
+ uint32_t sr[CS_RUN_FRAGMENT2_SR_COUNT];
+} __attribute__((aligned(64)));
+
+static inline void
+cs_trace_run_fragment2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
+ struct cs_index scratch_regs, bool enable_tem,
+ enum mali_tile_render_order tile_order)
+{
+ if (likely(!ctx->enabled)) {
+ cs_run_fragment2(b, enable_tem, tile_order);
+ return;
+ }
+
+ struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
+ struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);
+
+ cs_trace_preamble(b, ctx, scratch_regs,
+ sizeof(struct cs_run_fragment2_trace));
+
+ /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
+ * won't point to the right instruction. */
+ cs_load_ip_to(b, data);
+ cs_run_fragment2(b, enable_tem, tile_order);
+ cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_fragment2, ip));
+
+ ASSERTED unsigned sr_count = 0;
+ unsigned sr_offset = cs_trace_field_offset(run_fragment2, sr);
+ for (unsigned i = 0; i < CS_RUN_FRAGMENT2_SR_COUNT; i += 16) {
+ unsigned mask = (CS_RUN_FRAGMENT2_SR_MASK >> i) & BITFIELD_MASK(16);
+ if (!mask)
+ continue;
+
+ cs_store(b, cs_reg_tuple(b, i, util_last_bit(mask)), tracebuf_addr, mask,
+ sr_offset);
+ sr_offset += util_bitcount(mask) * sizeof(uint32_t);
+ sr_count += util_bitcount(mask);
+ }
+ assert(sr_count == CS_RUN_FRAGMENT2_SR_COUNT);
+
+ cs_flush_stores(b);
+}
+#else
struct cs_run_fragment_trace {
uint64_t ip;
uint32_t sr[7];
@@ -2500,6 +2568,7 @@ cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
cs_trace_field_offset(run_fragment, sr));
cs_flush_stores(b);
}
+#endif
#if PAN_ARCH >= 13
#define CS_RUN_FULLSCREEN_SR_MASK \
diff --git a/src/panfrost/genxml/decode.c b/src/panfrost/genxml/decode.c
index 38a2e696e4d..fc6068b5228 100644
--- a/src/panfrost/genxml/decode.c
+++ b/src/panfrost/genxml/decode.c
@@ -152,22 +152,22 @@ pandecode_rt(struct pandecode_context *ctx, unsigned index, uint64_t gpu_va)
}
-static void
-pandecode_rts(struct pandecode_context *ctx, uint64_t gpu_va,
- const struct MALI_FRAMEBUFFER_PARAMETERS *fb)
+void
+GENX(pandecode_rts)(struct pandecode_context *ctx, uint64_t gpu_va,
+ uint32_t render_target_count)
{
pandecode_log(ctx, "Color Render Targets @%" PRIx64 ":\n", gpu_va);
ctx->indent++;
- for (int i = 0; i < (fb->render_target_count); i++)
+ for (int i = 0; i < render_target_count; i++)
pandecode_rt(ctx, i, gpu_va);
ctx->indent--;
pandecode_log(ctx, "\n");
}
-static void
-pandecode_zs_crc_ext(struct pandecode_context *ctx, uint64_t gpu_va)
+void
+GENX(pandecode_zs_crc_ext)(struct pandecode_context *ctx, uint64_t gpu_va)
{
const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(
ctx, zs_crc_packed, (uint64_t)gpu_va);
@@ -223,22 +223,65 @@ pandecode_zs_crc_ext(struct pandecode_context *ctx, uint64_t gpu_va)
#if PAN_ARCH >= 6
-static void
-pandecode_sample_locations(struct pandecode_context *ctx, const void *fb)
+void
+GENX(pandecode_frame_shader_dcds)(struct pandecode_context *ctx,
+ uint64_t dcd_pointer, unsigned pre_frame_0,
+ unsigned pre_frame_1, unsigned post_frame,
+ unsigned job_type_param, uint64_t gpu_id)
{
- pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);
+ const unsigned dcd_size = pan_size(DRAW);
- const uint16_t *PANDECODE_PTR_VAR(ctx, samples, params.sample_locations);
+ if (pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
+ const struct mali_draw_packed *PANDECODE_PTR_VAR(
+ ctx, dcd, dcd_pointer + (0 * dcd_size));
+ pan_unpack(dcd, DRAW, draw)
+ ;
+ pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n", dcd_pointer,
+ pre_frame_0);
+ ctx->indent++;
+ GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
+ ctx->indent--;
+ }
- pandecode_log(ctx, "Sample locations @%" PRIx64 ":\n",
- params.sample_locations);
+ if (pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
+ const struct mali_draw_packed *PANDECODE_PTR_VAR(
+ ctx, dcd, dcd_pointer + (1 * dcd_size));
+ pan_unpack(dcd, DRAW, draw)
+ ;
+ pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n",
+ dcd_pointer + (1 * dcd_size));
+ ctx->indent++;
+ GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
+ ctx->indent--;
+ }
+
+ if (post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
+ const struct mali_draw_packed *PANDECODE_PTR_VAR(
+ ctx, dcd, dcd_pointer + (2 * dcd_size));
+ pan_unpack(dcd, DRAW, draw)
+ ;
+ pandecode_log(ctx, "Post frame:\n");
+ ctx->indent++;
+ GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
+ ctx->indent--;
+ }
+}
+
+void
+GENX(pandecode_sample_locations)(struct pandecode_context *ctx,
+ uint64_t sample_locations)
+{
+ const uint16_t *PANDECODE_PTR_VAR(ctx, samples, sample_locations);
+
+ pandecode_log(ctx, "Sample locations @%" PRIx64 ":\n", sample_locations);
for (int i = 0; i < 33; i++) {
pandecode_log(ctx, " (%d, %d),\n", samples[2 * i] - 128,
samples[2 * i + 1] - 128);
}
}
-#endif
+#endif /* PAN_ARCH >= 6 */
+#if PAN_ARCH < 14
struct pandecode_fbd
GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
bool is_fragment, uint64_t gpu_id)
@@ -248,46 +291,17 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
DUMP_UNPACKED(ctx, FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
#if PAN_ARCH >= 6
- pandecode_sample_locations(ctx, fb);
+ GENX(pandecode_sample_locations)(ctx, params.sample_locations);
- unsigned dcd_size = pan_size(DRAW);
unsigned job_type_param = 0;
#if PAN_ARCH <= 9
job_type_param = MALI_JOB_TYPE_FRAGMENT;
#endif
- if (params.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
- const struct mali_draw_packed *PANDECODE_PTR_VAR(
- ctx, dcd, params.frame_shader_dcds + (0 * dcd_size));
- pan_unpack(dcd, DRAW, draw);
- pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n",
- params.frame_shader_dcds, params.pre_frame_0);
- ctx->indent++;
- GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
- ctx->indent--;
- }
-
- if (params.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
- const struct mali_draw_packed *PANDECODE_PTR_VAR(
- ctx, dcd, params.frame_shader_dcds + (1 * dcd_size));
- pan_unpack(dcd, DRAW, draw);
- pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n",
- params.frame_shader_dcds + (1 * dcd_size));
- ctx->indent++;
- GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
- ctx->indent--;
- }
-
- if (params.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
- const struct mali_draw_packed *PANDECODE_PTR_VAR(
- ctx, dcd, params.frame_shader_dcds + (2 * dcd_size));
- pan_unpack(dcd, DRAW, draw);
- pandecode_log(ctx, "Post frame:\n");
- ctx->indent++;
- GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
- ctx->indent--;
- }
+ GENX(pandecode_frame_shader_dcds)(ctx, params.frame_shader_dcds,
+ params.pre_frame_0, params.pre_frame_1,
+ params.post_frame, job_type_param, gpu_id);
#else
DUMP_SECTION(ctx, FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n");
@@ -312,13 +326,13 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
gpu_va += pan_size(FRAMEBUFFER);
if (params.has_zs_crc_extension) {
- pandecode_zs_crc_ext(ctx, gpu_va);
+ GENX(pandecode_zs_crc_ext)(ctx, gpu_va);
gpu_va += pan_size(ZS_CRC_EXTENSION);
}
if (is_fragment)
- pandecode_rts(ctx, gpu_va, ¶ms);
+ GENX(pandecode_rts)(ctx, gpu_va, params.render_target_count);
return (struct pandecode_fbd){
.rt_count = params.render_target_count,
@@ -336,6 +350,7 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
};
#endif
}
+#endif /* PAN_ARCH < 14 */
#if PAN_ARCH >= 5
uint64_t
diff --git a/src/panfrost/genxml/decode.h b/src/panfrost/genxml/decode.h
index f7d83ca5525..47fe28f798f 100644
--- a/src/panfrost/genxml/decode.h
+++ b/src/panfrost/genxml/decode.h
@@ -132,6 +132,13 @@ void pandecode_cs_binary_v13(struct pandecode_context *ctx, uint64_t bin,
void pandecode_cs_trace_v13(struct pandecode_context *ctx, uint64_t trace,
uint32_t trace_size, uint64_t gpu_id);
+void pandecode_interpret_cs_v14(struct pandecode_context *ctx, uint64_t queue,
+ uint32_t size, uint64_t gpu_id, uint32_t *regs);
+void pandecode_cs_binary_v14(struct pandecode_context *ctx, uint64_t bin,
+ uint32_t bin_size);
+void pandecode_cs_trace_v14(struct pandecode_context *ctx, uint64_t trace,
+ uint32_t trace_size, uint64_t gpu_id);
+
/* Logging infrastructure */
static void
pandecode_make_indent(struct pandecode_context *ctx)
@@ -275,4 +282,22 @@ void GENX(pandecode_depth_stencil)(struct pandecode_context *ctx,
#endif
+#if PAN_ARCH >= 6
+void GENX(pandecode_sample_locations)(struct pandecode_context *ctx,
+ uint64_t sample_locations);
+
+void
+ GENX(pandecode_frame_shader_dcds)(struct pandecode_context *ctx,
+ uint64_t dcd_pointer, unsigned pre_frame_0,
+ unsigned pre_frame_1, unsigned post_frame,
+ unsigned job_type_param, uint64_t gpu_id);
+#endif
+
+#if PAN_ARCH >= 5
+void GENX(pandecode_rts)(struct pandecode_context *ctx, uint64_t gpu_va,
+ uint32_t render_target_count);
+
+void GENX(pandecode_zs_crc_ext)(struct pandecode_context *ctx, uint64_t gpu_va);
+#endif
+
#endif /* __MMAP_TRACE_H__ */
diff --git a/src/panfrost/genxml/decode_common.c b/src/panfrost/genxml/decode_common.c
index 208d28a8cb5..399fec9f335 100644
--- a/src/panfrost/genxml/decode_common.c
+++ b/src/panfrost/genxml/decode_common.c
@@ -423,6 +423,9 @@ pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va,
case 13:
pandecode_interpret_cs_v13(ctx, queue_gpu_va, size, gpu_id, regs);
break;
+ case 14:
+ pandecode_interpret_cs_v14(ctx, queue_gpu_va, size, gpu_id, regs);
+ break;
default:
UNREACHABLE("Unsupported architecture");
}
@@ -446,6 +449,9 @@ pandecode_cs_binary(struct pandecode_context *ctx, uint64_t bin_gpu_va,
case 13:
pandecode_cs_binary_v13(ctx, bin_gpu_va, size);
break;
+ case 14:
+ pandecode_cs_binary_v14(ctx, bin_gpu_va, size);
+ break;
default:
UNREACHABLE("Unsupported architecture");
}
@@ -469,6 +475,9 @@ pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va,
case 13:
pandecode_cs_trace_v13(ctx, trace_gpu_va, size, gpu_id);
break;
+ case 14:
+ pandecode_cs_trace_v14(ctx, trace_gpu_va, size, gpu_id);
+ break;
default:
UNREACHABLE("Unsupported architecture");
}
diff --git a/src/panfrost/genxml/decode_csf.c b/src/panfrost/genxml/decode_csf.c
index ca3b4807950..b196c98943a 100644
--- a/src/panfrost/genxml/decode_csf.c
+++ b/src/panfrost/genxml/decode_csf.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2022-2023 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@@ -89,6 +90,12 @@ static const char *defer_modes_str[] = {
#define defer_mode_str(I) ""
#endif
+#if PAN_ARCH <= 13
+#define assert_no_progress_inc(I) assert(!I.progress_increment)
+#else
+#define assert_no_progress_inc(I) do {} while (0)
+#endif
+
static void
print_cs_instr(FILE *fp, const uint64_t *instr)
{
@@ -117,28 +124,27 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
case MALI_CS_OPCODE_WAIT: {
cs_unpack(instr, CS_WAIT, I);
- fprintf(fp, "WAIT%s #%x", I.progress_increment ? ".progress_inc" : "",
- I.wait_mask);
+ assert_no_progress_inc(I);
+ fprintf(fp, "WAIT #%x", I.wait_mask);
break;
}
case MALI_CS_OPCODE_RUN_COMPUTE: {
const char *axes[4] = {"x_axis", "y_axis", "z_axis"};
cs_unpack(instr, CS_RUN_COMPUTE, I);
+ assert_no_progress_inc(I);
/* Print the instruction. Ignore the selects and the flags override
* since we'll print them implicitly later.
*/
#if PAN_ARCH >= 12
- fprintf(fp, "RUN_COMPUTE%s.%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
- I.progress_increment ? ".progress_inc" : "", axes[I.task_axis],
- I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
- I.task_increment, I.ep_limit);
+ fprintf(fp, "RUN_COMPUTE.%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
+ axes[I.task_axis], I.srt_select, I.spd_select, I.tsd_select,
+ I.fau_select, I.task_increment, I.ep_limit);
#else
- fprintf(fp, "RUN_COMPUTE%s.%s.srt%d.spd%d.tsd%d.fau%d #%u",
- I.progress_increment ? ".progress_inc" : "", axes[I.task_axis],
- I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
- I.task_increment);
+ fprintf(fp, "RUN_COMPUTE.%s.srt%d.spd%d.tsd%d.fau%d #%u",
+ axes[I.task_axis], I.srt_select, I.spd_select, I.tsd_select,
+ I.fau_select, I.task_increment);
#endif
break;
}
@@ -146,8 +152,8 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
#if PAN_ARCH == 10
case MALI_CS_OPCODE_RUN_TILING: {
cs_unpack(instr, CS_RUN_TILING, I);
- fprintf(fp, "RUN_TILING%s.srt%d.spd%d.tsd%d.fau%d",
- I.progress_increment ? ".progress_inc" : "", I.srt_select,
+ assert_no_progress_inc(I);
+ fprintf(fp, "RUN_TILING.srt%d.spd%d.tsd%d.fau%d", I.srt_select,
I.spd_select, I.tsd_select, I.fau_select);
break;
}
@@ -156,10 +162,10 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
#if PAN_ARCH < 12
case MALI_CS_OPCODE_RUN_IDVS: {
cs_unpack(instr, CS_RUN_IDVS, I);
+ assert_no_progress_inc(I);
fprintf(
fp,
- "RUN_IDVS%s%s%s.varying_srt%d.varying_fau%d.varying_tsd%d.frag_srt%d.frag_tsd%d r%u, #%" PRIx64,
- I.progress_increment ? ".progress_inc" : "",
+ "RUN_IDVS%s%s.varying_srt%d.varying_fau%d.varying_tsd%d.frag_srt%d.frag_tsd%d r%u, #%" PRIx64,
I.malloc_enable ? "" : ".no_malloc",
I.draw_id_register_enable ? ".draw_id_enable" : "",
I.varying_srt_select, I.varying_fau_select, I.varying_tsd_select,
@@ -170,6 +176,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
#else
case MALI_CS_OPCODE_RUN_IDVS2: {
cs_unpack(instr, CS_RUN_IDVS2, I);
+ assert_no_progress_inc(I);
const char *vertex_shading_str[] = {
".early",
@@ -178,8 +185,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
".INVALID",
};
- fprintf(fp, "RUN_IDVS2%s%s%s%s r%u, #%" PRIx64,
- I.progress_increment ? ".progress_inc" : "",
+ fprintf(fp, "RUN_IDVS2%s%s%s r%u, #%" PRIx64,
I.malloc_enable ? "" : ".no_malloc",
I.draw_id_register_enable ? ".draw_id_enable" : "",
vertex_shading_str[I.vertex_shading_mode], I.draw_id,
@@ -317,32 +323,37 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
case MALI_CS_OPCODE_SHARED_SB_INC: {
cs_unpack(instr, CS_SHARED_SB_INC, I);
-
- const char *progress_increment_name[] = {
- ".no_increment",
- ".increment",
- };
-
- fprintf(fp, "SHARED_SB_INC%s%s #%u, #%u",
- progress_increment_name[I.progress_increment],
- defer_mode_str(I), I.sb_mask, I.shared_entry);
+ assert_no_progress_inc(I);
+ fprintf(fp, "SHARED_SB_INC%s #%u, #%u", defer_mode_str(I), I.sb_mask,
+ I.shared_entry);
break;
}
case MALI_CS_OPCODE_SHARED_SB_DEC: {
cs_unpack(instr, CS_SHARED_SB_DEC, I);
-
- const char *progress_increment_name[] = {
- ".no_increment",
- ".increment",
- };
-
- fprintf(fp, "SHARED_SB_DEC%s #%u",
- progress_increment_name[I.progress_increment], I.shared_entry);
+ assert_no_progress_inc(I);
+ fprintf(fp, "SHARED_SB_DEC #%u", I.shared_entry);
break;
}
#endif
+#if PAN_ARCH >= 14
+ case MALI_CS_OPCODE_RUN_FRAGMENT2: {
+ static const char *tile_order[] = {
+ "zorder", "horizontal", "vertical", "unknown",
+ "unknown", "rev_horizontal", "rev_vertical", "unknown",
+ "unknown", "unknown", "unknown", "unknown",
+ "unknown", "unknown", "unknown", "unknown",
+ };
+
+ cs_unpack(instr, CS_RUN_FRAGMENT2, I);
+
+ fprintf(fp, "RUN_FRAGMENT2%s.tile_order=%s",
+ I.enable_tem ? ".tile_enable_map_enable" : "",
+ tile_order[I.tile_order]);
+ break;
+ }
+#else
case MALI_CS_OPCODE_RUN_FRAGMENT: {
static const char *tile_order[] = {
"zorder", "horizontal", "vertical", "unknown",
@@ -350,27 +361,27 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
"unknown", "unknown", "unknown", "unknown",
"unknown", "unknown", "unknown", "unknown",
};
- cs_unpack(instr, CS_RUN_FRAGMENT, I);
- fprintf(fp, "RUN_FRAGMENT%s%s.tile_order=%s",
- I.progress_increment ? ".progress_inc" : "",
+ cs_unpack(instr, CS_RUN_FRAGMENT, I);
+ assert_no_progress_inc(I);
+ fprintf(fp, "RUN_FRAGMENT%s.tile_order=%s",
I.enable_tem ? ".tile_enable_map_enable" : "",
tile_order[I.tile_order]);
break;
}
+#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
cs_unpack(instr, CS_RUN_FULLSCREEN, I);
- fprintf(fp, "RUN_FULLSCREEN%s r%u, #%" PRIx64,
- I.progress_increment ? ".progress_inc" : "", I.dcd,
- I.flags_override);
+ assert_no_progress_inc(I);
+ fprintf(fp, "RUN_FULLSCREEN r%u, #%" PRIx64, I.dcd, I.flags_override);
break;
}
case MALI_CS_OPCODE_FINISH_TILING: {
cs_unpack(instr, CS_FINISH_TILING, I);
- fprintf(fp, "FINISH_TILING%s",
- I.progress_increment ? ".progress_inc" : "");
+ assert_no_progress_inc(I);
+ fprintf(fp, "FINISH_TILING");
break;
}
@@ -443,12 +454,6 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
break;
}
- case MALI_CS_OPCODE_PROGRESS_WAIT: {
- cs_unpack(instr, CS_PROGRESS_WAIT, I);
- fprintf(fp, "PROGRESS_WAIT d%u, #%u", I.source, I.queue);
- break;
- }
-
case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: {
cs_unpack(instr, CS_SET_EXCEPTION_HANDLER, I);
fprintf(fp, "SET_EXCEPTION_HANDLER d%u, r%u", I.address, I.length);
@@ -547,29 +552,17 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
break;
}
- case MALI_CS_OPCODE_PROGRESS_STORE: {
- cs_unpack(instr, CS_PROGRESS_STORE, I);
- fprintf(fp, "PROGRESS_STORE d%u", I.source);
- break;
- }
-
- case MALI_CS_OPCODE_PROGRESS_LOAD: {
- cs_unpack(instr, CS_PROGRESS_LOAD, I);
- fprintf(fp, "PROGRESS_LOAD d%u", I.destination);
- break;
- }
-
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: {
cs_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I);
+ assert_no_progress_inc(I);
#if PAN_ARCH >= 12
- fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
- I.progress_increment ? ".progress_inc" : "", I.srt_select,
- I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task,
- I.ep_limit);
+ fprintf(fp, "RUN_COMPUTE_INDIRECT.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
+ I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
+ I.workgroups_per_task, I.ep_limit);
#else
- fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u",
- I.progress_increment ? ".progress_inc" : "", I.srt_select,
- I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task);
+ fprintf(fp, "RUN_COMPUTE_INDIRECT.srt%d.spd%d.tsd%d.fau%d #%u",
+ I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
+ I.workgroups_per_task);
#endif
break;
@@ -1097,6 +1090,99 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
}
#endif
+#if PAN_ARCH >= 14
+static void
+pandecode_run_fragment2(struct pandecode_context *ctx, FILE *fp,
+ struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT2 *I)
+{
+ if (qctx->in_exception_handler)
+ return;
+
+ ctx->indent++;
+
+ pandecode_log(ctx, "Iter trace ID0: %" PRIu32 "\n",
+ cs_get_u32(qctx, MALI_FRAGMENT_SR_ITER_TRACE_ID0));
+ pandecode_log(ctx, "Iter trace ID1: %" PRIu32 "\n",
+ cs_get_u32(qctx, MALI_FRAGMENT_SR_ITER_TRACE_ID1));
+ pandecode_log(ctx, "TEM pointer: %" PRIx64 "\n",
+ cs_get_u64(qctx, MALI_FRAGMENT_SR_TEM_POINTER));
+ pandecode_log(ctx, "TEM row stride: %" PRIu32 "\n",
+ cs_get_u32(qctx, MALI_FRAGMENT_SR_TEM_ROW_STRIDE));
+
+ for (unsigned i = 0; i < 11; ++i) {
+ const unsigned reg = MALI_FRAGMENT_SR_IRD_BUFFER_POINTER_0 + (i * 2);
+ pandecode_log(ctx, "IRD buffer pointer %u: %" PRIx64 "\n", i,
+ cs_get_u64(qctx, reg));
+ }
+
+ DUMP_CL(ctx, FRAGMENT_FLAGS_3, &qctx->regs[MALI_FRAGMENT_SR_FLAGS_3],
+ "Flags 3:\n");
+ DUMP_CL(ctx, FRAGMENT_BOUNDING_BOX, &qctx->regs[MALI_FRAGMENT_SR_BBOX_MIN],
+ "Bounding Box:\n");
+ DUMP_CL(ctx, FRAME_SIZE, &qctx->regs[MALI_FRAGMENT_SR_FRAME_SIZE],
+ "Frame size:\n");
+
+ pan_unpack((const struct mali_fragment_flags_0_packed *)&qctx
+ ->regs[MALI_FRAGMENT_SR_FLAGS_0],
+ FRAGMENT_FLAGS_0, flags0_unpacked);
+ DUMP_UNPACKED(ctx, FRAGMENT_FLAGS_0, flags0_unpacked, "Flags 0:\n");
+
+ pan_unpack((const struct mali_fragment_flags_1_packed *)&qctx
+ ->regs[MALI_FRAGMENT_SR_FLAGS_1],
+ FRAGMENT_FLAGS_1, flags1_unpacked);
+ DUMP_UNPACKED(ctx, FRAGMENT_FLAGS_1, flags1_unpacked, "Flags 1:\n");
+
+ DUMP_CL(ctx, FRAGMENT_FLAGS_2, &qctx->regs[MALI_FRAGMENT_SR_FLAGS_2],
+ "Flags 2:\n");
+ pandecode_log(ctx, "Z clear: %f\n",
+ uif(cs_get_u32(qctx, MALI_FRAGMENT_SR_Z_CLEAR)));
+
+ const uint64_t tiler_pointer =
+ cs_get_u64(qctx, MALI_FRAGMENT_SR_TILER_DESCRIPTOR_POINTER);
+ pandecode_log(ctx, "Tiler descriptor pointer: 0x%" PRIx64 "\n",
+ tiler_pointer);
+
+ const uint64_t rtd_pointer = cs_get_u64(qctx, MALI_FRAGMENT_SR_RTD_POINTER);
+ pandecode_log(ctx, "RTD pointer: 0x%" PRIx64 "\n", rtd_pointer);
+
+ const uint64_t dbd_pointer = cs_get_u64(qctx, MALI_FRAGMENT_SR_DBD_POINTER);
+ pandecode_log(ctx, "DBD pointer: 0x%" PRIx64 "\n", dbd_pointer);
+
+ pandecode_log(ctx, "Frame argument: %" PRIx64 "\n",
+ cs_get_u64(qctx, MALI_FRAGMENT_SR_FRAME_ARG));
+
+ const uint64_t sample_locations =
+ cs_get_u64(qctx, MALI_FRAGMENT_SR_SAMPLE_POSITION_ARRAY_POINTER);
+ pandecode_log(ctx, "Sample locations: 0x%" PRIx64 "\n", sample_locations);
+
+ const uint64_t dcd_pointer =
+ cs_get_u64(qctx, MALI_FRAGMENT_SR_FRAME_SHADER_DCD_POINTER);
+ pandecode_log(ctx, "Frame shader DCD pointer: 0x%" PRIx64 "\n", dcd_pointer);
+
+ DUMP_CL(ctx, VRS_IMAGE, &qctx->regs[MALI_FRAGMENT_SR_VRS_IMAGE],
+ "VRS image:\n");
+
+ GENX(pandecode_sample_locations)(ctx, sample_locations);
+
+ const unsigned job_type_param = 0;
+ GENX(pandecode_frame_shader_dcds)(ctx, dcd_pointer,
+ flags0_unpacked.pre_frame_0,
+ flags0_unpacked.pre_frame_1,
+ flags0_unpacked.post_frame,
+ job_type_param, qctx->gpu_id);
+
+ if (tiler_pointer)
+ GENX(pandecode_tiler)(ctx, tiler_pointer);
+
+ if (dbd_pointer)
+ GENX(pandecode_zs_crc_ext)(ctx, dbd_pointer);
+
+ if (rtd_pointer)
+ GENX(pandecode_rts)(ctx, rtd_pointer, flags1_unpacked.render_target_count);
+
+ ctx->indent--;
+}
+#else
static void
pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT *I)
@@ -1115,6 +1201,7 @@ pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp,
ctx->indent--;
}
+#endif /* PAN_ARCH >= 14 */
static void
pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp,
@@ -1261,11 +1348,19 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
}
#endif
+#if PAN_ARCH >= 14
+ case MALI_CS_OPCODE_RUN_FRAGMENT2: {
+ cs_unpack(bytes, CS_RUN_FRAGMENT2, I);
+ pandecode_run_fragment2(ctx, fp, qctx, &I);
+ break;
+ }
+#else
case MALI_CS_OPCODE_RUN_FRAGMENT: {
cs_unpack(bytes, CS_RUN_FRAGMENT, I);
pandecode_run_fragment(ctx, fp, qctx, &I);
break;
}
+#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
cs_unpack(bytes, CS_RUN_FULLSCREEN, I);
@@ -2192,18 +2287,6 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg,
break;
}
- case MALI_CS_OPCODE_PROGRESS_LOAD: {
- cs_unpack(instr, CS_PROGRESS_LOAD, I);
- for (unsigned i = 0; i < 16; i++) {
- if (BITSET_TEST(track_map, I.destination) ||
- BITSET_TEST(track_map, I.destination + 1)) {
- ibranch->has_unknown_targets = true;
- return;
- }
- }
- break;
- }
-
default:
break;
}
@@ -2430,7 +2513,12 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin,
#else
case MALI_CS_OPCODE_RUN_IDVS:
#endif
+
+#if PAN_ARCH >= 14
+ case MALI_CS_OPCODE_RUN_FRAGMENT2:
+#else
case MALI_CS_OPCODE_RUN_FRAGMENT:
+#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN:
case MALI_CS_OPCODE_RUN_COMPUTE:
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
@@ -2539,6 +2627,19 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
}
#endif
+#if PAN_ARCH >= 14
+ case MALI_CS_OPCODE_RUN_FRAGMENT2: {
+ struct cs_run_fragment2_trace *frag_trace = trace_data;
+
+ assert(trace_size >= sizeof(*frag_trace));
+ cs_unpack(instr, CS_RUN_FRAGMENT2, I);
+ memcpy(®s[0], frag_trace->sr, sizeof(frag_trace->sr));
+ pandecode_run_fragment2(ctx, ctx->dump_stream, &qctx, &I);
+ trace_data = frag_trace + 1;
+ trace_size -= sizeof(*frag_trace);
+ break;
+ }
+#else
case MALI_CS_OPCODE_RUN_FRAGMENT: {
struct cs_run_fragment_trace *frag_trace = trace_data;
@@ -2550,6 +2651,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
trace_size -= sizeof(*frag_trace);
break;
}
+#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
struct cs_run_fullscreen_trace *fs_trace = trace_data;
diff --git a/src/panfrost/genxml/gen_macros.h b/src/panfrost/genxml/gen_macros.h
index b9e856f8533..c1e8ab1fbae 100644
--- a/src/panfrost/genxml/gen_macros.h
+++ b/src/panfrost/genxml/gen_macros.h
@@ -61,6 +61,9 @@
#elif (PAN_ARCH == 13)
#define GENX(X) X##_v13
#include "genxml/v13_pack.h"
+#elif (PAN_ARCH == 14)
+#define GENX(X) X##_v14
+#include "genxml/v14_pack.h"
#else
#error "Need to add suffixing macro for this architecture"
#endif
diff --git a/src/panfrost/genxml/meson.build b/src/panfrost/genxml/meson.build
index 3712b84822d..ee4b4adea3f 100644
--- a/src/panfrost/genxml/meson.build
+++ b/src/panfrost/genxml/meson.build
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: MIT
pan_packers = []
-foreach packer : ['common', 'v4', 'v5', 'v6', 'v7', 'v9', 'v10', 'v12', 'v13']
+foreach packer : ['common', 'v4', 'v5', 'v6', 'v7', 'v9', 'v10', 'v12', 'v13', 'v14']
pan_packers += custom_target(
packer + '_pack.h',
input : ['gen_pack.py', packer + '.xml'],
@@ -20,7 +20,7 @@ idep_pan_packers = declare_dependency(
libpanfrost_decode_per_arch = []
-foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13']
+foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13', '14']
libpanfrost_decode_per_arch += static_library(
'pandecode-arch-v' + ver,
['decode.c', 'decode_jm.c', 'decode_csf.c', pan_packers],
diff --git a/src/panfrost/genxml/v10.xml b/src/panfrost/genxml/v10.xml
index 2fd4bb86637..95204c4a496 100644
--- a/src/panfrost/genxml/v10.xml
+++ b/src/panfrost/genxml/v10.xml
@@ -1,5 +1,6 @@
@@ -84,6 +85,7 @@
+
@@ -132,6 +134,7 @@
+
@@ -1163,6 +1166,13 @@
+
+
+
+
+
+
diff --git a/src/panfrost/genxml/v12.xml b/src/panfrost/genxml/v12.xml
index 0d651f01b0d..e3716030601 100644
--- a/src/panfrost/genxml/v12.xml
+++ b/src/panfrost/genxml/v12.xml
@@ -1,5 +1,6 @@
@@ -84,6 +85,7 @@
+
@@ -132,6 +134,7 @@
+
@@ -1426,6 +1429,9 @@
+
+
+
diff --git a/src/panfrost/genxml/v13.xml b/src/panfrost/genxml/v13.xml
index c644d2bd49c..30285e4c351 100644
--- a/src/panfrost/genxml/v13.xml
+++ b/src/panfrost/genxml/v13.xml
@@ -1,5 +1,6 @@
@@ -84,6 +85,7 @@
+
@@ -132,6 +134,7 @@
+
@@ -1728,6 +1731,9 @@
+
+
+
diff --git a/src/panfrost/genxml/v14.xml b/src/panfrost/genxml/v14.xml
new file mode 100644
index 00000000000..30768156967
--- /dev/null
+++ b/src/panfrost/genxml/v14.xml
@@ -0,0 +1,2755 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/panfrost/genxml/v9.xml b/src/panfrost/genxml/v9.xml
index d5bc4c1e110..3935d4dea99 100644
--- a/src/panfrost/genxml/v9.xml
+++ b/src/panfrost/genxml/v9.xml
@@ -1,5 +1,6 @@
@@ -103,6 +104,7 @@
+
diff --git a/src/panfrost/lib/meson.build b/src/panfrost/lib/meson.build
index 8c5b3d5537d..a4572db619c 100644
--- a/src/panfrost/lib/meson.build
+++ b/src/panfrost/lib/meson.build
@@ -4,7 +4,7 @@
subdir('kmod')
-pixel_format_versions = ['5', '6', '7', '9', '10', '12', '13']
+pixel_format_versions = ['5', '6', '7', '9', '10', '12', '13', '14']
libpanfrost_pixel_format = []
deps_for_libpanfrost = [dep_libdrm, idep_pan_packers, idep_mesautil, libpanfrost_model_dep]
@@ -22,7 +22,7 @@ endforeach
libpanfrost_per_arch = []
-foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13']
+foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13', '14']
libpanfrost_per_arch += static_library(
'pan-arch-v' + ver,
[
diff --git a/src/panfrost/lib/pan_afbc.h b/src/panfrost/lib/pan_afbc.h
index 035b77011b5..f0328a0ba44 100644
--- a/src/panfrost/lib/pan_afbc.h
+++ b/src/panfrost/lib/pan_afbc.h
@@ -3,6 +3,7 @@
* Copyright (C) 2014 Broadcom
* Copyright (C) 2018-2019 Alyssa Rosenzweig
* Copyright (C) 2019-2020 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@@ -711,6 +712,32 @@ pan_afbc_compression_mode(enum pan_afbc_mode mode)
case PAN_AFBC_MODE_R16G16B16A16:
return MALI_AFBC_COMPRESSION_MODE_R16G16B16A16;
#endif
+#if PAN_ARCH >= 14
+ case PAN_AFBC_MODE_YUV420_6C8:
+ return MALI_AFBC_COMPRESSION_MODE_Y8U8V8_420;
+ case PAN_AFBC_MODE_YUV420_2C8:
+ return MALI_AFBC_COMPRESSION_MODE_R8G8;
+ case PAN_AFBC_MODE_YUV420_1C8:
+ return MALI_AFBC_COMPRESSION_MODE_R8;
+ case PAN_AFBC_MODE_YUV420_6C10:
+ return MALI_AFBC_COMPRESSION_MODE_Y10U10V10_420;
+ case PAN_AFBC_MODE_YUV420_2C10:
+ return MALI_AFBC_COMPRESSION_MODE_R10G10;
+ case PAN_AFBC_MODE_YUV420_1C10:
+ return MALI_AFBC_COMPRESSION_MODE_R10;
+ case PAN_AFBC_MODE_YUV422_4C8:
+ return MALI_AFBC_COMPRESSION_MODE_Y8U8Y8V8_422;
+ case PAN_AFBC_MODE_YUV422_2C8:
+ return MALI_AFBC_COMPRESSION_MODE_R8G8;
+ case PAN_AFBC_MODE_YUV422_1C8:
+ return MALI_AFBC_COMPRESSION_MODE_R8;
+ case PAN_AFBC_MODE_YUV422_4C10:
+ return MALI_AFBC_COMPRESSION_MODE_Y10U10Y10V10_422;
+ case PAN_AFBC_MODE_YUV422_2C10:
+ return MALI_AFBC_COMPRESSION_MODE_R10G10;
+ case PAN_AFBC_MODE_YUV422_1C10:
+ return MALI_AFBC_COMPRESSION_MODE_R10;
+#else
case PAN_AFBC_MODE_YUV420_6C8:
return MALI_AFBC_COMPRESSION_MODE_YUV420_6C8;
case PAN_AFBC_MODE_YUV420_2C8:
@@ -735,6 +762,7 @@ pan_afbc_compression_mode(enum pan_afbc_mode mode)
return MALI_AFBC_COMPRESSION_MODE_YUV422_2C10;
case PAN_AFBC_MODE_YUV422_1C10:
return MALI_AFBC_COMPRESSION_MODE_YUV422_1C10;
+#endif /* PAN_ARCH >= 14 */
#if PAN_ARCH == 9
case PAN_AFBC_MODE_R16:
case PAN_AFBC_MODE_R16G16:
diff --git a/src/panfrost/lib/pan_afrc.h b/src/panfrost/lib/pan_afrc.h
index 4a96eb374ea..306e48fb55e 100644
--- a/src/panfrost/lib/pan_afrc.h
+++ b/src/panfrost/lib/pan_afrc.h
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2023 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@@ -347,6 +348,25 @@ pan_afrc_format(struct pan_afrc_format_info info, uint64_t modifier,
return (scan ? MALI_AFRC_FORMAT_R10G10B10A10_SCAN
: MALI_AFRC_FORMAT_R10G10B10A10_ROT);
+#if PAN_ARCH >= 14
+ case PAN_AFRC_ICHANGE_FORMAT_YUV444:
+ case PAN_AFRC_ICHANGE_FORMAT_YUV422:
+ case PAN_AFRC_ICHANGE_FORMAT_YUV420:
+ if (info.bpc == 8) {
+ if (plane == 0 || info.num_planes == 3)
+ return (scan ? MALI_AFRC_FORMAT_R8_SCAN : MALI_AFRC_FORMAT_R8_ROT);
+
+ return (scan ? MALI_AFRC_FORMAT_R8G8_SCAN : MALI_AFRC_FORMAT_R8G8_ROT);
+ }
+
+ if (plane == 0 || info.num_planes == 3)
+ return (scan ? MALI_AFRC_FORMAT_R10_SCAN : MALI_AFRC_FORMAT_R10_ROT);
+
+ assert(info.ichange_fmt == PAN_AFRC_ICHANGE_FORMAT_YUV422 ||
+ info.ichange_fmt == PAN_AFRC_ICHANGE_FORMAT_YUV420);
+ return (scan ? MALI_AFRC_FORMAT_R10G10_SCAN
+ : MALI_AFRC_FORMAT_R10G10_ROT);
+#else
case PAN_AFRC_ICHANGE_FORMAT_YUV444:
if (info.bpc == 8) {
if (plane == 0 || info.num_planes == 3)
@@ -394,6 +414,7 @@ pan_afrc_format(struct pan_afrc_format_info info, uint64_t modifier,
return (scan ? MALI_AFRC_FORMAT_R10G10_420_SCAN
: MALI_AFRC_FORMAT_R10G10_420_ROT);
+#endif /* PAN_ARCH >= 14 */
default:
return MALI_AFRC_FORMAT_INVALID;
diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c
index 3df01de0090..cf9f08aae5b 100644
--- a/src/panfrost/lib/pan_desc.c
+++ b/src/panfrost/lib/pan_desc.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2021 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@@ -11,6 +12,7 @@
#include "pan_afrc.h"
#include "pan_desc.h"
#include "pan_encoder.h"
+#include "pan_fb.h"
#include "pan_props.h"
#include "pan_texture.h"
#include "pan_trace.h"
@@ -1172,11 +1174,156 @@ check_fb_attachments(const struct pan_fb_info *fb)
#endif
}
+#if PAN_ARCH >= 14
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
- const struct pan_tiler_context *tiler_ctx, void *out)
+ const struct pan_tiler_context *tiler_ctx,
+ const struct pan_ptr framebuffer)
{
+ void *out = framebuffer.cpu;
+
+ PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
+
+ check_fb_attachments(fb);
+
+ const int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
+ const bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
+ const struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb);
+
+ /* Emit to memory the state that might change per-layer. The static
+ * state is emitted directly to CSF registers by
+ * cs_emit_static_fragment_state().
+ */
+
+ struct pan_fbd_layer fbd_data = {0};
+ fbd_data.tiler = tiler_ctx->valhall.desc;
+
+ /* internal_layer_index in flags0 is used to select the right
+ * primitive list in the tiler context, and frame_arg is the value
+ * that's passed to the fragment shader through r62-r63, which we use
+ * to pass gl_Layer. Since the layer_idx only takes 8-bits, we might
+ * use the extra 56-bits we have in frame_argument to pass other
+ * information to the fragment shader at some point.
+ */
+ assert(layer_idx >= tiler_ctx->valhall.layer_offset);
+ fbd_data.frame_argument = layer_idx;
+
+ pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
+ cfg.pre_frame_0 =
+ pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
+ pan_clean_tile_write_any_set(clean_tile));
+ cfg.pre_frame_1 =
+ pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
+ pan_clean_tile_write_any_set(clean_tile));
+ cfg.post_frame = fb->bifrost.pre_post.modes[2];
+
+ const unsigned zs_bytes_per_pixel = pan_zsbuf_bytes_per_pixel(fb);
+ /* We can interleave HSR if we have space for two ZS tiles in
+ * the tile buffer. */
+ const unsigned max_zs_tile_size_interleave =
+ fb->z_tile_buf_budget >> util_logbase2_ceil(zs_bytes_per_pixel);
+ const bool hsr_can_interleave =
+ fb->tile_size <= max_zs_tile_size_interleave;
+
+ /* Enabling prepass without interleave is generally not good for
+ * performance, so disable HSR in that case. */
+ cfg.hsr_prepass_enable = fb->allow_hsr_prepass && hsr_can_interleave;
+ cfg.hsr_prepass_interleaving_enable = hsr_can_interleave;
+ cfg.hsr_prepass_filter_enable = true;
+ cfg.hsr_hierarchical_optimizations_enable = true;
+
+ cfg.internal_layer_index = layer_idx - tiler_ctx->valhall.layer_offset;
+ }
+
+ fbd_data.dcd_pointer = fb->bifrost.pre_post.dcds.gpu;
+
+ pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
+ cfg.s_clear = fb->zs.clear_value.stencil;
+ cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
+
+ /* Default to 24 bit depth if there's no surface. */
+ cfg.z_internal_format =
+ fb->zs.view.zs ? pan_get_z_internal_format(fb->zs.view.zs->format)
+ : MALI_Z_INTERNAL_FORMAT_D24;
+ cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
+
+ if (crc_rt >= 0) {
+ bool *valid = fb->rts[crc_rt].crc_valid;
+ bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
+ fb->draw_extent.maxx == (fb->width - 1) &&
+ fb->draw_extent.maxy == (fb->height - 1);
+
+ /* If the CRC was valid it stays valid, if it wasn't, we must
+ * ensure the render operation covers the full frame, and
+ * clean tiles are pushed to memory. */
+ bool new_valid = *valid | (full && pan_clean_tile_write_rt_enabled(
+ clean_tile, crc_rt));
+
+ cfg.crc_read_enable = *valid;
+
+ /* If the data is currently invalid, still write CRC
+ * data if we are doing a full write, so that it is
+ * valid for next time. */
+ cfg.crc_write_enable = new_valid;
+
+ *valid = new_valid;
+ }
+ }
+
+ fbd_data.z_clear = util_bitpack_float(fb->zs.clear_value.depth);
+
+ {
+ /* Set the DBD and RTD pointers. Both must be 64-bytes aligned. */
+ uint64_t out_gpu_addr =
+ framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
+
+ if (has_zs_crc_ext) {
+ fbd_data.dbd_pointer = out_gpu_addr;
+ assert(fbd_data.dbd_pointer % 64 == 0);
+ out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
+ }
+
+ fbd_data.rtd_pointer = out_gpu_addr;
+ assert(fbd_data.rtd_pointer % 64 == 0);
+ }
+
+ memcpy(out, &fbd_data, sizeof(fbd_data));
+ out += ALIGN_POT(sizeof(fbd_data), 64);
+
+ if (has_zs_crc_ext) {
+ struct mali_zs_crc_extension_packed *zs_crc_ext = out;
+ pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile);
+ out += pan_size(ZS_CRC_EXTENSION);
+ }
+
+ const unsigned rt_count = MAX2(fb->rt_count, 1);
+ unsigned cbuf_offset = 0;
+ for (unsigned i = 0; i < rt_count; i++) {
+ pan_emit_rt(fb, layer_idx, i, cbuf_offset, out, clean_tile);
+ out += pan_size(RENDER_TARGET);
+ if (!fb->rts[i].view)
+ continue;
+
+ cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
+ fb->tile_size *
+ pan_image_view_get_nr_samples(fb->rts[i].view);
+
+ if (i != crc_rt && fb->rts[i].crc_valid != NULL)
+ *(fb->rts[i].crc_valid) = false;
+ }
+
+ return 0;
+}
+#else
+unsigned
+GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
+ const struct pan_tls_info *tls,
+ const struct pan_tiler_context *tiler_ctx,
+ const struct pan_ptr framebuffer)
+{
+ void *out = framebuffer.cpu;
+
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
check_fb_attachments(fb);
@@ -1351,6 +1498,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
}
return tag.opaque[0];
}
+#endif /* PAN_ARCH >= 14 */
#else /* PAN_ARCH == 4 */
static enum mali_color_format
pan_sfbd_raw_format(unsigned bits)
@@ -1378,8 +1526,11 @@ GENX(pan_select_tile_size)(struct pan_fb_info *fb)
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
- const struct pan_tiler_context *tiler_ctx, void *fbd)
+ const struct pan_tiler_context *tiler_ctx,
+ const struct pan_ptr framebuffer)
{
+ void *fbd = framebuffer.cpu;
+
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
assert(fb->rt_count <= 1);
diff --git a/src/panfrost/lib/pan_desc.h b/src/panfrost/lib/pan_desc.h
index db5b6588ad3..7cc7639c897 100644
--- a/src/panfrost/lib/pan_desc.h
+++ b/src/panfrost/lib/pan_desc.h
@@ -341,7 +341,7 @@ void GENX(pan_emit_afrc_color_attachment)(const struct pan_attachment_info *att,
unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
const struct pan_tiler_context *tiler_ctx,
- void *out);
+ const struct pan_ptr framebuffer);
#if PAN_ARCH >= 6
unsigned GENX(pan_select_tiler_hierarchy_mask)(uint32_t width, uint32_t height,
diff --git a/src/panfrost/lib/pan_fb.c b/src/panfrost/lib/pan_fb.c
index f9b6c85b2ce..3b3c6c86c5f 100644
--- a/src/panfrost/lib/pan_fb.c
+++ b/src/panfrost/lib/pan_fb.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2026 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
#include "pan_fb.h"
@@ -669,9 +670,124 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
}
#endif
+#if PAN_ARCH >= 14
uint32_t
-GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out)
+GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
+ const struct pan_ptr framebuffer)
{
+ /* Emit the dynamic framebuffer state. That is, state that may change per-layer. */
+
+ void *out = framebuffer.cpu;
+ const struct pan_fb_layout *fb = info->fb;
+ const struct pan_fb_load *load = info->load;
+ const struct pan_fb_store *store = info->store;
+ const struct pan_fb_clean_tile ct = pan_fb_get_clean_tile(info);
+ const bool has_zs_crc_ext = pan_fb_has_zs(fb);
+
+ struct pan_fbd_layer fbd_data = {0};
+ fbd_data.tiler = info->tiler_ctx->valhall.desc;
+
+ /* layer_index in flags0 is used to select the right primitive list in
+ * the tiler context, and frame_arg is the value that's passed to the
+ * fragment shader through r62-r63, which we use to pass gl_Layer. Since
+ * the layer_idx only takes 8-bits, we might use the extra 56-bits we
+ * have in frame_argument to pass other information to the fragment
+ * shader at some point.
+ */
+ assert(info->layer >= info->tiler_ctx->valhall.layer_offset);
+ fbd_data.frame_argument = info->layer;
+
+ pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
+ cfg.pre_frame_0 = pan_fix_frame_shader_mode(info->frame_shaders.modes[0],
+ ct.rts || ct.zs || ct.s);
+ cfg.pre_frame_1 = pan_fix_frame_shader_mode(info->frame_shaders.modes[1],
+ ct.rts || ct.zs || ct.s);
+ cfg.post_frame = info->frame_shaders.modes[2];
+
+      /* Enabling prepass without pipelining is generally not good for
+ * performance, so disable HSR in that case.
+ */
+ cfg.hsr_prepass_enable = info->allow_hsr_prepass &&
+ pan_fb_can_pipeline_zs(fb);
+ cfg.hsr_prepass_interleaving_enable = pan_fb_can_pipeline_zs(fb);
+ cfg.hsr_prepass_filter_enable = true;
+ cfg.hsr_hierarchical_optimizations_enable = true;
+
+ cfg.internal_layer_index =
+ info->layer - info->tiler_ctx->valhall.layer_offset;
+ }
+
+ pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
+ if (fb->s_format != PIPE_FORMAT_NONE) {
+ cfg.s_clear = load && target_has_clear(&load->s) ?
+ load->s.clear.stencil : 0;
+ cfg.s_write_enable = store && store->s.store;
+ }
+
+ if (fb->z_format != PIPE_FORMAT_NONE) {
+ cfg.z_internal_format = pan_get_z_internal_format(fb->z_format);
+ cfg.z_write_enable = store && store->zs.store;
+ } else {
+ cfg.z_internal_format = MALI_Z_INTERNAL_FORMAT_D24;
+ assert(!store || !store->zs.store);
+ }
+ }
+
+ fbd_data.z_clear =
+      util_bitpack_float(fb->z_format != PIPE_FORMAT_NONE && load &&
+ target_has_clear(&load->z)
+ ? load->z.clear.depth
+ : 0);
+
+ fbd_data.dcd_pointer = info->frame_shaders.dcd_pointer;
+
+ {
+ /* Set the DBD and RTD pointers. Both must be 64-bytes aligned. */
+ uint64_t out_gpu_addr =
+ framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
+
+ if (has_zs_crc_ext) {
+ fbd_data.dbd_pointer = out_gpu_addr;
+ assert(fbd_data.dbd_pointer % 64 == 0);
+ out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
+ }
+
+ fbd_data.rtd_pointer = out_gpu_addr;
+ assert(fbd_data.rtd_pointer % 64 == 0);
+ }
+
+ memcpy(out, &fbd_data, sizeof(fbd_data));
+ out += ALIGN_POT(sizeof(fbd_data), 64);
+
+ if (has_zs_crc_ext) {
+ struct mali_zs_crc_extension_packed zs_crc;
+ emit_zs_crc_desc(info, ct, &zs_crc);
+ memcpy(out, &zs_crc, sizeof(zs_crc));
+ out += sizeof(zs_crc);
+ }
+
+ uint32_t tile_rt_offset_B = 0;
+ for (unsigned rt = 0; rt < fb->rt_count; rt++) {
+ struct mali_rgb_render_target_packed rgb_rt;
+ emit_rgb_rt_desc(info, ct, rt, tile_rt_offset_B, &rgb_rt);
+ memcpy(out, &rgb_rt, sizeof(rgb_rt));
+ out += sizeof(rgb_rt);
+
+ if (fb->rt_formats[rt] != PIPE_FORMAT_NONE) {
+ tile_rt_offset_B += pan_bytes_per_pixel_tib(fb->rt_formats[rt]) *
+ fb->tile_size_px * fb->sample_count;
+ }
+ }
+ assert(tile_rt_offset_B <= fb->tile_rt_alloc_B);
+
+ return 0;
+}
+#else /* PAN_ARCH < 14 */
+uint32_t
+GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
+ const struct pan_ptr framebuffer)
+{
+ void *out = framebuffer.cpu;
const struct pan_fb_layout *fb = info->fb;
const struct pan_fb_load *load = info->load;
const struct pan_fb_store *store = info->store;
@@ -823,4 +939,5 @@ GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out)
}
return tag.opaque[0];
}
-#endif
+#endif /* PAN_ARCH >= 14 */
+#endif /* PAN_ARCH >= 5 */
diff --git a/src/panfrost/lib/pan_fb.h b/src/panfrost/lib/pan_fb.h
index c4635f3f4c2..48bfc888b1c 100644
--- a/src/panfrost/lib/pan_fb.h
+++ b/src/panfrost/lib/pan_fb.h
@@ -1,14 +1,20 @@
/*
* Copyright (C) 2026 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
#ifndef __PAN_FB_H
#define __PAN_FB_H
+#if PAN_ARCH >= 14
+#include "genxml/cs_builder.h"
+#endif
+
+#include "compiler/shader_enums.h"
#include "genxml/gen_macros.h"
#include "util/format/u_formats.h"
-#include "compiler/shader_enums.h"
+#include "pan_pool.h"
struct nir_shader;
struct nir_shader_compiler_options;
@@ -481,7 +487,7 @@ void GENX(pan_fill_fb_info)(const struct pan_fb_desc_info *info,
struct pan_fb_info *fbinfo);
uint32_t GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
- void *out);
+ const struct pan_ptr framebuffer);
#endif
enum ENUM_PACKED pan_fb_shader_op {
@@ -620,4 +626,35 @@ GENX(pan_get_fb_shader)(const struct pan_fb_shader_key *key,
const struct nir_shader_compiler_options *nir_options);
#endif
+#if PAN_ARCH >= 14
+/* Framebuffer per-layer state. Keep this structure 64-byte aligned, since
+ * we want the adjacent ZS_CRC_EXTENSION and RENDER_TARGET descriptors
+ * aligned. */
+struct pan_fbd_layer {
+ /** GPU address to the tiler descriptor. */
+ uint64_t tiler;
+
+ /** Frame argument. */
+ uint64_t frame_argument;
+
+ /** An instance of Fragment Flags 0. */
+ struct mali_fragment_flags_0_packed flags0;
+
+ /** An instance of Fragment Flags 2. */
+ struct mali_fragment_flags_2_packed flags2;
+
+ /** Z clear value. */
+ uint32_t z_clear;
+
+ /** GPU address to the draw call descriptors. It may be 0. */
+ uint64_t dcd_pointer;
+
+ /** GPU address to the ZS_CRC_EXTENSION descriptor. It may be 0. */
+ uint64_t dbd_pointer;
+
+ /** GPU address to the RENDER_TARGET descriptors. */
+ uint64_t rtd_pointer;
+} __attribute__((aligned(64)));
+#endif /* PAN_ARCH >= 14 */
+
#endif /* __PAN_FB_H */
diff --git a/src/panfrost/lib/pan_format.c b/src/panfrost/lib/pan_format.c
index f67a3528ebb..7db35f5ac78 100644
--- a/src/panfrost/lib/pan_format.c
+++ b/src/panfrost/lib/pan_format.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2019 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@@ -184,7 +185,27 @@ const struct pan_blendable_format
const struct pan_format GENX(pan_pipe_format)[PIPE_FORMAT_COUNT] = {
FMT(NONE, CONSTANT, 0000, L, VTR_IB),
-#if PAN_ARCH >= 7
+#if PAN_ARCH >= 14
+ /* Multiplane formats */
+ FMT_YUV(R8G8_R8B8_UNORM, Y8U8Y8V8_422, UVYA, NO_SWAP, CENTER_422, _T____),
+ FMT_YUV(G8R8_B8R8_UNORM, U8Y8V8Y8_422, UYVA, SWAP, CENTER_422, _T____),
+ FMT_YUV(R8B8_R8G8_UNORM, Y8U8Y8V8_422, VYUA, NO_SWAP, CENTER_422, _T____),
+ FMT_YUV(B8R8_G8R8_UNORM, U8Y8V8Y8_422, VUYA, SWAP, CENTER_422, _T____),
+ FMT_YUV(R8_G8B8_420_UNORM, Y8U8V8_420, YUVA, NO_SWAP, CENTER, _T____),
+ FMT_YUV(R8_B8G8_420_UNORM, Y8U8V8_420, YVUA, NO_SWAP, CENTER, _T____),
+ FMT_YUV(R8_G8_B8_420_UNORM, Y8U8V8_420, YUVA, NO_SWAP, CENTER, _T____),
+ FMT_YUV(R8_B8_G8_420_UNORM, Y8U8V8_420, YVUA, NO_SWAP, CENTER, _T____),
+
+ FMT_YUV(R8_G8B8_422_UNORM, Y8U8Y8V8_422, YUVA, NO_SWAP, CENTER_422, _T____),
+ FMT_YUV(R8_B8G8_422_UNORM, U8Y8V8Y8_422, YVUA, NO_SWAP, CENTER_422, _T____),
+
+ FMT_YUV(R10_G10B10_420_UNORM, YUYAAYVYAA_420, YUVA, NO_SWAP, CENTER, _T____),
+ FMT_YUV(R10_G10B10_422_UNORM, Y10X6U10X6Y10X6V10X6_422, YUVA, NO_SWAP, CENTER_422, _T____),
+ /* special internal formats */
+ FMT_YUV(R8G8B8_420_UNORM_PACKED, Y8U8V8_420, YUVA, NO_SWAP, CENTER, _T____),
+ FMT_YUV(R10G10B10_420_UNORM_PACKED, Y10U10V10_420, YUVA, NO_SWAP, CENTER, _T____),
+ FMT_YUV(X6R10X6G10_X6R10X6B10_422_UNORM, Y10X6U10X6Y10X6V10X6_422, UVYA, NO_SWAP, CENTER_422, _T____),
+#elif PAN_ARCH >= 7
/* Multiplane formats */
FMT_YUV(R8G8_R8B8_UNORM, YUYV8, UVYA, NO_SWAP, CENTER_422, _T____),
FMT_YUV(G8R8_B8R8_UNORM, VYUY8, UYVA, SWAP, CENTER_422, _T____),
diff --git a/src/panfrost/lib/pan_format.h b/src/panfrost/lib/pan_format.h
index 7c641c24105..770d8a1bf56 100644
--- a/src/panfrost/lib/pan_format.h
+++ b/src/panfrost/lib/pan_format.h
@@ -168,6 +168,8 @@ extern const struct pan_blendable_format
pan_blendable_formats_v12[PIPE_FORMAT_COUNT];
extern const struct pan_blendable_format
pan_blendable_formats_v13[PIPE_FORMAT_COUNT];
+extern const struct pan_blendable_format
+ pan_blendable_formats_v14[PIPE_FORMAT_COUNT];
uint8_t pan_raw_format_mask_midgard(enum pipe_format *formats);
@@ -184,6 +186,7 @@ pan_blendable_format_table(unsigned arch)
FMT_TABLE(10);
FMT_TABLE(12);
FMT_TABLE(13);
+ FMT_TABLE(14);
#undef FMT_TABLE
default:
assert(!"Unsupported architecture");
@@ -199,6 +202,7 @@ extern const struct pan_format pan_pipe_format_v9[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v10[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v12[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v13[PIPE_FORMAT_COUNT];
+extern const struct pan_format pan_pipe_format_v14[PIPE_FORMAT_COUNT];
static inline const struct pan_format *
pan_format_table(unsigned arch)
@@ -213,6 +217,7 @@ pan_format_table(unsigned arch)
FMT_TABLE(10);
FMT_TABLE(12);
FMT_TABLE(13);
+ FMT_TABLE(14);
#undef FMT_TABLE
default:
assert(!"Unsupported architecture");
diff --git a/src/panfrost/lib/pan_mod.h b/src/panfrost/lib/pan_mod.h
index 25ecaa25d50..1bd9a759a44 100644
--- a/src/panfrost/lib/pan_mod.h
+++ b/src/panfrost/lib/pan_mod.h
@@ -84,6 +84,7 @@ const struct pan_mod_handler *pan_mod_get_handler_v9(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v10(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v12(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v13(uint64_t modifier);
+const struct pan_mod_handler *pan_mod_get_handler_v14(uint64_t modifier);
static inline const struct pan_mod_handler *
pan_mod_get_handler(unsigned arch, uint64_t modifier)
@@ -105,6 +106,8 @@ pan_mod_get_handler(unsigned arch, uint64_t modifier)
return pan_mod_get_handler_v12(modifier);
case 13:
return pan_mod_get_handler_v13(modifier);
+ case 14:
+ return pan_mod_get_handler_v14(modifier);
default:
UNREACHABLE("Unsupported arch");
}
diff --git a/src/panfrost/lib/pan_texture.c b/src/panfrost/lib/pan_texture.c
index 286b5c18b67..58a413278cf 100644
--- a/src/panfrost/lib/pan_texture.c
+++ b/src/panfrost/lib/pan_texture.c
@@ -223,6 +223,25 @@ pan_clump_format(enum pipe_format format)
/* YUV-sampling has special cases */
if (pan_format_is_yuv(format)) {
switch (format) {
+#if PAN_ARCH >= 14
+ case PIPE_FORMAT_R8G8_R8B8_UNORM:
+ case PIPE_FORMAT_G8R8_B8R8_UNORM:
+ case PIPE_FORMAT_R8B8_R8G8_UNORM:
+ case PIPE_FORMAT_B8R8_G8R8_UNORM:
+ case PIPE_FORMAT_R8_G8B8_422_UNORM:
+ case PIPE_FORMAT_R8_B8G8_422_UNORM:
+ case PIPE_FORMAT_R8_G8B8_420_UNORM:
+ case PIPE_FORMAT_R8_B8G8_420_UNORM:
+ case PIPE_FORMAT_R8_G8_B8_420_UNORM:
+ case PIPE_FORMAT_R8_B8_G8_420_UNORM:
+ case PIPE_FORMAT_R8G8B8_420_UNORM_PACKED:
+ return MALI_CLUMP_FORMAT_RAW8;
+ case PIPE_FORMAT_R10_G10B10_420_UNORM:
+ case PIPE_FORMAT_R10G10B10_420_UNORM_PACKED:
+ case PIPE_FORMAT_R10_G10B10_422_UNORM:
+ case PIPE_FORMAT_X6R10X6G10_X6R10X6B10_422_UNORM:
+ return MALI_CLUMP_FORMAT_R10_PACKED;
+#else
case PIPE_FORMAT_R8G8_R8B8_UNORM:
case PIPE_FORMAT_G8R8_B8R8_UNORM:
case PIPE_FORMAT_R8B8_R8G8_UNORM:
@@ -242,6 +261,7 @@ pan_clump_format(enum pipe_format format)
case PIPE_FORMAT_R10_G10B10_422_UNORM:
case PIPE_FORMAT_X6R10X6G10_X6R10X6B10_422_UNORM:
return MALI_CLUMP_FORMAT_Y10_UV10_422;
+#endif /* PAN_ARCH >= 14 */
default:
UNREACHABLE("unhandled clump format");
}
diff --git a/src/panfrost/libpan/libpan.h b/src/panfrost/libpan/libpan.h
index ed7c5c66f29..cc79ea92b74 100644
--- a/src/panfrost/libpan/libpan.h
+++ b/src/panfrost/libpan/libpan.h
@@ -28,6 +28,8 @@
#include "libpan_v12.h"
#elif (PAN_ARCH == 13)
#include "libpan_v13.h"
+#elif (PAN_ARCH == 14)
+#include "libpan_v14.h"
#else
#error "Unsupported architecture for libpan"
#endif
diff --git a/src/panfrost/libpan/libpan_shaders.h b/src/panfrost/libpan/libpan_shaders.h
index 5154cef68d7..d51761abf64 100644
--- a/src/panfrost/libpan/libpan_shaders.h
+++ b/src/panfrost/libpan/libpan_shaders.h
@@ -26,6 +26,8 @@
#include "libpan_shaders_v12.h"
#elif (PAN_ARCH == 13)
#include "libpan_shaders_v13.h"
+#elif (PAN_ARCH == 14)
+#include "libpan_shaders_v14.h"
#else
#error "Unsupported architecture for libpan"
#endif
diff --git a/src/panfrost/libpan/meson.build b/src/panfrost/libpan/meson.build
index 734660b5735..dfe40fff9c1 100644
--- a/src/panfrost/libpan/meson.build
+++ b/src/panfrost/libpan/meson.build
@@ -11,7 +11,7 @@ libpan_shader_files = files(
idep_libpan_per_arch = {}
-foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13']
+foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13', '14']
libpan_spv = custom_target(
input : libpan_shader_files,
output : 'libpan_v' + ver + '.spv',
diff --git a/src/panfrost/model/pan_model.c b/src/panfrost/model/pan_model.c
index f9861ace8dc..4b28c4067fb 100644
--- a/src/panfrost/model/pan_model.c
+++ b/src/panfrost/model/pan_model.c
@@ -95,6 +95,10 @@ const struct pan_model pan_model_list[] = {
MODEL_RATES(4, 8, 128)),
FIFTHGEN_MODEL(PAN_PROD_ID(13, 8, 0), 4, "G725", "TKRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
MODEL_RATES(4, 8, 128)),
+ FIFTHGEN_MODEL(PAN_PROD_ID(14, 8, 3), 1, "G1-Pro", "TDRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
+ MODEL_RATES(4, 8, 64)),
+ FIFTHGEN_MODEL(PAN_PROD_ID(14, 8, 3), 4, "G1-Pro", "TDRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
+ MODEL_RATES(4, 8, 128)),
};
/* clang-format on */
diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
index 7e7e8922c88..65d08df53a1 100644
--- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
+++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
@@ -74,7 +74,11 @@ static inline uint32_t
get_fbd_size(bool has_zs_ext, uint32_t rt_count)
{
assert(rt_count >= 1 && rt_count <= MAX_RTS);
+#if PAN_ARCH >= 14
+ uint32_t fbd_size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
+#else
uint32_t fbd_size = pan_size(FRAMEBUFFER);
+#endif
if (has_zs_ext)
fbd_size += pan_size(ZS_CRC_EXTENSION);
fbd_size += pan_size(RENDER_TARGET) * rt_count;
@@ -209,13 +213,25 @@ enum panvk_cs_regs {
PANVK_CS_REG_RUN_IDVS_SR_END = 60,
#endif
+#if PAN_ARCH >= 14
+ /* RUN_FRAGMENT2 staging regs.
+ * SW ABI:
+ * - r58:59 contain the pointer to the first tiler descriptor. This is
+ * needed to gather completed heap chunks after a run_fragment2.
+ */
+ PANVK_CS_REG_RUN_FRAGMENT_SR_START = 0,
+ PANVK_CS_REG_RUN_FRAGMENT_SR_END = 55,
+ PANVK_CS_REG_TILER_DESC_PTR = 58,
+#else
/* RUN_FRAGMENT staging regs.
* SW ABI:
- * - r38:39 contain the pointer to the first tiler descriptor. This is
+ * - r58:59 contain the pointer to the first tiler descriptor. This is
* needed to gather completed heap chunks after a run_fragment.
*/
PANVK_CS_REG_RUN_FRAGMENT_SR_START = 38,
PANVK_CS_REG_RUN_FRAGMENT_SR_END = 46,
+ PANVK_CS_REG_TILER_DESC_PTR = 58,
+#endif
/* RUN_COMPUTE staging regs. */
PANVK_CS_REG_RUN_COMPUTE_SR_START = 0,
@@ -870,4 +886,31 @@ vk_stages_to_subqueue_mask(VkPipelineStageFlags2 vk_stages,
void panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cs_deps deps);
+#if PAN_ARCH >= 14
+static inline void
+cs_emit_layer_fragment_state(struct cs_builder *b, struct cs_index fbd_ptr)
+{
+ /* Emit the dynamic fragment state. This state may change per-layer. */
+
+ cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_0), fbd_ptr,
+ offsetof(struct pan_fbd_layer, flags0));
+ cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_2), fbd_ptr,
+ offsetof(struct pan_fbd_layer, flags2));
+ cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, Z_CLEAR), fbd_ptr,
+ offsetof(struct pan_fbd_layer, z_clear));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, TILER_DESCRIPTOR_POINTER), fbd_ptr,
+ offsetof(struct pan_fbd_layer, tiler));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, RTD_POINTER), fbd_ptr,
+ offsetof(struct pan_fbd_layer, rtd_pointer));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, DBD_POINTER), fbd_ptr,
+ offsetof(struct pan_fbd_layer, dbd_pointer));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_ARG), fbd_ptr,
+ offsetof(struct pan_fbd_layer, frame_argument));
+ cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_SHADER_DCD_POINTER), fbd_ptr,
+ offsetof(struct pan_fbd_layer, dcd_pointer));
+
+ cs_flush_loads(b);
+}
+#endif /* PAN_ARCH >= 14 */
+
#endif /* PANVK_CMD_BUFFER_H */
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
index 794da0f16d7..c75dd998f7e 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
@@ -51,6 +51,7 @@
#include "vk_render_pass.h"
#include "poly/geometry.h"
+#if PAN_ARCH < 14
static enum cs_reg_perm
provoking_vertex_fn_reg_perm_cb(struct cs_builder *b, unsigned reg)
{
@@ -202,6 +203,7 @@ panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev)
panvk_priv_bo_unref(dev->draw_ctx->fns_bo);
vk_free(&dev->vk.alloc, dev->draw_ctx);
}
+#endif /* PAN_ARCH < 14 */
static void
emit_vs_attrib(struct panvk_cmd_buffer *cmdbuf,
@@ -1245,8 +1247,13 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
uint32_t fbd_sz = calc_fbd_size(cmdbuf);
uint32_t fbds_sz = enabled_layer_count * fbd_sz;
- cmdbuf->state.gfx.render.fbds = panvk_cmd_alloc_dev_mem(
- cmdbuf, desc, fbds_sz, pan_alignment(FRAMEBUFFER));
+#if PAN_ARCH >= 14
+ const unsigned fbds_alignment = alignof(struct pan_fbd_layer);
+#else
+ const unsigned fbds_alignment = pan_alignment(FRAMEBUFFER);
+#endif
+ cmdbuf->state.gfx.render.fbds =
+ panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbds_sz, fbds_alignment);
if (!cmdbuf->state.gfx.render.fbds.gpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -1316,14 +1323,23 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
tiler_ctx = get_tiler_context(cmdbuf, layer_idx);
uint32_t new_fbd_flags =
- GENX(pan_emit_fb_desc)(&fbd_info, fbds.cpu + fbd_sz * i);
+ GENX(pan_emit_fb_desc)(&fbd_info, pan_ptr_offset(fbds, fbd_sz * i));
/* Make sure all FBDs have the same flags. */
assert(i == 0 || new_fbd_flags == fbd_flags);
fbd_flags = new_fbd_flags;
}
+#if PAN_ARCH >= 14
+ /* fbd_flags is unused on v14+. */
+ assert(!fbd_flags);
+#endif
+
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
+
+#if PAN_ARCH >= 14
+ // TODO: Implement IR support for v14.
+#else
for (uint32_t ir_pass = 0; ir_pass < PANVK_IR_PASS_COUNT; ir_pass++) {
struct pan_ptr ir_fbds = panvk_cmd_alloc_dev_mem(
cmdbuf, desc, fbds_sz, pan_alignment(FRAMEBUFFER));
@@ -1335,7 +1351,6 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
for (uint32_t i = 0; i < enabled_layer_count; i++) {
uint32_t layer_idx = multiview ? u_bit_scan(&ir_view_mask_temp) : i;
- void *ir_fbd = (void *)((uint8_t *)ir_fbds.cpu + (i * fbd_sz));
fbd_info.layer = layer_idx;
tiler_ctx = get_tiler_context(cmdbuf, layer_idx);
@@ -1353,8 +1368,8 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
if (result != VK_SUCCESS)
return result;
- ASSERTED uint32_t new_fbd_flags =
- GENX(pan_emit_fb_desc)(&fbd_info, ir_fbd);
+ ASSERTED uint32_t new_fbd_flags = GENX(pan_emit_fb_desc)(
+ &fbd_info, pan_ptr_offset(ir_fbds, fbd_sz * i));
/* Make sure all FBDs have the same flags. */
assert(new_fbd_flags == fbd_flags);
@@ -1367,16 +1382,14 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
/* Wait for IR info push to complete */
cs_wait_slot(b, SB_ID(LS));
-
- bool unset_provoking_vertex =
- cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET;
+#endif /* PAN_ARCH >= 14 */
if (copy_fbds) {
- struct cs_index cur_tiler = cs_reg64(b, 38);
+ struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR);
struct cs_index dst_fbd_ptr = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
- struct cs_index fbd_idx = cs_reg32(b, 47);
- struct cs_index src_fbd_ptr = cs_reg64(b, 48);
- struct cs_index remaining_layers_in_td = cs_reg32(b, 50);
+ struct cs_index fbd_idx = cs_reg32(b, 60);
+ struct cs_index src_fbd_ptr = cs_reg64(b, 64);
+ struct cs_index remaining_layers_in_td = cs_reg32(b, 61);
uint32_t td_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count,
MAX_LAYERS_PER_TILER_DESC);
@@ -1400,10 +1413,27 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
* framebuffer size is aligned on 64-bytes. */
assert(fbd_sz == ALIGN_POT(fbd_sz, 64));
+#if PAN_ARCH >= 14
+ for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) {
+ cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16), src_fbd_ptr,
+ BITFIELD_MASK(16), fbd_off);
+
+ /* Patch the Tiler pointer. */
+ if (fbd_off == 0)
+ cs_add64(b, cs_scratch_reg64(b, 0), cur_tiler, 0);
+
+ cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr,
+ BITFIELD_MASK(16), fbd_off);
+ }
+#else
+ bool unset_provoking_vertex =
+ cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET;
for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) {
if (fbd_off == 0) {
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 14), src_fbd_ptr,
BITFIELD_MASK(14), fbd_off);
+
+ /* Patch the Tiler pointer. */
cs_add64(b, cs_scratch_reg64(b, 14), cur_tiler, 0);
/* If we don't know what provoking vertex mode the
@@ -1423,6 +1453,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr,
BITFIELD_MASK(16), fbd_off);
}
+#endif
/* Finish stores to pass_dst_fbd_ptr. */
cs_flush_stores(b);
@@ -1459,9 +1490,11 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_update_frag_ctx(b) {
cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
fbds.gpu | fbd_flags);
- cs_move64_to(b, cs_reg64(b, 38), cmdbuf->state.gfx.render.tiler);
+ cs_move64_to(b, cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR),
+ cmdbuf->state.gfx.render.tiler);
}
+#if PAN_ARCH < 14
/* If we don't know what provoking vertex mode the application wants yet,
* leave space to patch it later */
if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) {
@@ -1483,6 +1516,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex)
cs_call(b, addr_reg, length_reg);
}
+#endif
}
return VK_SUCCESS;
@@ -3299,6 +3333,9 @@ calc_tiler_oom_handler_idx(struct panvk_cmd_buffer *cmdbuf)
static void
setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
{
+#if PAN_ARCH >= 14
+ // TODO: Implement IR support for v14.
+#else
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
const bool has_zs_ext = pan_fb_has_zs(fb);
@@ -3343,6 +3380,7 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
TILER_OOM_CTX_FIELD_OFFSET(layer_count));
cs_flush_stores(b);
+#endif /* PAN_ARCH >= 14 */
}
static uint32_t
@@ -3351,24 +3389,106 @@ pack_32_2x16(uint16_t lo, uint16_t hi)
return (((uint32_t)hi) << 16) | (uint32_t)lo;
}
+#if PAN_ARCH >= 14
+static void
+cs_emit_static_fragment_state(struct cs_builder *b,
+ struct panvk_cmd_buffer *cmdbuf)
+{
+ /* Emit the static fragment staging registers. These don't change per-layer. */
+
+ const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
+ const struct panvk_rendering_state *render = &cmdbuf->state.gfx.render;
+ const struct pan_fb_layout *fb = &render->fb.layout;
+
+ const uint8_t sample_count = render->fb.layout.sample_count;
+
+ const struct pan_fb_bbox fb_area_px =
+ pan_fb_bbox_from_xywh(0, 0, fb->width_px, fb->height_px);
+ const struct pan_fb_bbox bbox_px =
+ pan_fb_bbox_clamp(fb->tiling_area_px, fb_area_px);
+
+ assert(pan_fb_bbox_is_valid(fb->tiling_area_px));
+
+ struct mali_fragment_bounding_box_packed bbox;
+ pan_pack(&bbox, FRAGMENT_BOUNDING_BOX, cfg) {
+ cfg.bound_min_x = bbox_px.min_x;
+ cfg.bound_min_y = bbox_px.min_y;
+ cfg.bound_max_x = bbox_px.max_x;
+ cfg.bound_max_y = bbox_px.max_y;
+ }
+
+ struct mali_frame_size_packed frame_size;
+ pan_pack(&frame_size, FRAME_SIZE, cfg) {
+ cfg.width = fb->width_px;
+ cfg.height = fb->height_px;
+ }
+
+ cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
+ bbox.opaque[0]);
+ cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
+ bbox.opaque[1]);
+ cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FRAME_SIZE), frame_size.opaque[0]);
+ cs_move64_to(
+ b, cs_sr_reg64(b, FRAGMENT, SAMPLE_POSITION_ARRAY_POINTER),
+ dev->sample_positions->addr.dev +
+ pan_sample_positions_offset(pan_sample_pattern(sample_count)));
+
+ /* Flags 1 */
+ struct mali_fragment_flags_1_packed flags1;
+ pan_pack(&flags1, FRAGMENT_FLAGS_1, cfg) {
+ cfg.sample_count = fb->sample_count;
+ cfg.sample_pattern = pan_sample_pattern(fb->sample_count);
+ cfg.effective_tile_size = fb->tile_size_px;
+ cfg.point_sprite_coord_origin_max_y = false;
+ cfg.first_provoking_vertex = get_first_provoking_vertex(cmdbuf);
+
+ assert(fb->rt_count > 0);
+ cfg.render_target_count = fb->rt_count;
+ cfg.color_buffer_allocation = fb->tile_rt_alloc_B;
+ }
+ cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]);
+
+ /* If we don't know what provoking vertex mode the application wants yet,
+ * leave space to patch it later */
+ if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) {
+ cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex)
+ {
+ /* provoking_vertex flag is bit 14 of Fragment Flags 1. */
+ cs_add32(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1),
+ cs_sr_reg32(b, FRAGMENT, FLAGS_1), -(1 << 14));
+ }
+ }
+
+ /* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */
+}
+#endif /* PAN_ARCH >= 14 */
+
static VkResult
issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
{
+#if PAN_ARCH < 14
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
+#endif
const struct cs_tracing_ctx *tracing_ctx =
&cmdbuf->state.cs[PANVK_SUBQUEUE_FRAGMENT].tracing;
- const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
bool has_oq_chain = cmdbuf->state.gfx.render.oq.chain != 0;
/* Now initialize the fragment bits. */
+ struct cs_index fbd_pointer = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
cs_update_frag_ctx(b) {
+#if PAN_ARCH >= 14
+ cs_emit_static_fragment_state(b, cmdbuf);
+ cs_emit_layer_fragment_state(b, fbd_pointer);
+#else
+ const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
pack_32_2x16(fb->tiling_area_px.min_x,
fb->tiling_area_px.min_y));
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
pack_32_2x16(fb->tiling_area_px.max_x,
fb->tiling_area_px.max_y));
+#endif
}
bool simul_use =
@@ -3401,6 +3521,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
* state for this renderpass, so it's safe to enable. */
struct cs_index addr_reg = cs_scratch_reg64(b, 0);
struct cs_index length_reg = cs_scratch_reg32(b, 2);
+#if PAN_ARCH >= 14
+ // TODO: Implement IR support for v14.
+#else
uint32_t handler_idx = calc_tiler_oom_handler_idx(cmdbuf);
uint64_t handler_addr = dev->tiler_oom.handlers_bo->addr.dev +
handler_idx * dev->tiler_oom.handler_stride;
@@ -3408,6 +3531,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
cs_move32_to(b, length_reg, dev->tiler_oom.handler_stride);
cs_set_exception_handler(b, MALI_CS_EXCEPTION_TYPE_TILER_OOM, addr_reg,
length_reg);
+#endif
/* Wait for the tiling to be done before submitting the fragment job. */
wait_finish_tiling(cmdbuf);
@@ -3422,8 +3546,12 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
* up. */
cs_move64_to(b, addr_reg, 0);
cs_move32_to(b, length_reg, 0);
+#if PAN_ARCH >= 14
+ // TODO: Implement IR support for v14.
+#else
cs_set_exception_handler(b, MALI_CS_EXCEPTION_TYPE_TILER_OOM, addr_reg,
length_reg);
+#endif
/* Applications tend to forget to describe subpass dependencies, especially
* when it comes to write -> read dependencies on attachments. The
@@ -3439,8 +3567,13 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
}
if (cmdbuf->state.gfx.render.layer_count <= 1) {
+#if PAN_ARCH >= 14
+ cs_trace_run_fragment2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
+ false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#else
cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#endif
} else {
struct cs_index run_fragment_regs = cs_scratch_reg_tuple(b, 0, 4);
struct cs_index remaining_layers = cs_scratch_reg32(b, 4);
@@ -3449,12 +3582,17 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
cs_while(b, MALI_CS_CONDITION_GREATER, remaining_layers) {
cs_add32(b, remaining_layers, remaining_layers, -1);
+#if PAN_ARCH >= 14
+ cs_emit_layer_fragment_state(b, fbd_pointer);
+ cs_trace_run_fragment2(b, tracing_ctx, run_fragment_regs, false,
+ MALI_TILE_RENDER_ORDER_Z_ORDER);
+#else
cs_trace_run_fragment(b, tracing_ctx, run_fragment_regs, false,
MALI_TILE_RENDER_ORDER_Z_ORDER);
+#endif
cs_update_frag_ctx(b)
- cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
- cs_sr_reg64(b, FRAGMENT, FBD_POINTER), fbd_sz);
+ cs_add64(b, fbd_pointer, fbd_pointer, fbd_sz);
}
}
@@ -3468,8 +3606,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
struct cs_index completed = cs_scratch_reg_tuple(b, 10, 4);
struct cs_index completed_top = cs_scratch_reg64(b, 10);
struct cs_index completed_bottom = cs_scratch_reg64(b, 12);
- struct cs_index cur_tiler = cs_reg64(b, 38);
- struct cs_index tiler_count = cs_reg32(b, 47);
+ struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR);
+ struct cs_index tiler_count = cs_reg32(b, 60);
struct cs_index oq_chain = cs_scratch_reg64(b, 10);
struct cs_index oq_chain_lo = cs_scratch_reg32(b, 10);
struct cs_index oq_syncobj = cs_scratch_reg64(b, 12);
diff --git a/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c b/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c
index b4cf6855184..72e805dc5ac 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c
@@ -13,8 +13,9 @@ tiler_oom_reg_perm_cb(struct cs_builder *b, unsigned reg)
{
switch (reg) {
/* The bbox is set up by the fragment subqueue, we should not modify it. */
- case 42:
- case 43:
+ case MALI_FRAGMENT_SR_BBOX_MIN:
+ case MALI_FRAGMENT_SR_BBOX_MAX:
+
/* We should only load from the subqueue context. */
case PANVK_CS_REG_SUBQUEUE_CTX_START:
case PANVK_CS_REG_SUBQUEUE_CTX_END:
@@ -42,8 +43,14 @@ copy_fbd(struct cs_builder *b, bool has_zs_ext, uint32_t rt_count,
cs_store(b, cs_scratch_reg_tuple(b, 0, 8), dst, BITFIELD_MASK(8),
8 * sizeof(uint32_t));
+#if PAN_ARCH >= 14
+ const size_t fbd_size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
+#else
+ const size_t fbd_size = sizeof(struct mali_framebuffer_packed);
+#endif
+
if (has_zs_ext) {
- const uint16_t dbd_offset = sizeof(struct mali_framebuffer_packed);
+ const uint16_t dbd_offset = fbd_size;
/* Copy the whole DBD. */
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 8), src_other,
@@ -57,8 +64,7 @@ copy_fbd(struct cs_builder *b, bool has_zs_ext, uint32_t rt_count,
}
const uint16_t rts_offset =
- sizeof(struct mali_framebuffer_packed) +
- (has_zs_ext ? sizeof(struct mali_zs_crc_extension_packed) : 0);
+ fbd_size + (has_zs_ext ? sizeof(struct mali_zs_crc_extension_packed) : 0);
for (uint32_t rt = 0; rt < rt_count; rt++) {
const uint16_t rt_offset =
@@ -110,12 +116,14 @@ generate_tiler_oom_handler(struct panvk_device *dev,
.tracebuf_addr_offset =
offsetof(struct panvk_cs_subqueue_context, debug.tracebuf.cs),
};
- struct mali_framebuffer_pointer_packed fb_tag;
+#if PAN_ARCH < 14
+ struct mali_framebuffer_pointer_packed fb_tag;
pan_pack(&fb_tag, FRAMEBUFFER_POINTER, cfg) {
cfg.zs_crc_extension_present = has_zs_ext;
cfg.render_target_count = rt_count;
}
+#endif
cs_function_def(&b, &handler, handler_ctx) {
struct cs_index subqueue_ctx = cs_subqueue_ctx_reg(&b);
@@ -140,7 +148,7 @@ generate_tiler_oom_handler(struct panvk_device *dev,
struct cs_index run_fragment_regs = cs_scratch_reg_tuple(&b, 0, 4);
/* The tiler pointer is pre-filled. */
- struct cs_index tiler_ptr = cs_reg64(&b, 38);
+ struct cs_index tiler_ptr = cs_reg64(&b, PANVK_CS_REG_TILER_DESC_PTR);
cs_load64_to(&b, scratch_fbd_ptr_reg, subqueue_ctx,
TILER_OOM_CTX_FIELD_OFFSET(ir_scratch_fbd_ptr));
@@ -176,11 +184,17 @@ generate_tiler_oom_handler(struct panvk_device *dev,
cs_wait_slot(&b, SB_ID(LS));
/* Set FBD pointer to the scratch fbd */
- cs_add64(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER),
- scratch_fbd_ptr_reg, fb_tag.opaque[0]);
-
+ struct cs_index fbd_pointer = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);
+#if PAN_ARCH >= 14
+ cs_add64(&b, fbd_pointer, scratch_fbd_ptr_reg, 0);
+ cs_emit_layer_fragment_state(&b, fbd_pointer);
+ cs_trace_run_fragment2(&b, &tracing_ctx, run_fragment_regs, false,
+ MALI_TILE_RENDER_ORDER_Z_ORDER);
+#else
+ cs_add64(&b, fbd_pointer, scratch_fbd_ptr_reg, fb_tag.opaque[0]);
cs_trace_run_fragment(&b, &tracing_ctx, run_fragment_regs, false,
MALI_TILE_RENDER_ORDER_Z_ORDER);
+#endif
/* Serialize run fragments since we reuse FBD for the runs */
cs_wait_slots(&b, dev->csf.sb.all_iters_mask);
diff --git a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c
index c4848fe575b..b738be274d3 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c
@@ -717,7 +717,12 @@ init_tiler(struct panvk_gpu_queue *queue)
tiler_heap->chunk_size = phys_dev->csf.tiler.chunk_size;
alloc_info.size = get_fbd_size(true, MAX_RTS);
- alloc_info.alignment = pan_alignment(FRAMEBUFFER);
+#if PAN_ARCH >= 14
+ const unsigned fbds_alignment = alignof(struct pan_fbd_layer);
+#else
+ const unsigned fbds_alignment = pan_alignment(FRAMEBUFFER);
+#endif
+ alloc_info.alignment = fbds_alignment;
tiler_heap->oom_fbd = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info);
if (!panvk_priv_mem_check_alloc(tiler_heap->oom_fbd)) {
result = panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c
index 0579034aea2..9879ca8b112 100644
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c
@@ -181,7 +181,7 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
fbd_info.layer = layer_id;
fbd_info.frame_shaders = fs;
fbd_info.frame_shaders.dcd_pointer += layer_id * 3 * pan_size(DRAW);
- tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd.cpu);
+ tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd);
result = panvk_cmd_prepare_fragment_job(cmdbuf, tagged_fbd_ptr);
if (result != VK_SUCCESS)
diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build
index d79bcf885a7..ce06192d50a 100644
--- a/src/panfrost/vulkan/meson.build
+++ b/src/panfrost/vulkan/meson.build
@@ -14,6 +14,7 @@ panvk_entrypoints = custom_target(
'--device-prefix', 'panvk_v6', '--device-prefix', 'panvk_v7',
'--device-prefix', 'panvk_v9', '--device-prefix', 'panvk_v10',
'--device-prefix', 'panvk_v12', '--device-prefix', 'panvk_v13',
+ '--device-prefix', 'panvk_v14',
'--beta', with_vulkan_beta.to_string()
],
depend_files : vk_entrypoints_gen_depend_files,
@@ -65,7 +66,7 @@ valhall_archs = [9, 10]
valhall_inc_dir = ['valhall']
valhall_files = []
-fifthgen_archs = [12, 13]
+fifthgen_archs = [12, 13, 14]
fifthgen_inc_dir = ['fifthgen']
fifthgen_files = []
@@ -83,7 +84,7 @@ jm_files = [
'jm/panvk_vX_gpu_queue.c',
]
-csf_archs = [10, 12, 13]
+csf_archs = [10, 12, 13, 14]
csf_inc_dir = ['csf']
csf_files = [
'csf/panvk_vX_bind_queue.c',
@@ -126,7 +127,7 @@ common_per_arch_files = [
sha1_h,
]
-foreach arch : [6, 7, 10, 12, 13]
+foreach arch : [6, 7, 10, 12, 13, 14]
per_arch_files = common_per_arch_files
inc_panvk_per_arch = []
diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h
index 8de69cfdb42..7c11787fd44 100644
--- a/src/panfrost/vulkan/panvk_cmd_draw.h
+++ b/src/panfrost/vulkan/panvk_cmd_draw.h
@@ -243,7 +243,7 @@ struct panvk_cmd_graphics_state {
} \
} while (0)
-#if PAN_ARCH >= 10
+#if PAN_ARCH >= 10 && PAN_ARCH < 14
struct panvk_device_draw_context {
struct panvk_priv_bo *fns_bo;
uint64_t fn_set_fbds_provoking_vertex_stride;
@@ -376,8 +376,7 @@ cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \
} while (0)
-
-#if PAN_ARCH >= 10
+#if PAN_ARCH >= 10 && PAN_ARCH < 14
VkResult
panvk_per_arch(device_draw_context_init)(struct panvk_device *dev);
diff --git a/src/panfrost/vulkan/panvk_macros.h b/src/panfrost/vulkan/panvk_macros.h
index 940d00522bb..09253ffdb93 100644
--- a/src/panfrost/vulkan/panvk_macros.h
+++ b/src/panfrost/vulkan/panvk_macros.h
@@ -61,6 +61,9 @@ panvk_catch_indirect_alloc_failure(VkResult error)
case 13: \
panvk_arch_name(name, v13)(__VA_ARGS__); \
break; \
+ case 14: \
+ panvk_arch_name(name, v14)(__VA_ARGS__); \
+ break; \
default: \
UNREACHABLE("Unsupported architecture"); \
} \
@@ -84,6 +87,9 @@ panvk_catch_indirect_alloc_failure(VkResult error)
case 13: \
ret = panvk_arch_name(name, v13)(__VA_ARGS__); \
break; \
+ case 14: \
+ ret = panvk_arch_name(name, v14)(__VA_ARGS__); \
+ break; \
default: \
UNREACHABLE("Unsupported architecture"); \
} \
@@ -102,6 +108,8 @@ panvk_catch_indirect_alloc_failure(VkResult error)
#define panvk_per_arch(name) panvk_arch_name(name, v12)
#elif PAN_ARCH == 13
#define panvk_per_arch(name) panvk_arch_name(name, v13)
+#elif PAN_ARCH == 14
+#define panvk_per_arch(name) panvk_arch_name(name, v14)
#else
#error "Unsupported arch"
#endif
diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c
index 1e95c5c9390..bb18df6b49a 100644
--- a/src/panfrost/vulkan/panvk_physical_device.c
+++ b/src/panfrost/vulkan/panvk_physical_device.c
@@ -64,6 +64,7 @@ PER_ARCH_FUNCS(7);
PER_ARCH_FUNCS(10);
PER_ARCH_FUNCS(12);
PER_ARCH_FUNCS(13);
+PER_ARCH_FUNCS(14);
static VkResult
create_kmod_dev(struct panvk_physical_device *device,
@@ -411,6 +412,7 @@ panvk_physical_device_init(struct panvk_physical_device *device,
switch (arch) {
case 6:
case 7:
+ case 14:
if (!os_get_option("PAN_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
"WARNING: panvk is not well-tested on v%d, "
diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c
index c32d2f279e8..93b8a8e21af 100644
--- a/src/panfrost/vulkan/panvk_vX_device.c
+++ b/src/panfrost/vulkan/panvk_vX_device.c
@@ -550,7 +550,7 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
goto err_free_precomp;
}
-#if PAN_ARCH >= 10
+#if PAN_ARCH >= 10 && PAN_ARCH < 14
result = panvk_per_arch(device_draw_context_init)(device);
if (result != VK_SUCCESS)
goto err_free_mem_cache;
@@ -616,7 +616,7 @@ err_finish_queues:
panvk_meta_cleanup(device);
err_free_draw_ctx:
-#if PAN_ARCH >= 10
+#if PAN_ARCH >= 10 && PAN_ARCH < 14
panvk_per_arch(device_draw_context_cleanup)(device);
err_free_mem_cache:
#endif
@@ -679,7 +679,7 @@ panvk_per_arch(destroy_device)(struct panvk_device *device,
}
panvk_precomp_cleanup(device);
-#if PAN_ARCH >= 10
+#if PAN_ARCH >= 10 && PAN_ARCH < 14
panvk_per_arch(device_draw_context_cleanup)(device);
#endif
panvk_meta_cleanup(device);