Merge branch 'panfrost-v14' into 'main'

Panfrost: Add v14 support

See merge request mesa/mesa!41081
This commit is contained in:
Marc Alcalà 2026-05-08 02:09:35 +02:00
commit b4c4cb1561
52 changed files with 4048 additions and 216 deletions

View file

@ -34,6 +34,8 @@ The following hardware is currently supported:
+--------------------+---------------+-----------+--------+--------+
| G725 | 5th Gen (v13) | 3.1 | 3.1 | 1.4 |
+--------------------+---------------+-----------+--------+--------+
| G1-Pro | 5th Gen (v14) | 3.1 | 3.1 | 1.4 |
+--------------------+---------------+-----------+--------+--------+
Other Midgard and Bifrost chips (e.g. G71) are not yet supported.

View file

@ -41,7 +41,7 @@ compile_args_panfrost = [
'-Wno-pointer-arith'
]
panfrost_versions = ['4', '5', '6', '7', '9', '10', '12', '13']
panfrost_versions = ['4', '5', '6', '7', '9', '10', '12', '13', '14']
libpanfrost_versions = []
foreach ver : panfrost_versions
@ -54,7 +54,7 @@ foreach ver : panfrost_versions
]
if ver in ['4', '5', '6', '7', '9']
files_panfrost_vx += ['pan_jm.c']
elif ver in ['10', '12', '13']
elif ver in ['10', '12', '13', '14']
files_panfrost_vx += ['pan_csf.c']
endif
libpanfrost_versions += static_library(

View file

@ -49,7 +49,7 @@
* functions. */
#if PAN_ARCH <= 9
#define JOBX(__suffix) GENX(jm_##__suffix)
#elif PAN_ARCH <= 13
#elif PAN_ARCH <= 14
#define JOBX(__suffix) GENX(csf_##__suffix)
#else
#error "Unsupported arch"

View file

@ -1,5 +1,6 @@
/*
* Copyright (C) 2023 Collabora Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@ -13,6 +14,7 @@
#include "pan_cmdstream.h"
#include "pan_context.h"
#include "pan_csf.h"
#include "pan_fb.h"
#include "pan_fb_preload.h"
#include "pan_job.h"
#include "pan_trace.h"
@ -75,6 +77,87 @@ csf_update_tiler_oom_ctx(struct cs_builder *b, uint64_t addr)
(PAN_INCREMENTAL_RENDERING_##_pass##_PASS * sizeof(struct pan_ptr)) + \
offsetof(struct pan_ptr, gpu))
#if PAN_ARCH >= 14
static void
cs_emit_static_fragment_state(struct cs_builder *b,
struct panfrost_batch *batch,
const struct pan_fb_info *fb)
{
struct mali_frame_size_packed frame_size;
pan_pack(&frame_size, FRAME_SIZE, cfg) {
cfg.width = fb->width;
cfg.height = fb->height;
}
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FRAME_SIZE), frame_size.opaque[0]);
cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, SAMPLE_POSITION_ARRAY_POINTER),
fb->sample_positions);
struct mali_fragment_flags_1_packed flags1;
pan_pack(&flags1, FRAGMENT_FLAGS_1, cfg) {
/* The force_samples setting dictates the sample-count that is used
* for rasterization, and works like D3D11's ForcedSampleCount
* feature:
*
* - If force_samples == 0: Let nr_samples dictate sample count
* - If force_samples == 1: force single-sampled rasterization
* - If force_samples > 1: force multi-sampled rasterization
*
* This can be used to read SYSTEM_VALUE_SAMPLE_MASK_IN from the
* fragment shader, even when performing single-sampled rendering.
*/
if (fb->pls_enabled) {
cfg.sample_count = 4;
cfg.sample_pattern = pan_sample_pattern(1);
} else if (!fb->force_samples) {
cfg.sample_count = fb->nr_samples;
cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
} else if (fb->force_samples == 1) {
cfg.sample_count = fb->nr_samples;
cfg.sample_pattern = pan_sample_pattern(1);
} else {
cfg.sample_count = 1;
cfg.sample_pattern = pan_sample_pattern(fb->force_samples);
}
cfg.effective_tile_size = fb->tile_size;
cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
cfg.first_provoking_vertex = fb->first_provoking_vertex;
cfg.render_target_count = MAX2(fb->rt_count, 1);
cfg.color_buffer_allocation = fb->cbuf_allocation;
}
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]);
/* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */
}
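To make the sample-count selection above easier to audit, the branch logic condenses to this truth table (taken directly from the branches in cs_emit_static_fragment_state; note the PLS case takes priority over force_samples):

/* pls_enabled        -> sample_count = 4           pattern = pan_sample_pattern(1)
 * force_samples == 0 -> sample_count = nr_samples  pattern = pan_sample_pattern(nr_samples)
 * force_samples == 1 -> sample_count = nr_samples  pattern = pan_sample_pattern(1)
 * force_samples >  1 -> sample_count = 1           pattern = pan_sample_pattern(force_samples)
 */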
static inline void
cs_emit_layer_fragment_state(struct cs_builder *b, struct cs_index fbd_ptr)
{
/* Emit the dynamic fragment state. This state may change per-layer. */
cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_0), fbd_ptr,
offsetof(struct pan_fbd_layer, flags0));
cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_2), fbd_ptr,
offsetof(struct pan_fbd_layer, flags2));
cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, Z_CLEAR), fbd_ptr,
offsetof(struct pan_fbd_layer, z_clear));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, TILER_DESCRIPTOR_POINTER), fbd_ptr,
offsetof(struct pan_fbd_layer, tiler));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, RTD_POINTER), fbd_ptr,
offsetof(struct pan_fbd_layer, rtd_pointer));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, DBD_POINTER), fbd_ptr,
offsetof(struct pan_fbd_layer, dbd_pointer));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_ARG), fbd_ptr,
offsetof(struct pan_fbd_layer, frame_argument));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_SHADER_DCD_POINTER), fbd_ptr,
offsetof(struct pan_fbd_layer, dcd_pointer));
cs_flush_loads(b);
}
#endif /* PAN_ARCH >= 14 */
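The cs_load*_to() calls above assume a CPU-side staging struct whose fields mirror the per-layer RUN_FRAGMENT2 staging registers. Its definition is not shown in this diff; a plausible reconstruction from the offsetof() uses in this file (field order and padding here are assumptions, not the actual declaration):

struct pan_fbd_layer {
   struct mali_fragment_flags_0_packed flags0; /* loaded into FLAGS_0 */
   struct mali_fragment_flags_2_packed flags2; /* loaded into FLAGS_2 */
   uint32_t z_clear;                  /* depth clear value, float bits */
   uint64_t tiler;                    /* TILER_DESCRIPTOR_POINTER */
   uint64_t rtd_pointer;              /* RTD_POINTER */
   uint64_t dbd_pointer;              /* DBD_POINTER */
   uint64_t frame_argument;           /* FRAME_ARG */
   uint64_t dcd_pointer;              /* FRAME_SHADER_DCD_POINTER */
};

FLAGS_1 is deliberately absent: it is static state written once by cs_emit_static_fragment_state() rather than reloaded per layer.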
static int
csf_oom_handler_init(struct panfrost_context *ctx)
{
@ -113,13 +196,14 @@ csf_oom_handler_init(struct panfrost_context *ctx)
cs_function_def(&b, &handler, handler_ctx) {
struct cs_index tiler_oom_ctx = cs_reg64(&b, TILER_OOM_CTX_REG);
struct cs_index counter = cs_reg32(&b, 47);
struct cs_index zero = cs_reg64(&b, 48);
struct cs_index flush_id = cs_reg32(&b, 48);
struct cs_index tiler_ctx = cs_reg64(&b, 50);
struct cs_index completed_top = cs_reg64(&b, 52);
struct cs_index completed_bottom = cs_reg64(&b, 54);
struct cs_index completed_chunks = cs_reg_tuple(&b, 52, 4);
struct cs_index counter = cs_reg32(&b, 31);
struct cs_index zero = cs_reg64(&b, 56);
struct cs_index flush_id = cs_reg32(&b, 58);
struct cs_index tiler_ctx = cs_reg64(&b, 60);
struct cs_index completed_top = cs_reg64(&b, 64);
struct cs_index completed_bottom = cs_reg64(&b, 66);
struct cs_index completed_chunks = cs_reg_tuple(&b, 64, 4);
struct cs_index fbd_pointer = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);
/* Ensure that the OTHER endpoint is valid */
#if PAN_ARCH >= 11
@ -133,12 +217,10 @@ csf_oom_handler_init(struct panfrost_context *ctx)
cs_load32_to(&b, counter, tiler_oom_ctx, FIELD_OFFSET(counter));
cs_wait_slot(&b, 0);
cs_if(&b, MALI_CS_CONDITION_GREATER, counter) {
cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
FBD_OFFSET(MIDDLE));
cs_load64_to(&b, fbd_pointer, tiler_oom_ctx, FBD_OFFSET(MIDDLE));
}
cs_else(&b) {
cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
FBD_OFFSET(FIRST));
cs_load64_to(&b, fbd_pointer, tiler_oom_ctx, FBD_OFFSET(FIRST));
}
cs_load32_to(&b, cs_sr_reg32(&b, FRAGMENT, BBOX_MIN), tiler_oom_ctx,
@ -147,11 +229,18 @@ csf_oom_handler_init(struct panfrost_context *ctx)
FIELD_OFFSET(bbox_max));
cs_move64_to(&b, cs_sr_reg64(&b, FRAGMENT, TEM_POINTER), 0);
cs_move32_to(&b, cs_sr_reg32(&b, FRAGMENT, TEM_ROW_STRIDE), 0);
#if PAN_ARCH >= 14
cs_emit_layer_fragment_state(&b, fbd_pointer);
#endif
cs_wait_slot(&b, 0);
/* Run the fragment job and wait */
cs_select_endpoint_sb(&b, 3);
#if PAN_ARCH >= 14
cs_run_fragment2(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
#else
cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
#endif
cs_wait_slot(&b, 3);
/* Increment counter */
@ -218,6 +307,21 @@ GENX(csf_cleanup_batch)(struct panfrost_batch *batch)
panfrost_pool_cleanup(&batch->csf.cs_chunk_pool);
}
#if PAN_ARCH >= 14
static inline struct pan_ptr
alloc_fbd(struct panfrost_batch *batch)
{
const struct pan_desc_alloc_info fbd_layer = {
.size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64),
.align = alignof(struct pan_fbd_layer),
.nelems = 1,
};
return pan_pool_alloc_desc_aggregate(
&batch->pool.base, fbd_layer, PAN_DESC(ZS_CRC_EXTENSION),
PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
}
#else
static inline struct pan_ptr
alloc_fbd(struct panfrost_batch *batch)
{
@ -225,6 +329,7 @@ alloc_fbd(struct panfrost_batch *batch)
&batch->pool.base, PAN_DESC(FRAMEBUFFER), PAN_DESC(ZS_CRC_EXTENSION),
PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
}
#endif /* PAN_ARCH >= 14 */
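On v14, the aggregate allocation above produces three back-to-back, 64-byte-aligned regions, which pan_emit_fbd() (see the pan_desc.c hunk below) fills in this order. A sketch of the resulting layout, assuming a ZS/CRC extension is present:

/* base                                  : struct pan_fbd_layer,
 *                                         padded to a 64-byte boundary
 * base + ALIGN_POT(sizeof(layer), 64)   : ZS_CRC_EXTENSION descriptor
 * previous + pan_size(ZS_CRC_EXTENSION) : RENDER_TARGET descriptors,
 *                                         MAX2(nr_cbufs, 1) entries
 */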
int
GENX(csf_init_batch)(struct panfrost_batch *batch)
@ -758,7 +863,7 @@ GENX(csf_preload_fb)(struct panfrost_batch *batch, struct pan_fb_info *fb)
(_ctx)->fbds[PAN_INCREMENTAL_RENDERING_##_pass##_PASS]
#define EMIT_FBD(_ctx, _pass, _fb, _tls, _tiler_ctx) \
GET_FBD(_ctx, _pass).gpu |= \
GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass).cpu)
GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass))
void
GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
@ -771,7 +876,7 @@ GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
/* Default framebuffer descriptor */
batch->framebuffer.gpu |=
GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
if (batch->draw_count == 0)
return;
@ -854,15 +959,21 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
cs_vt_end(b, cs_now());
}
struct cs_index fbd_pointer = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
/* Set up the fragment job */
cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
batch->framebuffer.gpu);
cs_move64_to(b, fbd_pointer, batch->framebuffer.gpu);
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
(batch->miny << 16) | batch->minx);
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
((batch->maxy - 1) << 16) | (batch->maxx - 1));
cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, TEM_POINTER), 0);
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, TEM_ROW_STRIDE), 0);
#if PAN_ARCH >= 14
cs_emit_static_fragment_state(b, batch, pfb);
cs_emit_layer_fragment_state(b, fbd_pointer);
#endif
/* Use different framebuffer descriptor if incremental rendering was
* triggered while tiling */
@ -871,13 +982,19 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
cs_load32_to(b, counter, cs_reg64(b, TILER_OOM_CTX_REG), 0);
cs_wait_slot(b, 0);
cs_if(b, MALI_CS_CONDITION_GREATER, counter) {
cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
GET_FBD(oom_ctx, LAST).gpu);
cs_move64_to(b, fbd_pointer, GET_FBD(oom_ctx, LAST).gpu);
#if PAN_ARCH >= 14
cs_emit_layer_fragment_state(b, fbd_pointer);
#endif
}
}
/* Run the fragment job and wait */
#if PAN_ARCH >= 14
cs_run_fragment2(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
#else
cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
#endif
cs_wait_slot(b, 2);
/* Gather freed heap chunks and add them to the heap context free list

View file

@ -29,7 +29,8 @@ struct pan_csf_tiler_oom_ctx {
/* Alternative framebuffer descriptors for incremental rendering */
struct pan_ptr fbds[PAN_INCREMENTAL_RENDERING_PASS_COUNT];
/* Bounding Box (Register 42 and 43) */
/* Bounding Box (Register MALI_FRAGMENT_SR_BBOX_MIN and
* MALI_FRAGMENT_SR_BBOX_MAX) */
uint32_t bbox_min;
uint32_t bbox_max;

View file

@ -257,8 +257,8 @@ GENX(jm_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
{
PAN_TRACE_FUNC(PAN_TRACE_GL_JM);
batch->framebuffer.gpu |= GENX(pan_emit_fbd)(
fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
batch->framebuffer.gpu |=
GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
}
void

View file

@ -1175,6 +1175,9 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
case 13:
panfrost_cmdstream_screen_init_v13(screen);
break;
case 14:
panfrost_cmdstream_screen_init_v14(screen);
break;
default:
debug_printf("panfrost: Unhandled architecture major %d", dev->arch);
panfrost_destroy_screen(&(screen->base));

View file

@ -155,6 +155,7 @@ void panfrost_cmdstream_screen_init_v9(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v10(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v12(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v13(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v14(struct panfrost_screen *screen);
#define perf_debug(ctx, ...) \
do { \

View file

@ -275,7 +275,7 @@ main(int argc, const char **argv)
unsigned target_arch = atoi(target_arch_str);
if (target_arch < 4 || target_arch > 13) {
if (target_arch < 4 || target_arch > 14) {
fprintf(stderr, "Unsupported target arch %d\n", target_arch);
return 1;
}

View file

@ -703,8 +703,10 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr)
assert(intr->intrinsic == nir_intrinsic_load_var_buf_pan ||
intr->intrinsic == nir_intrinsic_load_var_buf_flat_pan);
const unsigned arch = b->shader->arch;
/* These are only available on Valhall+ */
assert(b->shader->arch >= 9);
assert(arch >= 9);
const bool flat = intr->intrinsic == nir_intrinsic_load_var_buf_flat_pan;
const nir_alu_type src_type = nir_intrinsic_src_type(intr);
@ -757,19 +759,36 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr)
bool use_imm_form = false;
if (nir_src_is_const(intr->src[0])) {
imm_offset = nir_src_as_uint(intr->src[0]);
assert(imm_offset < pan_ld_var_buf_off_size(b->shader->arch));
assert(imm_offset < pan_ld_var_buf_off_size(arch));
use_imm_form = true;
}
/* On v14+, flat source formats are removed from LD_VAR_BUF/LD_VAR_BUF_IMM,
* so flat buffer varyings must use the dedicated LD_VAR_BUF_FLAT*.
*/
if (use_imm_form) {
bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
if (arch >= 14 && flat) {
bi_ld_var_buf_flat_imm_to(b, dest, regfmt, vecsize, imm_offset);
} else {
bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
BI_UPDATE_STORE, vecsize, imm_offset);
}
} else {
bi_index offset = bi_src_index(&intr->src[0]);
bi_ld_var_buf_to(b, sz, dest, src0, offset, regfmt, sample,
source_format, BI_UPDATE_STORE, vecsize);
if (arch >= 14 && flat) {
bi_ld_var_buf_flat_to(b, dest, offset, regfmt, vecsize);
} else {
bi_ld_var_buf_to(b, sz, dest, src0, offset, regfmt, sample,
source_format, BI_UPDATE_STORE, vecsize);
}
}
/* LD_VAR_BUF_FLAT* only support register formats F16 and F32. */
assert(
arch < 14 || !flat ||
(regfmt == BI_REGISTER_FORMAT_F16 || regfmt == BI_REGISTER_FORMAT_F32));
bi_split_def(b, &intr->def);
}
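In summary, the opcode selection above reduces to four cases (a condensed view of the branches, using the builder helpers already called in this function):

/* not (arch >= 14 && flat), constant offset -> bi_ld_var_buf_imm_to(...)
 * not (arch >= 14 && flat), register offset -> bi_ld_var_buf_to(...)
 * arch >= 14 && flat,       constant offset -> bi_ld_var_buf_flat_imm_to(...)
 * arch >= 14 && flat,       register offset -> bi_ld_var_buf_flat_to(...)
 */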

View file

@ -939,6 +939,32 @@
<imm name="index" start="12" size="4"/> <!-- 0 for pointx, 1 for pointy, 2 for fragw, 3 for fragz -->
</ins>
<ins name="LD_VAR_BUF_FLAT_IMM" title="Load immediate flat varying" message="varying" unit="V">
<opcode>
<op val="0x40" start="48" mask="0x1FF"/>
</opcode>
<desc>Fetches a given flat varying from hardware buffer</desc>
<slot/>
<vecsize/>
<regfmt/>
<sr write="true"/>
<sr_count count="format"/>
<imm name="index" start="8" size="11"/>
</ins>
<ins name="LD_VAR_BUF_FLAT" title="Load indirect flat varying" message="varying" unit="V">
<opcode>
<op val="0x5F" start="48" mask="0x1FF"/>
</opcode>
<desc>Fetches a given flat varying from hardware buffer</desc>
<slot/>
<vecsize/>
<regfmt/>
<sr write="true"/>
<sr_count count="format"/>
<src/>
</ins>
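As a quick cross-check, these field positions line up with the encodings exercised by the packing tests further down (the breakdowns below are inferred from the XML and the test values, not from a spec):

/* LD_VAR_BUF_FLAT, F32, V4, source r61 (test value):
 *   0x005f80083200003d
 *   (v >> 48) & 0x1FF = 0x05F  -> opcode
 *   v & 0xFF          = 0x3d   -> source register 61
 * LD_VAR_BUF_FLAT_IMM, F16, V4, index 0x12 (test value):
 *   0x0040800433001200
 *   (v >> 48) & 0x1FF = 0x040  -> opcode
 *   (v >> 8) & 0x7FF  = 0x012  -> immediate varying index
 */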
<group name="LD_VAR_BUF_IMM" title="Load immediate varying" message="varying" unit="V">
<desc>Interpolates a given varying from hardware buffer</desc>
<ins name="LD_VAR_BUF_IMM.f32">

View file

@ -1,5 +1,6 @@
/*
* Copyright (C) 2021 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@ -9,9 +10,9 @@
#include <gtest/gtest.h>
#define CASE(instr, expected) \
#define CASE_ARCH(instr, arch, expected) \
do { \
uint64_t _value = va_pack_instr(instr, 10); \
uint64_t _value = va_pack_instr(instr, arch); \
if (_value != expected) { \
fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, \
(uint64_t)expected); \
@ -21,6 +22,8 @@
} \
} while (0)
#define CASE(instr, expected) CASE_ARCH(instr, 10, expected)
class ValhallPacking : public testing::Test {
protected:
ValhallPacking()
@ -278,11 +281,41 @@ TEST_F(ValhallPacking, LdVarBufImmF16)
BI_VECSIZE_V4, 0),
0x005d80843300003d);
CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID,
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 8),
0x005d80443308003d);
CASE_ARCH(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16,
BI_SAMPLE_CENTROID, BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 8),
10, 0x005d80443308003d);
CASE_ARCH(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16,
BI_SAMPLE_CENTROID, BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 8),
11, 0x005d80443300083d);
}
TEST_F(ValhallPacking, LdVarBufFlatImmFormat)
{
CASE_ARCH(bi_ld_var_buf_flat_imm_to(b, bi_register(0),
BI_REGISTER_FORMAT_F32,
BI_VECSIZE_V4, 0x12),
14, 0x0040800832001200);
CASE_ARCH(bi_ld_var_buf_flat_imm_to(b, bi_register(0),
BI_REGISTER_FORMAT_F16,
BI_VECSIZE_V4, 0x12),
14, 0x0040800433001200);
}
TEST_F(ValhallPacking, LdVarBufFlat)
{
CASE_ARCH(bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4),
14, 0x005f80083200003d);
CASE_ARCH(bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4),
14, 0x005f80043300003d);
}
TEST_F(ValhallPacking, LeaBufImm)

View file

@ -77,6 +77,8 @@ walk_bir_shader(bi_context *ctx, struct pan_shader_info *info)
if (instr->sample == BI_SAMPLE_CENTROID)
info->fs.hsr.centroid_interpolation = true;
FALLTHROUGH;
case BI_OPCODE_LD_VAR_BUF_FLAT:
case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
case BI_OPCODE_LD_VAR_FLAT:
case BI_OPCODE_LD_VAR_FLAT_IMM:
if (!found_atest)

View file

@ -568,6 +568,10 @@ va_pack_alu(const bi_instr *I, unsigned arch)
hex |= ((uint64_t)I->sample) << 38;
break;
case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
hex |= ((uint64_t)I->index) << 8;
break;
case BI_OPCODE_LD_ATTR_IMM:
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->attribute_index) << 20;

View file

@ -52,6 +52,7 @@ pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg)
case 11:
case 12:
case 13:
case 14:
return merge_wg ? &bifrost_nir_options_v11_merge_wg :
&bifrost_nir_options_v11;
default:

View file

@ -824,7 +824,11 @@ cs_instr_is_asynchronous(enum mali_cs_opcode opcode, uint16_t wait_mask)
case MALI_CS_OPCODE_STORE_MULTIPLE:
case MALI_CS_OPCODE_RUN_COMPUTE:
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
#if PAN_ARCH >= 14
case MALI_CS_OPCODE_RUN_FRAGMENT2:
#else
case MALI_CS_OPCODE_RUN_FRAGMENT:
#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN:
#if PAN_ARCH >= 12
case MALI_CS_OPCODE_RUN_IDVS2:
@ -1614,6 +1618,22 @@ cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool malloc_enable,
}
#endif
#if PAN_ARCH >= 14
static inline void
cs_run_fragment2(struct cs_builder *b, bool enable_tem,
enum mali_tile_render_order tile_order)
{
/* Staging regs */
cs_flush_loads(b);
b->req_resource_mask |= CS_FRAG_RES;
cs_emit(b, RUN_FRAGMENT2, I) {
I.enable_tem = enable_tem;
I.tile_order = tile_order;
}
}
#else
static inline void
cs_run_fragment(struct cs_builder *b, bool enable_tem,
enum mali_tile_render_order tile_order)
@ -1628,6 +1648,7 @@ cs_run_fragment(struct cs_builder *b, bool enable_tem,
I.tile_order = tile_order;
}
}
#endif
static inline void
cs_run_fullscreen(struct cs_builder *b, uint32_t flags_override,
@ -2469,6 +2490,53 @@ cs_trace_preamble(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
(int16_t)(offsetof(struct cs_##__type##_trace, __field) - \
sizeof(struct cs_##__type##_trace))
#if PAN_ARCH >= 14
#define CS_RUN_FRAGMENT2_SR_COUNT 56
#define CS_RUN_FRAGMENT2_SR_MASK BITFIELD64_RANGE(0, CS_RUN_FRAGMENT2_SR_COUNT)
struct cs_run_fragment2_trace {
uint64_t ip;
uint32_t sr[CS_RUN_FRAGMENT2_SR_COUNT];
} __attribute__((aligned(64)));
static inline void
cs_trace_run_fragment2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
struct cs_index scratch_regs, bool enable_tem,
enum mali_tile_render_order tile_order)
{
if (likely(!ctx->enabled)) {
cs_run_fragment2(b, enable_tem, tile_order);
return;
}
struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);
cs_trace_preamble(b, ctx, scratch_regs,
sizeof(struct cs_run_fragment2_trace));
/* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
* won't point to the right instruction. */
cs_load_ip_to(b, data);
cs_run_fragment2(b, enable_tem, tile_order);
cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_fragment2, ip));
ASSERTED unsigned sr_count = 0;
unsigned sr_offset = cs_trace_field_offset(run_fragment2, sr);
for (unsigned i = 0; i < CS_RUN_FRAGMENT2_SR_COUNT; i += 16) {
unsigned mask = (CS_RUN_FRAGMENT2_SR_MASK >> i) & BITFIELD_MASK(16);
if (!mask)
continue;
cs_store(b, cs_reg_tuple(b, i, util_last_bit(mask)), tracebuf_addr, mask,
sr_offset);
sr_offset += util_bitcount(mask) * sizeof(uint32_t);
sr_count += util_bitcount(mask);
}
assert(sr_count == CS_RUN_FRAGMENT2_SR_COUNT);
cs_flush_stores(b);
}
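Since CS_RUN_FRAGMENT2_SR_COUNT is 56, the store loop above runs four iterations; a worked trace of the masks and byte offsets, derived from the definitions above:

/* i = 0 : mask = 0xffff -> store sr0..sr15,  offset += 64
 * i = 16: mask = 0xffff -> store sr16..sr31, offset += 64
 * i = 32: mask = 0xffff -> store sr32..sr47, offset += 64
 * i = 48: mask = 0x00ff -> store sr48..sr55, offset += 32
 * 16 + 16 + 16 + 8 = 56 registers, matching the final assert.
 */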
#else
struct cs_run_fragment_trace {
uint64_t ip;
uint32_t sr[7];
@ -2500,6 +2568,7 @@ cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
cs_trace_field_offset(run_fragment, sr));
cs_flush_stores(b);
}
#endif
#if PAN_ARCH >= 13
#define CS_RUN_FULLSCREEN_SR_MASK \

View file

@ -152,22 +152,22 @@ pandecode_rt(struct pandecode_context *ctx, unsigned index, uint64_t gpu_va)
}
static void
pandecode_rts(struct pandecode_context *ctx, uint64_t gpu_va,
const struct MALI_FRAMEBUFFER_PARAMETERS *fb)
void
GENX(pandecode_rts)(struct pandecode_context *ctx, uint64_t gpu_va,
uint32_t render_target_count)
{
pandecode_log(ctx, "Color Render Targets @%" PRIx64 ":\n", gpu_va);
ctx->indent++;
for (int i = 0; i < (fb->render_target_count); i++)
for (int i = 0; i < render_target_count; i++)
pandecode_rt(ctx, i, gpu_va);
ctx->indent--;
pandecode_log(ctx, "\n");
}
static void
pandecode_zs_crc_ext(struct pandecode_context *ctx, uint64_t gpu_va)
void
GENX(pandecode_zs_crc_ext)(struct pandecode_context *ctx, uint64_t gpu_va)
{
const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(
ctx, zs_crc_packed, (uint64_t)gpu_va);
@ -223,22 +223,65 @@ pandecode_zs_crc_ext(struct pandecode_context *ctx, uint64_t gpu_va)
#if PAN_ARCH >= 6
static void
pandecode_sample_locations(struct pandecode_context *ctx, const void *fb)
void
GENX(pandecode_frame_shader_dcds)(struct pandecode_context *ctx,
uint64_t dcd_pointer, unsigned pre_frame_0,
unsigned pre_frame_1, unsigned post_frame,
unsigned job_type_param, uint64_t gpu_id)
{
pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);
const unsigned dcd_size = pan_size(DRAW);
const uint16_t *PANDECODE_PTR_VAR(ctx, samples, params.sample_locations);
if (pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
const struct mali_draw_packed *PANDECODE_PTR_VAR(
ctx, dcd, dcd_pointer + (0 * dcd_size));
pan_unpack(dcd, DRAW, draw)
;
pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n", dcd_pointer,
pre_frame_0);
ctx->indent++;
GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
ctx->indent--;
}
pandecode_log(ctx, "Sample locations @%" PRIx64 ":\n",
params.sample_locations);
if (pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
const struct mali_draw_packed *PANDECODE_PTR_VAR(
ctx, dcd, dcd_pointer + (1 * dcd_size));
pan_unpack(dcd, DRAW, draw)
;
pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n",
dcd_pointer + (1 * dcd_size));
ctx->indent++;
GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
ctx->indent--;
}
if (post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
const struct mali_draw_packed *PANDECODE_PTR_VAR(
ctx, dcd, dcd_pointer + (2 * dcd_size));
pan_unpack(dcd, DRAW, draw)
;
pandecode_log(ctx, "Post frame:\n");
ctx->indent++;
GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
ctx->indent--;
}
}
void
GENX(pandecode_sample_locations)(struct pandecode_context *ctx,
uint64_t sample_locations)
{
const uint16_t *PANDECODE_PTR_VAR(ctx, samples, sample_locations);
pandecode_log(ctx, "Sample locations @%" PRIx64 ":\n", sample_locations);
for (int i = 0; i < 33; i++) {
pandecode_log(ctx, " (%d, %d),\n", samples[2 * i] - 128,
samples[2 * i + 1] - 128);
}
}
#endif
#endif /* PAN_ARCH >= 6 */
#if PAN_ARCH < 14
struct pandecode_fbd
GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
bool is_fragment, uint64_t gpu_id)
@ -248,46 +291,17 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
DUMP_UNPACKED(ctx, FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
#if PAN_ARCH >= 6
pandecode_sample_locations(ctx, fb);
GENX(pandecode_sample_locations)(ctx, params.sample_locations);
unsigned dcd_size = pan_size(DRAW);
unsigned job_type_param = 0;
#if PAN_ARCH <= 9
job_type_param = MALI_JOB_TYPE_FRAGMENT;
#endif
if (params.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
const struct mali_draw_packed *PANDECODE_PTR_VAR(
ctx, dcd, params.frame_shader_dcds + (0 * dcd_size));
pan_unpack(dcd, DRAW, draw);
pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n",
params.frame_shader_dcds, params.pre_frame_0);
ctx->indent++;
GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
ctx->indent--;
}
if (params.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
const struct mali_draw_packed *PANDECODE_PTR_VAR(
ctx, dcd, params.frame_shader_dcds + (1 * dcd_size));
pan_unpack(dcd, DRAW, draw);
pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n",
params.frame_shader_dcds + (1 * dcd_size));
ctx->indent++;
GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
ctx->indent--;
}
if (params.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
const struct mali_draw_packed *PANDECODE_PTR_VAR(
ctx, dcd, params.frame_shader_dcds + (2 * dcd_size));
pan_unpack(dcd, DRAW, draw);
pandecode_log(ctx, "Post frame:\n");
ctx->indent++;
GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
ctx->indent--;
}
GENX(pandecode_frame_shader_dcds)(ctx, params.frame_shader_dcds,
params.pre_frame_0, params.pre_frame_1,
params.post_frame, job_type_param, gpu_id);
#else
DUMP_SECTION(ctx, FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n");
@ -312,13 +326,13 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
gpu_va += pan_size(FRAMEBUFFER);
if (params.has_zs_crc_extension) {
pandecode_zs_crc_ext(ctx, gpu_va);
GENX(pandecode_zs_crc_ext)(ctx, gpu_va);
gpu_va += pan_size(ZS_CRC_EXTENSION);
}
if (is_fragment)
pandecode_rts(ctx, gpu_va, &params);
GENX(pandecode_rts)(ctx, gpu_va, params.render_target_count);
return (struct pandecode_fbd){
.rt_count = params.render_target_count,
@ -336,6 +350,7 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
};
#endif
}
#endif /* PAN_ARCH < 14 */
#if PAN_ARCH >= 5
uint64_t

View file

@ -132,6 +132,13 @@ void pandecode_cs_binary_v13(struct pandecode_context *ctx, uint64_t bin,
void pandecode_cs_trace_v13(struct pandecode_context *ctx, uint64_t trace,
uint32_t trace_size, uint64_t gpu_id);
void pandecode_interpret_cs_v14(struct pandecode_context *ctx, uint64_t queue,
uint32_t size, uint64_t gpu_id, uint32_t *regs);
void pandecode_cs_binary_v14(struct pandecode_context *ctx, uint64_t bin,
uint32_t bin_size);
void pandecode_cs_trace_v14(struct pandecode_context *ctx, uint64_t trace,
uint32_t trace_size, uint64_t gpu_id);
/* Logging infrastructure */
static void
pandecode_make_indent(struct pandecode_context *ctx)
@ -275,4 +282,22 @@ void GENX(pandecode_depth_stencil)(struct pandecode_context *ctx,
#endif
#if PAN_ARCH >= 6
void GENX(pandecode_sample_locations)(struct pandecode_context *ctx,
uint64_t sample_locations);
void
GENX(pandecode_frame_shader_dcds)(struct pandecode_context *ctx,
uint64_t dcd_pointer, unsigned pre_frame_0,
unsigned pre_frame_1, unsigned post_frame,
unsigned job_type_param, uint64_t gpu_id);
#endif
#if PAN_ARCH >= 5
void GENX(pandecode_rts)(struct pandecode_context *ctx, uint64_t gpu_va,
uint32_t render_target_count);
void GENX(pandecode_zs_crc_ext)(struct pandecode_context *ctx, uint64_t gpu_va);
#endif
#endif /* __MMAP_TRACE_H__ */

View file

@ -423,6 +423,9 @@ pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va,
case 13:
pandecode_interpret_cs_v13(ctx, queue_gpu_va, size, gpu_id, regs);
break;
case 14:
pandecode_interpret_cs_v14(ctx, queue_gpu_va, size, gpu_id, regs);
break;
default:
UNREACHABLE("Unsupported architecture");
}
@ -446,6 +449,9 @@ pandecode_cs_binary(struct pandecode_context *ctx, uint64_t bin_gpu_va,
case 13:
pandecode_cs_binary_v13(ctx, bin_gpu_va, size);
break;
case 14:
pandecode_cs_binary_v14(ctx, bin_gpu_va, size);
break;
default:
UNREACHABLE("Unsupported architecture");
}
@ -469,6 +475,9 @@ pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va,
case 13:
pandecode_cs_trace_v13(ctx, trace_gpu_va, size, gpu_id);
break;
case 14:
pandecode_cs_trace_v14(ctx, trace_gpu_va, size, gpu_id);
break;
default:
UNREACHABLE("Unsupported architecture");
}

View file

@ -1,5 +1,6 @@
/*
* Copyright (C) 2022-2023 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@ -89,6 +90,12 @@ static const char *defer_modes_str[] = {
#define defer_mode_str(I) ""
#endif
#if PAN_ARCH <= 13
#define assert_no_progress_inc(I) assert(!I.progress_increment)
#else
#define assert_no_progress_inc(I) do {} while (0)
#endif
static void
print_cs_instr(FILE *fp, const uint64_t *instr)
{
@ -117,28 +124,27 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
case MALI_CS_OPCODE_WAIT: {
cs_unpack(instr, CS_WAIT, I);
fprintf(fp, "WAIT%s #%x", I.progress_increment ? ".progress_inc" : "",
I.wait_mask);
assert_no_progress_inc(I);
fprintf(fp, "WAIT #%x", I.wait_mask);
break;
}
case MALI_CS_OPCODE_RUN_COMPUTE: {
const char *axes[4] = {"x_axis", "y_axis", "z_axis"};
cs_unpack(instr, CS_RUN_COMPUTE, I);
assert_no_progress_inc(I);
/* Print the instruction. Ignore the selects and the flags override
* since we'll print them implicitly later.
*/
#if PAN_ARCH >= 12
fprintf(fp, "RUN_COMPUTE%s.%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
I.progress_increment ? ".progress_inc" : "", axes[I.task_axis],
I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
I.task_increment, I.ep_limit);
fprintf(fp, "RUN_COMPUTE.%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
axes[I.task_axis], I.srt_select, I.spd_select, I.tsd_select,
I.fau_select, I.task_increment, I.ep_limit);
#else
fprintf(fp, "RUN_COMPUTE%s.%s.srt%d.spd%d.tsd%d.fau%d #%u",
I.progress_increment ? ".progress_inc" : "", axes[I.task_axis],
I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
I.task_increment);
fprintf(fp, "RUN_COMPUTE.%s.srt%d.spd%d.tsd%d.fau%d #%u",
axes[I.task_axis], I.srt_select, I.spd_select, I.tsd_select,
I.fau_select, I.task_increment);
#endif
break;
}
@ -146,8 +152,8 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
#if PAN_ARCH == 10
case MALI_CS_OPCODE_RUN_TILING: {
cs_unpack(instr, CS_RUN_TILING, I);
fprintf(fp, "RUN_TILING%s.srt%d.spd%d.tsd%d.fau%d",
I.progress_increment ? ".progress_inc" : "", I.srt_select,
assert_no_progress_inc(I);
fprintf(fp, "RUN_TILING.srt%d.spd%d.tsd%d.fau%d", I.srt_select,
I.spd_select, I.tsd_select, I.fau_select);
break;
}
@ -156,10 +162,10 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
#if PAN_ARCH < 12
case MALI_CS_OPCODE_RUN_IDVS: {
cs_unpack(instr, CS_RUN_IDVS, I);
assert_no_progress_inc(I);
fprintf(
fp,
"RUN_IDVS%s%s%s.varying_srt%d.varying_fau%d.varying_tsd%d.frag_srt%d.frag_tsd%d r%u, #%" PRIx64,
I.progress_increment ? ".progress_inc" : "",
"RUN_IDVS%s%s.varying_srt%d.varying_fau%d.varying_tsd%d.frag_srt%d.frag_tsd%d r%u, #%" PRIx64,
I.malloc_enable ? "" : ".no_malloc",
I.draw_id_register_enable ? ".draw_id_enable" : "",
I.varying_srt_select, I.varying_fau_select, I.varying_tsd_select,
@ -170,6 +176,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
#else
case MALI_CS_OPCODE_RUN_IDVS2: {
cs_unpack(instr, CS_RUN_IDVS2, I);
assert_no_progress_inc(I);
const char *vertex_shading_str[] = {
".early",
@ -178,8 +185,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
".INVALID",
};
fprintf(fp, "RUN_IDVS2%s%s%s%s r%u, #%" PRIx64,
I.progress_increment ? ".progress_inc" : "",
fprintf(fp, "RUN_IDVS2%s%s%s r%u, #%" PRIx64,
I.malloc_enable ? "" : ".no_malloc",
I.draw_id_register_enable ? ".draw_id_enable" : "",
vertex_shading_str[I.vertex_shading_mode], I.draw_id,
@ -317,32 +323,37 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
case MALI_CS_OPCODE_SHARED_SB_INC: {
cs_unpack(instr, CS_SHARED_SB_INC, I);
const char *progress_increment_name[] = {
".no_increment",
".increment",
};
fprintf(fp, "SHARED_SB_INC%s%s #%u, #%u",
progress_increment_name[I.progress_increment],
defer_mode_str(I), I.sb_mask, I.shared_entry);
assert_no_progress_inc(I);
fprintf(fp, "SHARED_SB_INC%s #%u, #%u", defer_mode_str(I), I.sb_mask,
I.shared_entry);
break;
}
case MALI_CS_OPCODE_SHARED_SB_DEC: {
cs_unpack(instr, CS_SHARED_SB_DEC, I);
const char *progress_increment_name[] = {
".no_increment",
".increment",
};
fprintf(fp, "SHARED_SB_DEC%s #%u",
progress_increment_name[I.progress_increment], I.shared_entry);
assert_no_progress_inc(I);
fprintf(fp, "SHARED_SB_DEC #%u", I.shared_entry);
break;
}
#endif
#if PAN_ARCH >= 14
case MALI_CS_OPCODE_RUN_FRAGMENT2: {
static const char *tile_order[] = {
"zorder", "horizontal", "vertical", "unknown",
"unknown", "rev_horizontal", "rev_vertical", "unknown",
"unknown", "unknown", "unknown", "unknown",
"unknown", "unknown", "unknown", "unknown",
};
cs_unpack(instr, CS_RUN_FRAGMENT2, I);
fprintf(fp, "RUN_FRAGMENT2%s.tile_order=%s",
I.enable_tem ? ".tile_enable_map_enable" : "",
tile_order[I.tile_order]);
break;
}
#else
case MALI_CS_OPCODE_RUN_FRAGMENT: {
static const char *tile_order[] = {
"zorder", "horizontal", "vertical", "unknown",
@ -350,27 +361,27 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
"unknown", "unknown", "unknown", "unknown",
"unknown", "unknown", "unknown", "unknown",
};
cs_unpack(instr, CS_RUN_FRAGMENT, I);
fprintf(fp, "RUN_FRAGMENT%s%s.tile_order=%s",
I.progress_increment ? ".progress_inc" : "",
cs_unpack(instr, CS_RUN_FRAGMENT, I);
assert_no_progress_inc(I);
fprintf(fp, "RUN_FRAGMENT%s.tile_order=%s",
I.enable_tem ? ".tile_enable_map_enable" : "",
tile_order[I.tile_order]);
break;
}
#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
cs_unpack(instr, CS_RUN_FULLSCREEN, I);
fprintf(fp, "RUN_FULLSCREEN%s r%u, #%" PRIx64,
I.progress_increment ? ".progress_inc" : "", I.dcd,
I.flags_override);
assert_no_progress_inc(I);
fprintf(fp, "RUN_FULLSCREEN r%u, #%" PRIx64, I.dcd, I.flags_override);
break;
}
case MALI_CS_OPCODE_FINISH_TILING: {
cs_unpack(instr, CS_FINISH_TILING, I);
fprintf(fp, "FINISH_TILING%s",
I.progress_increment ? ".progress_inc" : "");
assert_no_progress_inc(I);
fprintf(fp, "FINISH_TILING");
break;
}
@ -443,12 +454,6 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
break;
}
case MALI_CS_OPCODE_PROGRESS_WAIT: {
cs_unpack(instr, CS_PROGRESS_WAIT, I);
fprintf(fp, "PROGRESS_WAIT d%u, #%u", I.source, I.queue);
break;
}
case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: {
cs_unpack(instr, CS_SET_EXCEPTION_HANDLER, I);
fprintf(fp, "SET_EXCEPTION_HANDLER d%u, r%u", I.address, I.length);
@ -547,29 +552,17 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
break;
}
case MALI_CS_OPCODE_PROGRESS_STORE: {
cs_unpack(instr, CS_PROGRESS_STORE, I);
fprintf(fp, "PROGRESS_STORE d%u", I.source);
break;
}
case MALI_CS_OPCODE_PROGRESS_LOAD: {
cs_unpack(instr, CS_PROGRESS_LOAD, I);
fprintf(fp, "PROGRESS_LOAD d%u", I.destination);
break;
}
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: {
cs_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I);
assert_no_progress_inc(I);
#if PAN_ARCH >= 12
fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
I.progress_increment ? ".progress_inc" : "", I.srt_select,
I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task,
I.ep_limit);
fprintf(fp, "RUN_COMPUTE_INDIRECT.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
I.workgroups_per_task, I.ep_limit);
#else
fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u",
I.progress_increment ? ".progress_inc" : "", I.srt_select,
I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task);
fprintf(fp, "RUN_COMPUTE_INDIRECT.srt%d.spd%d.tsd%d.fau%d #%u",
I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
I.workgroups_per_task);
#endif
break;
@ -1097,6 +1090,99 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
}
#endif
#if PAN_ARCH >= 14
static void
pandecode_run_fragment2(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT2 *I)
{
if (qctx->in_exception_handler)
return;
ctx->indent++;
pandecode_log(ctx, "Iter trace ID0: %" PRIu32 "\n",
cs_get_u32(qctx, MALI_FRAGMENT_SR_ITER_TRACE_ID0));
pandecode_log(ctx, "Iter trace ID1: %" PRIu32 "\n",
cs_get_u32(qctx, MALI_FRAGMENT_SR_ITER_TRACE_ID1));
pandecode_log(ctx, "TEM pointer: %" PRIx64 "\n",
cs_get_u64(qctx, MALI_FRAGMENT_SR_TEM_POINTER));
pandecode_log(ctx, "TEM row stride: %" PRIu32 "\n",
cs_get_u32(qctx, MALI_FRAGMENT_SR_TEM_ROW_STRIDE));
for (unsigned i = 0; i < 11; ++i) {
const unsigned reg = MALI_FRAGMENT_SR_IRD_BUFFER_POINTER_0 + (i * 2);
pandecode_log(ctx, "IRD buffer pointer %u: %" PRIx64 "\n", i,
cs_get_u64(qctx, reg));
}
DUMP_CL(ctx, FRAGMENT_FLAGS_3, &qctx->regs[MALI_FRAGMENT_SR_FLAGS_3],
"Flags 3:\n");
DUMP_CL(ctx, FRAGMENT_BOUNDING_BOX, &qctx->regs[MALI_FRAGMENT_SR_BBOX_MIN],
"Bounding Box:\n");
DUMP_CL(ctx, FRAME_SIZE, &qctx->regs[MALI_FRAGMENT_SR_FRAME_SIZE],
"Frame size:\n");
pan_unpack((const struct mali_fragment_flags_0_packed *)&qctx
->regs[MALI_FRAGMENT_SR_FLAGS_0],
FRAGMENT_FLAGS_0, flags0_unpacked);
DUMP_UNPACKED(ctx, FRAGMENT_FLAGS_0, flags0_unpacked, "Flags 0:\n");
pan_unpack((const struct mali_fragment_flags_1_packed *)&qctx
->regs[MALI_FRAGMENT_SR_FLAGS_1],
FRAGMENT_FLAGS_1, flags1_unpacked);
DUMP_UNPACKED(ctx, FRAGMENT_FLAGS_1, flags1_unpacked, "Flags 1:\n");
DUMP_CL(ctx, FRAGMENT_FLAGS_2, &qctx->regs[MALI_FRAGMENT_SR_FLAGS_2],
"Flags 2:\n");
pandecode_log(ctx, "Z clear: %f\n",
uif(cs_get_u32(qctx, MALI_FRAGMENT_SR_Z_CLEAR)));
const uint64_t tiler_pointer =
cs_get_u64(qctx, MALI_FRAGMENT_SR_TILER_DESCRIPTOR_POINTER);
pandecode_log(ctx, "Tiler descriptor pointer: 0x%" PRIx64 "\n",
tiler_pointer);
const uint64_t rtd_pointer = cs_get_u64(qctx, MALI_FRAGMENT_SR_RTD_POINTER);
pandecode_log(ctx, "RTD pointer: 0x%" PRIx64 "\n", rtd_pointer);
const uint64_t dbd_pointer = cs_get_u64(qctx, MALI_FRAGMENT_SR_DBD_POINTER);
pandecode_log(ctx, "DBD pointer: 0x%" PRIx64 "\n", dbd_pointer);
pandecode_log(ctx, "Frame argument: %" PRIx64 "\n",
cs_get_u64(qctx, MALI_FRAGMENT_SR_FRAME_ARG));
const uint64_t sample_locations =
cs_get_u64(qctx, MALI_FRAGMENT_SR_SAMPLE_POSITION_ARRAY_POINTER);
pandecode_log(ctx, "Sample locations: 0x%" PRIx64 "\n", sample_locations);
const uint64_t dcd_pointer =
cs_get_u64(qctx, MALI_FRAGMENT_SR_FRAME_SHADER_DCD_POINTER);
pandecode_log(ctx, "Frame shader DCD pointer: 0x%" PRIx64 "\n", dcd_pointer);
DUMP_CL(ctx, VRS_IMAGE, &qctx->regs[MALI_FRAGMENT_SR_VRS_IMAGE],
"VRS image:\n");
GENX(pandecode_sample_locations)(ctx, sample_locations);
const unsigned job_type_param = 0;
GENX(pandecode_frame_shader_dcds)(ctx, dcd_pointer,
flags0_unpacked.pre_frame_0,
flags0_unpacked.pre_frame_1,
flags0_unpacked.post_frame,
job_type_param, qctx->gpu_id);
if (tiler_pointer)
GENX(pandecode_tiler)(ctx, tiler_pointer);
if (dbd_pointer)
GENX(pandecode_zs_crc_ext)(ctx, dbd_pointer);
if (rtd_pointer)
GENX(pandecode_rts)(ctx, rtd_pointer, flags1_unpacked.render_target_count);
ctx->indent--;
}
#else
static void
pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT *I)
@ -1115,6 +1201,7 @@ pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp,
ctx->indent--;
}
#endif /* PAN_ARCH >= 14 */
static void
pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp,
@ -1261,11 +1348,19 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
}
#endif
#if PAN_ARCH >= 14
case MALI_CS_OPCODE_RUN_FRAGMENT2: {
cs_unpack(bytes, CS_RUN_FRAGMENT2, I);
pandecode_run_fragment2(ctx, fp, qctx, &I);
break;
}
#else
case MALI_CS_OPCODE_RUN_FRAGMENT: {
cs_unpack(bytes, CS_RUN_FRAGMENT, I);
pandecode_run_fragment(ctx, fp, qctx, &I);
break;
}
#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
cs_unpack(bytes, CS_RUN_FULLSCREEN, I);
@ -2192,18 +2287,6 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg,
break;
}
case MALI_CS_OPCODE_PROGRESS_LOAD: {
cs_unpack(instr, CS_PROGRESS_LOAD, I);
for (unsigned i = 0; i < 16; i++) {
if (BITSET_TEST(track_map, I.destination) ||
BITSET_TEST(track_map, I.destination + 1)) {
ibranch->has_unknown_targets = true;
return;
}
}
break;
}
default:
break;
}
@ -2430,7 +2513,12 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin,
#else
case MALI_CS_OPCODE_RUN_IDVS:
#endif
#if PAN_ARCH >= 14
case MALI_CS_OPCODE_RUN_FRAGMENT2:
#else
case MALI_CS_OPCODE_RUN_FRAGMENT:
#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN:
case MALI_CS_OPCODE_RUN_COMPUTE:
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
@ -2539,6 +2627,19 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
}
#endif
#if PAN_ARCH >= 14
case MALI_CS_OPCODE_RUN_FRAGMENT2: {
struct cs_run_fragment2_trace *frag_trace = trace_data;
assert(trace_size >= sizeof(*frag_trace));
cs_unpack(instr, CS_RUN_FRAGMENT2, I);
memcpy(&regs[0], frag_trace->sr, sizeof(frag_trace->sr));
pandecode_run_fragment2(ctx, ctx->dump_stream, &qctx, &I);
trace_data = frag_trace + 1;
trace_size -= sizeof(*frag_trace);
break;
}
#else
case MALI_CS_OPCODE_RUN_FRAGMENT: {
struct cs_run_fragment_trace *frag_trace = trace_data;
@ -2550,6 +2651,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
trace_size -= sizeof(*frag_trace);
break;
}
#endif
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
struct cs_run_fullscreen_trace *fs_trace = trace_data;

View file

@ -61,6 +61,9 @@
#elif (PAN_ARCH == 13)
#define GENX(X) X##_v13
#include "genxml/v13_pack.h"
#elif (PAN_ARCH == 14)
#define GENX(X) X##_v14
#include "genxml/v14_pack.h"
#else
#error "Need to add suffixing macro for this architecture"
#endif

View file

@ -3,7 +3,7 @@
# SPDX-License-Identifier: MIT
pan_packers = []
foreach packer : ['common', 'v4', 'v5', 'v6', 'v7', 'v9', 'v10', 'v12', 'v13']
foreach packer : ['common', 'v4', 'v5', 'v6', 'v7', 'v9', 'v10', 'v12', 'v13', 'v14']
pan_packers += custom_target(
packer + '_pack.h',
input : ['gen_pack.py', packer + '.xml'],
@ -20,7 +20,7 @@ idep_pan_packers = declare_dependency(
libpanfrost_decode_per_arch = []
foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13']
foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13', '14']
libpanfrost_decode_per_arch += static_library(
'pandecode-arch-v' + ver,
['decode.c', 'decode_jm.c', 'decode_csf.c', pan_packers],

View file

@ -1,5 +1,6 @@
<!--
Copyright (C) 2020 Collabora Ltd.
Copyright (C) 2026 Arm Ltd.
SPDX-License-Identifier: MIT
-->
@ -84,6 +85,7 @@
<enum name="Address Mode">
<value name="Flat" value="0"/>
<value name="Packed" value="1"/>
<value name="Out of bounds" value="8"/>
</enum>
<enum name="Format">
@ -132,6 +134,7 @@
<value name="A2 YUV10" value="41"/>
<value name="YUYAAYVYAA" value="42"/>
<!--- TODO: revisit YUV -->
<value name="Y10U10V10_420" value="43"/>
<value name="YUYV10" value="44"/>
<value name="VYUY10" value="45"/>
<value name="Y10 UV10 422" value="46"/>
@ -1163,6 +1166,13 @@
<enum name="Clump Ordering">
<value name="Tiled U-Interleaved" value="1"/>
<value name="Linear" value="2"/>
<!-- Block-linear interleaved clump orderings are not available on
all v10 architectures. -->
<value name="Block-linear interleaved 16x16" value="3"/>
<value name="Block-linear interleaved 8x16" value="4"/>
<value name="Block-linear interleaved 8x8" value="5"/>
<value name="Interleaved 64k" value="8"/>
</enum>

View file

@ -1,5 +1,6 @@
<!--
Copyright (C) 2025 Collabora Ltd.
Copyright (C) 2026 Arm Ltd.
SPDX-License-Identifier: MIT
-->
@ -84,6 +85,7 @@
<enum name="Address Mode">
<value name="Flat" value="0"/>
<value name="Packed" value="1"/>
<value name="Out of bounds" value="8"/>
</enum>
<enum name="Format">
@ -132,6 +134,7 @@
<value name="A2 YUV10" value="41"/>
<value name="YUYAAYVYAA" value="42"/>
<!--- TODO: revisit YUV -->
<value name="Y10U10V10_420" value="43"/>
<value name="YUYV10" value="44"/>
<value name="VYUY10" value="45"/>
<value name="Y10 UV10 422" value="46"/>
@ -1426,6 +1429,9 @@
<enum name="Clump Ordering">
<value name="Tiled U-Interleaved" value="1"/>
<value name="Linear" value="2"/>
<value name="Block-linear interleaved 16x16" value="3"/>
<value name="Block-linear interleaved 8x16" value="4"/>
<value name="Block-linear interleaved 8x8" value="5"/>
<value name="Interleaved 64k" value="8"/>
</enum>

View file

@ -1,5 +1,6 @@
<!--
Copyright (C) 2025 Collabora Ltd.
Copyright (C) 2026 Arm Ltd.
SPDX-License-Identifier: MIT
-->
@ -84,6 +85,7 @@
<enum name="Address Mode">
<value name="Flat" value="0"/>
<value name="Packed" value="1"/>
<value name="Out of bounds" value="8"/>
</enum>
<enum name="Format">
@ -132,6 +134,7 @@
<value name="A2 YUV10" value="41"/>
<value name="YUYAAYVYAA" value="42"/>
<!--- TODO: revisit YUV -->
<value name="Y10U10V10_420" value="43"/>
<value name="YUYV10" value="44"/>
<value name="VYUY10" value="45"/>
<value name="Y10 UV10 422" value="46"/>
@ -1728,6 +1731,9 @@
<enum name="Clump Ordering">
<value name="Tiled U-Interleaved" value="1"/>
<value name="Linear" value="2"/>
<value name="Block-linear interleaved 16x16" value="3"/>
<value name="Block-linear interleaved 8x16" value="4"/>
<value name="Block-linear interleaved 8x8" value="5"/>
<value name="Interleaved 64k" value="8"/>
</enum>

src/panfrost/genxml/v14.xml (new file, 2755 lines)

File diff suppressed because it is too large.

View file

@ -1,5 +1,6 @@
<!--
Copyright (C) 2020 Collabora Ltd.
Copyright (C) 2026 Arm Ltd.
SPDX-License-Identifier: MIT
-->
@ -103,6 +104,7 @@
<enum name="Address Mode">
<value name="Flat" value="0"/>
<value name="Packed" value="1"/>
<value name="Out of bounds" value="8"/>
</enum>
<enum name="Format">

View file

@ -4,7 +4,7 @@
subdir('kmod')
pixel_format_versions = ['5', '6', '7', '9', '10', '12', '13']
pixel_format_versions = ['5', '6', '7', '9', '10', '12', '13', '14']
libpanfrost_pixel_format = []
deps_for_libpanfrost = [dep_libdrm, idep_pan_packers, idep_mesautil, libpanfrost_model_dep]
@ -22,7 +22,7 @@ endforeach
libpanfrost_per_arch = []
foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13']
foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13', '14']
libpanfrost_per_arch += static_library(
'pan-arch-v' + ver,
[

View file

@ -3,6 +3,7 @@
* Copyright (C) 2014 Broadcom
* Copyright (C) 2018-2019 Alyssa Rosenzweig
* Copyright (C) 2019-2020 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@ -711,6 +712,32 @@ pan_afbc_compression_mode(enum pan_afbc_mode mode)
case PAN_AFBC_MODE_R16G16B16A16:
return MALI_AFBC_COMPRESSION_MODE_R16G16B16A16;
#endif
#if PAN_ARCH >= 14
case PAN_AFBC_MODE_YUV420_6C8:
return MALI_AFBC_COMPRESSION_MODE_Y8U8V8_420;
case PAN_AFBC_MODE_YUV420_2C8:
return MALI_AFBC_COMPRESSION_MODE_R8G8;
case PAN_AFBC_MODE_YUV420_1C8:
return MALI_AFBC_COMPRESSION_MODE_R8;
case PAN_AFBC_MODE_YUV420_6C10:
return MALI_AFBC_COMPRESSION_MODE_Y10U10V10_420;
case PAN_AFBC_MODE_YUV420_2C10:
return MALI_AFBC_COMPRESSION_MODE_R10G10;
case PAN_AFBC_MODE_YUV420_1C10:
return MALI_AFBC_COMPRESSION_MODE_R10;
case PAN_AFBC_MODE_YUV422_4C8:
return MALI_AFBC_COMPRESSION_MODE_Y8U8Y8V8_422;
case PAN_AFBC_MODE_YUV422_2C8:
return MALI_AFBC_COMPRESSION_MODE_R8G8;
case PAN_AFBC_MODE_YUV422_1C8:
return MALI_AFBC_COMPRESSION_MODE_R8;
case PAN_AFBC_MODE_YUV422_4C10:
return MALI_AFBC_COMPRESSION_MODE_Y10U10Y10V10_422;
case PAN_AFBC_MODE_YUV422_2C10:
return MALI_AFBC_COMPRESSION_MODE_R10G10;
case PAN_AFBC_MODE_YUV422_1C10:
return MALI_AFBC_COMPRESSION_MODE_R10;
#else
case PAN_AFBC_MODE_YUV420_6C8:
return MALI_AFBC_COMPRESSION_MODE_YUV420_6C8;
case PAN_AFBC_MODE_YUV420_2C8:
@ -735,6 +762,7 @@ pan_afbc_compression_mode(enum pan_afbc_mode mode)
return MALI_AFBC_COMPRESSION_MODE_YUV422_2C10;
case PAN_AFBC_MODE_YUV422_1C10:
return MALI_AFBC_COMPRESSION_MODE_YUV422_1C10;
#endif /* PAN_ARCH >= 14 */
#if PAN_ARCH == 9
case PAN_AFBC_MODE_R16:
case PAN_AFBC_MODE_R16G16:

View file

@ -1,5 +1,6 @@
/*
* Copyright (C) 2023 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@ -347,6 +348,25 @@ pan_afrc_format(struct pan_afrc_format_info info, uint64_t modifier,
return (scan ? MALI_AFRC_FORMAT_R10G10B10A10_SCAN
: MALI_AFRC_FORMAT_R10G10B10A10_ROT);
#if PAN_ARCH >= 14
case PAN_AFRC_ICHANGE_FORMAT_YUV444:
case PAN_AFRC_ICHANGE_FORMAT_YUV422:
case PAN_AFRC_ICHANGE_FORMAT_YUV420:
if (info.bpc == 8) {
if (plane == 0 || info.num_planes == 3)
return (scan ? MALI_AFRC_FORMAT_R8_SCAN : MALI_AFRC_FORMAT_R8_ROT);
return (scan ? MALI_AFRC_FORMAT_R8G8_SCAN : MALI_AFRC_FORMAT_R8G8_ROT);
}
if (plane == 0 || info.num_planes == 3)
return (scan ? MALI_AFRC_FORMAT_R10_SCAN : MALI_AFRC_FORMAT_R10_ROT);
assert(info.ichange_fmt == PAN_AFRC_ICHANGE_FORMAT_YUV422 ||
info.ichange_fmt == PAN_AFRC_ICHANGE_FORMAT_YUV420);
return (scan ? MALI_AFRC_FORMAT_R10G10_SCAN
: MALI_AFRC_FORMAT_R10G10_ROT);
#else
case PAN_AFRC_ICHANGE_FORMAT_YUV444:
if (info.bpc == 8) {
if (plane == 0 || info.num_planes == 3)
@ -394,6 +414,7 @@ pan_afrc_format(struct pan_afrc_format_info info, uint64_t modifier,
return (scan ? MALI_AFRC_FORMAT_R10G10_420_SCAN
: MALI_AFRC_FORMAT_R10G10_420_ROT);
#endif /* PAN_ARCH >= 14 */
default:
return MALI_AFRC_FORMAT_INVALID;

View file

@ -1,5 +1,6 @@
/*
* Copyright (C) 2021 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@ -11,6 +12,7 @@
#include "pan_afrc.h"
#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_fb.h"
#include "pan_props.h"
#include "pan_texture.h"
#include "pan_trace.h"
@ -1172,11 +1174,156 @@ check_fb_attachments(const struct pan_fb_info *fb)
#endif
}
#if PAN_ARCH >= 14
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
const struct pan_tiler_context *tiler_ctx, void *out)
const struct pan_tiler_context *tiler_ctx,
const struct pan_ptr framebuffer)
{
void *out = framebuffer.cpu;
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
check_fb_attachments(fb);
const int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
const bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
const struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb);
/* Emit to memory the state that might change per-layer. The static
* state is emitted directly to CSF registers by
* cs_emit_static_fragment_state().
*/
struct pan_fbd_layer fbd_data = {0};
fbd_data.tiler = tiler_ctx->valhall.desc;
/* internal_layer_index in flags0 is used to select the right
* primitive list in the tiler context, and frame_arg is the value
* that's passed to the fragment shader through r62-r63, which we use
* to pass gl_Layer. Since the layer_idx only takes 8-bits, we might
* use the extra 56-bits we have in frame_argument to pass other
* information to the fragment shader at some point.
*/
assert(layer_idx >= tiler_ctx->valhall.layer_offset);
fbd_data.frame_argument = layer_idx;
pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
cfg.pre_frame_0 =
pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
pan_clean_tile_write_any_set(clean_tile));
cfg.pre_frame_1 =
pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
pan_clean_tile_write_any_set(clean_tile));
cfg.post_frame = fb->bifrost.pre_post.modes[2];
const unsigned zs_bytes_per_pixel = pan_zsbuf_bytes_per_pixel(fb);
/* We can interleave HSR if we have space for two ZS tiles in
* the tile buffer. */
const unsigned max_zs_tile_size_interleave =
fb->z_tile_buf_budget >> util_logbase2_ceil(zs_bytes_per_pixel);
const bool hsr_can_interleave =
fb->tile_size <= max_zs_tile_size_interleave;
/* Enabling prepass without interleave is generally not good for
* performance, so disable HSR in that case. */
cfg.hsr_prepass_enable = fb->allow_hsr_prepass && hsr_can_interleave;
cfg.hsr_prepass_interleaving_enable = hsr_can_interleave;
cfg.hsr_prepass_filter_enable = true;
cfg.hsr_hierarchical_optimizations_enable = true;
cfg.internal_layer_index = layer_idx - tiler_ctx->valhall.layer_offset;
}
fbd_data.dcd_pointer = fb->bifrost.pre_post.dcds.gpu;
pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
cfg.s_clear = fb->zs.clear_value.stencil;
cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
/* Default to 24 bit depth if there's no surface. */
cfg.z_internal_format =
fb->zs.view.zs ? pan_get_z_internal_format(fb->zs.view.zs->format)
: MALI_Z_INTERNAL_FORMAT_D24;
cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
if (crc_rt >= 0) {
bool *valid = fb->rts[crc_rt].crc_valid;
bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
fb->draw_extent.maxx == (fb->width - 1) &&
fb->draw_extent.maxy == (fb->height - 1);
/* If the CRC was valid it stays valid, if it wasn't, we must
* ensure the render operation covers the full frame, and
* clean tiles are pushed to memory. */
bool new_valid = *valid | (full && pan_clean_tile_write_rt_enabled(
clean_tile, crc_rt));
cfg.crc_read_enable = *valid;
/* If the data is currently invalid, still write CRC
* data if we are doing a full write, so that it is
* valid for next time. */
cfg.crc_write_enable = new_valid;
*valid = new_valid;
}
}
fbd_data.z_clear = util_bitpack_float(fb->zs.clear_value.depth);
{
/* Set the DBD and RTD pointers. Both must be 64-bytes aligned. */
uint64_t out_gpu_addr =
framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
if (has_zs_crc_ext) {
fbd_data.dbd_pointer = out_gpu_addr;
assert(fbd_data.dbd_pointer % 64 == 0);
out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
}
fbd_data.rtd_pointer = out_gpu_addr;
assert(fbd_data.rtd_pointer % 64 == 0);
}
memcpy(out, &fbd_data, sizeof(fbd_data));
out += ALIGN_POT(sizeof(fbd_data), 64);
if (has_zs_crc_ext) {
struct mali_zs_crc_extension_packed *zs_crc_ext = out;
pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile);
out += pan_size(ZS_CRC_EXTENSION);
}
const unsigned rt_count = MAX2(fb->rt_count, 1);
unsigned cbuf_offset = 0;
for (unsigned i = 0; i < rt_count; i++) {
pan_emit_rt(fb, layer_idx, i, cbuf_offset, out, clean_tile);
out += pan_size(RENDER_TARGET);
if (!fb->rts[i].view)
continue;
cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
fb->tile_size *
pan_image_view_get_nr_samples(fb->rts[i].view);
if (i != crc_rt && fb->rts[i].crc_valid != NULL)
*(fb->rts[i].crc_valid) = false;
}
return 0;
}
#else
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
const struct pan_tiler_context *tiler_ctx,
const struct pan_ptr framebuffer)
{
void *out = framebuffer.cpu;
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
check_fb_attachments(fb);
@ -1351,6 +1498,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
}
return tag.opaque[0];
}
#endif /* PAN_ARCH >= 14 */
#else /* PAN_ARCH == 4 */
static enum mali_color_format
pan_sfbd_raw_format(unsigned bits)
@ -1378,8 +1526,11 @@ GENX(pan_select_tile_size)(struct pan_fb_info *fb)
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
const struct pan_tiler_context *tiler_ctx, void *fbd)
const struct pan_tiler_context *tiler_ctx,
const struct pan_ptr framebuffer)
{
void *fbd = framebuffer.cpu;
PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);
assert(fb->rt_count <= 1);

View file

@ -341,7 +341,7 @@ void GENX(pan_emit_afrc_color_attachment)(const struct pan_attachment_info *att,
unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
const struct pan_tls_info *tls,
const struct pan_tiler_context *tiler_ctx,
void *out);
const struct pan_ptr framebuffer);
#if PAN_ARCH >= 6
unsigned GENX(pan_select_tiler_hierarchy_mask)(uint32_t width, uint32_t height,

View file

@ -1,5 +1,6 @@
/*
* Copyright (C) 2026 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
#include "pan_fb.h"
@ -669,9 +670,124 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
}
#endif
#if PAN_ARCH >= 14
uint32_t
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out)
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
const struct pan_ptr framebuffer)
{
/* Emit the dynamic framebuffer state. That is, state that may change per-layer. */
void *out = framebuffer.cpu;
const struct pan_fb_layout *fb = info->fb;
const struct pan_fb_load *load = info->load;
const struct pan_fb_store *store = info->store;
const struct pan_fb_clean_tile ct = pan_fb_get_clean_tile(info);
const bool has_zs_crc_ext = pan_fb_has_zs(fb);
struct pan_fbd_layer fbd_data = {0};
fbd_data.tiler = info->tiler_ctx->valhall.desc;
/* layer_index in flags0 is used to select the right primitive list in
* the tiler context, and frame_arg is the value that's passed to the
* fragment shader through r62-r63, which we use to pass gl_Layer. Since
* the layer_idx only takes 8-bits, we might use the extra 56-bits we
* have in frame_argument to pass other information to the fragment
* shader at some point.
*/
assert(info->layer >= info->tiler_ctx->valhall.layer_offset);
fbd_data.frame_argument = info->layer;
pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
cfg.pre_frame_0 = pan_fix_frame_shader_mode(info->frame_shaders.modes[0],
ct.rts || ct.zs || ct.s);
cfg.pre_frame_1 = pan_fix_frame_shader_mode(info->frame_shaders.modes[1],
ct.rts || ct.zs || ct.s);
cfg.post_frame = info->frame_shaders.modes[2];
      /* Enabling the prepass without pipelining is generally bad for
       * performance, so disable HSR in that case.
       */
cfg.hsr_prepass_enable = info->allow_hsr_prepass &&
pan_fb_can_pipeline_zs(fb);
cfg.hsr_prepass_interleaving_enable = pan_fb_can_pipeline_zs(fb);
cfg.hsr_prepass_filter_enable = true;
cfg.hsr_hierarchical_optimizations_enable = true;
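      /* Select the per-layer primitive list inside the shared tiler
       * context (see the layer_index comment above). */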
cfg.internal_layer_index =
info->layer - info->tiler_ctx->valhall.layer_offset;
}
pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
if (fb->s_format != PIPE_FORMAT_NONE) {
cfg.s_clear = load && target_has_clear(&load->s) ?
load->s.clear.stencil : 0;
cfg.s_write_enable = store && store->s.store;
}
if (fb->z_format != PIPE_FORMAT_NONE) {
cfg.z_internal_format = pan_get_z_internal_format(fb->z_format);
cfg.z_write_enable = store && store->zs.store;
} else {
cfg.z_internal_format = MALI_Z_INTERNAL_FORMAT_D24;
assert(!store || !store->zs.store);
}
}
   fbd_data.z_clear =
      util_bitpack_float(fb->z_format != PIPE_FORMAT_NONE && load &&
                         target_has_clear(&load->z)
                            ? load->z.clear.depth
                            : 0);
fbd_data.dcd_pointer = info->frame_shaders.dcd_pointer;
{
      /* Set the DBD and RTD pointers. Both must be 64-byte aligned. */
uint64_t out_gpu_addr =
framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
if (has_zs_crc_ext) {
fbd_data.dbd_pointer = out_gpu_addr;
assert(fbd_data.dbd_pointer % 64 == 0);
out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
}
fbd_data.rtd_pointer = out_gpu_addr;
assert(fbd_data.rtd_pointer % 64 == 0);
}
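   /* Resulting per-layer layout in GPU memory (illustrative; actual
    * descriptor sizes come from pan_size()):
    *
    *    +0                   struct pan_fbd_layer, padded to 64 bytes
    *    +64                  ZS_CRC_EXTENSION (only if has_zs_crc_ext)
    *    +64 [+ ZS_CRC size]  RENDER_TARGET[0 .. rt_count - 1]
    */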
memcpy(out, &fbd_data, sizeof(fbd_data));
out += ALIGN_POT(sizeof(fbd_data), 64);
if (has_zs_crc_ext) {
struct mali_zs_crc_extension_packed zs_crc;
emit_zs_crc_desc(info, ct, &zs_crc);
memcpy(out, &zs_crc, sizeof(zs_crc));
out += sizeof(zs_crc);
}
uint32_t tile_rt_offset_B = 0;
for (unsigned rt = 0; rt < fb->rt_count; rt++) {
struct mali_rgb_render_target_packed rgb_rt;
emit_rgb_rt_desc(info, ct, rt, tile_rt_offset_B, &rgb_rt);
memcpy(out, &rgb_rt, sizeof(rgb_rt));
out += sizeof(rgb_rt);
if (fb->rt_formats[rt] != PIPE_FORMAT_NONE) {
tile_rt_offset_B += pan_bytes_per_pixel_tib(fb->rt_formats[rt]) *
fb->tile_size_px * fb->sample_count;
}
}
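   /* The accumulated tile-buffer offsets must fit in the per-tile color
    * buffer allocation advertised through FRAGMENT_FLAGS_1. */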
assert(tile_rt_offset_B <= fb->tile_rt_alloc_B);
return 0;
}
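/* A minimal caller-side sketch of the per-layer emission loop (names follow
 * the panvk usage further down; alloc(), layer_count and first_layer are
 * placeholders):
 *
 *    uint32_t fbd_sz = get_fbd_size(pan_fb_has_zs(fb), fb->rt_count);
 *    struct pan_ptr fbds = alloc(layer_count * fbd_sz,
 *                                alignof(struct pan_fbd_layer));
 *    for (uint32_t i = 0; i < layer_count; i++) {
 *       info.layer = first_layer + i;
 *       GENX(pan_emit_fb_desc)(&info, pan_ptr_offset(fbds, fbd_sz * i));
 *    }
 */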
#else /* PAN_ARCH < 14 */
uint32_t
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
const struct pan_ptr framebuffer)
{
void *out = framebuffer.cpu;
const struct pan_fb_layout *fb = info->fb;
const struct pan_fb_load *load = info->load;
const struct pan_fb_store *store = info->store;
@ -823,4 +939,5 @@ GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out)
}
return tag.opaque[0];
}
#endif
#endif /* PAN_ARCH >= 14 */
#endif /* PAN_ARCH >= 5 */

View file

@ -1,14 +1,20 @@
/*
* Copyright (C) 2026 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
#ifndef __PAN_FB_H
#define __PAN_FB_H
#if PAN_ARCH >= 14
#include "genxml/cs_builder.h"
#endif
#include "compiler/shader_enums.h"
#include "genxml/gen_macros.h"
#include "util/format/u_formats.h"
#include "compiler/shader_enums.h"
#include "pan_pool.h"
struct nir_shader;
struct nir_shader_compiler_options;
@ -481,7 +487,7 @@ void GENX(pan_fill_fb_info)(const struct pan_fb_desc_info *info,
struct pan_fb_info *fbinfo);
uint32_t GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
void *out);
const struct pan_ptr framebuffer);
#endif
enum ENUM_PACKED pan_fb_shader_op {
@ -620,4 +626,35 @@ GENX(pan_get_fb_shader)(const struct pan_fb_shader_key *key,
const struct nir_shader_compiler_options *nir_options);
#endif
#if PAN_ARCH >= 14
/* Framebuffer per-layer state. Keep this structure 64-byte aligned, since
* we want the adjacent ZS_CRC_EXTENSION and RENDER_TARGET descriptors
* aligned. */
struct pan_fbd_layer {
   /** GPU address of the tiler descriptor. */
   uint64_t tiler;
   /** Frame argument. */
   uint64_t frame_argument;
   /** An instance of Fragment Flags 0. */
   struct mali_fragment_flags_0_packed flags0;
   /** An instance of Fragment Flags 2. */
   struct mali_fragment_flags_2_packed flags2;
   /** Z clear value. */
   uint32_t z_clear;
   /** GPU address of the draw call descriptors. It may be 0. */
   uint64_t dcd_pointer;
   /** GPU address of the ZS_CRC_EXTENSION descriptor. It may be 0. */
   uint64_t dbd_pointer;
   /** GPU address of the RENDER_TARGET descriptors. */
   uint64_t rtd_pointer;
} __attribute__((aligned(64)));
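/* An illustrative compile-time check of that invariant (a sketch assuming
 * C11 static_assert; not part of the descriptor definition itself):
 *
 *    static_assert(sizeof(struct pan_fbd_layer) % 64 == 0,
 *                  "trailing descriptors must stay 64-byte aligned");
 */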
#endif /* PAN_ARCH >= 14 */
#endif /* __PAN_FB_H */

View file

@ -1,5 +1,6 @@
/*
* Copyright (C) 2019 Collabora, Ltd.
* Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@ -184,7 +185,27 @@ const struct pan_blendable_format
const struct pan_format GENX(pan_pipe_format)[PIPE_FORMAT_COUNT] = {
FMT(NONE, CONSTANT, 0000, L, VTR_IB),
#if PAN_ARCH >= 7
#if PAN_ARCH >= 14
/* Multiplane formats */
FMT_YUV(R8G8_R8B8_UNORM, Y8U8Y8V8_422, UVYA, NO_SWAP, CENTER_422, _T____),
FMT_YUV(G8R8_B8R8_UNORM, U8Y8V8Y8_422, UYVA, SWAP, CENTER_422, _T____),
FMT_YUV(R8B8_R8G8_UNORM, Y8U8Y8V8_422, VYUA, NO_SWAP, CENTER_422, _T____),
FMT_YUV(B8R8_G8R8_UNORM, U8Y8V8Y8_422, VUYA, SWAP, CENTER_422, _T____),
FMT_YUV(R8_G8B8_420_UNORM, Y8U8V8_420, YUVA, NO_SWAP, CENTER, _T____),
FMT_YUV(R8_B8G8_420_UNORM, Y8U8V8_420, YVUA, NO_SWAP, CENTER, _T____),
FMT_YUV(R8_G8_B8_420_UNORM, Y8U8V8_420, YUVA, NO_SWAP, CENTER, _T____),
FMT_YUV(R8_B8_G8_420_UNORM, Y8U8V8_420, YVUA, NO_SWAP, CENTER, _T____),
FMT_YUV(R8_G8B8_422_UNORM, Y8U8Y8V8_422, YUVA, NO_SWAP, CENTER_422, _T____),
FMT_YUV(R8_B8G8_422_UNORM, U8Y8V8Y8_422, YVUA, NO_SWAP, CENTER_422, _T____),
FMT_YUV(R10_G10B10_420_UNORM, YUYAAYVYAA_420, YUVA, NO_SWAP, CENTER, _T____),
FMT_YUV(R10_G10B10_422_UNORM, Y10X6U10X6Y10X6V10X6_422, YUVA, NO_SWAP, CENTER_422, _T____),
   /* Special internal formats */
FMT_YUV(R8G8B8_420_UNORM_PACKED, Y8U8V8_420, YUVA, NO_SWAP, CENTER, _T____),
FMT_YUV(R10G10B10_420_UNORM_PACKED, Y10U10V10_420, YUVA, NO_SWAP, CENTER, _T____),
FMT_YUV(X6R10X6G10_X6R10X6B10_422_UNORM, Y10X6U10X6Y10X6V10X6_422, UVYA, NO_SWAP, CENTER_422, _T____),
#elif PAN_ARCH >= 7
/* Multiplane formats */
FMT_YUV(R8G8_R8B8_UNORM, YUYV8, UVYA, NO_SWAP, CENTER_422, _T____),
FMT_YUV(G8R8_B8R8_UNORM, VYUY8, UYVA, SWAP, CENTER_422, _T____),

View file

@ -168,6 +168,8 @@ extern const struct pan_blendable_format
pan_blendable_formats_v12[PIPE_FORMAT_COUNT];
extern const struct pan_blendable_format
pan_blendable_formats_v13[PIPE_FORMAT_COUNT];
extern const struct pan_blendable_format
pan_blendable_formats_v14[PIPE_FORMAT_COUNT];
uint8_t pan_raw_format_mask_midgard(enum pipe_format *formats);
@ -184,6 +186,7 @@ pan_blendable_format_table(unsigned arch)
FMT_TABLE(10);
FMT_TABLE(12);
FMT_TABLE(13);
FMT_TABLE(14);
#undef FMT_TABLE
default:
assert(!"Unsupported architecture");
@ -199,6 +202,7 @@ extern const struct pan_format pan_pipe_format_v9[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v10[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v12[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v13[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v14[PIPE_FORMAT_COUNT];
static inline const struct pan_format *
pan_format_table(unsigned arch)
@ -213,6 +217,7 @@ pan_format_table(unsigned arch)
FMT_TABLE(10);
FMT_TABLE(12);
FMT_TABLE(13);
FMT_TABLE(14);
#undef FMT_TABLE
default:
assert(!"Unsupported architecture");

View file

@ -84,6 +84,7 @@ const struct pan_mod_handler *pan_mod_get_handler_v9(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v10(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v12(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v13(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v14(uint64_t modifier);
static inline const struct pan_mod_handler *
pan_mod_get_handler(unsigned arch, uint64_t modifier)
@ -105,6 +106,8 @@ pan_mod_get_handler(unsigned arch, uint64_t modifier)
return pan_mod_get_handler_v12(modifier);
case 13:
return pan_mod_get_handler_v13(modifier);
case 14:
return pan_mod_get_handler_v14(modifier);
default:
UNREACHABLE("Unsupported arch");
}

View file

@ -223,6 +223,25 @@ pan_clump_format(enum pipe_format format)
/* YUV-sampling has special cases */
if (pan_format_is_yuv(format)) {
switch (format) {
#if PAN_ARCH >= 14
case PIPE_FORMAT_R8G8_R8B8_UNORM:
case PIPE_FORMAT_G8R8_B8R8_UNORM:
case PIPE_FORMAT_R8B8_R8G8_UNORM:
case PIPE_FORMAT_B8R8_G8R8_UNORM:
case PIPE_FORMAT_R8_G8B8_422_UNORM:
case PIPE_FORMAT_R8_B8G8_422_UNORM:
case PIPE_FORMAT_R8_G8B8_420_UNORM:
case PIPE_FORMAT_R8_B8G8_420_UNORM:
case PIPE_FORMAT_R8_G8_B8_420_UNORM:
case PIPE_FORMAT_R8_B8_G8_420_UNORM:
case PIPE_FORMAT_R8G8B8_420_UNORM_PACKED:
return MALI_CLUMP_FORMAT_RAW8;
case PIPE_FORMAT_R10_G10B10_420_UNORM:
case PIPE_FORMAT_R10G10B10_420_UNORM_PACKED:
case PIPE_FORMAT_R10_G10B10_422_UNORM:
case PIPE_FORMAT_X6R10X6G10_X6R10X6B10_422_UNORM:
return MALI_CLUMP_FORMAT_R10_PACKED;
#else
case PIPE_FORMAT_R8G8_R8B8_UNORM:
case PIPE_FORMAT_G8R8_B8R8_UNORM:
case PIPE_FORMAT_R8B8_R8G8_UNORM:
@ -242,6 +261,7 @@ pan_clump_format(enum pipe_format format)
case PIPE_FORMAT_R10_G10B10_422_UNORM:
case PIPE_FORMAT_X6R10X6G10_X6R10X6B10_422_UNORM:
return MALI_CLUMP_FORMAT_Y10_UV10_422;
#endif /* PAN_ARCH >= 14 */
default:
UNREACHABLE("unhandled clump format");
}

View file

@ -28,6 +28,8 @@
#include "libpan_v12.h"
#elif (PAN_ARCH == 13)
#include "libpan_v13.h"
#elif (PAN_ARCH == 14)
#include "libpan_v14.h"
#else
#error "Unsupported architecture for libpan"
#endif

View file

@ -26,6 +26,8 @@
#include "libpan_shaders_v12.h"
#elif (PAN_ARCH == 13)
#include "libpan_shaders_v13.h"
#elif (PAN_ARCH == 14)
#include "libpan_shaders_v14.h"
#else
#error "Unsupported architecture for libpan"
#endif

View file

@ -11,7 +11,7 @@ libpan_shader_files = files(
idep_libpan_per_arch = {}
foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13']
foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13', '14']
libpan_spv = custom_target(
input : libpan_shader_files,
output : 'libpan_v' + ver + '.spv',

View file

@ -95,6 +95,10 @@ const struct pan_model pan_model_list[] = {
MODEL_RATES(4, 8, 128)),
FIFTHGEN_MODEL(PAN_PROD_ID(13, 8, 0), 4, "G725", "TKRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
MODEL_RATES(4, 8, 128)),
FIFTHGEN_MODEL(PAN_PROD_ID(14, 8, 3), 1, "G1-Pro", "TDRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
MODEL_RATES(4, 8, 64)),
FIFTHGEN_MODEL(PAN_PROD_ID(14, 8, 3), 4, "G1-Pro", "TDRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
MODEL_RATES(4, 8, 128)),
};
/* clang-format on */

View file

@ -74,7 +74,11 @@ static inline uint32_t
get_fbd_size(bool has_zs_ext, uint32_t rt_count)
{
assert(rt_count >= 1 && rt_count <= MAX_RTS);
#if PAN_ARCH >= 14
uint32_t fbd_size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
#else
uint32_t fbd_size = pan_size(FRAMEBUFFER);
#endif
if (has_zs_ext)
fbd_size += pan_size(ZS_CRC_EXTENSION);
fbd_size += pan_size(RENDER_TARGET) * rt_count;
@ -209,13 +213,25 @@ enum panvk_cs_regs {
PANVK_CS_REG_RUN_IDVS_SR_END = 60,
#endif
#if PAN_ARCH >= 14
/* RUN_FRAGMENT2 staging regs.
* SW ABI:
* - r58:59 contain the pointer to the first tiler descriptor. This is
* needed to gather completed heap chunks after a run_fragment2.
*/
PANVK_CS_REG_RUN_FRAGMENT_SR_START = 0,
PANVK_CS_REG_RUN_FRAGMENT_SR_END = 55,
PANVK_CS_REG_TILER_DESC_PTR = 58,
#else
/* RUN_FRAGMENT staging regs.
* SW ABI:
* - r38:39 contain the pointer to the first tiler descriptor. This is
* - r58:59 contain the pointer to the first tiler descriptor. This is
* needed to gather completed heap chunks after a run_fragment.
*/
PANVK_CS_REG_RUN_FRAGMENT_SR_START = 38,
PANVK_CS_REG_RUN_FRAGMENT_SR_END = 46,
PANVK_CS_REG_TILER_DESC_PTR = 58,
#endif
/* RUN_COMPUTE staging regs. */
PANVK_CS_REG_RUN_COMPUTE_SR_START = 0,
@ -870,4 +886,31 @@ vk_stages_to_subqueue_mask(VkPipelineStageFlags2 vk_stages,
void panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cs_deps deps);
#if PAN_ARCH >= 14
static inline void
cs_emit_layer_fragment_state(struct cs_builder *b, struct cs_index fbd_ptr)
{
/* Emit the dynamic fragment state. This state may change per-layer. */
cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_0), fbd_ptr,
offsetof(struct pan_fbd_layer, flags0));
cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_2), fbd_ptr,
offsetof(struct pan_fbd_layer, flags2));
cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, Z_CLEAR), fbd_ptr,
offsetof(struct pan_fbd_layer, z_clear));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, TILER_DESCRIPTOR_POINTER), fbd_ptr,
offsetof(struct pan_fbd_layer, tiler));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, RTD_POINTER), fbd_ptr,
offsetof(struct pan_fbd_layer, rtd_pointer));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, DBD_POINTER), fbd_ptr,
offsetof(struct pan_fbd_layer, dbd_pointer));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_ARG), fbd_ptr,
offsetof(struct pan_fbd_layer, frame_argument));
cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_SHADER_DCD_POINTER), fbd_ptr,
offsetof(struct pan_fbd_layer, dcd_pointer));
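   /* Make sure the loads above have landed in the staging registers before
    * the next RUN_FRAGMENT2 consumes them. */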
cs_flush_loads(b);
}
#endif /* PAN_ARCH >= 14 */
#endif /* PANVK_CMD_BUFFER_H */

View file

@ -51,6 +51,7 @@
#include "vk_render_pass.h"
#include "poly/geometry.h"
#if PAN_ARCH < 14
static enum cs_reg_perm
provoking_vertex_fn_reg_perm_cb(struct cs_builder *b, unsigned reg)
{
@ -202,6 +203,7 @@ panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev)
panvk_priv_bo_unref(dev->draw_ctx->fns_bo);
vk_free(&dev->vk.alloc, dev->draw_ctx);
}
#endif /* PAN_ARCH < 14 */
static void
emit_vs_attrib(struct panvk_cmd_buffer *cmdbuf,
@ -1245,8 +1247,13 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
uint32_t fbd_sz = calc_fbd_size(cmdbuf);
uint32_t fbds_sz = enabled_layer_count * fbd_sz;
cmdbuf->state.gfx.render.fbds = panvk_cmd_alloc_dev_mem(
cmdbuf, desc, fbds_sz, pan_alignment(FRAMEBUFFER));
#if PAN_ARCH >= 14
const unsigned fbds_alignment = alignof(struct pan_fbd_layer);
#else
const unsigned fbds_alignment = pan_alignment(FRAMEBUFFER);
#endif
cmdbuf->state.gfx.render.fbds =
panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbds_sz, fbds_alignment);
if (!cmdbuf->state.gfx.render.fbds.gpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@ -1316,14 +1323,23 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
tiler_ctx = get_tiler_context(cmdbuf, layer_idx);
uint32_t new_fbd_flags =
GENX(pan_emit_fb_desc)(&fbd_info, fbds.cpu + fbd_sz * i);
GENX(pan_emit_fb_desc)(&fbd_info, pan_ptr_offset(fbds, fbd_sz * i));
/* Make sure all FBDs have the same flags. */
assert(i == 0 || new_fbd_flags == fbd_flags);
fbd_flags = new_fbd_flags;
}
#if PAN_ARCH >= 14
/* fbd_flags is unused on v14+. */
assert(!fbd_flags);
#endif
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
#if PAN_ARCH >= 14
// TODO: Implement IR support for v14.
#else
for (uint32_t ir_pass = 0; ir_pass < PANVK_IR_PASS_COUNT; ir_pass++) {
struct pan_ptr ir_fbds = panvk_cmd_alloc_dev_mem(
cmdbuf, desc, fbds_sz, pan_alignment(FRAMEBUFFER));
@ -1335,7 +1351,6 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
for (uint32_t i = 0; i < enabled_layer_count; i++) {
uint32_t layer_idx = multiview ? u_bit_scan(&ir_view_mask_temp) : i;
void *ir_fbd = (void *)((uint8_t *)ir_fbds.cpu + (i * fbd_sz));
fbd_info.layer = layer_idx;
tiler_ctx = get_tiler_context(cmdbuf, layer_idx);
@ -1353,8 +1368,8 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
if (result != VK_SUCCESS)
return result;
ASSERTED uint32_t new_fbd_flags =
GENX(pan_emit_fb_desc)(&fbd_info, ir_fbd);
ASSERTED uint32_t new_fbd_flags = GENX(pan_emit_fb_desc)(
&fbd_info, pan_ptr_offset(ir_fbds, fbd_sz * i));
/* Make sure all FBDs have the same flags. */
assert(new_fbd_flags == fbd_flags);
@ -1367,16 +1382,14 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
/* Wait for IR info push to complete */
cs_wait_slot(b, SB_ID(LS));
bool unset_provoking_vertex =
cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET;
#endif /* PAN_ARCH >= 14 */
if (copy_fbds) {
struct cs_index cur_tiler = cs_reg64(b, 38);
struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR);
struct cs_index dst_fbd_ptr = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
struct cs_index fbd_idx = cs_reg32(b, 47);
struct cs_index src_fbd_ptr = cs_reg64(b, 48);
struct cs_index remaining_layers_in_td = cs_reg32(b, 50);
struct cs_index fbd_idx = cs_reg32(b, 60);
struct cs_index src_fbd_ptr = cs_reg64(b, 64);
struct cs_index remaining_layers_in_td = cs_reg32(b, 61);
uint32_t td_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count,
MAX_LAYERS_PER_TILER_DESC);
@ -1400,10 +1413,27 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
    * framebuffer size is aligned to 64 bytes. */
assert(fbd_sz == ALIGN_POT(fbd_sz, 64));
#if PAN_ARCH >= 14
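      /* Copy the FBD in 64-byte chunks: one 16 x 32-bit scratch register
       * tuple per load/store pair. */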
for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) {
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16), src_fbd_ptr,
BITFIELD_MASK(16), fbd_off);
/* Patch the Tiler pointer. */
if (fbd_off == 0)
cs_add64(b, cs_scratch_reg64(b, 0), cur_tiler, 0);
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr,
BITFIELD_MASK(16), fbd_off);
}
#else
bool unset_provoking_vertex =
cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET;
for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) {
if (fbd_off == 0) {
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 14), src_fbd_ptr,
BITFIELD_MASK(14), fbd_off);
/* Patch the Tiler pointer. */
cs_add64(b, cs_scratch_reg64(b, 14), cur_tiler, 0);
/* If we don't know what provoking vertex mode the
@ -1423,6 +1453,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr,
BITFIELD_MASK(16), fbd_off);
}
#endif
/* Finish stores to pass_dst_fbd_ptr. */
cs_flush_stores(b);
@ -1459,9 +1490,11 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_update_frag_ctx(b) {
cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
fbds.gpu | fbd_flags);
cs_move64_to(b, cs_reg64(b, 38), cmdbuf->state.gfx.render.tiler);
cs_move64_to(b, cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR),
cmdbuf->state.gfx.render.tiler);
}
#if PAN_ARCH < 14
/* If we don't know what provoking vertex mode the application wants yet,
* leave space to patch it later */
if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) {
@ -1483,6 +1516,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex)
cs_call(b, addr_reg, length_reg);
}
#endif
}
return VK_SUCCESS;
@ -3299,6 +3333,9 @@ calc_tiler_oom_handler_idx(struct panvk_cmd_buffer *cmdbuf)
static void
setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
{
#if PAN_ARCH >= 14
// TODO: Implement IR support for v14.
#else
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
const bool has_zs_ext = pan_fb_has_zs(fb);
@ -3343,6 +3380,7 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
TILER_OOM_CTX_FIELD_OFFSET(layer_count));
cs_flush_stores(b);
#endif /* PAN_ARCH >= 14 */
}
static uint32_t
@ -3351,24 +3389,106 @@ pack_32_2x16(uint16_t lo, uint16_t hi)
return (((uint32_t)hi) << 16) | (uint32_t)lo;
}
#if PAN_ARCH >= 14
static void
cs_emit_static_fragment_state(struct cs_builder *b,
struct panvk_cmd_buffer *cmdbuf)
{
/* Emit the static fragment staging registers. These don't change per-layer. */
const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
const struct panvk_rendering_state *render = &cmdbuf->state.gfx.render;
const struct pan_fb_layout *fb = &render->fb.layout;
const uint8_t sample_count = render->fb.layout.sample_count;
const struct pan_fb_bbox fb_area_px =
pan_fb_bbox_from_xywh(0, 0, fb->width_px, fb->height_px);
const struct pan_fb_bbox bbox_px =
pan_fb_bbox_clamp(fb->tiling_area_px, fb_area_px);
assert(pan_fb_bbox_is_valid(fb->tiling_area_px));
struct mali_fragment_bounding_box_packed bbox;
pan_pack(&bbox, FRAGMENT_BOUNDING_BOX, cfg) {
cfg.bound_min_x = bbox_px.min_x;
cfg.bound_min_y = bbox_px.min_y;
cfg.bound_max_x = bbox_px.max_x;
cfg.bound_max_y = bbox_px.max_y;
}
struct mali_frame_size_packed frame_size;
pan_pack(&frame_size, FRAME_SIZE, cfg) {
cfg.width = fb->width_px;
cfg.height = fb->height_px;
}
   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN), bbox.opaque[0]);
   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX), bbox.opaque[1]);
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FRAME_SIZE), frame_size.opaque[0]);
cs_move64_to(
b, cs_sr_reg64(b, FRAGMENT, SAMPLE_POSITION_ARRAY_POINTER),
dev->sample_positions->addr.dev +
pan_sample_positions_offset(pan_sample_pattern(sample_count)));
/* Flags 1 */
struct mali_fragment_flags_1_packed flags1;
pan_pack(&flags1, FRAGMENT_FLAGS_1, cfg) {
cfg.sample_count = fb->sample_count;
cfg.sample_pattern = pan_sample_pattern(fb->sample_count);
cfg.effective_tile_size = fb->tile_size_px;
cfg.point_sprite_coord_origin_max_y = false;
cfg.first_provoking_vertex = get_first_provoking_vertex(cmdbuf);
assert(fb->rt_count > 0);
cfg.render_target_count = fb->rt_count;
cfg.color_buffer_allocation = fb->tile_rt_alloc_B;
}
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]);
/* If we don't know what provoking vertex mode the application wants yet,
* leave space to patch it later */
if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) {
cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex)
{
/* provoking_vertex flag is bit 14 of Fragment Flags 1. */
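         /* Subtracting (1 << 14) clears that bit, which assumes FLAGS_1 was
          * packed above with first_provoking_vertex set. */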
cs_add32(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1),
cs_sr_reg32(b, FRAGMENT, FLAGS_1), -(1 << 14));
}
}
/* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */
}
#endif /* PAN_ARCH >= 14 */
static VkResult
issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
{
#if PAN_ARCH < 14
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
#endif
const struct cs_tracing_ctx *tracing_ctx =
&cmdbuf->state.cs[PANVK_SUBQUEUE_FRAGMENT].tracing;
const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
bool has_oq_chain = cmdbuf->state.gfx.render.oq.chain != 0;
/* Now initialize the fragment bits. */
struct cs_index fbd_pointer = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
cs_update_frag_ctx(b) {
#if PAN_ARCH >= 14
cs_emit_static_fragment_state(b, cmdbuf);
cs_emit_layer_fragment_state(b, fbd_pointer);
#else
const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
pack_32_2x16(fb->tiling_area_px.min_x,
fb->tiling_area_px.min_y));
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
pack_32_2x16(fb->tiling_area_px.max_x,
fb->tiling_area_px.max_y));
#endif
}
bool simul_use =
@ -3401,6 +3521,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
* state for this renderpass, so it's safe to enable. */
struct cs_index addr_reg = cs_scratch_reg64(b, 0);
struct cs_index length_reg = cs_scratch_reg32(b, 2);
#if PAN_ARCH >= 14
// TODO: Implement IR support for v14.
#else
uint32_t handler_idx = calc_tiler_oom_handler_idx(cmdbuf);
uint64_t handler_addr = dev->tiler_oom.handlers_bo->addr.dev +
handler_idx * dev->tiler_oom.handler_stride;
@ -3408,6 +3531,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
cs_move32_to(b, length_reg, dev->tiler_oom.handler_stride);
cs_set_exception_handler(b, MALI_CS_EXCEPTION_TYPE_TILER_OOM, addr_reg,
length_reg);
#endif
/* Wait for the tiling to be done before submitting the fragment job. */
wait_finish_tiling(cmdbuf);
@ -3422,8 +3546,12 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
* up. */
cs_move64_to(b, addr_reg, 0);
cs_move32_to(b, length_reg, 0);
#if PAN_ARCH >= 14
// TODO: Implement IR support for v14.
#else
cs_set_exception_handler(b, MALI_CS_EXCEPTION_TYPE_TILER_OOM, addr_reg,
length_reg);
#endif
/* Applications tend to forget to describe subpass dependencies, especially
* when it comes to write -> read dependencies on attachments. The
@ -3439,8 +3567,13 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
}
if (cmdbuf->state.gfx.render.layer_count <= 1) {
#if PAN_ARCH >= 14
cs_trace_run_fragment2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
false, MALI_TILE_RENDER_ORDER_Z_ORDER);
#else
cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
false, MALI_TILE_RENDER_ORDER_Z_ORDER);
#endif
} else {
struct cs_index run_fragment_regs = cs_scratch_reg_tuple(b, 0, 4);
struct cs_index remaining_layers = cs_scratch_reg32(b, 4);
@ -3449,12 +3582,17 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
cs_while(b, MALI_CS_CONDITION_GREATER, remaining_layers) {
cs_add32(b, remaining_layers, remaining_layers, -1);
#if PAN_ARCH >= 14
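         /* Reload the per-layer staging registers from the current FBD;
          * fbd_pointer is advanced at the bottom of the loop. */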
cs_emit_layer_fragment_state(b, fbd_pointer);
cs_trace_run_fragment2(b, tracing_ctx, run_fragment_regs, false,
MALI_TILE_RENDER_ORDER_Z_ORDER);
#else
cs_trace_run_fragment(b, tracing_ctx, run_fragment_regs, false,
MALI_TILE_RENDER_ORDER_Z_ORDER);
#endif
cs_update_frag_ctx(b)
cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
cs_sr_reg64(b, FRAGMENT, FBD_POINTER), fbd_sz);
cs_add64(b, fbd_pointer, fbd_pointer, fbd_sz);
}
}
@ -3468,8 +3606,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
struct cs_index completed = cs_scratch_reg_tuple(b, 10, 4);
struct cs_index completed_top = cs_scratch_reg64(b, 10);
struct cs_index completed_bottom = cs_scratch_reg64(b, 12);
struct cs_index cur_tiler = cs_reg64(b, 38);
struct cs_index tiler_count = cs_reg32(b, 47);
struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR);
struct cs_index tiler_count = cs_reg32(b, 60);
struct cs_index oq_chain = cs_scratch_reg64(b, 10);
struct cs_index oq_chain_lo = cs_scratch_reg32(b, 10);
struct cs_index oq_syncobj = cs_scratch_reg64(b, 12);

View file

@ -13,8 +13,9 @@ tiler_oom_reg_perm_cb(struct cs_builder *b, unsigned reg)
{
switch (reg) {
   /* The bbox is set up by the fragment subqueue; we should not modify it. */
case 42:
case 43:
case MALI_FRAGMENT_SR_BBOX_MIN:
case MALI_FRAGMENT_SR_BBOX_MAX:
/* We should only load from the subqueue context. */
case PANVK_CS_REG_SUBQUEUE_CTX_START:
case PANVK_CS_REG_SUBQUEUE_CTX_END:
@ -42,8 +43,14 @@ copy_fbd(struct cs_builder *b, bool has_zs_ext, uint32_t rt_count,
cs_store(b, cs_scratch_reg_tuple(b, 0, 8), dst, BITFIELD_MASK(8),
8 * sizeof(uint32_t));
#if PAN_ARCH >= 14
const size_t fbd_size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
#else
const size_t fbd_size = sizeof(struct mali_framebuffer_packed);
#endif
if (has_zs_ext) {
const uint16_t dbd_offset = sizeof(struct mali_framebuffer_packed);
const uint16_t dbd_offset = fbd_size;
/* Copy the whole DBD. */
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 8), src_other,
@ -57,8 +64,7 @@ copy_fbd(struct cs_builder *b, bool has_zs_ext, uint32_t rt_count,
}
const uint16_t rts_offset =
sizeof(struct mali_framebuffer_packed) +
(has_zs_ext ? sizeof(struct mali_zs_crc_extension_packed) : 0);
fbd_size + (has_zs_ext ? sizeof(struct mali_zs_crc_extension_packed) : 0);
for (uint32_t rt = 0; rt < rt_count; rt++) {
const uint16_t rt_offset =
@ -110,12 +116,14 @@ generate_tiler_oom_handler(struct panvk_device *dev,
.tracebuf_addr_offset =
offsetof(struct panvk_cs_subqueue_context, debug.tracebuf.cs),
};
struct mali_framebuffer_pointer_packed fb_tag;
#if PAN_ARCH < 14
struct mali_framebuffer_pointer_packed fb_tag;
pan_pack(&fb_tag, FRAMEBUFFER_POINTER, cfg) {
cfg.zs_crc_extension_present = has_zs_ext;
cfg.render_target_count = rt_count;
}
#endif
cs_function_def(&b, &handler, handler_ctx) {
struct cs_index subqueue_ctx = cs_subqueue_ctx_reg(&b);
@ -140,7 +148,7 @@ generate_tiler_oom_handler(struct panvk_device *dev,
struct cs_index run_fragment_regs = cs_scratch_reg_tuple(&b, 0, 4);
/* The tiler pointer is pre-filled. */
struct cs_index tiler_ptr = cs_reg64(&b, 38);
struct cs_index tiler_ptr = cs_reg64(&b, PANVK_CS_REG_TILER_DESC_PTR);
cs_load64_to(&b, scratch_fbd_ptr_reg, subqueue_ctx,
TILER_OOM_CTX_FIELD_OFFSET(ir_scratch_fbd_ptr));
@ -176,11 +184,17 @@ generate_tiler_oom_handler(struct panvk_device *dev,
cs_wait_slot(&b, SB_ID(LS));
/* Set FBD pointer to the scratch fbd */
cs_add64(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER),
scratch_fbd_ptr_reg, fb_tag.opaque[0]);
struct cs_index fbd_pointer = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);
#if PAN_ARCH >= 14
cs_add64(&b, fbd_pointer, scratch_fbd_ptr_reg, 0);
cs_emit_layer_fragment_state(&b, fbd_pointer);
cs_trace_run_fragment2(&b, &tracing_ctx, run_fragment_regs, false,
MALI_TILE_RENDER_ORDER_Z_ORDER);
#else
cs_add64(&b, fbd_pointer, scratch_fbd_ptr_reg, fb_tag.opaque[0]);
cs_trace_run_fragment(&b, &tracing_ctx, run_fragment_regs, false,
MALI_TILE_RENDER_ORDER_Z_ORDER);
#endif
/* Serialize run fragments since we reuse FBD for the runs */
cs_wait_slots(&b, dev->csf.sb.all_iters_mask);

View file

@ -717,7 +717,12 @@ init_tiler(struct panvk_gpu_queue *queue)
tiler_heap->chunk_size = phys_dev->csf.tiler.chunk_size;
alloc_info.size = get_fbd_size(true, MAX_RTS);
alloc_info.alignment = pan_alignment(FRAMEBUFFER);
#if PAN_ARCH >= 14
const unsigned fbds_alignment = alignof(struct pan_fbd_layer);
#else
const unsigned fbds_alignment = pan_alignment(FRAMEBUFFER);
#endif
alloc_info.alignment = fbds_alignment;
tiler_heap->oom_fbd = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info);
if (!panvk_priv_mem_check_alloc(tiler_heap->oom_fbd)) {
result = panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,

View file

@ -181,7 +181,7 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
fbd_info.layer = layer_id;
fbd_info.frame_shaders = fs;
fbd_info.frame_shaders.dcd_pointer += layer_id * 3 * pan_size(DRAW);
tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd.cpu);
tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd);
result = panvk_cmd_prepare_fragment_job(cmdbuf, tagged_fbd_ptr);
if (result != VK_SUCCESS)

View file

@ -14,6 +14,7 @@ panvk_entrypoints = custom_target(
'--device-prefix', 'panvk_v6', '--device-prefix', 'panvk_v7',
'--device-prefix', 'panvk_v9', '--device-prefix', 'panvk_v10',
'--device-prefix', 'panvk_v12', '--device-prefix', 'panvk_v13',
'--device-prefix', 'panvk_v14',
'--beta', with_vulkan_beta.to_string()
],
depend_files : vk_entrypoints_gen_depend_files,
@ -65,7 +66,7 @@ valhall_archs = [9, 10]
valhall_inc_dir = ['valhall']
valhall_files = []
fifthgen_archs = [12, 13]
fifthgen_archs = [12, 13, 14]
fifthgen_inc_dir = ['fifthgen']
fifthgen_files = []
@ -83,7 +84,7 @@ jm_files = [
'jm/panvk_vX_gpu_queue.c',
]
csf_archs = [10, 12, 13]
csf_archs = [10, 12, 13, 14]
csf_inc_dir = ['csf']
csf_files = [
'csf/panvk_vX_bind_queue.c',
@ -126,7 +127,7 @@ common_per_arch_files = [
sha1_h,
]
foreach arch : [6, 7, 10, 12, 13]
foreach arch : [6, 7, 10, 12, 13, 14]
per_arch_files = common_per_arch_files
inc_panvk_per_arch = []

View file

@ -243,7 +243,7 @@ struct panvk_cmd_graphics_state {
} \
} while (0)
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
struct panvk_device_draw_context {
struct panvk_priv_bo *fns_bo;
uint64_t fn_set_fbds_provoking_vertex_stride;
@ -376,8 +376,7 @@ cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \
} while (0)
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
VkResult
panvk_per_arch(device_draw_context_init)(struct panvk_device *dev);

View file

@ -61,6 +61,9 @@ panvk_catch_indirect_alloc_failure(VkResult error)
case 13: \
panvk_arch_name(name, v13)(__VA_ARGS__); \
break; \
case 14: \
panvk_arch_name(name, v14)(__VA_ARGS__); \
break; \
default: \
UNREACHABLE("Unsupported architecture"); \
} \
@ -84,6 +87,9 @@ panvk_catch_indirect_alloc_failure(VkResult error)
case 13: \
ret = panvk_arch_name(name, v13)(__VA_ARGS__); \
break; \
case 14: \
ret = panvk_arch_name(name, v14)(__VA_ARGS__); \
break; \
default: \
UNREACHABLE("Unsupported architecture"); \
} \
@ -102,6 +108,8 @@ panvk_catch_indirect_alloc_failure(VkResult error)
#define panvk_per_arch(name) panvk_arch_name(name, v12)
#elif PAN_ARCH == 13
#define panvk_per_arch(name) panvk_arch_name(name, v13)
#elif PAN_ARCH == 14
#define panvk_per_arch(name) panvk_arch_name(name, v14)
#else
#error "Unsupported arch"
#endif

View file

@ -64,6 +64,7 @@ PER_ARCH_FUNCS(7);
PER_ARCH_FUNCS(10);
PER_ARCH_FUNCS(12);
PER_ARCH_FUNCS(13);
PER_ARCH_FUNCS(14);
static VkResult
create_kmod_dev(struct panvk_physical_device *device,
@ -411,6 +412,7 @@ panvk_physical_device_init(struct panvk_physical_device *device,
switch (arch) {
case 6:
case 7:
case 14:
if (!os_get_option("PAN_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
"WARNING: panvk is not well-tested on v%d, "

View file

@ -550,7 +550,7 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
goto err_free_precomp;
}
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
result = panvk_per_arch(device_draw_context_init)(device);
if (result != VK_SUCCESS)
goto err_free_mem_cache;
@ -616,7 +616,7 @@ err_finish_queues:
panvk_meta_cleanup(device);
err_free_draw_ctx:
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
panvk_per_arch(device_draw_context_cleanup)(device);
err_free_mem_cache:
#endif
@ -679,7 +679,7 @@ panvk_per_arch(destroy_device)(struct panvk_device *device,
}
panvk_precomp_cleanup(device);
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
panvk_per_arch(device_draw_context_cleanup)(device);
#endif
panvk_meta_cleanup(device);