Merge branch 'panfrost-v14' into 'main'

Panfrost: Add v14 support

See merge request mesa/mesa!41081
commit b4c4cb1561
52 changed files with 4048 additions and 216 deletions
@@ -34,6 +34,8 @@ The following hardware is currently supported:
 +--------------------+---------------+-----------+--------+--------+
 | G725               | 5th Gen (v13) | 3.1       | 3.1    | 1.4    |
 +--------------------+---------------+-----------+--------+--------+
+| G1-Pro             | 5th Gen (v14) | 3.1       | 3.1    | 1.4    |
++--------------------+---------------+-----------+--------+--------+
 
 Other Midgard and Bifrost chips (e.g. G71) are not yet supported.
@@ -41,7 +41,7 @@ compile_args_panfrost = [
   '-Wno-pointer-arith'
 ]
 
-panfrost_versions = ['4', '5', '6', '7', '9', '10', '12', '13']
+panfrost_versions = ['4', '5', '6', '7', '9', '10', '12', '13', '14']
 libpanfrost_versions = []
 
 foreach ver : panfrost_versions
@@ -54,7 +54,7 @@ foreach ver : panfrost_versions
   ]
   if ver in ['4', '5', '6', '7', '9']
     files_panfrost_vx += ['pan_jm.c']
-  elif ver in ['10', '12', '13']
+  elif ver in ['10', '12', '13', '14']
    files_panfrost_vx += ['pan_csf.c']
   endif
   libpanfrost_versions += static_library(
@@ -49,7 +49,7 @@
  * functions. */
 #if PAN_ARCH <= 9
 #define JOBX(__suffix) GENX(jm_##__suffix)
-#elif PAN_ARCH <= 13
+#elif PAN_ARCH <= 14
 #define JOBX(__suffix) GENX(csf_##__suffix)
 #else
 #error "Unsupported arch"
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2023 Collabora Ltd.
+ * Copyright (C) 2026 Arm Ltd.
  * SPDX-License-Identifier: MIT
  */
 
@@ -13,6 +14,7 @@
 #include "pan_cmdstream.h"
 #include "pan_context.h"
 #include "pan_csf.h"
+#include "pan_fb.h"
 #include "pan_fb_preload.h"
 #include "pan_job.h"
 #include "pan_trace.h"
@@ -75,6 +77,87 @@ csf_update_tiler_oom_ctx(struct cs_builder *b, uint64_t addr)
    (PAN_INCREMENTAL_RENDERING_##_pass##_PASS * sizeof(struct pan_ptr)) + \
    offsetof(struct pan_ptr, gpu))
 
+#if PAN_ARCH >= 14
+static void
+cs_emit_static_fragment_state(struct cs_builder *b,
+                              struct panfrost_batch *batch,
+                              const struct pan_fb_info *fb)
+{
+   struct mali_frame_size_packed frame_size;
+   pan_pack(&frame_size, FRAME_SIZE, cfg) {
+      cfg.width = fb->width;
+      cfg.height = fb->height;
+   }
+
+   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FRAME_SIZE), frame_size.opaque[0]);
+   cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, SAMPLE_POSITION_ARRAY_POINTER),
+                fb->sample_positions);
+
+   struct mali_fragment_flags_1_packed flags1;
+   pan_pack(&flags1, FRAGMENT_FLAGS_1, cfg) {
+      /* The force_samples setting dictates the sample-count that is used
+       * for rasterization, and works like D3D11's ForcedSampleCount
+       * feature:
+       *
+       * - If force_samples == 0: Let nr_samples dictate sample count
+       * - If force_samples == 1: force single-sampled rasterization
+       * - If force_samples >= 2: force multi-sampled rasterization
+       *
+       * This can be used to read SYSTEM_VALUE_SAMPLE_MASK_IN from the
+       * fragment shader, even when performing single-sampled rendering.
+       */
+      if (fb->pls_enabled) {
+         cfg.sample_count = 4;
+         cfg.sample_pattern = pan_sample_pattern(1);
+      } else if (!fb->force_samples) {
+         cfg.sample_count = fb->nr_samples;
+         cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
+      } else if (fb->force_samples == 1) {
+         cfg.sample_count = fb->nr_samples;
+         cfg.sample_pattern = pan_sample_pattern(1);
+      } else {
+         cfg.sample_count = 1;
+         cfg.sample_pattern = pan_sample_pattern(fb->force_samples);
+      }
+
+      cfg.effective_tile_size = fb->tile_size;
+      cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
+      cfg.first_provoking_vertex = fb->first_provoking_vertex;
+      cfg.render_target_count = MAX2(fb->rt_count, 1);
+      cfg.color_buffer_allocation = fb->cbuf_allocation;
+   }
+
+   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]);
+
+   /* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */
+}
+
+static inline void
+cs_emit_layer_fragment_state(struct cs_builder *b, struct cs_index fbd_ptr)
+{
+   /* Emit the dynamic fragment state. This state may change per-layer. */
+
+   cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_0), fbd_ptr,
+                offsetof(struct pan_fbd_layer, flags0));
+   cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_2), fbd_ptr,
+                offsetof(struct pan_fbd_layer, flags2));
+   cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, Z_CLEAR), fbd_ptr,
+                offsetof(struct pan_fbd_layer, z_clear));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, TILER_DESCRIPTOR_POINTER), fbd_ptr,
+                offsetof(struct pan_fbd_layer, tiler));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, RTD_POINTER), fbd_ptr,
+                offsetof(struct pan_fbd_layer, rtd_pointer));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, DBD_POINTER), fbd_ptr,
+                offsetof(struct pan_fbd_layer, dbd_pointer));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_ARG), fbd_ptr,
+                offsetof(struct pan_fbd_layer, frame_argument));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_SHADER_DCD_POINTER), fbd_ptr,
+                offsetof(struct pan_fbd_layer, dcd_pointer));
+
+   cs_flush_loads(b);
+}
+#endif /* PAN_ARCH >= 14 */
+
 static int
 csf_oom_handler_init(struct panfrost_context *ctx)
 {
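The force_samples ladder in the hunk above is compact; as a cross-check, here it is rewritten as a standalone pure function. This is a sketch with illustrative struct and function names, and the field semantics are taken only from the comment in the diff.

#include <stdbool.h>

struct sample_state {
   unsigned sample_count;    /* samples per pixel in the framebuffer */
   unsigned pattern_samples; /* sample count the sample positions are for */
};

static struct sample_state
select_sample_state(bool pls_enabled, unsigned force_samples,
                    unsigned nr_samples)
{
   if (pls_enabled)
      return (struct sample_state){4, 1};
   if (force_samples == 0) /* normal MSAA: nr_samples rules */
      return (struct sample_state){nr_samples, nr_samples};
   if (force_samples == 1) /* force single-sampled rasterization */
      return (struct sample_state){nr_samples, 1};
   /* force_samples >= 2: single-sampled target, multi-sampled raster */
   return (struct sample_state){1, force_samples};
}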
@@ -113,13 +196,14 @@ csf_oom_handler_init(struct panfrost_context *ctx)
 
    cs_function_def(&b, &handler, handler_ctx) {
       struct cs_index tiler_oom_ctx = cs_reg64(&b, TILER_OOM_CTX_REG);
-      struct cs_index counter = cs_reg32(&b, 47);
-      struct cs_index zero = cs_reg64(&b, 48);
-      struct cs_index flush_id = cs_reg32(&b, 48);
-      struct cs_index tiler_ctx = cs_reg64(&b, 50);
-      struct cs_index completed_top = cs_reg64(&b, 52);
-      struct cs_index completed_bottom = cs_reg64(&b, 54);
-      struct cs_index completed_chunks = cs_reg_tuple(&b, 52, 4);
+      struct cs_index counter = cs_reg32(&b, 31);
+      struct cs_index zero = cs_reg64(&b, 56);
+      struct cs_index flush_id = cs_reg32(&b, 58);
+      struct cs_index tiler_ctx = cs_reg64(&b, 60);
+      struct cs_index completed_top = cs_reg64(&b, 64);
+      struct cs_index completed_bottom = cs_reg64(&b, 66);
+      struct cs_index completed_chunks = cs_reg_tuple(&b, 64, 4);
+      struct cs_index fbd_pointer = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);
 
       /* Ensure that the OTHER endpoint is valid */
 #if PAN_ARCH >= 11
@@ -133,12 +217,10 @@ csf_oom_handler_init(struct panfrost_context *ctx)
       cs_load32_to(&b, counter, tiler_oom_ctx, FIELD_OFFSET(counter));
       cs_wait_slot(&b, 0);
       cs_if(&b, MALI_CS_CONDITION_GREATER, counter) {
-         cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
-                      FBD_OFFSET(MIDDLE));
+         cs_load64_to(&b, fbd_pointer, tiler_oom_ctx, FBD_OFFSET(MIDDLE));
       }
       cs_else(&b) {
-         cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
-                      FBD_OFFSET(FIRST));
+         cs_load64_to(&b, fbd_pointer, tiler_oom_ctx, FBD_OFFSET(FIRST));
       }
 
       cs_load32_to(&b, cs_sr_reg32(&b, FRAGMENT, BBOX_MIN), tiler_oom_ctx,
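In plain C, the selection the handler performs above is simply: the FIRST-pass framebuffer descriptor on the first out-of-memory hit, the MIDDLE-pass descriptor on every subsequent one (a sketch with illustrative names; the real pass indices come from the FBD_OFFSET macro).

#include <stdint.h>

enum ir_pass { IR_FIRST, IR_MIDDLE, IR_LAST, IR_PASS_COUNT };

/* Which incremental-rendering FBD the OOM handler loads, mirroring the
 * cs_if/cs_else pair above. */
static uint64_t
pick_oom_fbd(const uint64_t fbds[IR_PASS_COUNT], uint32_t counter)
{
   return counter > 0 ? fbds[IR_MIDDLE] : fbds[IR_FIRST];
}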
@@ -147,11 +229,18 @@ csf_oom_handler_init(struct panfrost_context *ctx)
                    FIELD_OFFSET(bbox_max));
       cs_move64_to(&b, cs_sr_reg64(&b, FRAGMENT, TEM_POINTER), 0);
       cs_move32_to(&b, cs_sr_reg32(&b, FRAGMENT, TEM_ROW_STRIDE), 0);
+#if PAN_ARCH >= 14
+      cs_emit_layer_fragment_state(&b, fbd_pointer);
+#endif
       cs_wait_slot(&b, 0);
 
       /* Run the fragment job and wait */
       cs_select_endpoint_sb(&b, 3);
+#if PAN_ARCH >= 14
+      cs_run_fragment2(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#else
       cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#endif
       cs_wait_slot(&b, 3);
 
       /* Increment counter */
@@ -218,6 +307,21 @@ GENX(csf_cleanup_batch)(struct panfrost_batch *batch)
    panfrost_pool_cleanup(&batch->csf.cs_chunk_pool);
 }
 
+#if PAN_ARCH >= 14
+static inline struct pan_ptr
+alloc_fbd(struct panfrost_batch *batch)
+{
+   const struct pan_desc_alloc_info fbd_layer = {
+      .size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64),
+      .align = alignof(struct pan_fbd_layer),
+      .nelems = 1,
+   };
+
+   return pan_pool_alloc_desc_aggregate(
+      &batch->pool.base, fbd_layer, PAN_DESC(ZS_CRC_EXTENSION),
+      PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
+}
+#else
 static inline struct pan_ptr
 alloc_fbd(struct panfrost_batch *batch)
 {
@@ -225,6 +329,7 @@ alloc_fbd(struct panfrost_batch *batch)
       &batch->pool.base, PAN_DESC(FRAMEBUFFER), PAN_DESC(ZS_CRC_EXTENSION),
       PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
 }
+#endif /* PAN_ARCH >= 14 */
 
 int
 GENX(csf_init_batch)(struct panfrost_batch *batch)
@@ -758,7 +863,7 @@ GENX(csf_preload_fb)(struct panfrost_batch *batch, struct pan_fb_info *fb)
    (_ctx)->fbds[PAN_INCREMENTAL_RENDERING_##_pass##_PASS]
 #define EMIT_FBD(_ctx, _pass, _fb, _tls, _tiler_ctx) \
    GET_FBD(_ctx, _pass).gpu |= \
-      GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass).cpu)
+      GENX(pan_emit_fbd)(_fb, 0, _tls, _tiler_ctx, GET_FBD(_ctx, _pass))
 
 void
 GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
@@ -771,7 +876,7 @@ GENX(csf_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
    /* Default framebuffer descriptor */
 
    batch->framebuffer.gpu |=
-      GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
+      GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
 
    if (batch->draw_count == 0)
       return;
@@ -854,15 +959,21 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
       cs_vt_end(b, cs_now());
    }
 
+   struct cs_index fbd_pointer = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
+
    /* Set up the fragment job */
-   cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
-                batch->framebuffer.gpu);
+   cs_move64_to(b, fbd_pointer, batch->framebuffer.gpu);
 
    cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
                 (batch->miny << 16) | batch->minx);
    cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
                 ((batch->maxy - 1) << 16) | (batch->maxx - 1));
    cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, TEM_POINTER), 0);
    cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, TEM_ROW_STRIDE), 0);
+#if PAN_ARCH >= 14
+   cs_emit_static_fragment_state(b, batch, pfb);
+   cs_emit_layer_fragment_state(b, fbd_pointer);
+#endif
 
    /* Use different framebuffer descriptor if incremental rendering was
    * triggered while tiling */
@@ -871,13 +982,19 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
       cs_load32_to(b, counter, cs_reg64(b, TILER_OOM_CTX_REG), 0);
       cs_wait_slot(b, 0);
       cs_if(b, MALI_CS_CONDITION_GREATER, counter) {
-         cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
-                      GET_FBD(oom_ctx, LAST).gpu);
+         cs_move64_to(b, fbd_pointer, GET_FBD(oom_ctx, LAST).gpu);
+#if PAN_ARCH >= 14
+         cs_emit_layer_fragment_state(b, fbd_pointer);
+#endif
       }
    }
 
    /* Run the fragment job and wait */
+#if PAN_ARCH >= 14
+   cs_run_fragment2(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#else
    cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#endif
    cs_wait_slot(b, 2);
 
    /* Gather freed heap chunks and add them to the heap context free list
@@ -29,7 +29,8 @@ struct pan_csf_tiler_oom_ctx {
    /* Alternative framebuffer descriptors for incremental rendering */
    struct pan_ptr fbds[PAN_INCREMENTAL_RENDERING_PASS_COUNT];
 
-   /* Bounding Box (Register 42 and 43) */
+   /* Bounding Box (Register MALI_FRAGMENT_SR_BBOX_MIN and
+    * MALI_FRAGMENT_SR_BBOX_MAX) */
    uint32_t bbox_min;
    uint32_t bbox_max;
 
@@ -257,8 +257,8 @@ GENX(jm_emit_fbds)(struct panfrost_batch *batch, struct pan_fb_info *fb,
 {
    PAN_TRACE_FUNC(PAN_TRACE_GL_JM);
 
-   batch->framebuffer.gpu |= GENX(pan_emit_fbd)(
-      fb, 0, tls, &batch->tiler_ctx, batch->framebuffer.cpu);
+   batch->framebuffer.gpu |=
+      GENX(pan_emit_fbd)(fb, 0, tls, &batch->tiler_ctx, batch->framebuffer);
 }
 
 void
@@ -1175,6 +1175,9 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
    case 13:
       panfrost_cmdstream_screen_init_v13(screen);
       break;
+   case 14:
+      panfrost_cmdstream_screen_init_v14(screen);
+      break;
    default:
      debug_printf("panfrost: Unhandled architecture major %d", dev->arch);
      panfrost_destroy_screen(&(screen->base));
@@ -155,6 +155,7 @@ void panfrost_cmdstream_screen_init_v9(struct panfrost_screen *screen);
 void panfrost_cmdstream_screen_init_v10(struct panfrost_screen *screen);
 void panfrost_cmdstream_screen_init_v12(struct panfrost_screen *screen);
 void panfrost_cmdstream_screen_init_v13(struct panfrost_screen *screen);
+void panfrost_cmdstream_screen_init_v14(struct panfrost_screen *screen);
 
 #define perf_debug(ctx, ...) \
    do { \
@@ -275,7 +275,7 @@ main(int argc, const char **argv)
 
    unsigned target_arch = atoi(target_arch_str);
 
-   if (target_arch < 4 || target_arch > 13) {
+   if (target_arch < 4 || target_arch > 14) {
       fprintf(stderr, "Unsupported target arch %d\n", target_arch);
       return 1;
    }
@@ -703,8 +703,10 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr)
    assert(intr->intrinsic == nir_intrinsic_load_var_buf_pan ||
           intr->intrinsic == nir_intrinsic_load_var_buf_flat_pan);
 
+   const unsigned arch = b->shader->arch;
+
    /* These are only available on Valhall+ */
-   assert(b->shader->arch >= 9);
+   assert(arch >= 9);
 
    const bool flat = intr->intrinsic == nir_intrinsic_load_var_buf_flat_pan;
    const nir_alu_type src_type = nir_intrinsic_src_type(intr);
@@ -757,19 +759,36 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr)
    bool use_imm_form = false;
    if (nir_src_is_const(intr->src[0])) {
       imm_offset = nir_src_as_uint(intr->src[0]);
-      assert(imm_offset < pan_ld_var_buf_off_size(b->shader->arch));
+      assert(imm_offset < pan_ld_var_buf_off_size(arch));
 
       use_imm_form = true;
    }
 
+   /* On v14+, flat source formats are removed from LD_VAR_BUF/LD_VAR_BUF_IMM,
+    * so flat buffer varyings must use the dedicated LD_VAR_BUF_FLAT*.
+    */
    if (use_imm_form) {
-      bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
-                           BI_UPDATE_STORE, vecsize, imm_offset);
+      if (arch >= 14 && flat) {
+         bi_ld_var_buf_flat_imm_to(b, dest, regfmt, vecsize, imm_offset);
+      } else {
+         bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
+                              BI_UPDATE_STORE, vecsize, imm_offset);
+      }
    } else {
       bi_index offset = bi_src_index(&intr->src[0]);
-      bi_ld_var_buf_to(b, sz, dest, src0, offset, regfmt, sample,
-                       source_format, BI_UPDATE_STORE, vecsize);
+      if (arch >= 14 && flat) {
+         bi_ld_var_buf_flat_to(b, dest, offset, regfmt, vecsize);
+      } else {
+         bi_ld_var_buf_to(b, sz, dest, src0, offset, regfmt, sample,
+                          source_format, BI_UPDATE_STORE, vecsize);
+      }
    }
 
+   /* LD_VAR_BUF_FLAT* only support register formats F16 and F32. */
+   assert(
+      arch < 14 || !flat ||
+      (regfmt == BI_REGISTER_FORMAT_F16 || regfmt == BI_REGISTER_FORMAT_F32));
+
    bi_split_def(b, &intr->def);
 }
 
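The opcode selection introduced above reduces to a small decision table; a sketch as a pure function (illustrative name, returning the mnemonic rather than emitting IR):

/* On v14+, flat varyings get dedicated LD_VAR_BUF_FLAT* opcodes; everything
 * else keeps using LD_VAR_BUF / LD_VAR_BUF_IMM. */
static const char *
ld_var_buf_opcode(unsigned arch, int flat, int offset_is_const)
{
   if (arch >= 14 && flat)
      return offset_is_const ? "LD_VAR_BUF_FLAT_IMM" : "LD_VAR_BUF_FLAT";
   return offset_is_const ? "LD_VAR_BUF_IMM" : "LD_VAR_BUF";
}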
@@ -939,6 +939,32 @@
     <imm name="index" start="12" size="4"/> <!-- 0 for pointx, 1 for pointy, 2 for fragw, 3 for fragz -->
   </ins>
 
+  <ins name="LD_VAR_BUF_FLAT_IMM" title="Load immediate flat varying" message="varying" unit="V">
+    <opcode>
+      <op val="0x40" start="48" mask="0x1FF"/>
+    </opcode>
+    <desc>Fetches a given flat varying from hardware buffer</desc>
+    <slot/>
+    <vecsize/>
+    <regfmt/>
+    <sr write="true"/>
+    <sr_count count="format"/>
+    <imm name="index" start="8" size="11"/>
+  </ins>
+
+  <ins name="LD_VAR_BUF_FLAT" title="Load indirect flat varying" message="varying" unit="V">
+    <opcode>
+      <op val="0x5F" start="48" mask="0x1FF"/>
+    </opcode>
+    <desc>Fetches a given flat varying from hardware buffer</desc>
+    <slot/>
+    <vecsize/>
+    <regfmt/>
+    <sr write="true"/>
+    <sr_count count="format"/>
+    <src/>
+  </ins>
+
   <group name="LD_VAR_BUF_IMM" title="Load immediate varying" message="varying" unit="V">
     <desc>Interpolates a given varying from hardware buffer</desc>
     <ins name="LD_VAR_BUF_IMM.f32">
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2021 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
  * SPDX-License-Identifier: MIT
  */
 
@@ -9,9 +10,9 @@
 
 #include <gtest/gtest.h>
 
-#define CASE(instr, expected) \
+#define CASE_ARCH(instr, arch, expected) \
    do { \
-      uint64_t _value = va_pack_instr(instr, 10); \
+      uint64_t _value = va_pack_instr(instr, arch); \
       if (_value != expected) { \
          fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, \
                  (uint64_t)expected); \
@@ -21,6 +22,8 @@
       } \
    } while (0)
 
+#define CASE(instr, expected) CASE_ARCH(instr, 10, expected)
+
 class ValhallPacking : public testing::Test {
  protected:
    ValhallPacking()
@@ -278,11 +281,41 @@ TEST_F(ValhallPacking, LdVarBufImmF16)
                               BI_VECSIZE_V4, 0),
         0x005d80843300003d);
 
-   CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
-                                 BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID,
-                                 BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
-                                 BI_VECSIZE_V4, 8),
-        0x005d80443308003d);
+   CASE_ARCH(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
+                                      BI_REGISTER_FORMAT_F16,
+                                      BI_SAMPLE_CENTROID, BI_SOURCE_FORMAT_F16,
+                                      BI_UPDATE_STORE, BI_VECSIZE_V4, 8),
+             10, 0x005d80443308003d);
+
+   CASE_ARCH(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
+                                      BI_REGISTER_FORMAT_F16,
+                                      BI_SAMPLE_CENTROID, BI_SOURCE_FORMAT_F16,
+                                      BI_UPDATE_STORE, BI_VECSIZE_V4, 8),
+             11, 0x005d80443300083d);
 }
 
+TEST_F(ValhallPacking, LdVarBufFlatImmFormat)
+{
+   CASE_ARCH(bi_ld_var_buf_flat_imm_to(b, bi_register(0),
+                                       BI_REGISTER_FORMAT_F32,
+                                       BI_VECSIZE_V4, 0x12),
+             14, 0x0040800832001200);
+
+   CASE_ARCH(bi_ld_var_buf_flat_imm_to(b, bi_register(0),
+                                       BI_REGISTER_FORMAT_F16,
+                                       BI_VECSIZE_V4, 0x12),
+             14, 0x0040800433001200);
+}
+
+TEST_F(ValhallPacking, LdVarBufFlat)
+{
+   CASE_ARCH(bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
+                                   BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4),
+             14, 0x005f80083200003d);
+
+   CASE_ARCH(bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
+                                   BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4),
+             14, 0x005f80043300003d);
+}
+
 TEST_F(ValhallPacking, LeaBufImm)
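The new expectations can be partially decoded by hand against the XML instruction definitions earlier in this change. A sketch checking the two fields whose bit positions the XML states explicitly (the remaining bits pack the destination, register format, and vector size, whose exact positions this diff does not spell out):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   const uint64_t expected = 0x0040800832001200ull; /* F32 case, index 0x12 */

   /* opcode: <op val="0x40" start="48" mask="0x1FF"/> */
   assert(((expected >> 48) & 0x1FF) == 0x40);
   /* immediate index: <imm name="index" start="8" size="11"/> */
   assert(((expected >> 8) & 0x7FF) == 0x12);
   return 0;
}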
@@ -77,6 +77,8 @@ walk_bir_shader(bi_context *ctx, struct pan_shader_info *info)
          if (instr->sample == BI_SAMPLE_CENTROID)
             info->fs.hsr.centroid_interpolation = true;
          FALLTHROUGH;
+      case BI_OPCODE_LD_VAR_BUF_FLAT:
+      case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
       case BI_OPCODE_LD_VAR_FLAT:
       case BI_OPCODE_LD_VAR_FLAT_IMM:
          if (!found_atest)
@@ -568,6 +568,10 @@ va_pack_alu(const bi_instr *I, unsigned arch)
       hex |= ((uint64_t)I->sample) << 38;
       break;
 
+   case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
+      hex |= ((uint64_t)I->index) << 8;
+      break;
+
    case BI_OPCODE_LD_ATTR_IMM:
       hex |= ((uint64_t)I->table) << 16;
       hex |= ((uint64_t)I->attribute_index) << 20;
@@ -52,6 +52,7 @@ pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg)
    case 11:
    case 12:
    case 13:
+   case 14:
       return merge_wg ? &bifrost_nir_options_v11_merge_wg :
                         &bifrost_nir_options_v11;
    default:
@@ -824,7 +824,11 @@ cs_instr_is_asynchronous(enum mali_cs_opcode opcode, uint16_t wait_mask)
    case MALI_CS_OPCODE_STORE_MULTIPLE:
    case MALI_CS_OPCODE_RUN_COMPUTE:
    case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
+#if PAN_ARCH >= 14
+   case MALI_CS_OPCODE_RUN_FRAGMENT2:
+#else
    case MALI_CS_OPCODE_RUN_FRAGMENT:
+#endif
    case MALI_CS_OPCODE_RUN_FULLSCREEN:
 #if PAN_ARCH >= 12
    case MALI_CS_OPCODE_RUN_IDVS2:
@@ -1614,6 +1618,22 @@ cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool malloc_enable,
 }
 #endif
 
+#if PAN_ARCH >= 14
+static inline void
+cs_run_fragment2(struct cs_builder *b, bool enable_tem,
+                 enum mali_tile_render_order tile_order)
+{
+   /* Staging regs */
+   cs_flush_loads(b);
+
+   b->req_resource_mask |= CS_FRAG_RES;
+
+   cs_emit(b, RUN_FRAGMENT2, I) {
+      I.enable_tem = enable_tem;
+      I.tile_order = tile_order;
+   }
+}
+#else
 static inline void
 cs_run_fragment(struct cs_builder *b, bool enable_tem,
                 enum mali_tile_render_order tile_order)
@@ -1628,6 +1648,7 @@ cs_run_fragment(struct cs_builder *b, bool enable_tem,
       I.tile_order = tile_order;
    }
 }
+#endif
 
 static inline void
 cs_run_fullscreen(struct cs_builder *b, uint32_t flags_override,
@@ -2469,6 +2490,53 @@ cs_trace_preamble(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
    (int16_t)(offsetof(struct cs_##__type##_trace, __field) - \
              sizeof(struct cs_##__type##_trace))
 
+#if PAN_ARCH >= 14
+#define CS_RUN_FRAGMENT2_SR_COUNT 56
+#define CS_RUN_FRAGMENT2_SR_MASK BITFIELD64_RANGE(0, CS_RUN_FRAGMENT2_SR_COUNT)
+struct cs_run_fragment2_trace {
+   uint64_t ip;
+   uint32_t sr[CS_RUN_FRAGMENT2_SR_COUNT];
+} __attribute__((aligned(64)));
+
+static inline void
+cs_trace_run_fragment2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
+                       struct cs_index scratch_regs, bool enable_tem,
+                       enum mali_tile_render_order tile_order)
+{
+   if (likely(!ctx->enabled)) {
+      cs_run_fragment2(b, enable_tem, tile_order);
+      return;
+   }
+
+   struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
+   struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);
+
+   cs_trace_preamble(b, ctx, scratch_regs,
+                     sizeof(struct cs_run_fragment2_trace));
+
+   /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
+    * won't point to the right instruction. */
+   cs_load_ip_to(b, data);
+   cs_run_fragment2(b, enable_tem, tile_order);
+   cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_fragment2, ip));
+
+   ASSERTED unsigned sr_count = 0;
+   unsigned sr_offset = cs_trace_field_offset(run_fragment2, sr);
+   for (unsigned i = 0; i < CS_RUN_FRAGMENT2_SR_COUNT; i += 16) {
+      unsigned mask = (CS_RUN_FRAGMENT2_SR_MASK >> i) & BITFIELD_MASK(16);
+      if (!mask)
+         continue;
+
+      cs_store(b, cs_reg_tuple(b, i, util_last_bit(mask)), tracebuf_addr, mask,
+               sr_offset);
+      sr_offset += util_bitcount(mask) * sizeof(uint32_t);
+      sr_count += util_bitcount(mask);
+   }
+   assert(sr_count == CS_RUN_FRAGMENT2_SR_COUNT);
+
+   cs_flush_stores(b);
+}
+#else
 struct cs_run_fragment_trace {
    uint64_t ip;
    uint32_t sr[7];
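The chunked store loop above exists because the underlying store instruction takes a 16-bit register mask, so the 56 RUN_FRAGMENT2 staging registers are written out in 16-register windows. A standalone sketch of just the mask arithmetic (no command-stream emission; __builtin_popcount stands in for util_bitcount):

#include <assert.h>
#include <stdint.h>

#define SR_COUNT 56
#define SR_MASK  ((1ull << SR_COUNT) - 1) /* BITFIELD64_RANGE(0, 56) */

int main(void)
{
   unsigned stored = 0;
   for (unsigned i = 0; i < SR_COUNT; i += 16) {
      uint16_t mask = (SR_MASK >> i) & 0xFFFF; /* one 16-register window */
      stored += __builtin_popcount(mask);
   }
   /* windows of 16, 16, 16, then 8 registers */
   assert(stored == SR_COUNT);
   return 0;
}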
@@ -2500,6 +2568,7 @@ cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
             cs_trace_field_offset(run_fragment, sr));
    cs_flush_stores(b);
 }
+#endif
 
 #if PAN_ARCH >= 13
 #define CS_RUN_FULLSCREEN_SR_MASK \
@@ -152,22 +152,22 @@ pandecode_rt(struct pandecode_context *ctx, unsigned index, uint64_t gpu_va)
 
 }
 
-static void
-pandecode_rts(struct pandecode_context *ctx, uint64_t gpu_va,
-              const struct MALI_FRAMEBUFFER_PARAMETERS *fb)
+void
+GENX(pandecode_rts)(struct pandecode_context *ctx, uint64_t gpu_va,
+                    uint32_t render_target_count)
 {
    pandecode_log(ctx, "Color Render Targets @%" PRIx64 ":\n", gpu_va);
    ctx->indent++;
 
-   for (int i = 0; i < (fb->render_target_count); i++)
+   for (int i = 0; i < render_target_count; i++)
       pandecode_rt(ctx, i, gpu_va);
 
    ctx->indent--;
    pandecode_log(ctx, "\n");
 }
 
-static void
-pandecode_zs_crc_ext(struct pandecode_context *ctx, uint64_t gpu_va)
+void
+GENX(pandecode_zs_crc_ext)(struct pandecode_context *ctx, uint64_t gpu_va)
 {
    const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(
       ctx, zs_crc_packed, (uint64_t)gpu_va);
@@ -223,22 +223,65 @@ pandecode_zs_crc_ext(struct pandecode_context *ctx, uint64_t gpu_va)
 
 
 #if PAN_ARCH >= 6
-static void
-pandecode_sample_locations(struct pandecode_context *ctx, const void *fb)
+void
+GENX(pandecode_frame_shader_dcds)(struct pandecode_context *ctx,
+                                  uint64_t dcd_pointer, unsigned pre_frame_0,
+                                  unsigned pre_frame_1, unsigned post_frame,
+                                  unsigned job_type_param, uint64_t gpu_id)
 {
-   pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);
+   const unsigned dcd_size = pan_size(DRAW);
 
-   const uint16_t *PANDECODE_PTR_VAR(ctx, samples, params.sample_locations);
+   if (pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
+      const struct mali_draw_packed *PANDECODE_PTR_VAR(
+         ctx, dcd, dcd_pointer + (0 * dcd_size));
+      pan_unpack(dcd, DRAW, draw);
+      pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n", dcd_pointer,
+                    pre_frame_0);
+      ctx->indent++;
+      GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
+      ctx->indent--;
+   }
 
-   pandecode_log(ctx, "Sample locations @%" PRIx64 ":\n",
-                 params.sample_locations);
+   if (pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
+      const struct mali_draw_packed *PANDECODE_PTR_VAR(
+         ctx, dcd, dcd_pointer + (1 * dcd_size));
+      pan_unpack(dcd, DRAW, draw);
+      pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n",
+                    dcd_pointer + (1 * dcd_size));
+      ctx->indent++;
+      GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
+      ctx->indent--;
+   }
+
+   if (post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
+      const struct mali_draw_packed *PANDECODE_PTR_VAR(
+         ctx, dcd, dcd_pointer + (2 * dcd_size));
+      pan_unpack(dcd, DRAW, draw);
+      pandecode_log(ctx, "Post frame:\n");
+      ctx->indent++;
+      GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
+      ctx->indent--;
+   }
+}
+
+void
+GENX(pandecode_sample_locations)(struct pandecode_context *ctx,
+                                 uint64_t sample_locations)
+{
+   const uint16_t *PANDECODE_PTR_VAR(ctx, samples, sample_locations);
+
+   pandecode_log(ctx, "Sample locations @%" PRIx64 ":\n", sample_locations);
    for (int i = 0; i < 33; i++) {
       pandecode_log(ctx, "  (%d, %d),\n", samples[2 * i] - 128,
                     samples[2 * i + 1] - 128);
    }
 }
-#endif
+#endif /* PAN_ARCH >= 6 */
 
+#if PAN_ARCH < 14
 struct pandecode_fbd
 GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
                     bool is_fragment, uint64_t gpu_id)
@@ -248,46 +291,17 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
    DUMP_UNPACKED(ctx, FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
 
 #if PAN_ARCH >= 6
-   pandecode_sample_locations(ctx, fb);
+   GENX(pandecode_sample_locations)(ctx, params.sample_locations);
 
-   unsigned dcd_size = pan_size(DRAW);
    unsigned job_type_param = 0;
 
 #if PAN_ARCH <= 9
    job_type_param = MALI_JOB_TYPE_FRAGMENT;
 #endif
 
-   if (params.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
-      const struct mali_draw_packed *PANDECODE_PTR_VAR(
-         ctx, dcd, params.frame_shader_dcds + (0 * dcd_size));
-      pan_unpack(dcd, DRAW, draw);
-      pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n",
-                    params.frame_shader_dcds, params.pre_frame_0);
-      ctx->indent++;
-      GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
-      ctx->indent--;
-   }
-
-   if (params.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
-      const struct mali_draw_packed *PANDECODE_PTR_VAR(
-         ctx, dcd, params.frame_shader_dcds + (1 * dcd_size));
-      pan_unpack(dcd, DRAW, draw);
-      pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n",
-                    params.frame_shader_dcds + (1 * dcd_size));
-      ctx->indent++;
-      GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
-      ctx->indent--;
-   }
-
-   if (params.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
-      const struct mali_draw_packed *PANDECODE_PTR_VAR(
-         ctx, dcd, params.frame_shader_dcds + (2 * dcd_size));
-      pan_unpack(dcd, DRAW, draw);
-      pandecode_log(ctx, "Post frame:\n");
-      ctx->indent++;
-      GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
-      ctx->indent--;
-   }
+   GENX(pandecode_frame_shader_dcds)(ctx, params.frame_shader_dcds,
+                                     params.pre_frame_0, params.pre_frame_1,
+                                     params.post_frame, job_type_param, gpu_id);
 #else
    DUMP_SECTION(ctx, FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n");
 
@@ -312,13 +326,13 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
    gpu_va += pan_size(FRAMEBUFFER);
 
    if (params.has_zs_crc_extension) {
-      pandecode_zs_crc_ext(ctx, gpu_va);
+      GENX(pandecode_zs_crc_ext)(ctx, gpu_va);
 
       gpu_va += pan_size(ZS_CRC_EXTENSION);
    }
 
    if (is_fragment)
-      pandecode_rts(ctx, gpu_va, &params);
+      GENX(pandecode_rts)(ctx, gpu_va, params.render_target_count);
 
    return (struct pandecode_fbd){
       .rt_count = params.render_target_count,
@@ -336,6 +350,7 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
    };
 #endif
 }
+#endif /* PAN_ARCH < 14 */
 
 #if PAN_ARCH >= 5
 uint64_t
@@ -132,6 +132,13 @@ void pandecode_cs_binary_v13(struct pandecode_context *ctx, uint64_t bin,
 void pandecode_cs_trace_v13(struct pandecode_context *ctx, uint64_t trace,
                             uint32_t trace_size, uint64_t gpu_id);
 
+void pandecode_interpret_cs_v14(struct pandecode_context *ctx, uint64_t queue,
+                                uint32_t size, uint64_t gpu_id, uint32_t *regs);
+void pandecode_cs_binary_v14(struct pandecode_context *ctx, uint64_t bin,
+                             uint32_t bin_size);
+void pandecode_cs_trace_v14(struct pandecode_context *ctx, uint64_t trace,
+                            uint32_t trace_size, uint64_t gpu_id);
+
 /* Logging infrastructure */
 static void
 pandecode_make_indent(struct pandecode_context *ctx)
@@ -275,4 +282,22 @@ void GENX(pandecode_depth_stencil)(struct pandecode_context *ctx,
 
 #endif
 
+#if PAN_ARCH >= 6
+void GENX(pandecode_sample_locations)(struct pandecode_context *ctx,
+                                      uint64_t sample_locations);
+
+void
+GENX(pandecode_frame_shader_dcds)(struct pandecode_context *ctx,
+                                  uint64_t dcd_pointer, unsigned pre_frame_0,
+                                  unsigned pre_frame_1, unsigned post_frame,
+                                  unsigned job_type_param, uint64_t gpu_id);
+#endif
+
+#if PAN_ARCH >= 5
+void GENX(pandecode_rts)(struct pandecode_context *ctx, uint64_t gpu_va,
+                         uint32_t render_target_count);
+
+void GENX(pandecode_zs_crc_ext)(struct pandecode_context *ctx, uint64_t gpu_va);
+#endif
+
 #endif /* __MMAP_TRACE_H__ */
@@ -423,6 +423,9 @@ pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va,
    case 13:
       pandecode_interpret_cs_v13(ctx, queue_gpu_va, size, gpu_id, regs);
       break;
+   case 14:
+      pandecode_interpret_cs_v14(ctx, queue_gpu_va, size, gpu_id, regs);
+      break;
    default:
       UNREACHABLE("Unsupported architecture");
    }
@@ -446,6 +449,9 @@ pandecode_cs_binary(struct pandecode_context *ctx, uint64_t bin_gpu_va,
    case 13:
       pandecode_cs_binary_v13(ctx, bin_gpu_va, size);
      break;
+   case 14:
+      pandecode_cs_binary_v14(ctx, bin_gpu_va, size);
+      break;
    default:
      UNREACHABLE("Unsupported architecture");
    }
@@ -469,6 +475,9 @@ pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va,
    case 13:
       pandecode_cs_trace_v13(ctx, trace_gpu_va, size, gpu_id);
       break;
+   case 14:
+      pandecode_cs_trace_v14(ctx, trace_gpu_va, size, gpu_id);
+      break;
    default:
       UNREACHABLE("Unsupported architecture");
    }
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2022-2023 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
  * SPDX-License-Identifier: MIT
  */
 
@@ -89,6 +90,12 @@ static const char *defer_modes_str[] = {
 #define defer_mode_str(I) ""
 #endif
 
+#if PAN_ARCH <= 13
+#define assert_no_progress_inc(I) assert(!I.progress_increment)
+#else
+#define assert_no_progress_inc(I) do {} while (0)
+#endif
+
 static void
 print_cs_instr(FILE *fp, const uint64_t *instr)
 {
@@ -117,28 +124,27 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
 
    case MALI_CS_OPCODE_WAIT: {
       cs_unpack(instr, CS_WAIT, I);
-      fprintf(fp, "WAIT%s #%x", I.progress_increment ? ".progress_inc" : "",
-              I.wait_mask);
+      assert_no_progress_inc(I);
+      fprintf(fp, "WAIT #%x", I.wait_mask);
       break;
    }
 
    case MALI_CS_OPCODE_RUN_COMPUTE: {
       const char *axes[4] = {"x_axis", "y_axis", "z_axis"};
       cs_unpack(instr, CS_RUN_COMPUTE, I);
+      assert_no_progress_inc(I);
 
       /* Print the instruction. Ignore the selects and the flags override
       * since we'll print them implicitly later.
       */
 #if PAN_ARCH >= 12
-      fprintf(fp, "RUN_COMPUTE%s.%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
-              I.progress_increment ? ".progress_inc" : "", axes[I.task_axis],
-              I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
-              I.task_increment, I.ep_limit);
+      fprintf(fp, "RUN_COMPUTE.%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
+              axes[I.task_axis], I.srt_select, I.spd_select, I.tsd_select,
+              I.fau_select, I.task_increment, I.ep_limit);
 #else
-      fprintf(fp, "RUN_COMPUTE%s.%s.srt%d.spd%d.tsd%d.fau%d #%u",
-              I.progress_increment ? ".progress_inc" : "", axes[I.task_axis],
-              I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
-              I.task_increment);
+      fprintf(fp, "RUN_COMPUTE.%s.srt%d.spd%d.tsd%d.fau%d #%u",
+              axes[I.task_axis], I.srt_select, I.spd_select, I.tsd_select,
+              I.fau_select, I.task_increment);
 #endif
       break;
    }
@@ -146,8 +152,8 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
 #if PAN_ARCH == 10
    case MALI_CS_OPCODE_RUN_TILING: {
       cs_unpack(instr, CS_RUN_TILING, I);
-      fprintf(fp, "RUN_TILING%s.srt%d.spd%d.tsd%d.fau%d",
-              I.progress_increment ? ".progress_inc" : "", I.srt_select,
+      assert_no_progress_inc(I);
+      fprintf(fp, "RUN_TILING.srt%d.spd%d.tsd%d.fau%d", I.srt_select,
              I.spd_select, I.tsd_select, I.fau_select);
       break;
    }
@@ -156,10 +162,10 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
 #if PAN_ARCH < 12
    case MALI_CS_OPCODE_RUN_IDVS: {
       cs_unpack(instr, CS_RUN_IDVS, I);
+      assert_no_progress_inc(I);
       fprintf(
          fp,
-         "RUN_IDVS%s%s%s.varying_srt%d.varying_fau%d.varying_tsd%d.frag_srt%d.frag_tsd%d r%u, #%" PRIx64,
-         I.progress_increment ? ".progress_inc" : "",
+         "RUN_IDVS%s%s.varying_srt%d.varying_fau%d.varying_tsd%d.frag_srt%d.frag_tsd%d r%u, #%" PRIx64,
          I.malloc_enable ? "" : ".no_malloc",
         I.draw_id_register_enable ? ".draw_id_enable" : "",
         I.varying_srt_select, I.varying_fau_select, I.varying_tsd_select,
@@ -170,6 +176,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
 #else
    case MALI_CS_OPCODE_RUN_IDVS2: {
       cs_unpack(instr, CS_RUN_IDVS2, I);
+      assert_no_progress_inc(I);
 
       const char *vertex_shading_str[] = {
          ".early",
@@ -178,8 +185,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
          ".INVALID",
       };
 
-      fprintf(fp, "RUN_IDVS2%s%s%s%s r%u, #%" PRIx64,
-              I.progress_increment ? ".progress_inc" : "",
+      fprintf(fp, "RUN_IDVS2%s%s%s r%u, #%" PRIx64,
              I.malloc_enable ? "" : ".no_malloc",
              I.draw_id_register_enable ? ".draw_id_enable" : "",
              vertex_shading_str[I.vertex_shading_mode], I.draw_id,
@@ -317,32 +323,37 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
 
    case MALI_CS_OPCODE_SHARED_SB_INC: {
       cs_unpack(instr, CS_SHARED_SB_INC, I);
-
-      const char *progress_increment_name[] = {
-         ".no_increment",
-         ".increment",
-      };
-
-      fprintf(fp, "SHARED_SB_INC%s%s #%u, #%u",
-              progress_increment_name[I.progress_increment],
-              defer_mode_str(I), I.sb_mask, I.shared_entry);
+      assert_no_progress_inc(I);
+      fprintf(fp, "SHARED_SB_INC%s #%u, #%u", defer_mode_str(I), I.sb_mask,
+              I.shared_entry);
       break;
    }
 
    case MALI_CS_OPCODE_SHARED_SB_DEC: {
       cs_unpack(instr, CS_SHARED_SB_DEC, I);
-
-      const char *progress_increment_name[] = {
-         ".no_increment",
-         ".increment",
-      };
-
-      fprintf(fp, "SHARED_SB_DEC%s #%u",
-              progress_increment_name[I.progress_increment], I.shared_entry);
+      assert_no_progress_inc(I);
+      fprintf(fp, "SHARED_SB_DEC #%u", I.shared_entry);
      break;
    }
 #endif
 
+#if PAN_ARCH >= 14
+   case MALI_CS_OPCODE_RUN_FRAGMENT2: {
+      static const char *tile_order[] = {
+         "zorder",  "horizontal",     "vertical",     "unknown",
+         "unknown", "rev_horizontal", "rev_vertical", "unknown",
+         "unknown", "unknown",        "unknown",      "unknown",
+         "unknown", "unknown",        "unknown",      "unknown",
+      };
+
+      cs_unpack(instr, CS_RUN_FRAGMENT2, I);
+
+      fprintf(fp, "RUN_FRAGMENT2%s.tile_order=%s",
+              I.enable_tem ? ".tile_enable_map_enable" : "",
+              tile_order[I.tile_order]);
+      break;
+   }
+#else
    case MALI_CS_OPCODE_RUN_FRAGMENT: {
       static const char *tile_order[] = {
         "zorder", "horizontal", "vertical", "unknown",
@@ -350,27 +361,27 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
         "unknown", "unknown", "unknown", "unknown",
         "unknown", "unknown", "unknown", "unknown",
       };
-      cs_unpack(instr, CS_RUN_FRAGMENT, I);
 
-      fprintf(fp, "RUN_FRAGMENT%s%s.tile_order=%s",
-              I.progress_increment ? ".progress_inc" : "",
+      cs_unpack(instr, CS_RUN_FRAGMENT, I);
+      assert_no_progress_inc(I);
+      fprintf(fp, "RUN_FRAGMENT%s.tile_order=%s",
              I.enable_tem ? ".tile_enable_map_enable" : "",
              tile_order[I.tile_order]);
       break;
    }
+#endif
 
    case MALI_CS_OPCODE_RUN_FULLSCREEN: {
       cs_unpack(instr, CS_RUN_FULLSCREEN, I);
-      fprintf(fp, "RUN_FULLSCREEN%s r%u, #%" PRIx64,
-              I.progress_increment ? ".progress_inc" : "", I.dcd,
-              I.flags_override);
+      assert_no_progress_inc(I);
+      fprintf(fp, "RUN_FULLSCREEN r%u, #%" PRIx64, I.dcd, I.flags_override);
      break;
    }
 
    case MALI_CS_OPCODE_FINISH_TILING: {
       cs_unpack(instr, CS_FINISH_TILING, I);
-      fprintf(fp, "FINISH_TILING%s",
-              I.progress_increment ? ".progress_inc" : "");
+      assert_no_progress_inc(I);
+      fprintf(fp, "FINISH_TILING");
      break;
    }
 
@@ -443,12 +454,6 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
       break;
    }
 
-   case MALI_CS_OPCODE_PROGRESS_WAIT: {
-      cs_unpack(instr, CS_PROGRESS_WAIT, I);
-      fprintf(fp, "PROGRESS_WAIT d%u, #%u", I.source, I.queue);
-      break;
-   }
-
    case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: {
       cs_unpack(instr, CS_SET_EXCEPTION_HANDLER, I);
       fprintf(fp, "SET_EXCEPTION_HANDLER d%u, r%u", I.address, I.length);
@@ -547,29 +552,17 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
       break;
    }
 
-   case MALI_CS_OPCODE_PROGRESS_STORE: {
-      cs_unpack(instr, CS_PROGRESS_STORE, I);
-      fprintf(fp, "PROGRESS_STORE d%u", I.source);
-      break;
-   }
-
-   case MALI_CS_OPCODE_PROGRESS_LOAD: {
-      cs_unpack(instr, CS_PROGRESS_LOAD, I);
-      fprintf(fp, "PROGRESS_LOAD d%u", I.destination);
-      break;
-   }
-
    case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: {
       cs_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I);
+      assert_no_progress_inc(I);
 #if PAN_ARCH >= 12
-      fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
-              I.progress_increment ? ".progress_inc" : "", I.srt_select,
-              I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task,
-              I.ep_limit);
+      fprintf(fp, "RUN_COMPUTE_INDIRECT.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
+              I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
+              I.workgroups_per_task, I.ep_limit);
 #else
-      fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u",
-              I.progress_increment ? ".progress_inc" : "", I.srt_select,
-              I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task);
+      fprintf(fp, "RUN_COMPUTE_INDIRECT.srt%d.spd%d.tsd%d.fau%d #%u",
+              I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
+              I.workgroups_per_task);
 #endif
 
       break;
@@ -1097,6 +1090,99 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
 }
 #endif
 
+#if PAN_ARCH >= 14
+static void
+pandecode_run_fragment2(struct pandecode_context *ctx, FILE *fp,
+                        struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT2 *I)
+{
+   if (qctx->in_exception_handler)
+      return;
+
+   ctx->indent++;
+
+   pandecode_log(ctx, "Iter trace ID0: %" PRIu32 "\n",
+                 cs_get_u32(qctx, MALI_FRAGMENT_SR_ITER_TRACE_ID0));
+   pandecode_log(ctx, "Iter trace ID1: %" PRIu32 "\n",
+                 cs_get_u32(qctx, MALI_FRAGMENT_SR_ITER_TRACE_ID1));
+   pandecode_log(ctx, "TEM pointer: %" PRIx64 "\n",
+                 cs_get_u64(qctx, MALI_FRAGMENT_SR_TEM_POINTER));
+   pandecode_log(ctx, "TEM row stride: %" PRIu32 "\n",
+                 cs_get_u32(qctx, MALI_FRAGMENT_SR_TEM_ROW_STRIDE));
+
+   for (unsigned i = 0; i < 11; ++i) {
+      const unsigned reg = MALI_FRAGMENT_SR_IRD_BUFFER_POINTER_0 + (i * 2);
+      pandecode_log(ctx, "IRD buffer pointer %u: %" PRIx64 "\n", i,
+                    cs_get_u64(qctx, reg));
+   }
+
+   DUMP_CL(ctx, FRAGMENT_FLAGS_3, &qctx->regs[MALI_FRAGMENT_SR_FLAGS_3],
+           "Flags 3:\n");
+   DUMP_CL(ctx, FRAGMENT_BOUNDING_BOX, &qctx->regs[MALI_FRAGMENT_SR_BBOX_MIN],
+           "Bounding Box:\n");
+   DUMP_CL(ctx, FRAME_SIZE, &qctx->regs[MALI_FRAGMENT_SR_FRAME_SIZE],
+           "Frame size:\n");
+
+   pan_unpack((const struct mali_fragment_flags_0_packed *)&qctx
+                 ->regs[MALI_FRAGMENT_SR_FLAGS_0],
+              FRAGMENT_FLAGS_0, flags0_unpacked);
+   DUMP_UNPACKED(ctx, FRAGMENT_FLAGS_0, flags0_unpacked, "Flags 0:\n");
+
+   pan_unpack((const struct mali_fragment_flags_1_packed *)&qctx
+                 ->regs[MALI_FRAGMENT_SR_FLAGS_1],
+              FRAGMENT_FLAGS_1, flags1_unpacked);
+   DUMP_UNPACKED(ctx, FRAGMENT_FLAGS_1, flags1_unpacked, "Flags 1:\n");
+
+   DUMP_CL(ctx, FRAGMENT_FLAGS_2, &qctx->regs[MALI_FRAGMENT_SR_FLAGS_2],
+           "Flags 2:\n");
+   pandecode_log(ctx, "Z clear: %f\n",
+                 uif(cs_get_u32(qctx, MALI_FRAGMENT_SR_Z_CLEAR)));
+
+   const uint64_t tiler_pointer =
+      cs_get_u64(qctx, MALI_FRAGMENT_SR_TILER_DESCRIPTOR_POINTER);
+   pandecode_log(ctx, "Tiler descriptor pointer: 0x%" PRIx64 "\n",
+                 tiler_pointer);
+
+   const uint64_t rtd_pointer = cs_get_u64(qctx, MALI_FRAGMENT_SR_RTD_POINTER);
+   pandecode_log(ctx, "RTD pointer: 0x%" PRIx64 "\n", rtd_pointer);
+
+   const uint64_t dbd_pointer = cs_get_u64(qctx, MALI_FRAGMENT_SR_DBD_POINTER);
+   pandecode_log(ctx, "DBD pointer: 0x%" PRIx64 "\n", dbd_pointer);
+
+   pandecode_log(ctx, "Frame argument: %" PRIx64 "\n",
+                 cs_get_u64(qctx, MALI_FRAGMENT_SR_FRAME_ARG));
+
+   const uint64_t sample_locations =
+      cs_get_u64(qctx, MALI_FRAGMENT_SR_SAMPLE_POSITION_ARRAY_POINTER);
+   pandecode_log(ctx, "Sample locations: 0x%" PRIx64 "\n", sample_locations);
+
+   const uint64_t dcd_pointer =
+      cs_get_u64(qctx, MALI_FRAGMENT_SR_FRAME_SHADER_DCD_POINTER);
+   pandecode_log(ctx, "Frame shader DCD pointer: 0x%" PRIx64 "\n", dcd_pointer);
+
+   DUMP_CL(ctx, VRS_IMAGE, &qctx->regs[MALI_FRAGMENT_SR_VRS_IMAGE],
+           "VRS image:\n");
+
+   GENX(pandecode_sample_locations)(ctx, sample_locations);
+
+   const unsigned job_type_param = 0;
+   GENX(pandecode_frame_shader_dcds)(ctx, dcd_pointer,
+                                     flags0_unpacked.pre_frame_0,
+                                     flags0_unpacked.pre_frame_1,
+                                     flags0_unpacked.post_frame,
+                                     job_type_param, qctx->gpu_id);
+
+   if (tiler_pointer)
+      GENX(pandecode_tiler)(ctx, tiler_pointer);
+
+   if (dbd_pointer)
+      GENX(pandecode_zs_crc_ext)(ctx, dbd_pointer);
+
+   if (rtd_pointer)
+      GENX(pandecode_rts)(ctx, rtd_pointer, flags1_unpacked.render_target_count);
+
+   ctx->indent--;
+}
+#else
 static void
 pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp,
                        struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT *I)
@@ -1115,6 +1201,7 @@ pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp,
 
    ctx->indent--;
 }
+#endif /* PAN_ARCH >= 14 */
 
 static void
 pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp,
@@ -1261,11 +1348,19 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
    }
 #endif
 
+#if PAN_ARCH >= 14
+   case MALI_CS_OPCODE_RUN_FRAGMENT2: {
+      cs_unpack(bytes, CS_RUN_FRAGMENT2, I);
+      pandecode_run_fragment2(ctx, fp, qctx, &I);
+      break;
+   }
+#else
    case MALI_CS_OPCODE_RUN_FRAGMENT: {
       cs_unpack(bytes, CS_RUN_FRAGMENT, I);
       pandecode_run_fragment(ctx, fp, qctx, &I);
       break;
    }
+#endif
 
    case MALI_CS_OPCODE_RUN_FULLSCREEN: {
       cs_unpack(bytes, CS_RUN_FULLSCREEN, I);
@@ -2192,18 +2287,6 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg,
          break;
       }
 
-      case MALI_CS_OPCODE_PROGRESS_LOAD: {
-         cs_unpack(instr, CS_PROGRESS_LOAD, I);
-         for (unsigned i = 0; i < 16; i++) {
-            if (BITSET_TEST(track_map, I.destination) ||
-                BITSET_TEST(track_map, I.destination + 1)) {
-               ibranch->has_unknown_targets = true;
-               return;
-            }
-         }
-         break;
-      }
-
       default:
          break;
       }
@@ -2430,7 +2513,12 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin,
 #else
       case MALI_CS_OPCODE_RUN_IDVS:
 #endif
+
+#if PAN_ARCH >= 14
+      case MALI_CS_OPCODE_RUN_FRAGMENT2:
+#else
       case MALI_CS_OPCODE_RUN_FRAGMENT:
+#endif
       case MALI_CS_OPCODE_RUN_FULLSCREEN:
       case MALI_CS_OPCODE_RUN_COMPUTE:
       case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
@@ -2539,6 +2627,19 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
       }
 #endif
 
+#if PAN_ARCH >= 14
+      case MALI_CS_OPCODE_RUN_FRAGMENT2: {
+         struct cs_run_fragment2_trace *frag_trace = trace_data;
+
+         assert(trace_size >= sizeof(*frag_trace));
+         cs_unpack(instr, CS_RUN_FRAGMENT2, I);
+         memcpy(&regs[0], frag_trace->sr, sizeof(frag_trace->sr));
+         pandecode_run_fragment2(ctx, ctx->dump_stream, &qctx, &I);
+         trace_data = frag_trace + 1;
+         trace_size -= sizeof(*frag_trace);
+         break;
+      }
+#else
       case MALI_CS_OPCODE_RUN_FRAGMENT: {
          struct cs_run_fragment_trace *frag_trace = trace_data;
 
@@ -2550,6 +2651,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
          trace_size -= sizeof(*frag_trace);
          break;
       }
+#endif
 
       case MALI_CS_OPCODE_RUN_FULLSCREEN: {
          struct cs_run_fullscreen_trace *fs_trace = trace_data;
@@ -61,6 +61,9 @@
 #elif (PAN_ARCH == 13)
 #define GENX(X) X##_v13
 #include "genxml/v13_pack.h"
+#elif (PAN_ARCH == 14)
+#define GENX(X) X##_v14
+#include "genxml/v14_pack.h"
 #else
 #error "Need to add suffixing macro for this architecture"
 #endif
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: MIT
 
 pan_packers = []
-foreach packer : ['common', 'v4', 'v5', 'v6', 'v7', 'v9', 'v10', 'v12', 'v13']
+foreach packer : ['common', 'v4', 'v5', 'v6', 'v7', 'v9', 'v10', 'v12', 'v13', 'v14']
   pan_packers += custom_target(
     packer + '_pack.h',
     input : ['gen_pack.py', packer + '.xml'],
@@ -20,7 +20,7 @@ idep_pan_packers = declare_dependency(
 
 libpanfrost_decode_per_arch = []
 
-foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13']
+foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13', '14']
   libpanfrost_decode_per_arch += static_library(
     'pandecode-arch-v' + ver,
     ['decode.c', 'decode_jm.c', 'decode_csf.c', pan_packers],
@@ -1,5 +1,6 @@
 <!--
   Copyright (C) 2020 Collabora Ltd.
+  Copyright (C) 2026 Arm Ltd.
   SPDX-License-Identifier: MIT
 -->
 
@@ -84,6 +85,7 @@
   <enum name="Address Mode">
     <value name="Flat" value="0"/>
     <value name="Packed" value="1"/>
+    <value name="Out of bounds" value="8"/>
   </enum>
 
   <enum name="Format">
@@ -132,6 +134,7 @@
     <value name="A2 YUV10" value="41"/>
     <value name="YUYAAYVYAA" value="42"/>
     <!--- TODO: revisit YUV -->
+    <value name="Y10U10V10_420" value="43"/>
     <value name="YUYV10" value="44"/>
     <value name="VYUY10" value="45"/>
     <value name="Y10 UV10 422" value="46"/>
@@ -1163,6 +1166,13 @@
   <enum name="Clump Ordering">
     <value name="Tiled U-Interleaved" value="1"/>
     <value name="Linear" value="2"/>
+
+    <!-- Block-linear interleaved clump orderings are not available on
+         all v10 architectures. -->
+    <value name="Block-linear interleaved 16x16" value="3"/>
+    <value name="Block-linear interleaved 8x16" value="4"/>
+    <value name="Block-linear interleaved 8x8" value="5"/>
+
     <value name="Interleaved 64k" value="8"/>
   </enum>
 
@@ -1,5 +1,6 @@
 <!--
   Copyright (C) 2025 Collabora Ltd.
+  Copyright (C) 2026 Arm Ltd.
   SPDX-License-Identifier: MIT
 -->
 
@@ -84,6 +85,7 @@
   <enum name="Address Mode">
     <value name="Flat" value="0"/>
     <value name="Packed" value="1"/>
+    <value name="Out of bounds" value="8"/>
   </enum>
 
   <enum name="Format">
@@ -132,6 +134,7 @@
     <value name="A2 YUV10" value="41"/>
     <value name="YUYAAYVYAA" value="42"/>
     <!--- TODO: revisit YUV -->
+    <value name="Y10U10V10_420" value="43"/>
     <value name="YUYV10" value="44"/>
     <value name="VYUY10" value="45"/>
     <value name="Y10 UV10 422" value="46"/>
@@ -1426,6 +1429,9 @@
   <enum name="Clump Ordering">
     <value name="Tiled U-Interleaved" value="1"/>
     <value name="Linear" value="2"/>
+    <value name="Block-linear interleaved 16x16" value="3"/>
+    <value name="Block-linear interleaved 8x16" value="4"/>
+    <value name="Block-linear interleaved 8x8" value="5"/>
     <value name="Interleaved 64k" value="8"/>
   </enum>
 
@@ -1,5 +1,6 @@
 <!--
   Copyright (C) 2025 Collabora Ltd.
+  Copyright (C) 2026 Arm Ltd.
   SPDX-License-Identifier: MIT
 -->
 
@@ -84,6 +85,7 @@
   <enum name="Address Mode">
     <value name="Flat" value="0"/>
     <value name="Packed" value="1"/>
+    <value name="Out of bounds" value="8"/>
   </enum>
 
   <enum name="Format">
@@ -132,6 +134,7 @@
     <value name="A2 YUV10" value="41"/>
     <value name="YUYAAYVYAA" value="42"/>
     <!--- TODO: revisit YUV -->
+    <value name="Y10U10V10_420" value="43"/>
     <value name="YUYV10" value="44"/>
     <value name="VYUY10" value="45"/>
     <value name="Y10 UV10 422" value="46"/>
@@ -1728,6 +1731,9 @@
   <enum name="Clump Ordering">
     <value name="Tiled U-Interleaved" value="1"/>
     <value name="Linear" value="2"/>
+    <value name="Block-linear interleaved 16x16" value="3"/>
+    <value name="Block-linear interleaved 8x16" value="4"/>
+    <value name="Block-linear interleaved 8x8" value="5"/>
    <value name="Interleaved 64k" value="8"/>
   </enum>
 
src/panfrost/genxml/v14.xml: new file, 2755 lines (diff suppressed because it is too large)
@@ -1,5 +1,6 @@
 <!--
   Copyright (C) 2020 Collabora Ltd.
+  Copyright (C) 2026 Arm Ltd.
   SPDX-License-Identifier: MIT
 -->
 
@@ -103,6 +104,7 @@
   <enum name="Address Mode">
     <value name="Flat" value="0"/>
     <value name="Packed" value="1"/>
+    <value name="Out of bounds" value="8"/>
   </enum>
 
   <enum name="Format">
@@ -4,7 +4,7 @@
 
 subdir('kmod')
 
-pixel_format_versions = ['5', '6', '7', '9', '10', '12', '13']
+pixel_format_versions = ['5', '6', '7', '9', '10', '12', '13', '14']
 libpanfrost_pixel_format = []
 
 deps_for_libpanfrost = [dep_libdrm, idep_pan_packers, idep_mesautil, libpanfrost_model_dep]
@@ -22,7 +22,7 @@ endforeach
 
 libpanfrost_per_arch = []
 
-foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13']
+foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13', '14']
   libpanfrost_per_arch += static_library(
     'pan-arch-v' + ver,
     [
@@ -3,6 +3,7 @@
  * Copyright (C) 2014 Broadcom
  * Copyright (C) 2018-2019 Alyssa Rosenzweig
  * Copyright (C) 2019-2020 Collabora, Ltd.
+ * Copyright (C) 2026 Arm Ltd.
  * SPDX-License-Identifier: MIT
  */
 
@@ -711,6 +712,32 @@ pan_afbc_compression_mode(enum pan_afbc_mode mode)
    case PAN_AFBC_MODE_R16G16B16A16:
       return MALI_AFBC_COMPRESSION_MODE_R16G16B16A16;
 #endif
+#if PAN_ARCH >= 14
+   case PAN_AFBC_MODE_YUV420_6C8:
+      return MALI_AFBC_COMPRESSION_MODE_Y8U8V8_420;
+   case PAN_AFBC_MODE_YUV420_2C8:
+      return MALI_AFBC_COMPRESSION_MODE_R8G8;
+   case PAN_AFBC_MODE_YUV420_1C8:
+      return MALI_AFBC_COMPRESSION_MODE_R8;
+   case PAN_AFBC_MODE_YUV420_6C10:
+      return MALI_AFBC_COMPRESSION_MODE_Y10U10V10_420;
+   case PAN_AFBC_MODE_YUV420_2C10:
+      return MALI_AFBC_COMPRESSION_MODE_R10G10;
+   case PAN_AFBC_MODE_YUV420_1C10:
+      return MALI_AFBC_COMPRESSION_MODE_R10;
+   case PAN_AFBC_MODE_YUV422_4C8:
+      return MALI_AFBC_COMPRESSION_MODE_Y8U8Y8V8_422;
+   case PAN_AFBC_MODE_YUV422_2C8:
+      return MALI_AFBC_COMPRESSION_MODE_R8G8;
+   case PAN_AFBC_MODE_YUV422_1C8:
+      return MALI_AFBC_COMPRESSION_MODE_R8;
+   case PAN_AFBC_MODE_YUV422_4C10:
+      return MALI_AFBC_COMPRESSION_MODE_Y10U10Y10V10_422;
+   case PAN_AFBC_MODE_YUV422_2C10:
+      return MALI_AFBC_COMPRESSION_MODE_R10G10;
+   case PAN_AFBC_MODE_YUV422_1C10:
+      return MALI_AFBC_COMPRESSION_MODE_R10;
+#else
    case PAN_AFBC_MODE_YUV420_6C8:
       return MALI_AFBC_COMPRESSION_MODE_YUV420_6C8;
    case PAN_AFBC_MODE_YUV420_2C8:
@ -735,6 +762,7 @@ pan_afbc_compression_mode(enum pan_afbc_mode mode)
|
|||
return MALI_AFBC_COMPRESSION_MODE_YUV422_2C10;
|
||||
case PAN_AFBC_MODE_YUV422_1C10:
|
||||
return MALI_AFBC_COMPRESSION_MODE_YUV422_1C10;
|
||||
#endif /* PAN_ARCH >= 14 */
|
||||
#if PAN_ARCH == 9
|
||||
case PAN_AFBC_MODE_R16:
|
||||
case PAN_AFBC_MODE_R16G16:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
/*
 * Copyright (C) 2023 Collabora, Ltd.
 * Copyright (C) 2026 Arm Ltd.
 * SPDX-License-Identifier: MIT
 */
@ -347,6 +348,25 @@ pan_afrc_format(struct pan_afrc_format_info info, uint64_t modifier,
      return (scan ? MALI_AFRC_FORMAT_R10G10B10A10_SCAN
                   : MALI_AFRC_FORMAT_R10G10B10A10_ROT);

#if PAN_ARCH >= 14
   case PAN_AFRC_ICHANGE_FORMAT_YUV444:
   case PAN_AFRC_ICHANGE_FORMAT_YUV422:
   case PAN_AFRC_ICHANGE_FORMAT_YUV420:
      if (info.bpc == 8) {
         if (plane == 0 || info.num_planes == 3)
            return (scan ? MALI_AFRC_FORMAT_R8_SCAN : MALI_AFRC_FORMAT_R8_ROT);

         return (scan ? MALI_AFRC_FORMAT_R8G8_SCAN : MALI_AFRC_FORMAT_R8G8_ROT);
      }

      if (plane == 0 || info.num_planes == 3)
         return (scan ? MALI_AFRC_FORMAT_R10_SCAN : MALI_AFRC_FORMAT_R10_ROT);

      assert(info.ichange_fmt == PAN_AFRC_ICHANGE_FORMAT_YUV422 ||
             info.ichange_fmt == PAN_AFRC_ICHANGE_FORMAT_YUV420);
      return (scan ? MALI_AFRC_FORMAT_R10G10_SCAN
                   : MALI_AFRC_FORMAT_R10G10_ROT);
#else
   case PAN_AFRC_ICHANGE_FORMAT_YUV444:
      if (info.bpc == 8) {
         if (plane == 0 || info.num_planes == 3)

@ -394,6 +414,7 @@ pan_afrc_format(struct pan_afrc_format_info info, uint64_t modifier,

      return (scan ? MALI_AFRC_FORMAT_R10G10_420_SCAN
                   : MALI_AFRC_FORMAT_R10G10_420_ROT);
#endif /* PAN_ARCH >= 14 */

   default:
      return MALI_AFRC_FORMAT_INVALID;
@ -1,5 +1,6 @@
/*
 * Copyright (C) 2021 Collabora, Ltd.
 * Copyright (C) 2026 Arm Ltd.
 * SPDX-License-Identifier: MIT
 */

@ -11,6 +12,7 @@
#include "pan_afrc.h"
#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_fb.h"
#include "pan_props.h"
#include "pan_texture.h"
#include "pan_trace.h"
@ -1172,11 +1174,156 @@ check_fb_attachments(const struct pan_fb_info *fb)
#endif
}

#if PAN_ARCH >= 14
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx, void *out)
                   const struct pan_tiler_context *tiler_ctx,
                   const struct pan_ptr framebuffer)
{
   void *out = framebuffer.cpu;

   PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);

   check_fb_attachments(fb);

   const int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
   const bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
   const struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb);

   /* Emit to memory the state that might change per layer. The static
    * state is emitted directly to CSF registers by
    * cs_emit_static_fragment_state().
    */

   struct pan_fbd_layer fbd_data = {0};
   fbd_data.tiler = tiler_ctx->valhall.desc;

   /* internal_layer_index in flags0 is used to select the right
    * primitive list in the tiler context, and frame_arg is the value
    * that's passed to the fragment shader through r62-r63, which we use
    * to pass gl_Layer. Since layer_idx only takes 8 bits, we might
    * use the extra 56 bits we have in frame_argument to pass other
    * information to the fragment shader at some point.
    */
   assert(layer_idx >= tiler_ctx->valhall.layer_offset);
   fbd_data.frame_argument = layer_idx;
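
Editor's note: the comment above splits the 64-bit frame argument between an 8-bit layer index and 56 reserved bits. As a minimal sketch of what such a packing could look like if extra data were ever carried alongside gl_Layer — the helper and layout here are hypothetical, not part of this change, since the code currently stores layer_idx directly:

#include <assert.h>
#include <stdint.h>

/* Hypothetical packing: gl_Layer in the low 8 bits of the frame
 * argument, upper 56 bits free for future per-frame data. */
static inline uint64_t
pack_frame_argument(uint8_t layer_idx, uint64_t extra)
{
   assert(extra < (1ull << 56)); /* must fit the reserved bits */
   return (uint64_t)layer_idx | (extra << 8);
}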

   pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
      cfg.pre_frame_0 =
         pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
                                   pan_clean_tile_write_any_set(clean_tile));
      cfg.pre_frame_1 =
         pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
                                   pan_clean_tile_write_any_set(clean_tile));
      cfg.post_frame = fb->bifrost.pre_post.modes[2];

      const unsigned zs_bytes_per_pixel = pan_zsbuf_bytes_per_pixel(fb);
      /* We can interleave HSR if we have space for two ZS tiles in
       * the tile buffer. */
      const unsigned max_zs_tile_size_interleave =
         fb->z_tile_buf_budget >> util_logbase2_ceil(zs_bytes_per_pixel);
      const bool hsr_can_interleave =
         fb->tile_size <= max_zs_tile_size_interleave;

      /* Enabling prepass without interleave is generally not good for
       * performance, so disable HSR in that case. */
      cfg.hsr_prepass_enable = fb->allow_hsr_prepass && hsr_can_interleave;
      cfg.hsr_prepass_interleaving_enable = hsr_can_interleave;
      cfg.hsr_prepass_filter_enable = true;
      cfg.hsr_hierarchical_optimizations_enable = true;

      cfg.internal_layer_index = layer_idx - tiler_ctx->valhall.layer_offset;
   }
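
Editor's note: to make the interleave test above concrete, here is the same computation with illustrative numbers. The budget and byte counts are assumptions for the example only; the real values come from the framebuffer state:

#include <stdint.h>
#include <stdio.h>

/* Ceiling log2 for x > 0, matching what util_logbase2_ceil computes. */
static unsigned log2_ceil(uint32_t x)
{
   unsigned bits = 32 - __builtin_clz(x);
   return (x & (x - 1)) ? bits : bits - 1;
}

int main(void)
{
   /* Illustrative: 32 KiB Z/S tile-buffer budget, 8 B of Z/S per pixel
    * (a D32+S8-style layout), 4096-pixel tiles. */
   const uint32_t z_tile_buf_budget = 32 * 1024;
   const uint32_t zs_bytes_per_pixel = 8;
   const uint32_t tile_size = 4096;

   uint32_t max_interleave = z_tile_buf_budget >> log2_ceil(zs_bytes_per_pixel);
   /* 32768 >> 3 = 4096, so a 4096-pixel tile still interleaves. */
   printf("interleave %s (max %u px/tile)\n",
          tile_size <= max_interleave ? "possible" : "not possible",
          max_interleave);
   return 0;
}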

   fbd_data.dcd_pointer = fb->bifrost.pre_post.dcds.gpu;

   pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
      cfg.s_clear = fb->zs.clear_value.stencil;
      cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);

      /* Default to 24 bit depth if there's no surface. */
      cfg.z_internal_format =
         fb->zs.view.zs ? pan_get_z_internal_format(fb->zs.view.zs->format)
                        : MALI_Z_INTERNAL_FORMAT_D24;
      cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);

      if (crc_rt >= 0) {
         bool *valid = fb->rts[crc_rt].crc_valid;
         bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
                     fb->draw_extent.maxx == (fb->width - 1) &&
                     fb->draw_extent.maxy == (fb->height - 1);

         /* If the CRC was valid it stays valid; if it wasn't, we must
          * ensure the render operation covers the full frame and that
          * clean tiles are pushed to memory. */
         bool new_valid = *valid | (full && pan_clean_tile_write_rt_enabled(
                                               clean_tile, crc_rt));

         cfg.crc_read_enable = *valid;

         /* If the data is currently invalid, still write CRC
          * data if we are doing a full write, so that it is
          * valid for next time. */
         cfg.crc_write_enable = new_valid;

         *valid = new_valid;
      }
   }
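
Editor's note: the CRC bookkeeping above reduces to a small state machine. A standalone sketch of the same decision — the struct and field names are simplified stand-ins, not the real descriptor interface:

#include <stdbool.h>

struct crc_decision {
   bool read_enable;  /* reuse CRCs produced by the previous pass */
   bool write_enable; /* produce CRCs for the next pass */
};

/* was_valid: the CRC buffer held correct data before this render pass.
 * full_frame_write: this pass covers every pixel and stores clean tiles. */
static struct crc_decision
decide_crc(bool *was_valid, bool full_frame_write)
{
   bool new_valid = *was_valid || full_frame_write;
   struct crc_decision d = {
      .read_enable = *was_valid,   /* only read CRCs that are trustworthy */
      .write_enable = new_valid,   /* write when they will end up valid */
   };
   *was_valid = new_valid;
   return d;
}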

   fbd_data.z_clear = util_bitpack_float(fb->zs.clear_value.depth);

   {
      /* Set the DBD and RTD pointers. Both must be 64-byte aligned. */
      uint64_t out_gpu_addr =
         framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);

      if (has_zs_crc_ext) {
         fbd_data.dbd_pointer = out_gpu_addr;
         assert(fbd_data.dbd_pointer % 64 == 0);
         out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
      }

      fbd_data.rtd_pointer = out_gpu_addr;
      assert(fbd_data.rtd_pointer % 64 == 0);
   }

   memcpy(out, &fbd_data, sizeof(fbd_data));
   out += ALIGN_POT(sizeof(fbd_data), 64);
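
Editor's note: the pointer fix-up above implies a simple packed layout inside the framebuffer allocation. A sketch of the offset arithmetic, with placeholder descriptor sizes since the real ones come from pan_size() on the generated descriptors:

#include <stdint.h>

/* Layout of one per-layer FBD allocation on v14, per the code above:
 *   offset 0                  : struct pan_fbd_layer (padded to 64 B)
 *   offset 64                 : ZS_CRC_EXTENSION, if present
 *   offset 64 [+ zs_crc_size] : RENDER_TARGET[0 .. rt_count-1]
 */
static uint64_t
fbd_rtd_offset(uint64_t base, int has_zs_crc_ext, uint64_t zs_crc_size)
{
   uint64_t off = base + 64; /* layer state rounded up to 64 bytes */
   if (has_zs_crc_ext)
      off += zs_crc_size;    /* DBD sits right after the layer state */
   return off;               /* RTDs start here, still 64-byte aligned */
}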

   if (has_zs_crc_ext) {
      struct mali_zs_crc_extension_packed *zs_crc_ext = out;
      pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile);
      out += pan_size(ZS_CRC_EXTENSION);
   }

   const unsigned rt_count = MAX2(fb->rt_count, 1);
   unsigned cbuf_offset = 0;
   for (unsigned i = 0; i < rt_count; i++) {
      pan_emit_rt(fb, layer_idx, i, cbuf_offset, out, clean_tile);
      out += pan_size(RENDER_TARGET);
      if (!fb->rts[i].view)
         continue;

      cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
                     fb->tile_size *
                     pan_image_view_get_nr_samples(fb->rts[i].view);

      if (i != crc_rt && fb->rts[i].crc_valid != NULL)
         *(fb->rts[i].crc_valid) = false;
   }

   return 0;
}
#else
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx,
                   const struct pan_ptr framebuffer)
{
   void *out = framebuffer.cpu;

   PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);

   check_fb_attachments(fb);
@ -1351,6 +1498,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
   }
   return tag.opaque[0];
}
#endif /* PAN_ARCH >= 14 */
#else /* PAN_ARCH == 4 */
static enum mali_color_format
pan_sfbd_raw_format(unsigned bits)

@ -1378,8 +1526,11 @@ GENX(pan_select_tile_size)(struct pan_fb_info *fb)
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx, void *fbd)
                   const struct pan_tiler_context *tiler_ctx,
                   const struct pan_ptr framebuffer)
{
   void *fbd = framebuffer.cpu;

   PAN_TRACE_FUNC(PAN_TRACE_LIB_DESC);

   assert(fb->rt_count <= 1);
@ -341,7 +341,7 @@ void GENX(pan_emit_afrc_color_attachment)(const struct pan_attachment_info *att,
unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                            const struct pan_tls_info *tls,
                            const struct pan_tiler_context *tiler_ctx,
                            void *out);
                            const struct pan_ptr framebuffer);

#if PAN_ARCH >= 6
unsigned GENX(pan_select_tiler_hierarchy_mask)(uint32_t width, uint32_t height,
@ -1,5 +1,6 @@
/*
 * Copyright (C) 2026 Collabora, Ltd.
 * Copyright (C) 2026 Arm Ltd.
 * SPDX-License-Identifier: MIT
 */
#include "pan_fb.h"
@ -669,9 +670,124 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
}
#endif

#if PAN_ARCH >= 14
uint32_t
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out)
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
                       const struct pan_ptr framebuffer)
{
   /* Emit the dynamic framebuffer state, i.e. state that may change per layer. */

   void *out = framebuffer.cpu;
   const struct pan_fb_layout *fb = info->fb;
   const struct pan_fb_load *load = info->load;
   const struct pan_fb_store *store = info->store;
   const struct pan_fb_clean_tile ct = pan_fb_get_clean_tile(info);
   const bool has_zs_crc_ext = pan_fb_has_zs(fb);

   struct pan_fbd_layer fbd_data = {0};
   fbd_data.tiler = info->tiler_ctx->valhall.desc;

   /* layer_index in flags0 is used to select the right primitive list in
    * the tiler context, and frame_arg is the value that's passed to the
    * fragment shader through r62-r63, which we use to pass gl_Layer. Since
    * layer_idx only takes 8 bits, we might use the extra 56 bits we
    * have in frame_argument to pass other information to the fragment
    * shader at some point.
    */
   assert(info->layer >= info->tiler_ctx->valhall.layer_offset);
   fbd_data.frame_argument = info->layer;

   pan_pack(&fbd_data.flags0, FRAGMENT_FLAGS_0, cfg) {
      cfg.pre_frame_0 = pan_fix_frame_shader_mode(info->frame_shaders.modes[0],
                                                  ct.rts || ct.zs || ct.s);
      cfg.pre_frame_1 = pan_fix_frame_shader_mode(info->frame_shaders.modes[1],
                                                  ct.rts || ct.zs || ct.s);
      cfg.post_frame = info->frame_shaders.modes[2];

      /* Enabling prepass without pipelining is generally not good for
       * performance, so disable HSR in that case.
       */
      cfg.hsr_prepass_enable = info->allow_hsr_prepass &&
                               pan_fb_can_pipeline_zs(fb);
      cfg.hsr_prepass_interleaving_enable = pan_fb_can_pipeline_zs(fb);
      cfg.hsr_prepass_filter_enable = true;
      cfg.hsr_hierarchical_optimizations_enable = true;

      cfg.internal_layer_index =
         info->layer - info->tiler_ctx->valhall.layer_offset;
   }
   pan_pack(&fbd_data.flags2, FRAGMENT_FLAGS_2, cfg) {
      if (fb->s_format != PIPE_FORMAT_NONE) {
         cfg.s_clear = load && target_has_clear(&load->s) ?
                       load->s.clear.stencil : 0;
         cfg.s_write_enable = store && store->s.store;
      }

      if (fb->z_format != PIPE_FORMAT_NONE) {
         cfg.z_internal_format = pan_get_z_internal_format(fb->z_format);
         cfg.z_write_enable = store && store->zs.store;
      } else {
         cfg.z_internal_format = MALI_Z_INTERNAL_FORMAT_D24;
         assert(!store || !store->zs.store);
      }
   }

   fbd_data.z_clear =
      util_bitpack_float(fb->z_format != PIPE_FORMAT_NONE && load &&
                         target_has_clear(&load->z)
                            ? load->z.clear.depth
                            : 0);
   fbd_data.dcd_pointer = info->frame_shaders.dcd_pointer;

   {
      /* Set the DBD and RTD pointers. Both must be 64-byte aligned. */
      uint64_t out_gpu_addr =
         framebuffer.gpu + ALIGN_POT(sizeof(struct pan_fbd_layer), 64);

      if (has_zs_crc_ext) {
         fbd_data.dbd_pointer = out_gpu_addr;
         assert(fbd_data.dbd_pointer % 64 == 0);
         out_gpu_addr += pan_size(ZS_CRC_EXTENSION);
      }

      fbd_data.rtd_pointer = out_gpu_addr;
      assert(fbd_data.rtd_pointer % 64 == 0);
   }

   memcpy(out, &fbd_data, sizeof(fbd_data));
   out += ALIGN_POT(sizeof(fbd_data), 64);

   if (has_zs_crc_ext) {
      struct mali_zs_crc_extension_packed zs_crc;
      emit_zs_crc_desc(info, ct, &zs_crc);
      memcpy(out, &zs_crc, sizeof(zs_crc));
      out += sizeof(zs_crc);
   }

   uint32_t tile_rt_offset_B = 0;
   for (unsigned rt = 0; rt < fb->rt_count; rt++) {
      struct mali_rgb_render_target_packed rgb_rt;
      emit_rgb_rt_desc(info, ct, rt, tile_rt_offset_B, &rgb_rt);
      memcpy(out, &rgb_rt, sizeof(rgb_rt));
      out += sizeof(rgb_rt);

      if (fb->rt_formats[rt] != PIPE_FORMAT_NONE) {
         tile_rt_offset_B += pan_bytes_per_pixel_tib(fb->rt_formats[rt]) *
                             fb->tile_size_px * fb->sample_count;
      }
   }
   assert(tile_rt_offset_B <= fb->tile_rt_alloc_B);

   return 0;
}
#else /* PAN_ARCH < 14 */
uint32_t
GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
                       const struct pan_ptr framebuffer)
{
   void *out = framebuffer.cpu;
   const struct pan_fb_layout *fb = info->fb;
   const struct pan_fb_load *load = info->load;
   const struct pan_fb_store *store = info->store;

@ -823,4 +939,5 @@ GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info, void *out)
   }
   return tag.opaque[0];
}
#endif
#endif /* PAN_ARCH >= 14 */
#endif /* PAN_ARCH >= 5 */
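
Editor's note: the render-target loop in the v14 path above packs each RT's tile-buffer slice back to back. A small worked example of the running-offset computation — the byte counts are illustrative, the real ones come from pan_bytes_per_pixel_tib():

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* Illustrative only: a 256-pixel tile, 4 samples, RT0 assumed to take
    * 4 B/px in the tile buffer (RGBA8-like), RT1 8 B/px (RGBA16F-like). */
   const uint32_t tile_size_px = 256, sample_count = 4;
   const uint32_t bytes_per_px_tib[2] = {4, 8};

   uint32_t tile_rt_offset_B = 0;
   for (int rt = 0; rt < 2; rt++) {
      printf("RT%d tile-buffer offset: %u B\n", rt, tile_rt_offset_B);
      tile_rt_offset_B += bytes_per_px_tib[rt] * tile_size_px * sample_count;
   }
   /* Prints 0 for RT0 and 4096 for RT1; the 12288 B total must stay
    * within the color-buffer allocation (tile_rt_alloc_B). */
   return 0;
}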

@ -1,14 +1,20 @@
/*
 * Copyright (C) 2026 Collabora, Ltd.
 * Copyright (C) 2026 Arm Ltd.
 * SPDX-License-Identifier: MIT
 */

#ifndef __PAN_FB_H
#define __PAN_FB_H

#if PAN_ARCH >= 14
#include "genxml/cs_builder.h"
#endif

#include "compiler/shader_enums.h"
#include "genxml/gen_macros.h"
#include "util/format/u_formats.h"
#include "compiler/shader_enums.h"
#include "pan_pool.h"

struct nir_shader;
struct nir_shader_compiler_options;
@ -481,7 +487,7 @@ void GENX(pan_fill_fb_info)(const struct pan_fb_desc_info *info,
                            struct pan_fb_info *fbinfo);

uint32_t GENX(pan_emit_fb_desc)(const struct pan_fb_desc_info *info,
                                void *out);
                                const struct pan_ptr framebuffer);
#endif

enum ENUM_PACKED pan_fb_shader_op {
@ -620,4 +626,35 @@ GENX(pan_get_fb_shader)(const struct pan_fb_shader_key *key,
                        const struct nir_shader_compiler_options *nir_options);
#endif

#if PAN_ARCH >= 14
/* Framebuffer per-layer state. Keep this structure 64-byte aligned, since
 * we want the adjacent ZS_CRC_EXTENSION and RENDER_TARGET descriptors
 * aligned. */
struct pan_fbd_layer {
   /** GPU address of the tiler descriptor. */
   uint64_t tiler;

   /** Frame argument. */
   uint64_t frame_argument;

   /** An instance of Fragment Flags 0. */
   struct mali_fragment_flags_0_packed flags0;

   /** An instance of Fragment Flags 2. */
   struct mali_fragment_flags_2_packed flags2;

   /** Z clear value. */
   uint32_t z_clear;

   /** GPU address of the draw call descriptors. It may be 0. */
   uint64_t dcd_pointer;

   /** GPU address of the ZS_CRC_EXTENSION descriptor. It may be 0. */
   uint64_t dbd_pointer;

   /** GPU address of the RENDER_TARGET descriptors. */
   uint64_t rtd_pointer;
} __attribute__((aligned(64)));
#endif /* PAN_ARCH >= 14 */

#endif /* __PAN_FB_H */
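
Editor's note: since ZS_CRC_EXTENSION and RENDER_TARGET descriptors are laid out immediately after this structure, its size and alignment assumptions could be checked at compile time. A sketch of such a guard (not part of the change itself); the aligned(64) attribute rounds sizeof up to 64, and the emit paths step past the struct with ALIGN_POT(sizeof(...), 64):

#include <assert.h>
#include <stdalign.h>

/* Hypothetical compile-time guards for the layout assumptions above. */
static_assert(alignof(struct pan_fbd_layer) == 64,
              "adjacent descriptors require 64-byte alignment");
static_assert(sizeof(struct pan_fbd_layer) == 64,
              "layer state is expected to occupy exactly one 64-byte slot");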

@ -1,5 +1,6 @@
/*
 * Copyright (C) 2019 Collabora, Ltd.
 * Copyright (C) 2026 Arm Ltd.
 * SPDX-License-Identifier: MIT
 */

@ -184,7 +185,27 @@ const struct pan_blendable_format
const struct pan_format GENX(pan_pipe_format)[PIPE_FORMAT_COUNT] = {
   FMT(NONE, CONSTANT, 0000, L, VTR_IB),

#if PAN_ARCH >= 7
#if PAN_ARCH >= 14
   /* Multiplane formats */
   FMT_YUV(R8G8_R8B8_UNORM, Y8U8Y8V8_422, UVYA, NO_SWAP, CENTER_422, _T____),
   FMT_YUV(G8R8_B8R8_UNORM, U8Y8V8Y8_422, UYVA, SWAP, CENTER_422, _T____),
   FMT_YUV(R8B8_R8G8_UNORM, Y8U8Y8V8_422, VYUA, NO_SWAP, CENTER_422, _T____),
   FMT_YUV(B8R8_G8R8_UNORM, U8Y8V8Y8_422, VUYA, SWAP, CENTER_422, _T____),
   FMT_YUV(R8_G8B8_420_UNORM, Y8U8V8_420, YUVA, NO_SWAP, CENTER, _T____),
   FMT_YUV(R8_B8G8_420_UNORM, Y8U8V8_420, YVUA, NO_SWAP, CENTER, _T____),
   FMT_YUV(R8_G8_B8_420_UNORM, Y8U8V8_420, YUVA, NO_SWAP, CENTER, _T____),
   FMT_YUV(R8_B8_G8_420_UNORM, Y8U8V8_420, YVUA, NO_SWAP, CENTER, _T____),

   FMT_YUV(R8_G8B8_422_UNORM, Y8U8Y8V8_422, YUVA, NO_SWAP, CENTER_422, _T____),
   FMT_YUV(R8_B8G8_422_UNORM, U8Y8V8Y8_422, YVUA, NO_SWAP, CENTER_422, _T____),

   FMT_YUV(R10_G10B10_420_UNORM, YUYAAYVYAA_420, YUVA, NO_SWAP, CENTER, _T____),
   FMT_YUV(R10_G10B10_422_UNORM, Y10X6U10X6Y10X6V10X6_422, YUVA, NO_SWAP, CENTER_422, _T____),
   /* special internal formats */
   FMT_YUV(R8G8B8_420_UNORM_PACKED, Y8U8V8_420, YUVA, NO_SWAP, CENTER, _T____),
   FMT_YUV(R10G10B10_420_UNORM_PACKED, Y10U10V10_420, YUVA, NO_SWAP, CENTER, _T____),
   FMT_YUV(X6R10X6G10_X6R10X6B10_422_UNORM, Y10X6U10X6Y10X6V10X6_422, UVYA, NO_SWAP, CENTER_422, _T____),
#elif PAN_ARCH >= 7
   /* Multiplane formats */
   FMT_YUV(R8G8_R8B8_UNORM, YUYV8, UVYA, NO_SWAP, CENTER_422, _T____),
   FMT_YUV(G8R8_B8R8_UNORM, VYUY8, UYVA, SWAP, CENTER_422, _T____),
@ -168,6 +168,8 @@ extern const struct pan_blendable_format
   pan_blendable_formats_v12[PIPE_FORMAT_COUNT];
extern const struct pan_blendable_format
   pan_blendable_formats_v13[PIPE_FORMAT_COUNT];
extern const struct pan_blendable_format
   pan_blendable_formats_v14[PIPE_FORMAT_COUNT];

uint8_t pan_raw_format_mask_midgard(enum pipe_format *formats);

@ -184,6 +186,7 @@ pan_blendable_format_table(unsigned arch)
      FMT_TABLE(10);
      FMT_TABLE(12);
      FMT_TABLE(13);
      FMT_TABLE(14);
#undef FMT_TABLE
   default:
      assert(!"Unsupported architecture");

@ -199,6 +202,7 @@ extern const struct pan_format pan_pipe_format_v9[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v10[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v12[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v13[PIPE_FORMAT_COUNT];
extern const struct pan_format pan_pipe_format_v14[PIPE_FORMAT_COUNT];

static inline const struct pan_format *
pan_format_table(unsigned arch)

@ -213,6 +217,7 @@ pan_format_table(unsigned arch)
      FMT_TABLE(10);
      FMT_TABLE(12);
      FMT_TABLE(13);
      FMT_TABLE(14);
#undef FMT_TABLE
   default:
      assert(!"Unsupported architecture");
@ -84,6 +84,7 @@ const struct pan_mod_handler *pan_mod_get_handler_v9(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v10(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v12(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v13(uint64_t modifier);
const struct pan_mod_handler *pan_mod_get_handler_v14(uint64_t modifier);

static inline const struct pan_mod_handler *
pan_mod_get_handler(unsigned arch, uint64_t modifier)

@ -105,6 +106,8 @@ pan_mod_get_handler(unsigned arch, uint64_t modifier)
      return pan_mod_get_handler_v12(modifier);
   case 13:
      return pan_mod_get_handler_v13(modifier);
   case 14:
      return pan_mod_get_handler_v14(modifier);
   default:
      UNREACHABLE("Unsupported arch");
   }
@ -223,6 +223,25 @@ pan_clump_format(enum pipe_format format)
   /* YUV-sampling has special cases */
   if (pan_format_is_yuv(format)) {
      switch (format) {
#if PAN_ARCH >= 14
      case PIPE_FORMAT_R8G8_R8B8_UNORM:
      case PIPE_FORMAT_G8R8_B8R8_UNORM:
      case PIPE_FORMAT_R8B8_R8G8_UNORM:
      case PIPE_FORMAT_B8R8_G8R8_UNORM:
      case PIPE_FORMAT_R8_G8B8_422_UNORM:
      case PIPE_FORMAT_R8_B8G8_422_UNORM:
      case PIPE_FORMAT_R8_G8B8_420_UNORM:
      case PIPE_FORMAT_R8_B8G8_420_UNORM:
      case PIPE_FORMAT_R8_G8_B8_420_UNORM:
      case PIPE_FORMAT_R8_B8_G8_420_UNORM:
      case PIPE_FORMAT_R8G8B8_420_UNORM_PACKED:
         return MALI_CLUMP_FORMAT_RAW8;
      case PIPE_FORMAT_R10_G10B10_420_UNORM:
      case PIPE_FORMAT_R10G10B10_420_UNORM_PACKED:
      case PIPE_FORMAT_R10_G10B10_422_UNORM:
      case PIPE_FORMAT_X6R10X6G10_X6R10X6B10_422_UNORM:
         return MALI_CLUMP_FORMAT_R10_PACKED;
#else
      case PIPE_FORMAT_R8G8_R8B8_UNORM:
      case PIPE_FORMAT_G8R8_B8R8_UNORM:
      case PIPE_FORMAT_R8B8_R8G8_UNORM:

@ -242,6 +261,7 @@ pan_clump_format(enum pipe_format format)
      case PIPE_FORMAT_R10_G10B10_422_UNORM:
      case PIPE_FORMAT_X6R10X6G10_X6R10X6B10_422_UNORM:
         return MALI_CLUMP_FORMAT_Y10_UV10_422;
#endif /* PAN_ARCH >= 14 */
      default:
         UNREACHABLE("unhandled clump format");
      }
@ -28,6 +28,8 @@
#include "libpan_v12.h"
#elif (PAN_ARCH == 13)
#include "libpan_v13.h"
#elif (PAN_ARCH == 14)
#include "libpan_v14.h"
#else
#error "Unsupported architecture for libpan"
#endif

@ -26,6 +26,8 @@
#include "libpan_shaders_v12.h"
#elif (PAN_ARCH == 13)
#include "libpan_shaders_v13.h"
#elif (PAN_ARCH == 14)
#include "libpan_shaders_v14.h"
#else
#error "Unsupported architecture for libpan"
#endif
@ -11,7 +11,7 @@ libpan_shader_files = files(

idep_libpan_per_arch = {}

foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13']
foreach ver : ['4', '5', '6', '7', '9', '10', '12', '13', '14']
  libpan_spv = custom_target(
    input : libpan_shader_files,
    output : 'libpan_v' + ver + '.spv',
@ -95,6 +95,10 @@ const struct pan_model pan_model_list[] = {
               MODEL_RATES(4, 8, 128)),
   FIFTHGEN_MODEL(PAN_PROD_ID(13, 8, 0), 4, "G725", "TKRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
               MODEL_RATES(4, 8, 128)),
   FIFTHGEN_MODEL(PAN_PROD_ID(14, 8, 3), 1, "G1-Pro", "TDRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
               MODEL_RATES(4, 8, 64)),
   FIFTHGEN_MODEL(PAN_PROD_ID(14, 8, 3), 4, "G1-Pro", "TDRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
               MODEL_RATES(4, 8, 128)),
};
/* clang-format on */
@ -74,7 +74,11 @@ static inline uint32_t
get_fbd_size(bool has_zs_ext, uint32_t rt_count)
{
   assert(rt_count >= 1 && rt_count <= MAX_RTS);
#if PAN_ARCH >= 14
   uint32_t fbd_size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
#else
   uint32_t fbd_size = pan_size(FRAMEBUFFER);
#endif
   if (has_zs_ext)
      fbd_size += pan_size(ZS_CRC_EXTENSION);
   fbd_size += pan_size(RENDER_TARGET) * rt_count;
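
Editor's note: a quick worked example of what this helper returns on v14, using placeholder descriptor sizes since the real values come from pan_size() on the generated headers:

#include <stdint.h>

/* Placeholder sizes, for illustration only. */
#define FBD_LAYER_SIZE     64u  /* ALIGN_POT(sizeof(struct pan_fbd_layer), 64) */
#define ZS_CRC_EXT_SIZE    64u  /* assumed pan_size(ZS_CRC_EXTENSION) */
#define RENDER_TARGET_SIZE 128u /* assumed pan_size(RENDER_TARGET) */

static uint32_t
example_fbd_size(int has_zs_ext, uint32_t rt_count)
{
   uint32_t size = FBD_LAYER_SIZE;
   if (has_zs_ext)
      size += ZS_CRC_EXT_SIZE;
   /* e.g. 64 + 64 + 4*128 = 640 B for 4 RTs with a ZS/CRC extension;
    * each per-layer FBD thus occupies one fixed-size 64-byte-aligned
    * slot, which is what lets the CS step FBD_POINTER by fbd_sz. */
   return size + RENDER_TARGET_SIZE * rt_count;
}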

@ -209,13 +213,25 @@ enum panvk_cs_regs {
   PANVK_CS_REG_RUN_IDVS_SR_END = 60,
#endif

#if PAN_ARCH >= 14
   /* RUN_FRAGMENT2 staging regs.
    * SW ABI:
    * - r58:59 contain the pointer to the first tiler descriptor. This is
    *   needed to gather completed heap chunks after a run_fragment2.
    */
   PANVK_CS_REG_RUN_FRAGMENT_SR_START = 0,
   PANVK_CS_REG_RUN_FRAGMENT_SR_END = 55,
   PANVK_CS_REG_TILER_DESC_PTR = 58,
#else
   /* RUN_FRAGMENT staging regs.
    * SW ABI:
    * - r38:39 contain the pointer to the first tiler descriptor. This is
    * - r58:59 contain the pointer to the first tiler descriptor. This is
    *   needed to gather completed heap chunks after a run_fragment.
    */
   PANVK_CS_REG_RUN_FRAGMENT_SR_START = 38,
   PANVK_CS_REG_RUN_FRAGMENT_SR_END = 46,
   PANVK_CS_REG_TILER_DESC_PTR = 58,
#endif

   /* RUN_COMPUTE staging regs. */
   PANVK_CS_REG_RUN_COMPUTE_SR_START = 0,
@ -870,4 +886,31 @@ vk_stages_to_subqueue_mask(VkPipelineStageFlags2 vk_stages,
void panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
                                  struct panvk_cs_deps deps);

#if PAN_ARCH >= 14
static inline void
cs_emit_layer_fragment_state(struct cs_builder *b, struct cs_index fbd_ptr)
{
   /* Emit the dynamic fragment state. This state may change per layer. */

   cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_0), fbd_ptr,
                offsetof(struct pan_fbd_layer, flags0));
   cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_2), fbd_ptr,
                offsetof(struct pan_fbd_layer, flags2));
   cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, Z_CLEAR), fbd_ptr,
                offsetof(struct pan_fbd_layer, z_clear));
   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, TILER_DESCRIPTOR_POINTER), fbd_ptr,
                offsetof(struct pan_fbd_layer, tiler));
   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, RTD_POINTER), fbd_ptr,
                offsetof(struct pan_fbd_layer, rtd_pointer));
   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, DBD_POINTER), fbd_ptr,
                offsetof(struct pan_fbd_layer, dbd_pointer));
   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_ARG), fbd_ptr,
                offsetof(struct pan_fbd_layer, frame_argument));
   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_SHADER_DCD_POINTER), fbd_ptr,
                offsetof(struct pan_fbd_layer, dcd_pointer));

   cs_flush_loads(b);
}
#endif /* PAN_ARCH >= 14 */

#endif /* PANVK_CMD_BUFFER_H */
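
Editor's note: for context, a condensed sketch of how this helper is meant to be driven when rendering several layers (the real, non-hypothetical version appears later in panvk_vX_cmd_draw.c using cs_while and cs_trace_run_fragment2; this CPU-side loop simply emits the equivalent unrolled sequence):

/* Hypothetical condensed driver loop: reload the per-layer state, kick
 * the fragment run, then step fbd_ptr to the next 64-byte-aligned
 * pan_fbd_layer slot. */
static inline void
run_all_layers(struct cs_builder *b, struct cs_index fbd_ptr,
               uint32_t layer_count, uint32_t fbd_sz)
{
   for (uint32_t layer = 0; layer < layer_count; layer++) {
      cs_emit_layer_fragment_state(b, fbd_ptr);
      /* ...the RUN_FRAGMENT2 submission would go here... */
      cs_add64(b, fbd_ptr, fbd_ptr, fbd_sz);
   }
}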

@ -51,6 +51,7 @@
#include "vk_render_pass.h"
#include "poly/geometry.h"

#if PAN_ARCH < 14
static enum cs_reg_perm
provoking_vertex_fn_reg_perm_cb(struct cs_builder *b, unsigned reg)
{

@ -202,6 +203,7 @@ panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev)
   panvk_priv_bo_unref(dev->draw_ctx->fns_bo);
   vk_free(&dev->vk.alloc, dev->draw_ctx);
}
#endif /* PAN_ARCH < 14 */

static void
emit_vs_attrib(struct panvk_cmd_buffer *cmdbuf,
@ -1245,8 +1247,13 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
   uint32_t fbd_sz = calc_fbd_size(cmdbuf);
   uint32_t fbds_sz = enabled_layer_count * fbd_sz;

   cmdbuf->state.gfx.render.fbds = panvk_cmd_alloc_dev_mem(
      cmdbuf, desc, fbds_sz, pan_alignment(FRAMEBUFFER));
#if PAN_ARCH >= 14
   const unsigned fbds_alignment = alignof(struct pan_fbd_layer);
#else
   const unsigned fbds_alignment = pan_alignment(FRAMEBUFFER);
#endif
   cmdbuf->state.gfx.render.fbds =
      panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbds_sz, fbds_alignment);
   if (!cmdbuf->state.gfx.render.fbds.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

@ -1316,14 +1323,23 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
      tiler_ctx = get_tiler_context(cmdbuf, layer_idx);

      uint32_t new_fbd_flags =
         GENX(pan_emit_fb_desc)(&fbd_info, fbds.cpu + fbd_sz * i);
         GENX(pan_emit_fb_desc)(&fbd_info, pan_ptr_offset(fbds, fbd_sz * i));

      /* Make sure all FBDs have the same flags. */
      assert(i == 0 || new_fbd_flags == fbd_flags);
      fbd_flags = new_fbd_flags;
   }

#if PAN_ARCH >= 14
   /* fbd_flags is unused on v14+. */
   assert(!fbd_flags);
#endif

   struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);

#if PAN_ARCH >= 14
   // TODO: Implement IR support for v14.
#else
   for (uint32_t ir_pass = 0; ir_pass < PANVK_IR_PASS_COUNT; ir_pass++) {
      struct pan_ptr ir_fbds = panvk_cmd_alloc_dev_mem(
         cmdbuf, desc, fbds_sz, pan_alignment(FRAMEBUFFER));

@ -1335,7 +1351,6 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)

      for (uint32_t i = 0; i < enabled_layer_count; i++) {
         uint32_t layer_idx = multiview ? u_bit_scan(&ir_view_mask_temp) : i;
         void *ir_fbd = (void *)((uint8_t *)ir_fbds.cpu + (i * fbd_sz));

         fbd_info.layer = layer_idx;
         tiler_ctx = get_tiler_context(cmdbuf, layer_idx);

@ -1353,8 +1368,8 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
         if (result != VK_SUCCESS)
            return result;

         ASSERTED uint32_t new_fbd_flags =
            GENX(pan_emit_fb_desc)(&fbd_info, ir_fbd);
         ASSERTED uint32_t new_fbd_flags = GENX(pan_emit_fb_desc)(
            &fbd_info, pan_ptr_offset(ir_fbds, fbd_sz * i));

         /* Make sure all FBDs have the same flags. */
         assert(new_fbd_flags == fbd_flags);
@ -1367,16 +1382,14 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)

   /* Wait for IR info push to complete */
   cs_wait_slot(b, SB_ID(LS));

   bool unset_provoking_vertex =
      cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET;
#endif /* PAN_ARCH >= 14 */

   if (copy_fbds) {
      struct cs_index cur_tiler = cs_reg64(b, 38);
      struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR);
      struct cs_index dst_fbd_ptr = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
      struct cs_index fbd_idx = cs_reg32(b, 47);
      struct cs_index src_fbd_ptr = cs_reg64(b, 48);
      struct cs_index remaining_layers_in_td = cs_reg32(b, 50);
      struct cs_index fbd_idx = cs_reg32(b, 60);
      struct cs_index src_fbd_ptr = cs_reg64(b, 64);
      struct cs_index remaining_layers_in_td = cs_reg32(b, 61);
      uint32_t td_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count,
                                       MAX_LAYERS_PER_TILER_DESC);
@ -1400,10 +1413,27 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
       * framebuffer size is aligned on 64 bytes. */
      assert(fbd_sz == ALIGN_POT(fbd_sz, 64));

#if PAN_ARCH >= 14
      for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) {
         cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16), src_fbd_ptr,
                    BITFIELD_MASK(16), fbd_off);

         /* Patch the tiler pointer. */
         if (fbd_off == 0)
            cs_add64(b, cs_scratch_reg64(b, 0), cur_tiler, 0);

         cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr,
                  BITFIELD_MASK(16), fbd_off);
      }
#else
      bool unset_provoking_vertex =
         cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET;
      for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) {
         if (fbd_off == 0) {
            cs_load_to(b, cs_scratch_reg_tuple(b, 0, 14), src_fbd_ptr,
                       BITFIELD_MASK(14), fbd_off);

            /* Patch the tiler pointer. */
            cs_add64(b, cs_scratch_reg64(b, 14), cur_tiler, 0);

            /* If we don't know what provoking vertex mode the

@ -1423,6 +1453,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
         cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr,
                  BITFIELD_MASK(16), fbd_off);
      }
#endif

      /* Finish stores to pass_dst_fbd_ptr. */
      cs_flush_stores(b);
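
Editor's note: the v14 CS loop above is essentially a GPU-side memcpy that rewrites the first quadword, since pan_fbd_layer keeps the tiler pointer at offset 0. A plain-C sketch of the equivalent transform — the cs_* loads/stores are the real mechanism, this only shows the semantics:

#include <stdint.h>
#include <string.h>

/* Duplicate a 64-byte-aligned FBD and point its first field
 * (pan_fbd_layer::tiler, offset 0) at the per-pass tiler descriptor. */
static void
copy_fbd_with_tiler_patch(void *dst, const void *src, uint32_t fbd_sz,
                          uint64_t tiler_desc)
{
   memcpy(dst, src, fbd_sz);                     /* done 64 B at a time on CS */
   memcpy(dst, &tiler_desc, sizeof(tiler_desc)); /* patch the first quadword */
}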

@ -1459,9 +1490,11 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
   cs_update_frag_ctx(b) {
      cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
                   fbds.gpu | fbd_flags);
      cs_move64_to(b, cs_reg64(b, 38), cmdbuf->state.gfx.render.tiler);
      cs_move64_to(b, cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR),
                   cmdbuf->state.gfx.render.tiler);
   }

#if PAN_ARCH < 14
   /* If we don't know what provoking vertex mode the application wants yet,
    * leave space to patch it later */
   if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) {

@ -1483,6 +1516,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
      cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex)
         cs_call(b, addr_reg, length_reg);
   }
#endif
}

return VK_SUCCESS;
@ -3299,6 +3333,9 @@ calc_tiler_oom_handler_idx(struct panvk_cmd_buffer *cmdbuf)
static void
setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
{
#if PAN_ARCH >= 14
   // TODO: Implement IR support for v14.
#else
   struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
   const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
   const bool has_zs_ext = pan_fb_has_zs(fb);

@ -3343,6 +3380,7 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
                TILER_OOM_CTX_FIELD_OFFSET(layer_count));

   cs_flush_stores(b);
#endif /* PAN_ARCH >= 14 */
}

static uint32_t
@ -3351,24 +3389,106 @@ pack_32_2x16(uint16_t lo, uint16_t hi)
   return (((uint32_t)hi) << 16) | (uint32_t)lo;
}

#if PAN_ARCH >= 14
static void
cs_emit_static_fragment_state(struct cs_builder *b,
                              struct panvk_cmd_buffer *cmdbuf)
{
   /* Emit the static fragment staging registers. These don't change per layer. */

   const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   const struct panvk_rendering_state *render = &cmdbuf->state.gfx.render;
   const struct pan_fb_layout *fb = &render->fb.layout;

   const uint8_t sample_count = render->fb.layout.sample_count;

   const struct pan_fb_bbox fb_area_px =
      pan_fb_bbox_from_xywh(0, 0, fb->width_px, fb->height_px);
   const struct pan_fb_bbox bbox_px =
      pan_fb_bbox_clamp(fb->tiling_area_px, fb_area_px);

   assert(pan_fb_bbox_is_valid(fb->tiling_area_px));

   struct mali_fragment_bounding_box_packed bbox;
   pan_pack(&bbox, FRAGMENT_BOUNDING_BOX, cfg) {
      cfg.bound_min_x = bbox_px.min_x;
      cfg.bound_min_y = bbox_px.min_y;
      cfg.bound_max_x = bbox_px.max_x;
      cfg.bound_max_y = bbox_px.max_y;
   }

   struct mali_frame_size_packed frame_size;
   pan_pack(&frame_size, FRAME_SIZE, cfg) {
      cfg.width = fb->width_px;
      cfg.height = fb->height_px;
   }

   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN), bbox.opaque[0]);
   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX), bbox.opaque[1]);
   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FRAME_SIZE), frame_size.opaque[0]);
   cs_move64_to(
      b, cs_sr_reg64(b, FRAGMENT, SAMPLE_POSITION_ARRAY_POINTER),
      dev->sample_positions->addr.dev +
         pan_sample_positions_offset(pan_sample_pattern(sample_count)));

   /* Flags 1 */
   struct mali_fragment_flags_1_packed flags1;
   pan_pack(&flags1, FRAGMENT_FLAGS_1, cfg) {
      cfg.sample_count = fb->sample_count;
      cfg.sample_pattern = pan_sample_pattern(fb->sample_count);
      cfg.effective_tile_size = fb->tile_size_px;
      cfg.point_sprite_coord_origin_max_y = false;
      cfg.first_provoking_vertex = get_first_provoking_vertex(cmdbuf);

      assert(fb->rt_count > 0);
      cfg.render_target_count = fb->rt_count;
      cfg.color_buffer_allocation = fb->tile_rt_alloc_B;
   }
   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]);

   /* If we don't know what provoking vertex mode the application wants yet,
    * leave space to patch it later */
   if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) {
      cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex)
      {
         /* provoking_vertex flag is bit 14 of Fragment Flags 1. */
         cs_add32(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1),
                  cs_sr_reg32(b, FRAGMENT, FLAGS_1), -(1 << 14));
      }
   }

   /* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */
}
#endif /* PAN_ARCH >= 14 */
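
Editor's note: the cs_maybe patch above relies on an arithmetic trick. first_provoking_vertex was packed as set, so when the application later turns out to want the other mode, subtracting 1 << 14 clears exactly that bit without disturbing the rest of the word. A tiny self-contained check of that identity:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* Arbitrary FLAGS_1 value with bit 14 (first_provoking_vertex) set. */
   uint32_t flags1 = 0x0000ABCDu | (1u << 14);

   /* The cs_add32() patch: adding -(1 << 14) flips the known-set bit to 0. */
   uint32_t patched = flags1 - (1u << 14);

   assert((patched & (1u << 14)) == 0);      /* target bit cleared */
   assert((patched | (1u << 14)) == flags1); /* all other bits intact */
   return 0;
}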

static VkResult
issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
{
#if PAN_ARCH < 14
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
#endif
   const struct cs_tracing_ctx *tracing_ctx =
      &cmdbuf->state.cs[PANVK_SUBQUEUE_FRAGMENT].tracing;
   const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
   struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
   bool has_oq_chain = cmdbuf->state.gfx.render.oq.chain != 0;

   /* Now initialize the fragment bits. */
   struct cs_index fbd_pointer = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
   cs_update_frag_ctx(b) {
#if PAN_ARCH >= 14
      cs_emit_static_fragment_state(b, cmdbuf);
      cs_emit_layer_fragment_state(b, fbd_pointer);
#else
      const struct pan_fb_layout *fb = &cmdbuf->state.gfx.render.fb.layout;
      cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
                   pack_32_2x16(fb->tiling_area_px.min_x,
                                fb->tiling_area_px.min_y));
      cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
                   pack_32_2x16(fb->tiling_area_px.max_x,
                                fb->tiling_area_px.max_y));
#endif
   }

   bool simul_use =

@ -3401,6 +3521,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    * state for this renderpass, so it's safe to enable. */
   struct cs_index addr_reg = cs_scratch_reg64(b, 0);
   struct cs_index length_reg = cs_scratch_reg32(b, 2);
#if PAN_ARCH >= 14
   // TODO: Implement IR support for v14.
#else
   uint32_t handler_idx = calc_tiler_oom_handler_idx(cmdbuf);
   uint64_t handler_addr = dev->tiler_oom.handlers_bo->addr.dev +
                           handler_idx * dev->tiler_oom.handler_stride;

@ -3408,6 +3531,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
   cs_move32_to(b, length_reg, dev->tiler_oom.handler_stride);
   cs_set_exception_handler(b, MALI_CS_EXCEPTION_TYPE_TILER_OOM, addr_reg,
                            length_reg);
#endif

   /* Wait for the tiling to be done before submitting the fragment job. */
   wait_finish_tiling(cmdbuf);

@ -3422,8 +3546,12 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    * up. */
   cs_move64_to(b, addr_reg, 0);
   cs_move32_to(b, length_reg, 0);
#if PAN_ARCH >= 14
   // TODO: Implement IR support for v14.
#else
   cs_set_exception_handler(b, MALI_CS_EXCEPTION_TYPE_TILER_OOM, addr_reg,
                            length_reg);
#endif

   /* Applications tend to forget to describe subpass dependencies, especially
    * when it comes to write -> read dependencies on attachments. The

@ -3439,8 +3567,13 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
   }

   if (cmdbuf->state.gfx.render.layer_count <= 1) {
#if PAN_ARCH >= 14
      cs_trace_run_fragment2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
                             false, MALI_TILE_RENDER_ORDER_Z_ORDER);
#else
      cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
                            false, MALI_TILE_RENDER_ORDER_Z_ORDER);
#endif
   } else {
      struct cs_index run_fragment_regs = cs_scratch_reg_tuple(b, 0, 4);
      struct cs_index remaining_layers = cs_scratch_reg32(b, 4);

@ -3449,12 +3582,17 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
      cs_while(b, MALI_CS_CONDITION_GREATER, remaining_layers) {
         cs_add32(b, remaining_layers, remaining_layers, -1);

#if PAN_ARCH >= 14
         cs_emit_layer_fragment_state(b, fbd_pointer);
         cs_trace_run_fragment2(b, tracing_ctx, run_fragment_regs, false,
                                MALI_TILE_RENDER_ORDER_Z_ORDER);
#else
         cs_trace_run_fragment(b, tracing_ctx, run_fragment_regs, false,
                               MALI_TILE_RENDER_ORDER_Z_ORDER);
#endif

         cs_update_frag_ctx(b)
            cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
                     cs_sr_reg64(b, FRAGMENT, FBD_POINTER), fbd_sz);
            cs_add64(b, fbd_pointer, fbd_pointer, fbd_sz);
      }
   }

@ -3468,8 +3606,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
   struct cs_index completed = cs_scratch_reg_tuple(b, 10, 4);
   struct cs_index completed_top = cs_scratch_reg64(b, 10);
   struct cs_index completed_bottom = cs_scratch_reg64(b, 12);
   struct cs_index cur_tiler = cs_reg64(b, 38);
   struct cs_index tiler_count = cs_reg32(b, 47);
   struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR);
   struct cs_index tiler_count = cs_reg32(b, 60);
   struct cs_index oq_chain = cs_scratch_reg64(b, 10);
   struct cs_index oq_chain_lo = cs_scratch_reg32(b, 10);
   struct cs_index oq_syncobj = cs_scratch_reg64(b, 12);

@ -13,8 +13,9 @@ tiler_oom_reg_perm_cb(struct cs_builder *b, unsigned reg)
{
   switch (reg) {
   /* The bbox is set up by the fragment subqueue; we should not modify it. */
   case 42:
   case 43:
   case MALI_FRAGMENT_SR_BBOX_MIN:
   case MALI_FRAGMENT_SR_BBOX_MAX:

   /* We should only load from the subqueue context. */
   case PANVK_CS_REG_SUBQUEUE_CTX_START:
   case PANVK_CS_REG_SUBQUEUE_CTX_END:
@ -42,8 +43,14 @@ copy_fbd(struct cs_builder *b, bool has_zs_ext, uint32_t rt_count,
   cs_store(b, cs_scratch_reg_tuple(b, 0, 8), dst, BITFIELD_MASK(8),
            8 * sizeof(uint32_t));

#if PAN_ARCH >= 14
   const size_t fbd_size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64);
#else
   const size_t fbd_size = sizeof(struct mali_framebuffer_packed);
#endif

   if (has_zs_ext) {
      const uint16_t dbd_offset = sizeof(struct mali_framebuffer_packed);
      const uint16_t dbd_offset = fbd_size;

      /* Copy the whole DBD. */
      cs_load_to(b, cs_scratch_reg_tuple(b, 0, 8), src_other,

@ -57,8 +64,7 @@ copy_fbd(struct cs_builder *b, bool has_zs_ext, uint32_t rt_count,
   }

   const uint16_t rts_offset =
      sizeof(struct mali_framebuffer_packed) +
      (has_zs_ext ? sizeof(struct mali_zs_crc_extension_packed) : 0);
      fbd_size + (has_zs_ext ? sizeof(struct mali_zs_crc_extension_packed) : 0);

   for (uint32_t rt = 0; rt < rt_count; rt++) {
      const uint16_t rt_offset =
@ -110,12 +116,14 @@ generate_tiler_oom_handler(struct panvk_device *dev,
      .tracebuf_addr_offset =
         offsetof(struct panvk_cs_subqueue_context, debug.tracebuf.cs),
   };
   struct mali_framebuffer_pointer_packed fb_tag;

#if PAN_ARCH < 14
   struct mali_framebuffer_pointer_packed fb_tag;
   pan_pack(&fb_tag, FRAMEBUFFER_POINTER, cfg) {
      cfg.zs_crc_extension_present = has_zs_ext;
      cfg.render_target_count = rt_count;
   }
#endif

   cs_function_def(&b, &handler, handler_ctx) {
      struct cs_index subqueue_ctx = cs_subqueue_ctx_reg(&b);
@ -140,7 +148,7 @@ generate_tiler_oom_handler(struct panvk_device *dev,
      struct cs_index run_fragment_regs = cs_scratch_reg_tuple(&b, 0, 4);

      /* The tiler pointer is pre-filled. */
      struct cs_index tiler_ptr = cs_reg64(&b, 38);
      struct cs_index tiler_ptr = cs_reg64(&b, PANVK_CS_REG_TILER_DESC_PTR);

      cs_load64_to(&b, scratch_fbd_ptr_reg, subqueue_ctx,
                   TILER_OOM_CTX_FIELD_OFFSET(ir_scratch_fbd_ptr));
@ -176,11 +184,17 @@ generate_tiler_oom_handler(struct panvk_device *dev,
      cs_wait_slot(&b, SB_ID(LS));

      /* Set the FBD pointer to the scratch FBD. */
      cs_add64(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER),
               scratch_fbd_ptr_reg, fb_tag.opaque[0]);

      struct cs_index fbd_pointer = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);
#if PAN_ARCH >= 14
      cs_add64(&b, fbd_pointer, scratch_fbd_ptr_reg, 0);
      cs_emit_layer_fragment_state(&b, fbd_pointer);
      cs_trace_run_fragment2(&b, &tracing_ctx, run_fragment_regs, false,
                             MALI_TILE_RENDER_ORDER_Z_ORDER);
#else
      cs_add64(&b, fbd_pointer, scratch_fbd_ptr_reg, fb_tag.opaque[0]);
      cs_trace_run_fragment(&b, &tracing_ctx, run_fragment_regs, false,
                            MALI_TILE_RENDER_ORDER_Z_ORDER);
#endif

      /* Serialize run fragments since we reuse the FBD for the runs. */
      cs_wait_slots(&b, dev->csf.sb.all_iters_mask);
@ -717,7 +717,12 @@ init_tiler(struct panvk_gpu_queue *queue)
   tiler_heap->chunk_size = phys_dev->csf.tiler.chunk_size;

   alloc_info.size = get_fbd_size(true, MAX_RTS);
   alloc_info.alignment = pan_alignment(FRAMEBUFFER);
#if PAN_ARCH >= 14
   const unsigned fbds_alignment = alignof(struct pan_fbd_layer);
#else
   const unsigned fbds_alignment = pan_alignment(FRAMEBUFFER);
#endif
   alloc_info.alignment = fbds_alignment;
   tiler_heap->oom_fbd = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info);
   if (!panvk_priv_mem_check_alloc(tiler_heap->oom_fbd)) {
      result = panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
@ -181,7 +181,7 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
      fbd_info.layer = layer_id;
      fbd_info.frame_shaders = fs;
      fbd_info.frame_shaders.dcd_pointer += layer_id * 3 * pan_size(DRAW);
      tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd.cpu);
      tagged_fbd_ptr |= GENX(pan_emit_fb_desc)(&fbd_info, fbd);

      result = panvk_cmd_prepare_fragment_job(cmdbuf, tagged_fbd_ptr);
      if (result != VK_SUCCESS)
@ -14,6 +14,7 @@ panvk_entrypoints = custom_target(
    '--device-prefix', 'panvk_v6', '--device-prefix', 'panvk_v7',
    '--device-prefix', 'panvk_v9', '--device-prefix', 'panvk_v10',
    '--device-prefix', 'panvk_v12', '--device-prefix', 'panvk_v13',
    '--device-prefix', 'panvk_v14',
    '--beta', with_vulkan_beta.to_string()
  ],
  depend_files : vk_entrypoints_gen_depend_files,

@ -65,7 +66,7 @@ valhall_archs = [9, 10]
valhall_inc_dir = ['valhall']
valhall_files = []

fifthgen_archs = [12, 13]
fifthgen_archs = [12, 13, 14]
fifthgen_inc_dir = ['fifthgen']
fifthgen_files = []

@ -83,7 +84,7 @@ jm_files = [
  'jm/panvk_vX_gpu_queue.c',
]

csf_archs = [10, 12, 13]
csf_archs = [10, 12, 13, 14]
csf_inc_dir = ['csf']
csf_files = [
  'csf/panvk_vX_bind_queue.c',

@ -126,7 +127,7 @@ common_per_arch_files = [
  sha1_h,
]

foreach arch : [6, 7, 10, 12, 13]
foreach arch : [6, 7, 10, 12, 13, 14]
  per_arch_files = common_per_arch_files
  inc_panvk_per_arch = []
@ -243,7 +243,7 @@ struct panvk_cmd_graphics_state {
   } \
} while (0)

#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
struct panvk_device_draw_context {
   struct panvk_priv_bo *fns_bo;
   uint64_t fn_set_fbds_provoking_vertex_stride;

@ -376,8 +376,7 @@ cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
      gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \
   } while (0)

#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
VkResult
panvk_per_arch(device_draw_context_init)(struct panvk_device *dev);
@ -61,6 +61,9 @@ panvk_catch_indirect_alloc_failure(VkResult error)
   case 13:                                        \
      panvk_arch_name(name, v13)(__VA_ARGS__);     \
      break;                                       \
   case 14:                                        \
      panvk_arch_name(name, v14)(__VA_ARGS__);     \
      break;                                       \
   default:                                        \
      UNREACHABLE("Unsupported architecture");     \
   }                                               \

@ -84,6 +87,9 @@ panvk_catch_indirect_alloc_failure(VkResult error)
   case 13:                                              \
      ret = panvk_arch_name(name, v13)(__VA_ARGS__);     \
      break;                                             \
   case 14:                                              \
      ret = panvk_arch_name(name, v14)(__VA_ARGS__);     \
      break;                                             \
   default:                                              \
      UNREACHABLE("Unsupported architecture");           \
   }                                                     \

@ -102,6 +108,8 @@ panvk_catch_indirect_alloc_failure(VkResult error)
#define panvk_per_arch(name) panvk_arch_name(name, v12)
#elif PAN_ARCH == 13
#define panvk_per_arch(name) panvk_arch_name(name, v13)
#elif PAN_ARCH == 14
#define panvk_per_arch(name) panvk_arch_name(name, v14)
#else
#error "Unsupported arch"
#endif

@ -64,6 +64,7 @@ PER_ARCH_FUNCS(7);
PER_ARCH_FUNCS(10);
PER_ARCH_FUNCS(12);
PER_ARCH_FUNCS(13);
PER_ARCH_FUNCS(14);

static VkResult
create_kmod_dev(struct panvk_physical_device *device,

@ -411,6 +412,7 @@ panvk_physical_device_init(struct panvk_physical_device *device,
   switch (arch) {
   case 6:
   case 7:
   case 14:
      if (!os_get_option("PAN_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
         result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "WARNING: panvk is not well-tested on v%d, "
@ -550,7 +550,7 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
      goto err_free_precomp;
   }

#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
   result = panvk_per_arch(device_draw_context_init)(device);
   if (result != VK_SUCCESS)
      goto err_free_mem_cache;

@ -616,7 +616,7 @@ err_finish_queues:
   panvk_meta_cleanup(device);

err_free_draw_ctx:
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
   panvk_per_arch(device_draw_context_cleanup)(device);
err_free_mem_cache:
#endif

@ -679,7 +679,7 @@ panvk_per_arch(destroy_device)(struct panvk_device *device,
   }

   panvk_precomp_cleanup(device);
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
   panvk_per_arch(device_draw_context_cleanup)(device);
#endif
   panvk_meta_cleanup(device);