panfrost: Hook up RUN_FRAGMENT2 on the Gallium driver
Set the FBD size/alignment correctly and emit the fragment staging
registers before issuing fragment commands. Also, move some temporary
registers to non-conflicting ones.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
parent dacccc3dec
commit 2b216a01f9

2 changed files with 134 additions and 16 deletions
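In outline, the new RUN_FRAGMENT2 path splits the fragment staging registers into a static part, written once per fragment job with register moves, and a per-layer part, loaded from a memory-resident pan_fbd_layer through the FBD pointer. A minimal sketch of the resulting emission sequence, assembled from the hunks below (the builder b, batch, and pfb are the names used in csf_emit_fragment_job):

   #if PAN_ARCH >= 14
      struct cs_index fbd_pointer = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);

      cs_move64_to(b, fbd_pointer, batch->framebuffer.gpu);
      cs_emit_static_fragment_state(b, batch, pfb); /* frame size, sample config, flags */
      cs_emit_layer_fragment_state(b, fbd_pointer); /* per-layer pointers loaded from the FBD */
      cs_run_fragment2(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
   #endif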
First changed file:

@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2023 Collabora Ltd.
+ * Copyright (C) 2026 Arm Ltd.
  * SPDX-License-Identifier: MIT
  */
 
@@ -13,6 +14,7 @@
 #include "pan_cmdstream.h"
 #include "pan_context.h"
 #include "pan_csf.h"
+#include "pan_fb.h"
 #include "pan_fb_preload.h"
 #include "pan_job.h"
 #include "pan_trace.h"
@@ -75,6 +77,87 @@ csf_update_tiler_oom_ctx(struct cs_builder *b, uint64_t addr)
    (PAN_INCREMENTAL_RENDERING_##_pass##_PASS * sizeof(struct pan_ptr)) +     \
    offsetof(struct pan_ptr, gpu))
 
+#if PAN_ARCH >= 14
+static void
+cs_emit_static_fragment_state(struct cs_builder *b,
+                              struct panfrost_batch *batch,
+                              const struct pan_fb_info *fb)
+{
+   struct mali_frame_size_packed frame_size;
+   pan_pack(&frame_size, FRAME_SIZE, cfg) {
+      cfg.width = fb->width;
+      cfg.height = fb->height;
+   }
+
+   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FRAME_SIZE), frame_size.opaque[0]);
+   cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, SAMPLE_POSITION_ARRAY_POINTER),
+                fb->sample_positions);
+
+   struct mali_fragment_flags_1_packed flags1;
+   pan_pack(&flags1, FRAGMENT_FLAGS_1, cfg) {
+      /* The force_samples setting dictates the sample count that is used
+       * for rasterization, and works like D3D11's ForcedSampleCount
+       * feature:
+       *
+       * - If force_samples == 0: let nr_samples dictate the sample count
+       * - If force_samples == 1: force single-sampled rasterization
+       * - If force_samples >= 2: force multi-sampled rasterization
+       *
+       * This can be used to read SYSTEM_VALUE_SAMPLE_MASK_IN from the
+       * fragment shader, even when performing single-sampled rendering.
+       */
+      if (fb->pls_enabled) {
+         cfg.sample_count = 4;
+         cfg.sample_pattern = pan_sample_pattern(1);
+      } else if (!fb->force_samples) {
+         cfg.sample_count = fb->nr_samples;
+         cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
+      } else if (fb->force_samples == 1) {
+         cfg.sample_count = fb->nr_samples;
+         cfg.sample_pattern = pan_sample_pattern(1);
+      } else {
+         cfg.sample_count = 1;
+         cfg.sample_pattern = pan_sample_pattern(fb->force_samples);
+      }
+
+      cfg.effective_tile_size = fb->tile_size;
+      cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
+      cfg.first_provoking_vertex = fb->first_provoking_vertex;
+      cfg.render_target_count = MAX2(fb->rt_count, 1);
+      cfg.color_buffer_allocation = fb->cbuf_allocation;
+   }
+
+   cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]);
+
+   /* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */
+}
+
+static inline void
+cs_emit_layer_fragment_state(struct cs_builder *b, struct cs_index fbd_ptr)
+{
+   /* Emit the dynamic fragment state. This state may change per layer. */
+
+   cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_0), fbd_ptr,
+                offsetof(struct pan_fbd_layer, flags0));
+   cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_2), fbd_ptr,
+                offsetof(struct pan_fbd_layer, flags2));
+   cs_load32_to(b, cs_sr_reg32(b, FRAGMENT, Z_CLEAR), fbd_ptr,
+                offsetof(struct pan_fbd_layer, z_clear));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, TILER_DESCRIPTOR_POINTER), fbd_ptr,
+                offsetof(struct pan_fbd_layer, tiler));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, RTD_POINTER), fbd_ptr,
+                offsetof(struct pan_fbd_layer, rtd_pointer));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, DBD_POINTER), fbd_ptr,
+                offsetof(struct pan_fbd_layer, dbd_pointer));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_ARG), fbd_ptr,
+                offsetof(struct pan_fbd_layer, frame_argument));
+   cs_load64_to(b, cs_sr_reg64(b, FRAGMENT, FRAME_SHADER_DCD_POINTER), fbd_ptr,
+                offsetof(struct pan_fbd_layer, dcd_pointer));
+
+   cs_flush_loads(b);
+}
+#endif /* PAN_ARCH >= 14 */
+
 static int
 csf_oom_handler_init(struct panfrost_context *ctx)
 {
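As an aside, the FRAGMENT_FLAGS_1 sample selection above can be restated as a standalone helper to make the four cases easier to compare. This is an illustrative sketch only: struct sample_cfg and pick_sample_cfg are made-up names, while pan_sample_pattern() and the fb fields belong to the driver.

   struct sample_cfg { unsigned count, pattern_samples; }; /* hypothetical */

   static struct sample_cfg
   pick_sample_cfg(bool pls_enabled, unsigned nr_samples, unsigned force_samples)
   {
      if (pls_enabled)          /* pixel local storage: fixed 4x count, 1x pattern */
         return (struct sample_cfg){4, 1};
      if (!force_samples)       /* normal MSAA: nr_samples drives both fields */
         return (struct sample_cfg){nr_samples, nr_samples};
      if (force_samples == 1)   /* forced single-sampled rasterization */
         return (struct sample_cfg){nr_samples, 1};
      /* force_samples >= 2: forced multi-sampled rasterization */
      return (struct sample_cfg){1, force_samples};
   }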
@@ -113,13 +196,14 @@ csf_oom_handler_init(struct panfrost_context *ctx)
 
    cs_function_def(&b, &handler, handler_ctx) {
       struct cs_index tiler_oom_ctx = cs_reg64(&b, TILER_OOM_CTX_REG);
-      struct cs_index counter = cs_reg32(&b, 47);
-      struct cs_index zero = cs_reg64(&b, 48);
-      struct cs_index flush_id = cs_reg32(&b, 48);
-      struct cs_index tiler_ctx = cs_reg64(&b, 50);
-      struct cs_index completed_top = cs_reg64(&b, 52);
-      struct cs_index completed_bottom = cs_reg64(&b, 54);
-      struct cs_index completed_chunks = cs_reg_tuple(&b, 52, 4);
+      struct cs_index counter = cs_reg32(&b, 31);
+      struct cs_index zero = cs_reg64(&b, 56);
+      struct cs_index flush_id = cs_reg32(&b, 58);
+      struct cs_index tiler_ctx = cs_reg64(&b, 60);
+      struct cs_index completed_top = cs_reg64(&b, 64);
+      struct cs_index completed_bottom = cs_reg64(&b, 66);
+      struct cs_index completed_chunks = cs_reg_tuple(&b, 64, 4);
+      struct cs_index fbd_pointer = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);
 
       /* Ensure that the OTHER endpoint is valid */
 #if PAN_ARCH >= 11
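Background on the renumbering (an aside, not part of the patch): assuming cs_reg64(&b, n) spans the two consecutive 32-bit registers n and n+1, the old temporaries overlapped each other, and they presumably also sat inside the wider staging-register window that RUN_FRAGMENT2 now claims for FRAGMENT state:

   /* Old layout (removed above): */
   struct cs_index zero     = cs_reg64(&b, 48); /* spans r48-r49      */
   struct cs_index flush_id = cs_reg32(&b, 48); /* also r48: conflict */

   /* New layout: zero at r56-r57, flush_id at r58, tiler_ctx at r60-r61.
    * completed_chunks (r64-r67) still covers completed_top (r64-r65) and
    * completed_bottom (r66-r67), but that aliasing is deliberate: the
    * tuple is just a combined view of the two 64-bit values. */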
@@ -133,12 +217,10 @@ csf_oom_handler_init(struct panfrost_context *ctx)
       cs_load32_to(&b, counter, tiler_oom_ctx, FIELD_OFFSET(counter));
       cs_wait_slot(&b, 0);
       cs_if(&b, MALI_CS_CONDITION_GREATER, counter) {
-         cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
-                      FBD_OFFSET(MIDDLE));
+         cs_load64_to(&b, fbd_pointer, tiler_oom_ctx, FBD_OFFSET(MIDDLE));
       }
       cs_else(&b) {
-         cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
-                      FBD_OFFSET(FIRST));
+         cs_load64_to(&b, fbd_pointer, tiler_oom_ctx, FBD_OFFSET(FIRST));
       }
 
       cs_load32_to(&b, cs_sr_reg32(&b, FRAGMENT, BBOX_MIN), tiler_oom_ctx,
@@ -147,11 +229,18 @@ csf_oom_handler_init(struct panfrost_context *ctx)
                    FIELD_OFFSET(bbox_max));
       cs_move64_to(&b, cs_sr_reg64(&b, FRAGMENT, TEM_POINTER), 0);
       cs_move32_to(&b, cs_sr_reg32(&b, FRAGMENT, TEM_ROW_STRIDE), 0);
+#if PAN_ARCH >= 14
+      cs_emit_layer_fragment_state(&b, fbd_pointer);
+#endif
       cs_wait_slot(&b, 0);
 
       /* Run the fragment job and wait */
       cs_select_endpoint_sb(&b, 3);
+#if PAN_ARCH >= 14
+      cs_run_fragment2(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#else
       cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#endif
       cs_wait_slot(&b, 3);
 
       /* Increment counter */
@@ -218,6 +307,21 @@ GENX(csf_cleanup_batch)(struct panfrost_batch *batch)
    panfrost_pool_cleanup(&batch->csf.cs_chunk_pool);
 }
 
+#if PAN_ARCH >= 14
+static inline struct pan_ptr
+alloc_fbd(struct panfrost_batch *batch)
+{
+   const struct pan_desc_alloc_info fbd_layer = {
+      .size = ALIGN_POT(sizeof(struct pan_fbd_layer), 64),
+      .align = alignof(struct pan_fbd_layer),
+      .nelems = 1,
+   };
+
+   return pan_pool_alloc_desc_aggregate(
+      &batch->pool.base, fbd_layer, PAN_DESC(ZS_CRC_EXTENSION),
+      PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
+}
+#else
 static inline struct pan_ptr
 alloc_fbd(struct panfrost_batch *batch)
 {
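The "FBD size/alignment" part of the commit message is the ALIGN_POT above: each pan_fbd_layer slot is padded to a 64-byte boundary before the ZS/CRC extension and render-target descriptors are appended. A quick sketch of the arithmetic, assuming ALIGN_POT's usual power-of-two round-up definition (the 200-byte size is invented for illustration):

   #define ALIGN_POT(x, pot) (((x) + (pot) - 1) & ~((pot) - 1))

   /* If sizeof(struct pan_fbd_layer) were 200:
    *    ALIGN_POT(200, 64) == (200 + 63) & ~63 == 263 & ~63 == 256
    * so whatever follows the layer in the aggregate starts 64-byte aligned. */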
@@ -225,6 +329,7 @@ alloc_fbd(struct panfrost_batch *batch)
       &batch->pool.base, PAN_DESC(FRAMEBUFFER), PAN_DESC(ZS_CRC_EXTENSION),
       PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
 }
+#endif /* PAN_ARCH >= 14 */
 
 int
 GENX(csf_init_batch)(struct panfrost_batch *batch)
@@ -854,15 +959,21 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
       cs_vt_end(b, cs_now());
    }
 
+   struct cs_index fbd_pointer = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
+
    /* Set up the fragment job */
-   cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
-                batch->framebuffer.gpu);
+   cs_move64_to(b, fbd_pointer, batch->framebuffer.gpu);
+
    cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
                 (batch->miny << 16) | batch->minx);
    cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
                 ((batch->maxy - 1) << 16) | (batch->maxx - 1));
    cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, TEM_POINTER), 0);
    cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, TEM_ROW_STRIDE), 0);
+#if PAN_ARCH >= 14
+   cs_emit_static_fragment_state(b, batch, pfb);
+   cs_emit_layer_fragment_state(b, fbd_pointer);
+#endif
 
    /* Use different framebuffer descriptor if incremental rendering was
     * triggered while tiling */
@@ -871,13 +982,19 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
       cs_load32_to(b, counter, cs_reg64(b, TILER_OOM_CTX_REG), 0);
       cs_wait_slot(b, 0);
       cs_if(b, MALI_CS_CONDITION_GREATER, counter) {
-         cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
-                      GET_FBD(oom_ctx, LAST).gpu);
+         cs_move64_to(b, fbd_pointer, GET_FBD(oom_ctx, LAST).gpu);
+#if PAN_ARCH >= 14
+         cs_emit_layer_fragment_state(b, fbd_pointer);
+#endif
       }
    }
 
    /* Run the fragment job and wait */
+#if PAN_ARCH >= 14
+   cs_run_fragment2(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#else
    cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+#endif
    cs_wait_slot(b, 2);
 
    /* Gather freed heap chunks and add them to the heap context free list
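Taking this hunk together with the OOM-handler hunk above, the framebuffer-descriptor selection across an incrementally rendered batch can be summarized as follows (pseudo-C, names from the patch):

   /* Tiler-OOM handler (mid-batch flushes):
    *    fbd = (oom_count > 0) ? FBD(MIDDLE) : FBD(FIRST);
    * Final fragment job (end of batch):
    *    fbd = (oom_count > 0) ? GET_FBD(oom_ctx, LAST) : batch->framebuffer;
    * The first OOM flush uses the FIRST-pass descriptor, later OOM flushes
    * use MIDDLE, and the closing flush switches to LAST only if an
    * incremental flush actually happened. On PAN_ARCH >= 14, every change
    * of FBD_POINTER is followed by cs_emit_layer_fragment_state() so the
    * staging registers are reloaded from the newly selected descriptor. */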
Second changed file:
@@ -29,7 +29,8 @@ struct pan_csf_tiler_oom_ctx {
    /* Alternative framebuffer descriptors for incremental rendering */
    struct pan_ptr fbds[PAN_INCREMENTAL_RENDERING_PASS_COUNT];
 
-   /* Bounding Box (Register 42 and 43) */
+   /* Bounding Box (Register MALI_FRAGMENT_SR_BBOX_MIN and
+    * MALI_FRAGMENT_SR_BBOX_MAX) */
    uint32_t bbox_min;
    uint32_t bbox_max;
 