panfrost: Add v12 support to the Gallium driver

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34032>
Author: Mary Guillemard
Date: 2025-02-17 09:29:56 +01:00
parent 7a8d0b78e9
commit 1f1c36094b
8 changed files with 140 additions and 9 deletions

@@ -41,7 +41,7 @@ compile_args_panfrost = [
'-Wno-pointer-arith'
]
panfrost_versions = ['4', '5', '6', '7', '9', '10']
panfrost_versions = ['4', '5', '6', '7', '9', '10', '12']
libpanfrost_versions = []
foreach ver : panfrost_versions
@@ -53,7 +53,7 @@ foreach ver : panfrost_versions
]
if ver in ['4', '5', '6', '7', '9']
files_panfrost_vx += ['pan_jm.c']
elif ver in ['10']
elif ver in ['10', '12']
files_panfrost_vx += ['pan_csf.c']
endif
libpanfrost_versions += static_library(

@@ -64,7 +64,7 @@
* functions. */
#if PAN_ARCH <= 9
#define JOBX(__suffix) GENX(jm_##__suffix)
#elif PAN_ARCH <= 10
#elif PAN_ARCH <= 12
#define JOBX(__suffix) GENX(csf_##__suffix)
#else
#error "Unsupported arch"
@@ -707,6 +707,71 @@ panfrost_emit_frag_shader_meta(struct panfrost_batch *batch)
}
#endif
#if PAN_ARCH >= 12
static uint64_t
panfrost_emit_viewport(struct panfrost_batch *batch)
{
struct panfrost_context *ctx = batch->ctx;
const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
const struct pipe_scissor_state *ss = &ctx->scissor;
const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
/* Derive min/max from translate/scale. Note since |x| >= 0 by
* definition, we have that -|x| <= |x| hence translate - |scale| <=
* translate + |scale|, so the ordering is correct here. */
float vp_minx = vp->translate[0] - fabsf(vp->scale[0]);
float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]);
float vp_miny = vp->translate[1] - fabsf(vp->scale[1]);
float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]);
float minz, maxz;
util_viewport_zmin_zmax(vp, rast->clip_halfz, &minz, &maxz);
/* Viewport clamped to the framebuffer */
unsigned minx = MIN2(batch->key.width, MAX2((int)vp_minx, 0));
unsigned maxx = MIN2(batch->key.width, MAX2((int)vp_maxx, 0));
unsigned miny = MIN2(batch->key.height, MAX2((int)vp_miny, 0));
unsigned maxy = MIN2(batch->key.height, MAX2((int)vp_maxy, 0));
if (ss && rast->scissor) {
minx = MAX2(ss->minx, minx);
miny = MAX2(ss->miny, miny);
maxx = MIN2(ss->maxx, maxx);
maxy = MIN2(ss->maxy, maxy);
}
/* Set the range to [1, 1) so max values don't wrap round */
if (maxx == 0 || maxy == 0)
maxx = maxy = minx = miny = 1;
panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy);
batch->scissor_culls_everything = (minx >= maxx || miny >= maxy);
pan_cast_and_pack(&batch->avalon_viewport, VIEWPORT, cfg) {
/* Clamp viewport to valid range */
cfg.min_x = CLAMP(minx, 0, UINT16_MAX);
cfg.min_y = CLAMP(miny, 0, UINT16_MAX);
cfg.max_x = CLAMP(maxx, 0, UINT16_MAX);
cfg.max_y = CLAMP(maxy, 0, UINT16_MAX);
cfg.min_depth = CLAMP(minz, 0.0f, 1.0f);
cfg.max_depth = CLAMP(maxz, 0.0f, 1.0f);
}
/* [minx, maxx) and [miny, maxy) are exclusive ranges for scissors in the hardware */
maxx--;
maxy--;
pan_cast_and_pack(&batch->scissor, SCISSOR, cfg) {
cfg.scissor_minimum_x = minx;
cfg.scissor_minimum_y = miny;
cfg.scissor_maximum_x = maxx;
cfg.scissor_maximum_y = maxy;
}
return 0;
}
#else
static uint64_t
panfrost_emit_viewport(struct panfrost_batch *batch)
{
@@ -783,6 +848,7 @@ panfrost_emit_viewport(struct panfrost_batch *batch)
return 0;
#endif
}
#endif
#if PAN_ARCH >= 9
/**
@@ -4002,9 +4068,14 @@ prepare_shader(struct panfrost_compiled_shader *state,
return;
bool vs = (state->info.stage == MESA_SHADER_VERTEX);
bool secondary_enable = (vs && state->info.vs.secondary_enable);
#if PAN_ARCH >= 12
unsigned nr_variants = vs ? 2 : 1;
#else
bool secondary_enable = (vs && state->info.vs.secondary_enable);
unsigned nr_variants = secondary_enable ? 3 : vs ? 2 : 1;
#endif
struct panfrost_ptr ptr =
pan_pool_alloc_desc_array(&pool->base, nr_variants, SHADER_PROGRAM);
@@ -4018,8 +4089,10 @@ prepare_shader(struct panfrost_compiled_shader *state,
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
#if PAN_ARCH < 12
else if (vs)
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
#endif
cfg.register_allocation =
pan_register_allocation(state->info.work_reg_count);
@@ -4037,7 +4110,9 @@ prepare_shader(struct panfrost_compiled_shader *state,
/* IDVS/triangles */
pan_pack(&programs[1], SHADER_PROGRAM, cfg) {
cfg.stage = pan_shader_stage(&state->info);
#if PAN_ARCH < 12
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
#endif
cfg.register_allocation =
pan_register_allocation(state->info.work_reg_count);
cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset;
@@ -4045,6 +4120,7 @@ prepare_shader(struct panfrost_compiled_shader *state,
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
}
#if PAN_ARCH < 12
if (!secondary_enable)
return;
@@ -4059,6 +4135,7 @@ prepare_shader(struct panfrost_compiled_shader *state,
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
}
#endif
#endif
}
static void

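An aside on the new v12 viewport path above: panfrost_emit_viewport() derives the screen-space bounds as translate ± |scale|, clamps them to the framebuffer, and only then folds in the scissor rectangle. Below is a minimal standalone sketch of that arithmetic, with illustrative values and local MIN2/MAX2 stand-ins rather than the real Gallium structures.

#include <math.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   /* Hypothetical GL-style viewport: scale is the half-extent and
    * translate the centre, so a 640x480 viewport at the origin is
    * scale = (320, 240), translate = (320, 240). */
   float scale[2] = {320.0f, 240.0f};
   float translate[2] = {320.0f, 240.0f};
   unsigned fb_width = 600, fb_height = 480; /* framebuffer narrower than the viewport */

   /* translate - |scale| <= translate + |scale| always holds, so min/max
    * come out ordered even for flipped (negative-scale) viewports. */
   float vp_minx = translate[0] - fabsf(scale[0]);
   float vp_maxx = translate[0] + fabsf(scale[0]);
   float vp_miny = translate[1] - fabsf(scale[1]);
   float vp_maxy = translate[1] + fabsf(scale[1]);

   /* Clamp to the framebuffer before any scissor intersection. */
   unsigned minx = MIN2(fb_width, (unsigned)MAX2((int)vp_minx, 0));
   unsigned maxx = MIN2(fb_width, (unsigned)MAX2((int)vp_maxx, 0));
   unsigned miny = MIN2(fb_height, (unsigned)MAX2((int)vp_miny, 0));
   unsigned maxy = MIN2(fb_height, (unsigned)MAX2((int)vp_maxy, 0));

   /* Prints "x: [0, 600)  y: [0, 480)": the 640-wide viewport is cut
    * down to the 600-pixel framebuffer. */
   printf("x: [%u, %u)  y: [%u, %u)\n", minx, maxx, miny, maxy);
   return 0;
}

The decrement of maxx/maxy before packing the SCISSOR descriptor in the diff suggests the packed scissor_maximum_* fields take inclusive maxima, while the values computed above are exclusive.
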
@@ -256,11 +256,13 @@ panfrost_get_position_shader(struct panfrost_batch *batch,
return vs_ptr;
}
#if PAN_ARCH < 12
static inline uint64_t
panfrost_get_varying_shader(struct panfrost_batch *batch)
{
return batch->rsd[PIPE_SHADER_VERTEX] + (2 * pan_size(SHADER_PROGRAM));
}
#endif
static inline unsigned
panfrost_vertex_attribute_stride(struct panfrost_compiled_shader *vs,

@@ -139,6 +139,13 @@ csf_oom_handler_init(struct panfrost_context *ctx)
struct cs_index completed_bottom = cs_reg64(&b, 54);
struct cs_index completed_chunks = cs_reg_tuple(&b, 52, 4);
/* Ensure that the OTHER endpoint is valid */
#if PAN_ARCH >= 11
cs_set_state_imm32(&b, MALI_CS_SET_STATE_TYPE_SB_SEL_OTHER, 0);
#else
cs_set_scoreboard_entry(&b, 0, 0);
#endif
/* Use different framebuffer descriptor depending on whether incremental
* rendering has already been triggered */
cs_load32_to(&b, counter, tiler_oom_ctx, FIELD_OFFSET(counter));
@@ -161,7 +168,7 @@ csf_oom_handler_init(struct panfrost_context *ctx)
cs_wait_slot(&b, 0, false);
/* Run the fragment job and wait */
cs_set_scoreboard_entry(&b, 3, 0);
cs_select_sb_entries_for_async_ops(&b, 3);
cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
cs_wait_slot(&b, 3, false);
@@ -191,7 +198,7 @@ csf_oom_handler_init(struct panfrost_context *ctx)
cs_wait_slot(&b, 0, false);
cs_set_scoreboard_entry(&b, 2, 0);
cs_select_sb_entries_for_async_ops(&b, 2);
}
assert(cs_is_valid(&b));
@@ -274,7 +281,7 @@ GENX(csf_init_batch)(struct panfrost_batch *batch)
/* Set up entries */
struct cs_builder *b = batch->csf.cs.builder;
cs_set_scoreboard_entry(b, 2, 0);
cs_select_sb_entries_for_async_ops(b, 2);
batch->framebuffer = alloc_fbd(batch);
if (!batch->framebuffer.gpu)
@@ -698,6 +705,10 @@ csf_emit_tiler_desc(struct panfrost_batch *batch, const struct pan_fb_info *fb)
;
tiler.hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies);
#if PAN_ARCH >= 12
tiler.effective_tile_size = fb->tile_size;
#endif
tiler.fb_width = batch->key.width;
tiler.fb_height = batch->key.height;
tiler.heap = batch->ctx->csf.heap.desc_bo->ptr.gpu;
@@ -873,7 +884,12 @@ csf_emit_shader_regs(struct panfrost_batch *batch, enum pipe_shader_type stage,
assert(stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_FRAGMENT ||
stage == PIPE_SHADER_COMPUTE);
#if PAN_ARCH >= 12
unsigned offset = (stage == PIPE_SHADER_FRAGMENT) ? 2 : 0;
#else
unsigned offset = (stage == PIPE_SHADER_FRAGMENT) ? 4 : 0;
#endif
unsigned fau_count = DIV_ROUND_UP(batch->nr_push_uniforms[stage], 2);
struct cs_builder *b = batch->csf.cs.builder;
@@ -1092,7 +1108,7 @@ csf_emit_draw_state(struct panfrost_batch *batch,
}
csf_emit_shader_regs(batch, PIPE_SHADER_VERTEX,
panfrost_get_position_shader(batch, info));
panfrost_get_position_shader(batch, info));
if (fs_required) {
csf_emit_shader_regs(batch, PIPE_SHADER_FRAGMENT,
@@ -1103,12 +1119,18 @@ csf_emit_draw_state(struct panfrost_batch *batch,
cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_SPD), 0);
}
#if PAN_ARCH >= 12
cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_VERTEX_TSD), batch->tls.gpu);
cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_FRAGMENT_TSD), batch->tls.gpu);
#else
if (secondary_shader) {
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_VARY_SPD),
panfrost_get_varying_shader(batch));
}
cs_move64_to(b, cs_sr_reg64(b, IDVS, TSD_0), batch->tls.gpu);
#endif
cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), 0);
cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD2), 0);
@@ -1120,10 +1142,16 @@ csf_emit_draw_state(struct panfrost_batch *batch,
uint64_t *sbd = (uint64_t *)&batch->scissor[0];
cs_move64_to(b, cs_sr_reg64(b, IDVS, SCISSOR_BOX), *sbd);
#if PAN_ARCH >= 12
uint64_t *avalon_viewport = (uint64_t *)batch->avalon_viewport;
cs_move64_to(b, cs_sr_reg64(b, IDVS, VIEWPORT_HIGH), avalon_viewport[0]);
cs_move64_to(b, cs_sr_reg64(b, IDVS, VIEWPORT_LOW), avalon_viewport[1]);
#else
cs_move32_to(b, cs_sr_reg32(b, IDVS, LOW_DEPTH_CLAMP),
fui(batch->minimum_z));
cs_move32_to(b, cs_sr_reg32(b, IDVS, HIGH_DEPTH_CLAMP),
fui(batch->maximum_z));
#endif
if (ctx->occlusion_query && ctx->active_queries) {
struct panfrost_resource *rsrc = pan_resource(ctx->occlusion_query->rsrc);
@@ -1336,8 +1364,13 @@ GENX(csf_launch_draw)(struct panfrost_batch *batch,
cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_BUFFER_SIZE), 0);
}
#if PAN_ARCH >= 12
cs_run_idvs2(b, flags_override, false, true, drawid,
MALI_IDVS_SHADING_MODE_EARLY);
#else
cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0),
cs_shader_res_sel(2, 2, 2, 0), drawid);
#endif
}
void
@@ -1378,8 +1411,13 @@ GENX(csf_launch_draw_indirect)(struct panfrost_batch *batch,
}
cs_wait_slot(b, 0, false);
#if PAN_ARCH >= 12
cs_run_idvs2(b, flags_override, false, true, drawid,
MALI_IDVS_SHADING_MODE_EARLY);
#else
cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0),
cs_shader_res_sel(2, 2, 2, 0), drawid);
#endif
cs_add64(b, address, address, indirect->stride);
cs_add32(b, counter, counter, (unsigned int)-1);

@@ -1204,11 +1204,18 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
cfg.flags_1.sample_mask = 0xFFFF;
cfg.flags_0.multisample_enable = ms;
cfg.flags_0.evaluate_per_sample = ms;
cfg.maximum_z = 1.0;
cfg.flags_0.clean_fragment_write = clean_fragment_write;
#if PAN_ARCH >= 12
cfg.fragment_resources = T.gpu | nr_tables;
cfg.fragment_shader = spd.gpu;
cfg.thread_storage = tsd;
#else
cfg.maximum_z = 1.0;
cfg.shader.resources = T.gpu | nr_tables;
cfg.shader.shader = spd.gpu;
cfg.shader.thread_storage = tsd;
#endif
}
#endif
}

@@ -151,6 +151,9 @@ struct panfrost_batch {
unsigned scissor[2];
float minimum_z, maximum_z;
/* Avalon: struct mali_viewport_packed */
unsigned avalon_viewport[4];
/* Used on Valhall only. Midgard includes attributes in-band with
* attributes, wildly enough.
*/

@@ -939,6 +939,9 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
case 10:
panfrost_cmdstream_screen_init_v10(screen);
break;
case 12:
panfrost_cmdstream_screen_init_v12(screen);
break;
default:
debug_printf("panfrost: Unhandled architecture major %d", dev->arch);
panfrost_destroy_screen(&(screen->base));

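On the screen-init plumbing just above and the panfrost_cmdstream_screen_init_v12 declaration below: per-generation entry points in panfrost come from building the same sources once per PAN_ARCH value (see the meson loop in the first file) and letting GENX() paste the version suffix onto each symbol, as with GENX(csf_init_batch) earlier in this diff. The following is a rough, self-contained illustration of that pattern, using simplified stand-in macros rather than the real panfrost/genxml headers.

#include <stdio.h>

#ifndef PAN_ARCH
#define PAN_ARCH 12
#endif

/* Simplified stand-ins for the real per-gen macros: each per-version
 * static library is built with -DPAN_ARCH=<n>, and GENX() appends a
 * "_v<n>" suffix so the same function name becomes a distinct symbol
 * in every library. */
#define PAN_CONCAT(name, ver) name##_v##ver
#define PAN_EXPAND(name, ver) PAN_CONCAT(name, ver)
#define GENX(name)            PAN_EXPAND(name, PAN_ARCH)

static void GENX(screen_init)(void)
{
   printf("cmdstream init for arch v%d\n", PAN_ARCH);
}

int main(void)
{
   /* With PAN_ARCH=12 this resolves to screen_init_v12(); the driver
    * declares panfrost_cmdstream_screen_init_v12() along the same lines
    * and the switch in panfrost_create_screen() picks it from dev->arch. */
   GENX(screen_init)();
   return 0;
}
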
@@ -160,6 +160,7 @@ void panfrost_cmdstream_screen_init_v6(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v7(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v9(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v10(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v12(struct panfrost_screen *screen);
#define perf_debug(ctx, ...) \
do { \