mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 18:00:10 +01:00
panfrost: Add v12 support to the Gallium driver
Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34032>
This commit is contained in:
parent
7a8d0b78e9
commit
1f1c36094b
8 changed files with 140 additions and 9 deletions
|
|
@ -41,7 +41,7 @@ compile_args_panfrost = [
|
|||
'-Wno-pointer-arith'
|
||||
]
|
||||
|
||||
panfrost_versions = ['4', '5', '6', '7', '9', '10']
|
||||
panfrost_versions = ['4', '5', '6', '7', '9', '10', '12']
|
||||
libpanfrost_versions = []
|
||||
|
||||
foreach ver : panfrost_versions
|
||||
|
|
@ -53,7 +53,7 @@ foreach ver : panfrost_versions
|
|||
]
|
||||
if ver in ['4', '5', '6', '7', '9']
|
||||
files_panfrost_vx += ['pan_jm.c']
|
||||
elif ver in ['10']
|
||||
elif ver in ['10', '12']
|
||||
files_panfrost_vx += ['pan_csf.c']
|
||||
endif
|
||||
libpanfrost_versions += static_library(
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@
|
|||
* functions. */
|
||||
#if PAN_ARCH <= 9
|
||||
#define JOBX(__suffix) GENX(jm_##__suffix)
|
||||
#elif PAN_ARCH <= 10
|
||||
#elif PAN_ARCH <= 12
|
||||
#define JOBX(__suffix) GENX(csf_##__suffix)
|
||||
#else
|
||||
#error "Unsupported arch"
|
||||
|
|
@ -707,6 +707,71 @@ panfrost_emit_frag_shader_meta(struct panfrost_batch *batch)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
/**
 * Compute and pack the viewport and scissor state for a batch
 * (PAN_ARCH >= 12 variant).
 *
 * Reads the viewport transform, scissor rectangle and rasterizer state from
 * batch->ctx, clamps the resulting rectangle to batch->key.width/height and
 * then:
 *  - passes the rectangle to panfrost_batch_union_scissor(),
 *  - sets batch->scissor_culls_everything when the rectangle is empty,
 *  - packs a VIEWPORT descriptor into batch->avalon_viewport,
 *  - packs a SCISSOR descriptor into batch->scissor.
 *
 * Always returns 0; every result is stored in the batch.
 */
static uint64_t
|
||||
panfrost_emit_viewport(struct panfrost_batch *batch)
|
||||
{
|
||||
struct panfrost_context *ctx = batch->ctx;
|
||||
const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
|
||||
const struct pipe_scissor_state *ss = &ctx->scissor;
|
||||
const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
|
||||
|
||||
/* Derive min/max from translate/scale. Note since |x| >= 0 by
|
||||
* definition, we have that -|x| <= |x| hence translate - |scale| <=
|
||||
* translate + |scale|, so the ordering is correct here. */
|
||||
float vp_minx = vp->translate[0] - fabsf(vp->scale[0]);
|
||||
float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]);
|
||||
float vp_miny = vp->translate[1] - fabsf(vp->scale[1]);
|
||||
float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]);
|
||||
|
||||
/* Depth range of the viewport; clip_halfz is forwarded from the
 * rasterizer state. */
float minz, maxz;
|
||||
util_viewport_zmin_zmax(vp, rast->clip_halfz, &minz, &maxz);
|
||||
|
||||
/* Viewport clamped to the framebuffer */
|
||||
/* NOTE(review): the (int) casts assume the float bounds are finite and fit
 * in an int; C leaves out-of-range float-to-int conversion undefined.
 * Presumably the viewport state is sane by the time it gets here -- confirm. */
unsigned minx = MIN2(batch->key.width, MAX2((int)vp_minx, 0));
|
||||
unsigned maxx = MIN2(batch->key.width, MAX2((int)vp_maxx, 0));
|
||||
unsigned miny = MIN2(batch->key.height, MAX2((int)vp_miny, 0));
|
||||
unsigned maxy = MIN2(batch->key.height, MAX2((int)vp_maxy, 0));
|
||||
|
||||
/* Intersect with the scissor rectangle. ss is the address of a member of
 * ctx, so it is never NULL here; the intersection is effectively gated by
 * rast->scissor alone. */
if (ss && rast->scissor) {
|
||||
minx = MAX2(ss->minx, minx);
|
||||
miny = MAX2(ss->miny, miny);
|
||||
maxx = MIN2(ss->maxx, maxx);
|
||||
maxy = MIN2(ss->maxy, maxy);
|
||||
}
|
||||
|
||||
/* maxx/maxy are unsigned and are decremented further down, so a zero
 * maximum would wrap around to UINT_MAX. */
/* Set the range to [1, 1) so max values don't wrap round */
|
||||
if (maxx == 0 || maxy == 0)
|
||||
maxx = maxy = minx = miny = 1;
|
||||
|
||||
panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy);
|
||||
/* Flag an empty rectangle (min >= max on either axis). */
batch->scissor_culls_everything = (minx >= maxx || miny >= maxy);
|
||||
|
||||
/* Packed before the decrement below, so max_x/max_y still hold the
 * exclusive upper bounds. */
pan_cast_and_pack(&batch->avalon_viewport, VIEWPORT, cfg) {
|
||||
/* Clamp viewport to valid range */
|
||||
/* presumably the x/y fields are 16 bits wide, hence UINT16_MAX -- TODO
 * confirm against the VIEWPORT descriptor definition */
cfg.min_x = CLAMP(minx, 0, UINT16_MAX);
|
||||
cfg.min_y = CLAMP(miny, 0, UINT16_MAX);
|
||||
cfg.max_x = CLAMP(maxx, 0, UINT16_MAX);
|
||||
cfg.max_y = CLAMP(maxy, 0, UINT16_MAX);
|
||||
|
||||
cfg.min_depth = CLAMP(minz, 0.0f, 1.0f);
|
||||
cfg.max_depth = CLAMP(maxz, 0.0f, 1.0f);
|
||||
}
|
||||
|
||||
/* [minx, maxx) and [miny, maxy) are exclusive ranges for scissors in the hardware */
|
||||
/* NOTE(review): the decrement turns the maxima into inclusive bounds before
 * SCISSOR is packed -- presumably what the scissor fields expect; confirm. */
maxx--;
|
||||
maxy--;
|
||||
|
||||
pan_cast_and_pack(&batch->scissor, SCISSOR, cfg) {
|
||||
cfg.scissor_minimum_x = minx;
|
||||
cfg.scissor_minimum_y = miny;
|
||||
cfg.scissor_maximum_x = maxx;
|
||||
cfg.scissor_maximum_y = maxy;
|
||||
}
|
||||
|
||||
/* Results live in the batch; the return value is always 0. */
return 0;
|
||||
}
|
||||
#else
|
||||
static uint64_t
|
||||
panfrost_emit_viewport(struct panfrost_batch *batch)
|
||||
{
|
||||
|
|
@ -783,6 +848,7 @@ panfrost_emit_viewport(struct panfrost_batch *batch)
|
|||
return 0;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
/**
|
||||
|
|
@ -4002,9 +4068,14 @@ prepare_shader(struct panfrost_compiled_shader *state,
|
|||
return;
|
||||
|
||||
bool vs = (state->info.stage == MESA_SHADER_VERTEX);
|
||||
bool secondary_enable = (vs && state->info.vs.secondary_enable);
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
unsigned nr_variants = vs ? 2 : 1;
|
||||
#else
|
||||
bool secondary_enable = (vs && state->info.vs.secondary_enable);
|
||||
unsigned nr_variants = secondary_enable ? 3 : vs ? 2 : 1;
|
||||
#endif
|
||||
|
||||
struct panfrost_ptr ptr =
|
||||
pan_pool_alloc_desc_array(&pool->base, nr_variants, SHADER_PROGRAM);
|
||||
|
||||
|
|
@ -4018,8 +4089,10 @@ prepare_shader(struct panfrost_compiled_shader *state,
|
|||
|
||||
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
|
||||
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
|
||||
#if PAN_ARCH < 12
|
||||
else if (vs)
|
||||
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
|
||||
#endif
|
||||
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(state->info.work_reg_count);
|
||||
|
|
@ -4037,7 +4110,9 @@ prepare_shader(struct panfrost_compiled_shader *state,
|
|||
/* IDVS/triangles */
|
||||
pan_pack(&programs[1], SHADER_PROGRAM, cfg) {
|
||||
cfg.stage = pan_shader_stage(&state->info);
|
||||
#if PAN_ARCH < 12
|
||||
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
|
||||
#endif
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(state->info.work_reg_count);
|
||||
cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset;
|
||||
|
|
@ -4045,6 +4120,7 @@ prepare_shader(struct panfrost_compiled_shader *state,
|
|||
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
|
||||
}
|
||||
|
||||
#if PAN_ARCH < 12
|
||||
if (!secondary_enable)
|
||||
return;
|
||||
|
||||
|
|
@ -4059,6 +4135,7 @@ prepare_shader(struct panfrost_compiled_shader *state,
|
|||
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -256,11 +256,13 @@ panfrost_get_position_shader(struct panfrost_batch *batch,
|
|||
return vs_ptr;
|
||||
}
|
||||
|
||||
#if PAN_ARCH < 12
|
||||
static inline uint64_t
|
||||
panfrost_get_varying_shader(struct panfrost_batch *batch)
|
||||
{
|
||||
return batch->rsd[PIPE_SHADER_VERTEX] + (2 * pan_size(SHADER_PROGRAM));
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline unsigned
|
||||
panfrost_vertex_attribute_stride(struct panfrost_compiled_shader *vs,
|
||||
|
|
|
|||
|
|
@ -139,6 +139,13 @@ csf_oom_handler_init(struct panfrost_context *ctx)
|
|||
struct cs_index completed_bottom = cs_reg64(&b, 54);
|
||||
struct cs_index completed_chunks = cs_reg_tuple(&b, 52, 4);
|
||||
|
||||
/* Ensure that the OTHER endpoint is valid */
|
||||
#if PAN_ARCH >= 11
|
||||
cs_set_state_imm32(&b, MALI_CS_SET_STATE_TYPE_SB_SEL_OTHER, 0);
|
||||
#else
|
||||
cs_set_scoreboard_entry(&b, 0, 0);
|
||||
#endif
|
||||
|
||||
/* Use different framebuffer descriptor depending on whether incremental
|
||||
* rendering has already been triggered */
|
||||
cs_load32_to(&b, counter, tiler_oom_ctx, FIELD_OFFSET(counter));
|
||||
|
|
@ -161,7 +168,7 @@ csf_oom_handler_init(struct panfrost_context *ctx)
|
|||
cs_wait_slot(&b, 0, false);
|
||||
|
||||
/* Run the fragment job and wait */
|
||||
cs_set_scoreboard_entry(&b, 3, 0);
|
||||
cs_select_sb_entries_for_async_ops(&b, 3);
|
||||
cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
|
||||
cs_wait_slot(&b, 3, false);
|
||||
|
||||
|
|
@ -191,7 +198,7 @@ csf_oom_handler_init(struct panfrost_context *ctx)
|
|||
|
||||
cs_wait_slot(&b, 0, false);
|
||||
|
||||
cs_set_scoreboard_entry(&b, 2, 0);
|
||||
cs_select_sb_entries_for_async_ops(&b, 2);
|
||||
}
|
||||
|
||||
assert(cs_is_valid(&b));
|
||||
|
|
@ -274,7 +281,7 @@ GENX(csf_init_batch)(struct panfrost_batch *batch)
|
|||
|
||||
/* Set up entries */
|
||||
struct cs_builder *b = batch->csf.cs.builder;
|
||||
cs_set_scoreboard_entry(b, 2, 0);
|
||||
cs_select_sb_entries_for_async_ops(b, 2);
|
||||
|
||||
batch->framebuffer = alloc_fbd(batch);
|
||||
if (!batch->framebuffer.gpu)
|
||||
|
|
@ -698,6 +705,10 @@ csf_emit_tiler_desc(struct panfrost_batch *batch, const struct pan_fb_info *fb)
|
|||
;
|
||||
tiler.hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies);
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
tiler.effective_tile_size = fb->tile_size;
|
||||
#endif
|
||||
|
||||
tiler.fb_width = batch->key.width;
|
||||
tiler.fb_height = batch->key.height;
|
||||
tiler.heap = batch->ctx->csf.heap.desc_bo->ptr.gpu;
|
||||
|
|
@ -873,7 +884,12 @@ csf_emit_shader_regs(struct panfrost_batch *batch, enum pipe_shader_type stage,
|
|||
assert(stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_FRAGMENT ||
|
||||
stage == PIPE_SHADER_COMPUTE);
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
unsigned offset = (stage == PIPE_SHADER_FRAGMENT) ? 2 : 0;
|
||||
#else
|
||||
unsigned offset = (stage == PIPE_SHADER_FRAGMENT) ? 4 : 0;
|
||||
#endif
|
||||
|
||||
unsigned fau_count = DIV_ROUND_UP(batch->nr_push_uniforms[stage], 2);
|
||||
|
||||
struct cs_builder *b = batch->csf.cs.builder;
|
||||
|
|
@ -1092,7 +1108,7 @@ csf_emit_draw_state(struct panfrost_batch *batch,
|
|||
}
|
||||
|
||||
csf_emit_shader_regs(batch, PIPE_SHADER_VERTEX,
|
||||
panfrost_get_position_shader(batch, info));
|
||||
panfrost_get_position_shader(batch, info));
|
||||
|
||||
if (fs_required) {
|
||||
csf_emit_shader_regs(batch, PIPE_SHADER_FRAGMENT,
|
||||
|
|
@ -1103,12 +1119,18 @@ csf_emit_draw_state(struct panfrost_batch *batch,
|
|||
cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_SPD), 0);
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_VERTEX_TSD), batch->tls.gpu);
|
||||
cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_FRAGMENT_TSD), batch->tls.gpu);
|
||||
#else
|
||||
if (secondary_shader) {
|
||||
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_VARY_SPD),
|
||||
panfrost_get_varying_shader(batch));
|
||||
}
|
||||
|
||||
cs_move64_to(b, cs_sr_reg64(b, IDVS, TSD_0), batch->tls.gpu);
|
||||
#endif
|
||||
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD2), 0);
|
||||
|
|
@ -1120,10 +1142,16 @@ csf_emit_draw_state(struct panfrost_batch *batch,
|
|||
uint64_t *sbd = (uint64_t *)&batch->scissor[0];
|
||||
cs_move64_to(b, cs_sr_reg64(b, IDVS, SCISSOR_BOX), *sbd);
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
uint64_t *avalon_viewport = (uint64_t *)batch->avalon_viewport;
|
||||
cs_move64_to(b, cs_sr_reg64(b, IDVS, VIEWPORT_HIGH), avalon_viewport[0]);
|
||||
cs_move64_to(b, cs_sr_reg64(b, IDVS, VIEWPORT_LOW), avalon_viewport[1]);
|
||||
#else
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, LOW_DEPTH_CLAMP),
|
||||
fui(batch->minimum_z));
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, HIGH_DEPTH_CLAMP),
|
||||
fui(batch->maximum_z));
|
||||
#endif
|
||||
|
||||
if (ctx->occlusion_query && ctx->active_queries) {
|
||||
struct panfrost_resource *rsrc = pan_resource(ctx->occlusion_query->rsrc);
|
||||
|
|
@ -1336,8 +1364,13 @@ GENX(csf_launch_draw)(struct panfrost_batch *batch,
|
|||
cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_BUFFER_SIZE), 0);
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
cs_run_idvs2(b, flags_override, false, true, drawid,
|
||||
MALI_IDVS_SHADING_MODE_EARLY);
|
||||
#else
|
||||
cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0),
|
||||
cs_shader_res_sel(2, 2, 2, 0), drawid);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1378,8 +1411,13 @@ GENX(csf_launch_draw_indirect)(struct panfrost_batch *batch,
|
|||
}
|
||||
|
||||
cs_wait_slot(b, 0, false);
|
||||
#if PAN_ARCH >= 12
|
||||
cs_run_idvs2(b, flags_override, false, true, drawid,
|
||||
MALI_IDVS_SHADING_MODE_EARLY);
|
||||
#else
|
||||
cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0),
|
||||
cs_shader_res_sel(2, 2, 2, 0), drawid);
|
||||
#endif
|
||||
|
||||
cs_add64(b, address, address, indirect->stride);
|
||||
cs_add32(b, counter, counter, (unsigned int)-1);
|
||||
|
|
|
|||
|
|
@ -1204,11 +1204,18 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
|
|||
cfg.flags_1.sample_mask = 0xFFFF;
|
||||
cfg.flags_0.multisample_enable = ms;
|
||||
cfg.flags_0.evaluate_per_sample = ms;
|
||||
cfg.maximum_z = 1.0;
|
||||
cfg.flags_0.clean_fragment_write = clean_fragment_write;
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
cfg.fragment_resources = T.gpu | nr_tables;
|
||||
cfg.fragment_shader = spd.gpu;
|
||||
cfg.thread_storage = tsd;
|
||||
#else
|
||||
cfg.maximum_z = 1.0;
|
||||
cfg.shader.resources = T.gpu | nr_tables;
|
||||
cfg.shader.shader = spd.gpu;
|
||||
cfg.shader.thread_storage = tsd;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -151,6 +151,9 @@ struct panfrost_batch {
|
|||
unsigned scissor[2];
|
||||
float minimum_z, maximum_z;
|
||||
|
||||
/* Avalon: struct mali_viewport_packed */
|
||||
unsigned avalon_viewport[4];
|
||||
|
||||
/* Used on Valhall only. Midgard includes attributes in-band with
|
||||
* attributes, wildly enough.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -939,6 +939,9 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
|
|||
case 10:
|
||||
panfrost_cmdstream_screen_init_v10(screen);
|
||||
break;
|
||||
case 12:
|
||||
panfrost_cmdstream_screen_init_v12(screen);
|
||||
break;
|
||||
default:
|
||||
debug_printf("panfrost: Unhandled architecture major %d", dev->arch);
|
||||
panfrost_destroy_screen(&(screen->base));
|
||||
|
|
|
|||
|
|
@ -160,6 +160,7 @@ void panfrost_cmdstream_screen_init_v6(struct panfrost_screen *screen);
|
|||
void panfrost_cmdstream_screen_init_v7(struct panfrost_screen *screen);
|
||||
void panfrost_cmdstream_screen_init_v9(struct panfrost_screen *screen);
|
||||
void panfrost_cmdstream_screen_init_v10(struct panfrost_screen *screen);
|
||||
void panfrost_cmdstream_screen_init_v12(struct panfrost_screen *screen);
|
||||
|
||||
#define perf_debug(ctx, ...) \
|
||||
do { \
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue