From e0696b80d039a31987218dd07874f3db73ccd872 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Thu, 30 Jan 2025 11:56:17 +0000 Subject: [PATCH] pan/genxml: Define RUN_IDVS staging registers in an enum This makes it more clear what is what. It will also reduce the pain of migration on newer gen as most values only moved place. Signed-off-by: Mary Guillemard Reviewed-by: Boris Brezillon Reviewed-by: Benjamin Lee Part-of: --- src/gallium/drivers/panfrost/pan_csf.c | 86 +++++++++++-------- src/panfrost/lib/genxml/cs_builder.h | 13 +++ src/panfrost/lib/genxml/decode_csf.c | 71 ++++++++++------ src/panfrost/lib/genxml/v10.xml | 40 +++++++++ src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 94 ++++++++++++--------- 5 files changed, 204 insertions(+), 100 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_csf.c b/src/gallium/drivers/panfrost/pan_csf.c index 03f16ce39fe..59e150bc589 100644 --- a/src/gallium/drivers/panfrost/pan_csf.c +++ b/src/gallium/drivers/panfrost/pan_csf.c @@ -1070,44 +1070,49 @@ csf_emit_draw_state(struct panfrost_batch *batch, csf_emit_shader_regs(batch, PIPE_SHADER_FRAGMENT, batch->rsd[PIPE_SHADER_FRAGMENT]); } else { - cs_move64_to(b, cs_reg64(b, 4), 0); - cs_move64_to(b, cs_reg64(b, 12), 0); - cs_move64_to(b, cs_reg64(b, 20), 0); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_FRAGMENT_SRT), 0); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_FRAGMENT_FAU), 0); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_FRAGMENT_SPD), 0); } if (secondary_shader) { - cs_move64_to(b, cs_reg64(b, 18), panfrost_get_varying_shader(batch)); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_VERTEX_VARY_SPD), + panfrost_get_varying_shader(batch)); } - cs_move64_to(b, cs_reg64(b, 24), batch->tls.gpu); - cs_move64_to(b, cs_reg64(b, 30), batch->tls.gpu); - cs_move32_to(b, cs_reg32(b, 32), 0); - cs_move32_to(b, cs_reg32(b, 37), 0); - cs_move32_to(b, cs_reg32(b, 38), 0); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_TSD_0), batch->tls.gpu); + cs_move64_to(b, cs_reg64(b, 
MALI_IDVS_SR_TSD_3), batch->tls.gpu); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_DCD2), 0); - cs_move64_to(b, cs_reg64(b, 40), csf_get_tiler_desc(batch)); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_TILER_CTX), + csf_get_tiler_desc(batch)); STATIC_ASSERT(sizeof(batch->scissor) == pan_size(SCISSOR)); STATIC_ASSERT(sizeof(uint64_t) == pan_size(SCISSOR)); uint64_t *sbd = (uint64_t *)&batch->scissor[0]; - cs_move64_to(b, cs_reg64(b, 42), *sbd); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_SCISSOR_BOX), *sbd); - cs_move32_to(b, cs_reg32(b, 44), fui(batch->minimum_z)); - cs_move32_to(b, cs_reg32(b, 45), fui(batch->maximum_z)); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_LOW_DEPTH_CLAMP), + fui(batch->minimum_z)); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_HIGH_DEPTH_CLAMP), + fui(batch->maximum_z)); if (ctx->occlusion_query && ctx->active_queries) { struct panfrost_resource *rsrc = pan_resource(ctx->occlusion_query->rsrc); - cs_move64_to(b, cs_reg64(b, 46), rsrc->image.data.base); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_OQ), rsrc->image.data.base); panfrost_batch_write_rsrc(ctx->batch, rsrc, PIPE_SHADER_FRAGMENT); } - cs_move32_to(b, cs_reg32(b, 48), panfrost_vertex_attribute_stride(vs, fs)); - cs_move64_to(b, cs_reg64(b, 50), + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_VARY_SIZE), + panfrost_vertex_attribute_stride(vs, fs)); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_BLEND_DESC), batch->blend | MAX2(batch->key.nr_cbufs, 1)); - cs_move64_to(b, cs_reg64(b, 52), batch->depth_stencil); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_ZSD), batch->depth_stencil); if (info->index_size) - cs_move64_to(b, cs_reg64(b, 54), batch->indices); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_INDEX_BUFFER), batch->indices); struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; @@ -1130,7 +1135,8 @@ csf_emit_draw_state(struct panfrost_batch *batch, : 
MALI_FIFO_FORMAT_BASIC; } - cs_move32_to(b, cs_reg32(b, 56), primitive_flags.opaque[0]); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_TILER_FLAGS), + primitive_flags.opaque[0]); struct mali_dcd_flags_0_packed dcd_flags0; struct mali_dcd_flags_1_packed dcd_flags1; @@ -1240,14 +1246,15 @@ csf_emit_draw_state(struct panfrost_batch *batch, } } - cs_move32_to(b, cs_reg32(b, 57), dcd_flags0.opaque[0]); - cs_move32_to(b, cs_reg32(b, 58), dcd_flags1.opaque[0]); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_DCD0), dcd_flags0.opaque[0]); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_DCD1), dcd_flags1.opaque[0]); struct mali_primitive_size_packed primsize; panfrost_emit_primitive_size(ctx, info->mode == MESA_PRIM_POINTS, 0, &primsize); struct mali_primitive_size_packed *primsize_ptr = &primsize; - cs_move64_to(b, cs_reg64(b, 60), *((uint64_t*)primsize_ptr)); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_PRIMITIVE_SIZE), + *((uint64_t *)primsize_ptr)); struct mali_primitive_flags_packed flags_override; /* Pack with nodefaults so only explicitly set override fields affect the @@ -1288,19 +1295,22 @@ GENX(csf_launch_draw)(struct panfrost_batch *batch, uint32_t flags_override = csf_emit_draw_state(batch, info, drawid_offset); struct cs_index drawid = csf_emit_draw_id_register(batch, drawid_offset); - cs_move32_to(b, cs_reg32(b, 33), draw->count); - cs_move32_to(b, cs_reg32(b, 34), info->instance_count); - cs_move32_to(b, cs_reg32(b, 35), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_COUNT), draw->count); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INSTANCE_COUNT), + info->instance_count); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_OFFSET), 0); /* Base vertex offset on Valhall is used for both indexed and * non-indexed draws, in a simple way for either. Handle both cases.
*/ if (info->index_size) { - cs_move32_to(b, cs_reg32(b, 36), draw->index_bias); - cs_move32_to(b, cs_reg32(b, 39), info->index_size * draw->count); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), + draw->index_bias); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), + info->index_size * draw->count); } else { - cs_move32_to(b, cs_reg32(b, 36), draw->start); - cs_move32_to(b, cs_reg32(b, 39), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), draw->start); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), 0); } cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0), @@ -1328,16 +1338,20 @@ GENX(csf_launch_draw_indirect)(struct panfrost_batch *batch, cs_while(b, MALI_CS_CONDITION_GREATER, counter) { if (info->index_size) { /* loads vertex count, instance count, index offset, vertex offset */ - cs_load_to(b, cs_reg_tuple(b, 33, 4), address, BITFIELD_MASK(4), 0); - cs_move32_to(b, cs_reg32(b, 39), info->index.resource->width0); + cs_load_to(b, cs_reg_tuple(b, MALI_IDVS_SR_INDEX_COUNT, 4), address, + BITFIELD_MASK(4), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), + info->index.resource->width0); } else { /* vertex count, instance count */ - cs_load_to(b, cs_reg_tuple(b, 33, 2), address, BITFIELD_MASK(2), 0); - cs_move32_to(b, cs_reg32(b, 35), 0); - cs_load_to(b, cs_reg_tuple(b, 36, 1), address, BITFIELD_MASK(1), + cs_load_to(b, cs_reg_tuple(b, MALI_IDVS_SR_INDEX_COUNT, 2), address, + BITFIELD_MASK(2), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_OFFSET), 0); + cs_load_to(b, cs_reg_tuple(b, MALI_IDVS_SR_VERTEX_OFFSET, 1), address, + BITFIELD_MASK(1), 2 * sizeof(uint32_t)); // instance offset - cs_move32_to(b, cs_reg32(b, 37), 0); - cs_move32_to(b, cs_reg32(b, 39), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), 0); } cs_wait_slot(b, 0, false); diff --git a/src/panfrost/lib/genxml/cs_builder.h 
b/src/panfrost/lib/genxml/cs_builder.h index 51238dc43de..680716d8a8e 100644 --- a/src/panfrost/lib/genxml/cs_builder.h +++ b/src/panfrost/lib/genxml/cs_builder.h @@ -32,6 +32,19 @@ #include "util/bitset.h" #include "util/u_dynarray.h" +/* Before Avalon, RUN_IDVS could use a selector but as we only hardcode the same + * configuration, we match v12+ naming here */ + +#if PAN_ARCH <= 11 +#define MALI_IDVS_SR_VERTEX_SRT MALI_IDVS_SR_SRT_0 +#define MALI_IDVS_SR_FRAGMENT_SRT MALI_IDVS_SR_SRT_2 +#define MALI_IDVS_SR_VERTEX_FAU MALI_IDVS_SR_FAU_0 +#define MALI_IDVS_SR_FRAGMENT_FAU MALI_IDVS_SR_FAU_2 +#define MALI_IDVS_SR_VERTEX_POS_SPD MALI_IDVS_SR_SPD_0 +#define MALI_IDVS_SR_VERTEX_VARY_SPD MALI_IDVS_SR_SPD_1 +#define MALI_IDVS_SR_FRAGMENT_SPD MALI_IDVS_SR_SPD_2 +#endif + /* * cs_builder implements a builder for CSF command streams. It manages the * allocation and overflow behaviour of queues and provides helpers for emitting diff --git a/src/panfrost/lib/genxml/decode_csf.c b/src/panfrost/lib/genxml/decode_csf.c index dd90bcf2ca6..5dbf52320ff 100644 --- a/src/panfrost/lib/genxml/decode_csf.c +++ b/src/panfrost/lib/genxml/decode_csf.c @@ -622,7 +622,8 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, /* Merge flag overrides with the register flags */ struct mali_primitive_flags_packed tiler_flags_packed = { - .opaque[0] = cs_get_u32(qctx, 56) | I->flags_override, + .opaque[0] = + cs_get_u32(qctx, MALI_IDVS_SR_TILER_FLAGS) | I->flags_override, }; pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags); @@ -676,20 +677,22 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, GENX(pandecode_fau)(ctx, lo, hi, "Fragment FAU"); } - if (cs_get_u64(qctx, 16)) { + if (cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_POS_SPD)) { GENX(pandecode_shader) - (ctx, cs_get_u64(qctx, 16), "Position shader", qctx->gpu_id); + (ctx, cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_POS_SPD), "Position shader", + qctx->gpu_id); } if (tiler_flags.secondary_shader) { - uint64_t ptr = 
cs_get_u64(qctx, 18); + uint64_t ptr = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_VARY_SPD); GENX(pandecode_shader)(ctx, ptr, "Varying shader", qctx->gpu_id); } - if (cs_get_u64(qctx, 20)) { + if (cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SPD)) { GENX(pandecode_shader) - (ctx, cs_get_u64(qctx, 20), "Fragment shader", qctx->gpu_id); + (ctx, cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SPD), "Fragment shader", + qctx->gpu_id); } DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_position_tsd), @@ -702,42 +705,58 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, "Fragment Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_frag_tsd)); - pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32)); - pandecode_log(ctx, "Index count: %u\n", cs_get_u32(qctx, 33)); - pandecode_log(ctx, "Instance count: %u\n", cs_get_u32(qctx, 34)); + pandecode_log(ctx, "Global attribute offset: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET)); + pandecode_log(ctx, "Index count: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_INDEX_COUNT)); + pandecode_log(ctx, "Instance count: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_COUNT)); if (tiler_flags.index_type) - pandecode_log(ctx, "Index offset: %u\n", cs_get_u32(qctx, 35)); + pandecode_log(ctx, "Index offset: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_INDEX_OFFSET)); - pandecode_log(ctx, "Vertex offset: %d\n", cs_get_u32(qctx, 36)); - pandecode_log(ctx, "Instance offset: %u\n", cs_get_u32(qctx, 37)); - pandecode_log(ctx, "Tiler DCD flags2: %X\n", cs_get_u32(qctx, 38)); + pandecode_log(ctx, "Vertex offset: %d\n", + cs_get_u32(qctx, MALI_IDVS_SR_VERTEX_OFFSET)); + pandecode_log(ctx, "Instance offset: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_OFFSET)); + pandecode_log(ctx, "Tiler DCD flags2: %X\n", + cs_get_u32(qctx, MALI_IDVS_SR_DCD2)); if (tiler_flags.index_type) - pandecode_log(ctx, "Index array size: %u\n", cs_get_u32(qctx, 39)); + pandecode_log(ctx, "Index array size: %u\n", + cs_get_u32(qctx, 
MALI_IDVS_SR_INDEX_BUFFER_SIZE)); - GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id); + GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, MALI_IDVS_SR_TILER_CTX), + qctx->gpu_id); - DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n"); - pandecode_log(ctx, "Low depth clamp: %f\n", uif(cs_get_u32(qctx, 44))); - pandecode_log(ctx, "High depth clamp: %f\n", uif(cs_get_u32(qctx, 45))); - pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", cs_get_u64(qctx, 46)); + DUMP_CL(ctx, SCISSOR, &qctx->regs[MALI_IDVS_SR_SCISSOR_BOX], "Scissor\n"); + pandecode_log(ctx, "Low depth clamp: %f\n", + uif(cs_get_u32(qctx, MALI_IDVS_SR_LOW_DEPTH_CLAMP))); + pandecode_log(ctx, "High depth clamp: %f\n", + uif(cs_get_u32(qctx, MALI_IDVS_SR_HIGH_DEPTH_CLAMP))); + pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", + cs_get_u64(qctx, MALI_IDVS_SR_OQ)); if (tiler_flags.secondary_shader) - pandecode_log(ctx, "Varying allocation: %u\n", cs_get_u32(qctx, 48)); + pandecode_log(ctx, "Varying allocation: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_VARY_SIZE)); - uint64_t blend = cs_get_u64(qctx, 50); + uint64_t blend = cs_get_u64(qctx, MALI_IDVS_SR_BLEND_DESC); GENX(pandecode_blend_descs)(ctx, blend & ~15, blend & 15, 0, qctx->gpu_id); - DUMP_ADDR(ctx, DEPTH_STENCIL, cs_get_u64(qctx, 52), "Depth/stencil"); + DUMP_ADDR(ctx, DEPTH_STENCIL, cs_get_u64(qctx, MALI_IDVS_SR_ZSD), + "Depth/stencil"); if (tiler_flags.index_type) - pandecode_log(ctx, "Indices: %" PRIx64 "\n", cs_get_u64(qctx, 54)); + pandecode_log(ctx, "Indices: %" PRIx64 "\n", + cs_get_u64(qctx, MALI_IDVS_SR_INDEX_BUFFER)); DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n"); - DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[57], "DCD Flags 0\n"); - DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[58], "DCD Flags 1\n"); - DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[60], "Primitive size\n"); + DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[MALI_IDVS_SR_DCD0], "DCD Flags 0\n"); + DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[MALI_IDVS_SR_DCD1], "DCD Flags 1\n"); + 
DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[MALI_IDVS_SR_PRIMITIVE_SIZE], + "Primitive size\n"); ctx->indent--; } diff --git a/src/panfrost/lib/genxml/v10.xml b/src/panfrost/lib/genxml/v10.xml index 769b5cda6e1..bf395ecf001 100644 --- a/src/panfrost/lib/genxml/v10.xml +++ b/src/panfrost/lib/genxml/v10.xml @@ -840,6 +840,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 67429ed74d0..59d29837fa2 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -403,7 +403,7 @@ update_tls(struct panvk_cmd_buffer *cmdbuf) cmdbuf->state.gfx.tsd = state->desc.gpu; cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, 24), state->desc.gpu); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_TSD_0), state->desc.gpu); } state->info.tls.size = @@ -462,7 +462,8 @@ prepare_blend(struct panvk_cmd_buffer *cmdbuf) panvk_per_arch(blend_emit_descs)(cmdbuf, bds); cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, 50), ptr.gpu | bd_count); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_BLEND_DESC), + ptr.gpu | bd_count); return VK_SUCCESS; } @@ -510,7 +511,8 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf) } struct mali_scissor_packed *scissor_box_ptr = &scissor_box; - cs_move64_to(b, cs_sr_reg64(b, 42), *((uint64_t*)scissor_box_ptr)); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_SCISSOR_BOX), + *((uint64_t *)scissor_box_ptr)); } if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) || @@ -520,8 +522,10 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf) float z_min = sysvals->viewport.offset.z; float z_max = z_min + sysvals->viewport.scale.z; - cs_move32_to(b, cs_sr_reg32(b, 44), fui(MIN2(z_min, z_max))); - cs_move32_to(b, cs_sr_reg32(b, 45), fui(MAX2(z_min, z_max))); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_LOW_DEPTH_CLAMP), + fui(MIN2(z_min, z_max))); + cs_move32_to(b, cs_sr_reg32(b, 
MALI_IDVS_SR_HIGH_DEPTH_CLAMP), + fui(MAX2(z_min, z_max))); } } @@ -575,7 +579,8 @@ prepare_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf) return; } - cs_move32_to(b, cs_sr_reg32(b, 60), fui(primitive_size)); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_PRIMITIVE_SIZE), + fui(primitive_size)); } static uint32_t @@ -731,7 +736,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf) cmdbuf->state.gfx.render.tiler = simul_use ? 0xdeadbeefdeadbeefull : tiler_desc.gpu; - struct cs_index tiler_ctx_addr = cs_sr_reg64(b, 40); + struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX); if (simul_use) { uint32_t descs_sz = calc_render_descs_size(cmdbuf); @@ -1200,14 +1205,16 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf) cs_update_vt_ctx(b) { if (upd_res_table) - cs_move64_to(b, cs_sr_reg64(b, 0), vs_desc_state->res_table); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_SRT), + vs_desc_state->res_table); if (gfx_state_dirty(cmdbuf, VS) || dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY)) - cs_move64_to(b, cs_sr_reg64(b, 16), get_pos_spd(cmdbuf)); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_POS_SPD), + get_pos_spd(cmdbuf)); if (gfx_state_dirty(cmdbuf, VS)) - cs_move64_to(b, cs_sr_reg64(b, 18), + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_VARY_SPD), panvk_priv_mem_dev_addr(vs->spds.var)); } @@ -1237,9 +1244,10 @@ prepare_fs(struct panvk_cmd_buffer *cmdbuf) cs_update_vt_ctx(b) { if (fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, DESC_STATE)) - cs_move64_to(b, cs_sr_reg64(b, 4), fs ? fs_desc_state->res_table : 0); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SRT), + fs ? fs_desc_state->res_table : 0); if (fs_user_dirty(cmdbuf)) - cs_move64_to(b, cs_sr_reg64(b, 20), + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SPD), fs ? 
panvk_priv_mem_dev_addr(fs->spd) : 0); } @@ -1261,7 +1269,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf) return result; cs_update_vt_ctx(b) { - cs_move64_to(b, cs_sr_reg64(b, 8), + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_FAU), cmdbuf->state.gfx.vs.push_uniforms | ((uint64_t)vs->fau.total_count << 56)); } @@ -1280,7 +1288,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, 12), fau_ptr); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_FAU), fau_ptr); } return VK_SUCCESS; @@ -1362,7 +1370,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, 52), zsd.gpu); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_ZSD), zsd.gpu); return VK_SUCCESS; } @@ -1439,7 +1447,8 @@ prepare_oq(struct panvk_cmd_buffer *cmdbuf) struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - cs_move64_to(b, cs_sr_reg64(b, 46), cmdbuf->state.gfx.occlusion_query.ptr); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_OQ), + cmdbuf->state.gfx.occlusion_query.ptr); cmdbuf->state.gfx.render.oq.last = cmdbuf->state.gfx.occlusion_query.syncobj; @@ -1531,7 +1540,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move32_to(b, cs_sr_reg32(b, 57), dcd0.opaque[0]); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD0), dcd0.opaque[0]); } if (dcd1_dirty) { @@ -1549,7 +1558,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move32_to(b, cs_sr_reg32(b, 58), dcd1.opaque[0]); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD1), dcd1.opaque[0]); } } @@ -1565,9 +1574,9 @@ prepare_index_buffer(struct panvk_cmd_buffer *cmdbuf, panvk_buffer_range(cmdbuf->state.gfx.ib.buffer, cmdbuf->state.gfx.ib.offset, VK_WHOLE_SIZE); assert(ib_size <= UINT32_MAX); - cs_move32_to(b, cs_sr_reg32(b, 39), ib_size); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), ib_size); - cs_move64_to(b, cs_sr_reg64(b, 54), 
+ cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_INDEX_BUFFER), panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer, cmdbuf->state.gfx.ib.offset)); } @@ -1627,7 +1636,8 @@ set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf, cfg.view_mask = cmdbuf->state.gfx.render.view_mask; } - cs_move32_to(b, cs_sr_reg32(b, 56), tiler_idvs_flags.opaque[0]); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_TILER_FLAGS), + tiler_idvs_flags.opaque[0]); } } @@ -1707,13 +1717,13 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) cs_update_vt_ctx(b) { /* We don't use the resource dep system yet. */ - cs_move32_to(b, cs_sr_reg32(b, 38), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD2), 0); prepare_index_buffer(cmdbuf, draw); set_tiler_idvs_flags(b, cmdbuf, draw); - cs_move32_to(b, cs_sr_reg32(b, 48), varying_size); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VARY_SIZE), varying_size); result = prepare_ds(cmdbuf); if (result != VK_SUCCESS) @@ -1772,16 +1782,21 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) return; cs_update_vt_ctx(b) { - cs_move32_to(b, cs_sr_reg32(b, 32), 0); - cs_move32_to(b, cs_sr_reg32(b, 33), draw->vertex.count); - cs_move32_to(b, cs_sr_reg32(b, 34), draw->instance.count); - cs_move32_to(b, cs_sr_reg32(b, 35), draw->index.offset); - cs_move32_to(b, cs_sr_reg32(b, 36), draw->vertex.base); - /* NIR expects zero-based instance ID, but even if it did have an intrinsic to - * load the absolute instance ID, we'd want to keep it zero-based to work around - * Mali's limitation on non-zero firstInstance when a instance divisor is used. 
+ cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_COUNT), + draw->vertex.count); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_COUNT), + draw->instance.count); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_OFFSET), + draw->index.offset); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), + draw->vertex.base); + /* NIR expects zero-based instance ID, but even if it did have an + * intrinsic to load the absolute instance ID, we'd want to keep it + * zero-based to work around Mali's limitation on non-zero firstInstance + * when an instance divisor is used. */ - cs_move32_to(b, cs_sr_reg32(b, 37), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); } struct mali_primitive_flags_packed flags_override = @@ -1793,7 +1808,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) cs_req_res(b, CS_IDVS_RES); if (idvs_count > 1) { struct cs_index counter_reg = cs_scratch_reg32(b, 17); - struct cs_index tiler_ctx_addr = cs_sr_reg64(b, 40); + struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX); cs_move32_to(b, counter_reg, idvs_count); @@ -1942,10 +1957,11 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr); cs_update_vt_ctx(b) { - cs_move32_to(b, cs_sr_reg32(b, 32), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0); /* Load SR33-37 from indirect buffer. */ unsigned reg_mask = draw->index.size ? 0b11111 : 0b11011; - cs_load_to(b, cs_sr_reg_tuple(b, 33, 5), draw_params_addr, reg_mask, 0); + cs_load_to(b, cs_sr_reg_tuple(b, MALI_IDVS_SR_INDEX_COUNT, 5), + draw_params_addr, reg_mask, 0); } /* Wait for the SR33-37 indirect buffer load.
*/ @@ -1957,13 +1973,15 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, cs_move64_to(b, fau_block_addr, cmdbuf->state.gfx.vs.push_uniforms); if (shader_uses_sysval(vs, graphics, vs.first_vertex)) { - cs_store32(b, cs_sr_reg32(b, 36), fau_block_addr, + cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), + fau_block_addr, shader_remapped_sysval_offset( vs, sysval_offset(graphics, vs.first_vertex))); } if (shader_uses_sysval(vs, graphics, vs.base_instance)) { - cs_store32(b, cs_sr_reg32(b, 37), fau_block_addr, + cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), + fau_block_addr, shader_remapped_sysval_offset( vs, sysval_offset(graphics, vs.base_instance))); } @@ -1978,7 +1996,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, * Mali's limitation on non-zero firstInstance when a instance divisor is used. */ cs_update_vt_ctx(b) - cs_move32_to(b, cs_sr_reg32(b, 37), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); struct mali_primitive_flags_packed flags_override = get_tiler_flags_override(draw);