diff --git a/src/gallium/drivers/panfrost/pan_csf.c b/src/gallium/drivers/panfrost/pan_csf.c index 03f16ce39fe..59e150bc589 100644 --- a/src/gallium/drivers/panfrost/pan_csf.c +++ b/src/gallium/drivers/panfrost/pan_csf.c @@ -1070,44 +1070,49 @@ csf_emit_draw_state(struct panfrost_batch *batch, csf_emit_shader_regs(batch, PIPE_SHADER_FRAGMENT, batch->rsd[PIPE_SHADER_FRAGMENT]); } else { - cs_move64_to(b, cs_reg64(b, 4), 0); - cs_move64_to(b, cs_reg64(b, 12), 0); - cs_move64_to(b, cs_reg64(b, 20), 0); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_FRAGMENT_SRT), 0); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_FRAGMENT_FAU), 0); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_FRAGMENT_SPD), 0); } if (secondary_shader) { - cs_move64_to(b, cs_reg64(b, 18), panfrost_get_varying_shader(batch)); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_VERTEX_VARY_SPD), + panfrost_get_varying_shader(batch)); } - cs_move64_to(b, cs_reg64(b, 24), batch->tls.gpu); - cs_move64_to(b, cs_reg64(b, 30), batch->tls.gpu); - cs_move32_to(b, cs_reg32(b, 32), 0); - cs_move32_to(b, cs_reg32(b, 37), 0); - cs_move32_to(b, cs_reg32(b, 38), 0); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_TSD_0), batch->tls.gpu); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_TSD_3), batch->tls.gpu); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_DCD2), 0); - cs_move64_to(b, cs_reg64(b, 40), csf_get_tiler_desc(batch)); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_TILER_CTX), + csf_get_tiler_desc(batch)); STATIC_ASSERT(sizeof(batch->scissor) == pan_size(SCISSOR)); STATIC_ASSERT(sizeof(uint64_t) == pan_size(SCISSOR)); uint64_t *sbd = (uint64_t *)&batch->scissor[0]; - cs_move64_to(b, cs_reg64(b, 42), *sbd); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_SCISSOR_BOX), *sbd); - cs_move32_to(b, cs_reg32(b, 44), fui(batch->minimum_z)); - cs_move32_to(b, cs_reg32(b, 45), fui(batch->maximum_z)); + cs_move32_to(b, 
cs_reg32(b, MALI_IDVS_SR_LOW_DEPTH_CLAMP), + fui(batch->minimum_z)); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_HIGH_DEPTH_CLAMP), + fui(batch->maximum_z)); if (ctx->occlusion_query && ctx->active_queries) { struct panfrost_resource *rsrc = pan_resource(ctx->occlusion_query->rsrc); - cs_move64_to(b, cs_reg64(b, 46), rsrc->image.data.base); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_OQ), rsrc->image.data.base); panfrost_batch_write_rsrc(ctx->batch, rsrc, PIPE_SHADER_FRAGMENT); } - cs_move32_to(b, cs_reg32(b, 48), panfrost_vertex_attribute_stride(vs, fs)); - cs_move64_to(b, cs_reg64(b, 50), + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_VARY_SIZE), + panfrost_vertex_attribute_stride(vs, fs)); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_BLEND_DESC), batch->blend | MAX2(batch->key.nr_cbufs, 1)); - cs_move64_to(b, cs_reg64(b, 52), batch->depth_stencil); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_ZSD), batch->depth_stencil); if (info->index_size) - cs_move64_to(b, cs_reg64(b, 54), batch->indices); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_INDEX_BUFFER), batch->indices); struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; @@ -1130,7 +1135,8 @@ csf_emit_draw_state(struct panfrost_batch *batch, : MALI_FIFO_FORMAT_BASIC; } - cs_move32_to(b, cs_reg32(b, 56), primitive_flags.opaque[0]); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_TILER_FLAGS), + primitive_flags.opaque[0]); struct mali_dcd_flags_0_packed dcd_flags0; struct mali_dcd_flags_1_packed dcd_flags1; @@ -1240,14 +1246,15 @@ csf_emit_draw_state(struct panfrost_batch *batch, } } - cs_move32_to(b, cs_reg32(b, 57), dcd_flags0.opaque[0]); - cs_move32_to(b, cs_reg32(b, 58), dcd_flags1.opaque[0]); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_DCD0), dcd_flags0.opaque[0]); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_DCD1), dcd_flags1.opaque[0]); struct mali_primitive_size_packed primsize; panfrost_emit_primitive_size(ctx, info->mode == MESA_PRIM_POINTS, 0, &primsize); struct mali_primitive_size_packed *primsize_ptr = &primsize; 
- cs_move64_to(b, cs_reg64(b, 60), *((uint64_t*)primsize_ptr)); + cs_move64_to(b, cs_reg64(b, MALI_IDVS_SR_PRIMITIVE_SIZE), + *((uint64_t *)primsize_ptr)); struct mali_primitive_flags_packed flags_override; /* Pack with nodefaults so only explicitly set override fields affect the @@ -1288,19 +1295,22 @@ GENX(csf_launch_draw)(struct panfrost_batch *batch, uint32_t flags_override = csf_emit_draw_state(batch, info, drawid_offset); struct cs_index drawid = csf_emit_draw_id_register(batch, drawid_offset); - cs_move32_to(b, cs_reg32(b, 33), draw->count); - cs_move32_to(b, cs_reg32(b, 34), info->instance_count); - cs_move32_to(b, cs_reg32(b, 35), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_COUNT), draw->count); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INSTANCE_COUNT), + info->instance_count); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_OFFSET), 0); /* Base vertex offset on Valhall is used for both indexed and * non-indexed draws, in a simple way for either. Handle both cases. 
*/ if (info->index_size) { - cs_move32_to(b, cs_reg32(b, 36), draw->index_bias); - cs_move32_to(b, cs_reg32(b, 39), info->index_size * draw->count); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), + draw->index_bias); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), + info->index_size * draw->count); } else { - cs_move32_to(b, cs_reg32(b, 36), draw->start); - cs_move32_to(b, cs_reg32(b, 39), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), draw->start); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), 0); } cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0), @@ -1328,16 +1338,20 @@ GENX(csf_launch_draw_indirect)(struct panfrost_batch *batch, cs_while(b, MALI_CS_CONDITION_GREATER, counter) { if (info->index_size) { /* loads vertex count, instance count, index offset, vertex offset */ - cs_load_to(b, cs_reg_tuple(b, 33, 4), address, BITFIELD_MASK(4), 0); - cs_move32_to(b, cs_reg32(b, 39), info->index.resource->width0); + cs_load_to(b, cs_reg_tuple(b, MALI_IDVS_SR_INDEX_COUNT, 4), address, + BITFIELD_MASK(4), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), + info->index.resource->width0); } else { /* vertex count, instance count */ - cs_load_to(b, cs_reg_tuple(b, 33, 2), address, BITFIELD_MASK(2), 0); - cs_move32_to(b, cs_reg32(b, 35), 0); - cs_load_to(b, cs_reg_tuple(b, 36, 1), address, BITFIELD_MASK(1), + cs_load_to(b, cs_reg_tuple(b, MALI_IDVS_SR_INDEX_COUNT, 2), address, + BITFIELD_MASK(2), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_OFFSET), 0); + cs_load_to(b, cs_reg_tuple(b, MALI_IDVS_SR_VERTEX_OFFSET, 1), address, + BITFIELD_MASK(1), 2 * sizeof(uint32_t)); // instance offset - cs_move32_to(b, cs_reg32(b, 37), 0); - cs_move32_to(b, cs_reg32(b, 39), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); + cs_move32_to(b, cs_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), 0); } cs_wait_slot(b, 0, false); diff --git a/src/panfrost/lib/genxml/cs_builder.h 
b/src/panfrost/lib/genxml/cs_builder.h index 51238dc43de..680716d8a8e 100644 --- a/src/panfrost/lib/genxml/cs_builder.h +++ b/src/panfrost/lib/genxml/cs_builder.h @@ -32,6 +32,19 @@ #include "util/bitset.h" #include "util/u_dynarray.h" +/* Before Avalon, RUN_IDVS could use a selector but as we only hardcode the same + * configuration, we match v12+ naming here */ + +#if PAN_ARCH <= 11 +#define MALI_IDVS_SR_VERTEX_SRT MALI_IDVS_SR_SRT_0 +#define MALI_IDVS_SR_FRAGMENT_SRT MALI_IDVS_SR_SRT_2 +#define MALI_IDVS_SR_VERTEX_FAU MALI_IDVS_SR_FAU_0 +#define MALI_IDVS_SR_FRAGMENT_FAU MALI_IDVS_SR_FAU_2 +#define MALI_IDVS_SR_VERTEX_POS_SPD MALI_IDVS_SR_SPD_0 +#define MALI_IDVS_SR_VERTEX_VARY_SPD MALI_IDVS_SR_SPD_1 +#define MALI_IDVS_SR_FRAGMENT_SPD MALI_IDVS_SR_SPD_2 +#endif + /* * cs_builder implements a builder for CSF command streams. It manages the * allocation and overflow behaviour of queues and provides helpers for emitting diff --git a/src/panfrost/lib/genxml/decode_csf.c b/src/panfrost/lib/genxml/decode_csf.c index dd90bcf2ca6..5dbf52320ff 100644 --- a/src/panfrost/lib/genxml/decode_csf.c +++ b/src/panfrost/lib/genxml/decode_csf.c @@ -622,7 +622,8 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, /* Merge flag overrides with the register flags */ struct mali_primitive_flags_packed tiler_flags_packed = { - .opaque[0] = cs_get_u32(qctx, 56) | I->flags_override, + .opaque[0] = + cs_get_u32(qctx, MALI_IDVS_SR_TILER_FLAGS) | I->flags_override, }; pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags); @@ -676,20 +677,22 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, GENX(pandecode_fau)(ctx, lo, hi, "Fragment FAU"); } - if (cs_get_u64(qctx, 16)) { + if (cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_POS_SPD)) { GENX(pandecode_shader) - (ctx, cs_get_u64(qctx, 16), "Position shader", qctx->gpu_id); + (ctx, cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_POS_SPD), "Position shader", + qctx->gpu_id); } if (tiler_flags.secondary_shader) { - uint64_t ptr = 
cs_get_u64(qctx, 18); + uint64_t ptr = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_VARY_SPD); GENX(pandecode_shader)(ctx, ptr, "Varying shader", qctx->gpu_id); } - if (cs_get_u64(qctx, 20)) { + if (cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SPD)) { GENX(pandecode_shader) - (ctx, cs_get_u64(qctx, 20), "Fragment shader", qctx->gpu_id); + (ctx, cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SPD), "Fragment shader", + qctx->gpu_id); } DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_position_tsd), @@ -702,42 +705,58 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, "Fragment Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_frag_tsd)); - pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32)); - pandecode_log(ctx, "Index count: %u\n", cs_get_u32(qctx, 33)); - pandecode_log(ctx, "Instance count: %u\n", cs_get_u32(qctx, 34)); + pandecode_log(ctx, "Global attribute offset: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET)); + pandecode_log(ctx, "Index count: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_INDEX_COUNT)); + pandecode_log(ctx, "Instance count: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_COUNT)); if (tiler_flags.index_type) - pandecode_log(ctx, "Index offset: %u\n", cs_get_u32(qctx, 35)); + pandecode_log(ctx, "Index offset: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_INDEX_OFFSET)); - pandecode_log(ctx, "Vertex offset: %d\n", cs_get_u32(qctx, 36)); - pandecode_log(ctx, "Instance offset: %u\n", cs_get_u32(qctx, 37)); - pandecode_log(ctx, "Tiler DCD flags2: %X\n", cs_get_u32(qctx, 38)); + pandecode_log(ctx, "Vertex offset: %d\n", + cs_get_u32(qctx, MALI_IDVS_SR_VERTEX_OFFSET)); + pandecode_log(ctx, "Instance offset: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_OFFSET)); + pandecode_log(ctx, "Tiler DCD flags2: %X\n", + cs_get_u32(qctx, MALI_IDVS_SR_DCD2)); if (tiler_flags.index_type) - pandecode_log(ctx, "Index array size: %u\n", cs_get_u32(qctx, 39)); + pandecode_log(ctx, "Index array size: %u\n", + cs_get_u32(qctx, 
MALI_IDVS_SR_INDEX_BUFFER_SIZE)); - GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id); + GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, MALI_IDVS_SR_TILER_CTX), + qctx->gpu_id); - DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n"); - pandecode_log(ctx, "Low depth clamp: %f\n", uif(cs_get_u32(qctx, 44))); - pandecode_log(ctx, "High depth clamp: %f\n", uif(cs_get_u32(qctx, 45))); - pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", cs_get_u64(qctx, 46)); + DUMP_CL(ctx, SCISSOR, &qctx->regs[MALI_IDVS_SR_SCISSOR_BOX], "Scissor\n"); + pandecode_log(ctx, "Low depth clamp: %f\n", + uif(cs_get_u32(qctx, MALI_IDVS_SR_LOW_DEPTH_CLAMP))); + pandecode_log(ctx, "High depth clamp: %f\n", + uif(cs_get_u32(qctx, MALI_IDVS_SR_HIGH_DEPTH_CLAMP))); + pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", + cs_get_u64(qctx, MALI_IDVS_SR_OQ)); if (tiler_flags.secondary_shader) - pandecode_log(ctx, "Varying allocation: %u\n", cs_get_u32(qctx, 48)); + pandecode_log(ctx, "Varying allocation: %u\n", + cs_get_u32(qctx, MALI_IDVS_SR_VARY_SIZE)); - uint64_t blend = cs_get_u64(qctx, 50); + uint64_t blend = cs_get_u64(qctx, MALI_IDVS_SR_BLEND_DESC); GENX(pandecode_blend_descs)(ctx, blend & ~15, blend & 15, 0, qctx->gpu_id); - DUMP_ADDR(ctx, DEPTH_STENCIL, cs_get_u64(qctx, 52), "Depth/stencil"); + DUMP_ADDR(ctx, DEPTH_STENCIL, cs_get_u64(qctx, MALI_IDVS_SR_ZSD), + "Depth/stencil"); if (tiler_flags.index_type) - pandecode_log(ctx, "Indices: %" PRIx64 "\n", cs_get_u64(qctx, 54)); + pandecode_log(ctx, "Indices: %" PRIx64 "\n", + cs_get_u64(qctx, MALI_IDVS_SR_INDEX_BUFFER)); DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n"); - DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[57], "DCD Flags 0\n"); - DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[58], "DCD Flags 1\n"); - DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[60], "Primitive size\n"); + DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[MALI_IDVS_SR_DCD0], "DCD Flags 0\n"); + DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[MALI_IDVS_SR_DCD1], "DCD Flags 1\n"); + 
DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[MALI_IDVS_SR_PRIMITIVE_SIZE], + "Primitive size\n"); ctx->indent--; } diff --git a/src/panfrost/lib/genxml/v10.xml b/src/panfrost/lib/genxml/v10.xml index 769b5cda6e1..bf395ecf001 100644 --- a/src/panfrost/lib/genxml/v10.xml +++ b/src/panfrost/lib/genxml/v10.xml @@ -840,6 +840,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 67429ed74d0..59d29837fa2 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -403,7 +403,7 @@ update_tls(struct panvk_cmd_buffer *cmdbuf) cmdbuf->state.gfx.tsd = state->desc.gpu; cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, 24), state->desc.gpu); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_TSD_0), state->desc.gpu); } state->info.tls.size = @@ -462,7 +462,8 @@ prepare_blend(struct panvk_cmd_buffer *cmdbuf) panvk_per_arch(blend_emit_descs)(cmdbuf, bds); cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, 50), ptr.gpu | bd_count); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_BLEND_DESC), + ptr.gpu | bd_count); return VK_SUCCESS; } @@ -510,7 +511,8 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf) } struct mali_scissor_packed *scissor_box_ptr = &scissor_box; - cs_move64_to(b, cs_sr_reg64(b, 42), *((uint64_t*)scissor_box_ptr)); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_SCISSOR_BOX), + *((uint64_t *)scissor_box_ptr)); } if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) || @@ -520,8 +522,10 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf) float z_min = sysvals->viewport.offset.z; float z_max = z_min + sysvals->viewport.scale.z; - cs_move32_to(b, cs_sr_reg32(b, 44), fui(MIN2(z_min, z_max))); - cs_move32_to(b, cs_sr_reg32(b, 45), fui(MAX2(z_min, z_max))); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_LOW_DEPTH_CLAMP), + fui(MIN2(z_min, z_max))); + cs_move32_to(b, cs_sr_reg32(b, 
MALI_IDVS_SR_HIGH_DEPTH_CLAMP), + fui(MAX2(z_min, z_max))); } } @@ -575,7 +579,8 @@ prepare_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf) return; } - cs_move32_to(b, cs_sr_reg32(b, 60), fui(primitive_size)); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_PRIMITIVE_SIZE), + fui(primitive_size)); } static uint32_t @@ -731,7 +736,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf) cmdbuf->state.gfx.render.tiler = simul_use ? 0xdeadbeefdeadbeefull : tiler_desc.gpu; - struct cs_index tiler_ctx_addr = cs_sr_reg64(b, 40); + struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX); if (simul_use) { uint32_t descs_sz = calc_render_descs_size(cmdbuf); @@ -1200,14 +1205,16 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf) cs_update_vt_ctx(b) { if (upd_res_table) - cs_move64_to(b, cs_sr_reg64(b, 0), vs_desc_state->res_table); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_SRT), + vs_desc_state->res_table); if (gfx_state_dirty(cmdbuf, VS) || dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY)) - cs_move64_to(b, cs_sr_reg64(b, 16), get_pos_spd(cmdbuf)); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_POS_SPD), + get_pos_spd(cmdbuf)); if (gfx_state_dirty(cmdbuf, VS)) - cs_move64_to(b, cs_sr_reg64(b, 18), + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_VARY_SPD), panvk_priv_mem_dev_addr(vs->spds.var)); } @@ -1237,9 +1244,10 @@ prepare_fs(struct panvk_cmd_buffer *cmdbuf) cs_update_vt_ctx(b) { if (fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, DESC_STATE)) - cs_move64_to(b, cs_sr_reg64(b, 4), fs ? fs_desc_state->res_table : 0); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SRT), + fs ? fs_desc_state->res_table : 0); if (fs_user_dirty(cmdbuf)) - cs_move64_to(b, cs_sr_reg64(b, 20), + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SPD), fs ? 
panvk_priv_mem_dev_addr(fs->spd) : 0); } @@ -1261,7 +1269,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf) return result; cs_update_vt_ctx(b) { - cs_move64_to(b, cs_sr_reg64(b, 8), + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_FAU), cmdbuf->state.gfx.vs.push_uniforms | ((uint64_t)vs->fau.total_count << 56)); } @@ -1280,7 +1288,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, 12), fau_ptr); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_FAU), fau_ptr); } return VK_SUCCESS; @@ -1362,7 +1370,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, 52), zsd.gpu); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_ZSD), zsd.gpu); return VK_SUCCESS; } @@ -1439,7 +1447,8 @@ prepare_oq(struct panvk_cmd_buffer *cmdbuf) struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - cs_move64_to(b, cs_sr_reg64(b, 46), cmdbuf->state.gfx.occlusion_query.ptr); + cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_OQ), + cmdbuf->state.gfx.occlusion_query.ptr); cmdbuf->state.gfx.render.oq.last = cmdbuf->state.gfx.occlusion_query.syncobj; @@ -1531,7 +1540,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move32_to(b, cs_sr_reg32(b, 57), dcd0.opaque[0]); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD0), dcd0.opaque[0]); } if (dcd1_dirty) { @@ -1549,7 +1558,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move32_to(b, cs_sr_reg32(b, 58), dcd1.opaque[0]); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD1), dcd1.opaque[0]); } } @@ -1565,9 +1574,9 @@ prepare_index_buffer(struct panvk_cmd_buffer *cmdbuf, panvk_buffer_range(cmdbuf->state.gfx.ib.buffer, cmdbuf->state.gfx.ib.offset, VK_WHOLE_SIZE); assert(ib_size <= UINT32_MAX); - cs_move32_to(b, cs_sr_reg32(b, 39), ib_size); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), ib_size); - cs_move64_to(b, cs_sr_reg64(b, 54), 
+ cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_INDEX_BUFFER), panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer, cmdbuf->state.gfx.ib.offset)); } @@ -1627,7 +1636,8 @@ set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf, cfg.view_mask = cmdbuf->state.gfx.render.view_mask; } - cs_move32_to(b, cs_sr_reg32(b, 56), tiler_idvs_flags.opaque[0]); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_TILER_FLAGS), + tiler_idvs_flags.opaque[0]); } } @@ -1707,13 +1717,13 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) cs_update_vt_ctx(b) { /* We don't use the resource dep system yet. */ - cs_move32_to(b, cs_sr_reg32(b, 38), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD2), 0); prepare_index_buffer(cmdbuf, draw); set_tiler_idvs_flags(b, cmdbuf, draw); - cs_move32_to(b, cs_sr_reg32(b, 48), varying_size); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VARY_SIZE), varying_size); result = prepare_ds(cmdbuf); if (result != VK_SUCCESS) @@ -1772,16 +1782,21 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) return; cs_update_vt_ctx(b) { - cs_move32_to(b, cs_sr_reg32(b, 32), 0); - cs_move32_to(b, cs_sr_reg32(b, 33), draw->vertex.count); - cs_move32_to(b, cs_sr_reg32(b, 34), draw->instance.count); - cs_move32_to(b, cs_sr_reg32(b, 35), draw->index.offset); - cs_move32_to(b, cs_sr_reg32(b, 36), draw->vertex.base); - /* NIR expects zero-based instance ID, but even if it did have an intrinsic to - * load the absolute instance ID, we'd want to keep it zero-based to work around - * Mali's limitation on non-zero firstInstance when a instance divisor is used. 
+ cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_COUNT), + draw->vertex.count); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_COUNT), + draw->instance.count); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_OFFSET), + draw->index.offset); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), + draw->vertex.base); + /* NIR expects zero-based instance ID, but even if it did have an + * intrinsic to load the absolute instance ID, we'd want to keep it + * zero-based to work around Mali's limitation on non-zero firstInstance + * when a instance divisor is used. */ - cs_move32_to(b, cs_sr_reg32(b, 37), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); } struct mali_primitive_flags_packed flags_override = @@ -1793,7 +1808,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) cs_req_res(b, CS_IDVS_RES); if (idvs_count > 1) { struct cs_index counter_reg = cs_scratch_reg32(b, 17); - struct cs_index tiler_ctx_addr = cs_sr_reg64(b, 40); + struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX); cs_move32_to(b, counter_reg, idvs_count); @@ -1942,10 +1957,11 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr); cs_update_vt_ctx(b) { - cs_move32_to(b, cs_sr_reg32(b, 32), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0); /* Load SR33-37 from indirect buffer. */ unsigned reg_mask = draw->index.size ? 0b11111 : 0b11011; - cs_load_to(b, cs_sr_reg_tuple(b, 33, 5), draw_params_addr, reg_mask, 0); + cs_load_to(b, cs_sr_reg_tuple(b, MALI_IDVS_SR_INDEX_COUNT, 5), + draw_params_addr, reg_mask, 0); } /* Wait for the SR33-37 indirect buffer load. 
*/ @@ -1957,13 +1973,15 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, cs_move64_to(b, fau_block_addr, cmdbuf->state.gfx.vs.push_uniforms); if (shader_uses_sysval(vs, graphics, vs.first_vertex)) { - cs_store32(b, cs_sr_reg32(b, 36), fau_block_addr, + cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), + fau_block_addr, shader_remapped_sysval_offset( vs, sysval_offset(graphics, vs.first_vertex))); } if (shader_uses_sysval(vs, graphics, vs.base_instance)) { - cs_store32(b, cs_sr_reg32(b, 37), fau_block_addr, + cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), + fau_block_addr, shader_remapped_sysval_offset( vs, sysval_offset(graphics, vs.base_instance))); } @@ -1978,7 +1996,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, * Mali's limitation on non-zero firstInstance when a instance divisor is used. */ cs_update_vt_ctx(b) - cs_move32_to(b, cs_sr_reg32(b, 37), 0); + cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); struct mali_primitive_flags_packed flags_override = get_tiler_flags_override(draw);