From 4c2e2eb445b6878c1c2adcba25cdd0bd3d34853b Mon Sep 17 00:00:00 2001
From: Mary Guillemard
Date: Wed, 26 Feb 2025 09:00:00 +0100
Subject: [PATCH] panfrost: Rework cs_sr_regXX to be a macro

This moves cs_sr_regXX into cs_builder.h and makes usage less verbose.

Signed-off-by: Mary Guillemard
Reviewed-by: Boris Brezillon
Reviewed-by: Benjamin Lee
Part-of:
---
 src/panfrost/lib/genxml/cs_builder.h | 7 +
 src/panfrost/vulkan/csf/panvk_cmd_buffer.h | 20 ---
 .../vulkan/csf/panvk_vX_cmd_dispatch.c | 32 ++---
 src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 121 +++++++++---------
 .../vulkan/csf/panvk_vX_cmd_precomp.c | 29 ++---
 .../vulkan/csf/panvk_vX_exception_handler.c | 4 +-
 6 files changed, 94 insertions(+), 119 deletions(-)

diff --git a/src/panfrost/lib/genxml/cs_builder.h b/src/panfrost/lib/genxml/cs_builder.h
index 680716d8a8e..62309790fd3 100644
--- a/src/panfrost/lib/genxml/cs_builder.h
+++ b/src/panfrost/lib/genxml/cs_builder.h
@@ -429,6 +429,13 @@ cs_reg64(struct cs_builder *b, unsigned reg)
    return cs_reg_tuple(b, reg, 2);
 }
 
+#define cs_sr_reg_tuple(__b, __cmd, __name, __size) \
+   cs_reg_tuple((__b), MALI_##__cmd##_SR_##__name, (__size))
+#define cs_sr_reg32(__b, __cmd, __name) \
+   cs_reg32((__b), MALI_##__cmd##_SR_##__name)
+#define cs_sr_reg64(__b, __cmd, __name) \
+   cs_reg64((__b), MALI_##__cmd##_SR_##__name)
+
 /*
  * The top of the register file is reserved for cs_builder internal use. We
  * need 3 spare registers for handling command queue overflow. These are
diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
index b1fee33d36b..643fb8884dd 100644
--- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
+++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
@@ -210,26 +210,6 @@ cs_scratch_reg64(struct cs_builder *b, unsigned reg)
    return cs_scratch_reg_tuple(b, reg, 2);
 }
 
-static inline struct cs_index
-cs_sr_reg_tuple(struct cs_builder *b, unsigned start, unsigned count)
-{
-   assert(start + count - 1 < PANVK_CS_REG_SCRATCH_START);
-   return cs_reg_tuple(b, start, count);
-}
-
-static inline struct cs_index
-cs_sr_reg32(struct cs_builder *b, unsigned reg)
-{
-   return cs_sr_reg_tuple(b, reg, 1);
-}
-
-static inline struct cs_index
-cs_sr_reg64(struct cs_builder *b, unsigned reg)
-{
-   assert(reg % 2 == 0);
-   return cs_sr_reg_tuple(b, reg, 2);
-}
-
 static inline struct cs_index
 cs_subqueue_ctx_reg(struct cs_builder *b)
 {
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c
index 3f67b9ad423..f27fc026ca5 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c
@@ -236,23 +236,23 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    cs_update_compute_ctx(b) {
       if (compute_state_dirty(cmdbuf, CS) ||
           compute_state_dirty(cmdbuf, DESC_STATE))
-         cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0),
+         cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SRT_0),
                       cs_desc_state->res_table);
 
       if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
          uint64_t fau_ptr = cmdbuf->state.compute.push_uniforms |
                             ((uint64_t)shader->fau.total_count << 56);
-         cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_FAU_0), fau_ptr);
+         cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr);
       }
 
       if (compute_state_dirty(cmdbuf, CS))
-         cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SPD_0),
+         cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0),
                       panvk_priv_mem_dev_addr(shader->spd));
 
-      cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd);
+      cs_move64_to(b, cs_sr_reg64(b, 
COMPUTE, TSD_0), tsd); /* Global attribute offset */ - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET), + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, GLOBAL_ATTRIBUTE_OFFSET), 0); struct mali_compute_size_workgroup_packed wg_size; @@ -262,41 +262,41 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) cfg.workgroup_size_z = shader->local_size.z; cfg.allow_merging_workgroups = false; } - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_WG_SIZE), + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), wg_size.opaque[0]); - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X), + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X), info->wg_base.x * shader->local_size.x); - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y), + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y), info->wg_base.y * shader->local_size.y); - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z), + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z), info->wg_base.z * shader->local_size.z); if (indirect) { /* Load parameters from indirect buffer and update workgroup count * registers and sysvals */ cs_move64_to(b, cs_scratch_reg64(b, 0), info->indirect.buffer_dev_addr); - cs_load_to(b, cs_sr_reg_tuple(b, MALI_COMPUTE_SR_JOB_SIZE_X, 3), + cs_load_to(b, cs_sr_reg_tuple(b, COMPUTE, JOB_SIZE_X, 3), cs_scratch_reg64(b, 0), BITFIELD_MASK(3), 0); cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.compute.push_uniforms); cs_wait_slot(b, SB_ID(LS), false); if (shader_uses_sysval(shader, compute, num_work_groups.x)) { - cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X), + cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X), cs_scratch_reg64(b, 0), shader_remapped_sysval_offset( shader, sysval_offset(compute, num_work_groups.x))); } if (shader_uses_sysval(shader, compute, num_work_groups.y)) { - cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y), + cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y), cs_scratch_reg64(b, 0), shader_remapped_sysval_offset( shader, sysval_offset(compute, num_work_groups.y))); } if (shader_uses_sysval(shader, compute, num_work_groups.z)) { - cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z), + cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z), cs_scratch_reg64(b, 0), shader_remapped_sysval_offset( shader, sysval_offset(compute, num_work_groups.z))); @@ -304,11 +304,11 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) cs_wait_slot(b, SB_ID(LS), false); } else { - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X), + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X), info->direct.wg_count.x); - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y), + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y), info->direct.wg_count.y); - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z), + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z), info->direct.wg_count.z); } } diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 0a50e896aa2..c85049862bb 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -403,7 +403,7 @@ update_tls(struct panvk_cmd_buffer *cmdbuf) cmdbuf->state.gfx.tsd = state->desc.gpu; cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_TSD_0), state->desc.gpu); + cs_move64_to(b, cs_sr_reg64(b, IDVS, TSD_0), state->desc.gpu); } state->info.tls.size = @@ -462,8 +462,7 @@ prepare_blend(struct panvk_cmd_buffer *cmdbuf) 
panvk_per_arch(blend_emit_descs)(cmdbuf, bds); cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_BLEND_DESC), - ptr.gpu | bd_count); + cs_move64_to(b, cs_sr_reg64(b, IDVS, BLEND_DESC), ptr.gpu | bd_count); return VK_SUCCESS; } @@ -511,7 +510,7 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf) } struct mali_scissor_packed *scissor_box_ptr = &scissor_box; - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_SCISSOR_BOX), + cs_move64_to(b, cs_sr_reg64(b, IDVS, SCISSOR_BOX), *((uint64_t *)scissor_box_ptr)); } @@ -522,9 +521,9 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf) float z_min = sysvals->viewport.offset.z; float z_max = z_min + sysvals->viewport.scale.z; - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_LOW_DEPTH_CLAMP), + cs_move32_to(b, cs_sr_reg32(b, IDVS, LOW_DEPTH_CLAMP), fui(MIN2(z_min, z_max))); - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_HIGH_DEPTH_CLAMP), + cs_move32_to(b, cs_sr_reg32(b, IDVS, HIGH_DEPTH_CLAMP), fui(MAX2(z_min, z_max))); } } @@ -579,7 +578,7 @@ prepare_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf) return; } - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_PRIMITIVE_SIZE), + cs_move32_to(b, cs_sr_reg32(b, IDVS, PRIMITIVE_SIZE), fui(primitive_size)); } @@ -736,7 +735,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf) cmdbuf->state.gfx.render.tiler = simul_use ? 0xdeadbeefdeadbeefull : tiler_desc.gpu; - struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX); + struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX); if (simul_use) { uint32_t descs_sz = calc_render_descs_size(cmdbuf); @@ -1060,15 +1059,14 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT); if (copy_fbds) { - struct cs_index cur_tiler = cs_sr_reg64(b, 38); - struct cs_index dst_fbd_ptr = - cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER); - struct cs_index layer_count = cs_sr_reg32(b, 47); - struct cs_index src_fbd_ptr = cs_sr_reg64(b, 48); - struct cs_index remaining_layers_in_td = cs_sr_reg32(b, 50); - struct cs_index pass_count = cs_sr_reg32(b, 51); - struct cs_index pass_src_fbd_ptr = cs_sr_reg64(b, 52); - struct cs_index pass_dst_fbd_ptr = cs_sr_reg64(b, 54); + struct cs_index cur_tiler = cs_reg64(b, 38); + struct cs_index dst_fbd_ptr = cs_sr_reg64(b, FRAGMENT, FBD_POINTER); + struct cs_index layer_count = cs_reg32(b, 47); + struct cs_index src_fbd_ptr = cs_reg64(b, 48); + struct cs_index remaining_layers_in_td = cs_reg32(b, 50); + struct cs_index pass_count = cs_reg32(b, 51); + struct cs_index pass_src_fbd_ptr = cs_reg64(b, 52); + struct cs_index pass_dst_fbd_ptr = cs_reg64(b, 54); uint32_t td_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count, MAX_LAYERS_PER_TILER_DESC); @@ -1145,9 +1143,9 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) } } else { cs_update_frag_ctx(b) { - cs_move64_to(b, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), + cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER), fbds.gpu | fbd_flags); - cs_move64_to(b, cs_sr_reg64(b, 38), cmdbuf->state.gfx.render.tiler); + cs_move64_to(b, cs_reg64(b, 38), cmdbuf->state.gfx.render.tiler); } } @@ -1207,16 +1205,16 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf) cs_update_vt_ctx(b) { if (upd_res_table) - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_SRT), + cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_SRT), vs_desc_state->res_table); if (gfx_state_dirty(cmdbuf, VS) || dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY)) - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_POS_SPD), + cs_move64_to(b, cs_sr_reg64(b, 
IDVS, VERTEX_POS_SPD), get_pos_spd(cmdbuf)); if (gfx_state_dirty(cmdbuf, VS)) - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_VARY_SPD), + cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_VARY_SPD), panvk_priv_mem_dev_addr(vs->spds.var)); } @@ -1246,10 +1244,10 @@ prepare_fs(struct panvk_cmd_buffer *cmdbuf) cs_update_vt_ctx(b) { if (fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, DESC_STATE)) - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SRT), + cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_SRT), fs ? fs_desc_state->res_table : 0); if (fs_user_dirty(cmdbuf)) - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SPD), + cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_SPD), fs ? panvk_priv_mem_dev_addr(fs->spd) : 0); } @@ -1271,7 +1269,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf) return result; cs_update_vt_ctx(b) { - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_FAU), + cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_FAU), cmdbuf->state.gfx.vs.push_uniforms | ((uint64_t)vs->fau.total_count << 56)); } @@ -1290,7 +1288,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_FAU), fau_ptr); + cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_FAU), fau_ptr); } return VK_SUCCESS; @@ -1372,7 +1370,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_ZSD), zsd.gpu); + cs_move64_to(b, cs_sr_reg64(b, IDVS, ZSD), zsd.gpu); return VK_SUCCESS; } @@ -1449,7 +1447,7 @@ prepare_oq(struct panvk_cmd_buffer *cmdbuf) struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER); - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_OQ), + cs_move64_to(b, cs_sr_reg64(b, IDVS, OQ), cmdbuf->state.gfx.occlusion_query.ptr); cmdbuf->state.gfx.render.oq.last = @@ -1542,7 +1540,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD0), dcd0.opaque[0]); + cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD0), dcd0.opaque[0]); } if (dcd1_dirty) { @@ -1560,7 +1558,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf) } cs_update_vt_ctx(b) - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD1), dcd1.opaque[0]); + cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD1), dcd1.opaque[0]); } } @@ -1576,9 +1574,9 @@ prepare_index_buffer(struct panvk_cmd_buffer *cmdbuf, panvk_buffer_range(cmdbuf->state.gfx.ib.buffer, cmdbuf->state.gfx.ib.offset, VK_WHOLE_SIZE); assert(ib_size <= UINT32_MAX); - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), ib_size); + cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_BUFFER_SIZE), ib_size); - cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_INDEX_BUFFER), + cs_move64_to(b, cs_sr_reg64(b, IDVS, INDEX_BUFFER), panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer, cmdbuf->state.gfx.ib.offset)); } @@ -1638,7 +1636,7 @@ set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf, cfg.view_mask = cmdbuf->state.gfx.render.view_mask; } - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_TILER_FLAGS), + cs_move32_to(b, cs_sr_reg32(b, IDVS, TILER_FLAGS), tiler_idvs_flags.opaque[0]); } } @@ -1719,13 +1717,13 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) cs_update_vt_ctx(b) { /* We don't use the resource dep system yet. 
*/ - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD2), 0); + cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD2), 0); prepare_index_buffer(cmdbuf, draw); set_tiler_idvs_flags(b, cmdbuf, draw); - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VARY_SIZE), varying_size); + cs_move32_to(b, cs_sr_reg32(b, IDVS, VARY_SIZE), varying_size); result = prepare_ds(cmdbuf); if (result != VK_SUCCESS) @@ -1784,21 +1782,18 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) return; cs_update_vt_ctx(b) { - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0); - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_COUNT), - draw->vertex.count); - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_COUNT), + cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0); + cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_COUNT), draw->vertex.count); + cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_COUNT), draw->instance.count); - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_OFFSET), - draw->index.offset); - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), - draw->vertex.base); + cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_OFFSET), draw->index.offset); + cs_move32_to(b, cs_sr_reg32(b, IDVS, VERTEX_OFFSET), draw->vertex.base); /* NIR expects zero-based instance ID, but even if it did have an * intrinsic to load the absolute instance ID, we'd want to keep it * zero-based to work around Mali's limitation on non-zero firstInstance * when a instance divisor is used. */ - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); + cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), 0); } struct mali_primitive_flags_packed flags_override = @@ -1810,7 +1805,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) cs_req_res(b, CS_IDVS_RES); if (idvs_count > 1) { struct cs_index counter_reg = cs_scratch_reg32(b, 17); - struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX); + struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX); cs_move32_to(b, counter_reg, idvs_count); @@ -1959,10 +1954,10 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr); cs_update_vt_ctx(b) { - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0); + cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0); /* Load SR33-37 from indirect buffer. */ unsigned reg_mask = draw->index.size ? 0b11111 : 0b11011; - cs_load_to(b, cs_sr_reg_tuple(b, MALI_IDVS_SR_INDEX_COUNT, 5), + cs_load_to(b, cs_sr_reg_tuple(b, IDVS, INDEX_COUNT, 5), draw_params_addr, reg_mask, 0); } @@ -1975,15 +1970,13 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, cs_move64_to(b, fau_block_addr, cmdbuf->state.gfx.vs.push_uniforms); if (shader_uses_sysval(vs, graphics, vs.first_vertex)) { - cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET), - fau_block_addr, + cs_store32(b, cs_sr_reg32(b, IDVS, VERTEX_OFFSET), fau_block_addr, shader_remapped_sysval_offset( vs, sysval_offset(graphics, vs.first_vertex))); } if (shader_uses_sysval(vs, graphics, vs.base_instance)) { - cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), - fau_block_addr, + cs_store32(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), fau_block_addr, shader_remapped_sysval_offset( vs, sysval_offset(graphics, vs.base_instance))); } @@ -1998,7 +1991,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, * Mali's limitation on non-zero firstInstance when a instance divisor is used. 
*/ cs_update_vt_ctx(b) - cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0); + cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), 0); struct mali_primitive_flags_packed flags_override = get_tiler_flags_override(draw); @@ -2270,17 +2263,17 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf) TILER_OOM_CTX_FIELD_OFFSET(counter)); struct cs_index fbd_first = cs_scratch_reg64(b, 2); - cs_add64(b, fbd_first, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), + cs_add64(b, fbd_first, cs_sr_reg64(b, FRAGMENT, FBD_POINTER), (1 + PANVK_IR_FIRST_PASS) * fbd_ir_pass_offset); cs_store64(b, fbd_first, cs_subqueue_ctx_reg(b), TILER_OOM_CTX_FBDPTR_OFFSET(FIRST)); struct cs_index fbd_middle = cs_scratch_reg64(b, 4); - cs_add64(b, fbd_middle, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), + cs_add64(b, fbd_middle, cs_sr_reg64(b, FRAGMENT, FBD_POINTER), (1 + PANVK_IR_MIDDLE_PASS) * fbd_ir_pass_offset); cs_store64(b, fbd_middle, cs_subqueue_ctx_reg(b), TILER_OOM_CTX_FBDPTR_OFFSET(MIDDLE)); struct cs_index fbd_last = cs_scratch_reg64(b, 6); - cs_add64(b, fbd_last, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), + cs_add64(b, fbd_last, cs_sr_reg64(b, FRAGMENT, FBD_POINTER), (1 + PANVK_IR_LAST_PASS) * fbd_ir_pass_offset); cs_store64(b, fbd_last, cs_subqueue_ctx_reg(b), TILER_OOM_CTX_FBDPTR_OFFSET(LAST)); @@ -2314,9 +2307,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf) /* Now initialize the fragment bits. */ cs_update_frag_ctx(b) { - cs_move32_to(b, cs_sr_reg32(b, 42), + cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN), (fbinfo->extent.miny << 16) | fbinfo->extent.minx); - cs_move32_to(b, cs_sr_reg32(b, 43), + cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX), (fbinfo->extent.maxy << 16) | fbinfo->extent.maxx); } @@ -2384,8 +2377,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf) cs_wait_slot(b, SB_ID(LS), false); cs_if(b, MALI_CS_CONDITION_GREATER, counter) cs_update_frag_ctx(b) - cs_add64(b, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), - cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), + cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER), + cs_sr_reg64(b, FRAGMENT, FBD_POINTER), (1 + PANVK_IR_LAST_PASS) * fbd_ir_pass_offset); /* Applications tend to forget to describe subpass dependencies, especially @@ -2402,7 +2395,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf) cs_req_res(b, CS_FRAG_RES); if (cmdbuf->state.gfx.render.layer_count > 1) { - struct cs_index layer_count = cs_sr_reg32(b, 47); + struct cs_index layer_count = cs_reg32(b, 47); cs_move32_to(b, layer_count, calc_enabled_layer_count(cmdbuf)); cs_while(b, MALI_CS_CONDITION_GREATER, layer_count) { @@ -2411,8 +2404,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf) cs_add32(b, layer_count, layer_count, -1); cs_update_frag_ctx(b) - cs_add64(b, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), - cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), fbd_sz); + cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER), + cs_sr_reg64(b, FRAGMENT, FBD_POINTER), fbd_sz); } } else { cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), @@ -2431,8 +2424,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf) struct cs_index completed = cs_scratch_reg_tuple(b, 10, 4); struct cs_index completed_top = cs_scratch_reg64(b, 10); struct cs_index completed_bottom = cs_scratch_reg64(b, 12); - struct cs_index cur_tiler = cs_sr_reg64(b, 38); - struct cs_index tiler_count = cs_sr_reg32(b, 47); + struct cs_index cur_tiler = cs_reg64(b, 38); + struct cs_index tiler_count = cs_reg32(b, 47); struct cs_index oq_chain = 
cs_scratch_reg64(b, 10); struct cs_index oq_chain_lo = cs_scratch_reg32(b, 10); struct cs_index oq_chain_hi = cs_scratch_reg32(b, 11); diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c index 2e88e01dd36..2c70d56b439 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c @@ -103,21 +103,20 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, cs_update_compute_ctx(b) { /* No resource table */ - cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0), 0); + cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SRT_0), 0); uint64_t fau_count = DIV_ROUND_UP(BIFROST_PRECOMPILED_KERNEL_SYSVALS_SIZE + data_size, 8); uint64_t fau_ptr = push_uniforms.gpu | (fau_count << 56); - cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_FAU_0), fau_ptr); + cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr); - cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SPD_0), + cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0), panvk_priv_mem_dev_addr(shader->spd)); - cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd); + cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd); /* Global attribute offset */ - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET), - 0); + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, GLOBAL_ATTRIBUTE_OFFSET), 0); struct mali_compute_size_workgroup_packed wg_size; pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) { @@ -126,21 +125,17 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, cfg.workgroup_size_z = shader->local_size.z; cfg.allow_merging_workgroups = false; } - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_WG_SIZE), - wg_size.opaque[0]); + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), wg_size.opaque[0]); /* Job offset */ - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X), 0); - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y), 0); - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z), 0); + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X), 0); + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y), 0); + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z), 0); /* Job size */ - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X), - grid.count[0]); - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y), - grid.count[1]); - cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z), - grid.count[2]); + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X), grid.count[0]); + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y), grid.count[1]); + cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z), grid.count[2]); } panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_COMPUTE); diff --git a/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c b/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c index 6b83dbc44cd..7c5c1e47b7d 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c +++ b/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c @@ -65,8 +65,8 @@ generate_tiler_oom_handler(struct cs_buffer handler_mem, bool has_zs_ext, struct cs_index layer_count = cs_scratch_reg32(&b, 7); /* The tiler pointer is pre-filled. */ - struct cs_index tiler_ptr = cs_sr_reg64(&b, 38); - struct cs_index fbd_ptr = cs_sr_reg64(&b, MALI_FRAGMENT_SR_FBD_POINTER); + struct cs_index tiler_ptr = cs_reg64(&b, 38); + struct cs_index fbd_ptr = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER); /* Use different framebuffer descriptor depending on whether incremental * rendering has already been triggered */
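
For reference, a minimal sketch of what the rework changes at a call site, lifted from the SRT_0 hunk above; the expansion follows from the ## token pasting in the new cs_builder.h macros and is shown here only as an illustration, not as part of the patch:

   /* Before: the caller spells out the full static register enum. */
   cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0),
                cs_desc_state->res_table);

   /* After: cs_sr_reg64(b, COMPUTE, SRT_0) expands to
    * cs_reg64(b, MALI_COMPUTE_SR_SRT_0), so the same register index is
    * produced with a shorter spelling at every call site. */
   cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SRT_0),
                cs_desc_state->res_table);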