panfrost: Rework cs_sr_regXX to be a macro

This moves cs_sr_regXX into cs_builder.h and makes usage less verbose; a short usage sketch follows the commit metadata below.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Benjamin Lee <benjamin.lee@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33736>
Mary Guillemard 2025-02-26 09:00:00 +01:00 committed by Marge Bot
parent bbecaacc3f
commit 4c2e2eb445
6 changed files with 94 additions and 119 deletions
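For context, the helpers this commit adds to cs_builder.h are plain token-pasting macros: the call site names a command stream type (COMPUTE, IDVS, FRAGMENT) and a state register, and the macro pastes them into the MALI_<cmd>_SR_<name> identifier that the old code spelled out by hand. The minimal, self-contained C sketch below shows the same pattern; the struct layouts, helper bodies and register indices are placeholders for illustration, not the real cs_builder.h/GenXML definitions.

/* Sketch only: placeholder types and register indices stand in for the real
 * cs_builder.h helpers and the generated MALI_*_SR_* enums. */
#include <stdio.h>

enum { MALI_COMPUTE_SR_SRT_0 = 0, MALI_COMPUTE_SR_FAU_0 = 8 }; /* illustrative values */

struct cs_builder { int unused; };
struct cs_index { unsigned reg, count; };

static struct cs_index
cs_reg_tuple(struct cs_builder *b, unsigned reg, unsigned count)
{
   (void)b;
   return (struct cs_index){ .reg = reg, .count = count };
}

static struct cs_index
cs_reg64(struct cs_builder *b, unsigned reg)
{
   return cs_reg_tuple(b, reg, 2); /* a 64-bit value spans two 32-bit registers */
}

/* Same shape as the macro added in this commit: ## pastes the two arguments
 * into the MALI_<cmd>_SR_<name> identifier. */
#define cs_sr_reg64(__b, __cmd, __name) \
   cs_reg64((__b), MALI_##__cmd##_SR_##__name)

int main(void)
{
   struct cs_builder b = { 0 };

   /* cs_sr_reg64(&b, COMPUTE, SRT_0) expands to
    * cs_reg64(&b, MALI_COMPUTE_SR_SRT_0), i.e. what callers wrote by hand
    * before this change. */
   struct cs_index srt = cs_sr_reg64(&b, COMPUTE, SRT_0);
   struct cs_index fau = cs_sr_reg64(&b, COMPUTE, FAU_0);

   printf("SRT_0 -> reg %u x%u, FAU_0 -> reg %u x%u\n",
          srt.reg, srt.count, fau.reg, fau.count);
   return 0;
}

The diff below then mechanically rewrites call sites such as cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0) into cs_sr_reg64(b, COMPUTE, SRT_0), and drops the panvk inline wrappers that the new macros supersede.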

@ -429,6 +429,13 @@ cs_reg64(struct cs_builder *b, unsigned reg)
return cs_reg_tuple(b, reg, 2);
}
#define cs_sr_reg_tuple(__b, __cmd, __name, __size) \
cs_reg_tuple((__b), MALI_##__cmd##_SR_##__name, (__size))
#define cs_sr_reg32(__b, __cmd, __name) \
cs_reg32((__b), MALI_##__cmd##_SR_##__name)
#define cs_sr_reg64(__b, __cmd, __name) \
cs_reg64((__b), MALI_##__cmd##_SR_##__name)
/*
* The top of the register file is reserved for cs_builder internal use. We
* need 3 spare registers for handling command queue overflow. These are

@ -210,26 +210,6 @@ cs_scratch_reg64(struct cs_builder *b, unsigned reg)
return cs_scratch_reg_tuple(b, reg, 2);
}
static inline struct cs_index
cs_sr_reg_tuple(struct cs_builder *b, unsigned start, unsigned count)
{
assert(start + count - 1 < PANVK_CS_REG_SCRATCH_START);
return cs_reg_tuple(b, start, count);
}
static inline struct cs_index
cs_sr_reg32(struct cs_builder *b, unsigned reg)
{
return cs_sr_reg_tuple(b, reg, 1);
}
static inline struct cs_index
cs_sr_reg64(struct cs_builder *b, unsigned reg)
{
assert(reg % 2 == 0);
return cs_sr_reg_tuple(b, reg, 2);
}
static inline struct cs_index
cs_subqueue_ctx_reg(struct cs_builder *b)
{

@ -236,23 +236,23 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
cs_update_compute_ctx(b) {
if (compute_state_dirty(cmdbuf, CS) ||
compute_state_dirty(cmdbuf, DESC_STATE))
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0),
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SRT_0),
cs_desc_state->res_table);
if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
uint64_t fau_ptr = cmdbuf->state.compute.push_uniforms |
((uint64_t)shader->fau.total_count << 56);
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_FAU_0), fau_ptr);
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr);
}
if (compute_state_dirty(cmdbuf, CS))
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SPD_0),
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0),
panvk_priv_mem_dev_addr(shader->spd));
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd);
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd);
/* Global attribute offset */
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET),
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, GLOBAL_ATTRIBUTE_OFFSET),
0);
struct mali_compute_size_workgroup_packed wg_size;
@ -262,41 +262,41 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
cfg.workgroup_size_z = shader->local_size.z;
cfg.allow_merging_workgroups = false;
}
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_WG_SIZE),
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE),
wg_size.opaque[0]);
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X),
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X),
info->wg_base.x * shader->local_size.x);
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y),
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y),
info->wg_base.y * shader->local_size.y);
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z),
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z),
info->wg_base.z * shader->local_size.z);
if (indirect) {
/* Load parameters from indirect buffer and update workgroup count
* registers and sysvals */
cs_move64_to(b, cs_scratch_reg64(b, 0),
info->indirect.buffer_dev_addr);
cs_load_to(b, cs_sr_reg_tuple(b, MALI_COMPUTE_SR_JOB_SIZE_X, 3),
cs_load_to(b, cs_sr_reg_tuple(b, COMPUTE, JOB_SIZE_X, 3),
cs_scratch_reg64(b, 0), BITFIELD_MASK(3), 0);
cs_move64_to(b, cs_scratch_reg64(b, 0),
cmdbuf->state.compute.push_uniforms);
cs_wait_slot(b, SB_ID(LS), false);
if (shader_uses_sysval(shader, compute, num_work_groups.x)) {
cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X),
cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
cs_scratch_reg64(b, 0),
shader_remapped_sysval_offset(
shader, sysval_offset(compute, num_work_groups.x)));
}
if (shader_uses_sysval(shader, compute, num_work_groups.y)) {
cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y),
cs_scratch_reg64(b, 0),
shader_remapped_sysval_offset(
shader, sysval_offset(compute, num_work_groups.y)));
}
if (shader_uses_sysval(shader, compute, num_work_groups.z)) {
cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z),
cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z),
cs_scratch_reg64(b, 0),
shader_remapped_sysval_offset(
shader, sysval_offset(compute, num_work_groups.z)));
@ -304,11 +304,11 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
cs_wait_slot(b, SB_ID(LS), false);
} else {
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X),
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
info->direct.wg_count.x);
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y),
info->direct.wg_count.y);
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z),
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z),
info->direct.wg_count.z);
}
}

@ -403,7 +403,7 @@ update_tls(struct panvk_cmd_buffer *cmdbuf)
cmdbuf->state.gfx.tsd = state->desc.gpu;
cs_update_vt_ctx(b)
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_TSD_0), state->desc.gpu);
cs_move64_to(b, cs_sr_reg64(b, IDVS, TSD_0), state->desc.gpu);
}
state->info.tls.size =
@ -462,8 +462,7 @@ prepare_blend(struct panvk_cmd_buffer *cmdbuf)
panvk_per_arch(blend_emit_descs)(cmdbuf, bds);
cs_update_vt_ctx(b)
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_BLEND_DESC),
ptr.gpu | bd_count);
cs_move64_to(b, cs_sr_reg64(b, IDVS, BLEND_DESC), ptr.gpu | bd_count);
return VK_SUCCESS;
}
@ -511,7 +510,7 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf)
}
struct mali_scissor_packed *scissor_box_ptr = &scissor_box;
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_SCISSOR_BOX),
cs_move64_to(b, cs_sr_reg64(b, IDVS, SCISSOR_BOX),
*((uint64_t *)scissor_box_ptr));
}
@ -522,9 +521,9 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf)
float z_min = sysvals->viewport.offset.z;
float z_max = z_min + sysvals->viewport.scale.z;
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_LOW_DEPTH_CLAMP),
cs_move32_to(b, cs_sr_reg32(b, IDVS, LOW_DEPTH_CLAMP),
fui(MIN2(z_min, z_max)));
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_HIGH_DEPTH_CLAMP),
cs_move32_to(b, cs_sr_reg32(b, IDVS, HIGH_DEPTH_CLAMP),
fui(MAX2(z_min, z_max)));
}
}
@ -579,7 +578,7 @@ prepare_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf)
return;
}
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_PRIMITIVE_SIZE),
cs_move32_to(b, cs_sr_reg32(b, IDVS, PRIMITIVE_SIZE),
fui(primitive_size));
}
@ -736,7 +735,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
cmdbuf->state.gfx.render.tiler =
simul_use ? 0xdeadbeefdeadbeefull : tiler_desc.gpu;
struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX);
struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX);
if (simul_use) {
uint32_t descs_sz = calc_render_descs_size(cmdbuf);
@ -1060,15 +1059,14 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
if (copy_fbds) {
struct cs_index cur_tiler = cs_sr_reg64(b, 38);
struct cs_index dst_fbd_ptr =
cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER);
struct cs_index layer_count = cs_sr_reg32(b, 47);
struct cs_index src_fbd_ptr = cs_sr_reg64(b, 48);
struct cs_index remaining_layers_in_td = cs_sr_reg32(b, 50);
struct cs_index pass_count = cs_sr_reg32(b, 51);
struct cs_index pass_src_fbd_ptr = cs_sr_reg64(b, 52);
struct cs_index pass_dst_fbd_ptr = cs_sr_reg64(b, 54);
struct cs_index cur_tiler = cs_reg64(b, 38);
struct cs_index dst_fbd_ptr = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
struct cs_index layer_count = cs_reg32(b, 47);
struct cs_index src_fbd_ptr = cs_reg64(b, 48);
struct cs_index remaining_layers_in_td = cs_reg32(b, 50);
struct cs_index pass_count = cs_reg32(b, 51);
struct cs_index pass_src_fbd_ptr = cs_reg64(b, 52);
struct cs_index pass_dst_fbd_ptr = cs_reg64(b, 54);
uint32_t td_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count,
MAX_LAYERS_PER_TILER_DESC);
@ -1145,9 +1143,9 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
}
} else {
cs_update_frag_ctx(b) {
cs_move64_to(b, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
fbds.gpu | fbd_flags);
cs_move64_to(b, cs_sr_reg64(b, 38), cmdbuf->state.gfx.render.tiler);
cs_move64_to(b, cs_reg64(b, 38), cmdbuf->state.gfx.render.tiler);
}
}
@ -1207,16 +1205,16 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf)
cs_update_vt_ctx(b) {
if (upd_res_table)
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_SRT),
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_SRT),
vs_desc_state->res_table);
if (gfx_state_dirty(cmdbuf, VS) ||
dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY))
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_POS_SPD),
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_POS_SPD),
get_pos_spd(cmdbuf));
if (gfx_state_dirty(cmdbuf, VS))
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_VARY_SPD),
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_VARY_SPD),
panvk_priv_mem_dev_addr(vs->spds.var));
}
@ -1246,10 +1244,10 @@ prepare_fs(struct panvk_cmd_buffer *cmdbuf)
cs_update_vt_ctx(b) {
if (fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, DESC_STATE))
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SRT),
cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_SRT),
fs ? fs_desc_state->res_table : 0);
if (fs_user_dirty(cmdbuf))
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SPD),
cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_SPD),
fs ? panvk_priv_mem_dev_addr(fs->spd) : 0);
}
@ -1271,7 +1269,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
return result;
cs_update_vt_ctx(b) {
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_FAU),
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_FAU),
cmdbuf->state.gfx.vs.push_uniforms |
((uint64_t)vs->fau.total_count << 56));
}
@ -1290,7 +1288,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
}
cs_update_vt_ctx(b)
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_FAU), fau_ptr);
cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_FAU), fau_ptr);
}
return VK_SUCCESS;
@ -1372,7 +1370,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
}
cs_update_vt_ctx(b)
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_ZSD), zsd.gpu);
cs_move64_to(b, cs_sr_reg64(b, IDVS, ZSD), zsd.gpu);
return VK_SUCCESS;
}
@ -1449,7 +1447,7 @@ prepare_oq(struct panvk_cmd_buffer *cmdbuf)
struct cs_builder *b =
panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_OQ),
cs_move64_to(b, cs_sr_reg64(b, IDVS, OQ),
cmdbuf->state.gfx.occlusion_query.ptr);
cmdbuf->state.gfx.render.oq.last =
@ -1542,7 +1540,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
}
cs_update_vt_ctx(b)
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD0), dcd0.opaque[0]);
cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD0), dcd0.opaque[0]);
}
if (dcd1_dirty) {
@ -1560,7 +1558,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
}
cs_update_vt_ctx(b)
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD1), dcd1.opaque[0]);
cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD1), dcd1.opaque[0]);
}
}
@ -1576,9 +1574,9 @@ prepare_index_buffer(struct panvk_cmd_buffer *cmdbuf,
panvk_buffer_range(cmdbuf->state.gfx.ib.buffer,
cmdbuf->state.gfx.ib.offset, VK_WHOLE_SIZE);
assert(ib_size <= UINT32_MAX);
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), ib_size);
cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_BUFFER_SIZE), ib_size);
cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_INDEX_BUFFER),
cs_move64_to(b, cs_sr_reg64(b, IDVS, INDEX_BUFFER),
panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer,
cmdbuf->state.gfx.ib.offset));
}
@ -1638,7 +1636,7 @@ set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf,
cfg.view_mask = cmdbuf->state.gfx.render.view_mask;
}
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_TILER_FLAGS),
cs_move32_to(b, cs_sr_reg32(b, IDVS, TILER_FLAGS),
tiler_idvs_flags.opaque[0]);
}
}
@ -1719,13 +1717,13 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
cs_update_vt_ctx(b) {
/* We don't use the resource dep system yet. */
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD2), 0);
cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD2), 0);
prepare_index_buffer(cmdbuf, draw);
set_tiler_idvs_flags(b, cmdbuf, draw);
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VARY_SIZE), varying_size);
cs_move32_to(b, cs_sr_reg32(b, IDVS, VARY_SIZE), varying_size);
result = prepare_ds(cmdbuf);
if (result != VK_SUCCESS)
@ -1784,21 +1782,18 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
return;
cs_update_vt_ctx(b) {
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0);
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_COUNT),
draw->vertex.count);
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_COUNT),
cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_COUNT), draw->vertex.count);
cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_COUNT),
draw->instance.count);
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_OFFSET),
draw->index.offset);
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET),
draw->vertex.base);
cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_OFFSET), draw->index.offset);
cs_move32_to(b, cs_sr_reg32(b, IDVS, VERTEX_OFFSET), draw->vertex.base);
/* NIR expects zero-based instance ID, but even if it did have an
* intrinsic to load the absolute instance ID, we'd want to keep it
* zero-based to work around Mali's limitation on non-zero firstInstance
* when a instance divisor is used.
*/
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0);
cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), 0);
}
struct mali_primitive_flags_packed flags_override =
@ -1810,7 +1805,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
cs_req_res(b, CS_IDVS_RES);
if (idvs_count > 1) {
struct cs_index counter_reg = cs_scratch_reg32(b, 17);
struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX);
struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX);
cs_move32_to(b, counter_reg, idvs_count);
@ -1959,10 +1954,10 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr);
cs_update_vt_ctx(b) {
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0);
cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
/* Load SR33-37 from indirect buffer. */
unsigned reg_mask = draw->index.size ? 0b11111 : 0b11011;
cs_load_to(b, cs_sr_reg_tuple(b, MALI_IDVS_SR_INDEX_COUNT, 5),
cs_load_to(b, cs_sr_reg_tuple(b, IDVS, INDEX_COUNT, 5),
draw_params_addr, reg_mask, 0);
}
@ -1975,15 +1970,13 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
cs_move64_to(b, fau_block_addr, cmdbuf->state.gfx.vs.push_uniforms);
if (shader_uses_sysval(vs, graphics, vs.first_vertex)) {
cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET),
fau_block_addr,
cs_store32(b, cs_sr_reg32(b, IDVS, VERTEX_OFFSET), fau_block_addr,
shader_remapped_sysval_offset(
vs, sysval_offset(graphics, vs.first_vertex)));
}
if (shader_uses_sysval(vs, graphics, vs.base_instance)) {
cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET),
fau_block_addr,
cs_store32(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), fau_block_addr,
shader_remapped_sysval_offset(
vs, sysval_offset(graphics, vs.base_instance)));
}
@ -1998,7 +1991,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
* Mali's limitation on non-zero firstInstance when a instance divisor is used.
*/
cs_update_vt_ctx(b)
cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0);
cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), 0);
struct mali_primitive_flags_packed flags_override =
get_tiler_flags_override(draw);
@ -2270,17 +2263,17 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
TILER_OOM_CTX_FIELD_OFFSET(counter));
struct cs_index fbd_first = cs_scratch_reg64(b, 2);
cs_add64(b, fbd_first, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
cs_add64(b, fbd_first, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
(1 + PANVK_IR_FIRST_PASS) * fbd_ir_pass_offset);
cs_store64(b, fbd_first, cs_subqueue_ctx_reg(b),
TILER_OOM_CTX_FBDPTR_OFFSET(FIRST));
struct cs_index fbd_middle = cs_scratch_reg64(b, 4);
cs_add64(b, fbd_middle, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
cs_add64(b, fbd_middle, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
(1 + PANVK_IR_MIDDLE_PASS) * fbd_ir_pass_offset);
cs_store64(b, fbd_middle, cs_subqueue_ctx_reg(b),
TILER_OOM_CTX_FBDPTR_OFFSET(MIDDLE));
struct cs_index fbd_last = cs_scratch_reg64(b, 6);
cs_add64(b, fbd_last, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
cs_add64(b, fbd_last, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
(1 + PANVK_IR_LAST_PASS) * fbd_ir_pass_offset);
cs_store64(b, fbd_last, cs_subqueue_ctx_reg(b),
TILER_OOM_CTX_FBDPTR_OFFSET(LAST));
@ -2314,9 +2307,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
/* Now initialize the fragment bits. */
cs_update_frag_ctx(b) {
cs_move32_to(b, cs_sr_reg32(b, 42),
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
(fbinfo->extent.miny << 16) | fbinfo->extent.minx);
cs_move32_to(b, cs_sr_reg32(b, 43),
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
(fbinfo->extent.maxy << 16) | fbinfo->extent.maxx);
}
@ -2384,8 +2377,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
cs_wait_slot(b, SB_ID(LS), false);
cs_if(b, MALI_CS_CONDITION_GREATER, counter)
cs_update_frag_ctx(b)
cs_add64(b, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
(1 + PANVK_IR_LAST_PASS) * fbd_ir_pass_offset);
/* Applications tend to forget to describe subpass dependencies, especially
@ -2402,7 +2395,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
cs_req_res(b, CS_FRAG_RES);
if (cmdbuf->state.gfx.render.layer_count > 1) {
struct cs_index layer_count = cs_sr_reg32(b, 47);
struct cs_index layer_count = cs_reg32(b, 47);
cs_move32_to(b, layer_count, calc_enabled_layer_count(cmdbuf));
cs_while(b, MALI_CS_CONDITION_GREATER, layer_count) {
@ -2411,8 +2404,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
cs_add32(b, layer_count, layer_count, -1);
cs_update_frag_ctx(b)
cs_add64(b, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), fbd_sz);
cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
cs_sr_reg64(b, FRAGMENT, FBD_POINTER), fbd_sz);
}
} else {
cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
@ -2431,8 +2424,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
struct cs_index completed = cs_scratch_reg_tuple(b, 10, 4);
struct cs_index completed_top = cs_scratch_reg64(b, 10);
struct cs_index completed_bottom = cs_scratch_reg64(b, 12);
struct cs_index cur_tiler = cs_sr_reg64(b, 38);
struct cs_index tiler_count = cs_sr_reg32(b, 47);
struct cs_index cur_tiler = cs_reg64(b, 38);
struct cs_index tiler_count = cs_reg32(b, 47);
struct cs_index oq_chain = cs_scratch_reg64(b, 10);
struct cs_index oq_chain_lo = cs_scratch_reg32(b, 10);
struct cs_index oq_chain_hi = cs_scratch_reg32(b, 11);

@ -103,21 +103,20 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
cs_update_compute_ctx(b) {
/* No resource table */
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0), 0);
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SRT_0), 0);
uint64_t fau_count =
DIV_ROUND_UP(BIFROST_PRECOMPILED_KERNEL_SYSVALS_SIZE + data_size, 8);
uint64_t fau_ptr = push_uniforms.gpu | (fau_count << 56);
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_FAU_0), fau_ptr);
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr);
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SPD_0),
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0),
panvk_priv_mem_dev_addr(shader->spd));
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd);
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd);
/* Global attribute offset */
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET),
0);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, GLOBAL_ATTRIBUTE_OFFSET), 0);
struct mali_compute_size_workgroup_packed wg_size;
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
@ -126,21 +125,17 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
cfg.workgroup_size_z = shader->local_size.z;
cfg.allow_merging_workgroups = false;
}
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_WG_SIZE),
wg_size.opaque[0]);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), wg_size.opaque[0]);
/* Job offset */
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X), 0);
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y), 0);
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z), 0);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X), 0);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y), 0);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z), 0);
/* Job size */
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X),
grid.count[0]);
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
grid.count[1]);
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z),
grid.count[2]);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X), grid.count[0]);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y), grid.count[1]);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z), grid.count[2]);
}
panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_COMPUTE);

@ -65,8 +65,8 @@ generate_tiler_oom_handler(struct cs_buffer handler_mem, bool has_zs_ext,
struct cs_index layer_count = cs_scratch_reg32(&b, 7);
/* The tiler pointer is pre-filled. */
struct cs_index tiler_ptr = cs_sr_reg64(&b, 38);
struct cs_index fbd_ptr = cs_sr_reg64(&b, MALI_FRAGMENT_SR_FBD_POINTER);
struct cs_index tiler_ptr = cs_reg64(&b, 38);
struct cs_index fbd_ptr = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);
/* Use different framebuffer descriptor depending on whether incremental
* rendering has already been triggered */