panfrost: Rework cs_sr_regXX to be a macro
This moves cs_sr_regXX into cs_builder.h and makes usage less verbose.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Benjamin Lee <benjamin.lee@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33736>
parent bbecaacc3f
commit 4c2e2eb445
6 changed files with 94 additions and 119 deletions
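To illustrate the change in call style (both lines are taken verbatim from the cmd_dispatch hunk below; only the register-selection argument changes):

   /* Before: the caller spells out the generated system-register enum. */
   cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd);

   /* After: the macro is given the command-stream type and register name
    * and builds the MALI_*_SR_* enum itself. */
   cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd);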
@@ -429,6 +429,13 @@ cs_reg64(struct cs_builder *b, unsigned reg)
    return cs_reg_tuple(b, reg, 2);
 }

+#define cs_sr_reg_tuple(__b, __cmd, __name, __size) \
+   cs_reg_tuple((__b), MALI_##__cmd##_SR_##__name, (__size))
+#define cs_sr_reg32(__b, __cmd, __name) \
+   cs_reg32((__b), MALI_##__cmd##_SR_##__name)
+#define cs_sr_reg64(__b, __cmd, __name) \
+   cs_reg64((__b), MALI_##__cmd##_SR_##__name)
+
 /*
  * The top of the register file is reserved for cs_builder internal use. We
  * need 3 spare registers for handling command queue overflow. These are
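A rough sketch of how the new macros compose (assuming a generated MALI_COMPUTE_SR_TSD_0 define, which the call sites in the panvk hunks below rely on):

   /* cs_sr_reg64(b, COMPUTE, TSD_0) token-pastes its arguments and expands to: */
   cs_reg64((b), MALI_COMPUTE_SR_TSD_0)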
@@ -210,26 +210,6 @@ cs_scratch_reg64(struct cs_builder *b, unsigned reg)
    return cs_scratch_reg_tuple(b, reg, 2);
 }

-static inline struct cs_index
-cs_sr_reg_tuple(struct cs_builder *b, unsigned start, unsigned count)
-{
-   assert(start + count - 1 < PANVK_CS_REG_SCRATCH_START);
-   return cs_reg_tuple(b, start, count);
-}
-
-static inline struct cs_index
-cs_sr_reg32(struct cs_builder *b, unsigned reg)
-{
-   return cs_sr_reg_tuple(b, reg, 1);
-}
-
-static inline struct cs_index
-cs_sr_reg64(struct cs_builder *b, unsigned reg)
-{
-   assert(reg % 2 == 0);
-   return cs_sr_reg_tuple(b, reg, 2);
-}
-
 static inline struct cs_index
 cs_subqueue_ctx_reg(struct cs_builder *b)
 {
@@ -236,23 +236,23 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    cs_update_compute_ctx(b) {
       if (compute_state_dirty(cmdbuf, CS) ||
           compute_state_dirty(cmdbuf, DESC_STATE))
-         cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0),
+         cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SRT_0),
                       cs_desc_state->res_table);

       if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
          uint64_t fau_ptr = cmdbuf->state.compute.push_uniforms |
                             ((uint64_t)shader->fau.total_count << 56);
-         cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_FAU_0), fau_ptr);
+         cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr);
       }

       if (compute_state_dirty(cmdbuf, CS))
-         cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SPD_0),
+         cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0),
                       panvk_priv_mem_dev_addr(shader->spd));

-      cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd);
+      cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd);

       /* Global attribute offset */
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET),
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, GLOBAL_ATTRIBUTE_OFFSET),
                    0);

       struct mali_compute_size_workgroup_packed wg_size;
@@ -262,41 +262,41 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
          cfg.workgroup_size_z = shader->local_size.z;
          cfg.allow_merging_workgroups = false;
       }
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_WG_SIZE),
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE),
                    wg_size.opaque[0]);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X),
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X),
                    info->wg_base.x * shader->local_size.x);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y),
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y),
                    info->wg_base.y * shader->local_size.y);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z),
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z),
                    info->wg_base.z * shader->local_size.z);
       if (indirect) {
         /* Load parameters from indirect buffer and update workgroup count
          * registers and sysvals */
         cs_move64_to(b, cs_scratch_reg64(b, 0),
                      info->indirect.buffer_dev_addr);
-        cs_load_to(b, cs_sr_reg_tuple(b, MALI_COMPUTE_SR_JOB_SIZE_X, 3),
+        cs_load_to(b, cs_sr_reg_tuple(b, COMPUTE, JOB_SIZE_X, 3),
                    cs_scratch_reg64(b, 0), BITFIELD_MASK(3), 0);
         cs_move64_to(b, cs_scratch_reg64(b, 0),
                      cmdbuf->state.compute.push_uniforms);
         cs_wait_slot(b, SB_ID(LS), false);

         if (shader_uses_sysval(shader, compute, num_work_groups.x)) {
-           cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X),
+           cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
                       cs_scratch_reg64(b, 0),
                       shader_remapped_sysval_offset(
                          shader, sysval_offset(compute, num_work_groups.x)));
         }

         if (shader_uses_sysval(shader, compute, num_work_groups.y)) {
-           cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
+           cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y),
                       cs_scratch_reg64(b, 0),
                       shader_remapped_sysval_offset(
                          shader, sysval_offset(compute, num_work_groups.y)));
         }

         if (shader_uses_sysval(shader, compute, num_work_groups.z)) {
-           cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z),
+           cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z),
                       cs_scratch_reg64(b, 0),
                       shader_remapped_sysval_offset(
                          shader, sysval_offset(compute, num_work_groups.z)));
@@ -304,11 +304,11 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)

         cs_wait_slot(b, SB_ID(LS), false);
      } else {
-        cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X),
+        cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
                      info->direct.wg_count.x);
-        cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
+        cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y),
                      info->direct.wg_count.y);
-        cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z),
+        cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z),
                      info->direct.wg_count.z);
      }
    }
@@ -403,7 +403,7 @@ update_tls(struct panvk_cmd_buffer *cmdbuf)
       cmdbuf->state.gfx.tsd = state->desc.gpu;

       cs_update_vt_ctx(b)
-         cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_TSD_0), state->desc.gpu);
+         cs_move64_to(b, cs_sr_reg64(b, IDVS, TSD_0), state->desc.gpu);
    }

    state->info.tls.size =
@@ -462,8 +462,7 @@ prepare_blend(struct panvk_cmd_buffer *cmdbuf)
    panvk_per_arch(blend_emit_descs)(cmdbuf, bds);

    cs_update_vt_ctx(b)
-      cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_BLEND_DESC),
-                   ptr.gpu | bd_count);
+      cs_move64_to(b, cs_sr_reg64(b, IDVS, BLEND_DESC), ptr.gpu | bd_count);

    return VK_SUCCESS;
 }
@@ -511,7 +510,7 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf)
       }

       struct mali_scissor_packed *scissor_box_ptr = &scissor_box;
-      cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_SCISSOR_BOX),
+      cs_move64_to(b, cs_sr_reg64(b, IDVS, SCISSOR_BOX),
                    *((uint64_t *)scissor_box_ptr));
    }

@@ -522,9 +521,9 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf)

       float z_min = sysvals->viewport.offset.z;
       float z_max = z_min + sysvals->viewport.scale.z;
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_LOW_DEPTH_CLAMP),
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, LOW_DEPTH_CLAMP),
                    fui(MIN2(z_min, z_max)));
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_HIGH_DEPTH_CLAMP),
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, HIGH_DEPTH_CLAMP),
                    fui(MAX2(z_min, z_max)));
    }
 }
@@ -579,7 +578,7 @@ prepare_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf)
       return;
    }

-   cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_PRIMITIVE_SIZE),
+   cs_move32_to(b, cs_sr_reg32(b, IDVS, PRIMITIVE_SIZE),
                 fui(primitive_size));
 }

@@ -736,7 +735,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
    cmdbuf->state.gfx.render.tiler =
       simul_use ? 0xdeadbeefdeadbeefull : tiler_desc.gpu;

-   struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX);
+   struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX);

    if (simul_use) {
       uint32_t descs_sz = calc_render_descs_size(cmdbuf);
@@ -1060,15 +1059,14 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
    struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);

    if (copy_fbds) {
-      struct cs_index cur_tiler = cs_sr_reg64(b, 38);
-      struct cs_index dst_fbd_ptr =
-         cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER);
-      struct cs_index layer_count = cs_sr_reg32(b, 47);
-      struct cs_index src_fbd_ptr = cs_sr_reg64(b, 48);
-      struct cs_index remaining_layers_in_td = cs_sr_reg32(b, 50);
-      struct cs_index pass_count = cs_sr_reg32(b, 51);
-      struct cs_index pass_src_fbd_ptr = cs_sr_reg64(b, 52);
-      struct cs_index pass_dst_fbd_ptr = cs_sr_reg64(b, 54);
+      struct cs_index cur_tiler = cs_reg64(b, 38);
+      struct cs_index dst_fbd_ptr = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
+      struct cs_index layer_count = cs_reg32(b, 47);
+      struct cs_index src_fbd_ptr = cs_reg64(b, 48);
+      struct cs_index remaining_layers_in_td = cs_reg32(b, 50);
+      struct cs_index pass_count = cs_reg32(b, 51);
+      struct cs_index pass_src_fbd_ptr = cs_reg64(b, 52);
+      struct cs_index pass_dst_fbd_ptr = cs_reg64(b, 54);
       uint32_t td_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count,
                                        MAX_LAYERS_PER_TILER_DESC);

@@ -1145,9 +1143,9 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
       }
    } else {
       cs_update_frag_ctx(b) {
-         cs_move64_to(b, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
+         cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
                       fbds.gpu | fbd_flags);
-         cs_move64_to(b, cs_sr_reg64(b, 38), cmdbuf->state.gfx.render.tiler);
+         cs_move64_to(b, cs_reg64(b, 38), cmdbuf->state.gfx.render.tiler);
       }
    }

@@ -1207,16 +1205,16 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf)

    cs_update_vt_ctx(b) {
       if (upd_res_table)
-         cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_SRT),
+         cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_SRT),
                       vs_desc_state->res_table);

       if (gfx_state_dirty(cmdbuf, VS) ||
           dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY))
-         cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_POS_SPD),
+         cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_POS_SPD),
                       get_pos_spd(cmdbuf));

       if (gfx_state_dirty(cmdbuf, VS))
-         cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_VARY_SPD),
+         cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_VARY_SPD),
                       panvk_priv_mem_dev_addr(vs->spds.var));
    }

@@ -1246,10 +1244,10 @@ prepare_fs(struct panvk_cmd_buffer *cmdbuf)

    cs_update_vt_ctx(b) {
       if (fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, DESC_STATE))
-         cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SRT),
+         cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_SRT),
                       fs ? fs_desc_state->res_table : 0);
       if (fs_user_dirty(cmdbuf))
-         cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_SPD),
+         cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_SPD),
                       fs ? panvk_priv_mem_dev_addr(fs->spd) : 0);
    }

@@ -1271,7 +1269,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
       return result;

    cs_update_vt_ctx(b) {
-      cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_VERTEX_FAU),
+      cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_FAU),
                    cmdbuf->state.gfx.vs.push_uniforms |
                       ((uint64_t)vs->fau.total_count << 56));
    }
@@ -1290,7 +1288,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
       }

       cs_update_vt_ctx(b)
-         cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_FRAGMENT_FAU), fau_ptr);
+         cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_FAU), fau_ptr);
    }

    return VK_SUCCESS;
@@ -1372,7 +1370,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
    }

    cs_update_vt_ctx(b)
-      cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_ZSD), zsd.gpu);
+      cs_move64_to(b, cs_sr_reg64(b, IDVS, ZSD), zsd.gpu);

    return VK_SUCCESS;
 }
@@ -1449,7 +1447,7 @@ prepare_oq(struct panvk_cmd_buffer *cmdbuf)

    struct cs_builder *b =
       panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
-   cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_OQ),
+   cs_move64_to(b, cs_sr_reg64(b, IDVS, OQ),
                 cmdbuf->state.gfx.occlusion_query.ptr);

    cmdbuf->state.gfx.render.oq.last =
@@ -1542,7 +1540,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
       }

       cs_update_vt_ctx(b)
-         cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD0), dcd0.opaque[0]);
+         cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD0), dcd0.opaque[0]);
    }

    if (dcd1_dirty) {
@@ -1560,7 +1558,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
       }

       cs_update_vt_ctx(b)
-         cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD1), dcd1.opaque[0]);
+         cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD1), dcd1.opaque[0]);
    }
 }

@@ -1576,9 +1574,9 @@ prepare_index_buffer(struct panvk_cmd_buffer *cmdbuf,
       panvk_buffer_range(cmdbuf->state.gfx.ib.buffer,
                          cmdbuf->state.gfx.ib.offset, VK_WHOLE_SIZE);
    assert(ib_size <= UINT32_MAX);
-   cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_BUFFER_SIZE), ib_size);
+   cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_BUFFER_SIZE), ib_size);

-   cs_move64_to(b, cs_sr_reg64(b, MALI_IDVS_SR_INDEX_BUFFER),
+   cs_move64_to(b, cs_sr_reg64(b, IDVS, INDEX_BUFFER),
                 panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer,
                                      cmdbuf->state.gfx.ib.offset));
 }
@@ -1638,7 +1636,7 @@ set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf,
          cfg.view_mask = cmdbuf->state.gfx.render.view_mask;
       }

-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_TILER_FLAGS),
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, TILER_FLAGS),
                    tiler_idvs_flags.opaque[0]);
    }
 }
@@ -1719,13 +1717,13 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)

    cs_update_vt_ctx(b) {
       /* We don't use the resource dep system yet. */
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_DCD2), 0);
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD2), 0);

       prepare_index_buffer(cmdbuf, draw);

       set_tiler_idvs_flags(b, cmdbuf, draw);

-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VARY_SIZE), varying_size);
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, VARY_SIZE), varying_size);

       result = prepare_ds(cmdbuf);
       if (result != VK_SUCCESS)
@@ -1784,21 +1782,18 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
       return;

    cs_update_vt_ctx(b) {
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_COUNT),
-                   draw->vertex.count);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_COUNT),
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_COUNT), draw->vertex.count);
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_COUNT),
                    draw->instance.count);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INDEX_OFFSET),
-                   draw->index.offset);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET),
-                   draw->vertex.base);
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_OFFSET), draw->index.offset);
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, VERTEX_OFFSET), draw->vertex.base);
       /* NIR expects zero-based instance ID, but even if it did have an
        * intrinsic to load the absolute instance ID, we'd want to keep it
        * zero-based to work around Mali's limitation on non-zero firstInstance
        * when a instance divisor is used.
        */
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0);
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), 0);
    }

    struct mali_primitive_flags_packed flags_override =
@@ -1810,7 +1805,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
    cs_req_res(b, CS_IDVS_RES);
    if (idvs_count > 1) {
       struct cs_index counter_reg = cs_scratch_reg32(b, 17);
-      struct cs_index tiler_ctx_addr = cs_sr_reg64(b, MALI_IDVS_SR_TILER_CTX);
+      struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX);

       cs_move32_to(b, counter_reg, idvs_count);

@@ -1959,10 +1954,10 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
    cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr);

    cs_update_vt_ctx(b) {
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET), 0);
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
       /* Load SR33-37 from indirect buffer. */
       unsigned reg_mask = draw->index.size ? 0b11111 : 0b11011;
-      cs_load_to(b, cs_sr_reg_tuple(b, MALI_IDVS_SR_INDEX_COUNT, 5),
+      cs_load_to(b, cs_sr_reg_tuple(b, IDVS, INDEX_COUNT, 5),
                  draw_params_addr, reg_mask, 0);
    }

@@ -1975,15 +1970,13 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
    cs_move64_to(b, fau_block_addr, cmdbuf->state.gfx.vs.push_uniforms);

    if (shader_uses_sysval(vs, graphics, vs.first_vertex)) {
-      cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_VERTEX_OFFSET),
-                 fau_block_addr,
+      cs_store32(b, cs_sr_reg32(b, IDVS, VERTEX_OFFSET), fau_block_addr,
                  shader_remapped_sysval_offset(
                     vs, sysval_offset(graphics, vs.first_vertex)));
    }

    if (shader_uses_sysval(vs, graphics, vs.base_instance)) {
-      cs_store32(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET),
-                 fau_block_addr,
+      cs_store32(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), fau_block_addr,
                  shader_remapped_sysval_offset(
                     vs, sysval_offset(graphics, vs.base_instance)));
    }

@@ -1998,7 +1991,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
     * Mali's limitation on non-zero firstInstance when a instance divisor is used.
     */
    cs_update_vt_ctx(b)
-      cs_move32_to(b, cs_sr_reg32(b, MALI_IDVS_SR_INSTANCE_OFFSET), 0);
+      cs_move32_to(b, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), 0);

    struct mali_primitive_flags_packed flags_override =
       get_tiler_flags_override(draw);
@@ -2270,17 +2263,17 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
               TILER_OOM_CTX_FIELD_OFFSET(counter));

    struct cs_index fbd_first = cs_scratch_reg64(b, 2);
-   cs_add64(b, fbd_first, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
+   cs_add64(b, fbd_first, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
             (1 + PANVK_IR_FIRST_PASS) * fbd_ir_pass_offset);
    cs_store64(b, fbd_first, cs_subqueue_ctx_reg(b),
               TILER_OOM_CTX_FBDPTR_OFFSET(FIRST));
    struct cs_index fbd_middle = cs_scratch_reg64(b, 4);
-   cs_add64(b, fbd_middle, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
+   cs_add64(b, fbd_middle, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
             (1 + PANVK_IR_MIDDLE_PASS) * fbd_ir_pass_offset);
    cs_store64(b, fbd_middle, cs_subqueue_ctx_reg(b),
               TILER_OOM_CTX_FBDPTR_OFFSET(MIDDLE));
    struct cs_index fbd_last = cs_scratch_reg64(b, 6);
-   cs_add64(b, fbd_last, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
+   cs_add64(b, fbd_last, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
             (1 + PANVK_IR_LAST_PASS) * fbd_ir_pass_offset);
    cs_store64(b, fbd_last, cs_subqueue_ctx_reg(b),
               TILER_OOM_CTX_FBDPTR_OFFSET(LAST));
@@ -2314,9 +2307,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)

    /* Now initialize the fragment bits. */
    cs_update_frag_ctx(b) {
-      cs_move32_to(b, cs_sr_reg32(b, 42),
+      cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MIN),
                    (fbinfo->extent.miny << 16) | fbinfo->extent.minx);
-      cs_move32_to(b, cs_sr_reg32(b, 43),
+      cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, BBOX_MAX),
                    (fbinfo->extent.maxy << 16) | fbinfo->extent.maxx);
    }

@@ -2384,8 +2377,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    cs_wait_slot(b, SB_ID(LS), false);
    cs_if(b, MALI_CS_CONDITION_GREATER, counter)
       cs_update_frag_ctx(b)
-         cs_add64(b, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
-                  cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
+         cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
+                  cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
                   (1 + PANVK_IR_LAST_PASS) * fbd_ir_pass_offset);

    /* Applications tend to forget to describe subpass dependencies, especially
@@ -2402,7 +2395,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)

    cs_req_res(b, CS_FRAG_RES);
    if (cmdbuf->state.gfx.render.layer_count > 1) {
-      struct cs_index layer_count = cs_sr_reg32(b, 47);
+      struct cs_index layer_count = cs_reg32(b, 47);

       cs_move32_to(b, layer_count, calc_enabled_layer_count(cmdbuf));
       cs_while(b, MALI_CS_CONDITION_GREATER, layer_count) {
@@ -2411,8 +2404,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)

          cs_add32(b, layer_count, layer_count, -1);
          cs_update_frag_ctx(b)
-            cs_add64(b, cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER),
-                     cs_sr_reg64(b, MALI_FRAGMENT_SR_FBD_POINTER), fbd_sz);
+            cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
+                     cs_sr_reg64(b, FRAGMENT, FBD_POINTER), fbd_sz);
       }
    } else {
       cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
@@ -2431,8 +2424,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    struct cs_index completed = cs_scratch_reg_tuple(b, 10, 4);
    struct cs_index completed_top = cs_scratch_reg64(b, 10);
    struct cs_index completed_bottom = cs_scratch_reg64(b, 12);
-   struct cs_index cur_tiler = cs_sr_reg64(b, 38);
-   struct cs_index tiler_count = cs_sr_reg32(b, 47);
+   struct cs_index cur_tiler = cs_reg64(b, 38);
+   struct cs_index tiler_count = cs_reg32(b, 47);
    struct cs_index oq_chain = cs_scratch_reg64(b, 10);
    struct cs_index oq_chain_lo = cs_scratch_reg32(b, 10);
    struct cs_index oq_chain_hi = cs_scratch_reg32(b, 11);
@@ -103,21 +103,20 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,

    cs_update_compute_ctx(b) {
       /* No resource table */
-      cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0), 0);
+      cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SRT_0), 0);

       uint64_t fau_count =
          DIV_ROUND_UP(BIFROST_PRECOMPILED_KERNEL_SYSVALS_SIZE + data_size, 8);
       uint64_t fau_ptr = push_uniforms.gpu | (fau_count << 56);
-      cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_FAU_0), fau_ptr);
+      cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr);

-      cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SPD_0),
+      cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0),
                    panvk_priv_mem_dev_addr(shader->spd));

-      cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd);
+      cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd);

       /* Global attribute offset */
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET),
-                   0);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, GLOBAL_ATTRIBUTE_OFFSET), 0);

       struct mali_compute_size_workgroup_packed wg_size;
       pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
@@ -126,21 +125,17 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
          cfg.workgroup_size_z = shader->local_size.z;
          cfg.allow_merging_workgroups = false;
       }
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_WG_SIZE),
-                   wg_size.opaque[0]);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), wg_size.opaque[0]);

       /* Job offset */
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X), 0);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y), 0);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z), 0);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X), 0);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y), 0);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z), 0);

       /* Job size */
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X),
-                   grid.count[0]);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
-                   grid.count[1]);
-      cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z),
-                   grid.count[2]);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X), grid.count[0]);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y), grid.count[1]);
+      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z), grid.count[2]);
    }

    panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
@@ -65,8 +65,8 @@ generate_tiler_oom_handler(struct cs_buffer handler_mem, bool has_zs_ext,
    struct cs_index layer_count = cs_scratch_reg32(&b, 7);

    /* The tiler pointer is pre-filled. */
-   struct cs_index tiler_ptr = cs_sr_reg64(&b, 38);
-   struct cs_index fbd_ptr = cs_sr_reg64(&b, MALI_FRAGMENT_SR_FBD_POINTER);
+   struct cs_index tiler_ptr = cs_reg64(&b, 38);
+   struct cs_index fbd_ptr = cs_sr_reg64(&b, FRAGMENT, FBD_POINTER);

    /* Use different framebuffer descriptor depending on whether incremental
     * rendering has already been triggered */