mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-24 15:50:37 +02:00
pan/genxml: Define RUN_COMPUTE staging registers in an enum
This makes it more clear what is what. It will also reduce the pain of migration on newer gen. RUN_COMPUTE_INDIRECT also use the same SRs so we also map to RUN_COMPUTE there. Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Benjamin Lee <benjamin.lee@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33736>
This commit is contained in:
parent
11beea6242
commit
c8882d83fd
5 changed files with 104 additions and 59 deletions
|
|
@@ -881,10 +881,10 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
|
|||
csf_emit_shader_regs(batch, PIPE_SHADER_COMPUTE,
|
||||
batch->rsd[PIPE_SHADER_COMPUTE]);
|
||||
|
||||
cs_move64_to(b, cs_reg64(b, 24), batch->tls.gpu);
|
||||
cs_move64_to(b, cs_reg64(b, MALI_COMPUTE_SR_TSD_0), batch->tls.gpu);
|
||||
|
||||
/* Global attribute offset */
|
||||
cs_move32_to(b, cs_reg32(b, 32), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET), 0);
|
||||
|
||||
/* Compute workgroup size */
|
||||
struct mali_compute_size_workgroup_packed wg_size;
|
||||
|
|
@@ -903,11 +903,11 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
|
|||
(info->variable_shared_mem == 0);
|
||||
}
|
||||
|
||||
cs_move32_to(b, cs_reg32(b, 33), wg_size.opaque[0]);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_WG_SIZE), wg_size.opaque[0]);
|
||||
|
||||
/* Offset */
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
cs_move32_to(b, cs_reg32(b, 34 + i), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z), 0);
|
||||
|
||||
unsigned threads_per_wg = info->block[0] * info->block[1] * info->block[2];
|
||||
unsigned max_thread_cnt = panfrost_compute_max_thread_count(
|
||||
|
|
@@ -920,7 +920,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
|
|||
b, address,
|
||||
pan_resource(info->indirect)->image.data.base + info->indirect_offset);
|
||||
|
||||
struct cs_index grid_xyz = cs_reg_tuple(b, 37, 3);
|
||||
struct cs_index grid_xyz = cs_reg_tuple(b, MALI_COMPUTE_SR_JOB_SIZE_X, 3);
|
||||
cs_load_to(b, grid_xyz, address, BITFIELD_MASK(3), 0);
|
||||
|
||||
/* Wait for the load */
|
||||
|
|
@@ -942,8 +942,9 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
|
|||
false, cs_shader_res_sel(0, 0, 0, 0));
|
||||
} else {
|
||||
/* Set size in workgroups per dimension immediately */
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
cs_move32_to(b, cs_reg32(b, 37 + i), info->grid[i]);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X), info->grid[0]);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y), info->grid[1]);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z), info->grid[2]);
|
||||
|
||||
/* Pick the task_axis and task_increment to maximize thread utilization. */
|
||||
unsigned task_axis = MALI_TASK_AXIS_X;
|
||||
|
|
@@ -984,10 +985,11 @@ GENX(csf_launch_xfb)(struct panfrost_batch *batch,
|
|||
{
|
||||
struct cs_builder *b = batch->csf.cs.builder;
|
||||
|
||||
cs_move64_to(b, cs_reg64(b, 24), batch->tls.gpu);
|
||||
cs_move64_to(b, cs_reg64(b, MALI_COMPUTE_SR_TSD_0), batch->tls.gpu);
|
||||
|
||||
/* TODO: Indexing. Also, attribute_offset is a legacy feature.. */
|
||||
cs_move32_to(b, cs_reg32(b, 32), batch->ctx->offset_start);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET),
|
||||
batch->ctx->offset_start);
|
||||
|
||||
/* Compute workgroup size */
|
||||
struct mali_compute_size_workgroup_packed wg_size;
|
||||
|
|
@@ -1001,15 +1003,16 @@ GENX(csf_launch_xfb)(struct panfrost_batch *batch,
|
|||
*/
|
||||
cfg.allow_merging_workgroups = true;
|
||||
}
|
||||
cs_move32_to(b, cs_reg32(b, 33), wg_size.opaque[0]);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_WG_SIZE), wg_size.opaque[0]);
|
||||
|
||||
/* Offset */
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
cs_move32_to(b, cs_reg32(b, 34 + i), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z), 0);
|
||||
|
||||
cs_move32_to(b, cs_reg32(b, 37), count);
|
||||
cs_move32_to(b, cs_reg32(b, 38), info->instance_count);
|
||||
cs_move32_to(b, cs_reg32(b, 39), 1);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X), count);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
|
||||
info->instance_count);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z), 1);
|
||||
|
||||
csf_emit_shader_regs(batch, PIPE_SHADER_VERTEX,
|
||||
batch->rsd[PIPE_SHADER_VERTEX]);
|
||||
|
|
|
|||
|
|
@@ -320,18 +320,18 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch,
|
|||
struct cs_builder *b = batch->csf.cs.builder;
|
||||
|
||||
/* No resource table */
|
||||
cs_move64_to(b, cs_reg64(b, 0), 0);
|
||||
cs_move64_to(b, cs_reg64(b, MALI_COMPUTE_SR_SRT_0), 0);
|
||||
|
||||
uint64_t fau_count =
|
||||
DIV_ROUND_UP(BIFROST_PRECOMPILED_KERNEL_SYSVALS_SIZE + data_size, 8);
|
||||
uint64_t fau_ptr = push_uniforms.gpu | (fau_count << 56);
|
||||
cs_move64_to(b, cs_reg64(b, 8), fau_ptr);
|
||||
cs_move64_to(b, cs_reg64(b, MALI_COMPUTE_SR_FAU_0), fau_ptr);
|
||||
|
||||
cs_move64_to(b, cs_reg64(b, 16), shader->state_ptr);
|
||||
cs_move64_to(b, cs_reg64(b, 24), tsd);
|
||||
cs_move64_to(b, cs_reg64(b, MALI_COMPUTE_SR_SPD_0), shader->state_ptr);
|
||||
cs_move64_to(b, cs_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd);
|
||||
|
||||
/* Global attribute offset */
|
||||
cs_move32_to(b, cs_reg32(b, 32), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET), 0);
|
||||
|
||||
/* Compute workgroup size */
|
||||
struct mali_compute_size_workgroup_packed wg_size;
|
||||
|
|
@@ -341,17 +341,17 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch,
|
|||
cfg.workgroup_size_z = shader->local_size.z;
|
||||
cfg.allow_merging_workgroups = false;
|
||||
}
|
||||
cs_move32_to(b, cs_reg32(b, 33), wg_size.opaque[0]);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_WG_SIZE), wg_size.opaque[0]);
|
||||
|
||||
/* Job offset */
|
||||
cs_move32_to(b, cs_reg32(b, 34), 0);
|
||||
cs_move32_to(b, cs_reg32(b, 35), 0);
|
||||
cs_move32_to(b, cs_reg32(b, 36), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y), 0);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z), 0);
|
||||
|
||||
/* Job size */
|
||||
cs_move32_to(b, cs_reg32(b, 37), grid.count[0]);
|
||||
cs_move32_to(b, cs_reg32(b, 38), grid.count[1]);
|
||||
cs_move32_to(b, cs_reg32(b, 39), grid.count[2]);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X), grid.count[0]);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y), grid.count[1]);
|
||||
cs_move32_to(b, cs_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z), grid.count[2]);
|
||||
|
||||
unsigned threads_per_wg =
|
||||
shader->local_size.x * shader->local_size.y * shader->local_size.z;
|
||||
|
|
|
|||
|
|
@@ -840,6 +840,33 @@
|
|||
<field name="Opcode" size="8" start="56" type="CS Opcode" default="SYNC_WAIT64"/>
|
||||
</struct>
|
||||
|
||||
<enum name="COMPUTE SR">
|
||||
<value name="SRT_0" value="0"/>
|
||||
<value name="SRT_1" value="2"/>
|
||||
<value name="SRT_2" value="4"/>
|
||||
<value name="SRT_3" value="6"/>
|
||||
<value name="FAU_0" value="8"/>
|
||||
<value name="FAU_1" value="10"/>
|
||||
<value name="FAU_2" value="12"/>
|
||||
<value name="FAU_3" value="14"/>
|
||||
<value name="SPD_0" value="16"/>
|
||||
<value name="SPD_1" value="18"/>
|
||||
<value name="SPD_2" value="20"/>
|
||||
<value name="SPD_3" value="22"/>
|
||||
<value name="TSD_0" value="24"/>
|
||||
<value name="TSD_1" value="26"/>
|
||||
<value name="TSD_2" value="28"/>
|
||||
<value name="TSD_3" value="30"/>
|
||||
<value name="GLOBAL_ATTRIBUTE_OFFSET" value="32"/>
|
||||
<value name="WG_SIZE" value="33"/>
|
||||
<value name="JOB_OFFSET_X" value="34"/>
|
||||
<value name="JOB_OFFSET_Y" value="35"/>
|
||||
<value name="JOB_OFFSET_Z" value="36"/>
|
||||
<value name="JOB_SIZE_X" value="37"/>
|
||||
<value name="JOB_SIZE_Y" value="38"/>
|
||||
<value name="JOB_SIZE_Z" value="39"/>
|
||||
</enum>
|
||||
|
||||
<enum name="IDVS SR">
|
||||
<value name="SRT_0" value="0"/>
|
||||
<value name="SRT_1" value="2"/>
|
||||
|
|
|
|||
|
|
@@ -236,22 +236,24 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
cs_update_compute_ctx(b) {
|
||||
if (compute_state_dirty(cmdbuf, CS) ||
|
||||
compute_state_dirty(cmdbuf, DESC_STATE))
|
||||
cs_move64_to(b, cs_sr_reg64(b, 0), cs_desc_state->res_table);
|
||||
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0),
|
||||
cs_desc_state->res_table);
|
||||
|
||||
if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
|
||||
uint64_t fau_ptr = cmdbuf->state.compute.push_uniforms |
|
||||
((uint64_t)shader->fau.total_count << 56);
|
||||
cs_move64_to(b, cs_sr_reg64(b, 8), fau_ptr);
|
||||
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_FAU_0), fau_ptr);
|
||||
}
|
||||
|
||||
if (compute_state_dirty(cmdbuf, CS))
|
||||
cs_move64_to(b, cs_sr_reg64(b, 16),
|
||||
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SPD_0),
|
||||
panvk_priv_mem_dev_addr(shader->spd));
|
||||
|
||||
cs_move64_to(b, cs_sr_reg64(b, 24), tsd);
|
||||
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd);
|
||||
|
||||
/* Global attribute offset */
|
||||
cs_move32_to(b, cs_sr_reg32(b, 32), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET),
|
||||
0);
|
||||
|
||||
struct mali_compute_size_workgroup_packed wg_size;
|
||||
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
|
||||
|
|
@@ -260,47 +262,54 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
cfg.workgroup_size_z = shader->local_size.z;
|
||||
cfg.allow_merging_workgroups = false;
|
||||
}
|
||||
cs_move32_to(b, cs_sr_reg32(b, 33), wg_size.opaque[0]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 34),
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_WG_SIZE),
|
||||
wg_size.opaque[0]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X),
|
||||
info->wg_base.x * shader->local_size.x);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 35),
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y),
|
||||
info->wg_base.y * shader->local_size.y);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 36),
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z),
|
||||
info->wg_base.z * shader->local_size.z);
|
||||
if (indirect) {
|
||||
/* Load parameters from indirect buffer and update workgroup count
|
||||
* registers and sysvals */
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 0),
|
||||
info->indirect.buffer_dev_addr);
|
||||
cs_load_to(b, cs_sr_reg_tuple(b, 37, 3), cs_scratch_reg64(b, 0),
|
||||
BITFIELD_MASK(3), 0);
|
||||
cs_load_to(b, cs_sr_reg_tuple(b, MALI_COMPUTE_SR_JOB_SIZE_X, 3),
|
||||
cs_scratch_reg64(b, 0), BITFIELD_MASK(3), 0);
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 0),
|
||||
cmdbuf->state.compute.push_uniforms);
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
|
||||
if (shader_uses_sysval(shader, compute, num_work_groups.x)) {
|
||||
cs_store32(b, cs_sr_reg32(b, 37), cs_scratch_reg64(b, 0),
|
||||
cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X),
|
||||
cs_scratch_reg64(b, 0),
|
||||
shader_remapped_sysval_offset(
|
||||
shader, sysval_offset(compute, num_work_groups.x)));
|
||||
}
|
||||
|
||||
if (shader_uses_sysval(shader, compute, num_work_groups.y)) {
|
||||
cs_store32(b, cs_sr_reg32(b, 38), cs_scratch_reg64(b, 0),
|
||||
cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
|
||||
cs_scratch_reg64(b, 0),
|
||||
shader_remapped_sysval_offset(
|
||||
shader, sysval_offset(compute, num_work_groups.y)));
|
||||
}
|
||||
|
||||
if (shader_uses_sysval(shader, compute, num_work_groups.z)) {
|
||||
cs_store32(b, cs_sr_reg32(b, 39), cs_scratch_reg64(b, 0),
|
||||
cs_store32(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z),
|
||||
cs_scratch_reg64(b, 0),
|
||||
shader_remapped_sysval_offset(
|
||||
shader, sysval_offset(compute, num_work_groups.z)));
|
||||
}
|
||||
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
} else {
|
||||
cs_move32_to(b, cs_sr_reg32(b, 37), info->direct.wg_count.x);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 38), info->direct.wg_count.y);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 39), info->direct.wg_count.z);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X),
|
||||
info->direct.wg_count.x);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
|
||||
info->direct.wg_count.y);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z),
|
||||
info->direct.wg_count.z);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -103,19 +103,21 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
|
||||
cs_update_compute_ctx(b) {
|
||||
/* No resource table */
|
||||
cs_move64_to(b, cs_sr_reg64(b, 0), 0);
|
||||
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SRT_0), 0);
|
||||
|
||||
uint64_t fau_count =
|
||||
DIV_ROUND_UP(BIFROST_PRECOMPILED_KERNEL_SYSVALS_SIZE + data_size, 8);
|
||||
uint64_t fau_ptr = push_uniforms.gpu | (fau_count << 56);
|
||||
cs_move64_to(b, cs_sr_reg64(b, 8), fau_ptr);
|
||||
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_FAU_0), fau_ptr);
|
||||
|
||||
cs_move64_to(b, cs_sr_reg64(b, 16), panvk_priv_mem_dev_addr(shader->spd));
|
||||
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_SPD_0),
|
||||
panvk_priv_mem_dev_addr(shader->spd));
|
||||
|
||||
cs_move64_to(b, cs_sr_reg64(b, 24), tsd);
|
||||
cs_move64_to(b, cs_sr_reg64(b, MALI_COMPUTE_SR_TSD_0), tsd);
|
||||
|
||||
/* Global attribute offset */
|
||||
cs_move32_to(b, cs_sr_reg32(b, 32), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_GLOBAL_ATTRIBUTE_OFFSET),
|
||||
0);
|
||||
|
||||
struct mali_compute_size_workgroup_packed wg_size;
|
||||
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
|
||||
|
|
@@ -124,17 +126,21 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
cfg.workgroup_size_z = shader->local_size.z;
|
||||
cfg.allow_merging_workgroups = false;
|
||||
}
|
||||
cs_move32_to(b, cs_sr_reg32(b, 33), wg_size.opaque[0]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_WG_SIZE),
|
||||
wg_size.opaque[0]);
|
||||
|
||||
/* Job offset */
|
||||
cs_move32_to(b, cs_sr_reg32(b, 34), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 35), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 36), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_X), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Y), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_OFFSET_Z), 0);
|
||||
|
||||
/* Job size */
|
||||
cs_move32_to(b, cs_sr_reg32(b, 37), grid.count[0]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 38), grid.count[1]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 39), grid.count[2]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_X),
|
||||
grid.count[0]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Y),
|
||||
grid.count[1]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, MALI_COMPUTE_SR_JOB_SIZE_Z),
|
||||
grid.count[2]);
|
||||
}
|
||||
|
||||
panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue