mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-08 08:30:10 +01:00
tu: Emit CP_SET_AMBLE packets
Make sure skipsaverestore works. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30544>
This commit is contained in:
parent
db86c4c496
commit
700e26a448
4 changed files with 107 additions and 11 deletions
|
|
@ -1430,6 +1430,76 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
|
|||
}
|
||||
}
|
||||
|
||||
/* Set always-identical registers used specifically for GMEM */
|
||||
static void
|
||||
tu7_emit_tile_render_begin_regs(struct tu_cs *cs)
|
||||
{
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_RB_UNKNOWN_8812(0x0));
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_RB_UNKNOWN_8E06(0x0));
|
||||
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8007(0x0));
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_8E09(0x4));
|
||||
|
||||
tu_cs_emit_regs(cs, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_GMEM));
|
||||
}
|
||||
|
||||
/* Emit the bin restore preamble, which runs in between bins when L1
|
||||
* preemption with skipsaverestore happens and we switch back to this context.
|
||||
* We need to restore static registers normally programmed at cmdbuf start
|
||||
* which weren't saved, and we need to program the CCU state which is normally
|
||||
* programmed before rendering the bins and isn't saved/restored by the CP
|
||||
* because it is always the same for GMEM render passes.
|
||||
*/
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu_emit_bin_preamble(struct tu_device *dev, struct tu_cs *cs)
|
||||
{
|
||||
struct tu_physical_device *phys_dev = dev->physical_device;
|
||||
|
||||
tu6_init_static_regs<CHIP>(dev, cs);
|
||||
emit_rb_ccu_cntl<CHIP>(cs, dev, true);
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_PC_POWER_CNTL(phys_dev->info->a6xx.magic.PC_POWER_CNTL));
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VFD_POWER_CNTL(phys_dev->info->a6xx.magic.PC_POWER_CNTL));
|
||||
}
|
||||
|
||||
if (CHIP == A7XX) {
|
||||
tu7_emit_tile_render_begin_regs(cs);
|
||||
}
|
||||
|
||||
/* TODO use CP_MEM_TO_SCRATCH_MEM on a7xx. The VSC scratch mem should be
|
||||
* automatically saved, unlike GPU registers, so we wouldn't have to
|
||||
* manually restore this state.
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
|
||||
tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A6XX_VSC_STATE(0)) |
|
||||
CP_MEM_TO_REG_0_CNT(32));
|
||||
tu_cs_emit_qw(cs, dev->global_bo->iova + gb_offset(vsc_state));
|
||||
}
|
||||
|
||||
/* Build the bin restore preamble once per device.
 *
 * Opens a sub-stream on device->sub_cs (requested size 256), records the
 * preamble for the device's chip generation into it, and stores the resulting
 * entry in device->bin_preamble_entry.
 *
 * Returns VK_SUCCESS, or the startup error produced when the sub-stream could
 * not be started.
 */
VkResult
tu_init_bin_preamble(struct tu_device *device)
{
   struct tu_cs cs;
   VkResult status = tu_cs_begin_sub_stream(&device->sub_cs, 256, &cs);

   if (status == VK_SUCCESS) {
      /* Dispatch to the chip-specific instantiation of the emitter. */
      TU_CALLX(device, tu_emit_bin_preamble)(device, &cs);

      device->bin_preamble_entry = tu_cs_end_sub_stream(&device->sub_cs, &cs);
      return VK_SUCCESS;
   }

   return vk_startup_errorf(device->instance, status, "bin restore");
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
|
|
@ -1490,6 +1560,21 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_emit_ib(cs, &dev->cmdbuf_start_a725_quirk_entry);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_AMBLE, 3);
|
||||
tu_cs_emit_qw(cs, cmd->device->bin_preamble_entry.bo->iova +
|
||||
cmd->device->bin_preamble_entry.offset);
|
||||
tu_cs_emit(cs, CP_SET_AMBLE_2_DWORDS(cmd->device->bin_preamble_entry.size /
|
||||
sizeof(uint32_t)) |
|
||||
CP_SET_AMBLE_2_TYPE(BIN_PREAMBLE_AMBLE_TYPE));
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_AMBLE, 3);
|
||||
tu_cs_emit_qw(cs, 0);
|
||||
tu_cs_emit(cs, CP_SET_AMBLE_2_TYPE(PREAMBLE_AMBLE_TYPE));
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_AMBLE, 3);
|
||||
tu_cs_emit_qw(cs, 0);
|
||||
tu_cs_emit(cs, CP_SET_AMBLE_2_TYPE(POSTAMBLE_AMBLE_TYPE));
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
}
|
||||
|
||||
|
|
@ -2013,17 +2098,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
tu_cs_emit(cs, 0x0);
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_RB_UNKNOWN_8812(0x0));
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_RB_UNKNOWN_8E06(0x0));
|
||||
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8007(0x0));
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_8E09(0x4));
|
||||
|
||||
tu_cs_emit_regs(cs, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_GMEM));
|
||||
tu7_emit_tile_render_begin_regs(cs);
|
||||
}
|
||||
|
||||
tu_emit_cache_flush_ccu<CHIP>(cmd, cs, TU_CMD_CCU_GMEM);
|
||||
|
|
@ -2071,6 +2146,16 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
}
|
||||
}
|
||||
|
||||
if (tiling->binning_possible) {
|
||||
/* Upload state regs to memory to be restored on skipsaverestore
|
||||
* preemption.
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_VSC_STATE_REG(0)) |
|
||||
CP_REG_TO_MEM_0_CNT(32));
|
||||
tu_cs_emit_qw(cs, global_iova(cmd, vsc_state));
|
||||
}
|
||||
|
||||
tu_autotune_begin_renderpass<CHIP>(cmd, cs, autotune_result);
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
|
|
|
|||
|
|
@ -771,4 +771,6 @@ _tu_create_fdm_bin_patchpoint(struct tu_cmd_buffer *cmd,
|
|||
#define tu_create_fdm_bin_patchpoint(cmd, cs, size, apply, state) \
|
||||
_tu_create_fdm_bin_patchpoint(cmd, cs, size, apply, &state, sizeof(state))
|
||||
|
||||
/* Build the bin restore preamble command stream for the device and store it
 * in device->bin_preamble_entry. Returns VK_SUCCESS, or an error result if
 * the sub-stream for the preamble could not be started.
 */
VkResult tu_init_bin_preamble(struct tu_device *device);
|
||||
|
||||
#endif /* TU_CMD_BUFFER_H */
|
||||
|
|
|
|||
|
|
@ -2503,6 +2503,10 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
}
|
||||
}
|
||||
|
||||
result = tu_init_bin_preamble(device);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_bin_preamble;
|
||||
|
||||
if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
|
||||
result = tu_init_cmdbuf_start_a725_quirk(device);
|
||||
if (result != VK_SUCCESS)
|
||||
|
|
@ -2596,6 +2600,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
|
||||
fail_timeline_cond:
|
||||
fail_a725_workaround:
|
||||
fail_bin_preamble:
|
||||
fail_prepare_perfcntrs_pass_cs:
|
||||
free(device->perfcntrs_pass_cs_entries);
|
||||
fail_perfcntrs_pass_entries_alloc:
|
||||
|
|
|
|||
|
|
@ -235,6 +235,8 @@ struct tu6_global
|
|||
|
||||
alignas(16) uint32_t cs_indirect_xyz[12];
|
||||
|
||||
uint32_t vsc_state[32];
|
||||
|
||||
volatile uint32_t vtx_stats_query_not_running;
|
||||
|
||||
/* To know when renderpass stats for autotune are valid */
|
||||
|
|
@ -391,6 +393,8 @@ struct tu_device
|
|||
|
||||
struct tu_cs_entry cmdbuf_start_a725_quirk_entry;
|
||||
|
||||
struct tu_cs_entry bin_preamble_entry;
|
||||
|
||||
struct util_dynarray dynamic_rendering_pending;
|
||||
VkCommandPool dynamic_rendering_pool;
|
||||
uint32_t dynamic_rendering_fence;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue