mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 08:30:10 +01:00
tu: Initialize registers for BV
Start introducing commands to set up BV. We need to run the initial register setup with both BR and BV enabled, and similarly we need to set up a bin preamble for BV. A few magic registers are BR-only and should be skipped when initializing BV. The VPC attribute carveout registers are a bit special because they must be initialized in BR and BV bin preambles, so they are pulled into a separate function. This commit also switches the "default" thread control from BR with concurrent binning disabled to BR with concurrent binning enabled. GMEM renderpasses now explicitly disable concurrent binning. This is necessary because switching from CB enabled to disabled and vice versa imposes a synchronization, and we want BV to be able to skip over compute dispatches, sysmem renderpasses, etc. to find the next binning pass. GMEM renderpasses re-enable concurrent binning at the end to keep the "default" thread control and avoid having to sprinkle THREAD_CONTROL(BR) around all the other Vulkan commands that can run outside of a renderpass (compute dispatch, blits, query pool operations, etc.). Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36590>
This commit is contained in:
parent
9ea3d7c393
commit
416dc87be9
2 changed files with 90 additions and 26 deletions
|
|
@ -354,6 +354,26 @@ tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer)
|
|||
}
|
||||
TU_GENX(tu_emit_cache_flush_renderpass);
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_vpc_attr_buf(struct tu_cs *cs, struct tu_device *dev, bool gmem)
|
||||
{
|
||||
if (!dev->physical_device->info->a7xx.has_gmem_vpc_attr_buf)
|
||||
return;
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
VPC_ATTR_BUF_GMEM_SIZE(CHIP,
|
||||
gmem ? dev->physical_device->vpc_attr_buf_size_gmem
|
||||
: dev->physical_device->vpc_attr_buf_size_bypass),
|
||||
VPC_ATTR_BUF_GMEM_BASE(CHIP,
|
||||
gmem ? dev->physical_device->vpc_attr_buf_offset_gmem
|
||||
: dev->physical_device->vpc_attr_buf_offset_bypass), );
|
||||
tu_cs_emit_regs(cs,
|
||||
PC_ATTR_BUF_GMEM_SIZE(CHIP,
|
||||
gmem ? dev->physical_device->vpc_attr_buf_size_gmem
|
||||
: dev->physical_device->vpc_attr_buf_size_bypass), );
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_rb_ccu_cntl(struct tu_cs *cs, struct tu_device *dev, bool gmem)
|
||||
|
|
@ -403,20 +423,6 @@ emit_rb_ccu_cntl(struct tu_cs *cs, struct tu_device *dev, bool gmem)
|
|||
.color_cache_size = color_cache_size,
|
||||
.color_offset = color_offset
|
||||
));
|
||||
|
||||
if (dev->physical_device->info->a7xx.has_gmem_vpc_attr_buf) {
|
||||
tu_cs_emit_regs(cs,
|
||||
VPC_ATTR_BUF_GMEM_SIZE(CHIP,
|
||||
gmem ? dev->physical_device->vpc_attr_buf_size_gmem
|
||||
: dev->physical_device->vpc_attr_buf_size_bypass),
|
||||
VPC_ATTR_BUF_GMEM_BASE(CHIP,
|
||||
gmem ? dev->physical_device->vpc_attr_buf_offset_gmem
|
||||
: dev->physical_device->vpc_attr_buf_offset_bypass), );
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_PC_ATTR_BUF_GMEM_SIZE(
|
||||
gmem ? dev->physical_device->vpc_attr_buf_size_gmem
|
||||
: dev->physical_device->vpc_attr_buf_size_bypass), );
|
||||
}
|
||||
} else {
|
||||
tu_cs_emit_regs(cs, RB_CCU_CNTL(CHIP,
|
||||
.gmem_fast_clear_disable =
|
||||
|
|
@ -479,6 +485,14 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
|
|||
if (ccu_state != cmd_buffer->state.ccu_state) {
|
||||
emit_rb_ccu_cntl<CHIP>(cs, cmd_buffer->device,
|
||||
ccu_state == TU_CMD_CCU_GMEM);
|
||||
if (cmd_buffer->device->physical_device->info->a7xx.has_gmem_vpc_attr_buf) {
|
||||
tu7_thread_control(cs, CP_SET_THREAD_BOTH);
|
||||
|
||||
emit_vpc_attr_buf<CHIP>(cs, cmd_buffer->device,
|
||||
ccu_state == TU_CMD_CCU_GMEM);
|
||||
|
||||
tu7_thread_control(cs, CP_SET_THREAD_BR);
|
||||
}
|
||||
cmd_buffer->state.ccu_state = ccu_state;
|
||||
}
|
||||
}
|
||||
|
|
@ -1793,8 +1807,6 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
|
|||
fd_gras_shading_rate_lut(1));
|
||||
}
|
||||
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_DBG_ECO_CNTL,
|
||||
phys_dev->info->a6xx.magic.RB_DBG_ECO_CNTL);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_NC_MODE_CNTL_2, 0);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_DBG_ECO_CNTL,
|
||||
phys_dev->info->a6xx.magic.SP_DBG_ECO_CNTL);
|
||||
|
|
@ -1826,8 +1838,6 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
|
|||
phys_dev->info->a6xx.magic.UCHE_UNKNOWN_0E12);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF,
|
||||
phys_dev->info->a6xx.magic.UCHE_CLIENT_PF);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_RBP_CNTL,
|
||||
phys_dev->info->a6xx.magic.RB_RBP_CNTL);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A9A8, 0);
|
||||
tu_cs_emit_regs(cs, A6XX_SP_MODE_CNTL(.constant_demotion_enable = true,
|
||||
.isammode = ISAMMODE_GL,
|
||||
|
|
@ -1891,6 +1901,17 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
|
|||
A6XX_TPL1_CS_BORDER_COLOR_BASE(.bo = dev->global_bo,
|
||||
.bo_offset = gb_offset(bcolor)));
|
||||
|
||||
/* BR-only registers */
|
||||
if (CHIP >= A7XX)
|
||||
tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(THREAD_MODE) |
|
||||
CP_COND_REG_EXEC_0_BR);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_DBG_ECO_CNTL,
|
||||
phys_dev->info->a6xx.magic.RB_DBG_ECO_CNTL);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_RBP_CNTL,
|
||||
phys_dev->info->a6xx.magic.RB_RBP_CNTL);
|
||||
if (CHIP >= A7XX)
|
||||
tu_cond_exec_end(cs);
|
||||
|
||||
if (CHIP == A7XX) {
|
||||
tu_cs_emit_regs(cs, TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 0, 0),
|
||||
TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 1, 0x3fe05ff4),
|
||||
|
|
@ -1963,12 +1984,15 @@ tu7_emit_tile_render_begin_regs(struct tu_cs *cs)
|
|||
*/
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu_emit_bin_preamble(struct tu_device *dev, struct tu_cs *cs)
|
||||
tu_emit_bin_preamble(struct tu_device *dev, struct tu_cs *cs, bool bv)
|
||||
{
|
||||
struct tu_physical_device *phys_dev = dev->physical_device;
|
||||
|
||||
tu6_init_static_regs<CHIP>(dev, cs);
|
||||
emit_rb_ccu_cntl<CHIP>(cs, dev, true);
|
||||
|
||||
if (!bv)
|
||||
emit_rb_ccu_cntl<CHIP>(cs, dev, true);
|
||||
emit_vpc_attr_buf<CHIP>(cs, dev, true);
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
tu_cs_emit_regs(cs,
|
||||
|
|
@ -1978,7 +2002,7 @@ tu_emit_bin_preamble(struct tu_device *dev, struct tu_cs *cs)
|
|||
A6XX_VFD_POWER_CNTL(phys_dev->info->a6xx.magic.PC_POWER_CNTL));
|
||||
}
|
||||
|
||||
if (CHIP == A7XX) {
|
||||
if (CHIP == A7XX && !bv) {
|
||||
tu7_emit_tile_render_begin_regs(cs);
|
||||
}
|
||||
|
||||
|
|
@ -1998,10 +2022,20 @@ tu_init_bin_preamble(struct tu_device *device)
|
|||
if (result != VK_SUCCESS)
|
||||
return vk_startup_errorf(device->instance, result, "bin restore");
|
||||
|
||||
TU_CALLX(device, tu_emit_bin_preamble)(device, &preamble_cs);
|
||||
TU_CALLX(device, tu_emit_bin_preamble)(device, &preamble_cs, false);
|
||||
|
||||
device->bin_preamble_entry = tu_cs_end_sub_stream(&device->sub_cs, &preamble_cs);
|
||||
|
||||
if (device->physical_device->info->chip >= 7) {
|
||||
result = tu_cs_begin_sub_stream(&device->sub_cs, 256, &preamble_cs);
|
||||
if (result != VK_SUCCESS)
|
||||
return vk_startup_errorf(device->instance, result, "bin restore");
|
||||
|
||||
TU_CALLX(device, tu_emit_bin_preamble)(device, &preamble_cs, true);
|
||||
|
||||
device->bin_preamble_bv_entry = tu_cs_end_sub_stream(&device->sub_cs, &preamble_cs);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -2015,9 +2049,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
if (CHIP == A6XX) {
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
|
||||
} else {
|
||||
tu_cs_emit_pkt7(cs, CP_THREAD_CONTROL, 1);
|
||||
tu_cs_emit(cs, CP_THREAD_CONTROL_0_THREAD(CP_SET_THREAD_BR) |
|
||||
CP_THREAD_CONTROL_0_CONCURRENT_BIN_DISABLE);
|
||||
tu7_thread_control(cs, CP_SET_THREAD_BOTH);
|
||||
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_INVALIDATE_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_INVALIDATE_DEPTH);
|
||||
|
|
@ -2052,6 +2084,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu6_init_static_regs<CHIP>(cmd->device, cs);
|
||||
|
||||
emit_rb_ccu_cntl<CHIP>(cs, cmd->device, false);
|
||||
emit_vpc_attr_buf<CHIP>(cs, cmd->device, false);
|
||||
cmd->state.ccu_state = TU_CMD_CCU_SYSMEM;
|
||||
|
||||
tu_disable_draw_states(cmd, cs);
|
||||
|
|
@ -2065,6 +2098,10 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_emit_ib(cs, &dev->cmdbuf_start_a725_quirk_entry);
|
||||
}
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu7_thread_control(cs, CP_SET_THREAD_BR);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_AMBLE, 3);
|
||||
tu_cs_emit_qw(cs, cmd->device->bin_preamble_entry.bo->iova +
|
||||
cmd->device->bin_preamble_entry.offset);
|
||||
|
|
@ -2072,6 +2109,19 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
sizeof(uint32_t)) |
|
||||
CP_SET_AMBLE_2_TYPE(BIN_PREAMBLE_AMBLE_TYPE));
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu7_thread_control(cs, CP_SET_THREAD_BV);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_AMBLE, 3);
|
||||
tu_cs_emit_qw(cs, cmd->device->bin_preamble_bv_entry.bo->iova +
|
||||
cmd->device->bin_preamble_bv_entry.offset);
|
||||
tu_cs_emit(cs, CP_SET_AMBLE_2_DWORDS(cmd->device->bin_preamble_bv_entry.size /
|
||||
sizeof(uint32_t)) |
|
||||
CP_SET_AMBLE_2_TYPE(BIN_PREAMBLE_AMBLE_TYPE));
|
||||
|
||||
tu7_thread_control(cs, CP_SET_THREAD_BOTH);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_AMBLE, 3);
|
||||
tu_cs_emit_qw(cs, 0);
|
||||
tu_cs_emit(cs, CP_SET_AMBLE_2_TYPE(PREAMBLE_AMBLE_TYPE));
|
||||
|
|
@ -2080,6 +2130,10 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_emit_qw(cs, 0);
|
||||
tu_cs_emit(cs, CP_SET_AMBLE_2_TYPE(POSTAMBLE_AMBLE_TYPE));
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu7_thread_control(cs, CP_SET_THREAD_BR);
|
||||
}
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
}
|
||||
|
||||
|
|
@ -2720,6 +2774,12 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
|
||||
tu_emit_cache_flush_ccu<CHIP>(cmd, cs, TU_CMD_CCU_GMEM);
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu_cs_emit_pkt7(cs, CP_THREAD_CONTROL, 1);
|
||||
tu_cs_emit(cs, CP_THREAD_CONTROL_0_THREAD(CP_SET_THREAD_BR) |
|
||||
CP_THREAD_CONTROL_0_CONCURRENT_BIN_DISABLE);
|
||||
}
|
||||
|
||||
if (use_hw_binning(cmd)) {
|
||||
if (!cmd->vsc_initialized) {
|
||||
tu6_lazy_init_vsc(cmd);
|
||||
|
|
@ -2894,6 +2954,10 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_BLIT_CACHE);
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu7_thread_control(cs, CP_SET_THREAD_BR);
|
||||
}
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -431,7 +431,7 @@ struct tu_device
|
|||
|
||||
struct tu_cs_entry cmdbuf_start_a725_quirk_entry;
|
||||
|
||||
struct tu_cs_entry bin_preamble_entry;
|
||||
struct tu_cs_entry bin_preamble_entry, bin_preamble_bv_entry;
|
||||
|
||||
struct util_dynarray dynamic_rendering_pending;
|
||||
VkCommandPool dynamic_rendering_pool;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue