diff --git a/src/imagination/vulkan/pvr_blit.c b/src/imagination/vulkan/pvr_blit.c index baf3de31d3a..adf41f6a454 100644 --- a/src/imagination/vulkan/pvr_blit.c +++ b/src/imagination/vulkan/pvr_blit.c @@ -2156,6 +2156,8 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, pvr_clear_vdm_state_get_size_in_dw(dev_info, clear_rect->layerCount); + pvr_csb_set_relocation_mark(&sub_cmd->control_stream); + vdm_cs_buffer = pvr_csb_alloc_dwords(&sub_cmd->control_stream, vdm_cs_size_in_dw); if (!vdm_cs_buffer) { @@ -2171,6 +2173,8 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, vs_output_size_in_bytes, clear_rect->layerCount, vdm_cs_buffer); + + pvr_csb_clear_relocation_mark(&sub_cmd->control_stream); } } } diff --git a/src/imagination/vulkan/pvr_clear.c b/src/imagination/vulkan/pvr_clear.c index 2189e66f4ee..a54b8dc1c22 100644 --- a/src/imagination/vulkan/pvr_clear.c +++ b/src/imagination/vulkan/pvr_clear.c @@ -216,6 +216,8 @@ VkResult pvr_emit_ppp_from_template( stream = NULL; + pvr_csb_set_relocation_mark(csb); + pvr_csb_emit (csb, VDMCTRL_PPP_STATE0, state) { state.word_count = dword_count; state.addrmsb = pvr_bo->dev_addr; @@ -225,6 +227,8 @@ VkResult pvr_emit_ppp_from_template( state.addrlsb = pvr_bo->dev_addr; } + pvr_csb_clear_relocation_mark(csb); + *pvr_bo_out = pvr_bo; return VK_SUCCESS; @@ -931,6 +935,12 @@ void pvr_pack_clear_vdm_state(const struct pvr_device_info *const dev_info, } stream += pvr_cmd_length(VDMCTRL_VDM_STATE5); + /* TODO: Here we're doing another state update. If emitting directly to the + * control stream, we don't mark them as separate state updates by setting + * the relocation mark so we might be wasting a little bit of memory. See if + * it's worth changing the code to use the relocation mark. + */ + pvr_csb_pack (stream, VDMCTRL_INDEX_LIST0, index_list0) { index_list0.index_count_present = true; index_list0.index_instance_count_present = needs_instance_count; diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index df73591168d..3693bb5d799 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -362,6 +362,8 @@ pvr_cmd_buffer_emit_ppp_state(const struct pvr_cmd_buffer *const cmd_buffer, assert(csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS || csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED); + pvr_csb_set_relocation_mark(csb); + pvr_csb_emit (csb, VDMCTRL_PPP_STATE0, state0) { state0.addrmsb = framebuffer->ppp_state_bo->dev_addr; state0.word_count = framebuffer->ppp_state_size; @@ -371,6 +373,8 @@ pvr_cmd_buffer_emit_ppp_state(const struct pvr_cmd_buffer *const cmd_buffer, state1.addrlsb = framebuffer->ppp_state_bo->dev_addr; } + pvr_csb_clear_relocation_mark(csb); + return csb->status; } @@ -1756,6 +1760,8 @@ pvr_compute_generate_control_stream(struct pvr_csb *csb, struct pvr_sub_cmd_compute *sub_cmd, const struct pvr_compute_kernel_info *info) { + pvr_csb_set_relocation_mark(csb); + /* Compute kernel 0. */ pvr_csb_emit (csb, CDMCTRL_KERNEL0, kernel0) { kernel0.indirect_present = !!info->indirect_buffer_addr.addr; @@ -1825,6 +1831,8 @@ pvr_compute_generate_control_stream(struct pvr_csb *csb, kernel8.workgroup_size_z = info->local_size[2U] - 1U; } + pvr_csb_clear_relocation_mark(csb); + /* Track the highest amount of shared registers usage in this dispatch. * This is used by the FW for context switching, so must be large enough * to contain all the shared registers that might be in use for this compute @@ -2957,6 +2965,8 @@ static void pvr_emit_clear_words(struct pvr_cmd_buffer *const cmd_buffer, vdm_state_size_in_dw = pvr_clear_vdm_state_get_size_in_dw(&device->pdevice->dev_info, 1); + pvr_csb_set_relocation_mark(csb); + stream = pvr_csb_alloc_dwords(csb, vdm_state_size_in_dw); if (!stream) { pvr_cmd_buffer_set_error_unwarned(cmd_buffer, csb->status); @@ -2969,6 +2979,8 @@ static void pvr_emit_clear_words(struct pvr_cmd_buffer *const cmd_buffer, vdm_state = device->static_clear_state.vdm_words; memcpy(stream, vdm_state, PVR_DW_TO_BYTES(vdm_state_size_in_dw)); + + pvr_csb_clear_relocation_mark(csb); } static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer, @@ -4684,6 +4696,8 @@ pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer, if (!vertex_descriptor_state->pds_info.code_size_in_dwords) return; + pvr_csb_set_relocation_mark(csb); + pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) { state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ALL); @@ -4705,6 +4719,8 @@ pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer, state2.pds_code_addr = PVR_DEV_ADDR(vertex_descriptor_state->pds_code.code_offset); } + + pvr_csb_clear_relocation_mark(csb); } static void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer) @@ -5748,6 +5764,8 @@ static VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer, ppp_state_words, PVR_DW_TO_BYTES(ppp_state_words_count)); + pvr_csb_set_relocation_mark(control_stream); + /* Write the VDM state update into the VDM control stream. */ pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE0, state0) { state0.word_count = ppp_state_words_count; @@ -5758,19 +5776,26 @@ static VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer, state1.addrlsb = pvr_bo->dev_addr; } + pvr_csb_clear_relocation_mark(control_stream); + if (emit_dbsc && cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { struct pvr_deferred_cs_command cmd; if (deferred_secondary) { const uint32_t num_dwords = pvr_cmd_length(VDMCTRL_PPP_STATE0) + pvr_cmd_length(VDMCTRL_PPP_STATE1); + uint32_t *vdm_state; - uint32_t *vdm_state = pvr_csb_alloc_dwords(control_stream, num_dwords); + pvr_csb_set_relocation_mark(control_stream); + + vdm_state = pvr_csb_alloc_dwords(control_stream, num_dwords); if (!vdm_state) { result = pvr_csb_get_status(control_stream); return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); } + pvr_csb_clear_relocation_mark(control_stream); + cmd = (struct pvr_deferred_cs_command){ .type = PVR_DEFERRED_CS_COMMAND_TYPE_DBSC, .dbsc = { @@ -6012,6 +6037,8 @@ static void pvr_emit_dirty_vdm_state(struct pvr_cmd_buffer *const cmd_buffer, &cam_size, &max_instances); + pvr_csb_set_relocation_mark(csb); + pvr_csb_emit (csb, VDMCTRL_VDM_STATE0, state0) { state0.cam_size = cam_size; @@ -6113,6 +6140,8 @@ static void pvr_emit_dirty_vdm_state(struct pvr_cmd_buffer *const cmd_buffer, PVRX(VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE)); } } + + pvr_csb_clear_relocation_mark(csb); } static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer) @@ -6427,7 +6456,8 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer, dev_info); } - /* Write the VDM state update. */ + pvr_csb_set_relocation_mark(csb); + pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) { state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ANY); @@ -6459,6 +6489,13 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer, state2.pds_code_addr = PVR_DEV_ADDR(code_offset); } + pvr_csb_clear_relocation_mark(csb); + + /* We don't really need to set the relocation mark since the following + * state update is just one emit but let's be nice and use it. + */ + pvr_csb_set_relocation_mark(csb); + /* Sync task to ensure the VDM doesn't start reading the dummy blocks * before they are ready. */ @@ -6466,6 +6503,8 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer, list0.primitive_topology = PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST); } + pvr_csb_clear_relocation_mark(csb); + dummy_stream = pvr_bo_suballoc_get_map_addr(dummy_bo); /* For indexed draw cmds fill in the dummy's header (as it won't change @@ -6483,6 +6522,8 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer, pvr_csb_pack (dummy_stream, VDMCTRL_STREAM_RETURN, word); /* clang-format on */ + pvr_csb_set_relocation_mark(csb); + /* Stream link to the first dummy which forces the VDM to discard any * prefetched (dummy) control stream. */ @@ -6495,6 +6536,8 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer, link.link_addrlsb = dummy_bo->dev_addr; } + pvr_csb_clear_relocation_mark(csb); + /* Point the pds program to the next argument buffer and the next VDM * dummy buffer. */ @@ -6585,6 +6628,8 @@ static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer, return; } + pvr_csb_set_relocation_mark(csb); + pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) { list0 = list_hdr; } @@ -6612,6 +6657,8 @@ static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer, list4.index_offset = index_offset; } } + + pvr_csb_clear_relocation_mark(csb); } void pvr_CmdDraw(VkCommandBuffer commandBuffer, diff --git a/src/imagination/vulkan/pvr_csb.c b/src/imagination/vulkan/pvr_csb.c index 29e208c9537..0d2fa7c7e7f 100644 --- a/src/imagination/vulkan/pvr_csb.c +++ b/src/imagination/vulkan/pvr_csb.c @@ -79,6 +79,12 @@ void pvr_csb_init(struct pvr_device *device, csb->next = NULL; csb->pvr_bo = NULL; csb->end = NULL; + csb->relocation_mark = NULL; + +#if defined(DEBUG) + csb->relocation_mark_status = PVR_CSB_RELOCATION_MARK_UNINITIALIZED; +#endif + csb->device = device; csb->stream_type = stream_type; csb->status = VK_SUCCESS; @@ -98,6 +104,10 @@ void pvr_csb_init(struct pvr_device *device, */ void pvr_csb_finish(struct pvr_csb *csb) { +#if defined(DEBUG) + assert(csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_CLEARED); +#endif + if (csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED) { util_dynarray_fini(&csb->deferred_cs_mem); } else { @@ -144,6 +154,56 @@ VkResult pvr_csb_bake(struct pvr_csb *const csb, return VK_SUCCESS; } +/** + * \brief Adds VDMCTRL_STREAM_LINK/CDMCTRL_STREAM_LINK dwords into the control + * stream pointed by csb object without setting a relocation mark. + * + * \warning This does not set the relocation mark. + * + * \param[in] csb Control Stream Builder object to add LINK dwords to. + * \param[in] addr Device virtual address of the sub control stream to link to. + * \param[in] ret Selects whether the sub control stream will return or + * terminate. + */ +static void +pvr_csb_emit_link_unmarked(struct pvr_csb *csb, pvr_dev_addr_t addr, bool ret) +{ + /* Not supported for deferred control stream. */ + assert(csb->stream_type != PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED); + + /* Stream return is only supported for graphics control stream. */ + assert(!ret || csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS); + + switch (csb->stream_type) { + case PVR_CMD_STREAM_TYPE_GRAPHICS: + pvr_csb_emit (csb, VDMCTRL_STREAM_LINK0, link) { + link.link_addrmsb = addr; + link.with_return = ret; + } + + pvr_csb_emit (csb, VDMCTRL_STREAM_LINK1, link) { + link.link_addrlsb = addr; + } + + break; + + case PVR_CMD_STREAM_TYPE_COMPUTE: + pvr_csb_emit (csb, CDMCTRL_STREAM_LINK0, link) { + link.link_addrmsb = addr; + } + + pvr_csb_emit (csb, CDMCTRL_STREAM_LINK1, link) { + link.link_addrlsb = addr; + } + + break; + + default: + unreachable("Unknown stream type"); + break; + } +} + /** * \brief Helper function to extend csb memory. * @@ -196,12 +256,30 @@ static bool pvr_csb_buffer_extend(struct pvr_csb *csb) return false; } - /* Chain to the old BO if this is not the first BO in csb */ + /* if this is not the first BO in csb */ if (csb->pvr_bo) { + const size_t current_state_update_size = + (uint8_t *)csb->next - (uint8_t *)csb->relocation_mark; + void *new_buffer = pvr_bo->bo->map; + + assert(csb->relocation_mark != NULL); + assert(csb->next >= csb->relocation_mark); + + memcpy(new_buffer, csb->relocation_mark, current_state_update_size); + +#if defined(DEBUG) + assert(csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_SET); + csb->relocation_mark_status = PVR_CSB_RELOCATION_MARK_SET_AND_CONSUMED; + + memset(csb->relocation_mark, 0, current_state_update_size); +#endif + + csb->next = csb->relocation_mark; + csb->end += stream_link_space; assert(csb->next + stream_link_space <= csb->end); - pvr_csb_emit_link(csb, pvr_bo->vma->dev_addr, false); + pvr_csb_emit_link_unmarked(csb, pvr_bo->vma->dev_addr, false); } csb->pvr_bo = pvr_bo; @@ -245,6 +323,11 @@ void *pvr_csb_alloc_dwords(struct pvr_csb *csb, uint32_t num_dwords) return p; } +#if defined(DEBUG) + if (csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_CLEARED) + mesa_logd_once("CS memory without relocation mark detected."); +#endif + if (csb->next + required_space > csb->end) { bool ret = pvr_csb_buffer_extend(csb); if (!ret) @@ -327,40 +410,9 @@ VkResult pvr_csb_copy(struct pvr_csb *csb_dst, struct pvr_csb *csb_src) */ void pvr_csb_emit_link(struct pvr_csb *csb, pvr_dev_addr_t addr, bool ret) { - /* Not supported for deferred control stream. */ - assert(csb->stream_type != PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED); - - /* Stream return is only supported for graphics control stream. */ - assert(!ret || csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS); - - switch (csb->stream_type) { - case PVR_CMD_STREAM_TYPE_GRAPHICS: - pvr_csb_emit (csb, VDMCTRL_STREAM_LINK0, link) { - link.link_addrmsb = addr; - link.with_return = ret; - } - - pvr_csb_emit (csb, VDMCTRL_STREAM_LINK1, link) { - link.link_addrlsb = addr; - } - - break; - - case PVR_CMD_STREAM_TYPE_COMPUTE: - pvr_csb_emit (csb, CDMCTRL_STREAM_LINK0, link) { - link.link_addrmsb = addr; - } - - pvr_csb_emit (csb, CDMCTRL_STREAM_LINK1, link) { - link.link_addrlsb = addr; - } - - break; - - default: - unreachable("Unknown stream type"); - break; - } + pvr_csb_set_relocation_mark(csb); + pvr_csb_emit_link_unmarked(csb, addr, ret); + pvr_csb_clear_relocation_mark(csb); } /** @@ -377,9 +429,11 @@ VkResult pvr_csb_emit_return(struct pvr_csb *csb) assert(csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS || csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED); + pvr_csb_set_relocation_mark(csb); /* clang-format off */ pvr_csb_emit(csb, VDMCTRL_STREAM_RETURN, ret); /* clang-format on */ + pvr_csb_clear_relocation_mark(csb); return csb->status; } @@ -394,6 +448,8 @@ VkResult pvr_csb_emit_return(struct pvr_csb *csb) */ VkResult pvr_csb_emit_terminate(struct pvr_csb *csb) { + pvr_csb_set_relocation_mark(csb); + switch (csb->stream_type) { case PVR_CMD_STREAM_TYPE_GRAPHICS: /* clang-format off */ @@ -412,5 +468,7 @@ VkResult pvr_csb_emit_terminate(struct pvr_csb *csb) break; } + pvr_csb_clear_relocation_mark(csb); + return csb->status; } diff --git a/src/imagination/vulkan/pvr_csb.h b/src/imagination/vulkan/pvr_csb.h index ab0f1dcb660..99e862a104c 100644 --- a/src/imagination/vulkan/pvr_csb.h +++ b/src/imagination/vulkan/pvr_csb.h @@ -75,6 +75,27 @@ struct pvr_csb { void *end; void *next; + /* When extending the control stream we can't break state updates across bos. + * This indicates where the current state update starts, so that it can be + * be relocated into the new bo without breaking the update. + */ + void *relocation_mark; +#if defined(DEBUG) + /* Used to track the state of the `relocation_mark` and to catch cases where + * the driver might have emitted to the cs without using the + * `relocation_mark`. Doing so is mostly harmless but will waste memory in + * case the cs is extended while an untracked state update is emitted, as + * we'll have to relocate the cs contents from the last tracked state update + * instead of just the one currently being emitted. + */ + enum pvr_csb_relocation_mark_status { + PVR_CSB_RELOCATION_MARK_UNINITIALIZED, + PVR_CSB_RELOCATION_MARK_SET, + PVR_CSB_RELOCATION_MARK_SET_AND_CONSUMED, + PVR_CSB_RELOCATION_MARK_CLEARED, + } relocation_mark_status; +#endif + /* List of csb buffer objects */ struct list_head pvr_bo_list; @@ -127,6 +148,90 @@ pvr_csb_get_start_address(const struct pvr_csb *csb) return PVR_DEV_ADDR_INVALID; } +/** \defgroup CSB relocation marking. + * Functions and macros related to relocation marking for control stream words. + * + * When there is no more space left in the current bo, csb needs has to extend + * the control stream by allocating a new bo and emitting a link to it. State + * updates have to be contiguous so cannot be broken by a link. Thus csb copies + * the current, in construction, state update into the new bo and emits a link + * in its place in the old bo. To do so however, it needs a hint from the driver + * to determine where the current state update started from, so a relocation + * mark is used. + * + * List of words demarking the beginning of state updates (i.e. state update + * headers): + * - ROGUE_VDMCTRL_PPP_STATE0 + * - ROGUE_VDMCTRL_PDS_STATE0 + * - ROGUE_VDMCTRL_VDM_STATE0 + * - ROGUE_VDMCTRL_INDEX_LIST0 + * - ROGUE_VDMCTRL_STREAM_LINK0 + * - ROGUE_VDMCTRL_STREAM_RETURN + * - ROGUE_VDMCTRL_STREAM_TERMINATE + * + * - ROGUE_CDMCTRL_KERNEL0 + * - ROGUE_CDMCTRL_STREAM_LINK0 + * - ROGUE_CDMCTRL_STREAM_TERMINATE + * + * The driver should set the relocation mark whenever a new state update is + * started. And clear it when the state update is fully formed. + * + * PVR_CSB_RELOCATION_MARK state machine: + * + * UNINITIALIZED + * ↓ + * ┌─── → SET ─────────┐ + * │ ↓ │ + * │ SET_AND_CONSUMED │ + * │ ↓ │ + * │ CLEARED ← ─────┘ + * └───────┘ + * + * @{ + */ +/* TODO: Add in the IPF transfer control stream state updates to the list once + * csb gets used for it + */ + +/** + * \brief Set the relocation mark. + * + * Indicates to csb that on cs extension it should relocate all words, starting + * from now, into the new bo. + */ +static inline void pvr_csb_set_relocation_mark(struct pvr_csb *csb) +{ +#if defined(DEBUG) + assert(csb->relocation_mark_status == + PVR_CSB_RELOCATION_MARK_UNINITIALIZED || + csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_CLEARED); + + csb->relocation_mark_status = PVR_CSB_RELOCATION_MARK_SET; +#endif + + csb->relocation_mark = csb->next; +} + +/** + * \brief Clear the relocation mark. + * + * Indicate to csb that the state update is fully formed so it doesn't need to + * relocate it in case of cs extension. + */ +static inline void pvr_csb_clear_relocation_mark(UNUSED struct pvr_csb *csb) +{ +#if defined(DEBUG) + assert(csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_SET || + csb->relocation_mark_status == + PVR_CSB_RELOCATION_MARK_SET_AND_CONSUMED); + + csb->relocation_mark_status = PVR_CSB_RELOCATION_MARK_CLEARED; +#endif +} + +/** @} */ +/* End of \defgroup CSB relocation marking. */ + void pvr_csb_init(struct pvr_device *device, enum pvr_cmd_stream_type stream_type, struct pvr_csb *csb);