pvr: Fix csb control stream extension

Previously we reserved space for a stream link and whenever we ran
out of space in the current bo, allocated a new one, and emitted a
link to it. This is problematic as stream links can only be emitted
at state update boundaries so the handling could have produced a
corrupted control stream.

That's fixed by using a `relocation_mark` set by the driver to
indicate where a state update was last started, so csb can relocate
the whole update into the new bo and link to it.

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23520>
This commit is contained in:
Karmjit Mahil 2023-06-06 11:51:01 +01:00 committed by Marge Bot
parent 5336cbff3b
commit 5d03bbc91d
5 changed files with 262 additions and 38 deletions

View file

@ -2156,6 +2156,8 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
pvr_clear_vdm_state_get_size_in_dw(dev_info,
clear_rect->layerCount);
pvr_csb_set_relocation_mark(&sub_cmd->control_stream);
vdm_cs_buffer =
pvr_csb_alloc_dwords(&sub_cmd->control_stream, vdm_cs_size_in_dw);
if (!vdm_cs_buffer) {
@ -2171,6 +2173,8 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
vs_output_size_in_bytes,
clear_rect->layerCount,
vdm_cs_buffer);
pvr_csb_clear_relocation_mark(&sub_cmd->control_stream);
}
}
}

View file

@ -216,6 +216,8 @@ VkResult pvr_emit_ppp_from_template(
stream = NULL;
pvr_csb_set_relocation_mark(csb);
pvr_csb_emit (csb, VDMCTRL_PPP_STATE0, state) {
state.word_count = dword_count;
state.addrmsb = pvr_bo->dev_addr;
@ -225,6 +227,8 @@ VkResult pvr_emit_ppp_from_template(
state.addrlsb = pvr_bo->dev_addr;
}
pvr_csb_clear_relocation_mark(csb);
*pvr_bo_out = pvr_bo;
return VK_SUCCESS;
@ -931,6 +935,12 @@ void pvr_pack_clear_vdm_state(const struct pvr_device_info *const dev_info,
}
stream += pvr_cmd_length(VDMCTRL_VDM_STATE5);
/* TODO: Here we're doing another state update. If emitting directly to the
* control stream, we don't mark them as separate state updates by setting
* the relocation mark so we might be wasting a little bit of memory. See if
* it's worth changing the code to use the relocation mark.
*/
pvr_csb_pack (stream, VDMCTRL_INDEX_LIST0, index_list0) {
index_list0.index_count_present = true;
index_list0.index_instance_count_present = needs_instance_count;

View file

@ -362,6 +362,8 @@ pvr_cmd_buffer_emit_ppp_state(const struct pvr_cmd_buffer *const cmd_buffer,
assert(csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS ||
csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED);
pvr_csb_set_relocation_mark(csb);
pvr_csb_emit (csb, VDMCTRL_PPP_STATE0, state0) {
state0.addrmsb = framebuffer->ppp_state_bo->dev_addr;
state0.word_count = framebuffer->ppp_state_size;
@ -371,6 +373,8 @@ pvr_cmd_buffer_emit_ppp_state(const struct pvr_cmd_buffer *const cmd_buffer,
state1.addrlsb = framebuffer->ppp_state_bo->dev_addr;
}
pvr_csb_clear_relocation_mark(csb);
return csb->status;
}
@ -1756,6 +1760,8 @@ pvr_compute_generate_control_stream(struct pvr_csb *csb,
struct pvr_sub_cmd_compute *sub_cmd,
const struct pvr_compute_kernel_info *info)
{
pvr_csb_set_relocation_mark(csb);
/* Compute kernel 0. */
pvr_csb_emit (csb, CDMCTRL_KERNEL0, kernel0) {
kernel0.indirect_present = !!info->indirect_buffer_addr.addr;
@ -1825,6 +1831,8 @@ pvr_compute_generate_control_stream(struct pvr_csb *csb,
kernel8.workgroup_size_z = info->local_size[2U] - 1U;
}
pvr_csb_clear_relocation_mark(csb);
/* Track the highest amount of shared registers usage in this dispatch.
* This is used by the FW for context switching, so must be large enough
* to contain all the shared registers that might be in use for this compute
@ -2957,6 +2965,8 @@ static void pvr_emit_clear_words(struct pvr_cmd_buffer *const cmd_buffer,
vdm_state_size_in_dw =
pvr_clear_vdm_state_get_size_in_dw(&device->pdevice->dev_info, 1);
pvr_csb_set_relocation_mark(csb);
stream = pvr_csb_alloc_dwords(csb, vdm_state_size_in_dw);
if (!stream) {
pvr_cmd_buffer_set_error_unwarned(cmd_buffer, csb->status);
@ -2969,6 +2979,8 @@ static void pvr_emit_clear_words(struct pvr_cmd_buffer *const cmd_buffer,
vdm_state = device->static_clear_state.vdm_words;
memcpy(stream, vdm_state, PVR_DW_TO_BYTES(vdm_state_size_in_dw));
pvr_csb_clear_relocation_mark(csb);
}
static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer,
@ -4684,6 +4696,8 @@ pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer,
if (!vertex_descriptor_state->pds_info.code_size_in_dwords)
return;
pvr_csb_set_relocation_mark(csb);
pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) {
state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ALL);
@ -4705,6 +4719,8 @@ pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer,
state2.pds_code_addr =
PVR_DEV_ADDR(vertex_descriptor_state->pds_code.code_offset);
}
pvr_csb_clear_relocation_mark(csb);
}
static void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer)
@ -5748,6 +5764,8 @@ static VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer,
ppp_state_words,
PVR_DW_TO_BYTES(ppp_state_words_count));
pvr_csb_set_relocation_mark(control_stream);
/* Write the VDM state update into the VDM control stream. */
pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE0, state0) {
state0.word_count = ppp_state_words_count;
@ -5758,19 +5776,26 @@ static VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer,
state1.addrlsb = pvr_bo->dev_addr;
}
pvr_csb_clear_relocation_mark(control_stream);
if (emit_dbsc && cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
struct pvr_deferred_cs_command cmd;
if (deferred_secondary) {
const uint32_t num_dwords = pvr_cmd_length(VDMCTRL_PPP_STATE0) +
pvr_cmd_length(VDMCTRL_PPP_STATE1);
uint32_t *vdm_state;
uint32_t *vdm_state = pvr_csb_alloc_dwords(control_stream, num_dwords);
pvr_csb_set_relocation_mark(control_stream);
vdm_state = pvr_csb_alloc_dwords(control_stream, num_dwords);
if (!vdm_state) {
result = pvr_csb_get_status(control_stream);
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
}
pvr_csb_clear_relocation_mark(control_stream);
cmd = (struct pvr_deferred_cs_command){
.type = PVR_DEFERRED_CS_COMMAND_TYPE_DBSC,
.dbsc = {
@ -6012,6 +6037,8 @@ static void pvr_emit_dirty_vdm_state(struct pvr_cmd_buffer *const cmd_buffer,
&cam_size,
&max_instances);
pvr_csb_set_relocation_mark(csb);
pvr_csb_emit (csb, VDMCTRL_VDM_STATE0, state0) {
state0.cam_size = cam_size;
@ -6113,6 +6140,8 @@ static void pvr_emit_dirty_vdm_state(struct pvr_cmd_buffer *const cmd_buffer,
PVRX(VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE));
}
}
pvr_csb_clear_relocation_mark(csb);
}
static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer)
@ -6427,7 +6456,8 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer,
dev_info);
}
/* Write the VDM state update. */
pvr_csb_set_relocation_mark(csb);
pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) {
state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ANY);
@ -6459,6 +6489,13 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer,
state2.pds_code_addr = PVR_DEV_ADDR(code_offset);
}
pvr_csb_clear_relocation_mark(csb);
/* We don't really need to set the relocation mark since the following
* state update is just one emit but let's be nice and use it.
*/
pvr_csb_set_relocation_mark(csb);
/* Sync task to ensure the VDM doesn't start reading the dummy blocks
* before they are ready.
*/
@ -6466,6 +6503,8 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer,
list0.primitive_topology = PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST);
}
pvr_csb_clear_relocation_mark(csb);
dummy_stream = pvr_bo_suballoc_get_map_addr(dummy_bo);
/* For indexed draw cmds fill in the dummy's header (as it won't change
@ -6483,6 +6522,8 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer,
pvr_csb_pack (dummy_stream, VDMCTRL_STREAM_RETURN, word);
/* clang-format on */
pvr_csb_set_relocation_mark(csb);
/* Stream link to the first dummy which forces the VDM to discard any
* prefetched (dummy) control stream.
*/
@ -6495,6 +6536,8 @@ pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer,
link.link_addrlsb = dummy_bo->dev_addr;
}
pvr_csb_clear_relocation_mark(csb);
/* Point the pds program to the next argument buffer and the next VDM
* dummy buffer.
*/
@ -6585,6 +6628,8 @@ static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer,
return;
}
pvr_csb_set_relocation_mark(csb);
pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) {
list0 = list_hdr;
}
@ -6612,6 +6657,8 @@ static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer,
list4.index_offset = index_offset;
}
}
pvr_csb_clear_relocation_mark(csb);
}
void pvr_CmdDraw(VkCommandBuffer commandBuffer,

View file

@ -79,6 +79,12 @@ void pvr_csb_init(struct pvr_device *device,
csb->next = NULL;
csb->pvr_bo = NULL;
csb->end = NULL;
csb->relocation_mark = NULL;
#if defined(DEBUG)
csb->relocation_mark_status = PVR_CSB_RELOCATION_MARK_UNINITIALIZED;
#endif
csb->device = device;
csb->stream_type = stream_type;
csb->status = VK_SUCCESS;
@ -98,6 +104,10 @@ void pvr_csb_init(struct pvr_device *device,
*/
void pvr_csb_finish(struct pvr_csb *csb)
{
#if defined(DEBUG)
assert(csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_CLEARED);
#endif
if (csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED) {
util_dynarray_fini(&csb->deferred_cs_mem);
} else {
@ -144,6 +154,56 @@ VkResult pvr_csb_bake(struct pvr_csb *const csb,
return VK_SUCCESS;
}
/**
* \brief Adds VDMCTRL_STREAM_LINK/CDMCTRL_STREAM_LINK dwords into the control
* stream pointed by csb object without setting a relocation mark.
*
* \warning This does not set the relocation mark.
*
* \param[in] csb Control Stream Builder object to add LINK dwords to.
* \param[in] addr Device virtual address of the sub control stream to link to.
* \param[in] ret Selects whether the sub control stream will return or
* terminate.
*/
static void
pvr_csb_emit_link_unmarked(struct pvr_csb *csb, pvr_dev_addr_t addr, bool ret)
{
/* Not supported for deferred control stream. */
assert(csb->stream_type != PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED);
/* Stream return is only supported for graphics control stream. */
assert(!ret || csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS);
switch (csb->stream_type) {
case PVR_CMD_STREAM_TYPE_GRAPHICS:
pvr_csb_emit (csb, VDMCTRL_STREAM_LINK0, link) {
link.link_addrmsb = addr;
link.with_return = ret;
}
pvr_csb_emit (csb, VDMCTRL_STREAM_LINK1, link) {
link.link_addrlsb = addr;
}
break;
case PVR_CMD_STREAM_TYPE_COMPUTE:
pvr_csb_emit (csb, CDMCTRL_STREAM_LINK0, link) {
link.link_addrmsb = addr;
}
pvr_csb_emit (csb, CDMCTRL_STREAM_LINK1, link) {
link.link_addrlsb = addr;
}
break;
default:
unreachable("Unknown stream type");
break;
}
}
/**
* \brief Helper function to extend csb memory.
*
@ -196,12 +256,30 @@ static bool pvr_csb_buffer_extend(struct pvr_csb *csb)
return false;
}
/* Chain to the old BO if this is not the first BO in csb */
/* if this is not the first BO in csb */
if (csb->pvr_bo) {
const size_t current_state_update_size =
(uint8_t *)csb->next - (uint8_t *)csb->relocation_mark;
void *new_buffer = pvr_bo->bo->map;
assert(csb->relocation_mark != NULL);
assert(csb->next >= csb->relocation_mark);
memcpy(new_buffer, csb->relocation_mark, current_state_update_size);
#if defined(DEBUG)
assert(csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_SET);
csb->relocation_mark_status = PVR_CSB_RELOCATION_MARK_SET_AND_CONSUMED;
memset(csb->relocation_mark, 0, current_state_update_size);
#endif
csb->next = csb->relocation_mark;
csb->end += stream_link_space;
assert(csb->next + stream_link_space <= csb->end);
pvr_csb_emit_link(csb, pvr_bo->vma->dev_addr, false);
pvr_csb_emit_link_unmarked(csb, pvr_bo->vma->dev_addr, false);
}
csb->pvr_bo = pvr_bo;
@ -245,6 +323,11 @@ void *pvr_csb_alloc_dwords(struct pvr_csb *csb, uint32_t num_dwords)
return p;
}
#if defined(DEBUG)
if (csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_CLEARED)
mesa_logd_once("CS memory without relocation mark detected.");
#endif
if (csb->next + required_space > csb->end) {
bool ret = pvr_csb_buffer_extend(csb);
if (!ret)
@ -327,40 +410,9 @@ VkResult pvr_csb_copy(struct pvr_csb *csb_dst, struct pvr_csb *csb_src)
*/
void pvr_csb_emit_link(struct pvr_csb *csb, pvr_dev_addr_t addr, bool ret)
{
/* Not supported for deferred control stream. */
assert(csb->stream_type != PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED);
/* Stream return is only supported for graphics control stream. */
assert(!ret || csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS);
switch (csb->stream_type) {
case PVR_CMD_STREAM_TYPE_GRAPHICS:
pvr_csb_emit (csb, VDMCTRL_STREAM_LINK0, link) {
link.link_addrmsb = addr;
link.with_return = ret;
}
pvr_csb_emit (csb, VDMCTRL_STREAM_LINK1, link) {
link.link_addrlsb = addr;
}
break;
case PVR_CMD_STREAM_TYPE_COMPUTE:
pvr_csb_emit (csb, CDMCTRL_STREAM_LINK0, link) {
link.link_addrmsb = addr;
}
pvr_csb_emit (csb, CDMCTRL_STREAM_LINK1, link) {
link.link_addrlsb = addr;
}
break;
default:
unreachable("Unknown stream type");
break;
}
pvr_csb_set_relocation_mark(csb);
pvr_csb_emit_link_unmarked(csb, addr, ret);
pvr_csb_clear_relocation_mark(csb);
}
/**
@ -377,9 +429,11 @@ VkResult pvr_csb_emit_return(struct pvr_csb *csb)
assert(csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS ||
csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED);
pvr_csb_set_relocation_mark(csb);
/* clang-format off */
pvr_csb_emit(csb, VDMCTRL_STREAM_RETURN, ret);
/* clang-format on */
pvr_csb_clear_relocation_mark(csb);
return csb->status;
}
@ -394,6 +448,8 @@ VkResult pvr_csb_emit_return(struct pvr_csb *csb)
*/
VkResult pvr_csb_emit_terminate(struct pvr_csb *csb)
{
pvr_csb_set_relocation_mark(csb);
switch (csb->stream_type) {
case PVR_CMD_STREAM_TYPE_GRAPHICS:
/* clang-format off */
@ -412,5 +468,7 @@ VkResult pvr_csb_emit_terminate(struct pvr_csb *csb)
break;
}
pvr_csb_clear_relocation_mark(csb);
return csb->status;
}

View file

@ -75,6 +75,27 @@ struct pvr_csb {
void *end;
void *next;
/* When extending the control stream we can't break state updates across bos.
* This indicates where the current state update starts, so that it can be
* be relocated into the new bo without breaking the update.
*/
void *relocation_mark;
#if defined(DEBUG)
/* Used to track the state of the `relocation_mark` and to catch cases where
* the driver might have emitted to the cs without using the
* `relocation_mark`. Doing so is mostly harmless but will waste memory in
* case the cs is extended while an untracked state update is emitted, as
* we'll have to relocate the cs contents from the last tracked state update
* instead of just the one currently being emitted.
*/
enum pvr_csb_relocation_mark_status {
PVR_CSB_RELOCATION_MARK_UNINITIALIZED,
PVR_CSB_RELOCATION_MARK_SET,
PVR_CSB_RELOCATION_MARK_SET_AND_CONSUMED,
PVR_CSB_RELOCATION_MARK_CLEARED,
} relocation_mark_status;
#endif
/* List of csb buffer objects */
struct list_head pvr_bo_list;
@ -127,6 +148,90 @@ pvr_csb_get_start_address(const struct pvr_csb *csb)
return PVR_DEV_ADDR_INVALID;
}
/** \defgroup CSB relocation marking.
* Functions and macros related to relocation marking for control stream words.
*
* When there is no more space left in the current bo, csb needs has to extend
* the control stream by allocating a new bo and emitting a link to it. State
* updates have to be contiguous so cannot be broken by a link. Thus csb copies
* the current, in construction, state update into the new bo and emits a link
* in its place in the old bo. To do so however, it needs a hint from the driver
* to determine where the current state update started from, so a relocation
* mark is used.
*
* List of words demarking the beginning of state updates (i.e. state update
* headers):
* - ROGUE_VDMCTRL_PPP_STATE0
* - ROGUE_VDMCTRL_PDS_STATE0
* - ROGUE_VDMCTRL_VDM_STATE0
* - ROGUE_VDMCTRL_INDEX_LIST0
* - ROGUE_VDMCTRL_STREAM_LINK0
* - ROGUE_VDMCTRL_STREAM_RETURN
* - ROGUE_VDMCTRL_STREAM_TERMINATE
*
* - ROGUE_CDMCTRL_KERNEL0
* - ROGUE_CDMCTRL_STREAM_LINK0
* - ROGUE_CDMCTRL_STREAM_TERMINATE
*
* The driver should set the relocation mark whenever a new state update is
* started. And clear it when the state update is fully formed.
*
* PVR_CSB_RELOCATION_MARK state machine:
*
* UNINITIALIZED
*
* SET
*
* SET_AND_CONSUMED
*
* CLEARED
*
*
* @{
*/
/* TODO: Add in the IPF transfer control stream state updates to the list once
* csb gets used for it
*/
/**
* \brief Set the relocation mark.
*
* Indicates to csb that on cs extension it should relocate all words, starting
* from now, into the new bo.
*/
static inline void pvr_csb_set_relocation_mark(struct pvr_csb *csb)
{
#if defined(DEBUG)
assert(csb->relocation_mark_status ==
PVR_CSB_RELOCATION_MARK_UNINITIALIZED ||
csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_CLEARED);
csb->relocation_mark_status = PVR_CSB_RELOCATION_MARK_SET;
#endif
csb->relocation_mark = csb->next;
}
/**
* \brief Clear the relocation mark.
*
* Indicate to csb that the state update is fully formed so it doesn't need to
* relocate it in case of cs extension.
*/
static inline void pvr_csb_clear_relocation_mark(UNUSED struct pvr_csb *csb)
{
#if defined(DEBUG)
assert(csb->relocation_mark_status == PVR_CSB_RELOCATION_MARK_SET ||
csb->relocation_mark_status ==
PVR_CSB_RELOCATION_MARK_SET_AND_CONSUMED);
csb->relocation_mark_status = PVR_CSB_RELOCATION_MARK_CLEARED;
#endif
}
/** @} */
/* End of \defgroup CSB relocation marking. */
void pvr_csb_init(struct pvr_device *device,
enum pvr_cmd_stream_type stream_type,
struct pvr_csb *csb);