pvr: Implement vkCmdExecuteCommands API.

Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18871>
This commit is contained in:
Rajnesh Kanwal 2022-09-21 16:19:45 +01:00
parent 39ace3a0e6
commit 3047195c62
2 changed files with 337 additions and 29 deletions

View file

@ -83,36 +83,37 @@ struct pvr_compute_kernel_info {
static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd *sub_cmd)
{
switch (sub_cmd->type) {
case PVR_SUB_CMD_TYPE_GRAPHICS:
pvr_csb_finish(&sub_cmd->gfx.control_stream);
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo);
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo);
break;
if (sub_cmd->owned) {
switch (sub_cmd->type) {
case PVR_SUB_CMD_TYPE_GRAPHICS:
pvr_csb_finish(&sub_cmd->gfx.control_stream);
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo);
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo);
break;
case PVR_SUB_CMD_TYPE_COMPUTE:
pvr_csb_finish(&sub_cmd->compute.control_stream);
break;
case PVR_SUB_CMD_TYPE_COMPUTE:
pvr_csb_finish(&sub_cmd->compute.control_stream);
break;
case PVR_SUB_CMD_TYPE_TRANSFER:
list_for_each_entry_safe (struct pvr_transfer_cmd,
transfer_cmd,
&sub_cmd->transfer.transfer_cmds,
link) {
list_del(&transfer_cmd->link);
vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
case PVR_SUB_CMD_TYPE_TRANSFER:
list_for_each_entry_safe (struct pvr_transfer_cmd,
transfer_cmd,
&sub_cmd->transfer.transfer_cmds,
link) {
list_del(&transfer_cmd->link);
vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
}
break;
case PVR_SUB_CMD_TYPE_EVENT:
if (sub_cmd->event.type == PVR_EVENT_TYPE_WAIT)
vk_free(&cmd_buffer->vk.pool->alloc, sub_cmd->event.wait.events);
break;
default:
pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
break;
}
break;
case PVR_SUB_CMD_TYPE_EVENT:
if (sub_cmd->event.type == PVR_EVENT_TYPE_WAIT)
vk_free(&cmd_buffer->vk.pool->alloc, sub_cmd->event.wait.events);
break;
default:
pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
break;
}
list_del(&sub_cmd->link);
@ -1502,6 +1503,11 @@ static VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
if (!sub_cmd)
return VK_SUCCESS;
if (!sub_cmd->owned) {
state->current_sub_cmd = NULL;
return VK_SUCCESS;
}
switch (sub_cmd->type) {
case PVR_SUB_CMD_TYPE_GRAPHICS: {
struct pvr_sub_cmd_gfx *const gfx_sub_cmd = &sub_cmd->gfx;
@ -1636,7 +1642,7 @@ static void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer_state *state,
}
static inline bool
pvr_cmd_uses_deferred_cs_cmds(struct pvr_cmd_buffer *const cmd_buffer)
pvr_cmd_uses_deferred_cs_cmds(const struct pvr_cmd_buffer *const cmd_buffer)
{
const VkCommandBufferUsageFlags deferred_control_stream_flags =
VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT &
@ -1683,6 +1689,7 @@ static VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
}
sub_cmd->type = type;
sub_cmd->owned = true;
switch (type) {
case PVR_SUB_CMD_TYPE_GRAPHICS:
@ -5639,11 +5646,306 @@ void pvr_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
state->render_pass_info.clear_values = clear_values;
}
static VkResult
pvr_execute_deferred_cmd_buffer(struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_cmd_buffer *sec_cmd_buffer)
{
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
const uint32_t prim_db_elems =
util_dynarray_num_elements(&cmd_buffer->depth_bias_array,
__typeof__(state->dynamic.common.depth_bias));
const uint32_t prim_scissor_elems =
util_dynarray_num_elements(&cmd_buffer->scissor_array,
__typeof__(cmd_buffer->scissor_words));
const uint32_t sec_db_size =
util_dynarray_num_elements(&sec_cmd_buffer->depth_bias_array, char);
const uint32_t sec_scissor_size =
util_dynarray_num_elements(&sec_cmd_buffer->scissor_array, char);
uint32_t *addr;
util_dynarray_foreach (&sec_cmd_buffer->deferred_csb_commands,
struct pvr_deferred_cs_command,
cmd) {
switch (cmd->type) {
case PVR_DEFERRED_CS_COMMAND_TYPE_DBSC: {
const uint32_t scissor_idx =
prim_scissor_elems + cmd->dbsc.state.scissor_index;
const uint32_t db_idx =
prim_db_elems + cmd->dbsc.state.depthbias_index;
const uint32_t num_dwords =
pvr_cmd_length(TA_STATE_HEADER) + pvr_cmd_length(TA_STATE_ISPDBSC);
uint32_t ppp_state[num_dwords];
struct pvr_bo *pvr_bo;
VkResult result;
pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
header.pres_ispctl_dbsc = true;
}
pvr_csb_pack (&ppp_state[1], TA_STATE_ISPDBSC, ispdbsc) {
ispdbsc.dbindex = db_idx;
ispdbsc.scindex = scissor_idx;
}
result = pvr_cmd_buffer_upload_general(cmd_buffer,
&ppp_state[0],
sizeof(ppp_state),
&pvr_bo);
if (result != VK_SUCCESS)
return result;
pvr_csb_pack (&cmd->dbsc.vdm_state[0], VDMCTRL_PPP_STATE0, state) {
state.word_count = num_dwords;
state.addrmsb = pvr_bo->vma->dev_addr;
}
pvr_csb_pack (&cmd->dbsc.vdm_state[1], VDMCTRL_PPP_STATE1, state) {
state.addrlsb = pvr_bo->vma->dev_addr;
}
break;
}
case PVR_DEFERRED_CS_COMMAND_TYPE_DBSC2: {
const uint32_t scissor_idx =
prim_scissor_elems + cmd->dbsc2.state.scissor_index;
const uint32_t db_idx =
prim_db_elems + cmd->dbsc2.state.depthbias_index;
assert(cmd->dbsc2.ppp_cs_bo->bo->map);
addr = cmd->dbsc2.ppp_cs_bo->bo->map + cmd->dbsc2.patch_offset;
pvr_csb_pack (addr, TA_STATE_ISPDBSC, ispdbsc) {
ispdbsc.dbindex = db_idx;
ispdbsc.scindex = scissor_idx;
}
break;
}
default:
unreachable("Invalid deferred control stream command type.");
break;
}
}
addr =
util_dynarray_grow_bytes(&cmd_buffer->depth_bias_array, 1, sec_db_size);
memcpy(addr,
util_dynarray_begin(&sec_cmd_buffer->depth_bias_array),
sec_db_size);
addr =
util_dynarray_grow_bytes(&cmd_buffer->scissor_array, 1, sec_scissor_size);
memcpy(addr,
util_dynarray_begin(&sec_cmd_buffer->scissor_array),
sec_scissor_size);
cmd_buffer->state.dirty.depth_bias = true;
memset(&cmd_buffer->scissor_words[0], 0, sizeof(cmd_buffer->scissor_words));
return VK_SUCCESS;
}
/* Caller needs to make sure that it ends the current sub_cmd. This function
* only creates a copy of sec_sub_cmd and links it to the cmd_buffer's
* sub_cmd list.
*/
static VkResult pvr_execute_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd *sec_sub_cmd)
{
struct pvr_sub_cmd *primary_sub_cmd =
vk_zalloc(&cmd_buffer->vk.pool->alloc,
sizeof(*primary_sub_cmd),
8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!primary_sub_cmd) {
cmd_buffer->state.status =
vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
return cmd_buffer->state.status;
}
primary_sub_cmd->type = sec_sub_cmd->type;
primary_sub_cmd->owned = false;
primary_sub_cmd->flags = sec_sub_cmd->flags;
list_addtail(&primary_sub_cmd->link, &cmd_buffer->sub_cmds);
switch (sec_sub_cmd->type) {
case PVR_SUB_CMD_TYPE_GRAPHICS:
primary_sub_cmd->gfx = sec_sub_cmd->gfx;
break;
case PVR_SUB_CMD_TYPE_COMPUTE:
primary_sub_cmd->compute = sec_sub_cmd->compute;
break;
case PVR_SUB_CMD_TYPE_TRANSFER:
primary_sub_cmd->transfer = sec_sub_cmd->transfer;
break;
case PVR_SUB_CMD_TYPE_EVENT:
primary_sub_cmd->event = sec_sub_cmd->event;
break;
default:
mesa_loge("Unsupported sub-command type %d", primary_sub_cmd->type);
break;
}
return VK_SUCCESS;
}
static void
pvr_execute_graphics_cmd_buffer(struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_cmd_buffer *sec_cmd_buffer)
{
const struct pvr_device_info *dev_info =
&cmd_buffer->device->pdevice->dev_info;
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
struct pvr_sub_cmd *primary_sub_cmd = state->current_sub_cmd;
VkResult result;
if (list_is_empty(&sec_cmd_buffer->sub_cmds))
return;
list_for_each_entry (struct pvr_sub_cmd,
sec_sub_cmd,
&sec_cmd_buffer->sub_cmds,
link) {
/* Only graphics secondary execution supported within a renderpass. */
assert(sec_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
if (!sec_sub_cmd->gfx.empty_cmd)
primary_sub_cmd->gfx.empty_cmd = false;
if (pvr_cmd_uses_deferred_cs_cmds(sec_cmd_buffer)) {
/* TODO: In case if secondary buffer is created with
* VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT, then we patch the
* stream and copy it to primary stream using pvr_csb_copy below.
* This will need locking if the same secondary command buffer is
* executed in multiple primary buffers at the same time.
*/
result = pvr_execute_deferred_cmd_buffer(cmd_buffer, sec_cmd_buffer);
if (result != VK_SUCCESS)
return;
pvr_csb_copy(&primary_sub_cmd->gfx.control_stream,
&sec_sub_cmd->gfx.control_stream);
} else {
result = pvr_execute_deferred_cmd_buffer(cmd_buffer, sec_cmd_buffer);
if (result != VK_SUCCESS)
return;
pvr_csb_emit_link(
&primary_sub_cmd->gfx.control_stream,
pvr_csb_get_start_address(&sec_sub_cmd->gfx.control_stream),
true);
}
if (PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info,
compute_overlap)) {
primary_sub_cmd->gfx.job.disable_compute_overlap |=
sec_sub_cmd->gfx.job.disable_compute_overlap;
}
primary_sub_cmd->gfx.max_tiles_in_flight =
MIN2(primary_sub_cmd->gfx.max_tiles_in_flight,
sec_sub_cmd->gfx.max_tiles_in_flight);
/* Pass loaded depth/stencil usage from secondary command buffer. */
if (sec_sub_cmd->gfx.depth_usage == PVR_DEPTH_STENCIL_USAGE_NEEDED)
primary_sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
if (sec_sub_cmd->gfx.stencil_usage == PVR_DEPTH_STENCIL_USAGE_NEEDED)
primary_sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
/* Pass depth/stencil modification state from secondary command buffer. */
if (sec_sub_cmd->gfx.modifies_depth)
primary_sub_cmd->gfx.modifies_depth = true;
if (sec_sub_cmd->gfx.modifies_stencil)
primary_sub_cmd->gfx.modifies_stencil = true;
pvr_finishme(
"Copy barrier related load/store information from secondary to primary command buffer.");
if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
pvr_finishme("Unimplemented path.");
}
}
void pvr_CmdExecuteCommands(VkCommandBuffer commandBuffer,
uint32_t commandBufferCount,
const VkCommandBuffer *pCommandBuffers)
{
assert(!"Unimplemented");
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
struct pvr_cmd_buffer *last_cmd_buffer;
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
/* Reset the CPU copy of the most recent PPP state of the primary command
* buffer.
*
* The next draw call in the primary after CmdExecuteCommands may send
* redundant state, if it all goes in the same geom job.
*
* Can't just copy state from the secondary because the recording state of
* the secondary command buffers would have been deleted at this point.
*/
pvr_reset_graphics_dirty_state(state, false);
if (state->current_sub_cmd &&
state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS) {
for (uint32_t i = 0; i < commandBufferCount; i++) {
PVR_FROM_HANDLE(pvr_cmd_buffer, sec_cmd_buffer, pCommandBuffers[i]);
assert(sec_cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
pvr_execute_graphics_cmd_buffer(cmd_buffer, sec_cmd_buffer);
}
last_cmd_buffer =
pvr_cmd_buffer_from_handle(pCommandBuffers[commandBufferCount - 1]);
/* Set barriers from final command secondary command buffer. */
for (uint32_t i = 0; i != PVR_NUM_SYNC_PIPELINE_STAGES; i++) {
state->barriers_needed[i] |=
last_cmd_buffer->state.barriers_needed[i] &
PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS;
}
} else {
for (uint32_t i = 0; i < commandBufferCount; i++) {
PVR_FROM_HANDLE(pvr_cmd_buffer, sec_cmd_buffer, pCommandBuffers[i]);
VkResult result;
assert(sec_cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
if (result != VK_SUCCESS)
return;
list_for_each_entry_safe (struct pvr_sub_cmd,
sec_sub_cmd,
&sec_cmd_buffer->sub_cmds,
link) {
result = pvr_execute_sub_cmd(cmd_buffer, sec_sub_cmd);
if (result != VK_SUCCESS)
return;
}
}
last_cmd_buffer =
pvr_cmd_buffer_from_handle(pCommandBuffers[commandBufferCount - 1]);
memcpy(state->barriers_needed,
last_cmd_buffer->state.barriers_needed,
sizeof(state->barriers_needed));
}
}
static void pvr_insert_transparent_obj(struct pvr_cmd_buffer *const cmd_buffer,

View file

@ -775,6 +775,12 @@ struct pvr_sub_cmd {
enum pvr_sub_command_flags flags;
/* True if the sub_cmd is owned by this command buffer. False if taken from
* a secondary command buffer, in that case we are not supposed to free any
* resources associated with the sub_cmd.
*/
bool owned;
union {
struct pvr_sub_cmd_gfx gfx;
struct pvr_sub_cmd_compute compute;