mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 22:38:05 +02:00
pvr: Implement vkCmdDrawIndirect API.
Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com> Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18486>
This commit is contained in:
parent
e14c418862
commit
cdf986a3e6
1 changed files with 293 additions and 53 deletions
|
|
@ -4769,6 +4769,187 @@ static uint32_t pvr_get_hw_primitive_topology(VkPrimitiveTopology topology)
|
|||
}
|
||||
}
|
||||
|
||||
/* TODO: Rewrite this in terms of ALIGN_POT() and pvr_cmd_length(). */
|
||||
/* Aligned to 128 bit for PDS loads / stores */
|
||||
#define DUMMY_VDM_CONTROL_STREAM_BLOCK_SIZE 8
|
||||
|
||||
static VkResult
|
||||
pvr_write_draw_indirect_vdm_stream(struct pvr_cmd_buffer *cmd_buffer,
|
||||
struct pvr_csb *const csb,
|
||||
pvr_dev_addr_t idx_buffer_addr,
|
||||
uint32_t idx_stride,
|
||||
struct PVRX(VDMCTRL_INDEX_LIST0) * list_hdr,
|
||||
struct pvr_buffer *buffer,
|
||||
VkDeviceSize offset,
|
||||
uint32_t count,
|
||||
uint32_t stride)
|
||||
{
|
||||
struct pvr_pds_drawindirect_program pds_prog = { 0 };
|
||||
uint32_t word0;
|
||||
|
||||
/* Draw indirect always has index offset and instance count. */
|
||||
list_hdr->index_offset_present = true;
|
||||
list_hdr->index_instance_count_present = true;
|
||||
|
||||
pvr_cmd_pack(VDMCTRL_INDEX_LIST0)(&word0, list_hdr);
|
||||
|
||||
pds_prog.support_base_instance = true;
|
||||
pds_prog.arg_buffer = buffer->dev_addr.addr + offset;
|
||||
pds_prog.index_buffer = idx_buffer_addr.addr;
|
||||
pds_prog.index_block_header = word0;
|
||||
pds_prog.index_stride = idx_stride;
|
||||
pds_prog.num_views = 1U;
|
||||
|
||||
/* TODO: See if we can pre-upload the code section of all the pds programs
|
||||
* and reuse them here.
|
||||
*/
|
||||
/* Generate and upload the PDS programs (code + data). */
|
||||
for (uint32_t i = 0U; i < count; i++) {
|
||||
const struct pvr_device_info *dev_info =
|
||||
&cmd_buffer->device->pdevice->dev_info;
|
||||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
struct pvr_bo *dummy_bo;
|
||||
uint32_t *dummy_stream;
|
||||
struct pvr_bo *pds_bo;
|
||||
uint32_t *pds_base;
|
||||
uint32_t pds_size;
|
||||
VkResult result;
|
||||
|
||||
pds_prog.increment_draw_id = (i != 0);
|
||||
|
||||
if (state->draw_state.draw_indexed) {
|
||||
pvr_pds_generate_draw_elements_indirect(&pds_prog,
|
||||
0,
|
||||
PDS_GENERATE_SIZES,
|
||||
dev_info);
|
||||
} else {
|
||||
pvr_pds_generate_draw_arrays_indirect(&pds_prog,
|
||||
0,
|
||||
PDS_GENERATE_SIZES,
|
||||
dev_info);
|
||||
}
|
||||
|
||||
pds_size = (pds_prog.program.data_size_aligned +
|
||||
pds_prog.program.code_size_aligned)
|
||||
<< 2;
|
||||
|
||||
result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
|
||||
cmd_buffer->device->heaps.pds_heap,
|
||||
pds_size,
|
||||
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
|
||||
&pds_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
pds_base = pds_bo->bo->map;
|
||||
memcpy(pds_base,
|
||||
pds_prog.program.code,
|
||||
pds_prog.program.code_size_aligned << 2);
|
||||
|
||||
if (state->draw_state.draw_indexed) {
|
||||
pvr_pds_generate_draw_elements_indirect(
|
||||
&pds_prog,
|
||||
pds_base + pds_prog.program.code_size_aligned,
|
||||
PDS_GENERATE_DATA_SEGMENT,
|
||||
dev_info);
|
||||
} else {
|
||||
pvr_pds_generate_draw_arrays_indirect(
|
||||
&pds_prog,
|
||||
pds_base + pds_prog.program.code_size_aligned,
|
||||
PDS_GENERATE_DATA_SEGMENT,
|
||||
dev_info);
|
||||
}
|
||||
|
||||
pvr_bo_cpu_unmap(cmd_buffer->device, pds_bo);
|
||||
|
||||
/* Write the VDM state update. */
|
||||
pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) {
|
||||
state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ANY);
|
||||
|
||||
state0.pds_temp_size =
|
||||
DIV_ROUND_UP(pds_prog.program.temp_size_aligned << 2,
|
||||
PVRX(VDMCTRL_PDS_STATE0_PDS_TEMP_SIZE_UNIT_SIZE));
|
||||
|
||||
state0.pds_data_size =
|
||||
DIV_ROUND_UP(pds_prog.program.data_size_aligned << 2,
|
||||
PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE));
|
||||
}
|
||||
|
||||
pvr_csb_emit (csb, VDMCTRL_PDS_STATE1, state1) {
|
||||
const uint32_t data_offset =
|
||||
pds_bo->vma->dev_addr.addr + (pds_prog.program.code_size << 2) -
|
||||
cmd_buffer->device->heaps.pds_heap->base_addr.addr;
|
||||
|
||||
state1.pds_data_addr = PVR_DEV_ADDR(data_offset);
|
||||
state1.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS);
|
||||
state1.sd_next_type = PVRX(VDMCTRL_SD_TYPE_NONE);
|
||||
}
|
||||
|
||||
pvr_csb_emit (csb, VDMCTRL_PDS_STATE2, state2) {
|
||||
const uint32_t code_offset =
|
||||
pds_bo->vma->dev_addr.addr -
|
||||
cmd_buffer->device->heaps.pds_heap->base_addr.addr;
|
||||
|
||||
state2.pds_code_addr = PVR_DEV_ADDR(code_offset);
|
||||
}
|
||||
|
||||
/* Sync task to ensure the VDM doesn't start reading the dummy blocks
|
||||
* before they are ready.
|
||||
*/
|
||||
pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) {
|
||||
list0.primitive_topology = PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST);
|
||||
}
|
||||
|
||||
result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
|
||||
cmd_buffer->device->heaps.general_heap,
|
||||
DUMMY_VDM_CONTROL_STREAM_BLOCK_SIZE,
|
||||
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
|
||||
&dummy_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
dummy_stream = dummy_bo->bo->map;
|
||||
|
||||
/* For indexed draw cmds fill in the dummy's header (as it won't change
|
||||
* based on the indirect args) and increment by the in-use size of each
|
||||
* dummy block.
|
||||
*/
|
||||
if (!state->draw_state.draw_indexed) {
|
||||
dummy_stream[0] = word0;
|
||||
dummy_stream += 4;
|
||||
} else {
|
||||
dummy_stream += 5;
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
pvr_csb_pack (dummy_stream, VDMCTRL_STREAM_RETURN, word);
|
||||
/* clang-format on */
|
||||
|
||||
pvr_bo_cpu_unmap(cmd_buffer->device, dummy_bo);
|
||||
|
||||
/* Stream link to the first dummy which forces the VDM to discard any
|
||||
* prefetched (dummy) control stream.
|
||||
*/
|
||||
pvr_csb_emit (csb, VDMCTRL_STREAM_LINK0, link) {
|
||||
link.with_return = true;
|
||||
link.link_addrmsb = dummy_bo->vma->dev_addr;
|
||||
}
|
||||
|
||||
pvr_csb_emit (csb, VDMCTRL_STREAM_LINK1, link) {
|
||||
link.link_addrlsb = dummy_bo->vma->dev_addr;
|
||||
}
|
||||
|
||||
/* Point the pds program to the next argument buffer and the next VDM
|
||||
* dummy buffer.
|
||||
*/
|
||||
pds_prog.arg_buffer += stride;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
#undef DUMMY_VDM_CONTROL_STREAM_BLOCK_SIZE
|
||||
|
||||
static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer,
|
||||
struct pvr_sub_cmd_gfx *const sub_cmd,
|
||||
VkPrimitiveTopology topology,
|
||||
|
|
@ -4776,64 +4957,88 @@ static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer,
|
|||
uint32_t vertex_count,
|
||||
uint32_t first_index,
|
||||
uint32_t index_count,
|
||||
uint32_t instance_count)
|
||||
uint32_t instance_count,
|
||||
struct pvr_buffer *buffer,
|
||||
VkDeviceSize offset,
|
||||
uint32_t count,
|
||||
uint32_t stride)
|
||||
{
|
||||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
struct pvr_csb *const csb = &sub_cmd->control_stream;
|
||||
const bool vertex_shader_has_side_effects =
|
||||
state->gfx_pipeline->vertex_shader_state.stage_state.has_side_effects;
|
||||
struct PVRX(VDMCTRL_INDEX_LIST0)
|
||||
list_hdr = { pvr_cmd_header(VDMCTRL_INDEX_LIST0) };
|
||||
pvr_dev_addr_t index_buffer_addr = PVR_DEV_ADDR_INVALID;
|
||||
struct pvr_csb *const csb = &sub_cmd->control_stream;
|
||||
unsigned int index_stride = 0;
|
||||
|
||||
pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) {
|
||||
const bool vertex_shader_has_side_effects =
|
||||
cmd_buffer->state.gfx_pipeline->vertex_shader_state.stage_state
|
||||
.has_side_effects;
|
||||
list_hdr.primitive_topology = pvr_get_hw_primitive_topology(topology);
|
||||
|
||||
list0.primitive_topology = pvr_get_hw_primitive_topology(topology);
|
||||
/* firstInstance is not handled here in the VDM state, it's implemented as
|
||||
* an addition in the PDS vertex fetch using
|
||||
* PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE entry type.
|
||||
*/
|
||||
|
||||
/* First instance is not handled in the VDM state, it's implemented as
|
||||
* an addition in the PDS vertex fetch.
|
||||
*/
|
||||
list0.index_count_present = true;
|
||||
list_hdr.index_count_present = true;
|
||||
|
||||
if (instance_count > 1)
|
||||
list0.index_instance_count_present = true;
|
||||
if (instance_count > 1)
|
||||
list_hdr.index_instance_count_present = true;
|
||||
|
||||
if (first_vertex != 0)
|
||||
list0.index_offset_present = true;
|
||||
if (first_vertex != 0)
|
||||
list_hdr.index_offset_present = true;
|
||||
|
||||
if (state->draw_state.draw_indexed) {
|
||||
struct pvr_buffer *buffer = state->index_buffer_binding.buffer;
|
||||
if (state->draw_state.draw_indexed) {
|
||||
struct pvr_buffer *buffer = state->index_buffer_binding.buffer;
|
||||
|
||||
switch (state->index_buffer_binding.type) {
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B32);
|
||||
index_stride = 4;
|
||||
break;
|
||||
switch (state->index_buffer_binding.type) {
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
list_hdr.index_size = PVRX(VDMCTRL_INDEX_SIZE_B32);
|
||||
index_stride = 4;
|
||||
break;
|
||||
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B16);
|
||||
index_stride = 2;
|
||||
break;
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
list_hdr.index_size = PVRX(VDMCTRL_INDEX_SIZE_B16);
|
||||
index_stride = 2;
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid index type");
|
||||
}
|
||||
|
||||
list0.index_addr_present = true;
|
||||
index_buffer_addr = PVR_DEV_ADDR_OFFSET(
|
||||
buffer->dev_addr,
|
||||
state->index_buffer_binding.offset + first_index * index_stride);
|
||||
list0.index_base_addrmsb = index_buffer_addr;
|
||||
default:
|
||||
unreachable("Invalid index type");
|
||||
}
|
||||
|
||||
list0.degen_cull_enable =
|
||||
PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info,
|
||||
vdm_degenerate_culling) &&
|
||||
!vertex_shader_has_side_effects;
|
||||
index_buffer_addr = PVR_DEV_ADDR_OFFSET(
|
||||
buffer->dev_addr,
|
||||
state->index_buffer_binding.offset + first_index * index_stride);
|
||||
|
||||
list_hdr = list0;
|
||||
list_hdr.index_addr_present = true;
|
||||
|
||||
/* For indirect draw calls, index buffer address is not embedded into VDM
|
||||
* control stream.
|
||||
*/
|
||||
if (!state->draw_state.draw_indirect)
|
||||
list_hdr.index_base_addrmsb = index_buffer_addr;
|
||||
}
|
||||
|
||||
list_hdr.degen_cull_enable =
|
||||
PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info,
|
||||
vdm_degenerate_culling) &&
|
||||
!vertex_shader_has_side_effects;
|
||||
|
||||
if (state->draw_state.draw_indirect) {
|
||||
assert(buffer);
|
||||
pvr_write_draw_indirect_vdm_stream(cmd_buffer,
|
||||
csb,
|
||||
index_buffer_addr,
|
||||
index_stride,
|
||||
&list_hdr,
|
||||
buffer,
|
||||
offset,
|
||||
count,
|
||||
stride);
|
||||
return;
|
||||
}
|
||||
|
||||
pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) {
|
||||
list0 = list_hdr;
|
||||
}
|
||||
|
||||
if (list_hdr.index_addr_present) {
|
||||
|
|
@ -4869,17 +5074,16 @@ void pvr_CmdDraw(VkCommandBuffer commandBuffer,
|
|||
uint32_t firstVertex,
|
||||
uint32_t firstInstance)
|
||||
{
|
||||
const struct pvr_cmd_buffer_draw_state draw_state = {
|
||||
.base_vertex = firstVertex,
|
||||
.base_instance = firstInstance,
|
||||
};
|
||||
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
struct pvr_cmd_buffer_draw_state draw_state;
|
||||
VkResult result;
|
||||
|
||||
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
||||
|
||||
draw_state.base_vertex = firstVertex;
|
||||
draw_state.base_instance = firstInstance;
|
||||
draw_state.draw_indirect = false;
|
||||
draw_state.draw_indexed = false;
|
||||
pvr_update_draw_state(state, &draw_state);
|
||||
|
||||
result = pvr_validate_draw_state(cmd_buffer);
|
||||
|
|
@ -4894,7 +5098,11 @@ void pvr_CmdDraw(VkCommandBuffer commandBuffer,
|
|||
vertexCount,
|
||||
0U,
|
||||
0U,
|
||||
instanceCount);
|
||||
instanceCount,
|
||||
NULL,
|
||||
0U,
|
||||
0U,
|
||||
0U);
|
||||
}
|
||||
|
||||
void pvr_CmdDrawIndexed(VkCommandBuffer commandBuffer,
|
||||
|
|
@ -4904,17 +5112,17 @@ void pvr_CmdDrawIndexed(VkCommandBuffer commandBuffer,
|
|||
int32_t vertexOffset,
|
||||
uint32_t firstInstance)
|
||||
{
|
||||
const struct pvr_cmd_buffer_draw_state draw_state = {
|
||||
.base_vertex = vertexOffset,
|
||||
.base_instance = firstInstance,
|
||||
.draw_indexed = true,
|
||||
};
|
||||
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
struct pvr_cmd_buffer_draw_state draw_state;
|
||||
VkResult result;
|
||||
|
||||
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
||||
|
||||
draw_state.base_vertex = vertexOffset;
|
||||
draw_state.base_instance = firstInstance;
|
||||
draw_state.draw_indirect = false;
|
||||
draw_state.draw_indexed = true;
|
||||
pvr_update_draw_state(state, &draw_state);
|
||||
|
||||
result = pvr_validate_draw_state(cmd_buffer);
|
||||
|
|
@ -4929,7 +5137,11 @@ void pvr_CmdDrawIndexed(VkCommandBuffer commandBuffer,
|
|||
0,
|
||||
firstIndex,
|
||||
indexCount,
|
||||
instanceCount);
|
||||
instanceCount,
|
||||
NULL,
|
||||
0U,
|
||||
0U,
|
||||
0U);
|
||||
}
|
||||
|
||||
void pvr_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
|
||||
|
|
@ -4947,7 +5159,35 @@ void pvr_CmdDrawIndirect(VkCommandBuffer commandBuffer,
|
|||
uint32_t drawCount,
|
||||
uint32_t stride)
|
||||
{
|
||||
assert(!"Unimplemented");
|
||||
const struct pvr_cmd_buffer_draw_state draw_state = {
|
||||
.draw_indirect = true,
|
||||
};
|
||||
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);
|
||||
VkResult result;
|
||||
|
||||
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
||||
|
||||
pvr_update_draw_state(state, &draw_state);
|
||||
|
||||
result = pvr_validate_draw_state(cmd_buffer);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
/* Write the VDM control stream for the primitive. */
|
||||
pvr_emit_vdm_index_list(cmd_buffer,
|
||||
&state->current_sub_cmd->gfx,
|
||||
state->gfx_pipeline->input_asm_state.topology,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
buffer,
|
||||
offset,
|
||||
drawCount,
|
||||
stride);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue