pvr: add view index support for vertex shaders

Signed-off-by: Peter Quayle <peter.quayle@imgtec.com>
Co-authored-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37512>
This commit is contained in:
Peter Quayle 2025-05-06 18:30:30 +02:00 committed by Marge Bot
parent 93c7f0f9c0
commit 9d48088428
7 changed files with 270 additions and 1 deletions

View file

@ -4569,6 +4569,60 @@ uint32_t *pvr_pds_generate_stream_out_terminate_program(
return NULL;
}
/**
 * Generates (or measures) the PDS program that initialises the view index
 * ptemp.
 *
 * The program is four instructions long: two LIMMs (view index and zero),
 * one ADD32 that writes their sum to ptemp PVR_PTEMP_VIEW_INDEX, and a HALT.
 *
 * \param program  In: view_index. Out: temps_used (code-segment and sizes
 *                 modes), code_size and data_size (sizes mode only).
 * \param buffer   Destination for the encoded instructions; only written in
 *                 PDS_GENERATE_CODE_SEGMENT mode.
 * \param gen_mode Selects between emitting code, measuring sizes, or doing
 *                 nothing (any other mode).
 * \return \p buffer advanced past the emitted instructions in code-segment
 *         mode, otherwise \p buffer unchanged.
 */
uint32_t *pvr_pds_generate_view_index_init_program(
   struct pvr_pds_view_index_init_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode)
{
   const bool emit_code = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
   const bool measure = (gen_mode == PDS_GENERATE_SIZES);
   uint32_t temp_cursor = PVR_PDS_TEMPS_BLOCK_BASE;
   uint32_t temp_count = 0;
   uint32_t code_bytes = 0;
   uint32_t view_index_temp;
   uint32_t zero_temp;

   /* Every other generation mode has nothing to produce for this program. */
   if (!emit_code && !measure)
      return buffer;

/* Either stores the encoded instruction or just accounts for its size. The
 * encoder expression is only evaluated when code is actually emitted.
 */
#define EMIT_INST(inst)                       \
   do {                                       \
      if (emit_code) {                        \
         *buffer = inst;                      \
         buffer++;                            \
      } else {                                \
         code_bytes += sizeof(uint32_t);      \
      }                                       \
   } while (0)

   view_index_temp = pvr_pds_get_temps(&temp_cursor, 1, &temp_count);
   zero_temp = pvr_pds_get_temps(&temp_cursor, 1, &temp_count);

   /* view_index_temp = program->view_index */
   EMIT_INST(
      pvr_pds_inst_encode_limm(0, view_index_temp, program->view_index, 0));

   /* zero_temp = 0 */
   EMIT_INST(pvr_pds_inst_encode_limm(0, zero_temp, 0, 0));

   /* ptemp[PVR_PTEMP_VIEW_INDEX] = view_index_temp + zero_temp, i.e. a copy
    * of the view index into the ptemp.
    */
   EMIT_INST(pvr_pds_inst_encode_add32(
      0,
      0,
      0,
      view_index_temp,
      zero_temp,
      PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER + PVR_PTEMP_VIEW_INDEX));

   EMIT_INST(pvr_pds_inst_encode_halt(0));

#undef EMIT_INST

   program->temps_used = temp_count;

   if (measure) {
      program->code_size = code_bytes;
      /* This program has no data segment. */
      program->data_size = 0;
   }

   return buffer;
}
/* DrawArrays works in several steps:
*
* 1) load data from draw_indirect buffer

View file

@ -517,6 +517,14 @@ struct pvr_pds_stream_out_terminate_program {
uint32_t stream_out_terminate_pds_code_size;
};
/* Parameters and results for pvr_pds_generate_view_index_init_program(),
 * the PDS program that loads a fixed view index into a ptemp.
 */
struct pvr_pds_view_index_init_program {
/* Input: the immediate value the generated program loads as the view
 * index.
 */
uint16_t view_index;
/* Output, written in PDS_GENERATE_SIZES mode. The generator always reports
 * 0 here.
 */
uint32_t data_size;
/* Output, written in PDS_GENERATE_SIZES mode: sizeof(uint32_t) accumulated
 * once per instruction, i.e. the code size in bytes.
 */
uint32_t code_size;
/* Output, written in PDS_GENERATE_SIZES and PDS_GENERATE_CODE_SEGMENT modes:
 * number of temps obtained through pvr_pds_get_temps().
 */
uint32_t temps_used;
};
/* Structure representing the PDS compute shader program.
* This structure describes the USC code and compute buffers required
* by the PDS compute task loading program
@ -815,6 +823,13 @@ uint32_t *pvr_pds_generate_stream_out_terminate_program(
enum pvr_pds_generate_mode gen_mode,
const struct pvr_device_info *dev_info);
/* Index of the ptemp that carries the view index. The view index init
 * program writes it (as an offset from
 * PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER) and the vertex primary program
 * reads it back when PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED is set.
 */
#define PVR_PTEMP_VIEW_INDEX 4U
/* Generates the PDS program that copies program->view_index into ptemp
 * PVR_PTEMP_VIEW_INDEX.
 *
 * - PDS_GENERATE_SIZES: fills in program->code_size, program->data_size and
 *   program->temps_used; buffer is not written and is returned as passed.
 * - PDS_GENERATE_CODE_SEGMENT: writes the encoded instructions to buffer,
 *   fills in program->temps_used and returns the pointer just past the last
 *   instruction written.
 * - Any other mode: does nothing and returns buffer unchanged.
 */
uint32_t *pvr_pds_generate_view_index_init_program(
struct pvr_pds_view_index_init_program *restrict program,
uint32_t *restrict buffer,
enum pvr_pds_generate_mode gen_mode);
/* Structure representing DrawIndirect PDS programs. */
struct pvr_pds_drawindirect_program {
/* --- Input to pvr_pds_drawindirect_program --- */
@ -965,6 +980,7 @@ struct pvr_pds_descriptor_program_input {
#define PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED BITFIELD_BIT(5U)
#define PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED BITFIELD_BIT(6U)
#define PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED BITFIELD_BIT(7U)
#define PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE BITFIELD_BIT(0U)
@ -1002,6 +1018,7 @@ struct pvr_pds_vertex_primary_program_input {
uint16_t base_instance_register;
uint16_t base_vertex_register;
uint16_t draw_index_register;
uint16_t view_index_register;
};
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_NULL (0)

View file

@ -459,6 +459,7 @@ void pvr_pds_generate_vertex_primary_program(
uint32_t write_base_instance_control = ~0;
uint32_t write_base_vertex_control = ~0;
uint32_t pvr_write_draw_index_control = ~0;
uint32_t write_view_index_control = ~0;
uint32_t ddmad_count = 0;
uint32_t doutw_count = 0;
@ -497,6 +498,8 @@ void pvr_pds_generate_vertex_primary_program(
PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED);
pvr_debug_pds_flag(input_program->flags,
PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED);
pvr_debug_pds_flag(input_program->flags,
PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED);
pvr_debug(" ");
pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
@ -607,6 +610,12 @@ void pvr_pds_generate_vertex_primary_program(
}
}
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED) {
doutw_count++;
write_view_index_control =
pvr_find_constant(const_usage, RESERVE_32BIT, "View index DOUTW Ctrl");
}
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
/* Load absolute instance id into uiInstanceIdTemp. */
PVR_PDS_MODE_TOGGLE(
@ -1451,6 +1460,25 @@ void pvr_pds_generate_vertex_primary_program(
}
}
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VIEW_INDEX_REQUIRED) {
bool last_dma = (++running_dma_count == total_dma_count);
uint32_t data_mask = (PVR_PTEMP_VIEW_INDEX & 1) ? 0x2 : 0x1;
PVR_PDS_MODE_TOGGLE(
code,
instruction,
pvr_encode_direct_write(
&entry_write_state,
last_dma,
false,
R64_C(write_view_index_control),
R64_P(PVR_PTEMP_VIEW_INDEX >> 1),
data_mask,
input_program->view_index_register,
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
dev_info));
}
doutu_address_entry =
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
sizeof(*doutu_address_entry));

View file

@ -98,6 +98,7 @@ static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
util_dynarray_fini(&sub_cmd->gfx.sec_query_indices);
pvr_csb_finish(&sub_cmd->gfx.control_stream);
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.terminate_ctrl_stream);
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.multiview_ctrl_stream);
pvr_bo_suballoc_free(sub_cmd->gfx.depth_bias_bo);
pvr_bo_suballoc_free(sub_cmd->gfx.scissor_bo);
break;
@ -2242,6 +2243,76 @@ pvr_cmd_buffer_process_deferred_clears(struct pvr_cmd_buffer *cmd_buffer)
return VK_SUCCESS;
}
/**
 * Builds a small control stream holding one entry per view.
 *
 * Each entry is a PDS state update pointing at that view's uploaded view
 * index init program, followed by a stream link to \p addr. All entries have
 * the same length, so entry N starts at N * stride.
 *
 * \param device Device owning the per-view init programs.
 * \param addr   Address every entry links to after its state update.
 * \param bo     Out: buffer object holding the baked stream. Only written on
 *               success.
 * \param stride Out: length of one entry as a sum of pvr_cmd_length()
 *               values. Only written on success.
 * \return VK_SUCCESS, or the error reported by pvr_csb_bake().
 */
static VkResult
pvr_csb_gfx_build_view_index_ctrl_stream(struct pvr_device *const device,
                                         pvr_dev_addr_t addr,
                                         struct pvr_bo **bo,
                                         uint32_t *stride)
{
   struct list_head baked_bos;
   struct pvr_csb view_csb;
   uint32_t entry_length;
   VkResult result;

   pvr_csb_init(device, PVR_CMD_STREAM_TYPE_GRAPHICS, &view_csb);

   for (uint32_t view = 0; view < PVR_MAX_MULTIVIEW; ++view) {
      const struct pvr_pds_view_index_init_program *const info =
         &device->view_index_init_info[view];
      const struct pvr_pds_upload *const upload =
         &device->view_index_init_programs[view];

      pvr_csb_set_relocation_mark(&view_csb);

      pvr_csb_emit (&view_csb, VDMCTRL_PDS_STATE0, state0) {
         state0.block_type = ROGUE_VDMCTRL_BLOCK_TYPE_PDS_STATE_UPDATE;
         state0.dm_target = ROGUE_VDMCTRL_DM_TARGET_VDM;
         state0.usc_target = ROGUE_VDMCTRL_USC_TARGET_ALL;
         state0.usc_common_size = 0;
         state0.usc_unified_size = 0;
         state0.pds_temp_size = info->temps_used;
         state0.pds_data_size =
            DIV_ROUND_UP(PVR_DW_TO_BYTES(upload->data_size),
                         ROGUE_VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
      }

      pvr_csb_emit (&view_csb, VDMCTRL_PDS_STATE1, state1) {
         state1.pds_data_addr = upload->pvr_bo->dev_addr;
         state1.sd_type = ROGUE_VDMCTRL_SD_TYPE_PDS;
         state1.sd_next_type = ROGUE_VDMCTRL_SD_TYPE_USC;
      }

      /* The code segment sits directly after the data segment in the
       * upload.
       */
      pvr_csb_emit (&view_csb, VDMCTRL_PDS_STATE2, state2) {
         state2.pds_code_addr.addr =
            upload->pvr_bo->dev_addr.addr + PVR_DW_TO_BYTES(upload->data_size);
      }

      pvr_csb_clear_relocation_mark(&view_csb);

      pvr_csb_emit_link(&view_csb, addr, false);
   }

   result = pvr_csb_bake(&view_csb, &baked_bos);
   if (result != VK_SUCCESS) {
      pvr_csb_finish(&view_csb);
      return result;
   }

   assert(list_is_singular(&baked_bos));
   *bo = list_first_entry(&baked_bos, struct pvr_bo, link);

   /* This needs to be kept in sync with the instructions emitted above. */
   entry_length = pvr_cmd_length(VDMCTRL_PDS_STATE0);
   entry_length += pvr_cmd_length(VDMCTRL_PDS_STATE1);
   entry_length += pvr_cmd_length(VDMCTRL_PDS_STATE2);
   entry_length += pvr_cmd_length(VDMCTRL_STREAM_LINK0);
   entry_length += pvr_cmd_length(VDMCTRL_STREAM_LINK1);
   *stride = entry_length;

   return VK_SUCCESS;
}
VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
{
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
@ -2329,6 +2400,16 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
if (result != VK_SUCCESS)
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
if (gfx_sub_cmd->multiview_enabled) {
result = pvr_csb_gfx_build_view_index_ctrl_stream(
device,
pvr_csb_get_start_address(&gfx_sub_cmd->control_stream),
&gfx_sub_cmd->multiview_ctrl_stream,
&gfx_sub_cmd->multiview_ctrl_stream_stride);
if (result != VK_SUCCESS)
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
}
result = pvr_sub_cmd_gfx_job_init(&device->pdevice->dev_info,
cmd_buffer,
gfx_sub_cmd);
@ -2571,6 +2652,8 @@ VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
sub_cmd->gfx.empty_cmd = true;
sub_cmd->gfx.view_mask =
pvr_render_pass_info_get_view_mask(&state->render_pass_info);
sub_cmd->gfx.multiview_enabled =
state->render_pass_info.pass->multiview_enabled;
if (state->vis_test_enabled)
sub_cmd->gfx.query_pool = state->query_pool;

View file

@ -1896,6 +1896,68 @@ err_free_nop_usc_bo:
return result;
}
/**
 * Generates and uploads one view index init PDS program per view.
 *
 * For each index in [0, PVR_MAX_MULTIVIEW) this fills in
 * device->view_index_init_info[i] and uploads the generated program into
 * device->view_index_init_programs[i].
 *
 * On failure every program uploaded by this call is released and its pvr_bo
 * pointer is reset to NULL, so a later cleanup pass over the same array does
 * not release it a second time.
 * NOTE(review): that relies on pvr_bo_suballoc_free() accepting NULL -
 * confirm.
 *
 * \return VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if the staging buffer
 *         cannot be (re)allocated, or the error from pvr_gpu_upload_pds().
 */
static VkResult
pvr_device_init_view_index_init_programs(struct pvr_device *device)
{
   uint32_t *staging_buffer = NULL;
   /* Initialised so the function is well defined even if the loop body never
    * runs.
    */
   VkResult result = VK_SUCCESS;
   uint32_t uploaded = 0;

   for (uint32_t i = 0; i < PVR_MAX_MULTIVIEW; ++i) {
      struct pvr_pds_view_index_init_program *program =
         &device->view_index_init_info[i];
      uint32_t staging_buffer_size;
      uint32_t *new_staging_buffer;
      uint32_t *code_segment;

      program->view_index = i;

      pvr_pds_generate_view_index_init_program(program,
                                               NULL,
                                               PDS_GENERATE_SIZES);

      /* data_size and code_size are byte counts. */
      staging_buffer_size = program->data_size + program->code_size;

      /* Keep the old pointer until the reallocation succeeds; a failed
       * reallocation leaves the original allocation alive and it still has
       * to be freed below.
       */
      new_staging_buffer = vk_realloc(&device->vk.alloc,
                                      staging_buffer,
                                      staging_buffer_size,
                                      8U,
                                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!new_staging_buffer) {
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         break;
      }

      staging_buffer = new_staging_buffer;

      /* The staging buffer is indexed in dwords, so convert the byte sized
       * data segment length before using it as an offset.
       */
      code_segment = staging_buffer + program->data_size / sizeof(uint32_t);

      pvr_pds_generate_view_index_init_program(program,
                                               staging_buffer,
                                               PDS_GENERATE_DATA_SEGMENT);
      pvr_pds_generate_view_index_init_program(program,
                                               code_segment,
                                               PDS_GENERATE_CODE_SEGMENT);

      result =
         pvr_gpu_upload_pds(device,
                            (program->data_size == 0 ? NULL : staging_buffer),
                            program->data_size / sizeof(uint32_t),
                            16U,
                            code_segment,
                            program->code_size / sizeof(uint32_t),
                            16U,
                            16U,
                            &device->view_index_init_programs[i]);
      if (result != VK_SUCCESS)
         break;

      uploaded++;
   }

   vk_free(&device->vk.alloc, staging_buffer);

   if (result != VK_SUCCESS) {
      /* Only unwind what this call actually uploaded. */
      for (uint32_t u = 0; u < uploaded; ++u) {
         pvr_bo_suballoc_free(device->view_index_init_programs[u].pvr_bo);
         device->view_index_init_programs[u].pvr_bo = NULL;
      }
   }

   return result;
}
static void pvr_device_init_tile_buffer_state(struct pvr_device *device)
{
simple_mtx_init(&device->tile_buffer_state.mtx, mtx_plain);
@ -2104,10 +2166,14 @@ VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
if (result != VK_SUCCESS)
goto err_pvr_free_compute_fence;
result = pvr_device_create_compute_query_programs(device);
result = pvr_device_init_view_index_init_programs(device);
if (result != VK_SUCCESS)
goto err_pvr_free_compute_empty;
result = pvr_device_create_compute_query_programs(device);
if (result != VK_SUCCESS)
goto err_pvr_free_view_index;
result = pvr_device_init_compute_idfwdf_state(device);
if (result != VK_SUCCESS)
goto err_pvr_destroy_compute_query_programs;
@ -2177,6 +2243,10 @@ err_pvr_destroy_compute_query_programs:
err_pvr_free_compute_empty:
pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
err_pvr_free_view_index:
for (uint32_t u = 0; u < PVR_MAX_MULTIVIEW; ++u)
pvr_bo_suballoc_free(device->view_index_init_programs[u].pvr_bo);
err_pvr_free_compute_fence:
pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
@ -2229,6 +2299,10 @@ void pvr_DestroyDevice(VkDevice _device,
pvr_device_finish_compute_idfwdf_state(device);
pvr_device_destroy_compute_query_programs(device);
pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
for (uint32_t u = 0; u < PVR_MAX_MULTIVIEW; ++u)
pvr_bo_suballoc_free(device->view_index_init_programs[u].pvr_bo);
pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
pvr_bo_suballoc_free(device->nop_program.usc);

View file

@ -214,6 +214,10 @@ struct pvr_device {
struct pvr_suballoc_bo *usc;
} nop_program;
struct pvr_pds_view_index_init_program
view_index_init_info[PVR_MAX_MULTIVIEW];
struct pvr_pds_upload view_index_init_programs[PVR_MAX_MULTIVIEW];
/* Issue Data Fence, Wait for Data Fence state. */
struct {
uint32_t usc_shareds;
@ -482,6 +486,9 @@ struct pvr_sub_cmd_gfx {
/* Control stream builder object */
struct pvr_csb control_stream;
struct pvr_bo *multiview_ctrl_stream;
uint32_t multiview_ctrl_stream_stride;
/* Required iff pvr_sub_cmd_gfx_requires_split_submit() returns true. */
struct pvr_bo *terminate_ctrl_stream;
@ -520,6 +527,7 @@ struct pvr_sub_cmd_gfx {
bool has_depth_feedback;
uint32_t view_mask;
bool multiview_enabled;
};
struct pvr_sub_cmd_compute {

View file

@ -288,6 +288,11 @@ pvr_process_graphics_cmd_for_view(struct pvr_device *device,
*/
assert(sub_cmd->terminate_ctrl_stream);
job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr;
} else if (sub_cmd->multiview_enabled) {
original_ctrl_stream_addr = job->ctrl_stream_addr;
job->ctrl_stream_addr.addr =
sub_cmd->multiview_ctrl_stream->vma->dev_addr.addr +
(view_index * PVR_DW_TO_BYTES(sub_cmd->multiview_ctrl_stream_stride));
}
result = pvr_render_job_submit(queue->gfx_ctx,