pvr: add initial driver support for VK_KHR_multiview

Signed-off-by: Luigi Santivetti <luigi.santivetti@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37512>
Luigi Santivetti, 2024-02-01 14:51:52 +00:00, committed by Marge Bot
parent 8eb26e4986
commit a1002a6673
13 changed files with 851 additions and 217 deletions

View file

@ -36,6 +36,7 @@
#define PVR_MAX_QUEUES 2U
#define PVR_MAX_VIEWPORTS 1U
#define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U
#define PVR_MAX_MULTIVIEW 6U
#define PVR_MAX_PUSH_CONSTANTS_SIZE 128U

View file

@ -1496,10 +1496,11 @@ pvr_get_max_layers_covering_target(VkRect2D target_rect,
*/
static inline bool
pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
bool multiview_enabled,
uint32_t rect_count,
const VkClearRect *rects)
{
if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) || multiview_enabled)
return false;
for (uint32_t i = 0; i < rect_count; i++) {
@ -1890,8 +1891,10 @@ static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
/* We'll be emitting to the control stream. */
sub_cmd->empty_cmd = false;
vs_has_rt_id_output =
pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);
vs_has_rt_id_output = pvr_clear_needs_rt_id_output(dev_info,
pass->multiview_enabled,
rect_count,
rects);
/* 4 because we're expecting the USC to output X, Y, Z, and W. */
vs_output_size_in_bytes = PVR_DW_TO_BYTES(4);

View file

@ -634,7 +634,8 @@ err_csb_finish:
static VkResult pvr_setup_texture_state_words(
struct pvr_device *device,
struct pvr_combined_image_sampler_descriptor *descriptor,
const struct pvr_image_view *image_view)
const struct pvr_image_view *image_view,
uint32_t view_index)
{
const struct pvr_image *image = vk_to_pvr_image(image_view->vk.image);
struct pvr_texture_state_info info = {
@ -648,6 +649,7 @@ static VkResult pvr_setup_texture_state_words(
.mip_levels = 1,
.sample_count = image_view->vk.image->samples,
.stride = image->physical_extent.width,
.offset = image->layer_size * view_index,
.addr = image->dev_addr,
};
const uint8_t *const swizzle = pvr_get_format_swizzle(info.format);
@ -682,6 +684,7 @@ static VkResult pvr_setup_texture_state_words(
static VkResult
pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_load_op *load_op,
uint32_t view_index,
pvr_dev_addr_t *const addr_out)
{
const struct pvr_render_pass_info *render_pass_info =
@ -725,7 +728,8 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
if (load_op->clears_loads_state.rt_load_mask & BITFIELD_BIT(i)) {
result = pvr_setup_texture_state_words(cmd_buffer->device,
&texture_states[texture_count],
image_view);
image_view,
view_index);
if (result != VK_SUCCESS)
return result;
@ -786,7 +790,8 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
result = pvr_setup_texture_state_words(cmd_buffer->device,
&texture_states[texture_count],
image_view);
image_view,
view_index);
if (result != VK_SUCCESS)
return result;
@ -919,6 +924,7 @@ static VkResult pvr_load_op_pds_data_create_and_upload(
static VkResult
pvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_load_op *load_op,
uint32_t view_index,
struct pvr_pds_upload *const pds_upload_out)
{
pvr_dev_addr_t constants_addr;
@ -926,6 +932,7 @@ pvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
result = pvr_load_op_constants_create_and_upload(cmd_buffer,
load_op,
view_index,
&constants_addr);
if (result != VK_SUCCESS)
return result;
@ -964,6 +971,63 @@ static void pvr_pds_bgnd_pack_state(
}
}
static inline VkResult pvr_load_op_state_data_create_and_upload_for_view(
struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_load_op *load_op,
uint32_t view_index,
uint64_t pds_reg_values[static const ROGUE_NUM_CR_PDS_BGRND_WORDS])
{
struct pvr_pds_upload load_op_program;
VkResult result;
/* FIXME: Should we free the PDS pixel event data or let it be freed
* when the pool gets emptied?
*/
result = pvr_load_op_data_create_and_upload(cmd_buffer,
load_op,
view_index,
&load_op_program);
if (result != VK_SUCCESS)
return result;
pvr_pds_bgnd_pack_state(load_op, &load_op_program, pds_reg_values);
return VK_SUCCESS;
}
static VkResult pvr_load_op_state_data_create_and_upload(
struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_load_op_state *load_op_state,
struct pvr_view_state *view_state)
{
for (uint32_t i = 0; i < load_op_state->load_op_count; i++) {
const struct pvr_load_op *load_op = &load_op_state->load_ops[i];
uint32_t view_index = load_op->view_indices[0];
uint64_t *pds_reg_values;
VkResult result;
pds_reg_values = view_state->view[view_index].pds_bgnd_reg_values;
result =
pvr_load_op_state_data_create_and_upload_for_view(cmd_buffer,
load_op,
view_index,
pds_reg_values);
if (result != VK_SUCCESS)
return result;
pds_reg_values = view_state->view[view_index].pr_pds_bgnd_reg_values;
result =
pvr_load_op_state_data_create_and_upload_for_view(cmd_buffer,
load_op,
view_index,
pds_reg_values);
if (result != VK_SUCCESS)
return result;
}
return VK_SUCCESS;
}
/**
* \brief Calculates the stride in pixels based on the pitch in bytes and pixel
* format.
@ -991,7 +1055,8 @@ static void pvr_setup_pbe_state(
const bool down_scale,
const uint32_t samples,
uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS],
uint32_t view_index)
{
const struct pvr_image *image = pvr_image_view_get_image(iview);
uint32_t level_pitch = image->mip_levels[iview->vk.base_mip_level].pitch;
@ -1033,9 +1098,10 @@ static void pvr_setup_pbe_state(
/* FIXME: Should we have an inline function to return the address of a mip
* level?
*/
surface_params.addr =
PVR_DEV_ADDR_OFFSET(image->vma->dev_addr,
image->mip_levels[iview->vk.base_mip_level].offset);
surface_params.addr = PVR_DEV_ADDR_OFFSET(
image->vma->dev_addr,
image->layer_size * view_index +
image->mip_levels[iview->vk.base_mip_level].offset);
if (!iview->vk.storage.z_slice_offset) {
surface_params.addr =
@ -1381,6 +1447,7 @@ static void
pvr_setup_emit_state(const struct pvr_device_info *dev_info,
const struct pvr_renderpass_hwsetup_render *hw_render,
struct pvr_render_pass_info *render_pass_info,
uint32_t view_index,
struct pvr_emit_state *emit_state)
{
assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);
@ -1450,7 +1517,8 @@ pvr_setup_emit_state(const struct pvr_device_info *dev_info,
surface->need_resolve,
samples,
emit_state->pbe_cs_words[emit_state->emit_count],
emit_state->pbe_reg_words[emit_state->emit_count]);
emit_state->pbe_reg_words[emit_state->emit_count],
view_index);
emit_state->emit_count += 1;
}
}
@ -1486,7 +1554,6 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
const struct pvr_renderpass_hwsetup_render *hw_render =
&render_pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx];
struct pvr_render_job *job = &sub_cmd->job;
struct pvr_pds_upload pds_pixel_event_program;
struct pvr_framebuffer *framebuffer = render_pass_info->framebuffer;
struct pvr_spm_bgobj_state *spm_bgobj_state =
&framebuffer->spm_bgobj_state_per_render[sub_cmd->hw_render_idx];
@ -1495,6 +1562,12 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
struct pvr_render_target *render_target;
VkResult result;
/* Except for barrier_{store,load}, where the index defaults to zero, the
* view index associated with a gfx job is known and set only at submission
* time.
*/
job->view_state.view_index = 0;
if (sub_cmd->barrier_store) {
/* Store to the SPM scratch buffer. */
@ -1514,15 +1587,46 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
memcpy(job->pbe_reg_words,
&spm_eot_state->pbe_reg_words,
sizeof(job->pbe_reg_words));
job->pds_pixel_event_data_offset =
/* Configure the job view state for a barrier store */
assert(!job->view_state.view_index);
job->view_state.view[0].pds_pixel_event_data_offset =
spm_eot_state->pixel_event_program_data_offset;
job->view_state.force_pds_pixel_event_data_offset_zero = true;
} else {
struct pvr_pds_upload pds_pixel_event_program;
struct pvr_emit_state emit_state = { 0 };
memset(emit_state.tile_buffer_ids,
~0,
sizeof(emit_state.tile_buffer_ids));
pvr_setup_emit_state(dev_info, hw_render, render_pass_info, &emit_state);
u_foreach_bit (view_idx, hw_render->view_mask) {
pvr_setup_emit_state(dev_info,
hw_render,
render_pass_info,
view_idx,
&emit_state);
unsigned pixel_output_width =
pvr_pass_get_pixel_output_width(render_pass_info->pass,
sub_cmd->hw_render_idx,
dev_info);
result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
cmd_buffer,
emit_state.emit_count,
emit_state.pbe_cs_words[0],
emit_state.tile_buffer_ids,
pixel_output_width,
&pds_pixel_event_program);
if (result != VK_SUCCESS)
return result;
/* Configure the job view state */
job->view_state.view[view_idx].pds_pixel_event_data_offset =
pds_pixel_event_program.data_offset;
}
job->z_only_render = !hw_render->eot_surface_count &&
!sub_cmd->frag_has_side_effects &&
@ -1531,23 +1635,6 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
memcpy(job->pbe_reg_words,
emit_state.pbe_reg_words,
sizeof(job->pbe_reg_words));
unsigned pixel_output_width =
pvr_pass_get_pixel_output_width(render_pass_info->pass,
sub_cmd->hw_render_idx,
dev_info);
result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
cmd_buffer,
emit_state.emit_count,
emit_state.pbe_cs_words[0],
emit_state.tile_buffer_ids,
pixel_output_width,
&pds_pixel_event_program);
if (result != VK_SUCCESS)
return result;
job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;
}
if (sub_cmd->barrier_load) {
@ -1556,45 +1643,45 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
/* Load the previously stored render from the SPM scratch buffer. */
STATIC_ASSERT(ARRAY_SIZE(job->pds_bgnd_reg_values) ==
STATIC_ASSERT(ARRAY_SIZE(job->view_state.view[0].pds_bgnd_reg_values) ==
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
typed_memcpy(job->pds_bgnd_reg_values,
typed_memcpy(job->view_state.view[0].pds_bgnd_reg_values,
spm_bgobj_state->pds_reg_values,
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
STATIC_ASSERT(ARRAY_SIZE(job->pr_pds_bgnd_reg_values) ==
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
typed_memcpy(job->pr_pds_bgnd_reg_values,
STATIC_ASSERT(
ARRAY_SIZE(job->view_state.view[0].pr_pds_bgnd_reg_values) ==
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
typed_memcpy(job->view_state.view[0].pr_pds_bgnd_reg_values,
spm_bgobj_state->pds_reg_values,
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
} else if (hw_render->load_op) {
const struct pvr_load_op *load_op = hw_render->load_op;
struct pvr_pds_upload load_op_program;
/* Configure the job view state for a barrier load */
assert(!job->view_state.view_index);
job->view_state.force_pds_bgnd_reg_values_zero = true;
} else if (hw_render->load_op_state) {
const struct pvr_load_op_state *load_op_state = hw_render->load_op_state;
/* We always have at least 1 bit set in the view_mask */
assert(load_op_state->load_op_count);
/* Recalculate Background Object(s). */
/* FIXME: Should we free the PDS pixel event data or let it be freed
* when the pool gets emptied?
*/
result = pvr_load_op_data_create_and_upload(cmd_buffer,
load_op,
&load_op_program);
result = pvr_load_op_state_data_create_and_upload(cmd_buffer,
load_op_state,
&job->view_state);
if (result != VK_SUCCESS)
return result;
job->enable_bg_tag = render_pass_info->enable_bg_tag;
job->process_empty_tiles = render_pass_info->process_empty_tiles;
pvr_pds_bgnd_pack_state(load_op,
&load_op_program,
job->pds_bgnd_reg_values);
}
if (!hw_render->requires_frag_pr) {
memcpy(job->pr_pbe_reg_words,
job->pbe_reg_words,
sizeof(job->pbe_reg_words));
job->pr_pds_pixel_event_data_offset = job->pds_pixel_event_data_offset;
job->view_state.use_pds_pixel_event_data_offset = true;
} else {
memcpy(job->pr_pbe_reg_words,
&spm_eot_state->pbe_reg_words,
@ -1606,7 +1693,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
render_target = pvr_get_render_target(render_pass_info->pass,
framebuffer,
sub_cmd->hw_render_idx);
job->rt_dataset = render_target->rt_dataset;
job->view_state.rt_datasets = &render_target->rt_dataset[0];
job->ctrl_stream_addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
@ -2097,6 +2184,9 @@ pvr_compute_generate_idfwdf(struct pvr_cmd_buffer *cmd_buffer,
pvr_compute_generate_control_stream(csb, sub_cmd, &info);
}
/* TODO: This can be pre-packed and uploaded directly. Would that provide any
* speed up?
*/
void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd_compute *const sub_cmd,
bool deallocate_shareds)
@ -2416,6 +2506,17 @@ pvr_cmd_uses_deferred_cs_cmds(const struct pvr_cmd_buffer *const cmd_buffer)
deferred_control_stream_flags;
}
static inline uint32_t
pvr_render_pass_info_get_view_mask(const struct pvr_render_pass_info *rp_info)
{
const uint32_t hw_render_idx = rp_info->current_hw_subpass;
const struct pvr_render_pass *pass = rp_info->pass;
const struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[hw_render_idx];
return hw_render->view_mask;
}
VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
enum pvr_sub_cmd_type type)
{
@ -2468,6 +2569,8 @@ VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass;
sub_cmd->gfx.framebuffer = state->render_pass_info.framebuffer;
sub_cmd->gfx.empty_cmd = true;
sub_cmd->gfx.view_mask =
pvr_render_pass_info_get_view_mask(&state->render_pass_info);
if (state->vis_test_enabled)
sub_cmd->gfx.query_pool = state->query_pool;
@ -2892,40 +2995,63 @@ static VkResult pvr_cmd_buffer_attachments_setup(
return VK_SUCCESS;
}
static VkResult pvr_render_targets_init(struct pvr_device *device,
struct pvr_render_pass *pass,
struct pvr_framebuffer *framebuffer)
static inline VkResult pvr_render_targets_datasets_create(
struct pvr_device *device,
struct pvr_framebuffer *framebuffer,
const struct pvr_renderpass_hwsetup_render *hw_render,
struct pvr_render_target *render_target)
{
const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
const uint32_t layers =
PVR_HAS_FEATURE(dev_info, gs_rta_support) ? framebuffer->layers : 1;
pthread_mutex_lock(&render_target->mutex);
u_foreach_bit (view_idx, hw_render->view_mask) {
struct pvr_rt_dataset *rt_dataset;
VkResult result;
if (render_target->valid_mask & BITFIELD_BIT(view_idx))
continue;
result = pvr_render_target_dataset_create(device,
framebuffer->width,
framebuffer->height,
hw_render->sample_count,
layers,
&rt_dataset);
if (result != VK_SUCCESS) {
pvr_render_targets_datasets_destroy(render_target);
pthread_mutex_unlock(&render_target->mutex);
return result;
}
render_target->valid_mask |= BITFIELD_BIT(view_idx);
render_target->rt_dataset[view_idx] = rt_dataset;
}
pthread_mutex_unlock(&render_target->mutex);
return VK_SUCCESS;
}
static VkResult pvr_render_targets_init(struct pvr_device *device,
struct pvr_render_pass *pass,
struct pvr_framebuffer *framebuffer)
{
for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
struct pvr_render_target *render_target =
pvr_get_render_target(pass, framebuffer, i);
const struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[i];
VkResult result;
pthread_mutex_lock(&render_target->mutex);
if (!render_target->valid) {
const struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[i];
VkResult result;
result = pvr_render_target_dataset_create(device,
framebuffer->width,
framebuffer->height,
hw_render->sample_count,
layers,
&render_target->rt_dataset);
if (result != VK_SUCCESS) {
pthread_mutex_unlock(&render_target->mutex);
return result;
}
render_target->valid = true;
}
pthread_mutex_unlock(&render_target->mutex);
result = pvr_render_targets_datasets_create(device,
framebuffer,
hw_render,
render_target);
if (result != VK_SUCCESS)
return result;
}
return VK_SUCCESS;
@ -3213,10 +3339,11 @@ static void pvr_emit_clear_words(struct pvr_cmd_buffer *const cmd_buffer,
pvr_csb_clear_relocation_mark(csb);
}
static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd_gfx *sub_cmd,
struct pvr_load_op *load_op,
uint32_t isp_userpass)
static VkResult pvr_cs_write_load_op_for_view(struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd_gfx *sub_cmd,
struct pvr_load_op *load_op,
uint32_t view_index,
uint32_t isp_userpass)
{
const struct pvr_device *device = cmd_buffer->device;
struct pvr_static_clear_ppp_template template =
@ -3228,6 +3355,7 @@ static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer,
result = pvr_load_op_data_create_and_upload(cmd_buffer,
load_op,
view_index,
&shareds_update_program);
if (result != VK_SUCCESS)
return result;
@ -3295,6 +3423,29 @@ static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer,
return VK_SUCCESS;
}
static VkResult pvr_cs_write_load_op(struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd_gfx *sub_cmd,
struct pvr_load_op *load_op,
uint32_t isp_userpass)
{
assert(load_op->view_count);
for (uint32_t i = 0; i < load_op->view_count; i++) {
const uint32_t view_index = load_op->view_indices[i];
VkResult result;
result = pvr_cs_write_load_op_for_view(cmd_buffer,
sub_cmd,
load_op,
view_index,
isp_userpass);
if (result != VK_SUCCESS)
return result;
}
return VK_SUCCESS;
}
void pvr_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo *pRenderPassBeginInfo,
const VkSubpassBeginInfo *pSubpassBeginInfo)

View file

@ -3016,11 +3016,7 @@ static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
uint32_t render_targets_count)
{
for (uint32_t i = 0; i < render_targets_count; i++) {
if (render_targets[i].valid) {
pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
render_targets[i].valid = false;
}
pvr_render_targets_datasets_destroy(&render_targets[i]);
pthread_mutex_destroy(&render_targets[i].mutex);
}
}

View file

@ -1877,6 +1877,9 @@ pvr_can_combine_with_render(const struct pvr_device_info *dev_info,
sp_dsts->color = NULL;
new_alloc->tile_buffers = NULL;
if (ctx->hw_render && (ctx->hw_render->view_mask != subpass->view_mask))
return false;
/* The hardware doesn't support replicating the stencil, so we need to store
* the depth to memory if a stencil attachment is used as an input
* attachment.
@ -2060,6 +2063,7 @@ pvr_merge_subpass(const struct pvr_device *device,
ctx->hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
ctx->hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
ctx->hw_render->sample_count = input_subpass->sample_count;
ctx->hw_render->view_mask = input_subpass->view_mask;
}
/* Allocate a new subpass in the in-progress render. */

View file

@ -255,9 +255,12 @@ struct pvr_renderpass_hwsetup_render {
/* true if this HW render has lasting effects on its attachments. */
bool has_side_effects;
struct pvr_load_op *load_op;
bool requires_frag_pr;
/* View mask for multiview. */
uint32_t view_mask;
struct pvr_load_op_state *load_op_state;
};
struct pvr_renderpass_hw_map {

View file

@ -992,9 +992,11 @@ static void pvr_geom_state_stream_init(struct pvr_render_ctx *ctx,
stream_ptr += pvr_cmd_length(CR_PPP_CTRL);
pvr_csb_pack (stream_ptr, CR_TE_PSG, value) {
struct pvr_rt_dataset *rt_dataset =
job->view_state.rt_datasets[job->view_state.view_index];
value.completeonterminate = job->geometry_terminate;
value.region_stride = job->rt_dataset->rgn_headers_stride /
value.region_stride = rt_dataset->rgn_headers_stride /
ROGUE_CR_TE_PSG_REGION_STRIDE_UNIT_SIZE;
value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942);
@ -1015,9 +1017,9 @@ static void pvr_geom_state_stream_init(struct pvr_render_ctx *ctx,
}
stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0);
/* clang-format off */
pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value);
/* clang-format on */
pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value) {
value.idx = job->view_state.view_index;
}
stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
state->fw_stream_len = (uint8_t *)stream_ptr - (uint8_t *)state->fw_stream;
@ -1070,7 +1072,8 @@ pvr_geom_state_flags_init(const struct pvr_render_job *const job,
struct pvr_winsys_geometry_state_flags *flags)
{
*flags = (struct pvr_winsys_geometry_state_flags){
.is_first_geometry = !job->rt_dataset->need_frag,
.is_first_geometry =
!job->view_state.rt_datasets[job->view_state.view_index]->need_frag,
.is_last_geometry = job->geometry_terminate,
.use_single_core = job->frag_uses_atomic_ops,
};
@ -1147,7 +1150,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
const struct pvr_device_runtime_info *dev_runtime_info =
&pdevice->dev_runtime_info;
const struct pvr_device_info *dev_info = &pdevice->dev_info;
const struct pvr_rt_dataset *rt_dataset = job->rt_dataset;
const struct pvr_rt_dataset *rt_dataset =
job->view_state.rt_datasets[job->view_state.view_index];
const enum ROGUE_CR_ISP_AA_MODE_TYPE isp_aa_mode =
pvr_cr_isp_aa_mode_type(job->samples);
struct pvr_rt_mtile_info tiling_info = { 0 };
@ -1155,6 +1159,7 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
enum ROGUE_CR_ZLS_FORMAT_TYPE zload_format = ROGUE_CR_ZLS_FORMAT_TYPE_F32Z;
uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
uint32_t *stream_len_ptr = stream_ptr;
uint32_t view_index;
uint32_t pixel_ctl;
uint32_t isp_ctl;
@ -1296,20 +1301,28 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
}
stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
STATIC_ASSERT(ARRAY_SIZE(job->pds_bgnd_reg_values) ==
STATIC_ASSERT(ARRAY_SIZE(job->view_state.view[0].pds_bgnd_reg_values) ==
ROGUE_NUM_CR_PDS_BGRND_WORDS);
STATIC_ASSERT(sizeof(job->pds_bgnd_reg_values[0]) == sizeof(uint64_t));
STATIC_ASSERT(sizeof(job->view_state.view[0].pds_bgnd_reg_values[0]) ==
sizeof(uint64_t));
if (job->view_state.force_pds_bgnd_reg_values_zero)
view_index = 0;
else
view_index = job->view_state.view_index;
memcpy(stream_ptr,
job->pds_bgnd_reg_values,
sizeof(job->pds_bgnd_reg_values));
job->view_state.view[view_index].pds_bgnd_reg_values,
sizeof(job->view_state.view[view_index].pds_bgnd_reg_values));
stream_ptr += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
STATIC_ASSERT(ARRAY_SIZE(job->pr_pds_bgnd_reg_values) ==
STATIC_ASSERT(ARRAY_SIZE(job->view_state.view[0].pr_pds_bgnd_reg_values) ==
ROGUE_NUM_CR_PDS_BGRND_WORDS);
STATIC_ASSERT(sizeof(job->pr_pds_bgnd_reg_values[0]) == sizeof(uint64_t));
STATIC_ASSERT(sizeof(job->view_state.view[0].pr_pds_bgnd_reg_values[0]) ==
sizeof(uint64_t));
memcpy(stream_ptr,
job->pr_pds_bgnd_reg_values,
sizeof(job->pr_pds_bgnd_reg_values));
job->view_state.view[view_index].pr_pds_bgnd_reg_values,
sizeof(job->view_state.view[view_index].pr_pds_bgnd_reg_values));
stream_ptr += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
#undef DWORDS_PER_U64
@ -1445,9 +1458,9 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
stream_ptr += pvr_cmd_length(KMD_STREAM_PIXEL_PHANTOM);
}
/* clang-format off */
pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value);
/* clang-format on */
pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value) {
value.idx = job->view_state.view_index;
}
stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
/* Make sure that the pvr_frag_km_...() function is returning the correct
@ -1456,8 +1469,14 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info));
if (job->view_state.force_pds_pixel_event_data_offset_zero)
view_index = 0;
else
view_index = job->view_state.view_index;
pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) {
value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset);
value.addr = PVR_DEV_ADDR(
job->view_state.view[view_index].pds_pixel_event_data_offset);
}
stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
@ -1599,6 +1618,14 @@ static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
pvr_frag_km_stream_pbe_reg_words_offset(dev_info);
const uint32_t eot_data_addr_byte_offset =
pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info);
const uint32_t view_index =
job->view_state.force_pds_pixel_event_data_offset_zero
? 0
: job->view_state.view_index;
const uint32_t pr_pds_pixel_event_data_offset =
job->view_state.use_pds_pixel_event_data_offset
? job->view_state.view[view_index].pds_pixel_event_data_offset
: job->pr_pds_pixel_event_data_offset;
/* Massive copy :( */
*state = *frag;
@ -1613,10 +1640,11 @@ static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
assert(state->fw_stream_len >=
eot_data_addr_byte_offset +
PVR_DW_TO_BYTES(pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA)));
pvr_csb_pack ((uint32_t *)&state->fw_stream[eot_data_addr_byte_offset],
CR_EVENT_PIXEL_PDS_DATA,
eot_pds_data) {
eot_pds_data.addr = PVR_DEV_ADDR(job->pr_pds_pixel_event_data_offset);
eot_pds_data.addr = PVR_DEV_ADDR(pr_pds_pixel_event_data_offset);
}
}
@ -1629,8 +1657,10 @@ static void pvr_render_job_ws_submit_info_init(
{
memset(submit_info, 0, sizeof(*submit_info));
submit_info->rt_dataset = job->rt_dataset->ws_rt_dataset;
submit_info->rt_data_idx = job->rt_dataset->rt_data_idx;
submit_info->rt_dataset =
job->view_state.rt_datasets[job->view_state.view_index]->ws_rt_dataset;
submit_info->rt_data_idx =
job->view_state.rt_datasets[job->view_state.view_index]->rt_data_idx;
submit_info->frame_num = ctx->device->global_queue_present_count;
submit_info->job_num = ctx->device->global_cmd_buffer_submit_count;
@ -1671,7 +1701,8 @@ VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
struct vk_sync *signal_sync_geom,
struct vk_sync *signal_sync_frag)
{
struct pvr_rt_dataset *rt_dataset = job->rt_dataset;
struct pvr_rt_dataset *rt_dataset =
job->view_state.rt_datasets[job->view_state.view_index];
struct pvr_winsys_render_submit_info submit_info;
struct pvr_device *device = ctx->device;
VkResult result;

View file

@ -71,8 +71,6 @@ struct pvr_rt_mtile_info {
* (although it doesn't subclass).
*/
struct pvr_render_job {
struct pvr_rt_dataset *rt_dataset;
struct {
bool run_frag : 1;
bool geometry_terminate : 1;
@ -88,7 +86,7 @@ struct pvr_render_job {
bool z_only_render : 1;
};
uint32_t pds_pixel_event_data_offset;
/* The PDS pixel event for partial renders does not depend on the view index. */
uint32_t pr_pds_pixel_event_data_offset;
pvr_dev_addr_t ctrl_stream_addr;
@ -154,8 +152,33 @@ struct pvr_render_job {
"CR_PDS_BGRND3_SIZEINFO cannot be stored in uint64_t");
static_assert(ROGUE_NUM_CR_PDS_BGRND_WORDS == 3,
"Cannot store all CR_PDS_BGRND words");
uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
uint64_t pr_pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
struct pvr_view_state {
struct {
uint32_t pds_pixel_event_data_offset;
uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
uint64_t pr_pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
} view[PVR_MAX_MULTIVIEW];
/* True if pds_pixel_event_data_offset should be taken from the first
* element of the view array. Otherwise view_index should be used.
*/
bool force_pds_pixel_event_data_offset_zero : 1;
/* True if a partial render job uses the same EOT pixel event program data
* as the fragment job, rather than the data from the scratch buffer.
*/
bool use_pds_pixel_event_data_offset : 1;
/* True if pds_bgnd_reg_values should be taken from the first
* element of the view array. Otherwise view_index should be used.
*/
bool force_pds_bgnd_reg_values_zero : 1;
struct pvr_rt_dataset **rt_datasets;
uint32_t view_index;
} view_state;
};
void pvr_rt_mtile_info_init(const struct pvr_device_info *dev_info,

View file

@ -384,32 +384,90 @@ pvr_subpass_load_op_init(struct pvr_device *device,
return result;
}
load_op->view_count = 0;
u_foreach_bit (view_idx, hw_render->view_mask) {
load_op->view_indices[load_op->view_count] = view_idx;
load_op->view_count++;
}
hw_render->subpasses[hw_subpass_idx].load_op = load_op;
return VK_SUCCESS;
}
static VkResult
pvr_render_load_op_init(struct pvr_device *device,
const VkAllocationCallbacks *allocator,
const struct pvr_render_pass *pass,
struct pvr_renderpass_hwsetup_render *hw_render)
struct pvr_per_view_attachment_first_use_info {
uint32_t *first_subpass[PVR_MAX_MULTIVIEW];
uint32_t *first_subpass_memory;
};
/**
* \brief Returns true if a clear op is needed instead of the load op load
* reported by the hw render.
*
* The hw render isn't aware of multiview renders, so it thinks we're reusing
* the attachment from a previous subpass even when this is the first time the
* attachment is used in the render pass; as a result a clear op gets reported
* as a load op load instead.
*/
/* FIXME: Investigate whether we can change the HW render code so it reports
* the correct load operation. This will mean we can get rid of struct
* pvr_per_view_attachment_first_use_info and struct pvr_load_op_state.
* Instead we'll be able to have a single render struct load_op like we do for
* subpasses.
*/
static bool pvr_render_load_op_multiview_load_should_be_clear(
const struct pvr_render_pass *pass,
const struct pvr_renderpass_hwsetup_render *hw_render,
uint32_t hw_render_index,
const struct pvr_renderpass_colorinit *color_init,
const struct pvr_per_view_attachment_first_use_info *first_use_info,
uint32_t view_index)
{
VkResult result;
uint32_t first_use_view_index;
struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
allocator,
sizeof(*load_op),
8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!load_op)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (!pass->multiview_enabled)
return false;
/* Check we have a load op load to see if we might need to correct the hw
* render.
*/
if (color_init->op != VK_ATTACHMENT_LOAD_OP_LOAD)
return false;
first_use_view_index =
first_use_info->first_subpass[view_index][color_init->index];
/* Check that we're looking at the render where the attachment is used for
* the first time.
*/
if (first_use_view_index != hw_render_index)
return false;
/* Check that the original load op was a clear op. */
if (pass->attachments[color_init->index].load_op !=
VK_ATTACHMENT_LOAD_OP_CLEAR) {
return false;
}
return true;
}
static VkResult pvr_render_load_op_init(
struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_load_op *const load_op,
const struct pvr_render_pass *pass,
const struct pvr_renderpass_hwsetup_render *hw_render,
uint32_t hw_render_index,
uint32_t view_index,
const struct pvr_per_view_attachment_first_use_info *first_use_info)
{
load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;
assert(hw_render->color_init_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];
bool multiview_load_op_should_be_clear = false;
assert(color_init->index < pass->attachment_count);
load_op->clears_loads_state.dest_vk_format[i] =
@ -418,37 +476,115 @@ pvr_render_load_op_init(struct pvr_device *device,
if (pass->attachments[color_init->index].sample_count > 1)
load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);
if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
else if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
multiview_load_op_should_be_clear =
pvr_render_load_op_multiview_load_should_be_clear(pass,
hw_render,
hw_render_index,
color_init,
first_use_info,
view_index);
if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR ||
multiview_load_op_should_be_clear) {
load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
} else if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD) {
load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
}
}
load_op->is_hw_object = true;
load_op->hw_render = hw_render;
load_op->clears_loads_state.mrt_setup = &hw_render->init_setup;
load_op->view_indices[0] = view_index;
load_op->view_count = 1;
result = pvr_load_op_shader_generate(device, allocator, load_op);
if (result != VK_SUCCESS) {
vk_free2(&device->vk.alloc, allocator, load_op);
return result;
}
return pvr_load_op_shader_generate(device, allocator, load_op);
}
hw_render->load_op = load_op;
return VK_SUCCESS;
static void pvr_load_op_fini(struct pvr_load_op *load_op)
{
pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo);
pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);
pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);
}
static void pvr_load_op_destroy(struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_load_op *load_op)
{
pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo);
pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);
pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);
pvr_load_op_fini(load_op);
vk_free2(&device->vk.alloc, allocator, load_op);
}
static void
pvr_render_load_op_state_destroy(struct pvr_device *device,
const VkAllocationCallbacks *pAllocator,
struct pvr_load_op_state *load_op_state)
{
if (!load_op_state)
return;
while (load_op_state->load_op_count--) {
const uint32_t load_op_idx = load_op_state->load_op_count;
struct pvr_load_op *load_op = &load_op_state->load_ops[load_op_idx];
pvr_load_op_fini(load_op);
}
vk_free2(&device->vk.alloc, pAllocator, load_op_state);
}
static VkResult pvr_render_load_op_state_create(
struct pvr_device *device,
const VkAllocationCallbacks *allocator,
const struct pvr_render_pass *pass,
const struct pvr_renderpass_hwsetup_render *hw_render,
uint32_t hw_render_index,
const struct pvr_per_view_attachment_first_use_info *first_use_info,
struct pvr_load_op_state **const load_op_state_out)
{
const uint32_t view_count = util_bitcount(hw_render->view_mask);
struct pvr_load_op_state *load_op_state;
struct pvr_load_op *load_ops;
VkResult result;
VK_MULTIALLOC(ma);
vk_multialloc_add(&ma, &load_op_state, __typeof__(*load_op_state), 1);
vk_multialloc_add(&ma, &load_ops, __typeof__(*load_ops), view_count);
if (!vk_multialloc_zalloc(&ma, allocator, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
load_op_state->load_ops = load_ops;
u_foreach_bit (view_idx, hw_render->view_mask) {
struct pvr_load_op *const load_op =
&load_op_state->load_ops[load_op_state->load_op_count];
result = pvr_render_load_op_init(device,
allocator,
load_op,
pass,
hw_render,
hw_render_index,
view_idx,
first_use_info);
if (result != VK_SUCCESS)
goto err_load_op_state_destroy;
load_op_state->load_op_count++;
}
*load_op_state_out = load_op_state;
return VK_SUCCESS;
err_load_op_state_destroy:
pvr_render_load_op_state_destroy(device, allocator, load_op_state);
return result;
}
#define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info) \
({ \
int __ret = PVR_MAX_TILE_BUFFER_COUNT; \
@ -486,29 +622,94 @@ pvr_is_load_op_needed(const struct pvr_render_pass *pass,
return false;
}
static void
pvr_render_pass_load_ops_cleanup(struct pvr_device *device,
const VkAllocationCallbacks *pAllocator,
struct pvr_render_pass *pass)
static VkResult pvr_per_view_attachment_first_use_info_init(
struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_render_pass *pass,
struct pvr_per_view_attachment_first_use_info *first_use_info)
{
if (!pass)
return;
size_t alloc_size;
for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[i];
if (!pass->attachment_count) {
memset(first_use_info, 0, sizeof(*first_use_info));
for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
if (hw_render->subpasses[j].load_op) {
pvr_load_op_destroy(device,
pAllocator,
hw_render->subpasses[j].load_op);
return VK_SUCCESS;
}
STATIC_ASSERT(ARRAY_SIZE(first_use_info->first_subpass) ==
PVR_MAX_MULTIVIEW);
alloc_size =
sizeof(first_use_info->first_subpass_memory[0]) * pass->attachment_count;
alloc_size *= ARRAY_SIZE(first_use_info->first_subpass);
first_use_info->first_subpass_memory =
vk_zalloc2(&device->vk.alloc,
allocator,
alloc_size,
4,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!first_use_info->first_subpass_memory)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
#define PVR_SUBPASS_INVALID (~0U)
for (uint32_t i = 0; i < ARRAY_SIZE(first_use_info->first_subpass); i++) {
first_use_info->first_subpass[i] =
&first_use_info->first_subpass_memory[i * pass->attachment_count];
for (uint32_t j = 0; j < pass->attachment_count; j++)
first_use_info->first_subpass[i][j] = PVR_SUBPASS_INVALID;
}
for (uint32_t subpass_idx = 0; subpass_idx < pass->subpass_count;
subpass_idx++) {
struct pvr_render_subpass *const subpass = &pass->subpasses[subpass_idx];
u_foreach_bit (view_idx, subpass->view_mask) {
for (uint32_t i = 0; i < subpass->color_count; i++) {
const uint32_t attach_idx = subpass->color_attachments[i];
uint32_t *first_use =
&first_use_info->first_subpass[view_idx][attach_idx];
if (attach_idx < pass->attachment_count &&
*first_use == PVR_SUBPASS_INVALID) {
*first_use = subpass_idx;
}
}
for (uint32_t i = 0; i < subpass->input_count; i++) {
const uint32_t input_attach_idx =
subpass->input_attachments[i].attachment_idx;
uint32_t *first_use =
&first_use_info->first_subpass[view_idx][input_attach_idx];
if (input_attach_idx < pass->attachment_count &&
*first_use == PVR_SUBPASS_INVALID) {
*first_use = subpass_idx;
}
}
if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
const uint32_t ds_attach_idx = subpass->depth_stencil_attachment;
uint32_t *first_use =
&first_use_info->first_subpass[view_idx][ds_attach_idx];
if (*first_use == PVR_SUBPASS_INVALID)
*first_use = subpass_idx;
}
}
if (hw_render->load_op)
pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
}
#undef PVR_SUBPASS_INVALID
return VK_SUCCESS;
}
static inline void pvr_per_view_attachment_first_use_info_fini(
struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_per_view_attachment_first_use_info *first_use_info)
{
vk_free2(&device->vk.alloc, allocator, first_use_info->first_subpass_memory);
}
static inline VkResult pvr_render_add_missing_output_register_write(
@ -551,55 +752,173 @@ static inline VkResult pvr_render_add_missing_output_register_write(
return VK_SUCCESS;
}
static inline void
pvr_subpass_load_op_cleanup(struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_renderpass_hwsetup_render *hw_render,
uint32_t subpass_count)
{
while (subpass_count--) {
const uint32_t subpass_idx = subpass_count;
if (hw_render->subpasses[subpass_idx].load_op) {
pvr_load_op_destroy(device,
allocator,
hw_render->subpasses[subpass_idx].load_op);
}
}
}
static inline VkResult
pvr_subpass_load_op_setup(struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_render_pass *pass,
struct pvr_renderpass_hwsetup_render *hw_render)
{
for (uint32_t i = 0; i < hw_render->subpass_count; i++) {
VkResult result;
if (!pvr_is_load_op_needed(pass, hw_render, i))
continue;
result = pvr_subpass_load_op_init(device, allocator, pass, hw_render, i);
if (result != VK_SUCCESS) {
/* pvr_subpass_load_op_setup() is responsible for cleaning
* up all load_ops created in this loop for this hw_render.
*/
pvr_subpass_load_op_cleanup(device, allocator, hw_render, i);
return result;
}
}
return VK_SUCCESS;
}
static inline VkResult pvr_hw_render_load_ops_setup(
struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_render_pass *pass,
struct pvr_renderpass_hwsetup_render *hw_render,
uint32_t hw_render_idx,
struct pvr_per_view_attachment_first_use_info *first_use_info)
{
VkResult result;
if (hw_render->tile_buffers_count) {
result = pvr_device_tile_buffer_ensure_cap(
device,
hw_render->tile_buffers_count,
hw_render->eot_setup.tile_buffer_size);
if (result != VK_SUCCESS)
return result;
}
assert(!hw_render->load_op_state);
if (hw_render->color_init_count != 0U) {
struct pvr_load_op_state *load_op_state = NULL;
result =
pvr_render_add_missing_output_register_write(hw_render, allocator);
if (result != VK_SUCCESS)
return result;
result = pvr_render_load_op_state_create(device,
allocator,
pass,
hw_render,
hw_render_idx,
first_use_info,
&load_op_state);
if (result != VK_SUCCESS)
return result;
hw_render->load_op_state = load_op_state;
}
result = pvr_subpass_load_op_setup(device, allocator, pass, hw_render);
if (result != VK_SUCCESS) {
/* pvr_hw_render_load_ops_setup() is responsible for cleaning up only
* one load_op_state for this hw_render.
*/
pvr_render_load_op_state_destroy(device,
allocator,
hw_render->load_op_state);
return result;
}
return VK_SUCCESS;
}
static void
pvr_render_pass_load_ops_cleanup(struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_render_pass *pass,
uint32_t hw_render_count)
{
while (hw_render_count--) {
const uint32_t hw_render_idx = hw_render_count;
struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[hw_render_idx];
pvr_subpass_load_op_cleanup(device,
allocator,
hw_render,
hw_render->subpass_count);
pvr_render_load_op_state_destroy(device,
allocator,
hw_render->load_op_state);
}
}
static VkResult
pvr_render_pass_load_ops_setup(struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_render_pass *pass)
{
struct pvr_per_view_attachment_first_use_info first_use_info;
uint32_t hw_render_idx;
VkResult result;
for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
result = pvr_per_view_attachment_first_use_info_init(device,
allocator,
pass,
&first_use_info);
if (result != VK_SUCCESS)
goto err_return;
for (hw_render_idx = 0; hw_render_idx < pass->hw_setup->render_count;
hw_render_idx++) {
struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[i];
&pass->hw_setup->renders[hw_render_idx];
if (hw_render->tile_buffers_count) {
result = pvr_device_tile_buffer_ensure_cap(
device,
hw_render->tile_buffers_count,
hw_render->eot_setup.tile_buffer_size);
if (result != VK_SUCCESS)
goto err_load_op_cleanup;
}
assert(!hw_render->load_op);
if (hw_render->color_init_count != 0U) {
result =
pvr_render_add_missing_output_register_write(hw_render, allocator);
if (result != VK_SUCCESS)
goto err_load_op_cleanup;
result = pvr_render_load_op_init(device, allocator, pass, hw_render);
if (result != VK_SUCCESS)
goto err_load_op_cleanup;
}
for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
if (!pvr_is_load_op_needed(pass, hw_render, j))
continue;
result =
pvr_subpass_load_op_init(device, allocator, pass, hw_render, j);
if (result != VK_SUCCESS)
goto err_load_op_cleanup;
}
result = pvr_hw_render_load_ops_setup(device,
allocator,
pass,
hw_render,
hw_render_idx,
&first_use_info);
if (result != VK_SUCCESS)
goto err_pvr_render_pass_load_ops_cleanup;
}
pvr_per_view_attachment_first_use_info_fini(device,
allocator,
&first_use_info);
return VK_SUCCESS;
err_load_op_cleanup:
pvr_render_pass_load_ops_cleanup(device, allocator, pass);
err_pvr_render_pass_load_ops_cleanup:
/* pvr_render_pass_load_ops_setup() is responsible for cleaning
* up all load_ops created in this loop for each hw_render.
*/
pvr_render_pass_load_ops_cleanup(device, allocator, pass, hw_render_idx);
pvr_per_view_attachment_first_use_info_fini(device,
allocator,
&first_use_info);
err_return:
return result;
}
@ -718,6 +1037,13 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
}
}
/* Multiview is considered enabled for all subpasses when every subpass's
* viewMask is non-zero. Assume this from the first subpass now and assert
* later that it holds for each subpass's viewMask.
*/
pass->multiview_enabled = pass->subpass_count &&
pCreateInfo->pSubpasses[0].viewMask;
/* Assign reference pointers to lists, and fill in the attachments list, we
* need to re-walk the dependencies array later to fill the per-subpass
* dependencies lists in.
@ -727,6 +1053,12 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
struct pvr_render_subpass *subpass = &pass->subpasses[i];
subpass->pipeline_bind_point = desc->pipelineBindPoint;
subpass->view_mask = desc->viewMask;
assert(!pass->multiview_enabled || subpass->view_mask);
if (!pass->multiview_enabled)
subpass->view_mask = 1;
/* From the Vulkan spec. 1.3.265
* VUID-VkSubpassDescription2-multisampledRenderToSingleSampled-06872:
@ -873,7 +1205,10 @@ void pvr_DestroyRenderPass(VkDevice _device,
if (!pass)
return;
pvr_render_pass_load_ops_cleanup(device, allocator, pass);
pvr_render_pass_load_ops_cleanup(device,
allocator,
pass,
pass->hw_setup->render_count);
pvr_destroy_renderpass_hwsetup(allocator, pass->hw_setup);
vk_object_base_finish(&pass->base);
vk_free2(&device->vk.alloc, pAllocator, pass);

View file

@ -2834,11 +2834,7 @@ pvr_create_renderpass_state(const VkGraphicsPipelineCreateInfo *const info)
return (struct vk_render_pass_state){
.attachments = attachments,
/* TODO: This is only needed for VK_KHR_create_renderpass2 (or core 1.2),
* which is not currently supported.
*/
.view_mask = 0,
.view_mask = subpass->view_mask,
};
}

View file

@ -518,6 +518,8 @@ struct pvr_sub_cmd_gfx {
bool wait_on_previous_transfer;
bool has_depth_feedback;
uint32_t view_mask;
};
struct pvr_sub_cmd_compute {
@ -1009,11 +1011,11 @@ struct pvr_query_info {
};
struct pvr_render_target {
struct pvr_rt_dataset *rt_dataset;
struct pvr_rt_dataset *rt_dataset[PVR_MAX_MULTIVIEW];
pthread_mutex_t mutex;
bool valid;
uint32_t valid_mask;
};
struct pvr_framebuffer {
@ -1101,6 +1103,9 @@ struct pvr_render_subpass {
uint32_t isp_userpass;
VkPipelineBindPoint pipeline_bind_point;
/* View mask for multiview. */
uint32_t view_mask;
};
struct pvr_render_pass {
@ -1123,6 +1128,13 @@ struct pvr_render_pass {
/* The maximum number of tile buffers to use in any subpass. */
uint32_t max_tilebuffer_count;
/* VkSubpassDescription2::viewMask or 1 when non-multiview
*
* To determine whether multiview is enabled, check
* pvr_render_pass::multiview_enabled.
*/
bool multiview_enabled;
};
/* Max render targets for the clears loads state in load op.
@ -1166,6 +1178,10 @@ struct pvr_load_op {
const struct usc_mrt_setup *mrt_setup;
} clears_loads_state;
uint32_t view_indices[PVR_MAX_MULTIVIEW];
uint32_t view_count;
};
#define CHECK_MASK_SIZE(_struct_type, _field_name, _nr_bits) \
@ -1185,6 +1201,15 @@ CHECK_MASK_SIZE(pvr_load_op,
#undef CHECK_MASK_SIZE
struct pvr_load_op_state {
uint32_t load_op_count;
/* Load op array indexed by HW render view (not by the index in the view
* mask).
*/
struct pvr_load_op *load_ops;
};
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
const struct pvr_device_info *dev_info,
const struct pvr_device_runtime_info *dev_runtime_info,
@ -1428,6 +1453,20 @@ void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
const struct pvr_renderpass_hwsetup_subpass *
pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass);
static inline void
pvr_render_targets_datasets_destroy(struct pvr_render_target *render_target)
{
u_foreach_bit (valid_idx, render_target->valid_mask) {
struct pvr_rt_dataset *rt_dataset = render_target->rt_dataset[valid_idx];
if (rt_dataset && render_target->valid_mask & BITFIELD_BIT(valid_idx))
pvr_render_target_dataset_destroy(rt_dataset);
render_target->rt_dataset[valid_idx] = NULL;
render_target->valid_mask &= ~BITFIELD_BIT(valid_idx);
}
}
VK_DEFINE_HANDLE_CASTS(pvr_cmd_buffer,
vk.base,
VkCommandBuffer,

View file

@ -394,6 +394,24 @@ void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
};
}
static inline const uint32_t
pvr_cmd_buffer_state_get_view_count(const struct pvr_cmd_buffer_state *state)
{
const struct pvr_render_pass_info *render_pass_info =
&state->render_pass_info;
const struct pvr_sub_cmd_gfx *gfx_sub_cmd = &state->current_sub_cmd->gfx;
const uint32_t hw_render_idx = gfx_sub_cmd->hw_render_idx;
const struct pvr_renderpass_hwsetup_render *hw_render =
&render_pass_info->pass->hw_setup->renders[hw_render_idx];
const uint32_t view_count = util_bitcount(hw_render->view_mask);
assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
/* hw_render view masks always have at least 1 bit set. */
assert(view_count);
return view_count;
}
void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t query,
@ -401,6 +419,7 @@ void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
{
VK_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
uint32_t view_count = 1;
VK_FROM_HANDLE(pvr_query_pool, pool, queryPool);
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
@ -435,6 +454,8 @@ void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
state->current_sub_cmd->gfx.barrier_store = false;
state->current_sub_cmd->gfx.query_pool = pool;
}
view_count = pvr_cmd_buffer_state_get_view_count(state);
}
state->query_pool = pool;
@ -443,7 +464,8 @@ void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
state->dirty.vis_test = true;
/* Add the index to the list for this render. */
util_dynarray_append(&state->query_indices, __typeof__(query), query);
for (uint32_t i = 0; i < view_count; i++)
util_dynarray_append(&state->query_indices, __typeof__(query), query);
}
void pvr_CmdEndQuery(VkCommandBuffer commandBuffer,

View file

@ -213,16 +213,23 @@ static void pvr_update_job_syncs(struct pvr_device *device,
queue->last_job_signal_sync[submitted_job_type] = new_signal_sync;
}
static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd_gfx *sub_cmd)
static VkResult
pvr_process_graphics_cmd_for_view(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd_gfx *sub_cmd,
uint32_t view_index)
{
pvr_dev_addr_t original_ctrl_stream_addr = { 0 };
struct pvr_render_job *job = &sub_cmd->job;
struct vk_sync *geom_signal_sync;
struct vk_sync *frag_signal_sync = NULL;
VkResult result;
job->ds.addr =
PVR_DEV_ADDR_OFFSET(job->ds.addr, job->ds.stride * view_index);
job->view_state.view_index = view_index;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
@ -231,7 +238,7 @@ static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
if (result != VK_SUCCESS)
return result;
if (sub_cmd->job.run_frag) {
if (job->run_frag) {
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
@ -254,11 +261,11 @@ static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
* and if geometry_terminate is false this kick can't have a fragment
* stage without another terminating geometry kick.
*/
assert(sub_cmd->job.geometry_terminate && sub_cmd->job.run_frag);
assert(job->geometry_terminate && job->run_frag);
/* First submit must not touch fragment work. */
sub_cmd->job.geometry_terminate = false;
sub_cmd->job.run_frag = false;
job->geometry_terminate = false;
job->run_frag = false;
result =
pvr_render_job_submit(queue->gfx_ctx,
@ -268,20 +275,19 @@ static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
NULL,
NULL);
sub_cmd->job.geometry_terminate = true;
sub_cmd->job.run_frag = true;
job->geometry_terminate = true;
job->run_frag = true;
if (result != VK_SUCCESS)
goto err_destroy_frag_sync;
original_ctrl_stream_addr = sub_cmd->job.ctrl_stream_addr;
original_ctrl_stream_addr = job->ctrl_stream_addr;
/* Second submit contains only a trivial control stream to terminate the
* geometry work.
*/
assert(sub_cmd->terminate_ctrl_stream);
sub_cmd->job.ctrl_stream_addr =
sub_cmd->terminate_ctrl_stream->vma->dev_addr;
job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr;
}
result = pvr_render_job_submit(queue->gfx_ctx,
@ -292,14 +298,14 @@ static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
frag_signal_sync);
if (original_ctrl_stream_addr.addr > 0)
sub_cmd->job.ctrl_stream_addr = original_ctrl_stream_addr;
job->ctrl_stream_addr = original_ctrl_stream_addr;
if (result != VK_SUCCESS)
goto err_destroy_frag_sync;
pvr_update_job_syncs(device, queue, geom_signal_sync, PVR_JOB_TYPE_GEOM);
if (sub_cmd->job.run_frag)
if (job->run_frag)
pvr_update_job_syncs(device, queue, frag_signal_sync, PVR_JOB_TYPE_FRAG);
/* FIXME: DoShadowLoadOrStore() */
@ -315,6 +321,30 @@ err_destroy_geom_sync:
return result;
}
static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd_gfx *sub_cmd)
{
const pvr_dev_addr_t ds_addr = sub_cmd->job.ds.addr;
u_foreach_bit (view_idx, sub_cmd->view_mask) {
VkResult result;
result = pvr_process_graphics_cmd_for_view(device,
queue,
cmd_buffer,
sub_cmd,
view_idx);
if (result != VK_SUCCESS)
return result;
}
sub_cmd->job.ds.addr = ds_addr;
return VK_SUCCESS;
}
static VkResult pvr_process_compute_cmd(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_compute *sub_cmd)