pvr: Implement new firmware stream interface

Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19530>
Matt Coster 2022-08-03 09:52:17 +01:00 committed by Marge Bot
parent a824b18fdf
commit 947e183ff5
15 changed files with 784 additions and 350 deletions

View file

@ -112,7 +112,6 @@ const struct pvr_device_quirks pvr_device_quirks_4_40_2_51 = {
.has_brn51764 = true,
.has_brn52354 = true,
.has_brn52942 = true,
.has_brn56279 = true,
.has_brn58839 = true,
.has_brn62269 = true,
.has_brn66011 = true,

View file

@ -344,7 +344,6 @@ struct pvr_device_quirks {
bool has_brn51764 : 1;
bool has_brn52354 : 1;
bool has_brn52942 : 1;
bool has_brn56279 : 1;
bool has_brn58839 : 1;
bool has_brn62269 : 1;
bool has_brn66011 : 1;

View file

@ -22,6 +22,7 @@
pvr_xml_files = [
'rogue_cdm.xml',
'rogue_cr.xml',
'rogue_fw.xml',
'rogue_ipf.xml',
'rogue_lls.xml',
'rogue_pbestate.xml',

View file

@ -264,8 +264,11 @@ SOFTWARE.
<field name="mask" start="0" end="31" type="uint"/>
</struct>
<struct name="PDS_CTRL" length="2">
<field name="sm_overlap_enable" start="55" end="55" type="bool"/>
<struct name="CDM_ITEM" length="1">
<field name="mode" start="0" end="1" type="uint"/>
</struct>
<struct name="PDS_CTRL0" length="1">
<condition type="if" check="ROGUEXE"/>
<condition type="if" check="COMPUTE"/>
<field name="roguexe_max_num_cdm_tasks" start="24" end="31" type="uint"/>
@ -285,6 +288,10 @@ SOFTWARE.
<condition type="endif" check="ROGUEXE"/>
</struct>
<struct name="PDS_CTRL1" length="1">
<field name="sm_overlap_enable" start="23" end="23" type="bool"/>
</struct>
<struct name="EVENT_PIXEL_PDS_CODE" length="1">
<field name="addr" start="4" end="31" shift="4" type="address"/>
</struct>

View file

@ -0,0 +1,69 @@
<?xml version="1.0" ?>
<!--
Copyright © 2022 Imagination Technologies Ltd.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-->
<!--
These definitions are based on the ones for the firmware streams found
in the fwif header (pvr_rogue_fwif_stream.h).
TODO: Once the kernel driver is merged upstream, check to see if this comment
needs updating.
-->
<csbgen name="ROGUE" prefix="FW">
<define name="STREAM_EXTHDR_DATA_MASK" value="0xFFFFFFF"/>
<enum name="STREAM_EXTHDR_TYPE_COMPUTE">
<value name="0" value="0"/>
</enum>
<enum name="STREAM_EXTHDR_TYPE_GEOM">
<value name="0" value="0"/>
</enum>
<enum name="STREAM_EXTHDR_TYPE_FRAG">
<value name="0" value="0"/>
</enum>
<struct name="STREAM_EXTHDR_COMPUTE0" length="1">
<field name="type" start="29" end="31" type="STREAM_EXTHDR_TYPE_COMPUTE" default="0"/>
<field name="continuation" start="28" end="28" type="bool"/>
<field name="has_brn49927" start="0" end="0" type="bool"/>
</struct>
<struct name="STREAM_EXTHDR_FRAG0" length="1">
<field name="type" start="29" end="31" type="STREAM_EXTHDR_TYPE_FRAG" default="0"/>
<field name="continuation" start="28" end="28" type="bool"/>
<field name="has_brn49927" start="1" end="1" type="bool"/>
<field name="has_brn47217" start="0" end="0" type="bool"/>
</struct>
<struct name="STREAM_EXTHDR_GEOM0" length="1">
<field name="type" start="29" end="31" type="STREAM_EXTHDR_TYPE_GEOM" default="0"/>
<field name="continuation" start="28" end="28" type="bool"/>
<field name="has_brn49927" start="0" end="0" type="bool"/>
</struct>
</csbgen>
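The definitions above feed csbgen, which generates the PVRX() enums and the pvr_csb_pack()/pvr_cmd_length() helpers used by the job submission code later in this change. A minimal sketch of how a producer might emit one of these extension headers, assuming those generated helpers; emit_geom_ext_header() is a hypothetical illustration, not part of this commit:

static uint32_t emit_geom_ext_header(uint32_t *ext_stream, bool has_brn49927)
{
   uint32_t *ptr = ext_stream;

   /* Word 0: type (bits 29..31) defaults to STREAM_EXTHDR_TYPE_GEOM (0) and
    * continuation (bit 28) is left false; only the payload bits 0..27 are
    * covered by FW_STREAM_EXTHDR_DATA_MASK.
    */
   pvr_csb_pack (ptr, FW_STREAM_EXTHDR_GEOM0, header0) {
      header0.has_brn49927 = has_brn49927;
   }
   ptr += pvr_cmd_length(FW_STREAM_EXTHDR_GEOM0);

   /* No payload bit set means there is nothing for the firmware to parse,
    * so report a zero-length extension stream.
    */
   if ((ext_stream[0] & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
      return 0;

   return (uint32_t)((uint8_t *)ptr - (uint8_t *)ext_stream);
}

In the producers below, the quirk-dependent CR_TPU word is written immediately after the header word whenever the corresponding has_brn bit is set, and the consumers mirror the same layout when unpacking.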

View file

@ -26,6 +26,7 @@
#include "rogue_cdm.h"
#include "rogue_cr.h"
#include "rogue_fw.h"
#include "rogue_ipf.h"
#include "rogue_lls.h"
#include "rogue_pbestate.h"

View file

@ -102,16 +102,6 @@ rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info *dev_info,
}
}
static inline uint32_t
rogue_get_max_num_vdm_pds_tasks(const struct pvr_device_info *dev_info)
{
/* Default value based on the minimum value found in all existing cores. */
uint32_t max_usc_tasks = PVR_GET_FEATURE_VALUE(dev_info, max_usc_tasks, 24U);
/* FIXME: Where does the 9 come from? */
return max_usc_tasks - 9;
}
static inline uint32_t
rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info)
{

View file

@ -34,80 +34,40 @@
#include "pvr_winsys.h"
#include "util/macros.h"
static void pvr_compute_job_ws_submit_info_init(
struct pvr_compute_ctx *ctx,
static void
pvr_submit_info_stream_init(struct pvr_compute_ctx *ctx,
struct pvr_sub_cmd_compute *sub_cmd,
struct vk_sync *barrier,
struct vk_sync **waits,
uint32_t wait_count,
uint32_t *stage_flags,
struct pvr_winsys_compute_submit_info *submit_info)
{
const struct pvr_device *const device = ctx->device;
const struct pvr_physical_device *const pdevice = device->pdevice;
const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
const struct pvr_device_runtime_info *const dev_runtime_info =
&pdevice->dev_runtime_info;
const struct pvr_device_info *const dev_info = &pdevice->dev_info;
const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
uint32_t shared_regs = sub_cmd->num_shared_regs;
submit_info->frame_num = device->global_queue_present_count;
submit_info->job_num = device->global_queue_job_count;
uint32_t *stream_ptr = (uint32_t *)submit_info->fw_stream;
submit_info->barrier = barrier;
submit_info->waits = waits;
submit_info->wait_count = wait_count;
submit_info->stage_flags = stage_flags;
pvr_csb_pack (&submit_info->regs.cdm_ctrl_stream_base,
CR_CDM_CTRL_STREAM_BASE,
value) {
value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
}
/* FIXME: Need to set up the border color table at device creation
* time. Set to invalid for the time being.
/* FIXME: Need to set up the border color table at device creation time. Set
* to invalid for the time being.
*/
pvr_csb_pack (&submit_info->regs.tpu_border_colour_table,
pvr_csb_pack ((uint64_t *)stream_ptr,
CR_TPU_BORDER_COLOUR_TABLE_CDM,
value) {
value.border_colour_table_address = PVR_DEV_ADDR_INVALID;
}
stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM);
if (PVR_HAS_FEATURE(dev_info, compute_morton_capable))
submit_info->regs.cdm_item = 0;
pvr_csb_pack (&submit_info->regs.tpu, CR_TPU, value) {
value.tag_cem_4k_face_packing = true;
pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CTRL_STREAM_BASE, value) {
value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
}
stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE);
pvr_csb_pack (&submit_info->regs.compute_cluster,
CR_COMPUTE_CLUSTER,
value) {
if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
/* Each phantom has its own MCU, so atomicity can only be guaranteed
* when all work items are processed on the same phantom. This means we
* need to disable all USCs other than those of the first phantom,
* which has 4 clusters.
*/
value.mask = 0xFU;
} else {
value.mask = 0U;
}
}
pvr_csb_pack (&submit_info->regs.cdm_ctx_state_base_addr,
CR_CDM_CONTEXT_STATE_BASE,
state) {
pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CONTEXT_STATE_BASE, state) {
state.addr = ctx_switch->compute_state_bo->vma->dev_addr;
}
stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE);
pvr_csb_pack (&submit_info->regs.cdm_resume_pds1,
CR_CDM_CONTEXT_PDS1,
state) {
pvr_csb_pack (stream_ptr, CR_CDM_CONTEXT_PDS1, state) {
/* Convert the data size from dwords to bytes. */
const uint32_t load_program_data_size =
ctx_switch->sr[0].pds.load_program.data_size * 4U;
@ -118,7 +78,7 @@ static void pvr_compute_job_ws_submit_info_init(
state.unified_size = ctx_switch->sr[0].usc.unified_size;
state.common_shared = true;
state.common_size =
DIV_ROUND_UP(shared_regs << 2,
DIV_ROUND_UP(sub_cmd->num_shared_regs << 2,
PVRX(CR_CDM_CONTEXT_PDS1_COMMON_SIZE_UNIT_SIZE));
state.temp_size = 0;
@ -129,6 +89,99 @@ static void pvr_compute_job_ws_submit_info_init(
load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE);
state.fence = false;
}
stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1);
if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) {
pvr_csb_pack (stream_ptr, CR_CDM_ITEM, value) {
value.mode = 0;
}
stream_ptr += pvr_cmd_length(CR_CDM_ITEM);
}
if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
pvr_csb_pack (stream_ptr, CR_COMPUTE_CLUSTER, value) {
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
/* Each phantom has its own MCU, so atomicity can only be
* guaranteed when all work items are processed on the same
* phantom. This means we need to disable all USCs other than
* those of the first phantom, which has 4 clusters.
*/
value.mask = 0xFU;
} else {
value.mask = 0U;
}
}
stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER);
}
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
pvr_finishme(
"Emit execute_count when feature gpu_multicore_support is present");
*stream_ptr = 0;
stream_ptr++;
}
submit_info->fw_stream_len = (uint8_t *)stream_ptr - submit_info->fw_stream;
assert(submit_info->fw_stream_len <= ARRAY_SIZE(submit_info->fw_stream));
}
static void pvr_submit_info_ext_stream_init(
struct pvr_compute_ctx *ctx,
struct pvr_winsys_compute_submit_info *submit_info)
{
const struct pvr_device_info *const dev_info =
&ctx->device->pdevice->dev_info;
uint32_t *ext_stream_ptr = (uint32_t *)submit_info->fw_ext_stream;
uint32_t *header0_ptr;
header0_ptr = ext_stream_ptr;
ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_COMPUTE0);
pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_COMPUTE0, header0) {
if (PVR_HAS_QUIRK(dev_info, 49927)) {
header0.has_brn49927 = true;
pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
value.tag_cem_4k_face_packing = true;
}
ext_stream_ptr += pvr_cmd_length(CR_TPU);
}
}
submit_info->fw_ext_stream_len =
(uint8_t *)ext_stream_ptr - submit_info->fw_ext_stream;
assert(submit_info->fw_ext_stream_len <=
ARRAY_SIZE(submit_info->fw_ext_stream));
if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
submit_info->fw_ext_stream_len = 0;
}
static void pvr_compute_job_ws_submit_info_init(
struct pvr_compute_ctx *ctx,
struct pvr_sub_cmd_compute *sub_cmd,
struct vk_sync *barrier,
struct vk_sync **waits,
uint32_t wait_count,
uint32_t *stage_flags,
struct pvr_winsys_compute_submit_info *submit_info)
{
const struct pvr_device *const device = ctx->device;
const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
submit_info->frame_num = device->global_queue_present_count;
submit_info->job_num = device->global_queue_job_count;
submit_info->barrier = barrier;
submit_info->waits = waits;
submit_info->wait_count = wait_count;
submit_info->stage_flags = stage_flags;
pvr_submit_info_stream_init(ctx, sub_cmd, submit_info);
pvr_submit_info_ext_stream_init(ctx, submit_info);
if (sub_cmd->uses_barrier)
submit_info->flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
@ -160,5 +213,6 @@ VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
return device->ws->ops->compute_submit(ctx->ws_ctx,
&submit_info,
&device->pdevice->dev_info,
signal_sync);
}

View file

@ -1165,30 +1165,33 @@ void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *rt_dataset)
vk_free(&device->vk.alloc, rt_dataset);
}
static void
pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
static void pvr_geom_state_stream_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
struct pvr_winsys_geometry_state *state)
{
const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
/* FIXME: Should this just be done unconditionally? The firmware will just
* ignore the value anyway.
*/
if (PVR_HAS_QUIRK(dev_info, 56279)) {
pvr_csb_pack (&state->regs.pds_ctrl, CR_PDS_CTRL, value) {
value.max_num_vdm_tasks = rogue_get_max_num_vdm_pds_tasks(dev_info);
}
} else {
state->regs.pds_ctrl = 0;
}
uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
pvr_csb_pack (&state->regs.ppp_ctrl, CR_PPP_CTRL, value) {
pvr_csb_pack ((uint64_t *)stream_ptr, CR_VDM_CTRL_STREAM_BASE, value) {
value.addr = job->ctrl_stream_addr;
}
stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE);
pvr_csb_pack ((uint64_t *)stream_ptr,
CR_TPU_BORDER_COLOUR_TABLE_VDM,
value) {
value.border_colour_table_address = job->border_colour_table_addr;
}
stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM);
pvr_csb_pack (stream_ptr, CR_PPP_CTRL, value) {
value.wclampen = true;
value.fixed_point_format = 1;
}
stream_ptr += pvr_cmd_length(CR_PPP_CTRL);
pvr_csb_pack (&state->regs.te_psg, CR_TE_PSG, value) {
pvr_csb_pack (stream_ptr, CR_TE_PSG, value) {
value.completeonterminate = job->geometry_terminate;
value.region_stride = job->rt_dataset->rgn_headers_stride /
@ -1196,40 +1199,71 @@ pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942);
}
/* The set up of CR_TPU must be identical to
* pvr_render_job_ws_fragment_state_init().
*/
pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
value.tag_cem_4k_face_packing = true;
}
pvr_csb_pack (&state->regs.tpu_border_colour_table,
CR_TPU_BORDER_COLOUR_TABLE_VDM,
value) {
value.border_colour_table_address = job->border_colour_table_addr;
}
pvr_csb_pack (&state->regs.vdm_ctrl_stream_base,
CR_VDM_CTRL_STREAM_BASE,
value) {
value.addr = job->ctrl_stream_addr;
}
stream_ptr += pvr_cmd_length(CR_TE_PSG);
/* Set up the USC common size for the context switch resume/load program
* (ctx->ctx_switch.programs[i].sr->pds_load_program), which was created
* as part of the render context.
*/
pvr_csb_pack (&state->regs.vdm_ctx_resume_task0_size,
VDMCTRL_PDS_STATE0,
value) {
pvr_csb_pack (stream_ptr, VDMCTRL_PDS_STATE0, value) {
/* Calculate the size in bytes. */
const uint16_t shared_registers_size = job->max_shared_registers * 4;
value.usc_common_size =
DIV_ROUND_UP(shared_registers_size,
PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));
};
}
stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0);
/* Set up view_idx to 0 */
*stream_ptr = 0;
stream_ptr++;
state->fw_stream_len = (uint8_t *)stream_ptr - state->fw_stream;
assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
}
static void
pvr_geom_state_stream_ext_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
struct pvr_winsys_geometry_state *state)
{
const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
uint32_t *ext_stream_ptr = (uint32_t *)state->fw_ext_stream;
uint32_t *header0_ptr;
header0_ptr = ext_stream_ptr;
ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_GEOM0);
pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_GEOM0, header0) {
if (PVR_HAS_QUIRK(dev_info, 49927)) {
header0.has_brn49927 = true;
/* The set up of CR_TPU must be identical to
* pvr_render_job_ws_fragment_state_stream_ext_init().
*/
pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
value.tag_cem_4k_face_packing = true;
}
ext_stream_ptr += pvr_cmd_length(CR_TPU);
}
}
state->fw_ext_stream_len = (uint8_t *)ext_stream_ptr - state->fw_ext_stream;
assert(state->fw_ext_stream_len <= ARRAY_SIZE(state->fw_ext_stream));
if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
state->fw_ext_stream_len = 0;
}
static void
pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
struct pvr_winsys_geometry_state *state)
{
pvr_geom_state_stream_init(ctx, job, state);
pvr_geom_state_stream_ext_init(ctx, job, state);
state->flags = 0;
@ -1295,19 +1329,20 @@ pvr_get_isp_num_tiles_xy(const struct pvr_device_info *dev_info,
}
}
static void
pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
struct pvr_winsys_fragment_state *state)
{
const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
const struct pvr_device_runtime_info *dev_runtime_info =
&pdevice->dev_runtime_info;
const struct pvr_device_info *dev_info = &pdevice->dev_info;
const enum PVRX(CR_ISP_AA_MODE_TYPE)
isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples);
const struct pvr_device_runtime_info *dev_runtime_info =
&ctx->device->pdevice->dev_runtime_info;
const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
uint32_t isp_ctl;
/* FIXME: what to do when job->run_frag is false? */
uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
uint32_t pixel_ctl;
uint32_t isp_ctl;
/* FIXME: pass in the number of samples rather than isp_aa_mode? */
pvr_setup_tiles_in_flight(dev_info,
@ -1317,72 +1352,27 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
false,
job->max_tiles_in_flight,
&isp_ctl,
&state->regs.usc_pixel_output_ctrl);
&pixel_ctl);
pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, value) {
value.sample_pos = true;
/* FIXME: There are a number of things that cause this to be set, this
* is just one of them.
*/
value.process_empty_tiles = job->process_empty_tiles;
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_SCISSOR_BASE, value) {
value.addr = job->scissor_table_addr;
}
stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
/* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
* possible to fully pack CR_ISP_CTL above rather than having to OR in part
* of the value.
*/
state->regs.isp_ctl |= isp_ctl;
pvr_csb_pack (&state->regs.isp_aa, CR_ISP_AA, value) {
value.mode = isp_aa_mode;
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_DBIAS_BASE, value) {
value.addr = job->depth_bias_table_addr;
}
stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE);
/* The set up of CR_TPU must be identical to
* pvr_render_job_ws_geometry_state_init().
*/
pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
value.tag_cem_4k_face_packing = true;
}
if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) {
/* Each phantom has its own MCU, so atomicity can only be guaranteed
* when all work items are processed on the same phantom. This means we
* need to disable all USCs other than those of the first phantom, which
* has 4 clusters. Note that we only need to do this for atomic
* operations in fragment shaders, since hardware prevents the TA from
* running on more than one phantom anyway.
*/
state->regs.pixel_phantom = 0xF;
} else {
state->regs.pixel_phantom = 0;
}
pvr_csb_pack (&state->regs.isp_bgobjvals, CR_ISP_BGOBJVALS, value) {
value.enablebgtag = job->enable_bg_tag;
value.mask = true;
/* FIXME: Hard code this for now as we don't currently support any
* stencil image formats.
*/
value.stencil = 0xFF;
}
pvr_csb_pack (&state->regs.isp_bgobjdepth, CR_ISP_BGOBJDEPTH, value) {
/* FIXME: This is suitable for the single depth format the driver
* currently supports, but may need updating to handle other depth
* formats.
*/
value.value = fui(job->depth_clear_value);
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_OCLQRY_BASE, value) {
value.addr = PVR_DEV_ADDR_INVALID;
}
stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
/* FIXME: Some additional set up needed to support depth and stencil
* load/store operations.
*/
pvr_csb_pack (&state->regs.isp_zlsctl, CR_ISP_ZLSCTL, value) {
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) {
uint32_t aligned_width =
ALIGN_POT(job->depth_physical_width, ROGUE_IPF_TILE_SIZE_PIXELS);
uint32_t aligned_height =
@ -1410,47 +1400,96 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_F32Z);
value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
}
stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
pvr_csb_pack (&state->regs.isp_zls_pixels, CR_ISP_ZLS_PIXELS, value) {
value.x = job->depth_stride - 1;
value.y = job->depth_height - 1;
}
} else {
state->regs.isp_zls_pixels = 0;
}
pvr_csb_pack (&state->regs.isp_zload_store_base, CR_ISP_ZLOAD_BASE, value) {
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLOAD_BASE, value) {
value.addr = job->depth_addr;
}
stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
pvr_csb_pack (&state->regs.isp_stencil_load_store_base,
CR_ISP_STENCIL_LOAD_BASE,
value) {
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_STENCIL_LOAD_BASE, value) {
value.addr = job->stencil_addr;
/* FIXME: May need to set value.enable to true. */
}
stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
pvr_csb_pack (&state->regs.tpu_border_colour_table,
*(uint64_t *)stream_ptr = 0;
stream_ptr += 2U;
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == 8U);
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) == 3U);
STATIC_ASSERT(sizeof(job->pbe_reg_words[0][0]) == sizeof(uint64_t));
memcpy(stream_ptr, job->pbe_reg_words, sizeof(job->pbe_reg_words));
stream_ptr += 8U * 3U * 2U;
pvr_csb_pack ((uint64_t *)stream_ptr,
CR_TPU_BORDER_COLOUR_TABLE_PDM,
value) {
value.border_colour_table_address = job->border_colour_table_addr;
}
stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
state->regs.isp_oclqry_base = 0;
STATIC_ASSERT(ARRAY_SIZE(job->pds_bgnd_reg_values) == 3U);
STATIC_ASSERT(sizeof(job->pds_bgnd_reg_values[0]) == sizeof(uint64_t));
memcpy(stream_ptr,
job->pds_bgnd_reg_values,
sizeof(job->pds_bgnd_reg_values));
stream_ptr += 3U * 2U;
pvr_csb_pack (&state->regs.isp_dbias_base, CR_ISP_DBIAS_BASE, value) {
value.addr = job->depth_bias_table_addr;
/* Set pds_pr_bgnd array to 0 */
memset(stream_ptr, 0, 3U * sizeof(uint64_t));
stream_ptr += 3U * 2U;
/* Set usc_clear_register array to 0 */
memset(stream_ptr, 0, 8U * sizeof(uint32_t));
stream_ptr += 8U;
*stream_ptr = pixel_ctl;
stream_ptr++;
pvr_csb_pack (stream_ptr, CR_ISP_BGOBJDEPTH, value) {
/* FIXME: This is suitable for the single depth format the driver
* currently supports, but may need updating to handle other depth
* formats.
*/
value.value = fui(job->depth_clear_value);
}
stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
pvr_csb_pack (&state->regs.isp_scissor_base, CR_ISP_SCISSOR_BASE, value) {
value.addr = job->scissor_table_addr;
pvr_csb_pack (stream_ptr, CR_ISP_BGOBJVALS, value) {
value.enablebgtag = job->enable_bg_tag;
value.mask = true;
/* FIXME: Hard code this for now as we don't currently support any
* stencil image formats.
*/
value.stencil = 0xFF;
}
stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);
pvr_csb_pack (&state->regs.event_pixel_pds_info,
CR_EVENT_PIXEL_PDS_INFO,
value) {
pvr_csb_pack (stream_ptr, CR_ISP_AA, value) {
value.mode = isp_aa_mode;
}
stream_ptr += pvr_cmd_length(CR_ISP_AA);
pvr_csb_pack (stream_ptr, CR_ISP_CTL, value) {
value.sample_pos = true;
/* FIXME: There are a number of things that cause this to be set, this
* is just one of them.
*/
value.process_empty_tiles = job->process_empty_tiles;
}
/* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
* possible to fully pack CR_ISP_CTL above rather than having to OR in part
* of the value.
*/
*stream_ptr |= isp_ctl;
stream_ptr += pvr_cmd_length(CR_ISP_CTL);
pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_INFO, value) {
value.const_size =
DIV_ROUND_UP(ctx->device->pixel_event_data_size_in_dwords,
PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE));
@ -1459,32 +1498,114 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
DIV_ROUND_UP(PVR_STATE_PBE_DWORDS,
PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE));
}
stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
pvr_csb_pack (&state->regs.event_pixel_pds_data,
CR_EVENT_PIXEL_PDS_DATA,
value) {
if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
uint32_t pixel_phantom = 0;
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) {
/* Each phantom has its own MCU, so atomicity can only be guaranteed
* when all work items are processed on the same phantom. This means
* we need to disable all USCs other than those of the first
* phantom, which has 4 clusters. Note that we only need to do this
* for atomic operations in fragment shaders, since hardware
* prevents the TA from running on more than one phantom anyway.
*/
pixel_phantom = 0xF;
}
*stream_ptr = pixel_phantom;
stream_ptr++;
}
/* Set up view_idx to 0 */
*stream_ptr = 0;
stream_ptr++;
pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) {
value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset);
}
stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word) ==
ARRAY_SIZE(job->pbe_reg_words));
STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word[0]) ==
ARRAY_SIZE(job->pbe_reg_words[0]));
for (uint32_t i = 0; i < ARRAY_SIZE(job->pbe_reg_words); i++) {
state->regs.pbe_word[i][0] = job->pbe_reg_words[i][0];
state->regs.pbe_word[i][1] = job->pbe_reg_words[i][1];
state->regs.pbe_word[i][2] = job->pbe_reg_words[i][2];
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
pvr_finishme(
"Emit isp_oclqry_stride when feature gpu_multicore_support is present");
*stream_ptr = 0;
stream_ptr++;
}
STATIC_ASSERT(__same_type(state->regs.pds_bgnd, job->pds_bgnd_reg_values));
typed_memcpy(state->regs.pds_bgnd,
job->pds_bgnd_reg_values,
ARRAY_SIZE(state->regs.pds_bgnd));
if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
pvr_csb_pack (stream_ptr, CR_ISP_ZLS_PIXELS, value) {
value.x = job->depth_stride - 1;
value.y = job->depth_height - 1;
}
stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
}
memset(state->regs.pds_pr_bgnd, 0, sizeof(state->regs.pds_pr_bgnd));
/* zls_stride */
*stream_ptr = job->depth_layer_size;
stream_ptr++;
/* sls_stride */
*stream_ptr = job->depth_layer_size;
stream_ptr++;
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
pvr_finishme(
"Emit execute_count when feature gpu_multicore_support is present");
*stream_ptr = 0;
stream_ptr++;
}
state->fw_stream_len = (uint8_t *)stream_ptr - state->fw_stream;
assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
}
static void
pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
struct pvr_winsys_fragment_state *state)
{
const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
uint32_t *ext_stream_ptr = (uint32_t *)state->fw_ext_stream;
uint32_t *header0_ptr;
header0_ptr = ext_stream_ptr;
ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_FRAG0);
pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_FRAG0, header0) {
if (PVR_HAS_QUIRK(dev_info, 49927)) {
header0.has_brn49927 = true;
/* The set up of CR_TPU must be identical to
* pvr_render_job_ws_geometry_state_stream_ext_init().
*/
pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
value.tag_cem_4k_face_packing = true;
}
ext_stream_ptr += pvr_cmd_length(CR_TPU);
}
}
state->fw_ext_stream_len = (uint8_t *)ext_stream_ptr - state->fw_ext_stream;
assert(state->fw_ext_stream_len <= ARRAY_SIZE(state->fw_ext_stream));
if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
state->fw_ext_stream_len = 0;
}
static void
pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
struct pvr_winsys_fragment_state *state)
{
/* FIXME: what to do when job->run_frag is false? */
pvr_frag_state_stream_init(ctx, job, state);
pvr_frag_state_stream_ext_init(ctx, job, state);
/* FIXME: Merge geometry and fragment flags into a single flags member? */
/* FIXME: move to its own function? */
state->flags = 0;
@ -1499,9 +1620,6 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
if (job->frag_uses_atomic_ops)
state->flags |= PVR_WINSYS_FRAG_FLAG_SINGLE_CORE;
state->zls_stride = job->depth_layer_size;
state->sls_stride = job->depth_layer_size;
}
static void pvr_render_job_ws_submit_info_init(
@ -1533,9 +1651,6 @@ static void pvr_render_job_ws_submit_info_init(
pvr_render_job_ws_geometry_state_init(ctx, job, &submit_info->geometry);
pvr_render_job_ws_fragment_state_init(ctx, job, &submit_info->fragment);
/* These values are expected to match. */
assert(submit_info->geometry.regs.tpu == submit_info->fragment.regs.tpu);
}
VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
@ -1564,6 +1679,7 @@ VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
result = device->ws->ops->render_submit(ctx->ws_ctx,
&submit_info,
&device->pdevice->dev_info,
signal_sync_geom,
signal_sync_frag);
if (result != VK_SUCCESS)

View file

@ -309,15 +309,17 @@ struct pvr_winsys_compute_submit_info {
uint32_t wait_count;
uint32_t *stage_flags;
struct {
uint64_t tpu_border_colour_table;
uint64_t cdm_ctrl_stream_base;
uint64_t cdm_ctx_state_base_addr;
uint32_t tpu;
uint32_t cdm_resume_pds1;
uint32_t cdm_item;
uint32_t compute_cluster;
} regs;
/* Firmware stream buffer. This is the maximum possible size taking into
* consideration all HW features.
*/
uint8_t fw_stream[92];
uint32_t fw_stream_len;
/* Firmware extension stream buffer. This is the maximum possible size taking
* into consideration all quirks and enhancements.
*/
uint8_t fw_ext_stream[8];
uint32_t fw_ext_stream_len;
/* Must be 0 or a combination of PVR_WINSYS_COMPUTE_FLAG_* flags. */
uint32_t flags;
@ -351,49 +353,37 @@ struct pvr_winsys_render_submit_info {
uint32_t *stage_flags;
struct pvr_winsys_geometry_state {
struct {
uint64_t pds_ctrl;
uint32_t ppp_ctrl;
uint32_t te_psg;
uint32_t tpu;
uint64_t tpu_border_colour_table;
uint64_t vdm_ctrl_stream_base;
uint32_t vdm_ctx_resume_task0_size;
} regs;
/* Firmware stream buffer. This is the maximum possible size taking into
* consideration all HW features.
*/
uint8_t fw_stream[52];
uint32_t fw_stream_len;
/* Firmware extension stream buffer. This is the maximum possible size
* taking into consideration all quirks and enhancements.
*/
uint8_t fw_ext_stream[12];
uint32_t fw_ext_stream_len;
/* Must be 0 or a combination of PVR_WINSYS_GEOM_FLAG_* flags. */
uint32_t flags;
} geometry;
struct pvr_winsys_fragment_state {
struct {
uint32_t event_pixel_pds_data;
uint32_t event_pixel_pds_info;
uint32_t isp_aa;
uint32_t isp_bgobjdepth;
uint32_t isp_bgobjvals;
uint32_t isp_ctl;
uint64_t isp_dbias_base;
uint64_t isp_oclqry_base;
uint64_t isp_scissor_base;
uint64_t isp_stencil_load_store_base;
uint64_t isp_zload_store_base;
uint64_t isp_zlsctl;
uint32_t isp_zls_pixels;
uint64_t pbe_word[PVR_MAX_COLOR_ATTACHMENTS]
[ROGUE_NUM_PBESTATE_REG_WORDS];
uint32_t pixel_phantom;
uint64_t pds_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
uint64_t pds_pr_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
uint32_t tpu;
uint64_t tpu_border_colour_table;
uint32_t usc_pixel_output_ctrl;
} regs;
/* Firmware stream buffer. This is the maximum possible size taking into
* consideration all HW features.
*/
uint8_t fw_stream[432];
uint32_t fw_stream_len;
/* Firmware extension stream buffer. This is the maximum possible size
* taking into consideration all quirks and enhancements.
*/
uint8_t fw_ext_stream[8];
uint32_t fw_ext_stream_len;
/* Must be 0 or a combination of PVR_WINSYS_FRAG_FLAG_* flags. */
uint32_t flags;
uint32_t zls_stride;
uint32_t sls_stride;
} fragment;
};
@ -458,6 +448,7 @@ struct pvr_winsys_ops {
VkResult (*render_submit)(
const struct pvr_winsys_render_ctx *ctx,
const struct pvr_winsys_render_submit_info *submit_info,
const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync_geom,
struct vk_sync *signal_sync_frag);
@ -469,6 +460,7 @@ struct pvr_winsys_ops {
VkResult (*compute_submit)(
const struct pvr_winsys_compute_ctx *ctx,
const struct pvr_winsys_compute_submit_info *submit_info,
const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync);
VkResult (*transfer_ctx_create)(

View file

@ -168,7 +168,7 @@ struct rogue_fwif_ta_regs {
/* Only used when feature VDM_OBJECT_LEVEL_LLS present. */
uint32_t vdm_context_resume_task3_size;
/* Only used when BRN 56279 or BRN 67381 present. */
/* Only used when BRN 67381 present. */
uint32_t pds_ctrl;
uint32_t view_idx;
@ -208,7 +208,7 @@ struct rogue_fwif_cmd_ta {
*/
struct rogue_fwif_cmd_ta_3d_shared cmd_shared;
struct rogue_fwif_ta_regs ALIGN_ATTR(8) geom_regs;
struct rogue_fwif_ta_regs ALIGN_ATTR(8) regs;
uint32_t ALIGN_ATTR(8) flags;
/**
* Holds the TA/3D fence value to allow the 3D partial render command

View file

@ -30,6 +30,7 @@
#include "fw-api/pvr_rogue_fwif.h"
#include "fw-api/pvr_rogue_fwif_rf.h"
#include "pvr_device_info.h"
#include "pvr_private.h"
#include "pvr_srv.h"
#include "pvr_srv_bridge.h"
@ -136,24 +137,86 @@ void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx)
vk_free(srv_ws->alloc, srv_ctx);
}
static void
pvr_srv_compute_cmd_stream_load(struct rogue_fwif_cmd_compute *const cmd,
const uint8_t *const stream,
const uint32_t stream_len,
const struct pvr_device_info *const dev_info)
{
const uint32_t *stream_ptr = (const uint32_t *)stream;
struct rogue_fwif_cdm_regs *const regs = &cmd->regs;
regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM);
regs->cdm_ctrl_stream_base = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE);
regs->cdm_context_state_base_addr = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE);
regs->cdm_resume_pds1 = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1);
regs->cdm_item = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_CDM_ITEM);
if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
regs->compute_cluster = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER);
}
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
cmd->execute_count = *stream_ptr;
stream_ptr++;
}
assert((const uint8_t *)stream_ptr - stream == stream_len);
}
static void pvr_srv_compute_cmd_ext_stream_load(
struct rogue_fwif_cmd_compute *const cmd,
const uint8_t *const ext_stream,
const uint32_t ext_stream_len,
const struct pvr_device_info *const dev_info)
{
const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream;
struct rogue_fwif_cdm_regs *const regs = &cmd->regs;
struct PVRX(FW_STREAM_EXTHDR_COMPUTE0) header0;
header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_COMPUTE0);
ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_COMPUTE0);
assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
if (header0.has_brn49927) {
regs->tpu = *ext_stream_ptr;
ext_stream_ptr += pvr_cmd_length(CR_TPU);
}
assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len);
}
static void pvr_srv_compute_cmd_init(
const struct pvr_winsys_compute_submit_info *submit_info,
struct rogue_fwif_cmd_compute *cmd)
struct rogue_fwif_cmd_compute *cmd,
const struct pvr_device_info *const dev_info)
{
struct rogue_fwif_cdm_regs *fw_regs = &cmd->regs;
memset(cmd, 0, sizeof(*cmd));
cmd->cmn.frame_num = submit_info->frame_num;
fw_regs->tpu_border_colour_table = submit_info->regs.tpu_border_colour_table;
fw_regs->cdm_item = submit_info->regs.cdm_item;
fw_regs->compute_cluster = submit_info->regs.compute_cluster;
fw_regs->cdm_ctrl_stream_base = submit_info->regs.cdm_ctrl_stream_base;
fw_regs->cdm_context_state_base_addr =
submit_info->regs.cdm_ctx_state_base_addr;
fw_regs->tpu = submit_info->regs.tpu;
fw_regs->cdm_resume_pds1 = submit_info->regs.cdm_resume_pds1;
pvr_srv_compute_cmd_stream_load(cmd,
submit_info->fw_stream,
submit_info->fw_stream_len,
dev_info);
if (submit_info->fw_ext_stream_len) {
pvr_srv_compute_cmd_ext_stream_load(cmd,
submit_info->fw_ext_stream,
submit_info->fw_ext_stream_len,
dev_info);
}
if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP)
cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
@ -165,6 +228,7 @@ static void pvr_srv_compute_cmd_init(
VkResult pvr_srv_winsys_compute_submit(
const struct pvr_winsys_compute_ctx *ctx,
const struct pvr_winsys_compute_submit_info *submit_info,
const struct pvr_device_info *const dev_info,
struct vk_sync *signal_sync)
{
const struct pvr_srv_winsys_compute_ctx *srv_ctx =
@ -176,7 +240,7 @@ VkResult pvr_srv_winsys_compute_submit(
int in_fd = -1;
int fence;
pvr_srv_compute_cmd_init(submit_info, &compute_cmd);
pvr_srv_compute_cmd_init(submit_info, &compute_cmd, dev_info);
for (uint32_t i = 0U; i < submit_info->wait_count; i++) {
struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]);

View file

@ -26,6 +26,7 @@
#include <vulkan/vulkan.h>
struct pvr_device_info;
struct pvr_winsys;
struct pvr_winsys_compute_ctx;
struct pvr_winsys_compute_ctx_create_info;
@ -45,6 +46,7 @@ void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx);
VkResult pvr_srv_winsys_compute_submit(
const struct pvr_winsys_compute_ctx *ctx,
const struct pvr_winsys_compute_submit_info *submit_info,
const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync);
#endif /* PVR_SRV_JOB_COMPUTE_H */

View file

@ -402,28 +402,82 @@ void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx)
vk_free(srv_ws->alloc, srv_ctx);
}
static void
pvr_srv_geometry_cmd_stream_load(struct rogue_fwif_cmd_ta *const cmd,
const uint8_t *const stream,
const uint32_t stream_len,
const struct pvr_device_info *const dev_info)
{
const uint32_t *stream_ptr = (const uint32_t *)stream;
struct rogue_fwif_ta_regs *const regs = &cmd->regs;
regs->vdm_ctrl_stream_base = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE);
regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM);
regs->ppp_ctrl = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_PPP_CTRL);
regs->te_psg = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_TE_PSG);
regs->vdm_context_resume_task0_size = *stream_ptr;
stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0);
regs->view_idx = *stream_ptr;
stream_ptr++;
assert((const uint8_t *)stream_ptr - stream == stream_len);
}
static void pvr_srv_geometry_cmd_ext_stream_load(
struct rogue_fwif_cmd_ta *const cmd,
const uint8_t *const ext_stream,
const uint32_t ext_stream_len,
const struct pvr_device_info *const dev_info)
{
const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream;
struct rogue_fwif_ta_regs *const regs = &cmd->regs;
struct PVRX(FW_STREAM_EXTHDR_GEOM0) header0;
header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_GEOM0);
ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_GEOM0);
assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
if (header0.has_brn49927) {
regs->tpu = *ext_stream_ptr;
ext_stream_ptr += pvr_cmd_length(CR_TPU);
}
assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len);
}
static void pvr_srv_geometry_cmd_init(
const struct pvr_winsys_render_submit_info *submit_info,
const struct pvr_srv_sync_prim *sync_prim,
struct rogue_fwif_cmd_ta *cmd)
struct rogue_fwif_cmd_ta *cmd,
const struct pvr_device_info *const dev_info)
{
const struct pvr_winsys_geometry_state *state = &submit_info->geometry;
struct rogue_fwif_ta_regs *fw_regs = &cmd->geom_regs;
memset(cmd, 0, sizeof(*cmd));
cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
fw_regs->vdm_ctrl_stream_base = state->regs.vdm_ctrl_stream_base;
fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
fw_regs->ppp_ctrl = state->regs.ppp_ctrl;
fw_regs->te_psg = state->regs.te_psg;
fw_regs->tpu = state->regs.tpu;
fw_regs->vdm_context_resume_task0_size =
state->regs.vdm_ctx_resume_task0_size;
pvr_srv_geometry_cmd_stream_load(cmd,
state->fw_stream,
state->fw_stream_len,
dev_info);
assert(state->regs.pds_ctrl >> 32U == 0U);
fw_regs->pds_ctrl = (uint32_t)state->regs.pds_ctrl;
if (state->fw_ext_stream_len) {
pvr_srv_geometry_cmd_ext_stream_load(cmd,
state->fw_ext_stream,
state->fw_ext_stream_len,
dev_info);
}
if (state->flags & PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY)
cmd->flags |= ROGUE_FWIF_TAFLAGS_FIRSTKICK;
@ -439,73 +493,160 @@ static void pvr_srv_geometry_cmd_init(
cmd->partial_render_ta_3d_fence.value = sync_prim->value;
}
static void
pvr_srv_fragment_cmd_stream_load(struct rogue_fwif_cmd_3d *const cmd,
const uint8_t *const stream,
const uint32_t stream_len,
const struct pvr_device_info *const dev_info)
{
const uint32_t *stream_ptr = (const uint32_t *)stream;
struct rogue_fwif_3d_regs *const regs = &cmd->regs;
regs->isp_scissor_base = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
regs->isp_dbias_base = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE);
regs->isp_oclqry_base = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
regs->isp_zlsctl = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
regs->isp_zload_store_base = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
regs->isp_stencil_load_store_base = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
regs->fb_cdc_zls = *(const uint64_t *)stream_ptr;
stream_ptr += 2U;
STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word) == 8U);
STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word[0]) == 3U);
STATIC_ASSERT(sizeof(regs->pbe_word[0][0]) == sizeof(uint64_t));
memcpy(regs->pbe_word, stream_ptr, sizeof(regs->pbe_word));
stream_ptr += 8U * 3U * 2U;
regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
STATIC_ASSERT(ARRAY_SIZE(regs->pds_bgnd) == 3U);
STATIC_ASSERT(sizeof(regs->pds_bgnd[0]) == sizeof(uint64_t));
memcpy(regs->pds_bgnd, stream_ptr, sizeof(regs->pds_bgnd));
stream_ptr += 3U * 2U;
STATIC_ASSERT(ARRAY_SIZE(regs->pds_pr_bgnd) == 3U);
STATIC_ASSERT(sizeof(regs->pds_pr_bgnd[0]) == sizeof(uint64_t));
memcpy(regs->pds_pr_bgnd, stream_ptr, sizeof(regs->pds_pr_bgnd));
stream_ptr += 3U * 2U;
STATIC_ASSERT(ARRAY_SIZE(regs->usc_clear_register) == 8U);
STATIC_ASSERT(sizeof(regs->usc_clear_register[0]) == sizeof(uint32_t));
memcpy(regs->usc_clear_register,
stream_ptr,
sizeof(regs->usc_clear_register));
stream_ptr += 8U;
regs->usc_pixel_output_ctrl = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
regs->isp_bgobjdepth = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
regs->isp_bgobjvals = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);
regs->isp_aa = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_AA);
regs->isp_ctl = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_CTL);
regs->event_pixel_pds_info = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
regs->pixel_phantom = *stream_ptr;
stream_ptr++;
}
regs->view_idx = *stream_ptr;
stream_ptr++;
regs->event_pixel_pds_data = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
regs->isp_oclqry_stride = *stream_ptr;
stream_ptr++;
}
if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
regs->isp_zls_pixels = *stream_ptr;
stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
}
cmd->zls_stride = *stream_ptr;
stream_ptr++;
cmd->sls_stride = *stream_ptr;
stream_ptr++;
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
cmd->execute_count = *stream_ptr;
stream_ptr++;
}
assert((const uint8_t *)stream_ptr - stream == stream_len);
}
static void pvr_srv_fragment_cmd_ext_stream_load(
struct rogue_fwif_cmd_3d *const cmd,
const uint8_t *const ext_stream,
const uint32_t ext_stream_len,
const struct pvr_device_info *const dev_info)
{
const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream;
struct rogue_fwif_3d_regs *const regs = &cmd->regs;
struct PVRX(FW_STREAM_EXTHDR_FRAG0) header0;
header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_FRAG0);
ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_FRAG0);
assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
if (header0.has_brn49927) {
regs->tpu = *ext_stream_ptr;
ext_stream_ptr += pvr_cmd_length(CR_TPU);
}
assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len);
}
static void pvr_srv_fragment_cmd_init(
const struct pvr_winsys_render_submit_info *submit_info,
struct rogue_fwif_cmd_3d *cmd)
struct rogue_fwif_cmd_3d *cmd,
const struct pvr_device_info *dev_info)
{
const struct pvr_winsys_fragment_state *state = &submit_info->fragment;
struct rogue_fwif_3d_regs *fw_regs = &cmd->regs;
memset(cmd, 0, sizeof(*cmd));
cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
fw_regs->usc_pixel_output_ctrl = state->regs.usc_pixel_output_ctrl;
fw_regs->isp_bgobjdepth = state->regs.isp_bgobjdepth;
fw_regs->isp_bgobjvals = state->regs.isp_bgobjvals;
fw_regs->isp_aa = state->regs.isp_aa;
fw_regs->isp_ctl = state->regs.isp_ctl;
fw_regs->tpu = state->regs.tpu;
fw_regs->event_pixel_pds_info = state->regs.event_pixel_pds_info;
fw_regs->pixel_phantom = state->regs.pixel_phantom;
fw_regs->event_pixel_pds_data = state->regs.event_pixel_pds_data;
fw_regs->isp_scissor_base = state->regs.isp_scissor_base;
fw_regs->isp_dbias_base = state->regs.isp_dbias_base;
fw_regs->isp_oclqry_base = state->regs.isp_oclqry_base;
fw_regs->isp_zlsctl = state->regs.isp_zlsctl;
fw_regs->isp_zload_store_base = state->regs.isp_zload_store_base;
fw_regs->isp_stencil_load_store_base =
state->regs.isp_stencil_load_store_base;
fw_regs->isp_zls_pixels = state->regs.isp_zls_pixels;
pvr_srv_fragment_cmd_stream_load(cmd,
state->fw_stream,
state->fw_stream_len,
dev_info);
STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word) ==
ARRAY_SIZE(state->regs.pbe_word));
STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word[0]) <=
ARRAY_SIZE(state->regs.pbe_word[0]));
#if !defined(NDEBUG)
/* Depending on the hardware we might have more PBE words than the firmware
* accepts so check that the extra words are 0.
*/
if (ARRAY_SIZE(fw_regs->pbe_word[0]) < ARRAY_SIZE(state->regs.pbe_word[0])) {
/* For each color attachment. */
for (uint32_t i = 0; i < ARRAY_SIZE(state->regs.pbe_word); i++) {
/* For each extra PBE word not used by the firmware. */
for (uint32_t j = ARRAY_SIZE(fw_regs->pbe_word[0]);
j < ARRAY_SIZE(state->regs.pbe_word[0]);
j++) {
assert(state->regs.pbe_word[i][j] == 0);
if (state->fw_ext_stream_len) {
pvr_srv_fragment_cmd_ext_stream_load(cmd,
state->fw_ext_stream,
state->fw_ext_stream_len,
dev_info);
}
}
}
#endif
memcpy(fw_regs->pbe_word, state->regs.pbe_word, sizeof(fw_regs->pbe_word));
fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_bgnd) ==
ARRAY_SIZE(state->regs.pds_bgnd));
typed_memcpy(fw_regs->pds_bgnd,
state->regs.pds_bgnd,
ARRAY_SIZE(fw_regs->pds_bgnd));
STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_pr_bgnd) ==
ARRAY_SIZE(state->regs.pds_pr_bgnd));
typed_memcpy(fw_regs->pds_pr_bgnd,
state->regs.pds_pr_bgnd,
ARRAY_SIZE(fw_regs->pds_pr_bgnd));
if (state->flags & PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT)
cmd->flags |= ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER;
@ -518,14 +659,12 @@ static void pvr_srv_fragment_cmd_init(
if (state->flags & PVR_WINSYS_FRAG_FLAG_SINGLE_CORE)
cmd->flags |= ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE;
cmd->zls_stride = state->zls_stride;
cmd->sls_stride = state->sls_stride;
}
VkResult pvr_srv_winsys_render_submit(
const struct pvr_winsys_render_ctx *ctx,
const struct pvr_winsys_render_submit_info *submit_info,
const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync_geom,
struct vk_sync *signal_sync_frag)
{
@ -552,8 +691,8 @@ VkResult pvr_srv_winsys_render_submit(
VkResult result;
pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd);
pvr_srv_fragment_cmd_init(submit_info, &frag_cmd);
pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info);
pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info);
for (uint32_t i = 0U; i < submit_info->wait_count; i++) {
struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]);

View file

@ -68,6 +68,7 @@ void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx);
VkResult pvr_srv_winsys_render_submit(
const struct pvr_winsys_render_ctx *ctx,
const struct pvr_winsys_render_submit_info *submit_info,
const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync_geom,
struct vk_sync *signal_sync_frag);