diff --git a/src/imagination/common/pvr_device_info.c b/src/imagination/common/pvr_device_info.c index c0354672c95..f26f7d4395f 100644 --- a/src/imagination/common/pvr_device_info.c +++ b/src/imagination/common/pvr_device_info.c @@ -112,7 +112,6 @@ const struct pvr_device_quirks pvr_device_quirks_4_40_2_51 = { .has_brn51764 = true, .has_brn52354 = true, .has_brn52942 = true, - .has_brn56279 = true, .has_brn58839 = true, .has_brn62269 = true, .has_brn66011 = true, diff --git a/src/imagination/common/pvr_device_info.h b/src/imagination/common/pvr_device_info.h index f168a1ccde4..6b7d19354f7 100644 --- a/src/imagination/common/pvr_device_info.h +++ b/src/imagination/common/pvr_device_info.h @@ -344,7 +344,6 @@ struct pvr_device_quirks { bool has_brn51764 : 1; bool has_brn52354 : 1; bool has_brn52942 : 1; - bool has_brn56279 : 1; bool has_brn58839 : 1; bool has_brn62269 : 1; bool has_brn66011 : 1; diff --git a/src/imagination/csbgen/meson.build b/src/imagination/csbgen/meson.build index ea6a8ff26ad..2ddbd791a6d 100644 --- a/src/imagination/csbgen/meson.build +++ b/src/imagination/csbgen/meson.build @@ -22,6 +22,7 @@ pvr_xml_files = [ 'rogue_cdm.xml', 'rogue_cr.xml', + 'rogue_fw.xml', 'rogue_ipf.xml', 'rogue_lls.xml', 'rogue_pbestate.xml', diff --git a/src/imagination/csbgen/rogue_cr.xml b/src/imagination/csbgen/rogue_cr.xml index 9cfaeaf7a03..3477a77f6d7 100644 --- a/src/imagination/csbgen/rogue_cr.xml +++ b/src/imagination/csbgen/rogue_cr.xml @@ -264,8 +264,11 @@ SOFTWARE. - - + + + + + @@ -285,6 +288,10 @@ SOFTWARE. 
+ + + + diff --git a/src/imagination/csbgen/rogue_fw.xml b/src/imagination/csbgen/rogue_fw.xml new file mode 100644 index 00000000000..4fa32547cc8 --- /dev/null +++ b/src/imagination/csbgen/rogue_fw.xml @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/imagination/csbgen/rogue_hwdefs.h b/src/imagination/csbgen/rogue_hwdefs.h index 75a3a472d4c..bf4345af19c 100644 --- a/src/imagination/csbgen/rogue_hwdefs.h +++ b/src/imagination/csbgen/rogue_hwdefs.h @@ -26,6 +26,7 @@ #include "rogue_cdm.h" #include "rogue_cr.h" +#include "rogue_fw.h" #include "rogue_ipf.h" #include "rogue_lls.h" #include "rogue_pbestate.h" diff --git a/src/imagination/include/hwdef/rogue_hw_utils.h b/src/imagination/include/hwdef/rogue_hw_utils.h index 9118cce67ae..f5d88cb33c8 100644 --- a/src/imagination/include/hwdef/rogue_hw_utils.h +++ b/src/imagination/include/hwdef/rogue_hw_utils.h @@ -102,16 +102,6 @@ rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info *dev_info, } } -static inline uint32_t -rogue_get_max_num_vdm_pds_tasks(const struct pvr_device_info *dev_info) -{ - /* Default value based on the minimum value found in all existing cores. */ - uint32_t max_usc_tasks = PVR_GET_FEATURE_VALUE(dev_info, max_usc_tasks, 24U); - - /* FIXME: Where does the 9 come from? 
*/ - return max_usc_tasks - 9; -} - static inline uint32_t rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info) { diff --git a/src/imagination/vulkan/pvr_job_compute.c b/src/imagination/vulkan/pvr_job_compute.c index cf29bfcf224..9a378c9aaa0 100644 --- a/src/imagination/vulkan/pvr_job_compute.c +++ b/src/imagination/vulkan/pvr_job_compute.c @@ -34,80 +34,40 @@ #include "pvr_winsys.h" #include "util/macros.h" -static void pvr_compute_job_ws_submit_info_init( - struct pvr_compute_ctx *ctx, - struct pvr_sub_cmd_compute *sub_cmd, - struct vk_sync *barrier, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, - struct pvr_winsys_compute_submit_info *submit_info) +static void +pvr_submit_info_stream_init(struct pvr_compute_ctx *ctx, + struct pvr_sub_cmd_compute *sub_cmd, + struct pvr_winsys_compute_submit_info *submit_info) { - const struct pvr_device *const device = ctx->device; - const struct pvr_physical_device *const pdevice = device->pdevice; + const struct pvr_physical_device *const pdevice = ctx->device->pdevice; const struct pvr_device_runtime_info *const dev_runtime_info = &pdevice->dev_runtime_info; const struct pvr_device_info *const dev_info = &pdevice->dev_info; const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch; - uint32_t shared_regs = sub_cmd->num_shared_regs; - submit_info->frame_num = device->global_queue_present_count; - submit_info->job_num = device->global_queue_job_count; + uint32_t *stream_ptr = (uint32_t *)submit_info->fw_stream; - submit_info->barrier = barrier; - - submit_info->waits = waits; - submit_info->wait_count = wait_count; - submit_info->stage_flags = stage_flags; - - pvr_csb_pack (&submit_info->regs.cdm_ctrl_stream_base, - CR_CDM_CTRL_STREAM_BASE, - value) { - value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream); - } - - /* FIXME: Need to set up the border color table at device creation - * time. Set to invalid for the time being. 
+ /* FIXME: Need to set up the border color table at device creation time. Set + * to invalid for the time being. */ - pvr_csb_pack (&submit_info->regs.tpu_border_colour_table, + pvr_csb_pack ((uint64_t *)stream_ptr, CR_TPU_BORDER_COLOUR_TABLE_CDM, value) { value.border_colour_table_address = PVR_DEV_ADDR_INVALID; } + stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM); - if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) - submit_info->regs.cdm_item = 0; - - pvr_csb_pack (&submit_info->regs.tpu, CR_TPU, value) { - value.tag_cem_4k_face_packing = true; + pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CTRL_STREAM_BASE, value) { + value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream); } + stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE); - pvr_csb_pack (&submit_info->regs.compute_cluster, - CR_COMPUTE_CLUSTER, - value) { - if (PVR_HAS_FEATURE(dev_info, cluster_grouping) && - PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && - dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) { - /* Each phantom has its own MCU, so atomicity can only be guaranteed - * when all work items are processed on the same phantom. This means we - * need to disable all USCs other than those of the first phantom, - * which has 4 clusters. - */ - value.mask = 0xFU; - } else { - value.mask = 0U; - } - } - - pvr_csb_pack (&submit_info->regs.cdm_ctx_state_base_addr, - CR_CDM_CONTEXT_STATE_BASE, - state) { + pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CONTEXT_STATE_BASE, state) { state.addr = ctx_switch->compute_state_bo->vma->dev_addr; } + stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE); - pvr_csb_pack (&submit_info->regs.cdm_resume_pds1, - CR_CDM_CONTEXT_PDS1, - state) { + pvr_csb_pack (stream_ptr, CR_CDM_CONTEXT_PDS1, state) { /* Convert the data size from dwords to bytes. 
*/ const uint32_t load_program_data_size = ctx_switch->sr[0].pds.load_program.data_size * 4U; @@ -118,7 +78,7 @@ static void pvr_compute_job_ws_submit_info_init( state.unified_size = ctx_switch->sr[0].usc.unified_size; state.common_shared = true; state.common_size = - DIV_ROUND_UP(shared_regs << 2, + DIV_ROUND_UP(sub_cmd->num_shared_regs << 2, PVRX(CR_CDM_CONTEXT_PDS1_COMMON_SIZE_UNIT_SIZE)); state.temp_size = 0; @@ -129,6 +89,99 @@ static void pvr_compute_job_ws_submit_info_init( load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE); state.fence = false; } + stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1); + + if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) { + pvr_csb_pack (stream_ptr, CR_CDM_ITEM, value) { + value.mode = 0; + } + stream_ptr += pvr_cmd_length(CR_CDM_ITEM); + } + + if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) { + pvr_csb_pack (stream_ptr, CR_COMPUTE_CLUSTER, value) { + if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && + dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) { + /* Each phantom has its own MCU, so atomicity can only be + * guaranteed when all work items are processed on the same + * phantom. This means we need to disable all USCs other than + * those of the first phantom, which has 4 clusters. 
+ */ + value.mask = 0xFU; + } else { + value.mask = 0U; + } + } + stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER); + } + + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { + pvr_finishme( + "Emit execute_count when feature gpu_multicore_support is present"); + *stream_ptr = 0; + stream_ptr++; + } + + submit_info->fw_stream_len = (uint8_t *)stream_ptr - submit_info->fw_stream; + assert(submit_info->fw_stream_len <= ARRAY_SIZE(submit_info->fw_stream)); +} + +static void pvr_submit_info_ext_stream_init( + struct pvr_compute_ctx *ctx, + struct pvr_winsys_compute_submit_info *submit_info) +{ + const struct pvr_device_info *const dev_info = + &ctx->device->pdevice->dev_info; + + uint32_t *ext_stream_ptr = (uint32_t *)submit_info->fw_ext_stream; + uint32_t *header0_ptr; + + header0_ptr = ext_stream_ptr; + ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_COMPUTE0); + + pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_COMPUTE0, header0) { + if (PVR_HAS_QUIRK(dev_info, 49927)) { + header0.has_brn49927 = true; + + pvr_csb_pack (ext_stream_ptr, CR_TPU, value) { + value.tag_cem_4k_face_packing = true; + } + ext_stream_ptr += pvr_cmd_length(CR_TPU); + } + } + + submit_info->fw_ext_stream_len = + (uint8_t *)ext_stream_ptr - submit_info->fw_ext_stream; + assert(submit_info->fw_ext_stream_len <= + ARRAY_SIZE(submit_info->fw_ext_stream)); + + if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0) + submit_info->fw_ext_stream_len = 0; +} + +static void pvr_compute_job_ws_submit_info_init( + struct pvr_compute_ctx *ctx, + struct pvr_sub_cmd_compute *sub_cmd, + struct vk_sync *barrier, + struct vk_sync **waits, + uint32_t wait_count, + uint32_t *stage_flags, + struct pvr_winsys_compute_submit_info *submit_info) +{ + const struct pvr_device *const device = ctx->device; + const struct pvr_device_info *const dev_info = &device->pdevice->dev_info; + + submit_info->frame_num = device->global_queue_present_count; + submit_info->job_num = device->global_queue_job_count; + + 
submit_info->barrier = barrier; + + submit_info->waits = waits; + submit_info->wait_count = wait_count; + submit_info->stage_flags = stage_flags; + + pvr_submit_info_stream_init(ctx, sub_cmd, submit_info); + pvr_submit_info_ext_stream_init(ctx, submit_info); if (sub_cmd->uses_barrier) submit_info->flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP; @@ -160,5 +213,6 @@ VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx, return device->ws->ops->compute_submit(ctx->ws_ctx, &submit_info, + &device->pdevice->dev_info, signal_sync); } diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c index 2f74144707c..03cb0140f1b 100644 --- a/src/imagination/vulkan/pvr_job_render.c +++ b/src/imagination/vulkan/pvr_job_render.c @@ -1165,30 +1165,33 @@ void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *rt_dataset) vk_free(&device->vk.alloc, rt_dataset); } -static void -pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx, - struct pvr_render_job *job, - struct pvr_winsys_geometry_state *state) +static void pvr_geom_state_stream_init(struct pvr_render_ctx *ctx, + struct pvr_render_job *job, + struct pvr_winsys_geometry_state *state) { const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info; - /* FIXME: Should this just be done unconditionally? The firmware will just - * ignore the value anyway. 
- */ - if (PVR_HAS_QUIRK(dev_info, 56279)) { - pvr_csb_pack (&state->regs.pds_ctrl, CR_PDS_CTRL, value) { - value.max_num_vdm_tasks = rogue_get_max_num_vdm_pds_tasks(dev_info); - } - } else { - state->regs.pds_ctrl = 0; - } + uint32_t *stream_ptr = (uint32_t *)state->fw_stream; - pvr_csb_pack (&state->regs.ppp_ctrl, CR_PPP_CTRL, value) { + pvr_csb_pack ((uint64_t *)stream_ptr, CR_VDM_CTRL_STREAM_BASE, value) { + value.addr = job->ctrl_stream_addr; + } + stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE); + + pvr_csb_pack ((uint64_t *)stream_ptr, + CR_TPU_BORDER_COLOUR_TABLE_VDM, + value) { + value.border_colour_table_address = job->border_colour_table_addr; + } + stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM); + + pvr_csb_pack (stream_ptr, CR_PPP_CTRL, value) { value.wclampen = true; value.fixed_point_format = 1; } + stream_ptr += pvr_cmd_length(CR_PPP_CTRL); - pvr_csb_pack (&state->regs.te_psg, CR_TE_PSG, value) { + pvr_csb_pack (stream_ptr, CR_TE_PSG, value) { value.completeonterminate = job->geometry_terminate; value.region_stride = job->rt_dataset->rgn_headers_stride / @@ -1196,40 +1199,71 @@ pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx, value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942); } - - /* The set up of CR_TPU must be identical to - * pvr_render_job_ws_fragment_state_init(). - */ - pvr_csb_pack (&state->regs.tpu, CR_TPU, value) { - value.tag_cem_4k_face_packing = true; - } - - pvr_csb_pack (&state->regs.tpu_border_colour_table, - CR_TPU_BORDER_COLOUR_TABLE_VDM, - value) { - value.border_colour_table_address = job->border_colour_table_addr; - } - - pvr_csb_pack (&state->regs.vdm_ctrl_stream_base, - CR_VDM_CTRL_STREAM_BASE, - value) { - value.addr = job->ctrl_stream_addr; - } + stream_ptr += pvr_cmd_length(CR_TE_PSG); /* Set up the USC common size for the context switch resume/load program * (ctx->ctx_switch.programs[i].sr->pds_load_program), which was created * as part of the render context. 
*/ - pvr_csb_pack (&state->regs.vdm_ctx_resume_task0_size, - VDMCTRL_PDS_STATE0, - value) { + pvr_csb_pack (stream_ptr, VDMCTRL_PDS_STATE0, value) { /* Calculate the size in bytes. */ const uint16_t shared_registers_size = job->max_shared_registers * 4; value.usc_common_size = DIV_ROUND_UP(shared_registers_size, PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE)); - }; + } + stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0); + + /* Set up view_idx to 0 */ + *stream_ptr = 0; + stream_ptr++; + + state->fw_stream_len = (uint8_t *)stream_ptr - state->fw_stream; + assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream)); +} + +static void +pvr_geom_state_stream_ext_init(struct pvr_render_ctx *ctx, + struct pvr_render_job *job, + struct pvr_winsys_geometry_state *state) +{ + const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info; + + uint32_t *ext_stream_ptr = (uint32_t *)state->fw_ext_stream; + uint32_t *header0_ptr; + + header0_ptr = ext_stream_ptr; + ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_GEOM0); + + pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_GEOM0, header0) { + if (PVR_HAS_QUIRK(dev_info, 49927)) { + header0.has_brn49927 = true; + + /* The set up of CR_TPU must be identical to + * pvr_frag_state_stream_ext_init(). 
+ */ + pvr_csb_pack (ext_stream_ptr, CR_TPU, value) { + value.tag_cem_4k_face_packing = true; + } + ext_stream_ptr += pvr_cmd_length(CR_TPU); + } + } + + state->fw_ext_stream_len = (uint8_t *)ext_stream_ptr - state->fw_ext_stream; + assert(state->fw_ext_stream_len <= ARRAY_SIZE(state->fw_ext_stream)); + + if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0) + state->fw_ext_stream_len = 0; +} + +static void +pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx, + struct pvr_render_job *job, + struct pvr_winsys_geometry_state *state) +{ + pvr_geom_state_stream_init(ctx, job, state); + pvr_geom_state_stream_ext_init(ctx, job, state); state->flags = 0; @@ -1295,19 +1329,20 @@ pvr_get_isp_num_tiles_xy(const struct pvr_device_info *dev_info, } } -static void -pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, - struct pvr_render_job *job, - struct pvr_winsys_fragment_state *state) +static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, + struct pvr_render_job *job, + struct pvr_winsys_fragment_state *state) { + const struct pvr_physical_device *const pdevice = ctx->device->pdevice; + const struct pvr_device_runtime_info *dev_runtime_info = + &pdevice->dev_runtime_info; + const struct pvr_device_info *dev_info = &pdevice->dev_info; const enum PVRX(CR_ISP_AA_MODE_TYPE) isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples); - const struct pvr_device_runtime_info *dev_runtime_info = - &ctx->device->pdevice->dev_runtime_info; - const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info; - uint32_t isp_ctl; - /* FIXME: what to do when job->run_frag is false? */ + uint32_t *stream_ptr = (uint32_t *)state->fw_stream; + uint32_t pixel_ctl; + uint32_t isp_ctl; /* FIXME: pass in the number of samples rather than isp_aa_mode? 
*/ pvr_setup_tiles_in_flight(dev_info, @@ -1317,72 +1352,27 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, false, job->max_tiles_in_flight, &isp_ctl, - &state->regs.usc_pixel_output_ctrl); + &pixel_ctl); - pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, value) { - value.sample_pos = true; - - /* FIXME: There are a number of things that cause this to be set, this - * is just one of them. - */ - value.process_empty_tiles = job->process_empty_tiles; + pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_SCISSOR_BASE, value) { + value.addr = job->scissor_table_addr; } + stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE); - /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be - * possible to fully pack CR_ISP_CTL above rather than having to OR in part - * of the value. - */ - state->regs.isp_ctl |= isp_ctl; - - pvr_csb_pack (&state->regs.isp_aa, CR_ISP_AA, value) { - value.mode = isp_aa_mode; + pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_DBIAS_BASE, value) { + value.addr = job->depth_bias_table_addr; } + stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE); - /* The set up of CR_TPU must be identical to - * pvr_render_job_ws_geometry_state_init(). - */ - pvr_csb_pack (&state->regs.tpu, CR_TPU, value) { - value.tag_cem_4k_face_packing = true; - } - - if (PVR_HAS_FEATURE(dev_info, cluster_grouping) && - PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && - dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) { - /* Each phantom has its own MCU, so atomicity can only be guaranteed - * when all work items are processed on the same phantom. This means we - * need to disable all USCs other than those of the first phantom, which - * has 4 clusters. Note that we only need to do this for atomic - * operations in fragment shaders, since hardware prevents the TA to run - * on more than one phantom anyway. 
- */ - state->regs.pixel_phantom = 0xF; - } else { - state->regs.pixel_phantom = 0; - } - - pvr_csb_pack (&state->regs.isp_bgobjvals, CR_ISP_BGOBJVALS, value) { - value.enablebgtag = job->enable_bg_tag; - - value.mask = true; - - /* FIXME: Hard code this for now as we don't currently support any - * stencil image formats. - */ - value.stencil = 0xFF; - } - - pvr_csb_pack (&state->regs.isp_bgobjdepth, CR_ISP_BGOBJDEPTH, value) { - /* FIXME: This is suitable for the single depth format the driver - * currently supports, but may need updating to handle other depth - * formats. - */ - value.value = fui(job->depth_clear_value); + pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_OCLQRY_BASE, value) { + value.addr = PVR_DEV_ADDR_INVALID; } + stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE); /* FIXME: Some additional set up needed to support depth and stencil * load/store operations. */ - pvr_csb_pack (&state->regs.isp_zlsctl, CR_ISP_ZLSCTL, value) { + pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) { uint32_t aligned_width = ALIGN_POT(job->depth_physical_width, ROGUE_IPF_TILE_SIZE_PIXELS); uint32_t aligned_height = @@ -1410,47 +1400,96 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_F32Z); value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z); } + stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL); - if (PVR_HAS_FEATURE(dev_info, zls_subtile)) { - pvr_csb_pack (&state->regs.isp_zls_pixels, CR_ISP_ZLS_PIXELS, value) { - value.x = job->depth_stride - 1; - value.y = job->depth_height - 1; - } - } else { - state->regs.isp_zls_pixels = 0; - } - - pvr_csb_pack (&state->regs.isp_zload_store_base, CR_ISP_ZLOAD_BASE, value) { + pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLOAD_BASE, value) { value.addr = job->depth_addr; } + stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE); - pvr_csb_pack (&state->regs.isp_stencil_load_store_base, - CR_ISP_STENCIL_LOAD_BASE, - value) { + pvr_csb_pack ((uint64_t *)stream_ptr, 
CR_ISP_STENCIL_LOAD_BASE, value) { value.addr = job->stencil_addr; /* FIXME: May need to set value.enable to true. */ } + stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE); - pvr_csb_pack (&state->regs.tpu_border_colour_table, + *(uint64_t *)stream_ptr = 0; + stream_ptr += 2U; + + STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == 8U); + STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) == 3U); + STATIC_ASSERT(sizeof(job->pbe_reg_words[0][0]) == sizeof(uint64_t)); + memcpy(stream_ptr, job->pbe_reg_words, sizeof(job->pbe_reg_words)); + stream_ptr += 8U * 3U * 2U; + + pvr_csb_pack ((uint64_t *)stream_ptr, CR_TPU_BORDER_COLOUR_TABLE_PDM, value) { value.border_colour_table_address = job->border_colour_table_addr; } + stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM); - state->regs.isp_oclqry_base = 0; + STATIC_ASSERT(ARRAY_SIZE(job->pds_bgnd_reg_values) == 3U); + STATIC_ASSERT(sizeof(job->pds_bgnd_reg_values[0]) == sizeof(uint64_t)); + memcpy(stream_ptr, + job->pds_bgnd_reg_values, + sizeof(job->pds_bgnd_reg_values)); + stream_ptr += 3U * 2U; - pvr_csb_pack (&state->regs.isp_dbias_base, CR_ISP_DBIAS_BASE, value) { - value.addr = job->depth_bias_table_addr; + /* Set pds_pr_bgnd array to 0 */ + memset(stream_ptr, 0, 3U * sizeof(uint64_t)); + stream_ptr += 3U * 2U; + + /* Set usc_clear_register array to 0 */ + memset(stream_ptr, 0, 8U * sizeof(uint32_t)); + stream_ptr += 8U; + + *stream_ptr = pixel_ctl; + stream_ptr++; + + pvr_csb_pack (stream_ptr, CR_ISP_BGOBJDEPTH, value) { + /* FIXME: This is suitable for the single depth format the driver + * currently supports, but may need updating to handle other depth + * formats. 
+ */ + value.value = fui(job->depth_clear_value); } + stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH); - pvr_csb_pack (&state->regs.isp_scissor_base, CR_ISP_SCISSOR_BASE, value) { - value.addr = job->scissor_table_addr; + pvr_csb_pack (stream_ptr, CR_ISP_BGOBJVALS, value) { + value.enablebgtag = job->enable_bg_tag; + + value.mask = true; + + /* FIXME: Hard code this for now as we don't currently support any + * stencil image formats. + */ + value.stencil = 0xFF; } + stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS); - pvr_csb_pack (&state->regs.event_pixel_pds_info, - CR_EVENT_PIXEL_PDS_INFO, - value) { + pvr_csb_pack (stream_ptr, CR_ISP_AA, value) { + value.mode = isp_aa_mode; + } + stream_ptr += pvr_cmd_length(CR_ISP_AA); + + pvr_csb_pack (stream_ptr, CR_ISP_CTL, value) { + value.sample_pos = true; + + /* FIXME: There are a number of things that cause this to be set, this + * is just one of them. + */ + value.process_empty_tiles = job->process_empty_tiles; + } + /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be + * possible to fully pack CR_ISP_CTL above rather than having to OR in part + * of the value. 
+ */ + *stream_ptr |= isp_ctl; + stream_ptr += pvr_cmd_length(CR_ISP_CTL); + + pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_INFO, value) { value.const_size = DIV_ROUND_UP(ctx->device->pixel_event_data_size_in_dwords, PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE)); @@ -1459,32 +1498,114 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, DIV_ROUND_UP(PVR_STATE_PBE_DWORDS, PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE)); } + stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO); - pvr_csb_pack (&state->regs.event_pixel_pds_data, - CR_EVENT_PIXEL_PDS_DATA, - value) { + if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) { + uint32_t pixel_phantom = 0; + + if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && + dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) { + /* Each phantom has its own MCU, so atomicity can only be guaranteed + * when all work items are processed on the same phantom. This means + * we need to disable all USCs other than those of the first + * phantom, which has 4 clusters. Note that we only need to do this + * for atomic operations in fragment shaders, since hardware + * prevents the TA to run on more than one phantom anyway. 
+ */ + pixel_phantom = 0xF; + } + + *stream_ptr = pixel_phantom; + stream_ptr++; + } + + /* Set up view_idx to 0 */ + *stream_ptr = 0; + stream_ptr++; + + pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) { value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset); } + stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA); - STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word) == - ARRAY_SIZE(job->pbe_reg_words)); - STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word[0]) == - ARRAY_SIZE(job->pbe_reg_words[0])); - - for (uint32_t i = 0; i < ARRAY_SIZE(job->pbe_reg_words); i++) { - state->regs.pbe_word[i][0] = job->pbe_reg_words[i][0]; - state->regs.pbe_word[i][1] = job->pbe_reg_words[i][1]; - state->regs.pbe_word[i][2] = job->pbe_reg_words[i][2]; + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { + pvr_finishme( + "Emit isp_oclqry_stride when feature gpu_multicore_support is present"); + *stream_ptr = 0; + stream_ptr++; } - STATIC_ASSERT(__same_type(state->regs.pds_bgnd, job->pds_bgnd_reg_values)); - typed_memcpy(state->regs.pds_bgnd, - job->pds_bgnd_reg_values, - ARRAY_SIZE(state->regs.pds_bgnd)); + if (PVR_HAS_FEATURE(dev_info, zls_subtile)) { + pvr_csb_pack (stream_ptr, CR_ISP_ZLS_PIXELS, value) { + value.x = job->depth_stride - 1; + value.y = job->depth_height - 1; + } + stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS); + } - memset(state->regs.pds_pr_bgnd, 0, sizeof(state->regs.pds_pr_bgnd)); + /* zls_stride */ + *stream_ptr = job->depth_layer_size; + stream_ptr++; + + /* sls_stride */ + *stream_ptr = job->depth_layer_size; + stream_ptr++; + + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { + pvr_finishme( + "Emit execute_count when feature gpu_multicore_support is present"); + *stream_ptr = 0; + stream_ptr++; + } + + state->fw_stream_len = (uint8_t *)stream_ptr - state->fw_stream; + assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream)); +} + +static void +pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx, + struct pvr_render_job 
*job, + struct pvr_winsys_fragment_state *state) +{ + const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info; + + uint32_t *ext_stream_ptr = (uint32_t *)state->fw_ext_stream; + uint32_t *header0_ptr; + + header0_ptr = ext_stream_ptr; + ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_FRAG0); + + pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_FRAG0, header0) { + if (PVR_HAS_QUIRK(dev_info, 49927)) { + header0.has_brn49927 = true; + + /* The set up of CR_TPU must be identical to + * pvr_geom_state_stream_ext_init(). + */ + pvr_csb_pack (ext_stream_ptr, CR_TPU, value) { + value.tag_cem_4k_face_packing = true; + } + ext_stream_ptr += pvr_cmd_length(CR_TPU); + } + } + + state->fw_ext_stream_len = (uint8_t *)ext_stream_ptr - state->fw_ext_stream; + assert(state->fw_ext_stream_len <= ARRAY_SIZE(state->fw_ext_stream)); + + if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0) + state->fw_ext_stream_len = 0; +} + +static void +pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, + struct pvr_render_job *job, + struct pvr_winsys_fragment_state *state) +{ + /* FIXME: what to do when job->run_frag is false? */ + + pvr_frag_state_stream_init(ctx, job, state); + pvr_frag_state_stream_ext_init(ctx, job, state); - /* FIXME: Merge geometry and fragment flags into a single flags member? */ /* FIXME: move to its own function? */ state->flags = 0; @@ -1499,9 +1620,6 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, if (job->frag_uses_atomic_ops) state->flags |= PVR_WINSYS_FRAG_FLAG_SINGLE_CORE; - - state->zls_stride = job->depth_layer_size; - state->sls_stride = job->depth_layer_size; } static void pvr_render_job_ws_submit_info_init( @@ -1533,9 +1651,6 @@ static void pvr_render_job_ws_submit_info_init( pvr_render_job_ws_geometry_state_init(ctx, job, &submit_info->geometry); pvr_render_job_ws_fragment_state_init(ctx, job, &submit_info->fragment); - - /* These values are expected to match. 
*/ - assert(submit_info->geometry.regs.tpu == submit_info->fragment.regs.tpu); } VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx, @@ -1564,6 +1679,7 @@ VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx, result = device->ws->ops->render_submit(ctx->ws_ctx, &submit_info, + &device->pdevice->dev_info, signal_sync_geom, signal_sync_frag); if (result != VK_SUCCESS) diff --git a/src/imagination/vulkan/winsys/pvr_winsys.h b/src/imagination/vulkan/winsys/pvr_winsys.h index c8887255553..3013b21e5c0 100644 --- a/src/imagination/vulkan/winsys/pvr_winsys.h +++ b/src/imagination/vulkan/winsys/pvr_winsys.h @@ -309,15 +309,17 @@ struct pvr_winsys_compute_submit_info { uint32_t wait_count; uint32_t *stage_flags; - struct { - uint64_t tpu_border_colour_table; - uint64_t cdm_ctrl_stream_base; - uint64_t cdm_ctx_state_base_addr; - uint32_t tpu; - uint32_t cdm_resume_pds1; - uint32_t cdm_item; - uint32_t compute_cluster; - } regs; + /* Firmware stream buffer. This is the maximum possible size taking into + * consideration all HW features. + */ + uint8_t fw_stream[92]; + uint32_t fw_stream_len; + + /* Firmware extension stream buffer. This is the maximum possible size taking + * into consideration all quirks and enhancements. + */ + uint8_t fw_ext_stream[8]; + uint32_t fw_ext_stream_len; /* Must be 0 or a combination of PVR_WINSYS_COMPUTE_FLAG_* flags. */ uint32_t flags; @@ -351,49 +353,37 @@ struct pvr_winsys_render_submit_info { uint32_t *stage_flags; struct pvr_winsys_geometry_state { - struct { - uint64_t pds_ctrl; - uint32_t ppp_ctrl; - uint32_t te_psg; - uint32_t tpu; - uint64_t tpu_border_colour_table; - uint64_t vdm_ctrl_stream_base; - uint32_t vdm_ctx_resume_task0_size; - } regs; + /* Firmware stream buffer. This is the maximum possible size taking into + * consideration all HW features. + */ + uint8_t fw_stream[52]; + uint32_t fw_stream_len; + + /* Firmware extension stream buffer. 
This is the maximum possible size + * taking into consideration all quirks and enhancements. + */ + uint8_t fw_ext_stream[12]; + uint32_t fw_ext_stream_len; /* Must be 0 or a combination of PVR_WINSYS_GEOM_FLAG_* flags. */ uint32_t flags; } geometry; struct pvr_winsys_fragment_state { - struct { - uint32_t event_pixel_pds_data; - uint32_t event_pixel_pds_info; - uint32_t isp_aa; - uint32_t isp_bgobjdepth; - uint32_t isp_bgobjvals; - uint32_t isp_ctl; - uint64_t isp_dbias_base; - uint64_t isp_oclqry_base; - uint64_t isp_scissor_base; - uint64_t isp_stencil_load_store_base; - uint64_t isp_zload_store_base; - uint64_t isp_zlsctl; - uint32_t isp_zls_pixels; - uint64_t pbe_word[PVR_MAX_COLOR_ATTACHMENTS] - [ROGUE_NUM_PBESTATE_REG_WORDS]; - uint32_t pixel_phantom; - uint64_t pds_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS]; - uint64_t pds_pr_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS]; - uint32_t tpu; - uint64_t tpu_border_colour_table; - uint32_t usc_pixel_output_ctrl; - } regs; + /* Firmware stream buffer. This is the maximum possible size taking into + * consideration all HW features. + */ + uint8_t fw_stream[432]; + uint32_t fw_stream_len; + + /* Firmware extension stream buffer. This is the maximum possible size + * taking into consideration all quirks and enhancements. + */ + uint8_t fw_ext_stream[8]; + uint32_t fw_ext_stream_len; /* Must be 0 or a combination of PVR_WINSYS_FRAG_FLAG_* flags. 
*/ uint32_t flags; - uint32_t zls_stride; - uint32_t sls_stride; } fragment; }; @@ -458,6 +448,7 @@ struct pvr_winsys_ops { VkResult (*render_submit)( const struct pvr_winsys_render_ctx *ctx, const struct pvr_winsys_render_submit_info *submit_info, + const struct pvr_device_info *dev_info, struct vk_sync *signal_sync_geom, struct vk_sync *signal_sync_frag); @@ -469,6 +460,7 @@ struct pvr_winsys_ops { VkResult (*compute_submit)( const struct pvr_winsys_compute_ctx *ctx, const struct pvr_winsys_compute_submit_info *submit_info, + const struct pvr_device_info *dev_info, struct vk_sync *signal_sync); VkResult (*transfer_ctx_create)( diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h index 983a33f5ce2..9774ae7041e 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h +++ b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h @@ -168,7 +168,7 @@ struct rogue_fwif_ta_regs { /* Only used when feature VDM_OBJECT_LEVEL_LLS present. */ uint32_t vdm_context_resume_task3_size; - /* Only used when BRN 56279 or BRN 67381 present. */ + /* Only used when BRN 67381 present. 
*/ uint32_t pds_ctrl; uint32_t view_idx; @@ -208,7 +208,7 @@ struct rogue_fwif_cmd_ta { */ struct rogue_fwif_cmd_ta_3d_shared cmd_shared; - struct rogue_fwif_ta_regs ALIGN_ATTR(8) geom_regs; + struct rogue_fwif_ta_regs ALIGN_ATTR(8) regs; uint32_t ALIGN_ATTR(8) flags; /** * Holds the TA/3D fence value to allow the 3D partial render command diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c index 03faa250cc0..5208cd83446 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c @@ -30,6 +30,7 @@ #include "fw-api/pvr_rogue_fwif.h" #include "fw-api/pvr_rogue_fwif_rf.h" +#include "pvr_device_info.h" #include "pvr_private.h" #include "pvr_srv.h" #include "pvr_srv_bridge.h" @@ -136,24 +137,86 @@ void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx) vk_free(srv_ws->alloc, srv_ctx); } +static void +pvr_srv_compute_cmd_stream_load(struct rogue_fwif_cmd_compute *const cmd, + const uint8_t *const stream, + const uint32_t stream_len, + const struct pvr_device_info *const dev_info) +{ + const uint32_t *stream_ptr = (const uint32_t *)stream; + struct rogue_fwif_cdm_regs *const regs = &cmd->regs; + + regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM); + + regs->cdm_ctrl_stream_base = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE); + + regs->cdm_context_state_base_addr = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE); + + regs->cdm_resume_pds1 = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1); + + regs->cdm_item = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_CDM_ITEM); + + if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) { + regs->compute_cluster = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER); + } + + 
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { + cmd->execute_count = *stream_ptr; + stream_ptr++; + } + + assert((const uint8_t *)stream_ptr - stream == stream_len); +} + +static void pvr_srv_compute_cmd_ext_stream_load( + struct rogue_fwif_cmd_compute *const cmd, + const uint8_t *const ext_stream, + const uint32_t ext_stream_len, + const struct pvr_device_info *const dev_info) +{ + const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream; + struct rogue_fwif_cdm_regs *const regs = &cmd->regs; + + struct PVRX(FW_STREAM_EXTHDR_COMPUTE0) header0; + + header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_COMPUTE0); + ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_COMPUTE0); + + assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927); + if (header0.has_brn49927) { + regs->tpu = *ext_stream_ptr; + ext_stream_ptr += pvr_cmd_length(CR_TPU); + } + + assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len); +} + static void pvr_srv_compute_cmd_init( const struct pvr_winsys_compute_submit_info *submit_info, - struct rogue_fwif_cmd_compute *cmd) + struct rogue_fwif_cmd_compute *cmd, + const struct pvr_device_info *const dev_info) { - struct rogue_fwif_cdm_regs *fw_regs = &cmd->regs; - memset(cmd, 0, sizeof(*cmd)); cmd->cmn.frame_num = submit_info->frame_num; - fw_regs->tpu_border_colour_table = submit_info->regs.tpu_border_colour_table; - fw_regs->cdm_item = submit_info->regs.cdm_item; - fw_regs->compute_cluster = submit_info->regs.compute_cluster; - fw_regs->cdm_ctrl_stream_base = submit_info->regs.cdm_ctrl_stream_base; - fw_regs->cdm_context_state_base_addr = - submit_info->regs.cdm_ctx_state_base_addr; - fw_regs->tpu = submit_info->regs.tpu; - fw_regs->cdm_resume_pds1 = submit_info->regs.cdm_resume_pds1; + pvr_srv_compute_cmd_stream_load(cmd, + submit_info->fw_stream, + submit_info->fw_stream_len, + dev_info); + + if (submit_info->fw_ext_stream_len) { + pvr_srv_compute_cmd_ext_stream_load(cmd, + submit_info->fw_ext_stream, + 
submit_info->fw_ext_stream_len, + dev_info); + } if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP) cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP; @@ -165,6 +228,7 @@ static void pvr_srv_compute_cmd_init( VkResult pvr_srv_winsys_compute_submit( const struct pvr_winsys_compute_ctx *ctx, const struct pvr_winsys_compute_submit_info *submit_info, + const struct pvr_device_info *const dev_info, struct vk_sync *signal_sync) { const struct pvr_srv_winsys_compute_ctx *srv_ctx = @@ -176,7 +240,7 @@ VkResult pvr_srv_winsys_compute_submit( int in_fd = -1; int fence; - pvr_srv_compute_cmd_init(submit_info, &compute_cmd); + pvr_srv_compute_cmd_init(submit_info, &compute_cmd, dev_info); for (uint32_t i = 0U; i < submit_info->wait_count; i++) { struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]); diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h index 7793b75bd17..a5a50754d61 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h @@ -26,6 +26,7 @@ #include +struct pvr_device_info; struct pvr_winsys; struct pvr_winsys_compute_ctx; struct pvr_winsys_compute_ctx_create_info; @@ -45,6 +46,7 @@ void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx); VkResult pvr_srv_winsys_compute_submit( const struct pvr_winsys_compute_ctx *ctx, const struct pvr_winsys_compute_submit_info *submit_info, + const struct pvr_device_info *dev_info, struct vk_sync *signal_sync); #endif /* PVR_SRV_JOB_COMPUTE_H */ diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c index 8080beebb1e..d6d1510f595 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c @@ -402,28 +402,82 @@ void 
pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx) vk_free(srv_ws->alloc, srv_ctx); } +static void +pvr_srv_geometry_cmd_stream_load(struct rogue_fwif_cmd_ta *const cmd, + const uint8_t *const stream, + const uint32_t stream_len, + const struct pvr_device_info *const dev_info) +{ + const uint32_t *stream_ptr = (const uint32_t *)stream; + struct rogue_fwif_ta_regs *const regs = &cmd->regs; + + regs->vdm_ctrl_stream_base = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE); + + regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM); + + regs->ppp_ctrl = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_PPP_CTRL); + + regs->te_psg = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_TE_PSG); + + regs->vdm_context_resume_task0_size = *stream_ptr; + stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0); + + regs->view_idx = *stream_ptr; + stream_ptr++; + + assert((const uint8_t *)stream_ptr - stream == stream_len); +} + +static void pvr_srv_geometry_cmd_ext_stream_load( + struct rogue_fwif_cmd_ta *const cmd, + const uint8_t *const ext_stream, + const uint32_t ext_stream_len, + const struct pvr_device_info *const dev_info) +{ + const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream; + struct rogue_fwif_ta_regs *const regs = &cmd->regs; + + struct PVRX(FW_STREAM_EXTHDR_GEOM0) header0; + + header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_GEOM0); + ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_GEOM0); + + assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927); + if (header0.has_brn49927) { + regs->tpu = *ext_stream_ptr; + ext_stream_ptr += pvr_cmd_length(CR_TPU); + } + + assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len); +} + static void pvr_srv_geometry_cmd_init( const struct pvr_winsys_render_submit_info *submit_info, const struct pvr_srv_sync_prim *sync_prim, - struct rogue_fwif_cmd_ta *cmd) + struct 
rogue_fwif_cmd_ta *cmd, + const struct pvr_device_info *const dev_info) { const struct pvr_winsys_geometry_state *state = &submit_info->geometry; - struct rogue_fwif_ta_regs *fw_regs = &cmd->geom_regs; memset(cmd, 0, sizeof(*cmd)); cmd->cmd_shared.cmn.frame_num = submit_info->frame_num; - fw_regs->vdm_ctrl_stream_base = state->regs.vdm_ctrl_stream_base; - fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table; - fw_regs->ppp_ctrl = state->regs.ppp_ctrl; - fw_regs->te_psg = state->regs.te_psg; - fw_regs->tpu = state->regs.tpu; - fw_regs->vdm_context_resume_task0_size = - state->regs.vdm_ctx_resume_task0_size; + pvr_srv_geometry_cmd_stream_load(cmd, + state->fw_stream, + state->fw_stream_len, + dev_info); - assert(state->regs.pds_ctrl >> 32U == 0U); - fw_regs->pds_ctrl = (uint32_t)state->regs.pds_ctrl; + if (state->fw_ext_stream_len) { + pvr_srv_geometry_cmd_ext_stream_load(cmd, + state->fw_ext_stream, + state->fw_ext_stream_len, + dev_info); + } if (state->flags & PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY) cmd->flags |= ROGUE_FWIF_TAFLAGS_FIRSTKICK; @@ -439,73 +493,160 @@ static void pvr_srv_geometry_cmd_init( cmd->partial_render_ta_3d_fence.value = sync_prim->value; } +static void +pvr_srv_fragment_cmd_stream_load(struct rogue_fwif_cmd_3d *const cmd, + const uint8_t *const stream, + const uint32_t stream_len, + const struct pvr_device_info *const dev_info) +{ + const uint32_t *stream_ptr = (const uint32_t *)stream; + struct rogue_fwif_3d_regs *const regs = &cmd->regs; + + regs->isp_scissor_base = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE); + + regs->isp_dbias_base = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE); + + regs->isp_oclqry_base = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE); + + regs->isp_zlsctl = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL); + + regs->isp_zload_store_base = *(const uint64_t 
*)stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE); + + regs->isp_stencil_load_store_base = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE); + + regs->fb_cdc_zls = *(const uint64_t *)stream_ptr; + stream_ptr += 2U; + + STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word) == 8U); + STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word[0]) == 3U); + STATIC_ASSERT(sizeof(regs->pbe_word[0][0]) == sizeof(uint64_t)); + memcpy(regs->pbe_word, stream_ptr, sizeof(regs->pbe_word)); + stream_ptr += 8U * 3U * 2U; + + regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr; + stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM); + + STATIC_ASSERT(ARRAY_SIZE(regs->pds_bgnd) == 3U); + STATIC_ASSERT(sizeof(regs->pds_bgnd[0]) == sizeof(uint64_t)); + memcpy(regs->pds_bgnd, stream_ptr, sizeof(regs->pds_bgnd)); + stream_ptr += 3U * 2U; + + STATIC_ASSERT(ARRAY_SIZE(regs->pds_pr_bgnd) == 3U); + STATIC_ASSERT(sizeof(regs->pds_pr_bgnd[0]) == sizeof(uint64_t)); + memcpy(regs->pds_pr_bgnd, stream_ptr, sizeof(regs->pds_pr_bgnd)); + stream_ptr += 3U * 2U; + + STATIC_ASSERT(ARRAY_SIZE(regs->usc_clear_register) == 8U); + STATIC_ASSERT(sizeof(regs->usc_clear_register[0]) == sizeof(uint32_t)); + memcpy(regs->usc_clear_register, + stream_ptr, + sizeof(regs->usc_clear_register)); + stream_ptr += 8U; + + regs->usc_pixel_output_ctrl = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL); + + regs->isp_bgobjdepth = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH); + + regs->isp_bgobjvals = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS); + + regs->isp_aa = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_AA); + + regs->isp_ctl = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_CTL); + + regs->event_pixel_pds_info = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO); + + if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) { + regs->pixel_phantom = *stream_ptr; + stream_ptr++; + } + + regs->view_idx 
= *stream_ptr; + stream_ptr++; + + regs->event_pixel_pds_data = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA); + + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { + regs->isp_oclqry_stride = *stream_ptr; + stream_ptr++; + } + + if (PVR_HAS_FEATURE(dev_info, zls_subtile)) { + regs->isp_zls_pixels = *stream_ptr; + stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS); + } + + cmd->zls_stride = *stream_ptr; + stream_ptr++; + + cmd->sls_stride = *stream_ptr; + stream_ptr++; + + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { + cmd->execute_count = *stream_ptr; + stream_ptr++; + } + + assert((const uint8_t *)stream_ptr - stream == stream_len); +} + +static void pvr_srv_fragment_cmd_ext_stream_load( + struct rogue_fwif_cmd_3d *const cmd, + const uint8_t *const ext_stream, + const uint32_t ext_stream_len, + const struct pvr_device_info *const dev_info) +{ + const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream; + struct rogue_fwif_3d_regs *const regs = &cmd->regs; + + struct PVRX(FW_STREAM_EXTHDR_FRAG0) header0; + + header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_FRAG0); + ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_FRAG0); + + assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927); + if (header0.has_brn49927) { + regs->tpu = *ext_stream_ptr; + ext_stream_ptr += pvr_cmd_length(CR_TPU); + } + + assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len); +} + static void pvr_srv_fragment_cmd_init( const struct pvr_winsys_render_submit_info *submit_info, - struct rogue_fwif_cmd_3d *cmd) + struct rogue_fwif_cmd_3d *cmd, + const struct pvr_device_info *dev_info) { const struct pvr_winsys_fragment_state *state = &submit_info->fragment; - struct rogue_fwif_3d_regs *fw_regs = &cmd->regs; memset(cmd, 0, sizeof(*cmd)); cmd->cmd_shared.cmn.frame_num = submit_info->frame_num; - fw_regs->usc_pixel_output_ctrl = state->regs.usc_pixel_output_ctrl; - fw_regs->isp_bgobjdepth = state->regs.isp_bgobjdepth; - 
fw_regs->isp_bgobjvals = state->regs.isp_bgobjvals; - fw_regs->isp_aa = state->regs.isp_aa; - fw_regs->isp_ctl = state->regs.isp_ctl; - fw_regs->tpu = state->regs.tpu; - fw_regs->event_pixel_pds_info = state->regs.event_pixel_pds_info; - fw_regs->pixel_phantom = state->regs.pixel_phantom; - fw_regs->event_pixel_pds_data = state->regs.event_pixel_pds_data; - fw_regs->isp_scissor_base = state->regs.isp_scissor_base; - fw_regs->isp_dbias_base = state->regs.isp_dbias_base; - fw_regs->isp_oclqry_base = state->regs.isp_oclqry_base; - fw_regs->isp_zlsctl = state->regs.isp_zlsctl; - fw_regs->isp_zload_store_base = state->regs.isp_zload_store_base; - fw_regs->isp_stencil_load_store_base = - state->regs.isp_stencil_load_store_base; - fw_regs->isp_zls_pixels = state->regs.isp_zls_pixels; + pvr_srv_fragment_cmd_stream_load(cmd, + state->fw_stream, + state->fw_stream_len, + dev_info); - STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word) == - ARRAY_SIZE(state->regs.pbe_word)); - - STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word[0]) <= - ARRAY_SIZE(state->regs.pbe_word[0])); - -#if !defined(NDEBUG) - /* Depending on the hardware we might have more PBE words than the firmware - * accepts so check that the extra words are 0. - */ - if (ARRAY_SIZE(fw_regs->pbe_word[0]) < ARRAY_SIZE(state->regs.pbe_word[0])) { - /* For each color attachment. */ - for (uint32_t i = 0; i < ARRAY_SIZE(state->regs.pbe_word); i++) { - /* For each extra PBE word not used by the firmware. 
*/ - for (uint32_t j = ARRAY_SIZE(fw_regs->pbe_word[0]); - j < ARRAY_SIZE(state->regs.pbe_word[0]); - j++) { - assert(state->regs.pbe_word[i][j] == 0); - } - } + if (state->fw_ext_stream_len) { + pvr_srv_fragment_cmd_ext_stream_load(cmd, + state->fw_ext_stream, + state->fw_ext_stream_len, + dev_info); } -#endif - - memcpy(fw_regs->pbe_word, state->regs.pbe_word, sizeof(fw_regs->pbe_word)); - - fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table; - - STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_bgnd) == - ARRAY_SIZE(state->regs.pds_bgnd)); - typed_memcpy(fw_regs->pds_bgnd, - state->regs.pds_bgnd, - ARRAY_SIZE(fw_regs->pds_bgnd)); - - STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_pr_bgnd) == - ARRAY_SIZE(state->regs.pds_pr_bgnd)); - typed_memcpy(fw_regs->pds_pr_bgnd, - state->regs.pds_pr_bgnd, - ARRAY_SIZE(fw_regs->pds_pr_bgnd)); if (state->flags & PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT) cmd->flags |= ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER; @@ -518,14 +659,12 @@ static void pvr_srv_fragment_cmd_init( if (state->flags & PVR_WINSYS_FRAG_FLAG_SINGLE_CORE) cmd->flags |= ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE; - - cmd->zls_stride = state->zls_stride; - cmd->sls_stride = state->sls_stride; } VkResult pvr_srv_winsys_render_submit( const struct pvr_winsys_render_ctx *ctx, const struct pvr_winsys_render_submit_info *submit_info, + const struct pvr_device_info *dev_info, struct vk_sync *signal_sync_geom, struct vk_sync *signal_sync_frag) { @@ -552,8 +691,8 @@ VkResult pvr_srv_winsys_render_submit( VkResult result; - pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd); - pvr_srv_fragment_cmd_init(submit_info, &frag_cmd); + pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info); + pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info); for (uint32_t i = 0U; i < submit_info->wait_count; i++) { struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]); diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h 
b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h index 3612e38c51a..257a2ab5297 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h @@ -68,6 +68,7 @@ void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx); VkResult pvr_srv_winsys_render_submit( const struct pvr_winsys_render_ctx *ctx, const struct pvr_winsys_render_submit_info *submit_info, + const struct pvr_device_info *dev_info, struct vk_sync *signal_sync_geom, struct vk_sync *signal_sync_frag);