mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-23 08:38:08 +02:00
Signed-off-by: Luigi Santivetti <luigi.santivetti@imgtec.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37512>
1752 lines
61 KiB
C
1752 lines
61 KiB
C
/*
|
|
* Copyright © 2022 Imagination Technologies Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <vulkan/vulkan.h>
|
|
|
|
#include "hwdef/rogue_hw_defs.h"
|
|
#include "hwdef/rogue_hw_utils.h"
|
|
#include "pvr_bo.h"
|
|
#include "pvr_csb.h"
|
|
#include "pvr_debug.h"
|
|
#include "pvr_csb_enum_helpers.h"
|
|
#include "pvr_debug.h"
|
|
#include "pvr_job_common.h"
|
|
#include "pvr_job_context.h"
|
|
#include "pvr_job_render.h"
|
|
#include "pvr_pds.h"
|
|
#include "pvr_private.h"
|
|
#include "pvr_rogue_fw.h"
|
|
#include "pvr_types.h"
|
|
#include "pvr_winsys.h"
|
|
#include "util/compiler.h"
|
|
#include "util/format/format_utils.h"
|
|
#include "util/macros.h"
|
|
#include "util/u_math.h"
|
|
#include "vk_alloc.h"
|
|
#include "vk_log.h"
|
|
#include "vk_util.h"
|
|
|
|
#define ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE 16U
|
|
|
|
/* FIXME: Is there a hardware define we can use instead? */
|
|
/* 1 DWord per PM physical page stored in the free list */
|
|
#define ROGUE_FREE_LIST_ENTRY_SIZE ((uint32_t)sizeof(uint32_t))
|
|
|
|
/* FIXME: The three defines below, for the number of PC, PD and PT entries in a
|
|
* 4KB page, come from rgxmmudefs_km.h (meaning they're part of the
|
|
* auto-generated hwdefs). Should these be defined in rogue_mmu.xml? Keeping in
|
|
* mind that we probably only need these three values. */
|
|
#define ROGUE_NUM_PC_ENTRIES_PER_PAGE 0x400U
|
|
|
|
#define ROGUE_NUM_PD_ENTRIES_PER_PAGE 0x200U
|
|
|
|
#define ROGUE_NUM_PT_ENTRIES_PER_PAGE 0x200U
|
|
|
|
struct pvr_free_list {
|
|
struct pvr_device *device;
|
|
|
|
uint64_t size;
|
|
|
|
struct pvr_bo *bo;
|
|
|
|
struct pvr_winsys_free_list *ws_free_list;
|
|
};
|
|
|
|
struct pvr_rt_dataset {
|
|
struct pvr_device *device;
|
|
|
|
/* RT dataset information */
|
|
uint32_t width;
|
|
uint32_t height;
|
|
uint32_t samples;
|
|
uint32_t layers;
|
|
|
|
struct pvr_free_list *global_free_list;
|
|
struct pvr_free_list *local_free_list;
|
|
|
|
struct pvr_bo *vheap_rtc_bo;
|
|
pvr_dev_addr_t vheap_dev_addr;
|
|
pvr_dev_addr_t rtc_dev_addr;
|
|
|
|
struct pvr_bo *tpc_bo;
|
|
uint64_t tpc_stride;
|
|
uint64_t tpc_size;
|
|
|
|
struct pvr_winsys_rt_dataset *ws_rt_dataset;
|
|
|
|
/* RT data information */
|
|
struct pvr_bo *mta_bo;
|
|
struct pvr_bo *mlist_bo;
|
|
|
|
struct pvr_bo *rgn_headers_bo;
|
|
uint64_t rgn_headers_stride;
|
|
|
|
bool need_frag;
|
|
|
|
uint8_t rt_data_idx;
|
|
|
|
struct {
|
|
pvr_dev_addr_t mta_dev_addr;
|
|
pvr_dev_addr_t mlist_dev_addr;
|
|
pvr_dev_addr_t rgn_headers_dev_addr;
|
|
} rt_datas[ROGUE_NUM_RTDATAS];
|
|
};
|
|
|
|
VkResult pvr_free_list_create(struct pvr_device *device,
|
|
uint32_t initial_size,
|
|
uint32_t max_size,
|
|
uint32_t grow_size,
|
|
uint32_t grow_threshold,
|
|
struct pvr_free_list *parent_free_list,
|
|
struct pvr_free_list **const free_list_out)
|
|
{
|
|
const struct pvr_device_runtime_info *runtime_info =
|
|
&device->pdevice->dev_runtime_info;
|
|
struct pvr_winsys_free_list *parent_ws_free_list =
|
|
parent_free_list ? parent_free_list->ws_free_list : NULL;
|
|
const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
|
|
PVR_BO_ALLOC_FLAG_PM_FW_PROTECT;
|
|
struct pvr_free_list *free_list;
|
|
uint32_t cache_line_size;
|
|
uint32_t initial_num_pages;
|
|
uint32_t grow_num_pages;
|
|
uint32_t max_num_pages;
|
|
uint64_t addr_alignment;
|
|
uint64_t size_alignment;
|
|
uint64_t size;
|
|
VkResult result;
|
|
|
|
assert((initial_size + grow_size) <= max_size);
|
|
assert(max_size != 0);
|
|
assert(grow_threshold <= 100);
|
|
|
|
/* Make sure the free list is created with at least a single page. */
|
|
if (initial_size == 0)
|
|
initial_size = ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
|
|
|
|
/* The freelists sizes must respect the PM freelist base address alignment
|
|
* requirement. As the freelist entries are cached by the SLC, it's also
|
|
* necessary to ensure the sizes respect the SLC cache line size to avoid
|
|
* invalid entries appearing in the cache, which would be problematic after
|
|
* a grow operation, as the SLC entries aren't invalidated. We do this by
|
|
* making sure the freelist values are appropriately aligned.
|
|
*
|
|
* To calculate the alignment, we first take the largest of the freelist
|
|
* base address alignment and the SLC cache line size. We then divide this
|
|
* by the freelist entry size to determine the number of freelist entries
|
|
* required by the PM. Finally, as each entry holds a single PM physical
|
|
* page, we multiple the number of entries by the page size.
|
|
*
|
|
* As an example, if the base address alignment is 16 bytes, the SLC cache
|
|
* line size is 64 bytes and the freelist entry size is 4 bytes then 16
|
|
* entries are required, as we take the SLC cacheline size (being the larger
|
|
* of the two values) and divide this by 4. If the PM page size is 4096
|
|
* bytes then we end up with an alignment of 65536 bytes.
|
|
*/
|
|
cache_line_size = rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
|
|
|
|
addr_alignment =
|
|
MAX2(ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE, cache_line_size);
|
|
size_alignment = (addr_alignment / ROGUE_FREE_LIST_ENTRY_SIZE) *
|
|
ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
|
|
|
|
assert(util_is_power_of_two_nonzero64(size_alignment));
|
|
|
|
initial_size = align64(initial_size, size_alignment);
|
|
max_size = align64(max_size, size_alignment);
|
|
grow_size = align64(grow_size, size_alignment);
|
|
|
|
/* Make sure the 'max' size doesn't exceed what the firmware supports and
|
|
* adjust the other sizes accordingly.
|
|
*/
|
|
if (max_size > runtime_info->max_free_list_size) {
|
|
max_size = runtime_info->max_free_list_size;
|
|
assert(align64(max_size, size_alignment) == max_size);
|
|
}
|
|
|
|
if (initial_size > max_size)
|
|
initial_size = max_size;
|
|
|
|
if (initial_size == max_size)
|
|
grow_size = 0;
|
|
|
|
initial_num_pages = initial_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
|
|
max_num_pages = max_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
|
|
grow_num_pages = grow_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
|
|
|
|
/* Calculate the size of the buffer needed to store the free list entries
|
|
* based on the maximum number of pages we can have.
|
|
*/
|
|
size = max_num_pages * ROGUE_FREE_LIST_ENTRY_SIZE;
|
|
assert(align64(size, addr_alignment) == size);
|
|
|
|
free_list = vk_alloc(&device->vk.alloc,
|
|
sizeof(*free_list),
|
|
8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
|
if (!free_list)
|
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
/* FIXME: The memory is mapped GPU uncached, but this seems to contradict
|
|
* the comment above about aligning to the SLC cache line size.
|
|
*/
|
|
result = pvr_bo_alloc(device,
|
|
device->heaps.general_heap,
|
|
size,
|
|
addr_alignment,
|
|
bo_flags,
|
|
&free_list->bo);
|
|
if (result != VK_SUCCESS)
|
|
goto err_vk_free_free_list;
|
|
|
|
result = device->ws->ops->free_list_create(device->ws,
|
|
free_list->bo->vma,
|
|
initial_num_pages,
|
|
max_num_pages,
|
|
grow_num_pages,
|
|
grow_threshold,
|
|
parent_ws_free_list,
|
|
&free_list->ws_free_list);
|
|
if (result != VK_SUCCESS)
|
|
goto err_pvr_bo_free_bo;
|
|
|
|
free_list->device = device;
|
|
free_list->size = size;
|
|
|
|
*free_list_out = free_list;
|
|
|
|
return VK_SUCCESS;
|
|
|
|
err_pvr_bo_free_bo:
|
|
pvr_bo_free(device, free_list->bo);
|
|
|
|
err_vk_free_free_list:
|
|
vk_free(&device->vk.alloc, free_list);
|
|
|
|
return result;
|
|
}
|
|
|
|
void pvr_free_list_destroy(struct pvr_free_list *free_list)
|
|
{
|
|
struct pvr_device *device = free_list->device;
|
|
|
|
device->ws->ops->free_list_destroy(free_list->ws_free_list);
|
|
pvr_bo_free(device, free_list->bo);
|
|
vk_free(&device->vk.alloc, free_list);
|
|
}
|
|
|
|
static inline void pvr_get_samples_in_xy(uint32_t samples,
|
|
uint32_t *const x_out,
|
|
uint32_t *const y_out)
|
|
{
|
|
switch (samples) {
|
|
case 1:
|
|
*x_out = 1;
|
|
*y_out = 1;
|
|
break;
|
|
case 2:
|
|
*x_out = 1;
|
|
*y_out = 2;
|
|
break;
|
|
case 4:
|
|
*x_out = 2;
|
|
*y_out = 2;
|
|
break;
|
|
case 8:
|
|
*x_out = 2;
|
|
*y_out = 4;
|
|
break;
|
|
default:
|
|
UNREACHABLE("Unsupported number of samples");
|
|
}
|
|
}
|
|
|
|
void pvr_rt_mtile_info_init(const struct pvr_device_info *dev_info,
|
|
struct pvr_rt_mtile_info *info,
|
|
uint32_t width,
|
|
uint32_t height,
|
|
uint32_t samples)
|
|
{
|
|
uint32_t samples_in_x;
|
|
uint32_t samples_in_y;
|
|
|
|
pvr_get_samples_in_xy(samples, &samples_in_x, &samples_in_y);
|
|
|
|
info->tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 1);
|
|
info->tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 1);
|
|
|
|
info->num_tiles_x = DIV_ROUND_UP(width, info->tile_size_x);
|
|
info->num_tiles_y = DIV_ROUND_UP(height, info->tile_size_y);
|
|
|
|
rogue_get_num_macrotiles_xy(dev_info, &info->mtiles_x, &info->mtiles_y);
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
|
|
assert(PVR_GET_FEATURE_VALUE(dev_info,
|
|
simple_parameter_format_version,
|
|
0) == 2);
|
|
/* Set up 16 macrotiles with a multiple of 2x2 tiles per macrotile,
|
|
* which is aligned to a tile group.
|
|
*/
|
|
info->mtile_x1 = DIV_ROUND_UP(info->num_tiles_x, 8) * 2;
|
|
info->mtile_y1 = DIV_ROUND_UP(info->num_tiles_y, 8) * 2;
|
|
info->mtile_x2 = 0;
|
|
info->mtile_y2 = 0;
|
|
info->mtile_x3 = 0;
|
|
info->mtile_y3 = 0;
|
|
info->x_tile_max = ALIGN_POT(info->num_tiles_x, 2) - 1;
|
|
info->y_tile_max = ALIGN_POT(info->num_tiles_y, 2) - 1;
|
|
} else {
|
|
/* Set up 16 macrotiles with a multiple of 4x4 tiles per macrotile. */
|
|
info->mtile_x1 = ALIGN_POT(DIV_ROUND_UP(info->num_tiles_x, 4), 4);
|
|
info->mtile_y1 = ALIGN_POT(DIV_ROUND_UP(info->num_tiles_y, 4), 4);
|
|
info->mtile_x2 = info->mtile_x1 * 2;
|
|
info->mtile_y2 = info->mtile_y1 * 2;
|
|
info->mtile_x3 = info->mtile_x1 * 3;
|
|
info->mtile_y3 = info->mtile_y1 * 3;
|
|
info->x_tile_max = info->num_tiles_x - 1;
|
|
info->y_tile_max = info->num_tiles_y - 1;
|
|
}
|
|
|
|
info->tiles_per_mtile_x = info->mtile_x1 * samples_in_x;
|
|
info->tiles_per_mtile_y = info->mtile_y1 * samples_in_y;
|
|
}
|
|
|
|
/* Note that the unit of the return value depends on the GPU. For cores with the
|
|
* simple_internal_parameter_format feature the returned size is interpreted as
|
|
* the number of region headers. For cores without this feature its interpreted
|
|
* as the size in dwords.
|
|
*/
|
|
static uint64_t
|
|
pvr_rt_get_isp_region_size(struct pvr_device *device,
|
|
const struct pvr_rt_mtile_info *mtile_info)
|
|
{
|
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
uint64_t rgn_size =
|
|
(uint64_t)mtile_info->tiles_per_mtile_x * mtile_info->tiles_per_mtile_y;
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
|
|
uint32_t version;
|
|
|
|
rgn_size *= (uint64_t)mtile_info->mtiles_x * mtile_info->mtiles_y;
|
|
|
|
if (PVR_FEATURE_VALUE(dev_info,
|
|
simple_parameter_format_version,
|
|
&version)) {
|
|
version = 0;
|
|
}
|
|
|
|
if (version == 2) {
|
|
/* One region header per 2x2 tile group. */
|
|
rgn_size /= (2U * 2U);
|
|
}
|
|
} else {
|
|
const uint64_t single_rgn_header_size =
|
|
rogue_get_region_header_size(dev_info);
|
|
|
|
/* Round up to next dword to prevent IPF overrun and convert to bytes.
|
|
*/
|
|
rgn_size = DIV_ROUND_UP(rgn_size * single_rgn_header_size, 4);
|
|
}
|
|
|
|
return rgn_size;
|
|
}
|
|
|
|
static VkResult pvr_rt_vheap_rtc_data_init(struct pvr_device *device,
|
|
struct pvr_rt_dataset *rt_dataset,
|
|
uint32_t layers)
|
|
{
|
|
uint64_t vheap_size;
|
|
uint32_t alignment;
|
|
uint64_t rtc_size;
|
|
VkResult result;
|
|
|
|
vheap_size = ROGUE_CR_PM_VHEAP_TABLE_SIZE * ROGUE_PM_VHEAP_ENTRY_SIZE;
|
|
|
|
if (layers > 1) {
|
|
uint64_t rtc_entries;
|
|
|
|
vheap_size = ALIGN_POT(vheap_size, ROGUE_CR_TA_RTC_ADDR_BASE_ALIGNMENT);
|
|
|
|
rtc_entries = ROGUE_NUM_TEAC + ROGUE_NUM_TE + ROGUE_NUM_VCE;
|
|
if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 48545))
|
|
rtc_entries += ROGUE_NUM_TE;
|
|
|
|
rtc_size = rtc_entries * ROGUE_RTC_SIZE_IN_BYTES;
|
|
} else {
|
|
rtc_size = 0;
|
|
}
|
|
|
|
alignment = MAX2(ROGUE_CR_PM_VHEAP_TABLE_BASE_ADDR_ALIGNMENT,
|
|
ROGUE_CR_TA_RTC_ADDR_BASE_ALIGNMENT);
|
|
|
|
result = pvr_bo_alloc(device,
|
|
device->heaps.general_heap,
|
|
vheap_size + rtc_size,
|
|
alignment,
|
|
PVR_BO_ALLOC_FLAG_GPU_UNCACHED,
|
|
&rt_dataset->vheap_rtc_bo);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
rt_dataset->vheap_dev_addr = rt_dataset->vheap_rtc_bo->vma->dev_addr;
|
|
|
|
if (rtc_size > 0) {
|
|
rt_dataset->rtc_dev_addr =
|
|
PVR_DEV_ADDR_OFFSET(rt_dataset->vheap_dev_addr, vheap_size);
|
|
} else {
|
|
rt_dataset->rtc_dev_addr = PVR_DEV_ADDR_INVALID;
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void pvr_rt_vheap_rtc_data_fini(struct pvr_rt_dataset *rt_dataset)
|
|
{
|
|
rt_dataset->rtc_dev_addr = PVR_DEV_ADDR_INVALID;
|
|
|
|
pvr_bo_free(rt_dataset->device, rt_dataset->vheap_rtc_bo);
|
|
rt_dataset->vheap_rtc_bo = NULL;
|
|
}
|
|
|
|
static void
|
|
pvr_rt_get_tail_ptr_stride_size(const struct pvr_device *device,
|
|
const struct pvr_rt_mtile_info *mtile_info,
|
|
uint32_t layers,
|
|
uint64_t *const stride_out,
|
|
uint64_t *const size_out)
|
|
{
|
|
uint32_t max_num_mtiles;
|
|
uint32_t num_mtiles_x;
|
|
uint32_t num_mtiles_y;
|
|
uint32_t version;
|
|
uint64_t size;
|
|
|
|
num_mtiles_x = mtile_info->mtiles_x * mtile_info->tiles_per_mtile_x;
|
|
num_mtiles_y = mtile_info->mtiles_y * mtile_info->tiles_per_mtile_y;
|
|
|
|
max_num_mtiles = MAX2(util_next_power_of_two64(num_mtiles_x),
|
|
util_next_power_of_two64(num_mtiles_y));
|
|
|
|
size = (uint64_t)max_num_mtiles * max_num_mtiles;
|
|
|
|
if (PVR_FEATURE_VALUE(&device->pdevice->dev_info,
|
|
simple_parameter_format_version,
|
|
&version)) {
|
|
version = 0;
|
|
}
|
|
|
|
if (version == 2) {
|
|
/* One tail pointer cache entry per 2x2 tile group. */
|
|
size /= (2U * 2U);
|
|
}
|
|
|
|
size *= ROGUE_TAIL_POINTER_SIZE;
|
|
|
|
if (layers > 1) {
|
|
size = ALIGN_POT(size, ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE);
|
|
|
|
*stride_out = size / ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
|
|
*size_out = size * layers;
|
|
} else {
|
|
*stride_out = 0;
|
|
*size_out = size;
|
|
}
|
|
}
|
|
|
|
static VkResult pvr_rt_tpc_data_init(struct pvr_device *device,
|
|
struct pvr_rt_dataset *rt_dataset,
|
|
const struct pvr_rt_mtile_info *mtile_info,
|
|
uint32_t layers)
|
|
{
|
|
uint64_t tpc_size;
|
|
|
|
pvr_rt_get_tail_ptr_stride_size(device,
|
|
mtile_info,
|
|
layers,
|
|
&rt_dataset->tpc_stride,
|
|
&rt_dataset->tpc_size);
|
|
tpc_size = ALIGN_POT(rt_dataset->tpc_size, ROGUE_TE_TPC_CACHE_LINE_SIZE);
|
|
|
|
return pvr_bo_alloc(device,
|
|
device->heaps.general_heap,
|
|
tpc_size,
|
|
ROGUE_CR_TE_TPC_ADDR_BASE_ALIGNMENT,
|
|
PVR_BO_ALLOC_FLAG_GPU_UNCACHED,
|
|
&rt_dataset->tpc_bo);
|
|
}
|
|
|
|
static void pvr_rt_tpc_data_fini(struct pvr_rt_dataset *rt_dataset)
|
|
{
|
|
pvr_bo_free(rt_dataset->device, rt_dataset->tpc_bo);
|
|
rt_dataset->tpc_bo = NULL;
|
|
}
|
|
|
|
static uint32_t
|
|
pvr_rt_get_mlist_size(const struct pvr_free_list *global_free_list,
|
|
const struct pvr_free_list *local_free_list)
|
|
{
|
|
uint32_t num_pte_pages;
|
|
uint32_t num_pde_pages;
|
|
uint32_t num_pce_pages;
|
|
uint64_t total_pages;
|
|
uint32_t mlist_size;
|
|
|
|
assert(global_free_list->size + local_free_list->size <=
|
|
ROGUE_PM_MAX_PB_VIRT_ADDR_SPACE);
|
|
|
|
total_pages = (global_free_list->size + local_free_list->size) >>
|
|
ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
|
|
|
|
/* Calculate the total number of physical pages required to hold the page
|
|
* table, directory and catalog entries for the freelist pages.
|
|
*/
|
|
num_pte_pages = DIV_ROUND_UP(total_pages, ROGUE_NUM_PT_ENTRIES_PER_PAGE);
|
|
num_pde_pages = DIV_ROUND_UP(num_pte_pages, ROGUE_NUM_PD_ENTRIES_PER_PAGE);
|
|
num_pce_pages = DIV_ROUND_UP(num_pde_pages, ROGUE_NUM_PC_ENTRIES_PER_PAGE);
|
|
|
|
/* Calculate the MList size considering the total number of pages in the PB
|
|
* are shared among all the PM address spaces.
|
|
*/
|
|
mlist_size = (num_pce_pages + num_pde_pages + num_pte_pages) *
|
|
ROGUE_NUM_PM_ADDRESS_SPACES * ROGUE_MLIST_ENTRY_STRIDE;
|
|
|
|
return ALIGN_POT(mlist_size, ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE);
|
|
}
|
|
|
|
static void pvr_rt_get_region_headers_stride_size(
|
|
const struct pvr_device *device,
|
|
const struct pvr_rt_mtile_info *mtile_info,
|
|
uint32_t layers,
|
|
uint64_t *const stride_out,
|
|
uint64_t *const size_out)
|
|
{
|
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
const uint32_t single_rgn_header_size =
|
|
rogue_get_region_header_size(dev_info);
|
|
uint64_t rgn_headers_size;
|
|
uint32_t num_tiles_x;
|
|
uint32_t num_tiles_y;
|
|
uint32_t group_size;
|
|
uint32_t version;
|
|
|
|
if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
|
|
version = 0;
|
|
|
|
group_size = version == 2 ? 2 : 1;
|
|
|
|
num_tiles_x = mtile_info->mtiles_x * mtile_info->tiles_per_mtile_x;
|
|
num_tiles_y = mtile_info->mtiles_y * mtile_info->tiles_per_mtile_y;
|
|
|
|
rgn_headers_size = (uint64_t)num_tiles_x / group_size;
|
|
/* Careful here. We want the division to happen first. */
|
|
rgn_headers_size *= num_tiles_y / group_size;
|
|
rgn_headers_size *= single_rgn_header_size;
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
|
|
rgn_headers_size =
|
|
ALIGN_POT(rgn_headers_size, ROGUE_CR_TE_PSGREGION_ADDR_BASE_ALIGNMENT);
|
|
}
|
|
|
|
if (layers > 1) {
|
|
rgn_headers_size =
|
|
ALIGN_POT(rgn_headers_size, ROGUE_CR_TE_PSG_REGION_STRIDE_UNIT_SIZE);
|
|
}
|
|
|
|
*stride_out = rgn_headers_size;
|
|
*size_out = rgn_headers_size * layers;
|
|
}
|
|
|
|
static VkResult pvr_rt_mta_data_init(struct pvr_device *device,
|
|
struct pvr_rt_dataset *rt_dataset)
|
|
{
|
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
const uint32_t mta_size = rogue_get_macrotile_array_size(dev_info);
|
|
const uint32_t num_rt_datas = ARRAY_SIZE(rt_dataset->rt_datas);
|
|
pvr_dev_addr_t dev_addr;
|
|
VkResult result;
|
|
|
|
/* Valid case - see rogue_get_macrotile_array_size() */
|
|
if (mta_size == 0) {
|
|
rt_dataset->mta_bo = NULL;
|
|
|
|
for (uint32_t i = 0; i < num_rt_datas; i++)
|
|
rt_dataset->rt_datas[i].mta_dev_addr = PVR_DEV_ADDR_INVALID;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
result = pvr_bo_alloc(device,
|
|
device->heaps.general_heap,
|
|
mta_size * num_rt_datas,
|
|
ROGUE_CR_PM_MTILE_ARRAY_BASE_ADDR_ALIGNMENT,
|
|
PVR_BO_ALLOC_FLAG_GPU_UNCACHED,
|
|
&rt_dataset->mta_bo);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
dev_addr = rt_dataset->mta_bo->vma->dev_addr;
|
|
|
|
for (uint32_t i = 0; i < num_rt_datas; i++) {
|
|
rt_dataset->rt_datas[i].mta_dev_addr = dev_addr;
|
|
dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, mta_size);
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void pvr_rt_mta_data_fini(struct pvr_rt_dataset *rt_dataset)
|
|
{
|
|
if (rt_dataset->mta_bo == NULL)
|
|
return;
|
|
|
|
for (uint32_t i = 0; i < ARRAY_SIZE(rt_dataset->rt_datas); i++)
|
|
rt_dataset->rt_datas[i].mta_dev_addr = PVR_DEV_ADDR_INVALID;
|
|
|
|
pvr_bo_free(rt_dataset->device, rt_dataset->mta_bo);
|
|
rt_dataset->mta_bo = NULL;
|
|
}
|
|
|
|
static VkResult
|
|
pvr_rt_mlist_data_init(struct pvr_device *device,
|
|
struct pvr_rt_dataset *rt_dataset,
|
|
const struct pvr_free_list *global_free_list,
|
|
const struct pvr_free_list *local_free_list)
|
|
{
|
|
const uint32_t mlist_size =
|
|
pvr_rt_get_mlist_size(global_free_list, local_free_list);
|
|
const uint32_t num_rt_datas = ARRAY_SIZE(rt_dataset->rt_datas);
|
|
pvr_dev_addr_t dev_addr;
|
|
VkResult result;
|
|
|
|
/* No known cores where the Mlist size could be 0 */
|
|
assert(mlist_size > 0);
|
|
|
|
result = pvr_bo_alloc(device,
|
|
device->heaps.general_heap,
|
|
mlist_size * num_rt_datas,
|
|
ROGUE_CR_PM_MTILE_ARRAY_BASE_ADDR_ALIGNMENT,
|
|
PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
|
|
PVR_BO_ALLOC_FLAG_PM_FW_PROTECT,
|
|
&rt_dataset->mlist_bo);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
dev_addr = rt_dataset->mlist_bo->vma->dev_addr;
|
|
|
|
for (uint32_t i = 0; i < num_rt_datas; i++) {
|
|
rt_dataset->rt_datas[i].mlist_dev_addr = dev_addr;
|
|
dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, mlist_size);
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void pvr_rt_mlist_data_fini(struct pvr_rt_dataset *rt_dataset)
|
|
{
|
|
for (uint32_t i = 0; i < ARRAY_SIZE(rt_dataset->rt_datas); i++)
|
|
rt_dataset->rt_datas[i].mlist_dev_addr = PVR_DEV_ADDR_INVALID;
|
|
|
|
pvr_bo_free(rt_dataset->device, rt_dataset->mlist_bo);
|
|
rt_dataset->mlist_bo = NULL;
|
|
}
|
|
|
|
static VkResult
|
|
pvr_rt_rgn_headers_data_init(struct pvr_device *device,
|
|
struct pvr_rt_dataset *rt_dataset,
|
|
const struct pvr_rt_mtile_info *mtile_info,
|
|
uint32_t layers)
|
|
{
|
|
const uint32_t num_rt_datas = ARRAY_SIZE(rt_dataset->rt_datas);
|
|
uint64_t rgn_headers_size;
|
|
pvr_dev_addr_t dev_addr;
|
|
VkResult result;
|
|
|
|
pvr_rt_get_region_headers_stride_size(device,
|
|
mtile_info,
|
|
layers,
|
|
&rt_dataset->rgn_headers_stride,
|
|
&rgn_headers_size);
|
|
|
|
result = pvr_bo_alloc(device,
|
|
device->heaps.rgn_hdr_heap,
|
|
rgn_headers_size * num_rt_datas,
|
|
ROGUE_CR_TE_PSGREGION_ADDR_BASE_ALIGNMENT,
|
|
PVR_BO_ALLOC_FLAG_GPU_UNCACHED,
|
|
&rt_dataset->rgn_headers_bo);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
dev_addr = rt_dataset->rgn_headers_bo->vma->dev_addr;
|
|
|
|
for (uint32_t i = 0; i < num_rt_datas; i++) {
|
|
rt_dataset->rt_datas[i].rgn_headers_dev_addr = dev_addr;
|
|
dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, rgn_headers_size);
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void pvr_rt_rgn_headers_data_fini(struct pvr_rt_dataset *rt_dataset)
|
|
{
|
|
for (uint32_t i = 0; i < ARRAY_SIZE(rt_dataset->rt_datas); i++)
|
|
rt_dataset->rt_datas[i].rgn_headers_dev_addr = PVR_DEV_ADDR_INVALID;
|
|
|
|
pvr_bo_free(rt_dataset->device, rt_dataset->rgn_headers_bo);
|
|
rt_dataset->rgn_headers_bo = NULL;
|
|
}
|
|
|
|
static VkResult pvr_rt_datas_init(struct pvr_device *device,
|
|
struct pvr_rt_dataset *rt_dataset,
|
|
const struct pvr_free_list *global_free_list,
|
|
const struct pvr_free_list *local_free_list,
|
|
const struct pvr_rt_mtile_info *mtile_info,
|
|
uint32_t layers)
|
|
{
|
|
VkResult result;
|
|
|
|
result = pvr_rt_mta_data_init(device, rt_dataset);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
result = pvr_rt_mlist_data_init(device,
|
|
rt_dataset,
|
|
global_free_list,
|
|
local_free_list);
|
|
if (result != VK_SUCCESS)
|
|
goto err_pvr_rt_mta_data_fini;
|
|
|
|
result =
|
|
pvr_rt_rgn_headers_data_init(device, rt_dataset, mtile_info, layers);
|
|
if (result != VK_SUCCESS)
|
|
goto err_pvr_rt_mlist_data_fini;
|
|
|
|
return VK_SUCCESS;
|
|
|
|
err_pvr_rt_mlist_data_fini:
|
|
pvr_rt_mlist_data_fini(rt_dataset);
|
|
|
|
err_pvr_rt_mta_data_fini:
|
|
pvr_rt_mta_data_fini(rt_dataset);
|
|
|
|
return result;
|
|
}
|
|
|
|
static void pvr_rt_datas_fini(struct pvr_rt_dataset *rt_dataset)
|
|
{
|
|
pvr_rt_rgn_headers_data_fini(rt_dataset);
|
|
pvr_rt_mlist_data_fini(rt_dataset);
|
|
pvr_rt_mta_data_fini(rt_dataset);
|
|
}
|
|
|
|
static void pvr_rt_dataset_ws_create_info_init(
|
|
struct pvr_rt_dataset *rt_dataset,
|
|
const struct pvr_rt_mtile_info *mtile_info,
|
|
struct pvr_winsys_rt_dataset_create_info *create_info)
|
|
{
|
|
struct pvr_device *device = rt_dataset->device;
|
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
|
|
memset(create_info, 0, sizeof(*create_info));
|
|
|
|
/* Local freelist. */
|
|
create_info->local_free_list = rt_dataset->local_free_list->ws_free_list;
|
|
|
|
create_info->width = rt_dataset->width;
|
|
create_info->height = rt_dataset->height;
|
|
create_info->samples = rt_dataset->samples;
|
|
create_info->layers = rt_dataset->layers;
|
|
|
|
/* ISP register values. */
|
|
if (PVR_HAS_ERN(dev_info, 42307) &&
|
|
!(PVR_HAS_FEATURE(dev_info, roguexe) && mtile_info->tile_size_x == 16)) {
|
|
float value;
|
|
|
|
if (rt_dataset->width != 0) {
|
|
value =
|
|
ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR / (float)rt_dataset->width;
|
|
create_info->isp_merge_lower_x = fui(value);
|
|
|
|
value =
|
|
ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR / (float)rt_dataset->width;
|
|
create_info->isp_merge_upper_x = fui(value);
|
|
}
|
|
|
|
if (rt_dataset->height != 0) {
|
|
value =
|
|
ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR / (float)rt_dataset->height;
|
|
create_info->isp_merge_lower_y = fui(value);
|
|
|
|
value =
|
|
ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR / (float)rt_dataset->height;
|
|
create_info->isp_merge_upper_y = fui(value);
|
|
}
|
|
|
|
value = ((float)rt_dataset->width * ROGUE_ISP_MERGE_SCALE_FACTOR) /
|
|
(ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR -
|
|
ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR);
|
|
create_info->isp_merge_scale_x = fui(value);
|
|
|
|
value = ((float)rt_dataset->height * ROGUE_ISP_MERGE_SCALE_FACTOR) /
|
|
(ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR -
|
|
ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR);
|
|
create_info->isp_merge_scale_y = fui(value);
|
|
}
|
|
|
|
/* Allocations and associated information. */
|
|
create_info->vheap_table_dev_addr = rt_dataset->vheap_dev_addr;
|
|
create_info->rtc_dev_addr = rt_dataset->rtc_dev_addr;
|
|
|
|
create_info->tpc_dev_addr = rt_dataset->tpc_bo->vma->dev_addr;
|
|
create_info->tpc_stride = rt_dataset->tpc_stride;
|
|
create_info->tpc_size = rt_dataset->tpc_size;
|
|
|
|
STATIC_ASSERT(ARRAY_SIZE(create_info->rt_datas) ==
|
|
ARRAY_SIZE(rt_dataset->rt_datas));
|
|
for (uint32_t i = 0; i < ARRAY_SIZE(create_info->rt_datas); i++) {
|
|
create_info->rt_datas[i].pm_mlist_dev_addr =
|
|
rt_dataset->rt_datas[i].mlist_dev_addr;
|
|
create_info->rt_datas[i].macrotile_array_dev_addr =
|
|
rt_dataset->rt_datas[i].mta_dev_addr;
|
|
create_info->rt_datas[i].rgn_header_dev_addr =
|
|
rt_dataset->rt_datas[i].rgn_headers_dev_addr;
|
|
}
|
|
|
|
create_info->rgn_header_size =
|
|
pvr_rt_get_isp_region_size(device, mtile_info);
|
|
}
|
|
|
|
VkResult
|
|
pvr_render_target_dataset_create(struct pvr_device *device,
|
|
uint32_t width,
|
|
uint32_t height,
|
|
uint32_t samples,
|
|
uint32_t layers,
|
|
struct pvr_rt_dataset **const rt_dataset_out)
|
|
{
|
|
struct pvr_device_runtime_info *runtime_info =
|
|
&device->pdevice->dev_runtime_info;
|
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
struct pvr_winsys_rt_dataset_create_info rt_dataset_create_info;
|
|
struct pvr_rt_mtile_info mtile_info;
|
|
struct pvr_rt_dataset *rt_dataset;
|
|
VkResult result;
|
|
|
|
assert(device->global_free_list);
|
|
assert(width <= rogue_get_render_size_max_x(dev_info));
|
|
assert(height <= rogue_get_render_size_max_y(dev_info));
|
|
assert(layers > 0 && layers <= PVR_MAX_FRAMEBUFFER_LAYERS);
|
|
|
|
pvr_rt_mtile_info_init(dev_info, &mtile_info, width, height, samples);
|
|
|
|
rt_dataset = vk_zalloc(&device->vk.alloc,
|
|
sizeof(*rt_dataset),
|
|
8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
|
if (!rt_dataset)
|
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
rt_dataset->device = device;
|
|
rt_dataset->width = width;
|
|
rt_dataset->height = height;
|
|
rt_dataset->samples = samples;
|
|
rt_dataset->layers = layers;
|
|
rt_dataset->global_free_list = device->global_free_list;
|
|
|
|
/* The maximum supported free list size is based on the assumption that this
|
|
* freelist (the "local" freelist) is always the minimum size required by
|
|
* the hardware. See the documentation of ROGUE_FREE_LIST_MAX_SIZE for more
|
|
* details.
|
|
*/
|
|
result = pvr_free_list_create(device,
|
|
runtime_info->min_free_list_size,
|
|
runtime_info->min_free_list_size,
|
|
0 /* grow_size */,
|
|
0 /* grow_threshold */,
|
|
rt_dataset->global_free_list,
|
|
&rt_dataset->local_free_list);
|
|
if (result != VK_SUCCESS)
|
|
goto err_vk_free_rt_dataset;
|
|
|
|
result = pvr_rt_vheap_rtc_data_init(device, rt_dataset, layers);
|
|
if (result != VK_SUCCESS)
|
|
goto err_pvr_free_list_destroy;
|
|
|
|
result = pvr_rt_tpc_data_init(device, rt_dataset, &mtile_info, layers);
|
|
if (result != VK_SUCCESS)
|
|
goto err_pvr_rt_vheap_rtc_data_fini;
|
|
|
|
result = pvr_rt_datas_init(device,
|
|
rt_dataset,
|
|
rt_dataset->global_free_list,
|
|
rt_dataset->local_free_list,
|
|
&mtile_info,
|
|
layers);
|
|
if (result != VK_SUCCESS)
|
|
goto err_pvr_rt_tpc_data_fini;
|
|
|
|
/* rt_dataset must be fully initialized by this point since
|
|
* pvr_rt_dataset_ws_create_info_init() depends on this.
|
|
*/
|
|
pvr_rt_dataset_ws_create_info_init(rt_dataset,
|
|
&mtile_info,
|
|
&rt_dataset_create_info);
|
|
|
|
result =
|
|
device->ws->ops->render_target_dataset_create(device->ws,
|
|
&rt_dataset_create_info,
|
|
dev_info,
|
|
&rt_dataset->ws_rt_dataset);
|
|
if (result != VK_SUCCESS)
|
|
goto err_pvr_rt_datas_fini;
|
|
|
|
*rt_dataset_out = rt_dataset;
|
|
|
|
return VK_SUCCESS;
|
|
|
|
err_pvr_rt_datas_fini:
|
|
pvr_rt_datas_fini(rt_dataset);
|
|
|
|
err_pvr_rt_tpc_data_fini:
|
|
pvr_rt_tpc_data_fini(rt_dataset);
|
|
|
|
err_pvr_rt_vheap_rtc_data_fini:
|
|
pvr_rt_vheap_rtc_data_fini(rt_dataset);
|
|
|
|
err_pvr_free_list_destroy:
|
|
pvr_free_list_destroy(rt_dataset->local_free_list);
|
|
|
|
err_vk_free_rt_dataset:
|
|
vk_free(&device->vk.alloc, rt_dataset);
|
|
|
|
return result;
|
|
}
|
|
|
|
void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *rt_dataset)
|
|
{
|
|
struct pvr_device *device = rt_dataset->device;
|
|
|
|
device->ws->ops->render_target_dataset_destroy(rt_dataset->ws_rt_dataset);
|
|
|
|
pvr_rt_datas_fini(rt_dataset);
|
|
pvr_rt_tpc_data_fini(rt_dataset);
|
|
pvr_rt_vheap_rtc_data_fini(rt_dataset);
|
|
|
|
pvr_free_list_destroy(rt_dataset->local_free_list);
|
|
|
|
vk_free(&device->vk.alloc, rt_dataset);
|
|
}
|
|
|
|
static void pvr_geom_state_stream_init(struct pvr_render_ctx *ctx,
|
|
struct pvr_render_job *job,
|
|
struct pvr_winsys_geometry_state *state)
|
|
{
|
|
const struct pvr_device *const device = ctx->device;
|
|
const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
|
|
|
|
uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
|
|
uint32_t *stream_len_ptr = stream_ptr;
|
|
|
|
/* Leave space for stream header. */
|
|
stream_ptr += pvr_cmd_length(KMD_STREAM_HDR);
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_ptr, CR_VDM_CTRL_STREAM_BASE, value) {
|
|
value.addr = job->ctrl_stream_addr;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE);
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_ptr,
|
|
CR_TPU_BORDER_COLOUR_TABLE_VDM,
|
|
value) {
|
|
value.border_colour_table_address =
|
|
device->border_color_table.table->vma->dev_addr;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM);
|
|
|
|
pvr_csb_pack (stream_ptr, CR_PPP_CTRL, value) {
|
|
value.wclampen = true;
|
|
value.fixed_point_format = 1;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_PPP_CTRL);
|
|
|
|
pvr_csb_pack (stream_ptr, CR_TE_PSG, value) {
|
|
struct pvr_rt_dataset *rt_dataset =
|
|
job->view_state.rt_datasets[job->view_state.view_index];
|
|
value.completeonterminate = job->geometry_terminate;
|
|
|
|
value.region_stride = rt_dataset->rgn_headers_stride /
|
|
ROGUE_CR_TE_PSG_REGION_STRIDE_UNIT_SIZE;
|
|
|
|
value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942);
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_TE_PSG);
|
|
|
|
/* Set up the USC common size for the context switch resume/load program
|
|
* (ctx->ctx_switch.programs[i].sr->pds_load_program), which was created
|
|
* as part of the render context.
|
|
*/
|
|
pvr_csb_pack (stream_ptr, VDMCTRL_PDS_STATE0, value) {
|
|
/* Calculate the size in bytes. */
|
|
const uint16_t shared_registers_size = job->max_shared_registers * 4;
|
|
|
|
value.usc_common_size =
|
|
DIV_ROUND_UP(shared_registers_size,
|
|
ROGUE_VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE);
|
|
}
|
|
stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0);
|
|
|
|
pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value) {
|
|
value.idx = job->view_state.view_index;
|
|
}
|
|
stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
|
|
|
|
state->fw_stream_len = (uint8_t *)stream_ptr - (uint8_t *)state->fw_stream;
|
|
assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_len_ptr, KMD_STREAM_HDR, value) {
|
|
value.length = state->fw_stream_len;
|
|
}
|
|
}
|
|
|
|
static void
|
|
pvr_geom_state_stream_ext_init(struct pvr_render_ctx *ctx,
|
|
struct pvr_render_job *job,
|
|
struct pvr_winsys_geometry_state *state)
|
|
{
|
|
const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
|
|
|
|
uint32_t main_stream_len =
|
|
pvr_csb_unpack((uint64_t *)state->fw_stream, KMD_STREAM_HDR).length;
|
|
uint32_t *ext_stream_ptr =
|
|
(uint32_t *)((uint8_t *)state->fw_stream + main_stream_len);
|
|
uint32_t *header0_ptr;
|
|
|
|
header0_ptr = ext_stream_ptr;
|
|
ext_stream_ptr += pvr_cmd_length(KMD_STREAM_EXTHDR_GEOM0);
|
|
|
|
pvr_csb_pack (header0_ptr, KMD_STREAM_EXTHDR_GEOM0, header0) {
|
|
if (PVR_HAS_QUIRK(dev_info, 49927)) {
|
|
header0.has_brn49927 = true;
|
|
|
|
/* The set up of CR_TPU must be identical to
|
|
* pvr_render_job_ws_fragment_state_stream_ext_init().
|
|
*/
|
|
pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
|
|
value.tag_cem_4k_face_packing = true;
|
|
}
|
|
ext_stream_ptr += pvr_cmd_length(CR_TPU);
|
|
}
|
|
}
|
|
|
|
if ((*header0_ptr & ROGUE_KMD_STREAM_EXTHDR_DATA_MASK) != 0) {
|
|
state->fw_stream_len =
|
|
(uint8_t *)ext_stream_ptr - (uint8_t *)state->fw_stream;
|
|
assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
|
|
}
|
|
}
|
|
|
|
static void
|
|
pvr_geom_state_flags_init(const struct pvr_render_job *const job,
|
|
struct pvr_winsys_geometry_state_flags *flags)
|
|
{
|
|
*flags = (struct pvr_winsys_geometry_state_flags){
|
|
.is_first_geometry =
|
|
!job->view_state.rt_datasets[job->view_state.view_index]->need_frag,
|
|
.is_last_geometry = job->geometry_terminate,
|
|
.use_single_core = job->frag_uses_atomic_ops,
|
|
};
|
|
}
|
|
|
|
static void
|
|
pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
|
|
struct pvr_render_job *job,
|
|
struct vk_sync *wait,
|
|
struct pvr_winsys_geometry_state *state)
|
|
{
|
|
pvr_geom_state_stream_init(ctx, job, state);
|
|
pvr_geom_state_stream_ext_init(ctx, job, state);
|
|
|
|
state->wait = wait;
|
|
pvr_geom_state_flags_init(job, &state->flags);
|
|
}
|
|
|
|
static inline uint32_t pvr_frag_km_stream_pbe_reg_words_offset(
|
|
const struct pvr_device_info *const dev_info)
|
|
{
|
|
uint32_t offset = 0;
|
|
|
|
offset += pvr_cmd_length(KMD_STREAM_HDR);
|
|
offset += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
|
|
offset += pvr_cmd_length(CR_ISP_DBIAS_BASE);
|
|
offset += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
|
|
offset += pvr_cmd_length(CR_ISP_ZLSCTL);
|
|
offset += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
|
|
offset += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, requires_fb_cdc_zls_setup))
|
|
offset += pvr_cmd_length(CR_FB_CDC_ZLS);
|
|
|
|
return PVR_DW_TO_BYTES(offset);
|
|
}
|
|
|
|
#define DWORDS_PER_U64 2
|
|
|
|
static inline uint32_t pvr_frag_km_stream_pds_eot_data_addr_offset(
|
|
const struct pvr_device_info *const dev_info)
|
|
{
|
|
uint32_t offset = 0;
|
|
|
|
offset += pvr_frag_km_stream_pbe_reg_words_offset(dev_info) / 4U;
|
|
offset +=
|
|
PVR_MAX_COLOR_ATTACHMENTS * ROGUE_NUM_PBESTATE_REG_WORDS * DWORDS_PER_U64;
|
|
offset += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
|
|
offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
|
|
offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
|
|
offset += ROGUE_KMD_STREAM_USC_CLEAR_REGISTER_COUNT *
|
|
pvr_cmd_length(CR_USC_CLEAR_REGISTER);
|
|
offset += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
|
|
offset += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
|
|
offset += pvr_cmd_length(CR_ISP_BGOBJVALS);
|
|
offset += pvr_cmd_length(CR_ISP_AA);
|
|
offset += pvr_cmd_length(CR_ISP_CTL);
|
|
offset += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, cluster_grouping))
|
|
offset += pvr_cmd_length(KMD_STREAM_PIXEL_PHANTOM);
|
|
|
|
offset += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
|
|
|
|
return PVR_DW_TO_BYTES(offset);
|
|
}
|
|
|
|
static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
|
struct pvr_render_job *job,
|
|
struct pvr_winsys_fragment_state *state)
|
|
{
|
|
const struct pvr_device *const device = ctx->device;
|
|
const struct pvr_physical_device *const pdevice = device->pdevice;
|
|
const struct pvr_device_runtime_info *dev_runtime_info =
|
|
&pdevice->dev_runtime_info;
|
|
const struct pvr_device_info *dev_info = &pdevice->dev_info;
|
|
const struct pvr_rt_dataset *rt_dataset =
|
|
job->view_state.rt_datasets[job->view_state.view_index];
|
|
const enum ROGUE_CR_ISP_AA_MODE_TYPE isp_aa_mode =
|
|
pvr_cr_isp_aa_mode_type(job->samples);
|
|
struct pvr_rt_mtile_info tiling_info = { 0 };
|
|
|
|
enum ROGUE_CR_ZLS_FORMAT_TYPE zload_format = ROGUE_CR_ZLS_FORMAT_TYPE_F32Z;
|
|
uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
|
|
uint32_t *stream_len_ptr = stream_ptr;
|
|
uint32_t view_index;
|
|
uint32_t pixel_ctl;
|
|
uint32_t isp_ctl;
|
|
|
|
/* Leave space for stream header. */
|
|
stream_ptr += pvr_cmd_length(KMD_STREAM_HDR);
|
|
|
|
/* FIXME: pass in the number of samples rather than isp_aa_mode? */
|
|
pvr_setup_tiles_in_flight(dev_info,
|
|
dev_runtime_info,
|
|
isp_aa_mode,
|
|
job->pixel_output_width,
|
|
false,
|
|
job->max_tiles_in_flight,
|
|
&isp_ctl,
|
|
&pixel_ctl);
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_SCISSOR_BASE, value) {
|
|
value.addr = job->scissor_table_addr;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_DBIAS_BASE, value) {
|
|
value.addr = job->depth_bias_table_addr;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE);
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_OCLQRY_BASE, value) {
|
|
const struct pvr_sub_cmd_gfx *sub_cmd =
|
|
container_of(job, const struct pvr_sub_cmd_gfx, job);
|
|
|
|
if (sub_cmd->query_pool)
|
|
value.addr = sub_cmd->query_pool->result_buffer->dev_addr;
|
|
else
|
|
value.addr = PVR_DEV_ADDR_INVALID;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) {
|
|
if (job->has_depth_attachment || job->has_stencil_attachment) {
|
|
uint32_t alignment_x;
|
|
uint32_t alignment_y;
|
|
|
|
if (job->ds.has_alignment_transfers) {
|
|
rogue_get_zls_tile_size_xy(dev_info, &alignment_x, &alignment_y);
|
|
} else {
|
|
alignment_x = ROGUE_IPF_TILE_SIZE_PIXELS;
|
|
alignment_y = ROGUE_IPF_TILE_SIZE_PIXELS;
|
|
}
|
|
|
|
rogue_get_isp_num_tiles_xy(
|
|
dev_info,
|
|
job->samples,
|
|
ALIGN_POT(job->ds.physical_extent.width, alignment_x),
|
|
ALIGN_POT(job->ds.physical_extent.height, alignment_y),
|
|
&value.zlsextent_x_z,
|
|
&value.zlsextent_y_z);
|
|
|
|
value.zlsextent_x_z -= 1;
|
|
value.zlsextent_y_z -= 1;
|
|
|
|
if (job->ds.memlayout == PVR_MEMLAYOUT_TWIDDLED &&
|
|
!job->ds.has_alignment_transfers) {
|
|
value.loadtwiddled = true;
|
|
value.storetwiddled = true;
|
|
}
|
|
|
|
value.zloadformat = job->ds.zls_format;
|
|
value.zstoreformat = job->ds.zls_format;
|
|
|
|
zload_format = value.zloadformat;
|
|
}
|
|
|
|
if (job->has_depth_attachment) {
|
|
value.zloaden = job->ds.load.d;
|
|
value.zstoreen = job->ds.store.d;
|
|
}
|
|
|
|
if (job->has_stencil_attachment) {
|
|
value.sloaden = job->ds.load.s;
|
|
value.sstoreen = job->ds.store.s;
|
|
}
|
|
|
|
value.forcezload = value.zloaden || value.sloaden;
|
|
value.forcezstore = value.zstoreen || value.sstoreen;
|
|
value.zonlyrender = job->z_only_render;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLOAD_BASE, value) {
|
|
if (job->has_depth_attachment)
|
|
value.addr = job->ds.addr;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_STENCIL_LOAD_BASE, value) {
|
|
if (job->has_stencil_attachment) {
|
|
value.addr = job->ds.addr;
|
|
|
|
/* Enable separate stencil. This should be enabled iff the buffer set
|
|
* in CR_ISP_STENCIL_LOAD_BASE does not contain a depth component.
|
|
*/
|
|
assert(job->has_depth_attachment ||
|
|
!pvr_zls_format_type_is_packed(job->ds.zls_format));
|
|
value.enable = !job->has_depth_attachment;
|
|
}
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, requires_fb_cdc_zls_setup)) {
|
|
/* Currently no support for FBC, so just go ahead and set the default
|
|
* values.
|
|
*/
|
|
pvr_csb_pack ((uint64_t *)stream_ptr, CR_FB_CDC_ZLS, value) {
|
|
value.fbdc_depth_fmt = ROGUE_TEXSTATE_FORMAT_F32;
|
|
value.fbdc_stencil_fmt = ROGUE_TEXSTATE_FORMAT_U8;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_FB_CDC_ZLS);
|
|
}
|
|
|
|
/* Make sure that the pvr_frag_km_...() function is returning the correct
|
|
* offset.
|
|
*/
|
|
assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
|
|
pvr_frag_km_stream_pbe_reg_words_offset(dev_info));
|
|
|
|
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == PVR_MAX_COLOR_ATTACHMENTS);
|
|
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) ==
|
|
ROGUE_NUM_PBESTATE_REG_WORDS);
|
|
STATIC_ASSERT(sizeof(job->pbe_reg_words[0][0]) == sizeof(uint64_t));
|
|
memcpy(stream_ptr, job->pbe_reg_words, sizeof(job->pbe_reg_words));
|
|
stream_ptr +=
|
|
PVR_MAX_COLOR_ATTACHMENTS * ROGUE_NUM_PBESTATE_REG_WORDS * DWORDS_PER_U64;
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_ptr,
|
|
CR_TPU_BORDER_COLOUR_TABLE_PDM,
|
|
value) {
|
|
value.border_colour_table_address =
|
|
device->border_color_table.table->vma->dev_addr;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
|
|
|
|
STATIC_ASSERT(ARRAY_SIZE(job->view_state.view[0].pds_bgnd_reg_values) ==
|
|
ROGUE_NUM_CR_PDS_BGRND_WORDS);
|
|
STATIC_ASSERT(sizeof(job->view_state.view[0].pds_bgnd_reg_values[0]) ==
|
|
sizeof(uint64_t));
|
|
|
|
if (job->view_state.force_pds_bgnd_reg_values_zero)
|
|
view_index = 0;
|
|
else
|
|
view_index = job->view_state.view_index;
|
|
|
|
memcpy(stream_ptr,
|
|
job->view_state.view[view_index].pds_bgnd_reg_values,
|
|
sizeof(job->view_state.view[view_index].pds_bgnd_reg_values));
|
|
stream_ptr += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
|
|
|
|
STATIC_ASSERT(ARRAY_SIZE(job->view_state.view[0].pr_pds_bgnd_reg_values) ==
|
|
ROGUE_NUM_CR_PDS_BGRND_WORDS);
|
|
STATIC_ASSERT(sizeof(job->view_state.view[0].pr_pds_bgnd_reg_values[0]) ==
|
|
sizeof(uint64_t));
|
|
memcpy(stream_ptr,
|
|
job->view_state.view[view_index].pr_pds_bgnd_reg_values,
|
|
sizeof(job->view_state.view[view_index].pr_pds_bgnd_reg_values));
|
|
stream_ptr += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
|
|
|
|
#undef DWORDS_PER_U64
|
|
|
|
memset(stream_ptr,
|
|
0,
|
|
ROGUE_KMD_STREAM_USC_CLEAR_REGISTER_COUNT *
|
|
PVR_DW_TO_BYTES(pvr_cmd_length(CR_USC_CLEAR_REGISTER)));
|
|
stream_ptr += ROGUE_KMD_STREAM_USC_CLEAR_REGISTER_COUNT *
|
|
pvr_cmd_length(CR_USC_CLEAR_REGISTER);
|
|
|
|
*stream_ptr = pixel_ctl;
|
|
stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
|
|
|
|
pvr_csb_pack (stream_ptr, CR_ISP_BGOBJDEPTH, value) {
|
|
const float depth_clear = job->ds_clear_value.depth;
|
|
|
|
/* This is valid even when we don't have a depth attachment because:
|
|
* - zload_format is set to a sensible default above, and
|
|
* - job->depth_clear_value is set to a sensible default in that case.
|
|
*/
|
|
switch (zload_format) {
|
|
case ROGUE_CR_ZLS_FORMAT_TYPE_F32Z:
|
|
case ROGUE_CR_ZLS_FORMAT_TYPE_F64Z:
|
|
value.value = fui(depth_clear);
|
|
break;
|
|
|
|
case ROGUE_CR_ZLS_FORMAT_TYPE_16BITINT:
|
|
value.value = _mesa_float_to_unorm(depth_clear, 16);
|
|
break;
|
|
|
|
case ROGUE_CR_ZLS_FORMAT_TYPE_24BITINT:
|
|
value.value = _mesa_float_to_unorm(depth_clear, 24);
|
|
break;
|
|
|
|
default:
|
|
UNREACHABLE("Unsupported depth format");
|
|
}
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
|
|
|
|
pvr_csb_pack (stream_ptr, CR_ISP_BGOBJVALS, value) {
|
|
value.enablebgtag = job->enable_bg_tag;
|
|
|
|
value.mask = true;
|
|
|
|
value.stencil = job->ds_clear_value.stencil & 0xFF;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);
|
|
|
|
pvr_csb_pack (stream_ptr, CR_ISP_AA, value) {
|
|
value.mode = isp_aa_mode;
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_AA);
|
|
|
|
pvr_rt_mtile_info_init(dev_info,
|
|
&tiling_info,
|
|
rt_dataset->width,
|
|
rt_dataset->height,
|
|
rt_dataset->samples);
|
|
pvr_csb_pack (stream_ptr, CR_ISP_CTL, value) {
|
|
value.sample_pos = true;
|
|
value.process_empty_tiles = job->process_empty_tiles;
|
|
|
|
/* For integer depth formats we'll convert the specified floating point
|
|
* depth bias values and specify them as integers. In this mode a depth
|
|
* bias factor of 1.0 equates to 1 ULP of increase to the depth value.
|
|
*/
|
|
value.dbias_is_int = PVR_HAS_ERN(dev_info, 42307) &&
|
|
pvr_zls_format_type_is_int(job->ds.zls_format);
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
|
|
value.skip_init_hdrs = true;
|
|
|
|
if (PVR_HAS_QUIRK(dev_info, 72168)) {
|
|
/* x_tile_max is the 0-based index of the last 16x16 tile in a row.
|
|
* We cannot set SKIP_INIT_HDRS if the number of tile group rows is
|
|
* odd AND the width is <= 2 tiles.
|
|
*/
|
|
if (((((tiling_info.y_tile_max + 1) / 2) & 0x1) == 1) &&
|
|
(tiling_info.x_tile_max <= 1)) {
|
|
value.skip_init_hdrs = false;
|
|
}
|
|
}
|
|
|
|
if (PVR_HAS_QUIRK(dev_info, 72463)) {
|
|
/* We cannot set SKIP_INIT_HDRS if the number of tile group rows is
|
|
* <= 2 AND the width is <= 2 tiles.
|
|
*/
|
|
if ((((tiling_info.y_tile_max + 1) / 2) <= 2) &&
|
|
(tiling_info.x_tile_max <= 1)) {
|
|
value.skip_init_hdrs = false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
/* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
|
|
* possible to fully pack CR_ISP_CTL above rather than having to OR in part
|
|
* of the value.
|
|
*/
|
|
*stream_ptr |= isp_ctl;
|
|
stream_ptr += pvr_cmd_length(CR_ISP_CTL);
|
|
|
|
pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_INFO, value) {
|
|
value.const_size =
|
|
DIV_ROUND_UP(ctx->device->pixel_event_data_size_in_dwords,
|
|
ROGUE_CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE);
|
|
value.temp_stride = 0;
|
|
value.usc_sr_size =
|
|
DIV_ROUND_UP(PVR_STATE_PBE_DWORDS,
|
|
ROGUE_CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE);
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
|
|
pvr_csb_pack (stream_ptr, KMD_STREAM_PIXEL_PHANTOM, value) {
|
|
/* Each phantom has its own MCU, so atomicity can only be guaranteed
|
|
* when all work items are processed on the same phantom. This means
|
|
* we need to disable all USCs other than those of the first
|
|
* phantom, which has 4 clusters. Note that we only need to do this
|
|
* for atomic operations in fragment shaders, since hardware
|
|
* prevents the TA to run on more than one phantom anyway.
|
|
*/
|
|
/* Note that leaving all phantoms disabled (as csbgen will do by
|
|
* default since it will zero out things) will set them to their
|
|
* default state (i.e. enabled) instead of disabling them.
|
|
*/
|
|
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
|
|
dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) {
|
|
value.phantom_0 = ROGUE_KMD_STREAM_PIXEL_PHANTOM_STATE_ENABLED;
|
|
}
|
|
}
|
|
stream_ptr += pvr_cmd_length(KMD_STREAM_PIXEL_PHANTOM);
|
|
}
|
|
|
|
pvr_csb_pack (stream_ptr, KMD_STREAM_VIEW_IDX, value) {
|
|
value.idx = job->view_state.view_index;
|
|
}
|
|
stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
|
|
|
|
/* Make sure that the pvr_frag_km_...() function is returning the correct
|
|
* offset.
|
|
*/
|
|
assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
|
|
pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info));
|
|
|
|
if (job->view_state.force_pds_pixel_event_data_offset_zero)
|
|
view_index = 0;
|
|
else
|
|
view_index = job->view_state.view_index;
|
|
|
|
pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) {
|
|
value.addr = PVR_DEV_ADDR(
|
|
job->view_state.view[view_index].pds_pixel_event_data_offset);
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
|
|
if (device->pdevice->dev_runtime_info.core_count > 1)
|
|
pvr_finishme("Emit isp_oclqry_stride, core_count is greater than one");
|
|
*stream_ptr = 0;
|
|
stream_ptr++;
|
|
}
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
|
|
pvr_csb_pack (stream_ptr, CR_ISP_ZLS_PIXELS, value) {
|
|
if (job->has_depth_attachment) {
|
|
if (job->ds.has_alignment_transfers) {
|
|
value.x = job->ds.physical_extent.width - 1;
|
|
value.y = job->ds.physical_extent.height - 1;
|
|
} else {
|
|
value.x = job->ds.stride - 1;
|
|
value.y = job->ds.height - 1;
|
|
}
|
|
}
|
|
}
|
|
stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
|
|
}
|
|
|
|
/* zls_stride */
|
|
*stream_ptr = job->has_depth_attachment ? job->ds.layer_size : 0;
|
|
stream_ptr++;
|
|
|
|
/* sls_stride */
|
|
*stream_ptr = job->has_stencil_attachment ? job->ds.layer_size : 0;
|
|
stream_ptr++;
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
|
|
if (device->pdevice->dev_runtime_info.core_count > 1)
|
|
pvr_finishme("Emit execute_count core_count is greater than one");
|
|
*stream_ptr = 0;
|
|
stream_ptr++;
|
|
}
|
|
|
|
state->fw_stream_len = (uint8_t *)stream_ptr - (uint8_t *)state->fw_stream;
|
|
assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
|
|
|
|
pvr_csb_pack ((uint64_t *)stream_len_ptr, KMD_STREAM_HDR, value) {
|
|
value.length = state->fw_stream_len;
|
|
}
|
|
}
|
|
|
|
#undef DWORDS_PER_U64
|
|
|
|
static void
|
|
pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx,
|
|
struct pvr_render_job *job,
|
|
struct pvr_winsys_fragment_state *state)
|
|
{
|
|
const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
|
|
|
|
uint32_t main_stream_len =
|
|
pvr_csb_unpack((uint64_t *)state->fw_stream, KMD_STREAM_HDR).length;
|
|
uint32_t *ext_stream_ptr =
|
|
(uint32_t *)((uint8_t *)state->fw_stream + main_stream_len);
|
|
uint32_t *header0_ptr;
|
|
|
|
header0_ptr = ext_stream_ptr;
|
|
ext_stream_ptr += pvr_cmd_length(KMD_STREAM_EXTHDR_FRAG0);
|
|
|
|
pvr_csb_pack (header0_ptr, KMD_STREAM_EXTHDR_FRAG0, header0) {
|
|
if (PVR_HAS_QUIRK(dev_info, 49927)) {
|
|
header0.has_brn49927 = true;
|
|
|
|
/* The set up of CR_TPU must be identical to
|
|
* pvr_render_job_ws_geometry_state_stream_ext_init().
|
|
*/
|
|
pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
|
|
value.tag_cem_4k_face_packing = true;
|
|
}
|
|
ext_stream_ptr += pvr_cmd_length(CR_TPU);
|
|
}
|
|
}
|
|
|
|
if ((*header0_ptr & ROGUE_KMD_STREAM_EXTHDR_DATA_MASK) != 0) {
|
|
state->fw_stream_len =
|
|
(uint8_t *)ext_stream_ptr - (uint8_t *)state->fw_stream;
|
|
assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
|
|
}
|
|
}
|
|
|
|
static void
|
|
pvr_frag_state_flags_init(const struct pvr_render_job *const job,
|
|
struct pvr_winsys_fragment_state_flags *flags)
|
|
{
|
|
*flags = (struct pvr_winsys_fragment_state_flags){
|
|
.has_depth_buffer = job->has_depth_attachment,
|
|
.has_stencil_buffer = job->has_stencil_attachment,
|
|
.prevent_cdm_overlap = job->disable_compute_overlap,
|
|
.use_single_core = job->frag_uses_atomic_ops,
|
|
.get_vis_results = job->get_vis_results,
|
|
.has_spm_scratch_buffer = job->requires_spm_scratch_buffer,
|
|
.disable_pixel_merging = job->disable_pixel_merging,
|
|
};
|
|
}
|
|
|
|
static void
|
|
pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
|
|
struct pvr_render_job *job,
|
|
struct vk_sync *wait,
|
|
struct pvr_winsys_fragment_state *state)
|
|
{
|
|
pvr_frag_state_stream_init(ctx, job, state);
|
|
pvr_frag_state_stream_ext_init(ctx, job, state);
|
|
|
|
state->wait = wait;
|
|
pvr_frag_state_flags_init(job, &state->flags);
|
|
}
|
|
|
|
/**
|
|
* \brief Sets up the fragment state for a Partial Render (PR) based on the
|
|
* state for a normal fragment job.
|
|
*
|
|
* The state of a fragment PR is almost the same as of that for a normal
|
|
* fragment job apart the PBE words and the EOT program, both of which are
|
|
* necessary for the render to use the SPM scratch buffer instead of the final
|
|
* render targets.
|
|
*
|
|
* By basing the fragment PR state on that of a normal fragment state,
|
|
* repacking of the same words can be avoided as we end up mostly doing copies
|
|
* instead.
|
|
*/
|
|
static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
|
|
const struct pvr_render_ctx *ctx,
|
|
struct pvr_render_job *job,
|
|
struct vk_sync *wait,
|
|
struct pvr_winsys_fragment_state *frag,
|
|
struct pvr_winsys_fragment_state *state)
|
|
{
|
|
const struct pvr_device_info *const dev_info =
|
|
&ctx->device->pdevice->dev_info;
|
|
const uint32_t pbe_reg_byte_offset =
|
|
pvr_frag_km_stream_pbe_reg_words_offset(dev_info);
|
|
const uint32_t eot_data_addr_byte_offset =
|
|
pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info);
|
|
const uint32_t view_index =
|
|
job->view_state.force_pds_pixel_event_data_offset_zero
|
|
? 0
|
|
: job->view_state.view_index;
|
|
const uint32_t pr_pds_pixel_event_data_offset =
|
|
job->view_state.use_pds_pixel_event_data_offset
|
|
? job->view_state.view[view_index].pds_pixel_event_data_offset
|
|
: job->pr_pds_pixel_event_data_offset;
|
|
|
|
/* Massive copy :( */
|
|
*state = *frag;
|
|
|
|
assert(state->fw_stream_len >=
|
|
pbe_reg_byte_offset + sizeof(job->pr_pbe_reg_words));
|
|
memcpy(&state->fw_stream[pbe_reg_byte_offset],
|
|
job->pr_pbe_reg_words,
|
|
sizeof(job->pr_pbe_reg_words));
|
|
|
|
/* TODO: Update this when csbgen is byte instead of dword granular. */
|
|
assert(state->fw_stream_len >=
|
|
eot_data_addr_byte_offset +
|
|
PVR_DW_TO_BYTES(pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA)));
|
|
|
|
pvr_csb_pack ((uint32_t *)&state->fw_stream[eot_data_addr_byte_offset],
|
|
CR_EVENT_PIXEL_PDS_DATA,
|
|
eot_pds_data) {
|
|
eot_pds_data.addr = PVR_DEV_ADDR(pr_pds_pixel_event_data_offset);
|
|
}
|
|
}
|
|
|
|
static void pvr_render_job_ws_submit_info_init(
|
|
struct pvr_render_ctx *ctx,
|
|
struct pvr_render_job *job,
|
|
struct vk_sync *wait_geom,
|
|
struct vk_sync *wait_frag,
|
|
struct pvr_winsys_render_submit_info *submit_info)
|
|
{
|
|
memset(submit_info, 0, sizeof(*submit_info));
|
|
|
|
submit_info->rt_dataset =
|
|
job->view_state.rt_datasets[job->view_state.view_index]->ws_rt_dataset;
|
|
submit_info->rt_data_idx =
|
|
job->view_state.rt_datasets[job->view_state.view_index]->rt_data_idx;
|
|
|
|
submit_info->frame_num = ctx->device->global_queue_present_count;
|
|
submit_info->job_num = ctx->device->global_cmd_buffer_submit_count;
|
|
|
|
pvr_render_job_ws_geometry_state_init(ctx,
|
|
job,
|
|
wait_geom,
|
|
&submit_info->geometry);
|
|
|
|
submit_info->has_fragment_job = job->run_frag;
|
|
|
|
/* TODO: Move the job setup from queue submit into cmd_buffer if possible. */
|
|
|
|
/* TODO: See if it's worth avoiding setting up the fragment state and setup
|
|
* the pr state directly if `!job->run_frag`. For now we'll always set it up.
|
|
*/
|
|
pvr_render_job_ws_fragment_state_init(ctx,
|
|
job,
|
|
wait_frag,
|
|
&submit_info->fragment);
|
|
|
|
/* TODO: In some cases we could eliminate the pr and use the frag directly in
|
|
* case we enter SPM. There's likely some performance improvement to be had
|
|
* there. For now we'll always setup the pr.
|
|
*/
|
|
pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
|
|
ctx,
|
|
job,
|
|
wait_frag,
|
|
&submit_info->fragment,
|
|
&submit_info->fragment_pr);
|
|
}
|
|
|
|
VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
|
|
struct pvr_render_job *job,
|
|
struct vk_sync *wait_geom,
|
|
struct vk_sync *wait_frag,
|
|
struct vk_sync *signal_sync_geom,
|
|
struct vk_sync *signal_sync_frag)
|
|
{
|
|
struct pvr_rt_dataset *rt_dataset =
|
|
job->view_state.rt_datasets[job->view_state.view_index];
|
|
struct pvr_winsys_render_submit_info submit_info;
|
|
struct pvr_device *device = ctx->device;
|
|
VkResult result;
|
|
|
|
pvr_render_job_ws_submit_info_init(ctx,
|
|
job,
|
|
wait_geom,
|
|
wait_frag,
|
|
&submit_info);
|
|
|
|
if (PVR_IS_DEBUG_SET(DUMP_CONTROL_STREAM)) {
|
|
/* FIXME: This isn't an ideal method of accessing the information we
|
|
* need, but it's considered good enough for a debug code path. It can be
|
|
* streamlined and made more correct if/when pvr_render_job becomes a
|
|
* subclass of pvr_sub_cmd.
|
|
*/
|
|
const struct pvr_sub_cmd *sub_cmd =
|
|
container_of(job, const struct pvr_sub_cmd, gfx.job);
|
|
|
|
pvr_csb_dump(&sub_cmd->gfx.control_stream,
|
|
submit_info.frame_num,
|
|
submit_info.job_num);
|
|
}
|
|
|
|
result = device->ws->ops->render_submit(ctx->ws_ctx,
|
|
&submit_info,
|
|
&device->pdevice->dev_info,
|
|
signal_sync_geom,
|
|
signal_sync_frag);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
if (job->run_frag) {
|
|
/* Move to the next render target data now that a fragment job has been
|
|
* successfully submitted. This will allow the next geometry job to be
|
|
* submitted to been run in parallel with it.
|
|
*/
|
|
rt_dataset->rt_data_idx =
|
|
(rt_dataset->rt_data_idx + 1) % ARRAY_SIZE(rt_dataset->rt_datas);
|
|
|
|
rt_dataset->need_frag = false;
|
|
} else {
|
|
rt_dataset->need_frag = true;
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|