mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-20 15:38:19 +02:00
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37439>
931 lines
34 KiB
C
931 lines
34 KiB
C
/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
|
|
|
|
#include <stdint.h>
|
|
#include <vulkan/vulkan_core.h>
|
|
|
|
#include "hwdef/rogue_hw_utils.h"
|
|
#include "pco/pco_data.h"
|
|
#include "pco_uscgen_programs.h"
|
|
#include "pvr_clear.h"
|
|
#include "pvr_pds.h"
|
|
#include "pvr_private.h"
|
|
#include "pvr_usc.h"
|
|
#include "pvr_types.h"
|
|
#include "vk_alloc.h"
|
|
#include "vk_log.h"
|
|
|
|
static void pvr_device_setup_graphics_static_clear_ppp_base(
|
|
struct pvr_static_clear_ppp_base *const base)
|
|
{
|
|
pvr_csb_pack (&base->wclamp, TA_WCLAMP, wclamp) {
|
|
wclamp.val = fui(0.00001f);
|
|
}
|
|
|
|
/* clang-format off */
|
|
pvr_csb_pack (&base->varying_word[0], TA_STATE_VARYING0, varying0);
|
|
pvr_csb_pack (&base->varying_word[1], TA_STATE_VARYING1, varying1);
|
|
pvr_csb_pack (&base->varying_word[2], TA_STATE_VARYING2, varying2);
|
|
/* clang-format on */
|
|
|
|
pvr_csb_pack (&base->ppp_ctrl, TA_STATE_PPP_CTRL, ppp_ctrl) {
|
|
ppp_ctrl.pretransform = true;
|
|
ppp_ctrl.cullmode = ROGUE_TA_CULLMODE_NO_CULLING;
|
|
}
|
|
|
|
/* clang-format off */
|
|
pvr_csb_pack (&base->stream_out0, TA_STATE_STREAM_OUT0, stream_out0);
|
|
/* clang-format on */
|
|
}
|
|
|
|
static void pvr_device_setup_graphics_static_clear_ppp_templates(
|
|
struct pvr_static_clear_ppp_template
|
|
templates[static PVR_STATIC_CLEAR_VARIANT_COUNT])
|
|
{
|
|
for (uint32_t i = 0; i < PVR_STATIC_CLEAR_VARIANT_COUNT; i++) {
|
|
const bool has_color = !!(i & VK_IMAGE_ASPECT_COLOR_BIT);
|
|
const bool has_depth = !!(i & VK_IMAGE_ASPECT_DEPTH_BIT);
|
|
const bool has_stencil = !!(i & VK_IMAGE_ASPECT_STENCIL_BIT);
|
|
|
|
struct pvr_static_clear_ppp_template *const template = &templates[i];
|
|
|
|
template->requires_pds_state = has_color;
|
|
|
|
pvr_csb_pack (&template->header, TA_STATE_HEADER, header) {
|
|
header.pres_stream_out_size = true;
|
|
header.pres_ppp_ctrl = true;
|
|
header.pres_varying_word2 = true;
|
|
header.pres_varying_word1 = true;
|
|
header.pres_varying_word0 = true;
|
|
header.pres_outselects = true;
|
|
header.pres_wclamp = true;
|
|
header.pres_region_clip = true;
|
|
header.pres_pds_state_ptr2 = template->requires_pds_state;
|
|
header.pres_pds_state_ptr1 = template->requires_pds_state;
|
|
header.pres_pds_state_ptr0 = template->requires_pds_state;
|
|
header.pres_ispctl_fb = true;
|
|
header.pres_ispctl_fa = true;
|
|
header.pres_ispctl = true;
|
|
}
|
|
|
|
#define CS_HEADER(cs) \
|
|
(struct ROGUE_##cs) \
|
|
{ \
|
|
pvr_cmd_header(cs) \
|
|
}
|
|
|
|
template->config.ispctl = CS_HEADER(TA_STATE_ISPCTL);
|
|
template->config.ispctl.tagwritedisable = !has_color;
|
|
template->config.ispctl.bpres = true;
|
|
|
|
template->config.ispa = CS_HEADER(TA_STATE_ISPA);
|
|
template->config.ispa.objtype = ROGUE_TA_OBJTYPE_TRIANGLE;
|
|
template->config.ispa.passtype = ROGUE_TA_PASSTYPE_TRANSLUCENT;
|
|
template->config.ispa.dwritedisable = !has_depth;
|
|
template->config.ispa.dcmpmode = (i == 0) ? ROGUE_TA_CMPMODE_NEVER
|
|
: ROGUE_TA_CMPMODE_ALWAYS;
|
|
template->config.ispa.sref =
|
|
has_stencil ? ROGUE_TA_STATE_ISPA_SREF_SIZE_MAX : 0;
|
|
|
|
pvr_csb_pack (&template->ispb, TA_STATE_ISPB, ispb) {
|
|
ispb.scmpmode = ROGUE_TA_CMPMODE_ALWAYS;
|
|
ispb.sop1 = ROGUE_TA_ISPB_STENCILOP_KEEP;
|
|
ispb.sop2 = ROGUE_TA_ISPB_STENCILOP_KEEP;
|
|
|
|
ispb.sop3 = has_stencil ? ROGUE_TA_ISPB_STENCILOP_REPLACE
|
|
: ROGUE_TA_ISPB_STENCILOP_KEEP;
|
|
|
|
ispb.swmask = has_stencil ? 0xFF : 0;
|
|
}
|
|
|
|
template->config.pds_state = NULL;
|
|
|
|
template->config.region_clip0 = CS_HEADER(TA_REGION_CLIP0);
|
|
template->config.region_clip0.mode = ROGUE_TA_REGION_CLIP_MODE_OUTSIDE;
|
|
template->config.region_clip0.left = 0;
|
|
template->config.region_clip0.right = ROGUE_TA_REGION_CLIP_MAX;
|
|
|
|
template->config.region_clip1 = CS_HEADER(TA_REGION_CLIP1);
|
|
template->config.region_clip1.top = 0;
|
|
template->config.region_clip1.bottom = ROGUE_TA_REGION_CLIP_MAX;
|
|
|
|
template->config.output_sel = CS_HEADER(TA_OUTPUT_SEL);
|
|
template->config.output_sel.vtxsize = 4;
|
|
template->config.output_sel.rhw_pres = true;
|
|
|
|
#undef CS_HEADER
|
|
}
|
|
}
|
|
|
|
/**
|
|
* \brief Emit geom state from a configurable template.
|
|
*
|
|
* Note that the state is emitted by joining the template with a base so the
|
|
* base must have been setup before calling this.
|
|
*
|
|
* \param[in] csb Control stream to emit to.
|
|
* \param[in] template The configured template.
|
|
* \param[out] pvr_bo_out Uploaded state's pvr_bo object.
|
|
*
|
|
* \return VK_SUCCESS if the state was successfully uploaded.
|
|
*/
|
|
VkResult pvr_emit_ppp_from_template(
|
|
struct pvr_csb *const csb,
|
|
const struct pvr_static_clear_ppp_template *const template,
|
|
struct pvr_suballoc_bo **const pvr_bo_out)
|
|
{
|
|
const uint32_t dword_count =
|
|
pvr_cmd_length(TA_STATE_HEADER) + pvr_cmd_length(TA_STATE_ISPCTL) +
|
|
pvr_cmd_length(TA_STATE_ISPA) + pvr_cmd_length(TA_STATE_ISPB) +
|
|
(template->requires_pds_state ? PVR_STATIC_CLEAR_PDS_STATE_COUNT : 0) +
|
|
pvr_cmd_length(TA_REGION_CLIP0) + pvr_cmd_length(TA_REGION_CLIP1) +
|
|
pvr_cmd_length(TA_WCLAMP) + pvr_cmd_length(TA_OUTPUT_SEL) +
|
|
pvr_cmd_length(TA_STATE_VARYING0) + pvr_cmd_length(TA_STATE_VARYING1) +
|
|
pvr_cmd_length(TA_STATE_VARYING2) + pvr_cmd_length(TA_STATE_PPP_CTRL) +
|
|
pvr_cmd_length(TA_STATE_STREAM_OUT0);
|
|
|
|
struct pvr_device *const device = csb->device;
|
|
const uint32_t cache_line_size =
|
|
rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
|
|
const struct pvr_static_clear_ppp_base *const base =
|
|
&device->static_clear_state.ppp_base;
|
|
struct pvr_suballoc_bo *pvr_bo;
|
|
uint32_t *stream;
|
|
VkResult result;
|
|
|
|
result = pvr_bo_suballoc(&device->suballoc_general,
|
|
PVR_DW_TO_BYTES(dword_count),
|
|
cache_line_size,
|
|
false,
|
|
&pvr_bo);
|
|
if (result != VK_SUCCESS) {
|
|
*pvr_bo_out = NULL;
|
|
return result;
|
|
}
|
|
|
|
stream = (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);
|
|
|
|
pvr_csb_write_value(stream, TA_STATE_HEADER, template->header);
|
|
pvr_csb_write_struct(stream, TA_STATE_ISPCTL, &template->config.ispctl);
|
|
pvr_csb_write_struct(stream, TA_STATE_ISPA, &template->config.ispa);
|
|
pvr_csb_write_value(stream, TA_STATE_ISPB, template->ispb);
|
|
|
|
if (template->requires_pds_state) {
|
|
static_assert(sizeof(*stream) == sizeof((*template->config.pds_state)[0]),
|
|
"Size mismatch");
|
|
for (uint32_t i = 0; i < PVR_STATIC_CLEAR_PDS_STATE_COUNT; i++)
|
|
*stream++ = (*template->config.pds_state)[i];
|
|
}
|
|
|
|
pvr_csb_write_struct(stream,
|
|
TA_REGION_CLIP0,
|
|
&template->config.region_clip0);
|
|
pvr_csb_write_struct(stream,
|
|
TA_REGION_CLIP1,
|
|
&template->config.region_clip1);
|
|
pvr_csb_write_value(stream, TA_WCLAMP, base->wclamp);
|
|
pvr_csb_write_struct(stream, TA_OUTPUT_SEL, &template->config.output_sel);
|
|
pvr_csb_write_value(stream, TA_STATE_VARYING0, base->varying_word[0]);
|
|
pvr_csb_write_value(stream, TA_STATE_VARYING1, base->varying_word[1]);
|
|
pvr_csb_write_value(stream, TA_STATE_VARYING2, base->varying_word[2]);
|
|
pvr_csb_write_value(stream, TA_STATE_PPP_CTRL, base->ppp_ctrl);
|
|
pvr_csb_write_value(stream, TA_STATE_STREAM_OUT0, base->stream_out0);
|
|
|
|
assert((uint64_t)(stream - (uint32_t *)pvr_bo_suballoc_get_map_addr(
|
|
pvr_bo)) == dword_count);
|
|
|
|
stream = NULL;
|
|
|
|
pvr_csb_set_relocation_mark(csb);
|
|
|
|
pvr_csb_emit (csb, VDMCTRL_PPP_STATE0, state) {
|
|
state.word_count = dword_count;
|
|
state.addrmsb = pvr_bo->dev_addr;
|
|
}
|
|
|
|
pvr_csb_emit (csb, VDMCTRL_PPP_STATE1, state) {
|
|
state.addrlsb = pvr_bo->dev_addr;
|
|
}
|
|
|
|
pvr_csb_clear_relocation_mark(csb);
|
|
|
|
*pvr_bo_out = pvr_bo;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
pvr_device_init_clear_attachment_programs(struct pvr_device *device)
|
|
{
|
|
const uint32_t pds_prog_alignment =
|
|
MAX2(ROGUE_TA_STATE_PDS_TEXUNICODEBASE_ADDR_ALIGNMENT,
|
|
ROGUE_TA_STATE_PDS_SHADERBASE_ADDR_ALIGNMENT);
|
|
struct pvr_device_static_clear_state *clear_state =
|
|
&device->static_clear_state;
|
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
uint32_t pds_texture_program_offsets[PVR_NUM_CLEAR_ATTACH_SHADERS];
|
|
uint32_t pds_pixel_program_offsets[PVR_NUM_CLEAR_ATTACH_SHADERS];
|
|
uint32_t usc_program_offsets[PVR_NUM_CLEAR_ATTACH_SHADERS];
|
|
pco_shader *shaders[PVR_NUM_CLEAR_ATTACH_SHADERS];
|
|
struct pvr_clear_attach_props props;
|
|
uint64_t usc_upload_offset;
|
|
uint64_t pds_upload_offset;
|
|
uint32_t alloc_size = 0;
|
|
VkResult result;
|
|
uint8_t *ptr;
|
|
|
|
/* Build and upload USC fragment shaders. */
|
|
for (unsigned dword_count = 1; dword_count <= 4; ++dword_count) {
|
|
for (unsigned offset = 0; offset <= 3; ++offset) {
|
|
for (unsigned uses_tile_buffer = 0; uses_tile_buffer <= 1;
|
|
++uses_tile_buffer) {
|
|
if (dword_count + offset > 4)
|
|
continue;
|
|
|
|
props.dword_count = dword_count;
|
|
props.offset = offset;
|
|
props.uses_tile_buffer = uses_tile_buffer;
|
|
|
|
unsigned u = pvr_uscgen_clear_attach_index(&props);
|
|
shaders[u] =
|
|
pvr_uscgen_clear_attach(device->pdevice->pco_ctx, &props);
|
|
alloc_size += pco_shader_binary_size(shaders[u]);
|
|
}
|
|
}
|
|
}
|
|
|
|
result = pvr_bo_suballoc(&device->suballoc_usc,
|
|
alloc_size,
|
|
4,
|
|
false,
|
|
&clear_state->usc_clear_attachment_programs);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
usc_upload_offset =
|
|
clear_state->usc_clear_attachment_programs->dev_addr.addr -
|
|
device->heaps.usc_heap->base_addr.addr;
|
|
|
|
ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(
|
|
clear_state->usc_clear_attachment_programs);
|
|
|
|
unsigned offset = 0;
|
|
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
|
|
unsigned shader_size = pco_shader_binary_size(shaders[u]);
|
|
|
|
usc_program_offsets[u] = offset;
|
|
memcpy(&ptr[offset], pco_shader_binary_data(shaders[u]), shader_size);
|
|
|
|
offset += shader_size;
|
|
}
|
|
|
|
/* Upload PDS programs. */
|
|
|
|
alloc_size = 0;
|
|
|
|
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
|
|
struct pvr_pds_pixel_shader_sa_program texture_pds_program;
|
|
struct pvr_pds_kickusc_program pixel_shader_pds_program;
|
|
uint32_t program_size;
|
|
|
|
/* Texture program to load colors. */
|
|
texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
|
|
.num_texture_dma_kicks = 1,
|
|
};
|
|
|
|
pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&texture_pds_program);
|
|
|
|
pds_texture_program_offsets[u] = alloc_size;
|
|
alloc_size += ALIGN_POT(PVR_DW_TO_BYTES(texture_pds_program.code_size),
|
|
pds_prog_alignment);
|
|
|
|
/* Pixel program to load fragment shader. */
|
|
pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };
|
|
|
|
pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
|
|
usc_upload_offset + usc_program_offsets[u],
|
|
pco_shader_data(shaders[u])->common.temps,
|
|
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
|
|
false);
|
|
|
|
pvr_pds_set_sizes_pixel_shader(&pixel_shader_pds_program);
|
|
|
|
program_size = pixel_shader_pds_program.code_size +
|
|
pixel_shader_pds_program.data_size;
|
|
program_size = PVR_DW_TO_BYTES(program_size);
|
|
|
|
pds_pixel_program_offsets[u] = alloc_size;
|
|
alloc_size += ALIGN_POT(program_size, pds_prog_alignment);
|
|
}
|
|
|
|
result = pvr_bo_suballoc(&device->suballoc_pds,
|
|
alloc_size,
|
|
pds_prog_alignment,
|
|
false,
|
|
&clear_state->pds_clear_attachment_programs);
|
|
if (result != VK_SUCCESS) {
|
|
pvr_bo_suballoc_free(clear_state->usc_clear_attachment_programs);
|
|
return result;
|
|
}
|
|
|
|
pds_upload_offset =
|
|
clear_state->pds_clear_attachment_programs->dev_addr.addr -
|
|
device->heaps.pds_heap->base_addr.addr;
|
|
|
|
ptr =
|
|
pvr_bo_suballoc_get_map_addr(clear_state->pds_clear_attachment_programs);
|
|
|
|
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
|
|
struct pvr_pds_pixel_shader_sa_program texture_pds_program;
|
|
struct pvr_pds_kickusc_program pixel_shader_pds_program;
|
|
|
|
/* Texture program to load colors. */
|
|
texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
|
|
.num_texture_dma_kicks = 1,
|
|
};
|
|
|
|
pvr_pds_generate_pixel_shader_sa_code_segment(
|
|
&texture_pds_program,
|
|
(uint32_t *)(ptr + pds_texture_program_offsets[u]));
|
|
|
|
/* Pixel program to load fragment shader. */
|
|
pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };
|
|
|
|
pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
|
|
usc_upload_offset + usc_program_offsets[u],
|
|
pco_shader_data(shaders[u])->common.temps,
|
|
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
|
|
false);
|
|
|
|
pvr_pds_generate_pixel_shader_program(
|
|
&pixel_shader_pds_program,
|
|
(uint32_t *)(ptr + pds_pixel_program_offsets[u]));
|
|
|
|
/* Setup the PDS program info. */
|
|
pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_pds_program,
|
|
dev_info);
|
|
|
|
clear_state->pds_clear_attachment_program_info[u] =
|
|
(struct pvr_pds_clear_attachment_program_info){
|
|
.texture_program_offset =
|
|
PVR_DEV_ADDR(pds_upload_offset + pds_texture_program_offsets[u]),
|
|
.pixel_program_offset =
|
|
PVR_DEV_ADDR(pds_upload_offset + pds_pixel_program_offsets[u]),
|
|
|
|
.texture_program_pds_temps_count = texture_pds_program.temps_used,
|
|
.texture_program_data_size = texture_pds_program.data_size,
|
|
};
|
|
}
|
|
|
|
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u)
|
|
ralloc_free(shaders[u]);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
pvr_device_finish_clear_attachment_programs(struct pvr_device *device)
|
|
{
|
|
struct pvr_device_static_clear_state *clear_state =
|
|
&device->static_clear_state;
|
|
|
|
pvr_bo_suballoc_free(clear_state->usc_clear_attachment_programs);
|
|
pvr_bo_suballoc_free(clear_state->pds_clear_attachment_programs);
|
|
}
|
|
|
|
/**
|
|
* \brief Generate and uploads vertices required to clear the rect area.
|
|
*
|
|
* We use the triangle strip topology for clears so this functions generates 4
|
|
* vertices to represent the rect. Note that the coordinates are in screen space
|
|
* and not NDC.
|
|
*
|
|
* \param[in] device Device to upload to.
|
|
* \param[in] rect Area to clear.
|
|
* \param[in] depth Depth (i.e. Z coordinate) of the area to clear.
|
|
* \param[out] pvr_bo_out BO upload object.
|
|
* \return VK_SUCCESS if the upload succeeded.
|
|
*/
|
|
VkResult pvr_clear_vertices_upload(struct pvr_device *device,
|
|
const VkRect2D *rect,
|
|
float depth,
|
|
struct pvr_suballoc_bo **const pvr_bo_out)
|
|
{
|
|
const float y1 = (float)(rect->offset.y + rect->extent.height);
|
|
const float x1 = (float)(rect->offset.x + rect->extent.width);
|
|
const float y0 = (float)rect->offset.y;
|
|
const float x0 = (float)rect->offset.x;
|
|
|
|
const float vertices[PVR_CLEAR_VERTEX_COUNT][PVR_CLEAR_VERTEX_COORDINATES] = {
|
|
[0] = { [0] = x0, [1] = y0, [2] = depth },
|
|
[1] = { [0] = x0, [1] = y1, [2] = depth },
|
|
[2] = { [0] = x1, [1] = y0, [2] = depth },
|
|
[3] = { [0] = x1, [1] = y1, [2] = depth }
|
|
};
|
|
|
|
return pvr_gpu_upload(device,
|
|
device->heaps.general_heap,
|
|
vertices,
|
|
sizeof(vertices),
|
|
4,
|
|
pvr_bo_out);
|
|
}
|
|
|
|
VkResult pvr_device_init_graphics_static_clear_state(struct pvr_device *device)
|
|
{
|
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
const VkRect2D vf_rect = {
|
|
.offset = { .x = 0, .y = 0 },
|
|
.extent = { .width = rogue_get_param_vf_max_x(dev_info),
|
|
.height = rogue_get_param_vf_max_y(dev_info) }
|
|
};
|
|
|
|
const uint32_t vdm_state_size_in_dw =
|
|
pvr_clear_vdm_state_get_size_in_dw(dev_info, 1);
|
|
struct pvr_device_static_clear_state *state = &device->static_clear_state;
|
|
const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
|
|
struct pvr_pds_vertex_shader_program pds_program;
|
|
const pco_precomp_data *precomp_data;
|
|
uint32_t *state_buffer;
|
|
VkResult result;
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, gs_rta_support)) {
|
|
precomp_data =
|
|
(pco_precomp_data *)pco_usclib_common[VS_PASSTHROUGH_RTA_COMMON];
|
|
|
|
result = pvr_gpu_upload_usc(device,
|
|
precomp_data->binary,
|
|
precomp_data->size_dwords * sizeof(uint32_t),
|
|
cache_line_size,
|
|
&state->usc_multi_layer_vertex_shader_bo);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
} else {
|
|
state->usc_multi_layer_vertex_shader_bo = NULL;
|
|
}
|
|
|
|
precomp_data = (pco_precomp_data *)pco_usclib_common[VS_PASSTHROUGH_COMMON];
|
|
result = pvr_gpu_upload_usc(device,
|
|
precomp_data->binary,
|
|
precomp_data->size_dwords * sizeof(uint32_t),
|
|
cache_line_size,
|
|
&state->usc_vertex_shader_bo);
|
|
if (result != VK_SUCCESS)
|
|
goto err_free_usc_multi_layer_shader;
|
|
|
|
result =
|
|
pvr_clear_vertices_upload(device, &vf_rect, 0.0f, &state->vertices_bo);
|
|
if (result != VK_SUCCESS)
|
|
goto err_free_usc_shader;
|
|
|
|
pvr_pds_clear_vertex_shader_program_init_base(&pds_program,
|
|
state->usc_vertex_shader_bo);
|
|
|
|
result =
|
|
pvr_pds_clear_vertex_shader_program_create_and_upload(&pds_program,
|
|
device,
|
|
state->vertices_bo,
|
|
&state->pds);
|
|
if (result != VK_SUCCESS)
|
|
goto err_free_vertices_buffer;
|
|
|
|
pvr_device_setup_graphics_static_clear_ppp_base(&state->ppp_base);
|
|
pvr_device_setup_graphics_static_clear_ppp_templates(state->ppp_templates);
|
|
|
|
assert(pds_program.code_size <= state->pds.code_size);
|
|
|
|
state_buffer = vk_alloc(&device->vk.alloc,
|
|
PVR_DW_TO_BYTES(vdm_state_size_in_dw * 2),
|
|
8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
|
if (state_buffer == NULL) {
|
|
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
goto err_free_pds_program;
|
|
}
|
|
|
|
/* TODO: The difference between the large and normal words is only the last
|
|
* word. The value is 3 or 4 depending on the amount of indices. Should we
|
|
* dedup this?
|
|
*/
|
|
|
|
/* The large clear state words cover the max framebuffer. The normal clear
|
|
* state words cover only half (since 3 indices are passed, forming a single
|
|
* triangle, instead of 4) and are used when the render area fits within a
|
|
* quarter of the max framebuffer, i.e. fit within the single triangle.
|
|
*/
|
|
/* 4 * sizeof(uint32_t) because of the 4 pixel output regs. */
|
|
/* TODO: Replace 4 * sizeof(uint32_t) with a defines from the compiler or
|
|
* hook up the value directly to it using some compiler info.
|
|
*/
|
|
pvr_pack_clear_vdm_state(&device->pdevice->dev_info,
|
|
&state->pds,
|
|
pds_program.temps_used,
|
|
3,
|
|
4 * sizeof(uint32_t),
|
|
1,
|
|
state_buffer);
|
|
state->vdm_words = state_buffer;
|
|
state_buffer += vdm_state_size_in_dw;
|
|
|
|
pvr_pack_clear_vdm_state(&device->pdevice->dev_info,
|
|
&state->pds,
|
|
pds_program.temps_used,
|
|
4,
|
|
4 * sizeof(uint32_t),
|
|
1,
|
|
state_buffer);
|
|
state->large_clear_vdm_words = state_buffer;
|
|
|
|
result = pvr_device_init_clear_attachment_programs(device);
|
|
if (result != VK_SUCCESS)
|
|
goto err_free_vdm_state;
|
|
|
|
return VK_SUCCESS;
|
|
|
|
err_free_vdm_state:
|
|
/* Cast away the const :( */
|
|
vk_free(&device->vk.alloc, (void *)state->vdm_words);
|
|
|
|
err_free_pds_program:
|
|
pvr_bo_suballoc_free(state->pds.pvr_bo);
|
|
|
|
err_free_vertices_buffer:
|
|
pvr_bo_suballoc_free(state->vertices_bo);
|
|
|
|
err_free_usc_shader:
|
|
pvr_bo_suballoc_free(state->usc_vertex_shader_bo);
|
|
|
|
err_free_usc_multi_layer_shader:
|
|
pvr_bo_suballoc_free(state->usc_multi_layer_vertex_shader_bo);
|
|
|
|
return result;
|
|
}
|
|
|
|
void pvr_device_finish_graphics_static_clear_state(struct pvr_device *device)
|
|
{
|
|
struct pvr_device_static_clear_state *state = &device->static_clear_state;
|
|
|
|
pvr_device_finish_clear_attachment_programs(device);
|
|
|
|
/* Don't free `large_clear_vdm_words` since it was allocated together with
|
|
* `vdm_words`.
|
|
*/
|
|
/* Cast away the const :( */
|
|
vk_free(&device->vk.alloc, (void *)state->vdm_words);
|
|
|
|
pvr_bo_suballoc_free(state->pds.pvr_bo);
|
|
pvr_bo_suballoc_free(state->vertices_bo);
|
|
pvr_bo_suballoc_free(state->usc_vertex_shader_bo);
|
|
pvr_bo_suballoc_free(state->usc_multi_layer_vertex_shader_bo);
|
|
}
|
|
|
|
void pvr_pds_clear_vertex_shader_program_init_base(
|
|
struct pvr_pds_vertex_shader_program *program,
|
|
const struct pvr_suballoc_bo *usc_shader_bo)
|
|
{
|
|
*program = (struct pvr_pds_vertex_shader_program){
|
|
.num_streams = 1,
|
|
.streams = {
|
|
[0] = {
|
|
/* We'll get this from this interface's client when generating the
|
|
* data segment. This will be the address of the vertex buffer.
|
|
*/
|
|
.address = 0,
|
|
.stride = PVR_CLEAR_VERTEX_COORDINATES * sizeof(uint32_t),
|
|
.num_elements = 1,
|
|
.elements = {
|
|
[0] = {
|
|
.size = PVR_CLEAR_VERTEX_COUNT * PVR_CLEAR_VERTEX_COORDINATES,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
};
|
|
|
|
pvr_pds_setup_doutu(&program->usc_task_control,
|
|
usc_shader_bo->dev_addr.addr,
|
|
0,
|
|
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
|
|
false);
|
|
}
|
|
|
|
VkResult pvr_pds_clear_vertex_shader_program_create_and_upload(
|
|
struct pvr_pds_vertex_shader_program *program,
|
|
struct pvr_device *device,
|
|
const struct pvr_suballoc_bo *vertices_bo,
|
|
struct pvr_pds_upload *const upload_out)
|
|
{
|
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
uint32_t staging_buffer_size;
|
|
uint32_t *staging_buffer;
|
|
VkResult result;
|
|
|
|
program->streams[0].address = vertices_bo->dev_addr.addr;
|
|
|
|
pvr_pds_vertex_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);
|
|
|
|
staging_buffer_size =
|
|
PVR_DW_TO_BYTES(program->code_size + program->data_size);
|
|
|
|
staging_buffer = vk_alloc(&device->vk.alloc,
|
|
staging_buffer_size,
|
|
8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
|
if (!staging_buffer) {
|
|
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
goto err_exit;
|
|
}
|
|
|
|
pvr_pds_vertex_shader(program,
|
|
staging_buffer,
|
|
PDS_GENERATE_DATA_SEGMENT,
|
|
dev_info);
|
|
pvr_pds_vertex_shader(program,
|
|
&staging_buffer[program->data_size],
|
|
PDS_GENERATE_CODE_SEGMENT,
|
|
dev_info);
|
|
|
|
/* FIXME: Figure out the define for alignment of 16. */
|
|
result = pvr_gpu_upload_pds(device,
|
|
&staging_buffer[0],
|
|
program->data_size,
|
|
16,
|
|
&staging_buffer[program->data_size],
|
|
program->code_size,
|
|
16,
|
|
16,
|
|
upload_out);
|
|
if (result != VK_SUCCESS)
|
|
goto err_free_staging_buffer;
|
|
|
|
vk_free(&device->vk.alloc, staging_buffer);
|
|
return VK_SUCCESS;
|
|
|
|
err_free_staging_buffer:
|
|
vk_free(&device->vk.alloc, staging_buffer);
|
|
|
|
err_exit:
|
|
*upload_out = (struct pvr_pds_upload){ 0 };
|
|
return result;
|
|
}
|
|
|
|
VkResult pvr_pds_clear_vertex_shader_program_create_and_upload_data(
|
|
struct pvr_pds_vertex_shader_program *program,
|
|
struct pvr_cmd_buffer *cmd_buffer,
|
|
struct pvr_suballoc_bo *vertices_bo,
|
|
struct pvr_pds_upload *const pds_upload_out)
|
|
{
|
|
struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
|
|
uint32_t staging_buffer_size;
|
|
uint32_t *staging_buffer;
|
|
VkResult result;
|
|
|
|
program->streams[0].address = vertices_bo->dev_addr.addr;
|
|
|
|
pvr_pds_vertex_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);
|
|
|
|
staging_buffer_size = PVR_DW_TO_BYTES(program->data_size);
|
|
|
|
staging_buffer = vk_alloc(&cmd_buffer->device->vk.alloc,
|
|
staging_buffer_size,
|
|
8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
|
if (!staging_buffer) {
|
|
*pds_upload_out = (struct pvr_pds_upload){ 0 };
|
|
|
|
return vk_command_buffer_set_error(&cmd_buffer->vk,
|
|
VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
}
|
|
|
|
pvr_pds_vertex_shader(program,
|
|
staging_buffer,
|
|
PDS_GENERATE_DATA_SEGMENT,
|
|
dev_info);
|
|
|
|
result = pvr_cmd_buffer_upload_pds(cmd_buffer,
|
|
staging_buffer,
|
|
program->data_size,
|
|
4,
|
|
NULL,
|
|
0,
|
|
0,
|
|
4,
|
|
pds_upload_out);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free(&cmd_buffer->device->vk.alloc, staging_buffer);
|
|
|
|
*pds_upload_out = (struct pvr_pds_upload){ 0 };
|
|
|
|
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
|
|
}
|
|
|
|
vk_free(&cmd_buffer->device->vk.alloc, staging_buffer);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void pvr_pds_clear_rta_vertex_shader_program_init_base(
|
|
struct pvr_pds_vertex_shader_program *program,
|
|
const struct pvr_suballoc_bo *usc_shader_bo)
|
|
{
|
|
pvr_pds_clear_vertex_shader_program_init_base(program, usc_shader_bo);
|
|
|
|
/* We'll set the render target index to be the instance id + base array
|
|
* layer. Since the base array layer can change in between clear rects, we
|
|
* don't set it here and ask for it when generating the code and data
|
|
* section.
|
|
*/
|
|
/* This is 3 because the instance id register will follow the xyz coordinate
|
|
* registers in the register file.
|
|
* TODO: Maybe we want this to be hooked up to the compiler?
|
|
*/
|
|
program->iterate_instance_id = true;
|
|
program->instance_id_register = 3;
|
|
}
|
|
|
|
VkResult pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
|
|
struct pvr_pds_vertex_shader_program *program,
|
|
struct pvr_cmd_buffer *cmd_buffer,
|
|
uint32_t base_array_layer,
|
|
struct pvr_pds_upload *const pds_upload_out)
|
|
{
|
|
struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
|
|
uint32_t staging_buffer_size;
|
|
uint32_t *staging_buffer;
|
|
VkResult result;
|
|
|
|
program->instance_id_modifier = base_array_layer;
|
|
|
|
pvr_pds_vertex_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);
|
|
|
|
staging_buffer_size = PVR_DW_TO_BYTES(program->code_size);
|
|
|
|
staging_buffer = vk_alloc(&cmd_buffer->device->vk.alloc,
|
|
staging_buffer_size,
|
|
8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
|
if (!staging_buffer) {
|
|
*pds_upload_out = (struct pvr_pds_upload){ 0 };
|
|
|
|
return vk_command_buffer_set_error(&cmd_buffer->vk,
|
|
VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
}
|
|
|
|
pvr_pds_vertex_shader(program,
|
|
staging_buffer,
|
|
PDS_GENERATE_CODE_SEGMENT,
|
|
dev_info);
|
|
|
|
result = pvr_cmd_buffer_upload_pds(cmd_buffer,
|
|
NULL,
|
|
0,
|
|
0,
|
|
staging_buffer,
|
|
program->code_size,
|
|
4,
|
|
4,
|
|
pds_upload_out);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free(&cmd_buffer->device->vk.alloc, staging_buffer);
|
|
|
|
*pds_upload_out = (struct pvr_pds_upload){ 0 };
|
|
|
|
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
|
|
}
|
|
|
|
vk_free(&cmd_buffer->device->vk.alloc, staging_buffer);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
/**
|
|
* Pack VDM control stream words for clear.
|
|
*
|
|
* The size of the `state_buffer` provided is expected to point to a buffer of
|
|
* size equal to what is returned by `pvr_clear_vdm_state_get_size_in_dw()`.
|
|
*/
|
|
void pvr_pack_clear_vdm_state(const struct pvr_device_info *const dev_info,
|
|
const struct pvr_pds_upload *const program,
|
|
uint32_t temps,
|
|
uint32_t index_count,
|
|
uint32_t vs_output_size_in_bytes,
|
|
uint32_t layer_count,
|
|
uint32_t *const state_buffer)
|
|
{
|
|
const uint32_t vs_output_size =
|
|
DIV_ROUND_UP(vs_output_size_in_bytes,
|
|
ROGUE_VDMCTRL_VDM_STATE4_VS_OUTPUT_SIZE_UNIT_SIZE);
|
|
uint32_t *stream = state_buffer;
|
|
bool needs_instance_count;
|
|
uint32_t max_instances;
|
|
uint32_t cam_size;
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, gs_rta_support)) {
|
|
needs_instance_count = layer_count > 1;
|
|
|
|
/* The layer count should at least be 1. For vkCmdClearAttachment() the
|
|
* spec. guarantees that the layer count is not 0.
|
|
*/
|
|
assert(layer_count);
|
|
} else {
|
|
needs_instance_count = false;
|
|
assert(layer_count == 1);
|
|
}
|
|
|
|
pvr_calculate_vertex_cam_size(dev_info,
|
|
vs_output_size,
|
|
true,
|
|
&cam_size,
|
|
&max_instances);
|
|
|
|
pvr_csb_pack (stream, VDMCTRL_VDM_STATE0, state0) {
|
|
state0.vs_data_addr_present = true;
|
|
state0.vs_other_present = true;
|
|
state0.cam_size = cam_size;
|
|
state0.uvs_scratch_size_select =
|
|
ROGUE_VDMCTRL_UVS_SCRATCH_SIZE_SELECT_FIVE;
|
|
state0.flatshade_control = ROGUE_VDMCTRL_FLATSHADE_CONTROL_VERTEX_0;
|
|
}
|
|
stream += pvr_cmd_length(VDMCTRL_VDM_STATE0);
|
|
|
|
pvr_csb_pack (stream, VDMCTRL_VDM_STATE2, state2) {
|
|
state2.vs_pds_data_base_addr = PVR_DEV_ADDR(program->data_offset);
|
|
}
|
|
stream += pvr_cmd_length(VDMCTRL_VDM_STATE2);
|
|
|
|
pvr_csb_pack (stream, VDMCTRL_VDM_STATE3, state3) {
|
|
state3.vs_pds_code_base_addr = PVR_DEV_ADDR(program->code_offset);
|
|
}
|
|
stream += pvr_cmd_length(VDMCTRL_VDM_STATE3);
|
|
|
|
pvr_csb_pack (stream, VDMCTRL_VDM_STATE4, state4) {
|
|
state4.vs_output_size = vs_output_size;
|
|
}
|
|
stream += pvr_cmd_length(VDMCTRL_VDM_STATE4);
|
|
|
|
pvr_csb_pack (stream, VDMCTRL_VDM_STATE5, state5) {
|
|
state5.vs_max_instances = max_instances;
|
|
/* This is the size of the input vertex. The hw manages the USC
|
|
* temporaries separately so we don't need to include them here.
|
|
*/
|
|
state5.vs_usc_unified_size =
|
|
DIV_ROUND_UP(PVR_CLEAR_VERTEX_COORDINATES * sizeof(uint32_t),
|
|
ROGUE_VDMCTRL_VDM_STATE5_VS_USC_UNIFIED_SIZE_UNIT_SIZE);
|
|
state5.vs_pds_temp_size =
|
|
DIV_ROUND_UP(temps,
|
|
ROGUE_VDMCTRL_VDM_STATE5_VS_PDS_TEMP_SIZE_UNIT_SIZE);
|
|
state5.vs_pds_data_size =
|
|
DIV_ROUND_UP(PVR_DW_TO_BYTES(program->data_size),
|
|
ROGUE_VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE);
|
|
}
|
|
stream += pvr_cmd_length(VDMCTRL_VDM_STATE5);
|
|
|
|
/* TODO: Here we're doing another state update. If emitting directly to the
|
|
* control stream, we don't mark them as separate state updates by setting
|
|
* the relocation mark so we might be wasting a little bit of memory. See if
|
|
* it's worth changing the code to use the relocation mark.
|
|
*/
|
|
|
|
pvr_csb_pack (stream, VDMCTRL_INDEX_LIST0, index_list0) {
|
|
index_list0.index_count_present = true;
|
|
index_list0.index_instance_count_present = needs_instance_count;
|
|
index_list0.primitive_topology =
|
|
ROGUE_VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP;
|
|
}
|
|
stream += pvr_cmd_length(VDMCTRL_INDEX_LIST0);
|
|
|
|
pvr_csb_pack (stream, VDMCTRL_INDEX_LIST2, index_list3) {
|
|
index_list3.index_count = index_count;
|
|
}
|
|
stream += pvr_cmd_length(VDMCTRL_INDEX_LIST2);
|
|
|
|
if (needs_instance_count) {
|
|
pvr_csb_pack (stream, VDMCTRL_INDEX_LIST3, index_list3) {
|
|
index_list3.instance_count = layer_count - 1;
|
|
}
|
|
stream += pvr_cmd_length(VDMCTRL_INDEX_LIST3);
|
|
}
|
|
|
|
assert((uint64_t)(stream - state_buffer) ==
|
|
pvr_clear_vdm_state_get_size_in_dw(dev_info, layer_count));
|
|
}
|