mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-16 17:20:28 +01:00
pvr: Setup SPM background object
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21575>
This commit is contained in:
parent
d75a36a9ee
commit
1dfd535124
8 changed files with 443 additions and 23 deletions
|
|
@ -1163,6 +1163,9 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||||
struct pvr_pds_upload pds_pixel_event_program;
|
struct pvr_pds_upload pds_pixel_event_program;
|
||||||
uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS]
|
uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS]
|
||||||
[ROGUE_NUM_PBESTATE_STATE_WORDS] = { 0 };
|
[ROGUE_NUM_PBESTATE_STATE_WORDS] = { 0 };
|
||||||
|
struct pvr_framebuffer *framebuffer = render_pass_info->framebuffer;
|
||||||
|
struct pvr_spm_bgobj_state *spm_bgobj_state =
|
||||||
|
&framebuffer->spm_bgobj_state_per_render[sub_cmd->hw_render_idx];
|
||||||
struct pvr_render_target *render_target;
|
struct pvr_render_target *render_target;
|
||||||
VkResult result;
|
VkResult result;
|
||||||
|
|
||||||
|
|
@ -1194,7 +1197,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||||
}
|
}
|
||||||
|
|
||||||
pvr_setup_pbe_state(dev_info,
|
pvr_setup_pbe_state(dev_info,
|
||||||
render_pass_info->framebuffer,
|
framebuffer,
|
||||||
surface->mrt_idx,
|
surface->mrt_idx,
|
||||||
mrt_resource,
|
mrt_resource,
|
||||||
iview,
|
iview,
|
||||||
|
|
@ -1216,8 +1219,16 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||||
|
|
||||||
job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;
|
job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;
|
||||||
|
|
||||||
/* FIXME: Don't do this if there is a barrier load. */
|
if (sub_cmd->barrier_load) {
|
||||||
if (render_pass_info->enable_bg_tag) {
|
job->enable_bg_tag = true;
|
||||||
|
job->process_empty_tiles = true;
|
||||||
|
|
||||||
|
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
|
||||||
|
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||||
|
typed_memcpy(job->pds_bgnd_reg_values,
|
||||||
|
spm_bgobj_state->pds_reg_values,
|
||||||
|
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||||
|
} else if (render_pass_info->enable_bg_tag) {
|
||||||
const struct pvr_load_op *load_op = hw_render->load_op;
|
const struct pvr_load_op *load_op = hw_render->load_op;
|
||||||
struct pvr_pds_upload load_op_program;
|
struct pvr_pds_upload load_op_program;
|
||||||
|
|
||||||
|
|
@ -1230,16 +1241,22 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
|
job->enable_bg_tag = render_pass_info->enable_bg_tag;
|
||||||
|
job->process_empty_tiles = render_pass_info->process_empty_tiles;
|
||||||
|
|
||||||
pvr_pds_bgnd_pack_state(load_op,
|
pvr_pds_bgnd_pack_state(load_op,
|
||||||
&load_op_program,
|
&load_op_program,
|
||||||
job->pds_bgnd_reg_values);
|
job->pds_bgnd_reg_values);
|
||||||
}
|
}
|
||||||
|
|
||||||
job->enable_bg_tag = render_pass_info->enable_bg_tag;
|
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
|
||||||
job->process_empty_tiles = render_pass_info->process_empty_tiles;
|
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||||
|
typed_memcpy(job->pds_pr_bgnd_reg_values,
|
||||||
|
spm_bgobj_state->pds_reg_values,
|
||||||
|
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||||
|
|
||||||
render_target = pvr_get_render_target(render_pass_info->pass,
|
render_target = pvr_get_render_target(render_pass_info->pass,
|
||||||
render_pass_info->framebuffer,
|
framebuffer,
|
||||||
sub_cmd->hw_render_idx);
|
sub_cmd->hw_render_idx);
|
||||||
job->rt_dataset = render_target->rt_dataset;
|
job->rt_dataset = render_target->rt_dataset;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2676,6 +2676,7 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
|
||||||
{
|
{
|
||||||
PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
|
PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
|
||||||
PVR_FROM_HANDLE(pvr_device, device, _device);
|
PVR_FROM_HANDLE(pvr_device, device, _device);
|
||||||
|
struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
|
||||||
struct pvr_spm_eot_state *spm_eot_state_per_render;
|
struct pvr_spm_eot_state *spm_eot_state_per_render;
|
||||||
struct pvr_render_target *render_targets;
|
struct pvr_render_target *render_targets;
|
||||||
struct pvr_framebuffer *framebuffer;
|
struct pvr_framebuffer *framebuffer;
|
||||||
|
|
@ -2703,6 +2704,10 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
|
||||||
&spm_eot_state_per_render,
|
&spm_eot_state_per_render,
|
||||||
__typeof__(*spm_eot_state_per_render),
|
__typeof__(*spm_eot_state_per_render),
|
||||||
pass->hw_setup->render_count);
|
pass->hw_setup->render_count);
|
||||||
|
vk_multialloc_add(&ma,
|
||||||
|
&spm_bgobj_state_per_render,
|
||||||
|
__typeof__(*spm_bgobj_state_per_render),
|
||||||
|
pass->hw_setup->render_count);
|
||||||
|
|
||||||
if (!vk_multialloc_zalloc2(&ma,
|
if (!vk_multialloc_zalloc2(&ma,
|
||||||
&device->vk.alloc,
|
&device->vk.alloc,
|
||||||
|
|
@ -2749,20 +2754,42 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
|
||||||
goto err_finish_render_targets;
|
goto err_finish_render_targets;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
|
for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
|
||||||
|
uint32_t emit_count;
|
||||||
|
|
||||||
result = pvr_spm_init_eot_state(device,
|
result = pvr_spm_init_eot_state(device,
|
||||||
&spm_eot_state_per_render[i],
|
&spm_eot_state_per_render[i],
|
||||||
framebuffer,
|
framebuffer,
|
||||||
&pass->hw_setup->renders[i]);
|
&pass->hw_setup->renders[i],
|
||||||
if (result != VK_SUCCESS) {
|
&emit_count);
|
||||||
for (uint32_t j = 0; j < i; j++)
|
if (result != VK_SUCCESS)
|
||||||
pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
|
goto err_finish_eot_state;
|
||||||
|
|
||||||
goto err_finish_render_targets;
|
result = pvr_spm_init_bgobj_state(device,
|
||||||
}
|
&spm_bgobj_state_per_render[i],
|
||||||
|
framebuffer,
|
||||||
|
&pass->hw_setup->renders[i],
|
||||||
|
emit_count);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto err_finish_bgobj_state;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
|
||||||
|
err_finish_bgobj_state:
|
||||||
|
pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);
|
||||||
|
|
||||||
|
for (uint32_t j = 0; j < i; j++)
|
||||||
|
pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);
|
||||||
|
|
||||||
|
err_finish_eot_state:
|
||||||
|
for (uint32_t j = 0; j < i; j++)
|
||||||
|
pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
|
||||||
|
|
||||||
|
goto err_finish_render_targets;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
framebuffer->render_count = pass->hw_setup->render_count;
|
||||||
framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
|
framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
|
||||||
framebuffer->spm_eot_state_count = pass->hw_setup->render_count;
|
framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;
|
||||||
|
|
||||||
*pFramebuffer = pvr_framebuffer_to_handle(framebuffer);
|
*pFramebuffer = pvr_framebuffer_to_handle(framebuffer);
|
||||||
|
|
||||||
|
|
@ -2791,7 +2818,10 @@ void pvr_DestroyFramebuffer(VkDevice _device,
|
||||||
if (!framebuffer)
|
if (!framebuffer)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < framebuffer->spm_eot_state_count; i++) {
|
for (uint32_t i = 0; i < framebuffer->render_count; i++) {
|
||||||
|
pvr_spm_finish_bgobj_state(device,
|
||||||
|
&framebuffer->spm_bgobj_state_per_render[i]);
|
||||||
|
|
||||||
pvr_spm_finish_eot_state(device,
|
pvr_spm_finish_eot_state(device,
|
||||||
&framebuffer->spm_eot_state_per_render[i]);
|
&framebuffer->spm_eot_state_per_render[i]);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1475,8 +1475,11 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
||||||
sizeof(job->pds_bgnd_reg_values));
|
sizeof(job->pds_bgnd_reg_values));
|
||||||
stream_ptr += 3U * 2U;
|
stream_ptr += 3U * 2U;
|
||||||
|
|
||||||
/* Set pds_pr_bgnd array to 0 */
|
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) == 3U);
|
||||||
memset(stream_ptr, 0, 3U * sizeof(uint64_t));
|
STATIC_ASSERT(sizeof(job->pds_pr_bgnd_reg_values[0]) == sizeof(uint64_t));
|
||||||
|
memcpy(stream_ptr,
|
||||||
|
job->pds_pr_bgnd_reg_values,
|
||||||
|
sizeof(job->pds_pr_bgnd_reg_values));
|
||||||
stream_ptr += 3U * 2U;
|
stream_ptr += 3U * 2U;
|
||||||
|
|
||||||
/* Set usc_clear_register array to 0 */
|
/* Set usc_clear_register array to 0 */
|
||||||
|
|
|
||||||
|
|
@ -107,6 +107,7 @@ struct pvr_render_job {
|
||||||
[ROGUE_NUM_PBESTATE_REG_WORDS];
|
[ROGUE_NUM_PBESTATE_REG_WORDS];
|
||||||
|
|
||||||
uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
|
uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
|
||||||
|
uint64_t pds_pr_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
|
||||||
};
|
};
|
||||||
|
|
||||||
VkResult pvr_free_list_create(struct pvr_device *device,
|
VkResult pvr_free_list_create(struct pvr_device *device,
|
||||||
|
|
|
||||||
|
|
@ -251,7 +251,7 @@ struct pvr_device {
|
||||||
struct pvr_bo *usc_programs;
|
struct pvr_bo *usc_programs;
|
||||||
struct pvr_bo *pds_programs;
|
struct pvr_bo *pds_programs;
|
||||||
|
|
||||||
struct {
|
struct pvr_spm_per_load_program_state {
|
||||||
pvr_dev_addr_t pds_pixel_program_offset;
|
pvr_dev_addr_t pds_pixel_program_offset;
|
||||||
pvr_dev_addr_t pds_uniform_program_offset;
|
pvr_dev_addr_t pds_uniform_program_offset;
|
||||||
|
|
||||||
|
|
@ -988,8 +988,9 @@ struct pvr_framebuffer {
|
||||||
|
|
||||||
struct pvr_spm_scratch_buffer *scratch_buffer;
|
struct pvr_spm_scratch_buffer *scratch_buffer;
|
||||||
|
|
||||||
uint32_t spm_eot_state_count;
|
uint32_t render_count;
|
||||||
struct pvr_spm_eot_state *spm_eot_state_per_render;
|
struct pvr_spm_eot_state *spm_eot_state_per_render;
|
||||||
|
struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct pvr_render_pass_attachment {
|
struct pvr_render_pass_attachment {
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@
|
||||||
#include "pvr_csb.h"
|
#include "pvr_csb.h"
|
||||||
#include "pvr_csb_enum_helpers.h"
|
#include "pvr_csb_enum_helpers.h"
|
||||||
#include "pvr_device_info.h"
|
#include "pvr_device_info.h"
|
||||||
|
#include "pvr_formats.h"
|
||||||
#include "pvr_hw_pass.h"
|
#include "pvr_hw_pass.h"
|
||||||
#include "pvr_job_common.h"
|
#include "pvr_job_common.h"
|
||||||
#include "pvr_pds.h"
|
#include "pvr_pds.h"
|
||||||
|
|
@ -39,6 +40,7 @@
|
||||||
#include "pvr_shader_factory.h"
|
#include "pvr_shader_factory.h"
|
||||||
#include "pvr_spm.h"
|
#include "pvr_spm.h"
|
||||||
#include "pvr_static_shaders.h"
|
#include "pvr_static_shaders.h"
|
||||||
|
#include "pvr_tex_state.h"
|
||||||
#include "pvr_types.h"
|
#include "pvr_types.h"
|
||||||
#include "util/bitscan.h"
|
#include "util/bitscan.h"
|
||||||
#include "util/macros.h"
|
#include "util/macros.h"
|
||||||
|
|
@ -642,7 +644,8 @@ VkResult
|
||||||
pvr_spm_init_eot_state(struct pvr_device *device,
|
pvr_spm_init_eot_state(struct pvr_device *device,
|
||||||
struct pvr_spm_eot_state *spm_eot_state,
|
struct pvr_spm_eot_state *spm_eot_state,
|
||||||
const struct pvr_framebuffer *framebuffer,
|
const struct pvr_framebuffer *framebuffer,
|
||||||
const struct pvr_renderpass_hwsetup_render *hw_render)
|
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||||
|
uint32_t *emit_count_out)
|
||||||
{
|
{
|
||||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||||
struct pvr_pds_upload pds_eot_program;
|
struct pvr_pds_upload pds_eot_program;
|
||||||
|
|
@ -810,14 +813,325 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
||||||
spm_eot_state->pixel_event_program_data_upload = pds_eot_program.pvr_bo;
|
spm_eot_state->pixel_event_program_data_upload = pds_eot_program.pvr_bo;
|
||||||
spm_eot_state->pixel_event_program_data_offset = pds_eot_program.data_offset;
|
spm_eot_state->pixel_event_program_data_offset = pds_eot_program.data_offset;
|
||||||
|
|
||||||
|
*emit_count_out = mrt_setup.num_render_targets;
|
||||||
|
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef PVR_DEV_ADDR_ADVANCE
|
|
||||||
|
|
||||||
void pvr_spm_finish_eot_state(struct pvr_device *device,
|
void pvr_spm_finish_eot_state(struct pvr_device *device,
|
||||||
struct pvr_spm_eot_state *spm_eot_state)
|
struct pvr_spm_eot_state *spm_eot_state)
|
||||||
{
|
{
|
||||||
pvr_bo_free(device, spm_eot_state->pixel_event_program_data_upload);
|
pvr_bo_free(device, spm_eot_state->pixel_event_program_data_upload);
|
||||||
pvr_bo_free(device, spm_eot_state->usc_eot_program);
|
pvr_bo_free(device, spm_eot_state->usc_eot_program);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static VkFormat pvr_get_format_from_dword_count(uint32_t dword_count)
|
||||||
|
{
|
||||||
|
switch (dword_count) {
|
||||||
|
case 1:
|
||||||
|
return VK_FORMAT_R32_UINT;
|
||||||
|
case 2:
|
||||||
|
return VK_FORMAT_R32G32_UINT;
|
||||||
|
case 4:
|
||||||
|
return VK_FORMAT_R32G32B32A32_UINT;
|
||||||
|
default:
|
||||||
|
unreachable("Invalid dword_count");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult pvr_spm_setup_texture_state_words(
|
||||||
|
struct pvr_device *device,
|
||||||
|
uint32_t dword_count,
|
||||||
|
const VkExtent2D framebuffer_size,
|
||||||
|
uint32_t sample_count,
|
||||||
|
pvr_dev_addr_t scratch_buffer_addr,
|
||||||
|
uint64_t image_descriptor[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS],
|
||||||
|
uint64_t *mem_used_out)
|
||||||
|
{
|
||||||
|
/* We can ignore the framebuffer's layer count since we only support
|
||||||
|
* writing to layer 0.
|
||||||
|
*/
|
||||||
|
struct pvr_texture_state_info info = {
|
||||||
|
.format = pvr_get_format_from_dword_count(dword_count),
|
||||||
|
.mem_layout = PVR_MEMLAYOUT_LINEAR,
|
||||||
|
|
||||||
|
.type = VK_IMAGE_VIEW_TYPE_2D,
|
||||||
|
.tex_state_type = PVR_TEXTURE_STATE_STORAGE,
|
||||||
|
.extent = {
|
||||||
|
.width = framebuffer_size.width,
|
||||||
|
.height = framebuffer_size.height,
|
||||||
|
},
|
||||||
|
|
||||||
|
.mip_levels = 1,
|
||||||
|
|
||||||
|
.sample_count = sample_count,
|
||||||
|
.stride = framebuffer_size.width,
|
||||||
|
|
||||||
|
.addr = scratch_buffer_addr,
|
||||||
|
};
|
||||||
|
const uint64_t aligned_fb_width =
|
||||||
|
ALIGN_POT(framebuffer_size.width,
|
||||||
|
PVRX(CR_PBE_WORD0_MRT0_LINESTRIDE_ALIGNMENT));
|
||||||
|
const uint64_t fb_area = aligned_fb_width * framebuffer_size.height;
|
||||||
|
const uint8_t *format_swizzle;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
|
format_swizzle = pvr_get_format_swizzle(info.format);
|
||||||
|
memcpy(info.swizzle, format_swizzle, sizeof(info.swizzle));
|
||||||
|
|
||||||
|
result = pvr_pack_tex_state(device, &info, image_descriptor);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
*mem_used_out = fb_area * dword_count * sizeof(uint32_t) * sample_count;
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FIXME: Can we dedup this with pvr_load_op_pds_data_create_and_upload() ? */
|
||||||
|
static VkResult pvr_pds_bgnd_program_create_and_upload(
|
||||||
|
struct pvr_device *device,
|
||||||
|
uint32_t texture_program_data_size_in_dwords,
|
||||||
|
const struct pvr_bo *consts_buffer,
|
||||||
|
uint32_t const_shared_regs,
|
||||||
|
struct pvr_pds_upload *pds_upload_out)
|
||||||
|
{
|
||||||
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||||
|
struct pvr_pds_pixel_shader_sa_program texture_program = { 0 };
|
||||||
|
uint32_t staging_buffer_size;
|
||||||
|
uint32_t *staging_buffer;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
|
pvr_csb_pack (&texture_program.texture_dma_address[0],
|
||||||
|
PDSINST_DOUT_FIELDS_DOUTD_SRC0,
|
||||||
|
doutd_src0) {
|
||||||
|
doutd_src0.sbase = consts_buffer->vma->dev_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
pvr_csb_pack (&texture_program.texture_dma_control[0],
|
||||||
|
PDSINST_DOUT_FIELDS_DOUTD_SRC1,
|
||||||
|
doutd_src1) {
|
||||||
|
doutd_src1.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
|
||||||
|
doutd_src1.bsize = const_shared_regs;
|
||||||
|
}
|
||||||
|
|
||||||
|
texture_program.num_texture_dma_kicks += 1;
|
||||||
|
|
||||||
|
#if defined(DEBUG)
|
||||||
|
pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_program, dev_info);
|
||||||
|
assert(texture_program_data_size_in_dwords == texture_program.data_size);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
staging_buffer_size = texture_program_data_size_in_dwords * sizeof(uint32_t);
|
||||||
|
|
||||||
|
staging_buffer = vk_alloc(&device->vk.alloc,
|
||||||
|
staging_buffer_size,
|
||||||
|
8,
|
||||||
|
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||||
|
if (!staging_buffer)
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
|
||||||
|
pvr_pds_generate_pixel_shader_sa_texture_state_data(&texture_program,
|
||||||
|
staging_buffer,
|
||||||
|
dev_info);
|
||||||
|
|
||||||
|
/* FIXME: Figure out the define for alignment of 16. */
|
||||||
|
result = pvr_gpu_upload_pds(device,
|
||||||
|
&staging_buffer[0],
|
||||||
|
texture_program_data_size_in_dwords,
|
||||||
|
16,
|
||||||
|
NULL,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
16,
|
||||||
|
pds_upload_out);
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
vk_free(&device->vk.alloc, staging_buffer);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
vk_free(&device->vk.alloc, staging_buffer);
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkResult
|
||||||
|
pvr_spm_init_bgobj_state(struct pvr_device *device,
|
||||||
|
struct pvr_spm_bgobj_state *spm_bgobj_state,
|
||||||
|
const struct pvr_framebuffer *framebuffer,
|
||||||
|
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||||
|
uint32_t emit_count)
|
||||||
|
{
|
||||||
|
const uint32_t spm_load_program_idx =
|
||||||
|
pvr_get_spm_load_program_index(hw_render->sample_count,
|
||||||
|
hw_render->tile_buffers_count,
|
||||||
|
hw_render->output_regs_count);
|
||||||
|
const VkExtent2D framebuffer_size = {
|
||||||
|
.width = framebuffer->width,
|
||||||
|
.height = framebuffer->height,
|
||||||
|
};
|
||||||
|
pvr_dev_addr_t next_scratch_buffer_addr =
|
||||||
|
framebuffer->scratch_buffer->bo->vma->dev_addr;
|
||||||
|
struct pvr_spm_per_load_program_state *load_program_state;
|
||||||
|
struct pvr_pds_upload pds_texture_data_upload;
|
||||||
|
const struct pvr_shader_factory_info *info;
|
||||||
|
union pvr_sampler_descriptor *descriptor;
|
||||||
|
uint64_t consts_buffer_size;
|
||||||
|
uint32_t dword_count;
|
||||||
|
uint32_t *mem_ptr;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
|
assert(spm_load_program_idx < ARRAY_SIZE(spm_load_collection));
|
||||||
|
info = spm_load_collection[spm_load_program_idx].info;
|
||||||
|
|
||||||
|
consts_buffer_size = info->const_shared_regs * sizeof(uint32_t);
|
||||||
|
|
||||||
|
result = pvr_bo_alloc(device,
|
||||||
|
device->heaps.general_heap,
|
||||||
|
consts_buffer_size,
|
||||||
|
sizeof(uint32_t),
|
||||||
|
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
|
||||||
|
&spm_bgobj_state->consts_buffer);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
mem_ptr = spm_bgobj_state->consts_buffer->bo->map;
|
||||||
|
|
||||||
|
if (info->driver_const_location_map) {
|
||||||
|
const uint32_t *const const_map = info->driver_const_location_map;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < PVR_SPM_LOAD_CONST_COUNT; i += 2) {
|
||||||
|
pvr_dev_addr_t tile_buffer_addr;
|
||||||
|
|
||||||
|
if (const_map[i] == PVR_SPM_LOAD_DEST_UNUSED) {
|
||||||
|
#if defined(DEBUG)
|
||||||
|
for (uint32_t j = i; j < PVR_SPM_LOAD_CONST_COUNT; j++)
|
||||||
|
assert(const_map[j] == PVR_SPM_LOAD_DEST_UNUSED);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
tile_buffer_addr =
|
||||||
|
device->tile_buffer_state.buffers[i / 2]->vma->dev_addr;
|
||||||
|
|
||||||
|
assert(const_map[i] == const_map[i + 1] + 1);
|
||||||
|
mem_ptr[const_map[i]] = tile_buffer_addr.addr >> 32;
|
||||||
|
mem_ptr[const_map[i + 1]] = (uint32_t)tile_buffer_addr.addr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: The 32 comes from how the shaders are compiled. We should
|
||||||
|
* unhardcode it when this is hooked up to the compiler.
|
||||||
|
*/
|
||||||
|
descriptor = (union pvr_sampler_descriptor *)(mem_ptr + 32);
|
||||||
|
*descriptor = (union pvr_sampler_descriptor){ 0 };
|
||||||
|
|
||||||
|
pvr_csb_pack (&descriptor->data.sampler_word, TEXSTATE_SAMPLER, sampler) {
|
||||||
|
sampler.non_normalized_coords = true;
|
||||||
|
sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||||
|
sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||||
|
sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
|
||||||
|
sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
|
||||||
|
sampler.maxlod = PVRX(TEXSTATE_CLAMP_MIN);
|
||||||
|
sampler.minlod = PVRX(TEXSTATE_CLAMP_MIN);
|
||||||
|
sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Even if we might have 8 output regs we can only pack and write 4 dwords
|
||||||
|
* using R32G32B32A32_UINT.
|
||||||
|
*/
|
||||||
|
if (hw_render->tile_buffers_count > 0)
|
||||||
|
dword_count = 4;
|
||||||
|
else
|
||||||
|
dword_count = MIN2(hw_render->output_regs_count, 4);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < emit_count; i++) {
|
||||||
|
uint64_t *mem_ptr_u64 = (uint64_t *)mem_ptr;
|
||||||
|
uint64_t mem_used = 0;
|
||||||
|
|
||||||
|
STATIC_ASSERT(ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t) /
|
||||||
|
sizeof(uint32_t) ==
|
||||||
|
PVR_IMAGE_DESCRIPTOR_SIZE);
|
||||||
|
mem_ptr_u64 += i * ROGUE_NUM_TEXSTATE_IMAGE_WORDS;
|
||||||
|
|
||||||
|
result = pvr_spm_setup_texture_state_words(device,
|
||||||
|
dword_count,
|
||||||
|
framebuffer_size,
|
||||||
|
hw_render->sample_count,
|
||||||
|
next_scratch_buffer_addr,
|
||||||
|
mem_ptr_u64,
|
||||||
|
&mem_used);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto err_free_consts_buffer;
|
||||||
|
|
||||||
|
PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_used);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(spm_load_program_idx <
|
||||||
|
ARRAY_SIZE(device->spm_load_state.load_program));
|
||||||
|
load_program_state =
|
||||||
|
&device->spm_load_state.load_program[spm_load_program_idx];
|
||||||
|
|
||||||
|
result = pvr_pds_bgnd_program_create_and_upload(
|
||||||
|
device,
|
||||||
|
load_program_state->pds_texture_program_data_size,
|
||||||
|
spm_bgobj_state->consts_buffer,
|
||||||
|
info->const_shared_regs,
|
||||||
|
&pds_texture_data_upload);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto err_free_consts_buffer;
|
||||||
|
|
||||||
|
spm_bgobj_state->pds_texture_data_upload = pds_texture_data_upload.pvr_bo;
|
||||||
|
|
||||||
|
/* TODO: Is it worth to dedup this with pvr_pds_bgnd_pack_state() ? */
|
||||||
|
|
||||||
|
/* clang-format off */
|
||||||
|
pvr_csb_pack (&spm_bgobj_state->pds_reg_values[0],
|
||||||
|
CR_PDS_BGRND0_BASE,
|
||||||
|
value) {
|
||||||
|
/* clang-format on */
|
||||||
|
value.shader_addr = load_program_state->pds_pixel_program_offset;
|
||||||
|
value.texunicode_addr = load_program_state->pds_uniform_program_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* clang-format off */
|
||||||
|
pvr_csb_pack (&spm_bgobj_state->pds_reg_values[1],
|
||||||
|
CR_PDS_BGRND1_BASE,
|
||||||
|
value) {
|
||||||
|
/* clang-format on */
|
||||||
|
value.texturedata_addr =
|
||||||
|
PVR_DEV_ADDR(pds_texture_data_upload.data_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* clang-format off */
|
||||||
|
pvr_csb_pack (&spm_bgobj_state->pds_reg_values[2],
|
||||||
|
CR_PDS_BGRND3_SIZEINFO,
|
||||||
|
value) {
|
||||||
|
/* clang-format on */
|
||||||
|
value.usc_sharedsize =
|
||||||
|
DIV_ROUND_UP(info->const_shared_regs,
|
||||||
|
PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
|
||||||
|
value.pds_texturestatesize = DIV_ROUND_UP(
|
||||||
|
pds_texture_data_upload.data_size,
|
||||||
|
PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE));
|
||||||
|
value.pds_tempsize =
|
||||||
|
DIV_ROUND_UP(load_program_state->pds_texture_program_temps_count,
|
||||||
|
PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
|
||||||
|
}
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
|
||||||
|
err_free_consts_buffer:
|
||||||
|
pvr_bo_free(device, spm_bgobj_state->consts_buffer);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void pvr_spm_finish_bgobj_state(struct pvr_device *device,
|
||||||
|
struct pvr_spm_bgobj_state *spm_bgobj_state)
|
||||||
|
{
|
||||||
|
pvr_bo_free(device, spm_bgobj_state->pds_texture_data_upload);
|
||||||
|
pvr_bo_free(device, spm_bgobj_state->consts_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef PVR_DEV_ADDR_ADVANCE
|
||||||
|
|
|
||||||
|
|
@ -76,6 +76,17 @@ struct pvr_spm_eot_state {
|
||||||
struct pvr_bo *pixel_event_program_data_upload;
|
struct pvr_bo *pixel_event_program_data_upload;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct pvr_spm_bgobj_state {
|
||||||
|
struct pvr_bo *consts_buffer;
|
||||||
|
|
||||||
|
/* TODO: Make this struct pvr_pds_upload? It would pull in pvr_private.h
|
||||||
|
* though which causes a cycle since that includes pvr_spm.h .
|
||||||
|
*/
|
||||||
|
struct pvr_bo *pds_texture_data_upload;
|
||||||
|
|
||||||
|
uint64_t pds_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
|
||||||
|
};
|
||||||
|
|
||||||
void pvr_spm_init_scratch_buffer_store(struct pvr_device *device);
|
void pvr_spm_init_scratch_buffer_store(struct pvr_device *device);
|
||||||
void pvr_spm_finish_scratch_buffer_store(struct pvr_device *device);
|
void pvr_spm_finish_scratch_buffer_store(struct pvr_device *device);
|
||||||
|
|
||||||
|
|
@ -106,8 +117,18 @@ VkResult
|
||||||
pvr_spm_init_eot_state(struct pvr_device *device,
|
pvr_spm_init_eot_state(struct pvr_device *device,
|
||||||
struct pvr_spm_eot_state *spm_eot_state,
|
struct pvr_spm_eot_state *spm_eot_state,
|
||||||
const struct pvr_framebuffer *framebuffer,
|
const struct pvr_framebuffer *framebuffer,
|
||||||
const struct pvr_renderpass_hwsetup_render *hw_render);
|
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||||
|
uint32_t *emit_count_out);
|
||||||
void pvr_spm_finish_eot_state(struct pvr_device *device,
|
void pvr_spm_finish_eot_state(struct pvr_device *device,
|
||||||
struct pvr_spm_eot_state *spm_eot_state);
|
struct pvr_spm_eot_state *spm_eot_state);
|
||||||
|
|
||||||
|
VkResult
|
||||||
|
pvr_spm_init_bgobj_state(struct pvr_device *device,
|
||||||
|
struct pvr_spm_bgobj_state *spm_bgobj_state,
|
||||||
|
const struct pvr_framebuffer *framebuffer,
|
||||||
|
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||||
|
uint32_t emit_count);
|
||||||
|
void pvr_spm_finish_bgobj_state(struct pvr_device *device,
|
||||||
|
struct pvr_spm_bgobj_state *spm_bgobj_state);
|
||||||
|
|
||||||
#endif /* PVR_SPM_H */
|
#endif /* PVR_SPM_H */
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,8 @@
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
#include "util/bitpack_helpers.h"
|
#include "util/bitpack_helpers.h"
|
||||||
|
#include "util/bitscan.h"
|
||||||
|
#include "util/u_math.h"
|
||||||
|
|
||||||
/* Occlusion query availability writes. */
|
/* Occlusion query availability writes. */
|
||||||
enum pvr_query_availability_write_pool_const {
|
enum pvr_query_availability_write_pool_const {
|
||||||
|
|
@ -145,10 +147,14 @@ enum pvr_spm_load_const {
|
||||||
SPM_LOAD_CONST_TILE_BUFFER_7_UPPER,
|
SPM_LOAD_CONST_TILE_BUFFER_7_UPPER,
|
||||||
SPM_LOAD_CONST_TILE_BUFFER_7_LOWER,
|
SPM_LOAD_CONST_TILE_BUFFER_7_LOWER,
|
||||||
};
|
};
|
||||||
|
#define PVR_SPM_LOAD_CONST_COUNT (SPM_LOAD_CONST_TILE_BUFFER_7_LOWER + 1)
|
||||||
#define PVR_SPM_LOAD_DEST_UNUSED ~0
|
#define PVR_SPM_LOAD_DEST_UNUSED ~0
|
||||||
|
|
||||||
#define PVR_SPM_LOAD_SAMPLES_COUNT 4U
|
#define PVR_SPM_LOAD_SAMPLES_COUNT 4U
|
||||||
|
|
||||||
|
#define PVR_SPM_LOAD_IN_REGS_COUNT 3 /* 1, 2, 4 */
|
||||||
|
#define PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT 7 /* 1, 2, 3, 4, 5, 6, 7 */
|
||||||
|
|
||||||
/* If output_regs == 8
|
/* If output_regs == 8
|
||||||
* reg_load_programs = 4 # 1, 2, 4, 8
|
* reg_load_programs = 4 # 1, 2, 4, 8
|
||||||
* tile_buffer_load_programs = 3 # 1, 2, 3
|
* tile_buffer_load_programs = 3 # 1, 2, 3
|
||||||
|
|
@ -164,6 +170,33 @@ enum pvr_spm_load_const {
|
||||||
/* FIXME: This is currently hard coded for the am62. The Chromebook has 8
|
/* FIXME: This is currently hard coded for the am62. The Chromebook has 8
|
||||||
* output regs so the count is different.
|
* output regs so the count is different.
|
||||||
*/
|
*/
|
||||||
#define PVR_SPM_LOAD_PROGRAM_COUNT (PVR_SPM_LOAD_SAMPLES_COUNT * (3 + 7))
|
#define PVR_SPM_LOAD_PROGRAM_COUNT \
|
||||||
|
(PVR_SPM_LOAD_SAMPLES_COUNT * \
|
||||||
|
(PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT))
|
||||||
|
|
||||||
|
static inline uint32_t pvr_get_spm_load_program_index(uint32_t sample_count,
|
||||||
|
uint32_t num_tile_buffers,
|
||||||
|
uint32_t num_output_regs)
|
||||||
|
{
|
||||||
|
uint32_t idx;
|
||||||
|
|
||||||
|
assert(util_is_power_of_two_nonzero(sample_count));
|
||||||
|
idx = util_logbase2(sample_count) *
|
||||||
|
(PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
|
||||||
|
|
||||||
|
assert((num_tile_buffers > 0) ^ (num_output_regs > 0));
|
||||||
|
|
||||||
|
if (num_output_regs > 0) {
|
||||||
|
assert(util_is_power_of_two_nonzero(num_output_regs));
|
||||||
|
assert(util_logbase2(num_output_regs) < PVR_SPM_LOAD_IN_REGS_COUNT);
|
||||||
|
idx += util_logbase2(num_output_regs);
|
||||||
|
} else {
|
||||||
|
assert(num_tile_buffers <= PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
|
||||||
|
idx += PVR_SPM_LOAD_IN_REGS_COUNT + num_tile_buffers - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(idx < PVR_SPM_LOAD_PROGRAM_COUNT);
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* PVR_SHADER_FACTORY_H */
|
#endif /* PVR_SHADER_FACTORY_H */
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue