mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 06:40:11 +01:00
pvr: Setup SPM background object
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21575>
This commit is contained in:
parent
d75a36a9ee
commit
1dfd535124
8 changed files with 443 additions and 23 deletions
|
|
@ -1163,6 +1163,9 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
|||
struct pvr_pds_upload pds_pixel_event_program;
|
||||
uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS]
|
||||
[ROGUE_NUM_PBESTATE_STATE_WORDS] = { 0 };
|
||||
struct pvr_framebuffer *framebuffer = render_pass_info->framebuffer;
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state =
|
||||
&framebuffer->spm_bgobj_state_per_render[sub_cmd->hw_render_idx];
|
||||
struct pvr_render_target *render_target;
|
||||
VkResult result;
|
||||
|
||||
|
|
@ -1194,7 +1197,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
|||
}
|
||||
|
||||
pvr_setup_pbe_state(dev_info,
|
||||
render_pass_info->framebuffer,
|
||||
framebuffer,
|
||||
surface->mrt_idx,
|
||||
mrt_resource,
|
||||
iview,
|
||||
|
|
@ -1216,8 +1219,16 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
|||
|
||||
job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;
|
||||
|
||||
/* FIXME: Don't do this if there is a barrier load. */
|
||||
if (render_pass_info->enable_bg_tag) {
|
||||
if (sub_cmd->barrier_load) {
|
||||
job->enable_bg_tag = true;
|
||||
job->process_empty_tiles = true;
|
||||
|
||||
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
typed_memcpy(job->pds_bgnd_reg_values,
|
||||
spm_bgobj_state->pds_reg_values,
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
} else if (render_pass_info->enable_bg_tag) {
|
||||
const struct pvr_load_op *load_op = hw_render->load_op;
|
||||
struct pvr_pds_upload load_op_program;
|
||||
|
||||
|
|
@ -1230,16 +1241,22 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
|||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
job->enable_bg_tag = render_pass_info->enable_bg_tag;
|
||||
job->process_empty_tiles = render_pass_info->process_empty_tiles;
|
||||
|
||||
pvr_pds_bgnd_pack_state(load_op,
|
||||
&load_op_program,
|
||||
job->pds_bgnd_reg_values);
|
||||
}
|
||||
|
||||
job->enable_bg_tag = render_pass_info->enable_bg_tag;
|
||||
job->process_empty_tiles = render_pass_info->process_empty_tiles;
|
||||
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
typed_memcpy(job->pds_pr_bgnd_reg_values,
|
||||
spm_bgobj_state->pds_reg_values,
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
|
||||
render_target = pvr_get_render_target(render_pass_info->pass,
|
||||
render_pass_info->framebuffer,
|
||||
framebuffer,
|
||||
sub_cmd->hw_render_idx);
|
||||
job->rt_dataset = render_target->rt_dataset;
|
||||
|
||||
|
|
|
|||
|
|
@ -2676,6 +2676,7 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
|
|||
{
|
||||
PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
|
||||
PVR_FROM_HANDLE(pvr_device, device, _device);
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
|
||||
struct pvr_spm_eot_state *spm_eot_state_per_render;
|
||||
struct pvr_render_target *render_targets;
|
||||
struct pvr_framebuffer *framebuffer;
|
||||
|
|
@ -2703,6 +2704,10 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
|
|||
&spm_eot_state_per_render,
|
||||
__typeof__(*spm_eot_state_per_render),
|
||||
pass->hw_setup->render_count);
|
||||
vk_multialloc_add(&ma,
|
||||
&spm_bgobj_state_per_render,
|
||||
__typeof__(*spm_bgobj_state_per_render),
|
||||
pass->hw_setup->render_count);
|
||||
|
||||
if (!vk_multialloc_zalloc2(&ma,
|
||||
&device->vk.alloc,
|
||||
|
|
@ -2749,20 +2754,42 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
|
|||
goto err_finish_render_targets;
|
||||
|
||||
for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
|
||||
uint32_t emit_count;
|
||||
|
||||
result = pvr_spm_init_eot_state(device,
|
||||
&spm_eot_state_per_render[i],
|
||||
framebuffer,
|
||||
&pass->hw_setup->renders[i]);
|
||||
if (result != VK_SUCCESS) {
|
||||
for (uint32_t j = 0; j < i; j++)
|
||||
pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
|
||||
&pass->hw_setup->renders[i],
|
||||
&emit_count);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_finish_eot_state;
|
||||
|
||||
goto err_finish_render_targets;
|
||||
}
|
||||
result = pvr_spm_init_bgobj_state(device,
|
||||
&spm_bgobj_state_per_render[i],
|
||||
framebuffer,
|
||||
&pass->hw_setup->renders[i],
|
||||
emit_count);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_finish_bgobj_state;
|
||||
|
||||
continue;
|
||||
|
||||
err_finish_bgobj_state:
|
||||
pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);
|
||||
|
||||
for (uint32_t j = 0; j < i; j++)
|
||||
pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);
|
||||
|
||||
err_finish_eot_state:
|
||||
for (uint32_t j = 0; j < i; j++)
|
||||
pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
|
||||
|
||||
goto err_finish_render_targets;
|
||||
}
|
||||
|
||||
framebuffer->render_count = pass->hw_setup->render_count;
|
||||
framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
|
||||
framebuffer->spm_eot_state_count = pass->hw_setup->render_count;
|
||||
framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;
|
||||
|
||||
*pFramebuffer = pvr_framebuffer_to_handle(framebuffer);
|
||||
|
||||
|
|
@ -2791,7 +2818,10 @@ void pvr_DestroyFramebuffer(VkDevice _device,
|
|||
if (!framebuffer)
|
||||
return;
|
||||
|
||||
for (uint32_t i = 0; i < framebuffer->spm_eot_state_count; i++) {
|
||||
for (uint32_t i = 0; i < framebuffer->render_count; i++) {
|
||||
pvr_spm_finish_bgobj_state(device,
|
||||
&framebuffer->spm_bgobj_state_per_render[i]);
|
||||
|
||||
pvr_spm_finish_eot_state(device,
|
||||
&framebuffer->spm_eot_state_per_render[i]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1475,8 +1475,11 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
|||
sizeof(job->pds_bgnd_reg_values));
|
||||
stream_ptr += 3U * 2U;
|
||||
|
||||
/* Set pds_pr_bgnd array to 0 */
|
||||
memset(stream_ptr, 0, 3U * sizeof(uint64_t));
|
||||
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) == 3U);
|
||||
STATIC_ASSERT(sizeof(job->pds_pr_bgnd_reg_values[0]) == sizeof(uint64_t));
|
||||
memcpy(stream_ptr,
|
||||
job->pds_pr_bgnd_reg_values,
|
||||
sizeof(job->pds_pr_bgnd_reg_values));
|
||||
stream_ptr += 3U * 2U;
|
||||
|
||||
/* Set usc_clear_register array to 0 */
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ struct pvr_render_job {
|
|||
[ROGUE_NUM_PBESTATE_REG_WORDS];
|
||||
|
||||
uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
|
||||
uint64_t pds_pr_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
|
||||
};
|
||||
|
||||
VkResult pvr_free_list_create(struct pvr_device *device,
|
||||
|
|
|
|||
|
|
@ -251,7 +251,7 @@ struct pvr_device {
|
|||
struct pvr_bo *usc_programs;
|
||||
struct pvr_bo *pds_programs;
|
||||
|
||||
struct {
|
||||
struct pvr_spm_per_load_program_state {
|
||||
pvr_dev_addr_t pds_pixel_program_offset;
|
||||
pvr_dev_addr_t pds_uniform_program_offset;
|
||||
|
||||
|
|
@ -988,8 +988,9 @@ struct pvr_framebuffer {
|
|||
|
||||
struct pvr_spm_scratch_buffer *scratch_buffer;
|
||||
|
||||
uint32_t spm_eot_state_count;
|
||||
uint32_t render_count;
|
||||
struct pvr_spm_eot_state *spm_eot_state_per_render;
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
|
||||
};
|
||||
|
||||
struct pvr_render_pass_attachment {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
#include "pvr_csb.h"
|
||||
#include "pvr_csb_enum_helpers.h"
|
||||
#include "pvr_device_info.h"
|
||||
#include "pvr_formats.h"
|
||||
#include "pvr_hw_pass.h"
|
||||
#include "pvr_job_common.h"
|
||||
#include "pvr_pds.h"
|
||||
|
|
@ -39,6 +40,7 @@
|
|||
#include "pvr_shader_factory.h"
|
||||
#include "pvr_spm.h"
|
||||
#include "pvr_static_shaders.h"
|
||||
#include "pvr_tex_state.h"
|
||||
#include "pvr_types.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/macros.h"
|
||||
|
|
@ -642,7 +644,8 @@ VkResult
|
|||
pvr_spm_init_eot_state(struct pvr_device *device,
|
||||
struct pvr_spm_eot_state *spm_eot_state,
|
||||
const struct pvr_framebuffer *framebuffer,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render)
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
uint32_t *emit_count_out)
|
||||
{
|
||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
struct pvr_pds_upload pds_eot_program;
|
||||
|
|
@ -810,14 +813,325 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
|||
spm_eot_state->pixel_event_program_data_upload = pds_eot_program.pvr_bo;
|
||||
spm_eot_state->pixel_event_program_data_offset = pds_eot_program.data_offset;
|
||||
|
||||
*emit_count_out = mrt_setup.num_render_targets;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
#undef PVR_DEV_ADDR_ADVANCE
|
||||
|
||||
void pvr_spm_finish_eot_state(struct pvr_device *device,
|
||||
struct pvr_spm_eot_state *spm_eot_state)
|
||||
{
|
||||
pvr_bo_free(device, spm_eot_state->pixel_event_program_data_upload);
|
||||
pvr_bo_free(device, spm_eot_state->usc_eot_program);
|
||||
}
|
||||
|
||||
static VkFormat pvr_get_format_from_dword_count(uint32_t dword_count)
|
||||
{
|
||||
switch (dword_count) {
|
||||
case 1:
|
||||
return VK_FORMAT_R32_UINT;
|
||||
case 2:
|
||||
return VK_FORMAT_R32G32_UINT;
|
||||
case 4:
|
||||
return VK_FORMAT_R32G32B32A32_UINT;
|
||||
default:
|
||||
unreachable("Invalid dword_count");
|
||||
}
|
||||
}
|
||||
|
||||
static VkResult pvr_spm_setup_texture_state_words(
|
||||
struct pvr_device *device,
|
||||
uint32_t dword_count,
|
||||
const VkExtent2D framebuffer_size,
|
||||
uint32_t sample_count,
|
||||
pvr_dev_addr_t scratch_buffer_addr,
|
||||
uint64_t image_descriptor[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS],
|
||||
uint64_t *mem_used_out)
|
||||
{
|
||||
/* We can ignore the framebuffer's layer count since we only support
|
||||
* writing to layer 0.
|
||||
*/
|
||||
struct pvr_texture_state_info info = {
|
||||
.format = pvr_get_format_from_dword_count(dword_count),
|
||||
.mem_layout = PVR_MEMLAYOUT_LINEAR,
|
||||
|
||||
.type = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.tex_state_type = PVR_TEXTURE_STATE_STORAGE,
|
||||
.extent = {
|
||||
.width = framebuffer_size.width,
|
||||
.height = framebuffer_size.height,
|
||||
},
|
||||
|
||||
.mip_levels = 1,
|
||||
|
||||
.sample_count = sample_count,
|
||||
.stride = framebuffer_size.width,
|
||||
|
||||
.addr = scratch_buffer_addr,
|
||||
};
|
||||
const uint64_t aligned_fb_width =
|
||||
ALIGN_POT(framebuffer_size.width,
|
||||
PVRX(CR_PBE_WORD0_MRT0_LINESTRIDE_ALIGNMENT));
|
||||
const uint64_t fb_area = aligned_fb_width * framebuffer_size.height;
|
||||
const uint8_t *format_swizzle;
|
||||
VkResult result;
|
||||
|
||||
format_swizzle = pvr_get_format_swizzle(info.format);
|
||||
memcpy(info.swizzle, format_swizzle, sizeof(info.swizzle));
|
||||
|
||||
result = pvr_pack_tex_state(device, &info, image_descriptor);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
*mem_used_out = fb_area * dword_count * sizeof(uint32_t) * sample_count;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* FIXME: Can we dedup this with pvr_load_op_pds_data_create_and_upload() ? */
|
||||
static VkResult pvr_pds_bgnd_program_create_and_upload(
|
||||
struct pvr_device *device,
|
||||
uint32_t texture_program_data_size_in_dwords,
|
||||
const struct pvr_bo *consts_buffer,
|
||||
uint32_t const_shared_regs,
|
||||
struct pvr_pds_upload *pds_upload_out)
|
||||
{
|
||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
struct pvr_pds_pixel_shader_sa_program texture_program = { 0 };
|
||||
uint32_t staging_buffer_size;
|
||||
uint32_t *staging_buffer;
|
||||
VkResult result;
|
||||
|
||||
pvr_csb_pack (&texture_program.texture_dma_address[0],
|
||||
PDSINST_DOUT_FIELDS_DOUTD_SRC0,
|
||||
doutd_src0) {
|
||||
doutd_src0.sbase = consts_buffer->vma->dev_addr;
|
||||
}
|
||||
|
||||
pvr_csb_pack (&texture_program.texture_dma_control[0],
|
||||
PDSINST_DOUT_FIELDS_DOUTD_SRC1,
|
||||
doutd_src1) {
|
||||
doutd_src1.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
|
||||
doutd_src1.bsize = const_shared_regs;
|
||||
}
|
||||
|
||||
texture_program.num_texture_dma_kicks += 1;
|
||||
|
||||
#if defined(DEBUG)
|
||||
pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_program, dev_info);
|
||||
assert(texture_program_data_size_in_dwords == texture_program.data_size);
|
||||
#endif
|
||||
|
||||
staging_buffer_size = texture_program_data_size_in_dwords * sizeof(uint32_t);
|
||||
|
||||
staging_buffer = vk_alloc(&device->vk.alloc,
|
||||
staging_buffer_size,
|
||||
8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (!staging_buffer)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
pvr_pds_generate_pixel_shader_sa_texture_state_data(&texture_program,
|
||||
staging_buffer,
|
||||
dev_info);
|
||||
|
||||
/* FIXME: Figure out the define for alignment of 16. */
|
||||
result = pvr_gpu_upload_pds(device,
|
||||
&staging_buffer[0],
|
||||
texture_program_data_size_in_dwords,
|
||||
16,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
16,
|
||||
pds_upload_out);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&device->vk.alloc, staging_buffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
vk_free(&device->vk.alloc, staging_buffer);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
pvr_spm_init_bgobj_state(struct pvr_device *device,
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state,
|
||||
const struct pvr_framebuffer *framebuffer,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
uint32_t emit_count)
|
||||
{
|
||||
const uint32_t spm_load_program_idx =
|
||||
pvr_get_spm_load_program_index(hw_render->sample_count,
|
||||
hw_render->tile_buffers_count,
|
||||
hw_render->output_regs_count);
|
||||
const VkExtent2D framebuffer_size = {
|
||||
.width = framebuffer->width,
|
||||
.height = framebuffer->height,
|
||||
};
|
||||
pvr_dev_addr_t next_scratch_buffer_addr =
|
||||
framebuffer->scratch_buffer->bo->vma->dev_addr;
|
||||
struct pvr_spm_per_load_program_state *load_program_state;
|
||||
struct pvr_pds_upload pds_texture_data_upload;
|
||||
const struct pvr_shader_factory_info *info;
|
||||
union pvr_sampler_descriptor *descriptor;
|
||||
uint64_t consts_buffer_size;
|
||||
uint32_t dword_count;
|
||||
uint32_t *mem_ptr;
|
||||
VkResult result;
|
||||
|
||||
assert(spm_load_program_idx < ARRAY_SIZE(spm_load_collection));
|
||||
info = spm_load_collection[spm_load_program_idx].info;
|
||||
|
||||
consts_buffer_size = info->const_shared_regs * sizeof(uint32_t);
|
||||
|
||||
result = pvr_bo_alloc(device,
|
||||
device->heaps.general_heap,
|
||||
consts_buffer_size,
|
||||
sizeof(uint32_t),
|
||||
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
|
||||
&spm_bgobj_state->consts_buffer);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
mem_ptr = spm_bgobj_state->consts_buffer->bo->map;
|
||||
|
||||
if (info->driver_const_location_map) {
|
||||
const uint32_t *const const_map = info->driver_const_location_map;
|
||||
|
||||
for (uint32_t i = 0; i < PVR_SPM_LOAD_CONST_COUNT; i += 2) {
|
||||
pvr_dev_addr_t tile_buffer_addr;
|
||||
|
||||
if (const_map[i] == PVR_SPM_LOAD_DEST_UNUSED) {
|
||||
#if defined(DEBUG)
|
||||
for (uint32_t j = i; j < PVR_SPM_LOAD_CONST_COUNT; j++)
|
||||
assert(const_map[j] == PVR_SPM_LOAD_DEST_UNUSED);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
tile_buffer_addr =
|
||||
device->tile_buffer_state.buffers[i / 2]->vma->dev_addr;
|
||||
|
||||
assert(const_map[i] == const_map[i + 1] + 1);
|
||||
mem_ptr[const_map[i]] = tile_buffer_addr.addr >> 32;
|
||||
mem_ptr[const_map[i + 1]] = (uint32_t)tile_buffer_addr.addr;
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: The 32 comes from how the shaders are compiled. We should
|
||||
* unhardcode it when this is hooked up to the compiler.
|
||||
*/
|
||||
descriptor = (union pvr_sampler_descriptor *)(mem_ptr + 32);
|
||||
*descriptor = (union pvr_sampler_descriptor){ 0 };
|
||||
|
||||
pvr_csb_pack (&descriptor->data.sampler_word, TEXSTATE_SAMPLER, sampler) {
|
||||
sampler.non_normalized_coords = true;
|
||||
sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||
sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||
sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
|
||||
sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
|
||||
sampler.maxlod = PVRX(TEXSTATE_CLAMP_MIN);
|
||||
sampler.minlod = PVRX(TEXSTATE_CLAMP_MIN);
|
||||
sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
|
||||
}
|
||||
|
||||
/* Even if we might have 8 output regs we can only pack and write 4 dwords
|
||||
* using R32G32B32A32_UINT.
|
||||
*/
|
||||
if (hw_render->tile_buffers_count > 0)
|
||||
dword_count = 4;
|
||||
else
|
||||
dword_count = MIN2(hw_render->output_regs_count, 4);
|
||||
|
||||
for (uint32_t i = 0; i < emit_count; i++) {
|
||||
uint64_t *mem_ptr_u64 = (uint64_t *)mem_ptr;
|
||||
uint64_t mem_used = 0;
|
||||
|
||||
STATIC_ASSERT(ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t) /
|
||||
sizeof(uint32_t) ==
|
||||
PVR_IMAGE_DESCRIPTOR_SIZE);
|
||||
mem_ptr_u64 += i * ROGUE_NUM_TEXSTATE_IMAGE_WORDS;
|
||||
|
||||
result = pvr_spm_setup_texture_state_words(device,
|
||||
dword_count,
|
||||
framebuffer_size,
|
||||
hw_render->sample_count,
|
||||
next_scratch_buffer_addr,
|
||||
mem_ptr_u64,
|
||||
&mem_used);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_consts_buffer;
|
||||
|
||||
PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_used);
|
||||
}
|
||||
|
||||
assert(spm_load_program_idx <
|
||||
ARRAY_SIZE(device->spm_load_state.load_program));
|
||||
load_program_state =
|
||||
&device->spm_load_state.load_program[spm_load_program_idx];
|
||||
|
||||
result = pvr_pds_bgnd_program_create_and_upload(
|
||||
device,
|
||||
load_program_state->pds_texture_program_data_size,
|
||||
spm_bgobj_state->consts_buffer,
|
||||
info->const_shared_regs,
|
||||
&pds_texture_data_upload);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_consts_buffer;
|
||||
|
||||
spm_bgobj_state->pds_texture_data_upload = pds_texture_data_upload.pvr_bo;
|
||||
|
||||
/* TODO: Is it worth to dedup this with pvr_pds_bgnd_pack_state() ? */
|
||||
|
||||
/* clang-format off */
|
||||
pvr_csb_pack (&spm_bgobj_state->pds_reg_values[0],
|
||||
CR_PDS_BGRND0_BASE,
|
||||
value) {
|
||||
/* clang-format on */
|
||||
value.shader_addr = load_program_state->pds_pixel_program_offset;
|
||||
value.texunicode_addr = load_program_state->pds_uniform_program_offset;
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
pvr_csb_pack (&spm_bgobj_state->pds_reg_values[1],
|
||||
CR_PDS_BGRND1_BASE,
|
||||
value) {
|
||||
/* clang-format on */
|
||||
value.texturedata_addr =
|
||||
PVR_DEV_ADDR(pds_texture_data_upload.data_offset);
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
pvr_csb_pack (&spm_bgobj_state->pds_reg_values[2],
|
||||
CR_PDS_BGRND3_SIZEINFO,
|
||||
value) {
|
||||
/* clang-format on */
|
||||
value.usc_sharedsize =
|
||||
DIV_ROUND_UP(info->const_shared_regs,
|
||||
PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
|
||||
value.pds_texturestatesize = DIV_ROUND_UP(
|
||||
pds_texture_data_upload.data_size,
|
||||
PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE));
|
||||
value.pds_tempsize =
|
||||
DIV_ROUND_UP(load_program_state->pds_texture_program_temps_count,
|
||||
PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
err_free_consts_buffer:
|
||||
pvr_bo_free(device, spm_bgobj_state->consts_buffer);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void pvr_spm_finish_bgobj_state(struct pvr_device *device,
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state)
|
||||
{
|
||||
pvr_bo_free(device, spm_bgobj_state->pds_texture_data_upload);
|
||||
pvr_bo_free(device, spm_bgobj_state->consts_buffer);
|
||||
}
|
||||
|
||||
#undef PVR_DEV_ADDR_ADVANCE
|
||||
|
|
|
|||
|
|
@ -76,6 +76,17 @@ struct pvr_spm_eot_state {
|
|||
struct pvr_bo *pixel_event_program_data_upload;
|
||||
};
|
||||
|
||||
struct pvr_spm_bgobj_state {
|
||||
struct pvr_bo *consts_buffer;
|
||||
|
||||
/* TODO: Make this struct pvr_pds_upload? It would pull in pvr_private.h
|
||||
* though which causes a cycle since that includes pvr_spm.h .
|
||||
*/
|
||||
struct pvr_bo *pds_texture_data_upload;
|
||||
|
||||
uint64_t pds_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
|
||||
};
|
||||
|
||||
void pvr_spm_init_scratch_buffer_store(struct pvr_device *device);
|
||||
void pvr_spm_finish_scratch_buffer_store(struct pvr_device *device);
|
||||
|
||||
|
|
@ -106,8 +117,18 @@ VkResult
|
|||
pvr_spm_init_eot_state(struct pvr_device *device,
|
||||
struct pvr_spm_eot_state *spm_eot_state,
|
||||
const struct pvr_framebuffer *framebuffer,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render);
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
uint32_t *emit_count_out);
|
||||
void pvr_spm_finish_eot_state(struct pvr_device *device,
|
||||
struct pvr_spm_eot_state *spm_eot_state);
|
||||
|
||||
VkResult
|
||||
pvr_spm_init_bgobj_state(struct pvr_device *device,
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state,
|
||||
const struct pvr_framebuffer *framebuffer,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
uint32_t emit_count);
|
||||
void pvr_spm_finish_bgobj_state(struct pvr_device *device,
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state);
|
||||
|
||||
#endif /* PVR_SPM_H */
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@
|
|||
#include <stdbool.h>
|
||||
|
||||
#include "util/bitpack_helpers.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
/* Occlusion query availability writes. */
|
||||
enum pvr_query_availability_write_pool_const {
|
||||
|
|
@ -145,10 +147,14 @@ enum pvr_spm_load_const {
|
|||
SPM_LOAD_CONST_TILE_BUFFER_7_UPPER,
|
||||
SPM_LOAD_CONST_TILE_BUFFER_7_LOWER,
|
||||
};
|
||||
#define PVR_SPM_LOAD_CONST_COUNT (SPM_LOAD_CONST_TILE_BUFFER_7_LOWER + 1)
/* Value of a driver constant location map entry that has no destination.
 * Parenthesized so the expansion always stays a single operand.
 */
#define PVR_SPM_LOAD_DEST_UNUSED (~0)

#define PVR_SPM_LOAD_SAMPLES_COUNT 4U

#define PVR_SPM_LOAD_IN_REGS_COUNT 3 /* 1, 2, 4 */
#define PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT 7 /* 1, 2, 3, 4, 5, 6, 7 */
|
||||
|
||||
/* If output_regs == 8
|
||||
* reg_load_programs = 4 # 1, 2, 4, 8
|
||||
* tile_buffer_load_programs = 3 # 1, 2, 3
|
||||
|
|
@ -164,6 +170,33 @@ enum pvr_spm_load_const {
|
|||
/* FIXME: This is currently hard coded for the am62. The Chromebook has 8
|
||||
* output regs so the count is different.
|
||||
*/
|
||||
#define PVR_SPM_LOAD_PROGRAM_COUNT (PVR_SPM_LOAD_SAMPLES_COUNT * (3 + 7))
|
||||
#define PVR_SPM_LOAD_PROGRAM_COUNT \
|
||||
(PVR_SPM_LOAD_SAMPLES_COUNT * \
|
||||
(PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT))
|
||||
|
||||
static inline uint32_t pvr_get_spm_load_program_index(uint32_t sample_count,
|
||||
uint32_t num_tile_buffers,
|
||||
uint32_t num_output_regs)
|
||||
{
|
||||
uint32_t idx;
|
||||
|
||||
assert(util_is_power_of_two_nonzero(sample_count));
|
||||
idx = util_logbase2(sample_count) *
|
||||
(PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
|
||||
|
||||
assert((num_tile_buffers > 0) ^ (num_output_regs > 0));
|
||||
|
||||
if (num_output_regs > 0) {
|
||||
assert(util_is_power_of_two_nonzero(num_output_regs));
|
||||
assert(util_logbase2(num_output_regs) < PVR_SPM_LOAD_IN_REGS_COUNT);
|
||||
idx += util_logbase2(num_output_regs);
|
||||
} else {
|
||||
assert(num_tile_buffers <= PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
|
||||
idx += PVR_SPM_LOAD_IN_REGS_COUNT + num_tile_buffers - 1;
|
||||
}
|
||||
|
||||
assert(idx < PVR_SPM_LOAD_PROGRAM_COUNT);
|
||||
return idx;
|
||||
}
|
||||
|
||||
#endif /* PVR_SHADER_FACTORY_H */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue