pvr: Set up SPM background object

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21575>
This commit is contained in:
Karmjit Mahil 2023-02-08 11:38:30 +00:00 committed by Marge Bot
parent d75a36a9ee
commit 1dfd535124
8 changed files with 443 additions and 23 deletions

View file

@ -1163,6 +1163,9 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
struct pvr_pds_upload pds_pixel_event_program;
uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS]
[ROGUE_NUM_PBESTATE_STATE_WORDS] = { 0 };
struct pvr_framebuffer *framebuffer = render_pass_info->framebuffer;
struct pvr_spm_bgobj_state *spm_bgobj_state =
&framebuffer->spm_bgobj_state_per_render[sub_cmd->hw_render_idx];
struct pvr_render_target *render_target;
VkResult result;
@ -1194,7 +1197,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
}
pvr_setup_pbe_state(dev_info,
render_pass_info->framebuffer,
framebuffer,
surface->mrt_idx,
mrt_resource,
iview,
@ -1216,8 +1219,16 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;
/* FIXME: Don't do this if there is a barrier load. */
if (render_pass_info->enable_bg_tag) {
if (sub_cmd->barrier_load) {
job->enable_bg_tag = true;
job->process_empty_tiles = true;
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
typed_memcpy(job->pds_bgnd_reg_values,
spm_bgobj_state->pds_reg_values,
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
} else if (render_pass_info->enable_bg_tag) {
const struct pvr_load_op *load_op = hw_render->load_op;
struct pvr_pds_upload load_op_program;
@ -1230,16 +1241,22 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
if (result != VK_SUCCESS)
return result;
job->enable_bg_tag = render_pass_info->enable_bg_tag;
job->process_empty_tiles = render_pass_info->process_empty_tiles;
pvr_pds_bgnd_pack_state(load_op,
&load_op_program,
job->pds_bgnd_reg_values);
}
job->enable_bg_tag = render_pass_info->enable_bg_tag;
job->process_empty_tiles = render_pass_info->process_empty_tiles;
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
typed_memcpy(job->pds_pr_bgnd_reg_values,
spm_bgobj_state->pds_reg_values,
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
render_target = pvr_get_render_target(render_pass_info->pass,
render_pass_info->framebuffer,
framebuffer,
sub_cmd->hw_render_idx);
job->rt_dataset = render_target->rt_dataset;

View file

@ -2676,6 +2676,7 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
{
PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
PVR_FROM_HANDLE(pvr_device, device, _device);
struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
struct pvr_spm_eot_state *spm_eot_state_per_render;
struct pvr_render_target *render_targets;
struct pvr_framebuffer *framebuffer;
@ -2703,6 +2704,10 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
&spm_eot_state_per_render,
__typeof__(*spm_eot_state_per_render),
pass->hw_setup->render_count);
vk_multialloc_add(&ma,
&spm_bgobj_state_per_render,
__typeof__(*spm_bgobj_state_per_render),
pass->hw_setup->render_count);
if (!vk_multialloc_zalloc2(&ma,
&device->vk.alloc,
@ -2749,20 +2754,42 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
goto err_finish_render_targets;
for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
uint32_t emit_count;
result = pvr_spm_init_eot_state(device,
&spm_eot_state_per_render[i],
framebuffer,
&pass->hw_setup->renders[i]);
if (result != VK_SUCCESS) {
for (uint32_t j = 0; j < i; j++)
pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
&pass->hw_setup->renders[i],
&emit_count);
if (result != VK_SUCCESS)
goto err_finish_eot_state;
goto err_finish_render_targets;
}
result = pvr_spm_init_bgobj_state(device,
&spm_bgobj_state_per_render[i],
framebuffer,
&pass->hw_setup->renders[i],
emit_count);
if (result != VK_SUCCESS)
goto err_finish_bgobj_state;
continue;
err_finish_bgobj_state:
pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);
for (uint32_t j = 0; j < i; j++)
pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);
err_finish_eot_state:
for (uint32_t j = 0; j < i; j++)
pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
goto err_finish_render_targets;
}
framebuffer->render_count = pass->hw_setup->render_count;
framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
framebuffer->spm_eot_state_count = pass->hw_setup->render_count;
framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;
*pFramebuffer = pvr_framebuffer_to_handle(framebuffer);
@ -2791,7 +2818,10 @@ void pvr_DestroyFramebuffer(VkDevice _device,
if (!framebuffer)
return;
for (uint32_t i = 0; i < framebuffer->spm_eot_state_count; i++) {
for (uint32_t i = 0; i < framebuffer->render_count; i++) {
pvr_spm_finish_bgobj_state(device,
&framebuffer->spm_bgobj_state_per_render[i]);
pvr_spm_finish_eot_state(device,
&framebuffer->spm_eot_state_per_render[i]);
}

View file

@ -1475,8 +1475,11 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
sizeof(job->pds_bgnd_reg_values));
stream_ptr += 3U * 2U;
/* Set pds_pr_bgnd array to 0 */
memset(stream_ptr, 0, 3U * sizeof(uint64_t));
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) == 3U);
STATIC_ASSERT(sizeof(job->pds_pr_bgnd_reg_values[0]) == sizeof(uint64_t));
memcpy(stream_ptr,
job->pds_pr_bgnd_reg_values,
sizeof(job->pds_pr_bgnd_reg_values));
stream_ptr += 3U * 2U;
/* Set usc_clear_register array to 0 */

View file

@ -107,6 +107,7 @@ struct pvr_render_job {
[ROGUE_NUM_PBESTATE_REG_WORDS];
uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
uint64_t pds_pr_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
};
VkResult pvr_free_list_create(struct pvr_device *device,

View file

@ -251,7 +251,7 @@ struct pvr_device {
struct pvr_bo *usc_programs;
struct pvr_bo *pds_programs;
struct {
struct pvr_spm_per_load_program_state {
pvr_dev_addr_t pds_pixel_program_offset;
pvr_dev_addr_t pds_uniform_program_offset;
@ -988,8 +988,9 @@ struct pvr_framebuffer {
struct pvr_spm_scratch_buffer *scratch_buffer;
uint32_t spm_eot_state_count;
uint32_t render_count;
struct pvr_spm_eot_state *spm_eot_state_per_render;
struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
};
struct pvr_render_pass_attachment {

View file

@ -32,6 +32,7 @@
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_device_info.h"
#include "pvr_formats.h"
#include "pvr_hw_pass.h"
#include "pvr_job_common.h"
#include "pvr_pds.h"
@ -39,6 +40,7 @@
#include "pvr_shader_factory.h"
#include "pvr_spm.h"
#include "pvr_static_shaders.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "util/bitscan.h"
#include "util/macros.h"
@ -642,7 +644,8 @@ VkResult
pvr_spm_init_eot_state(struct pvr_device *device,
struct pvr_spm_eot_state *spm_eot_state,
const struct pvr_framebuffer *framebuffer,
const struct pvr_renderpass_hwsetup_render *hw_render)
const struct pvr_renderpass_hwsetup_render *hw_render,
uint32_t *emit_count_out)
{
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
struct pvr_pds_upload pds_eot_program;
@ -810,14 +813,325 @@ pvr_spm_init_eot_state(struct pvr_device *device,
spm_eot_state->pixel_event_program_data_upload = pds_eot_program.pvr_bo;
spm_eot_state->pixel_event_program_data_offset = pds_eot_program.data_offset;
*emit_count_out = mrt_setup.num_render_targets;
return VK_SUCCESS;
}
#undef PVR_DEV_ADDR_ADVANCE
/* Releases the two buffer objects referenced by an SPM EOT state: the pixel
 * event program data upload and the USC EOT program. Both are handed to
 * pvr_bo_free().
 */
void pvr_spm_finish_eot_state(struct pvr_device *device,
struct pvr_spm_eot_state *spm_eot_state)
{
pvr_bo_free(device, spm_eot_state->pixel_event_program_data_upload);
pvr_bo_free(device, spm_eot_state->usc_eot_program);
}
/* Maps a dword count to the 32-bit-per-channel UINT format with that many
 * channels. Only 1, 2 and 4 dwords have a matching format; any other value is
 * treated as unreachable.
 */
static VkFormat pvr_get_format_from_dword_count(uint32_t dword_count)
{
   if (dword_count == 1)
      return VK_FORMAT_R32_UINT;

   if (dword_count == 2)
      return VK_FORMAT_R32G32_UINT;

   if (dword_count == 4)
      return VK_FORMAT_R32G32B32A32_UINT;

   unreachable("Invalid dword_count");
}
/* Packs the texture state words for one linear 2D storage image located at
 * scratch_buffer_addr and reports how many bytes that image covers.
 *
 * The image has the framebuffer's dimensions and uses the 32-bit UINT format
 * that matches dword_count.
 *
 * On success VK_SUCCESS is returned and *mem_used_out is written. On failure
 * the error from pvr_pack_tex_state() is returned and *mem_used_out is left
 * untouched.
 */
static VkResult pvr_spm_setup_texture_state_words(
   struct pvr_device *device,
   uint32_t dword_count,
   const VkExtent2D framebuffer_size,
   uint32_t sample_count,
   pvr_dev_addr_t scratch_buffer_addr,
   uint64_t image_descriptor[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS],
   uint64_t *mem_used_out)
{
   const VkFormat format = pvr_get_format_from_dword_count(dword_count);

   /* The memory footprint is derived from the line-stride-aligned width. */
   const uint64_t padded_width =
      ALIGN_POT(framebuffer_size.width,
                PVRX(CR_PBE_WORD0_MRT0_LINESTRIDE_ALIGNMENT));
   const uint64_t pixel_count = padded_width * framebuffer_size.height;

   /* We can ignore the framebuffer's layer count since we only support
    * writing to layer 0.
    */
   struct pvr_texture_state_info info = {
      .format = format,
      .mem_layout = PVR_MEMLAYOUT_LINEAR,
      .type = VK_IMAGE_VIEW_TYPE_2D,
      .tex_state_type = PVR_TEXTURE_STATE_STORAGE,
      .extent = {
         .width = framebuffer_size.width,
         .height = framebuffer_size.height,
      },
      .mip_levels = 1,
      .sample_count = sample_count,
      .stride = framebuffer_size.width,
      .addr = scratch_buffer_addr,
   };
   VkResult result;

   memcpy(info.swizzle,
          pvr_get_format_swizzle(info.format),
          sizeof(info.swizzle));

   result = pvr_pack_tex_state(device, &info, image_descriptor);
   if (result == VK_SUCCESS) {
      *mem_used_out =
         pixel_count * dword_count * sizeof(uint32_t) * sample_count;
   }

   return result;
}
/* FIXME: Can we dedup this with pvr_load_op_pds_data_create_and_upload() ? */
static VkResult pvr_pds_bgnd_program_create_and_upload(
struct pvr_device *device,
uint32_t texture_program_data_size_in_dwords,
const struct pvr_bo *consts_buffer,
uint32_t const_shared_regs,
struct pvr_pds_upload *pds_upload_out)
{
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
struct pvr_pds_pixel_shader_sa_program texture_program = { 0 };
uint32_t staging_buffer_size;
uint32_t *staging_buffer;
VkResult result;
pvr_csb_pack (&texture_program.texture_dma_address[0],
PDSINST_DOUT_FIELDS_DOUTD_SRC0,
doutd_src0) {
doutd_src0.sbase = consts_buffer->vma->dev_addr;
}
pvr_csb_pack (&texture_program.texture_dma_control[0],
PDSINST_DOUT_FIELDS_DOUTD_SRC1,
doutd_src1) {
doutd_src1.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
doutd_src1.bsize = const_shared_regs;
}
texture_program.num_texture_dma_kicks += 1;
#if defined(DEBUG)
pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_program, dev_info);
assert(texture_program_data_size_in_dwords == texture_program.data_size);
#endif
staging_buffer_size = texture_program_data_size_in_dwords * sizeof(uint32_t);
staging_buffer = vk_alloc(&device->vk.alloc,
staging_buffer_size,
8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!staging_buffer)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
pvr_pds_generate_pixel_shader_sa_texture_state_data(&texture_program,
staging_buffer,
dev_info);
/* FIXME: Figure out the define for alignment of 16. */
result = pvr_gpu_upload_pds(device,
&staging_buffer[0],
texture_program_data_size_in_dwords,
16,
NULL,
0,
0,
16,
pds_upload_out);
if (result != VK_SUCCESS) {
vk_free(&device->vk.alloc, staging_buffer);
return result;
}
vk_free(&device->vk.alloc, staging_buffer);
return VK_SUCCESS;
}
/* Initializes the SPM background object state for one hardware render.
 *
 * Steps, in order:
 *  - select the SPM load program from the render's sample count, tile buffer
 *    count and output register count;
 *  - allocate a CPU-mapped constants buffer and fill it with tile buffer
 *    addresses, a sampler descriptor and emit_count image descriptors;
 *  - upload the PDS texture state data that refers to the constants buffer;
 *  - pack the three PDS background register values into
 *    spm_bgobj_state->pds_reg_values.
 *
 * emit_count is the number of image descriptors to pack.
 *
 * Returns VK_SUCCESS on success. On failure the constants buffer is freed and
 * the failing call's error is returned.
 */
VkResult
pvr_spm_init_bgobj_state(struct pvr_device *device,
struct pvr_spm_bgobj_state *spm_bgobj_state,
const struct pvr_framebuffer *framebuffer,
const struct pvr_renderpass_hwsetup_render *hw_render,
uint32_t emit_count)
{
const uint32_t spm_load_program_idx =
pvr_get_spm_load_program_index(hw_render->sample_count,
hw_render->tile_buffers_count,
hw_render->output_regs_count);
const VkExtent2D framebuffer_size = {
.width = framebuffer->width,
.height = framebuffer->height,
};
/* Running address into the framebuffer's scratch buffer; moved forward after
 * each image descriptor is packed.
 */
pvr_dev_addr_t next_scratch_buffer_addr =
framebuffer->scratch_buffer->bo->vma->dev_addr;
struct pvr_spm_per_load_program_state *load_program_state;
struct pvr_pds_upload pds_texture_data_upload;
const struct pvr_shader_factory_info *info;
union pvr_sampler_descriptor *descriptor;
uint64_t consts_buffer_size;
uint32_t dword_count;
uint32_t *mem_ptr;
VkResult result;
assert(spm_load_program_idx < ARRAY_SIZE(spm_load_collection));
info = spm_load_collection[spm_load_program_idx].info;
/* One dword per shared register of the selected load program. */
consts_buffer_size = info->const_shared_regs * sizeof(uint32_t);
result = pvr_bo_alloc(device,
device->heaps.general_heap,
consts_buffer_size,
sizeof(uint32_t),
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
&spm_bgobj_state->consts_buffer);
if (result != VK_SUCCESS)
return result;
mem_ptr = spm_bgobj_state->consts_buffer->bo->map;
if (info->driver_const_location_map) {
const uint32_t *const const_map = info->driver_const_location_map;
/* The map is consumed two entries at a time: entry i receives the upper 32
 * bits of a tile buffer address and entry i + 1 the lower 32 bits.
 */
for (uint32_t i = 0; i < PVR_SPM_LOAD_CONST_COUNT; i += 2) {
pvr_dev_addr_t tile_buffer_addr;
/* The first unused entry ends the list; DEBUG builds check that every
 * later entry is unused too.
 */
if (const_map[i] == PVR_SPM_LOAD_DEST_UNUSED) {
#if defined(DEBUG)
for (uint32_t j = i; j < PVR_SPM_LOAD_CONST_COUNT; j++)
assert(const_map[j] == PVR_SPM_LOAD_DEST_UNUSED);
#endif
break;
}
tile_buffer_addr =
device->tile_buffer_state.buffers[i / 2]->vma->dev_addr;
/* The upper half must be placed one dword after the lower half. */
assert(const_map[i] == const_map[i + 1] + 1);
mem_ptr[const_map[i]] = tile_buffer_addr.addr >> 32;
mem_ptr[const_map[i + 1]] = (uint32_t)tile_buffer_addr.addr;
}
}
/* TODO: The 32 comes from how the shaders are compiled. We should
* unhardcode it when this is hooked up to the compiler.
*/
descriptor = (union pvr_sampler_descriptor *)(mem_ptr + 32);
*descriptor = (union pvr_sampler_descriptor){ 0 };
/* Point-filtered, clamp-to-edge sampler using non-normalized coordinates. */
pvr_csb_pack (&descriptor->data.sampler_word, TEXSTATE_SAMPLER, sampler) {
sampler.non_normalized_coords = true;
sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
sampler.maxlod = PVRX(TEXSTATE_CLAMP_MIN);
sampler.minlod = PVRX(TEXSTATE_CLAMP_MIN);
sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
}
/* Even if we might have 8 output regs we can only pack and write 4 dwords
* using R32G32B32A32_UINT.
*/
if (hw_render->tile_buffers_count > 0)
dword_count = 4;
else
dword_count = MIN2(hw_render->output_regs_count, 4);
/* Pack emit_count image descriptors back to back from the start of the
 * constants buffer. Each one is given the current scratch buffer address.
 */
for (uint32_t i = 0; i < emit_count; i++) {
uint64_t *mem_ptr_u64 = (uint64_t *)mem_ptr;
uint64_t mem_used = 0;
STATIC_ASSERT(ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t) /
sizeof(uint32_t) ==
PVR_IMAGE_DESCRIPTOR_SIZE);
mem_ptr_u64 += i * ROGUE_NUM_TEXSTATE_IMAGE_WORDS;
result = pvr_spm_setup_texture_state_words(device,
dword_count,
framebuffer_size,
hw_render->sample_count,
next_scratch_buffer_addr,
mem_ptr_u64,
&mem_used);
if (result != VK_SUCCESS)
goto err_free_consts_buffer;
/* NOTE(review): PVR_DEV_ADDR_ADVANCE is defined outside this block;
 * presumably it moves the address forward by mem_used bytes - confirm.
 */
PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_used);
}
assert(spm_load_program_idx <
ARRAY_SIZE(device->spm_load_state.load_program));
load_program_state =
&device->spm_load_state.load_program[spm_load_program_idx];
result = pvr_pds_bgnd_program_create_and_upload(
device,
load_program_state->pds_texture_program_data_size,
spm_bgobj_state->consts_buffer,
info->const_shared_regs,
&pds_texture_data_upload);
if (result != VK_SUCCESS)
goto err_free_consts_buffer;
/* Keep the upload's buffer object so pvr_spm_finish_bgobj_state() can free
 * it.
 */
spm_bgobj_state->pds_texture_data_upload = pds_texture_data_upload.pvr_bo;
/* TODO: Is it worth to dedup this with pvr_pds_bgnd_pack_state() ? */
/* clang-format off */
pvr_csb_pack (&spm_bgobj_state->pds_reg_values[0],
CR_PDS_BGRND0_BASE,
value) {
/* clang-format on */
value.shader_addr = load_program_state->pds_pixel_program_offset;
value.texunicode_addr = load_program_state->pds_uniform_program_offset;
}
/* clang-format off */
pvr_csb_pack (&spm_bgobj_state->pds_reg_values[1],
CR_PDS_BGRND1_BASE,
value) {
/* clang-format on */
value.texturedata_addr =
PVR_DEV_ADDR(pds_texture_data_upload.data_offset);
}
/* clang-format off */
pvr_csb_pack (&spm_bgobj_state->pds_reg_values[2],
CR_PDS_BGRND3_SIZEINFO,
value) {
/* clang-format on */
/* Each size is rounded up to a whole number of the field's units. */
value.usc_sharedsize =
DIV_ROUND_UP(info->const_shared_regs,
PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
value.pds_texturestatesize = DIV_ROUND_UP(
pds_texture_data_upload.data_size,
PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE));
value.pds_tempsize =
DIV_ROUND_UP(load_program_state->pds_texture_program_temps_count,
PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
}
return VK_SUCCESS;
/* Shared failure path: only the constants buffer has been allocated here. */
err_free_consts_buffer:
pvr_bo_free(device, spm_bgobj_state->consts_buffer);
return result;
}
/* Releases the two buffer objects referenced by an SPM background object
 * state: the PDS texture data upload and the constants buffer. Both are
 * handed to pvr_bo_free().
 */
void pvr_spm_finish_bgobj_state(struct pvr_device *device,
struct pvr_spm_bgobj_state *spm_bgobj_state)
{
pvr_bo_free(device, spm_bgobj_state->pds_texture_data_upload);
pvr_bo_free(device, spm_bgobj_state->consts_buffer);
}
#undef PVR_DEV_ADDR_ADVANCE

View file

@ -76,6 +76,17 @@ struct pvr_spm_eot_state {
struct pvr_bo *pixel_event_program_data_upload;
};
/* Per hardware render state for the SPM background object. It is filled in by
 * pvr_spm_init_bgobj_state() and released by pvr_spm_finish_bgobj_state().
 */
struct pvr_spm_bgobj_state {
/* Buffer object holding the constants for the SPM load program. */
struct pvr_bo *consts_buffer;
/* TODO: Make this struct pvr_pds_upload? It would pull in pvr_private.h
* though which causes a cycle since that includes pvr_spm.h .
*/
struct pvr_bo *pds_texture_data_upload;
/* Packed PDS background register values, copied into the render job. */
uint64_t pds_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
};
void pvr_spm_init_scratch_buffer_store(struct pvr_device *device);
void pvr_spm_finish_scratch_buffer_store(struct pvr_device *device);
@ -106,8 +117,18 @@ VkResult
pvr_spm_init_eot_state(struct pvr_device *device,
struct pvr_spm_eot_state *spm_eot_state,
const struct pvr_framebuffer *framebuffer,
const struct pvr_renderpass_hwsetup_render *hw_render);
const struct pvr_renderpass_hwsetup_render *hw_render,
uint32_t *emit_count_out);
void pvr_spm_finish_eot_state(struct pvr_device *device,
struct pvr_spm_eot_state *spm_eot_state);
VkResult
pvr_spm_init_bgobj_state(struct pvr_device *device,
struct pvr_spm_bgobj_state *spm_bgobj_state,
const struct pvr_framebuffer *framebuffer,
const struct pvr_renderpass_hwsetup_render *hw_render,
uint32_t emit_count);
void pvr_spm_finish_bgobj_state(struct pvr_device *device,
struct pvr_spm_bgobj_state *spm_bgobj_state);
#endif /* PVR_SPM_H */

View file

@ -28,6 +28,8 @@
#include <stdbool.h>
#include "util/bitpack_helpers.h"
#include "util/bitscan.h"
#include "util/u_math.h"
/* Occlusion query availability writes. */
enum pvr_query_availability_write_pool_const {
@ -145,10 +147,14 @@ enum pvr_spm_load_const {
SPM_LOAD_CONST_TILE_BUFFER_7_UPPER,
SPM_LOAD_CONST_TILE_BUFFER_7_LOWER,
};
#define PVR_SPM_LOAD_CONST_COUNT (SPM_LOAD_CONST_TILE_BUFFER_7_LOWER + 1)
#define PVR_SPM_LOAD_DEST_UNUSED ~0
#define PVR_SPM_LOAD_SAMPLES_COUNT 4U
#define PVR_SPM_LOAD_IN_REGS_COUNT 3 /* 1, 2, 4 */
#define PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT 7 /* 1, 2, 3, 4, 5, 6, 7 */
/* If output_regs == 8
* reg_load_programs = 4 # 1, 2, 4, 8
* tile_buffer_load_programs = 3 # 1, 2, 3
@ -164,6 +170,33 @@ enum pvr_spm_load_const {
/* FIXME: This is currently hard coded for the am62. The Chromebook has 8
* output regs so the count is different.
*/
#define PVR_SPM_LOAD_PROGRAM_COUNT (PVR_SPM_LOAD_SAMPLES_COUNT * (3 + 7))
#define PVR_SPM_LOAD_PROGRAM_COUNT \
(PVR_SPM_LOAD_SAMPLES_COUNT * \
(PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT))
/* Returns the index of the SPM load program for the given configuration.
 *
 * Programs are grouped by log2(sample_count). Within a group the register
 * load programs come first, indexed by log2(num_output_regs), followed by the
 * tile buffer load programs, indexed by num_tile_buffers - 1.
 *
 * Exactly one of num_tile_buffers and num_output_regs must be non-zero.
 * sample_count, and num_output_regs when used, must be a power of two.
 */
static inline uint32_t pvr_get_spm_load_program_index(uint32_t sample_count,
                                                      uint32_t num_tile_buffers,
                                                      uint32_t num_output_regs)
{
   /* Number of load programs that exist for each sample count. */
   const uint32_t programs_per_sample_count =
      PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT;
   uint32_t program_idx;

   assert(util_is_power_of_two_nonzero(sample_count));
   program_idx = util_logbase2(sample_count) * programs_per_sample_count;

   assert((num_tile_buffers > 0) ^ (num_output_regs > 0));

   if (num_output_regs == 0) {
      /* Tile buffer load programs follow the register load programs. */
      assert(num_tile_buffers <= PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
      program_idx += PVR_SPM_LOAD_IN_REGS_COUNT + num_tile_buffers - 1;
   } else {
      const uint32_t regs_log2 = util_logbase2(num_output_regs);

      assert(util_is_power_of_two_nonzero(num_output_regs));
      assert(regs_log2 < PVR_SPM_LOAD_IN_REGS_COUNT);
      program_idx += regs_log2;
   }

   assert(program_idx < PVR_SPM_LOAD_PROGRAM_COUNT);

   return program_idx;
}
#endif /* PVR_SHADER_FACTORY_H */