diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index a8fbbe046b0..8c2bee21006 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -1163,6 +1163,9 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, struct pvr_pds_upload pds_pixel_event_program; uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS] [ROGUE_NUM_PBESTATE_STATE_WORDS] = { 0 }; + struct pvr_framebuffer *framebuffer = render_pass_info->framebuffer; + struct pvr_spm_bgobj_state *spm_bgobj_state = + &framebuffer->spm_bgobj_state_per_render[sub_cmd->hw_render_idx]; struct pvr_render_target *render_target; VkResult result; @@ -1194,7 +1197,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, } pvr_setup_pbe_state(dev_info, - render_pass_info->framebuffer, + framebuffer, surface->mrt_idx, mrt_resource, iview, @@ -1216,8 +1219,16 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset; - /* FIXME: Don't do this if there is a barrier load. 
*/ - if (render_pass_info->enable_bg_tag) { + if (sub_cmd->barrier_load) { + job->enable_bg_tag = true; + job->process_empty_tiles = true; + + STATIC_ASSERT(ARRAY_SIZE(job->pds_bgnd_reg_values) == + ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); + typed_memcpy(job->pds_bgnd_reg_values, + spm_bgobj_state->pds_reg_values, + ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); + } else if (render_pass_info->enable_bg_tag) { const struct pvr_load_op *load_op = hw_render->load_op; struct pvr_pds_upload load_op_program; @@ -1230,16 +1241,22 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, if (result != VK_SUCCESS) return result; + job->enable_bg_tag = render_pass_info->enable_bg_tag; + job->process_empty_tiles = render_pass_info->process_empty_tiles; + pvr_pds_bgnd_pack_state(load_op, &load_op_program, job->pds_bgnd_reg_values); } - job->enable_bg_tag = render_pass_info->enable_bg_tag; - job->process_empty_tiles = render_pass_info->process_empty_tiles; + STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) == + ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); + typed_memcpy(job->pds_pr_bgnd_reg_values, + spm_bgobj_state->pds_reg_values, + ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); render_target = pvr_get_render_target(render_pass_info->pass, - render_pass_info->framebuffer, + framebuffer, sub_cmd->hw_render_idx); job->rt_dataset = render_target->rt_dataset; diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c index 0dffb7aeaa9..c15686e0a9d 100644 --- a/src/imagination/vulkan/pvr_device.c +++ b/src/imagination/vulkan/pvr_device.c @@ -2676,6 +2676,7 @@ VkResult pvr_CreateFramebuffer(VkDevice _device, { PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass); PVR_FROM_HANDLE(pvr_device, device, _device); + struct pvr_spm_bgobj_state *spm_bgobj_state_per_render; struct pvr_spm_eot_state *spm_eot_state_per_render; struct pvr_render_target *render_targets; struct pvr_framebuffer *framebuffer; @@ -2703,6 
+2704,10 @@ VkResult pvr_CreateFramebuffer(VkDevice _device, &spm_eot_state_per_render, __typeof__(*spm_eot_state_per_render), pass->hw_setup->render_count); + vk_multialloc_add(&ma, + &spm_bgobj_state_per_render, + __typeof__(*spm_bgobj_state_per_render), + pass->hw_setup->render_count); if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, @@ -2749,20 +2754,42 @@ VkResult pvr_CreateFramebuffer(VkDevice _device, goto err_finish_render_targets; for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { + uint32_t emit_count; + result = pvr_spm_init_eot_state(device, &spm_eot_state_per_render[i], framebuffer, - &pass->hw_setup->renders[i]); - if (result != VK_SUCCESS) { - for (uint32_t j = 0; j < i; j++) - pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]); + &pass->hw_setup->renders[i], + &emit_count); + if (result != VK_SUCCESS) + goto err_finish_eot_state; - goto err_finish_render_targets; - } + result = pvr_spm_init_bgobj_state(device, + &spm_bgobj_state_per_render[i], + framebuffer, + &pass->hw_setup->renders[i], + emit_count); + if (result != VK_SUCCESS) + goto err_finish_bgobj_state; + + continue; + +err_finish_bgobj_state: + pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]); + +err_finish_eot_state: + /* Renders before i hold both eot and bgobj state, whichever init failed. */ + for (uint32_t j = 0; j < i; j++) { + pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]); + pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]); + } + + goto err_finish_render_targets; } + framebuffer->render_count = pass->hw_setup->render_count; framebuffer->spm_eot_state_per_render = spm_eot_state_per_render; - framebuffer->spm_eot_state_count = pass->hw_setup->render_count; + framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render; *pFramebuffer = pvr_framebuffer_to_handle(framebuffer); @@ -2791,7 +2818,10 @@ void pvr_DestroyFramebuffer(VkDevice _device, if (!framebuffer) return; - for (uint32_t i = 0; i < framebuffer->spm_eot_state_count; i++) { + for (uint32_t i 
= 0; i < framebuffer->render_count; i++) { + pvr_spm_finish_bgobj_state(device, + &framebuffer->spm_bgobj_state_per_render[i]); + pvr_spm_finish_eot_state(device, &framebuffer->spm_eot_state_per_render[i]); } diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c index a5baa3858d5..b8bb905daaf 100644 --- a/src/imagination/vulkan/pvr_job_render.c +++ b/src/imagination/vulkan/pvr_job_render.c @@ -1475,8 +1475,11 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, sizeof(job->pds_bgnd_reg_values)); stream_ptr += 3U * 2U; - /* Set pds_pr_bgnd array to 0 */ - memset(stream_ptr, 0, 3U * sizeof(uint64_t)); + STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) == 3U); + STATIC_ASSERT(sizeof(job->pds_pr_bgnd_reg_values[0]) == sizeof(uint64_t)); + memcpy(stream_ptr, + job->pds_pr_bgnd_reg_values, + sizeof(job->pds_pr_bgnd_reg_values)); stream_ptr += 3U * 2U; /* Set usc_clear_register array to 0 */ diff --git a/src/imagination/vulkan/pvr_job_render.h b/src/imagination/vulkan/pvr_job_render.h index 406aa6dfe88..cfb8b0f9465 100644 --- a/src/imagination/vulkan/pvr_job_render.h +++ b/src/imagination/vulkan/pvr_job_render.h @@ -107,6 +107,7 @@ struct pvr_render_job { [ROGUE_NUM_PBESTATE_REG_WORDS]; uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS]; + uint64_t pds_pr_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS]; }; VkResult pvr_free_list_create(struct pvr_device *device, diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index abff0008c46..ab966036915 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -251,7 +251,7 @@ struct pvr_device { struct pvr_bo *usc_programs; struct pvr_bo *pds_programs; - struct { + struct pvr_spm_per_load_program_state { pvr_dev_addr_t pds_pixel_program_offset; pvr_dev_addr_t pds_uniform_program_offset; @@ -988,8 +988,9 @@ struct pvr_framebuffer { struct pvr_spm_scratch_buffer *scratch_buffer; - uint32_t 
spm_eot_state_count; + uint32_t render_count; struct pvr_spm_eot_state *spm_eot_state_per_render; + struct pvr_spm_bgobj_state *spm_bgobj_state_per_render; }; struct pvr_render_pass_attachment { diff --git a/src/imagination/vulkan/pvr_spm.c b/src/imagination/vulkan/pvr_spm.c index b394baf7231..e07a6f5c8d8 100644 --- a/src/imagination/vulkan/pvr_spm.c +++ b/src/imagination/vulkan/pvr_spm.c @@ -32,6 +32,7 @@ #include "pvr_csb.h" #include "pvr_csb_enum_helpers.h" #include "pvr_device_info.h" +#include "pvr_formats.h" #include "pvr_hw_pass.h" #include "pvr_job_common.h" #include "pvr_pds.h" @@ -39,6 +40,7 @@ #include "pvr_shader_factory.h" #include "pvr_spm.h" #include "pvr_static_shaders.h" +#include "pvr_tex_state.h" #include "pvr_types.h" #include "util/bitscan.h" #include "util/macros.h" @@ -642,7 +644,8 @@ VkResult pvr_spm_init_eot_state(struct pvr_device *device, struct pvr_spm_eot_state *spm_eot_state, const struct pvr_framebuffer *framebuffer, - const struct pvr_renderpass_hwsetup_render *hw_render) + const struct pvr_renderpass_hwsetup_render *hw_render, + uint32_t *emit_count_out) { const struct pvr_device_info *dev_info = &device->pdevice->dev_info; struct pvr_pds_upload pds_eot_program; @@ -810,14 +813,325 @@ pvr_spm_init_eot_state(struct pvr_device *device, spm_eot_state->pixel_event_program_data_upload = pds_eot_program.pvr_bo; spm_eot_state->pixel_event_program_data_offset = pds_eot_program.data_offset; + *emit_count_out = mrt_setup.num_render_targets; + return VK_SUCCESS; } -#undef PVR_DEV_ADDR_ADVANCE - void pvr_spm_finish_eot_state(struct pvr_device *device, struct pvr_spm_eot_state *spm_eot_state) { pvr_bo_free(device, spm_eot_state->pixel_event_program_data_upload); pvr_bo_free(device, spm_eot_state->usc_eot_program); } + +static VkFormat pvr_get_format_from_dword_count(uint32_t dword_count) +{ + switch (dword_count) { + case 1: + return VK_FORMAT_R32_UINT; + case 2: + return VK_FORMAT_R32G32_UINT; + case 4: + return VK_FORMAT_R32G32B32A32_UINT; + 
default: + unreachable("Invalid dword_count"); + } +} + +static VkResult pvr_spm_setup_texture_state_words( + struct pvr_device *device, + uint32_t dword_count, + const VkExtent2D framebuffer_size, + uint32_t sample_count, + pvr_dev_addr_t scratch_buffer_addr, + uint64_t image_descriptor[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS], + uint64_t *mem_used_out) +{ + /* We can ignore the framebuffer's layer count since we only support + * writing to layer 0. + */ + struct pvr_texture_state_info info = { + .format = pvr_get_format_from_dword_count(dword_count), + .mem_layout = PVR_MEMLAYOUT_LINEAR, + + .type = VK_IMAGE_VIEW_TYPE_2D, + .tex_state_type = PVR_TEXTURE_STATE_STORAGE, + .extent = { + .width = framebuffer_size.width, + .height = framebuffer_size.height, + }, + + .mip_levels = 1, + + .sample_count = sample_count, + .stride = framebuffer_size.width, + + .addr = scratch_buffer_addr, + }; + const uint64_t aligned_fb_width = + ALIGN_POT(framebuffer_size.width, + PVRX(CR_PBE_WORD0_MRT0_LINESTRIDE_ALIGNMENT)); + const uint64_t fb_area = aligned_fb_width * framebuffer_size.height; + const uint8_t *format_swizzle; + VkResult result; + + format_swizzle = pvr_get_format_swizzle(info.format); + memcpy(info.swizzle, format_swizzle, sizeof(info.swizzle)); + + result = pvr_pack_tex_state(device, &info, image_descriptor); + if (result != VK_SUCCESS) + return result; + + *mem_used_out = fb_area * dword_count * sizeof(uint32_t) * sample_count; + + return VK_SUCCESS; +} + +/* FIXME: Can we dedup this with pvr_load_op_pds_data_create_and_upload() ? 
*/ +static VkResult pvr_pds_bgnd_program_create_and_upload( + struct pvr_device *device, + uint32_t texture_program_data_size_in_dwords, + const struct pvr_bo *consts_buffer, + uint32_t const_shared_regs, + struct pvr_pds_upload *pds_upload_out) +{ + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + struct pvr_pds_pixel_shader_sa_program texture_program = { 0 }; + uint32_t staging_buffer_size; + uint32_t *staging_buffer; + VkResult result; + + pvr_csb_pack (&texture_program.texture_dma_address[0], + PDSINST_DOUT_FIELDS_DOUTD_SRC0, + doutd_src0) { + doutd_src0.sbase = consts_buffer->vma->dev_addr; + } + + pvr_csb_pack (&texture_program.texture_dma_control[0], + PDSINST_DOUT_FIELDS_DOUTD_SRC1, + doutd_src1) { + doutd_src1.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE); + doutd_src1.bsize = const_shared_regs; + } + + texture_program.num_texture_dma_kicks += 1; + +#if defined(DEBUG) + pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_program, dev_info); + assert(texture_program_data_size_in_dwords == texture_program.data_size); +#endif + + staging_buffer_size = texture_program_data_size_in_dwords * sizeof(uint32_t); + + staging_buffer = vk_alloc(&device->vk.alloc, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + pvr_pds_generate_pixel_shader_sa_texture_state_data(&texture_program, + staging_buffer, + dev_info); + + /* FIXME: Figure out the define for alignment of 16. 
*/ + result = pvr_gpu_upload_pds(device, + &staging_buffer[0], + texture_program_data_size_in_dwords, + 16, + NULL, + 0, + 0, + 16, + pds_upload_out); + if (result != VK_SUCCESS) { + vk_free(&device->vk.alloc, staging_buffer); + return result; + } + + vk_free(&device->vk.alloc, staging_buffer); + + return VK_SUCCESS; +} + +VkResult +pvr_spm_init_bgobj_state(struct pvr_device *device, + struct pvr_spm_bgobj_state *spm_bgobj_state, + const struct pvr_framebuffer *framebuffer, + const struct pvr_renderpass_hwsetup_render *hw_render, + uint32_t emit_count) +{ + const uint32_t spm_load_program_idx = + pvr_get_spm_load_program_index(hw_render->sample_count, + hw_render->tile_buffers_count, + hw_render->output_regs_count); + const VkExtent2D framebuffer_size = { + .width = framebuffer->width, + .height = framebuffer->height, + }; + pvr_dev_addr_t next_scratch_buffer_addr = + framebuffer->scratch_buffer->bo->vma->dev_addr; + struct pvr_spm_per_load_program_state *load_program_state; + struct pvr_pds_upload pds_texture_data_upload; + const struct pvr_shader_factory_info *info; + union pvr_sampler_descriptor *descriptor; + uint64_t consts_buffer_size; + uint32_t dword_count; + uint32_t *mem_ptr; + VkResult result; + + assert(spm_load_program_idx < ARRAY_SIZE(spm_load_collection)); + info = spm_load_collection[spm_load_program_idx].info; + + consts_buffer_size = info->const_shared_regs * sizeof(uint32_t); + + result = pvr_bo_alloc(device, + device->heaps.general_heap, + consts_buffer_size, + sizeof(uint32_t), + PVR_BO_ALLOC_FLAG_CPU_MAPPED, + &spm_bgobj_state->consts_buffer); + if (result != VK_SUCCESS) + return result; + + mem_ptr = spm_bgobj_state->consts_buffer->bo->map; + + if (info->driver_const_location_map) { + const uint32_t *const const_map = info->driver_const_location_map; + + for (uint32_t i = 0; i < PVR_SPM_LOAD_CONST_COUNT; i += 2) { + pvr_dev_addr_t tile_buffer_addr; + + if (const_map[i] == PVR_SPM_LOAD_DEST_UNUSED) { +#if defined(DEBUG) + for (uint32_t j = i; 
j < PVR_SPM_LOAD_CONST_COUNT; j++) + assert(const_map[j] == PVR_SPM_LOAD_DEST_UNUSED); +#endif + break; + } + + tile_buffer_addr = + device->tile_buffer_state.buffers[i / 2]->vma->dev_addr; + + assert(const_map[i] == const_map[i + 1] + 1); + mem_ptr[const_map[i]] = tile_buffer_addr.addr >> 32; + mem_ptr[const_map[i + 1]] = (uint32_t)tile_buffer_addr.addr; + } + } + + /* TODO: The 32 comes from how the shaders are compiled. We should + * unhardcode it when this is hooked up to the compiler. + */ + descriptor = (union pvr_sampler_descriptor *)(mem_ptr + 32); + *descriptor = (union pvr_sampler_descriptor){ 0 }; + + pvr_csb_pack (&descriptor->data.sampler_word, TEXSTATE_SAMPLER, sampler) { + sampler.non_normalized_coords = true; + sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); + sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); + sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT); + sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT); + sampler.maxlod = PVRX(TEXSTATE_CLAMP_MIN); + sampler.minlod = PVRX(TEXSTATE_CLAMP_MIN); + sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT); + } + + /* Even if we might have 8 output regs we can only pack and write 4 dwords + * using R32G32B32A32_UINT. 
+ */ + if (hw_render->tile_buffers_count > 0) + dword_count = 4; + else + dword_count = MIN2(hw_render->output_regs_count, 4); + + for (uint32_t i = 0; i < emit_count; i++) { + uint64_t *mem_ptr_u64 = (uint64_t *)mem_ptr; + uint64_t mem_used = 0; + + STATIC_ASSERT(ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t) / + sizeof(uint32_t) == + PVR_IMAGE_DESCRIPTOR_SIZE); + mem_ptr_u64 += i * ROGUE_NUM_TEXSTATE_IMAGE_WORDS; + + result = pvr_spm_setup_texture_state_words(device, + dword_count, + framebuffer_size, + hw_render->sample_count, + next_scratch_buffer_addr, + mem_ptr_u64, + &mem_used); + if (result != VK_SUCCESS) + goto err_free_consts_buffer; + + PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_used); + } + + assert(spm_load_program_idx < + ARRAY_SIZE(device->spm_load_state.load_program)); + load_program_state = + &device->spm_load_state.load_program[spm_load_program_idx]; + + result = pvr_pds_bgnd_program_create_and_upload( + device, + load_program_state->pds_texture_program_data_size, + spm_bgobj_state->consts_buffer, + info->const_shared_regs, + &pds_texture_data_upload); + if (result != VK_SUCCESS) + goto err_free_consts_buffer; + + spm_bgobj_state->pds_texture_data_upload = pds_texture_data_upload.pvr_bo; + + /* TODO: Is it worth to dedup this with pvr_pds_bgnd_pack_state() ? 
*/ + + /* clang-format off */ + pvr_csb_pack (&spm_bgobj_state->pds_reg_values[0], + CR_PDS_BGRND0_BASE, + value) { + /* clang-format on */ + value.shader_addr = load_program_state->pds_pixel_program_offset; + value.texunicode_addr = load_program_state->pds_uniform_program_offset; + } + + /* clang-format off */ + pvr_csb_pack (&spm_bgobj_state->pds_reg_values[1], + CR_PDS_BGRND1_BASE, + value) { + /* clang-format on */ + value.texturedata_addr = + PVR_DEV_ADDR(pds_texture_data_upload.data_offset); + } + + /* clang-format off */ + pvr_csb_pack (&spm_bgobj_state->pds_reg_values[2], + CR_PDS_BGRND3_SIZEINFO, + value) { + /* clang-format on */ + value.usc_sharedsize = + DIV_ROUND_UP(info->const_shared_regs, + PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE)); + value.pds_texturestatesize = DIV_ROUND_UP( + pds_texture_data_upload.data_size, + PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE)); + value.pds_tempsize = + DIV_ROUND_UP(load_program_state->pds_texture_program_temps_count, + PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE)); + } + + return VK_SUCCESS; + +err_free_consts_buffer: + pvr_bo_free(device, spm_bgobj_state->consts_buffer); + + return result; +} + +void pvr_spm_finish_bgobj_state(struct pvr_device *device, + struct pvr_spm_bgobj_state *spm_bgobj_state) +{ + pvr_bo_free(device, spm_bgobj_state->pds_texture_data_upload); + pvr_bo_free(device, spm_bgobj_state->consts_buffer); +} + +#undef PVR_DEV_ADDR_ADVANCE diff --git a/src/imagination/vulkan/pvr_spm.h b/src/imagination/vulkan/pvr_spm.h index 22cd67d61e7..c5b0b566e28 100644 --- a/src/imagination/vulkan/pvr_spm.h +++ b/src/imagination/vulkan/pvr_spm.h @@ -76,6 +76,17 @@ struct pvr_spm_eot_state { struct pvr_bo *pixel_event_program_data_upload; }; +struct pvr_spm_bgobj_state { + struct pvr_bo *consts_buffer; + + /* TODO: Make this struct pvr_pds_upload? It would pull in pvr_private.h + * though which causes a cycle since that includes pvr_spm.h . 
+ */ + struct pvr_bo *pds_texture_data_upload; + + uint64_t pds_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS]; +}; + void pvr_spm_init_scratch_buffer_store(struct pvr_device *device); void pvr_spm_finish_scratch_buffer_store(struct pvr_device *device); @@ -106,8 +117,18 @@ VkResult pvr_spm_init_eot_state(struct pvr_device *device, struct pvr_spm_eot_state *spm_eot_state, const struct pvr_framebuffer *framebuffer, - const struct pvr_renderpass_hwsetup_render *hw_render); + const struct pvr_renderpass_hwsetup_render *hw_render, + uint32_t *emit_count_out); void pvr_spm_finish_eot_state(struct pvr_device *device, struct pvr_spm_eot_state *spm_eot_state); +VkResult +pvr_spm_init_bgobj_state(struct pvr_device *device, + struct pvr_spm_bgobj_state *spm_bgobj_state, + const struct pvr_framebuffer *framebuffer, + const struct pvr_renderpass_hwsetup_render *hw_render, + uint32_t emit_count); +void pvr_spm_finish_bgobj_state(struct pvr_device *device, + struct pvr_spm_bgobj_state *spm_bgobj_state); + #endif /* PVR_SPM_H */ diff --git a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h b/src/imagination/vulkan/usc/programs/pvr_shader_factory.h index dc8915df1f0..a81254b7be4 100644 --- a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h +++ b/src/imagination/vulkan/usc/programs/pvr_shader_factory.h @@ -28,6 +28,8 @@ #include #include "util/bitpack_helpers.h" +#include "util/bitscan.h" +#include "util/u_math.h" /* Occlusion query availability writes. 
*/ enum pvr_query_availability_write_pool_const { @@ -145,10 +147,14 @@ enum pvr_spm_load_const { SPM_LOAD_CONST_TILE_BUFFER_7_UPPER, SPM_LOAD_CONST_TILE_BUFFER_7_LOWER, }; +#define PVR_SPM_LOAD_CONST_COUNT (SPM_LOAD_CONST_TILE_BUFFER_7_LOWER + 1) #define PVR_SPM_LOAD_DEST_UNUSED ~0 #define PVR_SPM_LOAD_SAMPLES_COUNT 4U +#define PVR_SPM_LOAD_IN_REGS_COUNT 3 /* 1, 2, 4 */ +#define PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT 7 /* 1, 2, 3, 4, 5, 6, 7 */ + /* If output_regs == 8 * reg_load_programs = 4 # 1, 2, 4, 8 * tile_buffer_load_programs = 3 # 1, 2, 3 @@ -164,6 +170,33 @@ enum pvr_spm_load_const { /* FIXME: This is currently hard coded for the am62. The Chromebook has 8 * output regs so the count is different. */ -#define PVR_SPM_LOAD_PROGRAM_COUNT (PVR_SPM_LOAD_SAMPLES_COUNT * (3 + 7)) +#define PVR_SPM_LOAD_PROGRAM_COUNT \ + (PVR_SPM_LOAD_SAMPLES_COUNT * \ + (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT)) + +static inline uint32_t pvr_get_spm_load_program_index(uint32_t sample_count, + uint32_t num_tile_buffers, + uint32_t num_output_regs) +{ + uint32_t idx; + + assert(util_is_power_of_two_nonzero(sample_count)); + idx = util_logbase2(sample_count) * + (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT); + + assert((num_tile_buffers > 0) ^ (num_output_regs > 0)); + + if (num_output_regs > 0) { + assert(util_is_power_of_two_nonzero(num_output_regs)); + assert(util_logbase2(num_output_regs) < PVR_SPM_LOAD_IN_REGS_COUNT); + idx += util_logbase2(num_output_regs); + } else { + assert(num_tile_buffers <= PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT); + idx += PVR_SPM_LOAD_IN_REGS_COUNT + num_tile_buffers - 1; + } + + assert(idx < PVR_SPM_LOAD_PROGRAM_COUNT); + return idx; +} #endif /* PVR_SHADER_FACTORY_H */