From 825c4443bfdf4f493137ee00baacc8a4ca665acf Mon Sep 17 00:00:00 2001 From: Simon Perretta Date: Mon, 11 Aug 2025 18:58:55 +0100 Subject: [PATCH] pvr: switch to usc generated spm load shaders Signed-off-by: Simon Perretta Acked-by: Erik Faye-Lund Part-of: --- src/imagination/common/pvr_iface.h | 30 + src/imagination/vulkan/pvr_device.c | 8 +- src/imagination/vulkan/pvr_private.h | 3 +- src/imagination/vulkan/pvr_spm.c | 250 ++-- src/imagination/vulkan/pvr_spm.h | 8 +- src/imagination/vulkan/pvr_usc.c | 129 ++ src/imagination/vulkan/pvr_usc.h | 87 ++ .../vulkan/usc/programs/pvr_shader_factory.h | 106 -- .../vulkan/usc/programs/pvr_static_shaders.h | 1105 ----------------- 9 files changed, 387 insertions(+), 1339 deletions(-) delete mode 100644 src/imagination/vulkan/usc/programs/pvr_shader_factory.h delete mode 100644 src/imagination/vulkan/usc/programs/pvr_static_shaders.h diff --git a/src/imagination/common/pvr_iface.h b/src/imagination/common/pvr_iface.h index 5d29f178847..fcef653e147 100644 --- a/src/imagination/common/pvr_iface.h +++ b/src/imagination/common/pvr_iface.h @@ -71,4 +71,34 @@ enum pvr_clear_attach_data { _PVR_CLEAR_ATTACH_DATA_COUNT, }; +/** SPM load shader data; shared registers. 
*/ +enum pvr_spm_load_data { + PVR_SPM_LOAD_DATA_SMP = 0, + + PVR_SPM_LOAD_DATA_REG_TEX = 4, + + PVR_SPM_LOAD_DATA_BUF_TEX_0 = 8, + PVR_SPM_LOAD_DATA_BUF_ADDR_0 = 12, + + PVR_SPM_LOAD_DATA_BUF_ADDR_1 = 14, + PVR_SPM_LOAD_DATA_BUF_TEX_1 = 16, + + PVR_SPM_LOAD_DATA_BUF_TEX_2 = 20, + PVR_SPM_LOAD_DATA_BUF_ADDR_2 = 24, + + PVR_SPM_LOAD_DATA_BUF_ADDR_3 = 26, + PVR_SPM_LOAD_DATA_BUF_TEX_3 = 28, + + PVR_SPM_LOAD_DATA_BUF_TEX_4 = 32, + PVR_SPM_LOAD_DATA_BUF_ADDR_4 = 36, + + PVR_SPM_LOAD_DATA_BUF_ADDR_5 = 38, + PVR_SPM_LOAD_DATA_BUF_TEX_5 = 40, + + PVR_SPM_LOAD_DATA_BUF_TEX_6 = 44, + PVR_SPM_LOAD_DATA_BUF_ADDR_6 = 48, + + _PVR_SPM_LOAD_DATA_COUNT = 50, +}; + #endif /* PVR_IFACE_H */ diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c index 50dc2f56128..8b3e03f244b 100644 --- a/src/imagination/vulkan/pvr_device.c +++ b/src/imagination/vulkan/pvr_device.c @@ -3104,21 +3104,17 @@ VkResult pvr_CreateFramebuffer(VkDevice _device, goto err_finish_render_targets; for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { - uint32_t emit_count; - result = pvr_spm_init_eot_state(device, &spm_eot_state_per_render[i], framebuffer, - &pass->hw_setup->renders[i], - &emit_count); + &pass->hw_setup->renders[i]); if (result != VK_SUCCESS) goto err_finish_eot_state; result = pvr_spm_init_bgobj_state(device, &spm_bgobj_state_per_render[i], framebuffer, - &pass->hw_setup->renders[i], - emit_count); + &pass->hw_setup->renders[i]); if (result != VK_SUCCESS) goto err_finish_bgobj_state; diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 0fe79e12d9c..0a6f665b5bf 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -51,7 +51,6 @@ #include "pvr_limits.h" #include "pvr_pds.h" #include "pvr_usc.h" -#include "usc/programs/pvr_shader_factory.h" #include "pvr_spm.h" #include "pvr_types.h" #include "pvr_winsys.h" @@ -266,7 +265,7 @@ struct pvr_device { uint32_t 
pds_texture_program_data_size; uint32_t pds_texture_program_temps_count; - } load_program[PVR_SPM_LOAD_PROGRAM_COUNT]; + } load_program[PVR_NUM_SPM_LOAD_SHADERS]; } spm_load_state; struct pvr_device_tile_buffer_state { diff --git a/src/imagination/vulkan/pvr_spm.c b/src/imagination/vulkan/pvr_spm.c index d364eb6d9bc..45264fe5e09 100644 --- a/src/imagination/vulkan/pvr_spm.c +++ b/src/imagination/vulkan/pvr_spm.c @@ -37,9 +37,7 @@ #include "pvr_job_common.h" #include "pvr_pds.h" #include "pvr_private.h" -#include "usc/programs/pvr_shader_factory.h" #include "pvr_spm.h" -#include "usc/programs/pvr_static_shaders.h" #include "pvr_tex_state.h" #include "pvr_types.h" #include "pvr_usc.h" @@ -257,9 +255,10 @@ VkResult pvr_spm_scratch_buffer_get_buffer( VkResult pvr_device_init_spm_load_state(struct pvr_device *device) { const struct pvr_device_info *dev_info = &device->pdevice->dev_info; - uint32_t pds_texture_aligned_offsets[PVR_SPM_LOAD_PROGRAM_COUNT]; - uint32_t pds_kick_aligned_offsets[PVR_SPM_LOAD_PROGRAM_COUNT]; - uint32_t usc_aligned_offsets[PVR_SPM_LOAD_PROGRAM_COUNT]; + uint32_t pds_texture_aligned_offsets[PVR_NUM_SPM_LOAD_SHADERS]; + uint32_t pds_kick_aligned_offsets[PVR_NUM_SPM_LOAD_SHADERS]; + uint32_t usc_aligned_offsets[PVR_NUM_SPM_LOAD_SHADERS]; + pco_shader *shaders[PVR_NUM_SPM_LOAD_SHADERS]; uint32_t pds_allocation_size = 0; uint32_t usc_allocation_size = 0; struct pvr_suballoc_bo *pds_bo; @@ -267,9 +266,6 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device) uint8_t *mem_ptr; VkResult result; - static_assert(PVR_SPM_LOAD_PROGRAM_COUNT == ARRAY_SIZE(spm_load_collection), - "Size mismatch"); - /* TODO: We don't need to upload all the programs since the set contains * programs for devices with 8 output regs as well. We can save some memory * by not uploading them on devices without the feature. 
@@ -277,11 +273,37 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device) * cache and generate the shaders as needed so this todo will be unnecessary. */ - /* Upload USC shaders. */ + /* Build and upload USC shaders. */ - for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) { - usc_aligned_offsets[i] = usc_allocation_size; - usc_allocation_size += ALIGN_POT(spm_load_collection[i].size, 4); + struct pvr_spm_load_props props; + + for (unsigned is_multisampled = 0; is_multisampled <= 1; ++is_multisampled) { + for (unsigned output_reg_count_log2 = 0; output_reg_count_log2 <= 2; + ++output_reg_count_log2) { + unsigned output_reg_count = 1 << output_reg_count_log2; + + props = (struct pvr_spm_load_props){ + .output_reg_count = output_reg_count, + .tile_buffer_count = 0, + .is_multisampled = is_multisampled, + }; + + unsigned u = pvr_uscgen_spm_load_index(&props); + shaders[u] = pvr_uscgen_spm_load(device->pdevice->pco_ctx, &props); + usc_allocation_size += pco_shader_binary_size(shaders[u]); + + if (output_reg_count != 4) + continue; + + for (unsigned tile_buffer_count = 1; tile_buffer_count <= 7; + ++tile_buffer_count) { + props.tile_buffer_count = tile_buffer_count; + + u = pvr_uscgen_spm_load_index(&props); + shaders[u] = pvr_uscgen_spm_load(device->pdevice->pco_ctx, &props); + usc_allocation_size += pco_shader_binary_size(shaders[u]); + } + } } result = pvr_bo_suballoc(&device->suballoc_usc, @@ -294,15 +316,19 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device) mem_ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(usc_bo); - for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) { - memcpy(mem_ptr + usc_aligned_offsets[i], - spm_load_collection[i].code, - spm_load_collection[i].size); + unsigned offset = 0; + for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) { + unsigned shader_size = pco_shader_binary_size(shaders[u]); + + usc_aligned_offsets[u] = offset; + memcpy(&mem_ptr[offset], pco_shader_binary_data(shaders[u]), 
shader_size); + + offset += shader_size; } /* Upload PDS programs. */ - for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) { + for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) { struct pvr_pds_pixel_shader_sa_program pds_texture_program = { /* DMA for clear colors and tile buffer address parts. */ .num_texture_dma_kicks = 1, @@ -320,17 +346,17 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device) * behavior the data size is always the same here. Should we try saving * some memory by adjusting things based on that? */ - device->spm_load_state.load_program[i].pds_texture_program_data_size = + device->spm_load_state.load_program[u].pds_texture_program_data_size = pds_texture_program.data_size; - pds_texture_aligned_offsets[i] = pds_allocation_size; + pds_texture_aligned_offsets[u] = pds_allocation_size; /* FIXME: Figure out the define for alignment of 16. */ pds_allocation_size += ALIGN_POT(PVR_DW_TO_BYTES(pds_texture_program.code_size), 16); pvr_pds_set_sizes_pixel_shader(&pds_kick_program); - pds_kick_aligned_offsets[i] = pds_allocation_size; + pds_kick_aligned_offsets[u] = pds_allocation_size; /* FIXME: Figure out the define for alignment of 16. */ pds_allocation_size += ALIGN_POT(PVR_DW_TO_BYTES(pds_kick_program.code_size + @@ -351,52 +377,53 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device) mem_ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(pds_bo); - for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) { + for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) { struct pvr_pds_pixel_shader_sa_program pds_texture_program = { /* DMA for clear colors and tile buffer address parts. 
*/ .num_texture_dma_kicks = 1, }; const pvr_dev_addr_t usc_program_dev_addr = - PVR_DEV_ADDR_OFFSET(usc_bo->dev_addr, usc_aligned_offsets[i]); + PVR_DEV_ADDR_OFFSET(usc_bo->dev_addr, usc_aligned_offsets[u]); struct pvr_pds_kickusc_program pds_kick_program = { 0 }; - enum ROGUE_PDSINST_DOUTU_SAMPLE_RATE sample_rate; + + pco_data *shader_data = pco_shader_data(shaders[u]); pvr_pds_generate_pixel_shader_sa_code_segment( &pds_texture_program, - (uint32_t *)(mem_ptr + pds_texture_aligned_offsets[i])); - - if (spm_load_collection[i].info->msaa_sample_count > 1) - sample_rate = ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL; - else - sample_rate = ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE; + (uint32_t *)(mem_ptr + pds_texture_aligned_offsets[u])); pvr_pds_setup_doutu(&pds_kick_program.usc_task_control, usc_program_dev_addr.addr, - spm_load_collection[i].info->temps_required, - sample_rate, + shader_data->common.temps, + shader_data->fs.uses.sample_shading + ? ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL + : ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE, false); /* Generated both code and data. */ pvr_pds_generate_pixel_shader_program( &pds_kick_program, - (uint32_t *)(mem_ptr + pds_kick_aligned_offsets[i])); + (uint32_t *)(mem_ptr + pds_kick_aligned_offsets[u])); - device->spm_load_state.load_program[i].pds_pixel_program_offset = - PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_kick_aligned_offsets[i]); - device->spm_load_state.load_program[i].pds_uniform_program_offset = - PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_texture_aligned_offsets[i]); + device->spm_load_state.load_program[u].pds_pixel_program_offset = + PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_kick_aligned_offsets[u]); + device->spm_load_state.load_program[u].pds_uniform_program_offset = + PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_texture_aligned_offsets[u]); /* TODO: From looking at the pvr_pds_generate_...() functions, it seems * like temps_used is always 1. Should we remove this and hard code it * with a define in the PDS code? 
*/ - device->spm_load_state.load_program[i].pds_texture_program_temps_count = + device->spm_load_state.load_program[u].pds_texture_program_temps_count = pds_texture_program.temps_used; } device->spm_load_state.usc_programs = usc_bo; device->spm_load_state.pds_programs = pds_bo; + for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) + ralloc_free(shaders[u]); + return VK_SUCCESS; } @@ -482,9 +509,6 @@ static inline void pvr_set_pbe_all_valid_mask(struct usc_mrt_desc *desc) desc->valid_mask[i] = ~0; } -#define PVR_DEV_ADDR_ADVANCE(_addr, _offset) \ - _addr = PVR_DEV_ADDR_OFFSET(_addr, _offset) - /** * \brief Sets up PBE registers, PBE state values and MRT data per a single * render output requiring 8 dwords to be written. @@ -534,7 +558,7 @@ static uint64_t pvr_spm_setup_pbe_eight_dword_write( pbe_state_word_0_out, pbe_reg_word_0_out); - PVR_DEV_ADDR_ADVANCE(scratch_buffer_addr, mem_stored); + scratch_buffer_addr = PVR_DEV_ADDR_OFFSET(scratch_buffer_addr, mem_stored); render_target_used++; @@ -547,7 +571,7 @@ static uint64_t pvr_spm_setup_pbe_eight_dword_write( pbe_state_word_1_out, pbe_reg_word_1_out); - PVR_DEV_ADDR_ADVANCE(scratch_buffer_addr, mem_stored); + scratch_buffer_addr = PVR_DEV_ADDR_OFFSET(scratch_buffer_addr, mem_stored); render_target_used++; *render_target_used_out = render_target_used; @@ -615,8 +639,7 @@ VkResult pvr_spm_init_eot_state(struct pvr_device *device, struct pvr_spm_eot_state *spm_eot_state, const struct pvr_framebuffer *framebuffer, - const struct pvr_renderpass_hwsetup_render *hw_render, - uint32_t *emit_count_out) + const struct pvr_renderpass_hwsetup_render *hw_render) { const VkExtent2D framebuffer_size = { .width = framebuffer->width, @@ -659,7 +682,8 @@ pvr_spm_init_eot_state(struct pvr_device *device, spm_eot_state->pbe_reg_words[total_render_target_used + 1], &render_targets_used); - PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored); + next_scratch_buffer_addr = + PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_stored); 
total_render_target_used += render_targets_used; /* Store off-chip tile data (i.e. tile buffers). */ @@ -684,7 +708,8 @@ pvr_spm_init_eot_state(struct pvr_device *device, spm_eot_state->pbe_reg_words[total_render_target_used + 1], &render_targets_used); - PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored); + next_scratch_buffer_addr = + PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_stored); total_render_target_used += render_targets_used; } } else { @@ -700,7 +725,8 @@ pvr_spm_init_eot_state(struct pvr_device *device, pbe_state_words[total_render_target_used], spm_eot_state->pbe_reg_words[total_render_target_used]); - PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored); + next_scratch_buffer_addr = + PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_stored); total_render_target_used++; @@ -723,7 +749,8 @@ pvr_spm_init_eot_state(struct pvr_device *device, pbe_state_words[total_render_target_used], spm_eot_state->pbe_reg_words[total_render_target_used]); - PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored); + next_scratch_buffer_addr = + PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_stored); total_render_target_used++; } @@ -763,8 +790,6 @@ pvr_spm_init_eot_state(struct pvr_device *device, spm_eot_state->pixel_event_program_data_upload = pds_eot_program.pvr_bo; spm_eot_state->pixel_event_program_data_offset = pds_eot_program.data_offset; - *emit_count_out = total_render_target_used; - return VK_SUCCESS; } @@ -795,7 +820,7 @@ pvr_spm_setup_texture_state_words(struct pvr_device *device, const VkExtent2D framebuffer_size, uint32_t sample_count, pvr_dev_addr_t scratch_buffer_addr, - uint64_t *image_state_ptr, + void *image_state_ptr, uint64_t *mem_used_out) { const uint64_t aligned_fb_width = @@ -835,7 +860,9 @@ pvr_spm_setup_texture_state_words(struct pvr_device *device, if (result != VK_SUCCESS) return result; - memcpy(image_state_ptr, &image_descriptor, sizeof(image_descriptor)); + memcpy(image_state_ptr, + image_descriptor.words, + 
sizeof(image_descriptor.words)); *mem_used_out = fb_area * PVR_DW_TO_BYTES(dword_count) * sample_count; @@ -913,13 +940,8 @@ VkResult pvr_spm_init_bgobj_state(struct pvr_device *device, struct pvr_spm_bgobj_state *spm_bgobj_state, const struct pvr_framebuffer *framebuffer, - const struct pvr_renderpass_hwsetup_render *hw_render, - uint32_t emit_count) + const struct pvr_renderpass_hwsetup_render *hw_render) { - const uint32_t spm_load_program_idx = - pvr_get_spm_load_program_index(hw_render->sample_count, - hw_render->tile_buffers_count, - hw_render->output_regs_count); const VkExtent2D framebuffer_size = { .width = framebuffer->width, .height = framebuffer->height, @@ -928,17 +950,29 @@ pvr_spm_init_bgobj_state(struct pvr_device *device, framebuffer->scratch_buffer->bo->vma->dev_addr; struct pvr_spm_per_load_program_state *load_program_state; struct pvr_pds_upload pds_texture_data_upload; - const struct pvr_shader_factory_info *info; struct pvr_sampler_descriptor *descriptor; uint64_t consts_buffer_size; uint32_t dword_count; uint32_t *mem_ptr; VkResult result; - assert(spm_load_program_idx < ARRAY_SIZE(spm_load_collection)); - info = spm_load_collection[spm_load_program_idx].info; + /* Even if we might have 8 output regs we can only pack and write 4 dwords + * using R32G32B32A32_UINT. 
+ */ + if (hw_render->tile_buffers_count > 0) + dword_count = 4; + else + dword_count = MIN2(hw_render->output_regs_count, 4); - consts_buffer_size = PVR_DW_TO_BYTES(info->const_shared_regs); + struct pvr_spm_load_props props = { + .output_reg_count = dword_count, + .tile_buffer_count = hw_render->tile_buffers_count, + .is_multisampled = hw_render->sample_count > 1, + }; + + const uint32_t spm_load_program_idx = pvr_uscgen_spm_load_index(&props); + + consts_buffer_size = PVR_DW_TO_BYTES(pvr_uscgen_spm_load_data_size(&props)); result = pvr_bo_alloc(device, device->heaps.general_heap, @@ -951,35 +985,17 @@ pvr_spm_init_bgobj_state(struct pvr_device *device, mem_ptr = spm_bgobj_state->consts_buffer->bo->map; - if (info->driver_const_location_map) { - const uint32_t *const const_map = info->driver_const_location_map; + for (unsigned u = 0; u < hw_render->tile_buffers_count; ++u) { + unsigned tile_buffer_addr_location = pvr_uscgen_spm_buffer_data(u, true); + pvr_dev_addr_t tile_buffer_addr = + device->tile_buffer_state.buffers[u]->vma->dev_addr; - for (uint32_t i = 0; i < PVR_SPM_LOAD_CONST_COUNT; i += 2) { - pvr_dev_addr_t tile_buffer_addr; - - if (const_map[i] == PVR_SPM_LOAD_DEST_UNUSED) { -#if MESA_DEBUG - for (uint32_t j = i; j < PVR_SPM_LOAD_CONST_COUNT; j++) - assert(const_map[j] == PVR_SPM_LOAD_DEST_UNUSED); -#endif - break; - } - - tile_buffer_addr = - device->tile_buffer_state.buffers[i / 2]->vma->dev_addr; - - assert(const_map[i] == const_map[i + 1] + 1); - mem_ptr[const_map[i]] = tile_buffer_addr.addr >> 32; - mem_ptr[const_map[i + 1]] = (uint32_t)tile_buffer_addr.addr; - } + mem_ptr[tile_buffer_addr_location] = tile_buffer_addr.addr & 0xffffffff; + mem_ptr[tile_buffer_addr_location + 1] = tile_buffer_addr.addr >> 32; } - /* TODO: The 32 comes from how the shaders are compiled. We should - * unhardcode it when this is hooked up to the compiler. 
- */ - descriptor = (struct pvr_sampler_descriptor *)(mem_ptr + 32); - *descriptor = (struct pvr_sampler_descriptor){ 0 }; - + descriptor = + (struct pvr_sampler_descriptor *)&mem_ptr[PVR_SPM_LOAD_DATA_SMP]; pvr_csb_pack (&descriptor->words[0], TEXSTATE_SAMPLER_WORD0, sampler) { sampler.non_normalized_coords = true; sampler.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE; @@ -993,38 +1009,42 @@ pvr_spm_init_bgobj_state(struct pvr_device *device, pvr_csb_pack (&descriptor->words[1], TEXSTATE_SAMPLER_WORD1, sampler) {} - /* Even if we might have 8 output regs we can only pack and write 4 dwords - * using R32G32B32A32_UINT. - */ - if (hw_render->tile_buffers_count > 0) - dword_count = 4; - else - dword_count = MIN2(hw_render->output_regs_count, 4); + uint64_t mem_used = 0; + /* Setup image descriptor for reg output. */ + result = + pvr_spm_setup_texture_state_words(device, + dword_count, + framebuffer_size, + hw_render->sample_count, + next_scratch_buffer_addr, + &mem_ptr[PVR_SPM_LOAD_DATA_REG_TEX], + &mem_used); + if (result != VK_SUCCESS) + goto err_free_consts_buffer; - for (uint32_t i = 0; i < emit_count; i++) { - uint64_t *mem_ptr_u64 = (uint64_t *)mem_ptr; - uint64_t mem_used = 0; + next_scratch_buffer_addr = + PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_used); - assert((sizeof(struct pvr_image_descriptor) / sizeof(uint64_t)) == - PVR_IMAGE_DESCRIPTOR_SIZE); - mem_ptr_u64 += - i * (sizeof(struct pvr_image_descriptor) / sizeof(uint64_t)); + /* Setup image descriptors for tile buffer outputs. 
*/ + for (unsigned u = 0; u < hw_render->tile_buffers_count; ++u) { + unsigned tile_buffer_tex_state_location = + pvr_uscgen_spm_buffer_data(u, false); - result = pvr_spm_setup_texture_state_words(device, - dword_count, - framebuffer_size, - hw_render->sample_count, - next_scratch_buffer_addr, - mem_ptr_u64, - &mem_used); + result = pvr_spm_setup_texture_state_words( + device, + dword_count, + framebuffer_size, + hw_render->sample_count, + next_scratch_buffer_addr, + &mem_ptr[tile_buffer_tex_state_location], + &mem_used); if (result != VK_SUCCESS) goto err_free_consts_buffer; - PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_used); + next_scratch_buffer_addr = + PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_used); } - assert(spm_load_program_idx < - ARRAY_SIZE(device->spm_load_state.load_program)); load_program_state = &device->spm_load_state.load_program[spm_load_program_idx]; @@ -1032,7 +1052,7 @@ pvr_spm_init_bgobj_state(struct pvr_device *device, device, load_program_state->pds_texture_program_data_size, spm_bgobj_state->consts_buffer, - info->const_shared_regs, + consts_buffer_size, &pds_texture_data_upload); if (result != VK_SUCCESS) goto err_free_consts_buffer; @@ -1065,7 +1085,7 @@ pvr_spm_init_bgobj_state(struct pvr_device *device, value) { /* clang-format on */ value.usc_sharedsize = - DIV_ROUND_UP(info->const_shared_regs, + DIV_ROUND_UP(consts_buffer_size, ROGUE_CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE); value.pds_texturestatesize = DIV_ROUND_UP( pds_texture_data_upload.data_size, @@ -1089,5 +1109,3 @@ void pvr_spm_finish_bgobj_state(struct pvr_device *device, pvr_bo_suballoc_free(spm_bgobj_state->pds_texture_data_upload); pvr_bo_free(device, spm_bgobj_state->consts_buffer); } - -#undef PVR_DEV_ADDR_ADVANCE diff --git a/src/imagination/vulkan/pvr_spm.h b/src/imagination/vulkan/pvr_spm.h index 4390d25637d..e50c093edbd 100644 --- a/src/imagination/vulkan/pvr_spm.h +++ b/src/imagination/vulkan/pvr_spm.h @@ -115,8 +115,8 @@ VkResult 
pvr_spm_init_eot_state(struct pvr_device *device, struct pvr_spm_eot_state *spm_eot_state, const struct pvr_framebuffer *framebuffer, - const struct pvr_renderpass_hwsetup_render *hw_render, - uint32_t *emit_count_out); + const struct pvr_renderpass_hwsetup_render *hw_render); + void pvr_spm_finish_eot_state(struct pvr_device *device, struct pvr_spm_eot_state *spm_eot_state); @@ -124,8 +124,8 @@ VkResult pvr_spm_init_bgobj_state(struct pvr_device *device, struct pvr_spm_bgobj_state *spm_bgobj_state, const struct pvr_framebuffer *framebuffer, - const struct pvr_renderpass_hwsetup_render *hw_render, - uint32_t emit_count); + const struct pvr_renderpass_hwsetup_render *hw_render); + void pvr_spm_finish_bgobj_state(struct pvr_device *device, struct pvr_spm_bgobj_state *spm_bgobj_state); diff --git a/src/imagination/vulkan/pvr_usc.c b/src/imagination/vulkan/pvr_usc.c index 11d125379d2..acd5a8390e4 100644 --- a/src/imagination/vulkan/pvr_usc.c +++ b/src/imagination/vulkan/pvr_usc.c @@ -1319,3 +1319,132 @@ pvr_usc_zero_init_wg_mem(pco_ctx *ctx, unsigned start, unsigned count) return build_shader(ctx, b.shader, &data); } + +pco_shader *pvr_uscgen_spm_load(pco_ctx *ctx, struct pvr_spm_load_props *props) +{ + pco_data data = { 0 }; + + nir_builder b = nir_builder_init_simple_shader( + MESA_SHADER_FRAGMENT, + pco_nir_options(), + "spm_load(%u output regs, %u tile buffers, %s)", + props->output_reg_count, + props->tile_buffer_count, + props->is_multisampled ? "ms" : "non-ms"); + + b.shader->info.fs.uses_sample_shading = props->is_multisampled; + + nir_variable *pos = nir_get_variable_with_location(b.shader, + nir_var_shader_in, + VARYING_SLOT_POS, + glsl_vec4_type()); + pos->data.interpolation = INTERP_MODE_NOPERSPECTIVE; + + nir_def *coords = nir_channels(&b, nir_load_var(&b, pos), 0b11); + nir_def *ms_index = props->is_multisampled ? 
nir_load_sample_id(&b) : NULL; + + nir_def *smp_state = nir_load_preamble(&b, + ROGUE_NUM_TEXSTATE_DWORDS, + 32, + .base = PVR_SPM_LOAD_DATA_SMP); + + /* Initialize common params. */ + pco_smp_params params = { + .smp_state = smp_state, + .dest_type = nir_type_uint32, + .sampler_dim = GLSL_SAMPLER_DIM_2D, + .coords = coords, + .lod_replace = nir_imm_int(&b, 0), + .ms_index = ms_index, + }; + + nir_def *valid_mask = nir_load_savmsk_vm_pco(&b); + nir_intrinsic_instr *smp; + + /* Emit tile buffer sample + writes. */ + /* TODO: emit nir_store_outputs instead, needs backend to handle + * discontiguous tile buffer locations. + */ + for (unsigned buffer = 0; buffer < props->tile_buffer_count; ++buffer) { + unsigned tex_base = pvr_uscgen_spm_buffer_data(buffer, false); + params.tex_state = + nir_load_preamble(&b, ROGUE_NUM_TEXSTATE_DWORDS, 32, .base = tex_base); + params.sample_components = 4; + + smp = pco_emit_nir_smp(&b, &params); + + unsigned tile_addr_base = pvr_uscgen_spm_buffer_data(buffer, true); + nir_def *tile_addr_lo = + nir_load_preamble(&b, 1, 32, .base = tile_addr_base); + nir_def *tile_addr_hi = + nir_load_preamble(&b, 1, 32, .base = tile_addr_base + 1); + + for (unsigned u = 0; u < params.sample_components; ++u) { + nir_def *tiled_offset = nir_load_tiled_offset_pco(&b, .component = u); + + nir_def *addr = + nir_uadd64_32(&b, tile_addr_lo, tile_addr_hi, tiled_offset); + + nir_def *data = nir_channel(&b, &smp->def, u); + + nir_def *addr_data = nir_vec3(&b, + nir_channel(&b, addr, 0), + nir_channel(&b, addr, 1), + data); + + nir_dma_st_tiled_pco(&b, addr_data, valid_mask); + } + } + + /* Emit output reg sample + write. 
*/ + switch (props->output_reg_count) { + case 1: + data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32_UINT; + break; + + case 2: + data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32G32_UINT; + break; + + case 4: + data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32G32B32A32_UINT; + break; + + default: + UNREACHABLE(""); + } + + data.fs.outputs[FRAG_RESULT_DATA0] = (pco_range){ + .start = 0, + .count = props->output_reg_count, + }; + + nir_create_variable_with_location(b.shader, + nir_var_shader_out, + FRAG_RESULT_DATA0, + glsl_uvec_type(props->output_reg_count)); + + params.tex_state = nir_load_preamble(&b, + ROGUE_NUM_TEXSTATE_DWORDS, + 32, + .base = PVR_SPM_LOAD_DATA_REG_TEX); + params.sample_components = props->output_reg_count; + + smp = pco_emit_nir_smp(&b, &params); + + for (unsigned u = 0; u < props->output_reg_count; ++u) { + nir_store_output(&b, + nir_channel(&b, &smp->def, u), + nir_imm_int(&b, 0), + .base = 0, + .component = u, + .src_type = nir_type_invalid | 32, + .write_mask = 1, + .io_semantics.location = FRAG_RESULT_DATA0, + .io_semantics.num_slots = 1); + } + + nir_jump(&b, nir_jump_return); + + return build_shader(ctx, b.shader, &data); +} diff --git a/src/imagination/vulkan/pvr_usc.h b/src/imagination/vulkan/pvr_usc.h index 00df73008b1..666d08a42a9 100644 --- a/src/imagination/vulkan/pvr_usc.h +++ b/src/imagination/vulkan/pvr_usc.h @@ -99,4 +99,91 @@ pvr_uscgen_clear_attach_index(struct pvr_clear_attach_props *props) pco_shader * pvr_usc_zero_init_wg_mem(pco_ctx *ctx, unsigned start, unsigned count); +/* SPM load shader generation. */ +struct pvr_spm_load_props { + unsigned output_reg_count; + unsigned tile_buffer_count; + bool is_multisampled; +}; + +static inline unsigned pvr_uscgen_spm_buffer_data(unsigned buffer_index, + bool addr) +{ + switch (buffer_index) { + case 0: + return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_0 : PVR_SPM_LOAD_DATA_BUF_TEX_0; + + case 1: + return addr ? 
PVR_SPM_LOAD_DATA_BUF_ADDR_1 : PVR_SPM_LOAD_DATA_BUF_TEX_1; + + case 2: + return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_2 : PVR_SPM_LOAD_DATA_BUF_TEX_2; + + case 3: + return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_3 : PVR_SPM_LOAD_DATA_BUF_TEX_3; + + case 4: + return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_4 : PVR_SPM_LOAD_DATA_BUF_TEX_4; + + case 5: + return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_5 : PVR_SPM_LOAD_DATA_BUF_TEX_5; + + case 6: + return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_6 : PVR_SPM_LOAD_DATA_BUF_TEX_6; + + default: + break; + } + + UNREACHABLE(""); +} + +static inline unsigned +pvr_uscgen_spm_load_data_size(struct pvr_spm_load_props *props) +{ + return PVR_SPM_LOAD_DATA_BUF_TEX_0 + + props->tile_buffer_count * (ROGUE_NUM_TEXSTATE_DWORDS + + (sizeof(uint64_t) / sizeof(uint32_t))); +} + +pco_shader *pvr_uscgen_spm_load(pco_ctx *ctx, struct pvr_spm_load_props *props); + +#define INDEX(o_r_c, t_b_c, i_m, i) \ + if (props->output_reg_count == o_r_c && \ + props->tile_buffer_count == t_b_c && props->is_multisampled == i_m) \ + return i + +inline static unsigned +pvr_uscgen_spm_load_index(struct pvr_spm_load_props *props) +{ + INDEX(1, 0, false, 0); + INDEX(2, 0, false, 1); + INDEX(4, 0, false, 2); + + INDEX(4, 1, false, 3); + INDEX(4, 2, false, 4); + INDEX(4, 3, false, 5); + INDEX(4, 4, false, 6); + INDEX(4, 5, false, 7); + INDEX(4, 6, false, 8); + INDEX(4, 7, false, 9); + + INDEX(1, 0, true, 10); + INDEX(2, 0, true, 11); + INDEX(4, 0, true, 12); + + INDEX(4, 1, true, 13); + INDEX(4, 2, true, 14); + INDEX(4, 3, true, 15); + INDEX(4, 4, true, 16); + INDEX(4, 5, true, 17); + INDEX(4, 6, true, 18); + INDEX(4, 7, true, 19); + + UNREACHABLE("Invalid SPM load shader properties."); +} +#undef INDEX + +#define PVR_NUM_SPM_LOAD_SHADERS 20U + #endif /* PVR_USC_H */ diff --git a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h b/src/imagination/vulkan/usc/programs/pvr_shader_factory.h deleted file mode 100644 index 163c05f7fba..00000000000 --- 
a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright © 2022 Imagination Technologies Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef PVR_SHADER_FACTORY_H -#define PVR_SHADER_FACTORY_H - -#include -#include - -#include "util/bitpack_helpers.h" -#include "util/bitscan.h" -#include "util/u_math.h" - -enum pvr_spm_load_const { - SPM_LOAD_CONST_TILE_BUFFER_1_UPPER, - SPM_LOAD_CONST_TILE_BUFFER_1_LOWER, - SPM_LOAD_CONST_TILE_BUFFER_2_UPPER, - SPM_LOAD_CONST_TILE_BUFFER_2_LOWER, - SPM_LOAD_CONST_TILE_BUFFER_3_UPPER, - SPM_LOAD_CONST_TILE_BUFFER_3_LOWER, - /* The following are only available if the core does not have the - * has_eight_output_registers feature. I.e. only available if the device has - * 4 output regs. 
- */ - SPM_LOAD_CONST_TILE_BUFFER_4_UPPER, - SPM_LOAD_CONST_TILE_BUFFER_4_LOWER, - SPM_LOAD_CONST_TILE_BUFFER_5_UPPER, - SPM_LOAD_CONST_TILE_BUFFER_5_LOWER, - SPM_LOAD_CONST_TILE_BUFFER_6_UPPER, - SPM_LOAD_CONST_TILE_BUFFER_6_LOWER, - SPM_LOAD_CONST_TILE_BUFFER_7_UPPER, - SPM_LOAD_CONST_TILE_BUFFER_7_LOWER, -}; -#define PVR_SPM_LOAD_CONST_COUNT (SPM_LOAD_CONST_TILE_BUFFER_7_LOWER + 1) -#define PVR_SPM_LOAD_DEST_UNUSED ~0 - -#define PVR_SPM_LOAD_SAMPLES_COUNT 4U - -#define PVR_SPM_LOAD_IN_REGS_COUNT 3 /* 1, 2, 4 */ -#define PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT 7 /* 1, 2, 3, 4, 5, 6, 7 */ - -/* If output_regs == 8 - * reg_load_programs = 4 # 1, 2, 4, 8 - * tile_buffer_load_programs = 3 # 1, 2, 3 - * else #output_regs == 4 - * reg_load_programs = 3 # 1, 2, 4 - * tile_buffer_load_programs = 7 # 1, 2, 3, 4, 5, 6, 7 - * - * See PVR_SPM_LOAD_IN_BUFFERS_COUNT for where the amount of - * tile_buffer_load_programs comes from. - * - * Tot = sample_count * (reg_load_programs + tile_buffer_load_programs) - */ -/* FIXME: This is currently hard coded for the am62. The Chromebook has 8 - * output regs so the count is different. 
- */ -#define PVR_SPM_LOAD_PROGRAM_COUNT \ - (PVR_SPM_LOAD_SAMPLES_COUNT * \ - (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT)) - -static inline uint32_t pvr_get_spm_load_program_index(uint32_t sample_count, - uint32_t num_tile_buffers, - uint32_t num_output_regs) -{ - uint32_t idx; - - assert(util_is_power_of_two_nonzero(sample_count)); - idx = util_logbase2(sample_count) * - (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT); - - assert((num_tile_buffers > 0) ^ (num_output_regs > 0)); - - if (num_output_regs > 0) { - assert(util_is_power_of_two_nonzero(num_output_regs)); - assert(util_logbase2(num_output_regs) < PVR_SPM_LOAD_IN_REGS_COUNT); - idx += util_logbase2(num_output_regs); - } else { - assert(num_tile_buffers <= PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT); - idx += PVR_SPM_LOAD_IN_REGS_COUNT + num_tile_buffers - 1; - } - - assert(idx < PVR_SPM_LOAD_PROGRAM_COUNT); - return idx; -} - -#endif /* PVR_SHADER_FACTORY_H */ diff --git a/src/imagination/vulkan/usc/programs/pvr_static_shaders.h b/src/imagination/vulkan/usc/programs/pvr_static_shaders.h deleted file mode 100644 index cfaf8b5a7c4..00000000000 --- a/src/imagination/vulkan/usc/programs/pvr_static_shaders.h +++ /dev/null @@ -1,1105 +0,0 @@ -/* - * Copyright © 2022 Imagination Technologies Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef PVR_STATIC_SHADERS_H -#define PVR_STATIC_SHADERS_H - -#include -#include - -/* TODO: Remove this once compiler is integrated. */ -#define PVR_INVALID_INST (~0) - -struct pvr_static_buffer { - uint32_t dst_idx; - uint32_t value; -}; - -struct pvr_shader_factory_info { - uint32_t temps_required; - uint32_t const_shared_regs; - uint32_t coeff_regs; - uint32_t input_regs; - uint32_t explicit_const_start_offset; - uint32_t code_size; - const uint8_t *const shader_code; - uint32_t const_calc_prog_inst_bytes; - uint32_t sec_temp_regs; - const uint8_t *const_calc_program; - uint32_t coeff_update_prog_start; - uint32_t coeff_update_temp_regs; - const uint32_t *driver_const_location_map; - uint32_t num_driver_consts; - const struct pvr_static_buffer *static_const_buffer; - uint32_t num_static_const; - uint32_t msaa_sample_count; -}; - -static const uint8_t spm_load_1X_1_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_1_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_1_regs_shader_code), - spm_load_1X_1_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t spm_load_1X_2_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_2_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_2_regs_shader_code), - spm_load_1X_2_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t 
spm_load_1X_4_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_4_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_4_regs_shader_code), - spm_load_1X_4_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t spm_load_1X_1_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_1X_1_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_1_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_1_buffers_shader_code), - spm_load_1X_1_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_1X_1_buffers_const_dest, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t spm_load_1X_2_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_1X_2_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_2_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_2_buffers_shader_code), - spm_load_1X_2_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_1X_2_buffers_const_dest, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t spm_load_1X_3_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_1X_3_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_3_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_3_buffers_shader_code), - spm_load_1X_3_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_1X_3_buffers_const_dest, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t spm_load_1X_4_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_1X_4_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_4_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_4_buffers_shader_code), - spm_load_1X_4_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_1X_4_buffers_const_dest, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t 
spm_load_1X_5_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_1X_5_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_5_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_5_buffers_shader_code), - spm_load_1X_5_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_1X_5_buffers_const_dest, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t spm_load_1X_6_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_1X_6_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_6_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_6_buffers_shader_code), - spm_load_1X_6_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_1X_6_buffers_const_dest, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t spm_load_1X_7_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_1X_7_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_1X_7_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_1X_7_buffers_shader_code), - spm_load_1X_7_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_1X_7_buffers_const_dest, - 0, - NULL, - 0, - 1, -}; - -static const uint8_t spm_load_2X_1_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_2X_1_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_2X_1_regs_shader_code), - spm_load_2X_1_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_2X_2_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_2X_2_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_2X_2_regs_shader_code), - spm_load_2X_2_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_2X_4_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_2X_4_regs_info = { - 0, - 0, - 0, - 
0, - 0, - sizeof(spm_load_2X_4_regs_shader_code), - spm_load_2X_4_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_2X_1_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_2X_1_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_2X_1_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_2X_1_buffers_shader_code), - spm_load_2X_1_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_2X_1_buffers_const_dest, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_2X_2_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_2X_2_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_2X_2_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_2X_2_buffers_shader_code), - spm_load_2X_2_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_2X_2_buffers_const_dest, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_2X_3_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_2X_3_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_2X_3_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_2X_3_buffers_shader_code), - spm_load_2X_3_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_2X_3_buffers_const_dest, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_2X_4_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_2X_4_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_2X_4_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_2X_4_buffers_shader_code), - spm_load_2X_4_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_2X_4_buffers_const_dest, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_2X_5_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_2X_5_buffers_const_dest[14] = { 0 }; - -static const struct 
pvr_shader_factory_info spm_load_2X_5_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_2X_5_buffers_shader_code), - spm_load_2X_5_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_2X_5_buffers_const_dest, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_2X_6_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_2X_6_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_2X_6_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_2X_6_buffers_shader_code), - spm_load_2X_6_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_2X_6_buffers_const_dest, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_2X_7_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_2X_7_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_2X_7_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_2X_7_buffers_shader_code), - spm_load_2X_7_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_2X_7_buffers_const_dest, - 0, - NULL, - 0, - 2, -}; - -static const uint8_t spm_load_4X_1_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_1_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_4X_1_regs_shader_code), - spm_load_4X_1_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 4, -}; - -static const uint8_t spm_load_4X_2_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_2_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_4X_2_regs_shader_code), - spm_load_4X_2_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 4, -}; - -static const uint8_t spm_load_4X_4_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_4_regs_info = { - 0, 0, 0, 0, 0, 0, spm_load_4X_4_regs_shader_code, 0, 0, NULL, 0, - 0, NULL, 0, NULL, 0, 4 -}; - -static const uint8_t spm_load_4X_1_buffers_shader_code[8] = 
{ 0 }; - -static const uint32_t spm_load_4X_1_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_1_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_4X_1_buffers_shader_code), - spm_load_4X_1_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_4X_1_buffers_const_dest, - 0, - NULL, - 0, - 4, -}; - -static const uint8_t spm_load_4X_2_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_4X_2_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_2_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_4X_2_buffers_shader_code), - spm_load_4X_2_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_4X_2_buffers_const_dest, - 0, - NULL, - 0, - 4, -}; - -static const uint8_t spm_load_4X_3_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_4X_3_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_3_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_4X_3_buffers_shader_code), - spm_load_4X_3_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_4X_3_buffers_const_dest, - 0, - NULL, - 0, - 4, -}; - -static const uint8_t spm_load_4X_4_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_4X_4_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_4_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_4X_4_buffers_shader_code), - spm_load_4X_4_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_4X_4_buffers_const_dest, - 0, - NULL, - 0, - 4, -}; - -static const uint8_t spm_load_4X_5_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_4X_5_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_5_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_4X_5_buffers_shader_code), - spm_load_4X_5_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_4X_5_buffers_const_dest, - 0, 
- NULL, - 0, - 4, -}; - -static const uint8_t spm_load_4X_6_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_4X_6_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_6_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_4X_6_buffers_shader_code), - spm_load_4X_6_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_4X_6_buffers_const_dest, - 0, - NULL, - 0, - 4, -}; - -static const uint8_t spm_load_4X_7_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_4X_7_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_4X_7_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_4X_7_buffers_shader_code), - spm_load_4X_7_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_4X_7_buffers_const_dest, - 0, - NULL, - 0, - 4, -}; - -static const uint8_t spm_load_8X_1_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_8X_1_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_8X_1_regs_shader_code), - spm_load_8X_1_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 8, -}; - -static const uint8_t spm_load_8X_2_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_8X_2_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_8X_2_regs_shader_code), - spm_load_8X_2_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 8, -}; - -static const uint8_t spm_load_8X_4_regs_shader_code[8] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_8X_4_regs_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_8X_4_regs_shader_code), - spm_load_8X_4_regs_shader_code, - 0, - 0, - NULL, - 0, - 0, - NULL, - 0, - NULL, - 0, - 8, -}; - -static const uint8_t spm_load_8X_1_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_8X_1_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_8X_1_buffers_info = { - 0, - 
0, - 0, - 0, - 0, - sizeof(spm_load_8X_1_buffers_shader_code), - spm_load_8X_1_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_8X_1_buffers_const_dest, - 0, - NULL, - 0, - 8, -}; - -static const uint8_t spm_load_8X_2_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_8X_2_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_8X_2_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_8X_2_buffers_shader_code), - spm_load_8X_2_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_8X_2_buffers_const_dest, - 0, - NULL, - 0, - 8, -}; - -static const uint8_t spm_load_8X_3_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_8X_3_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_8X_3_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_8X_3_buffers_shader_code), - spm_load_8X_3_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_8X_3_buffers_const_dest, - 0, - NULL, - 0, - 8, -}; - -static const uint8_t spm_load_8X_4_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_8X_4_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_8X_4_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_8X_4_buffers_shader_code), - spm_load_8X_4_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_8X_4_buffers_const_dest, - 0, - NULL, - 0, - 8, -}; - -static const uint8_t spm_load_8X_5_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_8X_5_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_8X_5_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_8X_5_buffers_shader_code), - spm_load_8X_5_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_8X_5_buffers_const_dest, - 0, - NULL, - 0, - 8, -}; - -static const uint8_t spm_load_8X_6_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_8X_6_buffers_const_dest[14] = { 0 }; 
- -static const struct pvr_shader_factory_info spm_load_8X_6_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_8X_6_buffers_shader_code), - spm_load_8X_6_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_8X_6_buffers_const_dest, - 0, - NULL, - 0, - 8, -}; - -static const uint8_t spm_load_8X_7_buffers_shader_code[8] = { 0 }; - -static const uint32_t spm_load_8X_7_buffers_const_dest[14] = { 0 }; - -static const struct pvr_shader_factory_info spm_load_8X_7_buffers_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(spm_load_8X_7_buffers_shader_code), - spm_load_8X_7_buffers_shader_code, - 0, - 0, - NULL, - 0, - 0, - spm_load_8X_7_buffers_const_dest, - 0, - NULL, - 0, - 8, -}; -static struct { - const uint8_t *code; - const uint32_t size; - const struct pvr_shader_factory_info *info; -} const spm_load_collection[40] = { - { spm_load_1X_1_regs_shader_code, - sizeof(spm_load_1X_1_regs_shader_code), - &spm_load_1X_1_regs_info }, - { spm_load_1X_2_regs_shader_code, - sizeof(spm_load_1X_2_regs_shader_code), - &spm_load_1X_2_regs_info }, - { spm_load_1X_4_regs_shader_code, - sizeof(spm_load_1X_4_regs_shader_code), - &spm_load_1X_4_regs_info }, - { spm_load_1X_1_buffers_shader_code, - sizeof(spm_load_1X_1_buffers_shader_code), - &spm_load_1X_1_buffers_info }, - { spm_load_1X_2_buffers_shader_code, - sizeof(spm_load_1X_2_buffers_shader_code), - &spm_load_1X_2_buffers_info }, - { spm_load_1X_3_buffers_shader_code, - sizeof(spm_load_1X_3_buffers_shader_code), - &spm_load_1X_3_buffers_info }, - { spm_load_1X_4_buffers_shader_code, - sizeof(spm_load_1X_4_buffers_shader_code), - &spm_load_1X_4_buffers_info }, - { spm_load_1X_5_buffers_shader_code, - sizeof(spm_load_1X_5_buffers_shader_code), - &spm_load_1X_5_buffers_info }, - { spm_load_1X_6_buffers_shader_code, - sizeof(spm_load_1X_6_buffers_shader_code), - &spm_load_1X_6_buffers_info }, - { spm_load_1X_7_buffers_shader_code, - sizeof(spm_load_1X_7_buffers_shader_code), - &spm_load_1X_7_buffers_info }, - { 
spm_load_2X_1_regs_shader_code, - sizeof(spm_load_2X_1_regs_shader_code), - &spm_load_2X_1_regs_info }, - { spm_load_2X_2_regs_shader_code, - sizeof(spm_load_2X_2_regs_shader_code), - &spm_load_2X_2_regs_info }, - { spm_load_2X_4_regs_shader_code, - sizeof(spm_load_2X_4_regs_shader_code), - &spm_load_2X_4_regs_info }, - { spm_load_2X_1_buffers_shader_code, - sizeof(spm_load_2X_1_buffers_shader_code), - &spm_load_2X_1_buffers_info }, - { spm_load_2X_2_buffers_shader_code, - sizeof(spm_load_2X_2_buffers_shader_code), - &spm_load_2X_2_buffers_info }, - { spm_load_2X_3_buffers_shader_code, - sizeof(spm_load_2X_3_buffers_shader_code), - &spm_load_2X_3_buffers_info }, - { spm_load_2X_4_buffers_shader_code, - sizeof(spm_load_2X_4_buffers_shader_code), - &spm_load_2X_4_buffers_info }, - { spm_load_2X_5_buffers_shader_code, - sizeof(spm_load_2X_5_buffers_shader_code), - &spm_load_2X_5_buffers_info }, - { spm_load_2X_6_buffers_shader_code, - sizeof(spm_load_2X_6_buffers_shader_code), - &spm_load_2X_6_buffers_info }, - { spm_load_2X_7_buffers_shader_code, - sizeof(spm_load_2X_7_buffers_shader_code), - &spm_load_2X_7_buffers_info }, - { spm_load_4X_1_regs_shader_code, - sizeof(spm_load_4X_1_regs_shader_code), - &spm_load_4X_1_regs_info }, - { spm_load_4X_2_regs_shader_code, - sizeof(spm_load_4X_2_regs_shader_code), - &spm_load_4X_2_regs_info }, - { spm_load_4X_4_regs_shader_code, - sizeof(spm_load_4X_4_regs_shader_code), - &spm_load_4X_4_regs_info }, - { spm_load_4X_1_buffers_shader_code, - sizeof(spm_load_4X_1_buffers_shader_code), - &spm_load_4X_1_buffers_info }, - { spm_load_4X_2_buffers_shader_code, - sizeof(spm_load_4X_2_buffers_shader_code), - &spm_load_4X_2_buffers_info }, - { spm_load_4X_3_buffers_shader_code, - sizeof(spm_load_4X_3_buffers_shader_code), - &spm_load_4X_3_buffers_info }, - { spm_load_4X_4_buffers_shader_code, - sizeof(spm_load_4X_4_buffers_shader_code), - &spm_load_4X_4_buffers_info }, - { spm_load_4X_5_buffers_shader_code, - 
sizeof(spm_load_4X_5_buffers_shader_code), - &spm_load_4X_5_buffers_info }, - { spm_load_4X_6_buffers_shader_code, - sizeof(spm_load_4X_6_buffers_shader_code), - &spm_load_4X_6_buffers_info }, - { spm_load_4X_7_buffers_shader_code, - sizeof(spm_load_4X_7_buffers_shader_code), - &spm_load_4X_7_buffers_info }, - { spm_load_8X_1_regs_shader_code, - sizeof(spm_load_8X_1_regs_shader_code), - &spm_load_8X_1_regs_info }, - { spm_load_8X_2_regs_shader_code, - sizeof(spm_load_8X_2_regs_shader_code), - &spm_load_8X_2_regs_info }, - { spm_load_8X_4_regs_shader_code, - sizeof(spm_load_8X_4_regs_shader_code), - &spm_load_8X_4_regs_info }, - { spm_load_8X_1_buffers_shader_code, - sizeof(spm_load_8X_1_buffers_shader_code), - &spm_load_8X_1_buffers_info }, - { spm_load_8X_2_buffers_shader_code, - sizeof(spm_load_8X_2_buffers_shader_code), - &spm_load_8X_2_buffers_info }, - { spm_load_8X_3_buffers_shader_code, - sizeof(spm_load_8X_3_buffers_shader_code), - &spm_load_8X_3_buffers_info }, - { spm_load_8X_4_buffers_shader_code, - sizeof(spm_load_8X_4_buffers_shader_code), - &spm_load_8X_4_buffers_info }, - { spm_load_8X_5_buffers_shader_code, - sizeof(spm_load_8X_5_buffers_shader_code), - &spm_load_8X_5_buffers_info }, - { spm_load_8X_6_buffers_shader_code, - sizeof(spm_load_8X_6_buffers_shader_code), - &spm_load_8X_6_buffers_info }, - { spm_load_8X_7_buffers_shader_code, - sizeof(spm_load_8X_7_buffers_shader_code), - &spm_load_8X_7_buffers_info }, -}; - -#endif /* PVR_STATIC_SHADERS_H */