mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 02:40:11 +01:00
pvr: switch to usc generated spm load shaders
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37439>
This commit is contained in:
parent
585cca9b2f
commit
825c4443bf
9 changed files with 387 additions and 1339 deletions
|
|
@ -71,4 +71,34 @@ enum pvr_clear_attach_data {
|
|||
_PVR_CLEAR_ATTACH_DATA_COUNT,
|
||||
};
|
||||
|
||||
/**
 * SPM load shader data; shared registers.
 *
 * Every value is a dword index into the shared-register block that the driver
 * fills in for the SPM load shaders:
 *
 *  - PVR_SPM_LOAD_DATA_SMP:        sampler state.
 *  - PVR_SPM_LOAD_DATA_REG_TEX:    texture state used to reload the output
 *                                  registers.
 *  - PVR_SPM_LOAD_DATA_BUF_TEX_n:  texture state used to reload tile buffer n.
 *  - PVR_SPM_LOAD_DATA_BUF_ADDR_n: 64-bit address of tile buffer n (low dword
 *                                  first).
 *
 * The TEX/ADDR order alternates between consecutive tile buffers. With these
 * values every texture state slot starts on a multiple of 4 dwords and every
 * address on a multiple of 2 dwords, with no padding in between.
 * NOTE(review): presumably this is an alignment requirement; confirm before
 * re-ordering entries.
 */
enum pvr_spm_load_data {
   PVR_SPM_LOAD_DATA_SMP = 0,

   PVR_SPM_LOAD_DATA_REG_TEX = 4,

   PVR_SPM_LOAD_DATA_BUF_TEX_0 = 8,
   PVR_SPM_LOAD_DATA_BUF_ADDR_0 = 12,

   PVR_SPM_LOAD_DATA_BUF_ADDR_1 = 14,
   PVR_SPM_LOAD_DATA_BUF_TEX_1 = 16,

   PVR_SPM_LOAD_DATA_BUF_TEX_2 = 20,
   PVR_SPM_LOAD_DATA_BUF_ADDR_2 = 24,

   PVR_SPM_LOAD_DATA_BUF_ADDR_3 = 26,
   PVR_SPM_LOAD_DATA_BUF_TEX_3 = 28,

   PVR_SPM_LOAD_DATA_BUF_TEX_4 = 32,
   PVR_SPM_LOAD_DATA_BUF_ADDR_4 = 36,

   PVR_SPM_LOAD_DATA_BUF_ADDR_5 = 38,
   PVR_SPM_LOAD_DATA_BUF_TEX_5 = 40,

   PVR_SPM_LOAD_DATA_BUF_TEX_6 = 44,
   PVR_SPM_LOAD_DATA_BUF_ADDR_6 = 48,

   /* Total number of dwords when all seven tile buffers are in use. */
   _PVR_SPM_LOAD_DATA_COUNT = 50,
};
|
||||
|
||||
#endif /* PVR_IFACE_H */
|
||||
|
|
|
|||
|
|
@ -3104,21 +3104,17 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
|
|||
goto err_finish_render_targets;
|
||||
|
||||
for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
|
||||
uint32_t emit_count;
|
||||
|
||||
result = pvr_spm_init_eot_state(device,
|
||||
&spm_eot_state_per_render[i],
|
||||
framebuffer,
|
||||
&pass->hw_setup->renders[i],
|
||||
&emit_count);
|
||||
&pass->hw_setup->renders[i]);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_finish_eot_state;
|
||||
|
||||
result = pvr_spm_init_bgobj_state(device,
|
||||
&spm_bgobj_state_per_render[i],
|
||||
framebuffer,
|
||||
&pass->hw_setup->renders[i],
|
||||
emit_count);
|
||||
&pass->hw_setup->renders[i]);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_finish_bgobj_state;
|
||||
|
||||
|
|
|
|||
|
|
@ -51,7 +51,6 @@
|
|||
#include "pvr_limits.h"
|
||||
#include "pvr_pds.h"
|
||||
#include "pvr_usc.h"
|
||||
#include "usc/programs/pvr_shader_factory.h"
|
||||
#include "pvr_spm.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_winsys.h"
|
||||
|
|
@ -266,7 +265,7 @@ struct pvr_device {
|
|||
|
||||
uint32_t pds_texture_program_data_size;
|
||||
uint32_t pds_texture_program_temps_count;
|
||||
} load_program[PVR_SPM_LOAD_PROGRAM_COUNT];
|
||||
} load_program[PVR_NUM_SPM_LOAD_SHADERS];
|
||||
} spm_load_state;
|
||||
|
||||
struct pvr_device_tile_buffer_state {
|
||||
|
|
|
|||
|
|
@ -37,9 +37,7 @@
|
|||
#include "pvr_job_common.h"
|
||||
#include "pvr_pds.h"
|
||||
#include "pvr_private.h"
|
||||
#include "usc/programs/pvr_shader_factory.h"
|
||||
#include "pvr_spm.h"
|
||||
#include "usc/programs/pvr_static_shaders.h"
|
||||
#include "pvr_tex_state.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_usc.h"
|
||||
|
|
@ -257,9 +255,10 @@ VkResult pvr_spm_scratch_buffer_get_buffer(
|
|||
VkResult pvr_device_init_spm_load_state(struct pvr_device *device)
|
||||
{
|
||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
uint32_t pds_texture_aligned_offsets[PVR_SPM_LOAD_PROGRAM_COUNT];
|
||||
uint32_t pds_kick_aligned_offsets[PVR_SPM_LOAD_PROGRAM_COUNT];
|
||||
uint32_t usc_aligned_offsets[PVR_SPM_LOAD_PROGRAM_COUNT];
|
||||
uint32_t pds_texture_aligned_offsets[PVR_NUM_SPM_LOAD_SHADERS];
|
||||
uint32_t pds_kick_aligned_offsets[PVR_NUM_SPM_LOAD_SHADERS];
|
||||
uint32_t usc_aligned_offsets[PVR_NUM_SPM_LOAD_SHADERS];
|
||||
pco_shader *shaders[PVR_NUM_SPM_LOAD_SHADERS];
|
||||
uint32_t pds_allocation_size = 0;
|
||||
uint32_t usc_allocation_size = 0;
|
||||
struct pvr_suballoc_bo *pds_bo;
|
||||
|
|
@ -267,9 +266,6 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device)
|
|||
uint8_t *mem_ptr;
|
||||
VkResult result;
|
||||
|
||||
static_assert(PVR_SPM_LOAD_PROGRAM_COUNT == ARRAY_SIZE(spm_load_collection),
|
||||
"Size mismatch");
|
||||
|
||||
/* TODO: We don't need to upload all the programs since the set contains
|
||||
* programs for devices with 8 output regs as well. We can save some memory
|
||||
* by not uploading them on devices without the feature.
|
||||
|
|
@ -277,11 +273,37 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device)
|
|||
* cache and generate the shaders as needed so this todo will be unnecessary.
|
||||
*/
|
||||
|
||||
/* Upload USC shaders. */
|
||||
/* Build and upload USC shaders. */
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) {
|
||||
usc_aligned_offsets[i] = usc_allocation_size;
|
||||
usc_allocation_size += ALIGN_POT(spm_load_collection[i].size, 4);
|
||||
struct pvr_spm_load_props props;
|
||||
|
||||
for (unsigned is_multisampled = 0; is_multisampled <= 1; ++is_multisampled) {
|
||||
for (unsigned output_reg_count_log2 = 0; output_reg_count_log2 <= 2;
|
||||
++output_reg_count_log2) {
|
||||
unsigned output_reg_count = 1 << output_reg_count_log2;
|
||||
|
||||
props = (struct pvr_spm_load_props){
|
||||
.output_reg_count = output_reg_count,
|
||||
.tile_buffer_count = 0,
|
||||
.is_multisampled = is_multisampled,
|
||||
};
|
||||
|
||||
unsigned u = pvr_uscgen_spm_load_index(&props);
|
||||
shaders[u] = pvr_uscgen_spm_load(device->pdevice->pco_ctx, &props);
|
||||
usc_allocation_size += pco_shader_binary_size(shaders[u]);
|
||||
|
||||
if (output_reg_count != 4)
|
||||
continue;
|
||||
|
||||
for (unsigned tile_buffer_count = 1; tile_buffer_count <= 7;
|
||||
++tile_buffer_count) {
|
||||
props.tile_buffer_count = tile_buffer_count;
|
||||
|
||||
u = pvr_uscgen_spm_load_index(&props);
|
||||
shaders[u] = pvr_uscgen_spm_load(device->pdevice->pco_ctx, &props);
|
||||
usc_allocation_size += pco_shader_binary_size(shaders[u]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result = pvr_bo_suballoc(&device->suballoc_usc,
|
||||
|
|
@ -294,15 +316,19 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device)
|
|||
|
||||
mem_ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(usc_bo);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) {
|
||||
memcpy(mem_ptr + usc_aligned_offsets[i],
|
||||
spm_load_collection[i].code,
|
||||
spm_load_collection[i].size);
|
||||
unsigned offset = 0;
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
|
||||
unsigned shader_size = pco_shader_binary_size(shaders[u]);
|
||||
|
||||
usc_aligned_offsets[u] = offset;
|
||||
memcpy(&mem_ptr[offset], pco_shader_binary_data(shaders[u]), shader_size);
|
||||
|
||||
offset += shader_size;
|
||||
}
|
||||
|
||||
/* Upload PDS programs. */
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) {
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
|
||||
struct pvr_pds_pixel_shader_sa_program pds_texture_program = {
|
||||
/* DMA for clear colors and tile buffer address parts. */
|
||||
.num_texture_dma_kicks = 1,
|
||||
|
|
@ -320,17 +346,17 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device)
|
|||
* behavior the data size is always the same here. Should we try saving
|
||||
* some memory by adjusting things based on that?
|
||||
*/
|
||||
device->spm_load_state.load_program[i].pds_texture_program_data_size =
|
||||
device->spm_load_state.load_program[u].pds_texture_program_data_size =
|
||||
pds_texture_program.data_size;
|
||||
|
||||
pds_texture_aligned_offsets[i] = pds_allocation_size;
|
||||
pds_texture_aligned_offsets[u] = pds_allocation_size;
|
||||
/* FIXME: Figure out the define for alignment of 16. */
|
||||
pds_allocation_size +=
|
||||
ALIGN_POT(PVR_DW_TO_BYTES(pds_texture_program.code_size), 16);
|
||||
|
||||
pvr_pds_set_sizes_pixel_shader(&pds_kick_program);
|
||||
|
||||
pds_kick_aligned_offsets[i] = pds_allocation_size;
|
||||
pds_kick_aligned_offsets[u] = pds_allocation_size;
|
||||
/* FIXME: Figure out the define for alignment of 16. */
|
||||
pds_allocation_size +=
|
||||
ALIGN_POT(PVR_DW_TO_BYTES(pds_kick_program.code_size +
|
||||
|
|
@ -351,52 +377,53 @@ VkResult pvr_device_init_spm_load_state(struct pvr_device *device)
|
|||
|
||||
mem_ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(pds_bo);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) {
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
|
||||
struct pvr_pds_pixel_shader_sa_program pds_texture_program = {
|
||||
/* DMA for clear colors and tile buffer address parts. */
|
||||
.num_texture_dma_kicks = 1,
|
||||
};
|
||||
const pvr_dev_addr_t usc_program_dev_addr =
|
||||
PVR_DEV_ADDR_OFFSET(usc_bo->dev_addr, usc_aligned_offsets[i]);
|
||||
PVR_DEV_ADDR_OFFSET(usc_bo->dev_addr, usc_aligned_offsets[u]);
|
||||
struct pvr_pds_kickusc_program pds_kick_program = { 0 };
|
||||
enum ROGUE_PDSINST_DOUTU_SAMPLE_RATE sample_rate;
|
||||
|
||||
pco_data *shader_data = pco_shader_data(shaders[u]);
|
||||
|
||||
pvr_pds_generate_pixel_shader_sa_code_segment(
|
||||
&pds_texture_program,
|
||||
(uint32_t *)(mem_ptr + pds_texture_aligned_offsets[i]));
|
||||
|
||||
if (spm_load_collection[i].info->msaa_sample_count > 1)
|
||||
sample_rate = ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL;
|
||||
else
|
||||
sample_rate = ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE;
|
||||
(uint32_t *)(mem_ptr + pds_texture_aligned_offsets[u]));
|
||||
|
||||
pvr_pds_setup_doutu(&pds_kick_program.usc_task_control,
|
||||
usc_program_dev_addr.addr,
|
||||
spm_load_collection[i].info->temps_required,
|
||||
sample_rate,
|
||||
shader_data->common.temps,
|
||||
shader_data->fs.uses.sample_shading
|
||||
? ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL
|
||||
: ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
|
||||
false);
|
||||
|
||||
/* Generated both code and data. */
|
||||
pvr_pds_generate_pixel_shader_program(
|
||||
&pds_kick_program,
|
||||
(uint32_t *)(mem_ptr + pds_kick_aligned_offsets[i]));
|
||||
(uint32_t *)(mem_ptr + pds_kick_aligned_offsets[u]));
|
||||
|
||||
device->spm_load_state.load_program[i].pds_pixel_program_offset =
|
||||
PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_kick_aligned_offsets[i]);
|
||||
device->spm_load_state.load_program[i].pds_uniform_program_offset =
|
||||
PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_texture_aligned_offsets[i]);
|
||||
device->spm_load_state.load_program[u].pds_pixel_program_offset =
|
||||
PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_kick_aligned_offsets[u]);
|
||||
device->spm_load_state.load_program[u].pds_uniform_program_offset =
|
||||
PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_texture_aligned_offsets[u]);
|
||||
|
||||
/* TODO: From looking at the pvr_pds_generate_...() functions, it seems
|
||||
* like temps_used is always 1. Should we remove this and hard code it
|
||||
* with a define in the PDS code?
|
||||
*/
|
||||
device->spm_load_state.load_program[i].pds_texture_program_temps_count =
|
||||
device->spm_load_state.load_program[u].pds_texture_program_temps_count =
|
||||
pds_texture_program.temps_used;
|
||||
}
|
||||
|
||||
device->spm_load_state.usc_programs = usc_bo;
|
||||
device->spm_load_state.pds_programs = pds_bo;
|
||||
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u)
|
||||
ralloc_free(shaders[u]);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -482,9 +509,6 @@ static inline void pvr_set_pbe_all_valid_mask(struct usc_mrt_desc *desc)
|
|||
desc->valid_mask[i] = ~0;
|
||||
}
|
||||
|
||||
#define PVR_DEV_ADDR_ADVANCE(_addr, _offset) \
|
||||
_addr = PVR_DEV_ADDR_OFFSET(_addr, _offset)
|
||||
|
||||
/**
|
||||
* \brief Sets up PBE registers, PBE state values and MRT data per a single
|
||||
* render output requiring 8 dwords to be written.
|
||||
|
|
@ -534,7 +558,7 @@ static uint64_t pvr_spm_setup_pbe_eight_dword_write(
|
|||
pbe_state_word_0_out,
|
||||
pbe_reg_word_0_out);
|
||||
|
||||
PVR_DEV_ADDR_ADVANCE(scratch_buffer_addr, mem_stored);
|
||||
scratch_buffer_addr = PVR_DEV_ADDR_OFFSET(scratch_buffer_addr, mem_stored);
|
||||
|
||||
render_target_used++;
|
||||
|
||||
|
|
@ -547,7 +571,7 @@ static uint64_t pvr_spm_setup_pbe_eight_dword_write(
|
|||
pbe_state_word_1_out,
|
||||
pbe_reg_word_1_out);
|
||||
|
||||
PVR_DEV_ADDR_ADVANCE(scratch_buffer_addr, mem_stored);
|
||||
scratch_buffer_addr = PVR_DEV_ADDR_OFFSET(scratch_buffer_addr, mem_stored);
|
||||
|
||||
render_target_used++;
|
||||
*render_target_used_out = render_target_used;
|
||||
|
|
@ -615,8 +639,7 @@ VkResult
|
|||
pvr_spm_init_eot_state(struct pvr_device *device,
|
||||
struct pvr_spm_eot_state *spm_eot_state,
|
||||
const struct pvr_framebuffer *framebuffer,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
uint32_t *emit_count_out)
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render)
|
||||
{
|
||||
const VkExtent2D framebuffer_size = {
|
||||
.width = framebuffer->width,
|
||||
|
|
@ -659,7 +682,8 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
|||
spm_eot_state->pbe_reg_words[total_render_target_used + 1],
|
||||
&render_targets_used);
|
||||
|
||||
PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored);
|
||||
next_scratch_buffer_addr =
|
||||
PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_stored);
|
||||
total_render_target_used += render_targets_used;
|
||||
|
||||
/* Store off-chip tile data (i.e. tile buffers). */
|
||||
|
|
@ -684,7 +708,8 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
|||
spm_eot_state->pbe_reg_words[total_render_target_used + 1],
|
||||
&render_targets_used);
|
||||
|
||||
PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored);
|
||||
next_scratch_buffer_addr =
|
||||
PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_stored);
|
||||
total_render_target_used += render_targets_used;
|
||||
}
|
||||
} else {
|
||||
|
|
@ -700,7 +725,8 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
|||
pbe_state_words[total_render_target_used],
|
||||
spm_eot_state->pbe_reg_words[total_render_target_used]);
|
||||
|
||||
PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored);
|
||||
next_scratch_buffer_addr =
|
||||
PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_stored);
|
||||
|
||||
total_render_target_used++;
|
||||
|
||||
|
|
@ -723,7 +749,8 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
|||
pbe_state_words[total_render_target_used],
|
||||
spm_eot_state->pbe_reg_words[total_render_target_used]);
|
||||
|
||||
PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored);
|
||||
next_scratch_buffer_addr =
|
||||
PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_stored);
|
||||
|
||||
total_render_target_used++;
|
||||
}
|
||||
|
|
@ -763,8 +790,6 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
|||
spm_eot_state->pixel_event_program_data_upload = pds_eot_program.pvr_bo;
|
||||
spm_eot_state->pixel_event_program_data_offset = pds_eot_program.data_offset;
|
||||
|
||||
*emit_count_out = total_render_target_used;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -795,7 +820,7 @@ pvr_spm_setup_texture_state_words(struct pvr_device *device,
|
|||
const VkExtent2D framebuffer_size,
|
||||
uint32_t sample_count,
|
||||
pvr_dev_addr_t scratch_buffer_addr,
|
||||
uint64_t *image_state_ptr,
|
||||
void *image_state_ptr,
|
||||
uint64_t *mem_used_out)
|
||||
{
|
||||
const uint64_t aligned_fb_width =
|
||||
|
|
@ -835,7 +860,9 @@ pvr_spm_setup_texture_state_words(struct pvr_device *device,
|
|||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
memcpy(image_state_ptr, &image_descriptor, sizeof(image_descriptor));
|
||||
memcpy(image_state_ptr,
|
||||
image_descriptor.words,
|
||||
sizeof(image_descriptor.words));
|
||||
|
||||
*mem_used_out = fb_area * PVR_DW_TO_BYTES(dword_count) * sample_count;
|
||||
|
||||
|
|
@ -913,13 +940,8 @@ VkResult
|
|||
pvr_spm_init_bgobj_state(struct pvr_device *device,
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state,
|
||||
const struct pvr_framebuffer *framebuffer,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
uint32_t emit_count)
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render)
|
||||
{
|
||||
const uint32_t spm_load_program_idx =
|
||||
pvr_get_spm_load_program_index(hw_render->sample_count,
|
||||
hw_render->tile_buffers_count,
|
||||
hw_render->output_regs_count);
|
||||
const VkExtent2D framebuffer_size = {
|
||||
.width = framebuffer->width,
|
||||
.height = framebuffer->height,
|
||||
|
|
@ -928,17 +950,29 @@ pvr_spm_init_bgobj_state(struct pvr_device *device,
|
|||
framebuffer->scratch_buffer->bo->vma->dev_addr;
|
||||
struct pvr_spm_per_load_program_state *load_program_state;
|
||||
struct pvr_pds_upload pds_texture_data_upload;
|
||||
const struct pvr_shader_factory_info *info;
|
||||
struct pvr_sampler_descriptor *descriptor;
|
||||
uint64_t consts_buffer_size;
|
||||
uint32_t dword_count;
|
||||
uint32_t *mem_ptr;
|
||||
VkResult result;
|
||||
|
||||
assert(spm_load_program_idx < ARRAY_SIZE(spm_load_collection));
|
||||
info = spm_load_collection[spm_load_program_idx].info;
|
||||
/* Even if we might have 8 output regs we can only pack and write 4 dwords
|
||||
* using R32G32B32A32_UINT.
|
||||
*/
|
||||
if (hw_render->tile_buffers_count > 0)
|
||||
dword_count = 4;
|
||||
else
|
||||
dword_count = MIN2(hw_render->output_regs_count, 4);
|
||||
|
||||
consts_buffer_size = PVR_DW_TO_BYTES(info->const_shared_regs);
|
||||
struct pvr_spm_load_props props = {
|
||||
.output_reg_count = dword_count,
|
||||
.tile_buffer_count = hw_render->tile_buffers_count,
|
||||
.is_multisampled = hw_render->sample_count > 1,
|
||||
};
|
||||
|
||||
const uint32_t spm_load_program_idx = pvr_uscgen_spm_load_index(&props);
|
||||
|
||||
consts_buffer_size = PVR_DW_TO_BYTES(pvr_uscgen_spm_load_data_size(&props));
|
||||
|
||||
result = pvr_bo_alloc(device,
|
||||
device->heaps.general_heap,
|
||||
|
|
@ -951,35 +985,17 @@ pvr_spm_init_bgobj_state(struct pvr_device *device,
|
|||
|
||||
mem_ptr = spm_bgobj_state->consts_buffer->bo->map;
|
||||
|
||||
if (info->driver_const_location_map) {
|
||||
const uint32_t *const const_map = info->driver_const_location_map;
|
||||
for (unsigned u = 0; u < hw_render->tile_buffers_count; ++u) {
|
||||
unsigned tile_buffer_addr_location = pvr_uscgen_spm_buffer_data(u, true);
|
||||
pvr_dev_addr_t tile_buffer_addr =
|
||||
device->tile_buffer_state.buffers[u]->vma->dev_addr;
|
||||
|
||||
for (uint32_t i = 0; i < PVR_SPM_LOAD_CONST_COUNT; i += 2) {
|
||||
pvr_dev_addr_t tile_buffer_addr;
|
||||
|
||||
if (const_map[i] == PVR_SPM_LOAD_DEST_UNUSED) {
|
||||
#if MESA_DEBUG
|
||||
for (uint32_t j = i; j < PVR_SPM_LOAD_CONST_COUNT; j++)
|
||||
assert(const_map[j] == PVR_SPM_LOAD_DEST_UNUSED);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
tile_buffer_addr =
|
||||
device->tile_buffer_state.buffers[i / 2]->vma->dev_addr;
|
||||
|
||||
assert(const_map[i] == const_map[i + 1] + 1);
|
||||
mem_ptr[const_map[i]] = tile_buffer_addr.addr >> 32;
|
||||
mem_ptr[const_map[i + 1]] = (uint32_t)tile_buffer_addr.addr;
|
||||
}
|
||||
mem_ptr[tile_buffer_addr_location] = tile_buffer_addr.addr & 0xffffffff;
|
||||
mem_ptr[tile_buffer_addr_location + 1] = tile_buffer_addr.addr >> 32;
|
||||
}
|
||||
|
||||
/* TODO: The 32 comes from how the shaders are compiled. We should
|
||||
* unhardcode it when this is hooked up to the compiler.
|
||||
*/
|
||||
descriptor = (struct pvr_sampler_descriptor *)(mem_ptr + 32);
|
||||
*descriptor = (struct pvr_sampler_descriptor){ 0 };
|
||||
|
||||
descriptor =
|
||||
(struct pvr_sampler_descriptor *)&mem_ptr[PVR_SPM_LOAD_DATA_SMP];
|
||||
pvr_csb_pack (&descriptor->words[0], TEXSTATE_SAMPLER_WORD0, sampler) {
|
||||
sampler.non_normalized_coords = true;
|
||||
sampler.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
|
||||
|
|
@ -993,38 +1009,42 @@ pvr_spm_init_bgobj_state(struct pvr_device *device,
|
|||
|
||||
pvr_csb_pack (&descriptor->words[1], TEXSTATE_SAMPLER_WORD1, sampler) {}
|
||||
|
||||
/* Even if we might have 8 output regs we can only pack and write 4 dwords
|
||||
* using R32G32B32A32_UINT.
|
||||
*/
|
||||
if (hw_render->tile_buffers_count > 0)
|
||||
dword_count = 4;
|
||||
else
|
||||
dword_count = MIN2(hw_render->output_regs_count, 4);
|
||||
uint64_t mem_used = 0;
|
||||
/* Setup image descriptor for reg output. */
|
||||
result =
|
||||
pvr_spm_setup_texture_state_words(device,
|
||||
dword_count,
|
||||
framebuffer_size,
|
||||
hw_render->sample_count,
|
||||
next_scratch_buffer_addr,
|
||||
&mem_ptr[PVR_SPM_LOAD_DATA_REG_TEX],
|
||||
&mem_used);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_consts_buffer;
|
||||
|
||||
for (uint32_t i = 0; i < emit_count; i++) {
|
||||
uint64_t *mem_ptr_u64 = (uint64_t *)mem_ptr;
|
||||
uint64_t mem_used = 0;
|
||||
next_scratch_buffer_addr =
|
||||
PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_used);
|
||||
|
||||
assert((sizeof(struct pvr_image_descriptor) / sizeof(uint64_t)) ==
|
||||
PVR_IMAGE_DESCRIPTOR_SIZE);
|
||||
mem_ptr_u64 +=
|
||||
i * (sizeof(struct pvr_image_descriptor) / sizeof(uint64_t));
|
||||
/* Setup image descriptors for tile buffer outputs. */
|
||||
for (unsigned u = 0; u < hw_render->tile_buffers_count; ++u) {
|
||||
unsigned tile_buffer_tex_state_location =
|
||||
pvr_uscgen_spm_buffer_data(u, false);
|
||||
|
||||
result = pvr_spm_setup_texture_state_words(device,
|
||||
dword_count,
|
||||
framebuffer_size,
|
||||
hw_render->sample_count,
|
||||
next_scratch_buffer_addr,
|
||||
mem_ptr_u64,
|
||||
&mem_used);
|
||||
result = pvr_spm_setup_texture_state_words(
|
||||
device,
|
||||
dword_count,
|
||||
framebuffer_size,
|
||||
hw_render->sample_count,
|
||||
next_scratch_buffer_addr,
|
||||
&mem_ptr[tile_buffer_tex_state_location],
|
||||
&mem_used);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_consts_buffer;
|
||||
|
||||
PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_used);
|
||||
next_scratch_buffer_addr =
|
||||
PVR_DEV_ADDR_OFFSET(next_scratch_buffer_addr, mem_used);
|
||||
}
|
||||
|
||||
assert(spm_load_program_idx <
|
||||
ARRAY_SIZE(device->spm_load_state.load_program));
|
||||
load_program_state =
|
||||
&device->spm_load_state.load_program[spm_load_program_idx];
|
||||
|
||||
|
|
@ -1032,7 +1052,7 @@ pvr_spm_init_bgobj_state(struct pvr_device *device,
|
|||
device,
|
||||
load_program_state->pds_texture_program_data_size,
|
||||
spm_bgobj_state->consts_buffer,
|
||||
info->const_shared_regs,
|
||||
consts_buffer_size,
|
||||
&pds_texture_data_upload);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_consts_buffer;
|
||||
|
|
@ -1065,7 +1085,7 @@ pvr_spm_init_bgobj_state(struct pvr_device *device,
|
|||
value) {
|
||||
/* clang-format on */
|
||||
value.usc_sharedsize =
|
||||
DIV_ROUND_UP(info->const_shared_regs,
|
||||
DIV_ROUND_UP(consts_buffer_size,
|
||||
ROGUE_CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE);
|
||||
value.pds_texturestatesize = DIV_ROUND_UP(
|
||||
pds_texture_data_upload.data_size,
|
||||
|
|
@ -1089,5 +1109,3 @@ void pvr_spm_finish_bgobj_state(struct pvr_device *device,
|
|||
pvr_bo_suballoc_free(spm_bgobj_state->pds_texture_data_upload);
|
||||
pvr_bo_free(device, spm_bgobj_state->consts_buffer);
|
||||
}
|
||||
|
||||
#undef PVR_DEV_ADDR_ADVANCE
|
||||
|
|
|
|||
|
|
@ -115,8 +115,8 @@ VkResult
|
|||
pvr_spm_init_eot_state(struct pvr_device *device,
|
||||
struct pvr_spm_eot_state *spm_eot_state,
|
||||
const struct pvr_framebuffer *framebuffer,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
uint32_t *emit_count_out);
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render);
|
||||
|
||||
void pvr_spm_finish_eot_state(struct pvr_device *device,
|
||||
struct pvr_spm_eot_state *spm_eot_state);
|
||||
|
||||
|
|
@ -124,8 +124,8 @@ VkResult
|
|||
pvr_spm_init_bgobj_state(struct pvr_device *device,
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state,
|
||||
const struct pvr_framebuffer *framebuffer,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
uint32_t emit_count);
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render);
|
||||
|
||||
void pvr_spm_finish_bgobj_state(struct pvr_device *device,
|
||||
struct pvr_spm_bgobj_state *spm_bgobj_state);
|
||||
|
||||
|
|
|
|||
|
|
@ -1319,3 +1319,132 @@ pvr_usc_zero_init_wg_mem(pco_ctx *ctx, unsigned start, unsigned count)
|
|||
|
||||
return build_shader(ctx, b.shader, &data);
|
||||
}
|
||||
|
||||
/**
 * Builds an SPM load fragment shader.
 *
 * The shader samples previously stored data through texture states held in
 * shared registers (see enum pvr_spm_load_data) at the fragment position and
 * writes it back:
 *  - for each tile buffer, four components are sampled and written with tiled
 *    DMA stores to the tile buffer address read from shared registers;
 *  - for the output registers, props->output_reg_count components are sampled
 *    and stored to FRAG_RESULT_DATA0.
 *
 * \param[in] ctx   PCO compiler context.
 * \param[in] props Shader properties; output_reg_count must be 1, 2 or 4.
 * \return The shader returned by build_shader().
 */
pco_shader *pvr_uscgen_spm_load(pco_ctx *ctx, struct pvr_spm_load_props *props)
{
   pco_data data = { 0 };

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT,
      pco_nir_options(),
      "spm_load(%u output regs, %u tile buffers, %s)",
      props->output_reg_count,
      props->tile_buffer_count,
      props->is_multisampled ? "ms" : "non-ms");

   b.shader->info.fs.uses_sample_shading = props->is_multisampled;

   nir_variable *pos = nir_get_variable_with_location(b.shader,
                                                      nir_var_shader_in,
                                                      VARYING_SLOT_POS,
                                                      glsl_vec4_type());
   pos->data.interpolation = INTERP_MODE_NOPERSPECTIVE;

   /* Only the x/y components of the position are used as coordinates. */
   nir_def *coords = nir_channels(&b, nir_load_var(&b, pos), 0b11);
   nir_def *ms_index = props->is_multisampled ? nir_load_sample_id(&b) : NULL;

   nir_def *smp_state = nir_load_preamble(&b,
                                          ROGUE_NUM_TEXSTATE_DWORDS,
                                          32,
                                          .base = PVR_SPM_LOAD_DATA_SMP);

   /* Initialize common params. */
   pco_smp_params params = {
      .smp_state = smp_state,
      .dest_type = nir_type_uint32,
      .sampler_dim = GLSL_SAMPLER_DIM_2D,
      .coords = coords,
      .lod_replace = nir_imm_int(&b, 0),
      .ms_index = ms_index,
   };

   nir_def *valid_mask = nir_load_savmsk_vm_pco(&b);
   nir_intrinsic_instr *smp;

   /* Emit tile buffer sample + writes. */
   /* TODO: emit nir_store_outputs instead, needs backend to handle
    * discontiguous tile buffer locations.
    */
   for (unsigned buffer = 0; buffer < props->tile_buffer_count; ++buffer) {
      unsigned tex_base = pvr_uscgen_spm_buffer_data(buffer, false);
      params.tex_state =
         nir_load_preamble(&b, ROGUE_NUM_TEXSTATE_DWORDS, 32, .base = tex_base);
      params.sample_components = 4;

      smp = pco_emit_nir_smp(&b, &params);

      /* The tile buffer address is stored as two dwords, low dword first. */
      unsigned tile_addr_base = pvr_uscgen_spm_buffer_data(buffer, true);
      nir_def *tile_addr_lo =
         nir_load_preamble(&b, 1, 32, .base = tile_addr_base);
      nir_def *tile_addr_hi =
         nir_load_preamble(&b, 1, 32, .base = tile_addr_base + 1);

      for (unsigned u = 0; u < params.sample_components; ++u) {
         nir_def *tiled_offset = nir_load_tiled_offset_pco(&b, .component = u);

         nir_def *addr =
            nir_uadd64_32(&b, tile_addr_lo, tile_addr_hi, tiled_offset);

         /* Named so that it does not shadow the function-level pco_data. */
         nir_def *component = nir_channel(&b, &smp->def, u);

         nir_def *addr_data = nir_vec3(&b,
                                       nir_channel(&b, addr, 0),
                                       nir_channel(&b, addr, 1),
                                       component);

         nir_dma_st_tiled_pco(&b, addr_data, valid_mask);
      }
   }

   /* Emit output reg sample + write. */
   switch (props->output_reg_count) {
   case 1:
      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32_UINT;
      break;

   case 2:
      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32G32_UINT;
      break;

   case 4:
      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32G32B32A32_UINT;
      break;

   default:
      UNREACHABLE("");
   }

   data.fs.outputs[FRAG_RESULT_DATA0] = (pco_range){
      .start = 0,
      .count = props->output_reg_count,
   };

   nir_create_variable_with_location(b.shader,
                                     nir_var_shader_out,
                                     FRAG_RESULT_DATA0,
                                     glsl_uvec_type(props->output_reg_count));

   params.tex_state = nir_load_preamble(&b,
                                        ROGUE_NUM_TEXSTATE_DWORDS,
                                        32,
                                        .base = PVR_SPM_LOAD_DATA_REG_TEX);
   params.sample_components = props->output_reg_count;

   smp = pco_emit_nir_smp(&b, &params);

   /* One single-component store per output register. */
   for (unsigned u = 0; u < props->output_reg_count; ++u) {
      nir_store_output(&b,
                       nir_channel(&b, &smp->def, u),
                       nir_imm_int(&b, 0),
                       .base = 0,
                       .component = u,
                       .src_type = nir_type_invalid | 32,
                       .write_mask = 1,
                       .io_semantics.location = FRAG_RESULT_DATA0,
                       .io_semantics.num_slots = 1);
   }

   nir_jump(&b, nir_jump_return);

   return build_shader(ctx, b.shader, &data);
}
|
||||
|
|
|
|||
|
|
@ -99,4 +99,91 @@ pvr_uscgen_clear_attach_index(struct pvr_clear_attach_props *props)
|
|||
pco_shader *
|
||||
pvr_usc_zero_init_wg_mem(pco_ctx *ctx, unsigned start, unsigned count);
|
||||
|
||||
/* SPM load shader generation. */
|
||||
/** Properties selecting one SPM load shader variant. */
struct pvr_spm_load_props {
   /* Number of output register dwords to reload (1, 2 or 4). */
   unsigned output_reg_count;

   /* Number of tile buffers to reload (0 to 7). */
   unsigned tile_buffer_count;

   /* Whether the shader runs per-sample and samples with a sample index. */
   bool is_multisampled;
};
|
||||
|
||||
static inline unsigned pvr_uscgen_spm_buffer_data(unsigned buffer_index,
|
||||
bool addr)
|
||||
{
|
||||
switch (buffer_index) {
|
||||
case 0:
|
||||
return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_0 : PVR_SPM_LOAD_DATA_BUF_TEX_0;
|
||||
|
||||
case 1:
|
||||
return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_1 : PVR_SPM_LOAD_DATA_BUF_TEX_1;
|
||||
|
||||
case 2:
|
||||
return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_2 : PVR_SPM_LOAD_DATA_BUF_TEX_2;
|
||||
|
||||
case 3:
|
||||
return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_3 : PVR_SPM_LOAD_DATA_BUF_TEX_3;
|
||||
|
||||
case 4:
|
||||
return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_4 : PVR_SPM_LOAD_DATA_BUF_TEX_4;
|
||||
|
||||
case 5:
|
||||
return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_5 : PVR_SPM_LOAD_DATA_BUF_TEX_5;
|
||||
|
||||
case 6:
|
||||
return addr ? PVR_SPM_LOAD_DATA_BUF_ADDR_6 : PVR_SPM_LOAD_DATA_BUF_TEX_6;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
UNREACHABLE("");
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
pvr_uscgen_spm_load_data_size(struct pvr_spm_load_props *props)
|
||||
{
|
||||
return PVR_SPM_LOAD_DATA_BUF_TEX_0 +
|
||||
props->tile_buffer_count * (ROGUE_NUM_TEXSTATE_DWORDS +
|
||||
(sizeof(uint64_t) / sizeof(uint32_t)));
|
||||
}
|
||||
|
||||
pco_shader *pvr_uscgen_spm_load(pco_ctx *ctx, struct pvr_spm_load_props *props);
|
||||
|
||||
#define INDEX(o_r_c, t_b_c, i_m, i) \
|
||||
if (props->output_reg_count == o_r_c && \
|
||||
props->tile_buffer_count == t_b_c && props->is_multisampled == i_m) \
|
||||
return i
|
||||
|
||||
inline static unsigned
|
||||
pvr_uscgen_spm_load_index(struct pvr_spm_load_props *props)
|
||||
{
|
||||
INDEX(1, 0, false, 0);
|
||||
INDEX(2, 0, false, 1);
|
||||
INDEX(4, 0, false, 2);
|
||||
|
||||
INDEX(4, 1, false, 3);
|
||||
INDEX(4, 2, false, 4);
|
||||
INDEX(4, 3, false, 5);
|
||||
INDEX(4, 4, false, 6);
|
||||
INDEX(4, 5, false, 7);
|
||||
INDEX(4, 6, false, 8);
|
||||
INDEX(4, 7, false, 9);
|
||||
|
||||
INDEX(1, 0, true, 10);
|
||||
INDEX(2, 0, true, 11);
|
||||
INDEX(4, 0, true, 12);
|
||||
|
||||
INDEX(4, 1, true, 13);
|
||||
INDEX(4, 2, true, 14);
|
||||
INDEX(4, 3, true, 15);
|
||||
INDEX(4, 4, true, 16);
|
||||
INDEX(4, 5, true, 17);
|
||||
INDEX(4, 6, true, 18);
|
||||
INDEX(4, 7, true, 19);
|
||||
|
||||
UNREACHABLE("Invalid SPM load shader properties.");
|
||||
}
|
||||
#undef INDEX
|
||||
|
||||
#define PVR_NUM_SPM_LOAD_SHADERS 20U
|
||||
|
||||
#endif /* PVR_USC_H */
|
||||
|
|
|
|||
|
|
@ -1,106 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2022 Imagination Technologies Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef PVR_SHADER_FACTORY_H
|
||||
#define PVR_SHADER_FACTORY_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "util/bitpack_helpers.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
/**
 * SPM load shader constants: one UPPER/LOWER pair per tile buffer, numbered
 * from tile buffer 1.
 *
 * NOTE(review): UPPER/LOWER presumably name the two 32-bit halves of a tile
 * buffer address - confirm against the code that fills these constants.
 */
enum pvr_spm_load_const {
   SPM_LOAD_CONST_TILE_BUFFER_1_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_1_LOWER,
   SPM_LOAD_CONST_TILE_BUFFER_2_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_2_LOWER,
   SPM_LOAD_CONST_TILE_BUFFER_3_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_3_LOWER,
   /* The following are only available if the core does not have the
    * has_eight_output_registers feature. I.e. only available if the device has
    * 4 output regs.
    */
   SPM_LOAD_CONST_TILE_BUFFER_4_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_4_LOWER,
   SPM_LOAD_CONST_TILE_BUFFER_5_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_5_LOWER,
   SPM_LOAD_CONST_TILE_BUFFER_6_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_6_LOWER,
   SPM_LOAD_CONST_TILE_BUFFER_7_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_7_LOWER,
};

/* Number of enumerators in enum pvr_spm_load_const. */
#define PVR_SPM_LOAD_CONST_COUNT (SPM_LOAD_CONST_TILE_BUFFER_7_LOWER + 1)

/* Parenthesized so the expansion is always a single operand at the use site. */
#define PVR_SPM_LOAD_DEST_UNUSED (~0)

/* Number of sample-count groups in the program index (see
 * pvr_get_spm_load_program_index(), which selects a group with
 * log2(sample_count)).
 */
#define PVR_SPM_LOAD_SAMPLES_COUNT 4U

#define PVR_SPM_LOAD_IN_REGS_COUNT 3 /* 1, 2, 4 */
#define PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT 7 /* 1, 2, 3, 4, 5, 6, 7 */
|
||||
|
||||
/* If output_regs == 8
|
||||
* reg_load_programs = 4 # 1, 2, 4, 8
|
||||
* tile_buffer_load_programs = 3 # 1, 2, 3
|
||||
* else #output_regs == 4
|
||||
* reg_load_programs = 3 # 1, 2, 4
|
||||
* tile_buffer_load_programs = 7 # 1, 2, 3, 4, 5, 6, 7
|
||||
*
|
||||
* See PVR_SPM_LOAD_IN_BUFFERS_COUNT for where the amount of
|
||||
* tile_buffer_load_programs comes from.
|
||||
*
|
||||
* Tot = sample_count * (reg_load_programs + tile_buffer_load_programs)
|
||||
*/
|
||||
/* FIXME: This is currently hard coded for the am62. The Chromebook has 8
|
||||
* output regs so the count is different.
|
||||
*/
|
||||
#define PVR_SPM_LOAD_PROGRAM_COUNT \
|
||||
(PVR_SPM_LOAD_SAMPLES_COUNT * \
|
||||
(PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT))
|
||||
|
||||
static inline uint32_t pvr_get_spm_load_program_index(uint32_t sample_count,
|
||||
uint32_t num_tile_buffers,
|
||||
uint32_t num_output_regs)
|
||||
{
|
||||
uint32_t idx;
|
||||
|
||||
assert(util_is_power_of_two_nonzero(sample_count));
|
||||
idx = util_logbase2(sample_count) *
|
||||
(PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
|
||||
|
||||
assert((num_tile_buffers > 0) ^ (num_output_regs > 0));
|
||||
|
||||
if (num_output_regs > 0) {
|
||||
assert(util_is_power_of_two_nonzero(num_output_regs));
|
||||
assert(util_logbase2(num_output_regs) < PVR_SPM_LOAD_IN_REGS_COUNT);
|
||||
idx += util_logbase2(num_output_regs);
|
||||
} else {
|
||||
assert(num_tile_buffers <= PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
|
||||
idx += PVR_SPM_LOAD_IN_REGS_COUNT + num_tile_buffers - 1;
|
||||
}
|
||||
|
||||
assert(idx < PVR_SPM_LOAD_PROGRAM_COUNT);
|
||||
return idx;
|
||||
}
|
||||
|
||||
#endif /* PVR_SHADER_FACTORY_H */
|
||||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue