diff --git a/src/imagination/vulkan/pds/pvr_pds.c b/src/imagination/vulkan/pds/pvr_pds.c index 9a16fb5a3cd..eefccdaa69b 100644 --- a/src/imagination/vulkan/pds/pvr_pds.c +++ b/src/imagination/vulkan/pds/pvr_pds.c @@ -4151,6 +4151,11 @@ uint32_t *pvr_pds_coefficient_loading( /* Copy the USC task control words to constants. */ constant = pvr_pds_get_constants(&next_constant, 2, &data_size); + if (BITSET_TEST(program->flat_iter_mask, iterator)) + program->dout_src_offsets[iterator] = constant; + else + program->dout_src_offsets[iterator] = ~0u; + /* Write the first iterator. */ iterator_word = (uint64_t)program->FPU_iterators[iterator] diff --git a/src/imagination/vulkan/pds/pvr_pds.h b/src/imagination/vulkan/pds/pvr_pds.h index e2610cef733..874418fcced 100644 --- a/src/imagination/vulkan/pds/pvr_pds.h +++ b/src/imagination/vulkan/pds/pvr_pds.h @@ -29,6 +29,7 @@ #include "pvr_device_info.h" #include "pvr_limits.h" #include "pds/pvr_rogue_pds_defs.h" +#include "util/bitset.h" #include "util/macros.h" #ifdef __cplusplus @@ -268,6 +269,9 @@ struct pvr_pds_coeff_loading_program { uint32_t FPU_iterators[PVR_MAXIMUM_ITERATIONS]; uint32_t destination[PVR_MAXIMUM_ITERATIONS]; + BITSET_DECLARE(flat_iter_mask, PVR_MAXIMUM_ITERATIONS); + uint32_t dout_src_offsets[PVR_MAXIMUM_ITERATIONS]; + uint32_t data_size; uint32_t code_size; diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 47ae33ef4b8..7d7bbc9d7bd 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -4925,7 +4925,58 @@ pvr_setup_triangle_merging_flag(struct pvr_cmd_buffer *const cmd_buffer, } } -static void +static VkResult +setup_pds_coeff_program(struct pvr_cmd_buffer *const cmd_buffer, + struct pvr_pds_upload *pds_coeff_program) +{ + struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; + const struct pvr_fragment_shader_state *const fragment_shader_state = + &state->gfx_pipeline->shader_state.fragment; + const struct vk_dynamic_graphics_state *const dynamic_state = + &cmd_buffer->vk.dynamic_graphics_state; + const VkPrimitiveTopology topology = dynamic_state->ia.primitive_topology; + const struct pvr_pds_coeff_loading_program *program = + &fragment_shader_state->pds_coeff_program; + uint32_t *pds_coeff_program_buffer = + fragment_shader_state->pds_coeff_program_buffer; + unsigned i; + + memset(pds_coeff_program, 0, sizeof(*pds_coeff_program)); + + if (!pds_coeff_program_buffer) + return VK_SUCCESS; + + BITSET_FOREACH_SET (i, program->flat_iter_mask, PVR_MAXIMUM_ITERATIONS) { + uint32_t off = program->dout_src_offsets[i]; + assert(off != ~0u); + + struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC douti_src; + ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_unpack(&pds_coeff_program_buffer[off], + &douti_src); + + if (topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) + douti_src.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_FLAT_VERTEX1; + else + douti_src.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_FLAT_VERTEX0; + + ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&pds_coeff_program_buffer[off], + &douti_src); + } + + /* FIXME: Figure out the define for alignment of 16. */ + return pvr_cmd_buffer_upload_pds( + cmd_buffer, + &pds_coeff_program_buffer[0], + program->data_size, + 16, + &pds_coeff_program_buffer[program->data_size], + program->code_size, + 16, + 16, + pds_coeff_program); +} + +static VkResult pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, struct pvr_sub_cmd_gfx *const sub_cmd) { @@ -4938,8 +4989,12 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, &fragment_shader_state->descriptor_state; const struct pvr_pipeline_stage_state *fragment_state = &fragment_shader_state->stage_state; - const struct pvr_pds_upload *pds_coeff_program = - &fragment_shader_state->pds_coeff_program; + struct pvr_pds_upload pds_coeff_program; + VkResult result; + + result = setup_pds_coeff_program(cmd_buffer, &pds_coeff_program); + if (result != VK_SUCCESS) + return result; const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; struct ROGUE_TA_STATE_HEADER *const header = &state->emit_header; @@ -4950,7 +5005,7 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE); const uint32_t pds_varying_state_size = - DIV_ROUND_UP(pds_coeff_program->data_size, + DIV_ROUND_UP(pds_coeff_program.data_size, ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE); const uint32_t usc_varying_size = @@ -5018,13 +5073,13 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, ppp_state->pds.size_info2 |= size_info2; - if (pds_coeff_program->pvr_bo) { + if (pds_coeff_program.pvr_bo) { header->pres_pds_state_ptr1 = true; pvr_csb_pack (&ppp_state->pds.varying_base, TA_STATE_PDS_VARYINGBASE, base) { - base.addr = PVR_DEV_ADDR(pds_coeff_program->data_offset); + base.addr = PVR_DEV_ADDR(pds_coeff_program.data_offset); } } else { ppp_state->pds.varying_base = 0U; @@ -5038,6 +5093,8 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, header->pres_pds_state_ptr0 = true; header->pres_pds_state_ptr3 = true; + + return result; } static void pvr_setup_viewport(struct pvr_cmd_buffer *const cmd_buffer) @@ -5540,7 +5597,9 @@ pvr_emit_dirty_ppp_state(struct pvr_cmd_buffer *const cmd_buffer, state->dirty.fragment_descriptors && state->gfx_pipeline->shader_state.fragment.shader_bo && !state->gfx_pipeline->fs_data.common.uses.empty) { - pvr_setup_fragment_state_pointers(cmd_buffer, sub_cmd); + result = pvr_setup_fragment_state_pointers(cmd_buffer, sub_cmd); + if (result != VK_SUCCESS) + return result; } pvr_setup_isp_depth_bias_scissor_state(cmd_buffer); diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c index 35c7bb5ad55..1c0fc481ac2 100644 --- a/src/imagination/vulkan/pvr_pipeline.c +++ b/src/imagination/vulkan/pvr_pipeline.c @@ -69,12 +69,12 @@ static VkResult pvr_pds_coeff_program_create_and_upload( struct pvr_device *device, const VkAllocationCallbacks *allocator, - struct pvr_pds_coeff_loading_program *program, struct pvr_fragment_shader_state *fragment_state) { + struct pvr_pds_coeff_loading_program *program = + &fragment_state->pds_coeff_program; uint32_t staging_buffer_size; uint32_t *staging_buffer; - VkResult result; assert(program->num_fpu_iterators < PVR_MAXIMUM_ITERATIONS); @@ -82,9 +82,7 @@ static VkResult pvr_pds_coeff_program_create_and_upload( pvr_pds_coefficient_loading(program, NULL, PDS_GENERATE_SIZES); if (!program->code_size) { - fragment_state->pds_coeff_program.pvr_bo = NULL; - fragment_state->pds_coeff_program.code_size = 0; - fragment_state->pds_coeff_program.data_size = 0; + fragment_state->pds_coeff_program_buffer = NULL; fragment_state->stage_state.pds_temps_count = 0; return VK_SUCCESS; @@ -106,23 +104,7 @@ static VkResult pvr_pds_coeff_program_create_and_upload( staging_buffer, PDS_GENERATE_CODEDATA_SEGMENTS); - /* FIXME: Figure out the define for alignment of 16. */ - result = pvr_gpu_upload_pds(device, - &staging_buffer[0], - program->data_size, - 16, - &staging_buffer[program->data_size], - program->code_size, - 16, - 16, - &fragment_state->pds_coeff_program); - if (result != VK_SUCCESS) { - vk_free2(&device->vk.alloc, allocator, staging_buffer); - return result; - } - - vk_free2(&device->vk.alloc, allocator, staging_buffer); - + fragment_state->pds_coeff_program_buffer = staging_buffer; fragment_state->stage_state.pds_temps_count = program->temps_used; return VK_SUCCESS; @@ -1178,6 +1160,8 @@ pvr_graphics_pipeline_destroy(struct pvr_device *const device, { const uint32_t num_vertex_attrib_programs = ARRAY_SIZE(gfx_pipeline->shader_state.vertex.pds_attrib_programs); + struct pvr_fragment_shader_state *fragment_state = + &gfx_pipeline->shader_state.fragment; pvr_pds_descriptor_program_destroy( device, @@ -1198,8 +1182,9 @@ pvr_graphics_pipeline_destroy(struct pvr_device *const device, pvr_bo_suballoc_free( gfx_pipeline->shader_state.fragment.pds_fragment_program.pvr_bo); - pvr_bo_suballoc_free( - gfx_pipeline->shader_state.fragment.pds_coeff_program.pvr_bo); + vk_free2(&device->vk.alloc, + allocator, + fragment_state->pds_coeff_program_buffer); pvr_bo_suballoc_free(gfx_pipeline->shader_state.fragment.shader_bo); pvr_bo_suballoc_free(gfx_pipeline->shader_state.vertex.shader_bo); @@ -1384,9 +1369,13 @@ static void pvr_graphics_pipeline_setup_vertex_dma( static void pvr_graphics_pipeline_setup_fragment_coeff_program( struct pvr_graphics_pipeline *gfx_pipeline, - nir_shader *fs, - struct pvr_pds_coeff_loading_program *frag_coeff_program) + nir_shader *fs) { + struct pvr_fragment_shader_state *fragment_state = + &gfx_pipeline->shader_state.fragment; + struct pvr_pds_coeff_loading_program *frag_coeff_program = + &fragment_state->pds_coeff_program; + uint64_t varyings_used = fs->info.inputs_read & BITFIELD64_RANGE(VARYING_SLOT_VAR0, MAX_VARYING); pco_vs_data *vs_data = &gfx_pipeline->vs_data.vs; @@ -1478,17 +1467,16 @@ static void pvr_graphics_pipeline_setup_fragment_coeff_program( switch (var->data.interpolation) { case INTERP_MODE_SMOOTH: - douti_src.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD; douti_src.perspective = true; - break; + FALLTHROUGH; case INTERP_MODE_NOPERSPECTIVE: douti_src.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD; break; case INTERP_MODE_FLAT: - /* TODO: triangle fan, provoking vertex last. */ - douti_src.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_FLAT_VERTEX0; + /* Shademodel will be set up later for flat. */ + BITSET_SET(frag_coeff_program->flat_iter_mask, fpu); break; default: @@ -2294,8 +2282,6 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, struct pvr_pds_vertex_dma vtx_dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS]; uint32_t vtx_dma_count = 0; - struct pvr_pds_coeff_loading_program frag_coeff_program = { 0 }; - for (mesa_shader_stage stage = 0; stage < MESA_SHADER_STAGES; ++stage) { size_t stage_index = gfx_pipeline->stage_indices[stage]; @@ -2398,8 +2384,7 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, pvr_graphics_pipeline_setup_fragment_coeff_program( gfx_pipeline, - nir_shaders[MESA_SHADER_FRAGMENT], - &frag_coeff_program); + nir_shaders[MESA_SHADER_FRAGMENT]); result = pvr_gpu_upload_usc(device, pco_shader_binary_data(*fs), @@ -2411,7 +2396,6 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, result = pvr_pds_coeff_program_create_and_upload(device, allocator, - &frag_coeff_program, fragment_state); if (result != VK_SUCCESS) goto err_free_fragment_bo; @@ -2485,7 +2469,9 @@ err_free_frag_descriptor_program: err_free_frag_program: pvr_bo_suballoc_free(fragment_state->pds_fragment_program.pvr_bo); err_free_coeff_program: - pvr_bo_suballoc_free(fragment_state->pds_coeff_program.pvr_bo); + vk_free2(&device->vk.alloc, + allocator, + fragment_state->pds_coeff_program_buffer); err_free_fragment_bo: pvr_bo_suballoc_free(fragment_state->shader_bo); err_free_vertex_bo: diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 1a5d93d32ce..1d9f05a7aec 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -886,7 +886,9 @@ struct pvr_fragment_shader_state { enum ROGUE_TA_PASSTYPE pass_type; enum ROGUE_PDSINST_DOUTU_SAMPLE_RATE sample_rate; - struct pvr_pds_upload pds_coeff_program; + struct pvr_pds_coeff_loading_program pds_coeff_program; + uint32_t *pds_coeff_program_buffer; + struct pvr_pds_upload pds_fragment_program; };