diff --git a/src/imagination/common/pvr_limits.h b/src/imagination/common/pvr_limits.h
index 5c5f3f3ce98..2226b48e2b5 100644
--- a/src/imagination/common/pvr_limits.h
+++ b/src/imagination/common/pvr_limits.h
@@ -37,7 +37,7 @@
 #define PVR_MAX_VIEWPORTS 1U
 #define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U
 
-#define PVR_MAX_PUSH_CONSTANTS_SIZE 256U
+#define PVR_MAX_PUSH_CONSTANTS_SIZE 128U
 
 #define PVR_POINT_SIZE_RANGE_MIN 1.0f
 #define PVR_POINT_SIZE_RANGE_MAX 511.0f
diff --git a/src/imagination/pco/pco_data.h b/src/imagination/pco/pco_data.h
index b89eb146180..e4b6d991706 100644
--- a/src/imagination/pco/pco_data.h
+++ b/src/imagination/pco/pco_data.h
@@ -113,6 +113,13 @@ typedef struct _pco_descriptor_set_data {
    bool used; /** Whether the descriptor set is used by the shader. */
 } pco_descriptor_set_data;
 
+/** PCO push constant data. */
+typedef struct _pco_push_const_data {
+   pco_range range; /** Push constant range (in shared registers). */
+
+   unsigned used; /** Dwords used; ~0U if not known at compile time. */
+} pco_push_const_data;
+
 /** PCO common data. */
 typedef struct _pco_common_data {
    /** System value mappings. */
@@ -121,6 +128,9 @@ typedef struct _pco_common_data {
    /** Descriptor set data. */
    pco_descriptor_set_data desc_sets[PVR_MAX_DESCRIPTOR_SETS];
 
+   /** Push constant data. */
+   pco_push_const_data push_consts;
+
    unsigned temps; /** Number of allocated temp registers. */
    unsigned vtxins; /** Number of allocated vertex input registers. */
    unsigned interns; /** Number of allocated internal registers. */
diff --git a/src/imagination/pco/pco_nir.c b/src/imagination/pco/pco_nir.c
index 4f57c86829e..2760d942760 100644
--- a/src/imagination/pco/pco_nir.c
+++ b/src/imagination/pco/pco_nir.c
@@ -23,6 +23,7 @@ static const struct spirv_to_nir_options spirv_options = {
 
    .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
    .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
+   .push_const_addr_format = nir_address_format_32bit_offset,
 
    .min_ubo_alignment = PVR_UNIFORM_BUFFER_OFFSET_ALIGNMENT,
    .min_ssbo_alignment = PVR_STORAGE_BUFFER_OFFSET_ALIGNMENT,
@@ -249,6 +250,12 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
             nir_var_mem_ubo | nir_var_mem_ssbo,
             nir_address_format_vec2_index_32bit_offset);
 
+   NIR_PASS(_,
+            nir,
+            nir_lower_explicit_io,
+            nir_var_mem_push_const,
+            spirv_options.push_const_addr_format);
+
    NIR_PASS(_, nir, pco_nir_lower_vk, &data->common);
 
    NIR_PASS(_,
diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c
index 129db439fa5..5d0733293d6 100644
--- a/src/imagination/pco/pco_trans_nir.c
+++ b/src/imagination/pco/pco_trans_nir.c
@@ -457,6 +457,42 @@ static unsigned fetch_resource_base_reg_packed(const pco_common_data *common,
    return fetch_resource_base_reg(common, desc_set, binding, elem, is_img_smp);
 }
 
+static pco_instr *trans_load_push_constant(trans_ctx *tctx,
+                                           nir_intrinsic_instr *intr,
+                                           pco_ref dest,
+                                           pco_ref src)
+{
+   const pco_common_data *common = &tctx->shader->data.common;
+
+   unsigned chans = pco_ref_get_chans(dest);
+   ASSERTED unsigned bits = pco_ref_get_bits(dest);
+   assert(bits == 32);
+
+   assert(common->push_consts.range.count > 0);
+
+   /* Constant offsets can address the shared registers directly. */
+   if (nir_src_is_const(intr->src[0])) {
+      unsigned offset = nir_src_as_uint(intr->src[0]);
+      assert(offset < common->push_consts.range.count);
+
+      unsigned reg_index = common->push_consts.range.start + offset;
+
+      src = pco_ref_hwreg_vec(reg_index, PCO_REG_CLASS_SHARED, chans);
+      return pco_mov(&tctx->b, dest, src, .rpt = chans);
+   }
+
+   /* Use the dynamic offset to set up the index register.
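+    * The offset is first moved into index register 0, and the shared
+    * registers holding the push constants are then addressed through it.
+    */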
+   pco_ref idx_reg = pco_ref_hwreg_idx(0, 0, PCO_REG_CLASS_INDEX);
+   pco_mov(&tctx->b, idx_reg, src);
+
+   pco_ref idx_src = pco_ref_hwreg_idx_vec(0,
+                                           common->push_consts.range.start,
+                                           PCO_REG_CLASS_SHARED,
+                                           chans);
+
+   return pco_mov(&tctx->b, dest, idx_src, .rpt = chans);
+}
+
 static pco_instr *trans_load_buffer(trans_ctx *tctx,
                                     nir_intrinsic_instr *intr,
                                     pco_ref dest,
@@ -889,6 +925,10 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
          UNREACHABLE("Unsupported stage for \"nir_intrinsic_store_output\".");
       break;
 
+   case nir_intrinsic_load_push_constant:
+      instr = trans_load_push_constant(tctx, intr, dest, src[0]);
+      break;
+
    case nir_intrinsic_load_ubo:
    case nir_intrinsic_load_ssbo:
       instr = trans_load_buffer(tctx, intr, dest, src[1]);
diff --git a/src/imagination/vulkan/pds/pvr_pipeline_pds.c b/src/imagination/vulkan/pds/pvr_pipeline_pds.c
index a1ffd3a0dae..34e8c0bd0cd 100644
--- a/src/imagination/vulkan/pds/pvr_pipeline_pds.c
+++ b/src/imagination/vulkan/pds/pvr_pipeline_pds.c
@@ -1517,7 +1517,12 @@ void pvr_pds_generate_descriptor_upload_program(
 
    pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
 
-   assert(!input_program->buffer_count);
+   /* One DOUTD per compile-time buffer; each burst needs a 32-bit and a
+    * 64-bit constant.
+    */
+   num_consts32 += input_program->buffer_count;
+   num_consts64 += input_program->buffer_count;
+   total_dma_count += input_program->buffer_count;
 
    /* DOUTU for the secondary update program requires a 64-bit constant. */
    if (input_program->secondary_program_present)
@@ -1561,6 +1566,42 @@ void pvr_pds_generate_descriptor_upload_program(
       next_const32++;
    }
 
+   for (unsigned int index = 0; index < input_program->buffer_count; index++) {
+      struct pvr_pds_buffer *buffer = &input_program->buffers[index];
+
+      bool last_dma = (++running_dma_count == total_dma_count);
+      bool halt = last_dma && !input_program->secondary_program_present;
+
+      switch (buffer->type) {
+      case PVR_BUFFER_TYPE_PUSH_CONSTS: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = buffer->type;
+         break;
+      }
+
+      default:
+         UNREACHABLE("Unsupported buffer type.");
+      }
+
+      entry_write_state.entry->const_offset = next_const64 * 2;
+
+      PVR_PDS_MODE_TOGGLE(code_section,
+                          instruction,
+                          pvr_encode_burst_cs(&entry_write_state,
+                                              last_dma,
+                                              halt,
+                                              next_const32,
+                                              next_const64,
+                                              buffer->size_in_dwords,
+                                              buffer->destination));
+
+      next_const64++;
+      next_const32++;
+   }
+
    if (total_dma_count != running_dma_count)
      fprintf(stderr, "Mismatch in DMA count\n");
diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index e6c0e52b185..43a90b4325a 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -2690,28 +2690,43 @@ void pvr_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
    state->dirty.index_buffer_binding = true;
 }
 
-void pvr_CmdPushConstants(VkCommandBuffer commandBuffer,
-                          VkPipelineLayout layout,
-                          VkShaderStageFlags stageFlags,
-                          uint32_t offset,
-                          uint32_t size,
-                          const void *pValues)
+static void update_push_constants(struct pvr_push_constants *push_consts,
+                                  uint32_t offset,
+                                  uint32_t size,
+                                  const void *data)
 {
-#if MESA_DEBUG
-   const uint64_t ending = (uint64_t)offset + (uint64_t)size;
-#endif
+   assert((uint64_t)offset + (uint64_t)size <= sizeof(push_consts->data));
+
+   memcpy(&push_consts->data[offset], data, size);
+   push_consts->bytes_updated = MAX2(push_consts->bytes_updated, offset + size);
+   push_consts->dirty = true;
+}
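+
+/* Push constant updates are tracked per stage allocation so that the
+ * vertex/geometry, fragment and compute stages can each upload only
+ * their own data.
+ */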
+void pvr_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
+                              const VkPushConstantsInfoKHR *pPushConstantsInfo)
+{
    PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
    struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
 
-   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_VERTEX_BIT) {
+      update_push_constants(
+         &state->push_consts[PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY],
+         pPushConstantsInfo->offset,
+         pPushConstantsInfo->size,
+         pPushConstantsInfo->pValues);
+   }
 
-   pvr_assert(ending <= PVR_MAX_PUSH_CONSTANTS_SIZE);
+   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_FRAGMENT_BIT) {
+      update_push_constants(&state->push_consts[PVR_STAGE_ALLOCATION_FRAGMENT],
+                            pPushConstantsInfo->offset,
+                            pPushConstantsInfo->size,
+                            pPushConstantsInfo->pValues);
+   }
 
-   memcpy(&state->push_constants.data[offset], pValues, size);
-
-   state->push_constants.dirty_stages |= stageFlags;
-   state->push_constants.uploaded = false;
+   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
+      update_push_constants(&state->push_consts[PVR_STAGE_ALLOCATION_COMPUTE],
+                            pPushConstantsInfo->offset,
+                            pPushConstantsInfo->size,
+                            pPushConstantsInfo->pValues);
+   }
 }
 
 static VkResult
@@ -3505,6 +3520,9 @@ pvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer,
    return VK_SUCCESS;
 }
 
+static VkResult pvr_cmd_upload_push_consts(struct pvr_cmd_buffer *cmd_buffer,
+                                           enum pvr_stage_allocation stage);
+
 static VkResult pvr_setup_descriptor_mappings(
    struct pvr_cmd_buffer *const cmd_buffer,
    enum pvr_stage_allocation stage,
@@ -3591,6 +3609,45 @@ static VkResult pvr_setup_descriptor_mappings(
          break;
       }
 
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER: {
+         const struct pvr_const_map_entry_special_buffer *special_buff_entry =
+            (const struct pvr_const_map_entry_special_buffer *)entries;
+
+         switch (special_buff_entry->buffer_type) {
+         case PVR_BUFFER_TYPE_PUSH_CONSTS: {
+            struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+
+            /* Handle running with undefined push constants: nothing has
+             * been uploaded yet, so upload the full (undefined) range to
+             * get a valid address.
+             */
+            if (!state->push_consts[stage].dev_addr.addr) {
+               state->push_consts[stage].dirty = true;
+               assert(!state->push_consts[stage].bytes_updated);
+               state->push_consts[stage].bytes_updated =
+                  sizeof(state->push_consts[stage].data);
+
+               result = pvr_cmd_upload_push_consts(cmd_buffer, stage);
+
+               /* Reset so later updates start from a clean high-water mark. */
+               state->push_consts[stage].bytes_updated = 0;
+
+               if (result != VK_SUCCESS)
+                  return result;
+            }
+
+            PVR_WRITE(qword_buffer,
+                      state->push_consts[stage].dev_addr.addr,
+                      special_buff_entry->const_offset,
+                      pds_info->data_size_in_dwords);
+            break;
+         }
+
+         default:
+            UNREACHABLE("Unsupported special buffer type.");
+         }
+
+         entries += sizeof(*special_buff_entry);
+         break;
+      }
+
       default:
          UNREACHABLE("Unsupported map entry type.");
       }
@@ -3921,45 +3978,26 @@ static void pvr_compute_update_kernel(
    pvr_compute_generate_control_stream(csb, sub_cmd, &info);
 }
 
-static VkResult pvr_cmd_upload_push_consts(struct pvr_cmd_buffer *cmd_buffer)
+static VkResult pvr_cmd_upload_push_consts(struct pvr_cmd_buffer *cmd_buffer,
+                                           enum pvr_stage_allocation stage)
 {
    struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_push_constants *push_consts = &state->push_consts[stage];
    struct pvr_suballoc_bo *suballoc_bo;
    VkResult result;
 
-   /* TODO: Here are some possible optimizations/things to consider:
-    *
-    * - Currently we upload maxPushConstantsSize. The application might only
-    *   be using a portion of that so we might end up with unused memory.
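+   /* A fresh buffer is allocated for every upload: the PDS program DMAs
+    * the contents at submission time, so reusing a buffer would let a
+    * later vkCmdPushConstants call overwrite the values recorded for
+    * earlier draws or dispatches.
+    */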
- * Should we be smarter about this. If we intend to upload the push - * consts into shareds, we definitely want to do avoid reserving unused - * regs. - * - * - For now we have to upload to a new buffer each time since the shaders - * access the push constants from memory. If we were to reuse the same - * buffer we might update the contents out of sync with job submission - * and the shaders will see the updated contents while the command - * buffer was still being recorded and not yet submitted. - * If we were to upload the push constants directly to shared regs we - * could reuse the same buffer (avoiding extra allocation overhead) - * since the contents will be DMAed only on job submission when the - * control stream is processed and the PDS program is executed. This - * approach would also allow us to avoid regenerating the PDS data - * section in some cases since the buffer address will be constants. - */ - - if (cmd_buffer->state.push_constants.uploaded) + if (!push_consts->dirty) return VK_SUCCESS; result = pvr_cmd_buffer_upload_general(cmd_buffer, - state->push_constants.data, - sizeof(state->push_constants.data), + push_consts->data, + push_consts->bytes_updated, &suballoc_bo); if (result != VK_SUCCESS) return result; - cmd_buffer->state.push_constants.dev_addr = suballoc_bo->dev_addr; - cmd_buffer->state.push_constants.uploaded = true; + push_consts->dev_addr = suballoc_bo->dev_addr; + push_consts->dirty = false; return VK_SUCCESS; } @@ -3983,15 +4021,14 @@ static void pvr_cmd_dispatch( sub_cmd->uses_atomic_ops |= cs_data->common.uses.atomics; sub_cmd->uses_barrier |= cs_data->common.uses.barriers; - if (state->push_constants.dirty_stages & VK_SHADER_STAGE_COMPUTE_BIT) { - result = pvr_cmd_upload_push_consts(cmd_buffer); + if (state->push_consts[PVR_STAGE_ALLOCATION_COMPUTE].dirty) { + result = + pvr_cmd_upload_push_consts(cmd_buffer, PVR_STAGE_ALLOCATION_COMPUTE); if (result != VK_SUCCESS) return; /* Regenerate the PDS program to use the new push consts buffer. 
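+       * The PDS data section embeds the push constant buffer address, so
+       * it must be rebuilt whenever a new buffer is uploaded.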
        */
       state->dirty.compute_desc_dirty = true;
-
-      state->push_constants.dirty_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
    }
 
    if (state->dirty.compute_desc_dirty ||
@@ -5279,7 +5316,7 @@ pvr_ppp_state_update_required(const struct pvr_cmd_buffer *cmd_buffer)
           header->pres_varying_word2 || header->pres_stream_out_program ||
           state->dirty.fragment_descriptors || state->dirty.vis_test ||
           state->dirty.gfx_pipeline_binding || state->dirty.isp_userpass ||
-          state->push_constants.dirty_stages & VK_SHADER_STAGE_FRAGMENT_BIT ||
+          state->push_consts[PVR_STAGE_ALLOCATION_FRAGMENT].dirty ||
          BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
          BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
          BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
@@ -5658,15 +5695,23 @@ static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer)
      pvr_setup_vertex_buffers(cmd_buffer, gfx_pipeline);
    }
 
-   if (state->push_constants.dirty_stages & VK_SHADER_STAGE_ALL_GRAPHICS) {
-      result = pvr_cmd_upload_push_consts(cmd_buffer);
-      if (result != VK_SUCCESS)
-         return result;
-   }
-
    state->dirty.vertex_descriptors = state->dirty.gfx_pipeline_binding;
    state->dirty.fragment_descriptors = state->dirty.vertex_descriptors;
 
+   if (state->push_consts[PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY].dirty) {
+      result = pvr_cmd_upload_push_consts(cmd_buffer,
+                                          PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY);
+      if (result != VK_SUCCESS)
+         return result;
+
+      state->dirty.vertex_descriptors = true;
+   }
+
+   if (state->push_consts[PVR_STAGE_ALLOCATION_FRAGMENT].dirty) {
+      result =
+         pvr_cmd_upload_push_consts(cmd_buffer, PVR_STAGE_ALLOCATION_FRAGMENT);
+      if (result != VK_SUCCESS)
+         return result;
+
+      state->dirty.fragment_descriptors = true;
+   }
+
    /* Account for dirty descriptor set. */
    /* TODO: It could be the case that there are no descriptors for a specific
    * stage, or that the update descriptors aren't active for a particular
@@ -5679,12 +5724,6 @@ static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer)
    if (BITSET_TEST(dynamic_state->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS))
      state->dirty.fragment_descriptors = true;
 
-   state->dirty.vertex_descriptors |=
-      state->push_constants.dirty_stages &
-      (VK_SHADER_STAGE_ALL_GRAPHICS & ~VK_SHADER_STAGE_FRAGMENT_BIT);
-   state->dirty.fragment_descriptors |= state->push_constants.dirty_stages &
-                                        VK_SHADER_STAGE_FRAGMENT_BIT;
-
    if (state->dirty.fragment_descriptors) {
      result = pvr_setup_descriptor_mappings(
        cmd_buffer,
@@ -5730,8 +5769,6 @@ static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer)
    state->dirty.vertex_bindings = false;
    state->dirty.vis_test = false;
 
-   state->push_constants.dirty_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS;
-
    return VK_SUCCESS;
 }
 
diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c
index a5d8e23aaf5..f531367934c 100644
--- a/src/imagination/vulkan/pvr_pipeline.c
+++ b/src/imagination/vulkan/pvr_pipeline.c
@@ -580,6 +580,14 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
       goto err_free_static_consts;
    }
 
+   if (data->common.push_consts.range.count > 0) {
+      program.buffers[program.buffer_count++] = (struct pvr_pds_buffer){
+         .type = PVR_BUFFER_TYPE_PUSH_CONSTS,
+         .size_in_dwords = data->common.push_consts.range.count,
+         .destination = data->common.push_consts.range.start,
+      };
+   }
+
    pds_info->entries_size_in_bytes = const_entries_size_in_bytes;
 
    pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info);
@@ -2003,6 +2011,32 @@ static void pvr_setup_descriptors(pco_data *data,
          };
       }
    }
+
+   if (data->common.push_consts.used > 0) {
+      unsigned count = data->common.push_consts.used;
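+
+      /* A value of ~0U means the exact usage wasn't determined at compile
+       * time; fall back to the ranges declared in the pipeline layout for
+       * this stage.
+       */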
+      if (count == ~0U) {
+         count = 0;
+         for (unsigned u = 0; u < layout->push_range_count; ++u) {
+            const VkPushConstantRange *range = &layout->push_ranges[u];
+            if (!(mesa_to_vk_shader_stage(stage) & range->stageFlags))
+               continue;
+
+            count = MAX2(count, range->offset + range->size);
+         }
+
+         /* Layout ranges are in bytes; shared registers hold dwords. */
+         assert(!(count % 4));
+         count /= 4;
+      }
+
+      data->common.push_consts.range = (pco_range){
+         .start = data->common.shareds,
+         .count = count,
+      };
+
+      data->common.shareds += count;
+   }
+
    assert(data->common.shareds < 256);
 }
 
@@ -2098,6 +2132,9 @@ static void pvr_postprocess_shader_data(pco_data *data,
    pvr_setup_descriptors(data, nir, layout);
 
    /* TODO: common things, like large constants being put into shareds. */
+
+   assert(data->common.shareds < 256);
+   assert(data->common.coeffs < 256);
 }
 
 /* Compiles and uploads shaders and PDS programs. */
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
index 6c99ccc615b..e6dfc384993 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -690,6 +690,13 @@ struct pvr_cmd_buffer_draw_state {
    bool draw_indexed;
 };
 
+struct pvr_push_constants {
+   uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
+   /* High-water mark of the bytes written since the last upload. */
+   unsigned bytes_updated;
+   /* Address of the last upload; 0 if nothing has been uploaded yet. */
+   pvr_dev_addr_t dev_addr;
+   /* Whether the data has changed since the last upload. */
+   bool dirty;
+};
+
 struct pvr_cmd_buffer_state {
    /* Pipeline binding. */
    const struct pvr_graphics_pipeline *gfx_pipeline;
@@ -712,17 +719,6 @@ struct pvr_cmd_buffer_state {
       VkIndexType type;
    } index_buffer_binding;
 
-   struct {
-      uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
-      VkShaderStageFlags dirty_stages;
-      /* Indicates if the whole push constants buffer was uploaded. This avoids
-       * having to upload the same stuff twice when the push constant range
-       * covers both gfx and compute.
-       */
-      bool uploaded;
-      pvr_dev_addr_t dev_addr;
-   } push_constants;
-
    /* Array size of barriers_needed is based on number of sync pipeline
    * stages.
    */
@@ -731,6 +727,8 @@
    struct pvr_descriptor_state gfx_desc_state;
    struct pvr_descriptor_state compute_desc_state;
 
+   struct pvr_push_constants push_consts[PVR_STAGE_ALLOCATION_COUNT];
+
    VkFormat depth_format;
 
    struct {