diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index bbb54a45d76..871f79fbd32 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -3080,7 +3080,9 @@ job_update_ez_state(struct v3dv_job *job, */ /* If the FS writes Z, then it may update against the chosen EZ direction */ - if (pipeline->fs->current_variant->prog_data.fs->writes_z) { + struct v3dv_shader_variant *fs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; + if (fs_variant->prog_data.fs->writes_z) { job->ez_state = VC5_EZ_DISABLED; return; } @@ -3673,7 +3675,7 @@ emit_varyings_state(struct v3dv_cmd_buffer *cmd_buffer) struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; struct v3d_fs_prog_data *prog_data_fs = - pipeline->fs->current_variant->prog_data.fs; + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs; const uint32_t num_flags = ARRAY_SIZE(prog_data_fs->flat_shade_flags); @@ -3753,8 +3755,11 @@ update_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer, (pipeline->layout->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT); if (needs_fs_update) { + struct v3dv_shader_variant *fs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; + cmd_buffer->state.uniforms.fs = - v3dv_write_uniforms(cmd_buffer, pipeline->fs); + v3dv_write_uniforms(cmd_buffer, pipeline, fs_variant); } const bool needs_vs_update = @@ -3762,11 +3767,17 @@ update_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer, (pipeline->layout->shader_stages & VK_SHADER_STAGE_VERTEX_BIT); if (needs_vs_update) { + struct v3dv_shader_variant *vs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; + + struct v3dv_shader_variant *vs_bin_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; + cmd_buffer->state.uniforms.vs = - v3dv_write_uniforms(cmd_buffer, pipeline->vs); + v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant); cmd_buffer->state.uniforms.vs_bin = - v3dv_write_uniforms(cmd_buffer, pipeline->vs_bin); + v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant); } } @@ -3780,10 +3791,17 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer) struct v3dv_pipeline *pipeline = state->gfx.pipeline; assert(pipeline); + struct v3d_vs_prog_data *prog_data_vs = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs; + struct v3d_vs_prog_data *prog_data_vs_bin = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs; + struct v3d_fs_prog_data *prog_data_fs = + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs; + /* Update the cache dirty flag based on the shader progs data */ - job->tmu_dirty_rcl |= pipeline->vs_bin->current_variant->prog_data.vs->base.tmu_dirty_rcl; - job->tmu_dirty_rcl |= pipeline->vs->current_variant->prog_data.vs->base.tmu_dirty_rcl; - job->tmu_dirty_rcl |= pipeline->fs->current_variant->prog_data.fs->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= prog_data_vs_bin->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= prog_data_vs->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= prog_data_fs->base.tmu_dirty_rcl; /* See GFXH-930 workaround below */ uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1); @@ -3796,6 +3814,14 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer) 32); v3dv_return_if_oom(cmd_buffer, NULL); + struct v3dv_shader_variant *vs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; + struct v3dv_shader_variant *vs_bin_variant = + 
pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; + struct v3dv_shader_variant *fs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; + struct v3dv_bo *assembly_bo = pipeline->shared_data->assembly_bo; + cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD, pipeline->shader_state_record, shader) { @@ -3810,11 +3836,11 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer) pipeline->vpm_cfg.As; shader.coordinate_shader_code_address = - v3dv_cl_address(pipeline->vs_bin->current_variant->assembly_bo, 0); + v3dv_cl_address(assembly_bo, vs_bin_variant->assembly_offset); shader.vertex_shader_code_address = - v3dv_cl_address(pipeline->vs->current_variant->assembly_bo, 0); + v3dv_cl_address(assembly_bo, vs_variant->assembly_offset); shader.fragment_shader_code_address = - v3dv_cl_address(pipeline->fs->current_variant->assembly_bo, 0); + v3dv_cl_address(assembly_bo, fs_variant->assembly_offset); shader.coordinate_shader_uniforms_address = cmd_buffer->state.uniforms.vs_bin; shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs; @@ -3825,12 +3851,6 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer) } /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */ - struct v3d_vs_prog_data *prog_data_vs = - pipeline->vs->current_variant->prog_data.vs; - - struct v3d_vs_prog_data *prog_data_vs_bin = - pipeline->vs_bin->current_variant->prog_data.vs; - bool cs_loaded_any = false; const bool cs_uses_builtins = prog_data_vs_bin->uses_iid || prog_data_vs_bin->uses_biid || @@ -5122,7 +5142,8 @@ static void cmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer) { assert(cmd_buffer->state.compute.pipeline); - assert(cmd_buffer->state.compute.pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + assert(cmd_buffer->state.compute.pipeline->active_stages == + VK_SHADER_STAGE_COMPUTE_BIT); uint32_t *dirty = &cmd_buffer->state.dirty; *dirty &= ~(V3DV_CMD_DIRTY_COMPUTE_PIPELINE | @@ -5198,7 +5219,9 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, uint32_t *wg_size_out) { struct v3dv_pipeline *pipeline = cmd_buffer->state.compute.pipeline; - assert(pipeline && pipeline->cs && pipeline->cs->current_variant); + assert(pipeline && pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); + struct v3dv_shader_variant *cs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]; struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc, sizeof(struct v3dv_job), 8, @@ -5222,7 +5245,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT; const struct v3d_compute_prog_data *cpd = - pipeline->cs->current_variant->prog_data.cs; + cs_variant->prog_data.cs; const uint32_t wgs_per_sg = 1; /* FIXME */ const uint32_t wg_size = cpd->local_size[0] * @@ -5230,7 +5253,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, cpd->local_size[2]; submit->cfg[3] |= wgs_per_sg << V3D_CSD_CFG3_WGS_PER_SG_SHIFT; submit->cfg[3] |= ((DIV_ROUND_UP(wgs_per_sg * wg_size, 16) - 1) << - V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT); + V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT); submit->cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT; if (wg_size_out) *wg_size_out = wg_size; @@ -5240,20 +5263,20 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, (group_count_x * group_count_y * group_count_z) - 1; assert(submit->cfg[4] != ~0); - assert(pipeline->cs->current_variant && - pipeline->cs->current_variant->assembly_bo); - const struct 
v3dv_shader_variant *variant = pipeline->cs->current_variant; - submit->cfg[5] = variant->assembly_bo->offset; + assert(pipeline->shared_data->assembly_bo); + struct v3dv_bo *cs_assembly_bo = pipeline->shared_data->assembly_bo; + + submit->cfg[5] = cs_assembly_bo->offset + cs_variant->assembly_offset; submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS; - if (variant->prog_data.base->single_seg) + if (cs_variant->prog_data.base->single_seg) submit->cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG; - if (variant->prog_data.base->threads == 4) + if (cs_variant->prog_data.base->threads == 4) submit->cfg[5] |= V3D_CSD_CFG5_THREADING; - if (variant->prog_data.cs->shared_size > 0) { + if (cs_variant->prog_data.cs->shared_size > 0) { job->csd.shared_memory = v3dv_bo_alloc(cmd_buffer->device, - variant->prog_data.cs->shared_size * wgs_per_sg, + cs_variant->prog_data.cs->shared_size * wgs_per_sg, "shared_vars", true); if (!job->csd.shared_memory) { v3dv_flag_oom(cmd_buffer, NULL); @@ -5261,10 +5284,10 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, } } - v3dv_job_add_bo(job, variant->assembly_bo); - + v3dv_job_add_bo(job, cs_assembly_bo); struct v3dv_cl_reloc uniforms = - v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline->cs, + v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, + cs_variant, wg_uniform_offsets_out); submit->cfg[6] = uniforms.bo->offset + uniforms.offset; diff --git a/src/broadcom/vulkan/v3dv_meta_clear.c b/src/broadcom/vulkan/v3dv_meta_clear.c index 6b0de54b391..0a38edb21e1 100644 --- a/src/broadcom/vulkan/v3dv_meta_clear.c +++ b/src/broadcom/vulkan/v3dv_meta_clear.c @@ -235,7 +235,8 @@ create_pipeline(struct v3dv_device *device, struct vk_shader_module fs_m; v3dv_shader_module_internal_init(device, &vs_m, vs_nir); - v3dv_shader_module_internal_init(device, &fs_m, fs_nir); + if (fs_nir) + v3dv_shader_module_internal_init(device, &fs_m, fs_nir); VkPipelineShaderStageCreateInfo stages[2] = { { @@ -247,7 +248,7 @@ create_pipeline(struct v3dv_device *device, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = vk_shader_module_to_handle(&fs_m), + .module = fs_nir ? vk_shader_module_to_handle(&fs_m) : VK_NULL_HANDLE, .pName = "main", }, }; diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 5c6816c64b3..190e8026e68 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -83,8 +83,9 @@ void v3dv_shader_variant_destroy(struct v3dv_device *device, struct v3dv_shader_variant *variant) { - if (variant->assembly_bo) - v3dv_bo_free(device, variant->assembly_bo); + /* The assembly BO is shared by all variants in the pipeline, so it can't + * be freed here and should be freed with the pipeline + */ ralloc_free(variant->prog_data.base); vk_free(&device->vk.alloc, variant); } @@ -98,11 +99,30 @@ destroy_pipeline_stage(struct v3dv_device *device, return; ralloc_free(p_stage->nir); - if (p_stage->current_variant) - v3dv_shader_variant_unref(device, p_stage->current_variant); vk_free2(&device->vk.alloc, pAllocator, p_stage); } +static void +pipeline_free_stages(struct v3dv_device *device, + struct v3dv_pipeline *pipeline, + const VkAllocationCallbacks *pAllocator) +{ + assert(pipeline); + + /* FIXME: we can't just use a loop over mesa stage due the bin, would be + * good to find an alternative. 
+ */ + destroy_pipeline_stage(device, pipeline->vs, pAllocator); + destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator); + destroy_pipeline_stage(device, pipeline->fs, pAllocator); + destroy_pipeline_stage(device, pipeline->cs, pAllocator); + + pipeline->vs = NULL; + pipeline->vs_bin = NULL; + pipeline->fs = NULL; + pipeline->cs = NULL; +} + static void v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline, struct v3dv_device *device, @@ -111,13 +131,12 @@ v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline, if (!pipeline) return; - /* FIXME: we can't just use a loop over mesa stage due the bin, would be - * good to find an alternative. - */ - destroy_pipeline_stage(device, pipeline->vs, pAllocator); - destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator); - destroy_pipeline_stage(device, pipeline->fs, pAllocator); - destroy_pipeline_stage(device, pipeline->cs, pAllocator); + pipeline_free_stages(device, pipeline, pAllocator); + + if (pipeline->shared_data) { + v3dv_pipeline_shared_data_unref(device, pipeline->shared_data); + pipeline->shared_data = NULL; + } if (pipeline->spill.bo) { assert(pipeline->spill.size_per_thread > 0); @@ -432,6 +451,7 @@ shader_module_compile_to_nir(struct v3dv_device *device, broadcom_shader_stage_to_gl(stage->stage), stage->entrypoint, &spirv_options, nir_options); + assert(nir); nir_validate_shader(nir, "after spirv_to_nir"); free(spec_entries); } else { @@ -565,7 +585,7 @@ lower_vulkan_resource_index(nir_builder *b, case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { struct v3dv_descriptor_map *descriptor_map = nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ? - &pipeline->ubo_map : &pipeline->ssbo_map; + &pipeline->shared_data->ubo_map : &pipeline->shared_data->ssbo_map; if (!const_val) unreachable("non-constant vulkan_resource_index array index"); @@ -680,9 +700,11 @@ lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, uint8_t return_size = relaxed_precision || instr->is_shadow ? 16 : 32; + struct v3dv_descriptor_map *map = is_sampler ? + &pipeline->shared_data->sampler_map : + &pipeline->shared_data->texture_map; int desc_index = - descriptor_map_add(is_sampler ? - &pipeline->sampler_map : &pipeline->texture_map, + descriptor_map_add(map, deref->var->data.descriptor_set, deref->var->data.binding, array_index, @@ -784,7 +806,7 @@ lower_image_deref(nir_builder *b, binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); int desc_index = - descriptor_map_add(&pipeline->texture_map, + descriptor_map_add(&pipeline->shared_data->texture_map, deref->var->data.descriptor_set, deref->var->data.binding, array_index, @@ -957,8 +979,10 @@ pipeline_populate_v3d_key(struct v3d_key *key, /* The following values are default values used at pipeline create. We use * there 32 bit as default return size. 
*/ - struct v3dv_descriptor_map *sampler_map = &p_stage->pipeline->sampler_map; - struct v3dv_descriptor_map *texture_map = &p_stage->pipeline->texture_map; + struct v3dv_descriptor_map *sampler_map = + &p_stage->pipeline->shared_data->sampler_map; + struct v3dv_descriptor_map *texture_map = + &p_stage->pipeline->shared_data->texture_map; key->num_tex_used = texture_map->num_desc; assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS); @@ -1171,7 +1195,8 @@ pipeline_populate_v3d_vs_key(struct v3d_vs_key *key, key->num_used_outputs = 0; } else { struct v3dv_pipeline *pipeline = p_stage->pipeline; - struct v3dv_shader_variant *fs_variant = pipeline->fs->current_variant; + struct v3dv_shader_variant *fs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; key->num_used_outputs = fs_variant->prog_data.fs->num_inputs; @@ -1217,113 +1242,123 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src, p_stage->stage = BROADCOM_SHADER_VERTEX_BIN; p_stage->entrypoint = src->entrypoint; p_stage->module = src->module; - p_stage->nir = nir_shader_clone(NULL, src->nir); + p_stage->nir = src->nir ? nir_shader_clone(NULL, src->nir) : NULL; p_stage->spec_info = src->spec_info; memcpy(p_stage->shader_sha1, src->shader_sha1, 20); return p_stage; } -/* FIXME: right now this just asks for an bo for the exact size of the qpu - * assembly. It would be good to be able to re-use bos to avoid bo - * fragmentation. This could be tricky though, as right now we are uploading - * the assembly from two paths, when compiling a shader, or when deserializing - * from the pipeline cache. This also means that the same variant can be - * shared by different objects. So with the current approach it is clear who - * owns the assembly bo, but if shared, who owns the shared bo? - * - * For now one-bo per-assembly would work. - * +/** * Returns false if it was not able to allocate or map the assembly bo memory. 
  */
 static bool
-upload_assembly(struct v3dv_device *device,
-                struct v3dv_shader_variant *variant,
-                broadcom_shader_stage stage,
-                const void *data,
-                uint32_t size)
+upload_assembly(struct v3dv_pipeline *pipeline)
 {
-   const char *name = NULL;
-   /* We are uploading the assembly just once, so at this point we shouldn't
-    * have any bo
-    */
-   assert(variant->assembly_bo == NULL);
+   uint32_t total_size = 0;
+   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+      struct v3dv_shader_variant *variant =
+         pipeline->shared_data->variants[stage];
 
-   switch (stage) {
-   case BROADCOM_SHADER_VERTEX:
-      name = "vertex_shader_assembly";
-      break;
-   case BROADCOM_SHADER_VERTEX_BIN:
-      name = "vs_bin_shader_assembly";
-      break;
-   case BROADCOM_SHADER_FRAGMENT:
-      name = "fragment_shader_assembly";
-      break;
-   case BROADCOM_SHADER_COMPUTE:
-      name = "compute_shader_assembly";
-      break;
-   default:
-      unreachable("Stage not supported\n");
-      break;
-   };
+      if (variant != NULL)
+         total_size += variant->qpu_insts_size;
+   }
 
-   struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name, true);
+   struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
+                                      "pipeline shader assembly", true);
    if (!bo) {
       fprintf(stderr, "failed to allocate memory for shader\n");
       return false;
    }
 
-   bool ok = v3dv_bo_map(device, bo, size);
+   bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
    if (!ok) {
       fprintf(stderr, "failed to map source shader buffer\n");
      return false;
    }
 
-   memcpy(bo->map, data, size);
+   uint32_t offset = 0;
+   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+      struct v3dv_shader_variant *variant =
+         pipeline->shared_data->variants[stage];
 
-   /* We don't unmap the assembly bo, as we would use to gather the assembly
-    * when serializing the variant.
-    */
-   variant->assembly_bo = bo;
+      if (variant != NULL) {
+         variant->assembly_offset = offset;
+
+         memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
+         offset += variant->qpu_insts_size;
+
+         /* We don't need qpu_insts anymore. */
+         free(variant->qpu_insts);
+         variant->qpu_insts = NULL;
+      }
+   }
+   assert(total_size == offset);
+
+   pipeline->shared_data->assembly_bo = bo;
 
    return true;
 }
 
 static void
-pipeline_hash_variant(const struct v3dv_pipeline_stage *p_stage,
-                      struct v3d_key *key,
-                      size_t key_size,
-                      unsigned char *sha1_out)
+pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
+                       struct v3dv_pipeline_key *key,
+                       unsigned char *sha1_out)
 {
    struct mesa_sha1 ctx;
-   struct v3dv_pipeline *pipeline = p_stage->pipeline;
    _mesa_sha1_init(&ctx);
 
-   if (p_stage->stage == BROADCOM_SHADER_COMPUTE) {
-      _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
-   } else {
-      /* We need to include both on the sha1 key as one could affect the other
-       * during linking (like if vertex output are constants, then the
-       * fragment shader would load_const intead of load_input). An
-       * alternative would be to use the serialized nir, but that seems like
-       * an overkill
-       */
-      _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
-                        sizeof(pipeline->vs->shader_sha1));
-      _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
-                        sizeof(pipeline->fs->shader_sha1));
-   }
-   _mesa_sha1_update(&ctx, key, key_size);
+   /* We need to include both in the sha1 key as one could affect the other
+    * during linking (like if vertex outputs are constants, then the
+    * fragment shader would load_const instead of load_input). An
+    * alternative would be to use the serialized nir, but that seems like
+    * overkill.
+    */
+   _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
+                     sizeof(pipeline->vs->shader_sha1));
+   _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
+                     sizeof(pipeline->fs->shader_sha1));
+
+   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
 
    _mesa_sha1_final(&ctx, sha1_out);
 }
 
-/* Checks that the pipeline has enough spill size to use a specific variant */
 static void
-pipeline_check_spill_size(struct v3dv_pipeline *pipeline,
-                          struct v3dv_shader_variant *variant)
+pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
+                      struct v3dv_pipeline_key *key,
+                      unsigned char *sha1_out)
 {
-   if (variant->prog_data.base->spill_size > pipeline->spill.size_per_thread) {
+   struct mesa_sha1 ctx;
+   _mesa_sha1_init(&ctx);
+
+   _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
+                     sizeof(pipeline->cs->shader_sha1));
+
+   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
+
+   _mesa_sha1_final(&ctx, sha1_out);
+}
+
+/* Checks that the pipeline has enough spill size to use for any of its
+ * variants.
+ */
+static void
+pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
+{
+   uint32_t max_spill_size = 0;
+
+   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+      struct v3dv_shader_variant *variant =
+         pipeline->shared_data->variants[stage];
+
+      if (variant != NULL) {
+         max_spill_size = MAX2(variant->prog_data.base->spill_size,
+                               max_spill_size);
+      }
+   }
+
+   if (max_spill_size > 0) {
       struct v3dv_device *device = pipeline->device;
 
       /* The TIDX register we use for choosing the area to access
@@ -1332,30 +1367,35 @@ pipeline_check_spill_size(struct v3dv_pipeline *pipeline,
        * means we still multiply by qpus by 4.
        */
       const uint32_t total_spill_size =
-         4 * device->devinfo.qpu_count * variant->prog_data.base->spill_size;
+         4 * device->devinfo.qpu_count * max_spill_size;
      if (pipeline->spill.bo) {
         assert(pipeline->spill.size_per_thread > 0);
         v3dv_bo_free(device, pipeline->spill.bo);
      }
      pipeline->spill.bo = v3dv_bo_alloc(device, total_spill_size, "spill", true);
-      pipeline->spill.size_per_thread = variant->prog_data.base->spill_size;
+      pipeline->spill.size_per_thread = max_spill_size;
    }
 }
 
-/*
- * Creates a new shader_variant_create. Note that for prog_data is const, so
- * it is used only to copy to their own prog_data
+/**
+ * Creates a new shader_variant. Note that prog_data is not const, so it is
+ * assumed that the caller will provide a pointer that the shader_variant
+ * will own.
  *
- * Creation includes allocating a shader source bo, and filling it up.
+ * Creation doesn't include allocating a BO to store the contents of
+ * qpu_insts, as we will try to share the same bo for several shader
+ * variants. Also note that qpu_insts being NULL is valid, for example if we
+ * are creating the shader_variants from the cache, so we can just upload the
+ * assembly of all the shader stages at once.
*/ struct v3dv_shader_variant * v3dv_shader_variant_create(struct v3dv_device *device, broadcom_shader_stage stage, - const unsigned char *variant_sha1, struct v3d_prog_data *prog_data, uint32_t prog_data_size, - const uint64_t *qpu_insts, + uint32_t assembly_offset, + uint64_t *qpu_insts, uint32_t qpu_insts_size, VkResult *out_vk_result) { @@ -1368,70 +1408,35 @@ v3dv_shader_variant_create(struct v3dv_device *device, return NULL; } - variant->ref_cnt = 1; variant->stage = stage; - memcpy(variant->variant_sha1, variant_sha1, sizeof(variant->variant_sha1)); variant->prog_data_size = prog_data_size; variant->prog_data.base = prog_data; - if (qpu_insts) { - if (!upload_assembly(device, variant, stage, - qpu_insts, qpu_insts_size)) { - ralloc_free(variant->prog_data.base); - vk_free(&device->vk.alloc, variant); - - *out_vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; - return NULL; - } - variant->qpu_insts_size = qpu_insts_size; - } + variant->assembly_offset = assembly_offset; + variant->qpu_insts_size = qpu_insts_size; + variant->qpu_insts = qpu_insts; *out_vk_result = VK_SUCCESS; return variant; } -/* For a given key, it returns the compiled version of the shader. If it was - * already compiled, it gets it from the p_stage cache, if not it compiles is - * through the v3d compiler +/* For a given key, it returns the compiled version of the shader. * * If the method returns NULL it means that it was not able to allocate the - * resources for the variant. out_vk_result would return which OOM applies. + * resources for the variant. out_vk_result would return the corresponding OOM + * error. * - * Returns a new reference of the shader_variant to the caller. + * Returns a new reference to the shader_variant to the caller. */ -struct v3dv_shader_variant* -v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage, - struct v3dv_pipeline_cache *cache, - struct v3d_key *key, - size_t key_size, - const VkAllocationCallbacks *pAllocator, - VkResult *out_vk_result) +static struct v3dv_shader_variant* +pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage, + struct v3d_key *key, + size_t key_size, + const VkAllocationCallbacks *pAllocator, + VkResult *out_vk_result) { - /* We search on the pipeline cache if provided by the user, or the default - * one - */ - unsigned char variant_sha1[20]; - pipeline_hash_variant(p_stage, key, key_size, variant_sha1); - struct v3dv_pipeline *pipeline = p_stage->pipeline; - struct v3dv_device *device = pipeline->device; - if (cache == NULL && device->instance->default_pipeline_cache_enabled) - cache = &device->default_pipeline_cache; - - struct v3dv_shader_variant *variant = - v3dv_pipeline_cache_search_for_variant(pipeline, - cache, - variant_sha1); - - if (variant) { - pipeline_check_spill_size(pipeline, variant); - *out_vk_result = VK_SUCCESS; - return variant; - } - /* If we don't find the variant in any cache, we compile one and add the - * variant to the cache - */ struct v3dv_physical_device *physical_device = &pipeline->device->instance->physicalDevice; const struct v3d_compiler *compiler = physical_device->compiler; @@ -1448,6 +1453,8 @@ v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage, uint64_t *qpu_insts; uint32_t qpu_insts_size; struct v3d_prog_data *prog_data; + uint32_t prog_data_size = + v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage)); qpu_insts = v3d_compile(compiler, key, &prog_data, @@ -1462,30 +1469,17 @@ v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage, p_stage->program_id); } - variant = 
v3dv_shader_variant_create(device, p_stage->stage, - variant_sha1, - prog_data, v3d_prog_data_size(p_stage->stage), - qpu_insts, qpu_insts_size, - out_vk_result); - if (qpu_insts) - free(qpu_insts); + struct v3dv_shader_variant *variant = + v3dv_shader_variant_create(pipeline->device, p_stage->stage, + prog_data, prog_data_size, + 0, /* assembly_offset, no final value yet */ + qpu_insts, qpu_insts_size, + out_vk_result); - if (variant) - pipeline_check_spill_size(pipeline, variant); - - if (*out_vk_result == VK_SUCCESS) { - struct v3dv_pipeline_cache *default_cache = - &pipeline->device->default_pipeline_cache; - - v3dv_pipeline_cache_upload_variant(pipeline, cache, variant); - - /* Ensure that the NIR shader is on the default cache, as cmd_buffer could - * need to change the current variant. - */ - if (default_cache != cache) { - v3dv_pipeline_cache_upload_variant(pipeline, default_cache, variant); - } - } + /* At this point we don't need anymore the nir shader, but we are freeing + * all the temporary p_stage structs used during the pipeline creation when + * we finish it, so let's not worry about freeing the nir here. + */ return variant; } @@ -1596,12 +1590,12 @@ pipeline_lower_nir(struct v3dv_pipeline *pipeline, * another for the case we need a 32bit return size. */ UNUSED unsigned index = - descriptor_map_add(&pipeline->sampler_map, + descriptor_map_add(&pipeline->shared_data->sampler_map, -1, -1, -1, 0, 16); assert(index == V3DV_NO_SAMPLER_16BIT_IDX); index = - descriptor_map_add(&pipeline->sampler_map, + descriptor_map_add(&pipeline->shared_data->sampler_map, -2, -2, -2, 0, 32); assert(index == V3DV_NO_SAMPLER_32BIT_IDX); @@ -1693,75 +1687,184 @@ pipeline_hash_shader(const struct vk_shader_module *module, _mesa_sha1_final(&ctx, sha1_out); } - static VkResult pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline, - struct v3dv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator) + const VkAllocationCallbacks *pAllocator, + const VkGraphicsPipelineCreateInfo *pCreateInfo) { struct v3dv_pipeline_stage *p_stage = pipeline->vs; - pipeline_lower_nir(pipeline, p_stage, pipeline->layout); /* Right now we only support pipelines with both vertex and fragment * shader. */ - assert(pipeline->fs); + assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); - /* Make sure we do all our common lowering *before* we create the vs - * and vs_bin pipeline stages, since from that point forward we need to - * run lowerings for both of them separately, since each stage will - * own its NIR code. 
- */ - lower_vs_io(p_stage->nir); - - pipeline->vs_bin = pipeline_stage_create_vs_bin(pipeline->vs, pAllocator); - if (pipeline->vs_bin == NULL) - return VK_ERROR_OUT_OF_HOST_MEMORY; + assert(pipeline->vs_bin != NULL); + if (pipeline->vs_bin->nir == NULL) { + assert(pipeline->vs->nir); + pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir); + } + VkResult vk_result; struct v3d_vs_key key; pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs); - VkResult vk_result; - pipeline->vs->current_variant = - v3dv_get_shader_variant(pipeline->vs, cache, &key.base, sizeof(key), - pAllocator, &vk_result); + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] = + pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key), + pAllocator, &vk_result); if (vk_result != VK_SUCCESS) return vk_result; - pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin); - pipeline->vs_bin->current_variant = - v3dv_get_shader_variant(pipeline->vs_bin, cache, &key.base, sizeof(key), - pAllocator, &vk_result); + p_stage = pipeline->vs_bin; + pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage); + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] = + pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key), + pAllocator, &vk_result); return vk_result; } static VkResult pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline, - struct v3dv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator) + const VkAllocationCallbacks *pAllocator, + const VkGraphicsPipelineCreateInfo *pCreateInfo) { struct v3dv_pipeline_stage *p_stage = pipeline->vs; p_stage = pipeline->fs; - pipeline_lower_nir(pipeline, p_stage, pipeline->layout); struct v3d_fs_key key; pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage, get_ucp_enable_mask(pipeline->vs)); - lower_fs_io(p_stage->nir); - VkResult vk_result; - p_stage->current_variant = - v3dv_get_shader_variant(p_stage, cache, &key.base, sizeof(key), - pAllocator, &vk_result); + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] = + pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key), + pAllocator, &vk_result); return vk_result; } +static void +pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_key *key, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + memset(key, 0, sizeof(*key)); + key->robust_buffer_access = + pipeline->device->features.robustBufferAccess; + + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + key->topology = vk_to_pipe_prim_type[ia_info->topology]; + + const VkPipelineColorBlendStateCreateInfo *cb_info = + pCreateInfo->pColorBlendState; + key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? + vk_to_pipe_logicop[cb_info->logicOp] : + PIPE_LOGICOP_COPY; + + const bool raster_enabled = + !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; + + /* Multisample rasterization state must be ignored if rasterization + * is disabled. + */ + const VkPipelineMultisampleStateCreateInfo *ms_info = + raster_enabled ? 
pCreateInfo->pMultisampleState : NULL; + if (ms_info) { + assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT || + ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT); + key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; + + if (key->msaa) { + key->sample_coverage = + pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1; + key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; + key->sample_alpha_to_one = ms_info->alphaToOneEnable; + } + } + + const struct v3dv_render_pass *pass = + v3dv_render_pass_from_handle(pCreateInfo->renderPass); + const struct v3dv_subpass *subpass = pipeline->subpass; + for (uint32_t i = 0; i < subpass->color_count; i++) { + const uint32_t att_idx = subpass->color_attachments[i].attachment; + if (att_idx == VK_ATTACHMENT_UNUSED) + continue; + + key->cbufs |= 1 << i; + + VkFormat fb_format = pass->attachments[att_idx].desc.format; + enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); + + /* If logic operations are enabled then we might emit color reads and we + * need to know the color buffer format and swizzle for that + */ + if (key->logicop_func != PIPE_LOGICOP_COPY) { + key->color_fmt[i].format = fb_pipe_format; + key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format); + } + + const struct util_format_description *desc = + vk_format_description(fb_format); + + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && + desc->channel[0].size == 32) { + key->f32_color_rb |= 1 << i; + } + } + + const VkPipelineVertexInputStateCreateInfo *vi_info = + pCreateInfo->pVertexInputState; + for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &vi_info->pVertexAttributeDescriptions[i]; + assert(desc->location < MAX_VERTEX_ATTRIBS); + if (desc->format == VK_FORMAT_B8G8R8A8_UNORM) + key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); + } + +} + +static void +pipeline_populate_compute_key(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_key *key, + const VkComputePipelineCreateInfo *pCreateInfo) +{ + /* We use the same pipeline key for graphics and compute, but we don't need + * to add a field to flag compute keys because this key is not used alone + * to search in the cache, we also use the SPIR-V or the serialized NIR for + * example, which already flags compute shaders. + */ + memset(key, 0, sizeof(*key)); + key->robust_buffer_access = + pipeline->device->features.robustBufferAccess; +} + +static struct v3dv_pipeline_shared_data * +v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20], + struct v3dv_device *device) +{ + size_t size = sizeof(struct v3dv_pipeline_shared_data); + /* We create new_entry using the device alloc. Right now shared_data is ref + * and unref by both the pipeline and the pipeline cache, so we can't + * ensure that the cache or pipeline alloc will be available on the last + * unref. + */ + struct v3dv_pipeline_shared_data *new_entry = + vk_zalloc2(&device->vk.alloc, NULL, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (new_entry == NULL) + return NULL; + + new_entry->ref_cnt = 1; + memcpy(new_entry->sha1_key, sha1_key, 20); + + return new_entry; +} + /* * It compiles a pipeline. 
Note that it also allocate internal object, but if * some allocations success, but other fails, the method is not freeing the @@ -1782,8 +1885,8 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, struct v3dv_physical_device *physical_device = &device->instance->physicalDevice; - /* First pass to get the the common info from the shader and the nir - * shader. We don't care of the coord shader for now. + /* First pass to get some common info from the shader, and create the + * individual pipeline_stage objects */ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i]; @@ -1819,11 +1922,19 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, pipeline->active_stages |= sinfo->stage; - p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); + /* We will try to get directly the compiled shader variant, so let's not + * worry about getting the nir shader for now. + */ + p_stage->nir = NULL; switch(stage) { case MESA_SHADER_VERTEX: pipeline->vs = p_stage; + pipeline->vs_bin = + pipeline_stage_create_vs_bin(pipeline->vs, pAllocator); + if (pipeline->vs_bin == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + break; case MESA_SHADER_FRAGMENT: pipeline->fs = p_stage; @@ -1864,34 +1975,86 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, pipeline->active_stages |= MESA_SHADER_FRAGMENT; } - /* Linking */ + /* Now we will try to get the variants from the pipeline cache */ + struct v3dv_pipeline_key pipeline_key; + pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo); + unsigned char pipeline_sha1[20]; + pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1); + + pipeline->shared_data = + v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1); + + if (pipeline->shared_data != NULL) { + assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]); + assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); + assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); + + goto success; + } + + pipeline->shared_data = + v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline->device); + /* If not, we try to get the nir shaders (from the SPIR-V shader, or from + * the pipeline cache again) and compile. 
+ */ + if (!pipeline->vs->nir) + pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache); + if (!pipeline->fs->nir) + pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache); + + /* Linking + pipeline lowerings */ link_shaders(pipeline->vs->nir, pipeline->fs->nir); - /* Compiling to vir (or getting it from a cache); - */ + pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout); + lower_fs_io(pipeline->fs->nir); + + pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout); + lower_vs_io(pipeline->vs->nir); + + /* Compiling to vir */ VkResult vk_result; - vk_result = pipeline_compile_fragment_shader(pipeline, cache, - pCreateInfo, pAllocator); + + /* We should have got all the variants or no variants from the cache */ + assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); + vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo); if (vk_result != VK_SUCCESS) return vk_result; - vk_result = pipeline_compile_vertex_shader(pipeline, cache, - pCreateInfo, pAllocator); + assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] && + !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); + + vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo); if (vk_result != VK_SUCCESS) return vk_result; + if (!upload_assembly(pipeline)) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + v3dv_pipeline_cache_upload_pipeline(pipeline, cache); + + /* As we got the variants in pipeline->shared_data, after compiling we + * don't need the pipeline_stages + */ + pipeline_free_stages(device, pipeline, pAllocator); + + success: + pipeline_check_spill_size(pipeline); + /* FIXME: values below are default when non-GS is available. Would need to * provide real values if GS gets supported */ + struct v3dv_shader_variant *vs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; + struct v3dv_shader_variant *vs_bin_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; + pipeline->vpm_cfg_bin.As = 1; pipeline->vpm_cfg_bin.Ve = 0; - pipeline->vpm_cfg_bin.Vc = - pipeline->vs_bin->current_variant->prog_data.vs->vcm_cache_size; + pipeline->vpm_cfg_bin.Vc = vs_bin_variant->prog_data.vs->vcm_cache_size; pipeline->vpm_cfg.As = 1; pipeline->vpm_cfg.Ve = 0; - pipeline->vpm_cfg.Vc = - pipeline->vs->current_variant->prog_data.vs->vcm_cache_size; + pipeline->vpm_cfg.Vc = vs_variant->prog_data.vs->vcm_cache_size; return VK_SUCCESS; } @@ -2397,13 +2560,13 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline) cl_packet_length(GL_SHADER_STATE_RECORD)); struct v3d_fs_prog_data *prog_data_fs = - pipeline->fs->current_variant->prog_data.fs; + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs; struct v3d_vs_prog_data *prog_data_vs = - pipeline->vs->current_variant->prog_data.vs; + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs; struct v3d_vs_prog_data *prog_data_vs_bin = - pipeline->vs_bin->current_variant->prog_data.vs; + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs; /* Note: we are not packing addresses, as we need the job (see @@ -2787,7 +2950,7 @@ pipeline_init(struct v3dv_pipeline *pipeline, pipeline->va_count = 0; struct v3d_vs_prog_data *prog_data_vs = - pipeline->vs->current_variant->prog_data.vs; + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs; for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = @@ -2835,7 +2998,7 @@ 
graphics_pipeline_create(VkDevice _device, /* Use the default pipeline cache if none is specified */ if (cache == NULL && device->instance->default_pipeline_cache_enabled) - cache = &device->default_pipeline_cache; + cache = &device->default_pipeline_cache; pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE); @@ -2945,24 +3108,62 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline, p_stage->spec_info, p_stage->shader_sha1); - p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); + /* We try to get directly the variant first from the cache */ + p_stage->nir = NULL; + pipeline->cs = p_stage; pipeline->active_stages |= sinfo->stage; + + struct v3dv_pipeline_key pipeline_key; + pipeline_populate_compute_key(pipeline, &pipeline_key, info); + unsigned char pipeline_sha1[20]; + pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1); + + pipeline->shared_data = + v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1); + + if (pipeline->shared_data != NULL) { + assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); + goto success; + } + + pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1, + pipeline->device); + + /* If not found on cache, compile it */ + p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); + assert(p_stage->nir); + st_nir_opts(p_stage->nir); pipeline_lower_nir(pipeline, p_stage, pipeline->layout); lower_cs_shared(p_stage->nir); - pipeline->cs = p_stage; + VkResult result = VK_SUCCESS; struct v3d_key key; memset(&key, 0, sizeof(key)); pipeline_populate_v3d_key(&key, p_stage, 0, pipeline->device->features.robustBufferAccess); + pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] = + pipeline_compile_shader_variant(p_stage, &key, sizeof(key), + alloc, &result); - VkResult result; - p_stage->current_variant = - v3dv_get_shader_variant(p_stage, cache, &key, sizeof(key), alloc, &result); - return result; + if (result != VK_SUCCESS) + return result; + + if (!upload_assembly(pipeline)) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + v3dv_pipeline_cache_upload_pipeline(pipeline, cache); + /* As we got the variants in pipeline->shared_data, after compiling we + * don't need the pipeline_stages + */ + pipeline_free_stages(device, pipeline, alloc); + + success: + pipeline_check_spill_size(pipeline); + + return VK_SUCCESS; } static VkResult @@ -2997,7 +3198,7 @@ compute_pipeline_create(VkDevice _device, /* Use the default pipeline cache if none is specified */ if (cache == NULL && device->instance->default_pipeline_cache_enabled) - cache = &device->default_pipeline_cache; + cache = &device->default_pipeline_cache; pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE); diff --git a/src/broadcom/vulkan/v3dv_pipeline_cache.c b/src/broadcom/vulkan/v3dv_pipeline_cache.c index ab1f71639af..4200eba9fa5 100644 --- a/src/broadcom/vulkan/v3dv_pipeline_cache.c +++ b/src/broadcom/vulkan/v3dv_pipeline_cache.c @@ -58,9 +58,9 @@ cache_dump_stats(struct v3dv_pipeline_cache *cache) fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss); fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit); - fprintf(stderr, " variant cache entries: %d\n", cache->variant_stats.count); - fprintf(stderr, " variant cache miss count: %d\n", cache->variant_stats.miss); - fprintf(stderr, " variant cache hit count: %d\n", cache->variant_stats.hit); + fprintf(stderr, " cache entries: %d\n", cache->stats.count); + fprintf(stderr, " cache miss 
count: %d\n", cache->stats.miss); + fprintf(stderr, " cache hit count: %d\n", cache->stats.hit); } void @@ -197,59 +197,65 @@ v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache, cache->nir_stats.hit = 0; cache->nir_stats.count = 0; - cache->variant_cache = _mesa_hash_table_create(NULL, sha1_hash_func, - sha1_compare_func); - cache->variant_stats.miss = 0; - cache->variant_stats.hit = 0; - cache->variant_stats.count = 0; + cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func, + sha1_compare_func); + cache->stats.miss = 0; + cache->stats.hit = 0; + cache->stats.count = 0; } else { cache->nir_cache = NULL; - cache->variant_cache = NULL; + cache->cache = NULL; } } -struct v3dv_shader_variant* -v3dv_pipeline_cache_search_for_variant(struct v3dv_pipeline *pipeline, - struct v3dv_pipeline_cache *cache, - unsigned char sha1_key[20]) +/** + * It searchs for pipeline cached data, and returns a v3dv_pipeline_shared_data with + * it, or NULL if doesn't have it cached. On the former, it will increases the + * ref_count, so caller is responsible to unref it. + */ +struct v3dv_pipeline_shared_data * +v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache, + unsigned char sha1_key[20]) { - if (!cache || !cache->variant_cache) + if (!cache || !cache->cache) return NULL; if (debug_cache) { char sha1buf[41]; _mesa_sha1_format(sha1buf, sha1_key); - fprintf(stderr, "pipeline cache %p, search variant with key %s\n", cache, sha1buf); + fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf); } pthread_mutex_lock(&cache->mutex); struct hash_entry *entry = - _mesa_hash_table_search(cache->variant_cache, sha1_key); + _mesa_hash_table_search(cache->cache, sha1_key); if (entry) { - struct v3dv_shader_variant *variant = - (struct v3dv_shader_variant *) entry->data; + struct v3dv_pipeline_shared_data *cache_entry = + (struct v3dv_pipeline_shared_data *) entry->data; + assert(cache_entry); - cache->variant_stats.hit++; + cache->stats.hit++; if (debug_cache) { - fprintf(stderr, "\tvariant cache hit: %p\n", variant); + fprintf(stderr, "\tcache hit: %p\n", cache_entry); if (dump_stats) cache_dump_stats(cache); } - if (variant) - v3dv_shader_variant_ref(variant); + + v3dv_pipeline_shared_data_ref(cache_entry); pthread_mutex_unlock(&cache->mutex); - return variant; + + return cache_entry; } - cache->variant_stats.miss++; + cache->stats.miss++; if (debug_cache) { - fprintf(stderr, "\tvariant cache miss\n"); + fprintf(stderr, "\tcache miss\n"); if (dump_stats) cache_dump_stats(cache); } @@ -259,34 +265,109 @@ v3dv_pipeline_cache_search_for_variant(struct v3dv_pipeline *pipeline, } void -v3dv_pipeline_cache_upload_variant(struct v3dv_pipeline *pipeline, - struct v3dv_pipeline_cache *cache, - struct v3dv_shader_variant *variant) +v3dv_pipeline_shared_data_destroy(struct v3dv_device *device, + struct v3dv_pipeline_shared_data *shared_data) { - if (!cache || !cache->variant_cache) + assert(shared_data->ref_cnt == 0); + + for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { + if (shared_data->variants[stage] != NULL) + v3dv_shader_variant_destroy(device, shared_data->variants[stage]); + } + + if (shared_data->assembly_bo) + v3dv_bo_free(device, shared_data->assembly_bo); + + vk_free(&device->vk.alloc, shared_data); +} + +static struct v3dv_pipeline_shared_data * +v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache, + const unsigned char sha1_key[20], + struct v3dv_shader_variant **variants, + const struct v3dv_descriptor_map *ubo_map, + const 
struct v3dv_descriptor_map *ssbo_map, + const struct v3dv_descriptor_map *sampler_map, + const struct v3dv_descriptor_map *texture_map, + const uint64_t *total_assembly, + const uint32_t total_assembly_size) +{ + size_t size = sizeof(struct v3dv_pipeline_shared_data); + /* We create new_entry using the device alloc. Right now shared_data is ref + * and unref by both the pipeline and the pipeline cache, so we can't + * ensure that the cache or pipeline alloc will be available on the last + * unref. + */ + struct v3dv_pipeline_shared_data *new_entry = + vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (new_entry == NULL) + return NULL; + + new_entry->ref_cnt = 1; + memcpy(new_entry->sha1_key, sha1_key, 20); + + memcpy(&new_entry->ubo_map, ubo_map, sizeof(struct v3dv_descriptor_map)); + memcpy(&new_entry->ssbo_map, ssbo_map, sizeof(struct v3dv_descriptor_map)); + memcpy(&new_entry->sampler_map, sampler_map, sizeof(struct v3dv_descriptor_map)); + memcpy(&new_entry->texture_map, texture_map, sizeof(struct v3dv_descriptor_map)); + + for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) + new_entry->variants[stage] = variants[stage]; + + struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size, + "pipeline shader assembly", true); + if (!bo) { + fprintf(stderr, "failed to allocate memory for shaders assembly\n"); + v3dv_pipeline_shared_data_unref(cache->device, new_entry); + return NULL; + } + + bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size); + if (!ok) { + fprintf(stderr, "failed to map source shader buffer\n"); + v3dv_pipeline_shared_data_unref(cache->device, new_entry); + return NULL; + } + + memcpy(bo->map, total_assembly, total_assembly_size); + + new_entry->assembly_bo = bo; + + return new_entry; +} + +/* Uploads all the "cacheable" or shared data from the pipeline */ +void +v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_cache *cache) +{ + if (!cache || !cache->cache) return; - if (cache->variant_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES) + if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES) return; pthread_mutex_lock(&cache->mutex); struct hash_entry *entry = - _mesa_hash_table_search(cache->variant_cache, variant->variant_sha1); + _mesa_hash_table_search(cache->cache, pipeline->shared_data->sha1_key); if (entry) { pthread_mutex_unlock(&cache->mutex); return; } - v3dv_shader_variant_ref(variant); - _mesa_hash_table_insert(cache->variant_cache, variant->variant_sha1, variant); - cache->variant_stats.count++; + v3dv_pipeline_shared_data_ref(pipeline->shared_data); + _mesa_hash_table_insert(cache->cache, pipeline->shared_data->sha1_key, + pipeline->shared_data); + cache->stats.count++; if (debug_cache) { char sha1buf[41]; - _mesa_sha1_format(sha1buf, variant->variant_sha1); + _mesa_sha1_format(sha1buf, pipeline->shared_data->sha1_key); - fprintf(stderr, "pipeline cache %p, new variant entry with key %s\n\t%p\n", - cache, sha1buf, variant); + fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n", + cache, sha1buf, pipeline->shared_data); if (dump_stats) cache_dump_stats(cache); } @@ -321,8 +402,6 @@ shader_variant_create_from_blob(struct v3dv_device *device, broadcom_shader_stage stage = blob_read_uint32(blob); - const unsigned char *variant_sha1 = blob_read_bytes(blob, 20); - uint32_t prog_data_size = blob_read_uint32(blob); /* FIXME: as we include the stage perhaps we can avoid prog_data_size? 
*/ assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage))); @@ -342,10 +421,8 @@ shader_variant_create_from_blob(struct v3dv_device *device, if (blob->overrun) return NULL; + uint32_t assembly_offset = blob_read_uint32(blob); uint32_t qpu_insts_size = blob_read_uint32(blob); - const uint64_t *qpu_insts = blob_read_bytes(blob, qpu_insts_size); - if (blob->overrun) - return NULL; /* shader_variant_create expects a newly created prog_data for their own, * as it is what the v3d compiler returns. So we are also allocating one @@ -362,12 +439,53 @@ shader_variant_create_from_blob(struct v3dv_device *device, memcpy(ulist->data, ulist_data_data, ulist_data_size); return v3dv_shader_variant_create(device, stage, - variant_sha1, new_prog_data, prog_data_size, - qpu_insts, qpu_insts_size, + assembly_offset, + NULL, qpu_insts_size, &result); } +static struct v3dv_pipeline_shared_data * +v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache, + struct blob_reader *blob) +{ + const unsigned char *sha1_key = blob_read_bytes(blob, 20); + + const struct v3dv_descriptor_map *ubo_map = + blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map)); + const struct v3dv_descriptor_map *ssbo_map = + blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map)); + const struct v3dv_descriptor_map *sampler_map = + blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map)); + const struct v3dv_descriptor_map *texture_map = + blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map)); + + if (blob->overrun) + return NULL; + + uint8_t variant_count = blob_read_uint8(blob); + + struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 }; + + for (uint8_t count = 0; count < variant_count; count++) { + uint8_t stage = blob_read_uint8(blob); + struct v3dv_shader_variant *variant = + shader_variant_create_from_blob(cache->device, blob); + variants[stage] = variant; + } + + uint32_t total_assembly_size = blob_read_uint32(blob); + const uint64_t *total_assembly = + blob_read_bytes(blob, total_assembly_size); + + if (blob->overrun) + return NULL; + + return v3dv_pipeline_shared_data_new(cache, sha1_key, variants, + ubo_map, ssbo_map, sampler_map, texture_map, + total_assembly, total_assembly_size); +} + static void pipeline_cache_load(struct v3dv_pipeline_cache *cache, size_t size, @@ -377,7 +495,7 @@ pipeline_cache_load(struct v3dv_pipeline_cache *cache, struct v3dv_physical_device *pdevice = &device->instance->physicalDevice; struct vk_pipeline_cache_header header; - if (cache->variant_cache == NULL) + if (cache->cache == NULL || cache->nir_cache == NULL) return; struct blob_reader blob; @@ -418,17 +536,18 @@ pipeline_cache_load(struct v3dv_pipeline_cache *cache, return; for (uint32_t i = 0; i < count; i++) { - struct v3dv_shader_variant *variant = - shader_variant_create_from_blob(device, &blob); - if (!variant) + struct v3dv_pipeline_shared_data *cache_entry = + v3dv_pipeline_shared_data_create_from_blob(cache, &blob); + if (!cache_entry) break; - _mesa_hash_table_insert(cache->variant_cache, variant->variant_sha1, variant); - cache->variant_stats.count++; + + _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry); + cache->stats.count++; } if (debug_cache) { fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and " - "%i variant entries\n", cache, nir_count, count); + "%i entries\n", cache, nir_count, count); if (dump_stats) cache_dump_stats(cache); } @@ -482,15 +601,14 @@ v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache) 
       _mesa_hash_table_destroy(cache->nir_cache, NULL);
    }
 
-   if (cache->variant_cache) {
-      hash_table_foreach(cache->variant_cache, entry) {
-         struct v3dv_shader_variant *variant = entry->data;
-         if (variant)
-            v3dv_shader_variant_unref(cache->device, variant);
+   if (cache->cache) {
+      hash_table_foreach(cache->cache, entry) {
+         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
+         if (cache_entry)
+            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
       }
-      _mesa_hash_table_destroy(cache->variant_cache, NULL);
-
+      _mesa_hash_table_destroy(cache->cache, NULL);
    }
 }
 
@@ -518,12 +636,12 @@ v3dv_MergePipelineCaches(VkDevice device,
 {
    V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);
 
-   if (!dst->variant_cache || !dst->nir_cache)
+   if (!dst->cache || !dst->nir_cache)
       return VK_SUCCESS;
 
    for (uint32_t i = 0; i < srcCacheCount; i++) {
       V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
-      if (!src->variant_cache || !src->nir_cache)
+      if (!src->cache || !src->nir_cache)
         continue;
 
      hash_table_foreach(src->nir_cache, entry) {
@@ -559,22 +677,22 @@ v3dv_MergePipelineCaches(VkDevice device,
         }
      }
 
-      hash_table_foreach(src->variant_cache, entry) {
-         struct v3dv_shader_variant *variant = entry->data;
-         assert(variant);
+      hash_table_foreach(src->cache, entry) {
+         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
+         assert(cache_entry);
 
-         if (_mesa_hash_table_search(dst->variant_cache, variant->variant_sha1))
+         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
            continue;
 
-         v3dv_shader_variant_ref(variant);
-         _mesa_hash_table_insert(dst->variant_cache, variant->variant_sha1, variant);
+         v3dv_pipeline_shared_data_ref(cache_entry);
+         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);
 
-         dst->variant_stats.count++;
+         dst->stats.count++;
         if (debug_cache) {
            char sha1buf[41];
-            _mesa_sha1_format(sha1buf, variant->variant_sha1);
+            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);
 
-            fprintf(stderr, "pipeline cache %p, added variant entry %s "
+            fprintf(stderr, "pipeline cache %p, added entry %s "
                    "from pipeline cache %p\n", dst, sha1buf, src);
 
            if (dump_stats)
@@ -592,8 +710,6 @@ shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
 {
    blob_write_uint32(blob, variant->stage);
 
-   blob_write_bytes(blob, variant->variant_sha1, sizeof(variant->variant_sha1));
-
    blob_write_uint32(blob, variant->prog_data_size);
    blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);
 
@@ -602,13 +718,62 @@ shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
    blob_write_bytes(blob, ulist->contents,
                     sizeof(enum quniform_contents) * ulist->count);
    blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);
 
+   blob_write_uint32(blob, variant->assembly_offset);
    blob_write_uint32(blob, variant->qpu_insts_size);
-   assert(variant->assembly_bo->map);
-   blob_write_bytes(blob, variant->assembly_bo->map, variant->qpu_insts_size);
 
    return !blob->out_of_memory;
 }
 
+static bool
+v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
+                                        struct blob *blob)
+{
+   blob_write_bytes(blob, cache_entry->sha1_key, 20);
+
+   blob_write_bytes(blob, &cache_entry->ubo_map,
+                    sizeof(struct v3dv_descriptor_map));
+   blob_write_bytes(blob, &cache_entry->ssbo_map,
+                    sizeof(struct v3dv_descriptor_map));
+   blob_write_bytes(blob, &cache_entry->sampler_map,
+                    sizeof(struct v3dv_descriptor_map));
+   blob_write_bytes(blob, &cache_entry->texture_map,
+                    sizeof(struct v3dv_descriptor_map));
+
+   uint8_t variant_count = 0;
+   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+      if (cache_entry->variants[stage] == NULL)
+         continue;
+      variant_count++;
+   }
+
+   /* Right now we only support compute pipeline, or graphics pipeline with
+    * vertex, vertex bin, and fragment shader.
+    */
+   assert(variant_count == 3 ||
+          (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
+   blob_write_uint8(blob, variant_count);
+
+   uint32_t total_assembly_size = 0;
+   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+      if (cache_entry->variants[stage] == NULL)
+         continue;
+
+      blob_write_uint8(blob, stage);
+      if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
+         return false;
+
+      total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
+   }
+   blob_write_uint32(blob, total_assembly_size);
+
+   assert(cache_entry->assembly_bo->map);
+   assert(cache_entry->assembly_bo->size > total_assembly_size);
+   blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);
+
+   return !blob->out_of_memory;
+}
+
+
 VkResult
 v3dv_GetPipelineCacheData(VkDevice _device,
                           VkPipelineCache _cache,
@@ -679,12 +844,12 @@ v3dv_GetPipelineCacheData(VkDevice _device,
            return VK_INCOMPLETE;
         }
 
-   if (cache->variant_cache) {
-      hash_table_foreach(cache->variant_cache, entry) {
-         struct v3dv_shader_variant *variant = entry->data;
+   if (cache->cache) {
+      hash_table_foreach(cache->cache, entry) {
+         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         size_t save_size = blob.size;
-         if (!shader_variant_write_to_blob(variant, &blob)) {
+         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If it fails reset to the previous size and bail */
            blob.size = save_size;
            pthread_mutex_unlock(&cache->mutex);
@@ -703,10 +868,10 @@ v3dv_GetPipelineCacheData(VkDevice _device,
    blob_finish(&blob);
 
    if (debug_cache) {
-      assert(count <= cache->variant_stats.count);
+      assert(count <= cache->stats.count);
      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
             "%i nir shader entries "
-             "%i variant entries, %u DataSize\n",
+             "%i entries, %u DataSize\n",
             cache, nir_count, count, (uint32_t) *pDataSize);
   }
 
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index b6af5a7fc46..6266f5e3f52 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -254,6 +254,23 @@ struct v3dv_meta_texel_buffer_copy_pipeline {
    uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
 };
 
+struct v3dv_pipeline_key {
+   bool robust_buffer_access;
+   uint8_t topology;
+   uint8_t logicop_func;
+   bool msaa;
+   bool sample_coverage;
+   bool sample_alpha_to_coverage;
+   bool sample_alpha_to_one;
+   uint8_t cbufs;
+   struct {
+      enum pipe_format format;
+      const uint8_t *swizzle;
+   } color_fmt[V3D_MAX_DRAW_BUFFERS];
+   uint8_t f32_color_rb;
+   uint32_t va_swap_rb_mask;
+};
+
 struct v3dv_pipeline_cache_stats {
    uint32_t miss;
    uint32_t hit;
@@ -314,8 +331,8 @@ struct v3dv_pipeline_cache {
    struct hash_table *nir_cache;
    struct v3dv_pipeline_cache_stats nir_stats;
 
-   struct hash_table *variant_cache;
-   struct v3dv_pipeline_cache_stats variant_stats;
+   struct hash_table *cache;
+   struct v3dv_pipeline_cache_stats stats;
 };
 
 struct v3dv_device {
@@ -1340,15 +1357,8 @@ vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
 }
 
 struct v3dv_shader_variant {
-   uint32_t ref_cnt;
-
    broadcom_shader_stage stage;
 
-   /* key for the pipeline cache, it is p_stage shader_sha1 + v3d compiler
-    * sha1
-    */
-   unsigned char variant_sha1[20];
-
    union {
       struct v3d_prog_data *base;
       struct v3d_vs_prog_data *vs;
@@ -1360,11 +1370,17 @@ struct v3dv_shader_variant {
    * serialize
    */
    uint32_t prog_data_size;
-   /* FIXME: using one bo per shader. Eventually we would be interested on
-    * reusing the same bo for all the shaders, like a bo per v3dv_pipeline for
-    * shaders.
+
+   /* The assembly for this variant will be uploaded to a BO shared with all
+    * other shader stages in that pipeline. This is the offset in that BO.
     */
-   struct v3dv_bo *assembly_bo;
+   uint32_t assembly_offset;
+
+   /* Note: it is really likely that qpu_insts would be NULL, as it will be
+    * used only temporarily, to upload it to the shared bo, as we compile the
+    * different stages individually.
+    */
+   uint64_t *qpu_insts;
    uint32_t qpu_insts_size;
 };
 
@@ -1393,8 +1409,6 @@ struct v3dv_pipeline_stage {
 
    /** A name for this program, so you can track it in shader-db output. */
    uint32_t program_id;
-
-   struct v3dv_shader_variant*current_variant;
 };
 
 /* FIXME: although the full vpm_config is not required at this point, as we
@@ -1606,6 +1620,25 @@ v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
    *sampler_index = sampler;
 }
 
+/* The structure represents data shared between different objects, like the
+ * pipeline and the pipeline cache, so we ref count it to know when it should
+ * be freed.
+ */
+struct v3dv_pipeline_shared_data {
+   uint32_t ref_cnt;
+
+   unsigned char sha1_key[20];
+
+   struct v3dv_descriptor_map ubo_map;
+   struct v3dv_descriptor_map ssbo_map;
+   struct v3dv_descriptor_map sampler_map;
+   struct v3dv_descriptor_map texture_map;
+
+   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];
+
+   struct v3dv_bo *assembly_bo;
+};
+
 struct v3dv_pipeline {
    struct vk_object_base base;
 
@@ -1668,11 +1701,7 @@ struct v3dv_pipeline {
 
    enum pipe_prim_type topology;
 
-   struct v3dv_descriptor_map ubo_map;
-   struct v3dv_descriptor_map ssbo_map;
-
-   struct v3dv_descriptor_map sampler_map;
-   struct v3dv_descriptor_map texture_map;
+   struct v3dv_pipeline_shared_data *shared_data;
 
    /* FIXME: this bo is another candidate to data to be uploaded using a
    * resource manager, instead of a individual bo
@@ -1848,9 +1877,12 @@ void v3d_store_tiled_image(void *dst, uint32_t dst_stride,
                           const struct pipe_box *box);
 
 struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
-                                         struct v3dv_pipeline_stage *p_stage);
+                                         struct v3dv_pipeline *pipeline,
+                                         struct v3dv_shader_variant *variant);
+
 struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
-                                                    struct v3dv_pipeline_stage *p_stage,
+                                                    struct v3dv_pipeline *pipeline,
+                                                    struct v3dv_shader_variant *variant,
                                                     uint32_t **wg_count_offsets);
 
 struct v3dv_shader_variant *
@@ -1864,10 +1896,10 @@ v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
 struct v3dv_shader_variant *
 v3dv_shader_variant_create(struct v3dv_device *device,
                            broadcom_shader_stage stage,
-                           const unsigned char *variant_sha1,
                            struct v3d_prog_data *prog_data,
                            uint32_t prog_data_size,
-                           const uint64_t *qpu_insts,
+                           uint32_t assembly_offset,
+                           uint64_t *qpu_insts,
                            uint32_t qpu_insts_size,
                            VkResult *out_vk_result);
 
@@ -1876,19 +1908,23 @@ v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant);
 
 static inline void
-v3dv_shader_variant_ref(struct v3dv_shader_variant *variant)
+v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
 {
-   assert(variant && variant->ref_cnt >= 1);
-   p_atomic_inc(&variant->ref_cnt);
+   assert(shared_data && shared_data->ref_cnt >= 1);
+   p_atomic_inc(&shared_data->ref_cnt);
 }
 
+void
+v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
+                                  struct v3dv_pipeline_shared_data *shared_data);
+
 static inline void
-v3dv_shader_variant_unref(struct v3dv_device *device,
-                          struct v3dv_shader_variant *variant)
+v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
+                                struct v3dv_pipeline_shared_data *shared_data)
 {
-   assert(variant && variant->ref_cnt >= 1);
-   if (p_atomic_dec_zero(&variant->ref_cnt))
-      v3dv_shader_variant_destroy(device, variant);
+   assert(shared_data && shared_data->ref_cnt >= 1);
+   if (p_atomic_dec_zero(&shared_data->ref_cnt))
+      v3dv_pipeline_shared_data_destroy(device, shared_data);
 }
 
 struct v3dv_descriptor *
@@ -1953,15 +1989,13 @@ nir_shader*
 v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                    const nir_shader_compiler_options *nir_options,
                                    unsigned char sha1_key[20]);
 
-struct v3dv_shader_variant*
-v3dv_pipeline_cache_search_for_variant(struct v3dv_pipeline *pipeline,
-                                       struct v3dv_pipeline_cache *cache,
-                                       unsigned char sha1_key[20]);
+struct v3dv_pipeline_shared_data *
+v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
+                                        unsigned char sha1_key[20]);
 
 void
-v3dv_pipeline_cache_upload_variant(struct v3dv_pipeline *pipeline,
-                                   struct v3dv_pipeline_cache *cache,
-                                   struct v3dv_shader_variant *variant);
+v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
+                                    struct v3dv_pipeline_cache *cache);
 
 void
 v3dv_shader_module_internal_init(struct v3dv_device *device,
                                  struct vk_shader_module *module,
diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c
index e0eea03d194..8dd085862e8 100644
--- a/src/broadcom/vulkan/v3dv_uniforms.c
+++ b/src/broadcom/vulkan/v3dv_uniforms.c
@@ -97,14 +97,15 @@ write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
 
    /* We need to ensure that the texture bo is added to the job */
    struct v3dv_bo *texture_bo =
-      v3dv_descriptor_map_get_texture_bo(descriptor_state, &pipeline->texture_map,
+      v3dv_descriptor_map_get_texture_bo(descriptor_state,
+                                         &pipeline->shared_data->texture_map,
                                          pipeline->layout, texture_idx);
    assert(texture_bo);
    v3dv_job_add_bo(job, texture_bo);
 
    struct v3dv_cl_reloc state_reloc =
       v3dv_descriptor_map_get_texture_shader_state(descriptor_state,
-                                                   &pipeline->texture_map,
+                                                   &pipeline->shared_data->texture_map,
                                                    pipeline->layout, texture_idx);
 
@@ -130,12 +131,14 @@ write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
           sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);
 
    struct v3dv_cl_reloc sampler_state_reloc =
-      v3dv_descriptor_map_get_sampler_state(descriptor_state, &pipeline->sampler_map,
+      v3dv_descriptor_map_get_sampler_state(descriptor_state,
+                                            &pipeline->shared_data->sampler_map,
                                             pipeline->layout, sampler_idx);
 
    const struct v3dv_sampler *sampler =
-      v3dv_descriptor_map_get_sampler(descriptor_state, &pipeline->sampler_map,
-                                      pipeline->layout, sampler_idx);
+      v3dv_descriptor_map_get_sampler(descriptor_state,
+                                      &pipeline->shared_data->sampler_map,
+                                      pipeline->layout, sampler_idx);
    assert(sampler);
 
    /* Set unnormalized coordinates flag from sampler object */
@@ -167,7 +170,7 @@ write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
    struct v3dv_descriptor_map *map =
       content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
-      &pipeline->ubo_map : &pipeline->ssbo_map;
+      &pipeline->shared_data->ubo_map : &pipeline->shared_data->ssbo_map;
 
    uint32_t offset =
       content == QUNIFORM_UBO_ADDR ?
@@ -285,7 +288,7 @@ get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
 
    struct v3dv_descriptor *descriptor =
       v3dv_descriptor_map_get_descriptor(descriptor_state,
-                                         &pipeline->texture_map,
+                                         &pipeline->shared_data->texture_map,
                                          pipeline->layout,
                                          texture_idx, NULL);
 
@@ -309,13 +312,13 @@ get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
 
 struct v3dv_cl_reloc
 v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
-                               struct v3dv_pipeline_stage *p_stage,
+                               struct v3dv_pipeline *pipeline,
+                               struct v3dv_shader_variant *variant,
                                uint32_t **wg_count_offsets)
 {
    struct v3d_uniform_list *uinfo =
-      &p_stage->current_variant->prog_data.base->uniforms;
+      &variant->prog_data.base->uniforms;
    struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
-   struct v3dv_pipeline *pipeline = p_stage->pipeline;
 
    struct v3dv_job *job = cmd_buffer->state.job;
    assert(job);
@@ -432,7 +435,8 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
 
 struct v3dv_cl_reloc
 v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
-                    struct v3dv_pipeline_stage *p_stage)
+                    struct v3dv_pipeline *pipeline,
+                    struct v3dv_shader_variant *variant)
 {
-   return v3dv_write_uniforms_wg_offsets(cmd_buffer, p_stage, NULL);
+   return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL);
 }
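
For reference, the bytes that make up one pipeline-cache entry after this change, in the
order written by v3dv_pipeline_shared_data_write_to_blob() and read back by
v3dv_pipeline_shared_data_create_from_blob() above, can be sketched as follows. This is
an illustrative sketch only, not part of the patch; the struct and field names below are
hypothetical, and the real code streams the fields through Mesa's struct blob /
struct blob_reader helpers rather than declaring a layout struct.

   /* Sketch (hypothetical names): serialized layout of one cache entry. */
   struct sketch_cache_entry {
      unsigned char sha1_key[20];
      struct v3dv_descriptor_map ubo_map;     /* raw copies of the four maps */
      struct v3dv_descriptor_map ssbo_map;
      struct v3dv_descriptor_map sampler_map;
      struct v3dv_descriptor_map texture_map;
      uint8_t variant_count;                  /* 3 for graphics (vs bin/vs/fs), 1 for compute */
      /* variant_count times: a uint8_t stage index, followed by the
       * shader_variant_write_to_blob() payload for that variant (stage,
       * prog_data size and bytes, the uniform list, assembly_offset and
       * qpu_insts_size); no per-variant assembly is stored here.
       */
      uint32_t total_assembly_size;
      /* total_assembly_size bytes copied from the shared assembly_bo follow,
       * so all variants of the entry can be re-uploaded into a single BO. */
   };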