diff --git a/src/broadcom/common/v3d_debug.c b/src/broadcom/common/v3d_debug.c index 748c3fbafa5..64a2426b9c1 100644 --- a/src/broadcom/common/v3d_debug.c +++ b/src/broadcom/common/v3d_debug.c @@ -57,6 +57,7 @@ static const struct debug_control debug_control[] = { { "always_flush", V3D_DEBUG_ALWAYS_FLUSH}, { "precompile", V3D_DEBUG_PRECOMPILE}, { "ra", V3D_DEBUG_RA}, + { "dump_spirv", V3D_DEBUG_DUMP_SPIRV}, { NULL, 0 } }; diff --git a/src/broadcom/common/v3d_debug.h b/src/broadcom/common/v3d_debug.h index f7afb457469..c3f7e56a9ae 100644 --- a/src/broadcom/common/v3d_debug.h +++ b/src/broadcom/common/v3d_debug.h @@ -58,6 +58,7 @@ extern uint32_t V3D_DEBUG; #define V3D_DEBUG_CLIF (1 << 14) #define V3D_DEBUG_PRECOMPILE (1 << 15) #define V3D_DEBUG_RA (1 << 16) +#define V3D_DEBUG_DUMP_SPIRV (1 << 17) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "BROADCOM-MESA" diff --git a/src/broadcom/vulkan/meson.build b/src/broadcom/vulkan/meson.build index 91012c4014e..83e023c0932 100644 --- a/src/broadcom/vulkan/meson.build +++ b/src/broadcom/vulkan/meson.build @@ -84,6 +84,8 @@ v3dv_deps = [ dep_libdrm, dep_valgrind, dep_v3dv3, + idep_nir, + idep_nir_headers, idep_vulkan_util, ] diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 334e2e6815f..20ecbf185a1 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -36,7 +36,9 @@ #include "broadcom/cle/v3dx_pack.h" +#include "compiler/v3d_compiler.h" #include "compiler/glsl_types.h" + #include "drm-uapi/v3d_drm.h" #include "vk_util.h" @@ -311,6 +313,9 @@ physical_device_init(struct v3dv_physical_device *device, goto fail; } + device->compiler = v3d_compiler_init(&device->devinfo); + device->next_program_id = 0; + asprintf(&device->name, "V3D %d.%d", device->devinfo.ver / 10, device->devinfo.ver % 10); diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index d26e0b272e5..190c2986548 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c 
+++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -23,8 +23,15 @@ #include "vk_util.h" +#include "v3dv_debug.h" #include "v3dv_private.h" +#include "vk_format_info.h" + +#include "common/v3d_debug.h" + +#include "vulkan/util/vk_format.h" + VkResult v3dv_CreateShaderModule(VkDevice _device, const VkShaderModuleCreateInfo *pCreateInfo, @@ -67,14 +74,667 @@ v3dv_DestroyShaderModule(VkDevice _device, vk_free2(&device->alloc, pAllocator, module); } +static void +destroy_pipeline_stage(struct v3dv_device *device, + struct v3dv_pipeline_stage *p_stage, + const VkAllocationCallbacks *pAllocator) +{ + v3dv_bo_free(device, p_stage->assembly_bo); + + vk_free2(&device->alloc, pAllocator, p_stage); +} + void v3dv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline); + + if (!pipeline) + return; + + /* FIXME: we can't just use a loop over mesa stage due the bin, would be + * good to find an alternative. 
+ */ + destroy_pipeline_stage(device, pipeline->vs, pAllocator); + destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator); + destroy_pipeline_stage(device, pipeline->fs, pAllocator); + + vk_free2(&device->alloc, pAllocator, pipeline); +} + +static const struct spirv_to_nir_options default_spirv_options = { + .caps = { false }, + .ubo_addr_format = nir_address_format_32bit_index_offset, + .ssbo_addr_format = nir_address_format_32bit_index_offset, + .phys_ssbo_addr_format = nir_address_format_64bit_global, + .push_const_addr_format = nir_address_format_logical, + .shared_addr_format = nir_address_format_32bit_offset, + .frag_coord_is_sysval = false, +}; + +const nir_shader_compiler_options v3dv_nir_options = { + .lower_all_io_to_temps = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_bitfield_insert_to_shifts = true, + .lower_bitfield_extract_to_shifts = true, + .lower_bitfield_reverse = true, + .lower_bit_count = true, + .lower_cs_local_id_from_index = true, + .lower_ffract = true, + .lower_fmod = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_snorm_4x8 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_snorm_4x8 = true, + .lower_pack_half_2x16 = true, + .lower_unpack_half_2x16 = true, + .lower_fdiv = true, + .lower_find_lsb = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, + .lower_flrp32 = true, + .lower_fpow = true, + .lower_fsat = true, + .lower_fsqrt = true, + .lower_ifind_msb = true, + .lower_isign = true, + .lower_ldexp = true, + .lower_mul_high = true, + .lower_wpos_pntc = true, + .lower_rotate = true, + .lower_to_scalar = true, + .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic + * needs to be supported */ +}; + +static nir_shader * +shader_module_compile_to_nir(struct v3dv_device *device, + struct v3dv_pipeline_stage *stage) +{ + nir_shader *nir; + const nir_shader_compiler_options 
*nir_options = &v3dv_nir_options; + + uint32_t *spirv = (uint32_t *) stage->module->data; + assert(stage->module->size % 4 == 0); + + if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) + v3dv_print_spirv(stage->module->data, stage->module->size, stderr); + + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = NULL; + + const struct spirv_to_nir_options spirv_options = default_spirv_options; + nir = spirv_to_nir(spirv, stage->module->size / 4, + spec_entries, num_spec_entries, + stage->stage, stage->entrypoint, + &spirv_options, nir_options); + assert(nir->info.stage == stage->stage); + nir_validate_shader(nir, "after spirv_to_nir"); + + free(spec_entries); + + /* We have to lower away local variable initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_opt_deref); + + /* Pick off the single entrypoint that we want */ + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func->is_entrypoint) + func->name = ralloc_strdup(func, "main"); + else + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + + /* Make sure we lower variable initializers on output variables so that + * nir_remove_dead_variables below sees the corresponding stores + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out); + + /* Now that we've deleted all but the main function, we can go ahead and + * lower the rest of the variable initializers. + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); + + /* Split member structs. We do this before lower_io_to_temporaries so that + * it doesn't lower system values to temporaries by accident. 
+ */ + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_split_per_member_structs); + + /* FIXME: needed? */ + if (nir->info.stage == MESA_SHADER_FRAGMENT) + NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out); + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_lower_input_attachments, + &(nir_input_attachment_options) { + .use_fragcoord_sysval = false, + }); } + + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | + nir_var_system_value | nir_var_mem_shared, + NULL); + + NIR_PASS_V(nir, nir_propagate_invariant); + + NIR_PASS_V(nir, nir_lower_system_values); + NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + return nir; +} + +static void +v3dv_nir_lower_fs_inputs(nir_shader *nir) { /* FIXME: stub */ } +static int +type_size_vec4(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +static void +v3dv_nir_lower_fs_outputs(nir_shader *nir) +{ + NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, type_size_vec4, 0); +} + +static void +shader_debug_output(const char *message, void *data) +{ + /* FIXME: We probably don't want to debug anything extra here, and in fact + * the compiler is not using this callback too much, only as an alternative + * way to debug out the shaderdb stats, that you can already get using + * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d + * compiler to remove that callback. + */ +} + +static void +pipeline_populate_v3d_key(struct v3d_key *key, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct v3dv_pipeline_stage *p_stage) +{ + /* default value. Would be override on the vs/gs populate methods when GS + * gets supported + */ + key->is_last_geometry_stage = true; + + /* Vulkan provides a way to define clip distances, but not clip planes, so + * we understand that this would be always zero. 
Probably would need to be + * revisited based on all the clip related extensions available. + */ + key->ucp_enables = 0; +} + +/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the + * same. For not using prim_mode that is the one already used on v3d + */ +static const enum pipe_prim_type vk_to_pipe_prim_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY, +}; + +static const enum pipe_logicop vk_to_pipe_logicop[] = { + [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY, + [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR, + [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR, + [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR, + [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND, + [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET, +}; + +static void +pipeline_populate_v3d_fs_key(struct v3d_fs_key *key, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const 
struct v3dv_pipeline_stage *p_stage) +{ + memset(key, 0, sizeof(*key)); + + pipeline_populate_v3d_key(&key->base, pCreateInfo, p_stage); + + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; + + key->is_points = (topology == PIPE_PRIM_POINTS); + key->is_lines = (topology >= PIPE_PRIM_LINES && + topology <= PIPE_PRIM_LINE_STRIP); + + /* Vulkan doesn't appear to specify (anv does the same) */ + key->clamp_color = false; + + const VkPipelineColorBlendStateCreateInfo *cb_info = + pCreateInfo->pColorBlendState; + + key->logicop_func = (cb_info->logicOpEnable == VK_FALSE ? + PIPE_LOGICOP_COPY : + vk_to_pipe_logicop[cb_info->logicOp]); + + const VkPipelineMultisampleStateCreateInfo *ms_info = + pCreateInfo->pMultisampleState; + + /* FIXME: msaa not supported yet (although we add some of the code to + * translate vk sample info in advance) + */ + key->msaa = false; + if (key->msaa && ms_info != NULL) { + uint32_t sample_mask = 0xffff; + + if (ms_info->pSampleMask) + sample_mask = ms_info->pSampleMask[0] & 0xffff; + + key->sample_coverage = (sample_mask != (1 << V3D_MAX_SAMPLES) - 1); + key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; + key->sample_alpha_to_one = ms_info->alphaToOneEnable; + } + + const VkPipelineDepthStencilStateCreateInfo *ds_info = + pCreateInfo->pDepthStencilState; + + key->depth_enabled = (ds_info == NULL ? false : ds_info->depthTestEnable); + + /* Vulkan doesn't support alpha test */ + key->alpha_test = false; + key->alpha_test_func = COMPARE_FUNC_NEVER; + + /* FIXME: placeholder. Final value for swap_color_rb depends on the format + * of the surface to be used. 
+ */ + key->swap_color_rb = false; + + const struct v3dv_subpass *subpass = p_stage->pipeline->subpass; + for (uint32_t i = 0; i < subpass->color_count; i++) { + if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) + continue; + + key->cbufs |= 1 << i; + + /* FIXME: in order to know this we need to access to the color + * framebuffer. Still not in place. Using default hardcode value. + */ + VkFormat fb_format = VK_FORMAT_R8G8B8A8_UNORM; + enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); + + /* If logic operations are enabled then we might emit color reads and we + * need to know the color buffer format and swizzle for that + */ + if (key->logicop_func != PIPE_LOGICOP_COPY) { + key->color_fmt[i].format = fb_pipe_format; + key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format); + } + + const struct util_format_description *desc = + vk_format_description(fb_format); + + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && + desc->channel[0].size == 32) { + key->f32_color_rb |= 1 << i; + } + + if (p_stage->nir->info.fs.untyped_color_outputs) { + if (util_format_is_pure_uint(fb_pipe_format)) + key->uint_color_rb |= 1 << i; + else if (util_format_is_pure_sint(fb_pipe_format)) + key->int_color_rb |= 1 << i; + } + + if (key->is_points) { + /* FIXME: The mask would need to be computed based on the shader + * inputs. On gallium it is done at st_atom_rasterizer + * (sprite_coord_enable). anv seems (need to confirm) to do that on + * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also + * better to have tests to guide filling the mask. + */ + key->point_sprite_mask = 0; + + /* Vulkan mandates upper left. */ + key->point_coord_upper_left = true; + } + } + + /* FIXME: we understand that this is used on GL to configure fixed-function + * two side lighting support, and not make sense for Vulkan. Need to + * confirm though. + */ + key->light_twoside = false; + + /* FIXME: ditto, although for flat lighting. 
Again, need to confirm. */ + key->shade_model_flat = false; +} + +static void +pipeline_populate_v3d_vs_key(struct v3d_vs_key *key, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct v3dv_pipeline_stage *p_stage) +{ + memset(key, 0, sizeof(*key)); + + pipeline_populate_v3d_key(&key->base, pCreateInfo, p_stage); + + /* Vulkan doesn't appear to specify (anv does the same) */ + key->clamp_color = false; + + /* Vulkan specifies a point size per vertex, so this is true if the prims are + * points (like on ES2). + */ + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; + + /* FIXME: not enough to be PRIM_POINTS, on gallium the full check is + * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */ + key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS); + + key->is_coord = p_stage->is_coord; + if (p_stage->is_coord) { + /* The only output varyings on coord shaders are for transform + * feedback. Set to 0 as VK_EXT_transform_feedback is not supported. + */ + key->num_used_outputs = 0; + } else { + struct v3dv_pipeline *pipeline = p_stage->pipeline; + key->num_used_outputs = pipeline->fs->prog_data.fs->num_inputs; + STATIC_ASSERT(sizeof(key->used_outputs) == + sizeof(pipeline->fs->prog_data.fs->input_slots)); + memcpy(key->used_outputs, pipeline->fs->prog_data.fs->input_slots, + sizeof(key->used_outputs)); + } +} + +/* + * Creates the pipeline_stage for the coordinate shader. 
Initially a clone of + * the vs pipeline_stage, with is_coord set to true. + */ +static struct v3dv_pipeline_stage* +pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src, + const VkAllocationCallbacks *alloc) +{ + struct v3dv_device *device = src->pipeline->device; + + struct v3dv_pipeline_stage *p_stage = + vk_zalloc2(&device->alloc, alloc, sizeof(*p_stage), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + p_stage->pipeline = src->pipeline; + assert(src->stage == MESA_SHADER_VERTEX); + p_stage->stage = src->stage; + p_stage->entrypoint = src->entrypoint; + p_stage->module = src->module; + p_stage->nir = src->nir; + + p_stage->is_coord = true; + + return p_stage; +} + +/* FIXME: right now this just asks for a bo for the exact size of the qpu + * assembly. It would be good to be slightly smarter and have one "all + * shaders" bo per pipeline, so each p_stage would save its offset on + * such. That is really relevant due to the fact that bo are always aligned to + * 4096, so that would allow to use less memory. + * + * For now one-bo per-assembly would work. 
+ */ +static void +upload_assembly(struct v3dv_pipeline_stage *p_stage, + const void *data, + uint32_t size) +{ + /* We are uploading the assembly just once, so at this point we shouldn't + * have any bo + */ + assert(p_stage->assembly_bo == NULL); + struct v3dv_device *device = p_stage->pipeline->device; + + struct v3dv_bo *bo = v3dv_bo_alloc(device, size); + if (!bo) { + fprintf(stderr, "failed to allocate memory for shader\n"); + abort(); + } + + bool ok = v3dv_bo_map(device, bo, size); + if (!ok) { + fprintf(stderr, "failed to map source shader buffer\n"); + abort(); + } + + memcpy(bo->map, data, size); + + v3dv_bo_unmap(device, bo); + + p_stage->assembly_bo = bo; +} + +static void +compile_pipeline_stage(struct v3dv_pipeline_stage *p_stage) +{ + struct v3dv_physical_device *physical_device = + &p_stage->pipeline->device->instance->physicalDevice; + const struct v3d_compiler *compiler = physical_device->compiler; + + /* We don't support variants (and probably will never support them) */ + int variant_id = 0; + + /* Note that we are assigning program_id slightly differently that + * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin + * would have a different program_id, while v3d would have the same for + * both. For the case of v3dv, it is more natural to have an id this way, + * as right now we are using it for debugging, not for shader-db. 
+ */ + p_stage->program_id = physical_device->next_program_id++; + + if (V3D_DEBUG & (V3D_DEBUG_NIR | + v3d_debug_flag_for_shader_stage(p_stage->stage))) { + fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n", + gl_shader_stage_name(p_stage->stage), + p_stage->program_id); + nir_print_shader(p_stage->nir, stderr); + fprintf(stderr, "\n"); + } + + uint64_t *qpu_insts; + uint32_t qpu_insts_size; + + qpu_insts = v3d_compile(compiler, + &p_stage->key.base, &p_stage->prog_data.base, + p_stage->nir, + shader_debug_output, NULL, + p_stage->program_id, + variant_id, + &qpu_insts_size); + + if (!qpu_insts) { + fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n", + gl_shader_stage_name(p_stage->stage), + p_stage->program_id); + } else { + upload_assembly(p_stage, qpu_insts, qpu_insts_size); + } + + free(qpu_insts); +} + +static VkResult +pipeline_compile_graphics(struct v3dv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc) +{ + struct v3dv_pipeline_stage *stages[MESA_SHADER_STAGES] = { }; + struct v3dv_device *device = pipeline->device; + + /* First pass to get the the common info from the shader and the nir + * shader. We don't care of the coord shader for now. + */ + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i]; + gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); + + struct v3dv_pipeline_stage *p_stage = + vk_zalloc2(&device->alloc, alloc, sizeof(*p_stage), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + p_stage->pipeline = pipeline; + p_stage->stage = stage; + if (stage == MESA_SHADER_VERTEX) + p_stage->is_coord = false; + p_stage->entrypoint = sinfo->pName; + p_stage->module = v3dv_shader_module_from_handle(sinfo->module); + + pipeline->active_stages |= sinfo->stage; + + /* FIXME: when cache support is in place, first check if for the given + * spirv module and options, we already have a nir shader. 
+ */ + p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage); + + stages[stage] = p_stage; + } + + + + for (int stage = MESA_SHADER_STAGES - 1; stage >= 0; stage--) { + if (stages[stage] == NULL || stages[stage]->entrypoint == NULL) + continue; + + struct v3dv_pipeline_stage *p_stage = stages[stage]; + + switch(stage) { + case MESA_SHADER_VERTEX: + /* Right now we only support pipelines with both vertex and fragment + * shader. + */ + assert(pipeline->fs); + + pipeline->vs = p_stage; + + pipeline->vs_bin = pipeline_stage_create_vs_bin(pipeline->vs, alloc); + + /* Note that at this point we would compile twice, one for vs and + * other for vs_bin. For now we are maintaining two pipeline_stage + * and two keys. Eventually we could reuse the key. + */ + pipeline_populate_v3d_vs_key(&pipeline->vs->key.vs, pCreateInfo, pipeline->vs); + pipeline_populate_v3d_vs_key(&pipeline->vs_bin->key.vs, pCreateInfo, pipeline->vs_bin); + + compile_pipeline_stage(pipeline->vs); + compile_pipeline_stage(pipeline->vs_bin); + break; + case MESA_SHADER_FRAGMENT: + pipeline->fs = p_stage; + + pipeline_populate_v3d_fs_key(&p_stage->key.fs, pCreateInfo, + p_stage); + + /* FIXME: create a per-build method with all the lowering + * needed. perhaps move to shader_compile_module_to_nir? 
*/ + v3dv_nir_lower_fs_inputs(p_stage->nir); + v3dv_nir_lower_fs_outputs(p_stage->nir); + + compile_pipeline_stage(pipeline->fs); + break; + default: + unreachable("not supported shader stage"); + } + } + + return VK_SUCCESS; +} + + +static VkResult +pipeline_init(struct v3dv_pipeline *pipeline, + struct v3dv_device *device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc) +{ + VkResult result = VK_SUCCESS; + + pipeline->device = device; + + V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass); + assert(pCreateInfo->subpass < render_pass->subpass_count); + pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass]; + + result = pipeline_compile_graphics(pipeline, pCreateInfo, alloc); + + if (result != VK_SUCCESS) { + /* Caller would already destroy the pipeline, and we didn't allocate any + * extra info. We don't need to do anything else. + */ + return result; + } + + return result; +} + +static VkResult +graphics_pipeline_create(VkDevice _device, + VkPipelineCache _cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + + struct v3dv_pipeline *pipeline; + VkResult result; + + pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + result = pipeline_init(pipeline, device, + pCreateInfo, + pAllocator); + + if (result != VK_SUCCESS) { + vk_free2(&device->alloc, pAllocator, pipeline); + return result; + } + + *pPipeline = v3dv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} + VkResult v3dv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, @@ -83,7 +743,22 @@ v3dv_CreateGraphicsPipelines(VkDevice _device, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) { - /* FIXME: stub */ + VkResult 
result = VK_SUCCESS; - return VK_SUCCESS; + for (uint32_t i = 0; i < count; i++) { + VkResult local_result; + + local_result = graphics_pipeline_create(_device, + pipelineCache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + + if (local_result != VK_SUCCESS) { + result = local_result; + pPipelines[i] = VK_NULL_HANDLE; + } + } + + return result; } diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 9d9030ed460..32c96c0dcc5 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -48,6 +48,11 @@ #include "common/v3d_device_info.h" #include "common/v3d_limits.h" +#include "compiler/shader_enums.h" +#include "compiler/spirv/nir_spirv.h" + +#include "compiler/v3d_compiler.h" + #include "vk_debug_report.h" #include "util/set.h" #include "util/hash_table.h" @@ -72,6 +77,7 @@ pack_emit_reloc(void *cl, const void *reloc) {} #include "vk_alloc.h" #include "simulator/v3d_simulator.h" + /* FIXME: pipe_box from Gallium. Needed for some v3d_tiling.c functions. * In the future we might want to drop that depedency, but for now it is * good enough. @@ -117,6 +123,9 @@ struct v3dv_physical_device { struct v3d_device_info devinfo; struct v3d_simulator_file *sim_file; + + const struct v3d_compiler *compiler; + uint32_t next_program_id; }; struct v3dv_app_info { @@ -420,6 +429,77 @@ struct v3dv_shader_module { char data[0]; }; +/* FIXME: the same function at anv, radv and tu, perhaps create common + * place? + */ +static inline gl_shader_stage +vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) +{ + assert(__builtin_popcount(vk_stage) == 1); + return ffs(vk_stage) - 1; +} + +/* + * Utility struct so shader_module_compile_to_nir and other methods doesn't + * have so many parameters. + * + * FIXME: for the case of the coordinate shader and the vertex shader, module, + * entrypoint, spec_info and nir are the same. There are also info only + * relevant to some stages. 
But seemed too much a hassle to create a new + * struct only to handle that. Revisit if such kind of info starts to grow. + */ +struct v3dv_pipeline_stage { + struct v3dv_pipeline *pipeline; + + gl_shader_stage stage; + /* FIXME: is_coord only make sense if stage == MESA_SHADER_VERTEX. Perhaps + * a stage base/vs/fs as keys and prog_data? + */ + bool is_coord; + + const struct v3dv_shader_module *module; + const char *entrypoint; + const VkSpecializationInfo *spec_info; + + nir_shader *nir; + + /** A name for this program, so you can track it in shader-db output. */ + uint32_t program_id; + + union { + struct v3d_key base; + struct v3d_vs_key vs; + struct v3d_fs_key fs; + } key; + + union { + struct v3d_prog_data *base; + struct v3d_vs_prog_data *vs; + struct v3d_fs_prog_data *fs; + } prog_data; + + /* FIXME: using one bo per shader. Eventually we would be interested on + * reusing the same bo for all the shaders, like a bo per v3dv_pipeline for + * shaders. + */ + struct v3dv_bo *assembly_bo; +}; + +struct v3dv_pipeline { + struct v3dv_device *device; + + VkShaderStageFlags active_stages; + + struct v3dv_subpass *subpass; + + /* Note: We can't use just a MESA_SHADER_STAGES array as we need to track + * too the coordinate shader + */ + struct v3dv_pipeline_stage *vs; + struct v3dv_pipeline_stage *vs_bin; + struct v3dv_pipeline_stage *fs; +}; + uint32_t v3dv_physical_device_api_version(struct v3dv_physical_device *dev); int v3dv_get_instance_entrypoint_index(const char *name); @@ -518,6 +598,7 @@ V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, VkDeviceMemory) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, VkFramebuffer) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, VkImage) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, VkImageView) +V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, VkPipeline) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, VkRenderPass) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_shader_module, VkShaderModule)