From 8b3022c918086cd23b7f8a9e4dd61d1b074fef87 Mon Sep 17 00:00:00 2001
From: Mike Blumenkrantz
Date: Tue, 7 Mar 2023 12:44:53 -0500
Subject: [PATCH] lavapipe: implement EXT_shader_object

Reviewed-by: Dave Airlie
Part-of:
---
 src/gallium/frontends/lavapipe/lvp_device.c   |  14 ++
 src/gallium/frontends/lavapipe/lvp_execute.c  |  85 +++++++++
 src/gallium/frontends/lavapipe/lvp_pipeline.c | 172 +++++++++++++++++++
 src/gallium/frontends/lavapipe/lvp_private.h  |   5 +
 4 files changed, 276 insertions(+)

diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c
index 9742ab34f25..b21d71aa88d 100644
--- a/src/gallium/frontends/lavapipe/lvp_device.c
+++ b/src/gallium/frontends/lavapipe/lvp_device.c
@@ -181,6 +181,7 @@ static const struct vk_device_extension_table lvp_device_extensions_supported =
    .EXT_shader_atomic_float               = true,
    .EXT_shader_atomic_float2              = true,
    .EXT_shader_demote_to_helper_invocation= true,
+   .EXT_shader_object                     = true,
    .EXT_shader_stencil_export             = true,
    .EXT_shader_subgroup_ballot            = true,
    .EXT_shader_subgroup_vote              = true,
@@ -852,6 +853,12 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetPhysicalDeviceFeatures2(
          features->depthClipControl = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT: {
+         VkPhysicalDeviceShaderObjectFeaturesEXT *features =
+            (VkPhysicalDeviceShaderObjectFeaturesEXT *)ext;
+         features->shaderObject = true;
+         break;
+      }
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES: {
          VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeatures *features =
             (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeatures *)ext;
@@ -1377,6 +1384,13 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetPhysicalDeviceProperties2(
          props->graphicsPipelineLibraryIndependentInterpolationDecoration = VK_TRUE;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_PROPERTIES_EXT: {
+         VkPhysicalDeviceShaderObjectPropertiesEXT *props = (VkPhysicalDeviceShaderObjectPropertiesEXT *)ext;
+         /* this is basically unsupported */
+         lvp_device_get_cache_uuid(props->shaderBinaryUUID);
+         props->shaderBinaryVersion = 1;
+         break;
+      }
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
          VkPhysicalDeviceRobustness2PropertiesEXT *props =
             (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c
index 0c4ac0c1934..18ac5da5867 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -162,6 +162,7 @@ struct rendering_state {
 
    uint8_t push_constants[128 * 4];
    uint16_t push_size[2]; //gfx, compute
+   uint16_t gfx_push_sizes[MESA_SHADER_COMPUTE];
    struct {
       void *block[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS];
       uint16_t size[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS];
@@ -183,6 +184,7 @@ struct rendering_state {
 
    uint32_t sample_mask;
    unsigned min_samples;
+   unsigned rast_samples;
    float min_sample_shading;
    bool force_min_sample;
    bool sample_shading;
@@ -634,6 +636,7 @@ get_viewport_xform(struct rendering_state *state,
 static void
 update_samples(struct rendering_state *state, VkSampleCountFlags samples)
 {
+   state->rast_samples = samples;
    state->rs_dirty |= state->rs_state.multisample != (samples > 1);
    state->rs_state.multisample = samples > 1;
    state->min_samples = 1;
@@ -3991,6 +3994,72 @@ static void handle_set_color_blend_equation(struct vk_cmd_queue_entry *cmd,
    }
 }
 
+/* Execute a vkCmdBindShadersEXT command: record the shader object bound to
+ * each listed stage and recompute the push constant sizes.
+ */
+static void
+handle_shaders(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
+{
+   struct vk_cmd_bind_shaders_ext *bind = &cmd->u.bind_shaders_ext;
+
+   bool gfx = false;
+   VkShaderStageFlagBits vkstages = 0;
+   unsigned new_stages = 0;
+   unsigned null_stages = 0;
+   for (unsigned i = 0; i < bind->stage_count; i++) {
+      gl_shader_stage stage = vk_to_mesa_shader_stage(bind->stages[i]);
+      assert(stage <= MESA_SHADER_COMPUTE && stage != MESA_SHADER_NONE);
+      LVP_FROM_HANDLE(lvp_shader, shader, bind->shaders ? bind->shaders[i] : VK_NULL_HANDLE);
+      if (stage == MESA_SHADER_FRAGMENT) {
+         if (shader) {
+            state->force_min_sample = shader->pipeline_nir->nir->info.fs.uses_sample_shading;
+            state->sample_shading = state->force_min_sample;
+            update_samples(state, state->rast_samples);
+         } else {
+            state->force_min_sample = false;
+            state->sample_shading = false;
+         }
+      }
+      if (shader) {
+         vkstages |= bind->stages[i];
+         new_stages |= BITFIELD_BIT(stage);
+         state->shaders[stage] = shader;
+      } else {
+         if (state->shaders[stage])
+            null_stages |= bind->stages[i];
+      }
+
+      if (stage != MESA_SHADER_COMPUTE) {
+         /* sizes are kept per stage, so index by stage rather than bind slot */
+         state->gfx_push_sizes[stage] = shader ? shader->layout->push_constant_size : 0;
+         gfx = true;
+      } else {
+         state->push_size[1] = shader ? shader->layout->push_constant_size : 0;
+      }
+   }
+
+   if ((new_stages | null_stages) & BITFIELD_MASK(MESA_SHADER_COMPUTE)) {
+      unbind_graphics_stages(state, null_stages & VK_SHADER_STAGE_ALL_GRAPHICS);
+      handle_graphics_stages(state, vkstages & VK_SHADER_STAGE_ALL_GRAPHICS, true);
+      u_foreach_bit(i, new_stages) {
+         handle_graphics_layout(state, i, state->shaders[i]->layout);
+         handle_pipeline_access(state, i);
+      }
+   }
+   /* ignore compute unbinds */
+   if (new_stages & BITFIELD_BIT(MESA_SHADER_COMPUTE)) {
+      handle_compute_shader(state, state->shaders[MESA_SHADER_COMPUTE], state->shaders[MESA_SHADER_COMPUTE]->layout);
+      handle_pipeline_access(state, MESA_SHADER_COMPUTE);
+   }
+
+   if (gfx) {
+      state->blend_state.independent_blend_enable = true;
+      state->push_size[0] = 0;
+      for (unsigned i = 0; i < ARRAY_SIZE(state->gfx_push_sizes); i++)
+         state->push_size[0] += state->gfx_push_sizes[i];
+   }
+}
+
 void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp)
 {
    struct vk_device_dispatch_table cmd_enqueue_dispatch;
@@ -4098,6 +4167,19 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp)
    ENQUEUE_CMD(CmdSetColorBlendEquationEXT)
    ENQUEUE_CMD(CmdSetColorWriteMaskEXT)
 
+   ENQUEUE_CMD(CmdBindShadersEXT)
+   /* required for EXT_shader_object */
+   ENQUEUE_CMD(CmdSetCoverageModulationModeNV)
+   ENQUEUE_CMD(CmdSetCoverageModulationTableEnableNV)
+   ENQUEUE_CMD(CmdSetCoverageModulationTableNV)
+   ENQUEUE_CMD(CmdSetCoverageReductionModeNV)
+   ENQUEUE_CMD(CmdSetCoverageToColorEnableNV)
+   ENQUEUE_CMD(CmdSetCoverageToColorLocationNV)
+   ENQUEUE_CMD(CmdSetRepresentativeFragmentTestEnableNV)
+   ENQUEUE_CMD(CmdSetShadingRateImageEnableNV)
+   ENQUEUE_CMD(CmdSetViewportSwizzleNV)
+   ENQUEUE_CMD(CmdSetViewportWScalingEnableNV)
+
 #undef ENQUEUE_CMD
 }
 
@@ -4413,6 +4495,9 @@ static void lvp_execute_cmd_buffer(struct lvp_cmd_buffer *cmd_buffer,
       case VK_CMD_SET_COLOR_BLEND_EQUATION_EXT:
          handle_set_color_blend_equation(cmd, state);
          break;
+      case VK_CMD_BIND_SHADERS_EXT:
+         handle_shaders(cmd, state);
+         break;
       default:
          fprintf(stderr, "Unsupported command %s\n",
                  vk_cmd_queue_type_names[cmd->type]);
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index 6af22aa0373..5468abafe80 100644
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -29,6 +29,7 @@
 #include "util/os_time.h"
 #include "spirv/nir_spirv.h"
 #include "nir/nir_builder.h"
+#include "nir/nir_serialize.h"
 #include "lvp_lower_vulkan_resource.h"
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
@@ -1122,3 +1123,174 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(
 
    return result;
 }
+
+/* Destroy a shader object along with its layout reference and serialized blob. */
+VKAPI_ATTR void VKAPI_CALL lvp_DestroyShaderEXT(
+    VkDevice _device,
+    VkShaderEXT _shader,
+    const VkAllocationCallbacks* pAllocator)
+{
+   LVP_FROM_HANDLE(lvp_device, device, _device);
+   LVP_FROM_HANDLE(lvp_shader, shader, _shader);
+
+   if (!shader)
+      return;
+   shader_destroy(device, shader);
+
+   vk_pipeline_layout_unref(&device->vk, &shader->layout->vk);
+   blob_finish(&shader->blob);
+   vk_object_base_finish(&shader->base);
+   vk_free2(&device->vk.alloc, pAllocator, shader);
+}
+
+/* Create one shader object from SPIR-V or from a binary laid out as a SHA1
+ * digest followed by serialized NIR; returns VK_NULL_HANDLE on failure.
+ */
+static VkShaderEXT
+create_shader_object(struct lvp_device *device, const VkShaderCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator)
+{
+   nir_shader *nir = NULL;
+   gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage);
+   assert(stage <= MESA_SHADER_COMPUTE && stage != MESA_SHADER_NONE);
+   if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT) {
+      VkShaderModuleCreateInfo minfo = {
+         VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+         NULL,
+         0,
+         pCreateInfo->codeSize,
+         pCreateInfo->pCode,
+      };
+      VkPipelineShaderStageCreateFlagBits flags = 0;
+      if (pCreateInfo->flags & VK_SHADER_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
+         flags |= VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT;
+      if (pCreateInfo->flags & VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
+         flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT;
+      VkPipelineShaderStageCreateInfo sinfo = {
+         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+         &minfo,
+         flags,
+         pCreateInfo->stage,
+         VK_NULL_HANDLE,
+         pCreateInfo->pName,
+         pCreateInfo->pSpecializationInfo,
+      };
+      VkResult result = compile_spirv(device, &sinfo, &nir);
+      if (result != VK_SUCCESS)
+         goto fail;
+   } else {
+      assert(pCreateInfo->codeType == VK_SHADER_CODE_TYPE_BINARY_EXT);
+      if (pCreateInfo->codeSize < SHA1_DIGEST_LENGTH + 1)
+         return VK_NULL_HANDLE;
+      struct blob_reader blob;
+      const uint8_t *data = pCreateInfo->pCode;
+      size_t size = pCreateInfo->codeSize - SHA1_DIGEST_LENGTH;
+      unsigned char sha1[20];
+
+      struct mesa_sha1 sctx;
+      _mesa_sha1_init(&sctx);
+      _mesa_sha1_update(&sctx, data + SHA1_DIGEST_LENGTH, size);
+      _mesa_sha1_final(&sctx, sha1);
+      if (memcmp(sha1, data, SHA1_DIGEST_LENGTH))
+         return VK_NULL_HANDLE;
+
+      blob_reader_init(&blob, data + SHA1_DIGEST_LENGTH, size);
+      nir = nir_deserialize(NULL, device->pscreen->get_compiler_options(device->pscreen, PIPE_SHADER_IR_NIR, stage), &blob);
+      if (!nir)
+         goto fail;
+   }
+   if (!nir_shader_get_entrypoint(nir))
+      goto fail;
+   struct lvp_shader *shader = vk_object_zalloc(&device->vk, pAllocator, sizeof(struct lvp_shader), VK_OBJECT_TYPE_SHADER_EXT);
+   if (!shader)
+      goto fail;
+   blob_init(&shader->blob);
+   VkPipelineLayoutCreateInfo pci = {
+      VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+      NULL,
+      0,
+      pCreateInfo->setLayoutCount,
+      pCreateInfo->pSetLayouts,
+      pCreateInfo->pushConstantRangeCount,
+      pCreateInfo->pPushConstantRanges,
+   };
+   shader->layout = lvp_pipeline_layout_create(device, &pci, pAllocator);
+   lvp_shader_lower(device, nir, shader, shader->layout);
+   lvp_shader_xfb_init(shader);
+   if (stage == MESA_SHADER_TESS_EVAL) {
+      /* spec requires that all tess modes are set in both shaders */
+      nir_lower_patch_vertices(shader->pipeline_nir->nir, shader->pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
+      shader->tess_ccw = create_pipeline_nir(nir_shader_clone(NULL, shader->pipeline_nir->nir));
+      shader->tess_ccw->nir->info.tess.ccw = !shader->pipeline_nir->nir->info.tess.ccw;
+      shader->tess_ccw_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, shader->tess_ccw->nir));
+   } else if (stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) {
+      /* this is (currently) illegal */
+      assert(!nir->info.fs.uses_fbfetch_output);
+      shader_destroy(device, shader);
+
+      /* drop the layout reference and blob too, as lvp_DestroyShaderEXT does */
+      vk_pipeline_layout_unref(&device->vk, &shader->layout->vk);
+      blob_finish(&shader->blob);
+      vk_object_base_finish(&shader->base);
+      vk_free2(&device->vk.alloc, pAllocator, shader);
+      return VK_NULL_HANDLE;
+   }
+   nir_serialize(&shader->blob, nir, true);
+   shader->shader_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, nir));
+   return lvp_shader_to_handle(shader);
+fail:
+   ralloc_free(nir);
+   return VK_NULL_HANDLE;
+}
+
+/* Create createInfoCount shader objects, stopping at the first failure. */
+VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateShadersEXT(
+    VkDevice _device,
+    uint32_t createInfoCount,
+    const VkShaderCreateInfoEXT* pCreateInfos,
+    const VkAllocationCallbacks* pAllocator,
+    VkShaderEXT* pShaders)
+{
+   LVP_FROM_HANDLE(lvp_device, device, _device);
+   unsigned i;
+   for (i = 0; i < createInfoCount; i++) {
+      pShaders[i] = create_shader_object(device, &pCreateInfos[i], pAllocator);
+      if (!pShaders[i]) {
+         /* every remaining output handle must be overwritten on any failure */
+         if (i < createInfoCount - 1)
+            memset(&pShaders[i + 1], 0, (createInfoCount - i - 1) * sizeof(VkShaderEXT));
+         if (pCreateInfos[i].codeType == VK_SHADER_CODE_TYPE_BINARY_EXT)
+            return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+   }
+   return VK_SUCCESS;
+}
+
+
+/* Query the binary size, or write the SHA1 digest followed by the serialized NIR. */
+VKAPI_ATTR VkResult VKAPI_CALL lvp_GetShaderBinaryDataEXT(
+    VkDevice device,
+    VkShaderEXT _shader,
+    size_t* pDataSize,
+    void* pData)
+{
+   LVP_FROM_HANDLE(lvp_shader, shader, _shader);
+   VkResult ret = VK_SUCCESS;
+   if (pData) {
+      if (*pDataSize < shader->blob.size + SHA1_DIGEST_LENGTH) {
+         ret = VK_INCOMPLETE;
+         *pDataSize = 0;
+      } else {
+         *pDataSize = MIN2(*pDataSize, shader->blob.size + SHA1_DIGEST_LENGTH);
+         struct mesa_sha1 sctx;
+         _mesa_sha1_init(&sctx);
+         _mesa_sha1_update(&sctx, shader->blob.data, shader->blob.size);
+         _mesa_sha1_final(&sctx, pData);
+         uint8_t *data = pData;
+         memcpy(data + SHA1_DIGEST_LENGTH, shader->blob.data, shader->blob.size);
+      }
+   } else {
+      *pDataSize = shader->blob.size + SHA1_DIGEST_LENGTH;
+   }
+   return ret;
+}
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index 8cf0c3fc402..122d50dd1b9 100644
--- a/src/gallium/frontends/lavapipe/lvp_private.h
+++ b/src/gallium/frontends/lavapipe/lvp_private.h
@@ -433,6 +433,8 @@ lvp_pipeline_nir_ref(struct lvp_pipeline_nir **dst, struct lvp_pipeline_nir *src
 }
 
 struct lvp_shader {
+   struct vk_object_base base;
+   struct lvp_pipeline_layout *layout;
    struct lvp_access_info access;
    struct lvp_pipeline_nir *pipeline_nir;
    struct lvp_pipeline_nir *tess_ccw;
@@ -445,6 +447,7 @@ struct lvp_shader {
       uint32_t can_inline; //bitmask
    } inlines;
    struct pipe_stream_output_info stream_output;
+   struct blob blob; //preserved for GetShaderBinaryDataEXT
 };
 
 struct lvp_pipeline {
@@ -565,6 +568,8 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_pipeline_cache, base, VkPipelineCache,
                                VK_OBJECT_TYPE_PIPELINE_CACHE)
 VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_pipeline, base, VkPipeline,
                                VK_OBJECT_TYPE_PIPELINE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_shader, base, VkShaderEXT,
+                               VK_OBJECT_TYPE_SHADER_EXT)
 VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_pipeline_layout, vk.base, VkPipelineLayout,
                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
 VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_query_pool, base, VkQueryPool,