diff --git a/src/gallium/frontends/lavapipe/lvp_descriptor_set.c b/src/gallium/frontends/lavapipe/lvp_descriptor_set.c index 180e7e4d827..17a59359e46 100644 --- a/src/gallium/frontends/lavapipe/lvp_descriptor_set.c +++ b/src/gallium/frontends/lavapipe/lvp_descriptor_set.c @@ -1191,3 +1191,163 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetDescriptorEXT( break; } } + +VKAPI_ATTR VkDeviceSize VKAPI_CALL lvp_GetPhysicalDeviceDescriptorSizeEXT( + VkPhysicalDevice physicalDevice, + VkDescriptorType descriptorType) +{ + return lvp_get_descriptor_size(descriptorType); +} + +VKAPI_ATTR VkResult VKAPI_CALL lvp_GetImageOpaqueCaptureDataEXT( + VkDevice device, + uint32_t imageCount, + const VkImage* pImages, + VkHostAddressRangeEXT* pDatas) +{ + return VK_ERROR_FEATURE_NOT_PRESENT; +} + +VKAPI_ATTR VkResult VKAPI_CALL lvp_WriteSamplerDescriptorsEXT( + VkDevice _device, + uint32_t samplerCount, + const VkSamplerCreateInfo* pSamplers, + const VkHostAddressRangeEXT* pDescriptors) +{ + VK_FROM_HANDLE(lvp_device, device, _device); + + struct pipe_sampler_state null_sampler = { + .seamless_cube_map = 1, + .max_lod = 0.25, + }; + + for (unsigned i = 0; i < samplerCount; i++) { + struct lp_sampler_descriptor *desc = pDescriptors[i].address; + /* invariance tests require whole struct to be zeroed */ + memset(desc, 0, sizeof(*desc)); + if (pSamplers) { + struct vk_sampler_state state; + vk_sampler_state_init(&state, &pSamplers[i]); + lvp_sampler_init(device, desc, &state); + } else { + lp_jit_sampler_from_pipe(&desc->jit, &null_sampler); + desc->sampler_index = 0; + } + } + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL lvp_WriteResourceDescriptorsEXT( + VkDevice _device, + uint32_t resourceCount, + const VkResourceDescriptorInfoEXT* pResources, + const VkHostAddressRangeEXT* pDescriptors) +{ + VK_FROM_HANDLE(lvp_device, device, _device); + + for (unsigned i = 0; i < resourceCount; i++) { + struct lp_image_descriptor *image_desc = pDescriptors[i].address; + struct lp_buffer_descriptor *buffer_desc = pDescriptors[i].address; + uint64_t *accel_struct_desc = pDescriptors[i].address; + + switch (pResources[i].type) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: { + /* invariance tests require the descriptor to be zeroed */ + memset(image_desc, 0, sizeof(*image_desc)); + + const VkImageDescriptorInfoEXT *image = pResources[i].data.pImage; + if (image && image->pView) { // 0x7ffff7521080 + VkImageView view; + device->vk.dispatch_table.CreateImageView(_device, image->pView, NULL, &view); + VK_FROM_HANDLE(lvp_image_view, iview, view); + + unsigned plane_count = iview->plane_count; + + for (unsigned p = 0; p < plane_count; p++) { + if (pResources[i].type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) { + lp_jit_bindless_texture_from_pipe(&image_desc[p].texture, iview->planes[p].sv); + image_desc[p].functions = iview->planes[p].texture_handle->functions; + } else { + lp_jit_image_from_pipe(&image_desc[p].image, &iview->planes[p].iv); + image_desc[p].functions = iview->planes[p].image_handle->functions; + } + } + device->vk.dispatch_table.DestroyImageView(_device, view, NULL); + } else { + if (pResources[i].type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) + image_desc->functions = device->null_texture_handle->functions; + else + image_desc->functions = device->null_image_handle->functions; + } + break; + } + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { + /* invariance tests require the descriptor to be zeroed */ + memset(image_desc, 0, sizeof(*image_desc)); + + const VkTexelBufferDescriptorInfoEXT *bda = pResources[i].data.pTexelBuffer; + if (bda && bda->addressRange.address) { + enum pipe_format pformat = vk_format_to_pipe_format(bda->format); + lp_jit_bindless_texture_buffer_from_bda(&image_desc->texture, (void*)(uintptr_t)bda->addressRange.address); + image_desc->functions = get_texture_handle_bda(device, bda->addressRange.address, bda->addressRange.size, pformat).functions; + } else { + image_desc->functions = device->null_texture_handle->functions; + } + break; + } + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { + /* invariance tests require the descriptor to be zeroed */ + memset(image_desc, 0, sizeof(*image_desc)); + + const VkTexelBufferDescriptorInfoEXT *bda = pResources[i].data.pTexelBuffer; + if (bda && bda->addressRange.address) { + enum pipe_format pformat = vk_format_to_pipe_format(bda->format); + lp_jit_image_buffer_from_bda(&image_desc->image, (void*)(uintptr_t)bda->addressRange.address, bda->addressRange.size, pformat); + image_desc->functions = get_image_handle_bda(device, bda->addressRange.address, bda->addressRange.size, pformat).functions; + } else { + image_desc->functions = device->null_image_handle->functions; + } + break; + } + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { + /* invariance tests require the descriptor to be zeroed */ + memset(buffer_desc, 0, sizeof(buffer_desc->jit)); + + const VkDeviceAddressRangeEXT *bda = pResources[i].data.pAddressRange; + if (bda) { + struct pipe_constant_buffer ubo = { + .user_buffer = (void *)(uintptr_t)bda->address, + .buffer_size = bda->size, + }; + + lp_jit_buffer_from_pipe_const(&buffer_desc->jit, &ubo, device->pscreen); + } else { + lp_jit_buffer_from_pipe_const(&buffer_desc->jit, &((struct pipe_constant_buffer){0}), device->pscreen); + } + break; + } + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { + /* invariance tests require the descriptor to be zeroed */ + memset(buffer_desc, 0, sizeof(buffer_desc->jit)); + + const VkDeviceAddressRangeEXT *bda = pResources[i].data.pAddressRange; + if (bda) { + lp_jit_buffer_from_bda(&buffer_desc->jit, (void *)(uintptr_t)bda->address, bda->size); + } else { + lp_jit_buffer_from_pipe(&buffer_desc->jit, &((struct pipe_shader_buffer){0})); + } + break; + } + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { + const VkDeviceAddressRangeEXT *bda = pResources[i].data.pAddressRange; + *accel_struct_desc = bda ? bda->address : 0; + break; + } + default: + UNREACHABLE("illegal type passed"); + } + } + return VK_SUCCESS; +} diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c index d26221ea60b..41fa3c2656e 100644 --- a/src/gallium/frontends/lavapipe/lvp_device.c +++ b/src/gallium/frontends/lavapipe/lvp_device.c @@ -51,6 +51,7 @@ #if DETECT_OS_LINUX #include #include +#include #endif #if DETECT_OS_ANDROID @@ -238,6 +239,7 @@ static const struct vk_device_extension_table lvp_device_extensions_supported = .EXT_depth_range_unrestricted = true, .EXT_dynamic_rendering_unused_attachments = true, .EXT_descriptor_buffer = true, + .EXT_descriptor_heap = true, .EXT_descriptor_indexing = true, .EXT_device_generated_commands = true, .EXT_extended_dynamic_state = true, @@ -687,6 +689,10 @@ lvp_get_features(const struct lvp_physical_device *pdevice, /* VK_EXT_extended_dynamic_state */ .extendedDynamicState = true, + /* VK_EXT_descriptor_heap */ + .descriptorHeap = true, + .descriptorHeapCaptureReplay = false, + /* VK_EXT_4444_formats */ .formatA4R4G4B4 = true, .formatA4B4G4R4 = true, @@ -1264,6 +1270,26 @@ lvp_get_properties(const struct lvp_physical_device *device, struct vk_propertie .samplerDescriptorBufferAddressSpaceSize = UINT32_MAX, .descriptorBufferAddressSpaceSize = UINT32_MAX, + /* VK_EXT_descriptor_heap */ + .samplerHeapAlignment = 4, + .resourceHeapAlignment = 4, + .maxSamplerHeapSize = 64 * 4080, + .maxResourceHeapSize = 64 * MAX_DESCRIPTORS * 2, + .minSamplerHeapReservedRange = 0, + .minSamplerHeapReservedRangeWithEmbedded = 0, + .minResourceHeapReservedRange = 0, + .samplerDescriptorSize = sizeof(struct lp_sampler_descriptor), + .imageDescriptorSize = sizeof(struct lp_image_descriptor), + .bufferDescriptorSize = sizeof(struct lp_buffer_descriptor), + .samplerDescriptorAlignment = 4, + .imageDescriptorAlignment = 4, + .bufferDescriptorAlignment = 4, + .maxPushDataSize = 256, + .imageCaptureReplayOpaqueDataSize = 0, + .maxDescriptorHeapEmbeddedSamplers = (2 << 11), + .samplerYcbcrConversionCount = 3, + .sparseDescriptorHeaps = true, + /* VK_EXT_graphics_pipeline_library */ .graphicsPipelineLibraryFastLinking = VK_TRUE, .graphicsPipelineLibraryIndependentInterpolationDecoration = VK_TRUE, @@ -1359,6 +1385,14 @@ lvp_get_properties(const struct lvp_physical_device *device, struct vk_propertie .cooperativeMatrixFlexibleDimensionsMaxDimension = 1024, }; +#if DETECT_OS_LINUX + struct sysinfo si; + sysinfo(&si); + /* just let apps yolo it until they oom */ + p->maxResourceHeapSize = MAX2(p->maxResourceHeapSize, si.totalram / 2); + p->maxSamplerHeapSize = MAX2(p->maxSamplerHeapSize, si.totalram / 2); +#endif + /* Vulkan 1.0 */ strcpy(p->deviceName, device->pscreen->get_name(device->pscreen)); lvp_device_get_cache_uuid(p->pipelineCacheUUID); @@ -1965,8 +1999,12 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDevice( uint32_t zero = 0; device->zero_buffer = pipe_buffer_create_with_data(device->queue.ctx, 0, PIPE_USAGE_IMMUTABLE, sizeof(uint32_t), &zero); + struct pipe_sampler_state null_sampler = { + .seamless_cube_map = 1, + .max_lod = 0.25, + }; device->null_texture_handle = (void *)(uintptr_t)device->queue.ctx->create_texture_handle(device->queue.ctx, - &(struct pipe_sampler_view){ 0 }, NULL); + &(struct pipe_sampler_view){ 0 }, &null_sampler); device->null_image_handle = (void *)(uintptr_t)device->queue.ctx->create_image_handle(device->queue.ctx, &(struct pipe_image_view){ 0 }); @@ -2661,35 +2699,33 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_ResetEvent( } void -lvp_sampler_init(struct lvp_device *device, struct lp_sampler_descriptor *desc, const VkSamplerCreateInfo *pCreateInfo, const struct vk_sampler *sampler) +lvp_sampler_init(struct lvp_device *device, struct lp_sampler_descriptor *desc, const struct vk_sampler_state *vk_state) { struct pipe_sampler_state state = {0}; - VkClearColorValue border_color = - vk_sampler_border_color_value(pCreateInfo, NULL); - STATIC_ASSERT(sizeof(state.border_color) == sizeof(border_color)); + STATIC_ASSERT(sizeof(state.border_color) == sizeof(vk_state->border_color_value)); - state.wrap_s = vk_conv_wrap_mode(pCreateInfo->addressModeU); - state.wrap_t = vk_conv_wrap_mode(pCreateInfo->addressModeV); - state.wrap_r = vk_conv_wrap_mode(pCreateInfo->addressModeW); - state.min_img_filter = pCreateInfo->minFilter == VK_FILTER_LINEAR ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; - state.min_mip_filter = pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR ? PIPE_TEX_MIPFILTER_LINEAR : PIPE_TEX_MIPFILTER_NEAREST; - state.mag_img_filter = pCreateInfo->magFilter == VK_FILTER_LINEAR ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; - state.min_lod = pCreateInfo->minLod; - state.max_lod = pCreateInfo->maxLod; - state.lod_bias = pCreateInfo->mipLodBias; - if (pCreateInfo->anisotropyEnable) - state.max_anisotropy = pCreateInfo->maxAnisotropy; + state.wrap_s = vk_conv_wrap_mode(vk_state->address_mode_u); + state.wrap_t = vk_conv_wrap_mode(vk_state->address_mode_v); + state.wrap_r = vk_conv_wrap_mode(vk_state->address_mode_w); + state.min_img_filter = vk_state->min_filter == VK_FILTER_LINEAR ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; + state.min_mip_filter = vk_state->mipmap_mode == VK_SAMPLER_MIPMAP_MODE_LINEAR ? PIPE_TEX_MIPFILTER_LINEAR : PIPE_TEX_MIPFILTER_NEAREST; + state.mag_img_filter = vk_state->mag_filter == VK_FILTER_LINEAR ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; + state.min_lod = vk_state->min_lod; + state.max_lod = vk_state->max_lod; + state.lod_bias = vk_state->mip_lod_bias; + if (vk_state->anisotropy_enable) + state.max_anisotropy = vk_state->max_anisotropy; else state.max_anisotropy = 1; - state.unnormalized_coords = pCreateInfo->unnormalizedCoordinates; - state.compare_mode = pCreateInfo->compareEnable ? PIPE_TEX_COMPARE_R_TO_TEXTURE : PIPE_TEX_COMPARE_NONE; - state.compare_func = pCreateInfo->compareOp; - state.seamless_cube_map = !(pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT); + state.unnormalized_coords = vk_state->unnormalized_coordinates; + state.compare_mode = vk_state->compare_enable ? PIPE_TEX_COMPARE_R_TO_TEXTURE : PIPE_TEX_COMPARE_NONE; + state.compare_func = vk_state->compare_op; + state.seamless_cube_map = !(vk_state->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT); STATIC_ASSERT((unsigned)VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE == (unsigned)PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE); STATIC_ASSERT((unsigned)VK_SAMPLER_REDUCTION_MODE_MIN == (unsigned)PIPE_TEX_REDUCTION_MIN); STATIC_ASSERT((unsigned)VK_SAMPLER_REDUCTION_MODE_MAX == (unsigned)PIPE_TEX_REDUCTION_MAX); - state.reduction_mode = (enum pipe_tex_reduction_mode)sampler->reduction_mode; - memcpy(&state.border_color, &border_color, sizeof(border_color)); + state.reduction_mode = (enum pipe_tex_reduction_mode)vk_state->reduction_mode; + memcpy(&state.border_color, &vk_state->border_color_value, sizeof(vk_state->border_color_value)); simple_mtx_lock(&device->queue.lock); struct lp_texture_handle *texture_handle = (void *)(uintptr_t)device->queue.ctx->create_texture_handle(device->queue.ctx, NULL, &state); @@ -2714,7 +2750,9 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateSampler( if (!sampler) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - lvp_sampler_init(device, &sampler->desc, pCreateInfo, &sampler->vk); + struct vk_sampler_state state; + vk_sampler_state_init(&state, pCreateInfo); + lvp_sampler_init(device, &sampler->desc, &state); *pSampler = lvp_sampler_to_handle(sampler); @@ -2735,6 +2773,21 @@ VKAPI_ATTR void VKAPI_CALL lvp_DestroySampler( vk_sampler_destroy(&device->vk, pAllocator, &sampler->vk); } +VKAPI_ATTR VkResult VKAPI_CALL lvp_RegisterCustomBorderColorEXT( + VkDevice device, + const VkSamplerCustomBorderColorCreateInfoEXT* pBorderColor, + VkBool32 requestIndex, + uint32_t* pIndex) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL lvp_UnregisterCustomBorderColorEXT( + VkDevice device, + uint32_t index) +{ +} + VKAPI_ATTR VkResult VKAPI_CALL lvp_CreatePrivateDataSlot( VkDevice _device, const VkPrivateDataSlotCreateInfo* pCreateInfo, diff --git a/src/gallium/frontends/lavapipe/lvp_device_generated_commands.c b/src/gallium/frontends/lavapipe/lvp_device_generated_commands.c index 6d636529160..e56a348a1ee 100644 --- a/src/gallium/frontends/lavapipe/lvp_device_generated_commands.c +++ b/src/gallium/frontends/lavapipe/lvp_device_generated_commands.c @@ -111,6 +111,8 @@ get_token_info_size(VkIndirectCommandsTokenTypeEXT type) return sizeof(VkIndirectCommandsVertexBufferTokenEXT); case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT: case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT: + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_EXT: + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_SEQUENCE_INDEX_EXT: return sizeof(VkIndirectCommandsPushConstantTokenEXT); case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT: return sizeof(VkIndirectCommandsIndexBufferTokenEXT); @@ -221,6 +223,8 @@ lvp_ext_dgc_token_to_cmd_type(const struct lvp_indirect_command_layout_ext *elay return VK_CMD_BIND_VERTEX_BUFFERS2; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT: case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT: + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_EXT: + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_SEQUENCE_INDEX_EXT: return VK_CMD_PUSH_CONSTANTS2; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT: return VK_CMD_BIND_INDEX_BUFFER2; @@ -257,7 +261,10 @@ lvp_ext_dgc_token_size(const struct lvp_indirect_command_layout_ext *elayout, co UNUSED struct vk_cmd_queue_entry *cmd; enum vk_cmd_type type = lvp_ext_dgc_token_to_cmd_type(elayout, token); size_t size = vk_cmd_queue_type_sizes[type]; - if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT || token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT) { + if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT || + token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT || + token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_EXT || + token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_SEQUENCE_INDEX_EXT) { size += sizeof(*cmd->u.push_constants2.push_constants_info); size += token->data.pPushConstant->updateRange.size; return size; diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c index 70ae8356947..2ca9f3c8dd4 100644 --- a/src/gallium/frontends/lavapipe/lvp_execute.c +++ b/src/gallium/frontends/lavapipe/lvp_execute.c @@ -240,6 +240,24 @@ struct rendering_state { struct lvp_shader *advanced_blend_fs_shader; /* shader used to build variant */ }; +static void +handle_set_stage_buffer(struct rendering_state *state, + struct pipe_resource *bo, + size_t offset, + mesa_shader_stage stage, + uint32_t index) +{ + state->const_buffer[stage][index].buffer = bo; + state->const_buffer[stage][index].buffer_offset = offset; + state->const_buffer[stage][index].buffer_size = bo->width0; + state->const_buffer[stage][index].user_buffer = NULL; + + state->constbuf_dirty[stage] = true; + + if (state->num_const_bufs[stage] <= index) + state->num_const_bufs[stage] = index + 1; +} + static struct pipe_resource * get_buffer_resource(struct pipe_context *ctx, void *mem) { @@ -680,6 +698,9 @@ handle_compute_shader(struct rendering_state *state, struct lvp_shader *shader) state->dispatch_info.block[1] = shader->pipeline_nir->nir->info.workgroup_size[1]; state->dispatch_info.block[2] = shader->pipeline_nir->nir->info.workgroup_size[2]; state->compute_shader_dirty = true; + + if (shader->heaps && shader->embedded_samplers) + handle_set_stage_buffer(state, shader->embedded_samplers, 0, MESA_SHADER_COMPUTE, LVP_DESCRIPTOR_HEAP_EMBEDDED); } static void handle_compute_pipeline(struct vk_cmd_queue_entry *cmd, @@ -707,6 +728,9 @@ static void handle_ray_tracing_pipeline(struct vk_cmd_queue_entry *cmd, state->trace_rays_info.block[0] = shader->pipeline_nir->nir->info.workgroup_size[0]; state->trace_rays_info.block[1] = shader->pipeline_nir->nir->info.workgroup_size[1]; state->trace_rays_info.block[2] = shader->pipeline_nir->nir->info.workgroup_size[2]; + + if (shader->heaps && shader->embedded_samplers) + handle_set_stage_buffer(state, shader->embedded_samplers, 0, MESA_SHADER_RAYGEN, LVP_DESCRIPTOR_HEAP_EMBEDDED); } static void @@ -809,6 +833,9 @@ handle_graphics_stages(struct rendering_state *state, VkShaderStageFlagBits shad assert(0); break; } + struct lvp_shader *shader = state->shaders[stage]; + if (shader->heaps && shader->embedded_samplers) + handle_set_stage_buffer(state, shader->embedded_samplers, 0, stage, LVP_DESCRIPTOR_HEAP_EMBEDDED); } } @@ -1233,15 +1260,13 @@ static void handle_pipeline(struct vk_cmd_queue_entry *cmd, } else if (pipeline->type == LVP_PIPELINE_EXEC_GRAPH) { state->exec_graph = pipeline; } - if (pipeline->layout) { + + state->push_size[pipeline->type] = 0; + if (pipeline->layout) state->push_size[pipeline->type] = pipeline->layout->push_constant_size; - } else { - for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) - if (pipeline->shaders[i].push_constant_size) { - state->push_size[pipeline->type] = pipeline->shaders[i].push_constant_size; - break; - } - } + + for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) + state->push_size[pipeline->type] = MAX2(state->push_size[pipeline->type], pipeline->shaders[i].push_constant_size); if (state->push_size[pipeline->type] != state->emitted_push_size[pipeline->type]) { for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) @@ -1319,24 +1344,6 @@ static void handle_vertex_buffers3(struct vk_cmd_queue_entry *cmd, state->vb_dirty = true; } -static void -handle_set_stage_buffer(struct rendering_state *state, - struct pipe_resource *bo, - size_t offset, - mesa_shader_stage stage, - uint32_t index) -{ - state->const_buffer[stage][index].buffer = bo; - state->const_buffer[stage][index].buffer_offset = offset; - state->const_buffer[stage][index].buffer_size = bo->width0; - state->const_buffer[stage][index].user_buffer = NULL; - - state->constbuf_dirty[stage] = true; - - if (state->num_const_bufs[stage] <= index) - state->num_const_bufs[stage] = index + 1; -} - static void handle_set_stage(struct rendering_state *state, struct lvp_descriptor_set *set, enum lvp_pipeline_type pipeline_type, @@ -4560,7 +4567,9 @@ process_sequence_ext(struct rendering_state *state, break; } case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT: - case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT: { + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT: + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_EXT: + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_SEQUENCE_INDEX_EXT: { uint32_t *data = input; const VkIndirectCommandsPushConstantTokenEXT *info = token->data.pPushConstant; cmd->u.push_constants2.push_constants_info = (void*)cmdptr; @@ -4570,7 +4579,8 @@ process_sequence_ext(struct rendering_state *state, pci->offset = info->updateRange.offset; pci->size = info->updateRange.size; pci->pValues = (void*)((uint8_t*)cmdptr + sizeof(VkPushConstantsInfoKHR)); - if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT) + if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT || + token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_EXT) memcpy((void*)pci->pValues, data, info->updateRange.size); else memcpy((void*)pci->pValues, &seq, info->updateRange.size); @@ -4867,6 +4877,42 @@ handle_descriptor_buffer_offsets(struct vk_cmd_queue_entry *cmd, struct renderin } } +static void +handle_heap(struct vk_cmd_queue_entry *cmd, struct rendering_state *state, enum lvp_descriptor_heap heap) +{ + VkBindHeapInfoEXT *bind = cmd->u.bind_sampler_heap_ext.bind_info; + pipe_resource_reference(&state->desc_buffers[heap], NULL); + state->desc_buffer_addrs[heap] = (void *)(uintptr_t)bind->heapRange.address; + state->desc_buffers[heap] = get_buffer_resource(state->pctx, state->desc_buffer_addrs[heap]); + for (unsigned i = 0; i < MESA_SHADER_RAYGEN + 1; i++) + if (state->desc_buffers[heap]) + handle_set_stage_buffer(state, state->desc_buffers[heap], 0, i, heap); + memset(state->pcbuf_dirty, 1, sizeof(state->pcbuf_dirty)); + memset(state->constbuf_dirty, 1, sizeof(state->constbuf_dirty)); + + // uint32_t *data = (uint32_t *)state->desc_buffer_addrs[heap]; + // printf("heap[%u]:\n", heap); + // for (uint32_t i = 0; i < bind->heapRange.size / 4; i++) + // printf(" %p(0x%x): 0x%x\n", data + i, i * 4, data[i]); +} + +static void +handle_push_data(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) +{ + VkPushDataInfoEXT *push = cmd->u.push_data_ext.push_data_info; + memcpy(state->push_constants + push->offset, push->data.address, push->data.size); + + state->pcbuf_dirty[MESA_SHADER_VERTEX] |= true; + state->pcbuf_dirty[MESA_SHADER_FRAGMENT] |= true; + state->pcbuf_dirty[MESA_SHADER_GEOMETRY] |= true; + state->pcbuf_dirty[MESA_SHADER_TESS_CTRL] |= true; + state->pcbuf_dirty[MESA_SHADER_TESS_EVAL] |= true; + state->pcbuf_dirty[MESA_SHADER_COMPUTE] |= true; + state->pcbuf_dirty[MESA_SHADER_TASK] |= true; + state->pcbuf_dirty[MESA_SHADER_MESH] |= true; + state->pcbuf_dirty[MESA_SHADER_RAYGEN] |= true; +} + static void * lvp_push_internal_buffer(struct rendering_state *state, mesa_shader_stage stage, uint32_t size) { @@ -5406,6 +5452,11 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp) ENQUEUE_CMD(CmdSetColorBlendEquationEXT) ENQUEUE_CMD(CmdSetColorWriteMaskEXT) + /* VK_EXT_descriptor_heap */ + ENQUEUE_CMD(CmdBindSamplerHeapEXT) + ENQUEUE_CMD(CmdBindResourceHeapEXT) + ENQUEUE_CMD(CmdPushDataEXT) + ENQUEUE_CMD(CmdBindShadersEXT) /* required for EXT_shader_object */ ENQUEUE_CMD(CmdSetCoverageModulationModeNV) @@ -5913,6 +5964,15 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds, case VK_CMD_BIND_DESCRIPTOR_BUFFER_EMBEDDED_SAMPLERS2_EXT: handle_descriptor_buffer_embedded_samplers(cmd, state); break; + case VK_CMD_BIND_SAMPLER_HEAP_EXT: + handle_heap(cmd, state, LVP_DESCRIPTOR_HEAP_SAMPLER); + break; + case VK_CMD_BIND_RESOURCE_HEAP_EXT: + handle_heap(cmd, state, LVP_DESCRIPTOR_HEAP_RESOURCE); + break; + case VK_CMD_PUSH_DATA_EXT: + handle_push_data(cmd, state); + break; #ifdef VK_ENABLE_BETA_EXTENSIONS case VK_CMD_INITIALIZE_GRAPH_SCRATCH_MEMORY_AMDX: break; diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c index 685c8ee5d3b..1869b2fff9a 100644 --- a/src/gallium/frontends/lavapipe/lvp_pipeline.c +++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c @@ -24,11 +24,13 @@ #include "lvp_private.h" #include "vk_blend.h" #include "vk_nir_convert_ycbcr.h" +#include "vk_nir_lower_descriptor_heaps.h" #include "vk_pipeline.h" #include "vk_render_pass.h" #include "vk_util.h" #include "glsl_types.h" #include "util/os_time.h" +#include "util/u_inlines.h" #include "spirv/nir_spirv.h" #include "nir/nir_builder.h" #include "nir/nir_serialize.h" @@ -58,6 +60,12 @@ shader_destroy(struct lvp_device *device, struct lvp_shader *shader, bool locked device->queue.ctx->delete_ms_state, }; + if (shader->heaps && shader->embedded_samplers) { + pipe_resource_reference(&shader->embedded_samplers, NULL); + device->pscreen->unmap_memory(device->pscreen, shader->embedded_samplers_memory); + device->pscreen->free_memory(device->pscreen, shader->embedded_samplers_memory); + } + if (!locked) simple_mtx_lock(&device->queue.lock); @@ -346,6 +354,28 @@ lvp_create_pipeline_nir(nir_shader *nir) return pipeline_nir; } +static void +lvp_shader_alloc_embedded_samplers(struct lvp_shader *shader, struct lvp_device *device, uint32_t size) +{ + struct pipe_resource template = { + .bind = PIPE_BIND_CONSTANT_BUFFER, + .screen = device->pscreen, + .target = PIPE_BUFFER, + .format = PIPE_FORMAT_R8_UNORM, + .width0 = size, + .height0 = 1, + .depth0 = 1, + .array_size = 1, + .flags = PIPE_RESOURCE_FLAG_DONT_OVER_ALLOCATE, + }; + + uint64_t embedded_samplers_size = 0; + shader->embedded_samplers = device->pscreen->resource_create_unbacked(device->pscreen, &template, &embedded_samplers_size); + shader->embedded_samplers_memory = device->pscreen->allocate_memory(device->pscreen, embedded_samplers_size); + shader->embedded_samplers_map = device->pscreen->map_memory(device->pscreen, shader->embedded_samplers_memory); + device->pscreen->resource_bind_backing(device->pscreen, shader->embedded_samplers, shader->embedded_samplers_memory, 0, 0, 0); +} + static VkResult compile_spirv(struct lvp_device *pdevice, VkPipelineCreateFlags2KHR pipeline_flags, @@ -381,23 +411,55 @@ compile_spirv(struct lvp_device *pdevice, return result; } +struct lvp_ycbcr_conversion_lookup_info { + const struct lvp_shader *shader; + const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping; + struct vk_sampler_state_array *embedded_samplers; +}; + static const struct vk_ycbcr_conversion_state * lvp_ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32_t array_index) { - const struct lvp_pipeline_layout *layout = data; + const struct lvp_ycbcr_conversion_lookup_info *info = data; - const struct lvp_descriptor_set_layout *set_layout = container_of(layout->vk.set_layouts[set], struct lvp_descriptor_set_layout, vk); - const struct lvp_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding]; - if (!binding_layout->immutable_samplers) - return NULL; + if (!info->shader->heaps) { + const struct lvp_descriptor_set_layout *set_layout = + container_of(info->shader->layout->vk.set_layouts[set], struct lvp_descriptor_set_layout, vk); + const struct lvp_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding]; + if (!binding_layout->immutable_samplers) + return NULL; - return binding_layout->immutable_ycbcr[array_index].format ? &binding_layout->immutable_ycbcr[array_index] : NULL; + return binding_layout->immutable_ycbcr[array_index].format ? &binding_layout->immutable_ycbcr[array_index] : NULL; + } + + if (set == VK_NIR_YCBCR_SET_IMMUTABLE_SAMPLERS) { + assert(binding < info->embedded_samplers->sampler_count); + return &info->embedded_samplers->samplers[binding].ycbcr_conversion; + } + + if (info->embedded_samplers) { + const VkDescriptorSetAndBindingMappingEXT *mapping = vk_descriptor_heap_mapping( + info->mapping, set, binding, nir_resource_type_combined_sampled_image); + if (!mapping) + return NULL; + + const VkSamplerCreateInfo *sampler_info = vk_descriptor_heap_embedded_sampler(mapping); + if (!sampler_info) + return NULL; + + struct vk_sampler sampler = {0}; + vk_sampler_init(info->shader->base.device, &sampler, sampler_info); + return sampler.ycbcr_conversion ? &sampler.ycbcr_conversion->state : NULL; + } + + return NULL; } /* pipeline is NULL for shader objects. */ static void -lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_pipeline_layout *layout, - struct vk_pipeline_robustness_state *robustness) +lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_shader *shader, + struct vk_pipeline_robustness_state *robustness, + const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping) { if (nir->info.stage != MESA_SHADER_TESS_CTRL) NIR_PASS(_, nir, remove_barriers, nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_MESH || nir->info.stage == MESA_SHADER_TASK); @@ -453,6 +515,14 @@ lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_pipelin optimize(nir); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + struct vk_sampler_state_array embedded_samplers; + if (shader->heaps) { + vk_nir_lower_descriptor_heaps_options heaps_options = { + .lower_shader_record_index_to_non_uniform = true, + }; + NIR_PASS(_, nir, vk_nir_lower_descriptor_heaps, mapping, &heaps_options, &embedded_samplers); + } + NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries, nir_shader_get_entrypoint(nir), nir_var_shader_out | nir_var_shader_in); NIR_PASS(_, nir, nir_split_var_copies); @@ -471,15 +541,34 @@ lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_pipelin nir_var_mem_global | nir_var_mem_constant, nir_address_format_64bit_global); - NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lvp_ycbcr_conversion_lookup, layout); + struct lvp_ycbcr_conversion_lookup_info ycbcr_info = { + .shader = shader, + .mapping = mapping, + .embedded_samplers = &embedded_samplers, + }; + NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lvp_ycbcr_conversion_lookup, &ycbcr_info); nir_lower_non_uniform_access_options options = { .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access | nir_lower_non_uniform_texture_query | nir_lower_non_uniform_image_query, }; - NIR_PASS(_, nir, nir_lower_non_uniform_access, &options); - lvp_lower_pipeline_layout(pdevice, layout, nir); + if (shader->heaps) { + NIR_PASS(_, nir, lvp_nir_lower_desciptor_heaps, mapping); + + NIR_PASS(_, nir, lvp_nir_lower_push_constants, &shader->push_constant_size); + NIR_PASS(_, nir, nir_lower_non_uniform_access, &options); + + if (embedded_samplers.sampler_count) { + lvp_shader_alloc_embedded_samplers(shader, pdevice, embedded_samplers.sampler_count * sizeof(struct lp_sampler_descriptor)); + for (uint32_t i = 0; i < embedded_samplers.sampler_count; i++) + lvp_sampler_init(pdevice, &shader->embedded_samplers_map[i], &embedded_samplers.samplers[i]); + } + } else { + NIR_PASS(_, nir, nir_lower_non_uniform_access, &options); + lvp_lower_pipeline_layout(pdevice, shader->layout, nir); + NIR_PASS(_, nir, lvp_nir_lower_push_constants, &shader->push_constant_size); + } NIR_PASS(_, nir, lvp_nir_lower_ray_queries); @@ -556,13 +645,22 @@ lvp_spirv_to_nir(struct lvp_pipeline *pipeline, const void *pipeline_pNext, struct lvp_device *device = lvp_pipeline_device(pipeline); VkResult result = compile_spirv(device, pipeline->flags, sinfo, out_nir); if (result == VK_SUCCESS) { + struct lvp_shader *shader = &pipeline->shaders[(*out_nir)->info.stage]; + shader->heaps = pipeline->heaps; + shader->layout = pipeline->layout; + if (pipeline->layout) + shader->push_constant_size = pipeline->layout->push_constant_size; + if (pipeline->type == LVP_PIPELINE_EXEC_GRAPH) lvp_lower_exec_graph(pipeline, *out_nir); struct vk_pipeline_robustness_state robustness; vk_pipeline_robustness_state_fill(&device->vk.robustness_state, &robustness, pipeline_pNext, sinfo->pNext); - lvp_shader_lower(device, *out_nir, pipeline->layout, &robustness); + const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping = + vk_find_struct_const(sinfo, SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT); + + lvp_shader_lower(device, *out_nir, shader, &robustness, mapping); } return result; @@ -585,7 +683,6 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, const void *pipeline_pNe if (result == VK_SUCCESS) { struct lvp_shader *shader = &pipeline->shaders[stage]; lvp_shader_init(shader, nir); - shader->push_constant_size = pipeline->layout->push_constant_size; } return result; } @@ -851,6 +948,7 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT); const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo, PIPELINE_LIBRARY_CREATE_INFO_KHR); + const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; if (libinfo) @@ -865,17 +963,20 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline, pipeline->library = true; struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout); - - if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT)) - /* this is a regular pipeline with no partials: directly reuse */ - pipeline->layout = layout ? (void*)vk_pipeline_layout_ref(&layout->vk) : NULL; - else if (pipeline->stages & layout_stages) { - if ((pipeline->stages & layout_stages) == layout_stages) - /* this has all the layout stages: directly reuse */ - pipeline->layout = (void*)vk_pipeline_layout_ref(&layout->vk); - else { - /* this is a partial: copy for later merging to avoid modifying another layout */ - merge_layouts(&device->vk, pipeline, layout); + if (flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT) { + pipeline->heaps = true; + } else { + if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT)) + /* this is a regular pipeline with no partials: directly reuse */ + pipeline->layout = layout ? (void*)vk_pipeline_layout_ref(&layout->vk) : NULL; + else if (pipeline->stages & layout_stages) { + if ((pipeline->stages & layout_stages) == layout_stages) + /* this has all the layout stages: directly reuse */ + pipeline->layout = (void*)vk_pipeline_layout_ref(&layout->vk); + else { + /* this is a partial: copy for later merging to avoid modifying another layout */ + merge_layouts(&device->vk, pipeline, layout); + } } } @@ -900,7 +1001,7 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline, pipeline->force_min_sample = p->force_min_sample; copy_shader_sanitized(&pipeline->shaders[MESA_SHADER_FRAGMENT], &p->shaders[MESA_SHADER_FRAGMENT]); } - if (p->stages & layout_stages) { + if (p->stages & layout_stages && p->layout) { if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT)) { merge_layouts(&device->vk, pipeline, p->layout); lvp_forall_gfx_stage(i) { @@ -1001,13 +1102,6 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline, if (!libstate && !pipeline->library) { lvp_pipeline_shaders_compile(pipeline, false); - if (pipeline->layout) { - for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) { - VkShaderStageFlagBits stage = mesa_to_vk_shader_stage(i); - if (pipeline->layout->push_constant_stages & stage) - pipeline->shaders[i].push_constant_size = pipeline->layout->push_constant_size; - } - } } return VK_SUCCESS; @@ -1130,8 +1224,12 @@ lvp_compute_pipeline_init(struct lvp_pipeline *pipeline, VkPipelineCreateFlagBits2KHR flags) { pipeline->flags = flags; - pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout); - vk_pipeline_layout_ref(&pipeline->layout->vk); + if (flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT) { + pipeline->heaps = true; + } else { + pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout); + vk_pipeline_layout_ref(&pipeline->layout->vk); + } pipeline->force_min_sample = false; pipeline->type = LVP_PIPELINE_COMPUTE; @@ -1143,8 +1241,6 @@ lvp_compute_pipeline_init(struct lvp_pipeline *pipeline, struct lvp_shader *shader = &pipeline->shaders[MESA_SHADER_COMPUTE]; shader->shader_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir), false); pipeline->compiled = true; - if (pipeline->layout) - shader->push_constant_size = pipeline->layout->push_constant_size; return VK_SUCCESS; } @@ -1319,14 +1415,25 @@ create_shader_object(struct lvp_device *device, const VkShaderCreateInfoEXT *pCr goto fail; shader->push_constant_size = blob_read_uint32(&blob); + + uint32_t embedded_samplers_size = blob_read_uint32(&blob); + if (embedded_samplers_size) { + lvp_shader_alloc_embedded_samplers(shader, device, embedded_samplers_size); + memcpy(shader->embedded_samplers_map, blob_read_bytes(&blob, embedded_samplers_size), embedded_samplers_size); + } } if (!nir_shader_get_entrypoint(nir)) goto fail; blob_init(&shader->blob); - if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT) - lvp_shader_lower(device, nir, shader->layout, NULL); + shader->heaps = pCreateInfo->flags & VK_SHADER_CREATE_DESCRIPTOR_HEAP_BIT_EXT; + + if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT) { + const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping = + vk_find_struct_const(pCreateInfo, SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT); + lvp_shader_lower(device, nir, shader, NULL, mapping); + } lvp_shader_init(shader, nir); @@ -1342,6 +1449,11 @@ create_shader_object(struct lvp_device *device, const VkShaderCreateInfoEXT *pCr blob_write_uint32(&shader->blob, shader->push_constant_size); + uint32_t embedded_samplers_size = shader->embedded_samplers ? shader->embedded_samplers->width0 : 0; + blob_write_uint32(&shader->blob, embedded_samplers_size); + if (shader->embedded_samplers) + blob_write_bytes(&shader->blob, shader->embedded_samplers_map, embedded_samplers_size); + shader->shader_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, nir), false); return lvp_shader_to_handle(shader); fail: diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h index fd7ade4033c..ae355949206 100644 --- a/src/gallium/frontends/lavapipe/lvp_private.h +++ b/src/gallium/frontends/lavapipe/lvp_private.h @@ -99,7 +99,7 @@ extern "C" { #define LVP_NUM_QUEUES 1 #define MAX_SETS 8 -#define MAX_DESCRIPTORS 1000000 /* Required by vkd3d-proton */ +#define MAX_DESCRIPTORS ((1<<20) - (1<<15)) /* Required by VK_EXT_descriptor_heap */ #define MAX_PUSH_CONSTANTS_SIZE 256 #define MAX_PUSH_DESCRIPTORS 32 #define MAX_DESCRIPTOR_UNIFORM_BLOCK_SIZE MAX_DESCRIPTORS @@ -150,6 +150,13 @@ void __lvp_finishme(const char *file, int line, const char *format, ...) stage = ffs(__tmp) - 1, __tmp; \ __tmp &= ~(1 << (stage))) +enum lvp_descriptor_heap { + LVP_DESCRIPTOR_HEAP_RESOURCE, + LVP_DESCRIPTOR_HEAP_SAMPLER, + LVP_DESCRIPTOR_HEAP_EMBEDDED, + LVP_DESCRIPTOR_HEAP_COUNT, +}; + struct lvp_physical_device { struct vk_physical_device vk; @@ -447,6 +454,9 @@ lvp_pipeline_nir_ref(struct lvp_pipeline_nir **dst, struct lvp_pipeline_nir *src struct lvp_shader { struct vk_object_base base; struct lvp_pipeline_layout *layout; + struct pipe_memory_allocation *embedded_samplers_memory; + struct lp_sampler_descriptor *embedded_samplers_map; + struct pipe_resource *embedded_samplers; struct lvp_pipeline_nir *pipeline_nir; struct lvp_pipeline_nir *tess_ccw; void *shader_cso; @@ -454,6 +464,7 @@ struct lvp_shader { struct pipe_stream_output_info stream_output; struct blob blob; //preserved for GetShaderBinaryDataEXT uint32_t push_constant_size; + bool heaps; }; enum lvp_pipeline_type { @@ -532,6 +543,7 @@ struct lvp_pipeline { bool library; bool compiled; bool used; + bool heaps; struct { const char *name; @@ -766,7 +778,7 @@ void lvp_nir_lower_blend(nir_shader *nir, const nir_lower_blend_options *opts); void -lvp_sampler_init(struct lvp_device *device, struct lp_sampler_descriptor *desc, const VkSamplerCreateInfo *pCreateInfo, const struct vk_sampler *sampler); +lvp_sampler_init(struct lvp_device *device, struct lp_sampler_descriptor *desc, const struct vk_sampler_state *vk_state); static inline uint8_t lvp_image_aspects_to_plane(ASSERTED const struct lvp_image *image, diff --git a/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c b/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c index f259b402ad9..1eb10bfd008 100644 --- a/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c +++ b/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c @@ -1114,7 +1114,10 @@ lvp_compile_ray_tracing_pipeline(struct lvp_pipeline *pipeline, struct lvp_shader *shader = &pipeline->shaders[MESA_SHADER_RAYGEN]; lvp_shader_init(shader, b->shader); - shader->push_constant_size = pipeline->layout->push_constant_size; + + if (pipeline->layout) + shader->push_constant_size = pipeline->layout->push_constant_size; + shader->shader_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir), false); _mesa_hash_table_destroy(compiler.functions, NULL); @@ -1123,7 +1126,7 @@ lvp_compile_ray_tracing_pipeline(struct lvp_pipeline *pipeline, static VkResult lvp_create_ray_tracing_pipeline(VkDevice _device, const VkAllocationCallbacks *allocator, const VkRayTracingPipelineCreateInfoKHR *create_info, - VkPipeline *out_pipeline) + VkPipeline *out_pipeline, VkPipelineCreateFlags2KHR flags) { VK_FROM_HANDLE(lvp_device, device, _device); VK_FROM_HANDLE(lvp_pipeline_layout, layout, create_info->layout); @@ -1138,7 +1141,10 @@ lvp_create_ray_tracing_pipeline(VkDevice _device, const VkAllocationCallbacks *a vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); - vk_pipeline_layout_ref(&layout->vk); + if (flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT) + pipeline->heaps = true; + else + vk_pipeline_layout_ref(&layout->vk); pipeline->layout = layout; pipeline->type = LVP_PIPELINE_RAY_TRACING; @@ -1194,15 +1200,15 @@ lvp_CreateRayTracingPipelinesKHR( uint32_t i = 0; for (; i < createInfoCount; i++) { + VkPipelineCreateFlags2KHR flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]); VkResult tmp_result = lvp_create_ray_tracing_pipeline( - device, pAllocator, pCreateInfos + i, pPipelines + i); + device, pAllocator, pCreateInfos + i, pPipelines + i, flags); if (tmp_result != VK_SUCCESS) { result = tmp_result; pPipelines[i] = VK_NULL_HANDLE; - if (vk_rt_pipeline_create_flags(&pCreateInfos[i]) & - VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR) + if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR) break; } } diff --git a/src/gallium/frontends/lavapipe/meson.build b/src/gallium/frontends/lavapipe/meson.build index 23fa1873382..7742e421dea 100644 --- a/src/gallium/frontends/lavapipe/meson.build +++ b/src/gallium/frontends/lavapipe/meson.build @@ -13,6 +13,7 @@ lvp_entrypoints = custom_target( liblvp_files = files( 'nir/lvp_nir_lower_cooperative_matrix.c', + 'nir/lvp_nir_lower_descriptor_heaps.c', 'nir/lvp_nir_lower_exec_graph.c', 'nir/lvp_nir_lower_input_attachments.c', 'nir/lvp_nir_lower_pipeline_layout.c', diff --git a/src/gallium/frontends/lavapipe/nir/lvp_nir.h b/src/gallium/frontends/lavapipe/nir/lvp_nir.h index c91b4c457f4..e9160a3c20b 100644 --- a/src/gallium/frontends/lavapipe/nir/lvp_nir.h +++ b/src/gallium/frontends/lavapipe/nir/lvp_nir.h @@ -10,6 +10,10 @@ #include "nir/nir.h" #include "nir/nir_builder.h" +#include + +#include "vk_nir_lower_descriptor_heaps.h" + nir_def *lvp_mul_vec3_mat(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation); void lvp_load_wto_matrix(nir_builder *b, nir_def *instance_addr, nir_def **node_data, nir_def **out); @@ -106,6 +110,8 @@ struct lvp_pipeline; struct lvp_pipeline_layout; struct vk_pipeline_robustness_state; +bool lvp_nir_lower_desciptor_heaps(nir_shader *shader, const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping); + bool lvp_lower_exec_graph(struct lvp_pipeline *pipeline, nir_shader *nir); bool lvp_lower_input_attachments(nir_shader *shader, bool use_fragcoord_sysval); @@ -114,7 +120,7 @@ void lvp_lower_pipeline_layout(const struct lvp_device *device, struct lvp_pipeline_layout *layout, nir_shader *shader); -bool lvp_nir_lower_push_constants(nir_shader *shader); +bool lvp_nir_lower_push_constants(nir_shader *shader, uint32_t *push_counstants_size); bool lvp_nir_lower_ray_queries(struct nir_shader *shader); diff --git a/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_descriptor_heaps.c b/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_descriptor_heaps.c new file mode 100644 index 00000000000..f50109e5972 --- /dev/null +++ b/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_descriptor_heaps.c @@ -0,0 +1,126 @@ +/* + * Copyright © 2025 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "lvp_nir.h" +#include "lvp_private.h" + +static void +lower_buffer(nir_builder *b, nir_intrinsic_instr *intr, uint32_t src_index) +{ + if (nir_src_bit_size(intr->src[src_index]) == 64) + return; + + nir_def *addr = nir_pack_64_2x32(b, nir_channels(b, intr->src[src_index].ssa, 0x3)); + nir_src_rewrite(&intr->src[src_index], addr); +} + +static bool +pass(nir_builder *b, nir_instr *instr, void *data) +{ + b->cursor = nir_before_instr(instr); + + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_load_ubo: + lower_buffer(b, intr, 0); + return true; + + case nir_intrinsic_load_ssbo: + case nir_intrinsic_ssbo_atomic: + case nir_intrinsic_ssbo_atomic_swap: + case nir_intrinsic_get_ssbo_size: + lower_buffer(b, intr, 0); + return true; + + case nir_intrinsic_store_ssbo: + lower_buffer(b, intr, 1); + return true; + + case nir_intrinsic_load_heap_descriptor: { + uint32_t resource_type = nir_intrinsic_resource_type(intr); + + enum lvp_descriptor_heap heap = LVP_DESCRIPTOR_HEAP_RESOURCE; + if (resource_type == VK_SPIRV_RESOURCE_TYPE_SAMPLER_BIT_EXT) + heap = LVP_DESCRIPTOR_HEAP_SAMPLER; + + nir_def *base = nir_load_const_buf_base_addr_lvp(b, nir_imm_int(b, heap + 1)); + nir_def *addr = nir_iadd(b, base, nir_u2u64(b, intr->src[0].ssa)); + + if (resource_type == VK_SPIRV_RESOURCE_TYPE_ACCELERATION_STRUCTURE_BIT_EXT) { + nir_def_replace(&intr->def, nir_build_load_global(b, 1, 64, addr)); + return true; + } + + nir_def_replace(&intr->def, nir_vec3(b, nir_unpack_64_2x32_split_x(b, addr), + nir_unpack_64_2x32_split_y(b, addr), nir_imm_int(b, 0))); + + return true; + } + + case nir_intrinsic_load_resource_heap_data: { + nir_def *base = nir_load_const_buf_base_addr_lvp(b, nir_imm_int(b, LVP_DESCRIPTOR_HEAP_RESOURCE + 1)); + nir_def *addr = nir_iadd(b, base, nir_u2u64(b, intr->src[0].ssa)); + nir_def_replace(&intr->def, nir_build_load_global(b, intr->def.num_components, intr->def.bit_size, addr, + .align_mul = nir_intrinsic_align_mul(intr), + .align_offset = nir_intrinsic_align_offset(intr))); + + return true; + } + + case nir_intrinsic_image_heap_sparse_load: + case nir_intrinsic_image_heap_load: + case nir_intrinsic_image_heap_store: + case nir_intrinsic_image_heap_atomic: + case nir_intrinsic_image_heap_atomic_swap: + case nir_intrinsic_image_heap_size: + case nir_intrinsic_image_heap_samples: { + nir_def *base = nir_load_const_buf_base_addr_lvp(b, nir_imm_int(b, LVP_DESCRIPTOR_HEAP_RESOURCE + 1)); + nir_rewrite_image_intrinsic(intr, nir_iadd(b, base, nir_u2u64(b, intr->src[0].ssa)), nir_image_intrinsic_type_bindless); + return true; + } + + default: + return false; + } + } + + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + + nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane); + uint32_t plane = plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0; + uint32_t plane_offset = plane * sizeof(struct lp_image_descriptor); + + for (uint32_t i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type == nir_tex_src_texture_heap_offset) { + tex->src[i].src_type = nir_tex_src_texture_handle; + nir_def *base = nir_load_const_buf_base_addr_lvp(b, nir_imm_int(b, LVP_DESCRIPTOR_HEAP_RESOURCE + 1)); + nir_src_rewrite(&tex->src[i].src, nir_iadd(b, base, nir_u2u64(b, nir_iadd_imm(b, tex->src[i].src.ssa, plane_offset)))); + } else if (tex->src[i].src_type == nir_tex_src_sampler_heap_offset) { + tex->src[i].src_type = nir_tex_src_sampler_handle; + nir_def *base = nir_load_const_buf_base_addr_lvp(b, nir_imm_int(b, LVP_DESCRIPTOR_HEAP_SAMPLER + 1)); + nir_src_rewrite(&tex->src[i].src, nir_iadd(b, base, nir_u2u64(b, nir_iadd_imm(b, tex->src[i].src.ssa, plane_offset)))); + } + } + + if (tex->embedded_sampler) { + nir_def *base = nir_load_const_buf_base_addr_lvp(b, nir_imm_int(b, LVP_DESCRIPTOR_HEAP_EMBEDDED + 1)); + nir_def *sampler = nir_iadd_imm(b, base, tex->sampler_index * sizeof(struct lp_sampler_descriptor) + plane_offset); + nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, sampler); + } + + return true; + } + + return false; +} + +bool +lvp_nir_lower_desciptor_heaps(nir_shader *shader, const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping) +{ + // nir_print_shader(shader, stdout); + return nir_shader_instructions_pass(shader, pass, nir_metadata_control_flow, NULL); +} diff --git a/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_push_constants.c b/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_push_constants.c index 4a54e35bcd4..60183c16016 100644 --- a/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_push_constants.c +++ b/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_push_constants.c @@ -29,12 +29,15 @@ pass(struct nir_builder *b, nir_intrinsic_instr *intr, void *data) .range = nir_intrinsic_range(intr)); nir_def_replace(&intr->def, load); + uint32_t *push_counstants_size = data; + *push_counstants_size = MAX2(*push_counstants_size, nir_intrinsic_range(intr)); + return true; } bool -lvp_nir_lower_push_constants(nir_shader *shader) +lvp_nir_lower_push_constants(nir_shader *shader, uint32_t *push_counstants_size) { - nir_shader_intrinsics_pass(shader, pass, nir_metadata_control_flow, NULL); + return nir_shader_intrinsics_pass(shader, pass, nir_metadata_control_flow, push_counstants_size); } \ No newline at end of file