From 12a7fc51c77925a5562fd104a8fbd664a46ffc8b Mon Sep 17 00:00:00 2001
From: Konstantin Seurer
Date: Tue, 11 Apr 2023 21:35:22 +0200
Subject: [PATCH] lavapipe: Rework descriptor handling

Instead of using gallium binding slots for binding descriptors,
descriptor sets have UBOs that contain the llvmpipe descriptors.
Descriptor sets are bound by binding their UBO to the corresponding
slot.

Reviewed-by: Dave Airlie
Part-of:
---
 .../frontends/lavapipe/lvp_cmd_buffer.c       |  23 +-
 .../frontends/lavapipe/lvp_descriptor_set.c   | 556 +++++------
 src/gallium/frontends/lavapipe/lvp_device.c   |  59 +-
 src/gallium/frontends/lavapipe/lvp_execute.c  | 880 +++---------------
 src/gallium/frontends/lavapipe/lvp_image.c    |  47 +-
 .../lavapipe/lvp_lower_vulkan_resource.c      | 315 ++-----
 src/gallium/frontends/lavapipe/lvp_pipeline.c | 108 +--
 src/gallium/frontends/lavapipe/lvp_private.h  | 111 +--
 8 files changed, 630 insertions(+), 1469 deletions(-)

diff --git a/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c b/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
index 7a493e7872a..41b03493b3d 100644
--- a/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
+++ b/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
@@ -159,27 +159,8 @@ VKAPI_ATTR void VKAPI_CALL lvp_CmdPushDescriptorSetWithTemplateKHR(
    for (unsigned i = 0; i < templ->entry_count; i++) {
       VkDescriptorUpdateTemplateEntry *entry = &templ->entry[i];
-      unsigned size = 0;
-      switch (entry->descriptorType) {
-      case VK_DESCRIPTOR_TYPE_SAMPLER:
-      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-         size = sizeof(VkDescriptorImageInfo);
-         break;
-      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-         size = sizeof(VkBufferView);
-         break;
-      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      default:
-         size = sizeof(VkDescriptorBufferInfo);
-         break;
-      }
+      unsigned size = lvp_descriptor_update_template_entry_size(entry->descriptorType);
+
       for (unsigned i = 0; i < entry->descriptorCount; i++) {
          memcpy((uint8_t*)cmd->u.push_descriptor_set_with_template_khr.data + offset, (const uint8_t*)pData + entry->offset + i * entry->stride, size);
          offset += size;
diff --git a/src/gallium/frontends/lavapipe/lvp_descriptor_set.c b/src/gallium/frontends/lavapipe/lvp_descriptor_set.c
index dd8c472fc6e..def2d1cd253 100644
--- a/src/gallium/frontends/lavapipe/lvp_descriptor_set.c
+++ b/src/gallium/frontends/lavapipe/lvp_descriptor_set.c
@@ -25,6 +25,7 @@
 #include "vk_descriptors.h"
 #include "vk_util.h"
 #include "util/u_math.h"
+#include "util/u_inlines.h"

 VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout(
     VkDevice _device,
@@ -60,7 +61,7 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout(
    size_t size = sizeof(struct lvp_descriptor_set_layout) +
                  num_bindings * sizeof(set_layout->binding[0]) +
-                 immutable_sampler_count * sizeof(struct lvp_sampler *);
+                 immutable_sampler_count * sizeof(union lp_descriptor*);

    set_layout = vk_descriptor_set_layout_zalloc(&device->vk, size);
    if (!set_layout)
@@ -68,8 +69,8 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout(
    set_layout->immutable_sampler_count = immutable_sampler_count;
    /* We just allocate all the samplers at the end of the struct */
-   struct pipe_sampler_state **samplers =
-      (struct pipe_sampler_state
**)&set_layout->binding[num_bindings]; + union lp_descriptor **samplers = + (union lp_descriptor **)&set_layout->binding[num_bindings]; set_layout->binding_count = num_bindings; set_layout->shader_stages = 0; @@ -84,28 +85,25 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout( return vk_error(device, result); } + uint32_t uniform_block_size = 0; + uint32_t dynamic_offset_count = 0; for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { const VkDescriptorSetLayoutBinding *binding = bindings + j; uint32_t b = binding->binding; - set_layout->binding[b].array_size = binding->descriptorCount; + uint32_t descriptor_count = binding->descriptorCount; + if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) + descriptor_count = 1; + + set_layout->binding[b].array_size = descriptor_count; set_layout->binding[b].descriptor_index = set_layout->size; set_layout->binding[b].type = binding->descriptorType; set_layout->binding[b].valid = true; - if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) - set_layout->size++; - else - set_layout->size += binding->descriptorCount; + set_layout->binding[b].uniform_block_offset = 0; + set_layout->binding[b].uniform_block_size = 0; - lvp_forall_stage(stage) { - set_layout->binding[b].stage[stage].const_buffer_index = -1; - set_layout->binding[b].stage[stage].shader_buffer_index = -1; - set_layout->binding[b].stage[stage].sampler_index = -1; - set_layout->binding[b].stage[stage].sampler_view_index = -1; - set_layout->binding[b].stage[stage].image_index = -1; - set_layout->binding[b].stage[stage].uniform_block_index = -1; - } + set_layout->size += descriptor_count; if (binding->descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || binding->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { @@ -115,10 +113,6 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout( switch (binding->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - lvp_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].sampler_index = set_layout->stage[s].sampler_count; - set_layout->stage[s].sampler_count += binding->descriptorCount; - } if (binding->pImmutableSamplers) { set_layout->binding[b].immutable_samplers = samplers; samplers += binding->descriptorCount; @@ -126,7 +120,7 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout( for (uint32_t i = 0; i < binding->descriptorCount; i++) { if (binding->pImmutableSamplers[i]) set_layout->binding[b].immutable_samplers[i] = - &lvp_sampler_from_handle(binding->pImmutableSamplers[i])->state; + &lvp_sampler_from_handle(binding->pImmutableSamplers[i])->desc; else set_layout->binding[b].immutable_samplers[i] = NULL; } @@ -139,42 +133,23 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout( switch (binding->descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - lvp_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].const_buffer_index = set_layout->stage[s].const_buffer_count; - set_layout->stage[s].const_buffer_count += binding->descriptorCount; - } - break; + break; case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: - lvp_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].uniform_block_offset = set_layout->stage[s].uniform_block_size; - set_layout->binding[b].stage[s].uniform_block_index = set_layout->stage[s].uniform_block_count; - set_layout->stage[s].uniform_block_size += 
binding->descriptorCount; - set_layout->stage[s].uniform_block_sizes[set_layout->stage[s].uniform_block_count++] = binding->descriptorCount; - } - break; + set_layout->binding[b].uniform_block_offset = uniform_block_size; + set_layout->binding[b].uniform_block_size = binding->descriptorCount; + uniform_block_size += binding->descriptorCount; + break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - lvp_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].shader_buffer_index = set_layout->stage[s].shader_buffer_count; - set_layout->stage[s].shader_buffer_count += binding->descriptorCount; - } break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - lvp_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].image_index = set_layout->stage[s].image_count; - set_layout->stage[s].image_count += binding->descriptorCount; - } break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - lvp_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].sampler_view_index = set_layout->stage[s].sampler_view_count; - set_layout->stage[s].sampler_view_count += binding->descriptorCount; - } break; default: break; @@ -183,38 +158,8 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout( set_layout->shader_stages |= binding->stageFlags; } -#ifndef NDEBUG - /* this otherwise crashes later and is annoying to track down */ - unsigned array[] = { - VK_SHADER_STAGE_VERTEX_BIT, - VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, - VK_SHADER_STAGE_GEOMETRY_BIT, - VK_SHADER_STAGE_FRAGMENT_BIT, - VK_SHADER_STAGE_COMPUTE_BIT, - VK_SHADER_STAGE_TASK_BIT_EXT, - VK_SHADER_STAGE_MESH_BIT_EXT, - }; - lvp_forall_stage(i) { - uint16_t const_buffer_count = 0; - uint16_t shader_buffer_count = 0; - uint16_t sampler_count = 0; - uint16_t sampler_view_count = 0; - uint16_t image_count = 0; - if (set_layout->shader_stages & array[i]) { - const_buffer_count += set_layout->stage[i].const_buffer_count; - shader_buffer_count += set_layout->stage[i].shader_buffer_count; - sampler_count += set_layout->stage[i].sampler_count; - sampler_view_count += set_layout->stage[i].sampler_view_count; - image_count += set_layout->stage[i].image_count; - } - assert(const_buffer_count <= device->physical_device->device_limits.maxPerStageDescriptorUniformBuffers); - assert(shader_buffer_count <= device->physical_device->device_limits.maxPerStageDescriptorStorageBuffers); - assert(sampler_count <= device->physical_device->device_limits.maxPerStageDescriptorSamplers); - assert(sampler_view_count <= device->physical_device->device_limits.maxPerStageDescriptorSampledImages); - assert(image_count <= device->physical_device->device_limits.maxPerStageDescriptorStorageImages); - } -#endif + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) + set_layout->binding[i].uniform_block_offset += set_layout->size * sizeof(union lp_descriptor); free(bindings); @@ -232,64 +177,6 @@ lvp_pipeline_layout_create(struct lvp_device *device, { struct lvp_pipeline_layout *layout = vk_pipeline_layout_zalloc(&device->vk, sizeof(*layout), pCreateInfo); - for (uint32_t set = 0; set < layout->vk.set_count; set++) { - if (layout->vk.set_layouts[set] == NULL) - continue; - - const struct lvp_descriptor_set_layout *set_layout = - 
vk_to_lvp_descriptor_set_layout(layout->vk.set_layouts[set]); - - lvp_forall_stage(i) { - layout->stage[i].uniform_block_size += set_layout->stage[i].uniform_block_size; - for (unsigned j = 0; j < set_layout->stage[i].uniform_block_count; j++) { - assert(layout->stage[i].uniform_block_count + j < MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS); - layout->stage[i].uniform_block_sizes[layout->stage[i].uniform_block_count + j] = set_layout->stage[i].uniform_block_sizes[j]; - } - layout->stage[i].uniform_block_count += set_layout->stage[i].uniform_block_count; - } - } - -#ifndef NDEBUG - /* this otherwise crashes later and is annoying to track down */ - unsigned array[] = { - VK_SHADER_STAGE_VERTEX_BIT, - VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, - VK_SHADER_STAGE_GEOMETRY_BIT, - VK_SHADER_STAGE_FRAGMENT_BIT, - VK_SHADER_STAGE_COMPUTE_BIT, - VK_SHADER_STAGE_TASK_BIT_EXT, - VK_SHADER_STAGE_MESH_BIT_EXT, - }; - - lvp_forall_stage(i) { - uint16_t const_buffer_count = 0; - uint16_t shader_buffer_count = 0; - uint16_t sampler_count = 0; - uint16_t sampler_view_count = 0; - uint16_t image_count = 0; - for (unsigned j = 0; j < layout->vk.set_count; j++) { - if (layout->vk.set_layouts[j] == NULL) - continue; - - const struct lvp_descriptor_set_layout *set_layout = - vk_to_lvp_descriptor_set_layout(layout->vk.set_layouts[j]); - - if (set_layout->shader_stages & array[i]) { - const_buffer_count += set_layout->stage[i].const_buffer_count; - shader_buffer_count += set_layout->stage[i].shader_buffer_count; - sampler_count += set_layout->stage[i].sampler_count; - sampler_view_count += set_layout->stage[i].sampler_view_count; - image_count += set_layout->stage[i].image_count; - } - } - assert(const_buffer_count <= device->physical_device->device_limits.maxPerStageDescriptorUniformBuffers); - assert(shader_buffer_count <= device->physical_device->device_limits.maxPerStageDescriptorStorageBuffers); - assert(sampler_count <= device->physical_device->device_limits.maxPerStageDescriptorSamplers); - assert(sampler_view_count <= device->physical_device->device_limits.maxPerStageDescriptorSampledImages); - assert(image_count <= device->physical_device->device_limits.maxPerStageDescriptorStorageImages); - } -#endif layout->push_constant_size = 0; for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) { @@ -320,40 +207,52 @@ lvp_descriptor_set_create(struct lvp_device *device, struct lvp_descriptor_set_layout *layout, struct lvp_descriptor_set **out_set) { - struct lvp_descriptor_set *set; - size_t base_size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); - size_t size = base_size; - lvp_forall_stage(i) - size += layout->stage[i].uniform_block_size; - set = vk_alloc(&device->vk.alloc /* XXX: Use the pool */, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + struct lvp_descriptor_set *set = vk_zalloc(&device->vk.alloc /* XXX: Use the pool */, + sizeof(struct lvp_descriptor_set), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!set) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - /* A descriptor set may not be 100% filled. Clear the set so we can can - * later detect holes in it. 
- */ - memset(set, 0, size); - vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET); set->layout = layout; vk_descriptor_set_layout_ref(&layout->vk); - /* Go through and fill out immutable samplers if we have any */ - struct lvp_descriptor *desc = set->descriptors; - uint8_t *uniform_mem = (uint8_t*)(set) + base_size; - for (uint32_t b = 0; b < layout->binding_count; b++) { - if (layout->binding[b].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { - desc->info.uniform = uniform_mem; - uniform_mem += layout->binding[b].array_size; - desc++; - } else { - if (layout->binding[b].immutable_samplers) { - for (uint32_t i = 0; i < layout->binding[b].array_size; i++) - desc[i].info.sampler = layout->binding[b].immutable_samplers[i]; - } - desc += layout->binding[b].array_size; + uint64_t bo_size = layout->size * sizeof(union lp_descriptor); + + for (unsigned i = 0; i < layout->binding_count; i++) + bo_size += layout->binding[i].uniform_block_size; + + struct pipe_resource template = { + .bind = PIPE_BIND_CONSTANT_BUFFER, + .screen = device->pscreen, + .target = PIPE_BUFFER, + .format = PIPE_FORMAT_R8_UNORM, + .width0 = bo_size, + .height0 = 1, + .depth0 = 1, + .array_size = 1, + .flags = PIPE_RESOURCE_FLAG_DONT_OVER_ALLOCATE, + }; + + set->bo = device->pscreen->resource_create_unbacked(device->pscreen, &template, &bo_size); + set->pmem = device->pscreen->allocate_memory(device->pscreen, bo_size); + + set->map = device->pscreen->map_memory(device->pscreen, set->pmem); + memset(set->map, 0, bo_size); + + device->pscreen->resource_bind_backing(device->pscreen, set->bo, set->pmem, 0); + + for (uint32_t binding_index = 0; binding_index < layout->binding_count; binding_index++) { + const struct lvp_descriptor_set_binding_layout *bind_layout = &set->layout->binding[binding_index]; + if (!bind_layout->immutable_samplers) + continue; + + union lp_descriptor *desc = set->map; + desc += bind_layout->descriptor_index; + + for (uint32_t sampler_index = 0; sampler_index < bind_layout->array_size; sampler_index++) { + if (bind_layout->immutable_samplers[sampler_index]) + desc[sampler_index] = *bind_layout->immutable_samplers[sampler_index]; } } @@ -366,6 +265,10 @@ void lvp_descriptor_set_destroy(struct lvp_device *device, struct lvp_descriptor_set *set) { + pipe_resource_reference(&set->bo, NULL); + device->pscreen->unmap_memory(device->pscreen, set->pmem); + device->pscreen->free_memory(device->pscreen, set->pmem); + vk_descriptor_set_layout_unref(&device->vk, &set->layout->vk); vk_object_base_finish(&set->base); vk_free(&device->vk.alloc, set); @@ -426,22 +329,24 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies) { + LVP_FROM_HANDLE(lvp_device, device, _device); + for (uint32_t i = 0; i < descriptorWriteCount; i++) { const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; LVP_FROM_HANDLE(lvp_descriptor_set, set, write->dstSet); const struct lvp_descriptor_set_binding_layout *bind_layout = &set->layout->binding[write->dstBinding]; - struct lvp_descriptor *desc = - &set->descriptors[bind_layout->descriptor_index]; + if (write->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { const VkWriteDescriptorSetInlineUniformBlock *uniform_data = vk_find_struct_const(write->pNext, WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK); assert(uniform_data); - desc->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK; - memcpy(desc->info.uniform + write->dstArrayElement, uniform_data->pData, uniform_data->dataSize); + 
memcpy((uint8_t *)set->map + bind_layout->uniform_block_offset + write->dstArrayElement, uniform_data->pData, uniform_data->dataSize); continue; } - desc += write->dstArrayElement; + + union lp_descriptor *desc = set->map; + desc += bind_layout->descriptor_index + write->dstArrayElement; switch (write->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: @@ -449,10 +354,8 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( LVP_FROM_HANDLE(lvp_sampler, sampler, write->pImageInfo[j].sampler); - desc[j] = (struct lvp_descriptor) { - .type = VK_DESCRIPTOR_TYPE_SAMPLER, - .info.sampler = &sampler->state, - }; + desc[j].sampler = sampler->desc.sampler; + desc[j].sampler_index = sampler->desc.sampler_index; } break; @@ -460,20 +363,20 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( for (uint32_t j = 0; j < write->descriptorCount; j++) { LVP_FROM_HANDLE(lvp_image_view, iview, write->pImageInfo[j].imageView); - desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - desc[j].info.sampler_view = iview ? iview->sv : NULL; - /* - * All consecutive bindings updated via a single VkWriteDescriptorSet structure, except those - * with a descriptorCount of zero, must all either use immutable samplers or must all not - * use immutable samplers - */ - if (bind_layout->immutable_samplers) { - desc[j].info.sampler = bind_layout->immutable_samplers[j]; - } else { - LVP_FROM_HANDLE(lvp_sampler, sampler, - write->pImageInfo[j].sampler); + if (iview) { + lp_jit_texture_from_pipe(&desc[j].texture, iview->sv); + desc[j].sample_functions = iview->texture_handle->functions; - desc[j].info.sampler = &sampler->state; + if (!bind_layout->immutable_samplers) { + LVP_FROM_HANDLE(lvp_sampler, sampler, + write->pImageInfo[j].sampler); + + desc[j].sampler = sampler->desc.sampler; + desc[j].sampler_index = sampler->desc.sampler_index; + } + } else { + desc[j].sample_functions = device->null_texture_handle->functions; + desc[j].sampler_index = 0; } } break; @@ -483,10 +386,13 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( LVP_FROM_HANDLE(lvp_image_view, iview, write->pImageInfo[j].imageView); - desc[j] = (struct lvp_descriptor) { - .type = write->descriptorType, - .info.sampler_view = iview ? iview->sv : NULL, - }; + if (iview) { + lp_jit_texture_from_pipe(&desc[j].texture, iview->sv); + desc[j].sample_functions = iview->texture_handle->functions; + } else { + desc[j].sample_functions = device->null_texture_handle->functions; + desc[j].sampler_index = 0; + } } break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: @@ -495,10 +401,12 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( LVP_FROM_HANDLE(lvp_image_view, iview, write->pImageInfo[j].imageView); - desc[j] = (struct lvp_descriptor) { - .type = write->descriptorType, - .info.image_view = iview ? iview->iv : ((struct pipe_image_view){0}), - }; + if (iview) { + lp_jit_image_from_pipe(&desc[j].image, &iview->iv); + desc[j].image_functions = iview->image_handle->functions; + } else { + desc[j].image_functions = device->null_image_handle->functions; + } } break; @@ -507,10 +415,13 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( LVP_FROM_HANDLE(lvp_buffer_view, bview, write->pTexelBufferView[j]); - desc[j] = (struct lvp_descriptor) { - .type = write->descriptorType, - .info.sampler_view = bview ? 
bview->sv : NULL, - }; + if (bview) { + lp_jit_texture_from_pipe(&desc[j].texture, bview->sv); + desc[j].sample_functions = bview->texture_handle->functions; + } else { + desc[j].sample_functions = device->null_texture_handle->functions; + desc[j].sampler_index = 0; + } } break; @@ -519,10 +430,12 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( LVP_FROM_HANDLE(lvp_buffer_view, bview, write->pTexelBufferView[j]); - desc[j] = (struct lvp_descriptor) { - .type = write->descriptorType, - .info.image_view = bview ? bview->iv : ((struct pipe_image_view){0}), - }; + if (bview) { + lp_jit_image_from_pipe(&desc[j].image, &bview->iv); + desc[j].image_functions = bview->image_handle->functions; + } else { + desc[j].image_functions = device->null_image_handle->functions; + } } break; @@ -530,14 +443,21 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: for (uint32_t j = 0; j < write->descriptorCount; j++) { LVP_FROM_HANDLE(lvp_buffer, buffer, write->pBufferInfo[j].buffer); - desc[j] = (struct lvp_descriptor) { - .type = write->descriptorType, - .info.ubo.buffer_offset = buffer ? write->pBufferInfo[j].offset : 0, - .info.ubo.buffer = buffer ? buffer->bo : NULL, - .info.ubo.buffer_size = buffer ? write->pBufferInfo[j].range : 0, - }; - if (buffer && write->pBufferInfo[j].range == VK_WHOLE_SIZE) - desc[j].info.ubo.buffer_size = buffer->bo->width0 - desc[j].info.ubo.buffer_offset; + + if (buffer) { + struct pipe_constant_buffer ubo = { + .buffer = buffer->bo, + .buffer_offset = write->pBufferInfo[j].offset, + .buffer_size = write->pBufferInfo[j].range, + }; + + if (write->pBufferInfo[j].range == VK_WHOLE_SIZE) + ubo.buffer_size = buffer->bo->width0 - ubo.buffer_offset; + + lp_jit_buffer_from_pipe_const(&desc[j].buffer, &ubo, device->pscreen); + } else { + lp_jit_buffer_from_pipe_const(&desc[j].buffer, &((struct pipe_constant_buffer){0}), device->pscreen); + } } break; @@ -545,14 +465,21 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for (uint32_t j = 0; j < write->descriptorCount; j++) { LVP_FROM_HANDLE(lvp_buffer, buffer, write->pBufferInfo[j].buffer); - desc[j] = (struct lvp_descriptor) { - .type = write->descriptorType, - .info.ssbo.buffer_offset = buffer ? write->pBufferInfo[j].offset : 0, - .info.ssbo.buffer = buffer ? buffer->bo : NULL, - .info.ssbo.buffer_size = buffer ? 
write->pBufferInfo[j].range : 0, - }; - if (buffer && write->pBufferInfo[j].range == VK_WHOLE_SIZE) - desc[j].info.ssbo.buffer_size = buffer->bo->width0 - desc[j].info.ssbo.buffer_offset; + + if (buffer) { + struct pipe_shader_buffer ubo = { + .buffer = buffer->bo, + .buffer_offset = write->pBufferInfo[j].offset, + .buffer_size = write->pBufferInfo[j].range, + }; + + if (write->pBufferInfo[j].range == VK_WHOLE_SIZE) + ubo.buffer_size = buffer->bo->width0 - ubo.buffer_offset; + + lp_jit_buffer_from_pipe(&desc[j].buffer, &ubo); + } else { + lp_jit_buffer_from_pipe(&desc[j].buffer, &((struct pipe_shader_buffer){0})); + } } break; @@ -568,18 +495,17 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets( const struct lvp_descriptor_set_binding_layout *src_layout = &src->layout->binding[copy->srcBinding]; - struct lvp_descriptor *src_desc = - &src->descriptors[src_layout->descriptor_index]; + union lp_descriptor *src_desc = src->map; + src_desc += src_layout->descriptor_index; const struct lvp_descriptor_set_binding_layout *dst_layout = &dst->layout->binding[copy->dstBinding]; - struct lvp_descriptor *dst_desc = - &dst->descriptors[dst_layout->descriptor_index]; + union lp_descriptor *dst_desc = dst->map; + dst_desc += dst_layout->descriptor_index; - if (src_desc->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { - dst_desc->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK; - memcpy(dst_desc->info.uniform + copy->dstArrayElement, - src_desc->info.uniform + copy->srcArrayElement, + if (src_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { + memcpy((uint8_t *)dst->map + dst_layout->uniform_block_offset + copy->dstArrayElement, + (uint8_t *)src->map + src_layout->uniform_block_offset + copy->srcArrayElement, copy->descriptorCount); } else { src_desc += copy->srcArrayElement; @@ -618,9 +544,8 @@ static void lvp_reset_descriptor_pool(struct lvp_device *device, { struct lvp_descriptor_set *set, *tmp; LIST_FOR_EACH_ENTRY_SAFE(set, tmp, &pool->sets, link) { - vk_descriptor_set_layout_unref(&device->vk, &set->layout->vk); list_del(&set->link); - vk_free(&device->vk.alloc, set); + lvp_descriptor_set_destroy(device, set); } } @@ -717,84 +642,137 @@ VKAPI_ATTR void VKAPI_CALL lvp_DestroyDescriptorUpdateTemplate(VkDevice _device, lvp_descriptor_template_templ_unref(device, templ); } -VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSetWithTemplate(VkDevice _device, - VkDescriptorSet descriptorSet, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, - const void *pData) +uint32_t +lvp_descriptor_update_template_entry_size(VkDescriptorType type) { + switch (type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + return sizeof(VkDescriptorImageInfo); + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return sizeof(VkBufferView); + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + default: + return sizeof(VkDescriptorBufferInfo); + } +} + +void +lvp_descriptor_set_update_with_template(VkDevice _device, VkDescriptorSet descriptorSet, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + const void *pData, bool push) +{ + LVP_FROM_HANDLE(lvp_device, device, _device); LVP_FROM_HANDLE(lvp_descriptor_set, set, descriptorSet); 
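   /* Editor's note, not part of the upstream patch: a sketch of how this
    * rework addresses descriptors.  Every set now owns a CPU-mapped buffer
    * (set->map, backed by set->bo), so a write to (binding, arrayElement)
    * resolves to a plain pointer offset into that mapping, roughly:
    *
    *    union lp_descriptor *desc = (union lp_descriptor *)set->map;
    *    desc += bind_layout->descriptor_index + dstArrayElement;
    *
    * Inline uniform block data lives after the descriptor array, at
    * bind_layout->uniform_block_offset, which lvp_CreateDescriptorSetLayout
    * rebases by set_layout->size * sizeof(union lp_descriptor).
    */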
LVP_FROM_HANDLE(lvp_descriptor_update_template, templ, descriptorUpdateTemplate); uint32_t i, j; + const uint8_t *pSrc = pData; + for (i = 0; i < templ->entry_count; ++i) { VkDescriptorUpdateTemplateEntry *entry = &templ->entry[i]; - const uint8_t *pSrc = ((const uint8_t *) pData) + entry->offset; + + if (!push) + pSrc = ((const uint8_t *) pData) + entry->offset; + const struct lvp_descriptor_set_binding_layout *bind_layout = &set->layout->binding[entry->dstBinding]; - struct lvp_descriptor *desc = - &set->descriptors[bind_layout->descriptor_index]; + if (entry->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { - desc->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK; - memcpy(desc->info.uniform + entry->dstArrayElement, pSrc, entry->descriptorCount); + memcpy((uint8_t *)set->map + bind_layout->uniform_block_offset + entry->dstArrayElement, pSrc, entry->descriptorCount); continue; } + + union lp_descriptor *desc = set->map; + desc += bind_layout->descriptor_index; + for (j = 0; j < entry->descriptorCount; ++j) { unsigned idx = j + entry->dstArrayElement; switch (entry->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: { LVP_FROM_HANDLE(lvp_sampler, sampler, *(VkSampler *)pSrc); - desc[idx] = (struct lvp_descriptor) { - .type = VK_DESCRIPTOR_TYPE_SAMPLER, - .info.sampler = &sampler->state, - }; + + desc[idx].sampler = sampler->desc.sampler; + desc[idx].sampler_index = sampler->desc.sampler_index; break; } case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { VkDescriptorImageInfo *info = (VkDescriptorImageInfo *)pSrc; LVP_FROM_HANDLE(lvp_image_view, iview, info->imageView); - desc[idx] = (struct lvp_descriptor) { - .type = entry->descriptorType, - .info.sampler_view = iview ? iview->sv : NULL, - .info.sampler = info->sampler ? &lvp_sampler_from_handle(info->sampler)->state : NULL, - }; + + if (iview) { + lp_jit_texture_from_pipe(&desc[idx].texture, iview->sv); + desc[idx].sample_functions = iview->texture_handle->functions; + + if (!bind_layout->immutable_samplers) { + LVP_FROM_HANDLE(lvp_sampler, sampler, info->sampler); + + desc[idx].sampler = sampler->desc.sampler; + desc[idx].sampler_index = sampler->desc.sampler_index; + } + } else { + desc[j].sample_functions = device->null_texture_handle->functions; + desc[j].sampler_index = 0; + } break; } case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: { VkDescriptorImageInfo *info = (VkDescriptorImageInfo *)pSrc; LVP_FROM_HANDLE(lvp_image_view, iview, info->imageView); - desc[idx] = (struct lvp_descriptor) { - .type = entry->descriptorType, - .info.sampler_view = iview ? iview->sv : NULL, - }; + + if (iview) { + lp_jit_texture_from_pipe(&desc[idx].texture, iview->sv); + desc[idx].sample_functions = iview->texture_handle->functions; + } else { + desc[j].sample_functions = device->null_texture_handle->functions; + desc[j].sampler_index = 0; + } break; } case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: { LVP_FROM_HANDLE(lvp_image_view, iview, ((VkDescriptorImageInfo *)pSrc)->imageView); - desc[idx] = (struct lvp_descriptor) { - .type = entry->descriptorType, - .info.image_view = iview ? 
iview->iv : ((struct pipe_image_view){0}), - }; + + if (iview) { + lp_jit_image_from_pipe(&desc[idx].image, &iview->iv); + desc[idx].image_functions = iview->image_handle->functions; + } else { + desc[idx].image_functions = device->null_image_handle->functions; + } break; } case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { LVP_FROM_HANDLE(lvp_buffer_view, bview, *(VkBufferView *)pSrc); - desc[idx] = (struct lvp_descriptor) { - .type = entry->descriptorType, - .info.sampler_view = bview ? bview->sv : NULL, - }; + + if (bview) { + lp_jit_texture_from_pipe(&desc[idx].texture, bview->sv); + desc[idx].sample_functions = bview->texture_handle->functions; + } else { + desc[j].sample_functions = device->null_texture_handle->functions; + desc[j].sampler_index = 0; + } break; } case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { LVP_FROM_HANDLE(lvp_buffer_view, bview, *(VkBufferView *)pSrc); - desc[idx] = (struct lvp_descriptor) { - .type = entry->descriptorType, - .info.image_view = bview ? bview->iv : ((struct pipe_image_view){0}), - }; + + if (bview) { + lp_jit_image_from_pipe(&desc[idx].image, &bview->iv); + desc[idx].image_functions = bview->image_handle->functions; + } else { + desc[idx].image_functions = device->null_image_handle->functions; + } break; } @@ -802,14 +780,21 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSetWithTemplate(VkDevice _device, case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: { VkDescriptorBufferInfo *info = (VkDescriptorBufferInfo *)pSrc; LVP_FROM_HANDLE(lvp_buffer, buffer, info->buffer); - desc[idx] = (struct lvp_descriptor) { - .type = entry->descriptorType, - .info.ubo.buffer_offset = buffer ? info->offset : 0, - .info.ubo.buffer = buffer ? buffer->bo : NULL, - .info.ubo.buffer_size = buffer ? info->range : 0, - }; - if (buffer && info->range == VK_WHOLE_SIZE) - desc[idx].info.ubo.buffer_size = buffer->bo->width0 - desc[idx].info.ubo.buffer_offset; + + if (buffer) { + struct pipe_constant_buffer ubo = { + .buffer = buffer->bo, + .buffer_offset = info->offset, + .buffer_size = info->range, + }; + + if (info->range == VK_WHOLE_SIZE) + ubo.buffer_size = buffer->bo->width0 - ubo.buffer_offset; + + lp_jit_buffer_from_pipe_const(&desc[idx].buffer, &ubo, device->pscreen); + } else { + lp_jit_buffer_from_pipe_const(&desc[idx].buffer, &((struct pipe_constant_buffer){0}), device->pscreen); + } break; } @@ -817,20 +802,39 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSetWithTemplate(VkDevice _device, case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { VkDescriptorBufferInfo *info = (VkDescriptorBufferInfo *)pSrc; LVP_FROM_HANDLE(lvp_buffer, buffer, info->buffer); - desc[idx] = (struct lvp_descriptor) { - .type = entry->descriptorType, - .info.ssbo.buffer_offset = buffer ? info->offset : 0, - .info.ssbo.buffer = buffer ? buffer->bo : NULL, - .info.ssbo.buffer_size = buffer ? 
info->range : 0, - }; - if (buffer && info->range == VK_WHOLE_SIZE) - desc[idx].info.ssbo.buffer_size = buffer->bo->width0 - desc[idx].info.ssbo.buffer_offset; + + if (buffer) { + struct pipe_shader_buffer ubo = { + .buffer = buffer->bo, + .buffer_offset = info->offset, + .buffer_size = info->range, + }; + + if (info->range == VK_WHOLE_SIZE) + ubo.buffer_size = buffer->bo->width0 - ubo.buffer_offset; + + lp_jit_buffer_from_pipe(&desc[idx].buffer, &ubo); + } else { + lp_jit_buffer_from_pipe(&desc[idx].buffer, &((struct pipe_shader_buffer){0})); + } break; } default: break; } - pSrc += entry->stride; + + if (push) + pSrc += lvp_descriptor_update_template_entry_size(entry->descriptorType); + else + pSrc += entry->stride; } } } + +VKAPI_ATTR void VKAPI_CALL +lvp_UpdateDescriptorSetWithTemplate(VkDevice device, VkDescriptorSet descriptorSet, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + const void *pData) +{ + lvp_descriptor_set_update_with_template(device, descriptorSet, descriptorUpdateTemplate, pData, false); +} diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c index 37156ef6a8a..5d404915a0c 100644 --- a/src/gallium/frontends/lavapipe/lvp_device.c +++ b/src/gallium/frontends/lavapipe/lvp_device.c @@ -1560,6 +1560,11 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDevice( uint32_t zero = 0; device->zero_buffer = pipe_buffer_create_with_data(device->queue.ctx, 0, PIPE_USAGE_IMMUTABLE, sizeof(uint32_t), &zero); + device->null_texture_handle = (void *)(uintptr_t)device->queue.ctx->create_texture_handle(device->queue.ctx, + &(struct pipe_sampler_view){ 0 }, NULL); + device->null_image_handle = (void *)(uintptr_t)device->queue.ctx->create_image_handle(device->queue.ctx, + &(struct pipe_image_view){ 0 }); + *pDevice = lvp_device_to_handle(device); return VK_SUCCESS; @@ -1572,6 +1577,9 @@ VKAPI_ATTR void VKAPI_CALL lvp_DestroyDevice( { LVP_FROM_HANDLE(lvp_device, device, _device); + device->queue.ctx->delete_texture_handle(device->queue.ctx, (uint64_t)(uintptr_t)device->null_texture_handle); + device->queue.ctx->delete_image_handle(device->queue.ctx, (uint64_t)(uintptr_t)device->null_image_handle); + device->queue.ctx->delete_fs_state(device->queue.ctx, device->noop_fs); if (device->queue.last_fence) @@ -2205,35 +2213,43 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateSampler( vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER); + struct pipe_sampler_state state; VkClearColorValue border_color = vk_sampler_border_color_value(pCreateInfo, NULL); - STATIC_ASSERT(sizeof(sampler->state.border_color) == sizeof(border_color)); + STATIC_ASSERT(sizeof(state.border_color) == sizeof(border_color)); - sampler->state.wrap_s = vk_conv_wrap_mode(pCreateInfo->addressModeU); - sampler->state.wrap_t = vk_conv_wrap_mode(pCreateInfo->addressModeV); - sampler->state.wrap_r = vk_conv_wrap_mode(pCreateInfo->addressModeW); - sampler->state.min_img_filter = pCreateInfo->minFilter == VK_FILTER_LINEAR ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; - sampler->state.min_mip_filter = pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR ? PIPE_TEX_MIPFILTER_LINEAR : PIPE_TEX_MIPFILTER_NEAREST; - sampler->state.mag_img_filter = pCreateInfo->magFilter == VK_FILTER_LINEAR ? 
PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; - sampler->state.min_lod = pCreateInfo->minLod; - sampler->state.max_lod = pCreateInfo->maxLod; - sampler->state.lod_bias = pCreateInfo->mipLodBias; + state.wrap_s = vk_conv_wrap_mode(pCreateInfo->addressModeU); + state.wrap_t = vk_conv_wrap_mode(pCreateInfo->addressModeV); + state.wrap_r = vk_conv_wrap_mode(pCreateInfo->addressModeW); + state.min_img_filter = pCreateInfo->minFilter == VK_FILTER_LINEAR ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; + state.min_mip_filter = pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR ? PIPE_TEX_MIPFILTER_LINEAR : PIPE_TEX_MIPFILTER_NEAREST; + state.mag_img_filter = pCreateInfo->magFilter == VK_FILTER_LINEAR ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; + state.min_lod = pCreateInfo->minLod; + state.max_lod = pCreateInfo->maxLod; + state.lod_bias = pCreateInfo->mipLodBias; if (pCreateInfo->anisotropyEnable) - sampler->state.max_anisotropy = pCreateInfo->maxAnisotropy; + state.max_anisotropy = pCreateInfo->maxAnisotropy; else - sampler->state.max_anisotropy = 1; - sampler->state.unnormalized_coords = pCreateInfo->unnormalizedCoordinates; - sampler->state.compare_mode = pCreateInfo->compareEnable ? PIPE_TEX_COMPARE_R_TO_TEXTURE : PIPE_TEX_COMPARE_NONE; - sampler->state.compare_func = pCreateInfo->compareOp; - sampler->state.seamless_cube_map = !(pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT); + state.max_anisotropy = 1; + state.unnormalized_coords = pCreateInfo->unnormalizedCoordinates; + state.compare_mode = pCreateInfo->compareEnable ? PIPE_TEX_COMPARE_R_TO_TEXTURE : PIPE_TEX_COMPARE_NONE; + state.compare_func = pCreateInfo->compareOp; + state.seamless_cube_map = !(pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT); STATIC_ASSERT((unsigned)VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE == (unsigned)PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE); STATIC_ASSERT((unsigned)VK_SAMPLER_REDUCTION_MODE_MIN == (unsigned)PIPE_TEX_REDUCTION_MIN); STATIC_ASSERT((unsigned)VK_SAMPLER_REDUCTION_MODE_MAX == (unsigned)PIPE_TEX_REDUCTION_MAX); if (reduction_mode_create_info) - sampler->state.reduction_mode = (enum pipe_tex_reduction_mode)reduction_mode_create_info->reductionMode; + state.reduction_mode = (enum pipe_tex_reduction_mode)reduction_mode_create_info->reductionMode; else - sampler->state.reduction_mode = PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE; - memcpy(&sampler->state.border_color, &border_color, sizeof(border_color)); + state.reduction_mode = PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE; + memcpy(&state.border_color, &border_color, sizeof(border_color)); + + simple_mtx_lock(&device->queue.lock); + sampler->texture_handle = (void *)(uintptr_t)device->queue.ctx->create_texture_handle(device->queue.ctx, NULL, &state); + simple_mtx_unlock(&device->queue.lock); + + lp_jit_sampler_from_pipe(&sampler->desc.sampler, &state); + sampler->desc.sampler_index = sampler->texture_handle->sampler_index; *pSampler = lvp_sampler_to_handle(sampler); @@ -2250,6 +2266,11 @@ VKAPI_ATTR void VKAPI_CALL lvp_DestroySampler( if (!_sampler) return; + + simple_mtx_lock(&device->queue.lock); + device->queue.ctx->delete_texture_handle(device->queue.ctx, (uint64_t)(uintptr_t)sampler->texture_handle); + simple_mtx_unlock(&device->queue.lock); + vk_object_base_finish(&sampler->base); vk_free2(&device->vk.alloc, pAllocator, sampler); } diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c index 070dea419c1..930c0336dca 100644 --- 
a/src/gallium/frontends/lavapipe/lvp_execute.c +++ b/src/gallium/frontends/lavapipe/lvp_execute.c @@ -128,28 +128,13 @@ struct rendering_state { unsigned index_offset; struct pipe_resource *index_buffer; struct pipe_constant_buffer const_buffer[LVP_SHADER_STAGES][16]; + struct lvp_descriptor_set *desc_sets[2][MAX_SETS]; int num_const_bufs[LVP_SHADER_STAGES]; int num_vb; unsigned start_vb; struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; struct cso_velems_state velem; - struct lvp_access_info access[LVP_SHADER_STAGES]; - struct pipe_sampler_view *sv[LVP_SHADER_STAGES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; - int num_sampler_views[LVP_SHADER_STAGES]; - struct pipe_sampler_state ss[LVP_SHADER_STAGES][PIPE_MAX_SAMPLERS]; - /* cso_context api is stupid */ - const struct pipe_sampler_state *cso_ss_ptr[LVP_SHADER_STAGES][PIPE_MAX_SAMPLERS]; - int num_sampler_states[LVP_SHADER_STAGES]; - bool sv_dirty[LVP_SHADER_STAGES]; - bool ss_dirty[LVP_SHADER_STAGES]; - - struct pipe_image_view iv[LVP_SHADER_STAGES][PIPE_MAX_SHADER_IMAGES]; - int num_shader_images[LVP_SHADER_STAGES]; - struct pipe_shader_buffer sb[LVP_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS]; - int num_shader_buffers[LVP_SHADER_STAGES]; - bool iv_dirty[LVP_SHADER_STAGES]; - bool sb_dirty[LVP_SHADER_STAGES]; bool disable_multisample; enum gs_output gs_output_lines : 2; @@ -161,11 +146,6 @@ struct rendering_state { uint8_t push_constants[128 * 4]; uint16_t push_size[2]; //gfx, compute uint16_t gfx_push_sizes[LVP_SHADER_STAGES]; - struct { - void *block[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS]; - uint16_t size[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS]; - uint16_t count; - } uniform_blocks[LVP_SHADER_STAGES]; VkRect2D render_area; bool suspending; @@ -196,6 +176,8 @@ struct rendering_state { bool tess_ccw; void *tess_states[2]; + + struct util_dynarray push_desc_sets; }; static struct pipe_resource * @@ -261,34 +243,18 @@ get_pcbuf_size(struct rendering_state *state, enum pipe_shader_type pstage) return state->has_pcbuf[pstage] ? 
state->push_size[is_compute] : 0; } -static unsigned -calc_ubo0_size(struct rendering_state *state, enum pipe_shader_type pstage) -{ - unsigned size = get_pcbuf_size(state, pstage); - for (unsigned i = 0; i < state->uniform_blocks[pstage].count; i++) - size += state->uniform_blocks[pstage].size[i]; - return size; -} - static void fill_ubo0(struct rendering_state *state, uint8_t *mem, enum pipe_shader_type pstage) { unsigned push_size = get_pcbuf_size(state, pstage); if (push_size) memcpy(mem, state->push_constants, push_size); - - mem += push_size; - for (unsigned i = 0; i < state->uniform_blocks[pstage].count; i++) { - unsigned size = state->uniform_blocks[pstage].size[i]; - memcpy(mem, state->uniform_blocks[pstage].block[i], size); - mem += size; - } } static void update_pcbuf(struct rendering_state *state, enum pipe_shader_type pstage) { - unsigned size = calc_ubo0_size(state, pstage); + unsigned size = get_pcbuf_size(state, pstage); if (size) { uint8_t *mem; struct pipe_constant_buffer cbuf; @@ -303,7 +269,7 @@ update_pcbuf(struct rendering_state *state, enum pipe_shader_type pstage) } static void -update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type sh, bool pcbuf_dirty, bool constbuf_dirty) +update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type sh, bool pcbuf_dirty) { unsigned stage = tgsi_processor_to_shader_stage(sh); state->inlines_dirty[sh] = false; @@ -325,43 +291,11 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type unsigned offset = shader->inlines.uniform_offsets[0][i]; if (offset < push_size) { memcpy(&v.vals[0][i], &state->push_constants[offset], sizeof(uint32_t)); - } else { - for (unsigned i = 0; i < state->uniform_blocks[sh].count; i++) { - if (offset < push_size + state->uniform_blocks[sh].size[i]) { - unsigned ubo_offset = offset - push_size; - uint8_t *block = state->uniform_blocks[sh].block[i]; - memcpy(&v.vals[0][i], &block[ubo_offset], sizeof(uint32_t)); - break; - } - push_size += state->uniform_blocks[sh].size[i]; - } } } for (unsigned i = count; i < MAX_INLINABLE_UNIFORMS; i++) v.vals[0][i] = 0; } - if (constbuf_dirty) { - struct pipe_box box = {0}; - u_foreach_bit(slot, shader->inlines.can_inline) { - /* this is already inlined above */ - if (slot == 0) - continue; - unsigned count = shader->inlines.count[slot]; - struct pipe_constant_buffer *cbuf = &state->const_buffer[sh][slot - 1]; - struct pipe_resource *pres = cbuf->buffer; - box.x = cbuf->buffer_offset; - box.width = cbuf->buffer_size - cbuf->buffer_offset; - struct pipe_transfer *xfer; - uint8_t *map = state->pctx->buffer_map(state->pctx, pres, 0, PIPE_MAP_READ, &box, &xfer); - for (unsigned i = 0; i < count; i++) { - unsigned offset = shader->inlines.uniform_offsets[slot][i]; - memcpy(&v.vals[slot][i], map + offset, sizeof(uint32_t)); - } - state->pctx->buffer_unmap(state->pctx, xfer); - for (unsigned i = count; i < MAX_INLINABLE_UNIFORMS; i++) - v.vals[slot][i] = 0; - } - } bool found = false; struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(&shader->inlines.variants, v.mask, &v, &found); void *shader_state; @@ -371,10 +305,6 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type } else { nir_shader *nir = nir_shader_clone(NULL, base_nir); NIR_PASS_V(nir, lvp_inline_uniforms, shader, v.vals[0], 0); - if (constbuf_dirty) { - u_foreach_bit(slot, shader->inlines.can_inline) - NIR_PASS_V(nir, lvp_inline_uniforms, shader, v.vals[slot], slot); - } lvp_shader_optimize(nir); impl = 
nir_shader_get_entrypoint(nir); if (ssa_alloc - impl->ssa_alloc < ssa_alloc / 2 && @@ -424,18 +354,10 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type static void emit_compute_state(struct rendering_state *state) { - if (state->iv_dirty[MESA_SHADER_COMPUTE]) { - state->pctx->set_shader_images(state->pctx, MESA_SHADER_COMPUTE, - 0, state->num_shader_images[MESA_SHADER_COMPUTE], - 0, state->iv[MESA_SHADER_COMPUTE]); - state->iv_dirty[MESA_SHADER_COMPUTE] = false; - } - bool pcbuf_dirty = state->pcbuf_dirty[MESA_SHADER_COMPUTE]; if (state->pcbuf_dirty[MESA_SHADER_COMPUTE]) update_pcbuf(state, MESA_SHADER_COMPUTE); - bool constbuf_dirty = state->constbuf_dirty[MESA_SHADER_COMPUTE]; if (state->constbuf_dirty[MESA_SHADER_COMPUTE]) { for (unsigned i = 0; i < state->num_const_bufs[MESA_SHADER_COMPUTE]; i++) state->pctx->set_constant_buffer(state->pctx, MESA_SHADER_COMPUTE, @@ -444,25 +366,7 @@ static void emit_compute_state(struct rendering_state *state) } if (state->inlines_dirty[MESA_SHADER_COMPUTE]) - update_inline_shader_state(state, MESA_SHADER_COMPUTE, pcbuf_dirty, constbuf_dirty); - - if (state->sb_dirty[MESA_SHADER_COMPUTE]) { - state->pctx->set_shader_buffers(state->pctx, MESA_SHADER_COMPUTE, - 0, state->num_shader_buffers[MESA_SHADER_COMPUTE], - state->sb[MESA_SHADER_COMPUTE], state->access[MESA_SHADER_COMPUTE].buffers_written); - state->sb_dirty[MESA_SHADER_COMPUTE] = false; - } - - if (state->sv_dirty[MESA_SHADER_COMPUTE]) { - state->pctx->set_sampler_views(state->pctx, MESA_SHADER_COMPUTE, 0, state->num_sampler_views[MESA_SHADER_COMPUTE], - 0, false, state->sv[MESA_SHADER_COMPUTE]); - state->sv_dirty[MESA_SHADER_COMPUTE] = false; - } - - if (state->ss_dirty[MESA_SHADER_COMPUTE]) { - cso_set_samplers(state->cso, MESA_SHADER_COMPUTE, state->num_sampler_states[MESA_SHADER_COMPUTE], state->cso_ss_ptr[MESA_SHADER_COMPUTE]); - state->ss_dirty[MESA_SHADER_COMPUTE] = false; - } + update_inline_shader_state(state, MESA_SHADER_COMPUTE, pcbuf_dirty); } static void @@ -573,11 +477,9 @@ static void emit_state(struct rendering_state *state) state->ve_dirty = false; } - bool constbuf_dirty[LVP_SHADER_STAGES] = {false}; bool pcbuf_dirty[LVP_SHADER_STAGES] = {false}; lvp_forall_gfx_stage(sh) { - constbuf_dirty[sh] = state->constbuf_dirty[sh]; if (state->constbuf_dirty[sh]) { for (unsigned idx = 0; idx < state->num_const_bufs[sh]; idx++) state->pctx->set_constant_buffer(state->pctx, sh, @@ -594,38 +496,7 @@ static void emit_state(struct rendering_state *state) lvp_forall_gfx_stage(sh) { if (state->inlines_dirty[sh]) - update_inline_shader_state(state, sh, pcbuf_dirty[sh], constbuf_dirty[sh]); - } - - lvp_forall_gfx_stage(sh) { - if (state->sb_dirty[sh]) { - state->pctx->set_shader_buffers(state->pctx, sh, - 0, state->num_shader_buffers[sh], - state->sb[sh], state->access[tgsi_processor_to_shader_stage(sh)].buffers_written); - } - } - - lvp_forall_gfx_stage(sh) { - if (state->iv_dirty[sh]) { - state->pctx->set_shader_images(state->pctx, sh, - 0, state->num_shader_images[sh], 0, - state->iv[sh]); - } - } - - lvp_forall_gfx_stage(sh) { - if (state->sv_dirty[sh]) { - state->pctx->set_sampler_views(state->pctx, sh, 0, state->num_sampler_views[sh], - 0, false, state->sv[sh]); - state->sv_dirty[sh] = false; - } - } - - lvp_forall_gfx_stage(sh) { - if (state->ss_dirty[sh]) { - cso_set_samplers(state->cso, sh, state->num_sampler_states[sh], state->cso_ss_ptr[sh]); - state->ss_dirty[sh] = false; - } + update_inline_shader_state(state, sh, pcbuf_dirty[sh]); } if (state->vp_dirty) 
{ @@ -646,18 +517,9 @@ handle_compute_shader(struct rendering_state *state, struct lvp_shader *shader, if ((layout->push_constant_stages & VK_SHADER_STAGE_COMPUTE_BIT) > 0) state->has_pcbuf[MESA_SHADER_COMPUTE] = layout->push_constant_size > 0; - state->uniform_blocks[MESA_SHADER_COMPUTE].count = layout->stage[MESA_SHADER_COMPUTE].uniform_block_count; - for (unsigned j = 0; j < layout->stage[MESA_SHADER_COMPUTE].uniform_block_count; j++) - state->uniform_blocks[MESA_SHADER_COMPUTE].size[j] = layout->stage[MESA_SHADER_COMPUTE].uniform_block_sizes[j]; - if (!state->has_pcbuf[MESA_SHADER_COMPUTE] && !layout->stage[MESA_SHADER_COMPUTE].uniform_block_count) - state->pcbuf_dirty[MESA_SHADER_COMPUTE] = false; - state->iv_dirty[MESA_SHADER_COMPUTE] |= state->num_shader_images[MESA_SHADER_COMPUTE] && - (state->access[MESA_SHADER_COMPUTE].images_read != shader->access.images_read || - state->access[MESA_SHADER_COMPUTE].images_written != shader->access.images_written); - state->sb_dirty[MESA_SHADER_COMPUTE] |= state->num_shader_buffers[MESA_SHADER_COMPUTE] && - state->access[MESA_SHADER_COMPUTE].buffers_written != shader->access.buffers_written; - memcpy(&state->access[MESA_SHADER_COMPUTE], &shader->access, sizeof(struct lvp_access_info)); + if (!state->has_pcbuf[MESA_SHADER_COMPUTE]) + state->pcbuf_dirty[MESA_SHADER_COMPUTE] = false; state->dispatch_info.block[0] = shader->pipeline_nir->nir->info.workgroup_size[0]; state->dispatch_info.block[1] = shader->pipeline_nir->nir->info.workgroup_size[1]; @@ -724,11 +586,6 @@ handle_graphics_stages(struct rendering_state *state, VkShaderStageFlagBits shad VkShaderStageFlagBits vk_stage = (1 << b); gl_shader_stage stage = vk_to_mesa_shader_stage(vk_stage); - state->iv_dirty[stage] |= state->num_shader_images[stage] && - (state->access[stage].images_read != state->shaders[stage]->access.images_read || - state->access[stage].images_written != state->shaders[stage]->access.images_written); - state->sb_dirty[stage] |= state->num_shader_buffers[stage] && state->access[stage].buffers_written != state->shaders[stage]->access.buffers_written; - memcpy(&state->access[stage], &state->shaders[stage]->access, sizeof(struct lvp_access_info)); state->has_pcbuf[stage] = false; switch (vk_stage) { @@ -801,9 +658,6 @@ unbind_graphics_stages(struct rendering_state *state, VkShaderStageFlagBits shad { u_foreach_bit(vkstage, shader_stages) { gl_shader_stage stage = vk_to_mesa_shader_stage(1<iv_dirty[stage] |= state->num_shader_images[stage] > 0; - state->sb_dirty[stage] |= state->num_shader_buffers[stage] > 0; - memset(&state->access[stage], 0, sizeof(state->access[stage])); state->has_pcbuf[stage] = false; switch (stage) { case MESA_SHADER_FRAGMENT: @@ -845,12 +699,9 @@ unbind_graphics_stages(struct rendering_state *state, VkShaderStageFlagBits shad static void handle_graphics_layout(struct rendering_state *state, gl_shader_stage stage, struct lvp_pipeline_layout *layout) { - state->uniform_blocks[stage].count = layout->stage[stage].uniform_block_count; - for (unsigned j = 0; j < layout->stage[stage].uniform_block_count; j++) - state->uniform_blocks[stage].size[j] = layout->stage[stage].uniform_block_sizes[j]; if (layout->push_constant_stages & BITFIELD_BIT(stage)) { state->has_pcbuf[stage] = layout->push_constant_size > 0; - if (!state->has_pcbuf[stage] && !state->uniform_blocks[stage].count) + if (!state->has_pcbuf[stage]) state->pcbuf_dirty[stage] = false; } } @@ -1171,24 +1022,6 @@ static void handle_graphics_pipeline(struct lvp_pipeline *pipeline, } } -static void 
-handle_pipeline_access(struct rendering_state *state, gl_shader_stage stage) -{ - enum pipe_shader_type pstage = pipe_shader_type_from_mesa(stage); - for (unsigned i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) { - state->iv[pstage][i].access = 0; - state->iv[pstage][i].shader_access = 0; - } - u_foreach_bit64(idx, state->access[stage].images_read) { - state->iv[pstage][idx].access |= PIPE_IMAGE_ACCESS_READ; - state->iv[pstage][idx].shader_access |= PIPE_IMAGE_ACCESS_READ; - } - u_foreach_bit64(idx, state->access[stage].images_written) { - state->iv[pstage][idx].access |= PIPE_IMAGE_ACCESS_WRITE; - state->iv[pstage][idx].shader_access |= PIPE_IMAGE_ACCESS_WRITE; - } -} - static void handle_pipeline(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -1196,12 +1029,8 @@ static void handle_pipeline(struct vk_cmd_queue_entry *cmd, pipeline->used = true; if (pipeline->is_compute_pipeline) { handle_compute_pipeline(cmd, state); - handle_pipeline_access(state, MESA_SHADER_COMPUTE); } else { handle_graphics_pipeline(pipeline, state); - lvp_forall_gfx_stage(sh) { - handle_pipeline_access(state, sh); - } } state->push_size[pipeline->is_compute_pipeline] = pipeline->layout->push_constant_size; } @@ -1214,8 +1043,6 @@ handle_graphics_pipeline_group(struct vk_cmd_queue_entry *cmd, struct rendering_ if (cmd->u.bind_pipeline_shader_group_nv.group_index) pipeline = lvp_pipeline_from_handle(pipeline->groups[cmd->u.bind_pipeline_shader_group_nv.group_index - 1]); handle_graphics_pipeline(pipeline, state); - lvp_forall_gfx_stage(sh) - handle_pipeline_access(state, sh); state->push_size[pipeline->is_compute_pipeline] = pipeline->layout->push_constant_size; } @@ -1242,290 +1069,108 @@ static void handle_vertex_buffers2(struct vk_cmd_queue_entry *cmd, state->vb_dirty = true; } -struct dyn_info { - struct { - uint16_t const_buffer_count; - uint16_t shader_buffer_count; - uint16_t sampler_count; - uint16_t sampler_view_count; - uint16_t image_count; - uint16_t uniform_block_count; - } stage[LVP_SHADER_STAGES]; - - uint32_t dyn_index; - const uint32_t *dynamic_offsets; - uint32_t dynamic_offset_count; -}; - -static void fill_sampler_stage(struct rendering_state *state, - struct dyn_info *dyn_info, - gl_shader_stage stage, - enum pipe_shader_type p_stage, - int array_idx, - const union lvp_descriptor_info *descriptor, - const struct lvp_descriptor_set_binding_layout *binding) -{ - int ss_idx = binding->stage[stage].sampler_index; - if (ss_idx == -1) - return; - ss_idx += array_idx; - ss_idx += dyn_info->stage[stage].sampler_count; - struct pipe_sampler_state *ss = binding->immutable_samplers ? 
binding->immutable_samplers[array_idx] : descriptor->sampler; - if (!ss) - return; - state->ss[p_stage][ss_idx] = *ss; - if (state->num_sampler_states[p_stage] <= ss_idx) - state->num_sampler_states[p_stage] = ss_idx + 1; - state->ss_dirty[p_stage] = true; -} - -static void fill_sampler_view_stage(struct rendering_state *state, - struct dyn_info *dyn_info, - gl_shader_stage stage, - enum pipe_shader_type p_stage, - int array_idx, - const union lvp_descriptor_info *descriptor, - const struct lvp_descriptor_set_binding_layout *binding) -{ - int sv_idx = binding->stage[stage].sampler_view_index; - if (sv_idx == -1) - return; - sv_idx += array_idx; - sv_idx += dyn_info->stage[stage].sampler_view_count; - - assert(sv_idx < ARRAY_SIZE(state->sv[p_stage])); - state->sv[p_stage][sv_idx] = descriptor->sampler_view; - - if (state->num_sampler_views[p_stage] <= sv_idx) - state->num_sampler_views[p_stage] = sv_idx + 1; - state->sv_dirty[p_stage] = true; -} - -static void fill_image_view_stage(struct rendering_state *state, - struct dyn_info *dyn_info, - gl_shader_stage stage, - enum pipe_shader_type p_stage, - int array_idx, - const union lvp_descriptor_info *descriptor, - const struct lvp_descriptor_set_binding_layout *binding) -{ - int idx = binding->stage[stage].image_index; - if (idx == -1) - return; - idx += array_idx; - idx += dyn_info->stage[stage].image_count; - uint16_t access = state->iv[p_stage][idx].access; - uint16_t shader_access = state->iv[p_stage][idx].shader_access; - state->iv[p_stage][idx] = descriptor->image_view; - state->iv[p_stage][idx].access = access; - state->iv[p_stage][idx].shader_access = shader_access; - - if (state->num_shader_images[p_stage] <= idx) - state->num_shader_images[p_stage] = idx + 1; - - state->iv_dirty[p_stage] = true; -} - -static void handle_descriptor(struct rendering_state *state, - struct dyn_info *dyn_info, - const struct lvp_descriptor_set_binding_layout *binding, - gl_shader_stage stage, - enum pipe_shader_type p_stage, - int array_idx, - VkDescriptorType type, - const union lvp_descriptor_info *descriptor) -{ - bool is_dynamic = type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || - type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; - - switch (type) { - case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: { - int idx = binding->stage[stage].uniform_block_index; - if (idx == -1) - return; - idx += dyn_info->stage[stage].uniform_block_count; - assert(descriptor->uniform); - state->uniform_blocks[p_stage].block[idx] = descriptor->uniform; - state->pcbuf_dirty[p_stage] = true; - state->inlines_dirty[p_stage] = true; - break; - } - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { - fill_image_view_stage(state, dyn_info, stage, p_stage, array_idx, descriptor, binding); - break; - } - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: { - int idx = binding->stage[stage].const_buffer_index; - if (idx == -1) - return; - idx += array_idx; - idx += dyn_info->stage[stage].const_buffer_count; - state->const_buffer[p_stage][idx] = descriptor->ubo; - if (is_dynamic) { - uint32_t offset = dyn_info->dynamic_offsets[dyn_info->dyn_index + binding->dynamic_index + array_idx]; - state->const_buffer[p_stage][idx].buffer_offset += offset; - } - if (state->num_const_bufs[p_stage] <= idx) - state->num_const_bufs[p_stage] = idx + 1; - state->constbuf_dirty[p_stage] = true; - state->inlines_dirty[p_stage] = true; - break; - } - case 
-   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
-      int idx = binding->stage[stage].shader_buffer_index;
-      if (idx == -1)
-         return;
-      idx += array_idx;
-      idx += dyn_info->stage[stage].shader_buffer_count;
-      state->sb[p_stage][idx] = descriptor->ssbo;
-      if (is_dynamic) {
-         uint32_t offset = dyn_info->dynamic_offsets[dyn_info->dyn_index + binding->dynamic_index + array_idx];
-         state->sb[p_stage][idx].buffer_offset += offset;
-      }
-      if (state->num_shader_buffers[p_stage] <= idx)
-         state->num_shader_buffers[p_stage] = idx + 1;
-      state->sb_dirty[p_stage] = true;
-      break;
-   }
-   case VK_DESCRIPTOR_TYPE_SAMPLER:
-      fill_sampler_stage(state, dyn_info, stage, p_stage, array_idx, descriptor, binding);
-      break;
-   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-      fill_sampler_view_stage(state, dyn_info, stage, p_stage, array_idx, descriptor, binding);
-      break;
-   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-      fill_sampler_stage(state, dyn_info, stage, p_stage, array_idx, descriptor, binding);
-      fill_sampler_view_stage(state, dyn_info, stage, p_stage, array_idx, descriptor, binding);
-      break;
-   default:
-      fprintf(stderr, "Unhandled descriptor set %d\n", type);
-      unreachable("oops");
-      break;
-   }
-}
-
 static void handle_set_stage(struct rendering_state *state,
-                             struct dyn_info *dyn_info,
-                             const struct lvp_descriptor_set *set,
+                             struct lvp_descriptor_set *set,
                              gl_shader_stage stage,
-                             enum pipe_shader_type p_stage)
+                             uint32_t index)
 {
-   for (unsigned j = 0; j < set->layout->binding_count; j++) {
-      const struct lvp_descriptor_set_binding_layout *binding;
-      const struct lvp_descriptor *descriptor;
-      binding = &set->layout->binding[j];
+   state->desc_sets[stage == MESA_SHADER_COMPUTE][index] = set;
 
-      if (binding->valid) {
-         unsigned array_size = binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ? 1 : binding->array_size;
-         for (unsigned i = 0; i < array_size; i++) {
-            descriptor = &set->descriptors[binding->descriptor_index + i];
-            handle_descriptor(state, dyn_info, binding, stage, p_stage, i, descriptor->type, &descriptor->info);
-         }
+   state->const_buffer[stage][index].buffer = set->bo;
+   state->const_buffer[stage][index].buffer_offset = 0;
+   state->const_buffer[stage][index].buffer_size = set->bo->width0;
+   state->const_buffer[stage][index].user_buffer = NULL;
+
+   state->constbuf_dirty[stage] = true;
+
+   if (state->num_const_bufs[stage] <= index)
+      state->num_const_bufs[stage] = index + 1;
+}
+
+static void
+apply_dynamic_offsets(struct lvp_descriptor_set **out_set, uint32_t *offsets, uint32_t offset_count,
+                      struct rendering_state *state)
+{
+   if (!offset_count)
+      return;
+
+   struct lvp_descriptor_set *in_set = *out_set;
+
+   struct lvp_descriptor_set *set;
+   lvp_descriptor_set_create(state->device, in_set->layout, &set);
+
+   util_dynarray_append(&state->push_desc_sets, struct lvp_descriptor_set *, set);
+
+   memcpy(set->map, in_set->map, in_set->bo->width0);
+
+   *out_set = set;
+
+   for (uint32_t i = 0; i < set->layout->binding_count; i++) {
+      const struct lvp_descriptor_set_binding_layout *binding = &set->layout->binding[i];
+      if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+          binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+         continue;
+
+      union lp_descriptor *desc = set->map;
+      desc += binding->descriptor_index;
+
+      for (uint32_t j = 0; j < binding->array_size; j++) {
+         uint32_t offset_index = binding->dynamic_index + j;
+         if (offset_index >= offset_count)
+            return;
+
+         desc[j].buffer.u = (uint32_t *)((uint8_t *)desc[j].buffer.u + offsets[offset_index]);
       }
    }
 }
 
-static void increment_dyn_info(struct dyn_info *dyn_info,
-                               const struct vk_descriptor_set_layout *vk_layout,
-                               bool inc_dyn)
-{
-   const struct lvp_descriptor_set_layout *layout =
-      vk_to_lvp_descriptor_set_layout(vk_layout);
-
-   lvp_forall_stage(stage) {
-      dyn_info->stage[stage].const_buffer_count += layout->stage[stage].const_buffer_count;
-      dyn_info->stage[stage].shader_buffer_count += layout->stage[stage].shader_buffer_count;
-      dyn_info->stage[stage].sampler_count += layout->stage[stage].sampler_count;
-      dyn_info->stage[stage].sampler_view_count += layout->stage[stage].sampler_view_count;
-      dyn_info->stage[stage].image_count += layout->stage[stage].image_count;
-      dyn_info->stage[stage].uniform_block_count += layout->stage[stage].uniform_block_count;
-   }
-   if (inc_dyn)
-      dyn_info->dyn_index += layout->dynamic_offset_count;
-}
-
-static void handle_compute_descriptor_sets(struct vk_cmd_queue_entry *cmd,
-                                           struct dyn_info *dyn_info,
-                                           struct rendering_state *state)
+static void
+handle_descriptor_sets(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
 {
    struct vk_cmd_bind_descriptor_sets *bds = &cmd->u.bind_descriptor_sets;
    LVP_FROM_HANDLE(lvp_pipeline_layout, layout, bds->layout);
 
-   for (unsigned i = 0; i < bds->first_set; i++) {
-      increment_dyn_info(dyn_info, layout->vk.set_layouts[i], false);
-   }
-   for (unsigned i = 0; i < bds->descriptor_set_count; i++) {
-      const struct lvp_descriptor_set *set = lvp_descriptor_set_from_handle(bds->descriptor_sets[i]);
+   uint32_t dynamic_offset_index = 0;
 
-      if (set->layout->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT)
-         handle_set_stage(state, dyn_info, set, MESA_SHADER_COMPUTE, MESA_SHADER_COMPUTE);
-      increment_dyn_info(dyn_info, layout->vk.set_layouts[bds->first_set + i], true);
-   }
-}
-
-static void handle_descriptor_sets(struct vk_cmd_queue_entry *cmd,
-                                   struct rendering_state *state)
-{
-   struct vk_cmd_bind_descriptor_sets *bds = &cmd->u.bind_descriptor_sets;
-   LVP_FROM_HANDLE(lvp_pipeline_layout, layout, bds->layout);
-   int i;
-   struct dyn_info dyn_info;
-
-   dyn_info.dyn_index = 0;
-   dyn_info.dynamic_offsets = bds->dynamic_offsets;
-   dyn_info.dynamic_offset_count = bds->dynamic_offset_count;
-
-   memset(dyn_info.stage, 0, sizeof(dyn_info.stage));
-   if (bds->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) {
-      handle_compute_descriptor_sets(cmd, &dyn_info, state);
-      return;
-   }
-
-   for (i = 0; i < bds->first_set; i++) {
-      increment_dyn_info(&dyn_info, layout->vk.set_layouts[i], false);
-   }
-
-   for (i = 0; i < bds->descriptor_set_count; i++) {
+   for (uint32_t i = 0; i < bds->descriptor_set_count; i++) {
       if (!layout->vk.set_layouts[bds->first_set + i])
          continue;
 
-      const struct lvp_descriptor_set *set = lvp_descriptor_set_from_handle(bds->descriptor_sets[i]);
+      struct lvp_descriptor_set *set = lvp_descriptor_set_from_handle(bds->descriptor_sets[i]);
       if (!set)
         continue;
-      /* verify that there's enough total offsets */
-      assert(set->layout->dynamic_offset_count <= dyn_info.dynamic_offset_count);
-      /* verify there's either no offsets... */
-      assert(!dyn_info.dynamic_offset_count ||
-             /* or that the total number of offsets required is <= the number remaining */
-             set->layout->dynamic_offset_count <= dyn_info.dynamic_offset_count - dyn_info.dyn_index);
+
+      apply_dynamic_offsets(&set, bds->dynamic_offsets + dynamic_offset_index,
+                            bds->dynamic_offset_count - dynamic_offset_index, state);
+
+      dynamic_offset_index += set->layout->dynamic_offset_count;
+
+      if (bds->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) {
+         if (set->layout->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT)
+            handle_set_stage(state, set, MESA_SHADER_COMPUTE, bds->first_set + i);
+         continue;
+      }
 
       if (set->layout->shader_stages & VK_SHADER_STAGE_VERTEX_BIT)
-         handle_set_stage(state, &dyn_info, set, MESA_SHADER_VERTEX, MESA_SHADER_VERTEX);
+         handle_set_stage(state, set, MESA_SHADER_VERTEX, bds->first_set + i);
 
       if (set->layout->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT)
-         handle_set_stage(state, &dyn_info, set, MESA_SHADER_GEOMETRY, MESA_SHADER_GEOMETRY);
+         handle_set_stage(state, set, MESA_SHADER_GEOMETRY, bds->first_set + i);
 
       if (set->layout->shader_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
-         handle_set_stage(state, &dyn_info, set, MESA_SHADER_TESS_CTRL, MESA_SHADER_TESS_CTRL);
+         handle_set_stage(state, set, MESA_SHADER_TESS_CTRL, bds->first_set + i);
 
       if (set->layout->shader_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
-         handle_set_stage(state, &dyn_info, set, MESA_SHADER_TESS_EVAL, MESA_SHADER_TESS_EVAL);
+         handle_set_stage(state, set, MESA_SHADER_TESS_EVAL, bds->first_set + i);
 
       if (set->layout->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT)
-         handle_set_stage(state, &dyn_info, set, MESA_SHADER_FRAGMENT, MESA_SHADER_FRAGMENT);
+         handle_set_stage(state, set, MESA_SHADER_FRAGMENT, bds->first_set + i);
 
       if (set->layout->shader_stages & VK_SHADER_STAGE_TASK_BIT_EXT)
-         handle_set_stage(state, &dyn_info, set, MESA_SHADER_TASK, MESA_SHADER_TASK);
+         handle_set_stage(state, set, MESA_SHADER_TASK, bds->first_set + i);
 
       if (set->layout->shader_stages & VK_SHADER_STAGE_MESH_BIT_EXT)
-         handle_set_stage(state, &dyn_info, set, MESA_SHADER_MESH, MESA_SHADER_MESH);
-
-      increment_dyn_info(&dyn_info, layout->vk.set_layouts[bds->first_set + i], true);
+         handle_set_stage(state, set, MESA_SHADER_MESH, bds->first_set + i);
    }
 }
 
@@ -3391,317 +3036,71
@@ static void handle_draw_indirect_count(struct vk_cmd_queue_entry *cmd,
    pipe_resource_reference(&index, NULL);
 }
 
-static void handle_compute_push_descriptor_set(struct lvp_cmd_push_descriptor_set *pds,
-                                               struct dyn_info *dyn_info,
-                                               struct rendering_state *state)
-{
-   const struct lvp_descriptor_set_layout *layout =
-      vk_to_lvp_descriptor_set_layout(pds->layout->vk.set_layouts[pds->set]);
-
-   if (!(layout->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT))
-      return;
-   for (unsigned i = 0; i < pds->set; i++) {
-      increment_dyn_info(dyn_info, pds->layout->vk.set_layouts[i], false);
-   }
-   unsigned info_idx = 0;
-   for (unsigned i = 0; i < pds->descriptor_write_count; i++) {
-      struct lvp_write_descriptor *desc = &pds->descriptors[i];
-      const struct lvp_descriptor_set_binding_layout *binding =
-         &layout->binding[desc->dst_binding];
-
-      if (!binding->valid)
-         continue;
-
-      for (unsigned j = 0; j < desc->descriptor_count; j++) {
-         union lvp_descriptor_info *info = &pds->infos[info_idx + j];
-
-         handle_descriptor(state, dyn_info, binding,
-                           MESA_SHADER_COMPUTE, MESA_SHADER_COMPUTE,
-                           j, desc->descriptor_type,
-                           info);
-      }
-      info_idx += desc->descriptor_count;
-   }
-}
-
-static struct lvp_cmd_push_descriptor_set *
-create_push_descriptor_set(struct rendering_state *state, struct vk_cmd_push_descriptor_set_khr *in_cmd)
-{
-   LVP_FROM_HANDLE(lvp_pipeline_layout, layout, in_cmd->layout);
-   struct lvp_cmd_push_descriptor_set *out_cmd;
-   int count_descriptors = 0;
-
-   for (unsigned i = 0; i < in_cmd->descriptor_write_count; i++) {
-      count_descriptors += in_cmd->descriptor_writes[i].descriptorCount;
-   }
-
-   void *descriptors;
-   void *infos;
-   void **ptrs[] = {&descriptors, &infos};
-   size_t sizes[] = {
-      in_cmd->descriptor_write_count * sizeof(struct lvp_write_descriptor),
-      count_descriptors * sizeof(union lvp_descriptor_info),
-   };
-   out_cmd = ptrzalloc(sizeof(struct lvp_cmd_push_descriptor_set), 2, sizes, ptrs);
-   if (!out_cmd)
-      return NULL;
-
-   out_cmd->bind_point = in_cmd->pipeline_bind_point;
-   out_cmd->layout = layout;
-   out_cmd->set = in_cmd->set;
-   out_cmd->descriptor_write_count = in_cmd->descriptor_write_count;
-   out_cmd->descriptors = descriptors;
-   out_cmd->infos = infos;
-
-   unsigned descriptor_index = 0;
-
-   for (unsigned i = 0; i < in_cmd->descriptor_write_count; i++) {
-      struct lvp_write_descriptor *desc = &out_cmd->descriptors[i];
-
-      /* dstSet is ignored */
-      desc->dst_binding = in_cmd->descriptor_writes[i].dstBinding;
-      desc->dst_array_element = in_cmd->descriptor_writes[i].dstArrayElement;
-      desc->descriptor_count = in_cmd->descriptor_writes[i].descriptorCount;
-      desc->descriptor_type = in_cmd->descriptor_writes[i].descriptorType;
-
-      for (unsigned j = 0; j < desc->descriptor_count; j++) {
-         union lvp_descriptor_info *info = &out_cmd->infos[descriptor_index + j];
-         switch (desc->descriptor_type) {
-         case VK_DESCRIPTOR_TYPE_SAMPLER:
-            if (in_cmd->descriptor_writes[i].pImageInfo[j].sampler)
-               info->sampler = &lvp_sampler_from_handle(in_cmd->descriptor_writes[i].pImageInfo[j].sampler)->state;
-            else
-               info->sampler = NULL;
-            break;
-         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-            if (in_cmd->descriptor_writes[i].pImageInfo[j].sampler)
-               info->sampler = &lvp_sampler_from_handle(in_cmd->descriptor_writes[i].pImageInfo[j].sampler)->state;
-            else
-               info->sampler = NULL;
-            if (in_cmd->descriptor_writes[i].pImageInfo[j].imageView)
-               info->sampler_view = lvp_image_view_from_handle(in_cmd->descriptor_writes[i].pImageInfo[j].imageView)->sv;
-            else
-               info->sampler_view = NULL;
-            break;
-         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-            if (in_cmd->descriptor_writes[i].pImageInfo[j].imageView)
-               info->sampler_view = lvp_image_view_from_handle(in_cmd->descriptor_writes[i].pImageInfo[j].imageView)->sv;
-            else
-               info->sampler_view = NULL;
-            break;
-         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-            if (in_cmd->descriptor_writes[i].pImageInfo[j].imageView)
-               info->image_view = lvp_image_view_from_handle(in_cmd->descriptor_writes[i].pImageInfo[j].imageView)->iv;
-            else
-               info->image_view = ((struct pipe_image_view){0});
-            break;
-         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: {
-            struct lvp_buffer_view *bview = lvp_buffer_view_from_handle(in_cmd->descriptor_writes[i].pTexelBufferView[j]);
-            info->sampler_view = bview ? bview->sv : NULL;
-            break;
-         }
-         case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
-            struct lvp_buffer_view *bview = lvp_buffer_view_from_handle(in_cmd->descriptor_writes[i].pTexelBufferView[j]);
-            info->image_view = bview ? bview->iv : ((struct pipe_image_view){0});
-            break;
-         }
-         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
-            LVP_FROM_HANDLE(lvp_buffer, buffer, in_cmd->descriptor_writes[i].pBufferInfo[j].buffer);
-            info->ubo.buffer = buffer ? buffer->bo : NULL;
-            info->ubo.buffer_offset = buffer ? in_cmd->descriptor_writes[i].pBufferInfo[j].offset : 0;
-            info->ubo.buffer_size = buffer ? in_cmd->descriptor_writes[i].pBufferInfo[j].range : 0;
-            if (buffer && in_cmd->descriptor_writes[i].pBufferInfo[j].range == VK_WHOLE_SIZE)
-               info->ubo.buffer_size = info->ubo.buffer->width0 - info->ubo.buffer_offset;
-            break;
-         }
-         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
-            LVP_FROM_HANDLE(lvp_buffer, buffer, in_cmd->descriptor_writes[i].pBufferInfo[j].buffer);
-            info->ssbo.buffer = buffer ? buffer->bo : NULL;
-            info->ssbo.buffer_offset = buffer ? in_cmd->descriptor_writes[i].pBufferInfo[j].offset : 0;
-            info->ssbo.buffer_size = buffer ? in_cmd->descriptor_writes[i].pBufferInfo[j].range : 0;
-            if (buffer && in_cmd->descriptor_writes[i].pBufferInfo[j].range == VK_WHOLE_SIZE)
-               info->ssbo.buffer_size = info->ssbo.buffer->width0 - info->ssbo.buffer_offset;
-            break;
-         }
-         default:
-            break;
-         }
-      }
-      descriptor_index += desc->descriptor_count;
-   }
-
-   return out_cmd;
-}
-
-static void handle_push_descriptor_set_generic(struct vk_cmd_push_descriptor_set_khr *_pds,
-                                               struct rendering_state *state)
-{
-   struct lvp_cmd_push_descriptor_set *pds = create_push_descriptor_set(state, _pds);
-   const struct lvp_descriptor_set_layout *layout =
-      vk_to_lvp_descriptor_set_layout(pds->layout->vk.set_layouts[pds->set]);
-
-   struct dyn_info dyn_info;
-   memset(&dyn_info.stage, 0, sizeof(dyn_info.stage));
-   dyn_info.dyn_index = 0;
-   if (pds->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) {
-      handle_compute_push_descriptor_set(pds, &dyn_info, state);
-   }
-
-   for (unsigned i = 0; i < pds->set; i++) {
-      increment_dyn_info(&dyn_info, pds->layout->vk.set_layouts[i], false);
-   }
-
-   unsigned info_idx = 0;
-   for (unsigned i = 0; i < pds->descriptor_write_count; i++) {
-      struct lvp_write_descriptor *desc = &pds->descriptors[i];
-      const struct lvp_descriptor_set_binding_layout *binding =
-         &layout->binding[desc->dst_binding];
-
-      if (!binding->valid)
-         continue;
-
-      for (unsigned j = 0; j < desc->descriptor_count; j++) {
-         union lvp_descriptor_info *info = &pds->infos[info_idx + j];
-
-         if (layout->shader_stages & VK_SHADER_STAGE_VERTEX_BIT)
-            handle_descriptor(state, &dyn_info, binding,
-                              MESA_SHADER_VERTEX, MESA_SHADER_VERTEX,
-                              j, desc->descriptor_type,
-                              info);
-         if (layout->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT)
-            handle_descriptor(state, &dyn_info, binding,
-                              MESA_SHADER_FRAGMENT, MESA_SHADER_FRAGMENT,
-                              j, desc->descriptor_type,
-                              info);
-         if (layout->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT)
-            handle_descriptor(state, &dyn_info, binding,
-                              MESA_SHADER_GEOMETRY, MESA_SHADER_GEOMETRY,
-                              j, desc->descriptor_type,
-                              info);
-         if (layout->shader_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
-            handle_descriptor(state, &dyn_info, binding,
-                              MESA_SHADER_TESS_CTRL, MESA_SHADER_TESS_CTRL,
-                              j, desc->descriptor_type,
-                              info);
-         if (layout->shader_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
-            handle_descriptor(state, &dyn_info, binding,
-                              MESA_SHADER_TESS_EVAL, MESA_SHADER_TESS_EVAL,
-                              j, desc->descriptor_type,
-                              info);
-         if (layout->shader_stages & VK_SHADER_STAGE_TASK_BIT_EXT)
-            handle_descriptor(state, &dyn_info, binding,
-                              MESA_SHADER_TASK, MESA_SHADER_TASK,
-                              j, desc->descriptor_type,
-                              info);
-         if (layout->shader_stages & VK_SHADER_STAGE_MESH_BIT_EXT)
-            handle_descriptor(state, &dyn_info, binding,
-                              MESA_SHADER_MESH, MESA_SHADER_MESH,
-                              j, desc->descriptor_type,
-                              info);
-      }
-      info_idx += desc->descriptor_count;
-   }
-   free(pds);
-}
-
 static void handle_push_descriptor_set(struct vk_cmd_queue_entry *cmd,
                                        struct rendering_state *state)
 {
-   handle_push_descriptor_set_generic(&cmd->u.push_descriptor_set_khr, state);
+   struct vk_cmd_push_descriptor_set_khr *pds = &cmd->u.push_descriptor_set_khr;
+   LVP_FROM_HANDLE(lvp_pipeline_layout, layout, pds->layout);
+   struct lvp_descriptor_set_layout *set_layout = (struct lvp_descriptor_set_layout *)layout->vk.set_layouts[pds->set];
+
+   struct lvp_descriptor_set *set;
+   lvp_descriptor_set_create(state->device, set_layout, &set);
+
+   util_dynarray_append(&state->push_desc_sets, struct lvp_descriptor_set *, set);
+
+   bool is_compute = pds->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE;
+   struct lvp_descriptor_set *base = state->desc_sets[is_compute][pds->set];
+   if (base)
+      memcpy(set->map, base->map, MIN2(set->bo->width0, base->bo->width0));
+
+   VkDescriptorSet set_handle = lvp_descriptor_set_to_handle(set);
+   for (uint32_t i = 0; i < pds->descriptor_write_count; i++)
+      pds->descriptor_writes[i].dstSet = set_handle;
+
+   lvp_UpdateDescriptorSets(lvp_device_to_handle(state->device), pds->descriptor_write_count, pds->descriptor_writes, 0, NULL);
+
+   struct vk_cmd_queue_entry bind_cmd;
+   bind_cmd.u.bind_descriptor_sets = (struct vk_cmd_bind_descriptor_sets){
+      .pipeline_bind_point = pds->pipeline_bind_point,
+      .layout = pds->layout,
+      .first_set = pds->set,
+      .descriptor_set_count = 1,
+      .descriptor_sets = &set_handle,
+   };
+   handle_descriptor_sets(&bind_cmd, state);
 }
 
 static void handle_push_descriptor_set_with_template(struct vk_cmd_queue_entry *cmd,
                                                      struct rendering_state *state)
 {
-   LVP_FROM_HANDLE(lvp_descriptor_update_template, templ, cmd->u.push_descriptor_set_with_template_khr.descriptor_update_template);
-   struct vk_cmd_push_descriptor_set_khr *pds;
-   int pds_size = sizeof(*pds);
+   struct vk_cmd_push_descriptor_set_with_template_khr *pds = &cmd->u.push_descriptor_set_with_template_khr;
+   LVP_FROM_HANDLE(lvp_descriptor_update_template, templ, pds->descriptor_update_template);
+   LVP_FROM_HANDLE(lvp_pipeline_layout, layout, pds->layout);
+   struct lvp_descriptor_set_layout *set_layout = (struct lvp_descriptor_set_layout *)layout->vk.set_layouts[pds->set];
 
-   pds_size += templ->entry_count * sizeof(struct VkWriteDescriptorSet);
+   struct lvp_descriptor_set *set;
+   lvp_descriptor_set_create(state->device, set_layout, &set);
 
-   for (unsigned i = 0; i < templ->entry_count; i++) {
-      VkDescriptorUpdateTemplateEntry *entry = &templ->entry[i];
-      switch (entry->descriptorType) {
-      case VK_DESCRIPTOR_TYPE_SAMPLER:
-      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-         pds_size += sizeof(VkDescriptorImageInfo) * entry->descriptorCount;
-         break;
-      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-         pds_size += sizeof(VkBufferView) * entry->descriptorCount;
-         break;
-      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      default:
-         pds_size += sizeof(VkDescriptorBufferInfo) * entry->descriptorCount;
-         break;
-      }
-   }
+   util_dynarray_append(&state->push_desc_sets, struct lvp_descriptor_set *, set);
 
-   pds = calloc(1, pds_size);
-   if (!pds)
-      return;
+   bool is_compute = templ->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE;
+   struct lvp_descriptor_set *base = state->desc_sets[is_compute][pds->set];
+   if (base)
+      memcpy(set->map, base->map, MIN2(set->bo->width0, base->bo->width0));
 
-   pds->pipeline_bind_point = templ->bind_point;
-   pds->layout = lvp_pipeline_layout_to_handle(templ->pipeline_layout);
-   pds->set = templ->set;
-   pds->descriptor_write_count = templ->entry_count;
-   pds->descriptor_writes = (struct VkWriteDescriptorSet *)(pds + 1);
-   const uint8_t *next_info = (const uint8_t *) (pds->descriptor_writes + templ->entry_count);
+   VkDescriptorSet set_handle = lvp_descriptor_set_to_handle(set);
+   lvp_descriptor_set_update_with_template(lvp_device_to_handle(state->device), set_handle,
+                                           pds->descriptor_update_template, pds->data, true);
 
-   const uint8_t *pSrc = cmd->u.push_descriptor_set_with_template_khr.data;
-   for (unsigned i = 0; i < templ->entry_count; i++) {
-      struct VkWriteDescriptorSet *desc = &pds->descriptor_writes[i];
-      struct VkDescriptorUpdateTemplateEntry *entry = &templ->entry[i];
-
-      /* dstSet is ignored */
-      desc->dstBinding = entry->dstBinding;
-      desc->dstArrayElement = entry->dstArrayElement;
-      desc->descriptorCount = entry->descriptorCount;
-      desc->descriptorType = entry->descriptorType;
-      desc->pImageInfo = (const VkDescriptorImageInfo *) next_info;
-      desc->pTexelBufferView = (const VkBufferView *) next_info;
-      desc->pBufferInfo = (const VkDescriptorBufferInfo *) next_info;
-
-      for (unsigned j = 0; j < desc->descriptorCount; j++) {
-         switch (desc->descriptorType) {
-         case VK_DESCRIPTOR_TYPE_SAMPLER:
-         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-            memcpy((VkDescriptorImageInfo*)&desc->pImageInfo[j], pSrc, sizeof(VkDescriptorImageInfo));
-            next_info += sizeof(VkDescriptorImageInfo);
-            pSrc += sizeof(VkDescriptorImageInfo);
-            break;
-         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-         case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-            memcpy((VkBufferView*)&desc->pTexelBufferView[j], pSrc, sizeof(VkBufferView));
-            next_info += sizeof(VkBufferView);
-            pSrc += sizeof(VkBufferView);
-            break;
-         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-         default:
-            memcpy((VkDescriptorBufferInfo*)&desc->pBufferInfo[j], pSrc, sizeof(VkDescriptorBufferInfo));
-            next_info += sizeof(VkDescriptorBufferInfo);
-            pSrc += sizeof(VkDescriptorBufferInfo);
-            break;
-         }
-      }
-   }
-   handle_push_descriptor_set_generic(pds, state);
-   free(pds);
+   struct vk_cmd_queue_entry bind_cmd;
+   bind_cmd.u.bind_descriptor_sets = (struct vk_cmd_bind_descriptor_sets){
+      .pipeline_bind_point = templ->bind_point,
+      .layout = pds->layout,
+      .first_set = pds->set,
+      .descriptor_set_count = 1,
+      .descriptor_sets = &set_handle,
+   };
+   handle_descriptor_sets(&bind_cmd, state);
 }
 
 static void handle_bind_transform_feedback_buffers(struct vk_cmd_queue_entry *cmd,
@@ -4205,13 +3604,11 @@ handle_shaders(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
       handle_graphics_stages(state, vkstages & all_gfx, true);
       u_foreach_bit(i, new_stages) {
          handle_graphics_layout(state, i, state->shaders[i]->layout);
-         handle_pipeline_access(state, i);
       }
    }
    /* ignore compute unbinds */
   if (new_stages & BITFIELD_BIT(MESA_SHADER_COMPUTE)) {
      handle_compute_shader(state, state->shaders[MESA_SHADER_COMPUTE], state->shaders[MESA_SHADER_COMPUTE]->layout);
-     handle_pipeline_access(state, MESA_SHADER_COMPUTE);
   }
 
    if (gfx) {
@@ -4985,6 +4382,7 @@ VkResult lvp_execute_cmds(struct lvp_device *device,
    state->min_samples_dirty = true;
    state->sample_mask = UINT32_MAX;
    state->poison_mem = device->poison_mem;
+   util_dynarray_init(&state->push_desc_sets, NULL);
 
    /* default values */
    state->rs_state.line_width = 1.0;
@@ -4997,10 +4395,6 @@ VkResult lvp_execute_cmds(struct lvp_device *device,
    state->rs_state.scissor = true;
    state->rs_state.no_ms_sample_mask_out = true;
 
-   lvp_forall_stage(s) {
-      for (unsigned i = 0; i < ARRAY_SIZE(state->cso_ss_ptr[s]); i++)
-         state->cso_ss_ptr[s][i] = &state->ss[s][i];
-   }
    /* create a gallium context */
    lvp_execute_cmd_buffer(&cmd_buffer->vk.cmd_queue.cmds, state, device->print_cmds);
 
@@ -5013,6 +4407,14 @@ VkResult lvp_execute_cmds(struct lvp_device *device,
       }
    }
 
+   if (util_dynarray_num_elements(&state->push_desc_sets, struct lvp_descriptor_set *))
+      finish_fence(state);
+
+   util_dynarray_foreach (&state->push_desc_sets, struct lvp_descriptor_set *, set)
+      lvp_descriptor_set_destroy(device, *set);
+
+   util_dynarray_fini(&state->push_desc_sets);
+
    free(state->color_att);
    return VK_SUCCESS;
 }
diff --git a/src/gallium/frontends/lavapipe/lvp_image.c b/src/gallium/frontends/lavapipe/lvp_image.c
index 41cf8651248..2a214476f94 100644
--- a/src/gallium/frontends/lavapipe/lvp_image.c
+++ b/src/gallium/frontends/lavapipe/lvp_image.c
@@ -281,10 +281,21 @@ lvp_CreateImageView(VkDevice _device,
    view->pformat = lvp_vk_format_to_pipe_format(view->vk.format);
    view->image = image;
    view->surface = NULL;
-   if (image->bo->bind & PIPE_BIND_SHADER_IMAGE)
+
+   simple_mtx_lock(&device->queue.lock);
+
+   if (image->bo->bind & PIPE_BIND_SHADER_IMAGE) {
       view->iv = lvp_create_imageview(view);
-   if (image->bo->bind & PIPE_BIND_SAMPLER_VIEW)
+      view->image_handle = (void *)(uintptr_t)device->queue.ctx->create_image_handle(device->queue.ctx, &view->iv);
+   }
+
+   if (image->bo->bind & PIPE_BIND_SAMPLER_VIEW) {
       view->sv = lvp_create_samplerview(device->queue.ctx, view);
+      view->texture_handle = (void *)(uintptr_t)device->queue.ctx->create_texture_handle(device->queue.ctx, view->sv, NULL);
+   }
+
+   simple_mtx_unlock(&device->queue.lock);
+
    *pView = lvp_image_view_to_handle(view);
 
    return VK_SUCCESS;
@@ -300,7 +311,15 @@ lvp_DestroyImageView(VkDevice _device, VkImageView _iview,
    if (!_iview)
      return;
 
+   simple_mtx_lock(&device->queue.lock);
+
+   device->queue.ctx->delete_image_handle(device->queue.ctx, (uint64_t)(uintptr_t)iview->image_handle);
+
    pipe_sampler_view_reference(&iview->sv, NULL);
+   device->queue.ctx->delete_texture_handle(device->queue.ctx, (uint64_t)(uintptr_t)iview->texture_handle);
+
+   simple_mtx_unlock(&device->queue.lock);
+
    pipe_surface_reference(&iview->surface, NULL);
    vk_image_view_destroy(&device->vk, pAllocator, &iview->vk);
 }
@@ -535,10 +554,21 @@ lvp_CreateBufferView(VkDevice _device,
       view->range = view->buffer->size - view->offset;
    else
       view->range = pCreateInfo->range;
-   if (buffer->bo->bind & PIPE_BIND_SAMPLER_VIEW)
+
+   simple_mtx_lock(&device->queue.lock);
+
+   if (buffer->bo->bind & PIPE_BIND_SAMPLER_VIEW) {
       view->sv = lvp_create_samplerview_buffer(device->queue.ctx, view);
-   if (buffer->bo->bind & PIPE_BIND_SHADER_IMAGE)
+      view->texture_handle = (void *)(uintptr_t)device->queue.ctx->create_texture_handle(device->queue.ctx, view->sv, NULL);
+   }
+
+   if (buffer->bo->bind & PIPE_BIND_SHADER_IMAGE) {
       view->iv = lvp_create_imageview_buffer(view);
+      view->image_handle = (void *)(uintptr_t)device->queue.ctx->create_image_handle(device->queue.ctx, &view->iv);
+   }
+
+   simple_mtx_unlock(&device->queue.lock);
+
    *pView = lvp_buffer_view_to_handle(view);
 
    return VK_SUCCESS;
@@ -553,7 +583,16 @@ lvp_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
    if (!bufferView)
      return;
+
+   simple_mtx_lock(&device->queue.lock);
+
    pipe_sampler_view_reference(&view->sv, NULL);
+   device->queue.ctx->delete_texture_handle(device->queue.ctx, (uint64_t)(uintptr_t)view->texture_handle);
+
+   device->queue.ctx->delete_image_handle(device->queue.ctx, (uint64_t)(uintptr_t)view->image_handle);
+
+   simple_mtx_unlock(&device->queue.lock);
+
    vk_object_base_finish(&view->base);
    vk_free2(&device->vk.alloc, pAllocator, view);
 }
diff --git a/src/gallium/frontends/lavapipe/lvp_lower_vulkan_resource.c
b/src/gallium/frontends/lavapipe/lvp_lower_vulkan_resource.c
index 2d6662ae55e..eeaaaadfa04 100644
--- a/src/gallium/frontends/lavapipe/lvp_lower_vulkan_resource.c
+++ b/src/gallium/frontends/lavapipe/lvp_lower_vulkan_resource.c
@@ -54,195 +54,84 @@ lower_vulkan_resource_index(const nir_instr *instr, const void *data_cb)
    return false;
 }
 
-static bool
-lower_uniform_block_access(const nir_instr *instr, const void *data_cb)
-{
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-
-   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   if (intrin->intrinsic != nir_intrinsic_load_deref)
-      return false;
-   nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
-   return deref->modes == nir_var_mem_ubo;
-}
-
-static nir_ssa_def *
-lower_block_instr(nir_builder *b, nir_instr *instr, void *data_cb)
-{
-   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   nir_binding nb = nir_chase_binding(intrin->src[0]);
-   const struct lvp_pipeline_layout *layout = data_cb;
-   const struct lvp_descriptor_set_binding_layout *binding =
-      get_binding_layout(layout, nb.desc_set, nb.binding);
-   if (binding->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
-      return NULL;
-   if (!binding->array_size)
-      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
-
-   assert(intrin->src[0].ssa->num_components == 2);
-   unsigned value = 0;
-   for (unsigned s = 0; s < nb.desc_set; s++)
-      value += get_set_layout(layout, s)->stage[b->shader->info.stage].uniform_block_size;
-   if (layout->push_constant_stages & BITFIELD_BIT(b->shader->info.stage))
-      value += layout->push_constant_size;
-   value += binding->stage[b->shader->info.stage].uniform_block_offset;
-
-   b->cursor = nir_before_instr(instr);
-   nir_ssa_def *offset = nir_imm_ivec2(b, 0, value);
-   nir_ssa_def *added = nir_iadd(b, intrin->src[0].ssa, offset);
-   nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
-   nir_deref_instr *cast = nir_build_deref_cast(b, added, deref->modes, deref->type, 0);
-   nir_instr_rewrite_src_ssa(instr, &intrin->src[0], &cast->dest.ssa);
-   return NIR_LOWER_INSTR_PROGRESS;
-}
-
 static nir_ssa_def *lower_vri_intrin_vri(struct nir_builder *b,
                                          nir_instr *instr, void *data_cb)
 {
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
    unsigned desc_set_idx = nir_intrinsic_desc_set(intrin);
    unsigned binding_idx = nir_intrinsic_binding(intrin);
-   const struct lvp_pipeline_layout *layout = data_cb;
    const struct lvp_descriptor_set_binding_layout *binding =
      get_binding_layout(data_cb, desc_set_idx, binding_idx);
-   int value = 0;
-   bool is_ubo = (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
-                  binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
 
-   /* always load inline uniform blocks from ubo0 */
-   if (binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
-      return nir_imm_ivec2(b, 0, 0);
-
-   for (unsigned s = 0; s < desc_set_idx; s++) {
-      if (!layout->vk.set_layouts[s])
-         continue;
-      if (is_ubo)
-         value += get_set_layout(layout, s)->stage[b->shader->info.stage].const_buffer_count;
-      else
-         value += get_set_layout(layout, s)->stage[b->shader->info.stage].shader_buffer_count;
-   }
-   if (is_ubo)
-      value += binding->stage[b->shader->info.stage].const_buffer_index + 1;
-   else
-      value += binding->stage[b->shader->info.stage].shader_buffer_index;
-
-   /* The SSA size for indices is the same as for pointers. We use
-    * nir_addr_format_32bit_index_offset so we need a vec2. We don't need all
-    * that data so just stuff a 0 in the second component.
-    */
-   if (nir_src_is_const(intrin->src[0])) {
-      value += nir_src_comp_as_int(intrin->src[0], 0);
-      return nir_imm_ivec2(b, value, 0);
-   } else
-      return nir_vec2(b, nir_iadd_imm(b, intrin->src[0].ssa, value),
-                         nir_imm_int(b, 0));
+   return nir_vec3(b, nir_imm_int(b, desc_set_idx + 1),
+                      nir_iadd_imm(b, intrin->src[0].ssa, binding->descriptor_index),
+                      nir_imm_int(b, 0));
 }
 
 static nir_ssa_def *lower_vri_intrin_vrri(struct nir_builder *b,
                                           nir_instr *instr, void *data_cb)
 {
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   nir_ssa_def *old_index = nir_ssa_for_src(b, intrin->src[0], 1);
+   nir_ssa_def *old_index = nir_ssa_for_src(b, intrin->src[0], 3);
    nir_ssa_def *delta = nir_ssa_for_src(b, intrin->src[1], 1);
-   return nir_vec2(b, nir_iadd(b, old_index, delta),
-                      nir_imm_int(b, 0));
+   return nir_vec3(b, nir_channel(b, old_index, 0),
+                      nir_iadd(b, nir_channel(b, old_index, 1), delta),
+                      nir_channel(b, old_index, 2));
 }
 
 static nir_ssa_def *lower_vri_intrin_lvd(struct nir_builder *b,
                                          nir_instr *instr, void *data_cb)
 {
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   nir_ssa_def *index = nir_ssa_for_src(b, intrin->src[0], 1);
-   return nir_vec2(b, index, nir_imm_int(b, 0));
+   return nir_ssa_for_src(b, intrin->src[0], 3);
 }
 
-/*
- * Return a bitset of the texture units or sampler units used by a
- * texture instruction. Note that 'used' is expected to be already
- * initialized. i.e. this function does not zero-out the bitset before
- * setting any bits.
- */
-static void
-lower_vri_instr_tex_deref(nir_tex_instr *tex,
-                          nir_tex_src_type deref_src_type,
-                          gl_shader_stage stage,
-                          struct lvp_pipeline_layout *layout,
-                          BITSET_WORD used[],  // textures or samplers
-                          size_t used_size)    // used[] size, in bits
+static nir_ssa_def *
+vulkan_resource_from_deref(nir_builder *b, nir_deref_instr *deref, const struct lvp_pipeline_layout *layout)
 {
-   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
+   nir_ssa_def *index = nir_imm_int(b, 0);
 
-   if (deref_src_idx < 0)
-      return;
+   while (deref->deref_type != nir_deref_type_var) {
+      assert(deref->deref_type == nir_deref_type_array);
+      unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
 
-   nir_deref_instr *deref_instr = nir_src_as_deref(tex->src[deref_src_idx].src);
-   nir_variable *var = nir_deref_instr_get_variable(deref_instr);
-   unsigned desc_set_idx = var->data.descriptor_set;
-   unsigned binding_idx = var->data.binding;
-   int value = 0;
+      index = nir_iadd(b, index, nir_imul_imm(b, deref->arr.index.ssa, array_size));
 
-   const struct lvp_descriptor_set_binding_layout *binding =
-      get_binding_layout(layout, desc_set_idx, binding_idx);
-   nir_tex_instr_remove_src(tex, deref_src_idx);
-   for (unsigned s = 0; s < desc_set_idx; s++) {
-      if (!layout->vk.set_layouts[s])
-         continue;
-      if (deref_src_type == nir_tex_src_sampler_deref)
-         value += get_set_layout(layout, s)->stage[stage].sampler_count;
-      else
-         value += get_set_layout(layout, s)->stage[stage].sampler_view_count;
+      deref = nir_deref_instr_parent(deref);
    }
-   if (deref_src_type == nir_tex_src_sampler_deref)
-      value += binding->stage[stage].sampler_index;
-   else
-      value += binding->stage[stage].sampler_view_index;
 
-   if (deref_instr->deref_type == nir_deref_type_array) {
-      if (nir_src_is_const(deref_instr->arr.index))
-         value += nir_src_as_uint(deref_instr->arr.index);
-      else {
-         if (deref_src_type == nir_tex_src_sampler_deref)
-            nir_tex_instr_add_src(tex, nir_tex_src_sampler_offset, deref_instr->arr.index);
-         else
-            nir_tex_instr_add_src(tex, nir_tex_src_texture_offset, deref_instr->arr.index);
-      }
-   }
-   if (deref_src_type == nir_tex_src_sampler_deref)
-      tex->sampler_index = value;
-   else
-      tex->texture_index = value;
+   nir_variable *var = deref->var;
 
-   if (deref_instr->deref_type == nir_deref_type_array) {
-      assert(glsl_type_is_array(var->type));
-      assert(value >= 0);
-      assert(value < used_size);
-      if (nir_src_is_const(deref_instr->arr.index)) {
-         BITSET_SET(used, value);
-      } else {
-         unsigned size = glsl_get_aoa_size(var->type);
-         assert(value + size <= used_size);
-         BITSET_SET_RANGE(used, value, value+size);
-      }
-   } else {
-      assert(value < used_size);
-      BITSET_SET(used, value);
-   }
+   uint32_t binding_base = get_binding_layout(layout, var->data.descriptor_set, var->data.binding)->descriptor_index;
+
+   return nir_vec3(b, nir_imm_int(b, var->data.descriptor_set + 1),
+                      nir_iadd_imm(b, index, binding_base),
+                      nir_imm_int(b, 0));
 }
 
 static void lower_vri_instr_tex(struct nir_builder *b,
                                 nir_tex_instr *tex, void *data_cb)
 {
    struct lvp_pipeline_layout *layout = data_cb;
-   lower_vri_instr_tex_deref(tex, nir_tex_src_sampler_deref,
-                             b->shader->info.stage, layout,
-                             b->shader->info.samplers_used,
-                             BITSET_SIZE(b->shader->info.samplers_used));
-   lower_vri_instr_tex_deref(tex, nir_tex_src_texture_deref,
-                             b->shader->info.stage, layout,
-                             b->shader->info.textures_used,
-                             BITSET_SIZE(b->shader->info.textures_used));
+
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      nir_deref_instr *deref;
+      switch (tex->src[i].src_type) {
+      case nir_tex_src_texture_deref:
+         tex->src[i].src_type = nir_tex_src_texture_handle;
+         deref = nir_src_as_deref(tex->src[i].src);
+         break;
+      case nir_tex_src_sampler_deref:
+         tex->src[i].src_type = nir_tex_src_sampler_handle;
+         deref = nir_src_as_deref(tex->src[i].src);
+         break;
+      default:
+         continue;
+      }
+
+      nir_ssa_def *resource = vulkan_resource_from_deref(b, deref, layout);
+      nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, resource);
+   }
 }
 
 static void
@@ -251,44 +140,43 @@ lower_image_intrinsic(nir_builder *b,
                       void *data_cb)
 {
    const struct lvp_pipeline_layout *layout = data_cb;
-   gl_shader_stage stage = b->shader->info.stage;
 
    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
-   nir_variable *var = nir_deref_instr_get_variable(deref);
-   unsigned desc_set_idx = var->data.descriptor_set;
-   unsigned binding_idx = var->data.binding;
-   const struct lvp_descriptor_set_binding_layout *binding =
-      get_binding_layout(layout, desc_set_idx, binding_idx);
-   nir_ssa_def *index = NULL;
-   int value = 0;
-   for (unsigned s = 0; s < desc_set_idx; s++) {
-      if (!layout->vk.set_layouts[s])
-         continue;
-      value += get_set_layout(layout, s)->stage[stage].image_count;
-   }
-   value += binding->stage[stage].image_index;
+   nir_ssa_def *resource = vulkan_resource_from_deref(b, deref, layout);
+   nir_rewrite_image_intrinsic(intrin, resource, true);
+}
 
-   b->cursor = nir_before_instr(&intrin->instr);
-   if (deref->deref_type == nir_deref_type_array) {
-      assert(glsl_type_is_array(var->type));
-      assert(value >= 0);
-      if (nir_src_is_const(deref->arr.index)) {
-         value += nir_src_as_uint(deref->arr.index);
-         BITSET_SET(b->shader->info.images_used, value);
-         index = nir_imm_int(b, value);
-      } else {
-         unsigned size = glsl_get_aoa_size(var->type);
-         BITSET_SET_RANGE(b->shader->info.images_used,
-                          value, value + size - 1);
-         index = nir_iadd_imm(b, deref->arr.index.ssa, value);
-      }
-   } else {
-      BITSET_SET(b->shader->info.images_used, value);
-      index = nir_imm_int(b, value);
-   }
+static bool
+lower_load_ubo(nir_builder *b, nir_instr *instr, void *data_cb)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
 
-   nir_rewrite_image_intrinsic(intrin, index, false);
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != nir_intrinsic_load_ubo)
+      return false;
+
+   nir_binding binding = nir_chase_binding(intrin->src[0]);
+   /* If binding.success=false, then this is a variable pointer, which we don't support with
+    * VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK.
+    */
+   if (!binding.success)
+      return false;
+
+   const struct lvp_descriptor_set_binding_layout *bind_layout =
+      get_binding_layout(data_cb, binding.desc_set, binding.binding);
+   if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
+      return false;
+
+   b->cursor = nir_before_instr(instr);
+
+   nir_instr_rewrite_src(instr, &intrin->src[0], nir_src_for_ssa(nir_imm_int(b, binding.desc_set + 1)));
+
+   nir_ssa_def *offset = nir_iadd_imm(b, intrin->src[1].ssa, bind_layout->uniform_block_offset);
+   nir_instr_rewrite_src(instr, &intrin->src[1], nir_src_for_ssa(offset));
+
+   return true;
 }
 
 static nir_ssa_def *lower_vri_instr(struct nir_builder *b,
@@ -307,13 +195,11 @@ static nir_ssa_def *lower_vri_instr(struct nir_builder *b,
       return lower_vri_intrin_lvd(b, instr, data_cb);
 
    case nir_intrinsic_get_ssbo_size: {
-      /* The result of the load_vulkan_descriptor is a vec2(index, offset)
-       * but we only want the index in get_ssbo_size.
-       */
-      b->cursor = nir_before_instr(&intrin->instr);
-      nir_ssa_def *index = nir_ssa_for_src(b, intrin->src[0], 1);
+      /* Ignore the offset component. */
+      b->cursor = nir_before_instr(instr);
+      nir_ssa_def *resource = nir_ssa_for_src(b, intrin->src[0], 2);
       nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
-                            nir_src_for_ssa(index));
+                            nir_src_for_ssa(resource));
       return NULL;
   }
   case nir_intrinsic_image_deref_sparse_load:
@@ -323,6 +209,7 @@ static nir_ssa_def *lower_vri_instr(struct nir_builder *b,
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
+      b->cursor = nir_before_instr(instr);
       lower_image_intrinsic(b, intrin, data_cb);
       return NULL;
 
@@ -330,8 +217,12 @@ static nir_ssa_def *lower_vri_instr(struct nir_builder *b,
      return NULL;
    }
 }
-   if (instr->type == nir_instr_type_tex)
+
+   if (instr->type == nir_instr_type_tex) {
+      b->cursor = nir_before_instr(instr);
       lower_vri_instr_tex(b, nir_instr_as_tex(instr), data_cb);
+   }
+
    return NULL;
 }
 
@@ -339,46 +230,6 @@ void lvp_lower_pipeline_layout(const struct lvp_device *device,
                                struct lvp_pipeline_layout *layout,
                                nir_shader *shader)
 {
-   nir_shader_lower_instructions(shader, lower_uniform_block_access, lower_block_instr, layout);
+   nir_shader_instructions_pass(shader, lower_load_ubo, nir_metadata_block_index | nir_metadata_dominance, layout);
    nir_shader_lower_instructions(shader, lower_vulkan_resource_index, lower_vri_instr, layout);
-   nir_foreach_variable_with_modes(var, shader, nir_var_uniform |
-                                                nir_var_image) {
-      const struct glsl_type *type = var->type;
-      enum glsl_base_type base_type =
-         glsl_get_base_type(glsl_without_array(type));
-      unsigned desc_set_idx = var->data.descriptor_set;
-      unsigned binding_idx = var->data.binding;
-      const struct lvp_descriptor_set_binding_layout *binding =
-         get_binding_layout(layout, desc_set_idx, binding_idx);
-      int value = 0;
-      var->data.descriptor_set = 0;
-      if (base_type == GLSL_TYPE_SAMPLER || base_type == GLSL_TYPE_TEXTURE) {
-         if (binding->type == VK_DESCRIPTOR_TYPE_SAMPLER) {
-            for (unsigned s = 0; s < desc_set_idx; s++) {
-               if (!layout->vk.set_layouts[s])
-                  continue;
-               value += get_set_layout(layout, s)->stage[shader->info.stage].sampler_count;
-            }
-            value += binding->stage[shader->info.stage].sampler_index;
-         } else {
-            for (unsigned s = 0; s < desc_set_idx; s++) {
-               if (!layout->vk.set_layouts[s])
-                  continue;
-               value += get_set_layout(layout, s)->stage[shader->info.stage].sampler_view_count;
-            }
-            value += binding->stage[shader->info.stage].sampler_view_index;
-         }
-         var->data.binding = value;
-      }
-      if (base_type == GLSL_TYPE_IMAGE) {
-         var->data.descriptor_set = 0;
-         for (unsigned s = 0; s < desc_set_idx; s++) {
-            if (!layout->vk.set_layouts[s])
-               continue;
-            value += get_set_layout(layout, s)->stage[shader->info.stage].image_count;
-         }
-         value += binding->stage[shader->info.stage].image_index;
-         var->data.binding = value;
-      }
-   }
 }
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index b93afcf338e..e2378be1826 100644
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -123,102 +123,6 @@ shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
    *align = comp_size;
 }
 
-static void
-set_image_access(struct lvp_shader *shader, struct lvp_pipeline_layout *layout, nir_shader *nir,
-                 nir_intrinsic_instr *instr,
-                 bool reads, bool writes)
-{
-   nir_variable *var = nir_intrinsic_get_var(instr, 0);
-   /* calculate the variable's offset in the layout */
-   uint64_t value = 0;
-   const struct lvp_descriptor_set_binding_layout *binding =
-      get_binding_layout(layout, var->data.descriptor_set, var->data.binding);
-   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
-      if (layout->vk.set_layouts[s])
-         value += get_set_layout(layout, s)->stage[nir->info.stage].image_count;
-   }
-   value += binding->stage[nir->info.stage].image_index;
-   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
-   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;
-
-   if (reads)
-      shader->access.images_read |= mask;
-   if (writes)
-      shader->access.images_written |= mask;
-}
-
-static void
-set_buffer_access(struct lvp_shader *shader, struct lvp_pipeline_layout *layout, nir_shader *nir,
-                  nir_intrinsic_instr *instr)
-{
-   nir_variable *var = nir_intrinsic_get_var(instr, 0);
-   if (!var) {
-      nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
-      if (deref->modes != nir_var_mem_ssbo)
-         return;
-      nir_binding b = nir_chase_binding(instr->src[0]);
-      var = nir_get_binding_variable(nir, b);
-      if (!var)
-         return;
-   }
-   if (var->data.mode != nir_var_mem_ssbo)
-      return;
-   /* calculate the variable's offset in the layout */
-   uint64_t value = 0;
-   const struct lvp_descriptor_set_binding_layout *binding =
-      get_binding_layout(layout, var->data.descriptor_set, var->data.binding);
-   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
-      if (layout->vk.set_layouts[s])
-         value += get_set_layout(layout, s)->stage[nir->info.stage].shader_buffer_count;
-   }
-   value += binding->stage[nir->info.stage].shader_buffer_index;
-   /* Structs have been lowered already, so get_aoa_size is sufficient. */
-   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
-   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;
-   shader->access.buffers_written |= mask;
-}
-
-static void
-scan_intrinsic(struct lvp_shader *shader, struct lvp_pipeline_layout *layout, nir_shader *nir, nir_intrinsic_instr *instr)
-{
-   switch (instr->intrinsic) {
-   case nir_intrinsic_image_deref_sparse_load:
-   case nir_intrinsic_image_deref_load:
-   case nir_intrinsic_image_deref_size:
-   case nir_intrinsic_image_deref_samples:
-      set_image_access(shader, layout, nir, instr, true, false);
-      break;
-   case nir_intrinsic_image_deref_store:
-      set_image_access(shader, layout, nir, instr, false, true);
-      break;
-   case nir_intrinsic_image_deref_atomic:
-   case nir_intrinsic_image_deref_atomic_swap:
-      set_image_access(shader, layout, nir, instr, true, true);
-      break;
-   case nir_intrinsic_deref_atomic:
-   case nir_intrinsic_deref_atomic_swap:
-   case nir_intrinsic_store_deref:
-      set_buffer_access(shader, layout, nir, instr);
-      break;
-   default: break;
-   }
-}
-
-static void
-scan_pipeline_info(struct lvp_shader *shader, struct lvp_pipeline_layout *layout, nir_shader *nir)
-{
-   nir_foreach_function(function, nir) {
-      if (function->impl)
-         nir_foreach_block(block, function->impl) {
-            nir_foreach_instr(instr, block) {
-               if (instr->type == nir_instr_type_intrinsic)
-                  scan_intrinsic(shader, layout, nir, nir_instr_as_intrinsic(instr));
-            }
-         }
-   }
-
-}
-
 static bool
 remove_scoped_barriers_impl(nir_builder *b, nir_instr *instr, void *data)
 {
@@ -419,8 +323,8 @@ compile_spirv(struct lvp_device *pdevice, const VkPipelineShaderStageCreateInfo
          .demote_to_helper_invocation = true,
          .mesh_shading = true,
       },
-      .ubo_addr_format = nir_address_format_32bit_index_offset,
-      .ssbo_addr_format = nir_address_format_32bit_index_offset,
+      .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
+      .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
       .phys_ssbo_addr_format = nir_address_format_64bit_global,
       .push_const_addr_format = nir_address_format_logical,
       .shared_addr_format = nir_address_format_32bit_offset,
@@ -472,13 +376,9 @@ lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_shader
    NIR_PASS_V(nir, nir_remove_dead_variables,
               nir_var_uniform | nir_var_image, NULL);
 
-   scan_pipeline_info(shader, layout, nir);
-
    optimize(nir);
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
-   lvp_lower_pipeline_layout(pdevice, layout, nir);
-
    NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
    NIR_PASS_V(nir, nir_split_var_copies);
    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
@@ -488,12 +388,14 @@ lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_shader
 
    NIR_PASS_V(nir, nir_lower_explicit_io,
               nir_var_mem_ubo | nir_var_mem_ssbo,
-              nir_address_format_32bit_index_offset);
+              nir_address_format_vec2_index_32bit_offset);
 
    NIR_PASS_V(nir, nir_lower_explicit_io,
               nir_var_mem_global,
              nir_address_format_64bit_global);
 
+   lvp_lower_pipeline_layout(pdevice, layout, nir);
+
    if (nir->info.stage == MESA_SHADER_COMPUTE ||
        nir->info.stage == MESA_SHADER_TASK ||
        nir->info.stage == MESA_SHADER_MESH) {
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index 2f29c0c01d5..e0fa2838599 100644
--- a/src/gallium/frontends/lavapipe/lvp_private.h
+++ b/src/gallium/frontends/lavapipe/lvp_private.h
@@ -73,6 +73,7 @@ typedef uint32_t xcb_window_t;
 #include "vk_queue.h"
 #include "vk_sync.h"
 #include "vk_sync_timeline.h"
+#include "lp_jit.h"
 
 #include "wsi_common.h"
@@ -198,6 +199,9 @@ struct lvp_device {
    struct pipe_resource *zero_buffer; /* for zeroed bda */
    bool poison_mem;
    bool print_cmds;
+
+   struct lp_texture_handle *null_texture_handle;
+   struct lp_texture_handle *null_image_handle;
 };
 
 void lvp_device_get_cache_uuid(void *uuid);
@@ -262,33 +266,32 @@ struct lvp_image_view {
    struct pipe_surface *surface; /* have we created a pipe surface for this? */
    struct lvp_image_view *multisample; //VK_EXT_multisampled_render_to_single_sampled
+
+   struct lp_texture_handle *texture_handle;
+   struct lp_texture_handle *image_handle;
 };
 
 struct lvp_sampler {
    struct vk_object_base base;
-   struct pipe_sampler_state state;
+   union lp_descriptor desc;
+
+   struct lp_texture_handle *texture_handle;
 };
 
 struct lvp_descriptor_set_binding_layout {
-   uint16_t descriptor_index;
+   uint32_t descriptor_index;
    /* Number of array elements in this binding */
    VkDescriptorType type;
-   uint16_t array_size;
+   uint32_t array_size;
    bool valid;
 
-   int16_t dynamic_index;
-   struct {
-      int16_t const_buffer_index;
-      int16_t shader_buffer_index;
-      int16_t sampler_index;
-      int16_t sampler_view_index;
-      int16_t image_index;
-      int16_t uniform_block_index;
-      int16_t uniform_block_offset;
-   } stage[LVP_SHADER_STAGES];
+   uint32_t dynamic_index;
+
+   uint32_t uniform_block_offset;
+   uint32_t uniform_block_size;
 
    /* Immutable samplers (or NULL if no immutable samplers) */
-   struct pipe_sampler_state **immutable_samplers;
+   union lp_descriptor **immutable_samplers;
 };
 
 struct lvp_descriptor_set_layout {
@@ -299,27 +302,16 @@ struct lvp_descriptor_set_layout {
    uint32_t immutable_sampler_count;
 
    /* Number of bindings in this descriptor set */
-   uint16_t binding_count;
+   uint32_t binding_count;
 
    /* Total size of the descriptor set with room for all array entries */
-   uint16_t size;
+   uint32_t size;
 
    /* Shader stages affected by this descriptor set */
-   uint16_t shader_stages;
-
-   struct {
-      uint16_t const_buffer_count;
-      uint16_t shader_buffer_count;
-      uint16_t sampler_count;
-      uint16_t sampler_view_count;
-      uint16_t image_count;
-      uint16_t uniform_block_count;
-      uint16_t uniform_block_size;
-      uint16_t uniform_block_sizes[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS]; //zero-indexed
-   } stage[LVP_SHADER_STAGES];
+   uint32_t shader_stages;
 
    /* Number of dynamic offsets used by this descriptor set */
-   uint16_t dynamic_offset_count;
+   uint32_t dynamic_offset_count;
 
    /* Bindings in this descriptor set */
    struct lvp_descriptor_set_binding_layout binding[0];
@@ -331,28 +323,15 @@ vk_to_lvp_descriptor_set_layout(const struct vk_descriptor_set_layout *layout)
    return container_of(layout, const struct lvp_descriptor_set_layout, vk);
 }
 
-union lvp_descriptor_info {
-   struct {
-      struct pipe_sampler_state *sampler;
-      struct pipe_sampler_view *sampler_view;
-   };
-   struct pipe_image_view image_view;
-   struct pipe_shader_buffer ssbo;
-   struct pipe_constant_buffer ubo;
-   uint8_t *uniform;
-};
-
-struct lvp_descriptor {
-   VkDescriptorType type;
-
-   union lvp_descriptor_info info;
-};
-
 struct lvp_descriptor_set {
    struct vk_object_base base;
    struct lvp_descriptor_set_layout *layout;
    struct list_head link;
-   struct lvp_descriptor descriptors[0];
+
+   /* Buffer holding the descriptors. */
+   struct pipe_memory_allocation *pmem;
+   struct pipe_resource *bo;
+   void *map;
 };
 
 struct lvp_descriptor_pool {
@@ -374,6 +353,8 @@ struct lvp_descriptor_update_template {
    VkDescriptorUpdateTemplateEntry entry[0];
 };
 
+uint32_t lvp_descriptor_update_template_entry_size(VkDescriptorType type);
+
 static inline void
 lvp_descriptor_template_templ_ref(struct lvp_descriptor_update_template *templ)
 {
@@ -404,16 +385,16 @@ void
 lvp_descriptor_set_destroy(struct lvp_device *device,
                            struct lvp_descriptor_set *set);
 
+void
+lvp_descriptor_set_update_with_template(VkDevice _device, VkDescriptorSet descriptorSet,
+                                        VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+                                        const void *pData, bool push);
+
 struct lvp_pipeline_layout {
    struct vk_pipeline_layout vk;
 
    uint32_t push_constant_size;
    VkShaderStageFlags push_constant_stages;
-   struct {
-      uint16_t uniform_block_size;
-      uint16_t uniform_block_count;
-      uint16_t uniform_block_sizes[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS];
-   } stage[LVP_SHADER_STAGES];
 };
 
 
@@ -422,12 +403,6 @@ lvp_pipeline_layout_create(struct lvp_device *device,
                            const VkPipelineLayoutCreateInfo*           pCreateInfo,
                            const VkAllocationCallbacks*                pAllocator);
 
-struct lvp_access_info {
-   uint64_t images_read;
-   uint64_t images_written;
-   uint64_t buffers_written;
-};
-
 struct lvp_pipeline_nir {
    int ref_cnt;
    nir_shader *nir;
@@ -458,7 +433,6 @@ struct lvp_inline_variant {
 struct lvp_shader {
    struct vk_object_base base;
    struct lvp_pipeline_layout *layout;
-   struct lvp_access_info access;
    struct lvp_pipeline_nir *pipeline_nir;
    struct lvp_pipeline_nir *tess_ccw;
    void *shader_cso;
@@ -528,6 +502,9 @@ struct lvp_buffer_view {
    struct lvp_buffer *buffer;
    uint32_t offset;
    uint64_t range;
+
+   struct lp_texture_handle *texture_handle;
+   struct lp_texture_handle *image_handle;
 };
 
 struct lvp_query_pool {
@@ -617,22 +594,6 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_sampler, base, VkSampler,
 VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
                                VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
 
-struct lvp_write_descriptor {
-   uint32_t dst_binding;
-   uint32_t dst_array_element;
-   uint32_t descriptor_count;
-   VkDescriptorType descriptor_type;
-};
-
-struct lvp_cmd_push_descriptor_set {
-   VkPipelineBindPoint bind_point;
-   struct lvp_pipeline_layout *layout;
-   uint32_t set;
-   uint32_t descriptor_write_count;
-   struct lvp_write_descriptor *descriptors;
-   union lvp_descriptor_info *infos;
-};
-
 void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp);
 
 VkResult lvp_execute_cmds(struct lvp_device *device,