diff --git a/src/gallium/frontends/lavapipe/lvp_descriptor_set.c b/src/gallium/frontends/lavapipe/lvp_descriptor_set.c
index 175a9262bf0..f8b4f86ec2c 100644
--- a/src/gallium/frontends/lavapipe/lvp_descriptor_set.c
+++ b/src/gallium/frontends/lavapipe/lvp_descriptor_set.c
@@ -98,7 +98,10 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout(
       set_layout->binding[b].descriptor_index = set_layout->size;
       set_layout->binding[b].type = binding->descriptorType;
       set_layout->binding[b].valid = true;
-      set_layout->size += binding->descriptorCount;
+      if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
+         set_layout->size++;
+      else
+         set_layout->size += binding->descriptorCount;

       for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES; stage++) {
          set_layout->binding[b].stage[stage].const_buffer_index = -1;
@@ -106,6 +109,7 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout(
          set_layout->binding[b].stage[stage].sampler_index = -1;
          set_layout->binding[b].stage[stage].sampler_view_index = -1;
          set_layout->binding[b].stage[stage].image_index = -1;
+         set_layout->binding[b].stage[stage].uniform_block_index = -1;
       }

       if (binding->descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
@@ -141,6 +145,14 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout(
             set_layout->stage[s].const_buffer_count += binding->descriptorCount;
          }
          break;
+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
+         lvp_foreach_stage(s, binding->stageFlags) {
+            set_layout->binding[b].stage[s].uniform_block_offset = set_layout->stage[s].uniform_block_size;
+            set_layout->binding[b].stage[s].uniform_block_index = set_layout->stage[s].uniform_block_count;
+            set_layout->stage[s].uniform_block_size += binding->descriptorCount;
+            set_layout->stage[s].uniform_block_sizes[set_layout->stage[s].uniform_block_count++] = binding->descriptorCount;
+         }
+         break;
       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
          lvp_foreach_stage(s, binding->stageFlags) {
@@ -260,6 +272,14 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreatePipelineLayout(
       LVP_FROM_HANDLE(lvp_descriptor_set_layout, set_layout,
                       pCreateInfo->pSetLayouts[set]);
       layout->set[set].layout = set_layout;
+      for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+         layout->stage[i].uniform_block_size += set_layout->stage[i].uniform_block_size;
+         for (unsigned j = 0; j < set_layout->stage[i].uniform_block_count; j++) {
+            assert(layout->stage[i].uniform_block_count + j < MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS);
+            layout->stage[i].uniform_block_sizes[layout->stage[i].uniform_block_count + j] = set_layout->stage[i].uniform_block_sizes[j];
+         }
+         layout->stage[i].uniform_block_count += set_layout->stage[i].uniform_block_count;
+      }
       lvp_descriptor_set_layout_ref(set_layout);
    }
@@ -341,8 +361,10 @@ lvp_descriptor_set_create(struct lvp_device *device,
                           struct lvp_descriptor_set **out_set)
 {
    struct lvp_descriptor_set *set;
-   size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]);
-
+   size_t base_size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]);
+   size_t size = base_size;
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
+      size += layout->stage[i].uniform_block_size;
    set = vk_alloc(&device->vk.alloc /* XXX: Use the pool */, size, 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!set)
@@ -360,12 +382,19 @@ lvp_descriptor_set_create(struct lvp_device *device,

    /* Go through and fill out immutable samplers if we have any */
    struct lvp_descriptor *desc = set->descriptors;
+   uint8_t *uniform_mem = (uint8_t*)(set) + base_size;
    for (uint32_t b = 0; b < layout->binding_count; b++) {
-      if (layout->binding[b].immutable_samplers) {
-         for (uint32_t i = 0; i < layout->binding[b].array_size; i++)
-            desc[i].info.sampler = layout->binding[b].immutable_samplers[i];
+      if (layout->binding[b].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
+         desc->info.uniform = uniform_mem;
+         uniform_mem += layout->binding[b].array_size;
+         desc++;
+      } else {
+         if (layout->binding[b].immutable_samplers) {
+            for (uint32_t i = 0; i < layout->binding[b].array_size; i++)
+               desc[i].info.sampler = layout->binding[b].immutable_samplers[i];
+         }
+         desc += layout->binding[b].array_size;
       }
-      desc += layout->binding[b].array_size;
    }

    *out_set = set;
@@ -444,6 +473,14 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets(
          &set->layout->binding[write->dstBinding];
       struct lvp_descriptor *desc =
          &set->descriptors[bind_layout->descriptor_index];
+      if (write->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
+         const VkWriteDescriptorSetInlineUniformBlock *uniform_data =
+            vk_find_struct_const(write->pNext, WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK);
+         assert(uniform_data);
+         desc->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK;
+         memcpy(desc->info.uniform + write->dstArrayElement, uniform_data->pData, uniform_data->dataSize);
+         continue;
+      }
       desc += write->dstArrayElement;

       switch (write->descriptorType) {
@@ -540,16 +577,24 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets(
          &src->layout->binding[copy->srcBinding];
       struct lvp_descriptor *src_desc =
          &src->descriptors[src_layout->descriptor_index];
-      src_desc += copy->srcArrayElement;

       const struct lvp_descriptor_set_binding_layout *dst_layout =
          &dst->layout->binding[copy->dstBinding];
       struct lvp_descriptor *dst_desc =
          &dst->descriptors[dst_layout->descriptor_index];
-      dst_desc += copy->dstArrayElement;

-      for (uint32_t j = 0; j < copy->descriptorCount; j++)
-         dst_desc[j] = src_desc[j];
+      if (src_desc->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
+         dst_desc->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK;
+         memcpy(dst_desc->info.uniform + copy->dstArrayElement,
+                src_desc->info.uniform + copy->srcArrayElement,
+                copy->descriptorCount);
+      } else {
+         src_desc += copy->srcArrayElement;
+         dst_desc += copy->dstArrayElement;
+
+         for (uint32_t j = 0; j < copy->descriptorCount; j++)
+            dst_desc[j] = src_desc[j];
+      }
    }
 }
@@ -689,6 +734,11 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSetWithTemplate(VkDevice _device,
          &set->layout->binding[entry->dstBinding];
       struct lvp_descriptor *desc =
          &set->descriptors[bind_layout->descriptor_index];
+      if (entry->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+         desc->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT;
+         memcpy(desc->info.uniform + entry->dstArrayElement, pSrc, entry->descriptorCount);
+         continue;
+      }
       for (j = 0; j < entry->descriptorCount; ++j) {
          unsigned idx = j + entry->dstArrayElement;
          switch (entry->descriptorType) {
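Side note on the update path above: for VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK the Vulkan spec defines dstArrayElement and descriptorCount as a byte offset and byte count, which is why lvp_UpdateDescriptorSets() memcpys into desc->info.uniform instead of iterating descriptors. A minimal app-side sketch of a write this path consumes (hypothetical helper, not part of the patch; device and set are assumed to be valid handles, with an inline uniform block at binding 0):

#include <vulkan/vulkan.h>

/* Hypothetical usage sketch: writes 16 bytes into the inline uniform
 * block at binding 0 of `set`. */
static void
write_inline_block(VkDevice device, VkDescriptorSet set)
{
   static const float color[4] = {1.0f, 0.0f, 0.0f, 1.0f};
   const VkWriteDescriptorSetInlineUniformBlock inline_write = {
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK,
      .dataSize = sizeof(color),         /* byte count */
      .pData = color,
   };
   const VkWriteDescriptorSet write = {
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      .pNext = &inline_write,
      .dstSet = set,
      .dstBinding = 0,
      .dstArrayElement = 0,              /* byte offset into the block */
      .descriptorCount = sizeof(color),  /* byte count, not an element count */
      .descriptorType = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK,
   };
   vkUpdateDescriptorSets(device, 1, &write, 0, NULL);
}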
diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c
index d17e24a04e2..6d1aef7b296 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -146,6 +146,11 @@ struct rendering_state {
    uint8_t push_constants[128 * 4];
    uint16_t push_size[2]; //gfx, compute
+   struct {
+      void *block[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS];
+      uint16_t size[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS];
+      uint16_t count;
+   } uniform_blocks[PIPE_SHADER_TYPES];

    const struct lvp_render_pass *pass;
    struct lvp_subpass *subpass;
@@ -208,6 +213,8 @@ static unsigned
 calc_ubo0_size(struct rendering_state *state, enum pipe_shader_type pstage)
 {
    unsigned size = get_pcbuf_size(state, pstage);
+   for (unsigned i = 0; i < state->uniform_blocks[pstage].count; i++)
+      size += state->uniform_blocks[pstage].size[i];
    return size;
 }
@@ -217,6 +224,13 @@ fill_ubo0(struct rendering_state *state, uint8_t *mem, enum pipe_shader_type pstage)
    unsigned push_size = get_pcbuf_size(state, pstage);
    if (push_size)
       memcpy(mem, state->push_constants, push_size);
+
+   mem += push_size;
+   for (unsigned i = 0; i < state->uniform_blocks[pstage].count; i++) {
+      unsigned size = state->uniform_blocks[pstage].size[i];
+      memcpy(mem, state->uniform_blocks[pstage].block[i], size);
+      mem += size;
+   }
 }

 static void
@@ -418,7 +432,10 @@ static void handle_compute_pipeline(struct vk_cmd_queue_entry *cmd,
    if ((pipeline->layout->push_constant_stages & VK_SHADER_STAGE_COMPUTE_BIT) > 0)
       state->has_pcbuf[PIPE_SHADER_COMPUTE] = pipeline->layout->push_constant_size > 0;
-   if (!state->has_pcbuf[PIPE_SHADER_COMPUTE])
+   state->uniform_blocks[PIPE_SHADER_COMPUTE].count = pipeline->layout->stage[MESA_SHADER_COMPUTE].uniform_block_count;
+   for (unsigned j = 0; j < pipeline->layout->stage[MESA_SHADER_COMPUTE].uniform_block_count; j++)
+      state->uniform_blocks[PIPE_SHADER_COMPUTE].size[j] = pipeline->layout->stage[MESA_SHADER_COMPUTE].uniform_block_sizes[j];
+   if (!state->has_pcbuf[PIPE_SHADER_COMPUTE] && !pipeline->layout->stage[MESA_SHADER_COMPUTE].uniform_block_count)
       state->pcbuf_dirty[PIPE_SHADER_COMPUTE] = false;

    state->dispatch_info.block[0] = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.workgroup_size[0];
@@ -547,10 +564,16 @@ static void handle_graphics_pipeline(struct vk_cmd_queue_entry *cmd,
    for (enum pipe_shader_type sh = PIPE_SHADER_VERTEX; sh < PIPE_SHADER_COMPUTE; sh++)
       state->has_pcbuf[sh] = false;

+   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+      enum pipe_shader_type sh = pipe_shader_type_from_mesa(i);
+      state->uniform_blocks[sh].count = pipeline->layout->stage[i].uniform_block_count;
+      for (unsigned j = 0; j < pipeline->layout->stage[i].uniform_block_count; j++)
+         state->uniform_blocks[sh].size[j] = pipeline->layout->stage[i].uniform_block_sizes[j];
+   }
    u_foreach_bit(stage, pipeline->layout->push_constant_stages) {
       enum pipe_shader_type sh = pipe_shader_type_from_mesa(stage);
       state->has_pcbuf[sh] = pipeline->layout->push_constant_size > 0;
-      if (!state->has_pcbuf[sh])
+      if (!state->has_pcbuf[sh] && !state->uniform_blocks[sh].count)
          state->pcbuf_dirty[sh] = false;
    }
@@ -992,6 +1015,7 @@ struct dyn_info {
       uint16_t sampler_count;
       uint16_t sampler_view_count;
       uint16_t image_count;
+      uint16_t uniform_block_count;
    } stage[MESA_SHADER_STAGES];

    uint32_t dyn_index;
@@ -1230,6 +1254,16 @@ static void handle_descriptor(struct rendering_state *state,
                 type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;

    switch (type) {
+   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
+      int idx = binding->stage[stage].uniform_block_index;
+      if (idx == -1)
+         return;
+      idx += dyn_info->stage[stage].uniform_block_count;
+      assert(descriptor->uniform);
+      state->uniform_blocks[p_stage].block[idx] = descriptor->uniform;
+      state->pcbuf_dirty[p_stage] = true;
+      break;
+   }
    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
       fill_image_view_stage(state, dyn_info, stage, p_stage, array_idx, descriptor, binding);
@@ -1299,6 +1333,7 @@ static void handle_descriptor(struct rendering_state *state,
       break;
    default:
       fprintf(stderr, "Unhandled descriptor set %d\n", type);
+      unreachable("oops");
       break;
    }
 }
@@ -1316,7 +1351,8 @@ static void handle_set_stage(struct rendering_state *state,
       binding = &set->layout->binding[j];

       if (binding->valid) {
-         for (int i = 0; i < binding->array_size; i++) {
+         unsigned array_size = binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ? 1 : binding->array_size;
+         for (int i = 0; i < array_size; i++) {
             descriptor = &set->descriptors[binding->descriptor_index + i];
             handle_descriptor(state, dyn_info, binding, stage, p_stage, i, descriptor->type, &descriptor->info);
          }
@@ -1333,6 +1369,7 @@ static void increment_dyn_info(struct dyn_info *dyn_info,
       dyn_info->stage[stage].sampler_count += layout->stage[stage].sampler_count;
       dyn_info->stage[stage].sampler_view_count += layout->stage[stage].sampler_view_count;
       dyn_info->stage[stage].image_count += layout->stage[stage].image_count;
+      dyn_info->stage[stage].uniform_block_count += layout->stage[stage].uniform_block_count;
    }
    if (inc_dyn)
       dyn_info->dyn_index += layout->dynamic_offset_count;
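Note on the execute-side changes: each stage's inline blocks are appended to ubo0 behind the push constants, in binding order across sets, which is what calc_ubo0_size() and fill_ubo0() above implement. A minimal sketch of the resulting offset math (hypothetical helper, not part of the patch):

#include <stdint.h>

/* ubo0 layout per stage: [ push constants ][ block 0 ][ block 1 ] ...
 * Returns the byte offset of block `block_index` within ubo0. */
static unsigned
ubo0_block_offset(unsigned push_size, const uint16_t *block_sizes,
                  unsigned block_index)
{
   unsigned offset = push_size;      /* push constants come first */
   for (unsigned i = 0; i < block_index; i++)
      offset += block_sizes[i];      /* earlier blocks, in binding order */
   return offset;
}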
diff --git a/src/gallium/frontends/lavapipe/lvp_lower_vulkan_resource.c b/src/gallium/frontends/lavapipe/lvp_lower_vulkan_resource.c
index 42938693ca4..8edf36293aa 100644
--- a/src/gallium/frontends/lavapipe/lvp_lower_vulkan_resource.c
+++ b/src/gallium/frontends/lavapipe/lvp_lower_vulkan_resource.c
@@ -47,6 +47,48 @@ lower_vulkan_resource_index(const nir_instr *instr, const void *data_cb)
    return false;
 }

+static bool
+lower_uniform_block_access(const nir_instr *instr, const void *data_cb)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != nir_intrinsic_load_deref)
+      return false;
+   nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
+   return deref->modes == nir_var_mem_ubo;
+}
+
+static nir_ssa_def *
+lower_block_instr(nir_builder *b, nir_instr *instr, void *data_cb)
+{
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   nir_binding nb = nir_chase_binding(intrin->src[0]);
+   struct lvp_pipeline_layout *layout = data_cb;
+   struct lvp_descriptor_set_binding_layout *binding = &layout->set[nb.desc_set].layout->binding[nb.binding];
+   if (binding->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
+      return NULL;
+   if (!binding->array_size)
+      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
+
+   assert(intrin->src[0].ssa->num_components == 2);
+   unsigned value = 0;
+   for (unsigned s = 0; s < nb.desc_set; s++)
+      value += layout->set[s].layout->stage[b->shader->info.stage].uniform_block_size;
+   if (layout->push_constant_stages & BITFIELD_BIT(b->shader->info.stage))
+      value += layout->push_constant_size;
+   value += binding->stage[b->shader->info.stage].uniform_block_offset;
+
+   b->cursor = nir_before_instr(instr);
+   nir_ssa_def *offset = nir_imm_ivec2(b, 0, value);
+   nir_ssa_def *added = nir_iadd(b, intrin->src[0].ssa, offset);
+   nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
+   nir_deref_instr *cast = nir_build_deref_cast(b, added, deref->modes, deref->type, 0);
+   nir_instr_rewrite_src_ssa(instr, &intrin->src[0], &cast->dest.ssa);
+   return NIR_LOWER_INSTR_PROGRESS;
+}
+
 static nir_ssa_def *lower_vri_intrin_vri(struct nir_builder *b,
                                          nir_instr *instr, void *data_cb)
 {
@@ -59,6 +101,10 @@ static nir_ssa_def *lower_vri_intrin_vri(struct nir_builder *b,
    bool is_ubo = (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
                   binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
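Note on the lowering: lower_vri_intrin_vri() returns binding (0, 0) for inline blocks so the load always targets ubo0, and lower_block_instr() instead biases the deref's offset component by the block's position in ubo0: the inline-block bytes of earlier sets, plus the push constant range if the stage uses one, plus the binding's offset within its own set. A worked instance of that arithmetic with illustrative numbers (not from the patch): 128 bytes of push constants, one 64-byte block in set 0, and the target block first in set 1.

/* Hypothetical values, mirroring the `value` computation in
 * lower_block_instr() above. */
static unsigned
example_inline_block_offset(void)
{
   unsigned value = 0;
   value += 64;   /* set 0's uniform_block_size for this stage */
   value += 128;  /* push_constant_size: stage is in push_constant_stages */
   value += 0;    /* the binding's uniform_block_offset within set 1 */
   return value;  /* 192 -- where fill_ubo0() also places this block */
}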
+   /* always load inline uniform blocks from ubo0 */
+   if (binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
+      return nir_imm_ivec2(b, 0, 0);
+
    for (unsigned s = 0; s < desc_set_idx; s++) {
       if (is_ubo)
          value += layout->set[s].layout->stage[b->shader->info.stage].const_buffer_count;
@@ -209,6 +255,7 @@ void lvp_lower_pipeline_layout(const struct lvp_device *device,
                                struct lvp_pipeline_layout *layout,
                                nir_shader *shader)
 {
+   nir_shader_lower_instructions(shader, lower_uniform_block_access, lower_block_instr, layout);
    nir_shader_lower_instructions(shader, lower_vulkan_resource_index, lower_vri_instr, layout);

    nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index 937b054c175..d36fdde98b7 100644
--- a/src/gallium/frontends/lavapipe/lvp_private.h
+++ b/src/gallium/frontends/lavapipe/lvp_private.h
@@ -77,6 +77,8 @@ extern "C" {
 #define MAX_SETS         8
 #define MAX_PUSH_CONSTANTS_SIZE 128
 #define MAX_PUSH_DESCRIPTORS 32
+#define MAX_DESCRIPTOR_UNIFORM_BLOCK_SIZE 4096
+#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS 8

 #ifdef _WIN32
 #define lvp_printflike(a, b)
@@ -336,6 +338,8 @@ struct lvp_descriptor_set_binding_layout {
       int16_t sampler_index;
       int16_t sampler_view_index;
       int16_t image_index;
+      int16_t uniform_block_index;
+      int16_t uniform_block_offset;
    } stage[MESA_SHADER_STAGES];

    /* Immutable samplers (or NULL if no immutable samplers) */
@@ -365,6 +369,9 @@ struct lvp_descriptor_set_layout {
       uint16_t sampler_count;
       uint16_t sampler_view_count;
       uint16_t image_count;
+      uint16_t uniform_block_count;
+      uint16_t uniform_block_size;
+      uint16_t uniform_block_sizes[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS]; //zero-indexed
    } stage[MESA_SHADER_STAGES];

    /* Number of dynamic offsets used by this descriptor set */
@@ -405,6 +412,7 @@ union lvp_descriptor_info {
       VkDeviceSize range;
    };
    struct lvp_buffer_view *buffer_view;
+   uint8_t *uniform;
 };

 struct lvp_descriptor {
@@ -461,6 +469,9 @@ struct lvp_pipeline_layout {
    uint32_t push_constant_size;
    VkShaderStageFlags push_constant_stages;
    struct {
+      uint16_t uniform_block_size;
+      uint16_t uniform_block_count;
+      uint16_t uniform_block_sizes[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS];
    } stage[MESA_SHADER_STAGES];
 };
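The two new defines in lvp_private.h correspond to the device limits for inline uniform blocks. A sketch of how they would plausibly be advertised through VkPhysicalDeviceInlineUniformBlockPropertiesEXT (assumed; the lvp_device.c side is not part of this diff, and the set-level values are a guess):

#include <vulkan/vulkan.h>

static void
fill_inline_block_props(VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props)
{
   props->maxInlineUniformBlockSize = 4096;              /* MAX_DESCRIPTOR_UNIFORM_BLOCK_SIZE */
   props->maxPerStageDescriptorInlineUniformBlocks = 8;  /* MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS */
   props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 8;
   props->maxDescriptorSetInlineUniformBlocks = 8 * 8;   /* per-stage limit times MAX_SETS */
   props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 8 * 8;
}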