diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 8876112ec88..c12916e10cd 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -785,6 +785,12 @@ VkResult anv_CreateDescriptorSetLayout( set_layout->descriptor_buffer_surface_size = descriptor_buffer_surface_size; set_layout->descriptor_buffer_sampler_size = descriptor_buffer_sampler_size; + if (pCreateInfo->flags & + VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT) { + assert(set_layout->descriptor_buffer_surface_size == 0); + assert(set_layout->descriptor_buffer_sampler_size == 0); + } + *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); return VK_SUCCESS; diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index a601d9969e0..4a2c65d73e2 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -38,6 +38,8 @@ struct intel_sample_positions; struct intel_urb_config; +struct anv_embedded_sampler; +struct anv_pipeline_embedded_sampler_binding; typedef struct nir_builder nir_builder; typedef struct nir_shader nir_shader; @@ -374,3 +376,7 @@ genX(cmd_buffer_flush_push_descriptors)(struct anv_cmd_buffer *cmd_buffer, /* Return the binding table stages that need to be updated */ return push_buffer_dirty | push_descriptor_dirty; } + +void genX(emit_embedded_sampler)(struct anv_device *device, + struct anv_embedded_sampler *sampler, + struct anv_pipeline_embedded_sampler_binding *binding); diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 9e68be277b3..ef907b8f650 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -40,8 +40,9 @@ #define sizeof_field(type, field) sizeof(((type *)0)->field) enum binding_property { - BINDING_PROPERTY_NORMAL = BITFIELD_BIT(0), - BINDING_PROPERTY_PUSHABLE = BITFIELD_BIT(1), + BINDING_PROPERTY_NORMAL = 
BITFIELD_BIT(0), + BINDING_PROPERTY_PUSHABLE = BITFIELD_BIT(1), + BINDING_PROPERTY_EMBEDDED_SAMPLER = BITFIELD_BIT(2), }; struct apply_pipeline_layout_state { @@ -72,6 +73,9 @@ struct apply_pipeline_layout_state { /* Sampler table offset */ uint8_t sampler_offset; + /* Embedded sampler index */ + uint16_t embedded_sampler_index; + /* Properties of the binding */ enum binding_property properties; @@ -123,8 +127,10 @@ static void add_binding(struct apply_pipeline_layout_state *state, uint32_t set, uint32_t binding) { + const struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; const struct anv_descriptor_set_binding_layout *bind_layout = - &state->layout->set[set].layout->binding[binding]; + &set_layout->binding[binding]; assert(set < state->layout->num_sets); assert(binding < state->layout->set[set].layout->binding_count); @@ -143,6 +149,9 @@ add_binding(struct apply_pipeline_layout_state *state, state->has_dynamic_buffers = true; state->set[set].binding[binding].properties |= BINDING_PROPERTY_NORMAL; + + if (set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT) + state->set[set].binding[binding].properties |= BINDING_PROPERTY_EMBEDDED_SAMPLER; } const VkDescriptorBindingFlags non_pushable_binding_flags = @@ -263,6 +272,9 @@ descriptor_has_bti(nir_intrinsic_instr *intrin, const struct anv_descriptor_set_binding_layout *bind_layout = &state->layout->set[set].layout->binding[binding]; + if (state->set[set].binding[binding].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER) + return false; + uint32_t surface_index; if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) surface_index = state->set[set].desc_offset; @@ -867,7 +879,7 @@ build_surface_index_for_binding(nir_builder *b, set_offset, surface_index, array_index, - nir_undef(b, 1, 32) /* bindless_base_offset */, + nir_imm_int(b, 0) /* bindless_base_offset */, .desc_set = set, .binding = binding, .resource_block_intel = 
state->set[set].binding[binding].push_block, @@ -892,11 +904,18 @@ build_sampler_handle_for_binding(nir_builder *b, binding_descriptor_offset(state, bind_layout, true /* sampler */); const unsigned descriptor_stride = binding_descriptor_stride(state, bind_layout, true /* sampler */); + const bool is_embedded = + state->set[set].binding[binding].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER; const bool is_bindless = is_binding_bindless(set, binding, true /* sampler */, state); - nir_def *set_offset, *sampler_index; + nir_def *set_offset, *sampler_index, *sampler_base_offset = nir_imm_int(b, 0); - if (is_bindless) { + if (is_embedded) { + set_offset = nir_imm_int(b, 0xdeaddead); + sampler_index = nir_load_reloc_const_intel( + b, BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + + state->set[set].binding[binding].embedded_sampler_index); + } else if (is_bindless) { if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) { set_offset = nir_imm_int(b, 0xdeaddead); @@ -947,17 +966,22 @@ build_sampler_handle_for_binding(nir_builder *b, state->set[set].binding[binding].sampler_offset + plane); } + nir_resource_data_intel sampler_resource = nir_resource_intel_sampler; + if (is_bindless) + sampler_resource |= nir_resource_intel_bindless; + if (is_embedded) + sampler_resource |= nir_resource_intel_sampler_embedded; + if (non_uniform) + sampler_resource |= nir_resource_intel_non_uniform; + return nir_resource_intel(b, set_offset, sampler_index, array_index, - nir_undef(b, 1, 32) /* bindless_base_offset */, + sampler_base_offset, .desc_set = set, .binding = binding, - .resource_access_intel = - (is_bindless ? nir_resource_intel_bindless : 0) | - (non_uniform ? 
nir_resource_intel_non_uniform : 0) | - nir_resource_intel_sampler); + .resource_access_intel = sampler_resource); } static nir_def * @@ -1980,6 +2004,38 @@ add_push_entry(struct anv_pipeline_push_map *push_map, }; } +static void +add_embedded_sampler_entry(struct apply_pipeline_layout_state *state, + struct anv_pipeline_bind_map *map, + uint32_t set, uint32_t binding) +{ + state->set[set].binding[binding].embedded_sampler_index = + map->embedded_sampler_count; + struct anv_pipeline_embedded_sampler_binding *sampler = + &map->embedded_sampler_to_binding[map->embedded_sampler_count++]; + const struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + const struct anv_descriptor_set_binding_layout *bind_layout = + &set_layout->binding[binding]; + + *sampler = (struct anv_pipeline_embedded_sampler_binding) { + .set = set, + .binding = binding, + }; + + assert(sizeof(sampler->sampler_state) == + sizeof(bind_layout->immutable_samplers[0]->state_no_bc[0])); + memcpy(sampler->sampler_state, + bind_layout->immutable_samplers[0]->state_no_bc[0], + sizeof(sampler->sampler_state)); + + assert(sizeof(sampler->border_color) == + sizeof(bind_layout->immutable_samplers[0]->vk.border_color_value.uint32)); + memcpy(sampler->border_color, + bind_layout->immutable_samplers[0]->vk.border_color_value.uint32, + sizeof(sampler->border_color)); +} + static bool binding_should_use_surface_binding_table(const struct apply_pipeline_layout_state *state, const struct anv_descriptor_set_binding_layout *binding) @@ -2086,8 +2142,8 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, sizeof(push_map->block_to_descriptor[0]) * map->surface_count); push_map->block_count = map->surface_count; - /* Count used bindings and add push blocks for promotion to push - * constants + /* Count used bindings, assign embedded sampler indices & add push blocks + * for promotion to push constants */ unsigned used_binding_count = 0; for (uint32_t set = 0; set < layout->num_sets; set++) { @@ 
-2103,15 +2159,18 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, const struct anv_descriptor_set_binding_layout *bind_layout = &set_layout->binding[b]; - if (!binding_is_promotable_to_push(bind_layout)) - continue; - if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { - state.set[set].binding[b].push_block = push_map->block_count; - for (unsigned i = 0; i < bind_layout->array_size; i++) - add_push_entry(push_map, set, b, i, layout, bind_layout); - } else { - state.set[set].binding[b].push_block = state.set[set].desc_offset; + if (state.set[set].binding[b].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER) + add_embedded_sampler_entry(&state, map, set, b); + + if (binding_is_promotable_to_push(bind_layout)) { + if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { + state.set[set].binding[b].push_block = push_map->block_count; + for (unsigned i = 0; i < bind_layout->array_size; i++) + add_push_entry(push_map, set, b, i, layout, bind_layout); + } else { + state.set[set].binding[b].push_block = state.set[set].desc_offset; + } } } } diff --git a/src/intel/vulkan/anv_nir_lower_resource_intel.c b/src/intel/vulkan/anv_nir_lower_resource_intel.c index 603831b8b11..d0bca9862d9 100644 --- a/src/intel/vulkan/anv_nir_lower_resource_intel.c +++ b/src/intel/vulkan/anv_nir_lower_resource_intel.c @@ -95,8 +95,17 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data) const bool is_sampler = (nir_intrinsic_resource_access_intel(intrin) & nir_resource_intel_sampler) != 0; + const bool is_embedded_sampler = + (nir_intrinsic_resource_access_intel(intrin) & + nir_resource_intel_sampler_embedded) != 0; const struct lower_resource_state *state = data; + /* Ignore binding table accesses & embedded samplers */ + if (is_embedded_sampler) { + assert(state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER); + return false; + } + if (!is_bindless) return true; @@ -104,6 +113,7 @@ lower_resource_intel(nir_builder *b, 
nir_intrinsic_instr *intrin, void *data) nir_def *set_offset = intrin->src[0].ssa; nir_def *binding_offset = intrin->src[1].ssa; + nir_def *sampler_base_offset = intrin->src[3].ssa; /* When using indirect descriptor, the surface handles are loaded from the * descriptor buffer and do not need any offset. @@ -135,8 +145,12 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data) * set, resource_intel::src[0] has to be shifted right by 6 (bringing * it back in bytes). */ - if (!is_sampler) + if (is_sampler) { + set_offset = nir_ushr_imm(b, set_offset, 6); + set_offset = nir_iadd(b, set_offset, sampler_base_offset); + } else { binding_offset = nir_ishl_imm(b, binding_offset, 6); + } } nir_src_rewrite(&intrin->src[1], diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 712ee0e7ca9..291269a4bad 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -420,6 +420,7 @@ struct anv_pipeline_stage { struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; + struct anv_pipeline_embedded_sampler_binding embedded_sampler_to_binding[2048]; struct anv_pipeline_bind_map bind_map; bool uses_bt_for_push_descs; @@ -2005,7 +2006,8 @@ anv_graphics_pipeline_load_nir(struct anv_graphics_base_pipeline *pipeline, stages[s].bind_map = (struct anv_pipeline_bind_map) { .surface_to_descriptor = stages[s].surface_to_descriptor, - .sampler_to_descriptor = stages[s].sampler_to_descriptor + .sampler_to_descriptor = stages[s].sampler_to_descriptor, + .embedded_sampler_to_binding = stages[s].embedded_sampler_to_binding, }; /* Only use the create NIR from the pStages[] element if we don't have @@ -2614,7 +2616,8 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, stage.bind_map = (struct anv_pipeline_bind_map) { .surface_to_descriptor = stage.surface_to_descriptor, - .sampler_to_descriptor = stage.sampler_to_descriptor + .sampler_to_descriptor = 
stage.sampler_to_descriptor, + .embedded_sampler_to_binding = stage.embedded_sampler_to_binding, }; /* Set up a binding for the gl_NumWorkGroups */ @@ -3018,7 +3021,8 @@ anv_graphics_pipeline_import_lib(struct anv_graphics_base_pipeline *pipeline, stages[s].bind_map = (struct anv_pipeline_bind_map) { .surface_to_descriptor = stages[s].surface_to_descriptor, - .sampler_to_descriptor = stages[s].sampler_to_descriptor + .sampler_to_descriptor = stages[s].sampler_to_descriptor, + .embedded_sampler_to_binding = stages[s].embedded_sampler_to_binding, }; } @@ -3411,9 +3415,6 @@ compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline, if (stage->code == NULL) return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); - /* Ray-tracing shaders don't have a "real" bind map */ - struct anv_pipeline_bind_map empty_bind_map = {}; - struct anv_shader_upload_params upload_params = { .stage = stage->stage, .key_data = &stage->cache_key, @@ -3424,7 +3425,7 @@ compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline, .prog_data_size = brw_prog_data_size(stage->stage), .stats = stage->stats, .num_stats = 1, - .bind_map = &empty_bind_map, + .bind_map = &stage->bind_map, .push_desc_info = &stage->push_desc_info, .dynamic_push_values = stage->dynamic_push_values, }; @@ -3550,6 +3551,9 @@ anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline, }, }; + stages[i].bind_map.embedded_sampler_to_binding = + stages[i].embedded_sampler_to_binding; + pipeline->base.active_stages |= sinfo->stage; anv_stage_write_shader_hash(&stages[i], device); diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 2f8945a474d..73b9577050e 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -52,6 +52,13 @@ anv_shader_bin_destroy(struct vk_device *_device, struct anv_shader_bin *shader = container_of(object, struct anv_shader_bin, base); + for (uint32_t i = 0; i < 
shader->bind_map.embedded_sampler_count; i++) { + anv_state_pool_free(&device->dynamic_state_db_pool, + shader->embedded_samplers[i].sampler_state); + anv_state_pool_free(&device->dynamic_state_db_pool, + shader->embedded_samplers[i].border_color_state); + } + anv_state_pool_free(&device->instruction_state_pool, shader->kernel); vk_pipeline_cache_object_finish(&shader->base); vk_free(&device->vk.alloc, shader); @@ -68,7 +75,28 @@ const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = { NULL }; -struct anv_shader_bin * +static void +anv_shader_bin_rewrite_embedded_samplers(struct anv_device *device, + struct anv_shader_bin *shader, + const struct anv_pipeline_bind_map *bind_map, + const struct brw_stage_prog_data *prog_data_in) +{ + int rv_count = 0; + struct brw_shader_reloc_value reloc_values[BRW_MAX_EMBEDDED_SAMPLERS]; + + for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) { + reloc_values[rv_count++] = (struct brw_shader_reloc_value) { + .id = BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + i, + .value = shader->embedded_samplers[i].sampler_state.offset, + }; + } + + brw_write_shader_relocs(&device->physical->compiler->isa, + shader->kernel.map, prog_data_in, + reloc_values, rv_count); +} + +static struct anv_shader_bin * anv_shader_bin_create(struct anv_device *device, gl_shader_stage stage, const void *key_data, uint32_t key_size, @@ -98,8 +126,13 @@ anv_shader_bin_create(struct anv_device *device, bind_map->surface_count); VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor, bind_map->sampler_count); + VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_embedded_sampler_binding, + embedded_sampler_to_binding, + bind_map->embedded_sampler_count); VK_MULTIALLOC_DECL(&ma, struct brw_kernel_arg_desc, kernel_args, bind_map->kernel_arg_count); + VK_MULTIALLOC_DECL(&ma, struct anv_embedded_sampler, embedded_samplers, + bind_map->embedded_sampler_count); if (!vk_multialloc_alloc(&ma, &device->vk.alloc, 
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) @@ -116,6 +149,13 @@ anv_shader_bin_create(struct anv_device *device, memcpy(shader->kernel.map, kernel_data, kernel_size); shader->kernel_size = kernel_size; + for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) { + anv_genX(device->info, emit_embedded_sampler)( + device, &embedded_samplers[i], + &bind_map->embedded_sampler_to_binding[i]); + } + shader->embedded_samplers = embedded_samplers; + uint64_t shader_data_addr = device->physical->va.instruction_state_pool.addr + shader->kernel.offset + @@ -136,6 +176,7 @@ anv_shader_bin_create(struct anv_device *device, .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW, .value = shader_data_addr, }; + assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0); assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32); reloc_values[rv_count++] = (struct brw_shader_reloc_value) { .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH, @@ -166,6 +207,8 @@ anv_shader_bin_create(struct anv_device *device, shader->kernel.map, prog_data_in, reloc_values, rv_count); + anv_shader_bin_rewrite_embedded_samplers(device, shader, bind_map, prog_data_in); + memcpy(prog_data, prog_data_in, prog_data_size); typed_memcpy(prog_data_relocs, prog_data_in->relocs, prog_data_in->num_relocs); @@ -194,12 +237,19 @@ anv_shader_bin_create(struct anv_device *device, typed_memcpy(&shader->push_desc_info, push_desc_info, 1); shader->bind_map = *bind_map; + typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor, bind_map->surface_count); shader->bind_map.surface_to_descriptor = surface_to_descriptor; + typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor, bind_map->sampler_count); shader->bind_map.sampler_to_descriptor = sampler_to_descriptor; + + typed_memcpy(embedded_sampler_to_binding, bind_map->embedded_sampler_to_binding, + bind_map->embedded_sampler_count); + shader->bind_map.embedded_sampler_to_binding = embedded_sampler_to_binding; + 
typed_memcpy(kernel_args, bind_map->kernel_args, bind_map->kernel_arg_count); shader->bind_map.kernel_args = kernel_args; @@ -259,6 +309,7 @@ anv_shader_bin_serialize(struct vk_pipeline_cache_object *object, sizeof(shader->bind_map.push_sha1)); blob_write_uint32(blob, shader->bind_map.surface_count); blob_write_uint32(blob, shader->bind_map.sampler_count); + blob_write_uint32(blob, shader->bind_map.embedded_sampler_count); if (shader->stage == MESA_SHADER_KERNEL) { uint32_t packed = (uint32_t)shader->bind_map.kernel_args_size << 16 | (uint32_t)shader->bind_map.kernel_arg_count; @@ -270,6 +321,9 @@ anv_shader_bin_serialize(struct vk_pipeline_cache_object *object, blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor, shader->bind_map.sampler_count * sizeof(*shader->bind_map.sampler_to_descriptor)); + blob_write_bytes(blob, shader->bind_map.embedded_sampler_to_binding, + shader->bind_map.embedded_sampler_count * + sizeof(*shader->bind_map.embedded_sampler_to_binding)); blob_write_bytes(blob, shader->bind_map.kernel_args, shader->bind_map.kernel_arg_count * sizeof(*shader->bind_map.kernel_args)); @@ -326,6 +380,7 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache, blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1)); bind_map.surface_count = blob_read_uint32(blob); bind_map.sampler_count = blob_read_uint32(blob); + bind_map.embedded_sampler_count = blob_read_uint32(blob); if (stage == MESA_SHADER_KERNEL) { uint32_t packed = blob_read_uint32(blob); bind_map.kernel_args_size = (uint16_t)(packed >> 16); @@ -337,6 +392,9 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache, bind_map.sampler_to_descriptor = (void *) blob_read_bytes(blob, bind_map.sampler_count * sizeof(*bind_map.sampler_to_descriptor)); + bind_map.embedded_sampler_to_binding = (void *) + blob_read_bytes(blob, bind_map.embedded_sampler_count * + sizeof(*bind_map.embedded_sampler_to_binding)); bind_map.kernel_args = (void *) blob_read_bytes(blob, 
bind_map.kernel_arg_count * sizeof(*bind_map.kernel_args)); @@ -377,6 +435,7 @@ anv_device_search_for_kernel(struct anv_device *device, *user_cache_hit = object != NULL && cache_hit && cache != device->default_pipeline_cache; } + if (object == NULL) return NULL; @@ -392,6 +451,8 @@ anv_device_upload_kernel(struct anv_device *device, if (cache == NULL) cache = device->default_pipeline_cache; + + struct anv_shader_bin *shader = anv_shader_bin_create(device, params->stage, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 3b6bfb3d8c9..2ed7a411aab 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -180,6 +180,7 @@ struct intel_perf_query_result; #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */ #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 +#define MAX_EMBEDDED_SAMPLERS 2048 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we * use 64 here to avoid cache issues. 
This could most likely bring it back to * 32 if we had different virtual addresses for the different views on a given @@ -1300,6 +1301,7 @@ struct anv_queue { struct nir_xfb_info; struct anv_pipeline_bind_map; +struct anv_pipeline_sets_layout; struct anv_push_descriptor_info; enum anv_dynamic_push_bits; @@ -2885,6 +2887,27 @@ struct anv_pipeline_binding { }; }; +struct anv_pipeline_embedded_sampler_binding { + /** The descriptor set this sampler belongs to */ + uint8_t set; + + /** The binding in the set this sampler belongs to */ + uint32_t binding; + + /* No need to track binding elements for embedded samplers as: + * + * VUID-VkDescriptorSetLayoutBinding-flags-08006: + * + * "If VkDescriptorSetLayoutCreateInfo::flags contains + * VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT, + * descriptorCount must be less than or equal to 1" + */ + + uint32_t sampler_state[4]; + + uint32_t border_color[4]; +}; + struct anv_push_range { /** Index in the descriptor set */ uint32_t index; @@ -4122,11 +4145,13 @@ struct anv_pipeline_bind_map { uint32_t surface_count; uint32_t sampler_count; + uint32_t embedded_sampler_count; uint16_t kernel_args_size; uint16_t kernel_arg_count; struct anv_pipeline_binding * surface_to_descriptor; struct anv_pipeline_binding * sampler_to_descriptor; + struct anv_pipeline_embedded_sampler_binding* embedded_sampler_to_binding; struct brw_kernel_arg_desc * kernel_args; struct anv_push_range push_ranges[4]; @@ -4172,6 +4197,11 @@ struct anv_shader_upload_params { enum anv_dynamic_push_bits dynamic_push_values; }; +struct anv_embedded_sampler { + struct anv_state sampler_state; + struct anv_state border_color_state; +}; + struct anv_shader_bin { struct vk_pipeline_cache_object base; @@ -4193,22 +4223,14 @@ struct anv_shader_bin { struct anv_pipeline_bind_map bind_map; enum anv_dynamic_push_bits dynamic_push_values; + + /* Not saved in the pipeline cache. 
+ * + * Array of length bind_map.embedded_sampler_count + */ + struct anv_embedded_sampler *embedded_samplers; }; -struct anv_shader_bin * -anv_shader_bin_create(struct anv_device *device, - gl_shader_stage stage, - const void *key, uint32_t key_size, - const void *kernel, uint32_t kernel_size, - const struct brw_stage_prog_data *prog_data, - uint32_t prog_data_size, - const struct brw_compile_stats *stats, uint32_t num_stats, - const struct nir_xfb_info *xfb_info, - const struct anv_pipeline_bind_map *bind_map, - const struct anv_push_descriptor_info *push_desc_info, - enum anv_dynamic_push_bits dynamic_push_values); - - static inline struct anv_shader_bin * anv_shader_bin_ref(struct anv_shader_bin *shader) { @@ -5642,6 +5664,8 @@ struct anv_sampler { uint32_t state[3][4]; uint32_t db_state[3][4]; + /* Packed SAMPLER_STATE without the border color pointer. */ + uint32_t state_no_bc[3][4]; uint32_t n_planes; /* Blob of sampler state data which is guaranteed to be 32-byte aligned diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c index c5f9ff9dc37..45a27addce7 100644 --- a/src/intel/vulkan/genX_init_state.c +++ b/src/intel/vulkan/genX_init_state.c @@ -1241,7 +1241,11 @@ VkResult genX(CreateSampler)( sampler->vk.reduction_mode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE, }; - _mesa_sha1_update(&ctx, &sampler_state, sizeof(sampler_state)); + /* Pack a version of the SAMPLER_STATE without the border color. We'll + * use it to store into the shader cache and also for hashing. + */ + GENX(SAMPLER_STATE_pack)(NULL, sampler->state_no_bc[p], &sampler_state); + _mesa_sha1_update(&ctx, sampler->state_no_bc[p], sizeof(sampler->state_no_bc[p])); /* Put border color after the hashing, we don't want the allocation * order of border colors to influence the hash. 
We just need th @@ -1289,6 +1293,34 @@ VkResult genX(CreateSampler)( return VK_SUCCESS; } +void +genX(emit_embedded_sampler)(struct anv_device *device, + struct anv_embedded_sampler *sampler, + struct anv_pipeline_embedded_sampler_binding *binding) +{ + sampler->border_color_state = + anv_state_pool_alloc(&device->dynamic_state_db_pool, + sizeof(struct gfx8_border_color), 64); + memcpy(sampler->border_color_state.map, + binding->border_color, + sizeof(binding->border_color)); + + sampler->sampler_state = + anv_state_pool_alloc(&device->dynamic_state_db_pool, + ANV_SAMPLER_STATE_SIZE, 32); + + struct GENX(SAMPLER_STATE) sampler_state = { + .BorderColorPointer = sampler->border_color_state.offset, + }; + uint32_t dwords[GENX(SAMPLER_STATE_length)]; + GENX(SAMPLER_STATE_pack)(NULL, dwords, &sampler_state); + + for (uint32_t i = 0; i < GENX(SAMPLER_STATE_length); i++) { + ((uint32_t *)sampler->sampler_state.map)[i] = + dwords[i] | binding->sampler_state[i]; + } +} + /* Wa_14015814527 * * Check if task shader was utilized within cmd_buffer, if so