diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 1d8d02b275a..6cb87a9ced8 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -112,6 +112,32 @@ get_bo_from_pool(struct intel_batch_decode_bo *ret, return false; } +/* Shader heap: find the backing BO for a GPU VA. Walks every BO flagged in shader_heap.allocated_bos and tests address against [base, base + size); on a hit fills *ret with that base, size and bo->map and returns true, otherwise returns false. */ +static bool +get_bo_from_shader_heap(struct intel_batch_decode_bo *ret, + const struct anv_device *device, + uint64_t address) +{ + unsigned i; + BITSET_FOREACH_SET(i, device->shader_heap.allocated_bos, ANV_SHADER_HEAP_MAX_BOS) { + struct anv_bo *bo = device->shader_heap.bos[i].bo; + + /* Match the 48b-addressing convention used elsewhere: the range check uses bo->offset as returned by intel_48b_address(), not the raw offset */ + uint64_t base = intel_48b_address(bo->offset); + uint64_t size = bo->size; + + if (address >= base && address < base + size) { + *ret = (struct intel_batch_decode_bo) { + .addr = base, + .size = size, + .map = bo->map, + }; + return true; + } + } + return false; +} + /* Finding a buffer for batch decoding */ static struct intel_batch_decode_bo decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) @@ -123,7 +149,7 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address)) return ret_bo; - if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address)) + if (get_bo_from_shader_heap(&ret_bo, device, address)) return ret_bo; if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address)) return ret_bo; @@ -551,13 +577,9 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_dynamic_state_pool; - result = anv_state_pool_init(&device->instruction_state_pool, device, - &(struct anv_state_pool_params) { - .name = "instruction pool", - .base_address = device->physical->va.instruction_state_pool.addr, - .block_size = 16384, - .max_size = device->physical->va.instruction_state_pool.size, - }); + result = anv_shader_heap_init(&device->shader_heap, device, + 
device->physical->va.instruction_state_pool, + 21 /* 2MiB */, 27 /* 128MiB; NOTE(review): this was annotated 64MiB, but 2^27 bytes is 128MiB (64MiB would be 26), assuming both arguments are log2 sizes like the 2MiB one - confirm which was intended */); if (result != VK_SUCCESS) goto fail_custom_border_color_pool; @@ -573,7 +595,7 @@ VkResult anv_CreateDevice( .max_size = device->physical->va.scratch_surface_state_pool.size, }); if (result != VK_SUCCESS) - goto fail_instruction_state_pool; + goto fail_shader_vma_heap; result = anv_state_pool_init(&device->internal_surface_state_pool, device, &(struct anv_state_pool_params) { @@ -1094,8 +1116,8 @@ VkResult anv_CreateDevice( fail_scratch_surface_state_pool: if (device->info->verx10 >= 125) anv_state_pool_finish(&device->scratch_surface_state_pool); - fail_instruction_state_pool: - anv_state_pool_finish(&device->instruction_state_pool); + fail_shader_vma_heap: + anv_shader_heap_finish(&device->shader_heap); fail_custom_border_color_pool: anv_state_reserved_array_pool_finish(&device->custom_border_colors); fail_dynamic_state_pool: @@ -1251,7 +1273,8 @@ void anv_DestroyDevice( anv_state_pool_finish(&device->internal_surface_state_pool); if (device->physical->indirect_descriptors) anv_state_pool_finish(&device->bindless_surface_state_pool); - anv_state_pool_finish(&device->instruction_state_pool); + + anv_shader_heap_finish(&device->shader_heap); anv_state_pool_finish(&device->dynamic_state_pool); anv_state_pool_finish(&device->general_state_pool); diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 1146284c2c1..bf60fb0d186 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -59,8 +59,7 @@ anv_shader_internal_destroy(struct vk_device *_device, for (uint32_t i = 0; i < shader->bind_map.embedded_sampler_count; i++) anv_embedded_sampler_unref(device, shader->embedded_samplers[i]); - ANV_DMR_SP_FREE(&device->vk.base, &device->instruction_state_pool, shader->kernel); - anv_state_pool_free(&device->instruction_state_pool, shader->kernel); + anv_shader_heap_free(&device->shader_heap, shader->kernel); 
vk_pipeline_cache_object_finish(&shader->base); vk_free(&device->vk.alloc, shader); } @@ -96,6 +95,7 @@ anv_shader_internal_create(struct anv_device *device, VK_MULTIALLOC_DECL(&ma, struct intel_shader_reloc, prog_data_relocs, prog_data_in->num_relocs); VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params); + VK_MULTIALLOC_DECL(&ma, void, code, kernel_size); VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info, xfb_info_in == NULL ? 0 : @@ -121,17 +121,27 @@ anv_shader_internal_create(struct anv_device *device, shader->stage = stage; - shader->kernel = - anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64); - ANV_DMR_SP_ALLOC(&device->vk.base, &device->instruction_state_pool, shader->kernel); - memcpy(shader->kernel.map, kernel_data, kernel_size); + shader->code = code; + memcpy(shader->code, kernel_data, kernel_size); + + shader->kernel = anv_shader_heap_alloc(&device->shader_heap, + kernel_size, 64, false, 0); + if (shader->kernel.alloc_size == 0) { + vk_pipeline_cache_object_finish(&shader->base); + vk_free(&device->vk.alloc, shader); + return NULL; + } + + anv_shader_heap_upload(&device->shader_heap, shader->kernel, + kernel_data, kernel_size); + shader->kernel_size = kernel_size; if (bind_map->embedded_sampler_count > 0) { shader->embedded_samplers = embedded_samplers; if (anv_device_get_embedded_samplers(device, embedded_samplers, bind_map) != VK_SUCCESS) { - ANV_DMR_SP_FREE(&device->vk.base, &device->instruction_state_pool, shader->kernel); - anv_state_pool_free(&device->instruction_state_pool, shader->kernel); + anv_shader_heap_free(&device->shader_heap, shader->kernel); + vk_pipeline_cache_object_finish(&shader->base); vk_free(&device->vk.alloc, shader); return NULL; } @@ -192,7 +202,7 @@ anv_shader_internal_serialize(struct vk_pipeline_cache_object *object, blob_write_uint32(blob, shader->stage); blob_write_uint32(blob, shader->kernel_size); - blob_write_bytes(blob, shader->kernel.map, shader->kernel_size); + 
blob_write_bytes(blob, shader->code, shader->kernel_size); blob_write_uint32(blob, shader->prog_data_size); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 5015156462d..d7d13b78158 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1285,7 +1285,7 @@ struct anv_shader { void *code; - struct anv_state kernel; + struct anv_shader_alloc kernel; const struct brw_stage_prog_data *prog_data; @@ -2540,7 +2540,6 @@ struct anv_device { struct anv_state_pool general_state_pool; struct anv_state_pool aux_tt_pool; struct anv_state_pool dynamic_state_pool; - struct anv_state_pool instruction_state_pool; struct anv_state_pool binding_table_pool; struct anv_state_pool scratch_surface_state_pool; struct anv_state_pool internal_surface_state_pool; @@ -5220,7 +5219,9 @@ struct anv_shader_internal { mesa_shader_stage stage; - struct anv_state kernel; + void *code; + + struct anv_shader_alloc kernel; uint32_t kernel_size; const struct brw_stage_prog_data *prog_data; diff --git a/src/intel/vulkan/anv_shader.c b/src/intel/vulkan/anv_shader.c index afdeba16eac..219cb5a7b4c 100644 --- a/src/intel/vulkan/anv_shader.c +++ b/src/intel/vulkan/anv_shader.c @@ -23,7 +23,7 @@ anv_shader_destroy(struct vk_device *vk_device, for (uint32_t i = 0; i < shader->bind_map.embedded_sampler_count; i++) anv_embedded_sampler_unref(device, shader->embedded_samplers[i]); - anv_state_pool_free(&device->instruction_state_pool, shader->kernel); + anv_shader_heap_free(&device->shader_heap, shader->kernel); anv_reloc_list_finish(&shader->relocs); vk_shader_free(vk_device, pAllocator, vk_shader); } @@ -629,10 +629,9 @@ anv_shader_create(struct anv_device *device, memcpy(shader->code, shader_data->code, shader_data->prog_data.base.program_size); - shader->kernel = - anv_state_pool_alloc(&device->instruction_state_pool, - shader_data->prog_data.base.program_size, 64); - ANV_DMR_SP_ALLOC(&device->vk.base, &device->instruction_state_pool, 
shader->kernel); + shader->kernel = anv_shader_heap_alloc(&device->shader_heap, + shader_data->prog_data.base.program_size, + 64, false, 0); if (shader->kernel.alloc_size == 0) { result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); goto error_embedded_samplers; @@ -688,8 +687,9 @@ anv_shader_create(struct anv_device *device, if (result != VK_SUCCESS) goto error_state; - memcpy(shader->kernel.map, shader_data->code, - shader_data->prog_data.base.program_size); + anv_shader_heap_upload(&device->shader_heap, + shader->kernel, shader_data->code, + shader_data->prog_data.base.program_size); if (mesa_shader_stage_is_rt(shader->vk.stage)) { const struct brw_bs_prog_data *bs_prog_data = @@ -717,8 +717,7 @@ anv_shader_create(struct anv_device *device, return VK_SUCCESS; error_state: - ANV_DMR_SP_FREE(&device->vk.base, &device->instruction_state_pool, shader->kernel); - anv_state_pool_free(&device->instruction_state_pool, shader->kernel); + anv_shader_heap_free(&device->shader_heap, shader->kernel); error_embedded_samplers: for (uint32_t s = 0; s < shader->bind_map.embedded_sampler_count; s++) anv_embedded_sampler_unref(device, shader->embedded_samplers[s]); diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c index 6fb52448cd3..1fd4e76041c 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.c +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -352,6 +352,27 @@ out: return result; } +static VkResult +pin_shader_heap(struct anv_device *device, + struct anv_execbuf *execbuf, + struct anv_shader_heap *heap) +{ /* Adds each BO flagged in heap->allocated_bos to execbuf through anv_execbuf_add_bo(). heap->mutex is held for the whole walk and released on every exit path; the first non-VK_SUCCESS result ends the walk and is returned as-is. */ + VkResult result = VK_SUCCESS; + + simple_mtx_lock(&heap->mutex); + + unsigned i; + BITSET_FOREACH_SET(i, heap->allocated_bos, ANV_SHADER_HEAP_MAX_BOS) { + result = anv_execbuf_add_bo(device, execbuf, heap->bos[i].bo, NULL, 0); + if (result != VK_SUCCESS) + goto out; + } + +out: + simple_mtx_unlock(&heap->mutex); + return result; +} + static uint32_t calc_batch_start_offset(struct anv_bo *bo) { @@ -414,7 +435,7 @@ 
setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, if (result != VK_SUCCESS) return result; - result = pin_state_pool(device, execbuf, &device->instruction_state_pool); + result = pin_shader_heap(device, execbuf, &device->shader_heap); if (result != VK_SUCCESS) return result;