anv: Switch shaders to dedicated VMA allocator
Switched to the new VMA allocator that provides explicit GPU VA control via
util_vma_heap. This is architectural preparation for ray tracing
capture/replay, which requires the ability to reserve and allocate shaders at
specific VAs. The state pool's free-list design makes VA reservation difficult
to add, while the new chunk allocator is designed for explicit VA management
from the ground up.

Signed-off-by: Michael Cheng <michael.cheng@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38869>
commit 8ba197c9ef
parent 1fa327ac32
5 changed files with 88 additions and 34 deletions
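The commit message refers to util_vma_heap as the mechanism for explicit GPU
VA control. As a rough illustration only — not code from this commit — the
sketch below shows how the in-tree src/util/vma.h API lets an allocation
either take any free VA or be pinned to a VA recorded during capture, which is
the capture/replay requirement described above. The function name
sketch_alloc_shader_va and the replay_addr parameter are hypothetical; only
the util_vma_heap_* calls are the real utility API.

/* Minimal sketch, assuming the heap was set up once at device creation with
 * util_vma_heap_init(&heap, base_addr, heap_size). */
#include "util/vma.h"

static uint64_t
sketch_alloc_shader_va(struct util_vma_heap *heap, uint64_t size,
                       uint64_t align, uint64_t replay_addr)
{
   if (replay_addr != 0) {
      /* Replay path: reserve exactly the VA captured earlier. */
      if (!util_vma_heap_alloc_addr(heap, replay_addr, size))
         return 0; /* the requested range is already taken */
      return replay_addr;
   }

   /* Normal path: let the heap pick a suitably aligned VA
    * (util_vma_heap_alloc returns 0 on failure). */
   return util_vma_heap_alloc(heap, size, align);
}

A free-list state pool has no equivalent of util_vma_heap_alloc_addr, which is
the design reason given in the message for moving shaders to a dedicated heap.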
@@ -112,6 +112,32 @@ get_bo_from_pool(struct intel_batch_decode_bo *ret,
    return false;
 }

+/* Shader heap: find the backing BO for a GPU VA */
+static bool
+get_bo_from_shader_heap(struct intel_batch_decode_bo *ret,
+                        const struct anv_device *device,
+                        uint64_t address)
+{
+   unsigned i;
+   BITSET_FOREACH_SET(i, device->shader_heap.allocated_bos, ANV_SHADER_HEAP_MAX_BOS) {
+      struct anv_bo *bo = device->shader_heap.bos[i].bo;
+
+      /* Match the 48b-addressing convention used elsewhere */
+      uint64_t base = intel_48b_address(bo->offset);
+      uint64_t size = bo->size;
+
+      if (address >= base && address < base + size) {
+         *ret = (struct intel_batch_decode_bo) {
+            .addr = base,
+            .size = size,
+            .map = bo->map,
+         };
+         return true;
+      }
+   }
+   return false;
+}
+
 /* Finding a buffer for batch decoding */
 static struct intel_batch_decode_bo
 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)

@@ -123,7 +149,7 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)

    if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
       return ret_bo;
-   if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
+   if (get_bo_from_shader_heap(&ret_bo, device, address))
       return ret_bo;
    if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
       return ret_bo;

@@ -551,13 +577,9 @@ VkResult anv_CreateDevice(
    if (result != VK_SUCCESS)
       goto fail_dynamic_state_pool;

-   result = anv_state_pool_init(&device->instruction_state_pool, device,
-                                &(struct anv_state_pool_params) {
-                                   .name = "instruction pool",
-                                   .base_address = device->physical->va.instruction_state_pool.addr,
-                                   .block_size = 16384,
-                                   .max_size = device->physical->va.instruction_state_pool.size,
-                                });
+   result = anv_shader_heap_init(&device->shader_heap, device,
+                                 device->physical->va.instruction_state_pool,
+                                 21 /* 2MiB */, 27 /* 64MiB */);
    if (result != VK_SUCCESS)
       goto fail_custom_border_color_pool;

@@ -573,7 +595,7 @@ VkResult anv_CreateDevice(
                                    .max_size = device->physical->va.scratch_surface_state_pool.size,
                                 });
    if (result != VK_SUCCESS)
-      goto fail_instruction_state_pool;
+      goto fail_shader_vma_heap;

    result = anv_state_pool_init(&device->internal_surface_state_pool, device,
                                 &(struct anv_state_pool_params) {

@@ -1094,8 +1116,8 @@ VkResult anv_CreateDevice(
 fail_scratch_surface_state_pool:
    if (device->info->verx10 >= 125)
       anv_state_pool_finish(&device->scratch_surface_state_pool);
-fail_instruction_state_pool:
-   anv_state_pool_finish(&device->instruction_state_pool);
+fail_shader_vma_heap:
+   anv_shader_heap_finish(&device->shader_heap);
 fail_custom_border_color_pool:
    anv_state_reserved_array_pool_finish(&device->custom_border_colors);
 fail_dynamic_state_pool:

@@ -1251,7 +1273,8 @@ void anv_DestroyDevice(
    anv_state_pool_finish(&device->internal_surface_state_pool);
    if (device->physical->indirect_descriptors)
       anv_state_pool_finish(&device->bindless_surface_state_pool);
-   anv_state_pool_finish(&device->instruction_state_pool);
+
+   anv_shader_heap_finish(&device->shader_heap);
    anv_state_pool_finish(&device->dynamic_state_pool);
    anv_state_pool_finish(&device->general_state_pool);

@@ -59,8 +59,7 @@ anv_shader_internal_destroy(struct vk_device *_device,
    for (uint32_t i = 0; i < shader->bind_map.embedded_sampler_count; i++)
       anv_embedded_sampler_unref(device, shader->embedded_samplers[i]);

-   ANV_DMR_SP_FREE(&device->vk.base, &device->instruction_state_pool, shader->kernel);
-   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
+   anv_shader_heap_free(&device->shader_heap, shader->kernel);
    vk_pipeline_cache_object_finish(&shader->base);
    vk_free(&device->vk.alloc, shader);
 }

@@ -96,6 +95,7 @@ anv_shader_internal_create(struct anv_device *device,
    VK_MULTIALLOC_DECL(&ma, struct intel_shader_reloc, prog_data_relocs,
                       prog_data_in->num_relocs);
    VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
+   VK_MULTIALLOC_DECL(&ma, void, code, kernel_size);

    VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
                            xfb_info_in == NULL ? 0 :

@@ -121,17 +121,27 @@ anv_shader_internal_create(struct anv_device *device,
    shader->stage = stage;

-   shader->kernel =
-      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
-   ANV_DMR_SP_ALLOC(&device->vk.base, &device->instruction_state_pool, shader->kernel);
-   memcpy(shader->kernel.map, kernel_data, kernel_size);
+   shader->code = code;
+   memcpy(shader->code, kernel_data, kernel_size);
+
+   shader->kernel = anv_shader_heap_alloc(&device->shader_heap,
+                                          kernel_size, 64, false, 0);
+   if (shader->kernel.alloc_size == 0) {
+      vk_pipeline_cache_object_finish(&shader->base);
+      vk_free(&device->vk.alloc, shader);
+      return NULL;
+   }
+
+   anv_shader_heap_upload(&device->shader_heap, shader->kernel,
+                          kernel_data, kernel_size);

    shader->kernel_size = kernel_size;

    if (bind_map->embedded_sampler_count > 0) {
       shader->embedded_samplers = embedded_samplers;
       if (anv_device_get_embedded_samplers(device, embedded_samplers, bind_map) != VK_SUCCESS) {
-         ANV_DMR_SP_FREE(&device->vk.base, &device->instruction_state_pool, shader->kernel);
-         anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
+         anv_shader_heap_free(&device->shader_heap, shader->kernel);
+         vk_pipeline_cache_object_finish(&shader->base);
          vk_free(&device->vk.alloc, shader);
          return NULL;
       }

@@ -192,7 +202,7 @@ anv_shader_internal_serialize(struct vk_pipeline_cache_object *object,
    blob_write_uint32(blob, shader->stage);

    blob_write_uint32(blob, shader->kernel_size);
-   blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);
+   blob_write_bytes(blob, shader->code, shader->kernel_size);

    blob_write_uint32(blob, shader->prog_data_size);

@@ -1285,7 +1285,7 @@ struct anv_shader {
    void *code;

-   struct anv_state kernel;
+   struct anv_shader_alloc kernel;

    const struct brw_stage_prog_data *prog_data;

@@ -2540,7 +2540,6 @@ struct anv_device {
    struct anv_state_pool general_state_pool;
    struct anv_state_pool aux_tt_pool;
    struct anv_state_pool dynamic_state_pool;
-   struct anv_state_pool instruction_state_pool;
    struct anv_state_pool binding_table_pool;
    struct anv_state_pool scratch_surface_state_pool;
    struct anv_state_pool internal_surface_state_pool;

@@ -5220,7 +5219,9 @@ struct anv_shader_internal {
    mesa_shader_stage stage;

-   struct anv_state kernel;
+   void *code;
+
+   struct anv_shader_alloc kernel;
    uint32_t kernel_size;

    const struct brw_stage_prog_data *prog_data;

@@ -23,7 +23,7 @@ anv_shader_destroy(struct vk_device *vk_device,
    for (uint32_t i = 0; i < shader->bind_map.embedded_sampler_count; i++)
       anv_embedded_sampler_unref(device, shader->embedded_samplers[i]);

-   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
+   anv_shader_heap_free(&device->shader_heap, shader->kernel);
    anv_reloc_list_finish(&shader->relocs);
    vk_shader_free(vk_device, pAllocator, vk_shader);
 }

@@ -629,10 +629,9 @@ anv_shader_create(struct anv_device *device,
    memcpy(shader->code, shader_data->code,
           shader_data->prog_data.base.program_size);

-   shader->kernel =
-      anv_state_pool_alloc(&device->instruction_state_pool,
-                           shader_data->prog_data.base.program_size, 64);
-   ANV_DMR_SP_ALLOC(&device->vk.base, &device->instruction_state_pool, shader->kernel);
+   shader->kernel = anv_shader_heap_alloc(&device->shader_heap,
+                                          shader_data->prog_data.base.program_size,
+                                          64, false, 0);
    if (shader->kernel.alloc_size == 0) {
       result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
       goto error_embedded_samplers;

@@ -688,8 +687,9 @@ anv_shader_create(struct anv_device *device,
    if (result != VK_SUCCESS)
       goto error_state;

-   memcpy(shader->kernel.map, shader_data->code,
-          shader_data->prog_data.base.program_size);
+   anv_shader_heap_upload(&device->shader_heap,
+                          shader->kernel, shader_data->code,
+                          shader_data->prog_data.base.program_size);

    if (mesa_shader_stage_is_rt(shader->vk.stage)) {
       const struct brw_bs_prog_data *bs_prog_data =

@@ -717,8 +717,7 @@ anv_shader_create(struct anv_device *device,
    return VK_SUCCESS;

 error_state:
-   ANV_DMR_SP_FREE(&device->vk.base, &device->instruction_state_pool, shader->kernel);
-   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
+   anv_shader_heap_free(&device->shader_heap, shader->kernel);
 error_embedded_samplers:
    for (uint32_t s = 0; s < shader->bind_map.embedded_sampler_count; s++)
       anv_embedded_sampler_unref(device, shader->embedded_samplers[s]);

@@ -352,6 +352,27 @@ out:
    return result;
 }

+static VkResult
+pin_shader_heap(struct anv_device *device,
+                struct anv_execbuf *execbuf,
+                struct anv_shader_heap *heap)
+{
+   VkResult result = VK_SUCCESS;
+
+   simple_mtx_lock(&heap->mutex);
+
+   unsigned i;
+   BITSET_FOREACH_SET(i, heap->allocated_bos, ANV_SHADER_HEAP_MAX_BOS) {
+      result = anv_execbuf_add_bo(device, execbuf, heap->bos[i].bo, NULL, 0);
+      if (result != VK_SUCCESS)
+         goto out;
+   }
+
+out:
+   simple_mtx_unlock(&heap->mutex);
+   return result;
+}
+
 static uint32_t
 calc_batch_start_offset(struct anv_bo *bo)
 {

@@ -414,7 +435,7 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
    if (result != VK_SUCCESS)
       return result;

-   result = pin_state_pool(device, execbuf, &device->instruction_state_pool);
+   result = pin_shader_heap(device, execbuf, &device->shader_heap);
    if (result != VK_SUCCESS)
       return result;