From 4ceaed7839afd724b2a2f10f6879f54199c041ad Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 24 Oct 2022 14:12:28 +0300 Subject: [PATCH] anv: split internal surface states from descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Intel HW we use the same mechanism for internal operations surfaces as well as application surfaces (VkDescriptor). This change splits the surface pool in 2, one part dedicated to internal allocations, the other to application VkDescriptors. To do so, the STATE_BASE_ADDRESS::SurfaceStateBaseAddress points to a 4Gb area, with the following layout : - 1Gb of binding table pool - 2Gb of internal surface states - 1Gb of bindless surface states That way any entry from the binding table can refer to both internal & bindless surface states but none of the driver allocations interfere with the allocation of the application. Based off a change from Sviatoslav Peleshko. v2: Allocate image view null surface state from bindless heap (Sviatoslav) Removed debug stuff (Sviatoslav) Signed-off-by: Lionel Landwerlin Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7110 Cc: mesa-stable Tested-by: Sviatoslav Peleshko Reviewed-by: Tapani Pälli Part-of: --- src/intel/vulkan/anv_allocator.c | 6 +-- src/intel/vulkan/anv_batch_chain.c | 6 ++- src/intel/vulkan/anv_cmd_buffer.c | 4 +- src/intel/vulkan/anv_descriptor_set.c | 11 +++-- src/intel/vulkan/anv_device.c | 35 ++++++++++------ src/intel/vulkan/anv_image.c | 59 ++++++++++++++------------- src/intel/vulkan/anv_private.h | 58 ++++++++++++++++---------- src/intel/vulkan/genX_blorp_exec.c | 2 +- src/intel/vulkan/genX_cmd_buffer.c | 33 ++++++++------- src/intel/vulkan/genX_state.c | 4 +- 10 files changed, 127 insertions(+), 91 deletions(-) diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 16c3ce4726d..57c554d63b1 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ 
-1210,7 +1210,7 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool for (unsigned i = 0; i < 16; i++) { if (pool->surf_states[i].map != NULL) { - anv_state_pool_free(&device->surface_state_pool, + anv_state_pool_free(&device->internal_surface_state_pool, pool->surf_states[i]); } } @@ -1303,7 +1303,7 @@ anv_scratch_pool_get_surf(struct anv_device *device, struct anv_address addr = { .bo = bo }; struct anv_state state = - anv_state_pool_alloc(&device->surface_state_pool, + anv_state_pool_alloc(&device->internal_surface_state_pool, device->isl_dev.ss.size, 64); isl_buffer_fill_state(&device->isl_dev, state.map, @@ -1318,7 +1318,7 @@ anv_scratch_pool_get_surf(struct anv_device *device, uint32_t current = p_atomic_cmpxchg(&pool->surfs[scratch_size_log2], 0, state.offset); if (current) { - anv_state_pool_free(&device->surface_state_pool, state); + anv_state_pool_free(&device->internal_surface_state_pool, state); return current; } else { pool->surf_states[scratch_size_log2] = state; diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index fcdce4dff43..8615f2195dd 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1443,7 +1443,11 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, } /* Add all the global BOs to the object list for softpin case. 
*/ - result = pin_state_pool(device, execbuf, &device->surface_state_pool); + result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool); if (result != VK_SUCCESS) return result; diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 5f1672576fe..969d34473b6 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -115,7 +115,7 @@ anv_create_cmd_buffer(struct vk_command_pool *pool, goto fail_vk; anv_state_stream_init(&cmd_buffer->surface_state_stream, - &device->surface_state_pool, 4096); + &device->internal_surface_state_pool, 4096); anv_state_stream_init(&cmd_buffer->dynamic_state_stream, &device->dynamic_state_pool, 16384); anv_state_stream_init(&cmd_buffer->general_state_stream, @@ -194,7 +194,7 @@ anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer, anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_init(&cmd_buffer->surface_state_stream, - &cmd_buffer->device->surface_state_pool, 4096); + &cmd_buffer->device->internal_surface_state_pool, 4096); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_state_stream_init(&cmd_buffer->dynamic_state_stream, diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index c452d2e4ea4..71bca53b9ca 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -924,8 +924,12 @@ VkResult anv_CreateDescriptorPool( pool->bo = NULL; } + /* All the surface states allocated by the descriptor pool are internal. We + * have to allocate them to handle the fact that we do not have surface + * states for VkBuffers. 
+ */ anv_state_stream_init(&pool->surface_state_stream, - &device->surface_state_pool, 4096); + &device->internal_surface_state_pool, 4096); pool->surface_state_free_list = NULL; list_inithead(&pool->desc_sets); @@ -984,7 +988,7 @@ VkResult anv_ResetDescriptorPool( anv_state_stream_finish(&pool->surface_state_stream); anv_state_stream_init(&pool->surface_state_stream, - &device->surface_state_pool, 4096); + &device->internal_surface_state_pool, 4096); pool->surface_state_free_list = NULL; return VK_SUCCESS; @@ -1062,7 +1066,8 @@ anv_descriptor_pool_alloc_state(struct anv_descriptor_pool *pool) assert(state.alloc_size == 64); return state; } else { - return anv_state_stream_alloc(&pool->surface_state_stream, 64, 64); + struct anv_state state = anv_state_stream_alloc(&pool->surface_state_stream, 64, 64); + return state; } } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 9a4bd921b31..1d5f32f2930 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -3062,7 +3062,9 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) return ret_bo; if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address)) return ret_bo; - if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address)) + if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address)) + return ret_bo; + if (get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address)) return ret_bo; if (!device->cmd_buffer_being_decoded) @@ -3292,7 +3294,7 @@ VkResult anv_CreateDevice( decode_get_bo, NULL, device); device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS; - device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS; + device->decoder_ctx.surface_base = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS; device->decoder_ctx.instruction_base = INSTRUCTION_STATE_POOL_MIN_ADDRESS; } @@ -3434,12 +3436,18 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto 
fail_dynamic_state_pool; - result = anv_state_pool_init(&device->surface_state_pool, device, - "surface state pool", - SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096); + result = anv_state_pool_init(&device->internal_surface_state_pool, device, + "internal surface state pool", + INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096); if (result != VK_SUCCESS) goto fail_instruction_state_pool; + result = anv_state_pool_init(&device->bindless_surface_state_pool, device, + "bindless surface state pool", + BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096); + if (result != VK_SUCCESS) + goto fail_internal_surface_state_pool; + if (device->info->verx10 >= 125) { /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding * table its own base address separately from surface state base. @@ -3450,16 +3458,16 @@ VkResult anv_CreateDevice( BINDING_TABLE_POOL_BLOCK_SIZE); } else { int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS - - (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS; + (int64_t)INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS; assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0); result = anv_state_pool_init(&device->binding_table_pool, device, "binding table pool", - SURFACE_STATE_POOL_MIN_ADDRESS, + INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, bt_pool_offset, BINDING_TABLE_POOL_BLOCK_SIZE); } if (result != VK_SUCCESS) - goto fail_surface_state_pool; + goto fail_bindless_surface_state_pool; if (device->info->has_aux_map) { device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator, @@ -3540,7 +3548,7 @@ VkResult anv_CreateDevice( * to zero and they have a valid descriptor. 
*/ device->null_surface_state = - anv_state_pool_alloc(&device->surface_state_pool, + anv_state_pool_alloc(&device->internal_surface_state_pool, device->isl_dev.ss.size, device->isl_dev.ss.align); isl_null_fill_state(&device->isl_dev, device->null_surface_state.map, @@ -3637,8 +3645,10 @@ VkResult anv_CreateDevice( } fail_binding_table_pool: anv_state_pool_finish(&device->binding_table_pool); - fail_surface_state_pool: - anv_state_pool_finish(&device->surface_state_pool); + fail_bindless_surface_state_pool: + anv_state_pool_finish(&device->bindless_surface_state_pool); + fail_internal_surface_state_pool: + anv_state_pool_finish(&device->internal_surface_state_pool); fail_instruction_state_pool: anv_state_pool_finish(&device->instruction_state_pool); fail_dynamic_state_pool: @@ -3727,7 +3737,8 @@ void anv_DestroyDevice( } anv_state_pool_finish(&device->binding_table_pool); - anv_state_pool_finish(&device->surface_state_pool); + anv_state_pool_finish(&device->internal_surface_state_pool); + anv_state_pool_finish(&device->bindless_surface_state_pool); anv_state_pool_finish(&device->instruction_state_pool); anv_state_pool_finish(&device->dynamic_state_pool); anv_state_pool_finish(&device->general_state_pool); diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 0834e6d7c1c..86d0e322203 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -2334,9 +2334,9 @@ anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo, static struct anv_state -alloc_surface_state(struct anv_device *device) +alloc_bindless_surface_state(struct anv_device *device) { - return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + return anv_state_pool_alloc(&device->bindless_surface_state_pool, 64, 64); } static enum isl_channel_select @@ -2610,8 +2610,10 @@ anv_CreateImageView(VkDevice _device, if (iview->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) { - 
iview->planes[vplane].optimal_sampler_surface_state.state = alloc_surface_state(device); - iview->planes[vplane].general_sampler_surface_state.state = alloc_surface_state(device); + iview->planes[vplane].optimal_sampler_surface_state.state = + alloc_bindless_surface_state(device); + iview->planes[vplane].general_sampler_surface_state.state = + alloc_bindless_surface_state(device); enum isl_aux_usage general_aux_usage = anv_layout_to_aux_usage(device->info, image, 1UL << iaspect_bit, @@ -2643,7 +2645,8 @@ anv_CreateImageView(VkDevice _device, anv_layout_to_aux_usage(device->info, image, 1UL << iaspect_bit, VK_IMAGE_USAGE_STORAGE_BIT, VK_IMAGE_LAYOUT_GENERAL); - iview->planes[vplane].storage_surface_state.state = alloc_surface_state(device); + iview->planes[vplane].storage_surface_state.state = + alloc_bindless_surface_state(device); anv_image_fill_surface_state(device, image, 1ULL << iaspect_bit, &iview->planes[vplane].isl, ISL_SURF_USAGE_STORAGE_BIT, @@ -2651,10 +2654,9 @@ anv_CreateImageView(VkDevice _device, 0, &iview->planes[vplane].storage_surface_state); + iview->planes[vplane].lowered_storage_surface_state.state = + alloc_bindless_surface_state(device); if (isl_is_storage_image_format(format.isl_format)) { - iview->planes[vplane].lowered_storage_surface_state.state = - alloc_surface_state(device); - anv_image_fill_surface_state(device, image, 1ULL << iaspect_bit, &iview->planes[vplane].isl, ISL_SURF_USAGE_STORAGE_BIT, @@ -2670,8 +2672,13 @@ anv_CreateImageView(VkDevice _device, */ assert(isl_format_supports_typed_writes(device->info, format.isl_format)); - iview->planes[vplane].lowered_storage_surface_state.state = - device->null_surface_state; + isl_null_fill_state(&device->isl_dev, + iview->planes[vplane].lowered_storage_surface_state.state.map, + .size = { + .w = image->vk.extent.width, + .h = image->vk.extent.height, + .d = image->vk.extent.depth, + }); } } } @@ -2692,27 +2699,23 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview, return; for 
(uint32_t plane = 0; plane < iview->n_planes; plane++) { - /* Check offset instead of alloc_size because this they might be - * device->null_surface_state which always has offset == 0. We don't - * own that one so we don't want to accidentally free it. - */ - if (iview->planes[plane].optimal_sampler_surface_state.state.offset) { - anv_state_pool_free(&device->surface_state_pool, + if (iview->planes[plane].optimal_sampler_surface_state.state.alloc_size) { + anv_state_pool_free(&device->bindless_surface_state_pool, iview->planes[plane].optimal_sampler_surface_state.state); } - if (iview->planes[plane].general_sampler_surface_state.state.offset) { - anv_state_pool_free(&device->surface_state_pool, + if (iview->planes[plane].general_sampler_surface_state.state.alloc_size) { + anv_state_pool_free(&device->bindless_surface_state_pool, iview->planes[plane].general_sampler_surface_state.state); } - if (iview->planes[plane].storage_surface_state.state.offset) { - anv_state_pool_free(&device->surface_state_pool, + if (iview->planes[plane].storage_surface_state.state.alloc_size) { + anv_state_pool_free(&device->bindless_surface_state_pool, iview->planes[plane].storage_surface_state.state); } - if (iview->planes[plane].lowered_storage_surface_state.state.offset) { - anv_state_pool_free(&device->surface_state_pool, + if (iview->planes[plane].lowered_storage_surface_state.state.alloc_size) { + anv_state_pool_free(&device->bindless_surface_state_pool, iview->planes[plane].lowered_storage_surface_state.state); } } @@ -2748,7 +2751,7 @@ anv_CreateBufferView(VkDevice _device, view->address = anv_address_add(buffer->address, pCreateInfo->offset); if (buffer->vk.usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { - view->surface_state = alloc_surface_state(device); + view->surface_state = alloc_bindless_surface_state(device); anv_fill_buffer_surface_state(device, view->surface_state, format.isl_format, format.swizzle, @@ -2759,8 +2762,8 @@ anv_CreateBufferView(VkDevice _device, } if 
(buffer->vk.usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { - view->storage_surface_state = alloc_surface_state(device); - view->lowered_storage_surface_state = alloc_surface_state(device); + view->storage_surface_state = alloc_bindless_surface_state(device); + view->lowered_storage_surface_state = alloc_bindless_surface_state(device); anv_fill_buffer_surface_state(device, view->storage_surface_state, format.isl_format, format.swizzle, @@ -2808,15 +2811,15 @@ anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, return; if (view->surface_state.alloc_size > 0) - anv_state_pool_free(&device->surface_state_pool, + anv_state_pool_free(&device->bindless_surface_state_pool, view->surface_state); if (view->storage_surface_state.alloc_size > 0) - anv_state_pool_free(&device->surface_state_pool, + anv_state_pool_free(&device->bindless_surface_state_pool, view->storage_surface_state); if (view->lowered_storage_surface_state.alloc_size > 0) - anv_state_pool_free(&device->surface_state_pool, + anv_state_pool_free(&device->bindless_surface_state_pool, view->lowered_storage_surface_state); vk_object_free(&device->vk, pAllocator, view); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 2ff0c906348..5a2a12c203f 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -139,31 +139,34 @@ struct intel_perf_query_result; * heap. This is to work around a VF cache issue described in a comment in * anv_physical_device_init_heaps. * - * (2) the binding table pool is located at lower addresses than the surface - * state pool, within a 4 GiB range. This allows surface state base addresses - * to cover both binding tables (16 bit offsets) and surface states (32 bit - * offsets). + * (2) the binding table pool is located at lower addresses than the BT + * (binding table) surface state pool, within a 4 GiB range which also + * contains the bindless surface state pool. 
This allows surface state base + * addresses to cover both binding tables (16 bit offsets), the internal + * surface states (32 bit offsets) and the bindless surface states. * * (3) the last 4 GiB of the address space is withheld from the high * heap. Various hardware units will read past the end of an object for * various reasons. This healthy margin prevents reads from wrapping around * 48-bit addresses. */ -#define GENERAL_STATE_POOL_MIN_ADDRESS 0x000000200000ULL /* 2 MiB */ -#define GENERAL_STATE_POOL_MAX_ADDRESS 0x00003fffffffULL -#define LOW_HEAP_MIN_ADDRESS 0x000040000000ULL /* 1 GiB */ -#define LOW_HEAP_MAX_ADDRESS 0x00007fffffffULL -#define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */ -#define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL -#define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */ -#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL -#define SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */ -#define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL -#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */ -#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL -#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ -#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x0009bfffffffULL -#define HIGH_HEAP_MIN_ADDRESS 0x0009c0000000ULL /* 39 GiB */ +#define GENERAL_STATE_POOL_MIN_ADDRESS 0x000000200000ULL /* 2 MiB */ +#define GENERAL_STATE_POOL_MAX_ADDRESS 0x00003fffffffULL +#define LOW_HEAP_MIN_ADDRESS 0x000040000000ULL /* 1 GiB */ +#define LOW_HEAP_MAX_ADDRESS 0x00007fffffffULL +#define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */ +#define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL +#define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */ +#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL +#define INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */ +#define INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL 
+#define BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ +#define BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001ffffffffULL +#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000200000000ULL /* 8 GiB */ +#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x00023fffffffULL +#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x000240000000ULL /* 9 GiB */ +#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x000a3fffffffULL +#define HIGH_HEAP_MIN_ADDRESS 0x000a40000000ULL /* 41 GiB */ #define GENERAL_STATE_POOL_SIZE \ (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1) @@ -174,8 +177,10 @@ struct intel_perf_query_result; #define BINDING_TABLE_POOL_SIZE \ (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1) #define BINDING_TABLE_POOL_BLOCK_SIZE (65536) -#define SURFACE_STATE_POOL_SIZE \ - (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1) +#define INTERNAL_SURFACE_STATE_POOL_SIZE \ + (INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS + 1) +#define BINDLESS_SURFACE_STATE_POOL_SIZE \ + (BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS - BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS + 1) #define INSTRUCTION_STATE_POOL_SIZE \ (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1) #define CLIENT_VISIBLE_HEAP_SIZE \ @@ -1153,7 +1158,8 @@ struct anv_device { struct anv_state_pool dynamic_state_pool; struct anv_state_pool instruction_state_pool; struct anv_state_pool binding_table_pool; - struct anv_state_pool surface_state_pool; + struct anv_state_pool internal_surface_state_pool; + struct anv_state_pool bindless_surface_state_pool; struct anv_state_reserved_pool custom_border_colors; @@ -1255,6 +1261,14 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) anv_state_pool_free(&device->binding_table_pool, state); } +static inline struct anv_state +anv_bindless_state_for_binding_table(struct anv_state state) +{ + state.offset += BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS - 
+ INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS; + return state; +} + static inline uint32_t anv_mocs(const struct anv_device *device, const struct anv_bo *bo, diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 3f693eaabec..b45e796d294 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -111,7 +111,7 @@ blorp_get_surface_base_address(struct blorp_batch *batch) { struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; return (struct blorp_address) { - .buffer = cmd_buffer->device->surface_state_pool.block_pool.bo, + .buffer = cmd_buffer->device->internal_surface_state_pool.block_pool.bo, .offset = 0, }; } diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 3c55f7761a8..c47eb877b90 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -193,7 +193,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.DynamicStateBufferSizeModifyEnable = true; sba.InstructionBuffersizeModifyEnable = true; sba.BindlessSurfaceStateBaseAddress = - (struct anv_address) { device->surface_state_pool.block_pool.bo, 0 }; + (struct anv_address) { device->bindless_surface_state_pool.block_pool.bo, 0 }; sba.BindlessSurfaceStateSize = (1 << 20) - 1; sba.BindlessSurfaceStateMOCS = mocs; sba.BindlessSurfaceStateBaseAddressModifyEnable = true; @@ -945,7 +945,7 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer, assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); struct anv_address ss_clear_addr = { - .bo = cmd_buffer->device->surface_state_pool.block_pool.bo, + .bo = cmd_buffer->device->internal_surface_state_pool.block_pool.bo, .offset = surface_state.offset + cmd_buffer->device->isl_dev.ss.clear_value_offset, }; @@ -1673,7 +1673,7 @@ genX(CmdExecuteCommands)( * we allocated for them in BeginCommandBuffer. 
*/ struct anv_bo *ss_bo = - primary->device->surface_state_pool.block_pool.bo; + primary->device->internal_surface_state_pool.block_pool.bo; struct anv_state src_state = primary->state.gfx.att_states; struct anv_state dst_state = secondary->state.gfx.att_states; assert(src_state.alloc_size == dst_state.alloc_size); @@ -2435,7 +2435,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ? desc->image_view->planes[binding->plane].general_sampler_surface_state : desc->image_view->planes[binding->plane].optimal_sampler_surface_state; - surface_state = sstate.state; + surface_state = + anv_bindless_state_for_binding_table(sstate.state); assert(surface_state.alloc_size); } else { surface_state = cmd_buffer->device->null_surface_state; @@ -2449,7 +2450,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, binding->lowered_storage_surface ? desc->image_view->planes[binding->plane].lowered_storage_surface_state : desc->image_view->planes[binding->plane].storage_surface_state; - surface_state = sstate.state; + surface_state = + anv_bindless_state_for_binding_table(sstate.state); assert(surface_state.alloc_size); if (surface_state.offset == 0) { mesa_loge("Bound a image to a descriptor where the " @@ -2483,7 +2485,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: if (desc->buffer_view) { - surface_state = desc->buffer_view->surface_state; + surface_state = anv_bindless_state_for_binding_table( + desc->buffer_view->surface_state); assert(surface_state.alloc_size); } else { surface_state = cmd_buffer->device->null_surface_state; @@ -2509,8 +2512,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_address address = anv_address_add(desc->buffer->address, offset); - surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + surface_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer); enum isl_format format = 
anv_isl_format_for_descriptor_type(cmd_buffer->device, desc->type); @@ -2531,9 +2533,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: if (desc->buffer_view) { - surface_state = binding->lowered_storage_surface + surface_state = anv_bindless_state_for_binding_table( + binding->lowered_storage_surface ? desc->buffer_view->lowered_storage_surface_state - : desc->buffer_view->storage_surface_state; + : desc->buffer_view->storage_surface_state); assert(surface_state.alloc_size); } else { surface_state = cmd_buffer->device->null_surface_state; @@ -2544,6 +2547,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, assert(!"Invalid descriptor type"); continue; } + assert(surface_state.map); bt_map[s] = surface_state.offset + state_offset; break; @@ -2681,7 +2685,6 @@ flush_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, struct anv_cmd_pipeline_state *state, struct anv_pipeline *pipeline) { - const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev; struct anv_descriptor_set *set = &state->push_descriptor->set; struct anv_descriptor_set_layout *layout = set->layout; @@ -2691,9 +2694,7 @@ flush_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, struct anv_descriptor *desc = &set->descriptors[desc_idx]; struct anv_buffer_view *bview = desc->set_buffer_view; - bview->surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, - isl_dev->ss.size, isl_dev->ss.align); + bview->surface_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer); anv_descriptor_write_surface_state(cmd_buffer->device, desc, bview->surface_state); } @@ -2704,9 +2705,7 @@ flush_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, anv_isl_format_for_descriptor_type(cmd_buffer->device, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); - set->desc_surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, - isl_dev->ss.size, isl_dev->ss.align); + set->desc_surface_state = 
anv_cmd_buffer_alloc_surface_state(cmd_buffer); anv_fill_buffer_surface_state(cmd_buffer->device, set->desc_surface_state, format, ISL_SWIZZLE_IDENTITY, diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index cfb3bc89d1a..bd0daf58698 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -195,7 +195,7 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) sba.StatelessDataPortAccessMOCS = mocs; sba.SurfaceStateBaseAddress = - (struct anv_address) { .offset = SURFACE_STATE_POOL_MIN_ADDRESS }; + (struct anv_address) { .offset = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS }; sba.SurfaceStateMOCS = mocs; sba.SurfaceStateBaseAddressModifyEnable = true; @@ -220,7 +220,7 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) sba.InstructionBuffersizeModifyEnable = true; sba.BindlessSurfaceStateBaseAddress = - (struct anv_address) { .offset = SURFACE_STATE_POOL_MIN_ADDRESS }; + (struct anv_address) { .offset = BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS }; sba.BindlessSurfaceStateSize = (1 << 20) - 1; sba.BindlessSurfaceStateMOCS = mocs; sba.BindlessSurfaceStateBaseAddressModifyEnable = true;