zink: make (some) vk allocation commands more robust against vram depletion

as has recently been exposed by ci, there are some cases where running
lots of tests simultaneously can temporarily result in depleted vram,
which torpedoes everything

as this scenario is transient (vram will very soon become available again),
it makes more sense to add some retries at fixed intervals to try soldiering
onward instead of exploding and probably blocking a merge

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25938>
This commit is contained in:
Mike Blumenkrantz 2023-10-25 11:41:02 -04:00 committed by Marge Bot
parent c4283e32e3
commit d2abb4f975
3 changed files with 141 additions and 88 deletions

View file

@ -334,16 +334,22 @@ create_batch_state(struct zink_context *ctx)
VkCommandPoolCreateInfo cpci = {0};
cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cpci.queueFamilyIndex = screen->gfx_queue;
VkResult result = VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result));
goto fail;
}
result = VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->unsynchronized_cmdpool);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result));
goto fail;
}
VkResult result;
VRAM_ALLOC_LOOP(result,
VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result));
goto fail;
}
);
VRAM_ALLOC_LOOP(result,
VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->unsynchronized_cmdpool),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result));
goto fail;
}
);
VkCommandBuffer cmdbufs[2];
VkCommandBufferAllocateInfo cbai = {0};
@ -352,21 +358,26 @@ create_batch_state(struct zink_context *ctx)
cbai.commandPool = bs->cmdpool;
cbai.commandBufferCount = 2;
result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result));
goto fail;
}
VRAM_ALLOC_LOOP(result,
VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result));
goto fail;
}
);
bs->cmdbuf = cmdbufs[0];
bs->reordered_cmdbuf = cmdbufs[1];
cbai.commandPool = bs->unsynchronized_cmdpool;
cbai.commandBufferCount = 1;
result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->unsynchronized_cmdbuf);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result));
goto fail;
}
VRAM_ALLOC_LOOP(result,
VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->unsynchronized_cmdbuf);,
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result));
goto fail;
}
);
#define SET_CREATE_OR_FAIL(ptr) \
if (!_mesa_set_init(ptr, bs, _mesa_hash_pointer, _mesa_key_pointer_equal)) \
@ -512,17 +523,22 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch)
cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
VkResult result = VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi);
if (result != VK_SUCCESS)
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
result = VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi);
if (result != VK_SUCCESS)
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
result = VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi);
if (result != VK_SUCCESS)
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
VkResult result;
VRAM_ALLOC_LOOP(result,
VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi),
if (result != VK_SUCCESS)
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
);
VRAM_ALLOC_LOOP(result,
VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi),
if (result != VK_SUCCESS)
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
);
VRAM_ALLOC_LOOP(result,
VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi),
if (result != VK_SUCCESS)
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
);
batch->state->fence.completed = false;
if (ctx->last_fence) {
@ -673,12 +689,15 @@ submit_queue(void *data, void *gdata, int thread_index)
tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount;
VkResult result = VKSCR(EndCommandBuffer)(bs->cmdbuf);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
bs->is_device_lost = true;
goto end;
}
VkResult result;
VRAM_ALLOC_LOOP(result,
VKSCR(EndCommandBuffer)(bs->cmdbuf),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
bs->is_device_lost = true;
goto end;
}
);
if (bs->has_barriers) {
if (bs->unordered_write_access) {
VkMemoryBarrier mb;
@ -690,31 +709,37 @@ submit_queue(void *data, void *gdata, int thread_index)
bs->unordered_write_stages, 0,
0, 1, &mb, 0, NULL, 0, NULL);
}
result = VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
bs->is_device_lost = true;
goto end;
}
VRAM_ALLOC_LOOP(result,
VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
bs->is_device_lost = true;
goto end;
}
);
}
if (bs->has_unsync) {
result = VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
bs->is_device_lost = true;
goto end;
}
VRAM_ALLOC_LOOP(result,
VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
bs->is_device_lost = true;
goto end;
}
);
}
if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount)
num_si--;
simple_mtx_lock(&screen->queue_lock);
result = VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkQueueSubmit failed (%s)", vk_Result_to_str(result));
bs->is_device_lost = true;
}
VRAM_ALLOC_LOOP(result,
VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkQueueSubmit failed (%s)", vk_Result_to_str(result));
bs->is_device_lost = true;
}
);
simple_mtx_unlock(&screen->queue_lock);
unsigned i = 0;

View file

@ -430,13 +430,15 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
VkPipeline pipeline;
u_rwlock_wrlock(&prog->base.pipeline_cache_lock);
VkResult result = VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache,
1, &pci, NULL, &pipeline);
u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
VkResult result;
VRAM_ALLOC_LOOP(result,
VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline),
u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
);
return pipeline;
}
@ -498,14 +500,16 @@ zink_create_compute_pipeline(struct zink_screen *screen, struct zink_compute_pro
pci.stage = stage;
VkPipeline pipeline;
VkResult result;
u_rwlock_wrlock(&comp->base.pipeline_cache_lock);
VkResult result = VKSCR(CreateComputePipelines)(screen->dev, comp->base.pipeline_cache,
1, &pci, NULL, &pipeline);
u_rwlock_wrunlock(&comp->base.pipeline_cache_lock);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
VRAM_ALLOC_LOOP(result,
VKSCR(CreateComputePipelines)(screen->dev, comp->base.pipeline_cache, 1, &pci, NULL, &pipeline),
u_rwlock_wrunlock(&comp->base.pipeline_cache_lock);
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
);
return pipeline;
}
@ -618,11 +622,14 @@ zink_create_gfx_pipeline_output(struct zink_screen *screen, struct zink_gfx_pipe
pci.pDynamicState = &pipelineDynamicStateCreateInfo;
VkPipeline pipeline;
if (VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci,
NULL, &pipeline) != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
return VK_NULL_HANDLE;
}
VkResult result;
VRAM_ALLOC_LOOP(result,
VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
);
return pipeline;
}
@ -696,11 +703,14 @@ zink_create_gfx_pipeline_input(struct zink_screen *screen,
pci.pDynamicState = &pipelineDynamicStateCreateInfo;
VkPipeline pipeline;
if (VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci,
NULL, &pipeline) != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
return VK_NULL_HANDLE;
}
VkResult result;
VRAM_ALLOC_LOOP(result,
VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
);
return pipeline;
}
@ -831,10 +841,14 @@ create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_objec
pci.flags |= VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
VkPipeline pipeline;
if (VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, NULL, &pipeline) != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
return VK_NULL_HANDLE;
}
VkResult result;
VRAM_ALLOC_LOOP(result,
VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, NULL, &pipeline),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
return VK_NULL_HANDLE;
}
);
return pipeline;
}
@ -886,13 +900,15 @@ zink_create_gfx_pipeline_combined(struct zink_screen *screen, struct zink_gfx_pr
VkPipeline pipeline;
u_rwlock_wrlock(&prog->base.pipeline_cache_lock);
VkResult result = VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline);
if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED_EXT) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
VkResult result;
VRAM_ALLOC_LOOP(result,
VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline),
u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
return VK_NULL_HANDLE;
}
u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED_EXT) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
return VK_NULL_HANDLE;
}
);
return pipeline;
}

View file

@ -114,6 +114,18 @@ zink_string_vkflags_unroll(char *buf, size_t bufsize, uint64_t flags, zink_vkfla
return idx;
}
/**
 * Run a Vulkan call, retrying while the device reports out-of-memory.
 *
 * RET   lvalue that receives the VkResult of every attempt
 * DOIT  expression producing the VkResult; it is re-evaluated on each
 *       attempt, so it must be safe to execute more than once
 * ...   statements expanded exactly once after the loop has finished
 *       (typically the error check on RET); they run inside the macro's
 *       do/while(0) scope, so `goto` and `return` leave it as usual
 *
 * Only VK_ERROR_OUT_OF_DEVICE_MEMORY triggers a retry: VK_SUCCESS and every
 * other result end the loop immediately. _us[] is the delay applied *before*
 * each attempt, so the first attempt is immediate and the longest wait is
 * always followed by one more attempt instead of being slept through after
 * the final failure. The zero entry is skipped rather than passed to
 * os_time_sleep(), keeping the success path free of any sleep call.
 *
 * NOTE(review): the delays are presumably microseconds (per the `_us` name)
 * -- confirm against os_time_sleep().
 *
 * RET and DOIT are pasted as written (no added parentheses) so that every
 * existing call-site spelling keeps compiling.
 */
#define VRAM_ALLOC_LOOP(RET, DOIT, ...) \
   do { \
      const unsigned _us[] = {0, 1000, 10000, 500000, 1000000}; \
      for (unsigned _i = 0; _i < ARRAY_SIZE(_us); _i++) { \
         if (_us[_i]) \
            os_time_sleep(_us[_i]); \
         RET = DOIT; \
         if (RET != VK_ERROR_OUT_OF_DEVICE_MEMORY) \
            break; \
      } \
      __VA_ARGS__ \
   } while (0)
VkSemaphore
zink_create_semaphore(struct zink_screen *screen);