zink: make (some) vk allocation commands more robust against vram depletion

as has recently been exposed by ci, there are some cases where running
lots of tests simultaneously can temporarily result in depleted vram,
which torpedoes everything

as this scenario is transient (vram will very soon become available again),
it makes more sense to add some retries at fixed intervals to try soldiering
onward instead of exploding and probably blocking a merge

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25938>
This commit is contained in:
Mike Blumenkrantz 2023-10-25 11:41:02 -04:00 committed by Marge Bot
parent c4283e32e3
commit d2abb4f975
3 changed files with 141 additions and 88 deletions

View file

@ -334,16 +334,22 @@ create_batch_state(struct zink_context *ctx)
VkCommandPoolCreateInfo cpci = {0}; VkCommandPoolCreateInfo cpci = {0};
cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cpci.queueFamilyIndex = screen->gfx_queue; cpci.queueFamilyIndex = screen->gfx_queue;
VkResult result = VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool); VkResult result;
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); VRAM_ALLOC_LOOP(result,
goto fail; VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool),
} if (result != VK_SUCCESS) {
result = VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->unsynchronized_cmdpool); mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result));
if (result != VK_SUCCESS) { goto fail;
mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); }
goto fail; );
} VRAM_ALLOC_LOOP(result,
VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->unsynchronized_cmdpool),
if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result));
goto fail;
}
);
VkCommandBuffer cmdbufs[2]; VkCommandBuffer cmdbufs[2];
VkCommandBufferAllocateInfo cbai = {0}; VkCommandBufferAllocateInfo cbai = {0};
@ -352,21 +358,26 @@ create_batch_state(struct zink_context *ctx)
cbai.commandPool = bs->cmdpool; cbai.commandPool = bs->cmdpool;
cbai.commandBufferCount = 2; cbai.commandBufferCount = 2;
result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs); VRAM_ALLOC_LOOP(result,
if (result != VK_SUCCESS) { VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs),
mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result)); if (result != VK_SUCCESS) {
goto fail; mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result));
} goto fail;
}
);
bs->cmdbuf = cmdbufs[0]; bs->cmdbuf = cmdbufs[0];
bs->reordered_cmdbuf = cmdbufs[1]; bs->reordered_cmdbuf = cmdbufs[1];
cbai.commandPool = bs->unsynchronized_cmdpool; cbai.commandPool = bs->unsynchronized_cmdpool;
cbai.commandBufferCount = 1; cbai.commandBufferCount = 1;
result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->unsynchronized_cmdbuf); VRAM_ALLOC_LOOP(result,
if (result != VK_SUCCESS) { VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->unsynchronized_cmdbuf);,
mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result)); if (result != VK_SUCCESS) {
goto fail; mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result));
} goto fail;
}
);
#define SET_CREATE_OR_FAIL(ptr) \ #define SET_CREATE_OR_FAIL(ptr) \
if (!_mesa_set_init(ptr, bs, _mesa_hash_pointer, _mesa_key_pointer_equal)) \ if (!_mesa_set_init(ptr, bs, _mesa_hash_pointer, _mesa_key_pointer_equal)) \
@ -512,17 +523,22 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch)
cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
VkResult result = VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi); VkResult result;
if (result != VK_SUCCESS) VRAM_ALLOC_LOOP(result,
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi),
if (result != VK_SUCCESS)
result = VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi); mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
if (result != VK_SUCCESS) );
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); VRAM_ALLOC_LOOP(result,
VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi),
result = VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi); if (result != VK_SUCCESS)
if (result != VK_SUCCESS) mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); );
VRAM_ALLOC_LOOP(result,
VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi),
if (result != VK_SUCCESS)
mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result));
);
batch->state->fence.completed = false; batch->state->fence.completed = false;
if (ctx->last_fence) { if (ctx->last_fence) {
@ -673,12 +689,15 @@ submit_queue(void *data, void *gdata, int thread_index)
tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount; tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount;
VkResult result = VKSCR(EndCommandBuffer)(bs->cmdbuf); VkResult result;
if (result != VK_SUCCESS) { VRAM_ALLOC_LOOP(result,
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); VKSCR(EndCommandBuffer)(bs->cmdbuf),
bs->is_device_lost = true; if (result != VK_SUCCESS) {
goto end; mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
} bs->is_device_lost = true;
goto end;
}
);
if (bs->has_barriers) { if (bs->has_barriers) {
if (bs->unordered_write_access) { if (bs->unordered_write_access) {
VkMemoryBarrier mb; VkMemoryBarrier mb;
@ -690,31 +709,37 @@ submit_queue(void *data, void *gdata, int thread_index)
bs->unordered_write_stages, 0, bs->unordered_write_stages, 0,
0, 1, &mb, 0, NULL, 0, NULL); 0, 1, &mb, 0, NULL, 0, NULL);
} }
result = VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf); VRAM_ALLOC_LOOP(result,
if (result != VK_SUCCESS) { VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf),
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); if (result != VK_SUCCESS) {
bs->is_device_lost = true; mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
goto end; bs->is_device_lost = true;
} goto end;
}
);
} }
if (bs->has_unsync) { if (bs->has_unsync) {
result = VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf); VRAM_ALLOC_LOOP(result,
if (result != VK_SUCCESS) { VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf),
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); if (result != VK_SUCCESS) {
bs->is_device_lost = true; mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
goto end; bs->is_device_lost = true;
} goto end;
}
);
} }
if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount) if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount)
num_si--; num_si--;
simple_mtx_lock(&screen->queue_lock); simple_mtx_lock(&screen->queue_lock);
result = VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE); VRAM_ALLOC_LOOP(result,
if (result != VK_SUCCESS) { VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE),
mesa_loge("ZINK: vkQueueSubmit failed (%s)", vk_Result_to_str(result)); if (result != VK_SUCCESS) {
bs->is_device_lost = true; mesa_loge("ZINK: vkQueueSubmit failed (%s)", vk_Result_to_str(result));
} bs->is_device_lost = true;
}
);
simple_mtx_unlock(&screen->queue_lock); simple_mtx_unlock(&screen->queue_lock);
unsigned i = 0; unsigned i = 0;

View file

@ -430,13 +430,15 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
VkPipeline pipeline; VkPipeline pipeline;
u_rwlock_wrlock(&prog->base.pipeline_cache_lock); u_rwlock_wrlock(&prog->base.pipeline_cache_lock);
VkResult result = VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, VkResult result;
1, &pci, NULL, &pipeline); VRAM_ALLOC_LOOP(result,
u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline),
if (result != VK_SUCCESS) { u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result)); if (result != VK_SUCCESS) {
return VK_NULL_HANDLE; mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
} return VK_NULL_HANDLE;
}
);
return pipeline; return pipeline;
} }
@ -498,14 +500,16 @@ zink_create_compute_pipeline(struct zink_screen *screen, struct zink_compute_pro
pci.stage = stage; pci.stage = stage;
VkPipeline pipeline; VkPipeline pipeline;
VkResult result;
u_rwlock_wrlock(&comp->base.pipeline_cache_lock); u_rwlock_wrlock(&comp->base.pipeline_cache_lock);
VkResult result = VKSCR(CreateComputePipelines)(screen->dev, comp->base.pipeline_cache, VRAM_ALLOC_LOOP(result,
1, &pci, NULL, &pipeline); VKSCR(CreateComputePipelines)(screen->dev, comp->base.pipeline_cache, 1, &pci, NULL, &pipeline),
u_rwlock_wrunlock(&comp->base.pipeline_cache_lock); u_rwlock_wrunlock(&comp->base.pipeline_cache_lock);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", vk_Result_to_str(result)); mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE; return VK_NULL_HANDLE;
} }
);
return pipeline; return pipeline;
} }
@ -618,11 +622,14 @@ zink_create_gfx_pipeline_output(struct zink_screen *screen, struct zink_gfx_pipe
pci.pDynamicState = &pipelineDynamicStateCreateInfo; pci.pDynamicState = &pipelineDynamicStateCreateInfo;
VkPipeline pipeline; VkPipeline pipeline;
if (VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, VkResult result;
NULL, &pipeline) != VK_SUCCESS) { VRAM_ALLOC_LOOP(result,
mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline),
return VK_NULL_HANDLE; if (result != VK_SUCCESS) {
} mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
);
return pipeline; return pipeline;
} }
@ -696,11 +703,14 @@ zink_create_gfx_pipeline_input(struct zink_screen *screen,
pci.pDynamicState = &pipelineDynamicStateCreateInfo; pci.pDynamicState = &pipelineDynamicStateCreateInfo;
VkPipeline pipeline; VkPipeline pipeline;
if (VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, VkResult result;
NULL, &pipeline) != VK_SUCCESS) { VRAM_ALLOC_LOOP(result,
mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline),
return VK_NULL_HANDLE; if (result != VK_SUCCESS) {
} mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result));
return VK_NULL_HANDLE;
}
);
return pipeline; return pipeline;
} }
@ -831,10 +841,14 @@ create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_objec
pci.flags |= VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT; pci.flags |= VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
VkPipeline pipeline; VkPipeline pipeline;
if (VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, NULL, &pipeline) != VK_SUCCESS) { VkResult result;
mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); VRAM_ALLOC_LOOP(result,
return VK_NULL_HANDLE; VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, NULL, &pipeline),
} if (result != VK_SUCCESS) {
mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
return VK_NULL_HANDLE;
}
);
return pipeline; return pipeline;
} }
@ -886,13 +900,15 @@ zink_create_gfx_pipeline_combined(struct zink_screen *screen, struct zink_gfx_pr
VkPipeline pipeline; VkPipeline pipeline;
u_rwlock_wrlock(&prog->base.pipeline_cache_lock); u_rwlock_wrlock(&prog->base.pipeline_cache_lock);
VkResult result = VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline); VkResult result;
if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED_EXT) { VRAM_ALLOC_LOOP(result,
mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline),
u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
return VK_NULL_HANDLE; if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED_EXT) {
} mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); return VK_NULL_HANDLE;
}
);
return pipeline; return pipeline;
} }

View file

@ -114,6 +114,18 @@ zink_string_vkflags_unroll(char *buf, size_t bufsize, uint64_t flags, zink_vkfla
return idx; return idx;
} }
/*
 * Run a Vulkan call that may fail with VK_ERROR_OUT_OF_DEVICE_MEMORY, retrying
 * it with growing delays, then run the caller-supplied result handling.
 *
 * RET:  lvalue that receives the VkResult of every attempt.
 * DOIT: expression performing the call; it is re-evaluated on every attempt.
 *       It is pasted verbatim (not parenthesized) as the right-hand side of
 *       the assignment, so an argument that ends in ';' still expands to
 *       valid code.
 * ...:  statement(s) expanded exactly once after the last attempt, normally
 *       the check of RET and the error path.
 *
 * _us[i] is the delay in microseconds applied *before* attempt i: the first
 * attempt runs immediately, every retry is preceded by its back-off, and no
 * time is spent sleeping once the final attempt has failed. Any result other
 * than VK_ERROR_OUT_OF_DEVICE_MEMORY (VK_SUCCESS included) stops the retries
 * at once.
 *
 * The trailing statements live inside a do/while (0): goto and return leave
 * the macro as usual, but a break or continue written there would only leave
 * the macro itself.
 */
#define VRAM_ALLOC_LOOP(RET, DOIT, ...) \
   do { \
      const unsigned _us[] = {0, 1000, 10000, 500000, 1000000}; \
      for (unsigned _i = 0; _i < ARRAY_SIZE(_us); _i++) { \
         /* back off before a retry; nothing to wait for on the first try */ \
         if (_us[_i]) \
            os_time_sleep(_us[_i]); \
         RET = DOIT; \
         /* only device-memory exhaustion is treated as transient */ \
         if ((RET) != VK_ERROR_OUT_OF_DEVICE_MEMORY) \
            break; \
      } \
      __VA_ARGS__ \
   } while (0)
VkSemaphore VkSemaphore
zink_create_semaphore(struct zink_screen *screen); zink_create_semaphore(struct zink_screen *screen);