vulkan/wsi/x11: Ensure we create at least minImageCount images.

Doom Eternal happily creates a swapchain with 2 images for IMMEDIATE...

This fixes a 10% performance issue with Doom Eternal for me.

Since the game only sets a minImageCount, increasing it to our own minimum
is totally okay.

CC: <stable@lists.freedesktop.org>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2684
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3156
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4368>
This commit is contained in:
Bas Nieuwenhuizen 2020-03-28 22:43:32 +01:00 committed by Marge Bot
parent 7b3976d3f8
commit 5f97dfc4c8

View file

@ -450,6 +450,33 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
return VK_SUCCESS;
}
/* Returns the minimum number of swapchain images used for X11 surfaces.
 *
 * A non-zero wsi_device->x11.override_minImageCount takes precedence;
 * otherwise the built-in default of 3 is returned.
 */
static uint32_t
x11_get_min_image_count(struct wsi_device *wsi_device)
{
   /* Why the default is 3:
    *
    * For IMMEDIATE and FIFO, most games work in a pipelined manner where
    * they can produce frames at a rate of 1/MAX(CPU duration, GPU duration),
    * while the render latency is CPU duration + GPU duration.
    *
    * With scanout from pageflipping, running at full speed therefore takes
    * 3 frames in flight:
    *   1) CPU rendering work
    *   2) GPU rendering work
    *   3) scanout
    *
    * Once we have a nonblocking acquire that returns a semaphore, 1 and 3
    * can be merged, so the ideal implementation needs only 2 images.  Games
    * cannot tell that we currently do not have an ideal implementation and
    * that they hence need to allocate 3 images, so we do it for them.
    *
    * This is a tradeoff: it uses more memory than needed for non-fullscreen
    * and non-performance intensive applications.
    */
   enum { X11_DEFAULT_MIN_IMAGE_COUNT = 3 };

   if (!wsi_device->x11.override_minImageCount)
      return X11_DEFAULT_MIN_IMAGE_COUNT;

   return wsi_device->x11.override_minImageCount;
}
static VkResult
x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
struct wsi_device *wsi_device,
@ -502,31 +529,10 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
}
/* For IMMEDIATE and FIFO, most games work in a pipelined manner where the
* they can produce frames at a rate of 1/MAX(CPU duration, GPU duration), but
* the render latency is CPU duration + GPU duration.
*
* This means that with scanout from pageflipping we need 3 frames to run
* full speed:
* 1) CPU rendering work
* 2) GPU rendering work
* 3) scanout
*
* Once we have a nonblocking acquire that returns a semaphore we can merge
* 1 and 3. Hence the ideal implementation needs only 2 images, but games
* cannot tell that we currently do not have an ideal implementation and that
* hence they need to allocate 3 images. So let us do it for them.
*
* This is a tradeoff as it uses more memory than needed for non-fullscreen
* and non-performance intensive applications.
*/
caps->minImageCount = 3;
caps->minImageCount = x11_get_min_image_count(wsi_device);
/* There is no real maximum */
caps->maxImageCount = 0;
if (wsi_device->x11.override_minImageCount)
caps->minImageCount = wsi_device->x11.override_minImageCount;
caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
caps->maxImageArrayLayers = 1;
@ -1436,10 +1442,12 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR);
unsigned num_images = pCreateInfo->minImageCount;
if (wsi_device->x11.strict_imageCount)
num_images = pCreateInfo->minImageCount;
else if (present_mode == VK_PRESENT_MODE_MAILBOX_KHR)
num_images = MAX2(num_images, 5);
if (!wsi_device->x11.strict_imageCount) {
if (present_mode == VK_PRESENT_MODE_MAILBOX_KHR)
num_images = MAX2(num_images, 5);
num_images = MAX2(num_images, x11_get_min_image_count(wsi_device));
}
xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
struct wsi_x11_connection *wsi_conn =