From 0603a354f3cf8c1c42b6c40b6022dd27bddc2b74 Mon Sep 17 00:00:00 2001 From: Sjoerd Siebinga Date: Tue, 14 Apr 2026 15:14:31 +0200 Subject: [PATCH] render: replace fatal aborts on GPU reset with EGL context recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hyprland currently triggers RASSERT (SIGABRT) on three GPU-reset signals: glGetGraphicsResetStatus() reporting a reset in begin() / beginSimple(), glGetError() returning GL_CONTEXT_LOST at end(), and renderTextureInternal() receiving a null or invalid texture. The original abort message acknowledged this as a gap: "Cannot continue until proper GPU reset handling is implemented." On recent Intel hardware (Xe driver on Meteor Lake/Arc graphics), TLB invalidation timeouts cause the kernel to issue recoverable GPU resets and re-arm the rendering engines without losing the EGL display. The compositor is expected to re-establish its context and carry on. The current RASSERT takes down the entire user session on what is, from the kernel's perspective, a handled fault — and these resets can cluster during suspend/resume or under memory pressure. This patch adds a minimal recovery mechanism: - attemptContextReset() unbinds and rebinds the EGL context via eglMakeCurrent(EGL_NO_CONTEXT) then eglMakeCurrent(m_eglContext), and marks shaders for reinitialization on the next frame. - begin() and beginSimple() check a renderer-level m_gpuResetCooldown counter first (a member of CHyprOpenGLImpl, not of the monitor); if > 0, they decrement it and skip the frame. Otherwise, if glGetGraphicsResetStatus reports a reset, they log the reason, call attemptContextReset(), and either skip one frame (on success) or set a 60-frame cooldown (on failure) to avoid tight-loop recovery attempts. pMonitor is reset so the skipped frame doesn't hold a dangling reference. - end() no longer aborts on GL_CONTEXT_LOST; it logs and sets the 60-frame cooldown, letting begin() drive recovery once the cooldown has elapsed. - renderTextureInternal() replaces the RASSERTs on null and invalid textures with a logged skip. 
Post-reset textures are often invalid until reuploaded; skipping the draw for one frame is preferable to SIGABRT. The existing RASSERT(pMonitor, "...without begin()!") is kept — that's a programmer error, not a recoverable state. Tested on a Framework 13 / Intel Xe setup that was previously crashing on GPU resets; with this patch the compositor logs the reset, skips a handful of frames, and resumes rendering normally. Combined with the prior two commits (surface and blur null-guards), this covers the follow-on nulls that surface during the recovery window. --- src/render/OpenGL.cpp | 59 ++++++++++++++++++++++++++++++++++++++----- src/render/OpenGL.hpp | 2 ++ 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/src/render/OpenGL.cpp b/src/render/OpenGL.cpp index 87d742d6a..2b57ba9f4 100644 --- a/src/render/OpenGL.cpp +++ b/src/render/OpenGL.cpp @@ -663,9 +663,28 @@ EGLImageKHR CHyprOpenGLImpl::createEGLImage(const Aquamarine::SDMABUFAttrs& attr return image; } +bool CHyprOpenGLImpl::attemptContextReset() { + Log::logger->log(Log::ERR, "GPU reset: attempting EGL context recovery..."); + eglMakeCurrent(m_eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + if (eglMakeCurrent(m_eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, m_eglContext) != EGL_TRUE) { + Log::logger->log(Log::ERR, "GPU reset: eglMakeCurrent failed, context may be lost"); + return false; + } + m_shadersInitialized = false; + Log::logger->log(Log::WARN, "GPU reset: EGL context re-established, shaders will reinitialize on next frame."); + return true; +} + void CHyprOpenGLImpl::beginSimple(PHLMONITOR pMonitor, const CRegion& damage, SP rb, SP fb) { g_pHyprRenderer->m_renderData.pMonitor = pMonitor; + if (m_gpuResetCooldown > 0) { + m_gpuResetCooldown--; + Log::logger->log(Log::WARN, "GPU reset recovery cooldown, skipping frame ({} remaining)", m_gpuResetCooldown); + g_pHyprRenderer->m_renderData.pMonitor.reset(); + return; + } + const GLenum RESETSTATUS = glGetGraphicsResetStatus(); if 
(RESETSTATUS != GL_NO_ERROR) { std::string errStr = ""; @@ -675,7 +694,14 @@ void CHyprOpenGLImpl::beginSimple(PHLMONITOR pMonitor, const CRegion& damage, SP case GL_UNKNOWN_CONTEXT_RESET: errStr = "GL_UNKNOWN_CONTEXT_RESET"; break; default: errStr = "UNKNOWN??"; break; } - RASSERT(false, "Aborting, glGetGraphicsResetStatus returned {}. Cannot continue until proper GPU reset handling is implemented.", errStr); + Log::logger->log(Log::ERR, "GPU reset detected in beginSimple: {}. Attempting EGL context recovery.", errStr); + if (!attemptContextReset()) { + Log::logger->log(Log::ERR, "GPU reset recovery failed. Skipping frames for cooldown."); + m_gpuResetCooldown = 60; + } else { + Log::logger->log(Log::WARN, "GPU reset recovery succeeded. Skipping current frame to reinitialize."); + } + g_pHyprRenderer->m_renderData.pMonitor.reset(); return; } @@ -714,6 +740,13 @@ void CHyprOpenGLImpl::makeEGLCurrent() { void CHyprOpenGLImpl::begin(PHLMONITOR pMonitor, const CRegion& damage_, SP fb, std::optional finalDamage) { g_pHyprRenderer->m_renderData.pMonitor = pMonitor; + if (m_gpuResetCooldown > 0) { + m_gpuResetCooldown--; + Log::logger->log(Log::WARN, "GPU reset recovery cooldown, skipping frame ({} remaining)", m_gpuResetCooldown); + g_pHyprRenderer->m_renderData.pMonitor.reset(); + return; + } + const GLenum RESETSTATUS = glGetGraphicsResetStatus(); if (RESETSTATUS != GL_NO_ERROR) { std::string errStr = ""; @@ -723,7 +756,14 @@ void CHyprOpenGLImpl::begin(PHLMONITOR pMonitor, const CRegion& damage_, SPlog(Log::ERR, "GPU reset detected in begin: {}. Attempting EGL context recovery.", errStr); + if (!attemptContextReset()) { + Log::logger->log(Log::ERR, "GPU reset recovery failed. Skipping frames for cooldown."); + m_gpuResetCooldown = 60; + } else { + Log::logger->log(Log::WARN, "GPU reset recovery succeeded. 
Skipping current frame to reinitialize."); + } + g_pHyprRenderer->m_renderData.pMonitor.reset(); return; } @@ -856,8 +896,10 @@ void CHyprOpenGLImpl::end() { // check for gl errors const GLenum ERR = glGetError(); - if UNLIKELY (ERR == GL_CONTEXT_LOST) /* We don't have infra to recover from this */ - RASSERT(false, "glGetError at Opengl::end() returned GL_CONTEXT_LOST. Cannot continue until proper GPU reset handling is implemented."); + if UNLIKELY (ERR == GL_CONTEXT_LOST) { + Log::logger->log(Log::ERR, "glGetError at Opengl::end() returned GL_CONTEXT_LOST. Recovery will trigger on next begin()."); + m_gpuResetCooldown = 60; + } } } @@ -1443,8 +1485,13 @@ WP CHyprOpenGLImpl::renderToFBInternal(SP tex, const STexture void CHyprOpenGLImpl::renderTextureInternal(SP tex, const CBox& box, const STextureRenderData& data) { RASSERT(g_pHyprRenderer->m_renderData.pMonitor, "Tried to render texture without begin()!"); - RASSERT(tex, "Attempted to draw nullptr texture!"); - RASSERT(tex->ok(), "Attempted to draw invalid texture!"); + + if UNLIKELY (!tex || !tex->ok()) { + // After a GPU reset, textures become invalid. Skip the draw + // instead of aborting — recovery will happen on the next begin(). + Log::logger->log(Log::ERR, "renderTextureInternal: invalid texture (likely GPU reset). Skipping draw."); + return; + } TRACY_GPU_ZONE("RenderTextureInternalWithDamage"); diff --git a/src/render/OpenGL.hpp b/src/render/OpenGL.hpp index 78518ffe7..c74cb5a24 100644 --- a/src/render/OpenGL.hpp +++ b/src/render/OpenGL.hpp @@ -313,6 +313,8 @@ namespace Render::GL { bool m_applyFinalShader = false; bool m_blend = false; bool m_offloadedFramebuffer = false; + int m_gpuResetCooldown = 0; + bool attemptContextReset(); bool m_cmSupported = true; SP m_finalScreenShader;