mirror of
https://github.com/hyprwm/Hyprland
synced 2026-05-09 04:58:07 +02:00
render: replace fatal aborts on GPU reset with EGL context recovery
Hyprland currently triggers RASSERT (SIGABRT) on three GPU-reset signals: glGetGraphicsResetStatus() reporting a reset in begin() / beginSimple(), glGetError() returning GL_CONTEXT_LOST at end(), and renderTextureInternal() receiving an invalid texture. The original abort message acknowledged this as a gap: "Cannot continue until proper GPU reset handling is implemented."

On recent Intel hardware (Xe driver on Meteor Lake/Arc graphics), TLB invalidation timeouts cause the kernel to issue recoverable GPU resets and re-arm the rendering engines without losing the EGL display. The compositor is expected to re-establish its context and carry on. The current RASSERT takes down the entire user session on what is, from the kernel's perspective, a handled fault — and these resets can cluster during suspend/resume or under memory pressure.

This patch adds a minimal recovery mechanism:
- attemptContextReset() unbinds and rebinds the EGL context via eglMakeCurrent(EGL_NO_CONTEXT) then eglMakeCurrent(m_eglContext), and marks shaders for reinitialization on the next frame.
- begin() and beginSimple() first check a renderer-wide m_gpuResetCooldown counter (a single value shared by all monitors); if it is > 0, they decrement it and skip the frame. Otherwise, if glGetGraphicsResetStatus() reports a reset, they log the reason, call attemptContextReset(), and either skip one frame (on success) or set a 60-frame cooldown (on failure) to avoid tight-loop recovery attempts. pMonitor is reset so the skipped frame doesn't hold a dangling reference.
- end() no longer aborts on GL_CONTEXT_LOST; it logs the error and sets the 60-frame cooldown, letting the first begin() after the cooldown expires drive recovery.
- renderTextureInternal() replaces the RASSERT-on-invalid-texture with a logged skip. Post-reset textures are often invalid until reuploaded; skipping the draw for one frame is preferable to SIGABRT. The existing RASSERT(pMonitor, "...without begin()!") is kept — that's a programmer error, not a recoverable state.
Tested on a Framework 13 / Intel Xe setup that was previously crashing on GPU resets; with this patch the compositor logs the reset, skips a handful of frames, and resumes rendering normally. Combined with the prior two commits (surface and blur null-guards), this covers the follow-on nulls that surface during the recovery window.
This commit is contained in:
parent
15357c9685
commit
0603a354f3
2 changed files with 55 additions and 6 deletions
|
|
@ -663,9 +663,28 @@ EGLImageKHR CHyprOpenGLImpl::createEGLImage(const Aquamarine::SDMABUFAttrs& attr
|
|||
return image;
|
||||
}
|
||||
|
||||
bool CHyprOpenGLImpl::attemptContextReset() {
|
||||
Log::logger->log(Log::ERR, "GPU reset: attempting EGL context recovery...");
|
||||
eglMakeCurrent(m_eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
|
||||
if (eglMakeCurrent(m_eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, m_eglContext) != EGL_TRUE) {
|
||||
Log::logger->log(Log::ERR, "GPU reset: eglMakeCurrent failed, context may be lost");
|
||||
return false;
|
||||
}
|
||||
m_shadersInitialized = false;
|
||||
Log::logger->log(Log::WARN, "GPU reset: EGL context re-established, shaders will reinitialize on next frame.");
|
||||
return true;
|
||||
}
|
||||
|
||||
void CHyprOpenGLImpl::beginSimple(PHLMONITOR pMonitor, const CRegion& damage, SP<IRenderbuffer> rb, SP<IFramebuffer> fb) {
|
||||
g_pHyprRenderer->m_renderData.pMonitor = pMonitor;
|
||||
|
||||
if (m_gpuResetCooldown > 0) {
|
||||
m_gpuResetCooldown--;
|
||||
Log::logger->log(Log::WARN, "GPU reset recovery cooldown, skipping frame ({} remaining)", m_gpuResetCooldown);
|
||||
g_pHyprRenderer->m_renderData.pMonitor.reset();
|
||||
return;
|
||||
}
|
||||
|
||||
const GLenum RESETSTATUS = glGetGraphicsResetStatus();
|
||||
if (RESETSTATUS != GL_NO_ERROR) {
|
||||
std::string errStr = "";
|
||||
|
|
@ -675,7 +694,14 @@ void CHyprOpenGLImpl::beginSimple(PHLMONITOR pMonitor, const CRegion& damage, SP
|
|||
case GL_UNKNOWN_CONTEXT_RESET: errStr = "GL_UNKNOWN_CONTEXT_RESET"; break;
|
||||
default: errStr = "UNKNOWN??"; break;
|
||||
}
|
||||
RASSERT(false, "Aborting, glGetGraphicsResetStatus returned {}. Cannot continue until proper GPU reset handling is implemented.", errStr);
|
||||
Log::logger->log(Log::ERR, "GPU reset detected in beginSimple: {}. Attempting EGL context recovery.", errStr);
|
||||
if (!attemptContextReset()) {
|
||||
Log::logger->log(Log::ERR, "GPU reset recovery failed. Skipping frames for cooldown.");
|
||||
m_gpuResetCooldown = 60;
|
||||
} else {
|
||||
Log::logger->log(Log::WARN, "GPU reset recovery succeeded. Skipping current frame to reinitialize.");
|
||||
}
|
||||
g_pHyprRenderer->m_renderData.pMonitor.reset();
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -714,6 +740,13 @@ void CHyprOpenGLImpl::makeEGLCurrent() {
|
|||
void CHyprOpenGLImpl::begin(PHLMONITOR pMonitor, const CRegion& damage_, SP<IFramebuffer> fb, std::optional<CRegion> finalDamage) {
|
||||
g_pHyprRenderer->m_renderData.pMonitor = pMonitor;
|
||||
|
||||
if (m_gpuResetCooldown > 0) {
|
||||
m_gpuResetCooldown--;
|
||||
Log::logger->log(Log::WARN, "GPU reset recovery cooldown, skipping frame ({} remaining)", m_gpuResetCooldown);
|
||||
g_pHyprRenderer->m_renderData.pMonitor.reset();
|
||||
return;
|
||||
}
|
||||
|
||||
const GLenum RESETSTATUS = glGetGraphicsResetStatus();
|
||||
if (RESETSTATUS != GL_NO_ERROR) {
|
||||
std::string errStr = "";
|
||||
|
|
@ -723,7 +756,14 @@ void CHyprOpenGLImpl::begin(PHLMONITOR pMonitor, const CRegion& damage_, SP<IFra
|
|||
case GL_UNKNOWN_CONTEXT_RESET: errStr = "GL_UNKNOWN_CONTEXT_RESET"; break;
|
||||
default: errStr = "UNKNOWN??"; break;
|
||||
}
|
||||
RASSERT(false, "Aborting, glGetGraphicsResetStatus returned {}. Cannot continue until proper GPU reset handling is implemented.", errStr);
|
||||
Log::logger->log(Log::ERR, "GPU reset detected in begin: {}. Attempting EGL context recovery.", errStr);
|
||||
if (!attemptContextReset()) {
|
||||
Log::logger->log(Log::ERR, "GPU reset recovery failed. Skipping frames for cooldown.");
|
||||
m_gpuResetCooldown = 60;
|
||||
} else {
|
||||
Log::logger->log(Log::WARN, "GPU reset recovery succeeded. Skipping current frame to reinitialize.");
|
||||
}
|
||||
g_pHyprRenderer->m_renderData.pMonitor.reset();
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -856,8 +896,10 @@ void CHyprOpenGLImpl::end() {
|
|||
// check for gl errors
|
||||
const GLenum ERR = glGetError();
|
||||
|
||||
if UNLIKELY (ERR == GL_CONTEXT_LOST) /* We don't have infra to recover from this */
|
||||
RASSERT(false, "glGetError at Opengl::end() returned GL_CONTEXT_LOST. Cannot continue until proper GPU reset handling is implemented.");
|
||||
if UNLIKELY (ERR == GL_CONTEXT_LOST) {
|
||||
Log::logger->log(Log::ERR, "glGetError at Opengl::end() returned GL_CONTEXT_LOST. Recovery will trigger on next begin().");
|
||||
m_gpuResetCooldown = 60;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1443,8 +1485,13 @@ WP<CShader> CHyprOpenGLImpl::renderToFBInternal(SP<ITexture> tex, const STexture
|
|||
|
||||
void CHyprOpenGLImpl::renderTextureInternal(SP<ITexture> tex, const CBox& box, const STextureRenderData& data) {
|
||||
RASSERT(g_pHyprRenderer->m_renderData.pMonitor, "Tried to render texture without begin()!");
|
||||
RASSERT(tex, "Attempted to draw nullptr texture!");
|
||||
RASSERT(tex->ok(), "Attempted to draw invalid texture!");
|
||||
|
||||
if UNLIKELY (!tex || !tex->ok()) {
|
||||
// After a GPU reset, textures become invalid. Skip the draw
|
||||
// instead of aborting — recovery will happen on the next begin().
|
||||
Log::logger->log(Log::ERR, "renderTextureInternal: invalid texture (likely GPU reset). Skipping draw.");
|
||||
return;
|
||||
}
|
||||
|
||||
TRACY_GPU_ZONE("RenderTextureInternalWithDamage");
|
||||
|
||||
|
|
|
|||
|
|
@ -313,6 +313,8 @@ namespace Render::GL {
|
|||
bool m_applyFinalShader = false;
|
||||
bool m_blend = false;
|
||||
bool m_offloadedFramebuffer = false;
|
||||
int m_gpuResetCooldown = 0;
|
||||
bool attemptContextReset();
|
||||
bool m_cmSupported = true;
|
||||
|
||||
SP<CShader> m_finalScreenShader;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue