From 3d4246e22e90ffef5fe0cd935bd54c7f862e82f7 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 12:19:40 +0100 Subject: [PATCH 01/26] slang: Use _mesa_snprintf() wrapper. --- src/mesa/shader/slang/slang_codegen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 8263aae3343..a7cfc45e6f0 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -1441,7 +1441,7 @@ _slang_gen_function_call(slang_assemble_ctx *A, slang_function *fun, if (A->pragmas->Debug) { char s[1000]; - snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name); + _mesa_snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name); n->Comment = _slang_strdup(s); } From 4489f9efee58f2cba374298f2d43c96e5cd2ff41 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 13:05:51 +0100 Subject: [PATCH 02/26] stw: Keep per-thread storage for current context and pixel format. 
--- src/gallium/state_trackers/wgl/SConscript | 1 + .../state_trackers/wgl/shared/stw_context.c | 12 +-- .../state_trackers/wgl/shared/stw_device.c | 23 ++++ .../wgl/shared/stw_pixelformat.c | 9 +- .../state_trackers/wgl/shared/stw_tls.c | 101 ++++++++++++++++++ .../state_trackers/wgl/shared/stw_tls.h | 53 +++++++++ .../state_trackers/wgl/shared/stw_winsys.h | 6 ++ 7 files changed, 193 insertions(+), 12 deletions(-) create mode 100644 src/gallium/state_trackers/wgl/shared/stw_tls.c create mode 100644 src/gallium/state_trackers/wgl/shared/stw_tls.h diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript index 2141b02d688..038a7a31b32 100644 --- a/src/gallium/state_trackers/wgl/SConscript +++ b/src/gallium/state_trackers/wgl/SConscript @@ -30,6 +30,7 @@ if env['platform'] in ['windows']: 'shared/stw_arbextensionsstring.c', 'shared/stw_getprocaddress.c', 'shared/stw_arbpixelformat.c', + 'shared/stw_tls.c', ] wgl = env.ConvenienceLibrary( diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c index d77daac39cd..69f25d6187e 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_context.c +++ b/src/gallium/state_trackers/wgl/shared/stw_context.c @@ -39,9 +39,7 @@ #include "shared/stw_pixelformat.h" #include "stw_public.h" #include "stw_context.h" - -static HDC current_hdc = NULL; -static UINT_PTR current_hglrc = 0; +#include "stw_tls.h" BOOL stw_copy_context( @@ -264,13 +262,13 @@ get_window_size( HDC hdc, GLuint *width, GLuint *height ) UINT_PTR stw_get_current_context( void ) { - return current_hglrc; + return stw_tls_get_data()->currentGLRC; } HDC stw_get_current_dc( void ) { - return current_hdc; + return stw_tls_get_data()->currentDC; } BOOL @@ -295,8 +293,8 @@ stw_make_current( if (ctx == NULL) return FALSE; - current_hdc = hdc; - current_hglrc = hglrc; + stw_tls_get_data()->currentDC = hdc; + stw_tls_get_data()->currentGLRC = hglrc; if (glcurctx != NULL) { 
curctx = (struct stw_context *) glcurctx->DriverCtx; diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.c b/src/gallium/state_trackers/wgl/shared/stw_device.c index 0dca856d73b..4bec036fe35 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_device.c +++ b/src/gallium/state_trackers/wgl/shared/stw_device.c @@ -35,6 +35,7 @@ #include "shared/stw_winsys.h" #include "shared/stw_pixelformat.h" #include "shared/stw_public.h" +#include "shared/stw_tls.h" #ifdef WIN32_THREADS extern _glthread_Mutex OneTimeLock; @@ -70,6 +71,8 @@ st_init(const struct stw_winsys *stw_winsys) assert(!stw_dev); + stw_tls_init(); + stw_dev = &stw_dev_storage; memset(stw_dev, 0, sizeof(*stw_dev)); @@ -101,6 +104,24 @@ error1: } +boolean +st_init_thread(void) +{ + if (!stw_tls_init_thread()) { + return FALSE; + } + + return TRUE; +} + + +void +st_cleanup_thread(void) +{ + stw_tls_cleanup_thread(); +} + + void st_cleanup(void) { @@ -133,6 +154,8 @@ st_cleanup(void) debug_memory_end(stw_dev->memdbg_no); #endif + stw_tls_cleanup(); + stw_dev = NULL; } diff --git a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c index 2992a1ac0a1..b216ca5c823 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c +++ b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c @@ -28,6 +28,7 @@ #include "util/u_debug.h" #include "stw_pixelformat.h" #include "stw_public.h" +#include "stw_tls.h" #define MAX_PIXELFORMATS 16 @@ -35,8 +36,6 @@ static struct pixelformat_info pixelformats[MAX_PIXELFORMATS]; static uint pixelformat_count = 0; static uint pixelformat_extended_count = 0; -static uint currentpixelformat = 0; - static void add_standard_pixelformats( @@ -248,7 +247,7 @@ int stw_pixelformat_get( HDC hdc ) { - return currentpixelformat; + return stw_tls_get_data()->currentPixelFormat; } @@ -267,8 +266,8 @@ stw_pixelformat_set( if (index >= count) return FALSE; - currentpixelformat = iPixelFormat; - + 
stw_tls_get_data()->currentPixelFormat = iPixelFormat; + /* Some applications mistakenly use the undocumented wglSetPixelFormat * function instead of SetPixelFormat, so we call SetPixelFormat here to * avoid opengl32.dll's wglCreateContext to fail */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_tls.c b/src/gallium/state_trackers/wgl/shared/stw_tls.c new file mode 100644 index 00000000000..e72bafb8804 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_tls.c @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <windows.h> + +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "stw_tls.h" + +static DWORD tlsIndex = TLS_OUT_OF_INDEXES; + +boolean +stw_tls_init(void) +{ + tlsIndex = TlsAlloc(); + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return FALSE; + } + + return TRUE; +} + +boolean +stw_tls_init_thread(void) +{ + struct stw_tls_data *data; + + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return FALSE; + } + + data = MALLOC(sizeof(*data)); + if (!data) { + return FALSE; + } + + data->currentPixelFormat = 0; + data->currentDC = NULL; + data->currentGLRC = 0; + + TlsSetValue(tlsIndex, data); + + return TRUE; +} + +void +stw_tls_cleanup_thread(void) +{ + struct stw_tls_data *data; + + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return; + } + + data = (struct stw_tls_data *) TlsGetValue(tlsIndex); + TlsSetValue(tlsIndex, NULL); + FREE(data); +} + +void +stw_tls_cleanup(void) +{ + if (tlsIndex != TLS_OUT_OF_INDEXES) { + TlsFree(tlsIndex); + tlsIndex = TLS_OUT_OF_INDEXES; + } +} + +struct stw_tls_data * +stw_tls_get_data(void) +{ + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return NULL; + } + + return (struct stw_tls_data *) TlsGetValue(tlsIndex); +} diff --git a/src/gallium/state_trackers/wgl/shared/stw_tls.h b/src/gallium/state_trackers/wgl/shared/stw_tls.h new file mode 100644 index 00000000000..23b61e68ff5 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_tls.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef STW_TLS_H +#define STW_TLS_H + +struct stw_tls_data +{ + uint currentPixelFormat; + HDC currentDC; + UINT_PTR currentGLRC; +}; + +boolean +stw_tls_init(void); + +boolean +stw_tls_init_thread(void); + +void +stw_tls_cleanup_thread(void); + +void +stw_tls_cleanup(void); + +struct stw_tls_data * +stw_tls_get_data(void); + +#endif /* STW_TLS_H */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_winsys.h b/src/gallium/state_trackers/wgl/shared/stw_winsys.h index a85a9a22577..e4a1d4f979f 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_winsys.h +++ b/src/gallium/state_trackers/wgl/shared/stw_winsys.h @@ -53,6 +53,12 @@ struct stw_winsys boolean st_init(const struct stw_winsys *stw_winsys); +boolean +st_init_thread(void); + +void +st_cleanup_thread(void); + void st_cleanup(void); From 36e985e96e6da817042ba1b2dfadf96f85e32afb Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 13:06:28 +0100 Subject: [PATCH 03/26] winsys/gdi: Init state tracker's per-thread data. 
--- src/gallium/winsys/gdi/gdi_softpipe_winsys.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 440666d835e..d5d9431865c 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -312,9 +312,20 @@ DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { switch (fdwReason) { case DLL_PROCESS_ATTACH: - return st_init(&stw_winsys); + if (!st_init(&stw_winsys)) { + return FALSE; + } + return st_init_thread(); + + case DLL_THREAD_ATTACH: + return st_init_thread(); + + case DLL_THREAD_DETACH: + st_cleanup_thread(); + break; case DLL_PROCESS_DETACH: + st_cleanup_thread(); st_cleanup(); break; } From 5465f3adf93bd58b528bd6703b2367eb00c78c31 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 15:45:00 +0100 Subject: [PATCH 04/26] stw: Use u_handle_table to maintain context list. --- .../state_trackers/wgl/shared/stw_context.c | 24 +++++++------------ .../state_trackers/wgl/shared/stw_device.c | 19 ++++++++++----- .../state_trackers/wgl/shared/stw_device.h | 8 ++----- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c index 69f25d6187e..31cb025ac56 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_context.c +++ b/src/gallium/state_trackers/wgl/shared/stw_context.c @@ -135,17 +135,7 @@ stw_create_layer_context( pipe_mutex_lock( stw_dev->mutex ); { - UINT_PTR i; - - for (i = 0; i < STW_CONTEXT_MAX; i++) { - if (stw_dev->ctx_array[i].ctx == NULL) { - /* success: - */ - stw_dev->ctx_array[i].ctx = ctx; - hglrc = i + 1; - break; - } - } + hglrc = handle_table_add(stw_dev->ctx_table, ctx); } pipe_mutex_unlock( stw_dev->mutex ); @@ -195,12 +185,14 @@ stw_delete_context( if (WindowFromDC( ctx->hdc ) != NULL) ReleaseDC( WindowFromDC( ctx->hdc ), ctx->hdc ); 
- st_destroy_context( ctx->st ); + pipe_mutex_lock(stw_dev->mutex); + { + st_destroy_context(ctx->st); + FREE(ctx); + handle_table_remove(stw_dev->ctx_table, hglrc); + } + pipe_mutex_unlock(stw_dev->mutex); - FREE( ctx ); - - stw_dev->ctx_array[hglrc - 1].ctx = NULL; - ret = TRUE; } diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.c b/src/gallium/state_trackers/wgl/shared/stw_device.c index 4bec036fe35..3c1eb1ad393 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_device.c +++ b/src/gallium/state_trackers/wgl/shared/stw_device.c @@ -94,6 +94,11 @@ st_init(const struct stw_winsys *stw_winsys) pipe_mutex_init( stw_dev->mutex ); + stw_dev->ctx_table = handle_table_create(); + if (!stw_dev->ctx_table) { + goto error1; + } + pixelformat_init(); return TRUE; @@ -135,9 +140,12 @@ st_cleanup(void) pipe_mutex_lock( stw_dev->mutex ); { /* Ensure all contexts are destroyed */ - for (i = 0; i < STW_CONTEXT_MAX; i++) - if (stw_dev->ctx_array[i].ctx) - stw_delete_context( i + 1 ); + i = handle_table_get_first_handle(stw_dev->ctx_table); + while (i) { + stw_delete_context(i); + i = handle_table_get_next_handle(stw_dev->ctx_table, i); + } + handle_table_destroy(stw_dev->ctx_table); } pipe_mutex_unlock( stw_dev->mutex ); @@ -163,13 +171,12 @@ st_cleanup(void) struct stw_context * stw_lookup_context( UINT_PTR dhglrc ) { - if (dhglrc == 0 || - dhglrc >= STW_CONTEXT_MAX) + if (dhglrc == 0) return NULL; if (stw_dev == NULL) return NULL; - return stw_dev->ctx_array[dhglrc - 1].ctx; + return (struct stw_context *) handle_table_get(stw_dev->ctx_table, dhglrc); } diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.h b/src/gallium/state_trackers/wgl/shared/stw_device.h index 80da14b84f4..6a9cee0d028 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_device.h +++ b/src/gallium/state_trackers/wgl/shared/stw_device.h @@ -31,9 +31,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_thread.h" - - -#define STW_CONTEXT_MAX 32 +#include "util/u_handle_table.h" struct 
pipe_screen; @@ -45,9 +43,7 @@ struct stw_device pipe_mutex mutex; - struct { - struct stw_context *ctx; - } ctx_array[STW_CONTEXT_MAX]; + struct handle_table *ctx_table; #ifdef DEBUG unsigned long memdbg_no; From e8aa5a10185623a820364141b756d128a1171919 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 16:13:39 +0100 Subject: [PATCH 05/26] progs/wgl: Make context current to get GL_RENDERER string. --- progs/wgl/wglthreads/wglthreads.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/progs/wgl/wglthreads/wglthreads.c b/progs/wgl/wglthreads/wglthreads.c index 32f3e45edf5..9ca7f025dc3 100644 --- a/progs/wgl/wglthreads/wglthreads.c +++ b/progs/wgl/wglthreads/wglthreads.c @@ -483,7 +483,9 @@ create_window(struct winthread *wt, HGLRC shareCtx) wt->WinHeight = height; wt->NewSize = GL_TRUE; + wglMakeCurrent(hdc, ctx); printf("wglthreads: %d: GL_RENDERER = %s\n", wt->Index, (char *) glGetString(GL_RENDERER)); + wglMakeCurrent(NULL, NULL); if (Texture/* && wt->Index == 0*/) { MakeNewTexture(wt); From 9bbffcced4355ff11e11c5b01c4d0eea6b020119 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 16:16:12 +0100 Subject: [PATCH 06/26] progs/wgl: Create GL context in a thread that actually uses it. 
--- progs/wgl/sharedtex_mt/sharedtex_mt.c | 65 ++++++++++++++------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/progs/wgl/sharedtex_mt/sharedtex_mt.c b/progs/wgl/sharedtex_mt/sharedtex_mt.c index 137c9c10af2..c461530e4ba 100644 --- a/progs/wgl/sharedtex_mt/sharedtex_mt.c +++ b/progs/wgl/sharedtex_mt/sharedtex_mt.c @@ -49,6 +49,7 @@ struct window { HGLRC Context; float Angle; int Id; + HGLRC sharedContext; }; @@ -172,8 +173,6 @@ AddWindow(int xpos, int ypos, HGLRC sCtx) { struct window *win = &Windows[NumWindows]; WNDCLASS wc = {0}; - PIXELFORMATDESCRIPTOR pfd = {0}; - int visinfo; int width = 300, height = 300; if (NumWindows >= MAX_WINDOWS) @@ -208,33 +207,7 @@ AddWindow(int xpos, int ypos, HGLRC sCtx) Error("Couldn't create window"); } - win->hDC = GetDC(win->Win); - if (!win->hDC) { - Error("Couldn't obtain HDC"); - } - - pfd.cColorBits = 24; - pfd.cDepthBits = 24; - pfd.dwFlags = PFD_DOUBLEBUFFER | PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL; - pfd.iLayerType = PFD_MAIN_PLANE; - pfd.iPixelType = PFD_TYPE_RGBA; - pfd.nSize = sizeof(pfd); - pfd.nVersion = 1; - - visinfo = ChoosePixelFormat(win->hDC, &pfd); - if (!visinfo) { - Error("Unable to find RGB, Z, double-buffered visual"); - } - - SetPixelFormat(win->hDC, visinfo, &pfd); - win->Context = wglCreateContext(win->hDC); - if (!win->Context) { - Error("Couldn't create WGL context"); - } - - if (sCtx) { - wglShareLists(sCtx, win->Context); - } + win->sharedContext = sCtx; ShowWindow(win->Win, SW_SHOW); @@ -244,7 +217,6 @@ AddWindow(int xpos, int ypos, HGLRC sCtx) static void InitGLstuff(void) - { glGenTextures(3, Textures); @@ -432,9 +404,39 @@ threadRunner (void *arg) { struct thread_init_arg *tia = (struct thread_init_arg *) arg; struct window *win; + PIXELFORMATDESCRIPTOR pfd = {0}; + int visinfo; win = &Windows[tia->id]; + win->hDC = GetDC(win->Win); + if (!win->hDC) { + Error("Couldn't obtain HDC"); + } + + pfd.cColorBits = 24; + pfd.cDepthBits = 24; + pfd.dwFlags = PFD_DOUBLEBUFFER | 
PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL; + pfd.iLayerType = PFD_MAIN_PLANE; + pfd.iPixelType = PFD_TYPE_RGBA; + pfd.nSize = sizeof(pfd); + pfd.nVersion = 1; + + visinfo = ChoosePixelFormat(win->hDC, &pfd); + if (!visinfo) { + Error("Unable to find RGB, Z, double-buffered visual"); + } + + SetPixelFormat(win->hDC, visinfo, &pfd); + win->Context = wglCreateContext(win->hDC); + if (!win->Context) { + Error("Couldn't create WGL context"); + } + + if (win->sharedContext) { + wglShareLists(win->sharedContext, win->Context); + } + while (1) { MSG msg; @@ -464,6 +466,9 @@ threadRunner (void *arg) static void Resize(struct window *h, unsigned int width, unsigned int height) { + if (!h->Context) + return; + EnterCriticalSection(&h->drawMutex); if (!wglMakeCurrent(h->hDC, h->Context)) { From d6e877d0d716805907cbbaca3bcfcee84f98add6 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 16:36:23 +0100 Subject: [PATCH 07/26] stw: Do not err on nil context handle in MakeCurrent(). --- src/gallium/state_trackers/wgl/shared/stw_context.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c index 31cb025ac56..89df8b0a2a0 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_context.c +++ b/src/gallium/state_trackers/wgl/shared/stw_context.c @@ -281,9 +281,6 @@ stw_make_current( pipe_mutex_lock( stw_dev->mutex ); ctx = stw_lookup_context( hglrc ); pipe_mutex_unlock( stw_dev->mutex ); - - if (ctx == NULL) - return FALSE; stw_tls_get_data()->currentDC = hdc; stw_tls_get_data()->currentGLRC = hglrc; From cf0122e892df56bc3b013e5d92e487d0fd65f23d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 16:48:54 +0100 Subject: [PATCH 08/26] progs/wgl: Send a resize message after context have been created. 
--- progs/wgl/sharedtex_mt/sharedtex_mt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/progs/wgl/sharedtex_mt/sharedtex_mt.c b/progs/wgl/sharedtex_mt/sharedtex_mt.c index c461530e4ba..010eb873b85 100644 --- a/progs/wgl/sharedtex_mt/sharedtex_mt.c +++ b/progs/wgl/sharedtex_mt/sharedtex_mt.c @@ -437,6 +437,8 @@ threadRunner (void *arg) wglShareLists(win->sharedContext, win->Context); } + SendMessage(win->Win, WM_SIZE, 0, 0); + while (1) { MSG msg; From 66175aac7609ad314f25fbdff0d3958af310dc24 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 18 Mar 2009 12:07:09 -0700 Subject: [PATCH 09/26] Fix DRI2 accelerated EXT_texture_from_pixmap with GL_RGB format. This requires upgrading the interface so that the argument to glXBindTexImageEXT isn't just dropped on the floor. Note that this only fixes the accelerated path on Intel, as Mesa's texture format support is missing x8r8g8b8 support (right now, GL_RGB textures get uploaded as a8r8g8b8, but in this case we're not doing the upload so we can't really work around it that way). Fixes bugs with compositors trying to use shaders that use alpha channels, on windows without a valid alpha channel. Bug #19910 and likely others as well. 
Reviewed-by: Ian Romanick --- include/GL/internal/dri_interface.h | 16 +++++++++++-- include/GL/internal/glcore.h | 4 ++++ src/glx/x11/glx_pbuffer.c | 19 +++++++++++++++ src/glx/x11/glxclient.h | 1 + src/glx/x11/glxcmds.c | 18 ++++++++++---- src/mesa/drivers/dri/i915/i830_texstate.c | 10 +++++--- src/mesa/drivers/dri/i915/i915_texstate.c | 11 ++++++--- .../drivers/dri/i965/brw_wm_surface_state.c | 24 +++++++++++++------ src/mesa/drivers/dri/intel/intel_screen.c | 1 + src/mesa/drivers/dri/intel/intel_tex.h | 2 ++ src/mesa/drivers/dri/intel/intel_tex_image.c | 18 ++++++++++++-- 11 files changed, 102 insertions(+), 22 deletions(-) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index a726b932340..a83602bfd87 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -231,7 +231,7 @@ struct __DRItexOffsetExtensionRec { #define __DRI_TEX_BUFFER "DRI_TexBuffer" -#define __DRI_TEX_BUFFER_VERSION 1 +#define __DRI_TEX_BUFFER_VERSION 2 struct __DRItexBufferExtensionRec { __DRIextension base; @@ -239,11 +239,23 @@ struct __DRItexBufferExtensionRec { * Method to override base texture image with the contents of a * __DRIdrawable. * - * For GLX_EXT_texture_from_pixmap with AIGLX. + * For GLX_EXT_texture_from_pixmap with AIGLX. Deprecated in favor of + * setTexBuffer2 in version 2 of this interface */ void (*setTexBuffer)(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); + + /** + * Method to override base texture image with the contents of a + * __DRIdrawable, including the required texture format attribute. + * + * For GLX_EXT_texture_from_pixmap with AIGLX. 
+ */ + void (*setTexBuffer2)(__DRIcontext *pDRICtx, + GLint target, + GLint format, + __DRIdrawable *pDraw); }; /** diff --git a/include/GL/internal/glcore.h b/include/GL/internal/glcore.h index 547b1113707..18f657662af 100644 --- a/include/GL/internal/glcore.h +++ b/include/GL/internal/glcore.h @@ -178,4 +178,8 @@ typedef struct __GLcontextModesRec { #define GLX_TEXTURE_2D_BIT_EXT 0x00000002 #define GLX_TEXTURE_RECTANGLE_BIT_EXT 0x00000004 +#define GLX_TEXTURE_FORMAT_NONE_EXT 0x20D8 +#define GLX_TEXTURE_FORMAT_RGB_EXT 0x20D9 +#define GLX_TEXTURE_FORMAT_RGBA_EXT 0x20DA + #endif /* __gl_core_h_ */ diff --git a/src/glx/x11/glx_pbuffer.c b/src/glx/x11/glx_pbuffer.c index a602cd28817..6bcf965056a 100644 --- a/src/glx/x11/glx_pbuffer.c +++ b/src/glx/x11/glx_pbuffer.c @@ -189,6 +189,21 @@ determineTextureTarget(const int *attribs, int numAttribs) return target; } + + +static GLenum +determineTextureFormat(const int *attribs, int numAttribs) +{ + GLenum target = 0; + int i; + + for (i = 0; i < numAttribs; i++) { + if (attribs[2 * i] == GLX_TEXTURE_FORMAT_EXT) + return attribs[2 * i + 1]; + } + + return 0; +} #endif /** @@ -294,6 +309,9 @@ GetDrawableAttribute(Display * dpy, GLXDrawable drawable, if (pdraw != NULL && !pdraw->textureTarget) pdraw->textureTarget = determineTextureTarget((const int *) data, num_attributes); + if (pdraw != NULL && !pdraw->textureFormat) + pdraw->textureFormat = + determineTextureFormat((const int *) data, num_attributes); } #endif @@ -374,6 +392,7 @@ CreateDrawable(Display * dpy, const __GLcontextModes * fbconfig, } pdraw->textureTarget = determineTextureTarget(attrib_list, i); + pdraw->textureFormat = determineTextureFormat(attrib_list, i); } while (0); #endif diff --git a/src/glx/x11/glxclient.h b/src/glx/x11/glxclient.h index caf58bbd44a..c42e80a0e86 100644 --- a/src/glx/x11/glxclient.h +++ b/src/glx/x11/glxclient.h @@ -161,6 +161,7 @@ struct __GLXDRIdrawableRec { __GLXscreenConfigs *psc; GLenum textureTarget; __DRIdrawable *driDrawable; + 
GLenum textureFormat; /* EXT_texture_from_pixmap support */ }; /* diff --git a/src/glx/x11/glxcmds.c b/src/glx/x11/glxcmds.c index fc0e593cb35..e5c0db4c968 100644 --- a/src/glx/x11/glxcmds.c +++ b/src/glx/x11/glxcmds.c @@ -2631,11 +2631,19 @@ static void __glXBindTexImageEXT(Display *dpy, if (gc->driContext) { __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable, NULL); - if (pdraw != NULL) - (*pdraw->psc->texBuffer->setTexBuffer)(gc->__driContext, - pdraw->textureTarget, - pdraw->driDrawable); - + if (pdraw != NULL) { + if (pdraw->psc->texBuffer->base.version >= 2 && + pdraw->psc->texBuffer->setTexBuffer2 != NULL) { + (*pdraw->psc->texBuffer->setTexBuffer2)(gc->__driContext, + pdraw->textureTarget, + pdraw->textureFormat, + pdraw->driDrawable); + } else { + (*pdraw->psc->texBuffer->setTexBuffer)(gc->__driContext, + pdraw->textureTarget, + pdraw->driDrawable); + } + } return; } #endif diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index c718bb0055d..df43b779a79 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -38,7 +38,7 @@ static GLuint -translate_texture_format(GLuint mesa_format) +translate_texture_format(GLuint mesa_format, GLuint internal_format) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -56,7 +56,10 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -162,7 +165,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) 0, intelObj-> firstLevel); - format = translate_texture_format(firstImage->TexFormat->MesaFormat); + format = 
translate_texture_format(firstImage->TexFormat->MesaFormat, + firstImage->InternalFormat); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index adbb52a3a3b..6d25f8dd8ef 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -37,7 +37,8 @@ static GLuint -translate_texture_format(GLuint mesa_format, GLenum DepthMode) +translate_texture_format(GLuint mesa_format, GLuint internal_format, + GLenum DepthMode) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -55,7 +56,10 @@ translate_texture_format(GLuint mesa_format, GLenum DepthMode) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -173,7 +177,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) firstLevel); format = translate_texture_format(firstImage->TexFormat->MesaFormat, - tObj->DepthMode); + firstImage->InternalFormat, + tObj->DepthMode); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 9b320480b6d..e6113eff87e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -69,7 +69,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, + GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -89,10 +90,16 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum 
depth_mode ) return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + else + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case MESA_FORMAT_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + else + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -161,7 +168,7 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) struct brw_wm_surface_key { GLenum target, depthmode; dri_bo *bo; - GLint format; + GLint format, internal_format; GLint first_level, last_level; GLint width, height, depth; GLint pitch, cpp; @@ -199,9 +206,11 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - - if (key->bo) - surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode); + if (key->bo) { + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); + } else { switch (key->depth) { case 32: @@ -278,6 +287,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.offset = intelObj->textureOffset; } else { key.format = firstImage->TexFormat->MesaFormat; + key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; key.bo = intelObj->mt->region->buffer; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e8c074712cd..d20ea151877 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -211,6 +211,7 @@ static const __DRItexOffsetExtension intelTexOffsetExtension = { static const __DRItexBufferExtension intelTexBufferExtension = { { __DRI_TEX_BUFFER, 
__DRI_TEX_BUFFER_VERSION }, intelSetTexBuffer, + intelSetTexBuffer2, }; static const __DRIextension *intelScreenExtensions[] = { diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 742ccc043aa..f5372d82fb2 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -149,6 +149,8 @@ void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); void intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); +void intelSetTexBuffer2(__DRIcontext *pDRICtx, + GLint target, GLint format, __DRIdrawable *pDraw); GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit); diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 943636c37b2..e902187637d 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -714,7 +714,9 @@ intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, } void -intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint glx_texture_format, + __DRIdrawable *dPriv) { struct intel_framebuffer *intel_fb = dPriv->driverPrivate; struct intel_context *intel = pDRICtx->driverPrivate; @@ -745,7 +747,10 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (rb->region->cpp == 3 ? 
3 : 4); + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + internalFormat = GL_RGB; + else + internalFormat = GL_RGBA; mt = intel_miptree_create_for_region(intel, target, internalFormat, @@ -785,3 +790,12 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) _mesa_unlock_texture(&intel->ctx, texObj); } + +void +intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + /* The old interface didn't have the format argument, so copy our + * implementation's behavior at the time. + */ + intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); +} From 210b468722ae4d4a97ccd788ad9de58858a0c7fa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 20 Mar 2009 15:08:59 +0000 Subject: [PATCH 10/26] gallium/util: add upload manager helper module Add a module that will manage uploading and coalescing multiple user-buffers, malloc-buffers and other random data that doesn't happen to be in a GPU buffer already. The module stuffs multiple little uploads into larger GPU buffers to reduce create/destroy overheads, etc. 
--- src/gallium/auxiliary/util/Makefile | 1 + src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/u_upload_mgr.c | 220 ++++++++++++++++++++++ src/gallium/auxiliary/util/u_upload_mgr.h | 75 ++++++++ 4 files changed, 297 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_upload_mgr.c create mode 100644 src/gallium/auxiliary/util/u_upload_mgr.h diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 160df8dfa71..d68bdeadcc8 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -24,6 +24,7 @@ C_SOURCES = \ u_tile.c \ u_time.c \ u_timed_winsys.c \ + u_upload_mgr.c \ u_simple_screen.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 9d5dd006f08..0f15c632c3f 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -26,6 +26,7 @@ util = env.ConvenienceLibrary( 'u_tile.c', 'u_time.c', 'u_timed_winsys.c', + 'u_upload_mgr.c', 'u_simple_screen.c', ]) diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c new file mode 100644 index 00000000000..d9c0d7afa89 --- /dev/null +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Helper utility for uploading user buffers & other data, and + * coalescing small buffers into larger ones. 
+ */ + +#include "pipe/p_error.h" +#include "pipe/p_inlines.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "u_upload_mgr.h" + + +struct u_upload_mgr { + struct pipe_screen *screen; + + unsigned default_size; + unsigned alignment; + unsigned usage; + + /* The active buffer: + */ + struct pipe_buffer *buffer; + unsigned size; + unsigned offset; +}; + + +struct u_upload_mgr *u_upload_create( struct pipe_screen *screen, + unsigned default_size, + unsigned alignment, + unsigned usage ) +{ + struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr ); + + upload->default_size = default_size; + upload->screen = screen; + upload->alignment = alignment; + upload->usage = usage; + upload->buffer = NULL; + + return upload; +} + + +static INLINE void +my_buffer_write(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned size, unsigned dirty_size, + const void *data) +{ + uint8_t *map; + + assert(offset < buf->size); + assert(offset + size <= buf->size); + assert(dirty_size >= size); + assert(size); + + map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(map); + if(map) { + memcpy(map + offset, data, size); + pipe_buffer_flush_mapped_range(screen, buf, offset, dirty_size); + pipe_buffer_unmap(screen, buf); + } +} + +/* Release old buffer. + * + * This must usually be called prior to firing the command stream + * which references the upload buffer, as many memory managers will + * cause subsequent maps of a fired buffer to wait. + * + * Can improve this with a change to pipe_buffer_write to use the + * DONT_WAIT bit, but for now, it's easiest just to grab a new buffer. 
+ */ +void u_upload_flush( struct u_upload_mgr *upload ) +{ + pipe_buffer_reference( &upload->buffer, NULL ); + upload->size = 0; +} + + +void u_upload_destroy( struct u_upload_mgr *upload ) +{ + u_upload_flush( upload ); + FREE( upload ); +} + + +static enum pipe_error +u_upload_alloc_buffer( struct u_upload_mgr *upload, + unsigned min_size ) +{ + /* Release old buffer, if present: + */ + u_upload_flush( upload ); + + /* Allocate a new one: + */ + upload->size = align(MAX2(upload->default_size, min_size), 4096); + + upload->buffer = pipe_buffer_create( upload->screen, + upload->alignment, + upload->usage | PIPE_BUFFER_USAGE_CPU_WRITE, + upload->size ); + if (upload->buffer == NULL) + goto fail; + + upload->offset = 0; + return 0; + +fail: + if (upload->buffer) + pipe_buffer_reference( &upload->buffer, NULL ); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + + +enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned size, + const void *data, + unsigned *out_offset, + struct pipe_buffer **outbuf ) +{ + unsigned alloc_size = align( size, upload->alignment ); + enum pipe_error ret = PIPE_OK; + + if (upload->offset + alloc_size > upload->size) { + ret = u_upload_alloc_buffer( upload, alloc_size ); + if (ret) + return ret; + } + + /* Copy the data, using map_range if available: + */ + my_buffer_write( upload->screen, + upload->buffer, + upload->offset, + size, + alloc_size, + data ); + + /* Emit the return values: + */ + pipe_buffer_reference( outbuf, upload->buffer ); + *out_offset = upload->offset; + upload->offset += alloc_size; + return PIPE_OK; +} + + +/* As above, but upload the full contents of a buffer. Useful for + * uploading user buffers, avoids generating an explosion of GPU + * buffers if you have an app that does lots of small vertex buffer + * renders or DrawElements calls. 
+ */ +enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned offset, + unsigned size, + struct pipe_buffer *inbuf, + unsigned *out_offset, + struct pipe_buffer **outbuf ) +{ + enum pipe_error ret = PIPE_OK; + const char *map = NULL; + + map = (const char *)pipe_buffer_map( + upload->screen, inbuf, PIPE_BUFFER_USAGE_CPU_READ ); + + if (map == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto done; + } + + if (0) + debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size); + + ret = u_upload_data( upload, + size, + map + offset, + out_offset, + outbuf ); + if (ret) + goto done; + +done: + if (map) + pipe_buffer_unmap( upload->screen, inbuf ); + + return ret; +} diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h new file mode 100644 index 00000000000..745b5834af6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -0,0 +1,75 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Helper utility for uploading user buffers & other data, and + * coalescing small buffers into larger ones. + */ + +#ifndef U_UPLOAD_MGR_H +#define U_UPLOAD_MGR_H + +struct pipe_screen; +struct pipe_buffer; +struct u_upload_mgr; + + +struct u_upload_mgr *u_upload_create( struct pipe_screen *screen, + unsigned default_size, + unsigned alignment, + unsigned usage ); + +void u_upload_destroy( struct u_upload_mgr *upload ); + +/* Unmap and release old buffer. + * + * This must usually be called prior to firing the command stream + * which references the upload buffer, as many memory managers either + * don't like firing a mapped buffer or cause subsequent maps of a + * fired buffer to wait. For now, it's easiest just to grab a new + * buffer. + */ +void u_upload_flush( struct u_upload_mgr *upload ); + + +enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned size, + const void *data, + unsigned *out_offset, + struct pipe_buffer **outbuf ); + + +enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned offset, + unsigned size, + struct pipe_buffer *inbuf, + unsigned *out_offset, + struct pipe_buffer **outbuf ); + + + +#endif + From 48f6e754898879898165dca013157dffe2babb12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 18 Mar 2009 16:54:25 +0000 Subject: [PATCH 11/26] gallium: Explain what happens if buffer_flush_mapped_range isn't called. 
--- src/gallium/include/pipe/p_screen.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index ed3a026023b..ceac755e71e 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -223,6 +223,13 @@ struct pipe_screen { * specified to buffer_map_range. This is different from the * ARB_map_buffer_range semantics because we don't forbid multiple mappings * of the same buffer (yet). + * + * If the buffer was mapped for writing and no buffer_flush_mapped_range + * call was done until the buffer_unmap is called then the pipe driver will + * assume that the whole buffer was written. This is for backward + * compatibility purposes and may affect performance -- the state tracker + * should always specify exactly what got written while the buffer was + * mapped. */ void (*buffer_flush_mapped_range)( struct pipe_screen *screen, struct pipe_buffer *buf, From 8852ac2b354522b194e32f8651e3511e69586bd1 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 19 Mar 2009 12:29:03 -0700 Subject: [PATCH 12/26] r300-gallium: A bit more invariant state. --- src/gallium/drivers/r300/r300_reg.h | 17 +++++++--- .../drivers/r300/r300_state_invariant.c | 34 ++++++++++--------- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 6f3ad970abc..3fe45e13932 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -293,10 +293,19 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ /* Programmable Stream Control Signed Normalize Control */ -#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc -# define SGN_NORM_ZERO 0 -# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 -# define SGN_NORM_NO_ZERO 2 +#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc +# define SGN_NORM_ZERO 0 +# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 +# define SGN_NORM_NO_ZERO 2 +# define R300_SGN_NORM_NO_ZERO (SGN_NORM_NO_ZERO | \ + (SGN_NORM_NO_ZERO << 2) | (SGN_NORM_NO_ZERO << 4) | \ + (SGN_NORM_NO_ZERO << 6) | (SGN_NORM_NO_ZERO << 8) | \ + (SGN_NORM_NO_ZERO << 10) | (SGN_NORM_NO_ZERO << 12) | \ + (SGN_NORM_NO_ZERO << 14) | (SGN_NORM_NO_ZERO << 16) | \ + (SGN_NORM_NO_ZERO << 18) | (SGN_NORM_NO_ZERO << 20) | \ + (SGN_NORM_NO_ZERO << 22) | (SGN_NORM_NO_ZERO << 24) | \ + (SGN_NORM_NO_ZERO << 26) | (SGN_NORM_NO_ZERO << 28) | \ + (SGN_NORM_NO_ZERO << 30)) /* gap */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 3d51a8e65d2..e1837b63801 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -34,11 +34,11 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(24 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(30 + (caps->has_tcl ? 2: 0)); + /*** Graphics Backend (GB) ***/ /* Various GB enables */ - OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | - R300_GB_LINE_STUFF_ENABLE | R300_GB_TRIANGLE_STUFF_ENABLE); + OUT_CS_REG(R300_GB_ENABLE, 0x0); /* Subpixel multisampling for AA */ OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); OUT_CS_REG(R300_GB_MSPOS1, 0x66666666); @@ -49,6 +49,8 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); /* AA enable */ OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); + + /*** Geometry Assembly (GA) ***/ /* GA errata fixes. 
*/ if (caps->is_r500) { OUT_CS_REG(R300_GA_ENHANCE, @@ -62,13 +64,19 @@ void r300_emit_invariant_state(struct r300_context* r300) R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE); } - /* Fog block. */ - OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000000); - OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x00000000); - OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x00000000); - OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x00000000); - OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000); + /*** Fog (FG) ***/ + OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); + OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); + OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); + OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); + OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); + /*** VAP ***/ + /* Max and min vertex index clamp. */ + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xffffff); + /* Sign/normalize control */ + OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); /* TCL-only stuff */ if (caps->has_tcl) { /* Amount of time to wait for vertex fetches in PVS */ @@ -78,7 +86,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(99 + (caps->has_tcl ? 26 : 0)); + BEGIN_CS(91 + (caps->has_tcl ? 26 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -86,9 +94,6 @@ void r300_emit_invariant_state(struct r300_context* r300) R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT); - /* Max and min vertex index clamp. 
*/ - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xFFFFFF); - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); /* XXX endian */ if (caps->has_tcl) { OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP); @@ -103,8 +108,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP | R300_VAP_TCL_BYPASS); } - /* XXX magic number not in r300_reg */ - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); /* XXX point tex stuffing */ OUT_CS_REG_SEQ(R300_GA_POINT_S0, 1); OUT_CS_32F(0.0); @@ -157,7 +160,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); /* Vertex size. */ OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, 0x00000003); OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x00000000); OUT_CS_REG(R300_TX_ENABLE, 0x0); From f3f5e04103d804a23cfbe8bd264c8e0db64bd31f Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 19 Mar 2009 20:32:08 -0700 Subject: [PATCH 13/26] r300-gallium: Clean up some emit, and some state handlers. --- src/gallium/drivers/r300/r300_emit.c | 17 +------------ src/gallium/drivers/r300/r300_emit.h | 10 ++++++++ src/gallium/drivers/r300/r300_state_inlines.h | 25 ++++++++++++++++--- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index a2e771bd1b2..9bfb89626cd 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -152,21 +152,6 @@ void r500_emit_fragment_shader(struct r300_context* r300, END_CS; } -/* Translate pipe_format into US_OUT_FMT. Note that formats are stored from - * C3 to C0. 
*/ -uint32_t translate_out_fmt(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - return R300_US_OUT_FMT_C4_8 | - R300_C0_SEL_B | R300_C1_SEL_G | - R300_C2_SEL_R | R300_C3_SEL_A; - default: - return R300_US_OUT_FMT_UNUSED; - } - return 0; -} - /* XXX add pitch, stride, clean up */ void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb) @@ -182,7 +167,7 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), - translate_out_fmt(fb->cbufs[i]->format)); + r300_translate_out_fmt(fb->cbufs[i]->format)); } if (fb->zsbuf) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 4aba1ee08ce..0bc1f90e6ab 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -28,6 +28,7 @@ #include "r300_context.h" #include "r300_cs.h" #include "r300_screen.h" +#include "r300_state_inlines.h" void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend); @@ -52,11 +53,20 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); +void r300_emit_sampler(struct r300_context* r300, + struct r300_sampler_state* sampler, unsigned offset); + void r300_emit_scissor_state(struct r300_context* r300, struct r300_scissor_state* scissor); +void r300_emit_texture(struct r300_context* r300, + struct r300_texture* tex, unsigned offset); + void r300_emit_vertex_format_state(struct r300_context* r300); +void r300_emit_viewport_state(struct r300_context* r300, + struct r300_viewport_state* viewport); + /* Emit all dirty state. 
*/ void r300_emit_dirty_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index fd92c71756b..b80ff1c1aba 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -297,8 +297,7 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_A32R32G32B32: return R300_COLOR_FORMAT_ARGB32323232; case PIPE_FORMAT_A16R16G16B16: - return R300_COLOR_FORMAT_ARGB16161616; */ - /* XXX Not in pipe_format + return R300_COLOR_FORMAT_ARGB16161616; case PIPE_FORMAT_A10R10G10B10_UNORM: return R500_COLOR_FORMAT_ARGB10101010; case PIPE_FORMAT_A2R10G10B10_UNORM: @@ -306,7 +305,7 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_I10_UNORM: return R500_COLOR_FORMAT_I10; */ default: - debug_printf("r300: Implementation error: " \ + debug_printf("r300: Implementation error: " "Got unsupported color format %s in %s\n", pf_name(format), __FUNCTION__); break; @@ -324,7 +323,7 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) case PIPE_FORMAT_Z24S8_UNORM: return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; default: - debug_printf("r300: Implementation error: " \ + debug_printf("r300: Implementation error: " "Got unsupported ZS format %s in %s\n", pf_name(format), __FUNCTION__); break; @@ -332,6 +331,24 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) return 0; } +/* Translate pipe_format into US_OUT_FMT. + * Note that formats are stored from C3 to C0. 
*/ +static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_B | R300_C1_SEL_G | + R300_C2_SEL_R | R300_C3_SEL_A; + default: + debug_printf("r300: Implementation error: " + "Got unsupported output format %s in %s\n", + pf_name(format), __FUNCTION__); + return R300_US_OUT_FMT_UNUSED; + } + return 0; +} + /* Non-CSO state. (For now.) */ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) From adb40a94b0d8a023cfa900017dc17e26179c1cfd Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 19 Mar 2009 20:36:59 -0700 Subject: [PATCH 14/26] r300-gallium: Clean up r300_swtcl_emit. Some compile warnings, some statements without effect. --- src/gallium/drivers/r300/r300_swtcl_emit.c | 27 ++++------------------ 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/r300/r300_swtcl_emit.c b/src/gallium/drivers/r300/r300_swtcl_emit.c index 3db09514c69..c82ee9c087b 100644 --- a/src/gallium/drivers/r300/r300_swtcl_emit.c +++ b/src/gallium/drivers/r300/r300_swtcl_emit.c @@ -66,7 +66,7 @@ r300_swtcl_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return &r300->vertex_info; + return &r300->vertex_info.vinfo; } static boolean r300_swtcl_render_allocate_vertices(struct vbuf_render* render, @@ -177,7 +177,6 @@ static boolean r300_swtcl_render_set_primitive(struct vbuf_render* render, static void prepare_render(struct r300_swtcl_render* render, unsigned count) { struct r300_context* r300 = render->r300; - int i; CS_LOCALS(r300); @@ -210,7 +209,6 @@ static void r300_swtcl_render_draw_arrays(struct vbuf_render* render, { struct r300_swtcl_render* r300render = r300_swtcl_render(render); struct r300_context* r300 = r300render->r300; - struct pipe_screen* screen = r300->context.screen; CS_LOCALS(r300); @@ -239,24 +237,22 @@ static void r300_swtcl_render_draw(struct vbuf_render* render, 
CS_LOCALS(r300); - count /= 4; - prepare_render(r300render, count); /* Send our indices into an index buffer. */ index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, - count * 4); + count); if (!index_buffer) { return; } index_map = pipe_buffer_map(screen, index_buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(index_map, indices, count * 4); + memcpy(index_map, indices, count); pipe_buffer_unmap(screen, index_buffer); debug_printf("r300: Doing indexbuf render, count %d\n", count); -#if 0 + BEGIN_CS(5); OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0)); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | @@ -266,7 +262,6 @@ static void r300_swtcl_render_draw(struct vbuf_render* render, OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); OUT_CS_RELOC(index_buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; -#endif } static void r300_swtcl_render_destroy(struct vbuf_render* render) @@ -277,7 +272,6 @@ static void r300_swtcl_render_destroy(struct vbuf_render* render) static struct vbuf_render* r300_swtcl_render_create(struct r300_context* r300) { struct r300_swtcl_render* r300render = CALLOC_STRUCT(r300_swtcl_render); - struct pipe_screen* screen = r300->context.screen; r300render->r300 = r300; @@ -295,19 +289,6 @@ static struct vbuf_render* r300_swtcl_render_create(struct r300_context* r300) r300render->base.release_vertices = r300_swtcl_render_release_vertices; r300render->base.destroy = r300_swtcl_render_destroy; - /* XXX bonghits ahead - r300render->vbo_alloc_size = 128 * 4096; - r300render->vbo_size = r300render->vbo_alloc_size; - r300render->vbo_offset = 0; - r300render->vbo = pipe_buffer_create(screen, - 64, - PIPE_BUFFER_USAGE_VERTEX, - r300render->vbo_size); - r300render->vbo_map = pipe_buffer_map(screen, - r300render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); - pipe_buffer_unmap(screen, r300render->vbo); */ - return &r300render->base; } From 04fe31cd5efc5703b9cd975391a992866432f59d Mon Sep 17 00:00:00 2001 From: Corbin Simpson 
Date: Fri, 20 Mar 2009 00:15:03 -0700 Subject: [PATCH 15/26] r300-gallium: Properly offset scissors. As per r300_reg, classic Mesa, and xf86-video-ati. --- src/gallium/drivers/r300/r300_state.c | 22 ++++++++++++++++------ src/gallium/drivers/r300/r300_surface.c | 12 ++++++++++-- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 58bce22fc81..2a026e7fcac 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -515,12 +515,22 @@ static void r300_set_scissor_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); draw_flush(r300->draw); - r300->scissor_state->scissor_top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->scissor_bottom_right = - (state->maxx << R300_SCISSORS_X_SHIFT) | - (state->maxy << R300_SCISSORS_Y_SHIFT); + if (r300_screen(r300->context.screen)->caps->is_r500) { + r300->scissor_state->scissor_top_left = + (state->minx << R300_SCISSORS_X_SHIFT) | + (state->miny << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->scissor_bottom_right = + (state->maxx << R300_SCISSORS_X_SHIFT) | + (state->maxy << R300_SCISSORS_Y_SHIFT); + } else { + /* Offset of 1440 in non-R500 chipsets. 
*/ + r300->scissor_state->scissor_top_left = + ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->scissor_bottom_right = + ((state->maxx + 1440) << R300_SCISSORS_X_SHIFT) | + ((state->maxy + 1440) << R300_SCISSORS_Y_SHIFT); + } r300->dirty_state |= R300_NEW_SCISSOR; } diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 2cc0677e52c..3672f60b1b8 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -80,8 +80,16 @@ static void r300_surface_fill(struct pipe_context* pipe, /* Pixel scissors */ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); - OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); + if (caps->is_r500) { + OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); + OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); + } else { + /* Non-R500 chipsets have an offset of 1440 in their scissors. */ + OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) | + ((y + 1440) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((w + 1440) << R300_SCISSORS_X_SHIFT) | + ((h + 1440) << R300_SCISSORS_Y_SHIFT)); + } /* The size of the point we're about to draw, in sixths of pixels */ OUT_CS_REG(R300_GA_POINT_SIZE, From f1429580848b471c487e55a9a81b904452f50df5 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Mar 2009 00:35:38 -0700 Subject: [PATCH 16/26] r300-gallium: Clean up surface_fill, prep for surface_copy code. 
--- src/gallium/drivers/r300/r300_surface.c | 121 +++++++++++------------- 1 file changed, 54 insertions(+), 67 deletions(-) diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 3672f60b1b8..86fe3fc4f97 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -23,6 +23,55 @@ #include "r300_surface.h" +static void r300_surface_setup(struct pipe_context* pipe, + struct pipe_surface* dest, + unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct r300_context* r300 = r300_context(pipe); + CS_LOCALS(r300); + struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; + struct r300_texture* tex = (struct r300_texture*)dest->texture; + unsigned pixpitch = tex->stride / tex->tex.block.size; + + r300_emit_blend_state(r300, &blend_clear_state); + r300_emit_blend_color_state(r300, &blend_color_clear_state); + r300_emit_dsa_state(r300, &dsa_clear_state); + r300_emit_rs_state(r300, &rs_clear_state); + + BEGIN_CS(15); + + /* Pixel scissors. */ + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + if (caps->is_r500) { + OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); + OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); + } else { + /* Non-R500 chipsets have an offset of 1440 in their scissors. */ + OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) | + ((y + 1440) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((w + 1440) << R300_SCISSORS_X_SHIFT) | + ((h + 1440) << R300_SCISSORS_Y_SHIFT)); + } + + /* Flush colorbuffer and blend caches. */ + OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); + OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + + /* Setup colorbuffer. 
*/ + OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | + r300_translate_colorformat(tex->tex.format)); + OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf); + + END_CS; +} + /* Provides pipe_context's "surface_fill". Commonly used for clearing * buffers. */ static void r300_surface_fill(struct pipe_context* pipe, @@ -53,10 +102,7 @@ static void r300_surface_fill(struct pipe_context* pipe, return; } - r300_emit_blend_state(r300, &blend_clear_state); - r300_emit_blend_color_state(r300, &blend_color_clear_state); - r300_emit_dsa_state(r300, &dsa_clear_state); - r300_emit_rs_state(r300, &rs_clear_state); + r300_surface_setup(r300, dest, x, y, w, h); /* Fragment shader setup */ if (caps->is_r500) { @@ -67,7 +113,7 @@ static void r300_surface_fill(struct pipe_context* pipe, r300_emit_rs_block_state(r300, &r300_rs_block_clear_state); } - BEGIN_CS(36); + BEGIN_CS(21); /* Viewport setup */ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); @@ -78,37 +124,11 @@ static void r300_surface_fill(struct pipe_context* pipe, OUT_CS_32F(1.0); OUT_CS_32F(0.0); - /* Pixel scissors */ - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - if (caps->is_r500) { - OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); - OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); - } else { - /* Non-R500 chipsets have an offset of 1440 in their scissors. */ - OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) | - ((y + 1440) << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((w + 1440) << R300_SCISSORS_X_SHIFT) | - ((h + 1440) << R300_SCISSORS_Y_SHIFT)); - } - /* The size of the point we're about to draw, in sixths of pixels */ OUT_CS_REG(R300_GA_POINT_SIZE, ((h * 6) & R300_POINTSIZE_Y_MASK) | ((w * 6) << R300_POINTSIZE_X_SHIFT)); - /* Flush colorbuffer and blend caches. 
*/ - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - - OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | - r300_translate_colorformat(tex->tex.format)); - OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); /* XXX Packet3 */ OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | @@ -151,47 +171,14 @@ static void r300_surface_copy(struct pipe_context* pipe, " dimensions %dx%d (pixel pitch %d)\n", src, srcx, srcy, dest, destx, desty, w, h, pixpitch); + /* if ((srctex == desttex) && + ((destx < srcx + w) || (srcx < destx + w)) && + ((desty < srcy + h) || (srcy < destx + h))) { */ if (TRUE) { debug_printf("r300: Falling back on surface_copy\n"); return util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, w, h); } -#if 0 - BEGIN_CS(); - OUT_CS_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT,(RADEON_DEFAULT_SC_RIGHT_MAX | - RADEON_DEFAULT_SC_BOTTOM_MAX)); - OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, (RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_SRC_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_NONE | - (datatype << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP[rop].rop | - RADEON_DP_SRC_SOURCE_MEMORY | - RADEON_GMC_CLR_CMP_CNTL_DIS)); - OUT_CS_REG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff); - OUT_CS_REG(RADEON_DP_BRUSH_BKGD_CLR, 0x0); - OUT_CS_REG(RADEON_DP_SRC_FRGD_CLR, 0xffffffff); - OUT_CS_REG(RADEON_DP_SRC_BKGD_CLR, 0x0); - OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask); - OUT_ACCEL_REG(RADEON_DP_CNTL, ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) | - (info->accel_state->ydir >= 0 ? 
RADEON_DST_Y_TOP_TO_BOTTOM : 0)); -); - - OUT_CS_REG_SEQ(RADEON_DST_PITCH_OFFSET, 1); - OUT_CS_RELOC(desttex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - - OUT_CS_REG_SEQ(RADEON_SRC_PITCH_OFFSET, 1); - OUT_CS_RELOC(srctex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); - - OUT_CS_REG(RADEON_SRC_Y_X, (srcy << 16) | srcx); - OUT_CS_REG(RADEON_DST_Y_X, (desty << 16) | destx); - OUT_CS_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); - OUT_CS_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); - OUT_CS_REG(RADEON_WAIT_UNTIL, - RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); - END_CS; -#endif } void r300_init_surface_functions(struct r300_context* r300) From 8066edb2a254d15ed92c2d350a7799adf3cca0d7 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Mar 2009 00:43:29 -0700 Subject: [PATCH 17/26] r300-gallium: Simplify/neaten up packet3. Deck chairs on the Hindenburg. :3 --- src/gallium/drivers/r300/r300_cs.h | 3 --- src/gallium/drivers/r300/r300_cs_inlines.h | 9 +++++++++ src/gallium/drivers/r300/r300_surface.c | 4 ++-- src/gallium/drivers/r300/r300_swtcl_emit.c | 8 ++++---- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d8038ff1e19..443dfc0233d 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -47,9 +47,6 @@ #define CP_PACKET0(register, count) \ (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) -#define CP_PACKET3(op, count) \ - (RADEON_CP_PACKET3 | (op) | ((count) << 16)) - #define CS_LOCALS(context) \ struct r300_winsys* cs_winsys = context->winsys; \ struct radeon_cs* cs = cs_winsys->cs; \ diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h index 03bb608eb9a..64bd58193a7 100644 --- a/src/gallium/drivers/r300/r300_cs_inlines.h +++ b/src/gallium/drivers/r300/r300_cs_inlines.h @@ -36,15 +36,24 @@ OUT_CS(CP_PACKET0(register, ((count) - 1)) | 
RADEON_ONE_REG_WR); \ } while (0) +/* XXX might no longer be needed */ #define R300_PACIFY do { \ OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 14) | (1 << 15) | (1 << 16) | (1 << 17) | \ (1 << 18)); \ } while (0) +/* XXX do we still use this? */ #define R300_SCREENDOOR do { \ OUT_CS_REG(R300_SC_SCREENDOOR, 0x0); \ R300_PACIFY; \ OUT_CS_REG(R300_SC_SCREENDOOR, 0xffffff); \ } while (0) +#define CP_PACKET3(op, count) \ + (RADEON_CP_PACKET3 | (op) | ((count) << 16)) + +#define R300_CS_PKT3(op, count) do { \ + OUT_CS(CP_PACKET3(op, count)); \ +} while (0) + #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 86fe3fc4f97..db18975a10f 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -129,8 +129,8 @@ static void r300_surface_fill(struct pipe_context* pipe, ((h * 6) & R300_POINTSIZE_Y_MASK) | ((w * 6) << R300_POINTSIZE_X_SHIFT)); - /* XXX Packet3 */ - OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); + /* Packet3 with our point vertex */ + OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8); OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | (1 << R300_PRIM_NUM_VERTICES_SHIFT)); OUT_CS_32F(w / 2.0); diff --git a/src/gallium/drivers/r300/r300_swtcl_emit.c b/src/gallium/drivers/r300/r300_swtcl_emit.c index c82ee9c087b..83c25f496bf 100644 --- a/src/gallium/drivers/r300/r300_swtcl_emit.c +++ b/src/gallium/drivers/r300/r300_swtcl_emit.c @@ -194,7 +194,7 @@ static void prepare_render(struct r300_swtcl_render* render, unsigned count) * VBPNTR [relocated BO] */ BEGIN_CS(7); - OUT_CS(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 3)); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); OUT_CS(1); OUT_CS(r300->vertex_info.vinfo.size | (r300->vertex_info.vinfo.size << 8)); @@ -219,7 +219,7 @@ static void r300_swtcl_render_draw_arrays(struct vbuf_render* render, debug_printf("r300: Doing vbuf render, count %d\n", count); BEGIN_CS(2); - OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0)); + 
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300render->hwprim); END_CS; @@ -254,11 +254,11 @@ static void r300_swtcl_render_draw(struct vbuf_render* render, debug_printf("r300: Doing indexbuf render, count %d\n", count); BEGIN_CS(5); - OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0)); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - OUT_CS(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2)); + OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); OUT_CS_RELOC(index_buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; From edfaa686091a4f6238b8f315a475d90ff2c2f5f5 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Mar 2009 00:48:53 -0700 Subject: [PATCH 18/26] r300-gallium: Put r300_cs_inlines to bed. Guess it was a mistake in the first place. Oops. --- src/gallium/drivers/r300/r300_cs.h | 17 ++++++- src/gallium/drivers/r300/r300_cs_inlines.h | 59 ---------------------- 2 files changed, 16 insertions(+), 60 deletions(-) delete mode 100644 src/gallium/drivers/r300/r300_cs_inlines.h diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 443dfc0233d..2b9a441147d 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -115,6 +115,21 @@ cs_winsys->flush_cs(cs); \ } while (0) -#include "r300_cs_inlines.h" +#define RADEON_ONE_REG_WR (1 << 15) + +#define OUT_CS_ONE_REG(register, count) do { \ + if (VERY_VERBOSE_REGISTERS) \ + debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ + count, register); \ + assert(register); \ + OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ +} while (0) + +#define CP_PACKET3(op, count) \ + (RADEON_CP_PACKET3 | (op) | ((count) << 16)) + +#define R300_CS_PKT3(op, count) do { \ + OUT_CS(CP_PACKET3(op, count)); \ +} 
while (0) #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h deleted file mode 100644 index 64bd58193a7..00000000000 --- a/src/gallium/drivers/r300/r300_cs_inlines.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -/* r300_cs_inlines: This is just a handful of useful inlines for sending - * (very) common instructions to the CS buffer. Should only be included from - * r300_cs.h, probably. 
*/ - -#ifdef R300_CS_H - -#define RADEON_ONE_REG_WR (1 << 15) - -#define OUT_CS_ONE_REG(register, count) do { \ - if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ - count, register); \ - assert(register); \ - OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ -} while (0) - -/* XXX might no longer be needed */ -#define R300_PACIFY do { \ - OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 14) | (1 << 15) | (1 << 16) | (1 << 17) | \ - (1 << 18)); \ -} while (0) - -/* XXX do we still use this? */ -#define R300_SCREENDOOR do { \ - OUT_CS_REG(R300_SC_SCREENDOOR, 0x0); \ - R300_PACIFY; \ - OUT_CS_REG(R300_SC_SCREENDOOR, 0xffffff); \ -} while (0) - -#define CP_PACKET3(op, count) \ - (RADEON_CP_PACKET3 | (op) | ((count) << 16)) - -#define R300_CS_PKT3(op, count) do { \ - OUT_CS(CP_PACKET3(op, count)); \ -} while (0) - -#endif /* R300_CS_H */ From f411a66c0679c1aa7a9ee3d1eb633a8cbf3ef5f2 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Mar 2009 14:47:49 -0700 Subject: [PATCH 19/26] r300-gallium: Misspelled macro name. 
*pulls paper bag down over head* --- src/gallium/drivers/r300/r300_cs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 2b9a441147d..9913678d272 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -128,7 +128,7 @@ #define CP_PACKET3(op, count) \ (RADEON_CP_PACKET3 | (op) | ((count) << 16)) -#define R300_CS_PKT3(op, count) do { \ +#define OUT_CS_PKT3(op, count) do { \ OUT_CS(CP_PACKET3(op, count)); \ } while (0) From c9caecaffaa6144668a2c732467b49872981b656 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 20 Mar 2009 09:20:53 -0600 Subject: [PATCH 20/26] docs: updated Mesa extension enum info --- docs/enums.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/enums.txt b/docs/enums.txt index 6c43fc18914..b37768e2027 100644 --- a/docs/enums.txt +++ b/docs/enums.txt @@ -1,4 +1,6 @@ +See the OpenGL ARB enum registry at http://www.opengl.org/registry/api/enum.spec + Blocks allocated to Mesa: 0x8750-0x875F 0x8BB0-0x8BBF @@ -30,12 +32,12 @@ MESA_ycbcr_texture.spec: GL_MESA_pack_invert.spec GL_PACK_INVERT_MESA 0x8758 -GL_MESA_shader_debug.spec: +GL_MESA_shader_debug.spec: (obsolete) GL_DEBUG_OBJECT_MESA 0x8759 GL_DEBUG_PRINT_MESA 0x875A GL_DEBUG_ASSERT_MESA 0x875B -GL_MESA_program_debug.spec: +GL_MESA_program_debug.spec: (obsolete) GL_FRAGMENT_PROGRAM_CALLBACK_MESA 0x???? GL_VERTEX_PROGRAM_CALLBACK_MESA 0x???? GL_FRAGMENT_PROGRAM_POSITION_MESA 0x???? @@ -45,3 +47,11 @@ GL_MESA_program_debug.spec: GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA 0x???? GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA 0x???? 
+GL_MESAX_texture_stack: + GL_TEXTURE_1D_STACK_MESAX 0x8759 + GL_TEXTURE_2D_STACK_MESAX 0x875A + GL_PROXY_TEXTURE_1D_STACK_MESAX 0x875B + GL_PROXY_TEXTURE_2D_STACK_MESAX 0x875C + GL_TEXTURE_1D_STACK_BINDING_MESAX 0x875D + GL_TEXTURE_2D_STACK_BINDING_MESAX 0x875E + From 12256fc2b2e0a54db24210a4b86f6fb5919d0fe8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 20 Mar 2009 17:08:30 -0600 Subject: [PATCH 21/26] mesa: linear scan register allocation for shader programs This is a check-point commit; not turned on yet. Use the linear scan register allocation algorithm to re-allocate temporary registers. This is done by computing the live intervals for registers and reallocating temps with that information. For some shaders this dramatically reduces the number of temp registers needed. For the time being we give up on a few cases such as relative-indexed temps and subroutine calls (but we inline most GLSL functions anyway). --- src/mesa/shader/prog_optimize.c | 428 ++++++++++++++++++++++++++++++-- 1 file changed, 407 insertions(+), 21 deletions(-) diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c index ec06da141da..458a69f70b9 100644 --- a/src/mesa/shader/prog_optimize.c +++ b/src/mesa/shader/prog_optimize.c @@ -33,6 +33,9 @@ #include "prog_print.h" +#define MAX_LOOP_NESTING 50 + + static GLboolean dbg = GL_FALSE; @@ -75,6 +78,37 @@ remove_instructions(struct gl_program *prog, const GLboolean *removeFlags) } +/** + * Remap register indexes according to map. 
+ * \param prog the program to search/replace + * \param file the type of register file to search/replace + * \param map maps old register indexes to new indexes + */ +static void +replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) +{ + GLuint i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == file) { + GLuint index = inst->SrcReg[j].Index; + ASSERT(map[index] >= 0); + inst->SrcReg[j].Index = map[index]; + } + } + if (inst->DstReg.File == file) { + const GLuint index = inst->DstReg.Index; + ASSERT(map[index] >= 0); + inst->DstReg.Index = map[index]; + } + } +} + + /** * Consolidate temporary registers to use low numbers. For example, if the * shader only uses temps 4, 5, 8, replace them with 0, 1, 2. @@ -83,7 +117,7 @@ static void _mesa_consolidate_registers(struct gl_program *prog) { GLboolean tempUsed[MAX_PROGRAM_TEMPS]; - GLuint tempMap[MAX_PROGRAM_TEMPS]; + GLint tempMap[MAX_PROGRAM_TEMPS]; GLuint tempMax = 0, i; if (dbg) { @@ -92,6 +126,10 @@ _mesa_consolidate_registers(struct gl_program *prog) memset(tempUsed, 0, sizeof(tempUsed)); + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + tempMap[i] = -1; + } + /* set tempUsed[i] if temporary [i] is referenced */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; @@ -132,26 +170,8 @@ _mesa_consolidate_registers(struct gl_program *prog) } } - /* now replace occurances of old temp indexes with new indexes */ - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = prog->Instructions + i; - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - GLuint index = inst->SrcReg[j].Index; - assert(index <= tempMax); - 
assert(tempUsed[index]); - inst->SrcReg[j].Index = tempMap[index]; - } - } - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - const GLuint index = inst->DstReg.Index; - assert(tempUsed[index]); - assert(index <= tempMax); - inst->DstReg.Index = tempMap[index]; - } - } + replace_regs(prog, PROGRAM_TEMPORARY, tempMap); + if (dbg) { _mesa_printf("Optimize: End register consolidation\n"); } @@ -409,6 +429,370 @@ _mesa_remove_extra_moves(struct gl_program *prog) } +/** A live register interval */ +struct interval +{ + GLuint Reg; /** The temporary register index */ + GLuint Start, End; /** Start/end instruction numbers */ +}; + + +/** A list of register intervals */ +struct interval_list +{ + GLuint Num; + struct interval Intervals[MAX_PROGRAM_TEMPS]; +}; + + +static void +append_interval(struct interval_list *list, const struct interval *inv) +{ + list->Intervals[list->Num++] = *inv; +} + + +/** Insert interval inv into list, sorted by interval end */ +static void +insert_interval_by_end(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could do a binary search insertion here since list is sorted */ + GLint i = list->Num - 1; + while (i >= 0 && list->Intervals[i].End > inv->End) { + list->Intervals[i + 1] = list->Intervals[i]; + i--; + } + list->Intervals[i + 1] = *inv; + list->Num++; + +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); + } + } +#endif +} + + +/** Remove the given interval from the interval list */ +static void +remove_interval(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could binary search since list is sorted */ + GLuint k; + for (k = 0; k < list->Num; k++) { + if (list->Intervals[k].Reg == inv->Reg) { + /* found, remove it */ + ASSERT(list->Intervals[k].Start == inv->Start); + ASSERT(list->Intervals[k].End == inv->End); + while (k < list->Num - 1) { + list->Intervals[k] = list->Intervals[k + 1]; + k++; + } + list->Num--; 
+ return; + } + } +} + + +/** called by qsort() */ +static int +compare_start(const void *a, const void *b) +{ + const struct interval *ia = (const struct interval *) a; + const struct interval *ib = (const struct interval *) b; + if (ia->Start < ib->Start) + return -1; + else if (ia->Start > ib->Start) + return +1; + else + return 0; +} + +/** sort the interval list according to interval starts */ +static void +sort_interval_list_by_start(struct interval_list *list) +{ + qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); + } + } +#endif +} + + +/** + * Update the intermediate interval info for register 'index' and + * instruction 'ic'. + */ +static void +update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) +{ + ASSERT(index < MAX_PROGRAM_TEMPS); + if (intBegin[index] == -1) { + ASSERT(intEnd[index] == -1); + intBegin[index] = intEnd[index] = ic; + } + else { + intEnd[index] = ic; + } +} + + +/** + * Find the live intervals for each temporary register in the program. + * For register R, the interval [A,B] indicates that R is referenced + * from instruction A through instruction B. + * Special consideration is needed for loops and subroutines. + * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + */ +static GLboolean +find_live_intervals(struct gl_program *prog, + struct interval_list *liveIntervals) +{ + struct loop_info + { + GLuint Start, End; /**< Start, end instructions of loop */ + }; + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; + GLuint i; + + /* + * Note: we'll return GL_FALSE below if we find relative indexing + * into the TEMP register file. We can't handle that yet. + * We also give up on subroutines for now. 
+ */ + + if (dbg) { + _mesa_printf("Optimize: Begin find intervals\n"); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + if (inst->DstReg.RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + } + + /* Build live intervals list from intermediate arrays */ + liveIntervals->Num = 0; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (intBegin[i] >= 0) { + struct interval inv; + inv.Reg = i; + inv.Start = intBegin[i]; + inv.End = intEnd[i]; + append_interval(liveIntervals, &inv); + } + } + + /* Sort the list according to interval starts */ + sort_interval_list_by_start(liveIntervals); + + if (dbg) { + /* print interval info */ + for (i = 0; i < liveIntervals->Num; 
i++) { + const struct interval *inv = liveIntervals->Intervals + i; + _mesa_printf("Reg[%d] live [%d, %d]:", + inv->Reg, inv->Start, inv->End); + if (1) { + int j; + for (j = 0; j < inv->Start; j++) + _mesa_printf(" "); + for (j = inv->Start; j <= inv->End; j++) + _mesa_printf("x"); + } + _mesa_printf("\n"); + } + } + + return GL_TRUE; +} + + +static GLuint +alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS]) +{ + GLuint k; + for (k = 0; k < MAX_PROGRAM_TEMPS; k++) { + if (!usedRegs[k]) { + usedRegs[k] = GL_TRUE; + return k; + } + } + return MAX_PROGRAM_TEMPS; +} + + +/** + * This function implements "Linear Scan Register Allocation" to reduce + * the number of temporary registers used by the program. + * + * We compute the "live interval" for all temporary registers then + * examine the overlap of the intervals to allocate new registers. + * Basically, if two intervals do not overlap, they can use the same register. + */ +static void +_mesa_reallocate_registers(struct gl_program *prog) +{ + struct interval_list liveIntervals; + GLint registerMap[MAX_PROGRAM_TEMPS]; + GLboolean usedRegs[MAX_PROGRAM_TEMPS]; + GLuint i; + GLuint maxTemp = 0; + + if (dbg) { + _mesa_printf("Optimize: Begin live-interval register reallocation\n"); + _mesa_print_program(prog); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + registerMap[i] = -1; + usedRegs[i] = GL_FALSE; + } + + if (!find_live_intervals(prog, &liveIntervals)) { + if (dbg) + _mesa_printf("Aborting register reallocation\n"); + return; + } + + { + struct interval_list activeIntervals; + activeIntervals.Num = 0; + + /* loop over live intervals, allocating a new register for each */ + for (i = 0; i < liveIntervals.Num; i++) { + const struct interval *live = liveIntervals.Intervals + i; + + if (dbg) + _mesa_printf("Consider register %u\n", live->Reg); + + /* Expire old intervals. Intervals which have ended with respect + * to the live interval can have their remapped registers freed. 
+ */ + { + GLint j; + for (j = 0; j < activeIntervals.Num; j++) { + const struct interval *inv = activeIntervals.Intervals + j; + if (inv->End >= live->Start) { + /* Stop now. Since the activeInterval list is sorted + * we know we don't have to go further. + */ + break; + } + else { + /* Interval 'inv' has expired */ + const GLint regNew = registerMap[inv->Reg]; + ASSERT(regNew >= 0); + + if (dbg) + _mesa_printf(" expire interval for reg %u\n", inv->Reg); + + /* remove interval j from active list */ + remove_interval(&activeIntervals, inv); + j--; /* counter-act j++ in for-loop above */ + + /* return register regNew to the free pool */ + if (dbg) + _mesa_printf(" free reg %d\n", regNew); + ASSERT(usedRegs[regNew] == GL_TRUE); + usedRegs[regNew] = GL_FALSE; + } + } + } + + /* find a free register for this live interval */ + { + const GLuint k = alloc_register(usedRegs); + if (k == MAX_PROGRAM_TEMPS) { + /* out of registers, give up */ + return; + } + registerMap[live->Reg] = k; + maxTemp = MAX2(maxTemp, k); + if (dbg) + _mesa_printf(" remap register %d -> %d\n", live->Reg, k); + } + + /* Insert this live interval into the active list which is sorted + * by increasing end points. + */ + insert_interval_by_end(&activeIntervals, live); + } + } + + if (maxTemp + 1 < liveIntervals.Num) { + /* OK, we've reduced the number of registers needed. + * Scan the program and replace all the old temporary register + * indexes with the new indexes. + */ + replace_regs(prog, PROGRAM_TEMPORARY, registerMap); + + prog->NumTemporaries = maxTemp + 1; + } + + if (dbg) { + _mesa_printf("Optimize: End live-interval register reallocation\n"); + _mesa_printf("Num temp regs before: %u after: %u\n", + liveIntervals.Num, maxTemp + 1); + _mesa_print_program(prog); + } +} + + + + /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. 
@@ -424,4 +808,6 @@ _mesa_optimize_program(GLcontext *ctx, struct gl_program *program) if (1) _mesa_consolidate_registers(program); + else /*NEW*/ + _mesa_reallocate_registers(program); } From 1f45ae0813f72fa92f52e0ebc440922362dc7cce Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 20 Mar 2009 20:15:58 -0600 Subject: [PATCH 22/26] mesa: add new internal state var for window size Actually, window width - 1, height - 1 --- src/mesa/shader/prog_statevars.c | 13 +++++++++++++ src/mesa/shader/prog_statevars.h | 1 + 2 files changed, 14 insertions(+) diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c index f51d9e26512..aeb7cf6de20 100644 --- a/src/mesa/shader/prog_statevars.c +++ b/src/mesa/shader/prog_statevars.c @@ -506,6 +506,13 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], } } return; + case STATE_FB_SIZE: + value[0] = (GLfloat) (ctx->DrawBuffer->Width - 1); + value[1] = (GLfloat) (ctx->DrawBuffer->Height - 1); + value[2] = 0.0F; + value[3] = 0.0F; + return; + case STATE_ROT_MATRIX_0: { const int unit = (int) state[2]; @@ -628,6 +635,9 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH]) case STATE_PCM_BIAS: return _NEW_PIXEL; + case STATE_FB_SIZE: + return _NEW_BUFFERS; + default: /* unknown state indexes are silently ignored and * no flag set, since it is handled by the driver. 
@@ -828,6 +838,9 @@ append_token(char *dst, gl_state_index k) case STATE_SHADOW_AMBIENT: append(dst, "CompareFailValue"); break; + case STATE_FB_SIZE: + append(dst, "FbSize"); + break; case STATE_ROT_MATRIX_0: append(dst, "rotMatrixRow0"); break; diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h index d563080db1c..1180d9eaa4a 100644 --- a/src/mesa/shader/prog_statevars.h +++ b/src/mesa/shader/prog_statevars.h @@ -117,6 +117,7 @@ typedef enum gl_state_index_ { STATE_PCM_SCALE, /**< Post color matrix RGBA scale */ STATE_PCM_BIAS, /**< Post color matrix RGBA bias */ STATE_SHADOW_AMBIENT, /**< ARB_shadow_ambient fail value; token[2] is texture unit index */ + STATE_FB_SIZE, /**< (width-1, height-1, 0, 0) */ STATE_ROT_MATRIX_0, /**< ATI_envmap_bumpmap, rot matrix row 0 */ STATE_ROT_MATRIX_1, /**< ATI_envmap_bumpmap, rot matrix row 1 */ STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */ From 401cbd0d2365e5b2d371a2a01edf1cecca4a99dd Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 20 Mar 2009 20:25:34 -0600 Subject: [PATCH 23/26] gallium: remove use of origin_lower_left This was used to indicate OpenGL's lower-left origin for fragment window coordinates for polygon stipple and gl_FragCoord. Now: - fragment coordinate origin is always upper-left corner - GL polygon stipple is inverted and shifted before given to gallium - GL fragment programs that use INPUT[WPOS] are modified to use an inverted window coord which is placed in a temp register. Note: the origin_lower_left field still exists in pipe_rasterizer_state. Remove it when all the drivers, etc. no longer reference it. 
--- .../drivers/softpipe/sp_quad_stipple.c | 18 +- src/gallium/drivers/softpipe/sp_setup.c | 13 +- src/mesa/state_tracker/st_atom_rasterizer.c | 1 + src/mesa/state_tracker/st_atom_stipple.c | 46 ++++- src/mesa/state_tracker/st_context.h | 3 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 179 ++++++++++++++++-- 6 files changed, 207 insertions(+), 53 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index 05e862f0977..07162db7b6e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -22,21 +22,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) if (quad->input.prim == QUAD_PRIM_TRI) { struct softpipe_context *softpipe = qs->softpipe; /* need to invert Y to index into OpenGL's stipple pattern */ - int y0, y1; - uint stipple0, stipple1; const int col0 = quad->input.x0 % 32; - - if (softpipe->rasterizer->origin_lower_left) { - y0 = softpipe->framebuffer.height - 1 - quad->input.y0; - y1 = y0 - 1; - } - else { - y0 = quad->input.y0; - y1 = y0 + 1; - } - - stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; - stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + const int y0 = quad->input.y0; + const int y1 = y0 + 1; + const uint stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + const uint stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; /* turn off quad mask bits that fail the stipple test */ if ((stipple0 & (bit31 >> col0)) == 0) diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0925653b5d5..96cb09b9051 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -732,18 +732,9 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot) setup->coef[slot].dadx[0] = 1.0; setup->coef[slot].dady[0] = 0.0; /*Y*/ - if (setup->softpipe->rasterizer->origin_lower_left) { - /* y=0=bottom */ - const int winHeight = 
setup->softpipe->framebuffer.height; - setup->coef[slot].a0[1] = (float) (winHeight - 1); - setup->coef[slot].dady[1] = -1.0; - } - else { - /* y=0=top */ - setup->coef[slot].a0[1] = 0.0; - setup->coef[slot].dady[1] = 1.0; - } + setup->coef[slot].a0[1] = 0.0; setup->coef[slot].dadx[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; /*Z*/ setup->coef[slot].a0[2] = setup->posCoef.a0[2]; setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index ea76487bcfc..5bdcaa4a014 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -79,6 +79,7 @@ static void update_raster_state( struct st_context *st ) memset(raster, 0, sizeof(*raster)); + /* XXX obsolete field, remove someday */ raster->origin_lower_left = 1; /* Always true for OpenGL */ /* _NEW_POLYGON, _NEW_BUFFERS diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c index f395930ab40..31e124b3293 100644 --- a/src/mesa/state_tracker/st_atom_stipple.c +++ b/src/mesa/state_tracker/st_atom_stipple.c @@ -39,24 +39,52 @@ #include "pipe/p_defines.h" -static void -update_stipple( struct st_context *st ) +/** + * OpenGL's polygon stipple is indexed with window coordinates in which + * the origin (0,0) is the lower-left corner of the window. + * With Gallium, the origin is the upper-left corner of the window. + * To convert GL's polygon stipple to what gallium expects we need to + * invert the pattern vertically and rotate the stipple rows according + * to the window height. 
+ */ +static void +invert_stipple(GLuint dest[32], const GLuint src[32], GLuint winHeight) { - const GLuint sz = sizeof(st->state.poly_stipple.stipple); - assert(sz == sizeof(st->ctx->PolygonStipple)); + GLuint i; - if (memcmp(&st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz)) { - /* state has changed */ - memcpy(st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz); - st->pipe->set_polygon_stipple(st->pipe, &st->state.poly_stipple); + for (i = 0; i < 32; i++) { + dest[i] = src[(winHeight - 1 - i) & 0x1f]; } } + +static void +update_stipple( struct st_context *st ) +{ + const GLuint sz = sizeof(st->state.poly_stipple); + assert(sz == sizeof(st->ctx->PolygonStipple)); + + if (memcmp(st->state.poly_stipple, st->ctx->PolygonStipple, sz)) { + /* state has changed */ + struct pipe_poly_stipple newStipple; + + memcpy(st->state.poly_stipple, st->ctx->PolygonStipple, sz); + + invert_stipple(newStipple.stipple, st->ctx->PolygonStipple, + st->ctx->DrawBuffer->Height); + + st->pipe->set_polygon_stipple(st->pipe, &newStipple); + } +} + + +/** Update the stipple when the pattern or window height changes */ const struct st_tracked_state st_update_polygon_stipple = { "st_update_polygon_stipple", /* name */ { /* dirty */ - (_NEW_POLYGONSTIPPLE), /* mesa */ + (_NEW_POLYGONSTIPPLE | + _NEW_BUFFERS), /* mesa */ 0, /* st */ }, update_stipple /* update */ diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index d7518ab6897..ae8c2978bf8 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -93,12 +93,13 @@ struct st_context struct pipe_constant_buffer constants[2]; struct pipe_framebuffer_state framebuffer; struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS]; - struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; GLuint num_samplers; GLuint num_textures; + + GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */ } state; 
struct { diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index cbf3f334c08..ffa607dd87c 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -219,8 +219,9 @@ compile_instruction( const GLuint immediateMapping[], GLboolean indirectAccess, GLuint preamble_size, - GLuint processor, - GLboolean *insideSubroutine) + GLuint procType, + GLboolean *insideSubroutine, + GLint wposTemp) { GLuint i; struct tgsi_full_dst_register *fulldst; @@ -247,19 +248,29 @@ compile_instruction( GLuint j; fullsrc = &fullinst->FullSrcRegisters[i]; - fullsrc->SrcRegister.File = map_register_file( - inst->SrcReg[i].File, - inst->SrcReg[i].Index, - immediateMapping, - indirectAccess ); - fullsrc->SrcRegister.Index = map_register_file_index( - fullsrc->SrcRegister.File, - inst->SrcReg[i].Index, - inputMapping, - outputMapping, - immediateMapping, - indirectAccess ); + if (procType == TGSI_PROCESSOR_FRAGMENT && + inst->SrcReg[i].File == PROGRAM_INPUT && + inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { + /* special case of INPUT[WPOS] */ + fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY; + fullsrc->SrcRegister.Index = wposTemp; + } + else { + /* any other src register */ + fullsrc->SrcRegister.File = map_register_file( + inst->SrcReg[i].File, + inst->SrcReg[i].Index, + immediateMapping, + indirectAccess ); + fullsrc->SrcRegister.Index = map_register_file_index( + fullsrc->SrcRegister.File, + inst->SrcReg[i].Index, + inputMapping, + outputMapping, + immediateMapping, + indirectAccess ); + } /* swizzle (ext swizzle also depends on negation) */ { @@ -733,6 +744,111 @@ find_temporaries(const struct gl_program *program, } +/** + * Find an unused temporary in the tempsUsed array. 
+ */ +static int +find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS]) +{ + int i; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!tempsUsed[i]) { + tempsUsed[i] = GL_TRUE; + return i; + } + } + return -1; +} + + +/** helper for building simple TGSI instruction, one src register */ +static void +build_tgsi_instruction1(struct tgsi_full_instruction *inst, + int opcode, + int dstFile, int dstIndex, int writemask, + int srcFile1, int srcIndex1) +{ + *inst = tgsi_default_full_instruction(); + + inst->Instruction.Opcode = opcode; + + inst->Instruction.NumDstRegs = 1; + inst->FullDstRegisters[0].DstRegister.File = dstFile; + inst->FullDstRegisters[0].DstRegister.Index = dstIndex; + inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; + + inst->Instruction.NumSrcRegs = 1; + inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; + inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; +} + + +/** helper for building simple TGSI instruction, two src registers */ +static void +build_tgsi_instruction2(struct tgsi_full_instruction *inst, + int opcode, + int dstFile, int dstIndex, int writemask, + int srcFile1, int srcIndex1, + int srcFile2, int srcIndex2) +{ + *inst = tgsi_default_full_instruction(); + + inst->Instruction.Opcode = opcode; + + inst->Instruction.NumDstRegs = 1; + inst->FullDstRegisters[0].DstRegister.File = dstFile; + inst->FullDstRegisters[0].DstRegister.Index = dstIndex; + inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; + + inst->Instruction.NumSrcRegs = 2; + inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; + inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; + inst->FullSrcRegisters[1].SrcRegister.File = srcFile2; + inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2; +} + + + +/** + * Emit the TGSI instructions for inverting the WPOS y coordinate. 
+ */ +static int +emit_inverted_wpos(struct tgsi_token *tokens, + int wpos_temp, + int winsize_const, + int wpos_input, + struct tgsi_header *header, int maxTokens) +{ + struct tgsi_full_instruction fullinst; + int ti = 0; + + /* MOV wpos_temp.xzw, input[wpos]; */ + build_tgsi_instruction1(&fullinst, + TGSI_OPCODE_MOV, + TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW, + TGSI_FILE_INPUT, 0); + + ti += tgsi_build_full_instruction(&fullinst, + &tokens[ti], + header, + maxTokens - ti); + + /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */ + build_tgsi_instruction2(&fullinst, + TGSI_OPCODE_SUB, + TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y, + TGSI_FILE_CONSTANT, winsize_const, + TGSI_FILE_INPUT, wpos_input); + + ti += tgsi_build_full_instruction(&fullinst, + &tokens[ti], + header, + maxTokens - ti); + + return ti; +} + + /** @@ -778,16 +894,34 @@ st_translate_mesa_program( GLuint ti; /* token index */ struct tgsi_header *header; struct tgsi_processor *processor; - struct tgsi_full_instruction fullinst; GLuint preamble_size = 0; GLuint immediates[1000]; GLuint numImmediates = 0; GLboolean insideSubroutine = GL_FALSE; GLboolean indirectAccess = GL_FALSE; + GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; + GLint wposTemp = -1, winHeightConst = -1; assert(procType == TGSI_PROCESSOR_FRAGMENT || procType == TGSI_PROCESSOR_VERTEX); + find_temporaries(program, tempsUsed); + + if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (program->InputsRead & FRAG_BIT_WPOS) { + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] but y is inverted.
+ */ + static const gl_state_index winSizeState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; + winHeightConst = _mesa_add_state_reference(program->Parameters, + winSizeState); + wposTemp = find_free_temporary(tempsUsed); + } + } + + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); header = (struct tgsi_header *) &tokens[1]; @@ -884,11 +1018,9 @@ st_translate_mesa_program( /* temporary decls */ { - GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; GLboolean inside_range = GL_FALSE; GLuint start_range = 0; - find_temporaries(program, tempsUsed); tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE; for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) { if (tempsUsed[i] && !inside_range) { @@ -1018,7 +1150,17 @@ st_translate_mesa_program( } } + /* invert WPOS fragment input */ + if (wposTemp >= 0) { + ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst, + inputMapping[FRAG_ATTRIB_WPOS], + header, maxTokens - ti); + preamble_size = 2; /* two instructions added */ + } + for (i = 0; i < program->NumInstructions; i++) { + struct tgsi_full_instruction fullinst; + compile_instruction( &program->Instructions[i], &fullinst, @@ -1028,7 +1170,8 @@ st_translate_mesa_program( indirectAccess, preamble_size, procType, - &insideSubroutine ); + &insideSubroutine, + wposTemp); ti += tgsi_build_full_instruction( &fullinst, From d7b7b63bd7cca80e99ad9701f8b56ee365053647 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 21 Mar 2009 11:46:54 +0100 Subject: [PATCH 24/26] st: Silence compiler warnings. 
--- src/mesa/state_tracker/st_cb_texture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index edfa8854d89..311d812ccfb 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -467,7 +467,7 @@ st_TexImage(GLcontext * ctx, */ if (stObj->pt) { if (stObj->teximage_realloc || - level > stObj->pt->last_level || + level > (GLint) stObj->pt->last_level || (stObj->pt->last_level == level && stObj->pt->target != PIPE_TEXTURE_CUBE && !st_texture_match_image(stObj->pt, &stImage->base, @@ -803,7 +803,6 @@ st_TexSubimage(GLcontext * ctx, PIPE_TRANSFER_WRITE, xoffset, yoffset, width, height); - dstRowStride = stImage->transfer->stride; } if (!texImage->Data) { @@ -812,6 +811,7 @@ st_TexSubimage(GLcontext * ctx, } src = (const GLubyte *) pixels; + dstRowStride = stImage->transfer->stride; for (i = 0; i++ < depth;) { if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, From 1bb60d25e09d71861bdb4485378880140b65b062 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 21 Mar 2009 10:37:33 +0000 Subject: [PATCH 25/26] gallium: remove remaining references to origin_lower_left --- src/gallium/drivers/trace/tr_state.c | 1 - src/gallium/include/pipe/p_state.h | 1 - src/gallium/state_trackers/g3dvl/vl_context.c | 1 - src/mesa/state_tracker/st_atom_rasterizer.c | 3 --- 4 files changed, 6 deletions(-) diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index b6a1ce0d625..f9fbe9aee79 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -123,7 +123,6 @@ void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) trace_dump_member(uint, state, line_stipple_pattern); trace_dump_member(bool, state, line_last_pixel); trace_dump_member(bool, state, bypass_vs_clip_and_viewport); - trace_dump_member(bool, state, origin_lower_left); 
trace_dump_member(bool, state, flatshade_first); trace_dump_member(bool, state, gl_rasterization_rules); diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index aad41fab110..9c7baa3d92e 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -117,7 +117,6 @@ struct pipe_rasterizer_state */ unsigned bypass_vs_clip_and_viewport:1; - unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ unsigned flatshade_first:1; /**< take color attribute from the first vertex of a primitive */ unsigned gl_rasterization_rules:1; /**< enable tweaks for GL rasterization? */ diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 1d8ad0b0469..5cfd233c4c1 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -42,7 +42,6 @@ static int vlInitCommon(struct vlContext *context) rast.line_stipple_pattern = 0; rast.line_last_pixel = 0; rast.bypass_vs_clip_and_viewport = 0; - rast.origin_lower_left = 0; rast.line_width = 1; rast.point_smooth = 0; rast.point_size = 1; diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 5bdcaa4a014..61687fbc3e2 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -79,9 +79,6 @@ static void update_raster_state( struct st_context *st ) memset(raster, 0, sizeof(*raster)); - /* XXX obsolete field, remove someday */ - raster->origin_lower_left = 1; /* Always true for OpenGL */ - /* _NEW_POLYGON, _NEW_BUFFERS */ { From 699897e81c623e53be51fba0488f535b0a8d7761 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 21 Mar 2009 12:18:09 +0100 Subject: [PATCH 26/26] tgsi: Document KIL, KILP instructions. 
--- src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index b83abd40933..5b21a2be0bd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -382,7 +382,9 @@ TGSI Instruction Specification 1.5.7 KILP - Predicated Discard - TBD + if (cc.x || cc.y || cc.z || cc.w) + discard + endif 1.5.8 LG2 - Logarithm Base 2 @@ -599,7 +601,9 @@ TGSI Instruction Specification 1.8.2 KIL - Conditional Discard - TBD + if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0) + discard + endif 1.8.3 SCS - Sine Cosine