nv04-nv40: new 2D: add new Gallium-independent 2D engine

This patch adds a brand new nv04-nv40 2D engine module. It should correctly implement all operations involving swizzled and 3D-swizzled surfaces. This code is independent from the Gallium framework and can thus be reused in the DDX and classic Mesa drivers (it's only likely to be useful in the latter, though). Currently, surface_copy and surface_fill are broken for 3D textures, for swizzled source textures and possibly for some misaligned cases. The code is based around the new nv04_region structure, which encapsulates the information from pipe_surface needed for the 2D engine and CPU copies. The use of nv04_region makes the code independent of the Gallium framework and allows the nv04_region to be transformed without clobbering the pipe_surface it was derived from. The existing M2MF, blitter, and SWIZZLED_SURFACE paths have been improved and a new CPU path has been added. There is also support to tell the caller to use the 3D engine. The main feature of the copy/fill setup algorithm is linearization/contiguous-linearization of swizzled surfaces. The idea of linearization is that some swizzled surfaces are laid out like linear ones (1xN, 2xN, Nx1) and can thus be used as such (e.g. useful for copying single pixels). Also, some rectangles (e.g. the whole surface) are contiguous in memory. If both the source and destination rectangles are swizzled but contiguous, then they can be regarded as both linear: this is the idea of "contiguous linearization". This, for instance, allows the 2D engine to be used to duplicate the content of a swizzled surface to another swizzled surface, by pretending they are actually linear. After linearization, the result may not be 64-byte aligned. Another transformation is done to enlarge the linear surface so that it becomes 64-byte aligned. This is also used to 64-byte align swizzled texture mipmaps. The inner loop of the CPU path is as optimized as possible without using SSE/SSE2.
Future improvements could include SSE/SSE2 support, and possibly a faster coordinate swizzling algorithm (which is however not used in the inner loop). It may be a good idea to autogenerate swizzling code at least for all possible POT 2D texture dimensions (less than 256), maybe for all 3D ones too (less than 4096). Also, it would be a very good idea to make a copy with the GPU first if the source surface is in uncached memory.
2026-05-07 02:48:06 +02:00 · 2010-01-19 18:51:10 +01:00 · 2010-01-19 18:51:10 +01:00 · 24a4ea003f
commit 24a4ea003f
parent 23639dc046
4 changed files with 1478 additions and 0 deletions
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@ -5,6 +5,7 @@ LIBNAME = nvfx

 C_SOURCES = \
 	nv04_surface_2d.c \
+	nv04_2d.c \
 	nvfx_buffer.c \
 	nvfx_context.c \
 	nvfx_clear.c \
--- a/src/gallium/drivers/nvfx/nv04_2d.c
+++ b/src/gallium/drivers/nvfx/nv04_2d.c
--- a/src/gallium/drivers/nvfx/nv04_2d.h
+++ b/src/gallium/drivers/nvfx/nv04_2d.h
@ -0,0 +1,87 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Ben Skeggs
+ * Copyright 2009 Younes Manton
+ * Copyright 2010 Luca Barbieri
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/* this code has no Mesa or Gallium dependency and can be reused in the classic Mesa driver or DDX */
+
+#ifndef __NV04_2D_H__
+#define __NV04_2D_H__
+
+struct nv04_2d_context;
+struct nouveau_channel;
+struct nouveau_bo;
+
+// Surface/rectangle description used by the 2D engine and CPU copy paths. NOTE: all functions taking this as a parameter will CLOBBER it (except for ->bo)
+struct nv04_region {
+	struct nouveau_bo* bo; // buffer object backing the surface; the one field callees leave untouched
+	int offset; // presumably the byte offset of the surface inside bo -- TODO confirm against nv04_2d.c
+	unsigned pitch; // 0 -> swizzled; otherwise linear, and the CPU loops use it as the row stride in bytes
+	unsigned bpps; // bpp shift (0, 1, 2; 3, 4 for fp/compressed); the CPU loops map 0/1/2 to uint8_t/uint16_t/uint32_t elements
+	unsigned x, y, z; // rectangle origin; for linear regions the CPU loops count x in elements and y in rows (z: presumably the 3D slice -- confirm)
+	unsigned w, h, d; // presumably the surface dimensions (width, height, depth) -- TODO confirm against nv04_2d.c
+};
+// Copies size bytes from srcbo + srcoff to dstbo + dstoff; presumably done on the GPU through ctx -- confirm in nv04_2d.c
+void
+nv04_memcpy(struct nv04_2d_context *ctx,
+		struct nouveau_bo* dstbo, int dstoff,
+		struct nouveau_bo* srcbo, int srcoff,
+		unsigned size);
+// NOTE(review): presumably the offset where a w x h rectangle of rgn starts in memory -- implementation not visible here, confirm
+unsigned
+nv04_region_begin(struct nv04_region* rgn, unsigned w, unsigned h);
+// NOTE(review): presumably the offset just past the end of a w x h rectangle of rgn -- implementation not visible here, confirm
+unsigned
+nv04_region_end(struct nv04_region* rgn, unsigned w, unsigned h);
+// Tears down a 2D context; presumably one obtained from nv04_2d_context_init -- confirm ownership rules in nv04_2d.c
+void
+nv04_2d_context_takedown(struct nv04_2d_context *pctx);
+// Creates a 2D context for the given channel; failure behaviour is not visible from this header
+struct nv04_2d_context *
+nv04_2d_context_init(struct nouveau_channel* chan);
+// CPU path: copies a w x h rectangle from src to dst; both regions may be clobbered (see struct nv04_region)
+void
+nv04_region_copy_cpu(struct nv04_region* dst, struct nv04_region* src, int w, int h);
+// CPU path: fills a w x h rectangle of dst with value; dst may be clobbered (see struct nv04_region)
+void
+nv04_region_fill_cpu(struct nv04_region* dst, int w, int h, unsigned value);
+// 2D-engine copy of a w x h rectangle; the int result presumably tells the caller when another path (e.g. the 3D engine) is needed -- confirm
+int
+nv04_region_copy_2d(struct nv04_2d_context *ctx,
+		struct nv04_region* dst, struct nv04_region* src,
+		int w, int h,
+		int cs2d_format, int sifm_format,
+		int dst_to_gpu, int src_on_gpu);
+// 2D-engine fill of a w x h rectangle of dst with value; meaning of the int result is not visible here -- confirm in nv04_2d.c
+int
+nv04_region_fill_2d(struct nv04_2d_context *ctx,
+		struct nv04_region *dst,
+                int w, int h,
+                unsigned value);
+
+#endif
--- a/src/gallium/drivers/nvfx/nv04_2d_loops.h
+++ b/src/gallium/drivers/nvfx/nv04_2d_loops.h
@ -0,0 +1,70 @@
+#ifndef T // outer pass: T is not defined yet, so emit the bpps dispatch and re-include this file once per element type
+{
+	if(dst->bpps == 0) // 1-byte elements
+#define T uint8_t
+#include "nv04_2d_loops.h" // expands to the #else half of this file with T = uint8_t
+#undef T
+	else if(dst->bpps == 1) // 2-byte elements
+#define T uint16_t
+#include "nv04_2d_loops.h"
+#undef T
+	else if(dst->bpps == 2) // 4-byte elements
+#define T uint32_t
+#include "nv04_2d_loops.h"
+#undef T
+	else
+		assert(0); // any other bpps value is not handled by these loops
+}
+#else // inner pass: T is the element type chosen by the dispatch above
+#ifdef SWIZZLED_COPY_LOOPS // the including code must provide LOOP_X/LOOP_Y, ix/iy, mdst, msrc, dswx, dswy, sswx, sswy, dir and h
+{
+	if(!dst->pitch) // destination is swizzled (pitch == 0)
+	{
+		if(!src->pitch) // swizzled source -> swizzled destination
+		{
+			LOOP_Y
+			{
+				T* pdst = (T*)mdst + dswy[iy]; // dswy[iy]: row part of the dst element offset; dswx[ix] below adds the column part
+				T* psrc = (T*)msrc + sswy[iy]; // same split for the source via sswy/sswx
+				LOOP_X
+				{
+					assert((char*)&psrc[sswx[ix] + 1] <= ((char*)src->bo->map + src->bo->size)); // the element read must end inside the mapped source bo
+					assert((char*)&pdst[dswx[ix] + 1] <= ((char*)dst->bo->map + dst->bo->size)); // the element written must end inside the mapped destination bo
+					pdst[dswx[ix]] = psrc[sswx[ix]];
+				}
+			}
+		}
+		else // linear source -> swizzled destination
+		{
+			T* psrc = (T*)(msrc + ((dir > 0) ? src->y : (src->y + h - 1)) * src->pitch) + src->x; // starting row is src->y when dir > 0, otherwise the last row src->y + h - 1
+			LOOP_Y
+			{
+				T* pdst = (T*)mdst + dswy[iy];
+				LOOP_X
+				{
+					assert((char*)&psrc[ix + 1] <= ((char*)src->bo->map + src->bo->size));
+					assert((char*)&pdst[dswx[ix] + 1] <= ((char*)dst->bo->map + dst->bo->size));
+					pdst[dswx[ix]] = psrc[ix];
+				}
+				psrc = (T*)((char*)psrc + dir * src->pitch); // move one source row in the direction given by dir (pitch is a byte stride)
+			}
+		}
+	}
+	else // destination is linear; the source is always read through sswx/sswy here, so there is no linear -> linear variant
+	{
+		T* pdst = (T*)(mdst + ((dir > 0) ? dst->y : (dst->y + h - 1)) * dst->pitch) + dst->x; // starting row is dst->y when dir > 0, otherwise the last row dst->y + h - 1
+		LOOP_Y
+		{
+			T* psrc = (T*)msrc + sswy[iy];
+			LOOP_X
+			{
+				assert((char*)&psrc[sswx[ix] + 1] <= ((char*)src->bo->map + src->bo->size));
+				assert((char*)&pdst[ix + 1] <= ((char*)dst->bo->map + dst->bo->size));
+				pdst[ix] = psrc[sswx[ix]];
+			}
+			pdst = (T*)((char*)pdst + dir * dst->pitch); // move one destination row in the direction given by dir (pitch is a byte stride)
+		}
+	}
+}
+#endif
+#endif