Add basic timing for memcpys

2026-05-06 07:18:17 +02:00 · 2006-02-09 23:04:44 +00:00 · 2006-02-09 23:04:44 +00:00 · f8246f504b
commit f8246f504b
parent 499458bcdb
1 changed file with 83 additions and 15 deletions
--- a/src/mesa/drivers/dri/i915/intel_tex.c
+++ b/src/mesa/drivers/dri/i915/intel_tex.c
@ -47,8 +47,7 @@ static void intelFreeTextureImageData( GLcontext *ctx,
   struct intel_texture_image *intelImage = intel_texture_image(texImage);

   if (intelImage->mt) {
-      intel_miptree_release(intel, intelImage->mt);
-      intelImage->mt = NULL;
+      intel_miptree_release(intel, &intelImage->mt);
   }
   
   if (texImage->Data) {
@ -67,6 +66,84 @@ static void *do_memcpy( void *dest, const void *src, size_t n )
 }


+/*
+ * fastrdtsc - sample the CPU time-stamp counter for rough cycle timing.
+ *
+ * The inline asm runs cpuid with eax preloaded to 0 (the "0"(0) input),
+ * then the raw opcode bytes 0x0f 0x31, which encode RDTSC, and returns
+ * whatever is left in eax.  Only that 32-bit value is returned; edx is
+ * listed as a clobber and discarded.
+ *
+ * NOTE(review): cpuid is presumably used as a serializing instruction so
+ * that earlier work has completed before the counter is read - confirm.
+ *
+ * Two variants exist because of how ebx (overwritten by cpuid) is handled:
+ *   - non-x86_64: ebx is pushed/popped by hand inside the asm and is NOT
+ *     in the clobber list (presumably because the compiler may reserve
+ *     ebx, e.g. for PIC code - confirm).
+ *   - x86_64: ebx is simply declared as a clobber.
+ *
+ * NOTE(review): the #ifndef branch assumes any non-x86_64 build is 32-bit
+ * x86; nothing here guards against other architectures.
+ */
+#ifndef __x86_64__
+static unsigned
+fastrdtsc(void)
+{
+    unsigned eax;
+    __asm__ volatile ("\t"
+	"pushl  %%ebx\n\t"
+	"cpuid\n\t" ".byte 0x0f, 0x31\n\t" "popl %%ebx\n":"=a" (eax)
+	:"0"(0)
+	:"ecx", "edx", "cc");
+
+    return eax;
+}
+#else
+/* x86_64 variant: same sequence, but ebx is a declared clobber. */
+static unsigned
+fastrdtsc(void)
+{
+    unsigned eax;
+    __asm__ volatile ("\t"
+	"cpuid\n\t" ".byte 0x0f, 0x31\n\t" :"=a" (eax)
+	:"0"(0)
+		      :"ecx", "edx", "ebx", "cc");
+
+    return eax;
+}
+#endif
+
+/*
+ * time_diff - elapsed count between an earlier sample t and a later
+ * sample t2.
+ *
+ * When t2 is greater than t this is a plain subtraction.  Otherwise the
+ * later sample is taken to have wrapped past 0xFFFFFFFF once, and the
+ * distance is measured across that wrap (equal samples give 0).
+ */
+static unsigned
+time_diff(unsigned t, unsigned t2)
+{
+    unsigned elapsed;
+
+    if (t < t2) {
+        elapsed = t2 - t;
+    } else {
+        elapsed = 0xFFFFFFFFU - (t - t2 - 1);
+    }
+
+    return elapsed;
+}
+
+
+/* The system memcpy (at least on Ubuntu 5.10) has problems copying
+ * to AGP (write-combined) memory from a source which isn't 64-byte
+ * aligned - there is a 4x performance falloff.
+ *
+ * The x86 __memcpy is immune to this but is slightly slower
+ * (10%-ish) than the system memcpy.
+ *
+ * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
+ * isn't much faster than x86_memcpy for AGP copies.
+ *
+ * Policy implemented here: if either dest or src is not 64-byte
+ * aligned, copy with __memcpy; otherwise use the system memcpy.
+ * Returns whatever the chosen copy routine returns.
+ *
+ * The alignment test goes through unsigned long rather than unsigned
+ * so the pointer value is not truncated on 64-bit builds.
+ *
+ * TODO: switch dynamically.
+ */
+static void *do_memcpy( void *dest, const void *src, size_t n )
+{
+   const unsigned long align_mask = 63;
+
+   /* OR-ing the two addresses tests both for misalignment at once. */
+   if ((((unsigned long) src) | ((unsigned long) dest)) & align_mask) {
+      return __memcpy(dest, src, n);
+   }
+
+   return memcpy(dest, src, n);
+}
+
+
+/*
+ * timed_memcpy - debug wrapper around do_memcpy() that logs copy cost.
+ *
+ * Warns when either pointer is not 64-byte aligned, brackets the copy
+ * with two fastrdtsc() samples, and prints both samples together with
+ * the elapsed count divided by the number of bytes copied.
+ *
+ * dest, src, n: passed straight through to do_memcpy().
+ * Returns the value returned by do_memcpy().
+ */
+static void *timed_memcpy( void *dest, const void *src, size_t n )
+{
+   void *ret;
+   unsigned t1, t2;
+   double rate = 0.0;
+
+   /* Test alignment through unsigned long so the pointer value is not
+    * truncated on 64-bit builds.
+    */
+   if ((((unsigned long) src) | ((unsigned long) dest)) & 63) {
+      _mesa_printf("Warning - non-aligned texture copy!\n");
+   }
+
+   t1 = fastrdtsc();
+   ret = do_memcpy(dest, src, n);
+   t2 = fastrdtsc();
+
+   /* A zero-length copy has no meaningful per-byte cost; report 0.0
+    * instead of dividing by zero and printing inf/nan.
+    */
+   if (n != 0) {
+      rate = (double) time_diff(t1, t2) / (double) n;
+   }
+
+   _mesa_printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate);
+   return ret;
+}
+
+
 void intelInitTextureFuncs(struct dd_function_table * functions)
 {
   functions->ChooseTextureFormat = intelChooseTextureFormat;
@ -85,17 +162,8 @@ void intelInitTextureFuncs(struct dd_function_table * functions)
   functions->UpdateTexturePalette = 0;
   functions->IsTextureResident = intelIsTextureResident;

-   /* The system memcpy (at least on ubuntu 5.10) has problems copying
-    * to agp (writecombined) memory from a source which isn't 64-byte
-    * aligned - there is a 4x performance falloff.
-    *
-    * The x86 __memcpy is immune to this but is slightly slower
-    * (10%-ish) than the system memcpy.
-    *
-    * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
-    * isn't much faster than x86_memcpy for agp copies.
-    * 
-    * TODO: switch dynamically.
-    */
-   functions->TextureMemCpy = do_memcpy;
+   if (INTEL_DEBUG & DEBUG_BUFMGR)
+      functions->TextureMemCpy = timed_memcpy;
+   else
+      functions->TextureMemCpy = do_memcpy;
 }