mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 07:18:17 +02:00
Add basic timing for memcpys
This commit is contained in:
parent
499458bcdb
commit
f8246f504b
1 changed files with 83 additions and 15 deletions
|
|
@ -47,8 +47,7 @@ static void intelFreeTextureImageData( GLcontext *ctx,
|
|||
struct intel_texture_image *intelImage = intel_texture_image(texImage);
|
||||
|
||||
if (intelImage->mt) {
|
||||
intel_miptree_release(intel, intelImage->mt);
|
||||
intelImage->mt = NULL;
|
||||
intel_miptree_release(intel, &intelImage->mt);
|
||||
}
|
||||
|
||||
if (texImage->Data) {
|
||||
|
|
@ -67,6 +66,84 @@ static void *do_memcpy( void *dest, const void *src, size_t n )
|
|||
}
|
||||
|
||||
|
||||
/*
 * fastrdtsc - sample the CPU timestamp counter.
 *
 * Both variants execute CPUID with EAX preset to 0 (the "0"(0) input
 * constraint) and then emit the raw bytes 0x0f 0x31, which is the encoding
 * of the RDTSC instruction.  Only the value left in EAX is returned; ECX
 * and EDX are declared clobbered and their contents are discarded.
 *
 * NOTE(review): CPUID is presumably used here to serialize execution
 * before the counter is read - confirm against the CPU manuals.
 */
#ifdef __x86_64__
static unsigned
fastrdtsc(void)
{
   unsigned tsc_lo;

   /* 64-bit build: EBX is simply declared as clobbered by CPUID. */
   __asm__ volatile ("\t"
                     "cpuid\n\t"
                     ".byte 0x0f, 0x31\n\t"
                     : "=a" (tsc_lo)
                     : "0" (0)
                     : "ecx", "edx", "ebx", "cc");

   return tsc_lo;
}
#else
static unsigned
fastrdtsc(void)
{
   unsigned tsc_lo;

   /* 32-bit build: EBX is saved and restored by hand around CPUID instead
    * of being listed as a clobber.
    * NOTE(review): presumably because EBX may be reserved by the compiler
    * (e.g. for PIC) on this target - confirm.
    */
   __asm__ volatile ("\t"
                     "pushl %%ebx\n\t"
                     "cpuid\n\t"
                     ".byte 0x0f, 0x31\n\t"
                     "popl %%ebx\n"
                     : "=a" (tsc_lo)
                     : "0" (0)
                     : "ecx", "edx", "cc");

   return tsc_lo;
}
#endif
|
||||
|
||||
/*
 * time_diff - number of ticks elapsed between two counter samples.
 *
 * t is the earlier sample and t2 the later one.  When the later sample is
 * numerically larger the plain difference is returned.  Otherwise the
 * counter is treated as having wrapped once past 0xFFFFFFFF and the
 * distance is measured across that wrap; identical samples yield 0.
 */
static unsigned
time_diff(unsigned t, unsigned t2)
{
   /* Common case: no wrap between the two samples. */
   if (t < t2)
      return t2 - t;

   /* Wrapped (or equal): count up from t to 0xFFFFFFFF, then on to t2. */
   return 0xFFFFFFFFU - (t - t2 - 1);
}
|
||||
|
||||
|
||||
/* The system memcpy (at least on ubuntu 5.10) has problems copying
 * to agp (writecombined) memory from a source which isn't 64-byte
 * aligned - there is a 4x performance falloff.
 *
 * The x86 __memcpy is immune to this but is slightly slower
 * (10%-ish) than the system memcpy.
 *
 * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
 * isn't much faster than x86_memcpy for agp copies.
 *
 * TODO: switch dynamically.
 *
 * do_memcpy therefore picks the copy routine from the alignment of its
 * arguments: if either src or dest is not a multiple of 64 bytes the copy
 * goes through __memcpy, otherwise through the system memcpy.  The value
 * returned is whatever the chosen routine returns.
 */
static void *do_memcpy( void *dest, const void *src, size_t n )
{
   /* Only the low six address bits matter.  The pointers are converted via
    * size_t rather than unsigned so the cast does not narrow the pointer
    * on targets where pointers are wider than unsigned int.
    */
   const size_t misaligned = ((size_t) src | (size_t) dest) & 63;

   if (misaligned)
      return __memcpy(dest, src, n);

   return memcpy(dest, src, n);
}
|
||||
|
||||
|
||||
/*
 * timed_memcpy - do_memcpy wrapped with cycle-counter instrumentation.
 *
 * Prints a warning when src or dest is not 64-byte aligned, samples
 * fastrdtsc() immediately before and after the copy, and prints both
 * samples together with the elapsed ticks divided by the byte count.
 *
 * Returns the value returned by do_memcpy().
 */
static void *timed_memcpy( void *dest, const void *src, size_t n )
{
   void *ret;
   unsigned t1, t2;
   double rate;

   /* Convert via size_t, not unsigned, so the pointer is not narrowed on
    * targets where pointers are wider than unsigned int; only the low six
    * bits are inspected.
    */
   if (((size_t) src | (size_t) dest) & 63)
      _mesa_printf("Warning - non-aligned texture copy!\n");

   t1 = fastrdtsc();
   ret = do_memcpy(dest, src, n);
   t2 = fastrdtsc();

   /* A zero-length copy has no meaningful per-byte cost; report 0 rather
    * than dividing by zero and printing inf/nan.
    */
   rate = (n != 0) ? (double) time_diff(t1, t2) / (double) n : 0.0;

   _mesa_printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate);
   return ret;
}
|
||||
|
||||
|
||||
void intelInitTextureFuncs(struct dd_function_table * functions)
|
||||
{
|
||||
functions->ChooseTextureFormat = intelChooseTextureFormat;
|
||||
|
|
@ -85,17 +162,8 @@ void intelInitTextureFuncs(struct dd_function_table * functions)
|
|||
functions->UpdateTexturePalette = 0;
|
||||
functions->IsTextureResident = intelIsTextureResident;
|
||||
|
||||
/* The system memcpy (at least on ubuntu 5.10) has problems copying
|
||||
* to agp (writecombined) memory from a source which isn't 64-byte
|
||||
* aligned - there is a 4x performance falloff.
|
||||
*
|
||||
* The x86 __memcpy is immune to this but is slightly slower
|
||||
* (10%-ish) than the system memcpy.
|
||||
*
|
||||
* The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
|
||||
* isn't much faster than x86_memcpy for agp copies.
|
||||
*
|
||||
* TODO: switch dynamically.
|
||||
*/
|
||||
functions->TextureMemCpy = do_memcpy;
|
||||
if (INTEL_DEBUG & DEBUG_BUFMGR)
|
||||
functions->TextureMemCpy = timed_memcpy;
|
||||
else
|
||||
functions->TextureMemCpy = do_memcpy;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue