progs/perf: a few more vbo upload modes

Some tests, e.g. small SubData, are probably overwhelmed by the cost of
performing the draw after each upload.  Add a variant which does a lot
of SubData uploads and then a single draw.

Also try to avoid cache-artifacts in the upload timings.
This commit is contained in:
Keith Whitwell 2009-09-21 15:56:17 +01:00
parent 25a580c841
commit 7ce0421fb7

View file

@ -31,12 +31,16 @@
#include "glmain.h" #include "glmain.h"
#include "common.h" #include "common.h"
/* Copy data out of a large array to avoid caching effects:
*/
#define DATA_SIZE (16*1024*1024)
int WinWidth = 100, WinHeight = 100; int WinWidth = 100, WinHeight = 100;
static GLuint VBO; static GLuint VBO;
static GLsizei VBOSize = 0; static GLsizei VBOSize = 0;
static GLsizei SubSize = 0;
static GLubyte *VBOData = NULL; static GLubyte *VBOData = NULL;
static const GLboolean DrawPoint = GL_TRUE; static const GLboolean DrawPoint = GL_TRUE;
@ -61,11 +65,23 @@ static void
UploadVBO(unsigned count) UploadVBO(unsigned count)
{ {
unsigned i; unsigned i;
for (i = 0; i < count; i++) { unsigned total = 0;
glBufferDataARB(GL_ARRAY_BUFFER, VBOSize, VBOData, GL_STREAM_DRAW_ARB); unsigned src = 0;
if (DrawPoint) for (i = 0; i < count; i++) {
glDrawArrays(GL_POINTS, 0, 1); glBufferDataARB(GL_ARRAY_BUFFER, VBOSize, VBOData + src, GL_STREAM_DRAW_ARB);
glDrawArrays(GL_POINTS, 0, 1);
/* Throw in an occasional flush to work around a driver crash:
*/
total += VBOSize;
if (total >= 16*1024*1024) {
glFlush();
total = 0;
}
src += VBOSize;
src %= DATA_SIZE;
} }
glFinish(); glFinish();
} }
@ -75,18 +91,42 @@ static void
UploadSubVBO(unsigned count) UploadSubVBO(unsigned count)
{ {
unsigned i; unsigned i;
unsigned src = 0;
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
if (BufferSubDataInHalves) { unsigned offset = (i * SubSize) % VBOSize;
GLsizei half = VBOSize / 2; glBufferSubDataARB(GL_ARRAY_BUFFER, offset, SubSize, VBOData + src);
glBufferSubDataARB(GL_ARRAY_BUFFER, 0, half, VBOData);
glBufferSubDataARB(GL_ARRAY_BUFFER, half, half, VBOData + half); if (DrawPoint) {
} glDrawArrays(GL_POINTS, offset / sizeof(Vertex0), 1);
else {
glBufferSubDataARB(GL_ARRAY_BUFFER, 0, VBOSize, VBOData);
} }
if (DrawPoint) src += SubSize;
glDrawArrays(GL_POINTS, 0, 1); src %= DATA_SIZE;
}
glFinish();
}
/* Do multiple small SubData uploads, then a DrawArrays. This may be a
* fairer comparison to back-to-back BufferData calls:
*/
static void
BatchUploadSubVBO(unsigned count)
{
unsigned i = 0, j;
unsigned period = VBOSize / SubSize;
unsigned src = 0;
while (i < count) {
for (j = 0; j < period && i < count; j++, i++) {
unsigned offset = j * SubSize;
glBufferSubDataARB(GL_ARRAY_BUFFER, offset, SubSize, VBOData + src);
}
glDrawArrays(GL_POINTS, 0, 1);
src += SubSize;
src %= DATA_SIZE;
} }
glFinish(); glFinish();
} }
@ -109,28 +149,61 @@ PerfDraw(void)
{ {
double rate, mbPerSec; double rate, mbPerSec;
int sub, sz; int sub, sz;
int i;
VBOData = calloc(DATA_SIZE, 1);
for (i = 0; i < DATA_SIZE / sizeof(Vertex0); i++) {
memcpy(VBOData + i * sizeof(Vertex0),
Vertex0,
sizeof(Vertex0));
}
/* loop over whole/sub buffer upload */ /* loop over whole/sub buffer upload */
for (sub = 0; sub < 2; sub++) { for (sub = 0; sub < 3; sub++) {
/* loop over VBO sizes */ if (sub == 2) {
for (sz = 0; Sizes[sz]; sz++) { VBOSize = 1024 * 1024;
VBOSize = Sizes[sz];
VBOData = malloc(VBOSize); glBufferDataARB(GL_ARRAY_BUFFER, VBOSize, VBOData, GL_STREAM_DRAW_ARB);
memcpy(VBOData, Vertex0, sizeof(Vertex0));
if (sub) for (sz = 0; Sizes[sz] < VBOSize; sz++) {
SubSize = Sizes[sz];
rate = PerfMeasureRate(UploadSubVBO); rate = PerfMeasureRate(UploadSubVBO);
else
rate = PerfMeasureRate(UploadVBO);
mbPerSec = rate * VBOSize / (1024.0 * 1024.0); mbPerSec = rate * SubSize / (1024.0 * 1024.0);
perf_printf(" glBufferSubDataARB(size = %d, VBOSize = %d): %.1f MB/sec\n",
SubSize, VBOSize, mbPerSec);
}
perf_printf(" glBuffer%sDataARB(size = %d): %.1f MB/sec\n", for (sz = 0; Sizes[sz] < VBOSize; sz++) {
(sub ? "Sub" : ""), VBOSize, mbPerSec); SubSize = Sizes[sz];
rate = PerfMeasureRate(BatchUploadSubVBO);
free(VBOData); mbPerSec = rate * SubSize / (1024.0 * 1024.0);
perf_printf(" glBufferSubDataARB(size = %d, VBOSize = %d), batched: %.1f MB/sec\n",
SubSize, VBOSize, mbPerSec);
}
}
else {
/* loop over VBO sizes */
for (sz = 0; Sizes[sz]; sz++) {
SubSize = VBOSize = Sizes[sz];
if (sub == 1)
rate = PerfMeasureRate(UploadSubVBO);
else
rate = PerfMeasureRate(UploadVBO);
mbPerSec = rate * VBOSize / (1024.0 * 1024.0);
perf_printf(" glBuffer%sDataARB(size = %d): %.1f MB/sec\n",
(sub ? "Sub" : ""), VBOSize, mbPerSec);
}
} }
} }