radeon/r200/r300: Fix swtcl prediction to work after primitive change.

Swtcl calls flush every time the primitive changes, so the prediction has to
be made again after flushing.
This commit is contained in:
Pauli Nieminen 2009-08-27 14:21:13 +03:00
parent 44e86dde6d
commit 570d4e375a
4 changed files with 88 additions and 43 deletions

View file

@ -201,24 +201,32 @@ static void r200SetVertexFormat( GLcontext *ctx )
}
}
/**
 * Reserve command-buffer space for the next r200 swtcl emit and record the
 * command-stream position the emit is expected to end at.
 *
 * Does nothing while a previous prediction is still stored (non-zero
 * emit_prediction).  Otherwise the stored prediction becomes
 * state size + vertex array packet + primitive packet + current cs->cdw.
 *
 * \param ctx  GL context whose r200 driver context is updated.
 */
static void r200_predict_emit_size( GLcontext *ctx )
{
	r200ContextPtr rmesa = R200_CONTEXT( ctx );
	/* vertex array packet (7) + primitive packet (3) */
	const int packet_cost = 7 + 3;
	int state_cost;

	/* A non-zero value means a prediction is already in place. */
	if (rmesa->radeon.swtcl.emit_prediction)
		return;

	state_cost = radeonCountStateEmitSize(&rmesa->radeon);

	/*
	 * When the reservation call reports non-zero the state size is counted
	 * again (presumably the buffer was flushed and more state must be
	 * re-emitted -- confirm against rcommonEnsureCmdBufSpace).
	 */
	if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
				     state_cost + packet_cost,
				     __FUNCTION__))
		state_cost = radeonCountStateEmitSize(&rmesa->radeon);

	/* cdw is read only after the reservation, as it may have changed. */
	rmesa->radeon.swtcl.emit_prediction =
		state_cost + packet_cost + rmesa->radeon.cmdbuf.cs->cdw;
}
/**
 * Render-start hook for the r200 software TCL path (installed as
 * tnl->Driver.Render.Start by r200InitSwtcl).
 *
 * Selects the vertex format, optionally logs the call, and makes sure
 * command-buffer space is reserved for the upcoming emit.
 *
 * \param ctx  GL context being rendered with.
 */
static void r200RenderStart( GLcontext *ctx )
{
/* Sizes budgeted for the vertex array packet and the primitive packet. */
const int vertex_array_size = 7;
const int prim_size = 3;
r200ContextPtr rmesa = R200_CONTEXT( ctx );
r200SetVertexFormat( ctx );
/* Trace entry when vertex debugging is enabled. */
if (RADEON_DEBUG & DEBUG_VERTS)
fprintf(stderr, "%s\n", __func__);
/*
 * NOTE(review): this primitive_counter reservation requests the same amount
 * of space as r200_predict_emit_size() below.  It looks like the pre-change
 * code shown side by side with its replacement -- confirm which one is live.
 *
 * primitive_counter is left at 0 when rcommonEnsureCmdBufSpace() returns
 * non-zero and set to 1 otherwise, so the reservation is only skipped on
 * later calls in the latter case.
 */
if (!rmesa->radeon.swtcl.primitive_counter) {
if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
radeonCountStateEmitSize(&rmesa->radeon) +
vertex_array_size + prim_size,
__FUNCTION__))
rmesa->radeon.swtcl.primitive_counter = 0;
else
rmesa->radeon.swtcl.primitive_counter = 1;
}
/* Reserve space and store the predicted end of the emit (no-op if a prediction exists). */
r200_predict_emit_size( ctx );
}
@ -296,8 +304,13 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
r200EmitVbufPrim( rmesa,
rmesa->radeon.swtcl.hw_primitive,
rmesa->radeon.swtcl.numverts);
if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
WARN_ONCE("Rendering was %d commands larger than predicted size."
" We might overflow command buffer.\n",
rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
rmesa->radeon.swtcl.primitive_counter = 0;
rmesa->radeon.swtcl.emit_prediction = 0;
r200_predict_emit_size( ctx );
}
@ -905,7 +918,7 @@ void r200InitSwtcl( GLcontext *ctx )
init_rast_tab();
firsttime = 0;
}
rmesa->radeon.swtcl.primitive_counter = 0;
rmesa->radeon.swtcl.emit_prediction = 0;
tnl->Driver.Render.Start = r200RenderStart;
tnl->Driver.Render.Finish = r200RenderFinish;

View file

@ -490,6 +490,27 @@ static void r300ChooseRenderState( GLcontext *ctx )
rmesa->radeon.swtcl.RenderIndex = index;
}
}
/**
 * Reserve command-buffer space for the next r300 swtcl emit and record the
 * command-stream position the emit is expected to end at.
 *
 * Does nothing while a previous prediction is still stored (non-zero
 * emit_prediction).  Otherwise the stored prediction becomes
 * state size + vertex packet + primitive packet + two cache flushes +
 * current cs->cdw.
 *
 * The space requested from rcommonEnsureCmdBufSpace() now covers everything
 * the prediction accounts for, including both cache flushes; previously the
 * reservation left them out, so it was smaller than the predicted emit.
 *
 * \param ctx  GL context whose r300 driver context is updated.
 */
static void r300_predict_emit_size( GLcontext *ctx )
{
	r300ContextPtr rmesa = R300_CONTEXT( ctx );

	if (!rmesa->radeon.swtcl.emit_prediction) {
		const int vertex_size = 7;
		const int prim_size = 3;
		const int cache_flush_size = 4;
		/* r300_swtcl_flush() emits a cache flush before and after the primitive. */
		const int emit_size = vertex_size + prim_size + cache_flush_size * 2;
		const int state_size = radeonCountStateEmitSize(&rmesa->radeon);

		/*
		 * When the reservation call reports non-zero the state size is
		 * counted again (presumably the buffer was flushed and more state
		 * must be re-emitted -- confirm against rcommonEnsureCmdBufSpace).
		 */
		if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
					     state_size + emit_size,
					     __FUNCTION__))
			rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon);
		else
			rmesa->radeon.swtcl.emit_prediction = state_size;

		/* cdw is read only after the reservation, as it may have changed. */
		rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw
			+ emit_size;
	}
}
void r300RenderStart(GLcontext *ctx)
@ -508,20 +529,7 @@ void r300RenderStart(GLcontext *ctx)
r300UpdateShaderStates(rmesa);
const int vertex_size = 7;
const int prim_size = 3;
if (!rmesa->radeon.swtcl.primitive_counter) {
if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
radeonCountStateEmitSize(&rmesa->radeon) +
+ vertex_size + prim_size,
__FUNCTION__))
rmesa->radeon.swtcl.primitive_counter = 0;
else
rmesa->radeon.swtcl.primitive_counter = 1;
}
r300EmitCacheFlush(rmesa);
r300_predict_emit_size( ctx );
/* investigate if we can put back flush optimisation if needed */
if (rmesa->radeon.dma.flush != NULL) {
@ -577,7 +585,7 @@ void r300InitSwtcl(GLcontext *ctx)
init_rast_tab();
firsttime = 0;
}
rmesa->radeon.swtcl.primitive_counter = 0;
rmesa->radeon.swtcl.emit_prediction = 0;
tnl->Driver.Render.Start = r300RenderStart;
tnl->Driver.Render.Finish = r300RenderFinish;
@ -644,6 +652,8 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
fprintf(stderr, "%s\n", __func__);
r300ContextPtr rmesa = R300_CONTEXT(ctx);
r300EmitCacheFlush(rmesa);
radeonEmitState(&rmesa->radeon);
r300_emit_scissor(ctx);
r300EmitVertexAOS(rmesa,
@ -655,6 +665,11 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
rmesa->radeon.swtcl.hw_primitive,
rmesa->radeon.swtcl.numverts);
r300EmitCacheFlush(rmesa);
rmesa->radeon.swtcl.primitive_counter = 0;
if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
WARN_ONCE("Rendering was %d commands larger than predicted size."
" We might overflow command buffer.\n",
rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
rmesa->radeon.swtcl.emit_prediction = 0;
r300_predict_emit_size( ctx );
COMMIT_BATCH();
}

View file

@ -342,7 +342,7 @@ struct radeon_swtcl_info {
struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
GLuint vertex_attr_count;
GLuint primitive_counter;
GLuint emit_prediction;
};
#define RADEON_MAX_AOS_ARRAYS 16

View file

@ -220,6 +220,27 @@ static void radeonSetVertexFormat( GLcontext *ctx )
}
}
/**
 * Reserve command-buffer space for the next r100 swtcl emit and record the
 * command-stream position the emit is expected to end at.
 *
 * Does nothing while a previous prediction is still stored (non-zero
 * emit_prediction).  Otherwise the stored prediction becomes
 * state size + scissor + primitives + vertex packet + current cs->cdw.
 *
 * \param ctx  GL context whose r100 driver context is updated.
 */
static void radeon_predict_emit_size( GLcontext* ctx )
{
	r100ContextPtr rmesa = R100_CONTEXT( ctx );
	/* scissor (8) + primitives (8) + vertex packet (7) */
	const int fixed_cost = 8 + 8 + 7;
	int state_cost;

	/* A non-zero value means a prediction is already in place. */
	if (rmesa->radeon.swtcl.emit_prediction)
		return;

	state_cost = radeonCountStateEmitSize( &rmesa->radeon );

	/*
	 * When the reservation call reports non-zero the state size is counted
	 * again (presumably the buffer was flushed and more state must be
	 * re-emitted -- confirm against rcommonEnsureCmdBufSpace).
	 */
	if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
				     state_cost + fixed_cost,
				     __FUNCTION__))
		state_cost = radeonCountStateEmitSize( &rmesa->radeon );

	/* cdw is read only after the reservation, as it may have changed. */
	rmesa->radeon.swtcl.emit_prediction =
		state_cost + fixed_cost + rmesa->radeon.cmdbuf.cs->cdw;
}
static void radeonRenderStart( GLcontext *ctx )
{
@ -230,16 +251,7 @@ static void radeonRenderStart( GLcontext *ctx )
if (rmesa->radeon.dma.flush != 0 &&
rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
rmesa->radeon.dma.flush( ctx );
if (!rmesa->radeon.swtcl.primitive_counter) {
if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
radeonCountStateEmitSize( &rmesa->radeon ) +
(8 + 8 + 7), /* scissor + primis + VertexAOS */
__FUNCTION__))
rmesa->radeon.swtcl.primitive_counter = 0;
else
rmesa->radeon.swtcl.primitive_counter = 1;
}
radeon_predict_emit_size( ctx );
}
@ -307,9 +319,14 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
rmesa->swtcl.vertex_format,
rmesa->radeon.swtcl.hw_primitive,
rmesa->radeon.swtcl.numverts);
if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
WARN_ONCE("Rendering was %d commands larger than predicted size."
" We might overflow command buffer.\n",
rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
radeon_predict_emit_size( ctx );
rmesa->radeon.swtcl.primitive_counter = 0;
rmesa->radeon.swtcl.emit_prediction = 0;
}
@ -814,8 +831,8 @@ void radeonInitSwtcl( GLcontext *ctx )
if (firsttime) {
init_rast_tab();
firsttime = 0;
rmesa->radeon.swtcl.primitive_counter = 0;
}
rmesa->radeon.swtcl.emit_prediction = 0;
tnl->Driver.Render.Start = radeonRenderStart;
tnl->Driver.Render.Finish = radeonRenderFinish;