mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 13:10:10 +01:00
i965: Compute VS attribute WA bits earlier and check if they changed.
BRW_NEW_VERTICES is flagged every time we draw a primitive. Having
the brw_vs_prog atom depend on BRW_NEW_VERTICES meant that we had to
compute the VS program key and do a program cache lookup for every
single primitive. This is painfully expensive.
The workaround bit computation is almost entirely based on the vertex
attribute arrays (brw->vb.inputs[i]), which are set by brw_merge_inputs.
The only thing it uses the VS program for is to see which VS inputs are
actually read. brw_merge_inputs() happens once per primitive, and can
safely look at the currently bound vertex program, as it doesn't change
in the middle of a draw.
This patch moves the workaround bit computation to brw_merge_inputs(),
right after assigning brw->vb.inputs[i], and stores the previous WA bit
values in the context. If they've actually changed from the last draw
(which is uncommon), we signal that we need a new vertex program,
causing brw_vs_prog to compute a new key.
Improves performance in Gl32Batch7 by 13.6123% +/- 0.739652% (n=166)
on Haswell GT3e. I'm told Baytrail shows similar gains.
v2: Introduce a new BRW_NEW_VS_ATTRIB_WORKAROUNDS dirty bit, rather
than reusing BRW_NEW_VERTEX_PROGRAM (suggested by Chris Forbes).
This prevents unnecessary re-emission of surface/sampler related
atoms (and an SOL atom on Sandybridge).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
This commit is contained in:
parent
0b4a688691
commit
ae45a5a28d
4 changed files with 57 additions and 37 deletions
|
|
@ -201,6 +201,7 @@ enum brw_state_id {
|
|||
BRW_STATE_SF_VP,
|
||||
BRW_STATE_CLIP_VP,
|
||||
BRW_STATE_SAMPLER_STATE_TABLE,
|
||||
BRW_STATE_VS_ATTRIB_WORKAROUNDS,
|
||||
BRW_NUM_STATE_BITS
|
||||
};
|
||||
|
||||
|
|
@ -279,6 +280,7 @@ enum brw_state_id {
|
|||
#define BRW_NEW_SF_VP (1ull << BRW_STATE_SF_VP)
|
||||
#define BRW_NEW_CLIP_VP (1ull << BRW_STATE_CLIP_VP)
|
||||
#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
|
||||
#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
|
||||
|
||||
struct brw_state_flags {
|
||||
/** State update flags signalled by mesa internals */
|
||||
|
|
@ -1133,6 +1135,14 @@ struct brw_context
|
|||
* the same VB packed over and over again.
|
||||
*/
|
||||
unsigned int start_vertex_bias;
|
||||
|
||||
/**
|
||||
* Certain vertex attribute formats aren't natively handled by the
|
||||
* hardware and require special VS code to fix up their values.
|
||||
*
|
||||
* These bitfields indicate which workarounds are needed.
|
||||
*/
|
||||
uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
|
||||
} vb;
|
||||
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@
|
|||
#include "brw_defines.h"
|
||||
#include "brw_context.h"
|
||||
#include "brw_state.h"
|
||||
#include "brw_vs.h"
|
||||
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "intel_buffers.h"
|
||||
|
|
@ -281,6 +282,7 @@ static void brw_emit_prim(struct brw_context *brw,
|
|||
static void brw_merge_inputs( struct brw_context *brw,
|
||||
const struct gl_client_array *arrays[])
|
||||
{
|
||||
const struct gl_context *ctx = &brw->ctx;
|
||||
GLuint i;
|
||||
|
||||
for (i = 0; i < brw->vb.nr_buffers; i++) {
|
||||
|
|
@ -293,6 +295,46 @@ static void brw_merge_inputs( struct brw_context *brw,
|
|||
brw->vb.inputs[i].buffer = -1;
|
||||
brw->vb.inputs[i].glarray = arrays[i];
|
||||
}
|
||||
|
||||
if (brw->gen < 8 && !brw->is_haswell) {
|
||||
struct gl_program *vp = &ctx->VertexProgram._Current->Base;
|
||||
/* Prior to Haswell, the hardware can't natively support GL_FIXED or
|
||||
* 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
|
||||
*/
|
||||
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
|
||||
if (!(vp->InputsRead & BITFIELD64_BIT(i)))
|
||||
continue;
|
||||
|
||||
uint8_t wa_flags = 0;
|
||||
|
||||
switch (brw->vb.inputs[i].glarray->Type) {
|
||||
|
||||
case GL_FIXED:
|
||||
wa_flags = brw->vb.inputs[i].glarray->Size;
|
||||
break;
|
||||
|
||||
case GL_INT_2_10_10_10_REV:
|
||||
wa_flags |= BRW_ATTRIB_WA_SIGN;
|
||||
/* fallthrough */
|
||||
|
||||
case GL_UNSIGNED_INT_2_10_10_10_REV:
|
||||
if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
|
||||
wa_flags |= BRW_ATTRIB_WA_BGRA;
|
||||
|
||||
if (brw->vb.inputs[i].glarray->Normalized)
|
||||
wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
|
||||
else if (!brw->vb.inputs[i].glarray->Integer)
|
||||
wa_flags |= BRW_ATTRIB_WA_SCALE;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (brw->vb.attrib_wa_flags[i] != wa_flags) {
|
||||
brw->vb.attrib_wa_flags[i] = wa_flags;
|
||||
brw->state.dirty.brw |= BRW_NEW_VS_ATTRIB_WORKAROUNDS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -525,6 +525,7 @@ static struct dirty_bit_map brw_bits[] = {
|
|||
DEFINE_BIT(BRW_NEW_SF_VP),
|
||||
DEFINE_BIT(BRW_NEW_CLIP_VP),
|
||||
DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
|
||||
DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
|
||||
{0, 0, 0}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -453,42 +453,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
|
|||
brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
|
||||
&key.base.tex);
|
||||
|
||||
/* BRW_NEW_VERTICES */
|
||||
if (brw->gen < 8 && !brw->is_haswell) {
|
||||
/* Prior to Haswell, the hardware can't natively support GL_FIXED or
|
||||
* 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
|
||||
*/
|
||||
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
|
||||
if (!(vp->program.Base.InputsRead & BITFIELD64_BIT(i)))
|
||||
continue;
|
||||
|
||||
uint8_t wa_flags = 0;
|
||||
|
||||
switch (brw->vb.inputs[i].glarray->Type) {
|
||||
|
||||
case GL_FIXED:
|
||||
wa_flags = brw->vb.inputs[i].glarray->Size;
|
||||
break;
|
||||
|
||||
case GL_INT_2_10_10_10_REV:
|
||||
wa_flags |= BRW_ATTRIB_WA_SIGN;
|
||||
/* fallthrough */
|
||||
|
||||
case GL_UNSIGNED_INT_2_10_10_10_REV:
|
||||
if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
|
||||
wa_flags |= BRW_ATTRIB_WA_BGRA;
|
||||
|
||||
if (brw->vb.inputs[i].glarray->Normalized)
|
||||
wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
|
||||
else if (!brw->vb.inputs[i].glarray->Integer)
|
||||
wa_flags |= BRW_ATTRIB_WA_SCALE;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
key.gl_attrib_wa_flags[i] = wa_flags;
|
||||
}
|
||||
}
|
||||
/* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
|
||||
memcpy(key.gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
|
||||
sizeof(brw->vb.attrib_wa_flags));
|
||||
|
||||
if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG,
|
||||
&key, sizeof(key),
|
||||
|
|
@ -526,7 +493,7 @@ const struct brw_tracked_state brw_vs_prog = {
|
|||
_NEW_TEXTURE |
|
||||
_NEW_TRANSFORM,
|
||||
.brw = BRW_NEW_VERTEX_PROGRAM |
|
||||
BRW_NEW_VERTICES,
|
||||
BRW_NEW_VS_ATTRIB_WORKAROUNDS,
|
||||
},
|
||||
.emit = brw_upload_vs_prog
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue