i965: Split the VS binding table to a separate table.

This is a step toward making the samplers/binding tables reflect
sampler uniform mappings instead of embedding those in the programs.
No significant performance difference on the microbenchmark (n=10).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Eric Anholt 2012-02-15 13:33:07 -08:00
parent 8387156620
commit 07e00b3040
9 changed files with 94 additions and 17 deletions

View file

@ -409,6 +409,8 @@ struct brw_vs_prog_data {
bool uses_new_param_layout;
bool uses_vertexid;
bool userclip;
int num_surfaces;
};
@ -468,7 +470,7 @@ struct brw_vs_ouput_sizes {
* (VS, HS, DS, GS, PS), we currently share a single binding table for all of
* them. This is purely for convenience.
*
* Currently our binding tables are (arbitrarily) programmed as follows:
* Currently our SOL/WM binding tables are (arbitrarily) programmed as follows:
*
* +-------------------------------+
* | 0 | Draw buffer 0 | .
@ -476,18 +478,28 @@ struct brw_vs_ouput_sizes {
* | : | : | > Only relevant to the WM.
* | 7 | Draw buffer 7 | /
* |-----|-------------------------| `
* | 8 | VS Pull Constant Buffer |
* | 9 | WM Pull Constant Buffer |
* | 8 | WM Pull Constant Buffer |
* |-----|-------------------------|
* | 10 | Texture 0 |
* | 9 | Texture 0 |
* | . | . |
* | : | : |
* | 25 | Texture 15 |
* | 24 | Texture 15 |
* +-----|-------------------------+
* | 26 | SOL Binding 0 |
* | 25 | SOL Binding 0 |
* | . | . |
* | : | : |
* | 89 | SOL Binding 63 |
* | 88 | SOL Binding 63 |
* +-------------------------------+
*
* Our VS binding tables are programmed as follows:
*
* +-----+-------------------------+
* | 0 | VS Pull Constant Buffer |
* +-----+-------------------------+
* | 1 | Texture 0 |
* | . | . |
* | : | : |
* | 16 | Texture 15 |
* +-------------------------------+
*
* Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
@ -495,7 +507,6 @@ struct brw_vs_ouput_sizes {
* first so we can use headerless render target writes for RT 0.
*/
#define SURF_INDEX_DRAW(d) (d)
#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0)
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
#define SURF_INDEX_SOL_BINDING(t) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
@ -503,6 +514,10 @@ struct brw_vs_ouput_sizes {
/** Maximum size of the binding table. */
#define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
#define SURF_INDEX_VERT_CONST_BUFFER (0)
#define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
#define BRW_MAX_VS_SURFACES SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT)
enum brw_cache_id {
BRW_BLEND_STATE,
BRW_DEPTH_STENCIL_STATE,
@ -841,6 +856,9 @@ struct brw_context
*/
uint8_t *ra_reg_to_grf;
/** @} */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_MAX_VS_SURFACES];
} vs;
struct {

View file

@ -77,7 +77,7 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
OUT_BATCH(brw->bind.bo_offset);
OUT_BATCH(brw->vs.bind_bo_offset);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
@ -115,7 +115,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
GEN6_BINDING_TABLE_MODIFY_GS |
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(brw->bind.bo_offset); /* vs */
OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
OUT_BATCH(brw->bind.bo_offset); /* gs */
OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
ADVANCE_BATCH();

View file

@ -71,6 +71,7 @@ extern const struct brw_tracked_state brw_wm_prog;
extern const struct brw_tracked_state brw_renderbuffer_surfaces;
extern const struct brw_tracked_state brw_texture_surfaces;
extern const struct brw_tracked_state brw_binding_table;
extern const struct brw_tracked_state brw_vs_binding_table;
extern const struct brw_tracked_state brw_wm_unit;
extern const struct brw_tracked_state brw_psp_urb_cbs;

View file

@ -70,6 +70,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
&brw_wm_pull_constants,
&brw_renderbuffer_surfaces,
&brw_texture_surfaces,
&brw_vs_binding_table,
&brw_binding_table,
&brw_samplers,
@ -146,6 +147,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
&gen6_sol_surface,
&brw_vs_binding_table,
&brw_binding_table,
&brw_samplers,
@ -214,6 +216,7 @@ const struct brw_tracked_state *gen7_atoms[] =
&brw_wm_pull_constants,
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
&brw_vs_binding_table,
&brw_binding_table,
&gen7_samplers,

View file

@ -465,7 +465,7 @@ vec4_visitor::generate_tex(vec4_instruction *inst,
dst,
inst->base_mrf,
src,
SURF_INDEX_TEXTURE(inst->sampler),
SURF_INDEX_VS_TEXTURE(inst->sampler),
inst->sampler,
WRITEMASK_XYZW,
msg_type,

View file

@ -247,6 +247,11 @@ do_vs_prog(struct brw_context *brw,
brw_old_vs_emit(&c);
}
if (c.prog_data.nr_pull_params)
c.prog_data.num_surfaces = 1;
if (c.vp->program.Base.SamplersUsed)
c.prog_data.num_surfaces = BRW_MAX_VS_SURFACES;
/* Scratch space is used for register spilling */
if (c.last_scratch) {
c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);

View file

@ -65,7 +65,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
if (brw->vs.const_bo) {
drm_intel_bo_unreference(brw->vs.const_bo);
brw->vs.const_bo = NULL;
brw->bind.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
brw->vs.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
return;
@ -97,7 +97,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
const int surf = SURF_INDEX_VERT_CONST_BUFFER;
intel->vtbl.create_constant_surface(brw, brw->vs.const_bo,
params->NumParameters,
&brw->bind.surf_offset[surf]);
&brw->vs.surf_offset[surf]);
brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
@ -110,3 +110,50 @@ const struct brw_tracked_state brw_vs_pull_constants = {
},
.emit = brw_upload_vs_pull_constants,
};
/**
 * Constructs the binding table for the VS surface state, which maps unit
 * numbers to surface state objects.
 *
 * If the current VS program uses no surfaces (prog_data->num_surfaces == 0),
 * no table is built and brw->vs.bind_bo_offset is reset to 0.  Otherwise the
 * offsets stored in brw->vs.surf_offset[] are copied into a table obtained
 * from brw_state_batch(), and the table's offset is written to
 * brw->vs.bind_bo_offset.  BRW_NEW_VS_BINDING_TABLE is flagged whenever the
 * offset is (re)written so that dependent state gets re-emitted.
 */
static void
brw_vs_upload_binding_table(struct brw_context *brw)
{
   uint32_t *bind;
   int i;

   /* CACHE_NEW_VS_PROG: Skip making a binding table if we don't use textures or
    * pull constants.
    */
   if (brw->vs.prog_data->num_surfaces == 0) {
      /* Only flag the state dirty when the offset actually changes. */
      if (brw->vs.bind_bo_offset != 0) {
         brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
         brw->vs.bind_bo_offset = 0;
      }
      return;
   }

   /* Might want to calculate nr_surfaces first, to avoid taking up so much
    * space for the binding table.
    *
    * The table is sized for the VS layout (BRW_MAX_VS_SURFACES), not the
    * larger shared SOL/WM layout (BRW_MAX_SURFACES): only the VS entries are
    * filled in below, so a bigger allocation would just waste batch space
    * and leave its tail unwritten.
    */
   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                          sizeof(uint32_t) * BRW_MAX_VS_SURFACES,
                          32, &brw->vs.bind_bo_offset);

   /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
   for (i = 0; i < BRW_MAX_VS_SURFACES; i++) {
      bind[i] = brw->vs.surf_offset[i];
   }

   brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
}
/**
 * State atom for the VS binding table.
 *
 * brw_vs_upload_binding_table() is the emit hook; the atom is keyed on the
 * BRW_NEW_BATCH, BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF driver flags and
 * on the CACHE_NEW_VS_PROG cache flag.
 */
const struct brw_tracked_state brw_vs_binding_table = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH | BRW_NEW_SURFACES | BRW_NEW_VS_CONSTBUF,
      .cache = CACHE_NEW_VS_PROG,
   },
   .emit = brw_vs_upload_binding_table,
};

View file

@ -1097,6 +1097,10 @@ brw_update_texture_surfaces(struct brw_context *brw)
} else {
brw->bind.surf_offset[surf] = 0;
}
/* For now, just mirror the texture setup to the VS slots. */
brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(i)] =
brw->bind.surf_offset[surf];
}
brw->state.dirty.brw |= BRW_NEW_SURFACES;
@ -1128,12 +1132,11 @@ brw_upload_binding_table(struct brw_context *brw)
sizeof(uint32_t) * BRW_MAX_SURFACES,
32, &brw->bind.bo_offset);
/* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
/* BRW_NEW_SURFACES */
for (i = 0; i < BRW_MAX_SURFACES; i++) {
bind[i] = brw->bind.surf_offset[i];
}
brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}
@ -1141,7 +1144,6 @@ const struct brw_tracked_state brw_binding_table = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_VS_CONSTBUF |
BRW_NEW_SURFACES),
.cache = 0
},

View file

@ -37,9 +37,10 @@ upload_vs_state(struct brw_context *brw)
gen7_emit_vs_workaround_flush(intel);
/* BRW_NEW_VS_BINDING_TABLE */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
OUT_BATCH(brw->bind.bo_offset);
OUT_BATCH(brw->vs.bind_bo_offset);
ADVANCE_BATCH();
/* CACHE_NEW_SAMPLER */