i965: Split the gen6 GS binding table to a separate table.

Improves VS state change microbenchmark performance by 7.08729% +/-
1.22289% (n=10) on gen7, because we don't upload the 64 dwords of
unused binding table any more.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Eric Anholt 2012-02-15 14:15:14 -08:00
parent 07e00b3040
commit f9c3ea32cd
5 changed files with 75 additions and 10 deletions

View file

@ -484,11 +484,6 @@ struct brw_vs_ouput_sizes {
* | . | . |
* | : | : |
* | 24 | Texture 15 |
* +-----|-------------------------+
* | 25 | SOL Binding 0 |
* | . | . |
* | : | : |
* | 88 | SOL Binding 63 |
* +-------------------------------+
*
* Our VS binding tables are programmed as follows:
@ -502,6 +497,15 @@ struct brw_vs_ouput_sizes {
* | 16 | Texture 15 |
* +-------------------------------+
*
* Our (gen6) GS binding tables are programmed as follows:
*
* +-----+-------------------------+
* | 0 | SOL Binding 0 |
* | . | . |
* | : | : |
* | 63 | SOL Binding 63 |
* +-----+-------------------------+
*
* Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
* the identity function or things will break. We do want to keep draw buffers
* first so we can use headerless render target writes for RT 0.
@ -509,15 +513,17 @@ struct brw_vs_ouput_sizes {
#define SURF_INDEX_DRAW(d) (d)
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
#define SURF_INDEX_SOL_BINDING(t) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
/** Maximum size of the binding table. */
#define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
#define BRW_MAX_SURFACES SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT)
#define SURF_INDEX_VERT_CONST_BUFFER (0)
#define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
#define BRW_MAX_VS_SURFACES SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT)
#define SURF_INDEX_SOL_BINDING(t) ((t))
#define BRW_MAX_GS_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
enum brw_cache_id {
BRW_BLEND_STATE,
BRW_DEPTH_STENCIL_STATE,
@ -868,6 +874,9 @@ struct brw_context
/** Offset in the program cache to the CLIP program pre-gen6 */
uint32_t prog_offset;
uint32_t state_offset;
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_MAX_VS_SURFACES];
} gs;
struct {

View file

@ -116,7 +116,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
OUT_BATCH(brw->bind.bo_offset); /* gs */
OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
ADVANCE_BATCH();
}

View file

@ -90,6 +90,7 @@ extern const struct brw_tracked_state gen6_clip_vp;
extern const struct brw_tracked_state gen6_color_calc_state;
extern const struct brw_tracked_state gen6_depth_stencil_state;
extern const struct brw_tracked_state gen6_gs_state;
extern const struct brw_tracked_state gen6_gs_binding_table;
extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
extern const struct brw_tracked_state gen6_sampler_state;
extern const struct brw_tracked_state gen6_scissor_state;

View file

@ -148,6 +148,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_texture_surfaces,
&gen6_sol_surface,
&brw_vs_binding_table,
&gen6_gs_binding_table,
&brw_binding_table,
&brw_samplers,

View file

@ -30,6 +30,7 @@
#include "brw_context.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_state.h"
static void
gen6_update_sol_surfaces(struct brw_context *brw)
@ -54,11 +55,11 @@ gen6_update_sol_surfaces(struct brw_context *brw)
xfb_obj->Offset[buffer] / 4 +
linked_xfb_info->Outputs[i].DstOffset;
brw_update_sol_surface(
brw, xfb_obj->Buffers[buffer], &brw->bind.surf_offset[surf_index],
brw, xfb_obj->Buffers[buffer], &brw->gs.surf_offset[surf_index],
linked_xfb_info->Outputs[i].NumComponents,
linked_xfb_info->BufferStride[buffer], buffer_offset);
} else {
brw->bind.surf_offset[surf_index] = 0;
brw->gs.surf_offset[surf_index] = 0;
}
}
@ -75,6 +76,59 @@ const struct brw_tracked_state gen6_sol_surface = {
.emit = gen6_update_sol_surfaces,
};
/**
 * Constructs the binding table for the gen6 GS stage.
 *
 * The table is a copy of brw->gs.surf_offset[] (BRW_MAX_GS_SURFACES dwords)
 * placed in space obtained from brw_state_batch(); the resulting offset is
 * stored in brw->gs.bind_bo_offset.  When the current vertex program has no
 * transform feedback outputs, no table is built and bind_bo_offset is reset
 * to 0.  BRW_NEW_GS_BINDING_TABLE is flagged whenever bind_bo_offset may
 * have changed.
 */
static void
brw_gs_upload_binding_table(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   /* BRW_NEW_VERTEX_PROGRAM */
   const struct gl_shader_program *shaderprog =
      ctx->Shader.CurrentVertexProgram;
   bool has_surfaces = false;
   uint32_t *bind;

   /* Currently we only ever upload surfaces for SOL.  Guard against a NULL
    * CurrentVertexProgram (presumably the case when no GLSL vertex shader is
    * bound -- confirm against callers) and treat it as "no SOL outputs".
    */
   if (shaderprog) {
      const struct gl_transform_feedback_info *linked_xfb_info =
         &shaderprog->LinkedTransformFeedback;

      has_surfaces = linked_xfb_info->NumOutputs != 0;
   }

   /* Skip making a binding table if there are no SOL surfaces to bind. */
   if (!has_surfaces) {
      if (brw->gs.bind_bo_offset != 0) {
         brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
         brw->gs.bind_bo_offset = 0;
      }
      return;
   }

   /* Might want to calculate nr_surfaces first, to avoid taking up so much
    * space for the binding table.
    *
    * The allocation must use the same entry count as the memcpy() below;
    * sizing it with BRW_MAX_SURFACES (the WM table size) would let the copy
    * run past the end of the allocation.
    */
   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                          sizeof(uint32_t) * BRW_MAX_GS_SURFACES,
                          32, &brw->gs.bind_bo_offset);

   /* BRW_NEW_SURFACES */
   memcpy(bind, brw->gs.surf_offset, BRW_MAX_GS_SURFACES * sizeof(uint32_t));

   brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
}
/**
 * Tracked-state atom for the gen6 GS binding table.
 *
 * Pairs brw_gs_upload_binding_table() with the BRW_NEW_BATCH,
 * BRW_NEW_VERTEX_PROGRAM and BRW_NEW_SURFACES dirty flags (presumably the
 * flags that cause the emit hook to run -- confirm against the state upload
 * loop).  No mesa or cache dirty flags are listed.
 */
const struct brw_tracked_state gen6_gs_binding_table = {
   .dirty.mesa = 0,
   .dirty.brw = BRW_NEW_SURFACES | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_BATCH,
   .dirty.cache = 0,
   .emit = brw_gs_upload_binding_table,
};
static void
gen6_update_sol_indices(struct brw_context *brw)
{