i965: Split shader_time entries into separate cachelines.

This avoids some snooping overhead between EUs processing separate shaders
(so VS versus FS).

Improves performance of a minecraft trace with shader_time by 28.9% +/-
18.3% (n=7), and performance of my old GLSL demo by 93.7% +/- 0.8% (n=4).

v2: Add a define for the stride with a comment explaining its units and
    why.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Eric Anholt 2013-03-11 12:59:06 -07:00
parent a35a19a6ea
commit db3a0f13ef
4 changed files with 13 additions and 4 deletions

View file

@ -571,6 +571,14 @@ struct brw_vs_prog_data {
#define SURF_INDEX_SOL_BINDING(t) ((t))
#define BRW_MAX_GS_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
/**
 * Stride in bytes between shader_time entries.
 *
 * We separate entries by a cacheline to reduce traffic between EUs writing to
 * different entries.
 *
 * The unit is bytes: use it directly for byte offsets into the shader_time
 * buffer and for sizing that buffer (number of entries times the stride).
 * Code that indexes the buffer as an array of 32-bit words must divide the
 * stride by 4 to get the element index of an entry.
 */
#define SHADER_TIME_STRIDE 64
enum brw_cache_id {
BRW_BLEND_STATE,
BRW_DEPTH_STENCIL_STATE,

View file

@ -621,7 +621,7 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
fs_reg offset_mrf = fs_reg(MRF, base_mrf);
offset_mrf.type = BRW_REGISTER_TYPE_UD;
emit(MOV(offset_mrf, fs_reg(shader_time_index * 4)));
emit(MOV(offset_mrf, fs_reg(shader_time_index * SHADER_TIME_STRIDE)));
fs_reg time_mrf = fs_reg(MRF, base_mrf + 1);
time_mrf.type = BRW_REGISTER_TYPE_UD;

View file

@ -228,7 +228,8 @@ brw_init_shader_time(struct brw_context *brw)
const int max_entries = 4096;
brw->shader_time.bo = drm_intel_bo_alloc(intel->bufmgr, "shader time",
max_entries * 4, 4096);
max_entries * SHADER_TIME_STRIDE,
4096);
brw->shader_time.programs = rzalloc_array(brw, struct gl_shader_program *,
max_entries);
brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
@ -409,7 +410,7 @@ brw_collect_shader_time(struct brw_context *brw)
uint32_t *times = brw->shader_time.bo->virtual;
for (int i = 0; i < brw->shader_time.num_entries; i++) {
brw->shader_time.cumulative[i] += times[i];
brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
}
/* Zero the BO out to clear it out for our next collection.

View file

@ -1225,7 +1225,7 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
dst_reg offset_mrf = dst_reg(MRF, base_mrf);
offset_mrf.type = BRW_REGISTER_TYPE_UD;
emit(MOV(offset_mrf, src_reg(shader_time_index * 4)));
emit(MOV(offset_mrf, src_reg(shader_time_index * SHADER_TIME_STRIDE)));
dst_reg time_mrf = dst_reg(MRF, base_mrf + 1);
time_mrf.type = BRW_REGISTER_TYPE_UD;