mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 12:08:06 +02:00
i965: Split shader_time entries into separate cachelines.
This avoids some cache-snooping overhead between EUs that are processing
different shaders (e.g. VS versus FS).
Improves performance of a Minecraft trace with shader_time enabled by 28.9% +/-
18.3% (n=7), and performance of my old GLSL demo by 93.7% +/- 0.8% (n=4).
v2: Add a define for the stride with a comment explaining its units and
why.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
a35a19a6ea
commit
db3a0f13ef
4 changed files with 13 additions and 4 deletions
|
|
@@ -571,6 +571,14 @@ struct brw_vs_prog_data {
|
|||
#define SURF_INDEX_SOL_BINDING(t) ((t))
|
||||
#define BRW_MAX_GS_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
|
||||
|
||||
/**
|
||||
* Stride in bytes between shader_time entries.
|
||||
*
|
||||
* We separate entries by a cacheline to reduce traffic between EUs writing to
|
||||
* different entries.
|
||||
*/
|
||||
#define SHADER_TIME_STRIDE 64
|
||||
|
||||
enum brw_cache_id {
|
||||
BRW_BLEND_STATE,
|
||||
BRW_DEPTH_STENCIL_STATE,
|
||||
|
|
|
|||
|
|
@@ -621,7 +621,7 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
|
|||
|
||||
fs_reg offset_mrf = fs_reg(MRF, base_mrf);
|
||||
offset_mrf.type = BRW_REGISTER_TYPE_UD;
|
||||
-emit(MOV(offset_mrf, fs_reg(shader_time_index * 4)));
|
||||
+emit(MOV(offset_mrf, fs_reg(shader_time_index * SHADER_TIME_STRIDE)));
|
||||
|
||||
fs_reg time_mrf = fs_reg(MRF, base_mrf + 1);
|
||||
time_mrf.type = BRW_REGISTER_TYPE_UD;
|
||||
|
|
|
|||
|
|
@@ -228,7 +228,8 @@ brw_init_shader_time(struct brw_context *brw)
|
|||
|
||||
const int max_entries = 4096;
|
||||
brw->shader_time.bo = drm_intel_bo_alloc(intel->bufmgr, "shader time",
|
||||
-max_entries * 4, 4096);
|
||||
+max_entries * SHADER_TIME_STRIDE,
|
||||
+4096);
|
||||
brw->shader_time.programs = rzalloc_array(brw, struct gl_shader_program *,
|
||||
max_entries);
|
||||
brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
|
||||
|
|
@@ -409,7 +410,7 @@ brw_collect_shader_time(struct brw_context *brw)
|
|||
uint32_t *times = brw->shader_time.bo->virtual;
|
||||
|
||||
for (int i = 0; i < brw->shader_time.num_entries; i++) {
|
||||
-brw->shader_time.cumulative[i] += times[i];
|
||||
+brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
|
||||
}
|
||||
|
||||
/* Zero the BO out to clear it out for our next collection.
|
||||
|
|
|
|||
|
|
@@ -1225,7 +1225,7 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
|
|||
|
||||
dst_reg offset_mrf = dst_reg(MRF, base_mrf);
|
||||
offset_mrf.type = BRW_REGISTER_TYPE_UD;
|
||||
-emit(MOV(offset_mrf, src_reg(shader_time_index * 4)));
|
||||
+emit(MOV(offset_mrf, src_reg(shader_time_index * SHADER_TIME_STRIDE)));
|
||||
|
||||
dst_reg time_mrf = dst_reg(MRF, base_mrf + 1);
|
||||
time_mrf.type = BRW_REGISTER_TYPE_UD;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue