mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 02:00:12 +01:00
intel: Use a CPU map of the batch on LLC-sharing architectures.
Before, we were keeping a CPU-only buffer to accumulate the batchbuffer in, which was an improvement over mapping the batch through the GTT directly (since any readback or other failure to stream through write combining correctly would hurt). However, on LLC-sharing architectures we can do better by mapping the batch directly, which reduces the cache footprint of the application since we no longer have this extra copy of a batchbuffer around.

Improves performance of GLBenchmark 2.1 offscreen on IVB by 3.5% +/- 0.4% (n=21). Improves Lightsmark performance by 1.1% +/- 0.1% (n=76). Improves cairo-gl performance by 1.9% +/- 1.4% (n=57).

No statistically significant difference in GLB2.1 on SNB (n=37). Improves cairo-gl performance by 2.1% +/- 0.1% (n=278).
This commit is contained in:
parent
e1598cb642
commit
99fe2b36cf
4 changed files with 24 additions and 9 deletions
|
|
@ -68,6 +68,11 @@ intel_batchbuffer_init(struct intel_context *intel)
|
|||
"pipe_control workaround",
|
||||
4096, 4096);
|
||||
}
|
||||
|
||||
if (!intel->has_llc) {
|
||||
intel->batch.cpu_map = malloc(intel->maxBatchSize);
|
||||
intel->batch.map = intel->batch.cpu_map;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -83,6 +88,10 @@ intel_batchbuffer_reset(struct intel_context *intel)
|
|||
|
||||
intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
|
||||
intel->maxBatchSize, 4096);
|
||||
if (intel->has_llc) {
|
||||
drm_intel_bo_map(intel->batch.bo, true);
|
||||
intel->batch.map = intel->batch.bo->virtual;
|
||||
}
|
||||
|
||||
intel->batch.reserved_space = BATCH_RESERVED;
|
||||
intel->batch.state_batch_offset = intel->batch.bo->size;
|
||||
|
|
@ -114,6 +123,7 @@ intel_batchbuffer_reset_to_saved(struct intel_context *intel)
|
|||
void
|
||||
intel_batchbuffer_free(struct intel_context *intel)
|
||||
{
|
||||
free(intel->batch.cpu_map);
|
||||
drm_intel_bo_unreference(intel->batch.last_bo);
|
||||
drm_intel_bo_unreference(intel->batch.bo);
|
||||
drm_intel_bo_unreference(intel->batch.workaround_bo);
|
||||
|
|
@ -168,12 +178,16 @@ do_flush_locked(struct intel_context *intel)
|
|||
struct intel_batchbuffer *batch = &intel->batch;
|
||||
int ret = 0;
|
||||
|
||||
ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
|
||||
if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
|
||||
ret = drm_intel_bo_subdata(batch->bo,
|
||||
batch->state_batch_offset,
|
||||
batch->bo->size - batch->state_batch_offset,
|
||||
(char *)batch->map + batch->state_batch_offset);
|
||||
if (intel->has_llc) {
|
||||
drm_intel_bo_unmap(batch->bo);
|
||||
} else {
|
||||
ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
|
||||
if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
|
||||
ret = drm_intel_bo_subdata(batch->bo,
|
||||
batch->state_batch_offset,
|
||||
batch->bo->size - batch->state_batch_offset,
|
||||
(char *)batch->map + batch->state_batch_offset);
|
||||
}
|
||||
}
|
||||
|
||||
if (!intel->intelScreen->no_hw) {
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ intel_batchbuffer_require_space(struct intel_context *intel,
|
|||
intel->batch.is_blit = is_blit;
|
||||
|
||||
#ifdef DEBUG
|
||||
assert(sz < sizeof(intel->batch.map) - BATCH_RESERVED);
|
||||
assert(sz < intel->maxBatchSize - BATCH_RESERVED);
|
||||
#endif
|
||||
if (intel_batchbuffer_space(intel) < sz)
|
||||
intel_batchbuffer_flush(intel);
|
||||
|
|
|
|||
|
|
@ -708,7 +708,7 @@ intelInitContext(struct intel_context *intel,
|
|||
if (intel->gen < 4)
|
||||
intel->maxBatchSize = 4096;
|
||||
else
|
||||
intel->maxBatchSize = sizeof(intel->batch.map);
|
||||
intel->maxBatchSize = BATCH_SZ;
|
||||
|
||||
intel->bufmgr = intelScreen->bufmgr;
|
||||
|
||||
|
|
|
|||
|
|
@ -129,7 +129,8 @@ struct intel_batchbuffer {
|
|||
|
||||
uint16_t emit, total;
|
||||
uint16_t used, reserved_space;
|
||||
uint32_t map[8192];
|
||||
uint32_t *map;
|
||||
uint32_t *cpu_map;
|
||||
#define BATCH_SZ (8192*sizeof(uint32_t))
|
||||
|
||||
uint32_t state_batch_offset;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue