mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-20 21:00:29 +01:00
i915: Accumulate the VB into a local buffer and subdata it in.
This lets GEM use pwrite, for an additional 4% or so speedup.
This commit is contained in:
parent
62d66caeba
commit
a42dac1879
7 changed files with 48 additions and 31 deletions
|
|
@ -677,9 +677,6 @@ i830_new_batch(struct intel_context *intel)
|
|||
struct i830_context *i830 = i830_context(&intel->ctx);
|
||||
i830->state.emitted = 0;
|
||||
|
||||
/* Signal that we should put new vertices into a new vertex buffer. */
|
||||
intel->prim.needs_new_vb = GL_TRUE;
|
||||
|
||||
/* Check that we didn't just wrap our batchbuffer at a bad time. */
|
||||
assert(!intel->no_batch_wrap);
|
||||
}
|
||||
|
|
@ -722,4 +719,5 @@ i830InitVtbl(struct i830_context *i830)
|
|||
i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;
|
||||
i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
|
||||
i830->intel.vtbl.note_unlock = i830_note_unlock;
|
||||
i830->intel.vtbl.finish_batch = intel_finish_vb;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -589,8 +589,6 @@ i915_new_batch(struct intel_context *intel)
|
|||
* difficulties associated with them (physical address requirements).
|
||||
*/
|
||||
i915->state.emitted = 0;
|
||||
/* Signal that we should put new vertices into a new vertex buffer. */
|
||||
intel->prim.needs_new_vb = GL_TRUE;
|
||||
|
||||
/* Check that we didn't just wrap our batchbuffer at a bad time. */
|
||||
assert(!intel->no_batch_wrap);
|
||||
|
|
@ -633,4 +631,5 @@ i915InitVtbl(struct i915_context *i915)
|
|||
i915->intel.vtbl.flush_cmd = i915_flush_cmd;
|
||||
i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
|
||||
i915->intel.vtbl.note_unlock = i915_note_unlock;
|
||||
i915->intel.vtbl.finish_batch = intel_finish_vb;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,31 +77,28 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count)
|
|||
|
||||
/* Check for space in the existing VB */
|
||||
if (intel->prim.vb_bo == NULL ||
|
||||
intel->prim.needs_new_vb ||
|
||||
(intel->prim.current_offset +
|
||||
count * intel->vertex_size * 4) > INTEL_VB_SIZE ||
|
||||
(intel->prim.count + count) >= (1 << 16)) {
|
||||
/* Flush existing prim if any */
|
||||
INTEL_FIREVERTICES(intel);
|
||||
|
||||
intel_finish_vb(intel);
|
||||
|
||||
/* Start a new VB */
|
||||
dri_bo_unreference(intel->prim.vb_bo);
|
||||
if (intel->prim.vb == NULL)
|
||||
intel->prim.vb = malloc(INTEL_VB_SIZE);
|
||||
intel->prim.vb_bo = dri_bo_alloc(intel->bufmgr, "vb",
|
||||
INTEL_VB_SIZE, 4);
|
||||
intel->prim.start_offset = 0;
|
||||
intel->prim.current_offset = 0;
|
||||
|
||||
dri_bufmgr_check_aperture_space(intel->prim.vb_bo);
|
||||
|
||||
intel->prim.needs_new_vb = GL_FALSE;
|
||||
|
||||
dri_bo_map(intel->prim.vb_bo, GL_TRUE);
|
||||
}
|
||||
|
||||
intel->prim.flush = intel_flush_prim;
|
||||
|
||||
addr = (uint32_t *)((char *)intel->prim.vb_bo->virtual +
|
||||
intel->prim.current_offset);
|
||||
addr = (uint32_t *)(intel->prim.vb + intel->prim.current_offset);
|
||||
intel->prim.current_offset += intel->vertex_size * 4 * count;
|
||||
intel->prim.count += count;
|
||||
|
||||
|
|
@ -112,6 +109,7 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count)
|
|||
void intel_flush_prim(struct intel_context *intel)
|
||||
{
|
||||
BATCH_LOCALS;
|
||||
dri_bo *vb_bo;
|
||||
|
||||
/* Must be called after an intel_start_prim. */
|
||||
assert(intel->prim.primitive != ~0);
|
||||
|
|
@ -119,9 +117,13 @@ void intel_flush_prim(struct intel_context *intel)
|
|||
if (intel->prim.count == 0)
|
||||
return;
|
||||
|
||||
intel_wait_flips(intel);
|
||||
/* Keep a reference on the BO as it may get finished as we start the
|
||||
* batch emit.
|
||||
*/
|
||||
vb_bo = intel->prim.vb_bo;
|
||||
dri_bo_reference(vb_bo);
|
||||
|
||||
dri_bo_unmap(intel->prim.vb_bo);
|
||||
intel_wait_flips(intel);
|
||||
|
||||
intel->vtbl.emit_state(intel);
|
||||
|
||||
|
|
@ -147,7 +149,7 @@ void intel_flush_prim(struct intel_context *intel)
|
|||
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
|
||||
I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
|
||||
assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK) == 0);
|
||||
OUT_RELOC(intel->prim.vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
|
||||
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
|
||||
intel->prim.start_offset);
|
||||
OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
|
||||
(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
|
||||
|
|
@ -167,7 +169,7 @@ void intel_flush_prim(struct intel_context *intel)
|
|||
I1_LOAD_S(0) | I1_LOAD_S(2) | 1);
|
||||
/* S0 */
|
||||
assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK_830) == 0);
|
||||
OUT_RELOC(intel->prim.vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
|
||||
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
|
||||
intel->prim.start_offset |
|
||||
(intel->vertex_size << S0_VB_PITCH_SHIFT_830) |
|
||||
S0_VB_ENABLE_830);
|
||||
|
|
@ -193,17 +195,35 @@ void intel_flush_prim(struct intel_context *intel)
|
|||
|
||||
intel->no_batch_wrap = GL_FALSE;
|
||||
|
||||
/* If we're going to keep using this VB for more primitives, map it
|
||||
* again.
|
||||
*/
|
||||
if (!intel->prim.needs_new_vb)
|
||||
dri_bo_map(intel->prim.vb_bo, GL_TRUE);
|
||||
|
||||
intel->prim.flush = NULL;
|
||||
intel->prim.start_offset = intel->prim.current_offset;
|
||||
if (!IS_9XX(intel->intelScreen->deviceID))
|
||||
intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
|
||||
intel->prim.count = 0;
|
||||
|
||||
dri_bo_unreference(vb_bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uploads the locally-accumulated VB into the buffer object.
|
||||
*
|
||||
* This avoids us thrashing the cachelines in and out as the buffer gets
|
||||
* filled, dispatched, then reused as the hardware completes rendering from it,
|
||||
* and also lets us clflush less if we dispatch with a partially-filled VB.
|
||||
*
|
||||
* This is called normally from get_space when we're finishing a BO, but also
|
||||
* at batch flush time so that we don't try accessing the contents of a
|
||||
* just-dispatched buffer.
|
||||
*/
|
||||
void intel_finish_vb(struct intel_context *intel)
|
||||
{
|
||||
if (intel->prim.vb_bo == NULL)
|
||||
return;
|
||||
|
||||
dri_bo_subdata(intel->prim.vb_bo, 0, intel->prim.start_offset,
|
||||
intel->prim.vb);
|
||||
dri_bo_unreference(intel->prim.vb_bo);
|
||||
intel->prim.vb_bo = NULL;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@
|
|||
|
||||
#include "mtypes.h"
|
||||
|
||||
#define INTEL_VB_SIZE (8 * 1024)
|
||||
#define INTEL_VB_SIZE (32 * 1024)
|
||||
/** 3 dwords of state_immediate and 2 of 3dprim, in intel_flush_prim */
|
||||
#define INTEL_PRIM_EMIT_SIZE (5 * 4)
|
||||
|
||||
|
|
@ -49,5 +49,6 @@ extern void intelChooseRenderState(GLcontext * ctx);
|
|||
void intel_set_prim(struct intel_context *intel, uint32_t prim);
|
||||
GLuint *intel_get_prim_space(struct intel_context *intel, unsigned int count);
|
||||
void intel_flush_prim(struct intel_context *intel);
|
||||
void intel_finish_vb(struct intel_context *intel);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -250,6 +250,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
|
|||
* avoid that in the first place. */
|
||||
batch->ptr = batch->map;
|
||||
|
||||
if (intel->vtbl.finish_batch)
|
||||
intel->vtbl.finish_batch(intel);
|
||||
|
||||
/* TODO: Just pass the relocation list and dma buffer up to the
|
||||
* kernel.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -727,6 +727,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
|
|||
intel->Fallback = 0; /* don't call _swrast_Flush later */
|
||||
|
||||
intel_batchbuffer_free(intel->batch);
|
||||
free(intel->prim.vb);
|
||||
|
||||
if (release_texture_heaps) {
|
||||
/* This share group is about to go away, free our private
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ struct intel_context
|
|||
{
|
||||
void (*destroy) (struct intel_context * intel);
|
||||
void (*emit_state) (struct intel_context * intel);
|
||||
void (*finish_batch) (struct intel_context * intel);
|
||||
void (*new_batch) (struct intel_context * intel);
|
||||
void (*emit_invarient_state) (struct intel_context * intel);
|
||||
void (*note_fence) (struct intel_context *intel, GLuint fence);
|
||||
|
|
@ -185,16 +186,10 @@ struct intel_context
|
|||
uint32_t primitive; /**< Current hardware primitive type */
|
||||
void (*flush) (struct intel_context *);
|
||||
dri_bo *vb_bo;
|
||||
uint8_t *vb;
|
||||
unsigned int start_offset; /**< Byte offset of primitive sequence */
|
||||
unsigned int current_offset; /**< Byte offset of next vertex */
|
||||
unsigned int count; /**< Number of vertices in current primitive */
|
||||
/**
|
||||
* Signals when a new VB should be started, regardless of remaining
|
||||
* space.
|
||||
*
|
||||
* Used to avoid rewriting a VB that's being rendered from.
|
||||
*/
|
||||
GLboolean needs_new_vb;
|
||||
} prim;
|
||||
|
||||
GLuint stats_wm;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue