mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 07:10:15 +01:00
i965: Create a helper function for emitting PIPE_CONTROL writes.
There are a lot of places that use PIPE_CONTROL to write a value to a buffer (either an immediate write, TIMESTAMP, or PS_DEPTH_COUNT). Creating a single function to do this seems convenient.

As part of this refactor, we now set the PPGTT/GGTT selection bit correctly on Gen7+. Previously, we set bit 2 of DW2 on all platforms. This is correct for Sandybridge, but on Ivybridge and later that bit is actually part of the address!

Broadwell will also increase the length of these packets by 1; with the refactoring, we should have to adjust that in substantially fewer places, giving us confidence that we've hit them all.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
35458a99c0
commit
f5dd608db2
4 changed files with 69 additions and 93 deletions
|
|
@ -49,36 +49,15 @@
|
|||
void
|
||||
brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
|
||||
{
|
||||
if (brw->gen >= 6) {
|
||||
/* Emit workaround flushes: */
|
||||
if (brw->gen == 6) {
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_CS_STALL |
|
||||
PIPE_CONTROL_STALL_AT_SCOREBOARD);
|
||||
}
|
||||
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
|
||||
OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
|
||||
OUT_RELOC(query_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
PIPE_CONTROL_GLOBAL_GTT_WRITE |
|
||||
idx * sizeof(uint64_t));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
} else {
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
|
||||
PIPE_CONTROL_WRITE_TIMESTAMP);
|
||||
OUT_RELOC(query_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
PIPE_CONTROL_GLOBAL_GTT_WRITE |
|
||||
idx * sizeof(uint64_t));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
if (brw->gen == 6) {
|
||||
/* Emit Sandybridge workaround flush: */
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_CS_STALL |
|
||||
PIPE_CONTROL_STALL_AT_SCOREBOARD);
|
||||
}
|
||||
|
||||
brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_TIMESTAMP,
|
||||
query_bo, idx * sizeof(uint64_t), 0, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -89,21 +68,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
|
|||
{
|
||||
assert(brw->gen < 6);
|
||||
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
|
||||
PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT);
|
||||
/* This object could be mapped cacheable, but we don't have an exposed
|
||||
* mechanism to support that. Since it's going uncached, tell GEM that
|
||||
* we're writing to it. The usual clflush should be all that's required
|
||||
* to pick up the results.
|
||||
*/
|
||||
OUT_RELOC(query_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
PIPE_CONTROL_GLOBAL_GTT_WRITE |
|
||||
(idx * sizeof(uint64_t)));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
brw_emit_pipe_control_write(brw,
|
||||
PIPE_CONTROL_WRITE_DEPTH_COUNT
|
||||
| PIPE_CONTROL_DEPTH_STALL,
|
||||
query_bo, idx * sizeof(uint64_t), 0, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -49,17 +49,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
|
|||
if (brw->gen == 6)
|
||||
intel_emit_post_sync_nonzero_flush(brw);
|
||||
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
|
||||
OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
|
||||
PIPE_CONTROL_WRITE_DEPTH_COUNT);
|
||||
OUT_RELOC(query_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
PIPE_CONTROL_GLOBAL_GTT_WRITE |
|
||||
(idx * sizeof(uint64_t)));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
brw_emit_pipe_control_write(brw,
|
||||
PIPE_CONTROL_WRITE_DEPTH_COUNT
|
||||
| PIPE_CONTROL_DEPTH_STALL,
|
||||
query_bo, idx * sizeof(uint64_t), 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -458,6 +458,44 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit a PIPE_CONTROL that writes to a buffer object.
|
||||
*
|
||||
* \p flags should contain one of the following items:
|
||||
* - PIPE_CONTROL_WRITE_IMMEDIATE
|
||||
* - PIPE_CONTROL_WRITE_TIMESTAMP
|
||||
* - PIPE_CONTROL_WRITE_DEPTH_COUNT
|
||||
*/
|
||||
void
|
||||
brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
|
||||
drm_intel_bo *bo, uint32_t offset,
|
||||
uint32_t imm_lower, uint32_t imm_upper)
|
||||
{
|
||||
if (brw->gen >= 6) {
|
||||
/* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
|
||||
* on later platforms. We always use PPGTT on Gen7+.
|
||||
*/
|
||||
unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
|
||||
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
|
||||
OUT_BATCH(flags);
|
||||
OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
gen6_gtt | offset);
|
||||
OUT_BATCH(imm_lower);
|
||||
OUT_BATCH(imm_upper);
|
||||
ADVANCE_BATCH();
|
||||
} else {
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
|
||||
OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
|
||||
OUT_BATCH(imm_lower);
|
||||
OUT_BATCH(imm_upper);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Restriction [DevSNB, DevIVB]:
|
||||
*
|
||||
|
|
@ -492,15 +530,11 @@ void
|
|||
gen7_emit_vs_workaround_flush(struct brw_context *brw)
|
||||
{
|
||||
assert(brw->gen == 7);
|
||||
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
|
||||
OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
|
||||
OUT_RELOC(brw->batch.workaround_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
|
||||
OUT_BATCH(0); /* write data */
|
||||
OUT_BATCH(0); /* write data */
|
||||
ADVANCE_BATCH();
|
||||
brw_emit_pipe_control_write(brw,
|
||||
PIPE_CONTROL_WRITE_IMMEDIATE
|
||||
| PIPE_CONTROL_DEPTH_STALL,
|
||||
brw->batch.workaround_bo, 0,
|
||||
0, 0);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -510,27 +544,11 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
|
|||
void
|
||||
gen7_emit_cs_stall_flush(struct brw_context *brw)
|
||||
{
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
|
||||
/* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
|
||||
* CS Stall):
|
||||
*
|
||||
* One of the following must also be set:
|
||||
* - Render Target Cache Flush Enable ([12] of DW1)
|
||||
* - Depth Cache Flush Enable ([0] of DW1)
|
||||
* - Stall at Pixel Scoreboard ([1] of DW1)
|
||||
* - Depth Stall ([13] of DW1)
|
||||
* - Post-Sync Operation ([13] of DW1)
|
||||
*
|
||||
* We choose to do a Post-Sync Operation (Write Immediate Data), since
|
||||
* it seems like it will incur the least additional performance penalty.
|
||||
*/
|
||||
OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
|
||||
OUT_RELOC(brw->batch.workaround_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
brw_emit_pipe_control_write(brw,
|
||||
PIPE_CONTROL_CS_STALL
|
||||
| PIPE_CONTROL_WRITE_IMMEDIATE,
|
||||
brw->batch.workaround_bo, 0,
|
||||
0, 0);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -581,14 +599,8 @@ intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
|
|||
PIPE_CONTROL_CS_STALL |
|
||||
PIPE_CONTROL_STALL_AT_SCOREBOARD);
|
||||
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
|
||||
OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
|
||||
OUT_RELOC(brw->batch.workaround_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
|
||||
OUT_BATCH(0); /* write data */
|
||||
OUT_BATCH(0); /* write data */
|
||||
ADVANCE_BATCH();
|
||||
brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
|
||||
brw->batch.workaround_bo, 0, 0, 0);
|
||||
|
||||
brw->batch.need_workaround_flush = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -65,6 +65,9 @@ bool intel_batchbuffer_emit_reloc64(struct brw_context *brw,
|
|||
uint32_t write_domain,
|
||||
uint32_t offset);
|
||||
void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
|
||||
void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
|
||||
drm_intel_bo *bo, uint32_t offset,
|
||||
uint32_t imm_lower, uint32_t imm_upper);
|
||||
void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
|
||||
void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
|
||||
void intel_emit_depth_stall_flushes(struct brw_context *brw);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue