i965: Move all the depth/stencil/hiz offset logic into the workaround.

Given that we have the mask information here (assuming the rebase is to
the same tiling, which is safe), we can just save a set of miptrees and
offsets and the global intra-tile offset in the context and cut out a
bunch of logic.  This will also save emitting the next fix I need to do
twice.

Acked-by: Chad Versace <chad.versace@linux.intel.com>
This commit is contained in:
Eric Anholt 2012-11-04 12:47:02 -08:00
parent 9ec6a54ba9
commit 52ee1a7269
3 changed files with 138 additions and 186 deletions

View file

@ -1072,6 +1072,21 @@ struct brw_context
bool enable_cut_index;
} prim_restart;
/** Computed depth/stencil/hiz state from the current attached
* renderbuffers, valid only during the drawing state upload loop after
* brw_workaround_depthstencil_alignment().
*/
struct {
struct intel_mipmap_tree *depth_mt;
struct intel_mipmap_tree *stencil_mt;
struct intel_mipmap_tree *hiz_mt;
/* Inter-tile (page-aligned) byte offsets. */
uint32_t depth_offset, hiz_offset, stencil_offset;
/* Intra-tile x,y offsets for drawing to depth/stencil/hiz */
uint32_t tile_x, tile_y;
} depthstencil;
uint32_t num_instances;
int basevertex;
};

View file

@ -329,7 +329,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
struct intel_mipmap_tree *depth_mt = NULL;
struct intel_mipmap_tree *stencil_mt = NULL;
uint32_t depth_tile_x = 0, depth_tile_y = 0;
uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
if (depth_irb)
depth_mt = depth_irb->mt;
@ -341,18 +341,22 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
&tile_mask_x, &tile_mask_y);
if (depth_irb) {
depth_tile_x = depth_irb->draw_x & tile_mask_x;
depth_tile_y = depth_irb->draw_y & tile_mask_y;
tile_x = depth_irb->draw_x & tile_mask_x;
tile_y = depth_irb->draw_y & tile_mask_y;
/* The low 3 bits of x and y tile offset are ignored by the hardware.
* Rebase if they're set, so that we can actually render to the buffer.
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
* Coordinate Offset X/Y":
*
* "The 3 LSBs of both offsets must be zero to ensure correct
* alignment"
*/
if (depth_tile_x & 7 || depth_tile_y & 7)
if (tile_x & 7 || tile_y & 7)
rebase_depth = true;
/* We didn't even have intra-tile offsets before g45. */
if (intel->gen == 4 && !intel->is_g4x) {
if (depth_tile_x || depth_tile_y)
if (tile_x || tile_y)
rebase_depth = true;
}
@ -368,8 +372,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
intel_renderbuffer_set_draw_offset(stencil_irb);
}
depth_tile_x = depth_irb->draw_x & tile_mask_x;
depth_tile_y = depth_irb->draw_y & tile_mask_y;
tile_x = depth_irb->draw_x & tile_mask_x;
tile_y = depth_irb->draw_y & tile_mask_y;
}
if (stencil_irb) {
@ -381,8 +385,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
* post-stencil depth test will also rebase depth to try to match it
* up).
*/
if (depth_tile_x != stencil_tile_x ||
depth_tile_y != stencil_tile_y) {
if (tile_x != stencil_tile_x ||
tile_y != stencil_tile_y) {
rebase_stencil = true;
}
}
@ -390,8 +394,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
/* If we have (just) stencil, check it for ignored low bits as well */
if (stencil_irb) {
uint32_t stencil_tile_x = stencil_irb->draw_x & tile_mask_x;
uint32_t stencil_tile_y = stencil_irb->draw_y & tile_mask_y;
stencil_tile_x = stencil_irb->draw_x & tile_mask_x;
stencil_tile_y = stencil_irb->draw_y & tile_mask_y;
if (stencil_tile_x & 7 || stencil_tile_y & 7)
rebase_stencil = true;
@ -405,24 +409,96 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
if (rebase_stencil) {
intel_renderbuffer_move_to_temp(intel, stencil_irb);
uint32_t stencil_tile_x = stencil_irb->draw_x & tile_mask_x;
uint32_t stencil_tile_y = stencil_irb->draw_y & tile_mask_y;
stencil_tile_x = stencil_irb->draw_x & tile_mask_x;
stencil_tile_y = stencil_irb->draw_y & tile_mask_y;
if (depth_irb && depth_irb->mt == stencil_mt) {
intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
intel_renderbuffer_set_draw_offset(depth_irb);
} else if (depth_irb && !rebase_depth) {
if (depth_tile_x != stencil_tile_x ||
depth_tile_y != stencil_tile_y) {
if (tile_x != stencil_tile_x ||
tile_y != stencil_tile_y) {
intel_renderbuffer_move_to_temp(intel, depth_irb);
tile_x = depth_irb->draw_x & tile_mask_x;
tile_y = depth_irb->draw_y & tile_mask_y;
if (stencil_irb && stencil_irb->mt == depth_mt) {
intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
intel_renderbuffer_set_draw_offset(stencil_irb);
}
WARN_ONCE(stencil_tile_x != tile_x ||
stencil_tile_y != tile_y,
"Rebased stencil tile offset (%d,%d) doesn't match depth "
"tile offset (%d,%d).\n",
stencil_tile_x, stencil_tile_y,
tile_x, tile_y);
}
}
}
if (!depth_irb) {
tile_x = stencil_tile_x;
tile_y = stencil_tile_y;
}
/* While we just tried to get everything aligned, we may have failed to do
* so in the case of rendering to array or 3D textures, where nonzero faces
* will still have an offset post-rebase. At least give an informative
* warning.
*/
WARN_ONCE((tile_x & 7) || (tile_y & 7),
"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
"Truncating offset, bad rendering may occur.\n");
tile_x &= ~7;
tile_y &= ~7;
/* Now, after rebasing, save off the new dephtstencil state so the hardware
* packets can just dereference that without re-calculating tile offsets.
*/
brw->depthstencil.tile_x = tile_x;
brw->depthstencil.tile_y = tile_y;
brw->depthstencil.depth_offset = 0;
brw->depthstencil.stencil_offset = 0;
brw->depthstencil.hiz_offset = 0;
brw->depthstencil.depth_mt = NULL;
brw->depthstencil.stencil_mt = NULL;
brw->depthstencil.hiz_mt = NULL;
if (depth_irb) {
depth_mt = depth_irb->mt;
brw->depthstencil.depth_mt = depth_mt;
brw->depthstencil.depth_offset =
intel_region_get_aligned_offset(depth_mt->region,
depth_irb->draw_x & ~tile_mask_x,
depth_irb->draw_y & ~tile_mask_y,
false);
if (depth_mt->hiz_mt) {
brw->depthstencil.hiz_mt = depth_mt->hiz_mt;
brw->depthstencil.hiz_offset =
intel_region_get_aligned_offset(depth_mt->region,
depth_irb->draw_x & ~tile_mask_x,
(depth_irb->draw_y & ~tile_mask_y) /
2,
false);
}
}
if (stencil_irb) {
stencil_mt = stencil_irb->mt;
if (stencil_mt->stencil_mt)
stencil_mt = stencil_mt->stencil_mt;
brw->depthstencil.stencil_mt = stencil_mt;
if (stencil_mt->format == MESA_FORMAT_S8) {
/* Note: we can't compute the stencil offset using
* intel_region_get_aligned_offset(), because stencil_region claims
* that the region is untiled even though it's W tiled.
*/
brw->depthstencil.stencil_offset =
(stencil_irb->draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
(stencil_irb->draw_x & ~tile_mask_x) * 64;
}
}
}
static void emit_depthbuffer(struct brw_context *brw)
@ -433,19 +509,16 @@ static void emit_depthbuffer(struct brw_context *brw)
/* _NEW_BUFFERS */
struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
struct intel_mipmap_tree *depth_mt = NULL;
struct intel_mipmap_tree *stencil_mt = NULL;
struct intel_region *hiz_region = NULL;
struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt;
uint32_t tile_x = brw->depthstencil.tile_x;
uint32_t tile_y = brw->depthstencil.tile_y;
unsigned int len;
bool separate_stencil = false;
if (depth_irb){
depth_mt = depth_irb->mt;
if (depth_mt &&
depth_mt->hiz_mt) {
hiz_region = depth_irb->mt->hiz_mt->region;
}
}
if (stencil_mt && stencil_mt->format == MESA_FORMAT_S8)
separate_stencil = true;
/* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
* non-pipelined state that will need the PIPE_CONTROL workaround.
@ -455,54 +528,6 @@ static void emit_depthbuffer(struct brw_context *brw)
intel_emit_depth_stall_flushes(intel);
}
/* Find the real separate stencil mt if present. */
if (stencil_irb) {
stencil_mt = stencil_irb->mt;
if (stencil_mt->stencil_mt)
stencil_mt = stencil_mt->stencil_mt;
if (stencil_mt->format == MESA_FORMAT_S8) {
separate_stencil = true;
}
}
uint32_t tile_mask_x, tile_mask_y;
brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
&tile_mask_x, &tile_mask_y);
/* The intra-tile offsets should already have been forced into agreement by
* gen7_workaround_depthstencil_alignment().
*/
uint32_t tile_x = 0, tile_y = 0;
if (depth_mt) {
tile_x = depth_irb->draw_x & tile_mask_x;
tile_y = depth_irb->draw_y & tile_mask_y;
if (stencil_mt) {
assert((stencil_irb->draw_x & tile_mask_x) == tile_x);
assert((stencil_irb->draw_y & tile_mask_y) == tile_y);
}
} else if (stencil_mt) {
tile_x = stencil_irb->draw_x & tile_mask_x;
tile_y = stencil_irb->draw_y & tile_mask_y;
}
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
* Coordinate Offset X/Y":
*
* "The 3 LSBs of both offsets must be zero to ensure correct
* alignment"
*
* This should already have been corrected by
* gen6_workaround_depthstencil_alignment.
*/
WARN_ONCE((tile_x & 7) || (tile_y & 7),
"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
"Truncating offset, bad rendering may occur.\n");
tile_x &= ~7;
tile_y &= ~7;
/* If there's a packed depth/stencil bound to stencil only, we need to
* emit the packed depth/stencil buffer packet.
*/
@ -580,32 +605,26 @@ static void emit_depthbuffer(struct brw_context *brw)
ADVANCE_BATCH();
} else {
struct intel_region *region = depth_irb->mt->region;
uint32_t offset;
struct intel_region *region = depth_mt->region;
/* If using separate stencil, hiz must be enabled. */
assert(!separate_stencil || hiz_region);
assert(!separate_stencil || hiz_mt);
assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
assert(!hiz_region || region->tiling == I915_TILING_Y);
offset = intel_region_get_aligned_offset(region,
depth_irb->draw_x & ~tile_mask_x,
depth_irb->draw_y & ~tile_mask_y,
false);
assert(!hiz_mt || region->tiling == I915_TILING_Y);
BEGIN_BATCH(len);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
(brw_depthbuffer_format(brw) << 18) |
((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
((hiz_region ? 1 : 0) << 22) | /* hiz enable */
((hiz_mt ? 1 : 0) << 21) | /* separate stencil enable */
((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
(BRW_TILEWALK_YMAJOR << 26) |
((region->tiling != I915_TILING_NONE) << 27) |
(BRW_SURFACE_2D << 29));
OUT_RELOC(region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
offset);
brw->depthstencil.depth_offset);
OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
(((depth_irb->Base.Base.Width + tile_x) - 1) << 6) |
(((depth_irb->Base.Base.Height + tile_y) - 1) << 19));
@ -622,7 +641,7 @@ static void emit_depthbuffer(struct brw_context *brw)
ADVANCE_BATCH();
}
if (hiz_region || separate_stencil) {
if (hiz_mt || separate_stencil) {
/*
* In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
* stencil enable' and 'hiz enable' bits were set. Therefore we must
@ -632,19 +651,13 @@ static void emit_depthbuffer(struct brw_context *brw)
*/
/* Emit hiz buffer. */
if (hiz_region) {
uint32_t hiz_offset =
intel_region_get_aligned_offset(hiz_region,
depth_irb->draw_x & ~tile_mask_x,
(depth_irb->draw_y & ~tile_mask_y) / 2,
false);
if (hiz_mt) {
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
OUT_RELOC(hiz_region->bo,
OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
OUT_RELOC(hiz_mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
hiz_offset);
brw->depthstencil.hiz_offset);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(3);
@ -658,14 +671,6 @@ static void emit_depthbuffer(struct brw_context *brw)
if (separate_stencil) {
struct intel_region *region = stencil_mt->region;
/* Note: we can't compute the stencil offset using
* intel_region_get_aligned_offset(), because stencil_region claims
* that the region is untiled; in fact it's W tiled.
*/
uint32_t stencil_offset =
(stencil_irb->draw_y & ~tile_mask_y) * region->pitch +
(stencil_irb->draw_x & ~tile_mask_x) * 64;
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
/* The stencil buffer has quirky pitch requirements. From Vol 2a,
@ -676,7 +681,7 @@ static void emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(2 * region->pitch * region->cpp - 1);
OUT_RELOC(region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
stencil_offset);
brw->depthstencil.stencil_offset);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(3);
@ -695,7 +700,7 @@ static void emit_depthbuffer(struct brw_context *brw)
* 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
* when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
*/
if (intel->gen >= 6 || hiz_region) {
if (intel->gen >= 6 || hiz_mt) {
if (intel->gen == 6)
intel_emit_post_sync_nonzero_flush(intel);

View file

@ -38,63 +38,14 @@ static void emit_depthbuffer(struct brw_context *brw)
/* _NEW_BUFFERS */
struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
struct intel_mipmap_tree *depth_mt = NULL,
*stencil_mt = NULL,
*hiz_mt = NULL;
struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt;
uint32_t tile_x = brw->depthstencil.tile_x;
uint32_t tile_y = brw->depthstencil.tile_y;
if (drb)
depth_mt = drb->mt;
if (depth_mt)
hiz_mt = depth_mt->hiz_mt;
if (srb) {
stencil_mt = srb->mt;
if (stencil_mt->stencil_mt)
stencil_mt = stencil_mt->stencil_mt;
assert(stencil_mt->format == MESA_FORMAT_S8);
}
uint32_t tile_mask_x, tile_mask_y;
brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
&tile_mask_x, &tile_mask_y);
/* The intra-tile offsets should already have been forced into agreement by
* gen7_workaround_depthstencil_alignment().
*/
uint32_t tile_x = 0, tile_y = 0;
if (depth_mt) {
tile_x = drb->draw_x & tile_mask_x;
tile_y = drb->draw_y & tile_mask_y;
if (stencil_mt) {
assert((srb->draw_x & tile_mask_x) == tile_x);
assert((srb->draw_y & tile_mask_y) == tile_y);
}
} else if (stencil_mt) {
tile_x = srb->draw_x & tile_mask_x;
tile_y = srb->draw_y & tile_mask_y;
}
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
* Coordinate Offset X/Y":
*
* "The 3 LSBs of both offsets must be zero to ensure correct
* alignment"
*
* This should already have been corrected by
* gen6_workaround_depthstencil_alignment.
*/
WARN_ONCE((tile_x & 7) || (tile_y & 7),
"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
"Truncating offset, bad rendering may occur.\n");
tile_x &= ~7;
tile_y &= ~7;
/* Gen7 doesn't support packed depth/stencil */
assert(stencil_mt == NULL || depth_mt != stencil_mt);
/* Gen7 only supports separate stencil */
assert(!stencil_mt || stencil_mt->format == MESA_FORMAT_S8);
assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format));
intel_emit_depth_stall_flushes(intel);
@ -126,12 +77,6 @@ static void emit_depthbuffer(struct brw_context *brw)
ADVANCE_BATCH();
} else {
struct intel_region *region = depth_mt->region;
uint32_t offset;
offset = intel_region_get_aligned_offset(region,
drb->draw_x & ~tile_mask_x,
drb->draw_y & ~tile_mask_y,
false);
assert(region->tiling == I915_TILING_Y);
@ -146,7 +91,7 @@ static void emit_depthbuffer(struct brw_context *brw)
(BRW_SURFACE_2D << 29));
OUT_RELOC(region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
offset);
brw->depthstencil.depth_offset);
OUT_BATCH((((drb->Base.Base.Width + tile_x) - 1) << 4) |
(((drb->Base.Base.Height + tile_y) - 1) << 18));
OUT_BATCH(0);
@ -162,18 +107,13 @@ static void emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
uint32_t hiz_offset =
intel_region_get_aligned_offset(hiz_mt->region,
drb->draw_x & ~tile_mask_x,
(drb->draw_y & ~tile_mask_y) / 2,
false);
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
OUT_RELOC(hiz_mt->region->bo,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER,
hiz_offset);
brw->depthstencil.hiz_offset);
ADVANCE_BATCH();
}
@ -186,14 +126,6 @@ static void emit_depthbuffer(struct brw_context *brw)
} else {
const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;
/* Note: We can't compute the stencil offset using
* intel_region_get_aligned_offset(), because the stencil region claims
* that the region is untiled; in fact it's W tiled.
*/
uint32_t stencil_offset =
(srb->draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
(srb->draw_x & ~tile_mask_x) * 64;
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
/* The stencil buffer has quirky pitch requirements. From the Graphics
@ -214,7 +146,7 @@ static void emit_depthbuffer(struct brw_context *brw)
(2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
OUT_RELOC(stencil_mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
stencil_offset);
brw->depthstencil.stencil_offset);
ADVANCE_BATCH();
}