i965: Rewrite the HiZ op

The HiZ op was implemented as a meta-op. This patch reimplements it by
emitting a special HiZ batch. This fixes several known bugs, and likely
a lot of undiscovered ones too.

==== Why the HiZ meta-op needed to die ====

The HiZ op was implemented as a meta-op, which caused lots of trouble. All
other meta-ops occur as a result of some GL call (for example, glClear and
glGenerateMipmap), but the HiZ meta-op was special. It was called in
places that Mesa (in particular, the vbo and swrast modules) did not
expect---and were not prepared for---state changes to occur (for example:
glDraw; glCallList; within glBegin/End blocks; and within
swrast_prepare_render as a result of intel_miptree_map).

In an attempt to work around these unexpected state changes, I added two
hooks in i965:
  - A hook for glDraw, located in brw_predraw_resolve_buffers (which is
    called in the glDraw path). This hook detected if a predraw resolve
    meta-op had occurred, and would hackishly repropagate some GL state
    if necessary. This ensured that the meta-op state changes would not
    interfere with the vbo module's subsequent execution of glDraw.
  - A hook for glBegin, implemented by brwPrepareExecBegin. This hook
    resolved all buffers before entering
    a glBegin/End block, thus preventing an infinitely recurring call to
    vbo_exec_FlushVertices. The vbo module calls vbo_exec_FlushVertices to
    flush its vertex queue in response to GL state changes.

Unfortunately, these hooks were not sufficient. The meta-op state changes
still interacted badly with glPopAttrib (as discovered in bug 44927) and
with swrast rendering (as discovered by debugging gen6's swrast fallback
for glBitmap). I expect there are more undiscovered bugs. Rather than play
whack-a-mole in a minefield, the sane approach is to replace the HiZ
meta-op with something safer.

==== How it was killed ====

This patch consists of several logical components:
  1. Rewrite the HiZ op by replacing function gen6_resolve_slice with
     gen6_hiz_exec and gen7_hiz_exec. The new functions do not call
     a meta-op, but instead manually construct and emit a batch to "draw"
     the HiZ op's rectangle primitive. The new functions alter no GL
     state.
  2. Add fields to brw_context::hiz for the new HiZ op.
  3. Emit a workaround flush when toggling 3DSTATE_VS.VsFunctionEnable.
  4. Kill all dead HiZ code:
     - the function gen6_resolve_slice
     - the dirty flag BRW_NEW_HIZ
     - the dead fields in brw_context::hiz
     - the state packet manipulation triggered by the now removed
       brw_context::hiz::op
     - the meta-op workaround in brw_predraw_resolve_buffers (discussed
       above)
     - the meta-op workaround brwPrepareExecBegin (discussed above)

Note: This is a candidate for the 8.0 branch.
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Acked-by: Paul Berry <stereotype441@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=43327
Reported-by: xunx.fang@intel.com
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44927
Reported-by: chao.a.chen@intel.com
Signed-off-by: Chad Versace <chad.versace@linux.intel.com>
This commit is contained in:
Chad Versace 2012-01-26 11:01:36 -08:00
parent d59466279e
commit 7b36c68ba6
19 changed files with 1156 additions and 527 deletions

View file

@ -100,6 +100,7 @@ i965_C_FILES := \
gen7_cc_state.c \
gen7_clip_state.c \
gen7_disable.c \
gen7_hiz.c \
gen7_misc_state.c \
gen7_sampler_state.c \
gen7_sf_state.c \

View file

@ -41,8 +41,6 @@
#include "brw_draw.h"
#include "brw_state.h"
#include "gen6_hiz.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
@ -57,58 +55,6 @@
* Mesa's Driver Functions
***************************************/
/**
 * \brief Driver hook run before a glBegin/glEnd block is entered.
 *
 * While vertices are queued inside a glBegin/glEnd block, Mesa calls
 * FLUSH_VERTICES as a prehook to every rendering state change so that the
 * queued vertices are drawn first. FLUSH_VERTICES therefore must not itself
 * change rendering state, or it would recurse.
 *
 * Buffer resolves are meta-ops and so do change state; to keep them from
 * being triggered while vertices are queued, this hook performs them up
 * front:
 *   - the depth buffer of every enabled texture that has a miptree, and
 *   - the HiZ buffer of the depth renderbuffer attached to the draw
 *     framebuffer, if there is one.
 *
 * Nothing is done when the context has no HiZ support.
 *
 * \see brwCleanupExecEnd()
 */
static void brwPrepareExecBegin(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_context *intel = &brw->intel;

   /* Only HiZ-capable contexts ever have anything to resolve. */
   if (intel->has_hiz) {
      struct intel_renderbuffer *depth_irb;

      /* Depth-resolve every slice of each enabled texture's miptree. */
      for (int unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            struct intel_texture_object *tex =
               intel_texture_object(ctx->Texture.Unit[unit]._Current);

            if (tex && tex->mt)
               intel_miptree_all_slices_resolve_depth(intel, tex->mt);
         }
      }

      /* HiZ-resolve the depth attachment of the draw framebuffer. */
      depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
      if (depth_irb)
         intel_renderbuffer_resolve_hiz(intel, depth_irb);
   }
}
static void brwInitDriverFunctions(struct intel_screen *screen,
struct dd_function_table *functions)
{
@ -117,7 +63,6 @@ static void brwInitDriverFunctions(struct intel_screen *screen,
brwInitFragProgFuncs( functions );
brw_init_queryobj_functions(functions);
functions->PrepareExecBegin = brwPrepareExecBegin;
functions->BeginTransformFeedback = brw_begin_transform_feedback;
if (screen->gen >= 7)

View file

@ -119,6 +119,10 @@
#define BRW_MAX_CURBE (32*16)
struct brw_context;
struct brw_instruction;
struct brw_vs_prog_key;
struct brw_wm_prog_key;
struct brw_wm_prog_data;
enum brw_state_id {
BRW_STATE_URB_FENCE,
@ -144,7 +148,6 @@ enum brw_state_id {
BRW_STATE_VS_CONSTBUF,
BRW_STATE_PROGRAM_CACHE,
BRW_STATE_STATE_BASE_ADDRESS,
BRW_STATE_HIZ,
BRW_STATE_SOL_INDICES,
};
@ -174,7 +177,6 @@ enum brw_state_id {
#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE)
#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS)
#define BRW_NEW_HIZ (1 << BRW_STATE_HIZ)
#define BRW_NEW_SOL_INDICES (1 << BRW_STATE_SOL_INDICES)
struct brw_state_flags {
@ -950,38 +952,18 @@ struct brw_context
int state_batch_count;
/**
* \brief State needed to execute HiZ meta-ops
* \brief State needed to execute HiZ ops.
*
* All fields except \c op are initialized by gen6_hiz_init().
* \see gen6_hiz_init()
* \see gen6_hiz_exec()
*/
struct brw_hiz_state {
/**
* \brief Indicates which HiZ operation is in progress.
/** \brief VBO for rectangle primitive.
*
* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
* - 7.5.3.1 Depth Buffer Clear
* - 7.5.3.2 Depth Buffer Resolve
* - 7.5.3.3 Hierarchical Depth Buffer Resolve
* Rather than using glGenBuffers(), we allocate the VBO directly
* through drm.
*/
enum brw_hiz_op {
BRW_HIZ_OP_NONE = 0,
BRW_HIZ_OP_DEPTH_CLEAR,
BRW_HIZ_OP_DEPTH_RESOLVE,
BRW_HIZ_OP_HIZ_RESOLVE,
} op;
/** \brief Shader state */
struct {
GLuint program;
GLuint position_vbo;
GLint position_location;
} shader;
/** \brief VAO for the rectangle primitive's vertices. */
GLuint vao;
GLuint fbo;
struct gl_renderbuffer *depth_rb;
drm_intel_bo *vertex_bo;
} hiz;
struct brw_sol_state {

View file

@ -126,12 +126,7 @@ static void gen6_set_prim(struct brw_context *brw,
DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
if (brw->hiz.op) {
assert(prim->mode == GL_TRIANGLES);
hw_prim = _3DPRIM_RECTLIST;
} else {
hw_prim = prim_to_hw_prim[prim->mode];
}
hw_prim = prim_to_hw_prim[prim->mode];
if (hw_prim != brw->primitive) {
brw->primitive = hw_prim;
@ -307,17 +302,11 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
struct intel_renderbuffer *depth_irb;
struct intel_texture_object *tex_obj;
bool did_resolve = false;
/* Avoid recursive HiZ op. */
if (brw->hiz.op) {
return;
}
/* Resolve the depth buffer's HiZ buffer. */
depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
if (depth_irb && depth_irb->mt) {
did_resolve |= intel_renderbuffer_resolve_hiz(intel, depth_irb);
intel_renderbuffer_resolve_hiz(intel, depth_irb);
}
/* Resolve depth buffer of each enabled depth texture. */
@ -327,33 +316,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
if (!tex_obj || !tex_obj->mt)
continue;
did_resolve |= intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
}
if (did_resolve) {
/* Call vbo_bind_array() to synchronize the vbo module's vertex
* attributes to the gl_context's.
*
* Details
* -------
* The vbo module tracks vertex attributes separately from the
* gl_context. Specifically, the vbo module maintains vertex attributes
* in vbo_exec_context::array::inputs, which is synchronized with
* gl_context::Array::ArrayObj::VertexAttrib by vbo_bind_array().
* vbo_draw_arrays() calls vbo_bind_array() to perform the
* synchronization before calling the real draw call,
* vbo_context::draw_arrays.
*
* At this point (after performing a resolve meta-op but before calling
* vbo_bind_array), the gl_context's vertex attributes have been
* restored to their original state (that is, their state before the
* meta-op began), but the vbo module's vertex attributes are those used
* in the last meta-op. Therefore we must manually synchronize the two with
* vbo_bind_array() before continuing with the original draw command.
*/
_mesa_update_state(ctx);
vbo_bind_arrays(ctx);
_mesa_update_state(ctx);
intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
}
}
@ -372,9 +335,7 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
struct intel_renderbuffer *depth_irb =
intel_get_renderbuffer(fb, BUFFER_DEPTH);
if (depth_irb &&
ctx->Depth.Mask &&
!brw->hiz.op) {
if (depth_irb && ctx->Depth.Mask) {
intel_renderbuffer_set_needs_depth_resolve(depth_irb);
}
}

View file

@ -372,7 +372,6 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
DEFINE_BIT(BRW_NEW_HIZ),
{0, 0, 0}
};

View file

@ -50,6 +50,7 @@
#include "brw_wm.h"
#include "gen6_hiz.h"
#include "gen7_hiz.h"
#include "glsl/ralloc.h"
@ -70,9 +71,11 @@ static void brw_destroy_context( struct intel_context *intel )
brw_destroy_state(brw);
brw_draw_destroy( brw );
ralloc_free(brw->wm.compile_data);
dri_bo_release(&brw->curbe.curbe_bo);
dri_bo_release(&brw->hiz.vertex_bo);
dri_bo_release(&brw->vs.const_bo);
dri_bo_release(&brw->wm.const_bo);
@ -236,8 +239,15 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.is_hiz_depth_format = brw_is_hiz_depth_format;
if (brw->intel.has_hiz) {
brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice;
brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice;
if (brw->intel.gen == 7) {
brw->intel.vtbl.resolve_depth_slice = gen7_resolve_depth_slice;
brw->intel.vtbl.resolve_hiz_slice = gen7_resolve_hiz_slice;
} else if (brw->intel.gen == 6) {
brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice;
brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice;
} else {
assert(0);
}
}
if (brw->intel.gen >= 7) {

View file

@ -67,23 +67,6 @@ upload_clip_state(struct brw_context *brw)
GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
}
if (brw->hiz.op) {
/* HiZ operations emit a rectangle primitive, which requires clipping to
* be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
* Section 1.3 3D Primitives Overview:
* RECTLIST:
* Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
* Mode should be set to a value other than CLIPMODE_NORMAL.
*/
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
return;
}
if (!ctx->Transform.DepthClamp)
depth_clamp = GEN6_CLIP_Z_TEST;
@ -124,8 +107,7 @@ const struct brw_tracked_state gen6_clip_state = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_LIGHT,
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_HIZ),
BRW_NEW_FRAGMENT_PROGRAM),
.cache = 0
},
.emit = upload_clip_state,

View file

@ -82,11 +82,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw)
}
/* _NEW_DEPTH */
if ((ctx->Depth.Test || brw->hiz.op) && depth_irb) {
assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_RESOLVE || ctx->Depth.Test);
assert(brw->hiz.op != BRW_HIZ_OP_HIZ_RESOLVE || !ctx->Depth.Test);
assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_CLEAR || !ctx->Depth.Test);
if (ctx->Depth.Test && depth_irb) {
ds->ds2.depth_test_enable = ctx->Depth.Test;
ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func);
ds->ds2.depth_write_enable = ctx->Depth.Mask;
@ -98,8 +94,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw)
const struct brw_tracked_state gen6_depth_stencil_state = {
.dirty = {
.mesa = _NEW_DEPTH | _NEW_STENCIL | _NEW_BUFFERS,
.brw = (BRW_NEW_BATCH |
BRW_NEW_HIZ),
.brw = BRW_NEW_BATCH,
.cache = 0,
},
.emit = gen6_upload_depth_stencil_state,

View file

@ -21,345 +21,621 @@
* IN THE SOFTWARE.
*/
#include "gen6_hiz.h"
#include <assert.h>
#include "mesa/drivers/common/meta.h"
#include "mesa/main/arrayobj.h"
#include "mesa/main/bufferobj.h"
#include "mesa/main/depth.h"
#include "mesa/main/enable.h"
#include "mesa/main/fbobject.h"
#include "mesa/main/framebuffer.h"
#include "mesa/main/get.h"
#include "mesa/main/renderbuffer.h"
#include "mesa/main/shaderapi.h"
#include "mesa/main/varray.h"
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_tex.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
/**
 * Bitmask of MESA_META_* flags that the HiZ meta-op hands to
 * _mesa_meta_begin(). Each group of flags below is annotated with the
 * reason that piece of GL state is included.
 */
static const uint32_t gen6_hiz_meta_save =
/* Disable alpha, depth, and stencil test.
 *
 * See the following sections of the Sandy Bridge PRM, Volume 1, Part 2:
 * - 7.5.3.1 Depth Buffer Clear
 * - 7.5.3.2 Depth Buffer Resolve
 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
 */
MESA_META_ALPHA_TEST |
MESA_META_DEPTH_TEST |
MESA_META_STENCIL_TEST |
/* Disable viewport mapping.
 *
 * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3
 * 3D Primitives Overview:
 * RECTLIST:
 * Viewport Mapping must be DISABLED (as is typical with the use of
 * screen-space coordinates).
 *
 * We must also manually disable 3DSTATE_SF.Viewport_Transform_Enable.
 */
MESA_META_VIEWPORT |
/* Disable clipping.
 *
 * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3
 * 3D Primitives Overview:
 * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
 * Mode should be set to a value other than CLIPMODE_NORMAL.
 */
MESA_META_CLIP |
/* Render a solid rectangle (set 3DSTATE_SF.FrontFace_Fill_Mode).
 *
 * From page 249 of the Sandy Bridge PRM, Volume 2, Part 1, Section
 * 6.4.1.1 3DSTATE_SF, FrontFace_Fill_Mode:
 * SOLID: Any triangle or rectangle object found to be front-facing
 * is rendered as a solid object. This setting is required when
 * rendering rectangle (RECTLIST) objects.
 * Also see field BackFace_Fill_Mode.
 *
 * Note: MESA_META_RASTERIZATION also disables culling, but that is
 * irrelevant. See 3DSTATE_SF.Cull_Mode.
 */
MESA_META_RASTERIZATION |
/* Each HiZ operation uses a vertex shader and VAO. */
MESA_META_SHADER |
MESA_META_VERTEX |
/* Disable scissoring.
 *
 * Scissoring is disabled for resolves because a resolve operation
 * should resolve the entire buffer. Scissoring is disabled for depth
 * clears because, if we are performing a partial depth clear, then we
 * specify the clear region with the RECTLIST vertices.
 */
MESA_META_SCISSOR |
/* NOTE(review): no rationale was recorded for this flag; presumably it
 * keeps the op independent of select/feedback render modes -- confirm.
 */
MESA_META_SELECT_FEEDBACK;
/**
 * Pick the framebuffer binding target, and the matching glGet query enum,
 * to use when binding a draw framebuffer.
 *
 * \param ctx        GL context whose API and extensions are inspected.
 * \param bind_enum  [out] target to pass when binding the framebuffer.
 * \param get_enum   [out] query enum that returns the current binding of
 *                   that target.
 */
static void
gen6_hiz_get_framebuffer_enum(struct gl_context *ctx,
                              GLenum *bind_enum,
                              GLenum *get_enum)
{
   /* With EXT_framebuffer_blit on desktop GL, separate buffers may be bound
    * to GL_DRAW_FRAMEBUFFER and GL_READ_FRAMEBUFFER, so only the draw
    * binding is touched and the read buffer is left undisturbed. Otherwise
    * those enums do not exist and the single bound framebuffer serves as
    * both read and draw buffer.
    */
   const int has_split_targets =
      ctx->Extensions.EXT_framebuffer_blit && ctx->API == API_OPENGL;

   *bind_enum = has_split_targets ? GL_DRAW_FRAMEBUFFER
                                  : GL_FRAMEBUFFER;
   *get_enum = has_split_targets ? GL_DRAW_FRAMEBUFFER_BINDING
                                 : GL_FRAMEBUFFER_BINDING;
}
#include "gen6_hiz.h"
/**
* Initialize static data needed for HiZ operations.
* \name Constants for HiZ VBO
* \{
*
* \see brw_context::hiz::vertex_bo
*/
static void
#define GEN6_HIZ_NUM_VERTICES 3
#define GEN6_HIZ_NUM_VUE_ELEMS 8
#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES \
* GEN6_HIZ_NUM_VUE_ELEMS \
* sizeof(float))
/** \} */
/**
* \brief Initialize data needed for the HiZ op.
*
* This is called when executing the first HiZ op.
* \see brw_context::hiz
*/
void
gen6_hiz_init(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
struct brw_hiz_state *hiz = &brw->hiz;
GLenum fb_bind_enum, fb_get_enum;
if (hiz->fbo != 0)
return;
hiz->vertex_bo = drm_intel_bo_alloc(intel->bufmgr, "bufferobj",
GEN6_HIZ_VBO_SIZE, /* size */
64); /* alignment */
gen6_hiz_get_framebuffer_enum(ctx, &fb_bind_enum, &fb_get_enum);
if (!hiz->vertex_bo)
_mesa_error(ctx, GL_OUT_OF_MEMORY, "failed to allocate internal VBO");
}
/* Create depthbuffer.
void
gen6_hiz_emit_batch_head(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
struct brw_hiz_state *hiz = &brw->hiz;
/* To ensure that the batch contains only the resolve, flush the batch
* before beginning and after finishing emitting the resolve packets.
*
* Until glRenderbufferStorage is called, the renderbuffer hash table
* maps the renderbuffer name to a dummy renderbuffer. We need the
* renderbuffer to be registered in the hash table so that framebuffer
* validation succeeds, so we hackishly allocate storage then immediately
* discard it.
* Ideally, we would not need to flush for the resolve op. But, I suspect
* that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in
* a single batch, and there is no safe way to ensure that other than by
* fencing the resolve with flushes. Ideally, we would just detect if
* a batch is in progress and do the right thing, but that would require
* the ability to *safely* access brw_context::state::dirty::brw
* outside of the brw_upload_state() codepath.
*/
GLuint depth_rb_name;
_mesa_GenRenderbuffersEXT(1, &depth_rb_name);
_mesa_BindRenderbufferEXT(GL_RENDERBUFFER, depth_rb_name);
_mesa_RenderbufferStorageEXT(GL_RENDERBUFFER, GL_DEPTH_COMPONENT, 32, 32);
_mesa_reference_renderbuffer(&hiz->depth_rb,
_mesa_lookup_renderbuffer(ctx, depth_rb_name));
intel_miptree_release(&((struct intel_renderbuffer*) hiz->depth_rb)->mt);
intel_flush(ctx);
/* Setup FBO. */
_mesa_GenFramebuffersEXT(1, &hiz->fbo);
_mesa_BindFramebufferEXT(fb_bind_enum, hiz->fbo);
_mesa_FramebufferRenderbufferEXT(fb_bind_enum,
GL_DEPTH_ATTACHMENT,
GL_RENDERBUFFER,
hiz->depth_rb->Name);
/* CMD_PIPELINE_SELECT
*
* Select the 3D pipeline, as opposed to the media pipeline.
*/
{
BEGIN_BATCH(1);
OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16);
ADVANCE_BATCH();
}
/* Compile vertex shader. */
const char *vs_source =
"attribute vec4 position;\n"
"void main()\n"
"{\n"
" gl_Position = position;\n"
"}\n";
GLuint vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER);
_mesa_ShaderSourceARB(vs, 1, &vs_source, NULL);
_mesa_CompileShaderARB(vs);
/* 3DSTATE_MULTISAMPLE */
{
int length = intel->gen == 7 ? 4 : 3;
/* Compile fragment shader. */
const char *fs_source = "void main() {}";
GLuint fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER);
_mesa_ShaderSourceARB(fs, 1, &fs_source, NULL);
_mesa_CompileShaderARB(fs);
BEGIN_BATCH(length);
OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (3 - 2));
OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
MS_NUMSAMPLES_1);
OUT_BATCH(0);
if (length >= 4)
OUT_BATCH(0);
ADVANCE_BATCH();
/* Link and use program. */
hiz->shader.program = _mesa_CreateProgramObjectARB();
_mesa_AttachShader(hiz->shader.program, vs);
_mesa_AttachShader(hiz->shader.program, fs);
_mesa_LinkProgramARB(hiz->shader.program);
_mesa_UseProgramObjectARB(hiz->shader.program);
}
/* Create and bind VAO. */
_mesa_GenVertexArrays(1, &hiz->vao);
_mesa_BindVertexArray(hiz->vao);
/* 3DSTATE_SAMPLE_MASK */
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
OUT_BATCH(1);
ADVANCE_BATCH();
}
/* Setup VBO for 'position'. */
hiz->shader.position_location =
_mesa_GetAttribLocationARB(hiz->shader.program, "position");
_mesa_GenBuffersARB(1, &hiz->shader.position_vbo);
_mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, hiz->shader.position_vbo);
_mesa_VertexAttribPointerARB(hiz->shader.position_location,
2, /*components*/
GL_FLOAT,
GL_FALSE, /*normalized?*/
0, /*stride*/
NULL);
_mesa_EnableVertexAttribArrayARB(hiz->shader.position_location);
/* Cleanup. */
_mesa_DeleteShader(vs);
_mesa_DeleteShader(fs);
/* CMD_STATE_BASE_ADDRESS
*
* From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS:
* The following commands must be reissued following any change to the
* base addresses:
* 3DSTATE_CC_POINTERS
* 3DSTATE_BINDING_TABLE_POINTERS
* 3DSTATE_SAMPLER_STATE_POINTERS
* 3DSTATE_VIEWPORT_STATE_POINTERS
* MEDIA_STATE_POINTERS
*/
{
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */
/* SurfaceStateBaseAddress */
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
/* DynamicStateBaseAddress */
OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
OUT_BATCH(1); /* IndirectObjectBaseAddress */
OUT_BATCH(1); /* InstructionBaseAddress */
OUT_BATCH(1); /* GeneralStateUpperBound */
OUT_BATCH(1); /* DynamicStateUpperBound */
OUT_BATCH(1); /* IndirectObjectUpperBound*/
OUT_BATCH(1); /* InstructionAccessUpperBound */
ADVANCE_BATCH();
}
}
/**
* Wrap \c brw->hiz.depth_rb around a miptree.
*
* \see gen6_hiz_teardown_depth_buffer()
*/
static void
gen6_hiz_setup_depth_buffer(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int layer)
void
gen6_hiz_emit_vertices(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int layer)
{
struct gl_renderbuffer *rb = brw->hiz.depth_rb;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
rb->Format = mt->format;
rb->_BaseFormat = _mesa_get_format_base_format(rb->Format);
rb->InternalFormat = rb->_BaseFormat;
rb->Width = mt->level[level].width;
rb->Height = mt->level[level].height;
irb->mt_level = level;
irb->mt_layer = layer;
intel_miptree_reference(&irb->mt, mt);
intel_renderbuffer_set_draw_offset(irb);
}
/**
* Release the region from \c brw->hiz.depth_rb.
*
* \see gen6_hiz_setup_depth_buffer()
*/
static void
gen6_hiz_teardown_depth_buffer(struct gl_renderbuffer *rb)
{
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
intel_miptree_release(&irb->mt);
}
static void
gen6_resolve_slice(struct intel_context *intel,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int layer,
enum brw_hiz_op op)
{
struct gl_context *ctx = &intel->ctx;
struct brw_context *brw = brw_context(ctx);
struct intel_context *intel = &brw->intel;
struct brw_hiz_state *hiz = &brw->hiz;
GLenum fb_bind_enum, fb_get_enum;
/* Do not recurse. */
assert(!brw->hiz.op);
assert(mt->hiz_mt != NULL);
assert(level >= mt->first_level);
assert(level <= mt->last_level);
assert(layer < mt->level[level].depth);
gen6_hiz_get_framebuffer_enum(ctx, &fb_bind_enum, &fb_get_enum);
/* Save state. */
GLint save_drawbuffer;
GLint save_renderbuffer;
_mesa_meta_begin(ctx, gen6_hiz_meta_save);
_mesa_GetIntegerv(fb_get_enum, &save_drawbuffer);
_mesa_GetIntegerv(GL_RENDERBUFFER_BINDING, &save_renderbuffer);
/* Initialize context data for HiZ operations. */
gen6_hiz_init(brw);
/* Set depth state. */
if (!ctx->Depth.Mask) {
/* This sets 3DSTATE_WM.Depth_Buffer_Write_Enable. */
_mesa_DepthMask(GL_TRUE);
}
if (op == BRW_HIZ_OP_DEPTH_RESOLVE) {
_mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
_mesa_DepthFunc(GL_NEVER);
}
/* Setup FBO. */
gen6_hiz_setup_depth_buffer(brw, mt, level, layer);
_mesa_BindFramebufferEXT(fb_bind_enum, hiz->fbo);
/* A rectangle primitive (3DPRIM_RECTLIST) consists of only three vertices.
* The vertices reside in screen space with DirectX coordinates (this is,
* (0, 0) is the upper left corner).
/* Setup VBO for the rectangle primitive.
*
* A rectangle primitive (3DPRIM_RECTLIST) consists of only three
* vertices. The vertices reside in screen space with DirectX coordinates
* (that is, (0, 0) is the upper left corner).
*
* v2 ------ implied
* | |
* | |
* v0 ----- v1
*/
const int width = hiz->depth_rb->Width;
const int height = hiz->depth_rb->Height;
const GLfloat positions[] = {
0, height,
width, height,
0, 0,
};
/* Setup program and vertex attributes. */
_mesa_UseProgramObjectARB(hiz->shader.program);
_mesa_BindVertexArray(hiz->vao);
_mesa_BindBufferARB(GL_ARRAY_BUFFER, hiz->shader.position_vbo);
_mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(positions), positions,
GL_DYNAMIC_DRAW_ARB);
/* Execute the HiZ operation. */
brw->hiz.op = op;
brw->state.dirty.brw |= BRW_NEW_HIZ;
_mesa_DrawArrays(GL_TRIANGLES, 0, 3);
brw->state.dirty.brw |= BRW_NEW_HIZ;
brw->hiz.op = BRW_HIZ_OP_NONE;
/* Restore state.
*
* The order in which state is restored is significant. The draw buffer
* used for the HiZ op has no stencil buffer, and glStencilFunc() clamps
* the stencil reference value to the range allowed by the draw buffer's
* number of stencil bits. So, the draw buffer binding must be restored
* before the stencil state, or else the stencil ref will be clamped to 0.
* Since the VS is disabled, the clipper loads each VUE directly from
* the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
* 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
* dw0: Reserved, MBZ.
* dw1: Render Target Array Index. The HiZ op does not use indexed
* vertices, so set the dword to 0.
* dw2: Viewport Index. The HiZ op disables viewport mapping and
* scissoring, so set the dword to 0.
* dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
* set the dword to 0.
* dw4: Vertex Position X.
* dw5: Vertex Position Y.
* dw6: Vertex Position Z.
* dw7: Vertex Position W.
*
* For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
* "Vertex URB Entry (VUE) Formats".
*/
gen6_hiz_teardown_depth_buffer(hiz->depth_rb);
_mesa_BindRenderbufferEXT(GL_RENDERBUFFER, save_renderbuffer);
_mesa_BindFramebufferEXT(fb_bind_enum, save_drawbuffer);
_mesa_meta_end(ctx);
{
const int width = mt->level[level].width;
const int height = mt->level[level].height;
const float vertices[GEN6_HIZ_VBO_SIZE] = {
/* v0 */ 0, 0, 0, 0, 0, height, 0, 1,
/* v1 */ 0, 0, 0, 0, width, height, 0, 1,
/* v2 */ 0, 0, 0, 0, 0, 0, 0, 1,
};
drm_intel_bo_subdata(hiz->vertex_bo, 0, GEN6_HIZ_VBO_SIZE, vertices);
}
/* 3DSTATE_VERTEX_BUFFERS */
{
const int num_buffers = 1;
const int batch_length = 1 + 4 * num_buffers;
uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
(GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT;
if (intel->gen >= 7)
dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
BEGIN_BATCH(batch_length);
OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
OUT_BATCH(dw0);
/* start address */
OUT_RELOC(hiz->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
/* end address */
OUT_RELOC(hiz->vertex_bo, I915_GEM_DOMAIN_VERTEX,
0, hiz->vertex_bo->size - 1);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_VERTEX_ELEMENTS
*
* Fetch dwords 0 - 7 from each VUE. See the comments above where
* hiz->vertex_bo is filled with data.
*/
{
const int num_elements = 2;
const int batch_length = 1 + 2 * num_elements;
BEGIN_BATCH(batch_length);
OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
/* Element 0 */
OUT_BATCH(GEN6_VE0_VALID |
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
0 << BRW_VE0_SRC_OFFSET_SHIFT);
OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
/* Element 1 */
OUT_BATCH(GEN6_VE0_VALID |
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
16 << BRW_VE0_SRC_OFFSET_SHIFT);
OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
ADVANCE_BATCH();
}
}
/**
* \brief Execute a HiZ op on a miptree slice.
*
* To execute the HiZ op, this function manually constructs and emits a batch
* to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed
* before constructing and after emitting the batch.
*
* This function alters no GL state. It does, however, mark all brw and cache
* dirty bits in brw->state.dirty before returning.
*
* For an overview of HiZ ops, see the following sections of the Sandy Bridge
* PRM, Volume 1, Part 2:
* - 7.5.3.1 Depth Buffer Clear
* - 7.5.3.2 Depth Buffer Resolve
* - 7.5.3.3 Hierarchical Depth Buffer Resolve
*
* \param intel the context; its brw_context supplies the HiZ state (brw->hiz)
* \param mt the depth miptree to operate on; mt->hiz_mt must be non-NULL
* \param level miptree level of the slice (validated together with \c layer
* by intel_miptree_check_level_layer())
* \param layer miptree layer of the slice
* \param op the HiZ op to execute; GEN6_HIZ_OP_DEPTH_CLEAR is not
* implemented and is rejected by an assert
*
* If hiz->vertex_bo is still NULL after calling gen6_hiz_init(), the function
* returns silently without emitting anything.
*/
static void
gen6_hiz_exec(struct intel_context *intel,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int layer,
enum gen6_hiz_op op)
{
struct gl_context *ctx = &intel->ctx;
struct brw_context *brw = brw_context(ctx);
struct brw_hiz_state *hiz = &brw->hiz;
assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
assert(mt->hiz_mt != NULL);
intel_miptree_check_level_layer(mt, level, layer);
/* Initialize the HiZ state on first use. gen6_hiz_init() presumably
* allocates hiz->vertex_bo; its result is only observed through the
* re-check of vertex_bo below.
*/
if (hiz->vertex_bo == NULL)
gen6_hiz_init(brw);
if (hiz->vertex_bo == NULL) {
/* Ouch. Give up. */
return;
}
gen6_hiz_emit_batch_head(brw);
gen6_hiz_emit_vertices(brw, mt, level, layer);
/* 3DSTATE_URB
*
* Assign the entire URB to the VS. Even though the VS is disabled, URB
* space is still needed because the clipper loads the VUEs from the URB.
* From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
* Dword 1.15:0 "VS Number of URB Entries":
* This field is always used (even if VS Function Enable is DISABLED).
*
* The warning below appears in the PRM (Section 3DSTATE_URB), but we can
* safely ignore it because this batch contains only one draw call.
* Because of URB corruption caused by allocating a previous GS unit
* URB entry to the VS unit, software is required to send a GS NULL
* Fence (Send URB fence with VS URB size == 1 and GS URB size == 0)
* plus a dummy DRAW call before any case where VS will be taking over
* GS URB space.
*/
{
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_CC_STATE_POINTERS
*
* The pointer offsets are relative to
* CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
*
* The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
*
* NOTE(review): bit 0 of each pointer dword is set to 1 here; presumably
* this is the per-pointer "modify enable" bit -- confirm against the PRM.
*/
{
uint32_t depthstencil_offset;
gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
OUT_BATCH(1); /* BLEND_STATE offset */
OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */
OUT_BATCH(1); /* COLOR_CALC_STATE offset */
ADVANCE_BATCH();
}
/* 3DSTATE_VS
*
* Disable vertex shader.
*/
{
/* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section
* 3DSTATE_VS, Dword 5.0 "VS Function Enable":
* [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
* command that causes the VS Function Enable to toggle. Pipeline
* flush can be executed by sending a PIPE_CONTROL command with CS
* stall bit set and a post sync operation.
*/
intel_emit_post_sync_nonzero_flush(intel);
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_GS
*
* Disable the geometry shader.
*/
{
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_CLIP
*
* Disable the clipper.
*
* The HiZ op emits a rectangle primitive, which requires clipping to
* be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
* Section 1.3 "3D Primitives Overview":
* RECTLIST:
* Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
* Mode should be set to a value other than CLIPMODE_NORMAL.
*
* Also disable perspective divide. This doesn't change the clipper's
* output, but does spare a few electrons.
*/
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_SF
*
* Disable ViewportTransformEnable (dw2.1)
*
* From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
* Primitives Overview":
* RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
* use of screen-space coordinates).
*
* A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.6:5)
* and BackFaceFillMode (dw2.4:3) to SOLID(0).
*
* From the Sandy Bridge PRM, Volume 2, Part 1, Section
* 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
* SOLID: Any triangle or rectangle object found to be front-facing
* is rendered as a solid object. This setting is required when
* rendering rectangle (RECTLIST) objects.
*
* Only dword 1 is nonzero; the remaining 18 dwords of the 20-dword packet
* are emitted as zero by the loop below.
*/
{
BEGIN_BATCH(20);
OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */
1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
for (int i = 0; i < 18; ++i)
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_WM
*
* Disable thread dispatch (dw5.19) and enable the HiZ op.
*
* Even though thread dispatch is disabled, max threads (dw5.25:31) must be
* nonzero to prevent the GPU from hanging. See the valid ranges in the
* BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31
* "Maximum Number Of Threads".
*/
{
uint32_t dw4 = 0;
/* Select the dw4 flag matching the requested op. The DEPTH_CLEAR case
* asserts because it is not implemented; the assert at function entry
* already rejects it in debug builds.
*/
switch (op) {
case GEN6_HIZ_OP_DEPTH_CLEAR:
assert(!"not implemented");
dw4 |= GEN6_WM_DEPTH_CLEAR;
break;
case GEN6_HIZ_OP_DEPTH_RESOLVE:
dw4 |= GEN6_WM_DEPTH_RESOLVE;
break;
case GEN6_HIZ_OP_HIZ_RESOLVE:
dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
break;
default:
assert(0);
break;
}
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(dw4);
OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_DEPTH_BUFFER */
{
uint32_t width = mt->level[level].width;
uint32_t height = mt->level[level].height;
uint32_t tile_x;
uint32_t tile_y;
uint32_t offset;
{
/* Construct a dummy renderbuffer just to extract tile offsets.
*
* NOTE(review): only mt, mt_level and mt_layer are assigned; every
* other field of rb is left uninitialized. This relies on the two
* helpers below reading nothing else -- confirm against their
* definitions.
*/
struct intel_renderbuffer rb;
rb.mt = mt;
rb.mt_level = level;
rb.mt_layer = layer;
intel_renderbuffer_set_draw_offset(&rb);
offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y);
}
/* Translate the miptree's Mesa format into the hardware depth format.
*
* NOTE(review): when asserts are compiled out (NDEBUG), an unexpected
* mt->format falls through the default case and leaves `format`
* uninitialized before it is used below.
*/
uint32_t format;
switch (mt->format) {
case MESA_FORMAT_Z16: format = BRW_DEPTHFORMAT_D16_UNORM; break;
case MESA_FORMAT_Z32_FLOAT: format = BRW_DEPTHFORMAT_D32_FLOAT; break;
case MESA_FORMAT_X8_Z24: format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
default: assert(0); break;
}
/* Flushes emitted ahead of the depth buffer packet. */
intel_emit_post_sync_nonzero_flush(intel);
intel_emit_depth_stall_flushes(intel);
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
format << 18 |
1 << 21 | /* separate stencil enable */
1 << 22 | /* hiz enable */
BRW_TILEWALK_YMAJOR << 26 |
1 << 27 | /* y-tiled */
BRW_SURFACE_2D << 29);
OUT_RELOC(mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
offset);
/* The programmed width/height are extended by the intra-tile offsets,
* which are also written to the tile_x/tile_y dword further down.
*/
OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
(width + tile_x - 1) << 6 |
(height + tile_y - 1) << 19);
OUT_BATCH(0);
OUT_BATCH(tile_x |
tile_y << 16);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_HIER_DEPTH_BUFFER
*
* Point the hardware at the HiZ miptree's region (pitch in bytes minus one,
* then the buffer object at offset 0).
*/
{
struct intel_region *hiz_region = mt->hiz_mt->region;
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
OUT_RELOC(hiz_region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0);
ADVANCE_BATCH();
}
/* 3DSTATE_STENCIL_BUFFER
*
* Emitted with all-zero payload; no stencil buffer is bound for the HiZ op.
*/
{
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_CLEAR_PARAMS
*
* From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
* [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
* packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
*/
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_DRAWING_RECTANGLE
*
* Covers the whole miptree level: min corner (0, 0), max corner
* (width - 1, height - 1), origin (0, 0).
*/
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
((mt->level[level].height - 1) << 16));
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DPRIMITIVE
*
* Draw one instance of a three-vertex RECTLIST, read sequentially from the
* vertex buffer.
*/
{
BEGIN_BATCH(6);
OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
_3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
OUT_BATCH(3); /* vertex count per instance */
OUT_BATCH(0);
OUT_BATCH(1); /* instance count */
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* See comments above at first invocation of intel_flush() in
* gen6_hiz_emit_batch_head().
*/
intel_flush(ctx);
/* Be safe: set every brw and cache dirty bit, since the batch above
* programmed pipeline state by hand.
*/
brw->state.dirty.brw = ~0;
brw->state.dirty.cache = ~0;
}
/**
 * \brief Allocate and fill the DEPTH_STENCIL_STATE used by a HiZ op.
 *
 * The state is allocated from the batch with brw_state_batch() (64-byte
 * alignment), zeroed, and then programmed as follows:
 *   - depth writes are always enabled;
 *   - for GEN6_HIZ_OP_DEPTH_RESOLVE only, the depth test is additionally
 *     enabled with COMPAREFUNC_NEVER.
 *
 * See the following sections of the Sandy Bridge PRM, Volume 1, Part 2:
 *   - 7.5.3.1 Depth Buffer Clear
 *   - 7.5.3.2 Depth Buffer Resolve
 *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
 *
 * \param brw         context whose batch receives the state
 * \param op          the HiZ op being executed
 * \param out_offset  receives the state's offset, relative to
 *                    CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress
 */
void
gen6_hiz_emit_depth_stencil_state(struct brw_context *brw,
                                  enum gen6_hiz_op op,
                                  uint32_t *out_offset)
{
   struct gen6_depth_stencil_state *ds =
      brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
                      sizeof(*ds), 64,
                      out_offset);

   memset(ds, 0, sizeof(*ds));

   /* Every HiZ op writes depth. */
   ds->ds2.depth_write_enable = 1;

   /* Only the depth resolve also needs the (never-passing) depth test. */
   if (op != GEN6_HIZ_OP_DEPTH_RESOLVE)
      return;

   ds->ds2.depth_test_enable = 1;
   ds->ds2.depth_test_func = COMPAREFUNC_NEVER;
}
/**
 * \brief Perform a HiZ resolve on one slice of a miptree.
 *
 * Thin wrapper that runs gen6_hiz_exec() with GEN6_HIZ_OP_HIZ_RESOLVE. The
 * legacy meta-op entry point gen6_resolve_slice() is no longer called; the
 * hand-built HiZ batch replaces it.
 *
 * \param intel  the context
 * \param mt     the depth miptree; must have an associated HiZ miptree
 * \param level  miptree level of the slice
 * \param layer  miptree layer of the slice
 *
 * \see intel_context::vtbl::resolve_hiz_slice
 */
void
gen6_resolve_hiz_slice(struct intel_context *intel,
                       struct intel_mipmap_tree *mt,
                       uint32_t level,
                       uint32_t layer)
{
   gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
}
/**
 * \brief Perform a depth resolve on one slice of a miptree.
 *
 * Thin wrapper that runs gen6_hiz_exec() with GEN6_HIZ_OP_DEPTH_RESOLVE. The
 * legacy meta-op entry point gen6_resolve_slice() is no longer called; the
 * hand-built HiZ batch replaces it.
 *
 * \param intel  the context
 * \param mt     the depth miptree; must have an associated HiZ miptree
 * \param level  miptree level of the slice
 * \param layer  miptree layer of the slice
 *
 * \see intel_context::vtbl::resolve_depth_slice
 */
void
gen6_resolve_depth_slice(struct intel_context *intel,
                         struct intel_mipmap_tree *mt,
                         uint32_t level,
                         uint32_t layer)
{
   gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
}

View file

@ -28,6 +28,44 @@
struct intel_context;
struct intel_mipmap_tree;
/**
 * \brief The kinds of HiZ operation.
 *
 * Each operation is described in the Sandy Bridge PRM, Volume 1, Part 2:
 *   - 7.5.3.1 Depth Buffer Clear
 *   - 7.5.3.2 Depth Buffer Resolve
 *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
 */
enum gen6_hiz_op {
   /** Depth buffer clear; not yet implemented by the HiZ exec functions. */
   GEN6_HIZ_OP_DEPTH_CLEAR = 0,

   /** Depth buffer resolve. */
   GEN6_HIZ_OP_DEPTH_RESOLVE = 1,

   /** Hierarchical depth buffer resolve. */
   GEN6_HIZ_OP_HIZ_RESOLVE = 2,
};
/**
* \name HiZ internals
* \{
*
* Used internally by gen6_hiz_exec() and gen7_hiz_exec().
*/
/**
* Initialize the HiZ state. Both exec functions call this when
* brw->hiz.vertex_bo is NULL and give up if vertex_bo is still NULL
* afterwards.
*/
void
gen6_hiz_init(struct brw_context *brw);
/**
* Emit the start of the HiZ batch. Both exec functions call this first,
* before emitting vertices and any pipeline state; per the exec functions'
* comments it also performs the initial intel_flush().
*/
void
gen6_hiz_emit_batch_head(struct brw_context *brw);
/**
* Emit the vertex data for the HiZ op's rectangle on the given miptree
* slice. Called by both exec functions right after
* gen6_hiz_emit_batch_head().
*
* \param mt the miptree being operated on
* \param level miptree level of the slice
* \param layer miptree layer of the slice
*/
void
gen6_hiz_emit_vertices(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int layer);
/**
* Allocate and fill the DEPTH_STENCIL_STATE for the given HiZ op.
*
* \param op the HiZ op being executed
* \param out_offset receives the state's offset, relative to
* CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress
*/
void
gen6_hiz_emit_depth_stencil_state(struct brw_context *brw,
enum gen6_hiz_op op,
uint32_t *out_offset);
/** \} */
void
gen6_resolve_hiz_slice(struct intel_context *intel,
struct intel_mipmap_tree *mt,

View file

@ -149,17 +149,8 @@ upload_sf_state(struct brw_context *brw)
urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
dw2 = GEN6_SF_STATISTICS_ENABLE;
/* Enable viewport transform only if no HiZ operation is progress
*
* From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
* Primitives Overview":
* RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
* use of screen- space coordinates).
*/
if (!brw->hiz.op)
dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
dw2 = GEN6_SF_STATISTICS_ENABLE |
GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
dw3 = 0;
dw4 = 0;
@ -354,8 +345,7 @@ const struct brw_tracked_state gen6_sf_state = {
_NEW_POINT |
_NEW_TRANSFORM),
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_HIZ),
BRW_NEW_FRAGMENT_PROGRAM),
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_sf_state,

View file

@ -133,6 +133,15 @@ upload_vs_state(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
uint32_t floating_point_mode = 0;
/* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section
* 3DSTATE_VS, Dword 5.0 "VS Function Enable":
* [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
* command that causes the VS Function Enable to toggle. Pipeline
* flush can be executed by sending a PIPE_CONTROL command with CS
* stall bit set and a post sync operation.
*/
intel_emit_post_sync_nonzero_flush(intel);
if (brw->vs.push_const_size == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);

View file

@ -149,23 +149,6 @@ upload_wm_state(struct brw_context *brw)
dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
switch (brw->hiz.op) {
case BRW_HIZ_OP_NONE:
break;
case BRW_HIZ_OP_DEPTH_CLEAR:
dw4 |= GEN6_WM_DEPTH_CLEAR;
break;
case BRW_HIZ_OP_DEPTH_RESOLVE:
dw4 |= GEN6_WM_DEPTH_RESOLVE;
break;
case BRW_HIZ_OP_HIZ_RESOLVE:
dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
break;
default:
assert(0);
break;
}
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
/* CACHE_NEW_WM_PROG */
@ -233,8 +216,7 @@ const struct brw_tracked_state gen6_wm_state = {
_NEW_PROGRAM_CONSTANTS |
_NEW_POLYGON),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_BATCH |
BRW_NEW_HIZ),
BRW_NEW_BATCH),
.cache = (CACHE_NEW_SAMPLER |
CACHE_NEW_WM_PROG)
},

View file

@ -39,23 +39,6 @@ upload_clip_state(struct brw_context *brw)
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct gl_fragment_program *fprog = brw->fragment_program;
if (brw->hiz.op) {
/* HiZ operations emit a rectangle primitive, which requires clipping to
* be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
* Section 1.3 3D Primitives Overview:
* RECTLIST:
* Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
* Mode should be set to a value other than CLIPMODE_NORMAL.
*/
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
return;
}
/* _NEW_BUFFERS */
bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
@ -133,8 +116,7 @@ const struct brw_tracked_state gen7_clip_state = {
_NEW_LIGHT |
_NEW_TRANSFORM),
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_HIZ),
BRW_NEW_FRAGMENT_PROGRAM),
.cache = 0
},
.emit = upload_clip_state,

View file

@ -0,0 +1,463 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "gen6_hiz.h"
#include "gen7_hiz.h"
/**
 * \brief Execute a HiZ op on a miptree slice (Gen7 variant).
 *
 * Gen7 counterpart of gen6_hiz_exec(). To execute the HiZ op, this function
 * manually constructs and emits a batch that "draws" the HiZ op's rectangle
 * primitive with every shader stage disabled. The batchbuffer is flushed
 * before constructing and after emitting the batch.
 *
 * This function alters no GL state. It does, however, mark all brw and cache
 * dirty bits in brw->state.dirty before returning.
 *
 * \param intel  the context; its brw_context supplies the HiZ state
 * \param mt     the depth miptree to operate on; mt->hiz_mt must be non-NULL
 * \param level  miptree level of the slice (validated together with \c layer
 *               by intel_miptree_check_level_layer())
 * \param layer  miptree layer of the slice
 * \param op     the HiZ op to execute; GEN6_HIZ_OP_DEPTH_CLEAR is not
 *               implemented and is rejected by an assert
 *
 * If hiz->vertex_bo is still NULL after calling gen6_hiz_init(), the function
 * returns silently without emitting anything.
 */
static void
gen7_hiz_exec(struct intel_context *intel,
              struct intel_mipmap_tree *mt,
              unsigned int level,
              unsigned int layer,
              enum gen6_hiz_op op)
{
   struct gl_context *ctx = &intel->ctx;
   struct brw_context *brw = brw_context(ctx);
   struct brw_hiz_state *hiz = &brw->hiz;

   assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
   assert(mt->hiz_mt != NULL);
   intel_miptree_check_level_layer(mt, level, layer);

   /* Initialize the shared HiZ state on first use. */
   if (hiz->vertex_bo == NULL)
      gen6_hiz_init(brw);

   if (hiz->vertex_bo == NULL) {
      /* Ouch. Give up. */
      return;
   }

   /* Translate the miptree's Mesa format into the hardware depth format.
    * It is needed by both 3DSTATE_SF and 3DSTATE_DEPTH_BUFFER below.
    *
    * NOTE(review): when asserts are compiled out (NDEBUG), an unexpected
    * mt->format leaves depth_format uninitialized.
    */
   uint32_t depth_format;
   switch (mt->format) {
   case MESA_FORMAT_Z16:       depth_format = BRW_DEPTHFORMAT_D16_UNORM; break;
   case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break;
   case MESA_FORMAT_X8_Z24:    depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
   default:                    assert(0); break;
   }

   gen6_hiz_emit_batch_head(brw);
   gen6_hiz_emit_vertices(brw, mt, level, layer);

   /* 3DSTATE_URB_VS
    * 3DSTATE_URB_HS
    * 3DSTATE_URB_DS
    * 3DSTATE_URB_GS
    *
    * If the 3DSTATE_URB_VS is emitted, then the others must be also. From the
    * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS:
    *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
    *     programmed in order for the programming of this state to be
    *     valid.
    */
   {
      /* The minimum valid value is 32. See 3DSTATE_URB_VS,
       * Dword 1.15:0 "VS Number of URB Entries".
       */
      int num_vs_entries = 32;

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
      OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT |
                0 << GEN7_URB_STARTING_ADDRESS_SHIFT |
                num_vs_entries);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
    *
    * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
    */
   {
      uint32_t depthstencil_offset;
      gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
      OUT_BATCH(depthstencil_offset | 1);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_VS
    *
    * Disable vertex shader.
    */
   {
      BEGIN_BATCH(6);
      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HS
    *
    * Disable the hull shader.
    */
   {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_TE
    *
    * Disable the tessellation engine.
    */
   {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_DS
    *
    * Disable the domain shader.
    *
    * The length field in the header is the packet's total dword count minus
    * two, like every other packet in this batch. Encoding the raw count (6)
    * would make the command streamer treat the packet as 8 dwords long and
    * swallow the first two dwords of the 3DSTATE_GS packet that follows.
    */
   {
      BEGIN_BATCH(6);
      OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_GS
    *
    * Disable the geometry shader.
    */
   {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STREAMOUT
    *
    * Disable streamout.
    */
   {
      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_CLIP
    *
    * Disable the clipper.
    *
    * The HiZ op emits a rectangle primitive, which requires clipping to
    * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
    * Section 1.3 "3D Primitives Overview":
    *    RECTLIST:
    *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
    *    Mode should be set to a value other than CLIPMODE_NORMAL.
    *
    * Also disable perspective divide. This doesn't change the clipper's
    * output, but does spare a few electrons.
    */
   {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_SF
    *
    * Disable ViewportTransformEnable (dw1.1)
    *
    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
    * Primitives Overview":
    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
    *     use of screen-space coordinates).
    *
    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
    * and BackFaceFillMode (dw1.4:3) to SOLID(0).
    *
    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
    *     SOLID: Any triangle or rectangle object found to be front-facing
    *     is rendered as a solid object. This setting is required when
    *     rendering rectangle (RECTLIST) objects.
    */
   {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
      OUT_BATCH(depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_SBE
    *
    * Only dword 1 is nonzero; the remaining 12 dwords of the 14-dword packet
    * are emitted as zero.
    */
   {
      BEGIN_BATCH(14);
      OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
      OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */
                1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
                0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
      for (int i = 0; i < 12; ++i)
         OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_WM
    *
    * Disable PS thread dispatch (dw1.29) and enable the HiZ op.
    */
   {
      uint32_t dw1 = 0;

      /* Select the dw1 flag matching the requested op. The DEPTH_CLEAR case
       * asserts because it is not implemented; the assert at function entry
       * already rejects it in debug builds.
       */
      switch (op) {
      case GEN6_HIZ_OP_DEPTH_CLEAR:
         assert(!"not implemented");
         dw1 |= GEN7_WM_DEPTH_CLEAR;
         break;
      case GEN6_HIZ_OP_DEPTH_RESOLVE:
         dw1 |= GEN7_WM_DEPTH_RESOLVE;
         break;
      case GEN6_HIZ_OP_HIZ_RESOLVE:
         dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
         break;
      default:
         assert(0);
         break;
      }

      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
      OUT_BATCH(dw1);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_PS
    *
    * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
    * that, thread dispatch info must still be specified.
    *     - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec
    *       states that the valid range for this field is [0x3, 0x2f].
    *     - A dispatch mode must be given; that is, at least one of the
    *       "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
    *       discovered through simulator error messages.
    */
   {
      BEGIN_BATCH(8);
      OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH((brw->max_wm_threads - 1) << GEN7_PS_MAX_THREADS_SHIFT);
      OUT_BATCH(GEN7_PS_32_DISPATCH_ENABLE);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_DEPTH_BUFFER */
   {
      uint32_t width = mt->level[level].width;
      uint32_t height = mt->level[level].height;

      uint32_t tile_x;
      uint32_t tile_y;
      uint32_t offset;
      {
         /* Construct a dummy renderbuffer just to extract tile offsets.
          *
          * NOTE(review): only mt, mt_level and mt_layer are assigned; every
          * other field of rb is left uninitialized. This relies on the two
          * helpers below reading nothing else -- confirm against their
          * definitions.
          */
         struct intel_renderbuffer rb;
         rb.mt = mt;
         rb.mt_level = level;
         rb.mt_layer = layer;
         intel_renderbuffer_set_draw_offset(&rb);
         offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y);
      }

      intel_emit_depth_stall_flushes(intel);

      BEGIN_BATCH(7);
      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
                depth_format << 18 |
                1 << 22 | /* hiz enable */
                1 << 28 | /* depth write */
                BRW_SURFACE_2D << 29);
      OUT_RELOC(mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      /* The programmed width/height are extended by the intra-tile offsets,
       * which are also written to the tile_x/tile_y dword further down.
       */
      OUT_BATCH((width + tile_x - 1) << 4 |
                (height + tile_y - 1) << 18);
      OUT_BATCH(0);
      OUT_BATCH(tile_x |
                tile_y << 16);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HIER_DEPTH_BUFFER
    *
    * Point the hardware at the HiZ miptree's region (pitch in bytes minus
    * one, then the buffer object at offset 0).
    */
   {
      struct intel_region *hiz_region = mt->hiz_mt->region;

      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
      OUT_RELOC(hiz_region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STENCIL_BUFFER
    *
    * Emitted with all-zero payload; no stencil buffer is bound for the HiZ op.
    */
   {
      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_CLEAR_PARAMS
    *
    * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2
    * 3DSTATE_CLEAR_PARAMS:
    *    [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed along
    *    with the other Depth/Stencil state commands (i.e.
    *    3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER, or
    *    3DSTATE_HIER_DEPTH_BUFFER).
    */
   {
      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_DRAWING_RECTANGLE
    *
    * Covers the whole miptree level: min corner (0, 0), max corner
    * (width - 1, height - 1), origin (0, 0).
    */
   {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
                ((mt->level[level].height - 1) << 16));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DPRIMITIVE
    *
    * Draw one instance of a three-vertex RECTLIST, read sequentially from
    * the vertex buffer.
    */
   {
      BEGIN_BATCH(7);
      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
      OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
                _3DPRIM_RECTLIST);
      OUT_BATCH(3); /* vertex count per instance */
      OUT_BATCH(0);
      OUT_BATCH(1); /* instance count */
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* See comments above at first invocation of intel_flush() in
    * gen6_hiz_emit_batch_head().
    */
   intel_flush(ctx);

   /* Be safe: set every brw and cache dirty bit, since the batch above
    * programmed pipeline state by hand.
    */
   brw->state.dirty.brw = ~0;
   brw->state.dirty.cache = ~0;
}
/** \copydoc gen6_resolve_hiz_slice() */
void
gen7_resolve_hiz_slice(struct intel_context *intel,
struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
}
/** \copydoc gen6_resolve_depth_slice() */
void
gen7_resolve_depth_slice(struct intel_context *intel,
struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
}

View file

@ -0,0 +1,43 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include <stdint.h>
struct intel_context;
struct intel_mipmap_tree;
/**
* \copydoc gen6_resolve_hiz_slice()
*
* Gen7 variant: executes the HiZ-resolve op (GEN6_HIZ_OP_HIZ_RESOLVE) on the
* given level/layer slice of \c mt.
*/
void
gen7_resolve_hiz_slice(struct intel_context *intel,
struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer);
/**
* \copydoc gen6_resolve_depth_slice()
*
* Gen7 variant: executes the depth-resolve op (GEN6_HIZ_OP_DEPTH_RESOLVE) on
* the given level/layer slice of \c mt.
*/
void
gen7_resolve_depth_slice(struct intel_context *intel,
struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer);

View file

@ -149,8 +149,7 @@ const struct brw_tracked_state gen7_sbe_state = {
_NEW_PROGRAM |
_NEW_TRANSFORM),
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_HIZ),
BRW_NEW_FRAGMENT_PROGRAM),
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_sbe_state,
@ -166,17 +165,8 @@ upload_sf_state(struct brw_context *brw)
/* _NEW_BUFFERS */
bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
dw1 = GEN6_SF_STATISTICS_ENABLE;
/* Enable viewport transform only if no HiZ operation is progress
*
* From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
* Primitives Overview":
* RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
* use of screen- space coordinates).
*/
if (!brw->hiz.op)
dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
dw1 = GEN6_SF_STATISTICS_ENABLE |
GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
/* _NEW_BUFFERS */
dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
@ -310,8 +300,7 @@ const struct brw_tracked_state gen7_sf_state = {
_NEW_SCISSOR |
_NEW_BUFFERS |
_NEW_POINT),
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_HIZ),
.brw = BRW_NEW_CONTEXT,
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_sf_state,

View file

@ -49,23 +49,6 @@ upload_wm_state(struct brw_context *brw)
dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
switch (brw->hiz.op) {
case BRW_HIZ_OP_NONE:
break;
case BRW_HIZ_OP_DEPTH_CLEAR:
dw1 |= GEN7_WM_DEPTH_CLEAR;
break;
case BRW_HIZ_OP_DEPTH_RESOLVE:
dw1 |= GEN7_WM_DEPTH_RESOLVE;
break;
case BRW_HIZ_OP_HIZ_RESOLVE:
dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
break;
default:
assert(0);
break;
}
/* _NEW_LINE */
if (ctx->Line.StippleFlag)
dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
@ -106,7 +89,6 @@ const struct brw_tracked_state gen7_wm_state = {
.mesa = (_NEW_LINE | _NEW_LIGHT | _NEW_POLYGON |
_NEW_COLOR | _NEW_BUFFERS),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_HIZ |
BRW_NEW_BATCH),
.cache = 0,
},

View file