From e1f9820b47e3f124c49cd2ab4e09328e0cc3e638 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 26 Jan 2012 11:01:36 -0800 Subject: [PATCH] i965: Rewrite the HiZ op This is a combination of 4 commits. The first commit rewrites the HiZ op, and the remaining three fix bugs introduced by the rewrite. ======== commit 1 ======== i965: Rewrite the HiZ op The HiZ op was implemented as a meta-op. This patch reimplements it by emitting a special HiZ batch. This fixes several known bugs, and likely a lot of undiscovered ones too. ==== Why the HiZ meta-op needed to die ==== The HiZ op was implemented as a meta-op, which caused lots of trouble. All other meta-ops occur as a result of some GL call (for example, glClear and glGenerateMipmap), but the HiZ meta-op was special. It was called in places that Mesa (in particular, the vbo and swrast modules) did not expect---and were not prepared for---state changes to occur (for example: glDraw; glCallList; within glBegin/End blocks; and within swrast_prepare_render as a result of intel_miptree_map). In an attempt to work around these unexpected state changes, I added two hooks in i965: - A hook for glDraw, located in brw_predraw_resolve_buffers (which is called in the glDraw path). This hook detected if a predraw resolve meta-op had occurred, and would hackishly repropagate some GL state if necessary. This ensured that the meta-op state changes would not interfere with the vbo module's subsequent execution of glDraw. - A hook for glBegin, implemented by brwPrepareExecBegin. This hook resolved all buffers before entering a glBegin/End block, thus preventing an infinitely recurring call to vbo_exec_FlushVertices. The vbo module calls vbo_exec_FlushVertices to flush its vertex queue in response to GL state changes. Unfortunately, these hooks were not sufficient. 
The meta-op state changes still interacted badly with glPopAttrib (as discovered in bug 44927) and with swrast rendering (as discovered by debugging gen6's swrast fallback for glBitmap). I expect there are more undiscovered bugs. Rather than play whack-a-mole in a minefield, the sane approach is to replace the HiZ meta-op with something safer. ==== How it was killed ==== This patch consists of several logical components: 1. Rewrite the HiZ op by replacing function gen6_resolve_slice with gen6_hiz_exec and gen7_hiz_exec. The new functions do not call a meta-op, but instead manually construct and emit a batch to "draw" the HiZ op's rectangle primitive. The new functions alter no GL state. 2. Add fields to brw_context::hiz for the new HiZ op. 3. Emit a workaround flush when toggling 3DSTATE_VS.VsFunctionEnable. 4. Kill all dead HiZ code: - the function gen6_resolve_slice - the dirty flag BRW_NEW_HIZ - the dead fields in brw_context::hiz - the state packet manipulation triggered by the now removed brw_context::hiz::op - the meta-op workaround in brw_predraw_resolve_buffers (discussed above) - the meta-op workaround brwPrepareExecBegin (discussed above) Note: This is a candidate for the 8.0 branch. Reviewed-by: Eric Anholt Reviewed-by: Kenneth Graunke Acked-by: Paul Berry Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=43327 Reported-by: xunx.fang@intel.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44927 Reported-by: chao.a.chen@intel.com Signed-off-by: Chad Versace (cherry picked from commit 7b36c68ba6899c7f30fd56b7ef07a78b027771ac) ======== commit 2 ======== i965/gen7: Fix GPU hangs from the HiZ op. The wm max threads is in the same dword as the dispatch enable. The hardware gets super angry if you set max threads to 0, even if you aren't dispatching threads. (cherry picked from commit e5b225afbd581ccf5d61e9d6c566e26e74abe91e) ======== commit 3 ======== i965/gen7: Fix the length of the DS state packet in the HiZ op. 
Reviewed-by: Kenneth Graunke Reviewed-by: Chad Versace (cherry picked from commit cdcfaa64e3a237517a1e1c913e8ea18d8bc5fa63) ======== commit 4 ======== i965/gen7: Fix the length of the MULTISAMPLE state packet in the HiZ op. Reviewed-by: Kenneth Graunke Reviewed-by: Chad Versace (cherry picked from commit a7750c9fb5db9d76318c35a901f5359bf586cddf) --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_context.c | 55 -- src/mesa/drivers/dri/i965/brw_context.h | 40 +- src/mesa/drivers/dri/i965/brw_draw.c | 47 +- src/mesa/drivers/dri/i965/brw_state_upload.c | 1 - src/mesa/drivers/dri/i965/brw_vtbl.c | 14 +- src/mesa/drivers/dri/i965/gen6_clip_state.c | 20 +- src/mesa/drivers/dri/i965/gen6_depthstencil.c | 9 +- src/mesa/drivers/dri/i965/gen6_hiz.c | 850 ++++++++++++------ src/mesa/drivers/dri/i965/gen6_hiz.h | 38 + src/mesa/drivers/dri/i965/gen6_sf_state.c | 16 +- src/mesa/drivers/dri/i965/gen6_vs_state.c | 9 + src/mesa/drivers/dri/i965/gen6_wm_state.c | 20 +- src/mesa/drivers/dri/i965/gen7_clip_state.c | 20 +- src/mesa/drivers/dri/i965/gen7_hiz.c | 464 ++++++++++ src/mesa/drivers/dri/i965/gen7_hiz.h | 43 + src/mesa/drivers/dri/i965/gen7_sf_state.c | 19 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 18 - src/mesa/drivers/dri/i965/junk | 0 19 files changed, 1157 insertions(+), 527 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/gen7_hiz.c create mode 100644 src/mesa/drivers/dri/i965/gen7_hiz.h create mode 100644 src/mesa/drivers/dri/i965/junk diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 3eeac6f91b2..dad60763955 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -101,6 +101,7 @@ i965_C_SOURCES := \ gen7_cc_state.c \ gen7_clip_state.c \ gen7_disable.c \ + gen7_hiz.c \ gen7_misc_state.c \ gen7_sampler_state.c \ gen7_sf_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c index f7bda1d0bd8..a66ccc7ec51 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -41,8 +41,6 @@ #include "brw_draw.h" #include "brw_state.h" -#include "gen6_hiz.h" - #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" @@ -57,58 +55,6 @@ * Mesa's Driver Functions ***************************************/ -/** - * \brief Prepare for entry into glBegin/glEnd block. - * - * Resolve buffers before entering a glBegin/glEnd block. This is - * necessary to prevent recursive calls to FLUSH_VERTICES. - * - * This resolves the depth buffer of each enabled depth texture and the HiZ - * buffer of the attached depth renderbuffer. - * - * Details - * ------- - * When vertices are queued during a glBegin/glEnd block, those vertices must - * be drawn before any rendering state changes. To ensure this, Mesa calls - * FLUSH_VERTICES as a prehook to such state changes. Therefore, - * FLUSH_VERTICES itself cannot change rendering state without falling into a - * recursive trap. - * - * This precludes meta-ops, namely buffer resolves, from occurring while any - * vertices are queued. To prevent that situation, we resolve some buffers on - * entering a glBegin/glEnd - * - * \see brwCleanupExecEnd() - */ -static void brwPrepareExecBegin(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = &brw->intel; - struct intel_renderbuffer *draw_irb; - struct intel_texture_object *tex_obj; - - if (!intel->has_hiz) { - /* The context uses no feature that requires buffer resolves. */ - return; - } - - /* Resolve each enabled texture. 
*/ - for (int i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { - if (!ctx->Texture.Unit[i]._ReallyEnabled) - continue; - tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) - continue; - intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); - } - - /* Resolve the attached depth buffer. */ - draw_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - if (draw_irb) { - intel_renderbuffer_resolve_hiz(intel, draw_irb); - } -} - static void brwInitDriverFunctions(struct intel_screen *screen, struct dd_function_table *functions) { @@ -117,7 +63,6 @@ static void brwInitDriverFunctions(struct intel_screen *screen, brwInitFragProgFuncs( functions ); brw_init_queryobj_functions(functions); - functions->PrepareExecBegin = brwPrepareExecBegin; functions->BeginTransformFeedback = brw_begin_transform_feedback; if (screen->gen >= 7) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c027beff3d6..72e505940e9 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -119,6 +119,10 @@ #define BRW_MAX_CURBE (32*16) struct brw_context; +struct brw_instruction; +struct brw_vs_prog_key; +struct brw_wm_prog_key; +struct brw_wm_prog_data; enum brw_state_id { BRW_STATE_URB_FENCE, @@ -144,7 +148,6 @@ enum brw_state_id { BRW_STATE_VS_CONSTBUF, BRW_STATE_PROGRAM_CACHE, BRW_STATE_STATE_BASE_ADDRESS, - BRW_STATE_HIZ, BRW_STATE_SOL_INDICES, }; @@ -174,7 +177,6 @@ enum brw_state_id { #define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) #define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) #define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) -#define BRW_NEW_HIZ (1 << BRW_STATE_HIZ) #define BRW_NEW_SOL_INDICES (1 << BRW_STATE_SOL_INDICES) struct brw_state_flags { @@ -950,38 +952,18 @@ struct brw_context int state_batch_count; /** - * \brief State needed to execute HiZ meta-ops + * \brief State needed to execute HiZ 
ops. * - * All fields except \c op are initialized by gen6_hiz_init(). + * \see gen6_hiz_init() + * \see gen6_hiz_exec() */ struct brw_hiz_state { - /** - * \brief Indicates which HiZ operation is in progress. + /** \brief VBO for rectangle primitive. * - * See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve + * Rather than using glGenBuffers(), we allocate the VBO directly + * through drm. */ - enum brw_hiz_op { - BRW_HIZ_OP_NONE = 0, - BRW_HIZ_OP_DEPTH_CLEAR, - BRW_HIZ_OP_DEPTH_RESOLVE, - BRW_HIZ_OP_HIZ_RESOLVE, - } op; - - /** \brief Shader state */ - struct { - GLuint program; - GLuint position_vbo; - GLint position_location; - } shader; - - /** \brief VAO for the rectangle primitive's vertices. */ - GLuint vao; - - GLuint fbo; - struct gl_renderbuffer *depth_rb; + drm_intel_bo *vertex_bo; } hiz; struct brw_sol_state { diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 621195d0229..d6f4653e2d6 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -126,12 +126,7 @@ static void gen6_set_prim(struct brw_context *brw, DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); - if (brw->hiz.op) { - assert(prim->mode == GL_TRIANGLES); - hw_prim = _3DPRIM_RECTLIST; - } else { - hw_prim = prim_to_hw_prim[prim->mode]; - } + hw_prim = prim_to_hw_prim[prim->mode]; if (hw_prim != brw->primitive) { brw->primitive = hw_prim; @@ -307,17 +302,11 @@ brw_predraw_resolve_buffers(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct intel_renderbuffer *depth_irb; struct intel_texture_object *tex_obj; - bool did_resolve = false; - - /* Avoid recursive HiZ op. */ - if (brw->hiz.op) { - return; - } /* Resolve the depth buffer's HiZ buffer. 
*/ depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); if (depth_irb && depth_irb->mt) { - did_resolve |= intel_renderbuffer_resolve_hiz(intel, depth_irb); + intel_renderbuffer_resolve_hiz(intel, depth_irb); } /* Resolve depth buffer of each enabled depth texture. */ @@ -327,33 +316,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw) tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); if (!tex_obj || !tex_obj->mt) continue; - did_resolve |= intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); - } - - if (did_resolve) { - /* Call vbo_bind_array() to synchronize the vbo module's vertex - * attributes to the gl_context's. - * - * Details - * ------- - * The vbo module tracks vertex attributes separately from the - * gl_context. Specifically, the vbo module maintins vertex attributes - * in vbo_exec_context::array::inputs, which is synchronized with - * gl_context::Array::ArrayObj::VertexAttrib by vbo_bind_array(). - * vbo_draw_arrays() calls vbo_bind_array() to perform the - * synchronization before calling the real draw call, - * vbo_context::draw_arrays. - * - * At this point (after performing a resolve meta-op but before calling - * vbo_bind_array), the gl_context's vertex attributes have been - * restored to their original state (that is, their state before the - * meta-op began), but the vbo module's vertex attribute are those used - * in the last meta-op. Therefore we must manually synchronize the two with - * vbo_bind_array() before continuing with the original draw command. 
- */ - _mesa_update_state(ctx); - vbo_bind_arrays(ctx); - _mesa_update_state(ctx); + intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); } } @@ -372,9 +335,7 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); - if (depth_irb && - ctx->Depth.Mask && - !brw->hiz.op) { + if (depth_irb && ctx->Depth.Mask) { intel_renderbuffer_set_needs_depth_resolve(depth_irb); } } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index d071f87693f..f5e6fdc4e16 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -372,7 +372,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS), - DEFINE_BIT(BRW_NEW_HIZ), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index be975d1c41b..724111c3dfe 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -50,6 +50,7 @@ #include "brw_wm.h" #include "gen6_hiz.h" +#include "gen7_hiz.h" #include "glsl/ralloc.h" @@ -70,9 +71,11 @@ static void brw_destroy_context( struct intel_context *intel ) brw_destroy_state(brw); brw_draw_destroy( brw ); + ralloc_free(brw->wm.compile_data); dri_bo_release(&brw->curbe.curbe_bo); + dri_bo_release(&brw->hiz.vertex_bo); dri_bo_release(&brw->vs.const_bo); dri_bo_release(&brw->wm.const_bo); @@ -236,8 +239,15 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.is_hiz_depth_format = brw_is_hiz_depth_format; if (brw->intel.has_hiz) { - brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice; - brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice; + if (brw->intel.gen == 7) { + brw->intel.vtbl.resolve_depth_slice = gen7_resolve_depth_slice; + brw->intel.vtbl.resolve_hiz_slice = 
gen7_resolve_hiz_slice; + } else if (brw->intel.gen == 6) { + brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice; + brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice; + } else { + assert(0); + } } if (brw->intel.gen >= 7) { diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index d2a5f755726..b3bb8aee3ec 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -67,23 +67,6 @@ upload_clip_state(struct brw_context *brw) GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; } - if (brw->hiz.op) { - /* HiZ operations emit a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 3D Primitives Overview: - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - */ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - return; - } - if (!ctx->Transform.DepthClamp) depth_clamp = GEN6_CLIP_Z_TEST; @@ -124,8 +107,7 @@ const struct brw_tracked_state gen6_clip_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_LIGHT, .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ), + BRW_NEW_FRAGMENT_PROGRAM), .cache = 0 }, .emit = upload_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index d9f686a259e..4ea517ff336 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -82,11 +82,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw) } /* _NEW_DEPTH */ - if ((ctx->Depth.Test || brw->hiz.op) && depth_irb) { - assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_RESOLVE || ctx->Depth.Test); - assert(brw->hiz.op != BRW_HIZ_OP_HIZ_RESOLVE || !ctx->Depth.Test); - assert(brw->hiz.op != 
BRW_HIZ_OP_DEPTH_CLEAR || !ctx->Depth.Test); - + if (ctx->Depth.Test && depth_irb) { ds->ds2.depth_test_enable = ctx->Depth.Test; ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func); ds->ds2.depth_write_enable = ctx->Depth.Mask; @@ -98,8 +94,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw) const struct brw_tracked_state gen6_depth_stencil_state = { .dirty = { .mesa = _NEW_DEPTH | _NEW_STENCIL | _NEW_BUFFERS, - .brw = (BRW_NEW_BATCH | - BRW_NEW_HIZ), + .brw = BRW_NEW_BATCH, .cache = 0, }, .emit = gen6_upload_depth_stencil_state, diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.c b/src/mesa/drivers/dri/i965/gen6_hiz.c index d7698ed2aa2..6d46b28297b 100644 --- a/src/mesa/drivers/dri/i965/gen6_hiz.c +++ b/src/mesa/drivers/dri/i965/gen6_hiz.c @@ -21,345 +21,621 @@ * IN THE SOFTWARE. */ -#include "gen6_hiz.h" - #include -#include "mesa/drivers/common/meta.h" - -#include "mesa/main/arrayobj.h" -#include "mesa/main/bufferobj.h" -#include "mesa/main/depth.h" -#include "mesa/main/enable.h" -#include "mesa/main/fbobject.h" -#include "mesa/main/framebuffer.h" -#include "mesa/main/get.h" -#include "mesa/main/renderbuffer.h" -#include "mesa/main/shaderapi.h" -#include "mesa/main/varray.h" - +#include "intel_batchbuffer.h" #include "intel_fbo.h" #include "intel_mipmap_tree.h" -#include "intel_regions.h" -#include "intel_tex.h" #include "brw_context.h" #include "brw_defines.h" +#include "brw_state.h" -static const uint32_t gen6_hiz_meta_save = - - /* Disable alpha, depth, and stencil test. - * - * See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - */ - MESA_META_ALPHA_TEST | - MESA_META_DEPTH_TEST | - MESA_META_STENCIL_TEST | - - /* Disable viewport mapping. 
- * - * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3 - * 3D Primitives Overview: - * RECTLIST: - * Viewport Mapping must be DISABLED (as is typical with the use of - * screen- space coordinates). - * - * We must also manually disable 3DSTATE_SF.Viewport_Transform_Enable. - */ - MESA_META_VIEWPORT | - - /* Disable clipping. - * - * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3 - * 3D Primitives Overview: - * Either the CLIP unit should be DISABLED, or the CLIP unit’s Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - */ - MESA_META_CLIP | - - /* Render a solid rectangle (set 3DSTATE_SF.FrontFace_Fill_Mode). - * - * From page 249 of the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, FrontFace_Fill_Mode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. - * Also see field BackFace_Fill_Mode. - * - * Note: MESA_META_RASTERIZAION also disables culling, but that is - * irrelevant. See 3DSTATE_SF.Cull_Mode. - */ - MESA_META_RASTERIZATION | - - /* Each HiZ operation uses a vertex shader and VAO. */ - MESA_META_SHADER | - MESA_META_VERTEX | - - /* Disable scissoring. - * - * Scissoring is disabled for resolves because a resolve operation - * should resolve the entire buffer. Scissoring is disabled for depth - * clears because, if we are performing a partial depth clear, then we - * specify the clear region with the RECTLIST vertices. - */ - MESA_META_SCISSOR | - - MESA_META_SELECT_FEEDBACK; - -static void -gen6_hiz_get_framebuffer_enum(struct gl_context *ctx, - GLenum *bind_enum, - GLenum *get_enum) -{ - if (ctx->Extensions.EXT_framebuffer_blit && ctx->API == API_OPENGL) { - /* Different buffers may be bound to GL_DRAW_FRAMEBUFFER and - * GL_READ_FRAMEBUFFER. Take care to not disrupt the read buffer. 
- */ - *bind_enum = GL_DRAW_FRAMEBUFFER; - *get_enum = GL_DRAW_FRAMEBUFFER_BINDING; - } else { - /* The enums GL_DRAW_FRAMEBUFFER and GL_READ_FRAMEBUFFER do not exist. - * The bound framebuffer is both the read and draw buffer. - */ - *bind_enum = GL_FRAMEBUFFER; - *get_enum = GL_FRAMEBUFFER_BINDING; - } -} +#include "gen6_hiz.h" /** - * Initialize static data needed for HiZ operations. + * \name Constants for HiZ VBO + * \{ + * + * \see brw_context::hiz::vertex_bo */ -static void +#define GEN6_HIZ_NUM_VERTICES 3 +#define GEN6_HIZ_NUM_VUE_ELEMS 8 +#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES \ + * GEN6_HIZ_NUM_VUE_ELEMS \ + * sizeof(float)) +/** \} */ + +/** + * \brief Initialize data needed for the HiZ op. + * + * This called when executing the first HiZ op. + * \see brw_context::hiz + */ +void gen6_hiz_init(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; struct brw_hiz_state *hiz = &brw->hiz; - GLenum fb_bind_enum, fb_get_enum; - if (hiz->fbo != 0) - return; + hiz->vertex_bo = drm_intel_bo_alloc(intel->bufmgr, "bufferobj", + GEN6_HIZ_VBO_SIZE, /* size */ + 64); /* alignment */ - gen6_hiz_get_framebuffer_enum(ctx, &fb_bind_enum, &fb_get_enum); + if (!hiz->vertex_bo) + _mesa_error(ctx, GL_OUT_OF_MEMORY, "failed to allocate internal VBO"); +} - /* Create depthbuffer. +void +gen6_hiz_emit_batch_head(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct brw_hiz_state *hiz = &brw->hiz; + + /* To ensure that the batch contains only the resolve, flush the batch + * before beginning and after finishing emitting the resolve packets. * - * Until glRenderbufferStorage is called, the renderbuffer hash table - * maps the renderbuffer name to a dummy renderbuffer. We need the - * renderbuffer to be registered in the hash table so that framebuffer - * validation succeeds, so we hackishly allocate storage then immediately - * discard it. 
+ * Ideally, we would not need to flush for the resolve op. But, I suspect + * that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in + * a single batch, and there is no safe way to ensure that other than by + * fencing the resolve with flushes. Ideally, we would just detect if + * a batch is in progress and do the right thing, but that would require + * the ability to *safely* access brw_context::state::dirty::brw + * outside of the brw_upload_state() codepath. */ - GLuint depth_rb_name; - _mesa_GenRenderbuffersEXT(1, &depth_rb_name); - _mesa_BindRenderbufferEXT(GL_RENDERBUFFER, depth_rb_name); - _mesa_RenderbufferStorageEXT(GL_RENDERBUFFER, GL_DEPTH_COMPONENT, 32, 32); - _mesa_reference_renderbuffer(&hiz->depth_rb, - _mesa_lookup_renderbuffer(ctx, depth_rb_name)); - intel_miptree_release(&((struct intel_renderbuffer*) hiz->depth_rb)->mt); + intel_flush(ctx); - /* Setup FBO. */ - _mesa_GenFramebuffersEXT(1, &hiz->fbo); - _mesa_BindFramebufferEXT(fb_bind_enum, hiz->fbo); - _mesa_FramebufferRenderbufferEXT(fb_bind_enum, - GL_DEPTH_ATTACHMENT, - GL_RENDERBUFFER, - hiz->depth_rb->Name); + /* CMD_PIPELINE_SELECT + * + * Select the 3D pipeline, as opposed to the media pipeline. + */ + { + BEGIN_BATCH(1); + OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16); + ADVANCE_BATCH(); + } - /* Compile vertex shader. */ - const char *vs_source = - "attribute vec4 position;\n" - "void main()\n" - "{\n" - " gl_Position = position;\n" - "}\n"; - GLuint vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER); - _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL); - _mesa_CompileShaderARB(vs); + /* 3DSTATE_MULTISAMPLE */ + { + int length = intel->gen == 7 ? 4 : 3; - /* Compile fragment shader. 
*/ - const char *fs_source = "void main() {}"; - GLuint fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER); - _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL); - _mesa_CompileShaderARB(fs); + BEGIN_BATCH(length); + OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2)); + OUT_BATCH(MS_PIXEL_LOCATION_CENTER | + MS_NUMSAMPLES_1); + OUT_BATCH(0); + if (length >= 4) + OUT_BATCH(0); + ADVANCE_BATCH(); - /* Link and use program. */ - hiz->shader.program = _mesa_CreateProgramObjectARB(); - _mesa_AttachShader(hiz->shader.program, vs); - _mesa_AttachShader(hiz->shader.program, fs); - _mesa_LinkProgramARB(hiz->shader.program); - _mesa_UseProgramObjectARB(hiz->shader.program); + } - /* Create and bind VAO. */ - _mesa_GenVertexArrays(1, &hiz->vao); - _mesa_BindVertexArray(hiz->vao); + /* 3DSTATE_SAMPLE_MASK */ + { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(1); + ADVANCE_BATCH(); + } - /* Setup VBO for 'position'. */ - hiz->shader.position_location = - _mesa_GetAttribLocationARB(hiz->shader.program, "position"); - _mesa_GenBuffersARB(1, &hiz->shader.position_vbo); - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, hiz->shader.position_vbo); - _mesa_VertexAttribPointerARB(hiz->shader.position_location, - 2, /*components*/ - GL_FLOAT, - GL_FALSE, /*normalized?*/ - 0, /*stride*/ - NULL); - _mesa_EnableVertexAttribArrayARB(hiz->shader.position_location); - - /* Cleanup. 
*/ - _mesa_DeleteShader(vs); - _mesa_DeleteShader(fs); + /* CMD_STATE_BASE_ADDRESS + * + * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS: + * The following commands must be reissued following any change to the + * base addresses: + * 3DSTATE_CC_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * 3DSTATE_SAMPLER_STATE_POINTERS + * 3DSTATE_VIEWPORT_STATE_POINTERS + * MEDIA_STATE_POINTERS + */ + { + BEGIN_BATCH(10); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); + OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */ + /* SurfaceStateBaseAddress */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); + /* DynamicStateBaseAddress */ + OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER | + I915_GEM_DOMAIN_INSTRUCTION), 0, 1); + OUT_BATCH(1); /* IndirectObjectBaseAddress */ + OUT_BATCH(1); /* InstructionBaseAddress */ + OUT_BATCH(1); /* GeneralStateUpperBound */ + OUT_BATCH(1); /* DynamicStateUpperBound */ + OUT_BATCH(1); /* IndirectObjectUpperBound*/ + OUT_BATCH(1); /* InstructionAccessUpperBound */ + ADVANCE_BATCH(); + } } -/** - * Wrap \c brw->hiz.depth_rb around a miptree. - * - * \see gen6_hiz_teardown_depth_buffer() - */ -static void -gen6_hiz_setup_depth_buffer(struct brw_context *brw, - struct intel_mipmap_tree *mt, - unsigned int level, - unsigned int layer) +void +gen6_hiz_emit_vertices(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer) { - struct gl_renderbuffer *rb = brw->hiz.depth_rb; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - - rb->Format = mt->format; - rb->_BaseFormat = _mesa_get_format_base_format(rb->Format); - rb->InternalFormat = rb->_BaseFormat; - rb->Width = mt->level[level].width; - rb->Height = mt->level[level].height; - - irb->mt_level = level; - irb->mt_layer = layer; - - intel_miptree_reference(&irb->mt, mt); - intel_renderbuffer_set_draw_offset(irb); -} - -/** - * Release the region from \c brw->hiz.depth_rb. 
- * - * \see gen6_hiz_setup_depth_buffer() - */ -static void -gen6_hiz_teardown_depth_buffer(struct gl_renderbuffer *rb) -{ - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - intel_miptree_release(&irb->mt); -} - -static void -gen6_resolve_slice(struct intel_context *intel, - struct intel_mipmap_tree *mt, - unsigned int level, - unsigned int layer, - enum brw_hiz_op op) -{ - struct gl_context *ctx = &intel->ctx; - struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; struct brw_hiz_state *hiz = &brw->hiz; - GLenum fb_bind_enum, fb_get_enum; - /* Do not recurse. */ - assert(!brw->hiz.op); - - assert(mt->hiz_mt != NULL); - assert(level >= mt->first_level); - assert(level <= mt->last_level); - assert(layer < mt->level[level].depth); - - gen6_hiz_get_framebuffer_enum(ctx, &fb_bind_enum, &fb_get_enum); - - /* Save state. */ - GLint save_drawbuffer; - GLint save_renderbuffer; - _mesa_meta_begin(ctx, gen6_hiz_meta_save); - _mesa_GetIntegerv(fb_get_enum, &save_drawbuffer); - _mesa_GetIntegerv(GL_RENDERBUFFER_BINDING, &save_renderbuffer); - - /* Initialize context data for HiZ operations. */ - gen6_hiz_init(brw); - - /* Set depth state. */ - if (!ctx->Depth.Mask) { - /* This sets 3DSTATE_WM.Depth_Buffer_Write_Enable. */ - _mesa_DepthMask(GL_TRUE); - } - if (op == BRW_HIZ_OP_DEPTH_RESOLVE) { - _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); - _mesa_DepthFunc(GL_NEVER); - } - - /* Setup FBO. */ - gen6_hiz_setup_depth_buffer(brw, mt, level, layer); - _mesa_BindFramebufferEXT(fb_bind_enum, hiz->fbo); - - - /* A rectangle primitive (3DPRIM_RECTLIST) consists of only three vertices. - * The vertices reside in screen space with DirectX coordinates (this is, - * (0, 0) is the upper left corner). + /* Setup VBO for the rectangle primitive.. + * + * A rectangle primitive (3DPRIM_RECTLIST) consists of only three + * vertices. The vertices reside in screen space with DirectX coordinates + * (that is, (0, 0) is the upper left corner). 
* * v2 ------ implied * | | * | | * v0 ----- v1 - */ - const int width = hiz->depth_rb->Width; - const int height = hiz->depth_rb->Height; - const GLfloat positions[] = { - 0, height, - width, height, - 0, 0, - }; - - /* Setup program and vertex attributes. */ - _mesa_UseProgramObjectARB(hiz->shader.program); - _mesa_BindVertexArray(hiz->vao); - _mesa_BindBufferARB(GL_ARRAY_BUFFER, hiz->shader.position_vbo); - _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(positions), positions, - GL_DYNAMIC_DRAW_ARB); - - /* Execute the HiZ operation. */ - brw->hiz.op = op; - brw->state.dirty.brw |= BRW_NEW_HIZ; - _mesa_DrawArrays(GL_TRIANGLES, 0, 3); - brw->state.dirty.brw |= BRW_NEW_HIZ; - brw->hiz.op = BRW_HIZ_OP_NONE; - - /* Restore state. * - * The order in which state is restored is significant. The draw buffer - * used for the HiZ op has no stencil buffer, and glStencilFunc() clamps - * the stencil reference value to the range allowed by the draw buffer's - * number of stencil bits. So, the draw buffer binding must be restored - * before the stencil state, or else the stencil ref will be clamped to 0. + * Since the VS is disabled, the clipper loads each VUE directly from + * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and + * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: + * dw0: Reserved, MBZ. + * dw1: Render Target Array Index. The HiZ op does not use indexed + * vertices, so set the dword to 0. + * dw2: Viewport Index. The HiZ op disables viewport mapping and + * scissoring, so set the dword to 0. + * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so + * set the dword to 0. + * dw4: Vertex Position X. + * dw5: Vertex Position Y. + * dw6: Vertex Position Z. + * dw7: Vertex Position W. + * + * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 + * "Vertex URB Entry (VUE) Formats". 
*/ - gen6_hiz_teardown_depth_buffer(hiz->depth_rb); - _mesa_BindRenderbufferEXT(GL_RENDERBUFFER, save_renderbuffer); - _mesa_BindFramebufferEXT(fb_bind_enum, save_drawbuffer); - _mesa_meta_end(ctx); + { + const int width = mt->level[level].width; + const int height = mt->level[level].height; + + const float vertices[GEN6_HIZ_VBO_SIZE] = { + /* v0 */ 0, 0, 0, 0, 0, height, 0, 1, + /* v1 */ 0, 0, 0, 0, width, height, 0, 1, + /* v2 */ 0, 0, 0, 0, 0, 0, 0, 1, + }; + + drm_intel_bo_subdata(hiz->vertex_bo, 0, GEN6_HIZ_VBO_SIZE, vertices); + } + + /* 3DSTATE_VERTEX_BUFFERS */ + { + const int num_buffers = 1; + const int batch_length = 1 + 4 * num_buffers; + + uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA | + (GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT; + + if (intel->gen >= 7) + dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); + OUT_BATCH(dw0); + /* start address */ + OUT_RELOC(hiz->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + /* end address */ + OUT_RELOC(hiz->vertex_bo, I915_GEM_DOMAIN_VERTEX, + 0, hiz->vertex_bo->size - 1); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_VERTEX_ELEMENTS + * + * Fetch dwords 0 - 7 from each VUE. See the comments above where + * hiz->vertex_bo is filled with data. 
+ */ + { + const int num_elements = 2; + const int batch_length = 1 + 2 * num_elements; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); + /* Element 0 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 0 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + /* Element 1 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 16 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + ADVANCE_BATCH(); + } } +/** + * \brief Execute a HiZ op on a miptree slice. + * + * To execute the HiZ op, this function manually constructs and emits a batch + * to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed + * before constructing and after emitting the batch. + * + * This function alters no GL state. + * + * For an overview of HiZ ops, see the following sections of the Sandy Bridge + * PRM, Volume 1, Part 2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ +static void +gen6_hiz_exec(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer, + enum gen6_hiz_op op) +{ + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + struct brw_hiz_state *hiz = &brw->hiz; + + assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. 
*/ + assert(mt->hiz_mt != NULL); + intel_miptree_check_level_layer(mt, level, layer); + + if (hiz->vertex_bo == NULL) + gen6_hiz_init(brw); + + if (hiz->vertex_bo == NULL) { + /* Ouch. Give up. */ + return; + } + + gen6_hiz_emit_batch_head(brw); + gen6_hiz_emit_vertices(brw, mt, level, layer); + + /* 3DSTATE_URB + * + * Assign the entire URB to the VS. Even though the VS is disabled, URB space + * is still needed because the clipper loads the VUEs from the URB. From + * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, + * Dword 1.15:0 "VS Number of URB Entries": + * This field is always used (even if VS Function Enable is DISABLED). + * + * The warning below appears in the PRM (Section 3DSTATE_URB), but we can + * safely ignore it because this batch contains only one draw call. + * Because of URB corruption caused by allocating a previous GS unit + * URB entry to the VS unit, software is required to send a “GS NULL + * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) + * plus a dummy DRAW call before any case where VS will be taking over + * GS URB space. + */ + { + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); + OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CC_STATE_POINTERS + * + * The pointer offsets are relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + * + * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. + */ + { + uint32_t depthstencil_offset; + gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_BATCH(1); /* BLEND_STATE offset */ + OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */ + OUT_BATCH(1); /* COLOR_CALC_STATE offset */ + ADVANCE_BATCH(); + } + + /* 3DSTATE_VS + * + * Disable vertex shader. 
+ */ + { + /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section + * 3DSTATE_VS, Dword 5.0 "VS Function Enable": + * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS + * command that causes the VS Function Enable to toggle. Pipeline + * flush can be executed by sending a PIPE_CONTROL command with CS + * stall bit set and a post sync operation. + */ + intel_emit_post_sync_nonzero_flush(intel); + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_GS + * + * Disable the geometry shader. + */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLIP + * + * Disable the clipper. + * + * The HiZ op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. + */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SF + * + * Disable ViewportTransformEnable (dw2.1) + * + * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + * + * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.6:5) + * and BackFaceFillMode (dw2.4:3) to SOLID(0). 
+ * + * From the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * (rendering rectangle (RECTLIST) objects. + */ + { + BEGIN_BATCH(20); + OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); + OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */ + 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); + for (int i = 0; i < 18; ++i) + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_WM + * + * Disable thread dispatch (dw5.19) and enable the HiZ op. + * + * Even though thread dispatch is disabled, max threads (dw5.25:31) must be + * nonzero to prevent the GPU from hanging. See the valid ranges in the + * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31 + * "Maximum Number Of Threads". + */ + { + uint32_t dw4 = 0; + + switch (op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + assert(!"not implemented"); + dw4 |= GEN6_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw4 |= GEN6_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; + } + + BEGIN_BATCH(9); + OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(dw4); + OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); + OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_BUFFER */ + { + uint32_t width = mt->level[level].width; + uint32_t height = mt->level[level].height; + + uint32_t tile_x; + uint32_t tile_y; + uint32_t offset; + { + /* Construct a dummy renderbuffer just to extract tile offsets. 
*/ + struct intel_renderbuffer rb; + rb.mt = mt; + rb.mt_level = level; + rb.mt_layer = layer; + intel_renderbuffer_set_draw_offset(&rb); + offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y); + } + + uint32_t format; + switch (mt->format) { + case MESA_FORMAT_Z16: format = BRW_DEPTHFORMAT_D16_UNORM; break; + case MESA_FORMAT_Z32_FLOAT: format = BRW_DEPTHFORMAT_D32_FLOAT; break; + case MESA_FORMAT_X8_Z24: format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break; + default: assert(0); break; + } + + intel_emit_post_sync_nonzero_flush(intel); + intel_emit_depth_stall_flushes(intel); + + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) | + format << 18 | + 1 << 21 | /* separate stencil enable */ + 1 << 22 | /* hiz enable */ + BRW_TILEWALK_YMAJOR << 26 | + 1 << 27 | /* y-tiled */ + BRW_SURFACE_2D << 29); + OUT_RELOC(mt->region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + offset); + OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 | + (width + tile_x - 1) << 6 | + (height + tile_y - 1) << 19); + OUT_BATCH(0); + OUT_BATCH(tile_x | + tile_y << 16); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HIER_DEPTH_BUFFER */ + { + struct intel_region *hiz_region = mt->hiz_mt->region; + + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); + OUT_RELOC(hiz_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STENCIL_BUFFER */ + { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLEAR_PARAMS + * + * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: + * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE + * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. 
+ */ + { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DRAWING_RECTANGLE */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((mt->level[level].width - 1) & 0xffff) | + ((mt->level[level].height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DPRIMITIVE */ + { + BEGIN_BATCH(6); + OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | + _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | + GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(1); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* See comments above at first invocation of intel_flush() in + * gen6_hiz_emit_batch_head(). + */ + intel_flush(ctx); + + /* Be safe. */ + brw->state.dirty.brw = ~0; + brw->state.dirty.cache = ~0; +} + +/** + * \param out_offset is relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. 
+ */ +void +gen6_hiz_emit_depth_stencil_state(struct brw_context *brw, + enum gen6_hiz_op op, + uint32_t *out_offset) +{ + struct gen6_depth_stencil_state *state; + state = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*state), 64, + out_offset); + memset(state, 0, sizeof(*state)); + + /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + state->ds2.depth_write_enable = 1; + if (op == GEN6_HIZ_OP_DEPTH_RESOLVE) { + state->ds2.depth_test_enable = 1; + state->ds2.depth_test_func = COMPAREFUNC_NEVER; + } +} + +/** \see intel_context::vtbl::resolve_hiz_slice */ void gen6_resolve_hiz_slice(struct intel_context *intel, struct intel_mipmap_tree *mt, uint32_t level, uint32_t layer) { - gen6_resolve_slice(intel, mt, level, layer, BRW_HIZ_OP_HIZ_RESOLVE); + gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); } - +/** \see intel_context::vtbl::resolve_depth_slice */ void gen6_resolve_depth_slice(struct intel_context *intel, struct intel_mipmap_tree *mt, uint32_t level, uint32_t layer) { - gen6_resolve_slice(intel, mt, level, layer, BRW_HIZ_OP_DEPTH_RESOLVE); + gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); } diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.h b/src/mesa/drivers/dri/i965/gen6_hiz.h index 49290123ff0..0a13ba076ac 100644 --- a/src/mesa/drivers/dri/i965/gen6_hiz.h +++ b/src/mesa/drivers/dri/i965/gen6_hiz.h @@ -28,6 +28,44 @@ struct intel_context; struct intel_mipmap_tree; +/** + * For an overview of the HiZ operations, see the following sections of the + * Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ +enum gen6_hiz_op { + GEN6_HIZ_OP_DEPTH_CLEAR, + GEN6_HIZ_OP_DEPTH_RESOLVE, + GEN6_HIZ_OP_HIZ_RESOLVE, +}; + +/** + * \name HiZ internals + * \{ + * + * Used internally by 
gen6_hiz_exec() and gen7_hiz_exec(). + */ + +void +gen6_hiz_init(struct brw_context *brw); + +void +gen6_hiz_emit_batch_head(struct brw_context *brw); + +void +gen6_hiz_emit_vertices(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer); + +void +gen6_hiz_emit_depth_stencil_state(struct brw_context *brw, + enum gen6_hiz_op op, + uint32_t *out_offset); +/** \} */ + void gen6_resolve_hiz_slice(struct intel_context *intel, struct intel_mipmap_tree *mt, diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 163b54c3557..07b8e6dd837 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -149,17 +149,8 @@ upload_sf_state(struct brw_context *brw) urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; - dw2 = GEN6_SF_STATISTICS_ENABLE; - - /* Enable viewport transform only if no HiZ operation is progress - * - * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). 
- */ - if (!brw->hiz.op) - dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; + dw2 = GEN6_SF_STATISTICS_ENABLE | + GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; dw3 = 0; dw4 = 0; @@ -354,8 +345,7 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_POINT | _NEW_TRANSFORM), .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ), + BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 63efaa4f32d..3392a9f442e 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -133,6 +133,15 @@ upload_vs_state(struct brw_context *brw) struct intel_context *intel = &brw->intel; uint32_t floating_point_mode = 0; + /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section + * 3DSTATE_VS, Dword 5.0 "VS Function Enable": + * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS + * command that causes the VS Function Enable to toggle. Pipeline + * flush can be executed by sending a PIPE_CONTROL command with CS + * stall bit set and a post sync operation. + */ + intel_emit_post_sync_nonzero_flush(intel); + if (brw->vs.push_const_size == 0) { /* Disable the push constant buffers. 
*/ BEGIN_BATCH(5); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 3669811be55..205e6480e07 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -149,23 +149,6 @@ upload_wm_state(struct brw_context *brw) dw4 |= (brw->wm.prog_data->first_curbe_grf_16 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2); - switch (brw->hiz.op) { - case BRW_HIZ_OP_NONE: - break; - case BRW_HIZ_OP_DEPTH_CLEAR: - dw4 |= GEN6_WM_DEPTH_CLEAR; - break; - case BRW_HIZ_OP_DEPTH_RESOLVE: - dw4 |= GEN6_WM_DEPTH_RESOLVE; - break; - case BRW_HIZ_OP_HIZ_RESOLVE: - dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - default: - assert(0); - break; - } - dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* CACHE_NEW_WM_PROG */ @@ -233,8 +216,7 @@ const struct brw_tracked_state gen6_wm_state = { _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON), .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_BATCH | - BRW_NEW_HIZ), + BRW_NEW_BATCH), .cache = (CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG) }, diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c index 9be3ce9c846..c32cd988297 100644 --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c @@ -39,23 +39,6 @@ upload_clip_state(struct brw_context *brw) /* BRW_NEW_FRAGMENT_PROGRAM */ const struct gl_fragment_program *fprog = brw->fragment_program; - if (brw->hiz.op) { - /* HiZ operations emit a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 3D Primitives Overview: - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. 
- */ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - return; - } - /* _NEW_BUFFERS */ bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; @@ -133,8 +116,7 @@ const struct brw_tracked_state gen7_clip_state = { _NEW_LIGHT | _NEW_TRANSFORM), .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ), + BRW_NEW_FRAGMENT_PROGRAM), .cache = 0 }, .emit = upload_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen7_hiz.c b/src/mesa/drivers/dri/i965/gen7_hiz.c new file mode 100644 index 00000000000..34e51ab4b50 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen7_hiz.c @@ -0,0 +1,464 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> + +#include "intel_batchbuffer.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "gen6_hiz.h" +#include "gen7_hiz.h" + +/** + * \copydoc gen6_hiz_exec() + */ +static void +gen7_hiz_exec(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer, + enum gen6_hiz_op op) +{ + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + struct brw_hiz_state *hiz = &brw->hiz; + + assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */ + assert(mt->hiz_mt != NULL); + intel_miptree_check_level_layer(mt, level, layer); + + if (hiz->vertex_bo == NULL) + gen6_hiz_init(brw); + + if (hiz->vertex_bo == NULL) { + /* Ouch. Give up. */ + return; + } + + uint32_t depth_format; + switch (mt->format) { + case MESA_FORMAT_Z16: depth_format = BRW_DEPTHFORMAT_D16_UNORM; break; + case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break; + case MESA_FORMAT_X8_Z24: depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break; + default: assert(0); break; + } + + gen6_hiz_emit_batch_head(brw); + gen6_hiz_emit_vertices(brw, mt, level, layer); + + /* 3DSTATE_URB_VS + * 3DSTATE_URB_HS + * 3DSTATE_URB_DS + * 3DSTATE_URB_GS + * + * If the 3DSTATE_URB_VS is emitted, then the others must be as well. From the + * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS: + * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be + * programmed in order for the programming of this state to be + * valid. 
+ */ + int num_vs_entries = 32; + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2)); + OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT | + 0 << GEN7_URB_STARTING_ADDRESS_SHIFT | + num_vs_entries); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS + * + * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ + { + uint32_t depthstencil_offset; + gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(depthstencil_offset | 1); + ADVANCE_BATCH(); + } + + /* 3DSTATE_VS + * + * Disable vertex shader. + */ + { + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HS + * + * Disable the hull shader. + */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_TE + * + * Disable the tessellation engine. + */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DS + * + * Disable the domain shader. + */ + { + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_GS + * + * Disable the geometry shader. 
+ */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STREAMOUT + * + * Disable streamout. + */ + { + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLIP + * + * Disable the clipper. + * + * The HiZ op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. + */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SF + * + * Disable ViewportTransformEnable (dw1.1) + * + * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + * + * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5) + * and BackFaceFillMode (dw1.4:3) to SOLID(0). + * + * From the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * (rendering rectangle (RECTLIST) objects. 
+ */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); + OUT_BATCH(depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SBE */ + { + BEGIN_BATCH(14); + OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); + OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */ + 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | + 0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); + for (int i = 0; i < 12; ++i) + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_WM + * + * Disable PS thread dispatch (dw1.29) and enable the HiZ op. + */ + { + uint32_t dw1 = 0; + + switch (op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + assert(!"not implemented"); + dw1 |= GEN7_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw1 |= GEN7_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; + } + + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); + OUT_BATCH(dw1); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_PS + * + * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite + * that, thread dispatch info must still be specified. + * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec + * states that the valid range for this field is [0x3, 0x2f]. + * - A dispatch mode must be given; that is, at least one of the + * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was + * discovered through simulator error messages. 
+ */ + { + BEGIN_BATCH(8); + OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(((brw->max_wm_threads - 1) << GEN7_PS_MAX_THREADS_SHIFT) | + GEN7_PS_32_DISPATCH_ENABLE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_BUFFER */ + { + uint32_t width = mt->level[level].width; + uint32_t height = mt->level[level].height; + + uint32_t tile_x; + uint32_t tile_y; + uint32_t offset; + { + /* Construct a dummy renderbuffer just to extract tile offsets. */ + struct intel_renderbuffer rb; + rb.mt = mt; + rb.mt_level = level; + rb.mt_layer = layer; + intel_renderbuffer_set_draw_offset(&rb); + offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y); + } + + intel_emit_depth_stall_flushes(intel); + + BEGIN_BATCH(7); + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) | + depth_format << 18 | + 1 << 22 | /* hiz enable */ + 1 << 28 | /* depth write */ + BRW_SURFACE_2D << 29); + OUT_RELOC(mt->region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + offset); + OUT_BATCH((width + tile_x - 1) << 4 | + (height + tile_y - 1) << 18); + OUT_BATCH(0); + OUT_BATCH(tile_x | + tile_y << 16); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HIER_DEPTH_BUFFER */ + { + struct intel_region *hiz_region = mt->hiz_mt->region; + + BEGIN_BATCH(3); + OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); + OUT_RELOC(hiz_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STENCIL_BUFFER */ + { + BEGIN_BATCH(3); + OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLEAR_PARAMS + * + * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2 + * 3DSTATE_CLEAR_PARAMS: + * [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along + * with 
the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, + * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). + */ + { + BEGIN_BATCH(3); + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DRAWING_RECTANGLE */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((mt->level[level].width - 1) & 0xffff) | + ((mt->level[level].height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DPRIMITIVE */ + { + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); + OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | + _3DPRIM_RECTLIST); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(1); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* See comments above at first invocation of intel_flush() in + * gen6_hiz_emit_batch_head(). + */ + intel_flush(ctx); + + /* Be safe. */ + brw->state.dirty.brw = ~0; + brw->state.dirty.cache = ~0; +} + +/** \copydoc gen6_resolve_hiz_slice() */ +void +gen7_resolve_hiz_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); +} + +/** \copydoc gen6_resolve_depth_slice() */ +void +gen7_resolve_depth_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); +} diff --git a/src/mesa/drivers/dri/i965/gen7_hiz.h b/src/mesa/drivers/dri/i965/gen7_hiz.h new file mode 100644 index 00000000000..b89ffb00711 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen7_hiz.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software 
without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include + +struct intel_context; +struct intel_mipmap_tree; + +/** \copydoc gen6_resolve_hiz_slice() */ +void +gen7_resolve_hiz_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); + +/** \copydoc gen6_resolve_depth_slice() */ +void +gen7_resolve_depth_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index da7ef81cddf..b215af2cf4a 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -149,8 +149,7 @@ const struct brw_tracked_state gen7_sbe_state = { _NEW_PROGRAM | _NEW_TRANSFORM), .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ), + BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sbe_state, @@ -166,17 +165,8 @@ upload_sf_state(struct brw_context *brw) /* _NEW_BUFFERS */ bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name 
!= 0; - dw1 = GEN6_SF_STATISTICS_ENABLE; - - /* Enable viewport transform only if no HiZ operation is progress - * - * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - */ - if (!brw->hiz.op) - dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; + dw1 = GEN6_SF_STATISTICS_ENABLE | + GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; /* _NEW_BUFFERS */ dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); @@ -310,8 +300,7 @@ const struct brw_tracked_state gen7_sf_state = { _NEW_SCISSOR | _NEW_BUFFERS | _NEW_POINT), - .brw = (BRW_NEW_CONTEXT | - BRW_NEW_HIZ), + .brw = BRW_NEW_CONTEXT, .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 32222f922d8..870590fbe7e 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -49,23 +49,6 @@ upload_wm_state(struct brw_context *brw) dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; - switch (brw->hiz.op) { - case BRW_HIZ_OP_NONE: - break; - case BRW_HIZ_OP_DEPTH_CLEAR: - dw1 |= GEN7_WM_DEPTH_CLEAR; - break; - case BRW_HIZ_OP_DEPTH_RESOLVE: - dw1 |= GEN7_WM_DEPTH_RESOLVE; - break; - case BRW_HIZ_OP_HIZ_RESOLVE: - dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - default: - assert(0); - break; - } - /* _NEW_LINE */ if (ctx->Line.StippleFlag) dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; @@ -106,7 +89,6 @@ const struct brw_tracked_state gen7_wm_state = { .mesa = (_NEW_LINE | _NEW_LIGHT | _NEW_POLYGON | _NEW_COLOR | _NEW_BUFFERS), .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ | BRW_NEW_BATCH), .cache = 0, }, diff --git a/src/mesa/drivers/dri/i965/junk b/src/mesa/drivers/dri/i965/junk new file mode 100644 index 00000000000..e69de29bb2d