diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index d5dccaf5bbf..3d6ee6f3ed1 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -680,6 +680,8 @@ brwCreateContext(gl_api api, /* Reinitialize the context point state. It depends on ctx->Const values. */ _mesa_init_point(ctx); + intel_fbo_init(brw); + intel_batchbuffer_init(brw); if (brw->gen >= 6) { @@ -703,8 +705,6 @@ brwCreateContext(gl_api api, intelInitExtensions(ctx); - intel_fbo_init(brw); - brw_init_surface_formats(brw); if (brw->is_g4x || brw->gen >= 5) { diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index afa335041e9..53a72c25374 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1024,6 +1024,13 @@ struct brw_context drm_intel_context *hw_ctx; + /** + * Set of drm_intel_bo * that have been rendered to within this batchbuffer + * and would need flushing before being used from another cache domain that + * isn't coherent with it (i.e. the sampler). + */ + struct set *render_cache; + /** * Number of resets observed in the system at context creation. * diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 1e018bb0d76..09aaf41ae6e 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -306,8 +306,8 @@ static void brw_merge_inputs( struct brw_context *brw, /* * \brief Resolve buffers before drawing. * - * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each - * enabled depth texture. + * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each + * enabled depth texture, and flush the render cache for any dirty textures. * * (In the future, this will also perform MSAA resolves). 
*/ @@ -323,9 +323,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw) if (depth_irb) intel_renderbuffer_resolve_hiz(brw, depth_irb); - /* Resolve depth buffer of each enabled depth texture, and color buffer of - * each fast-clear-enabled color texture. - */ + /* Resolve depth buffer and render cache of each enabled texture. */ for (int i = 0; i < ctx->Const.MaxCombinedTextureImageUnits; i++) { if (!ctx->Texture.Unit[i]._ReallyEnabled) continue; @@ -334,6 +332,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw) continue; intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); intel_miptree_resolve_color(brw, tex_obj->mt); + brw_render_cache_set_check_flush(brw, tex_obj->mt->region->bo); } } @@ -345,6 +344,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw) * * If the color buffer is a multisample window system buffer, then * mark that it needs a downsample. + * + * Also mark any render targets which will be textured as needing a render + * cache flush. */ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) { @@ -363,8 +365,18 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) intel_renderbuffer_set_needs_downsample(front_irb); if (back_irb) intel_renderbuffer_set_needs_downsample(back_irb); - if (depth_irb && ctx->Depth.Mask) + if (depth_irb && ctx->Depth.Mask) { intel_renderbuffer_att_set_needs_depth_resolve(depth_att); + brw_render_cache_set_add_bo(brw, depth_irb->mt->region->bo); + } + + for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct intel_renderbuffer *irb = + intel_renderbuffer(fb->_ColorDrawBuffers[i]); + + if (irb) + brw_render_cache_set_add_bo(brw, irb->mt->region->bo); + } } /* May fail if out of video memory for texture or vbo upload, or on diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index ca88b940f43..673a25d741c 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c 
@@ -583,6 +583,9 @@ brw_emit_depthbuffer(struct brw_context *brw) height = stencil_irb->Base.Base.Height; } + if (depth_mt) + brw_render_cache_set_check_flush(brw, depth_mt->region->bo); + brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset, depthbuffer_format, depth_surface_type, stencil_mt, hiz, separate_stencil, diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index fbbd5274f13..9c177a250a1 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -30,6 +30,7 @@ #include "intel_reg.h" #include "intel_bufmgr.h" #include "intel_buffers.h" +#include "intel_fbo.h" #include "brw_context.h" static void @@ -88,6 +89,7 @@ intel_batchbuffer_reset(struct brw_context *brw) brw->batch.last_bo = brw->batch.bo; intel_batchbuffer_clear_cache(brw); + brw_render_cache_set_clear(brw); brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer", BATCH_SZ, 4096); @@ -660,4 +662,6 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw) } brw_emit_pipe_control_flush(brw, flags); } + + brw_render_cache_set_clear(brw); } diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index acb5d72db3e..1e56d3ca49e 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -36,6 +36,8 @@ #include "main/context.h" #include "main/teximage.h" #include "main/image.h" +#include "main/hash_table.h" +#include "main/set.h" #include "swrast/swrast.h" #include "drivers/common/meta.h" @@ -516,24 +518,6 @@ intel_render_texture(struct gl_context * ctx, } -/** - * Called by Mesa when rendering to a texture is done. 
- */ -static void -intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb) -{ - struct brw_context *brw = brw_context(ctx); - - DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format)); - - /* Since we've (probably) rendered to the texture and will (likely) use - * it in the texture domain later on in this batchbuffer, flush the - * batch. Once again, we wish for a domain tracker in libdrm to cover - * usage inside of a batchbuffer like GEM does in the kernel. - */ - intel_batchbuffer_emit_mi_flush(brw); -} - #define fbo_incomplete(fb, ...) do { \ static GLuint msg_id = 0; \ if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \ @@ -889,6 +873,43 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_miptree_release(&new_mt); } +void +brw_render_cache_set_clear(struct brw_context *brw) +{ + struct set_entry *entry; +/* Empty brw->render_cache by removing every entry it holds. NOTE(review): this removes the entry currently being visited by set_foreach; confirm in main/set.h that the iterator permits that. */ + set_foreach(brw->render_cache, entry) { + _mesa_set_remove(brw->render_cache, entry); + } +} +/* Add bo to brw->render_cache so that a later brw_render_cache_set_check_flush() on the same bo finds it. */ +void +brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo) +{ + _mesa_set_add(brw->render_cache, _mesa_hash_pointer(bo), bo); +} + +/** + * Emits an appropriate flush for a BO if it has been rendered to within the + * same batchbuffer as a read that's about to be emitted. + * + * The GPU has separate, incoherent caches for the render cache and the + * sampler cache, along with other caches. Usually data in the different + * caches don't interact (e.g. we don't render to our driver-generated + * immediate constant data), but for render-to-texture in FBOs we definitely + * do. When a batchbuffer is flushed, the kernel will ensure that everything + * necessary is flushed before another use of that BO, but for reuse from + * different caches within a batchbuffer, it's all our responsibility.
+ */ +void +brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo) +{ + if (!_mesa_set_search(brw->render_cache, _mesa_hash_pointer(bo), bo)) + return; +/* bo is in the set, so flush now. intel_batchbuffer_emit_mi_flush() finishes by calling brw_render_cache_set_clear(), which leaves the set empty after the flush. */ + intel_batchbuffer_emit_mi_flush(brw); +} + /** * Do one-time context initializations related to GL_EXT_framebuffer_object. * Hook in device driver functions. @@ -902,9 +923,10 @@ intel_fbo_init(struct brw_context *brw) dd->MapRenderbuffer = intel_map_renderbuffer; dd->UnmapRenderbuffer = intel_unmap_renderbuffer; dd->RenderTexture = intel_render_texture; - dd->FinishRenderTexture = intel_finish_render_texture; dd->ValidateFramebuffer = intel_validate_framebuffer; dd->BlitFramebuffer = intel_blit_framebuffer; dd->EGLImageTargetRenderbufferStorage = intel_image_target_renderbuffer_storage; + + brw->render_cache = _mesa_set_create(brw, _mesa_key_pointer_equal); } diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h index 45e2cd89d9e..46a1793b659 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.h +++ b/src/mesa/drivers/dri/i965/intel_fbo.h @@ -195,6 +195,18 @@ void intel_renderbuffer_move_to_temp(struct brw_context *brw, struct intel_renderbuffer *irb, bool invalidate); +void +intel_renderbuffer_downsample(struct brw_context *brw, + struct intel_renderbuffer *irb); + +void +intel_renderbuffer_upsample(struct brw_context *brw, + struct intel_renderbuffer *irb); + +void brw_render_cache_set_clear(struct brw_context *brw); +void brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo); +void brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo); + unsigned intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);