mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
i965: Avoid flushing the batch for every blorp op.
This brings over the batch-wrap-prevention and aperture space checking code from the normal brw_draw.c path, so that we don't need to flush the batch every time.

There's a risk here if the intel_emit_post_sync_nonzero_flush() call isn't high enough up in the state emit sequences -- before, we implicitly had one at the batch flush before any state was emitted, so Mesa's workaround emits didn't really matter. Since the SNB fixes by Ken, I didn't see any regressions after 3 piglit runs.

Improves cairo-gl performance by 13.7733% +/- 1.74876% (n=30/32)
Improves minecraft apitrace performance by 1.03183% +/- 0.482297% (n=90).
Reduces low-resolution GLB 2.7 performance by 1.17553% +/- 0.432263% (n=88)
Reduces Lightsmark performance by 3.70246% +/- 0.322432% (n=126)
No statistically significant performance difference on unigine tropics (n=10)
No statistically significant performance difference on openarena (n=755)

The two apps that are hurt happen to include stalls on busy buffer objects, so I think this is an effect of missing out on an opportune flush.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
This commit is contained in:
parent
fd03dd6ddd
commit
185b5a54c9
4 changed files with 50 additions and 17 deletions
|
|
@@ -21,6 +21,7 @@
|
|||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "intel_fbo.h"
|
||||
|
||||
|
|
@@ -195,6 +196,26 @@ intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
|
|||
void
|
||||
brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
uint32_t estimated_max_batch_usage = 1500;
|
||||
bool check_aperture_failed_once = false;
|
||||
|
||||
/* Flush the sampler and render caches. We definitely need to flush the
|
||||
* sampler cache so that we get updated contents from the render cache for
|
||||
* the glBlitFramebuffer() source. Also, we are sometimes warned in the
|
||||
* docs to flush the cache between reinterpretations of the same surface
|
||||
* data with different formats, which blorp does for stencil and depth
|
||||
* data.
|
||||
*/
|
||||
intel_batchbuffer_emit_mi_flush(brw);
|
||||
|
||||
retry:
|
||||
intel_batchbuffer_require_space(brw, estimated_max_batch_usage, false);
|
||||
intel_batchbuffer_save_state(brw);
|
||||
drm_intel_bo *saved_bo = brw->batch.bo;
|
||||
uint32_t saved_used = brw->batch.used;
|
||||
uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
|
||||
|
||||
switch (brw->gen) {
|
||||
case 6:
|
||||
gen6_blorp_exec(brw, params);
|
||||
|
|
@@ -208,6 +229,35 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
|
|||
break;
|
||||
}
|
||||
|
||||
/* Make sure we didn't wrap the batch unintentionally, and make sure we
|
||||
* reserved enough space that a wrap will never happen.
|
||||
*/
|
||||
assert(brw->batch.bo == saved_bo);
|
||||
assert((brw->batch.used - saved_used) * 4 +
|
||||
(saved_state_batch_offset - brw->batch.state_batch_offset) <
|
||||
estimated_max_batch_usage);
|
||||
/* Shut up compiler warnings on release build */
|
||||
(void)saved_bo;
|
||||
(void)saved_used;
|
||||
(void)saved_state_batch_offset;
|
||||
|
||||
/* Check if the blorp op we just did would make our batch likely to fail to
|
||||
* map all the BOs into the GPU at batch exec time later. If so, flush the
|
||||
* batch and try again with nothing else in the batch.
|
||||
*/
|
||||
if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
|
||||
if (!check_aperture_failed_once) {
|
||||
check_aperture_failed_once = true;
|
||||
intel_batchbuffer_reset_to_saved(brw);
|
||||
intel_batchbuffer_flush(brw);
|
||||
goto retry;
|
||||
} else {
|
||||
int ret = intel_batchbuffer_flush(brw);
|
||||
WARN_ONCE(ret == -ENOSPC,
|
||||
"i965: blorp emit exceeded available aperture space\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(brw->always_flush_batch))
|
||||
intel_batchbuffer_flush(brw);
|
||||
|
||||
|
|
|
|||
|
|
@@ -370,10 +370,6 @@ private:
|
|||
void
|
||||
gen6_blorp_init(struct brw_context *brw);
|
||||
|
||||
void
|
||||
gen6_blorp_emit_batch_head(struct brw_context *brw,
|
||||
const brw_blorp_params *params);
|
||||
|
||||
void
|
||||
gen6_blorp_emit_state_base_address(struct brw_context *brw,
|
||||
const brw_blorp_params *params);
|
||||
|
|
|
|||
|
|
@@ -45,17 +45,6 @@
|
|||
* sizeof(float))
|
||||
/** \} */
|
||||
|
||||
void
|
||||
gen6_blorp_emit_batch_head(struct brw_context *brw,
|
||||
const brw_blorp_params *params)
|
||||
{
|
||||
/* To ensure that the batch contains only the resolve, flush the batch
|
||||
* before beginning and after finishing emitting the resolve packets.
|
||||
*/
|
||||
intel_batchbuffer_flush(brw);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* CMD_STATE_BASE_ADDRESS
|
||||
*
|
||||
|
|
@@ -1045,7 +1034,6 @@ gen6_blorp_exec(struct brw_context *brw,
|
|||
uint32_t wm_bind_bo_offset = 0;
|
||||
|
||||
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
|
||||
gen6_blorp_emit_batch_head(brw, params);
|
||||
gen6_emit_3dstate_multisample(brw, params->num_samples);
|
||||
gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
|
||||
gen6_blorp_emit_state_base_address(brw, params);
|
||||
|
|
|
|||
|
|
@@ -844,7 +844,6 @@ gen7_blorp_exec(struct brw_context *brw,
|
|||
uint32_t sampler_offset = 0;
|
||||
|
||||
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
|
||||
gen6_blorp_emit_batch_head(brw, params);
|
||||
gen6_emit_3dstate_multisample(brw, params->num_samples);
|
||||
gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
|
||||
gen6_blorp_emit_state_base_address(brw, params);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue