mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
freedreno: Autotune bypass vs GMEM rendering decision
In some cases, like gl_driver2, we have all the characteristics that make our current simplistic bypass vs GMEM decision pick GMEM (ie. batch starts with a clear, has blend enabled, has a high draw count, etc), but each draw touches very few pixels and the per-tile state-change overhead leaves us CP limited. We would be better in this case picking the bypass path. So use feedback from # of samples-passed in previous render passes to the same FBO to give us a bit more information to make better choices. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2798 Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9535>
This commit is contained in:
parent
0610c7ba84
commit
5b96689fa0
9 changed files with 511 additions and 4 deletions
|
|
@ -1,5 +1,7 @@
|
|||
C_SOURCES := \
|
||||
disasm.h \
|
||||
freedreno_autotune.c \
|
||||
freedreno_autotune.h \
|
||||
freedreno_batch.c \
|
||||
freedreno_batch.h \
|
||||
freedreno_batch_cache.c \
|
||||
|
|
|
|||
|
|
@ -471,6 +471,50 @@ check_vsc_overflow(struct fd_context *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
/* Emit cmdstream to capture the starting samples-passed counter value for
 * the autotune module, at the beginning of a render pass (used by both the
 * GMEM and sysmem paths).
 */
static void
emit_common_init(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   /* Nothing to do if autotune did not request a result for this batch: */
   if (!result)
      return;

   /* Configure RB to copy out the raw sample-count on ZPASS_DONE: */
   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   /* Point the sample-count write at this batch's slot in the autotune
    * results buffer.  Note: 'result[...]' inside results_ptr() names the
    * fd_autotune_results::result[] member, indexed by the local result's
    * assigned slot:
    */
   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));

   fd6_event_write(batch, ring, ZPASS_DONE, false);
}
|
||||
|
||||
/* Emit cmdstream to capture the ending samples-passed counter value for the
 * autotune module, at the end of a render pass, followed by a write of the
 * result's fence seqno so the CPU knows when both values are valid.
 */
static void
emit_common_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   /* Nothing to do if autotune did not request a result for this batch: */
   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   /* 'result[...]' inside results_ptr() names the fd_autotune_results::
    * result[] member, indexed by the local result's assigned slot:
    */
   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));

   fd6_event_write(batch, ring, ZPASS_DONE, false);

   /* Write back the fence seqno after the samples counts land, so
    * process_results() can tell the slot is complete:
    */
   // TODO is there a better event to use.. a single ZPASS_DONE_TS would be nice
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
   OUT_RELOC(ring, results_ptr(at, fence));
   OUT_RING(ring, result->fence);
}
|
||||
|
||||
/*
|
||||
* Emit conditional CP_INDIRECT_BRANCH based on VSC_STATE[p], ie. the IB
|
||||
* is skipped for tiles that have no visible geometry.
|
||||
|
|
@ -731,6 +775,8 @@ fd6_emit_tile_init(struct fd_batch *batch)
|
|||
}
|
||||
|
||||
update_render_cntl(batch, pfb, false);
|
||||
|
||||
emit_common_init(batch);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1316,6 +1362,8 @@ fd6_emit_tile_fini(struct fd_batch *batch)
|
|||
{
|
||||
struct fd_ringbuffer *ring = batch->gmem;
|
||||
|
||||
emit_common_fini(batch);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
|
||||
OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);
|
||||
|
||||
|
|
@ -1479,6 +1527,8 @@ fd6_emit_sysmem_prep(struct fd_batch *batch)
|
|||
emit_msaa(ring, pfb->samples);
|
||||
|
||||
update_render_cntl(batch, pfb, false);
|
||||
|
||||
emit_common_init(batch);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1486,6 +1536,8 @@ fd6_emit_sysmem_fini(struct fd_batch *batch)
|
|||
{
|
||||
struct fd_ringbuffer *ring = batch->gmem;
|
||||
|
||||
emit_common_fini(batch);
|
||||
|
||||
if (batch->epilogue)
|
||||
fd6_emit_ib(batch->gmem, batch->epilogue);
|
||||
|
||||
|
|
|
|||
261
src/gallium/drivers/freedreno/freedreno_autotune.c
Normal file
261
src/gallium/drivers/freedreno/freedreno_autotune.c
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
/*
|
||||
* Copyright © 2021 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "freedreno_autotune.h"
|
||||
#include "freedreno_batch.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
|
||||
/**
 * Tracks, for a given batch key (which maps to a FBO/framebuffer state),
 * a list of recent samples-passed results from rendering to that render
 * target.
 *
 * ralloc parent is fd_autotune::ht
 */
struct fd_batch_history {
   /* Cloned copy of the batch key, used as the hash-table key: */
   struct fd_batch_key *key;

   /* Entry in fd_autotune::lru: */
   struct list_head node;

   /* Number of entries on the results list (capped at MAX_RESULTS): */
   unsigned num_results;

   /**
    * List of recent fd_batch_result's
    */
   struct list_head results;
#define MAX_RESULTS 5
};
|
||||
|
||||
|
||||
/**
 * Look up (or create) the fd_batch_history tracking state for the batch's
 * key (ie. its FBO/framebuffer state), moving it to the head of the LRU.
 *
 * Returns NULL for key-less batches, which cannot be correlated across
 * frames.
 */
static struct fd_batch_history *
get_history(struct fd_autotune *at, struct fd_batch *batch)
{
   struct fd_batch_history *history;

   /* No key means nothing to match historical results against: */
   if (!batch->key)
      return NULL;

   /* batch->hash is precomputed, so use the pre-hashed search: */
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key);

   if (entry) {
      history = entry->data;
      goto found;
   }

   /* Not found; create a new history (ralloc parent is the ht, so it is
    * freed along with the table):
    */
   history = rzalloc_size(at->ht, sizeof(*history));

   history->key = fd_batch_key_clone(history, batch->key);
   list_inithead(&history->node);
   list_inithead(&history->results);

   /* Note: We cap # of cached GMEM states at 20.. so assuming double-
    * buffering, 40 should be a good place to cap cached autotune state
    */
   if (at->ht->entries >= 40) {
      /* Evict the least-recently-used history (tail of the LRU list): */
      struct fd_batch_history *last =
         list_last_entry(&at->lru, struct fd_batch_history, node);
      _mesa_hash_table_remove_key(at->ht, last->key);
      list_del(&last->node);
      ralloc_free(last);
   }

   _mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key, history);

found:
   /* Move to the head of the LRU: */
   list_delinit(&history->node);
   list_add(&history->node, &at->lru);

   return history;
}
|
||||
|
||||
/* ralloc destructor for fd_batch_result, ensuring the result is unlinked
 * from whatever list it is on when it is freed.
 */
static void
result_destructor(void *r)
{
   struct fd_batch_result *result = r;

   /* Just in case we manage to somehow still be on the pending_results list: */
   list_del(&result->node);
}
|
||||
|
||||
/**
 * Allocate a new result tracking struct for a batch, assigning it a unique
 * fence seqno and a slot in the GPU results buffer, and append it to the
 * tail of the pending_results list.
 *
 * ralloc parent is the history, so results are freed along with it.
 */
static struct fd_batch_result *
get_result(struct fd_autotune *at, struct fd_batch_history *history)
{
   struct fd_batch_result *result = rzalloc_size(history, sizeof(*result));

   result->fence = ++at->fence_counter; /* pre-increment so zero isn't valid fence */
   result->idx = at->idx_counter++;

   /* Wrap around and re-use slots in fd_autotune_results::result[]: */
   if (at->idx_counter >= ARRAY_SIZE(at->results->result))
      at->idx_counter = 0;

   result->history = history;
   list_addtail(&result->node, &at->pending_results);

   ralloc_set_destructor(result, result_destructor);

   return result;
}
|
||||
|
||||
/**
 * Consume any pending results that the GPU has finished writing (ie. whose
 * fence seqno the GPU has already written back), computing samples-passed
 * and migrating each result onto its history's results list.
 */
static void
process_results(struct fd_autotune *at)
{
   uint32_t current_fence = at->results->fence;

   list_for_each_entry_safe (struct fd_batch_result, result, &at->pending_results, node) {
      /* pending_results is in fence order, so stop at the first result
       * the GPU has not reached yet:
       */
      if (result->fence > current_fence)
         break;

      struct fd_batch_history *history = result->history;

      /* The subtraction is done on the CPU to avoid the GPU needing a
       * WFI to compute it (see fd_autotune_results):
       */
      result->samples_passed = at->results->result[result->idx].samples_end -
            at->results->result[result->idx].samples_start;

      list_delinit(&result->node);
      list_add(&result->node, &history->results);

      if (history->num_results < MAX_RESULTS) {
         history->num_results++;
      } else {
         /* Once above a limit, start popping old results off the
          * tail of the list:
          */
         struct fd_batch_result *old_result =
            list_last_entry(&history->results, struct fd_batch_result, node);
         list_delinit(&old_result->node);
         ralloc_free(old_result);
      }
   }
}
|
||||
|
||||
static bool
|
||||
fallback_use_bypass(struct fd_batch *batch)
|
||||
{
|
||||
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
|
||||
|
||||
/* Fallback logic if we have no historical data about the rendertarget: */
|
||||
if (batch->cleared || batch->gmem_reason ||
|
||||
((batch->num_draws > 5) && !batch->blit) ||
|
||||
(pfb->samples > 1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * A magic 8-ball that tells the gmem code whether we should do bypass mode
 * for moar fps.
 *
 * Uses historical samples-passed results for the batch's render target when
 * available; otherwise falls back to the simple draw-count/clear heuristic.
 * As a side effect, requests result collection for this batch (sets
 * batch->autotune_result) when the history path is taken.
 */
bool
fd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /* Harvest any results the GPU has finished writing before deciding: */
   process_results(at);

   /* Only enable on gen's that opt-in (and actually have sample-passed
    * collection wired up):
    */
   if (!batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   /* Any gmem_reason bits outside the opt-in mask force the fallback: */
   if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
      /* If ms-rtt is involved, force GMEM, as we don't currently
       * implement a temporary render target that we can MSAA resolve
       * from
       */
      if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples)
         return fallback_use_bypass(batch);
   }

   struct fd_batch_history *history = get_history(at, batch);
   if (!history)
      return fallback_use_bypass(batch);

   /* Request sample-count collection for this batch, so future frames
    * have data to decide with:
    */
   batch->autotune_result = get_result(at, history);
   batch->autotune_result->cost = batch->cost;

   bool use_bypass = fallback_use_bypass(batch);

   /* If the simple heuristic already picks bypass, don't override it: */
   if (use_bypass)
      return true;

   if (history->num_results > 0) {
      uint32_t total_samples = 0;

      // TODO we should account for clears somehow
      // TODO should we try to notice if there is a drastic change from
      // frame to frame?
      list_for_each_entry (struct fd_batch_result, result, &history->results, node) {
         total_samples += result->samples_passed;
      }

      float avg_samples = (float)total_samples / (float)history->num_results;

      /* Low sample count could mean there was only a clear.. or there was
       * a clear plus draws that touch no or few samples
       */
      if (avg_samples < 500.0)
         return true;

      /* Cost-per-sample is an estimate for the average number of reads+
       * writes for a given passed sample.
       */
      float sample_cost = batch->cost;
      sample_cost /= batch->num_draws;

      /* NOTE(review): threshold values (500.0 samples, 3000.0 cost) are
       * presumably tuned empirically -- see MR !9535.
       */
      float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws;
      DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, total_draw_cost=%f\n",
          batch->hash, batch->num_draws, total_samples, avg_samples, sample_cost, total_draw_cost);

      if (total_draw_cost < 3000.0)
         return true;
   }

   return use_bypass;
}
|
||||
|
||||
/**
 * One-time per-context init: sets up the history hash table / LRU, and
 * allocates+maps the GPU buffer used to read back per-batch sample counts.
 */
void
fd_autotune_init(struct fd_autotune *at, struct fd_device *dev)
{
   at->ht = _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
   list_inithead(&at->lru);

   at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results),
         DRM_FREEDRENO_GEM_TYPE_KMEM, "autotune");
   at->results = fd_bo_map(at->results_mem);

   list_inithead(&at->pending_results);
}
|
||||
|
||||
/**
 * Per-context teardown.  Histories (and their results) are ralloc children
 * of the hash table (see get_history()), so destroying the table frees
 * them as well.
 */
void
fd_autotune_fini(struct fd_autotune *at)
{
   _mesa_hash_table_destroy(at->ht, NULL);
   fd_bo_del(at->results_mem);
}
|
||||
177
src/gallium/drivers/freedreno/freedreno_autotune.h
Normal file
177
src/gallium/drivers/freedreno/freedreno_autotune.h
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
/*
|
||||
* Copyright © 2021 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef FREEDRENO_AUTOTUNE_H
|
||||
#define FREEDRENO_AUTOTUNE_H
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/list.h"
|
||||
|
||||
#include "freedreno_util.h"
|
||||
|
||||
struct fd_autotune_results;
|
||||
|
||||
/**
|
||||
* "autotune" our decisions about bypass vs GMEM rendering, based on historical
|
||||
* data about a given render target.
|
||||
*
|
||||
* In deciding which path to take there are tradeoffs, including some that
|
||||
* are not reasonably estimable without having some additional information:
|
||||
*
|
||||
* (1) If you know you are touching every pixel (ie. there is a glClear()),
|
||||
* then the GMEM path will at least not cost more memory bandwidth than
|
||||
* sysmem[1]
|
||||
*
|
||||
* (2) If there is no clear, GMEM could potentially cost *more* bandwidth
|
||||
* due to sysmem->GMEM restore pass.
|
||||
*
|
||||
* (3) If you see a high draw count, that is an indication that there will be
|
||||
* enough pixels accessed multiple times to benefit from the reduced
|
||||
* memory bandwidth that GMEM brings
|
||||
*
|
||||
* (4) But high draw count where there is not much overdraw can actually be
|
||||
* faster in bypass mode if it is pushing a lot of state change, due to
|
||||
* not having to go thru the state changes per-tile[2]
|
||||
*
|
||||
* The approach taken is to measure the samples-passed for the batch to estimate
|
||||
* the amount of overdraw to detect cases where the number of pixels touched is
|
||||
* low.
|
||||
*
|
||||
* Note however, that (at least since a5xx) we have PERF_RB_{Z,C}_{READ,WRITE}
|
||||
* performance countables, which give a more direct measurement of what we want
|
||||
* to know (ie. is framebuffer memory access high enough to prefer GMEM), but
|
||||
* with the downside of consuming half of the available RB counters. With the
|
||||
* additional complication that external perfcntr collection (fdperf, perfetto)
|
||||
* and the driver could be stomping on each other's feet. (Also reading the
|
||||
* perfcntrs accurately requires a WFI.)
|
||||
*
|
||||
* [1] ignoring UBWC
|
||||
* [2] ignoring early-tile-exit optimizations, but any draw that touches all/
|
||||
* most of the tiles late in the tile-pass can defeat that
|
||||
*/
|
||||
struct fd_autotune {

   /**
    * Cache to map batch->key (also used for batch-cache) to historical
    * information about rendering to that particular render target.
    */
   struct hash_table *ht;

   /**
    * List of recently used historical results (to age out old results)
    */
   struct list_head lru;

   /**
    * GPU buffer used to communicate back results to the CPU
    */
   struct fd_bo *results_mem;
   struct fd_autotune_results *results;

   /**
    * List of per-batch results that we are waiting for the GPU to finish
    * with before reading back the results.
    */
   struct list_head pending_results;

   /* Seqno assigned to each batch result; pre-incremented on allocation
    * so zero is never a valid fence value:
    */
   uint32_t fence_counter;
   /* Next slot to use in fd_autotune_results::result[]; wraps around: */
   uint32_t idx_counter;
};
|
||||
|
||||
/**
|
||||
* The layout of the memory used to read back per-batch results from the
|
||||
* GPU
|
||||
*
|
||||
* Note this struct is intentionally aligned to 4k. And hw requires the
|
||||
* sample start/stop locations to be 128b aligned.
|
||||
*/
|
||||
struct fd_autotune_results {

   /**
    * The GPU writes back a "fence" seqno value from the cmdstream after
    * it finishes writing its result slot, so that the CPU knows when
    * results are valid
    */
   uint32_t fence;

   uint32_t __pad0;
   uint64_t __pad1;

   /**
    * From the cmdstream, the captured samples-passed values are recorded
    * at the start and end of the batch.
    *
    * Note that we do the math on the CPU to avoid a WFI.  But pre-emption
    * may force us to revisit that.
    */
   struct {
      uint64_t samples_start;
      uint64_t __pad0;     /* pad samples_start out to 128b (hw requirement) */
      uint64_t samples_end;
      uint64_t __pad1;     /* pad samples_end out to 128b (hw requirement) */
   } result[127];
};
|
||||
|
||||
#define offset(base, ptr) ((uint8_t *)(ptr) - (uint8_t *)(base))
|
||||
#define results_ptr(at, member) \
|
||||
(at)->results_mem, offset((at)->results, &(at)->results->member), 0, 0
|
||||
|
||||
struct fd_batch_history;
|
||||
|
||||
/**
|
||||
* Tracks the results from an individual batch. Initially created per batch,
|
||||
* and appended to the tail of at->pending_results. At a later time, when
|
||||
* the GPU has finished writing the results,
|
||||
*
|
||||
* ralloc parent is the associated fd_batch_history
|
||||
*/
|
||||
struct fd_batch_result {

   /**
    * The index/slot in fd_autotune_results::result[] to write start/end
    * counter to
    */
   unsigned idx;

   /**
    * Fence value to write back to fd_autotune_results::fence after both
    * start/end values written
    */
   uint32_t fence;

   /*
    * Below here, only used internally within autotune
    */
   struct fd_batch_history *history;  /* history this result belongs to */
   struct list_head node;             /* entry in pending_results or history results */
   uint32_t cost;                     /* copied from batch->cost at creation */
   uint64_t samples_passed;           /* samples_end - samples_start, filled on readback */
};
|
||||
|
||||
void fd_autotune_init(struct fd_autotune *at, struct fd_device *dev);
|
||||
void fd_autotune_fini(struct fd_autotune *at);
|
||||
|
||||
struct fd_batch;
|
||||
bool fd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch) assert_dt;
|
||||
|
||||
#endif /* FREEDRENO_AUTOTUNE_H */
|
||||
|
|
@ -44,6 +44,7 @@
|
|||
|
||||
struct fd_resource;
|
||||
struct fd_batch_key;
|
||||
struct fd_batch_result;
|
||||
|
||||
/* A batch tracks everything about a cmdstream batch/submit, including the
|
||||
* ringbuffers used for binning, draw, and gmem cmds, list of associated
|
||||
|
|
@ -144,6 +145,13 @@ struct fd_batch {
|
|||
*/
|
||||
unsigned cost;
|
||||
|
||||
/* Tells the gen specific backend where to write stats used for
|
||||
* the autotune module.
|
||||
*
|
||||
* Pointer only valid during gmem emit code.
|
||||
*/
|
||||
struct fd_batch_result *autotune_result;
|
||||
|
||||
unsigned num_draws; /* number of draws in current batch */
|
||||
unsigned num_vertices; /* number of vertices in current batch */
|
||||
|
||||
|
|
|
|||
|
|
@ -371,6 +371,8 @@ fd_context_destroy(struct pipe_context *pctx)
|
|||
|
||||
u_trace_context_fini(&ctx->trace_context);
|
||||
|
||||
fd_autotune_fini(&ctx->autotune);
|
||||
|
||||
if (FD_DBG(BSTAT) || FD_DBG(MSGS)) {
|
||||
mesa_logi("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, batch_restore=%u\n",
|
||||
(uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
|
||||
|
|
@ -644,6 +646,8 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
|
|||
u_trace_context_init(&ctx->trace_context, pctx,
|
||||
fd_trace_record_ts, fd_trace_read_ts);
|
||||
|
||||
fd_autotune_init(&ctx->autotune, screen->dev);
|
||||
|
||||
return pctx;
|
||||
|
||||
fail:
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@
|
|||
#include "util/u_threaded_context.h"
|
||||
#include "util/u_trace.h"
|
||||
|
||||
#include "freedreno_autotune.h"
|
||||
#include "freedreno_screen.h"
|
||||
#include "freedreno_gmem.h"
|
||||
#include "freedreno_util.h"
|
||||
|
|
@ -209,6 +210,8 @@ struct fd_context {
|
|||
struct slab_child_pool transfer_pool dt;
|
||||
struct slab_child_pool transfer_pool_unsync; /* for threaded_context */
|
||||
|
||||
struct fd_autotune autotune dt;
|
||||
|
||||
/**
|
||||
* query related state:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -680,10 +680,8 @@ fd_gmem_render_tiles(struct fd_batch *batch)
|
|||
}
|
||||
|
||||
if (ctx->emit_sysmem_prep && !batch->nondraw) {
|
||||
if (batch->cleared || batch->gmem_reason ||
|
||||
((batch->num_draws > 5) && !batch->blit) ||
|
||||
(pfb->samples > 1)) {
|
||||
} else if (!FD_DBG(NOBYPASS)) {
|
||||
if (fd_autotune_use_bypass(&ctx->autotune, batch) &&
|
||||
!FD_DBG(NOBYPASS)) {
|
||||
sysmem = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@
|
|||
# SOFTWARE.
|
||||
|
||||
files_libfreedreno = files(
|
||||
'freedreno_autotune.c',
|
||||
'freedreno_autotune.h',
|
||||
'freedreno_batch.c',
|
||||
'freedreno_batch.h',
|
||||
'freedreno_batch_cache.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue