mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 04:48:08 +02:00
tu: Disable concurrent binning by default due to perf regressions
Unfortunately we have to disable concurrent binning by default because it hurts performance in a number of desktop games, and we know of no case where it helps. There are fewer vertex fetch resources available in BV than in BR, so when binning runs in BV and there are many attribute-heavy vertices, BV performs much worse than BR, sometimes more than 50% worse. Even that slowdown would be acceptable if concurrent binning actually overlapped with other work in those cases, but in desktop games there is almost never a chance for overlap. However, it's impossible to statically determine whether binning on BV would be much slower than on BR, and we also cannot statically predict whether there is enough overlap (if any) to cover the performance penalty. Given the above, I don't see a way out other than making concurrent binning opt-in via the `tu_allow_concurrent_binning` driconf toggle. Still allow concurrent binning in CI to catch issues early. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
This commit is contained in:
parent
261876984a
commit
6e080e4adf
6 changed files with 26 additions and 4 deletions
|
|
@ -31,6 +31,18 @@ tests_per_group = 10000
|
|||
[deqp.env]
|
||||
TU_DEBUG = "gmem,unaligned_store"
|
||||
|
||||
# force-gmem with concurrent binning allowed to test concurrent binning
|
||||
[[deqp]]
|
||||
deqp = "/deqp-vk/external/vulkancts/modules/vulkan/deqp-vk"
|
||||
caselists = ["/deqp-vk/mustpass/vk-main.txt"]
|
||||
include = ["dEQP-VK.renderpass2.*"]
|
||||
prefix = "gmem-cb-"
|
||||
fraction = 20
|
||||
tests_per_group = 10000
|
||||
[deqp.env]
|
||||
TU_DEBUG = "gmem"
|
||||
tu_allow_concurrent_binning = "true"
|
||||
|
||||
# force-sysmem testing
|
||||
[[deqp]]
|
||||
deqp = "/deqp-vk/external/vulkancts/modules/vulkan/deqp-vk"
|
||||
|
|
|
|||
|
|
@ -489,7 +489,7 @@ tu_emit_cache_flush(struct tu_cmd_buffer *cmd_buffer)
|
|||
|
||||
if ((flushes & TU_CMD_FLAG_WAIT_FOR_BR) && CHIP >= A7XX &&
|
||||
!(cmd_buffer->state.pass && cmd_buffer->state.renderpass_cb_disabled) &&
|
||||
!TU_DEBUG(NO_CONCURRENT_BINNING)) {
|
||||
cmd_buffer->device->instance->allow_concurrent_binning) {
|
||||
trace_start_concurrent_binning_barrier(&cmd_buffer->trace, cs, cmd_buffer);
|
||||
|
||||
/* Wait-for-BR when repeated a lot of times per frame can add up
|
||||
|
|
@ -3109,8 +3109,8 @@ tu7_emit_concurrent_binning_start(struct tu_cmd_buffer *cmd,
|
|||
tu7_cb_disable_reason(
|
||||
(!cmd->state.lrz.fast_clear && cmd->state.lrz.image_view), cmd,
|
||||
"LRZ fast clear disabled") ||
|
||||
tu7_cb_disable_reason(TU_DEBUG(NO_CONCURRENT_BINNING), cmd,
|
||||
"TU_DEBUG(NO_CONCURRENT_BINNING)")) {
|
||||
tu7_cb_disable_reason(!cmd->device->instance->allow_concurrent_binning, cmd,
|
||||
"globally disabled")) {
|
||||
tu_cs_emit_pkt7(cs, CP_THREAD_CONTROL, 1);
|
||||
tu_cs_emit(cs, CP_THREAD_CONTROL_0_THREAD(CP_SET_THREAD_BR) |
|
||||
CP_THREAD_CONTROL_0_CONCURRENT_BIN_DISABLE);
|
||||
|
|
|
|||
|
|
@ -1830,6 +1830,7 @@ static const driOptionDescription tu_dri_options[] = {
|
|||
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
|
||||
DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
|
||||
DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
|
||||
DRI_CONF_TU_ALLOW_CONCURRENT_BINNING(false)
|
||||
DRI_CONF_SECTION_END
|
||||
|
||||
DRI_CONF_SECTION_DEBUG
|
||||
|
|
@ -1884,6 +1885,9 @@ tu_init_dri_options(struct tu_instance *instance)
|
|||
driQueryOptionb(&instance->dri_options, "tu_emulate_alpha_to_coverage");
|
||||
instance->autotune_algo =
|
||||
driQueryOptionstr(&instance->dri_options, "tu_autotune_algorithm");
|
||||
instance->allow_concurrent_binning =
|
||||
(driQueryOptionb(&instance->dri_options, "tu_allow_concurrent_binning") && !TU_DEBUG(NO_CONCURRENT_BINNING)) ||
|
||||
TU_DEBUG(FORCE_CONCURRENT_BINNING);
|
||||
}
|
||||
|
||||
static uint32_t instance_count = 0;
|
||||
|
|
|
|||
|
|
@ -240,6 +240,8 @@ struct tu_instance
|
|||
|
||||
/* Configuration option to use a specific autotune algorithm by default. */
|
||||
const char *autotune_algo;
|
||||
|
||||
bool allow_concurrent_binning;
|
||||
};
|
||||
VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance,
|
||||
VK_OBJECT_TYPE_INSTANCE)
|
||||
|
|
|
|||
|
|
@ -182,7 +182,7 @@ resolve_vis_stream_patchpoints(struct tu_queue *queue,
|
|||
* streams and therefore should be avoided.
|
||||
*/
|
||||
uint32_t min_vis_stream_count =
|
||||
(TU_DEBUG(NO_CONCURRENT_BINNING) || dev->physical_device->info->chip < 7) ?
|
||||
(!dev->instance->allow_concurrent_binning || dev->physical_device->info->chip < 7) ?
|
||||
1 : MIN2(MAX2(rp_count, 1), TU_MAX_VIS_STREAMS);
|
||||
uint32_t vis_stream_count;
|
||||
|
||||
|
|
|
|||
|
|
@ -668,6 +668,10 @@
|
|||
DRI_CONF_OPT_S_NODEF(tu_autotune_algorithm, \
|
||||
"Set the preferred autotune algorithm")
|
||||
|
||||
#define DRI_CONF_TU_ALLOW_CONCURRENT_BINNING(def) \
|
||||
DRI_CONF_OPT_B(tu_allow_concurrent_binning, def, \
|
||||
"Allow concurrent binning on A7XX+, the CB is disabled by default because it regresses performance on desktop games")
|
||||
|
||||
/**
|
||||
* \brief Honeykrisp specific configuration options
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue