swr: knob overrides for Intel Xeon Phi

The Xeon Phi (KNL) architecture benefits from having more threads and more work outstanding.

Patch by Jan Zielinski.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2017-10-17 15:11:19 -05:00
parent 028ffa5e18
commit bfda35c8dd
5 changed files with 37 additions and 1 deletions

View file

@ -39,6 +39,7 @@
#include "api.h"
#include "backend.h"
#include "knobs.h"
static struct pipe_surface *
swr_create_surface(struct pipe_context *pipe,
@ -483,6 +484,8 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
ctx->blendJIT =
new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT;
SWR_CREATECONTEXT_INFO createInfo;
memset(&createInfo, 0, sizeof(createInfo));
createInfo.privateStateSize = sizeof(swr_draw_context);
@ -491,6 +494,30 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
createInfo.pfnClearTile = swr_StoreHotTileClear;
createInfo.pfnUpdateStats = swr_UpdateStats;
createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
SWR_THREADING_INFO threadingInfo {0};
threadingInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;
threadingInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;
threadingInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
threadingInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;
threadingInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;
// Use non-default knob values on KNL (Xeon Phi), unless the user has set the matching KNOB_* environment variable
if (swr_screen(p_screen)->is_knl)
{
if (nullptr == getenv("KNOB_MAX_THREADS_PER_CORE"))
threadingInfo.MAX_THREADS_PER_CORE = 2;
if (nullptr == getenv("KNOB_MAX_DRAWS_IN_FLIGHT"))
{
ctx->max_draws_in_flight = 2048;
createInfo.MAX_DRAWS_IN_FLIGHT = ctx->max_draws_in_flight;
}
}
createInfo.pThreadInfo = &threadingInfo;
ctx->swrContext = ctx->api.pfnSwrCreateContext(&createInfo);
ctx->api.pfnSwrInit();

View file

@ -173,6 +173,8 @@ struct swr_context {
unsigned dirty; /**< Mask of SWR_NEW_x flags */
SWR_INTERFACE api;
uint32_t max_draws_in_flight;
};
static INLINE struct swr_context *

View file

@ -38,11 +38,14 @@ swr_create_screen(struct sw_winsys *winsys)
util_cpu_detect();
bool is_knl = false;
if (!strlen(filename) &&
util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
#if HAVE_SWR_KNL
fprintf(stderr, "KNL ");
sprintf(filename, "%s%s%s", UTIL_DL_PREFIX, "swrKNL", UTIL_DL_EXT);
is_knl = true;
#else
fprintf(stderr, "KNL (not built) ");
#endif
@ -99,6 +102,7 @@ swr_create_screen(struct sw_winsys *winsys)
struct pipe_screen *screen = swr_create_screen_internal(winsys);
swr_screen(screen)->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
swr_screen(screen)->is_knl = is_knl;
return screen;
}

View file

@ -45,7 +45,7 @@ swr_copy_to_scratch_space(struct swr_context *ctx,
ptr = ctx->api.pfnSwrAllocDrawContextMemory(ctx->swrContext, size, 4);
} else {
/* Allocate enough so that ctx->max_draws_in_flight sets fit. */
unsigned int max_size_in_flight = size * KNOB_MAX_DRAWS_IN_FLIGHT;
uint32_t max_size_in_flight = size * ctx->max_draws_in_flight;
/* Need to grow space */
if (max_size_in_flight > space->current_size) {

View file

@ -54,6 +54,9 @@ struct swr_screen {
#endif
PFNSwrGetInterface pfnSwrGetInterface;
/* Do we run on Xeon Phi? */
bool is_knl;
};
static INLINE struct swr_screen *