mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
anv: Update values for DispatchTimeoutCounter
The BTD unit keeps accumulating threads and eventually dispatches the active threads once it reaches the counter. Dispatching too fast presumably does not reach full occupancy at the BTD unit, so instead we just pick half of the max value for the counter. This patch also adds a drirc option, dispatch_timeout_counter, and tweaks the values internally with respect to HW limits. The default value we have right now is 512 clocks; we can certainly tune it per app. Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40733>
This commit is contained in:
parent
8a990b5a1c
commit
f36b6c8f13
5 changed files with 121 additions and 12 deletions
|
|
@ -567,3 +567,47 @@ genX(cmd_buffer_rhwo_wa_14024015672)(struct anv_cmd_buffer *cmd_buffer,
|
|||
cmd_buffer->state.pending_rhwo_optimization_enabled != rhwo_opt_enable)
|
||||
cmd_buffer->state.pending_rhwo_optimization_enabled = rhwo_opt_enable;
|
||||
}
|
||||
|
||||
static inline int
|
||||
genX(anv_get_btd_dispatch_timeout_counter)(uint32_t dispatch_timeout_counter)
|
||||
{
|
||||
/* This is the timeout after which the bucketed thread dispatcher will
|
||||
* kick off a wave of threads. It could be tweaked on a per application
|
||||
* basis (drirc).
|
||||
*/
|
||||
uint32_t clamped_timeout_counter = 0;
|
||||
#if GFX_VERx10 >= 200
|
||||
clamped_timeout_counter = CLAMP(dispatch_timeout_counter, 64, 4096);
|
||||
if (clamped_timeout_counter <= 256) {
|
||||
clamped_timeout_counter = DIV_ROUND_UP(clamped_timeout_counter, 64) - 1;
|
||||
} else {
|
||||
clamped_timeout_counter = util_logbase2(clamped_timeout_counter) - 5;
|
||||
}
|
||||
#else
|
||||
/* Bspec 43851: Field Dispatch Timeout Counter:
|
||||
*
|
||||
* Concatenated Dispatch Timeout Counter_high [6:5], Dispatch Timeout Counter_low[1:0]
|
||||
*
|
||||
* 0000 : 128 clocks
|
||||
* 0001 : 256 clocks
|
||||
* 0010 : 384 clocks
|
||||
* 0011 : 512 clocks
|
||||
* 0100 : 640 clocks
|
||||
* 0101 : 768 clocks
|
||||
* 0110 : 896 clocks
|
||||
* 0111 : 1024 clocks
|
||||
* 0100 : 1152 clocks
|
||||
* 0101 : 1280 clocks
|
||||
* 0110 : 1408 clocks
|
||||
* 0111 : 1536 clocks
|
||||
* 1100 : 1664 clocks
|
||||
* 1101 : 1792 clocks
|
||||
* 1110 : 1920 clocks
|
||||
* 1111 : 2048 clocks
|
||||
*/
|
||||
clamped_timeout_counter =
|
||||
DIV_ROUND_UP(CLAMP(dispatch_timeout_counter, 128, 2048), 128) - 1;
|
||||
#endif
|
||||
|
||||
return clamped_timeout_counter;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,6 +63,27 @@ static const driOptionDescription anv_dri_options[] = {
|
|||
DRI_CONF_ENUM(512, "512 stackids")
|
||||
DRI_CONF_ENUM(1024, "1024 stackids")
|
||||
DRI_CONF_ENUM(2048, "2048 stackids"))
|
||||
DRI_CONF_OPT_E(dispatch_timeout_counter, 512, 64, 4096,
|
||||
"Force BTD child dispatches if dispatches do not happen naturally for number of clocks equal to the programmed timeout counter",
|
||||
DRI_CONF_ENUM(64, "64 clocks")
|
||||
DRI_CONF_ENUM(128, "128 clocks")
|
||||
DRI_CONF_ENUM(192, "192 clocks")
|
||||
DRI_CONF_ENUM(256, "256 clocks")
|
||||
DRI_CONF_ENUM(384, "384 clocks")
|
||||
DRI_CONF_ENUM(512, "512 clocks")
|
||||
DRI_CONF_ENUM(640, "640 clocks")
|
||||
DRI_CONF_ENUM(768, "768 clocks")
|
||||
DRI_CONF_ENUM(896, "896 clocks")
|
||||
DRI_CONF_ENUM(1024, "1024 clocks")
|
||||
DRI_CONF_ENUM(1152, "1152 clocks")
|
||||
DRI_CONF_ENUM(1280, "1280 clocks")
|
||||
DRI_CONF_ENUM(1408, "1408 clocks")
|
||||
DRI_CONF_ENUM(1536, "1536 clocks")
|
||||
DRI_CONF_ENUM(1664, "1664 clocks")
|
||||
DRI_CONF_ENUM(1792, "1792 clocks")
|
||||
DRI_CONF_ENUM(1920, "1920 clocks")
|
||||
DRI_CONF_ENUM(2048, "2048 clocks")
|
||||
DRI_CONF_ENUM(4096, "4096 clocks"))
|
||||
DRI_CONF_ANV_UPPER_BOUND_DESCRIPTOR_POOL_SAMPLER(false)
|
||||
DRI_CONF_SECTION_END
|
||||
|
||||
|
|
@ -300,6 +321,36 @@ anv_init_dri_options(struct anv_instance *instance)
|
|||
}
|
||||
instance->force_guc_low_latency =
|
||||
driQueryOptionb(&instance->dri_options, "force_guc_low_latency");
|
||||
|
||||
instance->dispatch_timeout_counter =
|
||||
driQueryOptioni(&instance->dri_options, "dispatch_timeout_counter");
|
||||
switch(instance->dispatch_timeout_counter) {
|
||||
case 64:
|
||||
case 128:
|
||||
case 192:
|
||||
case 256:
|
||||
case 384:
|
||||
case 512:
|
||||
case 640:
|
||||
case 768:
|
||||
case 896:
|
||||
case 1024:
|
||||
case 1152:
|
||||
case 1280:
|
||||
case 1408:
|
||||
case 1536:
|
||||
case 1664:
|
||||
case 1792:
|
||||
case 1920:
|
||||
case 2048:
|
||||
case 4096:
|
||||
break;
|
||||
default:
|
||||
mesa_logw("Invalid value provided for drirc dispatch_timeout_counter=%u, reverting to 512.",
|
||||
instance->dispatch_timeout_counter);
|
||||
instance->dispatch_timeout_counter = 512;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
VkResult anv_CreateInstance(
|
||||
|
|
|
|||
|
|
@ -1872,6 +1872,10 @@ struct anv_instance {
|
|||
* Ray tracing configuration.
|
||||
*/
|
||||
unsigned stack_ids;
|
||||
/**
|
||||
* 3DSTATE_BTD dispatch timeout counter configuration.
|
||||
*/
|
||||
unsigned dispatch_timeout_counter;
|
||||
};
|
||||
|
||||
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
|
||||
|
|
|
|||
|
|
@ -1306,12 +1306,17 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
|
|||
#endif
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BTD), btd) {
|
||||
/* TODO: This is the timeout after which the bucketed thread dispatcher
|
||||
* will kick off a wave of threads. We go with the lowest value
|
||||
* for now. It could be tweaked on a per application basis
|
||||
* (drirc).
|
||||
*/
|
||||
btd.DispatchTimeoutCounter = _64clocks;
|
||||
uint32_t dispatch_timeout_counter =
|
||||
cmd_buffer->device->physical->instance->dispatch_timeout_counter;
|
||||
uint32_t clamped_timeout_counter =
|
||||
genX(anv_get_btd_dispatch_timeout_counter)(dispatch_timeout_counter);
|
||||
#if GFX_VERx10 >= 200
|
||||
btd.DispatchTimeoutCounter = clamped_timeout_counter;
|
||||
#else
|
||||
btd.DispatchTimeoutCounter = clamped_timeout_counter & 0x3;
|
||||
btd.DispatchTimeoutCounterExtend = (clamped_timeout_counter >> 2) & 0x3;
|
||||
#endif
|
||||
|
||||
/* BSpec 43851: "This field must be programmed to 6h i.e. memory backed
|
||||
* buffer must be 128KB."
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -353,12 +353,17 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
|
|||
#if GFX_VERx10 >= 125
|
||||
if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
|
||||
anv_batch_emit(batch, GENX(3DSTATE_BTD), btd) {
|
||||
/* TODO: This is the timeout after which the bucketed thread
|
||||
* dispatcher will kick off a wave of threads. We go with the
|
||||
* lowest value for now. It could be tweaked on a per
|
||||
* application basis (drirc).
|
||||
*/
|
||||
btd.DispatchTimeoutCounter = _64clocks;
|
||||
uint32_t dispatch_timeout_counter =
|
||||
device->physical->instance->dispatch_timeout_counter;
|
||||
uint32_t clamped_timeout_counter =
|
||||
genX(anv_get_btd_dispatch_timeout_counter)(dispatch_timeout_counter);
|
||||
#if GFX_VERx10 >= 200
|
||||
btd.DispatchTimeoutCounter = clamped_timeout_counter;
|
||||
#else
|
||||
btd.DispatchTimeoutCounter = clamped_timeout_counter & 0x3;
|
||||
btd.DispatchTimeoutCounterExtend = (clamped_timeout_counter >> 2) & 0x3;
|
||||
#endif
|
||||
|
||||
/* BSpec 43851: "This field must be programmed to 6h i.e. memory
|
||||
* backed buffer must be 128KB."
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue