diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 920d4799f5b..fec0f28028b 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -567,3 +567,64 @@ genX(cmd_buffer_rhwo_wa_14024015672)(struct anv_cmd_buffer *cmd_buffer,
        cmd_buffer->state.pending_rhwo_optimization_enabled != rhwo_opt_enable)
       cmd_buffer->state.pending_rhwo_optimization_enabled = rhwo_opt_enable;
 }
+
+/**
+ * Convert a BTD dispatch timeout given in clocks (the drirc
+ * dispatch_timeout_counter option) into the value programmed in 3DSTATE_BTD.
+ *
+ * This is the timeout after which the bucketed thread dispatcher will kick
+ * off a wave of threads. It can be tweaked on a per application basis
+ * (drirc).
+ *
+ * On GFX_VERx10 >= 200 the result is written as is to
+ * DispatchTimeoutCounter. On older platforms it is a 4 bit value whose bits
+ * [1:0] go to DispatchTimeoutCounter and bits [3:2] to
+ * DispatchTimeoutCounterExtend.
+ */
+static inline uint32_t
+genX(anv_get_btd_dispatch_timeout_counter)(uint32_t dispatch_timeout_counter)
+{
+   uint32_t clamped_timeout_counter = 0;
+#if GFX_VERx10 >= 200
+   /* 64, 128, 192 and 256 clocks map to 0..3 (steps of 64 clocks). Above
+    * that only powers of two are encoded: 512, 1024, 2048 and 4096 clocks
+    * map to 4..7. util_logbase2() rounds down, so an in-between request
+    * such as 384 or 1920 clocks selects the next lower encodable timeout.
+    */
+   clamped_timeout_counter = CLAMP(dispatch_timeout_counter, 64, 4096);
+   if (clamped_timeout_counter <= 256) {
+      clamped_timeout_counter = DIV_ROUND_UP(clamped_timeout_counter, 64) - 1;
+   } else {
+      clamped_timeout_counter = util_logbase2(clamped_timeout_counter) - 5;
+   }
+#else
+   /* Bspec 43851: Field Dispatch Timeout Counter:
+    *
+    * Concatenated Dispatch Timeout Counter_high [6:5], Dispatch Timeout Counter_low[1:0]
+    *
+    * 0000 : 128 clocks
+    * 0001 : 256 clocks
+    * 0010 : 384 clocks
+    * 0011 : 512 clocks
+    * 0100 : 640 clocks
+    * 0101 : 768 clocks
+    * 0110 : 896 clocks
+    * 0111 : 1024 clocks
+    * 1000 : 1152 clocks
+    * 1001 : 1280 clocks
+    * 1010 : 1408 clocks
+    * 1011 : 1536 clocks
+    * 1100 : 1664 clocks
+    * 1101 : 1792 clocks
+    * 1110 : 1920 clocks
+    * 1111 : 2048 clocks
+    *
+    * Requests are clamped to [128, 2048] clocks and rounded up to the next
+    * multiple of 128 clocks.
+    */
+   clamped_timeout_counter =
+      DIV_ROUND_UP(CLAMP(dispatch_timeout_counter, 128, 2048), 128) - 1;
+#endif
+
+   return clamped_timeout_counter;
+}
diff --git a/src/intel/vulkan/anv_instance.c b/src/intel/vulkan/anv_instance.c
index da370863556..79658879da8 100644
--- a/src/intel/vulkan/anv_instance.c
+++ b/src/intel/vulkan/anv_instance.c
@@ -63,6 +63,27 @@ static const driOptionDescription anv_dri_options[] = {
                      DRI_CONF_ENUM(512, "512 stackids")
                      DRI_CONF_ENUM(1024, "1024 stackids")
                      DRI_CONF_ENUM(2048, "2048 stackids"))
+      DRI_CONF_OPT_E(dispatch_timeout_counter, 512, 64, 4096,
+                     "Force BTD child dispatches if dispatches do not happen naturally for number of clocks equal to the programmed timeout counter",
+                     DRI_CONF_ENUM(64, "64 clocks")
+                     DRI_CONF_ENUM(128, "128 clocks")
+                     DRI_CONF_ENUM(192, "192 clocks")
+                     DRI_CONF_ENUM(256, "256 clocks")
+                     DRI_CONF_ENUM(384, "384 clocks")
+                     DRI_CONF_ENUM(512, "512 clocks")
+                     DRI_CONF_ENUM(640, "640 clocks")
+                     DRI_CONF_ENUM(768, "768 clocks")
+                     DRI_CONF_ENUM(896, "896 clocks")
+                     DRI_CONF_ENUM(1024, "1024 clocks")
+                     DRI_CONF_ENUM(1152, "1152 clocks")
+                     DRI_CONF_ENUM(1280, "1280 clocks")
+                     DRI_CONF_ENUM(1408, "1408 clocks")
+                     DRI_CONF_ENUM(1536, "1536 clocks")
+                     DRI_CONF_ENUM(1664, "1664 clocks")
+                     DRI_CONF_ENUM(1792, "1792 clocks")
+                     DRI_CONF_ENUM(1920, "1920 clocks")
+                     DRI_CONF_ENUM(2048, "2048 clocks")
+                     DRI_CONF_ENUM(4096, "4096 clocks"))
       DRI_CONF_ANV_UPPER_BOUND_DESCRIPTOR_POOL_SAMPLER(false)
    DRI_CONF_SECTION_END
 
@@ -300,6 +321,36 @@ anv_init_dri_options(struct anv_instance *instance)
     }
     instance->force_guc_low_latency =
             driQueryOptionb(&instance->dri_options, "force_guc_low_latency");
+
+    instance->dispatch_timeout_counter =
+            driQueryOptioni(&instance->dri_options, "dispatch_timeout_counter");
+    switch (instance->dispatch_timeout_counter) {
+    case 64:
+    case 128:
+    case 192:
+    case 256:
+    case 384:
+    case 512:
+    case 640:
+    case 768:
+    case 896:
+    case 1024:
+    case 1152:
+    case 1280:
+    case 1408:
+    case 1536:
+    case 1664:
+    case 1792:
+    case 1920:
+    case 2048:
+    case 4096:
+       break;
+    default:
+       mesa_logw("Invalid value provided for drirc dispatch_timeout_counter=%u, reverting to 512.",
+                 instance->dispatch_timeout_counter);
+       instance->dispatch_timeout_counter = 512;
+       break;
+    }
 }
 
 VkResult anv_CreateInstance(
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 360b4cbe508..d57715a7f86 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1872,6 +1872,11 @@ struct anv_instance {
      * Ray tracing configuration.
      */
     unsigned                                    stack_ids;
+    /**
+     * 3DSTATE_BTD dispatch timeout counter configuration, in clocks (drirc
+     * dispatch_timeout_counter).
+     */
+    unsigned                                    dispatch_timeout_counter;
 };
 
 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c
index 0d6397fc5db..e9dd3528c5b 100644
--- a/src/intel/vulkan/genX_cmd_compute.c
+++ b/src/intel/vulkan/genX_cmd_compute.c
@@ -1306,12 +1306,18 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
 #endif
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BTD), btd) {
-      /* TODO: This is the timeout after which the bucketed thread dispatcher
-       *       will kick off a wave of threads. We go with the lowest value
-       *       for now. It could be tweaked on a per application basis
-       *       (drirc).
-       */
-      btd.DispatchTimeoutCounter = _64clocks;
+      uint32_t dispatch_timeout_counter =
+         cmd_buffer->device->physical->instance->dispatch_timeout_counter;
+      uint32_t clamped_timeout_counter =
+         genX(anv_get_btd_dispatch_timeout_counter)(dispatch_timeout_counter);
+#if GFX_VERx10 >= 200
+      btd.DispatchTimeoutCounter = clamped_timeout_counter;
+#else
+      /* The 4 bit encoding is split: bits [1:0] here, bits [3:2] in Extend. */
+      btd.DispatchTimeoutCounter = clamped_timeout_counter & 0x3;
+      btd.DispatchTimeoutCounterExtend = (clamped_timeout_counter >> 2) & 0x3;
+#endif
+
       /* BSpec 43851: "This field must be programmed to 6h i.e. memory backed
        *               buffer must be 128KB."
        */
diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c
index f6ceddcac9f..578452c6c4a 100644
--- a/src/intel/vulkan/genX_init_state.c
+++ b/src/intel/vulkan/genX_init_state.c
@@ -353,12 +353,18 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
 #if GFX_VERx10 >= 125
    if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
       anv_batch_emit(batch, GENX(3DSTATE_BTD), btd) {
-         /* TODO: This is the timeout after which the bucketed thread
-          *       dispatcher will kick off a wave of threads. We go with the
-          *       lowest value for now. It could be tweaked on a per
-          *       application basis (drirc).
-          */
-         btd.DispatchTimeoutCounter = _64clocks;
+         uint32_t dispatch_timeout_counter =
+            device->physical->instance->dispatch_timeout_counter;
+         uint32_t clamped_timeout_counter =
+            genX(anv_get_btd_dispatch_timeout_counter)(dispatch_timeout_counter);
+#if GFX_VERx10 >= 200
+         btd.DispatchTimeoutCounter = clamped_timeout_counter;
+#else
+         /* The 4 bit encoding is split: bits [1:0] here, bits [3:2] in Extend. */
+         btd.DispatchTimeoutCounter = clamped_timeout_counter & 0x3;
+         btd.DispatchTimeoutCounterExtend = (clamped_timeout_counter >> 2) & 0x3;
+#endif
+
          /* BSpec 43851: "This field must be programmed to 6h i.e. memory
           *               backed buffer must be 128KB."
           */