diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 311d114537c..6a6c098c6eb 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -1413,7 +1413,7 @@ iris_init_render_context(struct iris_batch *batch) #if GFX_VER >= 30 iris_emit_cmd(batch, GENX(STATE_COMPUTE_MODE), cm) { cm.EnableVariableRegisterSizeAllocationMask = 1; - cm.EnableVariableRegisterSizeAllocation = true; + cm.EnableVariableRegisterSizeAllocation = !INTEL_DEBUG(DEBUG_NO_VRT); } #endif @@ -1549,7 +1549,7 @@ iris_init_compute_context(struct iris_batch *batch) iris_emit_cmd(batch, GENX(STATE_COMPUTE_MODE), cm) { #if GFX_VER >= 30 cm.EnableVariableRegisterSizeAllocationMask = 1; - cm.EnableVariableRegisterSizeAllocation = true; + cm.EnableVariableRegisterSizeAllocation = !INTEL_DEBUG(DEBUG_NO_VRT); #endif #if GFX_VER >= 20 cm.AsyncComputeThreadLimit = pixel_async_compute_thread_limit; diff --git a/src/intel/common/intel_common.c b/src/intel/common/intel_common.c index 8c3053dc009..d9ebaa36784 100644 --- a/src/intel/common/intel_common.c +++ b/src/intel/common/intel_common.c @@ -5,6 +5,8 @@ #include +#include "dev/intel_debug.h" + #include "intel_common.h" #include "intel_engine.h" @@ -47,7 +49,7 @@ intel_compute_engine_async_threads_limit(const struct intel_device_info *devinfo uint8_t pixel_async_compute_thread_limit = 2; uint8_t z_pass_async_compute_thread_limit = 0; uint8_t np_z_async_throttle_settings = 0; - bool has_vrt = devinfo->verx10 >= 300; + bool has_vrt = devinfo->verx10 >= 300 && !INTEL_DEBUG(DEBUG_NO_VRT); /* When VRT is enabled async threads limits don't have effect */ if (!slm_or_barrier_enabled || has_vrt) { diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 0a5afe83c2b..a534f4d5a5c 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -253,6 +253,7 @@ brw_get_compiler_config_value(const struct brw_compiler *compiler) DEBUG_DO32, 
DEBUG_SOFT64, DEBUG_NO_SEND_GATHER, + DEBUG_NO_VRT, }; for (uint32_t i = 0; i < ARRAY_SIZE(debug_bits); i++) { insert_u64_bit(&config, INTEL_DEBUG(debug_bits[i])); @@ -385,3 +386,13 @@ brw_stage_prog_data_add_printf(struct brw_stage_prog_data *prog_data, print->arg_sizes, sizeof(print->arg_sizes[0]) *print->num_args); } } + +unsigned +ptl_register_blocks(unsigned grf_used) +{ /* Maps a GRF register count to a register-block value; when the DEBUG_NO_VRT debug flag is set the result does not depend on grf_used. */ + if (INTEL_DEBUG(DEBUG_NO_VRT)) /* debug override: skip the per-shader computation below */ + return (BRW_MAX_GRF / 32) - 1; /* fixed value: BRW_MAX_GRF expressed in 32-register units, minus one */ + + const unsigned n = DIV_ROUND_UP(grf_used, 32) - 1; /* 32-register units needed to cover grf_used, minus one. NOTE(review): presumably DIV_ROUND_UP(0, 32) is 0, in which case this unsigned subtraction wraps and 7 is returned below - confirm callers never pass grf_used == 0 */ + return (n < 6 ? n : 7); /* 0..5 are returned unchanged; 6 and anything larger are reported as 7 */ +} diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index a8e6e6a1ecb..eec83d7faca 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -652,12 +652,7 @@ struct brw_stage_prog_data { * Convert a number of GRF registers used (grf_used in prog_data) into * a number of GRF register blocks supported by the hardware on PTL+. */ -static inline unsigned -ptl_register_blocks(unsigned grf_used) -{ - const unsigned n = DIV_ROUND_UP(grf_used, 32) - 1; - return (n < 6 ? n : 7); -} +unsigned ptl_register_blocks(unsigned grf_used); static inline uint32_t * brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data, diff --git a/src/intel/compiler/brw_reg_allocate.cpp b/src/intel/compiler/brw_reg_allocate.cpp index 81a51369b96..fc326489a28 100644 --- a/src/intel/compiler/brw_reg_allocate.cpp +++ b/src/intel/compiler/brw_reg_allocate.cpp @@ -29,6 +29,7 @@ #include "brw_shader.h" #include "brw_builder.h" #include "brw_cfg.h" +#include "dev/intel_debug.h" #include "util/set.h" #include "util/register_allocate.h" @@ -80,7 +81,8 @@ extern "C" void brw_alloc_reg_sets(struct brw_compiler *compiler) { const struct intel_device_info *devinfo = compiler->devinfo; - int base_reg_count = (devinfo->ver >= 30 ? XE3_MAX_GRF / reg_unit(devinfo) : + int base_reg_count = (devinfo->ver >= 30 && !INTEL_DEBUG(DEBUG_NO_VRT) ? 
+ XE3_MAX_GRF / reg_unit(devinfo) : BRW_MAX_GRF); /* The registers used to make up almost all values handled in the compiler diff --git a/src/intel/dev/intel_debug.c b/src/intel/dev/intel_debug.c index c132b8f03b1..f9a50b9b451 100644 --- a/src/intel/dev/intel_debug.c +++ b/src/intel/dev/intel_debug.c @@ -121,6 +121,7 @@ static const struct debug_control_bitset debug_control[] = { OPT1("shader-print", DEBUG_SHADER_PRINT), OPT1("cl-quiet", DEBUG_CL_QUIET), OPT1("no-send-gather", DEBUG_NO_SEND_GATHER), + OPT1("no-vrt", DEBUG_NO_VRT), OPT1("shaders-lineno", DEBUG_SHADERS_LINENO), OPT1("show_shader_stage", DEBUG_SHOW_SHADER_STAGE), { NULL, } diff --git a/src/intel/dev/intel_debug.h b/src/intel/dev/intel_debug.h index 1635ba740aa..3d2b6133ba5 100644 --- a/src/intel/dev/intel_debug.h +++ b/src/intel/dev/intel_debug.h @@ -92,6 +92,7 @@ enum intel_debug_flag { DEBUG_BVH_TLAS_IR_AS, DEBUG_BVH_NO_BUILD, DEBUG_NO_SEND_GATHER, + DEBUG_NO_VRT, DEBUG_RT_NO_TRACE, DEBUG_SHADERS_LINENO, DEBUG_SHOW_SHADER_STAGE, diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c index 331fb6ba070..6075568991b 100644 --- a/src/intel/vulkan/genX_init_state.c +++ b/src/intel/vulkan/genX_init_state.c @@ -636,7 +636,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch) #if GFX_VERx10 >= 125 anv_batch_emit(batch, GENX(STATE_COMPUTE_MODE), cm) { #if GFX_VER >= 30 - cm.EnableVariableRegisterSizeAllocation = true; + cm.EnableVariableRegisterSizeAllocation = !INTEL_DEBUG(DEBUG_NO_VRT); #endif cm.Mask1 = 0xffff; #if GFX_VERx10 >= 200 @@ -774,7 +774,7 @@ init_compute_queue_state(struct anv_queue *queue) anv_batch_emit(batch, GENX(STATE_COMPUTE_MODE), cm) { #if GFX_VER >= 30 cm.EnableVariableRegisterSizeAllocationMask = 1; - cm.EnableVariableRegisterSizeAllocation = true; + cm.EnableVariableRegisterSizeAllocation = !INTEL_DEBUG(DEBUG_NO_VRT); #endif #if GFX_VER >= 20 cm.AsyncComputeThreadLimit = ACTL_Max8;