mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 19:10:14 +01:00
intel: Add INTEL_DEBUG=no-vrt
Add support for disabling the VRT (Variable Register Thread) feature. The strategy here is to force the old BRW_MAX_GRF limit for the register allocator (which locks the upper limit) and to make sure ptl_register_blocks() always returns that number of blocks (which locks the lower limit).
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35781>
This commit is contained in:
parent
636c37990a
commit
887642b0f2
8 changed files with 24 additions and 12 deletions
|
|
@ -1413,7 +1413,7 @@ iris_init_render_context(struct iris_batch *batch)
|
|||
#if GFX_VER >= 30
|
||||
iris_emit_cmd(batch, GENX(STATE_COMPUTE_MODE), cm) {
|
||||
cm.EnableVariableRegisterSizeAllocationMask = 1;
|
||||
cm.EnableVariableRegisterSizeAllocation = true;
|
||||
cm.EnableVariableRegisterSizeAllocation = !INTEL_DEBUG(DEBUG_NO_VRT);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -1549,7 +1549,7 @@ iris_init_compute_context(struct iris_batch *batch)
|
|||
iris_emit_cmd(batch, GENX(STATE_COMPUTE_MODE), cm) {
|
||||
#if GFX_VER >= 30
|
||||
cm.EnableVariableRegisterSizeAllocationMask = 1;
|
||||
cm.EnableVariableRegisterSizeAllocation = true;
|
||||
cm.EnableVariableRegisterSizeAllocation = !INTEL_DEBUG(DEBUG_NO_VRT);
|
||||
#endif
|
||||
#if GFX_VER >= 20
|
||||
cm.AsyncComputeThreadLimit = pixel_async_compute_thread_limit;
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@
|
|||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "dev/intel_debug.h"
|
||||
|
||||
#include "intel_common.h"
|
||||
|
||||
#include "intel_engine.h"
|
||||
|
|
@ -47,7 +49,7 @@ intel_compute_engine_async_threads_limit(const struct intel_device_info *devinfo
|
|||
uint8_t pixel_async_compute_thread_limit = 2;
|
||||
uint8_t z_pass_async_compute_thread_limit = 0;
|
||||
uint8_t np_z_async_throttle_settings = 0;
|
||||
bool has_vrt = devinfo->verx10 >= 300;
|
||||
bool has_vrt = devinfo->verx10 >= 300 && !INTEL_DEBUG(DEBUG_NO_VRT);
|
||||
|
||||
/* When VRT is enabled async threads limits don't have effect */
|
||||
if (!slm_or_barrier_enabled || has_vrt) {
|
||||
|
|
|
|||
|
|
@ -253,6 +253,7 @@ brw_get_compiler_config_value(const struct brw_compiler *compiler)
|
|||
DEBUG_DO32,
|
||||
DEBUG_SOFT64,
|
||||
DEBUG_NO_SEND_GATHER,
|
||||
DEBUG_NO_VRT,
|
||||
};
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(debug_bits); i++) {
|
||||
insert_u64_bit(&config, INTEL_DEBUG(debug_bits[i]));
|
||||
|
|
@ -385,3 +386,13 @@ brw_stage_prog_data_add_printf(struct brw_stage_prog_data *prog_data,
|
|||
print->arg_sizes, sizeof(print->arg_sizes[0]) *print->num_args);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convert a GRF register count (grf_used) into a register-block value.
 *
 * When INTEL_DEBUG(DEBUG_NO_VRT) is set, the result no longer depends on
 * grf_used: it is always (BRW_MAX_GRF / 32) - 1, so every caller gets the
 * same block count.
 *
 * Otherwise grf_used is rounded up to a whole number of 32-register blocks
 * and reduced by one. Results below 6 are returned unchanged; anything else
 * is returned as 7.
 */
unsigned
|
||||
ptl_register_blocks(unsigned grf_used)
|
||||
{
|
||||
if (INTEL_DEBUG(DEBUG_NO_VRT))
|
||||
return (BRW_MAX_GRF / 32) - 1;
|
||||
|
||||
/* NOTE(review): if grf_used is 0 and DIV_ROUND_UP(0, 32) evaluates to 0,
 * the unsigned subtraction wraps and the function returns 7 -- confirm
 * that callers never pass 0 or that this outcome is acceptable.
 */
const unsigned n = DIV_ROUND_UP(grf_used, 32) - 1;
|
||||
return (n < 6 ? n : 7);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -652,12 +652,7 @@ struct brw_stage_prog_data {
|
|||
* Convert a number of GRF registers used (grf_used in prog_data) into
|
||||
* a number of GRF register blocks supported by the hardware on PTL+.
|
||||
*/
|
||||
static inline unsigned
|
||||
ptl_register_blocks(unsigned grf_used)
|
||||
{
|
||||
const unsigned n = DIV_ROUND_UP(grf_used, 32) - 1;
|
||||
return (n < 6 ? n : 7);
|
||||
}
|
||||
unsigned ptl_register_blocks(unsigned grf_used);
|
||||
|
||||
static inline uint32_t *
|
||||
brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data,
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@
|
|||
#include "brw_shader.h"
|
||||
#include "brw_builder.h"
|
||||
#include "brw_cfg.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "util/set.h"
|
||||
#include "util/register_allocate.h"
|
||||
|
||||
|
|
@ -80,7 +81,8 @@ extern "C" void
|
|||
brw_alloc_reg_sets(struct brw_compiler *compiler)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
int base_reg_count = (devinfo->ver >= 30 ? XE3_MAX_GRF / reg_unit(devinfo) :
|
||||
int base_reg_count = (devinfo->ver >= 30 && !INTEL_DEBUG(DEBUG_NO_VRT) ?
|
||||
XE3_MAX_GRF / reg_unit(devinfo) :
|
||||
BRW_MAX_GRF);
|
||||
|
||||
/* The registers used to make up almost all values handled in the compiler
|
||||
|
|
|
|||
|
|
@ -121,6 +121,7 @@ static const struct debug_control_bitset debug_control[] = {
|
|||
OPT1("shader-print", DEBUG_SHADER_PRINT),
|
||||
OPT1("cl-quiet", DEBUG_CL_QUIET),
|
||||
OPT1("no-send-gather", DEBUG_NO_SEND_GATHER),
|
||||
OPT1("no-vrt", DEBUG_NO_VRT),
|
||||
OPT1("shaders-lineno", DEBUG_SHADERS_LINENO),
|
||||
OPT1("show_shader_stage", DEBUG_SHOW_SHADER_STAGE),
|
||||
{ NULL, }
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ enum intel_debug_flag {
|
|||
DEBUG_BVH_TLAS_IR_AS,
|
||||
DEBUG_BVH_NO_BUILD,
|
||||
DEBUG_NO_SEND_GATHER,
|
||||
DEBUG_NO_VRT,
|
||||
DEBUG_RT_NO_TRACE,
|
||||
DEBUG_SHADERS_LINENO,
|
||||
DEBUG_SHOW_SHADER_STAGE,
|
||||
|
|
|
|||
|
|
@ -636,7 +636,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch)
|
|||
#if GFX_VERx10 >= 125
|
||||
anv_batch_emit(batch, GENX(STATE_COMPUTE_MODE), cm) {
|
||||
#if GFX_VER >= 30
|
||||
cm.EnableVariableRegisterSizeAllocation = true;
|
||||
cm.EnableVariableRegisterSizeAllocation = !INTEL_DEBUG(DEBUG_NO_VRT);
|
||||
#endif
|
||||
cm.Mask1 = 0xffff;
|
||||
#if GFX_VERx10 >= 200
|
||||
|
|
@ -774,7 +774,7 @@ init_compute_queue_state(struct anv_queue *queue)
|
|||
anv_batch_emit(batch, GENX(STATE_COMPUTE_MODE), cm) {
|
||||
#if GFX_VER >= 30
|
||||
cm.EnableVariableRegisterSizeAllocationMask = 1;
|
||||
cm.EnableVariableRegisterSizeAllocation = true;
|
||||
cm.EnableVariableRegisterSizeAllocation = !INTEL_DEBUG(DEBUG_NO_VRT);
|
||||
#endif
|
||||
#if GFX_VER >= 20
|
||||
cm.AsyncComputeThreadLimit = ACTL_Max8;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue