mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-04 19:10:35 +02:00
When the register allocator decides to spill a value, all writes to that value are spilled and all reads are filled. In regions where there is not high register pressure, a spill of a value may be followed by a fill of that same value while the spilled register is still live. This optimization pass finds these cases, and it converts the fill to a move from the still-live register. The restriction that the spill and the fill must have matching NoMask really hampers this optimization. With the restriction removed, the pass was more than 2x as helpful. v2: Require force_writemask_all to be the same for the spill and the fill. v3: Use FIXED_GRF for register overlap tests. Since this is after register allocation, the VGRF values will not tell the whole truth. v4: Use brw_transform_inst. Suggested by Caio. This allows two of the loops to be merged. Add brw_scratch_inst::offset instead of storing it as a source. Suggested by Lionel. v5: Add no-fill-opt debug option to disable optimizations. Suggested by Lionel. v6: Move a calculation outside a loop. Suggested by Lionel. v7: Check that spill ranges overlap instead of just checking initial offset. Zero shaders in fossil-db were affected, but some CTS with spill_fs were fixed (e.g., dEQP-VK.subgroups.arithmetic.compute.subgroupmin_uint64_t_requiredsubgroupsize). Suggested by Lionel. v8: Add DEBUG_NO_FILL_OPT to debug_bits in brw_get_compiler_config_value(). Noticed by Lionel. shader-db: Lunar Lake total instructions in shared programs: 17249907 -> 17249903 (<.01%) instructions in affected programs: 10684 -> 10680 (-0.04%) helped: 2 / HURT: 0 total cycles in shared programs: 893092630 -> 893092398 (<.01%) cycles in affected programs: 237320 -> 237088 (-0.10%) helped: 2 / HURT: 0 total fills in shared programs: 1903 -> 1901 (-0.11%) fills in affected programs: 110 -> 108 (-1.82%) helped: 2 / HURT: 0 Meteor Lake and DG2 had similar results.
(Meteor Lake shown) total instructions in shared programs: 19968898 -> 19968778 (<.01%) instructions in affected programs: 33020 -> 32900 (-0.36%) helped: 10 / HURT: 0 total cycles in shared programs: 885157211 -> 884925015 (-0.03%) cycles in affected programs: 39944544 -> 39712348 (-0.58%) helped: 8 / HURT: 2 total fills in shared programs: 4454 -> 4394 (-1.35%) fills in affected programs: 2678 -> 2618 (-2.24%) helped: 10 / HURT: 0 fossil-db: Lunar Lake Totals: Instrs: 930445228 -> 929949528 (-0.05%) Cycle count: 105195579417 -> 105126671329 (-0.07%); split: -0.07%, +0.00% Spill count: 3495279 -> 3494400 (-0.03%) Fill count: 6767063 -> 6520785 (-3.64%) Totals from 43844 (2.17% of 2018922) affected shaders: Instrs: 212614840 -> 212119140 (-0.23%) Cycle count: 19151130510 -> 19082222422 (-0.36%); split: -0.39%, +0.03% Spill count: 2831100 -> 2830221 (-0.03%) Fill count: 6128316 -> 5882038 (-4.02%) Meteor Lake and DG2 had similar results. (Meteor Lake shown) Totals: Instrs: 1001375893 -> 1001113407 (-0.03%) Cycle count: 92746180943 -> 92679877883 (-0.07%); split: -0.08%, +0.01% Spill count: 3729157 -> 3728585 (-0.02%) Fill count: 6697296 -> 6566874 (-1.95%) Totals from 35062 (1.53% of 2284674) affected shaders: Instrs: 179819265 -> 179556779 (-0.15%) Cycle count: 18111194752 -> 18044891692 (-0.37%); split: -0.41%, +0.04% Spill count: 2453752 -> 2453180 (-0.02%) Fill count: 5279259 -> 5148837 (-2.47%) Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37827>
312 lines
11 KiB
C
312 lines
11 KiB
C
/*
 * Copyright 2003 VMware, Inc.
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
|
|
|
|
/**
 * \file intel_debug.c
 *
 * Support for the INTEL_DEBUG environment variable, along with other
 * miscellaneous debugging code.
 */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "dev/intel_debug.h"
|
|
#include "util/macros.h"
|
|
#include "util/u_debug.h"
|
|
#include "util/u_math.h"
|
|
#include "c11/threads.h"
|
|
|
|
/* Global debug-flag bitset.  It is zeroed and then filled from the
 * INTEL_DEBUG option by process_intel_debug_variable_once(); bits are indexed
 * by the DEBUG_* values listed in debug_control[].
 */
BITSET_WORD intel_debug[BITSET_WORDS(INTEL_DEBUG_MAX)] = {0};
|
|
|
|
/**
 * One entry of a keyword table consumed by parse_debug_bitset().
 *
 * A keyword selects an inclusive range of bit indices, so a single name can
 * toggle either one flag (first == last) or a contiguous run of flags.
 */
struct debug_control_bitset {
   /** Keyword compared against each token; a NULL string ends the table. */
   const char *string;

   /** Inclusive { first, last } bit indices affected by the keyword. */
   uint32_t range[2];
};
|
|
|
|
static const struct debug_control_bitset debug_control[] = {
|
|
#define OPT1(name, bit) \
|
|
{ .string = name, .range = { bit, bit }, }
|
|
#define OPT2(name, start, end) \
|
|
{ .string = name, .range = { start, end }, }
|
|
OPT1("tex", DEBUG_TEXTURE),
|
|
OPT1("blit", DEBUG_BLIT),
|
|
OPT1("fall", DEBUG_PERF),
|
|
OPT1("perf", DEBUG_PERF),
|
|
OPT1("perfmon", DEBUG_PERFMON),
|
|
OPT1("bat", DEBUG_BATCH),
|
|
OPT1("buf", DEBUG_BUFMGR),
|
|
OPT1("fs", DEBUG_WM),
|
|
OPT1("gs", DEBUG_GS),
|
|
OPT1("sync", DEBUG_SYNC),
|
|
OPT1("sf", DEBUG_SF),
|
|
OPT1("submit", DEBUG_SUBMIT),
|
|
OPT1("wm", DEBUG_WM),
|
|
OPT1("urb", DEBUG_URB),
|
|
OPT1("vs", DEBUG_VS),
|
|
OPT1("clip", DEBUG_CLIP),
|
|
OPT1("no16", DEBUG_NO16),
|
|
OPT1("blorp", DEBUG_BLORP),
|
|
OPT1("nodualobj", DEBUG_NO_DUAL_OBJECT_GS),
|
|
OPT1("optimizer", DEBUG_OPTIMIZER),
|
|
OPT1("mda", DEBUG_MDA),
|
|
OPT1("ann", DEBUG_ANNOTATION),
|
|
OPT1("no8", DEBUG_NO8),
|
|
OPT1("no-oaconfig", DEBUG_NO_OACONFIG),
|
|
OPT1("no-fill-opt", DEBUG_NO_FILL_OPT),
|
|
OPT1("spill_fs", DEBUG_SPILL_FS),
|
|
OPT1("spill_vec4", DEBUG_SPILL_VEC4),
|
|
OPT1("cs", DEBUG_CS),
|
|
OPT1("hex", DEBUG_HEX),
|
|
OPT1("nocompact", DEBUG_NO_COMPACTION),
|
|
OPT1("hs", DEBUG_TCS),
|
|
OPT1("tcs", DEBUG_TCS),
|
|
OPT1("ds", DEBUG_TES),
|
|
OPT1("tes", DEBUG_TES),
|
|
OPT1("l3", DEBUG_L3),
|
|
OPT1("do32", DEBUG_DO32),
|
|
OPT1("norbc", DEBUG_NO_CCS),
|
|
OPT1("noccs", DEBUG_NO_CCS),
|
|
OPT1("noccs-modifier", DEBUG_NO_CCS_MODIFIER),
|
|
OPT1("nohiz", DEBUG_NO_HIZ),
|
|
OPT1("color", DEBUG_COLOR),
|
|
OPT1("reemit", DEBUG_REEMIT),
|
|
OPT1("soft64", DEBUG_SOFT64),
|
|
OPT1("bt", DEBUG_BT),
|
|
OPT1("pc", DEBUG_PIPE_CONTROL),
|
|
OPT1("nofc", DEBUG_NO_FAST_CLEAR),
|
|
OPT1("no32", DEBUG_NO32),
|
|
OPT2("shaders", DEBUG_VS, DEBUG_RT),
|
|
OPT1("rt", DEBUG_RT),
|
|
OPT1("rt_notrace", DEBUG_RT_NO_TRACE),
|
|
OPT1("bvh_blas", DEBUG_BVH_BLAS),
|
|
OPT1("bvh_tlas", DEBUG_BVH_TLAS),
|
|
OPT1("bvh_blas_ir_hdr", DEBUG_BVH_BLAS_IR_HDR),
|
|
OPT1("bvh_tlas_ir_hdr", DEBUG_BVH_TLAS_IR_HDR),
|
|
OPT1("bvh_blas_ir_as", DEBUG_BVH_BLAS_IR_AS),
|
|
OPT1("bvh_tlas_ir_as", DEBUG_BVH_TLAS_IR_AS),
|
|
OPT1("bvh_no_build", DEBUG_BVH_NO_BUILD),
|
|
OPT1("task", DEBUG_TASK),
|
|
OPT1("mesh", DEBUG_MESH),
|
|
OPT1("stall", DEBUG_STALL),
|
|
OPT1("capture-all", DEBUG_CAPTURE_ALL),
|
|
OPT1("perf-symbol-names", DEBUG_PERF_SYMBOL_NAMES),
|
|
OPT1("swsb-stall", DEBUG_SWSB_STALL),
|
|
OPT1("heaps", DEBUG_HEAPS),
|
|
OPT1("isl", DEBUG_ISL),
|
|
OPT1("sparse", DEBUG_SPARSE),
|
|
OPT1("draw_bkp", DEBUG_DRAW_BKP),
|
|
OPT1("dispatch_bkp", DEBUG_DISPATCH_BKP),
|
|
OPT1("bat-stats", DEBUG_BATCH_STATS),
|
|
OPT1("reg-pressure", DEBUG_REG_PRESSURE),
|
|
OPT1("shader-print", DEBUG_SHADER_PRINT),
|
|
OPT1("cl-quiet", DEBUG_CL_QUIET),
|
|
OPT1("no-send-gather", DEBUG_NO_SEND_GATHER),
|
|
OPT1("no-vrt", DEBUG_NO_VRT),
|
|
OPT1("shaders-lineno", DEBUG_SHADERS_LINENO),
|
|
OPT1("show_shader_stage", DEBUG_SHOW_SHADER_STAGE),
|
|
{ NULL, }
|
|
#undef OPT1
|
|
#undef OPT2
|
|
};
|
|
/* Mask of enabled SIMD-mode flags.  Parsed from the INTEL_SIMD_DEBUG option
 * and then adjusted in process_intel_debug_variable_once().
 */
uint64_t intel_simd = 0;
|
|
|
|
static const struct debug_control simd_control[] = {
|
|
{ "fs8", DEBUG_FS_SIMD8 },
|
|
{ "fs16", DEBUG_FS_SIMD16 },
|
|
{ "fs32", DEBUG_FS_SIMD32 },
|
|
{ "fs2x8", DEBUG_FS_SIMD2X8 },
|
|
{ "fs4x8", DEBUG_FS_SIMD4X8 },
|
|
{ "fs2x16", DEBUG_FS_SIMD2X16 },
|
|
{ "cs8", DEBUG_CS_SIMD8 },
|
|
{ "cs16", DEBUG_CS_SIMD16 },
|
|
{ "cs32", DEBUG_CS_SIMD32 },
|
|
{ "ts8", DEBUG_TS_SIMD8 },
|
|
{ "ts16", DEBUG_TS_SIMD16 },
|
|
{ "ts32", DEBUG_TS_SIMD32 },
|
|
{ "ms8", DEBUG_MS_SIMD8 },
|
|
{ "ms16", DEBUG_MS_SIMD16 },
|
|
{ "ms32", DEBUG_MS_SIMD32 },
|
|
{ "rt8", DEBUG_RT_SIMD8 },
|
|
{ "rt16", DEBUG_RT_SIMD16 },
|
|
{ "rt32", DEBUG_RT_SIMD32 },
|
|
{ NULL, 0 }
|
|
};
|
|
|
|
uint64_t
|
|
intel_debug_flag_for_shader_stage(mesa_shader_stage stage)
|
|
{
|
|
uint64_t flags[] = {
|
|
[MESA_SHADER_VERTEX] = DEBUG_VS,
|
|
[MESA_SHADER_TESS_CTRL] = DEBUG_TCS,
|
|
[MESA_SHADER_TESS_EVAL] = DEBUG_TES,
|
|
[MESA_SHADER_GEOMETRY] = DEBUG_GS,
|
|
[MESA_SHADER_FRAGMENT] = DEBUG_WM,
|
|
[MESA_SHADER_COMPUTE] = DEBUG_CS,
|
|
[MESA_SHADER_KERNEL] = DEBUG_CS,
|
|
|
|
[MESA_SHADER_TASK] = DEBUG_TASK,
|
|
[MESA_SHADER_MESH] = DEBUG_MESH,
|
|
|
|
[MESA_SHADER_RAYGEN] = DEBUG_RT,
|
|
[MESA_SHADER_ANY_HIT] = DEBUG_RT,
|
|
[MESA_SHADER_CLOSEST_HIT] = DEBUG_RT,
|
|
[MESA_SHADER_MISS] = DEBUG_RT,
|
|
[MESA_SHADER_INTERSECTION] = DEBUG_RT,
|
|
[MESA_SHADER_CALLABLE] = DEBUG_RT,
|
|
};
|
|
return flags[stage];
|
|
}
|
|
|
|
/* Per-stage masks covering every SIMD mode that simd_control[] lets the user
 * select for that stage.  process_intel_debug_variable_once() uses them to
 * detect "no mode chosen for this stage" and then enables the whole family.
 *
 * The fragment mask must include the fs2x8/fs4x8/fs2x16 flags as well:
 * otherwise selecting only one of those keywords would look like "nothing
 * chosen" and fs8/fs16/fs32 would be switched back on.
 */
#define DEBUG_FS_SIMD  (DEBUG_FS_SIMD8   | DEBUG_FS_SIMD16  | \
                        DEBUG_FS_SIMD32  | DEBUG_FS_SIMD2X8 | \
                        DEBUG_FS_SIMD4X8 | DEBUG_FS_SIMD2X16)
#define DEBUG_CS_SIMD  (DEBUG_CS_SIMD8  | DEBUG_CS_SIMD16  | DEBUG_CS_SIMD32)
#define DEBUG_TS_SIMD  (DEBUG_TS_SIMD8  | DEBUG_TS_SIMD16  | DEBUG_TS_SIMD32)
#define DEBUG_MS_SIMD  (DEBUG_MS_SIMD8  | DEBUG_MS_SIMD16  | DEBUG_MS_SIMD32)
#define DEBUG_RT_SIMD  (DEBUG_RT_SIMD8  | DEBUG_RT_SIMD16  | DEBUG_RT_SIMD32)

/* Per-width masks across all stages; these are what the no8/no16/no32
 * INTEL_DEBUG keywords remove from intel_simd.
 *
 * NOTE(review): the fs2x8/fs4x8/fs2x16 flags are not part of any of these
 * masks, so no8/no16/no32 leave them untouched -- confirm that is intended.
 */
#define DEBUG_SIMD8_ALL \
   (DEBUG_FS_SIMD8  | \
    DEBUG_CS_SIMD8  | \
    DEBUG_TS_SIMD8  | \
    DEBUG_MS_SIMD8  | \
    DEBUG_RT_SIMD8)

#define DEBUG_SIMD16_ALL \
   (DEBUG_FS_SIMD16 | \
    DEBUG_CS_SIMD16 | \
    DEBUG_TS_SIMD16 | \
    DEBUG_MS_SIMD16 | \
    DEBUG_RT_SIMD16)

#define DEBUG_SIMD32_ALL \
   (DEBUG_FS_SIMD32 | \
    DEBUG_CS_SIMD32 | \
    DEBUG_TS_SIMD32 | \
    DEBUG_MS_SIMD32 | \
    DEBUG_RT_SIMD32)
|
|
|
|
/* Tunables loaded by process_intel_debug_variable_once() from the options
 * INTEL_DEBUG_BATCH_FRAME_START / INTEL_DEBUG_BATCH_FRAME_STOP.  The stop
 * value defaults to all-ones, spelled UINT64_MAX rather than relying on the
 * implicit conversion of -1.
 */
uint64_t intel_debug_batch_frame_start = 0;
uint64_t intel_debug_batch_frame_stop = UINT64_MAX;

/* Loaded from INTEL_DEBUG_BKP_BEFORE_DRAW_COUNT,
 * INTEL_DEBUG_BKP_AFTER_DRAW_COUNT and INTEL_SHADER_DUMP_FILTER.
 */
uint32_t intel_debug_bkp_before_draw_count = 0;
uint32_t intel_debug_bkp_after_draw_count = 0;
uint32_t intel_shader_dump_filter = 0;

/* Loaded from INTEL_DEBUG_BKP_BEFORE_DISPATCH_COUNT and
 * INTEL_DEBUG_BKP_AFTER_DISPATCH_COUNT.
 */
uint32_t intel_debug_bkp_before_dispatch_count = 0;
uint32_t intel_debug_bkp_after_dispatch_count = 0;
|
|
|
|
static void
|
|
parse_debug_bitset(const char *env, const struct debug_control_bitset *tbl)
|
|
{
|
|
/* Check if env is NULL or empty */
|
|
if (!env || !*env)
|
|
return;
|
|
|
|
char *copy = strdup(env);
|
|
if (!copy)
|
|
return;
|
|
|
|
/* Tokenize the string by space or comma */
|
|
for (char *tok = strtok(copy, ", "); tok; tok = strtok(NULL, ", ")) {
|
|
/* Check for negation prefix, useful if user would like to disable certian flags */
|
|
bool negate = (*tok == '~' || *tok == '-');
|
|
if (negate)
|
|
tok++;
|
|
|
|
for (unsigned i = 0; tbl[i].string; i++) {
|
|
if (strcasecmp(tok, tbl[i].string) != 0)
|
|
continue;
|
|
|
|
for (unsigned bit = tbl[i].range[0]; bit <= tbl[i].range[1]; bit++) {
|
|
if (negate)
|
|
BITSET_CLEAR(intel_debug, bit);
|
|
else
|
|
BITSET_SET(intel_debug, bit);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
free(copy);
|
|
}
|
|
|
|
static void
|
|
process_intel_debug_variable_once(void)
|
|
{
|
|
BITSET_ZERO(intel_debug);
|
|
parse_debug_bitset(os_get_option("INTEL_DEBUG"), debug_control);
|
|
|
|
intel_simd = parse_debug_string(os_get_option("INTEL_SIMD_DEBUG"), simd_control);
|
|
intel_debug_batch_frame_start =
|
|
debug_get_num_option("INTEL_DEBUG_BATCH_FRAME_START", 0);
|
|
intel_debug_batch_frame_stop =
|
|
debug_get_num_option("INTEL_DEBUG_BATCH_FRAME_STOP", -1);
|
|
|
|
intel_debug_bkp_before_draw_count =
|
|
debug_get_num_option("INTEL_DEBUG_BKP_BEFORE_DRAW_COUNT", 0);
|
|
intel_debug_bkp_after_draw_count =
|
|
debug_get_num_option("INTEL_DEBUG_BKP_AFTER_DRAW_COUNT", 0);
|
|
|
|
intel_shader_dump_filter =
|
|
debug_get_num_option("INTEL_SHADER_DUMP_FILTER", 0);
|
|
|
|
intel_debug_bkp_before_dispatch_count =
|
|
debug_get_num_option("INTEL_DEBUG_BKP_BEFORE_DISPATCH_COUNT", 0);
|
|
intel_debug_bkp_after_dispatch_count =
|
|
debug_get_num_option("INTEL_DEBUG_BKP_AFTER_DISPATCH_COUNT", 0);
|
|
|
|
if (!(intel_simd & DEBUG_FS_SIMD))
|
|
intel_simd |= DEBUG_FS_SIMD;
|
|
if (!(intel_simd & DEBUG_CS_SIMD))
|
|
intel_simd |= DEBUG_CS_SIMD;
|
|
if (!(intel_simd & DEBUG_TS_SIMD))
|
|
intel_simd |= DEBUG_TS_SIMD;
|
|
if (!(intel_simd & DEBUG_MS_SIMD))
|
|
intel_simd |= DEBUG_MS_SIMD;
|
|
if (!(intel_simd & DEBUG_RT_SIMD))
|
|
intel_simd |= DEBUG_RT_SIMD;
|
|
|
|
if (BITSET_TEST(intel_debug, DEBUG_NO8))
|
|
intel_simd &= ~DEBUG_SIMD8_ALL;
|
|
|
|
if (BITSET_TEST(intel_debug, DEBUG_NO16))
|
|
intel_simd &= ~DEBUG_SIMD16_ALL;
|
|
|
|
if (BITSET_TEST(intel_debug, DEBUG_NO32))
|
|
intel_simd &= ~DEBUG_SIMD32_ALL;
|
|
|
|
BITSET_CLEAR(intel_debug, DEBUG_NO8);
|
|
BITSET_CLEAR(intel_debug, DEBUG_NO16);
|
|
BITSET_CLEAR(intel_debug, DEBUG_NO32);
|
|
}
|
|
|
|
void
|
|
process_intel_debug_variable(void)
|
|
{
|
|
static once_flag process_intel_debug_variable_flag = ONCE_FLAG_INIT;
|
|
|
|
call_once(&process_intel_debug_variable_flag,
|
|
process_intel_debug_variable_once);
|
|
}
|