From ecba845c0d4ed12a1a5cbeb7524730601a76c493 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 22 Apr 2021 09:21:59 -0500 Subject: [PATCH] intel/compiler: Don't insert barriers for NULL sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally, we never see NULL in a source. However, starting with eab1c55590b1, we can with a SHADER_OPCODE_SEND if it only has the first payload. We were inserting barriers which adds unnecessary scheduling dependencies and takes a lot of compile time because inserting a single barrier is an O(n) operation. All the extra O(n) can have a surprisingly large effect. This cuts the runtime of dEQP-VK.binding_model.buffer_device_address.set3.depth3. basessbo.convertcheckuv2.store.single.std140.frag by a factor of 20x for a debug build. Shader-db results on ICL: total instructions in shared programs: 19918983 -> 19921610 (0.01%) instructions in affected programs: 884074 -> 886701 (0.30%) helped: 1688 HURT: 817 helped stats (abs) min: 1 max: 163 x̄: 4.23 x̃: 1 helped stats (rel) min: 0.02% max: 12.50% x̄: 1.08% x̃: 0.61% HURT stats (abs) min: 1 max: 2674 x̄: 11.95 x̃: 2 HURT stats (rel) min: 0.11% max: 70.22% x̄: 1.71% x̃: 1.03% 95% mean confidence interval for instructions value: -1.97 4.06 95% mean confidence interval for instructions %-change: -0.28% -0.06% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 976503324 -> 975884809 (-0.06%) cycles in affected programs: 82581703 -> 81963188 (-0.75%) helped: 4144 HURT: 5010 helped stats (abs) min: 1 max: 79294 x̄: 311.31 x̃: 8 helped stats (rel) min: <.01% max: 53.69% x̄: 2.00% x̃: 0.51% HURT stats (abs) min: 1 max: 92266 x̄: 134.04 x̃: 8 HURT stats (rel) min: <.01% max: 218.09% x̄: 3.25% x̃: 0.53% 95% mean confidence interval for cycles value: -119.85 -15.29 95% mean confidence interval for cycles %-change: 0.68% 1.07% Inconclusive result (value mean confidence interval and %-change mean confidence interval disagree). total spills in shared programs: 10659 -> 12014 (12.71%) spills in affected programs: 441 -> 1796 (307.26%) helped: 7 HURT: 12 total fills in shared programs: 11551 -> 14429 (24.92%) fills in affected programs: 993 -> 3871 (289.83%) helped: 8 HURT: 11 total sends in shared programs: 1025832 -> 1025353 (-0.05%) sends in affected programs: 2241 -> 1762 (-21.37%) helped: 105 HURT: 1 helped stats (abs) min: 1 max: 87 x̄: 4.57 x̃: 2 helped stats (rel) min: 5.56% max: 54.72% x̄: 11.37% x̃: 10.00% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% 95% mean confidence interval for sends value: -7.39 -1.65 95% mean confidence interval for sends %-change: -12.95% -7.70% Sends are helped. LOST: 93 GAINED: 109 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4648 Fixes: eab1c55590b1 "intel/fs: Support SENDS in SHADER_OPCODE_SEND" Reviewed-by: Eric Anholt Part-of: (cherry picked from commit 134af5ada240d163fee91c2d8cbf5128590d7c86) --- .pick_status.json | 2 +- src/intel/compiler/brw_schedule_instructions.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 8f2959cdea4..66bf9d53102 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -85,7 +85,7 @@ "description": "intel/compiler: Don't insert barriers for NULL sources", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": "eab1c55590b15260d6e1ceb65f96661a5e42ad00" }, diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 3821114bf99..1da29055aea 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -1142,7 +1142,7 @@ fs_instruction_scheduler::calculate_deps() } } else if (inst->src[i].is_accumulator()) { add_dep(last_accumulator_write, n); - } else if (inst->src[i].file == ARF) { + } else if (inst->src[i].file == ARF && !inst->src[i].is_null()) { add_barrier_deps(n); } } @@ -1271,7 +1271,7 @@ fs_instruction_scheduler::calculate_deps() } } else if (inst->src[i].is_accumulator()) { add_dep(n, last_accumulator_write, 0); - } else if (inst->src[i].file == ARF) { + } else if (inst->src[i].file == ARF && !inst->src[i].is_null()) { add_barrier_deps(n); } } @@ -1395,7 +1395,7 @@ vec4_instruction_scheduler::calculate_deps() } else if (inst->src[i].is_accumulator()) { assert(last_accumulator_write); add_dep(last_accumulator_write, n); - } else if (inst->src[i].file == ARF) { + } else if (inst->src[i].file == ARF && !inst->src[i].is_null()) { add_barrier_deps(n); } } @@ -1480,7 +1480,7 @@ vec4_instruction_scheduler::calculate_deps() add_dep(n, last_fixed_grf_write); } else if (inst->src[i].is_accumulator()) { add_dep(n, last_accumulator_write); - } else if (inst->src[i].file == ARF) { + } else if (inst->src[i].file == ARF && !inst->src[i].is_null()) { add_barrier_deps(n); } }