diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp
index ce74f0a6723..c0fae6187f3 100644
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@@ -1505,16 +1505,23 @@ namespace {
                             const backend_instruction *),
                          unsigned dispatch_width)
    {
-      /* XXX - Plumbing the trip counts from NIR loop analysis would allow us
-       *       to do a better job regarding the loop weights.  And some branch
-       *       divergence analysis would allow us to do a better job with
-       *       branching weights.
+      /* XXX - Note that the previous version of this code used worst-case
+       *       scenario estimation of branching divergence for SIMD32 shaders,
+       *       but this heuristic was removed to improve performance in common
+       *       scenarios. Wider shader variants are less optimal when divergence
+       *       is high, e.g. when an application renders a complex scene on a
+       *       small surface. It is assumed that such renders are short, so
+       *       their time doesn't matter and, when it comes to the overall
+       *       performance, they are dominated by more optimal larger renders.
+       *
+       *       It's possible that we could do better with divergence analysis
+       *       by isolating branches which are 100% uniform.
+       *
+       *       Plumbing the trip counts from NIR loop analysis would allow us
+       *       to do a better job regarding the loop weights.
        *
        *       In the meantime use values that roughly match the control flow
-       *       weights used elsewhere in the compiler back-end -- Main
-       *       difference is the worst-case scenario branch_weight used for
-       *       SIMD32 which accounts for the possibility of a dynamically
-       *       uniform branch becoming divergent in SIMD32.
+       *       weights used elsewhere in the compiler back-end.
        *
        *       Note that we provide slightly more pessimistic weights on
        *       Gen12+ for SIMD32, since the effective warp size on that
@@ -1523,7 +1530,6 @@ namespace {
        *       previous generations, giving narrower SIMD modes a performance
        *       advantage in several test-cases with non-uniform discard jumps.
        */
-      const float branch_weight = (dispatch_width > 16 ? 1.0 : 0.5);
       const float discard_weight = (dispatch_width > 16 || s->devinfo->gen < 12 ?
                                     1.0 : 0.5);
       const float loop_weight = 10;
@@ -1539,16 +1545,12 @@ namespace {
 
             issue_instruction(st, s->devinfo, inst);
 
-            if (inst->opcode == BRW_OPCODE_ENDIF)
-               st.weight /= branch_weight;
-            else if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT && discard_count)
+            if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT && discard_count)
                st.weight /= discard_weight;
 
             elapsed += (st.unit_ready[unit_fe] - clock0) * st.weight;
 
-            if (inst->opcode == BRW_OPCODE_IF)
-               st.weight *= branch_weight;
-            else if (inst->opcode == BRW_OPCODE_DO)
+            if (inst->opcode == BRW_OPCODE_DO)
                st.weight *= loop_weight;
             else if (inst->opcode == BRW_OPCODE_WHILE)
                st.weight /= loop_weight;