diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp
index 9d38dd3bed2..d80532fc4af 100644
--- a/src/intel/compiler/brw_fs_validate.cpp
+++ b/src/intel/compiler/brw_fs_validate.cpp
@@ -280,155 +280,241 @@ brw_fs_validate(const fs_visitor &s)
 
    s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage));
 
-   foreach_block_and_inst (block, fs_inst, inst, s.cfg) {
-      brw_validate_instruction_phase(s, inst);
+   foreach_block(block, s.cfg) {
+      foreach_inst_in_block (fs_inst, inst, block) {
+         brw_validate_instruction_phase(s, inst);
 
-      switch (inst->opcode) {
-      case SHADER_OPCODE_SEND:
-         fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
-         break;
+         switch (inst->opcode) {
+         case SHADER_OPCODE_SEND:
+            fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
+            break;
 
-      case BRW_OPCODE_MOV:
-         fsv_assert(inst->sources == 1);
-         break;
+         case BRW_OPCODE_MOV:
+            fsv_assert(inst->sources == 1);
+            break;
 
-      case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
-      case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
-      case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
-         validate_memory_logical(s, inst);
-         break;
+         case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
+         case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
+         case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
+            validate_memory_logical(s, inst);
+            break;
 
-      default:
-         break;
-      }
+         default:
+            break;
+         }
 
-      /* On Xe2, the "write the accumulator in addition to the explicit
-       * destination" bit no longer exists. Try to catch uses of this feature
-       * earlier in the process.
-       */
-      if (devinfo->ver >= 20 && inst->writes_accumulator) {
-         fsv_assert(inst->dst.is_accumulator() ||
-                    inst->opcode == BRW_OPCODE_ADDC ||
-                    inst->opcode == BRW_OPCODE_MACH ||
-                    inst->opcode == BRW_OPCODE_SUBB);
-      }
+         /* On Xe2, the "write the accumulator in addition to the explicit
+          * destination" bit no longer exists. Try to catch uses of this
+          * feature earlier in the process.
+          */
+         if (devinfo->ver >= 20 && inst->writes_accumulator) {
+            fsv_assert(inst->dst.is_accumulator() ||
+                       inst->opcode == BRW_OPCODE_ADDC ||
+                       inst->opcode == BRW_OPCODE_MACH ||
+                       inst->opcode == BRW_OPCODE_SUBB);
+         }
 
-      if (inst->is_3src(s.compiler)) {
-         const unsigned integer_sources =
-            brw_type_is_int(inst->src[0].type) +
-            brw_type_is_int(inst->src[1].type) +
-            brw_type_is_int(inst->src[2].type);
-         const unsigned float_sources =
-            brw_type_is_float(inst->src[0].type) +
-            brw_type_is_float(inst->src[1].type) +
-            brw_type_is_float(inst->src[2].type);
+         if (inst->is_3src(s.compiler)) {
+            const unsigned integer_sources =
+               brw_type_is_int(inst->src[0].type) +
+               brw_type_is_int(inst->src[1].type) +
+               brw_type_is_int(inst->src[2].type);
+            const unsigned float_sources =
+               brw_type_is_float(inst->src[0].type) +
+               brw_type_is_float(inst->src[1].type) +
+               brw_type_is_float(inst->src[2].type);
 
-         fsv_assert((integer_sources == 3 && float_sources == 0) ||
-                    (integer_sources == 0 && float_sources == 3));
+            fsv_assert((integer_sources == 3 && float_sources == 0) ||
+                       (integer_sources == 0 && float_sources == 3));
 
-         if (devinfo->ver >= 10) {
-            for (unsigned i = 0; i < 3; i++) {
-               if (inst->src[i].file == IMM)
-                  continue;
+            if (devinfo->ver >= 10) {
+               for (unsigned i = 0; i < 3; i++) {
+                  if (inst->src[i].file == IMM)
+                     continue;
 
-               switch (inst->src[i].vstride) {
-               case BRW_VERTICAL_STRIDE_0:
-               case BRW_VERTICAL_STRIDE_4:
-               case BRW_VERTICAL_STRIDE_8:
-               case BRW_VERTICAL_STRIDE_16:
-                  break;
+                  switch (inst->src[i].vstride) {
+                  case BRW_VERTICAL_STRIDE_0:
+                  case BRW_VERTICAL_STRIDE_4:
+                  case BRW_VERTICAL_STRIDE_8:
+                  case BRW_VERTICAL_STRIDE_16:
+                     break;
 
-               case BRW_VERTICAL_STRIDE_1:
-                  fsv_assert_lte(12, devinfo->ver);
-                  break;
+                  case BRW_VERTICAL_STRIDE_1:
+                     fsv_assert_lte(12, devinfo->ver);
+                     break;
 
                case BRW_VERTICAL_STRIDE_2:
                   fsv_assert_lte(devinfo->ver, 11);
                   break;
 
-               default:
-                  fsv_assert(!"invalid vstride");
-                  break;
+                  default:
+                     fsv_assert(!"invalid vstride");
+                     break;
+                  }
                }
-            }
-         } else if (s.grf_used != 0) {
-            /* Only perform the pre-Gfx10 checks after register allocation has
-             * occured.
-             *
-             * Many passes (e.g., constant copy propagation) will genenerate
-             * invalid 3-source instructions with the expectation that later
-             * passes (e.g., combine constants) will fix them.
-             */
-            for (unsigned i = 0; i < 3; i++) {
-               fsv_assert_ne(inst->src[i].file, IMM);
-
-               /* A stride of 1 (the usual case) or 0, with a special
-                * "repctrl" bit, is allowed. The repctrl bit doesn't work for
-                * 64-bit datatypes, so if the source type is 64-bit then only
-                * a stride of 1 is allowed. From the Broadwell PRM, Volume 7
-                * "3D Media GPGPU", page 944:
+            } else if (s.grf_used != 0) {
+               /* Only perform the pre-Gfx10 checks after register allocation
+                * has occured.
                 *
-                *    This is applicable to 32b datatypes and 16b datatype. 64b
-                *    datatypes cannot use the replicate control.
+                * Many passes (e.g., constant copy propagation) will
+                * genenerate invalid 3-source instructions with the
+                * expectation that later passes (e.g., combine constants) will
+                * fix them.
                 */
-               const unsigned stride_in_bytes = byte_stride(inst->src[i]);
-               const unsigned size_in_bytes = brw_type_size_bytes(inst->src[i].type);
-               if (stride_in_bytes == 0) {
-                  /* If the source is_scalar, then the stride will be
-                   * converted to <4;4,1> in brw_lower_scalar_fp64_MAD after
-                   * SIMD splitting.
+               for (unsigned i = 0; i < 3; i++) {
+                  fsv_assert_ne(inst->src[i].file, IMM);
+
+                  /* A stride of 1 (the usual case) or 0, with a special
+                   * "repctrl" bit, is allowed. The repctrl bit doesn't work
+                   * for 64-bit datatypes, so if the source type is 64-bit
+                   * then only a stride of 1 is allowed. From the Broadwell
+                   * PRM, Volume 7 "3D Media GPGPU", page 944:
+                   *
+                   *    This is applicable to 32b datatypes and 16b datatype.
+                   *    64b datatypes cannot use the replicate control.
                    */
-                  if (!inst->src[i].is_scalar)
-                     fsv_assert_lte(size_in_bytes, 4);
-               } else {
-                  fsv_assert_eq(stride_in_bytes, size_in_bytes);
+                  const unsigned stride_in_bytes = byte_stride(inst->src[i]);
+                  const unsigned size_in_bytes = brw_type_size_bytes(inst->src[i].type);
+                  if (stride_in_bytes == 0) {
+                     /* If the source is_scalar, then the stride will be
+                      * converted to <4;4,1> in brw_lower_scalar_fp64_MAD
+                      * after SIMD splitting.
+                      */
+                     if (!inst->src[i].is_scalar)
+                        fsv_assert_lte(size_in_bytes, 4);
+                  } else {
+                     fsv_assert_eq(stride_in_bytes, size_in_bytes);
+                  }
                }
             }
          }
-      }
 
-      if (inst->dst.file == VGRF) {
-         fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
-                        s.alloc.sizes[inst->dst.nr]);
+         if (inst->dst.file == VGRF) {
+            fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
+                           s.alloc.sizes[inst->dst.nr]);
 
-         if (inst->exec_size > 1)
-            fsv_assert_ne(inst->dst.stride, 0);
-      }
-
-      for (unsigned i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == VGRF) {
-            fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i),
-                           s.alloc.sizes[inst->src[i].nr]);
+            if (inst->exec_size > 1)
+               fsv_assert_ne(inst->dst.stride, 0);
          }
-      }
 
-      /* Accumulator Registers, bspec 47251:
-       *
-       * "When destination is accumulator with offset 0, destination
-       * horizontal stride must be 1."
-       */
-      if (intel_needs_workaround(devinfo, 14014617373) &&
-          inst->dst.is_accumulator() &&
-          phys_subnr(devinfo, inst->dst) == 0) {
-         fsv_assert_eq(inst->dst.hstride, 1);
-      }
-
-      if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) {
-         /* Wa_22016140776:
-          *
-          *    Scalar broadcast on HF math (packed or unpacked) must not be
-          *    used.  Compiler must use a mov instruction to expand the scalar
-          *    value to a vector before using in a HF (packed or unpacked)
-          *    math operation.
-          *
-          * Since copy propagation knows about this restriction, nothing
-          * should be able to generate these invalid source strides. Detect
-          * potential problems sooner rather than later.
-          */
          for (unsigned i = 0; i < inst->sources; i++) {
-            fsv_assert(inst->src[i].is_scalar ||
-                       !is_uniform(inst->src[i]) ||
-                       inst->src[i].type != BRW_TYPE_HF);
+            if (inst->src[i].file == VGRF) {
+               fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i),
+                              s.alloc.sizes[inst->src[i].nr]);
+            }
+         }
+
+         /* Accumulator Registers, bspec 47251:
+          *
+          * "When destination is accumulator with offset 0, destination
+          * horizontal stride must be 1."
+          */
+         if (intel_needs_workaround(devinfo, 14014617373) &&
+             inst->dst.is_accumulator() &&
+             phys_subnr(devinfo, inst->dst) == 0) {
+            fsv_assert_eq(inst->dst.hstride, 1);
+         }
+
+         if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) {
+            /* Wa_22016140776:
+             *
+             *    Scalar broadcast on HF math (packed or unpacked) must not be
+             *    used. Compiler must use a mov instruction to expand the
+             *    scalar value to a vector before using in a HF (packed or
+             *    unpacked) math operation.
+             *
+             * Since copy propagation knows about this restriction, nothing
+             * should be able to generate these invalid source strides. Detect
+             * potential problems sooner rather than later.
+             */
+            if (devinfo->ver >= 20 && inst->writes_accumulator) {
+               fsv_assert(inst->dst.is_accumulator() ||
+                          inst->opcode == BRW_OPCODE_ADDC ||
+                          inst->opcode == BRW_OPCODE_MACH ||
+                          inst->opcode == BRW_OPCODE_SUBB);
+            }
+
+            if (inst->is_3src(s.compiler)) {
+               const unsigned integer_sources =
+                  brw_type_is_int(inst->src[0].type) +
+                  brw_type_is_int(inst->src[1].type) +
+                  brw_type_is_int(inst->src[2].type);
+               const unsigned float_sources =
+                  brw_type_is_float(inst->src[0].type) +
+                  brw_type_is_float(inst->src[1].type) +
+                  brw_type_is_float(inst->src[2].type);
+
+               fsv_assert((integer_sources == 3 && float_sources == 0) ||
+                          (integer_sources == 0 && float_sources == 3));
+
+               if (devinfo->ver >= 10) {
+                  for (unsigned i = 0; i < 3; i++) {
+                     if (inst->src[i].file == IMM)
+                        continue;
+
+                     switch (inst->src[i].vstride) {
+                     case BRW_VERTICAL_STRIDE_0:
+                     case BRW_VERTICAL_STRIDE_4:
+                     case BRW_VERTICAL_STRIDE_8:
+                     case BRW_VERTICAL_STRIDE_16:
+                        break;
+
+                     case BRW_VERTICAL_STRIDE_1:
+                        fsv_assert_lte(12, devinfo->ver);
+                        break;
+
+                     case BRW_VERTICAL_STRIDE_2:
+                        fsv_assert_lte(devinfo->ver, 11);
+                        break;
+
+                     default:
+                        fsv_assert(!"invalid vstride");
+                        break;
+                     }
+                  }
+               } else if (s.grf_used != 0) {
+                  /* Only perform the pre-Gfx10 checks after register
+                   * allocation has occured.
+                   *
+                   * Many passes (e.g., constant copy propagation) will
+                   * genenerate invalid 3-source instructions with the
+                   * expectation that later passes (e.g., combine constants)
+                   * will fix them.
+                   */
+                  for (unsigned i = 0; i < 3; i++) {
+                     fsv_assert_ne(inst->src[i].file, IMM);
+
+                     /* A stride of 1 (the usual case) or 0, with a special
+                      * "repctrl" bit, is allowed. The repctrl bit doesn't
+                      * work for 64-bit datatypes, so if the source type is
+                      * 64-bit then only a stride of 1 is allowed. From the
+                      * Broadwell PRM, Volume 7 "3D Media GPGPU", page 944:
+                      *
+                      *    This is applicable to 32b datatypes and 16b
+                      *    datatype. 64b datatypes cannot use the replicate
+                      *    control.
+                      */
+                     const unsigned stride_in_bytes = byte_stride(inst->src[i]);
+                     const unsigned size_in_bytes = brw_type_size_bytes(inst->src[i].type);
+                     if (stride_in_bytes == 0) {
+                        fsv_assert_lte(size_in_bytes, 4);
+                     } else {
+                        fsv_assert_eq(stride_in_bytes, size_in_bytes);
+                     }
+                  }
+               }
+            }
+
+            if (inst->dst.file == VGRF) {
+               fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
+                              s.alloc.sizes[inst->dst.nr]);
+            }
+
+            for (unsigned i = 0; i < inst->sources; i++) {
+               fsv_assert(inst->src[i].is_scalar ||
+                          !is_uniform(inst->src[i]) ||
+                          inst->src[i].type != BRW_TYPE_HF);
+            }
          }
       }
    }