diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index d8675c6fbcf..cc60c2868ac 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1817,14 +1817,63 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
    case nir_op_bitfield_insert:
       unreachable("not reached: should have been lowered");
 
+   /* For all shift operations:
+    *
+    * Gen4 - Gen7: After application of source modifiers, the low 5-bits of
+    * src1 are used an unsigned value for the shift count.
+    *
+    * Gen8: As with earlier platforms, but for Q and UQ types on src0, the low
+    * 6-bit of src1 are used.
+    *
+    * Gen9+: The low bits of src1 matching the size of src0 (e.g., 4-bits for
+    * W or UW src0).
+    *
+    * The implication is that the following instruction will produce a
+    * different result on Gen9+ than on previous platforms:
+    *
+    *    shr(8)    g4<1>UW    g12<8,8,1>UW    0x0010UW
+    *
+    * where Gen9+ will shift by zero, and earlier platforms will shift by 16.
+    *
+    * This does not seem to be the case.  Experimentally, I have determined
+    * that shifts of 16-bit values on Gen8 behave properly.  Shifts of 8-bit
+    * values on both Gen8 and Gen9 do not.  Gen11+ lowers 8-bit values, so
+    * those platforms were not tested.  No features expose access to 8- or
+    * 16-bit types on Gen7 or earlier, so those platforms were not tested
+    * either.  See
+    * https://gitlab.freedesktop.org/mesa/crucible/-/merge_requests/76.
+    */
    case nir_op_ishl:
-      bld.SHL(result, op[0], op[1]);
+      if (type_sz(op[0].type) == 1) {
+         fs_reg tmp = bld.vgrf(op[1].type);
+         bld.AND(tmp, op[1], retype(brw_imm_ud(7), op[1].type));
+         bld.SHL(result, op[0], tmp);
+      } else {
+         bld.SHL(result, op[0], op[1]);
+      }
+
       break;
+
    case nir_op_ishr:
-      bld.ASR(result, op[0], op[1]);
+      if (type_sz(op[0].type) == 1) {
+         fs_reg tmp = bld.vgrf(op[1].type);
+         bld.AND(tmp, op[1], retype(brw_imm_ud(7), op[1].type));
+         bld.ASR(result, op[0], tmp);
+      } else {
+         bld.ASR(result, op[0], op[1]);
+      }
+
       break;
+
    case nir_op_ushr:
-      bld.SHR(result, op[0], op[1]);
+      if (type_sz(op[0].type) == 1) {
+         fs_reg tmp = bld.vgrf(op[1].type);
+         bld.AND(tmp, op[1], retype(brw_imm_ud(7), op[1].type));
+         bld.SHR(result, op[0], tmp);
+      } else {
+         bld.SHR(result, op[0], op[1]);
+      }
+
       break;
 
    case nir_op_urol:
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index abeac65577d..4e32f0cc892 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -1797,20 +1797,31 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       }
       break;
 
+   /* For all shift operations:
+    *
+    * Gen4 - Gen7: After application of source modifiers, the low 5-bits of
+    * src1 are used an unsigned value for the shift count.
+    *
+    * Later platforms alter this behavior, but those platforms are not
+    * supported by this code generator.
+    */
    case nir_op_ishl:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
+      assert(type_sz(op[0].type) == 4);
       try_immediate_source(instr, op, false);
       emit(SHL(dst, op[0], op[1]));
       break;
 
    case nir_op_ishr:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
+      assert(type_sz(op[0].type) == 4);
       try_immediate_source(instr, op, false);
       emit(ASR(dst, op[0], op[1]));
       break;
 
    case nir_op_ushr:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
+      assert(type_sz(op[0].type) == 4);
       try_immediate_source(instr, op, false);
       emit(SHR(dst, op[0], op[1]));
       break;