diff --git a/src/intel/compiler/brw/brw_compile_gs.cpp b/src/intel/compiler/brw/brw_compile_gs.cpp
index 7639070dc60..8f69c33cd63 100644
--- a/src/intel/compiler/brw/brw_compile_gs.cpp
+++ b/src/intel/compiler/brw/brw_compile_gs.cpp
@@ -176,7 +176,8 @@ brw_compile_gs(const struct brw_compiler *compiler,
                        pos_slots);
 
    brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
-   brw_nir_lower_gs_inputs(nir, &input_vue_map);
+   brw_nir_lower_gs_inputs(nir, compiler->devinfo, &input_vue_map,
+                           &prog_data->base.urb_read_length);
    brw_nir_lower_vue_outputs(nir);
    brw_postprocess_nir(nir, compiler, dispatch_width,
                        params->base.archiver, debug_enabled,
@@ -338,11 +339,6 @@ brw_compile_gs(const struct brw_compiler *compiler,
 
    prog_data->vertices_in = nir->info.gs.vertices_in;
 
-   /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
-    * need to program a URB read length of ceiling(num_slots / 2).
-    */
-   prog_data->base.urb_read_length = (input_vue_map.num_slots + 1) / 2;
-
    /* Now that prog_data setup is done, we are ready to actually compile the
     * program.
     */
diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c
index 0f8d596d0c9..618f1010d57 100644
--- a/src/intel/compiler/brw/brw_nir.c
+++ b/src/intel/compiler/brw/brw_nir.c
@@ -905,7 +905,9 @@ brw_nir_lower_vs_inputs(nir_shader *nir)
 
 void
 brw_nir_lower_gs_inputs(nir_shader *nir,
-                        const struct intel_vue_map *vue_map)
+                        const struct intel_device_info *devinfo,
+                        const struct intel_vue_map *vue_map,
+                        unsigned *out_urb_read_length)
 {
    nir_foreach_shader_in_variable(var, nir)
       var->data.driver_location = var->data.location;
@@ -950,6 +952,28 @@ brw_nir_lower_gs_inputs(nir_shader *nir,
          }
       }
    }
+
+   unsigned urb_read_length = 0;
+
+   if (nir->info.gs.invocations == 1) {
+      /* URB read length is in 256-bit units, which is two vec4s. */
+      urb_read_length = DIV_ROUND_UP(vue_map->num_slots, 2);
+
+      /* Because we're operating in scalar mode, the two vec4s take
+       * up 8 registers.  Additionally, the GS reads URB Read Length
+       * for each vertex being processed, so each unit of read length
+       * takes up 8 * VerticesIn registers.
+       */
+      const unsigned regs_per_read = 8 * nir->info.gs.vertices_in;
+
+      /* Limit to 24 registers worth of pushed inputs */
+      const unsigned max_push_regs = 24;
+
+      if (urb_read_length * regs_per_read > max_push_regs)
+         urb_read_length = max_push_regs / regs_per_read;
+   }
+
+   *out_urb_read_length = urb_read_length;
 }
 
 void
diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h
index 556f481e62f..f0f014542c7 100644
--- a/src/intel/compiler/brw/brw_nir.h
+++ b/src/intel/compiler/brw/brw_nir.h
@@ -235,7 +235,9 @@ bool brw_nir_lower_outputs_to_urb_intrinsics(nir_shader *, const struct brw_lowe
 
 void brw_nir_lower_vs_inputs(nir_shader *nir);
 void brw_nir_lower_gs_inputs(nir_shader *nir,
-                             const struct intel_vue_map *vue_map);
+                             const struct intel_device_info *devinfo,
+                             const struct intel_vue_map *vue_map,
+                             unsigned *out_urb_read_length);
 void brw_nir_lower_tes_inputs(nir_shader *nir,
                               const struct intel_device_info *devinfo,
                               const struct intel_vue_map *vue);
diff --git a/src/intel/compiler/brw/brw_thread_payload.cpp b/src/intel/compiler/brw/brw_thread_payload.cpp
index 1fc6751f1a4..78b45cd64f7 100644
--- a/src/intel/compiler/brw/brw_thread_payload.cpp
+++ b/src/intel/compiler/brw/brw_thread_payload.cpp
@@ -100,7 +100,6 @@ brw_tes_thread_payload::brw_tes_thread_payload(const brw_shader &v)
 
 brw_gs_thread_payload::brw_gs_thread_payload(brw_shader &v)
 {
-   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(v.prog_data);
    struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(v.prog_data);
    const brw_builder bld = brw_builder(&v);
 
@@ -136,21 +135,6 @@ brw_gs_thread_payload::brw_gs_thread_payload(brw_shader &v)
    r += v.nir->info.gs.vertices_in * reg_unit(v.devinfo);
 
    num_regs = r;
-
-   /* Use a maximum of 24 registers for push-model inputs. */
-   const unsigned max_push_components = 24;
-
-   /* If pushing our inputs would take too many registers, reduce the URB read
-    * length (which is in HWords, or 8 registers), and resort to pulling.
-    *
-    * Note that the GS reads <URB Read Length> HWords for every vertex - so we
-    * have to multiply by VerticesIn to obtain the total storage requirement.
-    */
-   if (8 * vue_prog_data->urb_read_length * v.nir->info.gs.vertices_in >
-       max_push_components) {
-      vue_prog_data->urb_read_length =
-         ROUND_DOWN_TO(max_push_components / v.nir->info.gs.vertices_in, 8) / 8;
-   }
 }
 
 static inline void