From f82bcd56fc9a9f30f62a64a6f8dc56acb6d8911d Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Fri, 31 Jan 2025 12:50:20 -0800 Subject: [PATCH] intel/brw: Add functions to allocate VGRF space Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/brw_builder.h | 6 +--- src/intel/compiler/brw_from_nir.cpp | 26 ++++++++-------- src/intel/compiler/brw_fs.cpp | 16 ++++++++++ src/intel/compiler/brw_fs.h | 3 ++ src/intel/compiler/brw_fs_visitor.cpp | 30 +++++++++---------- src/intel/compiler/brw_lower.cpp | 7 ++--- .../brw_lower_integer_multiplication.cpp | 15 +++++----- .../compiler/brw_lower_logical_sends.cpp | 19 ++++++------ src/intel/compiler/brw_lower_regioning.cpp | 2 +- src/intel/compiler/brw_opt.cpp | 4 +-- .../compiler/brw_opt_combine_constants.cpp | 2 +- src/intel/compiler/brw_opt_virtual_grfs.cpp | 2 +- src/intel/compiler/brw_reg_allocate.cpp | 2 +- 13 files changed, 73 insertions(+), 61 deletions(-) diff --git a/src/intel/compiler/brw_builder.h b/src/intel/compiler/brw_builder.h index 01cafa60dd8..a50e72d5ff2 100644 --- a/src/intel/compiler/brw_builder.h +++ b/src/intel/compiler/brw_builder.h @@ -199,14 +199,10 @@ public: brw_reg vgrf(enum brw_reg_type type, unsigned n = 1) const { - const unsigned unit = reg_unit(shader->devinfo); assert(dispatch_width() <= 32); if (n > 0) - return brw_vgrf(shader->alloc.allocate( - DIV_ROUND_UP(n * brw_type_size_bytes(type) * dispatch_width(), - unit * REG_SIZE) * unit), - type); + return brw_allocate_vgrf(*shader, type, n * dispatch_width()); else return retype(null_reg_ud(), type); } diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index c7dfeeadf1d..46418543536 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -5023,7 +5023,7 @@ get_timestamp(const brw_builder &bld) brw_reg ts = brw_reg(retype(brw_vec4_reg(ARF, BRW_ARF_TIMESTAMP, 0), BRW_TYPE_UD)); - brw_reg dst = brw_vgrf(s.alloc.allocate(1), BRW_TYPE_UD); + brw_reg dst = retype(brw_allocate_vgrf_units(s, 1), BRW_TYPE_UD); /* We want to read the 3 fields we care about even if it's not enabled in * the dispatch. 
@@ -5084,8 +5084,8 @@ emit_urb_direct_vec4_write(const brw_builder &bld, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), - BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); @@ -5154,8 +5154,8 @@ emit_urb_direct_vec4_write_xe2(const brw_builder &bld, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); - int nr = bld.shader->alloc.allocate(comps * runit); - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(nr, BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps); hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0); @@ -5217,8 +5217,8 @@ emit_urb_indirect_vec4_write(const brw_builder &bld, srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), - BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); @@ -5288,8 +5288,8 @@ emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = addr; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); - int nr = bld.shader->alloc.allocate(comps * runit); - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(nr, BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps); wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0); @@ -5348,8 +5348,8 @@ emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr, srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = final_offset; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask; - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), - BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); @@ -7525,8 +7525,8 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, /* Allocate enough space for the components + one physical register for the * residency data. 
*/ - brw_reg dst = brw_vgrf( - bld.shader->alloc.allocate(total_regs * reg_unit(devinfo)), + brw_reg dst = retype( + brw_allocate_vgrf_units(*bld.shader, total_regs * reg_unit(devinfo)), dst_type); brw_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 8f255e2087e..9fc29ba97f6 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -837,3 +837,19 @@ bool brw_should_print_shader(const nir_shader *shader, uint64_t debug_flag) { return INTEL_DEBUG(debug_flag) && (!shader->info.internal || NIR_DEBUG(PRINT_INTERNAL)); } + +brw_reg +brw_allocate_vgrf(fs_visitor &s, brw_reg_type type, unsigned count) +{ + const unsigned unit = reg_unit(s.devinfo); + const unsigned size = DIV_ROUND_UP(count * brw_type_size_bytes(type), + unit * REG_SIZE) * unit; + return retype(brw_allocate_vgrf_units(s, size), type); +} + +brw_reg +brw_allocate_vgrf_units(fs_visitor &s, unsigned units_of_REGSIZE) +{ + return brw_vgrf(s.alloc.allocate(units_of_REGSIZE), BRW_TYPE_UD); +} + diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 4ac970f3020..0e39bfc57c5 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -472,3 +472,6 @@ bool brw_workaround_source_arf_before_eot(fs_visitor &s); /* Helpers. */ unsigned brw_get_lowered_simd_width(const fs_visitor *shader, const brw_inst *inst); + +brw_reg brw_allocate_vgrf(fs_visitor &s, brw_reg_type type, unsigned count); +brw_reg brw_allocate_vgrf_units(fs_visitor &s, unsigned units_of_REGSIZE); diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index a8d60d91889..b3e7cc73d11 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -130,8 +130,8 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) break; } - brw_reg zero = brw_vgrf(alloc.allocate(dispatch_width / 8), - BRW_TYPE_UD); + brw_reg zero = + retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD); bld.MOV(zero, brw_imm_ud(0u)); if (vue_map->slots_valid & VARYING_BIT_PRIMITIVE_SHADING_RATE && @@ -139,8 +139,8 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) sources[length++] = this->outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE]; } else if (devinfo->has_coarse_pixel_primitive_and_cb) { uint32_t one_fp16 = 0x3C00; - brw_reg one_by_one_fp16 = brw_vgrf(alloc.allocate(dispatch_width / 8), - BRW_TYPE_UD); + brw_reg one_by_one_fp16 = + retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD); bld.MOV(one_by_one_fp16, brw_imm_ud((one_fp16 << 16) | one_fp16)); sources[length++] = one_by_one_fp16; } else { @@ -213,8 +213,8 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offsets; - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(alloc.allocate((dispatch_width / 8) * length), - BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*this, (dispatch_width / 8) * length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0); @@ -253,10 +253,10 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) if (stage == MESA_SHADER_GEOMETRY) return; - brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(dispatch_width / 8), - BRW_TYPE_UD); - brw_reg payload = brw_vgrf(alloc.allocate(dispatch_width / 8), - BRW_TYPE_UD); + brw_reg uniform_urb_handle = 
+ retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD); + brw_reg payload = + retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD); bld.exec_all().MOV(uniform_urb_handle, urb_handle); @@ -280,9 +280,9 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) */ if (intel_needs_workaround(devinfo, 1805992985) && stage == MESA_SHADER_TESS_EVAL) { assert(dispatch_width == 8); - brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD); - brw_reg uniform_mask = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD); - brw_reg payload = brw_vgrf(alloc.allocate(4), BRW_TYPE_UD); + brw_reg uniform_urb_handle = retype(brw_allocate_vgrf_units(*this, 1), BRW_TYPE_UD); + brw_reg uniform_mask = retype(brw_allocate_vgrf_units(*this, 1), BRW_TYPE_UD); + brw_reg payload = retype(brw_allocate_vgrf_units(*this, 4), BRW_TYPE_UD); /* Workaround requires all 8 channels (lanes) to be valid. This is * understood to mean they all need to be alive. First trick is to find @@ -334,8 +334,8 @@ fs_visitor::emit_cs_terminate() * make sure it uses the appropriate register range. */ struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_TYPE_UD); - brw_reg payload = brw_vgrf(alloc.allocate(reg_unit(devinfo)), - BRW_TYPE_UD); + brw_reg payload = + retype(brw_allocate_vgrf_units(*this, reg_unit(devinfo)), BRW_TYPE_UD); ubld.group(8 * reg_unit(devinfo), 0).MOV(payload, g0); /* Set the descriptor to "Dereference Resource" and "Root Thread" */ diff --git a/src/intel/compiler/brw_lower.cpp b/src/intel/compiler/brw_lower.cpp index c7ab9c26c62..5587f3eb8bc 100644 --- a/src/intel/compiler/brw_lower.cpp +++ b/src/intel/compiler/brw_lower.cpp @@ -525,8 +525,7 @@ brw_lower_sends_overlapping_payload(fs_visitor &s) const unsigned arg = inst->mlen < inst->ex_mlen ? 2 : 3; const unsigned len = MIN2(inst->mlen, inst->ex_mlen); - brw_reg tmp = brw_vgrf(s.alloc.allocate(len), - BRW_TYPE_UD); + brw_reg tmp = retype(brw_allocate_vgrf_units(s, len), BRW_TYPE_UD); /* Sadly, we've lost all notion of channels and bit sizes at this * point. Just WE_all it. 
@@ -567,8 +566,8 @@ brw_lower_3src_null_dest(fs_visitor &s) foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) { if (inst->is_3src(s.compiler) && inst->dst.is_null()) { - inst->dst = brw_vgrf(s.alloc.allocate(s.dispatch_width / 8), - inst->dst.type); + inst->dst = retype(brw_allocate_vgrf_units(s, s.dispatch_width / 8), + inst->dst.type); progress = true; } } diff --git a/src/intel/compiler/brw_lower_integer_multiplication.cpp b/src/intel/compiler/brw_lower_integer_multiplication.cpp index 48c110d76df..008a1d5c9b0 100644 --- a/src/intel/compiler/brw_lower_integer_multiplication.cpp +++ b/src/intel/compiler/brw_lower_integer_multiplication.cpp @@ -220,12 +220,11 @@ brw_lower_mul_dword_inst(fs_visitor &s, brw_inst *inst, bblock_t *block) inst->src[1], inst->size_read(devinfo, 1)) || inst->dst.stride >= 4) { needs_mov = true; - low = brw_vgrf(s.alloc.allocate(regs_written(inst)), - inst->dst.type); + low = retype(brw_allocate_vgrf_units(s, regs_written(inst)), inst->dst.type); } /* Get a new VGRF but keep the same stride as inst->dst */ - brw_reg high = brw_vgrf(s.alloc.allocate(regs_written(inst)), inst->dst.type); + brw_reg high = retype(brw_allocate_vgrf_units(s, regs_written(inst)), inst->dst.type); high.stride = inst->dst.stride; high.offset = inst->dst.offset % REG_SIZE; @@ -317,17 +316,17 @@ brw_lower_mul_qword_inst(fs_visitor &s, brw_inst *inst, bblock_t *block) unsigned int q_regs = regs_written(inst); unsigned int d_regs = (q_regs + 1) / 2; - brw_reg bd = brw_vgrf(s.alloc.allocate(q_regs), BRW_TYPE_UQ); - brw_reg ad = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); - brw_reg bc = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); + brw_reg bd = retype(brw_allocate_vgrf_units(s, q_regs), BRW_TYPE_UQ); + brw_reg ad = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD); + brw_reg bc = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD); /* Here we need the full 64 bit result for 32b * 32b. 
*/ if (devinfo->has_integer_dword_mul) { ibld.MUL(bd, subscript(inst->src[0], BRW_TYPE_UD, 0), subscript(inst->src[1], BRW_TYPE_UD, 0)); } else { - brw_reg bd_high = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); - brw_reg bd_low = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); + brw_reg bd_high = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD); + brw_reg bd_low = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD); const unsigned acc_width = reg_unit(devinfo) * 8; brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD), inst->group % acc_width); diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index a4a5dca7612..b66425099bb 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -45,8 +45,7 @@ lower_urb_read_logical_send(const brw_builder &bld, brw_inst *inst) if (per_slot_present) payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS]; - brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(header_size), - BRW_TYPE_F); + brw_reg payload = retype(brw_allocate_vgrf_units(*bld.shader, header_size), BRW_TYPE_F); bld.LOAD_PAYLOAD(payload, payload_sources, header_size, header_size); inst->opcode = SHADER_OPCODE_SEND; @@ -146,8 +145,8 @@ lower_urb_write_logical_send(const brw_builder &bld, brw_inst *inst) inst->components_read(URB_LOGICAL_SRC_DATA); brw_reg *payload_sources = new brw_reg[length]; - brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(length), - BRW_TYPE_F); + brw_reg payload = retype(brw_allocate_vgrf_units(*bld.shader, length), + BRW_TYPE_F); unsigned header_size = 0; payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_HANDLE]; @@ -375,7 +374,7 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, if (fs_payload.aa_dest_stencil_reg[0]) { assert(inst->group < 16); - sources[length] = brw_vgrf(bld.shader->alloc.allocate(1), BRW_TYPE_F); + sources[length] = retype(brw_allocate_vgrf_units(*bld.shader, 1), BRW_TYPE_F); bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha") .MOV(sources[length], brw_reg(brw_vec8_grf(fs_payload.aa_dest_stencil_reg[0], 0))); @@ -394,8 +393,8 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, } if (sample_mask.file != BAD_FILE) { - const brw_reg tmp = brw_vgrf(bld.shader->alloc.allocate(reg_unit(devinfo)), - BRW_TYPE_UD); + const brw_reg tmp = retype(brw_allocate_vgrf_units(*bld.shader, reg_unit(devinfo)), + BRW_TYPE_UD); /* Hand over gl_SampleMask. Only the lower 16 bits of each channel are * relevant. Since it's unsigned single words one vgrf is always @@ -456,7 +455,7 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, /* Send from the GRF */ brw_reg payload = brw_vgrf(-1, BRW_TYPE_F); brw_inst *load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); - payload.nr = bld.shader->alloc.allocate(regs_written(load)); + payload.nr = brw_allocate_vgrf_units(*bld.shader, regs_written(load)).nr; load->dst = payload; uint32_t msg_ctl = brw_fb_write_msg_control(inst, prog_data); @@ -1093,8 +1092,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst, } const brw_reg src_payload = - brw_vgrf(bld.shader->alloc.allocate(length * bld.dispatch_width() / 8), - BRW_TYPE_F); + retype(brw_allocate_vgrf_units(*bld.shader, length * bld.dispatch_width() / 8), + BRW_TYPE_F); /* In case of 16-bit payload each component takes one full register in * both SIMD8H and SIMD16H modes. 
In both cases one reg can hold 16 * elements. In SIMD8H case hardware simply expects the components to be diff --git a/src/intel/compiler/brw_lower_regioning.cpp b/src/intel/compiler/brw_lower_regioning.cpp index f5f96a62ef9..01c9f1acd0d 100644 --- a/src/intel/compiler/brw_lower_regioning.cpp +++ b/src/intel/compiler/brw_lower_regioning.cpp @@ -560,7 +560,7 @@ namespace { inst->exec_size * stride * brw_type_size_bytes(inst->src[i].type), reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); - brw_reg tmp = brw_vgrf(v->alloc.allocate(size), inst->src[i].type); + brw_reg tmp = retype(brw_allocate_vgrf_units(*v, size), inst->src[i].type); ibld.UNDEF(tmp); tmp = byte_offset(horiz_stride(tmp, stride), required_src_byte_offset(devinfo, inst, i)); diff --git a/src/intel/compiler/brw_opt.cpp b/src/intel/compiler/brw_opt.cpp index 0a872f59177..0c9098efe0e 100644 --- a/src/intel/compiler/brw_opt.cpp +++ b/src/intel/compiler/brw_opt.cpp @@ -359,8 +359,8 @@ brw_opt_split_sends(fs_visitor &s) assert(lp2->size_written % REG_SIZE == 0); assert((lp1->size_written + lp2->size_written) / REG_SIZE == send->mlen); - lp1->dst = brw_vgrf(s.alloc.allocate(lp1->size_written / REG_SIZE), lp1->dst.type); - lp2->dst = brw_vgrf(s.alloc.allocate(lp2->size_written / REG_SIZE), lp2->dst.type); + lp1->dst = retype(brw_allocate_vgrf_units(s, lp1->size_written / REG_SIZE), lp1->dst.type); + lp2->dst = retype(brw_allocate_vgrf_units(s, lp2->size_written / REG_SIZE), lp2->dst.type); send->resize_sources(4); send->src[2] = lp1->dst; diff --git a/src/intel/compiler/brw_opt_combine_constants.cpp b/src/intel/compiler/brw_opt_combine_constants.cpp index 57a12d9fc8f..ff1eb19a4c2 100644 --- a/src/intel/compiler/brw_opt_combine_constants.cpp +++ b/src/intel/compiler/brw_opt_combine_constants.cpp @@ -1203,7 +1203,7 @@ allocate_slots(fs_visitor &s, if ((x & mask) == mask) { if (regs[i].nr == UINT_MAX) - regs[i].nr = s.alloc.allocate(reg_unit(s.devinfo)); + regs[i].nr = brw_allocate_vgrf_units(s, reg_unit(s.devinfo)).nr; regs[i].avail &= ~(mask << j); diff --git a/src/intel/compiler/brw_opt_virtual_grfs.cpp b/src/intel/compiler/brw_opt_virtual_grfs.cpp index 616c35fc269..a2b8935e170 100644 --- a/src/intel/compiler/brw_opt_virtual_grfs.cpp +++ b/src/intel/compiler/brw_opt_virtual_grfs.cpp @@ -115,7 +115,7 @@ brw_opt_split_virtual_grfs(fs_visitor &s) has_splits = true; vgrf_has_split[i] = true; assert(offset <= MAX_VGRF_SIZE(s.devinfo)); - unsigned grf = s.alloc.allocate(offset); + unsigned grf = brw_allocate_vgrf_units(s, offset).nr; for (unsigned k = reg - offset; k < reg; k++) new_virtual_grf[k] = grf; offset = 0; diff --git a/src/intel/compiler/brw_reg_allocate.cpp b/src/intel/compiler/brw_reg_allocate.cpp index b311abb8a79..46529defbf9 100644 --- a/src/intel/compiler/brw_reg_allocate.cpp +++ b/src/intel/compiler/brw_reg_allocate.cpp @@ -1082,7 +1082,7 @@ brw_reg_alloc::choose_spill_reg() brw_reg brw_reg_alloc::alloc_spill_reg(unsigned size, int ip) { - int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo))); + int vgrf = brw_allocate_vgrf_units(*fs, ALIGN(size, reg_unit(devinfo))).nr; int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1; int n = ra_add_node(g, compiler->reg_set.classes[class_idx]); assert(n == first_vgrf_node + vgrf);