From f82bcd56fc9a9f30f62a64a6f8dc56acb6d8911d Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Fri, 31 Jan 2025 12:50:20 -0800 Subject: [PATCH] intel/brw: Add functions to allocate VGRF space Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/brw_builder.h | 6 +--- src/intel/compiler/brw_from_nir.cpp | 26 ++++++++-------- src/intel/compiler/brw_fs.cpp | 16 ++++++++++ src/intel/compiler/brw_fs.h | 3 ++ src/intel/compiler/brw_fs_visitor.cpp | 30 +++++++++---------- src/intel/compiler/brw_lower.cpp | 7 ++--- .../brw_lower_integer_multiplication.cpp | 15 +++++----- .../compiler/brw_lower_logical_sends.cpp | 19 ++++++------ src/intel/compiler/brw_lower_regioning.cpp | 2 +- src/intel/compiler/brw_opt.cpp | 4 +-- .../compiler/brw_opt_combine_constants.cpp | 2 +- src/intel/compiler/brw_opt_virtual_grfs.cpp | 2 +- src/intel/compiler/brw_reg_allocate.cpp | 2 +- 13 files changed, 73 insertions(+), 61 deletions(-) diff --git a/src/intel/compiler/brw_builder.h b/src/intel/compiler/brw_builder.h index 01cafa60dd8..a50e72d5ff2 100644 --- a/src/intel/compiler/brw_builder.h +++ b/src/intel/compiler/brw_builder.h @@ -199,14 +199,10 @@ public: brw_reg vgrf(enum brw_reg_type type, unsigned n = 1) const { - const unsigned unit = reg_unit(shader->devinfo); assert(dispatch_width() <= 32); if (n > 0) - return brw_vgrf(shader->alloc.allocate( - DIV_ROUND_UP(n * brw_type_size_bytes(type) * dispatch_width(), - unit * REG_SIZE) * unit), - type); + return brw_allocate_vgrf(*shader, type, n * dispatch_width()); else return retype(null_reg_ud(), type); } diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index c7dfeeadf1d..46418543536 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -5023,7 +5023,7 @@ get_timestamp(const brw_builder &bld) brw_reg ts = brw_reg(retype(brw_vec4_reg(ARF, BRW_ARF_TIMESTAMP, 0), BRW_TYPE_UD)); - brw_reg dst = brw_vgrf(s.alloc.allocate(1), BRW_TYPE_UD); + brw_reg dst = retype(brw_allocate_vgrf_units(s, 1), BRW_TYPE_UD); /* We want to read the 3 fields we care about even if it's not enabled in * the dispatch. 
@@ -5084,8 +5084,8 @@ emit_urb_direct_vec4_write(const brw_builder &bld, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), - BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); @@ -5154,8 +5154,8 @@ emit_urb_direct_vec4_write_xe2(const brw_builder &bld, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); - int nr = bld.shader->alloc.allocate(comps * runit); - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(nr, BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps); hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0); @@ -5217,8 +5217,8 @@ emit_urb_indirect_vec4_write(const brw_builder &bld, srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), - BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); @@ -5288,8 +5288,8 @@ emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = addr; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); - int nr = bld.shader->alloc.allocate(comps * runit); - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(nr, BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps); wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0); @@ -5348,8 +5348,8 @@ emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr, srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = final_offset; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask; - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), - BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); @@ -7525,8 +7525,8 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, /* Allocate enough space for the components + one physical register for the * residency data. 
*/ - brw_reg dst = brw_vgrf( - bld.shader->alloc.allocate(total_regs * reg_unit(devinfo)), + brw_reg dst = retype( + brw_allocate_vgrf_units(*bld.shader, total_regs * reg_unit(devinfo)), dst_type); brw_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 8f255e2087e..9fc29ba97f6 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -837,3 +837,19 @@ bool brw_should_print_shader(const nir_shader *shader, uint64_t debug_flag) { return INTEL_DEBUG(debug_flag) && (!shader->info.internal || NIR_DEBUG(PRINT_INTERNAL)); } + +brw_reg +brw_allocate_vgrf(fs_visitor &s, brw_reg_type type, unsigned count) +{ + const unsigned unit = reg_unit(s.devinfo); + const unsigned size = DIV_ROUND_UP(count * brw_type_size_bytes(type), + unit * REG_SIZE) * unit; + return retype(brw_allocate_vgrf_units(s, size), type); +} + +brw_reg +brw_allocate_vgrf_units(fs_visitor &s, unsigned units_of_REGSIZE) +{ + return brw_vgrf(s.alloc.allocate(units_of_REGSIZE), BRW_TYPE_UD); +} + diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 4ac970f3020..0e39bfc57c5 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -472,3 +472,6 @@ bool brw_workaround_source_arf_before_eot(fs_visitor &s); /* Helpers. */ unsigned brw_get_lowered_simd_width(const fs_visitor *shader, const brw_inst *inst); + +brw_reg brw_allocate_vgrf(fs_visitor &s, brw_reg_type type, unsigned count); +brw_reg brw_allocate_vgrf_units(fs_visitor &s, unsigned units_of_REGSIZE); diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index a8d60d91889..b3e7cc73d11 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -130,8 +130,8 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) break; } - brw_reg zero = brw_vgrf(alloc.allocate(dispatch_width / 8), - BRW_TYPE_UD); + brw_reg zero = + retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD); bld.MOV(zero, brw_imm_ud(0u)); if (vue_map->slots_valid & VARYING_BIT_PRIMITIVE_SHADING_RATE && @@ -139,8 +139,8 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) sources[length++] = this->outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE]; } else if (devinfo->has_coarse_pixel_primitive_and_cb) { uint32_t one_fp16 = 0x3C00; - brw_reg one_by_one_fp16 = brw_vgrf(alloc.allocate(dispatch_width / 8), - BRW_TYPE_UD); + brw_reg one_by_one_fp16 = + retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD); bld.MOV(one_by_one_fp16, brw_imm_ud((one_fp16 << 16) | one_fp16)); sources[length++] = one_by_one_fp16; } else { @@ -213,8 +213,8 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offsets; - srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(alloc.allocate((dispatch_width / 8) * length), - BRW_TYPE_F); + srcs[URB_LOGICAL_SRC_DATA] = + retype(brw_allocate_vgrf_units(*this, (dispatch_width / 8) * length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0); @@ -253,10 +253,10 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) if (stage == MESA_SHADER_GEOMETRY) return; - brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(dispatch_width / 8), - BRW_TYPE_UD); - brw_reg payload = brw_vgrf(alloc.allocate(dispatch_width / 8), - BRW_TYPE_UD); + brw_reg uniform_urb_handle = 
+ retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD); + brw_reg payload = + retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD); bld.exec_all().MOV(uniform_urb_handle, urb_handle); @@ -280,9 +280,9 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count) */ if (intel_needs_workaround(devinfo, 1805992985) && stage == MESA_SHADER_TESS_EVAL) { assert(dispatch_width == 8); - brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD); - brw_reg uniform_mask = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD); - brw_reg payload = brw_vgrf(alloc.allocate(4), BRW_TYPE_UD); + brw_reg uniform_urb_handle = retype(brw_allocate_vgrf_units(*this, 1), BRW_TYPE_UD); + brw_reg uniform_mask = retype(brw_allocate_vgrf_units(*this, 1), BRW_TYPE_UD); + brw_reg payload = retype(brw_allocate_vgrf_units(*this, 4), BRW_TYPE_UD); /* Workaround requires all 8 channels (lanes) to be valid. This is * understood to mean they all need to be alive. First trick is to find @@ -334,8 +334,8 @@ fs_visitor::emit_cs_terminate() * make sure it uses the appropriate register range. */ struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_TYPE_UD); - brw_reg payload = brw_vgrf(alloc.allocate(reg_unit(devinfo)), - BRW_TYPE_UD); + brw_reg payload = + retype(brw_allocate_vgrf_units(*this, reg_unit(devinfo)), BRW_TYPE_UD); ubld.group(8 * reg_unit(devinfo), 0).MOV(payload, g0); /* Set the descriptor to "Dereference Resource" and "Root Thread" */ diff --git a/src/intel/compiler/brw_lower.cpp b/src/intel/compiler/brw_lower.cpp index c7ab9c26c62..5587f3eb8bc 100644 --- a/src/intel/compiler/brw_lower.cpp +++ b/src/intel/compiler/brw_lower.cpp @@ -525,8 +525,7 @@ brw_lower_sends_overlapping_payload(fs_visitor &s) const unsigned arg = inst->mlen < inst->ex_mlen ? 2 : 3; const unsigned len = MIN2(inst->mlen, inst->ex_mlen); - brw_reg tmp = brw_vgrf(s.alloc.allocate(len), - BRW_TYPE_UD); + brw_reg tmp = retype(brw_allocate_vgrf_units(s, len), BRW_TYPE_UD); /* Sadly, we've lost all notion of channels and bit sizes at this * point. Just WE_all it. 
@@ -567,8 +566,8 @@ brw_lower_3src_null_dest(fs_visitor &s) foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) { if (inst->is_3src(s.compiler) && inst->dst.is_null()) { - inst->dst = brw_vgrf(s.alloc.allocate(s.dispatch_width / 8), - inst->dst.type); + inst->dst = retype(brw_allocate_vgrf_units(s, s.dispatch_width / 8), + inst->dst.type); progress = true; } } diff --git a/src/intel/compiler/brw_lower_integer_multiplication.cpp b/src/intel/compiler/brw_lower_integer_multiplication.cpp index 48c110d76df..008a1d5c9b0 100644 --- a/src/intel/compiler/brw_lower_integer_multiplication.cpp +++ b/src/intel/compiler/brw_lower_integer_multiplication.cpp @@ -220,12 +220,11 @@ brw_lower_mul_dword_inst(fs_visitor &s, brw_inst *inst, bblock_t *block) inst->src[1], inst->size_read(devinfo, 1)) || inst->dst.stride >= 4) { needs_mov = true; - low = brw_vgrf(s.alloc.allocate(regs_written(inst)), - inst->dst.type); + low = retype(brw_allocate_vgrf_units(s, regs_written(inst)), inst->dst.type); } /* Get a new VGRF but keep the same stride as inst->dst */ - brw_reg high = brw_vgrf(s.alloc.allocate(regs_written(inst)), inst->dst.type); + brw_reg high = retype(brw_allocate_vgrf_units(s, regs_written(inst)), inst->dst.type); high.stride = inst->dst.stride; high.offset = inst->dst.offset % REG_SIZE; @@ -317,17 +316,17 @@ brw_lower_mul_qword_inst(fs_visitor &s, brw_inst *inst, bblock_t *block) unsigned int q_regs = regs_written(inst); unsigned int d_regs = (q_regs + 1) / 2; - brw_reg bd = brw_vgrf(s.alloc.allocate(q_regs), BRW_TYPE_UQ); - brw_reg ad = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); - brw_reg bc = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); + brw_reg bd = retype(brw_allocate_vgrf_units(s, q_regs), BRW_TYPE_UQ); + brw_reg ad = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD); + brw_reg bc = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD); /* Here we need the full 64 bit result for 32b * 32b. 
*/ if (devinfo->has_integer_dword_mul) { ibld.MUL(bd, subscript(inst->src[0], BRW_TYPE_UD, 0), subscript(inst->src[1], BRW_TYPE_UD, 0)); } else { - brw_reg bd_high = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); - brw_reg bd_low = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); + brw_reg bd_high = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD); + brw_reg bd_low = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD); const unsigned acc_width = reg_unit(devinfo) * 8; brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD), inst->group % acc_width); diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index a4a5dca7612..b66425099bb 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -45,8 +45,7 @@ lower_urb_read_logical_send(const brw_builder &bld, brw_inst *inst) if (per_slot_present) payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS]; - brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(header_size), - BRW_TYPE_F); + brw_reg payload = retype(brw_allocate_vgrf_units(*bld.shader, header_size), BRW_TYPE_F); bld.LOAD_PAYLOAD(payload, payload_sources, header_size, header_size); inst->opcode = SHADER_OPCODE_SEND; @@ -146,8 +145,8 @@ lower_urb_write_logical_send(const brw_builder &bld, brw_inst *inst) inst->components_read(URB_LOGICAL_SRC_DATA); brw_reg *payload_sources = new brw_reg[length]; - brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(length), - BRW_TYPE_F); + brw_reg payload = retype(brw_allocate_vgrf_units(*bld.shader, length), + BRW_TYPE_F); unsigned header_size = 0; payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_HANDLE]; @@ -375,7 +374,7 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, if (fs_payload.aa_dest_stencil_reg[0]) { assert(inst->group < 16); - sources[length] = brw_vgrf(bld.shader->alloc.allocate(1), BRW_TYPE_F); + sources[length] = retype(brw_allocate_vgrf_units(*bld.shader, 1), BRW_TYPE_F); bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha") .MOV(sources[length], brw_reg(brw_vec8_grf(fs_payload.aa_dest_stencil_reg[0], 0))); @@ -394,8 +393,8 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, } if (sample_mask.file != BAD_FILE) { - const brw_reg tmp = brw_vgrf(bld.shader->alloc.allocate(reg_unit(devinfo)), - BRW_TYPE_UD); + const brw_reg tmp = retype(brw_allocate_vgrf_units(*bld.shader, reg_unit(devinfo)), + BRW_TYPE_UD); /* Hand over gl_SampleMask. Only the lower 16 bits of each channel are * relevant. Since it's unsigned single words one vgrf is always @@ -456,7 +455,7 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst, /* Send from the GRF */ brw_reg payload = brw_vgrf(-1, BRW_TYPE_F); brw_inst *load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); - payload.nr = bld.shader->alloc.allocate(regs_written(load)); + payload.nr = brw_allocate_vgrf_units(*bld.shader, regs_written(load)).nr; load->dst = payload; uint32_t msg_ctl = brw_fb_write_msg_control(inst, prog_data); @@ -1093,8 +1092,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst, } const brw_reg src_payload = - brw_vgrf(bld.shader->alloc.allocate(length * bld.dispatch_width() / 8), - BRW_TYPE_F); + retype(brw_allocate_vgrf_units(*bld.shader, length * bld.dispatch_width() / 8), + BRW_TYPE_F); /* In case of 16-bit payload each component takes one full register in * both SIMD8H and SIMD16H modes. 
In both cases one reg can hold 16 * elements. In SIMD8H case hardware simply expects the components to be diff --git a/src/intel/compiler/brw_lower_regioning.cpp b/src/intel/compiler/brw_lower_regioning.cpp index f5f96a62ef9..01c9f1acd0d 100644 --- a/src/intel/compiler/brw_lower_regioning.cpp +++ b/src/intel/compiler/brw_lower_regioning.cpp @@ -560,7 +560,7 @@ namespace { inst->exec_size * stride * brw_type_size_bytes(inst->src[i].type), reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); - brw_reg tmp = brw_vgrf(v->alloc.allocate(size), inst->src[i].type); + brw_reg tmp = retype(brw_allocate_vgrf_units(*v, size), inst->src[i].type); ibld.UNDEF(tmp); tmp = byte_offset(horiz_stride(tmp, stride), required_src_byte_offset(devinfo, inst, i)); diff --git a/src/intel/compiler/brw_opt.cpp b/src/intel/compiler/brw_opt.cpp index 0a872f59177..0c9098efe0e 100644 --- a/src/intel/compiler/brw_opt.cpp +++ b/src/intel/compiler/brw_opt.cpp @@ -359,8 +359,8 @@ brw_opt_split_sends(fs_visitor &s) assert(lp2->size_written % REG_SIZE == 0); assert((lp1->size_written + lp2->size_written) / REG_SIZE == send->mlen); - lp1->dst = brw_vgrf(s.alloc.allocate(lp1->size_written / REG_SIZE), lp1->dst.type); - lp2->dst = brw_vgrf(s.alloc.allocate(lp2->size_written / REG_SIZE), lp2->dst.type); + lp1->dst = retype(brw_allocate_vgrf_units(s, lp1->size_written / REG_SIZE), lp1->dst.type); + lp2->dst = retype(brw_allocate_vgrf_units(s, lp2->size_written / REG_SIZE), lp2->dst.type); send->resize_sources(4); send->src[2] = lp1->dst; diff --git a/src/intel/compiler/brw_opt_combine_constants.cpp b/src/intel/compiler/brw_opt_combine_constants.cpp index 57a12d9fc8f..ff1eb19a4c2 100644 --- a/src/intel/compiler/brw_opt_combine_constants.cpp +++ b/src/intel/compiler/brw_opt_combine_constants.cpp @@ -1203,7 +1203,7 @@ allocate_slots(fs_visitor &s, if ((x & mask) == mask) { if (regs[i].nr == UINT_MAX) - regs[i].nr = s.alloc.allocate(reg_unit(s.devinfo)); + regs[i].nr = brw_allocate_vgrf_units(s, reg_unit(s.devinfo)).nr; regs[i].avail &= ~(mask << j); diff --git a/src/intel/compiler/brw_opt_virtual_grfs.cpp b/src/intel/compiler/brw_opt_virtual_grfs.cpp index 616c35fc269..a2b8935e170 100644 --- a/src/intel/compiler/brw_opt_virtual_grfs.cpp +++ b/src/intel/compiler/brw_opt_virtual_grfs.cpp @@ -115,7 +115,7 @@ brw_opt_split_virtual_grfs(fs_visitor &s) has_splits = true; vgrf_has_split[i] = true; assert(offset <= MAX_VGRF_SIZE(s.devinfo)); - unsigned grf = s.alloc.allocate(offset); + unsigned grf = brw_allocate_vgrf_units(s, offset).nr; for (unsigned k = reg - offset; k < reg; k++) new_virtual_grf[k] = grf; offset = 0; diff --git a/src/intel/compiler/brw_reg_allocate.cpp b/src/intel/compiler/brw_reg_allocate.cpp index b311abb8a79..46529defbf9 100644 --- a/src/intel/compiler/brw_reg_allocate.cpp +++ b/src/intel/compiler/brw_reg_allocate.cpp @@ -1082,7 +1082,7 @@ brw_reg_alloc::choose_spill_reg() brw_reg brw_reg_alloc::alloc_spill_reg(unsigned size, int ip) { - int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo))); + int vgrf = brw_allocate_vgrf_units(*fs, ALIGN(size, reg_unit(devinfo))).nr; int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1; int n = ra_add_node(g, compiler->reg_set.classes[class_idx]); assert(n == first_vgrf_node + vgrf);