intel/brw: Add functions to allocate VGRF space

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33334>
This commit is contained in:
Caio Oliveira 2025-01-31 12:50:20 -08:00
parent 5c717e68ce
commit f82bcd56fc
13 changed files with 73 additions and 61 deletions

View file

@ -199,14 +199,10 @@ public:
brw_reg brw_reg
vgrf(enum brw_reg_type type, unsigned n = 1) const vgrf(enum brw_reg_type type, unsigned n = 1) const
{ {
const unsigned unit = reg_unit(shader->devinfo);
assert(dispatch_width() <= 32); assert(dispatch_width() <= 32);
if (n > 0) if (n > 0)
return brw_vgrf(shader->alloc.allocate( return brw_allocate_vgrf(*shader, type, n * dispatch_width());
DIV_ROUND_UP(n * brw_type_size_bytes(type) * dispatch_width(),
unit * REG_SIZE) * unit),
type);
else else
return retype(null_reg_ud(), type); return retype(null_reg_ud(), type);
} }

View file

@ -5023,7 +5023,7 @@ get_timestamp(const brw_builder &bld)
brw_reg ts = brw_reg(retype(brw_vec4_reg(ARF, brw_reg ts = brw_reg(retype(brw_vec4_reg(ARF,
BRW_ARF_TIMESTAMP, 0), BRW_TYPE_UD)); BRW_ARF_TIMESTAMP, 0), BRW_TYPE_UD));
brw_reg dst = brw_vgrf(s.alloc.allocate(1), BRW_TYPE_UD); brw_reg dst = retype(brw_allocate_vgrf_units(s, 1), BRW_TYPE_UD);
/* We want to read the 3 fields we care about even if it's not enabled in /* We want to read the 3 fields we care about even if it's not enabled in
* the dispatch. * the dispatch.
@ -5084,8 +5084,8 @@ emit_urb_direct_vec4_write(const brw_builder &bld,
brw_reg srcs[URB_LOGICAL_NUM_SRCS]; brw_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), srcs[URB_LOGICAL_SRC_DATA] =
BRW_TYPE_F); retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length);
bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
@ -5154,8 +5154,8 @@ emit_urb_direct_vec4_write_xe2(const brw_builder &bld,
brw_reg srcs[URB_LOGICAL_NUM_SRCS]; brw_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
int nr = bld.shader->alloc.allocate(comps * runit); srcs[URB_LOGICAL_SRC_DATA] =
srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(nr, BRW_TYPE_F); retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0); hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
@ -5217,8 +5217,8 @@ emit_urb_indirect_vec4_write(const brw_builder &bld,
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), srcs[URB_LOGICAL_SRC_DATA] =
BRW_TYPE_F); retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length);
bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
@ -5288,8 +5288,8 @@ emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
brw_reg srcs[URB_LOGICAL_NUM_SRCS]; brw_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = addr; srcs[URB_LOGICAL_SRC_HANDLE] = addr;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
int nr = bld.shader->alloc.allocate(comps * runit); srcs[URB_LOGICAL_SRC_DATA] =
srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(nr, BRW_TYPE_F); retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0); wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
@ -5348,8 +5348,8 @@ emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr,
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = final_offset; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = final_offset;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask;
srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(bld.shader->alloc.allocate(length), srcs[URB_LOGICAL_SRC_DATA] =
BRW_TYPE_F); retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length);
bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
@ -7525,8 +7525,8 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
/* Allocate enough space for the components + one physical register for the /* Allocate enough space for the components + one physical register for the
* residency data. * residency data.
*/ */
brw_reg dst = brw_vgrf( brw_reg dst = retype(
bld.shader->alloc.allocate(total_regs * reg_unit(devinfo)), brw_allocate_vgrf_units(*bld.shader, total_regs * reg_unit(devinfo)),
dst_type); dst_type);
brw_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); brw_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));

View file

@ -837,3 +837,19 @@ bool brw_should_print_shader(const nir_shader *shader, uint64_t debug_flag)
{ {
return INTEL_DEBUG(debug_flag) && (!shader->info.internal || NIR_DEBUG(PRINT_INTERNAL)); return INTEL_DEBUG(debug_flag) && (!shader->info.internal || NIR_DEBUG(PRINT_INTERNAL));
} }
/* Allocate VGRF space large enough to hold `count` scalar components of
 * the given type, rounded up to a whole number of allocation units
 * (reg_unit registers of REG_SIZE bytes each).  The returned register is
 * retyped to `type`.
 */
brw_reg
brw_allocate_vgrf(fs_visitor &s, brw_reg_type type, unsigned count)
{
   /* Total payload size in bytes for the requested components. */
   const unsigned bytes = count * brw_type_size_bytes(type);
   const unsigned unit = reg_unit(s.devinfo);
   /* Round up to a multiple of the hardware allocation granularity. */
   const unsigned units = DIV_ROUND_UP(bytes, unit * REG_SIZE) * unit;
   return retype(brw_allocate_vgrf_units(s, units), type);
}
/* Allocate a VGRF of exactly `units_of_REGSIZE` registers (in REG_SIZE
 * units) from the shader's register allocator.  The result defaults to
 * BRW_TYPE_UD; callers retype as needed.
 */
brw_reg
brw_allocate_vgrf_units(fs_visitor &s, unsigned units_of_REGSIZE)
{
   const unsigned nr = s.alloc.allocate(units_of_REGSIZE);
   return brw_vgrf(nr, BRW_TYPE_UD);
}

View file

@ -472,3 +472,6 @@ bool brw_workaround_source_arf_before_eot(fs_visitor &s);
/* Helpers. */ /* Helpers. */
unsigned brw_get_lowered_simd_width(const fs_visitor *shader, unsigned brw_get_lowered_simd_width(const fs_visitor *shader,
const brw_inst *inst); const brw_inst *inst);
brw_reg brw_allocate_vgrf(fs_visitor &s, brw_reg_type type, unsigned count);
brw_reg brw_allocate_vgrf_units(fs_visitor &s, unsigned units_of_REGSIZE);

View file

@ -130,8 +130,8 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count)
break; break;
} }
brw_reg zero = brw_vgrf(alloc.allocate(dispatch_width / 8), brw_reg zero =
BRW_TYPE_UD); retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD);
bld.MOV(zero, brw_imm_ud(0u)); bld.MOV(zero, brw_imm_ud(0u));
if (vue_map->slots_valid & VARYING_BIT_PRIMITIVE_SHADING_RATE && if (vue_map->slots_valid & VARYING_BIT_PRIMITIVE_SHADING_RATE &&
@ -139,8 +139,8 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count)
sources[length++] = this->outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE]; sources[length++] = this->outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE];
} else if (devinfo->has_coarse_pixel_primitive_and_cb) { } else if (devinfo->has_coarse_pixel_primitive_and_cb) {
uint32_t one_fp16 = 0x3C00; uint32_t one_fp16 = 0x3C00;
brw_reg one_by_one_fp16 = brw_vgrf(alloc.allocate(dispatch_width / 8), brw_reg one_by_one_fp16 =
BRW_TYPE_UD); retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD);
bld.MOV(one_by_one_fp16, brw_imm_ud((one_fp16 << 16) | one_fp16)); bld.MOV(one_by_one_fp16, brw_imm_ud((one_fp16 << 16) | one_fp16));
sources[length++] = one_by_one_fp16; sources[length++] = one_by_one_fp16;
} else { } else {
@ -213,8 +213,8 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count)
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offsets; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offsets;
srcs[URB_LOGICAL_SRC_DATA] = brw_vgrf(alloc.allocate((dispatch_width / 8) * length), srcs[URB_LOGICAL_SRC_DATA] =
BRW_TYPE_F); retype(brw_allocate_vgrf_units(*this, (dispatch_width / 8) * length), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length);
abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0); abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
@ -253,10 +253,10 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count)
if (stage == MESA_SHADER_GEOMETRY) if (stage == MESA_SHADER_GEOMETRY)
return; return;
brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(dispatch_width / 8), brw_reg uniform_urb_handle =
BRW_TYPE_UD); retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD);
brw_reg payload = brw_vgrf(alloc.allocate(dispatch_width / 8), brw_reg payload =
BRW_TYPE_UD); retype(brw_allocate_vgrf_units(*this, dispatch_width / 8), BRW_TYPE_UD);
bld.exec_all().MOV(uniform_urb_handle, urb_handle); bld.exec_all().MOV(uniform_urb_handle, urb_handle);
@ -280,9 +280,9 @@ fs_visitor::emit_urb_writes(const brw_reg &gs_vertex_count)
*/ */
if (intel_needs_workaround(devinfo, 1805992985) && stage == MESA_SHADER_TESS_EVAL) { if (intel_needs_workaround(devinfo, 1805992985) && stage == MESA_SHADER_TESS_EVAL) {
assert(dispatch_width == 8); assert(dispatch_width == 8);
brw_reg uniform_urb_handle = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD); brw_reg uniform_urb_handle = retype(brw_allocate_vgrf_units(*this, 1), BRW_TYPE_UD);
brw_reg uniform_mask = brw_vgrf(alloc.allocate(1), BRW_TYPE_UD); brw_reg uniform_mask = retype(brw_allocate_vgrf_units(*this, 1), BRW_TYPE_UD);
brw_reg payload = brw_vgrf(alloc.allocate(4), BRW_TYPE_UD); brw_reg payload = retype(brw_allocate_vgrf_units(*this, 4), BRW_TYPE_UD);
/* Workaround requires all 8 channels (lanes) to be valid. This is /* Workaround requires all 8 channels (lanes) to be valid. This is
* understood to mean they all need to be alive. First trick is to find * understood to mean they all need to be alive. First trick is to find
@ -334,8 +334,8 @@ fs_visitor::emit_cs_terminate()
* make sure it uses the appropriate register range. * make sure it uses the appropriate register range.
*/ */
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_TYPE_UD); struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_TYPE_UD);
brw_reg payload = brw_vgrf(alloc.allocate(reg_unit(devinfo)), brw_reg payload =
BRW_TYPE_UD); retype(brw_allocate_vgrf_units(*this, reg_unit(devinfo)), BRW_TYPE_UD);
ubld.group(8 * reg_unit(devinfo), 0).MOV(payload, g0); ubld.group(8 * reg_unit(devinfo), 0).MOV(payload, g0);
/* Set the descriptor to "Dereference Resource" and "Root Thread" */ /* Set the descriptor to "Dereference Resource" and "Root Thread" */

View file

@ -525,8 +525,7 @@ brw_lower_sends_overlapping_payload(fs_visitor &s)
const unsigned arg = inst->mlen < inst->ex_mlen ? 2 : 3; const unsigned arg = inst->mlen < inst->ex_mlen ? 2 : 3;
const unsigned len = MIN2(inst->mlen, inst->ex_mlen); const unsigned len = MIN2(inst->mlen, inst->ex_mlen);
brw_reg tmp = brw_vgrf(s.alloc.allocate(len), brw_reg tmp = retype(brw_allocate_vgrf_units(s, len), BRW_TYPE_UD);
BRW_TYPE_UD);
/* Sadly, we've lost all notion of channels and bit sizes at this /* Sadly, we've lost all notion of channels and bit sizes at this
* point. Just WE_all it. * point. Just WE_all it.
@ -567,7 +566,7 @@ brw_lower_3src_null_dest(fs_visitor &s)
foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) { foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) {
if (inst->is_3src(s.compiler) && inst->dst.is_null()) { if (inst->is_3src(s.compiler) && inst->dst.is_null()) {
inst->dst = brw_vgrf(s.alloc.allocate(s.dispatch_width / 8), inst->dst = retype(brw_allocate_vgrf_units(s, s.dispatch_width / 8),
inst->dst.type); inst->dst.type);
progress = true; progress = true;
} }

View file

@ -220,12 +220,11 @@ brw_lower_mul_dword_inst(fs_visitor &s, brw_inst *inst, bblock_t *block)
inst->src[1], inst->size_read(devinfo, 1)) || inst->src[1], inst->size_read(devinfo, 1)) ||
inst->dst.stride >= 4) { inst->dst.stride >= 4) {
needs_mov = true; needs_mov = true;
low = brw_vgrf(s.alloc.allocate(regs_written(inst)), low = retype(brw_allocate_vgrf_units(s, regs_written(inst)), inst->dst.type);
inst->dst.type);
} }
/* Get a new VGRF but keep the same stride as inst->dst */ /* Get a new VGRF but keep the same stride as inst->dst */
brw_reg high = brw_vgrf(s.alloc.allocate(regs_written(inst)), inst->dst.type); brw_reg high = retype(brw_allocate_vgrf_units(s, regs_written(inst)), inst->dst.type);
high.stride = inst->dst.stride; high.stride = inst->dst.stride;
high.offset = inst->dst.offset % REG_SIZE; high.offset = inst->dst.offset % REG_SIZE;
@ -317,17 +316,17 @@ brw_lower_mul_qword_inst(fs_visitor &s, brw_inst *inst, bblock_t *block)
unsigned int q_regs = regs_written(inst); unsigned int q_regs = regs_written(inst);
unsigned int d_regs = (q_regs + 1) / 2; unsigned int d_regs = (q_regs + 1) / 2;
brw_reg bd = brw_vgrf(s.alloc.allocate(q_regs), BRW_TYPE_UQ); brw_reg bd = retype(brw_allocate_vgrf_units(s, q_regs), BRW_TYPE_UQ);
brw_reg ad = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); brw_reg ad = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD);
brw_reg bc = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); brw_reg bc = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD);
/* Here we need the full 64 bit result for 32b * 32b. */ /* Here we need the full 64 bit result for 32b * 32b. */
if (devinfo->has_integer_dword_mul) { if (devinfo->has_integer_dword_mul) {
ibld.MUL(bd, subscript(inst->src[0], BRW_TYPE_UD, 0), ibld.MUL(bd, subscript(inst->src[0], BRW_TYPE_UD, 0),
subscript(inst->src[1], BRW_TYPE_UD, 0)); subscript(inst->src[1], BRW_TYPE_UD, 0));
} else { } else {
brw_reg bd_high = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); brw_reg bd_high = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD);
brw_reg bd_low = brw_vgrf(s.alloc.allocate(d_regs), BRW_TYPE_UD); brw_reg bd_low = retype(brw_allocate_vgrf_units(s, d_regs), BRW_TYPE_UD);
const unsigned acc_width = reg_unit(devinfo) * 8; const unsigned acc_width = reg_unit(devinfo) * 8;
brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD), brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_TYPE_UD),
inst->group % acc_width); inst->group % acc_width);

View file

@ -45,8 +45,7 @@ lower_urb_read_logical_send(const brw_builder &bld, brw_inst *inst)
if (per_slot_present) if (per_slot_present)
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS]; payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(header_size), brw_reg payload = retype(brw_allocate_vgrf_units(*bld.shader, header_size), BRW_TYPE_F);
BRW_TYPE_F);
bld.LOAD_PAYLOAD(payload, payload_sources, header_size, header_size); bld.LOAD_PAYLOAD(payload, payload_sources, header_size, header_size);
inst->opcode = SHADER_OPCODE_SEND; inst->opcode = SHADER_OPCODE_SEND;
@ -146,7 +145,7 @@ lower_urb_write_logical_send(const brw_builder &bld, brw_inst *inst)
inst->components_read(URB_LOGICAL_SRC_DATA); inst->components_read(URB_LOGICAL_SRC_DATA);
brw_reg *payload_sources = new brw_reg[length]; brw_reg *payload_sources = new brw_reg[length];
brw_reg payload = brw_vgrf(bld.shader->alloc.allocate(length), brw_reg payload = retype(brw_allocate_vgrf_units(*bld.shader, length),
BRW_TYPE_F); BRW_TYPE_F);
unsigned header_size = 0; unsigned header_size = 0;
@ -375,7 +374,7 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst,
if (fs_payload.aa_dest_stencil_reg[0]) { if (fs_payload.aa_dest_stencil_reg[0]) {
assert(inst->group < 16); assert(inst->group < 16);
sources[length] = brw_vgrf(bld.shader->alloc.allocate(1), BRW_TYPE_F); sources[length] = retype(brw_allocate_vgrf_units(*bld.shader, 1), BRW_TYPE_F);
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha") bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
.MOV(sources[length], .MOV(sources[length],
brw_reg(brw_vec8_grf(fs_payload.aa_dest_stencil_reg[0], 0))); brw_reg(brw_vec8_grf(fs_payload.aa_dest_stencil_reg[0], 0)));
@ -394,7 +393,7 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst,
} }
if (sample_mask.file != BAD_FILE) { if (sample_mask.file != BAD_FILE) {
const brw_reg tmp = brw_vgrf(bld.shader->alloc.allocate(reg_unit(devinfo)), const brw_reg tmp = retype(brw_allocate_vgrf_units(*bld.shader, reg_unit(devinfo)),
BRW_TYPE_UD); BRW_TYPE_UD);
/* Hand over gl_SampleMask. Only the lower 16 bits of each channel are /* Hand over gl_SampleMask. Only the lower 16 bits of each channel are
@ -456,7 +455,7 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst,
/* Send from the GRF */ /* Send from the GRF */
brw_reg payload = brw_vgrf(-1, BRW_TYPE_F); brw_reg payload = brw_vgrf(-1, BRW_TYPE_F);
brw_inst *load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); brw_inst *load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
payload.nr = bld.shader->alloc.allocate(regs_written(load)); payload.nr = brw_allocate_vgrf_units(*bld.shader, regs_written(load)).nr;
load->dst = payload; load->dst = payload;
uint32_t msg_ctl = brw_fb_write_msg_control(inst, prog_data); uint32_t msg_ctl = brw_fb_write_msg_control(inst, prog_data);
@ -1093,7 +1092,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
} }
const brw_reg src_payload = const brw_reg src_payload =
brw_vgrf(bld.shader->alloc.allocate(length * bld.dispatch_width() / 8), retype(brw_allocate_vgrf_units(*bld.shader, length * bld.dispatch_width() / 8),
BRW_TYPE_F); BRW_TYPE_F);
/* In case of 16-bit payload each component takes one full register in /* In case of 16-bit payload each component takes one full register in
* both SIMD8H and SIMD16H modes. In both cases one reg can hold 16 * both SIMD8H and SIMD16H modes. In both cases one reg can hold 16

View file

@ -560,7 +560,7 @@ namespace {
inst->exec_size * stride * inst->exec_size * stride *
brw_type_size_bytes(inst->src[i].type), brw_type_size_bytes(inst->src[i].type),
reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
brw_reg tmp = brw_vgrf(v->alloc.allocate(size), inst->src[i].type); brw_reg tmp = retype(brw_allocate_vgrf_units(*v, size), inst->src[i].type);
ibld.UNDEF(tmp); ibld.UNDEF(tmp);
tmp = byte_offset(horiz_stride(tmp, stride), tmp = byte_offset(horiz_stride(tmp, stride),
required_src_byte_offset(devinfo, inst, i)); required_src_byte_offset(devinfo, inst, i));

View file

@ -359,8 +359,8 @@ brw_opt_split_sends(fs_visitor &s)
assert(lp2->size_written % REG_SIZE == 0); assert(lp2->size_written % REG_SIZE == 0);
assert((lp1->size_written + lp2->size_written) / REG_SIZE == send->mlen); assert((lp1->size_written + lp2->size_written) / REG_SIZE == send->mlen);
lp1->dst = brw_vgrf(s.alloc.allocate(lp1->size_written / REG_SIZE), lp1->dst.type); lp1->dst = retype(brw_allocate_vgrf_units(s, lp1->size_written / REG_SIZE), lp1->dst.type);
lp2->dst = brw_vgrf(s.alloc.allocate(lp2->size_written / REG_SIZE), lp2->dst.type); lp2->dst = retype(brw_allocate_vgrf_units(s, lp2->size_written / REG_SIZE), lp2->dst.type);
send->resize_sources(4); send->resize_sources(4);
send->src[2] = lp1->dst; send->src[2] = lp1->dst;

View file

@ -1203,7 +1203,7 @@ allocate_slots(fs_visitor &s,
if ((x & mask) == mask) { if ((x & mask) == mask) {
if (regs[i].nr == UINT_MAX) if (regs[i].nr == UINT_MAX)
regs[i].nr = s.alloc.allocate(reg_unit(s.devinfo)); regs[i].nr = brw_allocate_vgrf_units(s, reg_unit(s.devinfo)).nr;
regs[i].avail &= ~(mask << j); regs[i].avail &= ~(mask << j);

View file

@ -115,7 +115,7 @@ brw_opt_split_virtual_grfs(fs_visitor &s)
has_splits = true; has_splits = true;
vgrf_has_split[i] = true; vgrf_has_split[i] = true;
assert(offset <= MAX_VGRF_SIZE(s.devinfo)); assert(offset <= MAX_VGRF_SIZE(s.devinfo));
unsigned grf = s.alloc.allocate(offset); unsigned grf = brw_allocate_vgrf_units(s, offset).nr;
for (unsigned k = reg - offset; k < reg; k++) for (unsigned k = reg - offset; k < reg; k++)
new_virtual_grf[k] = grf; new_virtual_grf[k] = grf;
offset = 0; offset = 0;

View file

@ -1082,7 +1082,7 @@ brw_reg_alloc::choose_spill_reg()
brw_reg brw_reg
brw_reg_alloc::alloc_spill_reg(unsigned size, int ip) brw_reg_alloc::alloc_spill_reg(unsigned size, int ip)
{ {
int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo))); int vgrf = brw_allocate_vgrf_units(*fs, ALIGN(size, reg_unit(devinfo))).nr;
int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1; int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1;
int n = ra_add_node(g, compiler->reg_set.classes[class_idx]); int n = ra_add_node(g, compiler->reg_set.classes[class_idx]);
assert(n == first_vgrf_node + vgrf); assert(n == first_vgrf_node + vgrf);