diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp
index d2cb4ea4d4a..77f1d93eb28 100644
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -266,7 +266,6 @@ public:
       node_count = 0;
       first_payload_node = 0;
-      scratch_header_node = 0;
       grf127_send_hack_node = 0;
       first_vgrf_node = 0;
       last_vgrf_node = 0;

@@ -296,6 +295,8 @@ private:
                              uint32_t spill_offset, int ip);
    fs_reg build_single_offset(const fs_builder &bld,
                               uint32_t spill_offset, int ip);
+   fs_reg build_legacy_scratch_header(const fs_builder &bld,
+                                      uint32_t spill_offset, int ip);

    void emit_unspill(const fs_builder &bld, struct shader_stats *stats,
                      fs_reg dst, uint32_t spill_offset, unsigned count, int ip);
@@ -304,7 +305,6 @@ private:

    void set_spill_costs();
    int choose_spill_reg();
-   fs_reg alloc_scratch_header();
    fs_reg alloc_spill_reg(unsigned size, int ip);
    void spill_reg(unsigned spill_reg);

@@ -325,7 +325,6 @@ private:

    int node_count;
    int first_payload_node;
-   int scratch_header_node;
    int grf127_send_hack_node;
    int first_vgrf_node;
    int last_vgrf_node;
@@ -334,8 +333,6 @@ private:
    int *spill_vgrf_ip;
    int spill_vgrf_ip_alloc;
    int spill_node_count;
-
-   fs_reg scratch_header;
 };

 namespace {
@@ -393,10 +390,6 @@ fs_reg_alloc::setup_live_interference(unsigned node,
          ra_add_node_interference(g, node, first_payload_node + i);
    }

-   /* Everything interferes with the scratch header */
-   if (scratch_header_node >= 0)
-      ra_add_node_interference(g, node, scratch_header_node);
-
    /* Add interference with every vgrf whose live range intersects this
     * node's.  We only need to look at nodes below this one as the reflexivity
     * of interference will take care of the rest.
@@ -530,11 +523,6 @@ fs_reg_alloc::build_interference_graph(bool allow_spilling)
    first_vgrf_node = node_count;
    node_count += fs->alloc.count;
    last_vgrf_node = node_count - 1;
-   if (devinfo->verx10 < 125 && allow_spilling) {
-      scratch_header_node = node_count++;
-   } else {
-      scratch_header_node = -1;
-   }
    first_spill_node = node_count;

    fs->calculate_payload_ranges(payload_node_count,
@@ -631,6 +619,31 @@ fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, i
    return offset;
 }

+/**
+ * Generate a scratch header for pre-LSC platforms.
+ */
+fs_reg
+fs_reg_alloc::build_legacy_scratch_header(const fs_builder &bld,
+                                          uint32_t spill_offset, int ip)
+{
+   const fs_builder ubld8 = bld.exec_all().group(8, 0);
+   const fs_builder ubld1 = bld.exec_all().group(1, 0);
+
+   /* Allocate a spill header and make it interfere with g0 */
+   fs_reg header = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD);
+   ra_add_node_interference(g, first_vgrf_node + header.nr, first_payload_node);
+
+   fs_inst *inst = ubld8.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
+   _mesa_set_add(spill_insts, inst);
+
+   /* Write the scratch offset, in owords, into component 2 of the header */
+   assert(spill_offset % 16 == 0);
+   inst = ubld1.MOV(component(header, 2), brw_imm_ud(spill_offset / 16));
+   _mesa_set_add(spill_insts, inst);
+
+   return header;
+}
+
 void
 fs_reg_alloc::emit_unspill(const fs_builder &bld,
                            struct shader_stats *stats,
@@ -689,12 +702,7 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
          unspill_inst->send_is_volatile = true;
          unspill_inst->send_ex_desc_scratch = true;
       } else {
-         fs_reg header = this->scratch_header;
-         fs_builder ubld = bld.exec_all().group(1, 0);
-         assert(spill_offset % 16 == 0);
-         unspill_inst = ubld.MOV(component(header, 2),
-                                 brw_imm_ud(spill_offset / 16));
-         _mesa_set_add(spill_insts, unspill_inst);
+         fs_reg header = build_legacy_scratch_header(bld, spill_offset, ip);

          const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
          const fs_reg ex_desc = brw_imm_ud(0);
@@ -767,12 +775,7 @@ fs_reg_alloc::emit_spill(const fs_builder &bld,
          spill_inst->send_is_volatile = false;
          spill_inst->send_ex_desc_scratch = true;
       } else {
-         fs_reg header = this->scratch_header;
-         fs_builder ubld = bld.exec_all().group(1, 0);
-         assert(spill_offset % 16 == 0);
-         spill_inst = ubld.MOV(component(header, 2),
-                               brw_imm_ud(spill_offset / 16));
-         _mesa_set_add(spill_insts, spill_inst);
+         fs_reg header = build_legacy_scratch_header(bld, spill_offset, ip);

          const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
          const fs_reg ex_desc = brw_imm_ud(0);
@@ -900,19 +903,6 @@ fs_reg_alloc::choose_spill_reg()
    return node - first_vgrf_node;
 }

-fs_reg
-fs_reg_alloc::alloc_scratch_header()
-{
-   int vgrf = fs->alloc.allocate(1);
-   assert(first_vgrf_node + vgrf == scratch_header_node);
-   ra_set_node_class(g, scratch_header_node,
-                     compiler->fs_reg_set.classes[0]);
-
-   setup_live_interference(scratch_header_node, 0, INT_MAX);
-
-   return fs_reg(VGRF, vgrf, BRW_TYPE_UD);
-}
-
 fs_reg
 fs_reg_alloc::alloc_spill_reg(unsigned size, int ip)
 {
@@ -953,28 +943,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
    unsigned int spill_offset = fs->last_scratch;
    assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */

-   /* Spills may use MRFs 13-15 in the SIMD16 case.  Our texturing is done
-    * using up to 11 MRFs starting from either m1 or m2, and fb writes can use
-    * up to m13 (gfx6+ simd16: 2 header + 8 color + 2 src0alpha + 2 omask) or
-    * m15 (gfx4-5 simd16: 2 header + 8 color + 1 aads + 2 src depth + 2 dst
-    * depth), starting from m1.  In summary: We may not be able to spill in
-    * SIMD16 mode, because we'd stomp the FB writes.
-    */
-   if (!fs->spilled_any_registers) {
-      if (devinfo->verx10 >= 125) {
-         /* We will allocate a register on the fly */
-      } else {
-         this->scratch_header = alloc_scratch_header();
-         fs_builder ubld = fs_builder(fs, 8).exec_all().at(
-            fs->cfg->first_block(), fs->cfg->first_block()->start());
-
-         fs_inst *inst = ubld.emit(SHADER_OPCODE_SCRATCH_HEADER,
-                                   this->scratch_header);
-         _mesa_set_add(spill_insts, inst);
-      }
-
-      fs->spilled_any_registers = true;
-   }
+   fs->spilled_any_registers = true;

    fs->last_scratch += size * REG_SIZE;
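
The arithmetic the new helper relies on, for reference: legacy (pre-LSC) scratch messages address memory in owords, so component 2 of the header carries spill_offset / 16 and spill_offset must stay 16-byte aligned. Below is a minimal standalone sketch of that layout in plain C++, not Mesa code; model_scratch_header() and the bare array standing in for a 32-byte GRF are illustrative assumptions, not anything the patch adds.

// Standalone illustration (not Mesa code): models the one-GRF scratch
// header that build_legacy_scratch_header() emits instructions to build
// at shader runtime.  One GRF = 32 bytes = 8 dwords; component 2 holds
// the spill offset expressed in owords (16-byte units).
#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

static std::array<uint32_t, 8>
model_scratch_header(uint32_t spill_offset)   /* hypothetical helper */
{
   assert(spill_offset % 16 == 0);      /* oword read/write requirement */
   std::array<uint32_t, 8> header = {}; /* stands in for one 32-byte GRF */
   header[2] = spill_offset / 16;       /* offset expressed in owords */
   return header;
}

int main()
{
   /* A spill at byte offset 0x100 sits 16 owords into scratch space. */
   auto h = model_scratch_header(0x100);
   printf("component 2 = %u owords\n", (unsigned) h[2]);
   return 0;
}

The patch's actual change is in who owns that header: instead of one long-lived scratch_header VGRF that interfered with every node in the graph, each spill/fill send now builds its own header via alloc_spill_reg(), leaving the register allocator free to place it like any other spill register.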