diff --git a/src/intel/compiler/brw_inst.cpp b/src/intel/compiler/brw_inst.cpp
index 535a287b66f..637f602dd6f 100644
--- a/src/intel/compiler/brw_inst.cpp
+++ b/src/intel/compiler/brw_inst.cpp
@@ -4,6 +4,7 @@
  */
 
 #include "brw_eu.h"
+#include "brw_fs.h"
 #include "brw_cfg.h"
 #include "brw_compiler.h"
 #include "brw_inst.h"
@@ -1253,11 +1254,10 @@ is_multi_copy_payload(const struct intel_device_info *devinfo,
  * instruction.
  */
 bool
-is_coalescing_payload(const struct intel_device_info *devinfo,
-                      const brw::simple_allocator &alloc, const brw_inst *inst)
+is_coalescing_payload(const fs_visitor &s, const brw_inst *inst)
 {
-   return is_identity_payload(devinfo, VGRF, inst) &&
+   return is_identity_payload(s.devinfo, VGRF, inst) &&
           inst->src[0].offset == 0 &&
-          alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
+          s.alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
 }
 
diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h
index 4417abf417b..79ce4b36d4e 100644
--- a/src/intel/compiler/brw_inst.h
+++ b/src/intel/compiler/brw_inst.h
@@ -25,7 +25,6 @@
 #pragma once
 
 #include <assert.h>
-#include "brw_ir_allocator.h"
 #include "brw_reg.h"
 #include "compiler/glsl/list.h"
 
@@ -378,8 +377,7 @@ bool is_identity_payload(const struct intel_device_info *devinfo,
 bool is_multi_copy_payload(const struct intel_device_info *devinfo,
                            const brw_inst *inst);
 
-bool is_coalescing_payload(const struct intel_device_info *devinfo,
-                           const brw::simple_allocator &alloc, const brw_inst *inst);
+bool is_coalescing_payload(const struct fs_visitor &s, const brw_inst *inst);
 
 bool has_bank_conflict(const struct brw_isa_info *isa, const brw_inst *inst);
 
diff --git a/src/intel/compiler/brw_opt_combine_constants.cpp b/src/intel/compiler/brw_opt_combine_constants.cpp
index 6888e332932..57a12d9fc8f 100644
--- a/src/intel/compiler/brw_opt_combine_constants.cpp
+++ b/src/intel/compiler/brw_opt_combine_constants.cpp
@@ -1183,16 +1183,15 @@ struct register_allocation {
 };
 
 static brw_reg
-allocate_slots(const intel_device_info *devinfo,
+allocate_slots(fs_visitor &s,
                struct register_allocation *regs, unsigned num_regs,
-               unsigned bytes, unsigned align_bytes,
-               brw::simple_allocator &alloc)
+               unsigned bytes, unsigned align_bytes)
 {
    assert(bytes == 2 || bytes == 4 || bytes == 8);
    assert(align_bytes == 2 || align_bytes == 4 || align_bytes == 8);
 
    const unsigned slots_per_reg =
-      REG_SIZE * reg_unit(devinfo) / sizeof(uint16_t);
+      REG_SIZE * reg_unit(s.devinfo) / sizeof(uint16_t);
    const unsigned words = bytes / 2;
    const unsigned align_words = align_bytes / 2;
 
@@ -1204,7 +1203,7 @@ allocate_slots(const intel_device_info *devinfo,
 
          if ((x & mask) == mask) {
             if (regs[i].nr == UINT_MAX)
-               regs[i].nr = alloc.allocate(reg_unit(devinfo));
+               regs[i].nr = s.alloc.allocate(reg_unit(s.devinfo));
 
             regs[i].avail &= ~(mask << j);
 
@@ -1243,10 +1242,9 @@ deallocate_slots(const struct intel_device_info *devinfo,
 }
 
 static void
-parcel_out_registers(const intel_device_info *devinfo,
+parcel_out_registers(fs_visitor &s,
                      struct imm *imm, unsigned len, const bblock_t *cur_block,
-                     struct register_allocation *regs, unsigned num_regs,
-                     brw::simple_allocator &alloc)
+                     struct register_allocation *regs, unsigned num_regs)
 {
    /* Each basic block has two distinct set of constants.  There is the set of
     * constants that only have uses in that block, and there is the set of
@@ -1267,10 +1265,9 @@ parcel_out_registers(const intel_device_info *devinfo,
    for (unsigned i = 0; i < len; i++) {
       if (imm[i].block == cur_block &&
           imm[i].used_in_single_block == used_in_single_block) {
-         const brw_reg reg = allocate_slots(devinfo, regs, num_regs,
+         const brw_reg reg = allocate_slots(s, regs, num_regs,
                                             imm[i].size,
-                                            get_alignment_for_imm(&imm[i]),
-                                            alloc);
+                                            get_alignment_for_imm(&imm[i]));
 
          imm[i].nr = reg.nr;
         imm[i].subreg_offset = reg.offset;
@@ -1280,7 +1277,7 @@
 
    for (unsigned i = 0; i < len; i++) {
       if (imm[i].block == cur_block && imm[i].used_in_single_block) {
-         deallocate_slots(devinfo, regs, num_regs, imm[i].nr,
+         deallocate_slots(s.devinfo, regs, num_regs, imm[i].nr,
                           imm[i].subreg_offset, imm[i].size);
       }
    }
@@ -1529,8 +1526,7 @@ brw_opt_combine_constants(fs_visitor &s)
    }
 
    foreach_block(block, s.cfg) {
-      parcel_out_registers(devinfo, table.imm, table.len, block, regs,
-                           table.len, s.alloc);
+      parcel_out_registers(s, table.imm, table.len, block, regs, table.len);
    }
 
    free(regs);
diff --git a/src/intel/compiler/brw_opt_copy_propagation.cpp b/src/intel/compiler/brw_opt_copy_propagation.cpp
index 97a7349068d..7c3f3737026 100644
--- a/src/intel/compiler/brw_opt_copy_propagation.cpp
+++ b/src/intel/compiler/brw_opt_copy_propagation.cpp
@@ -657,15 +657,14 @@ instruction_requires_packed_data(brw_inst *inst)
 }
 
 static bool
-try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
+try_copy_propagate(fs_visitor &s, brw_inst *inst,
                    acp_entry *entry, int arg,
-                   const brw::simple_allocator &alloc, uint8_t max_polygons)
+                   uint8_t max_polygons)
 {
    if (inst->src[arg].file != VGRF)
       return false;
 
-   const struct intel_device_info *devinfo = compiler->devinfo;
+   const struct intel_device_info *devinfo = s.devinfo;
 
    assert(entry->src.file == VGRF || entry->src.file == UNIFORM ||
           entry->src.file == ATTR || entry->src.file == FIXED_GRF);
@@ -685,7 +684,7 @@ try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
     * temporaries which should match is_coalescing_payload().
     */
    if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
-       (is_coalescing_payload(devinfo, alloc, inst) ||
+       (is_coalescing_payload(s, inst) ||
        is_multi_copy_payload(devinfo, inst)))
      return false;
 
@@ -718,9 +717,9 @@ try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
        entry->src.file == VGRF) {
       int other_src = arg == 2 ? 3 : 2;
       unsigned other_size = inst->src[other_src].file == VGRF ?
-                            alloc.sizes[inst->src[other_src].nr] :
+                            s.alloc.sizes[inst->src[other_src].nr] :
                             inst->size_read(devinfo, other_src);
-      unsigned prop_src_size = alloc.sizes[entry->src.nr];
+      unsigned prop_src_size = s.alloc.sizes[entry->src.nr];
       if (other_size + prop_src_size > 15)
          return false;
    }
@@ -765,7 +764,7 @@ try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
     */
    if (!can_take_stride(inst, dst_type, arg,
                         entry_stride * inst->src[arg].stride,
-                        compiler))
+                        s.compiler))
       return false;
 
    /* From the Cherry Trail/Braswell PRMs, Volume 7: 3D Media GPGPU:
@@ -806,7 +805,7 @@ try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
    if (entry->src.file == ATTR && max_polygons > 1 &&
        (has_dst_aligned_region_restriction(devinfo, inst, dst_type) ||
         instruction_requires_packed_data(inst) ||
-        (inst->is_3src(compiler) && arg == 2) ||
+        (inst->is_3src(s.compiler) && arg == 2) ||
         entry->dst.type != inst->src[arg].type))
       return false;
 
@@ -1307,12 +1306,11 @@ commute_immediates(brw_inst *inst)
  * list.
  */
 static bool
-opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
+opt_copy_propagation_local(fs_visitor &s, linear_ctx *lin_ctx,
                            bblock_t *block, struct acp &acp,
-                           const brw::simple_allocator &alloc, uint8_t max_polygons)
+                           uint8_t max_polygons)
 {
-   const struct intel_device_info *devinfo = compiler->devinfo;
+   const struct intel_device_info *devinfo = s.devinfo;
    bool progress = false;
 
    foreach_inst_in_block(brw_inst, inst, block) {
@@ -1331,8 +1329,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
                   break;
                }
             } else {
-               if (try_copy_propagate(compiler, inst, *iter, i, alloc,
-                                      max_polygons)) {
+               if (try_copy_propagate(s, inst, *iter, i, max_polygons)) {
                   progress = true;
                   break;
                }
@@ -1342,7 +1339,7 @@
 
       if (constant_progress) {
          commute_immediates(inst);
-         brw_opt_constant_fold_instruction(compiler->devinfo, inst);
+         brw_opt_constant_fold_instruction(devinfo, inst);
          progress = true;
       }
 
@@ -1428,8 +1425,8 @@ brw_opt_copy_propagation(fs_visitor &s)
     * the set of copies available at the end of the block.
     */
    foreach_block (block, s.cfg) {
-      progress = opt_copy_propagation_local(s.compiler, lin_ctx, block,
-                                            out_acp[block->num], s.alloc,
+      progress = opt_copy_propagation_local(s, lin_ctx, block,
+                                            out_acp[block->num],
                                             s.max_polygons) || progress;
 
       /* If the destination of an ACP entry exists only within this block,
@@ -1469,8 +1466,8 @@ brw_opt_copy_propagation(fs_visitor &s)
          }
       }
 
-      progress = opt_copy_propagation_local(s.compiler, lin_ctx, block,
-                                            in_acp, s.alloc, s.max_polygons) ||
+      progress = opt_copy_propagation_local(s, lin_ctx, block,
+                                            in_acp, s.max_polygons) ||
                  progress;
    }
 
@@ -1484,13 +1481,12 @@ brw_opt_copy_propagation(fs_visitor &s)
 }
 
 static bool
-try_copy_propagate_def(const brw_compiler *compiler,
-                       const brw::simple_allocator &alloc,
+try_copy_propagate_def(fs_visitor &s,
                        brw_inst *def, const brw_reg &val,
                        brw_inst *inst, int arg, uint8_t max_polygons)
 {
-   const struct intel_device_info *devinfo = compiler->devinfo;
+   const struct intel_device_info *devinfo = s.devinfo;
 
    assert(val.file != BAD_FILE);
 
@@ -1546,9 +1542,9 @@ try_copy_propagate_def(const brw_compiler *compiler,
        val.file == VGRF) {
       int other_src = arg == 2 ? 3 : 2;
       unsigned other_size = inst->src[other_src].file == VGRF ?
-                            alloc.sizes[inst->src[other_src].nr] :
+                            s.alloc.sizes[inst->src[other_src].nr] :
                             inst->size_read(devinfo, other_src);
-      unsigned prop_src_size = alloc.sizes[val.nr];
+      unsigned prop_src_size = s.alloc.sizes[val.nr];
       if (other_size + prop_src_size > 15)
          return false;
    }
@@ -1582,7 +1578,7 @@
     */
    if (!can_take_stride(inst, dst_type, arg,
                         entry_stride * inst->src[arg].stride,
-                        compiler))
+                        s.compiler))
       return false;
 
    /* Bail if the source FIXED_GRF region of the copy cannot be trivially
@@ -1644,7 +1640,7 @@
    if (max_polygons > 1 && val.file == ATTR &&
       (has_dst_aligned_region_restriction(devinfo, inst, dst_type) ||
        instruction_requires_packed_data(inst) ||
-       (inst->is_3src(compiler) && arg == 2) ||
+       (inst->is_3src(s.compiler) && arg == 2) ||
        def->dst.type != inst->src[arg].type))
      return false;
 
@@ -1831,7 +1827,7 @@ brw_opt_copy_propagation_defs(fs_visitor &s)
              def->src[0].file != BAD_FILE && def->src[0].file != IMM &&
              is_identity_payload(s.devinfo, def->src[0].file, def)) {
             source_progress =
-               try_copy_propagate_def(s.compiler, s.alloc, def, def->src[0],
+               try_copy_propagate_def(s, def, def->src[0],
                                       inst, i, s.max_polygons);
 
             if (source_progress) {
@@ -1857,8 +1853,7 @@
                 val.file == ATTR || val.file == UNIFORM ||
                 (val.file == FIXED_GRF && val.is_contiguous())) {
                source_progress =
-                  try_copy_propagate_def(s.compiler, s.alloc, def, val, inst, i,
-                                         s.max_polygons);
+                  try_copy_propagate_def(s, def, val, inst, i, s.max_polygons);
             }
 
             if (source_progress) {
diff --git a/src/intel/compiler/brw_opt_cse.cpp b/src/intel/compiler/brw_opt_cse.cpp
index 1259ebf3c0c..8c7683a6640 100644
--- a/src/intel/compiler/brw_opt_cse.cpp
+++ b/src/intel/compiler/brw_opt_cse.cpp
@@ -131,7 +131,7 @@ is_expression(const fs_visitor *v, const brw_inst *const inst)
    case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
       return inst->src[MEMORY_LOGICAL_MODE].ud == MEMORY_MODE_CONSTANT;
    case SHADER_OPCODE_LOAD_PAYLOAD:
-      return !is_coalescing_payload(v->devinfo, v->alloc, inst);
+      return !is_coalescing_payload(*v, inst);
    default:
       return inst->is_send_from_grf() && !inst->has_side_effects() &&
             !inst->is_volatile();
diff --git a/src/intel/compiler/brw_opt_register_coalesce.cpp b/src/intel/compiler/brw_opt_register_coalesce.cpp
index 4416906390f..7996f47b88a 100644
--- a/src/intel/compiler/brw_opt_register_coalesce.cpp
+++ b/src/intel/compiler/brw_opt_register_coalesce.cpp
@@ -86,7 +86,7 @@ is_coalesce_candidate(const fs_visitor *v, const brw_inst *inst)
       return false;
 
    if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
-      if (!is_coalescing_payload(v->devinfo, v->alloc, inst)) {
+      if (!is_coalescing_payload(*v, inst)) {
          return false;
       }
    }
@@ -192,11 +192,11 @@ can_coalesce_vars(const intel_device_info *devinfo,
  * SEND instruction's payload to more than would fit in g112-g127.
  */
 static bool
-would_violate_eot_restriction(const brw::simple_allocator &alloc,
+would_violate_eot_restriction(fs_visitor &s,
                               const cfg_t *cfg, unsigned dst_reg,
                               unsigned src_reg)
 {
-   if (alloc.sizes[dst_reg] > alloc.sizes[src_reg]) {
+   if (s.alloc.sizes[dst_reg] > s.alloc.sizes[src_reg]) {
       foreach_inst_in_block_reverse(brw_inst, send, cfg->last_block()) {
          if (send->opcode != SHADER_OPCODE_SEND || !send->eot)
             continue;
@@ -205,13 +205,13 @@ would_violate_eot_restriction(const brw::simple_allocator &alloc,
             (send->sources >= 4 && send->src[3].file == VGRF &&
              send->src[3].nr == src_reg)) {
            const unsigned s2 =
-              send->src[2].file == VGRF ? alloc.sizes[send->src[2].nr] : 0;
+              send->src[2].file == VGRF ? s.alloc.sizes[send->src[2].nr] : 0;
            const unsigned s3 =
               send->sources >= 4 && send->src[3].file == VGRF ?
-              alloc.sizes[send->src[3].nr] : 0;
+              s.alloc.sizes[send->src[3].nr] : 0;
            const unsigned increase =
-              alloc.sizes[dst_reg] - alloc.sizes[src_reg];
+              s.alloc.sizes[dst_reg] - s.alloc.sizes[src_reg];
 
            if (s2 + s3 + increase > 15)
               return true;
@@ -303,7 +303,7 @@ brw_opt_register_coalesce(fs_visitor &s)
             src_var[i] = live.var_from_vgrf[src_reg] + i;
             if (!can_coalesce_vars(devinfo, live, s.cfg, block, inst,
                                    dst_var[i], src_var[i]) ||
-                would_violate_eot_restriction(s.alloc, s.cfg, dst_reg, src_reg)) {
+                would_violate_eot_restriction(s, s.cfg, dst_reg, src_reg)) {
                can_coalesce = false;
               src_reg = ~0u;
               break;
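
Side note for reviewers: the mechanical shape of the change is the same in every hunk above. Helpers stop taking intel_device_info, brw::simple_allocator, and brw_compiler as separate parameters and instead take the fs_visitor, reading those members through it. Below is a minimal standalone sketch of that calling-convention change; the types are hypothetical stand-ins, not the real fs_visitor or brw definitions, so it only illustrates the pattern, not Mesa's actual API.

#include <cstdio>
#include <vector>

/* Hypothetical stand-ins for the members these helpers read. */
struct device_info { unsigned reg_unit; };
struct simple_allocator { std::vector<unsigned> sizes; };

/* Hypothetical stand-in for the visitor: one object owning the compile state. */
struct visitor {
   device_info devinfo;
   simple_allocator alloc;
};

/* Before: every dependency threaded through as its own parameter. */
static unsigned
vgrf_size_old(const device_info &devinfo, const simple_allocator &alloc,
              unsigned nr)
{
   return alloc.sizes[nr] * devinfo.reg_unit;
}

/* After: pass the visitor once and pull what is needed from it. */
static unsigned
vgrf_size_new(const visitor &s, unsigned nr)
{
   return s.alloc.sizes[nr] * s.devinfo.reg_unit;
}

int main()
{
   visitor s{{2}, {{4, 8, 2}}};
   std::printf("%u %u\n", vgrf_size_old(s.devinfo, s.alloc, 1),
               vgrf_size_new(s, 1));
   return 0;
}

The practical benefit is that the next time a helper needs another piece of compile state, its parameter list does not have to change at every call site.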