diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 57af40a32bc..0991b733921 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -647,7 +647,7 @@ fs_inst::components_read(unsigned i) const } unsigned -fs_inst::size_read(int arg) const +fs_inst::size_read(const struct intel_device_info *devinfo, int arg) const { switch (opcode) { case SHADER_OPCODE_SEND: @@ -775,7 +775,7 @@ fs_inst::flags_read(const intel_device_info *devinfo) const } else { unsigned mask = 0; for (int i = 0; i < sources; i++) { - mask |= brw_fs_flag_mask(src[i], size_read(i)); + mask |= brw_fs_flag_mask(src[i], size_read(devinfo, i)); } return mask; } diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp index 007744f30ea..4dc179bfe63 100644 --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -517,7 +517,8 @@ namespace { for (int i = 0; i < inst->sources; i++) { if (is_grf(inst->src[i])) - p.require_contiguous(reg_of(inst->src[i]), regs_read(inst, i)); + p.require_contiguous(reg_of(inst->src[i]), + regs_read(v->devinfo, inst, i)); } } diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index 703624670df..ea1b6a1695e 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -180,7 +180,7 @@ cmod_propagate_not(const intel_device_info *devinfo, bblock_t *block, foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->src[0], inst->size_read(0))) { + inst->src[0], inst->size_read(devinfo, 0))) { if (scan_inst->opcode != BRW_OPCODE_OR && scan_inst->opcode != BRW_OPCODE_AND) break; @@ -288,7 +288,7 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block) const unsigned flags_written = inst->flags_written(devinfo); foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->src[0], inst->size_read(0))) { + inst->src[0], inst->size_read(devinfo, 0))) { /* If the scan instruction writes a different flag register than * the instruction we're trying to propagate from, bail. * diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 50e02dce4f7..d23cffcd6bd 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -685,7 +685,8 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, * temporaries which should match is_coalescing_payload(). */ if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD && - (is_coalescing_payload(alloc, inst) || is_multi_copy_payload(inst))) + (is_coalescing_payload(devinfo, alloc, inst) || + is_multi_copy_payload(devinfo, inst))) return false; assert(entry->dst.file == VGRF); @@ -695,7 +696,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, /* Bail if inst is reading a range that isn't contained in the range * that entry is writing. */ - if (!region_contained_in(inst->src[arg], inst->size_read(arg), + if (!region_contained_in(inst->src[arg], inst->size_read(devinfo, arg), entry->dst, entry->size_written)) return false; @@ -718,7 +719,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, int other_src = arg == 2 ? 3 : 2; unsigned other_size = inst->src[other_src].file == VGRF ? alloc.sizes[inst->src[other_src].nr] : - inst->size_read(other_src); + inst->size_read(devinfo, other_src); unsigned prop_src_size = alloc.sizes[entry->src.nr]; if (other_size + prop_src_size > 15) return false; @@ -1208,7 +1209,8 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type, static bool -try_constant_propagate(fs_inst *inst, acp_entry *entry, int arg) +try_constant_propagate(const struct intel_device_info *devinfo, + fs_inst *inst, acp_entry *entry, int arg) { if (inst->src[arg].file != VGRF) return false; @@ -1220,7 +1222,7 @@ try_constant_propagate(fs_inst *inst, acp_entry *entry, int arg) /* Bail if inst is reading a range that isn't contained in the range * that entry is writing. */ - if (!region_contained_in(inst->src[arg], inst->size_read(arg), + if (!region_contained_in(inst->src[arg], inst->size_read(devinfo, arg), entry->dst, entry->size_written)) return false; @@ -1236,13 +1238,13 @@ try_constant_propagate(fs_inst *inst, acp_entry *entry, int arg) } static bool -can_propagate_from(fs_inst *inst) +can_propagate_from(const struct intel_device_info *devinfo, fs_inst *inst) { return (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == VGRF && ((inst->src[0].file == VGRF && !grf_regions_overlap(inst->dst, inst->size_written, - inst->src[0], inst->size_read(0))) || + inst->src[0], inst->size_read(devinfo, 0))) || inst->src[0].file == ATTR || inst->src[0].file == UNIFORM || inst->src[0].file == IMM || @@ -1256,7 +1258,7 @@ can_propagate_from(fs_inst *inst) inst->is_raw_move()) && /* Subset of !is_partial_write() conditions. */ !inst->predicate && inst->dst.is_contiguous()) || - is_identity_payload(FIXED_GRF, inst); + is_identity_payload(devinfo, FIXED_GRF, inst); } static void @@ -1310,6 +1312,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx, const brw::simple_allocator &alloc, uint8_t max_polygons) { + const struct intel_device_info *devinfo = compiler->devinfo; bool progress = false; foreach_inst_in_block(fs_inst, inst, block) { @@ -1323,7 +1326,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx, iter != acp.end() && (*iter)->dst.nr == inst->src[i].nr; ++iter) { if ((*iter)->src.file == IMM) { - if (try_constant_propagate(inst, *iter, i)) { + if (try_constant_propagate(devinfo, inst, *iter, i)) { constant_progress = true; break; } @@ -1368,13 +1371,13 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx, /* If this instruction's source could potentially be folded into the * operand of another instruction, add it to the ACP. */ - if (can_propagate_from(inst)) { + if (can_propagate_from(devinfo, inst)) { acp_entry *entry = linear_zalloc(lin_ctx, acp_entry); entry->dst = inst->dst; entry->src = inst->src[0]; entry->size_written = inst->size_written; for (unsigned i = 0; i < inst->sources; i++) - entry->size_read += inst->size_read(i); + entry->size_read += inst->size_read(devinfo, i); entry->opcode = inst->opcode; entry->is_partial_write = inst->is_partial_write(); entry->force_writemask_all = inst->force_writemask_all; @@ -1397,7 +1400,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx, entry->dst = dst; entry->src = retype(inst->src[i], t); entry->size_written = size_written; - entry->size_read = inst->size_read(i); + entry->size_read = inst->size_read(devinfo, i); entry->opcode = inst->opcode; entry->force_writemask_all = inst->force_writemask_all; acp.add(entry); @@ -1544,7 +1547,7 @@ try_copy_propagate_def(const brw_compiler *compiler, int other_src = arg == 2 ? 3 : 2; unsigned other_size = inst->src[other_src].file == VGRF ? alloc.sizes[inst->src[other_src].nr] : - inst->size_read(other_src); + inst->size_read(devinfo, other_src); unsigned prop_src_size = alloc.sizes[val.nr]; if (other_size + prop_src_size > 15) return false; @@ -1717,10 +1720,11 @@ try_copy_propagate_def(const brw_compiler *compiler, } static bool -try_constant_propagate_def(fs_inst *def, brw_reg val, fs_inst *inst, int arg) +try_constant_propagate_def(const struct intel_device_info *devinfo, + fs_inst *def, brw_reg val, fs_inst *inst, int arg) { /* Bail if inst is reading more than a single vector component of entry */ - if (inst->size_read(arg) > def->dst.component_size(inst->exec_size)) + if (inst->size_read(devinfo, arg) > def->dst.component_size(inst->exec_size)) return false; return try_constant_propagate_value(val, def->dst.type, inst, arg); @@ -1815,9 +1819,9 @@ brw_fs_opt_copy_propagation_defs(fs_visitor &s) bool source_progress = false; if (def->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { - if (inst->size_read(i) == def->size_written && + if (inst->size_read(s.devinfo, i) == def->size_written && def->src[0].file != BAD_FILE && def->src[0].file != IMM && - is_identity_payload(def->src[0].file, def)) { + is_identity_payload(s.devinfo, def->src[0].file, def)) { source_progress = try_copy_propagate_def(s.compiler, s.alloc, def, def->src[0], inst, i, s.max_polygons); @@ -1834,10 +1838,10 @@ brw_fs_opt_copy_propagation_defs(fs_visitor &s) } brw_reg val = - find_value_for_offset(def, inst->src[i], inst->size_read(i)); + find_value_for_offset(def, inst->src[i], inst->size_read(s.devinfo, i)); if (val.file == IMM) { - if (try_constant_propagate_def(def, val, inst, i)) { + if (try_constant_propagate_def(s.devinfo, def, val, inst, i)) { source_progress = true; constant_progress = true; } diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index e1c0ab6b0af..87f9718a909 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -122,7 +122,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION: return true; case SHADER_OPCODE_LOAD_PAYLOAD: - return !is_coalescing_payload(v->alloc, inst); + return !is_coalescing_payload(v->devinfo, v->alloc, inst); default: return inst->is_send_from_grf() && !inst->has_side_effects() && !inst->is_volatile(); diff --git a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp index d22d5ba21d4..d90ee753a39 100644 --- a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp +++ b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp @@ -161,7 +161,7 @@ brw_fs_opt_dead_code_eliminate(fs_visitor &s) if (inst->src[i].file == VGRF) { int var = live_vars.var_from_reg(inst->src[i]); - for (unsigned j = 0; j < regs_read(inst, i); j++) { + for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) { BITSET_SET(live, var + j); } } diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp index d2a5b211d2e..6197fa59d97 100644 --- a/src/intel/compiler/brw_fs_live_variables.cpp +++ b/src/intel/compiler/brw_fs_live_variables.cpp @@ -120,7 +120,7 @@ fs_live_variables::setup_def_use() if (reg.file != VGRF) continue; - for (unsigned j = 0; j < regs_read(inst, i); j++) { + for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) { setup_one_read(bd, ip, reg); reg.offset += REG_SIZE; } @@ -342,7 +342,7 @@ fs_live_variables::validate(const fs_visitor *s) const for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF && !check_register_live_range(this, ip, - inst->src[i], regs_read(inst, i))) + inst->src[i], regs_read(devinfo, inst, i))) return false; } diff --git a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp index a3a39af65d3..efae3994786 100644 --- a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp +++ b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp @@ -217,9 +217,9 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) brw_reg low = inst->dst; if (orig_dst.is_null() || regions_overlap(inst->dst, inst->size_written, - inst->src[0], inst->size_read(0)) || + inst->src[0], inst->size_read(devinfo, 0)) || regions_overlap(inst->dst, inst->size_written, - inst->src[1], inst->size_read(1)) || + inst->src[1], inst->size_read(devinfo, 1)) || inst->dst.stride >= 4) { needs_mov = true; low = brw_vgrf(s.alloc.allocate(regs_written(inst)), diff --git a/src/intel/compiler/brw_fs_lower_simd_width.cpp b/src/intel/compiler/brw_fs_lower_simd_width.cpp index f66ac2b46ee..ac7ad3ed797 100644 --- a/src/intel/compiler/brw_fs_lower_simd_width.cpp +++ b/src/intel/compiler/brw_fs_lower_simd_width.cpp @@ -85,7 +85,7 @@ get_fpu_lowered_simd_width(const fs_visitor *shader, unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE); for (unsigned i = 0; i < inst->sources; i++) - reg_count = MAX3(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE), + reg_count = MAX3(reg_count, DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE), (inst->src[i].file == ATTR ? attr_reg_count : 0)); /* Calculate the maximum execution size of the instruction based on the @@ -559,7 +559,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst) * the data read from the same source by other lowered instructions. */ if (regions_overlap(inst->dst, inst->size_written, - inst->src[i], inst->size_read(i)) && + inst->src[i], inst->size_read(lbld.shader->devinfo, i)) && !inst->dst.equals(inst->src[i])) return true; } diff --git a/src/intel/compiler/brw_fs_opt_virtual_grfs.cpp b/src/intel/compiler/brw_fs_opt_virtual_grfs.cpp index 5b6a674ac12..e76b242d153 100644 --- a/src/intel/compiler/brw_fs_opt_virtual_grfs.cpp +++ b/src/intel/compiler/brw_fs_opt_virtual_grfs.cpp @@ -84,7 +84,7 @@ brw_fs_opt_split_virtual_grfs(fs_visitor &s) for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { unsigned reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE; - for (unsigned j = 1; j < regs_read(inst, i); j++) + for (unsigned j = 1; j < regs_read(s.devinfo, inst, i); j++) split_points[reg + j] = false; } } diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index ec4e98df3b6..ac65859e313 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -199,7 +199,7 @@ void fs_visitor::calculate_payload_ranges(bool allow_spilling, continue; for (unsigned j = reg_nr / reg_unit(devinfo); - j < DIV_ROUND_UP(reg_nr + regs_read(inst, i), + j < DIV_ROUND_UP(reg_nr + regs_read(devinfo, inst, i), reg_unit(devinfo)); j++) { payload_last_use_ip[j] = use_ip; @@ -933,7 +933,7 @@ fs_reg_alloc::set_spill_costs() foreach_block_and_inst(block, fs_inst, inst, fs->cfg) { for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) - spill_costs[inst->src[i].nr] += regs_read(inst, i) * block_scale; + spill_costs[inst->src[i].nr] += regs_read(devinfo, inst, i) * block_scale; } if (inst->dst.file == VGRF) @@ -1079,7 +1079,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg) if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg) { /* Count registers needed in units of physical registers */ - int count = align(regs_read(inst, i), reg_unit(devinfo)); + int count = align(regs_read(devinfo, inst, i), reg_unit(devinfo)); /* Align the spilling offset the physical register size */ int subset_spill_offset = spill_offset + ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE * reg_unit(devinfo)); diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp index a68df7525df..e389519aa80 100644 --- a/src/intel/compiler/brw_fs_register_coalesce.cpp +++ b/src/intel/compiler/brw_fs_register_coalesce.cpp @@ -88,7 +88,7 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst) return false; if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { - if (!is_coalescing_payload(v->alloc, inst)) { + if (!is_coalescing_payload(v->devinfo, v->alloc, inst)) { return false; } } @@ -97,7 +97,8 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst) } static bool -can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg, +can_coalesce_vars(const intel_device_info *devinfo, + const fs_live_variables &live, const cfg_t *cfg, const bblock_t *block, const fs_inst *inst, int dst_var, int src_var) { @@ -161,7 +162,7 @@ can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg, * copy. This effectively moves the write from the copy up. */ for (int j = 0; j < scan_inst->sources; j++) { - if (regions_overlap(scan_inst->src[j], scan_inst->size_read(j), + if (regions_overlap(scan_inst->src[j], scan_inst->size_read(devinfo, j), inst->dst, inst->size_written)) return false; /* registers interfere */ } @@ -176,7 +177,7 @@ can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg, /* See the big comment above */ if (regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->src[0], inst->size_read(0))) { + inst->src[0], inst->size_read(devinfo, 0))) { if (seen_copy || scan_block != block || (scan_inst->force_writemask_all && !inst->force_writemask_all)) return false; @@ -303,7 +304,7 @@ brw_fs_opt_register_coalesce(fs_visitor &s) dst_var[i] = live.var_from_vgrf[dst_reg] + dst_reg_offset[i]; src_var[i] = live.var_from_vgrf[src_reg] + i; - if (!can_coalesce_vars(live, s.cfg, block, inst, dst_var[i], src_var[i]) || + if (!can_coalesce_vars(devinfo, live, s.cfg, block, inst, dst_var[i], src_var[i]) || would_violate_eot_restriction(s.alloc, s.cfg, dst_reg, src_reg)) { can_coalesce = false; src_reg = ~0u; diff --git a/src/intel/compiler/brw_fs_saturate_propagation.cpp b/src/intel/compiler/brw_fs_saturate_propagation.cpp index 06baceb281b..cb0dad57172 100644 --- a/src/intel/compiler/brw_fs_saturate_propagation.cpp +++ b/src/intel/compiler/brw_fs_saturate_propagation.cpp @@ -143,7 +143,7 @@ opt_saturate_propagation_local(fs_visitor &s, bblock_t *block) foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->exec_size == inst->exec_size && regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->src[0], inst->size_read(0))) { + inst->src[0], inst->size_read(s.devinfo, 0))) { if (scan_inst->is_partial_write() || (scan_inst->dst.type != inst->dst.type && !scan_inst->can_change_types())) @@ -167,8 +167,8 @@ opt_saturate_propagation_local(fs_visitor &s, bblock_t *block) if (scan_inst->src[i].file == VGRF && scan_inst->src[i].nr == inst->src[0].nr && regions_overlap( - scan_inst->src[i], scan_inst->size_read(i), - inst->src[0], inst->size_read(0))) { + scan_inst->src[i], scan_inst->size_read(s.devinfo, i), + inst->src[0], inst->size_read(s.devinfo, 0))) { if (scan_inst->opcode != BRW_OPCODE_MOV || !scan_inst->saturate || scan_inst->src[0].abs || diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp index a0955b92c81..c270f1ab4dd 100644 --- a/src/intel/compiler/brw_fs_scoreboard.cpp +++ b/src/intel/compiler/brw_fs_scoreboard.cpp @@ -1049,7 +1049,7 @@ namespace { is_ordered ? dependency(TGL_REGDIST_SRC, jp, exec_all) : dependency::done; - for (unsigned j = 0; j < regs_read(inst, i); j++) { + for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) { const brw_reg r = byte_offset(inst->src[i], REG_SIZE * j); sb.set(r, shadow(sb.get(r), rd_dep)); } @@ -1163,7 +1163,7 @@ namespace { scoreboard &sb = sbs[block->num]; for (unsigned i = 0; i < inst->sources; i++) { - for (unsigned j = 0; j < regs_read(inst, i); j++) + for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) add_dependency(ids, deps[ip], dependency_for_read( sb.get(byte_offset(inst->src[i], REG_SIZE * j)))); } diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp index 78808344beb..113484580b7 100644 --- a/src/intel/compiler/brw_fs_validate.cpp +++ b/src/intel/compiler/brw_fs_validate.cpp @@ -381,7 +381,7 @@ brw_fs_validate(const fs_visitor &s) for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { - fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(inst, i), + fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i), s.alloc.sizes[inst->src[i].nr]); } } diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 66c9d082841..27dbecf92dd 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -57,7 +57,7 @@ public: bool is_payload(unsigned arg) const; bool is_partial_write() const; unsigned components_read(unsigned i) const; - unsigned size_read(int arg) const; + unsigned size_read(const struct intel_device_info *devinfo, int arg) const; bool can_do_source_mods(const struct intel_device_info *devinfo) const; bool can_do_cmod() const; bool can_change_types() const; @@ -295,15 +295,15 @@ regs_written(const fs_inst *inst) * UNIFORM files and 32B for all other files. */ inline unsigned -regs_read(const fs_inst *inst, unsigned i) +regs_read(const struct intel_device_info *devinfo, const fs_inst *inst, unsigned i) { if (inst->src[i].file == IMM) return 1; const unsigned reg_size = inst->src[i].file == UNIFORM ? 4 : REG_SIZE; return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + - inst->size_read(i) - - MIN2(inst->size_read(i), reg_padding(inst->src[i])), + inst->size_read(devinfo, i) - + MIN2(inst->size_read(devinfo, i), reg_padding(inst->src[i])), reg_size); } @@ -475,7 +475,8 @@ has_subdword_integer_region_restriction(const intel_device_info *devinfo, * multiple virtual registers in any order is allowed. */ inline bool -is_copy_payload(brw_reg_file file, const fs_inst *inst) +is_copy_payload(const struct intel_device_info *devinfo, + brw_reg_file file, const fs_inst *inst) { if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD || inst->is_partial_write() || inst->saturate || @@ -491,7 +492,7 @@ is_copy_payload(brw_reg_file file, const fs_inst *inst) return false; if (regions_overlap(inst->dst, inst->size_written, - inst->src[i], inst->size_read(i))) + inst->src[i], inst->size_read(devinfo, i))) return false; } @@ -504,8 +505,10 @@ is_copy_payload(brw_reg_file file, const fs_inst *inst) * destination without any reordering. */ inline bool -is_identity_payload(brw_reg_file file, const fs_inst *inst) { - if (is_copy_payload(file, inst)) { +is_identity_payload(const struct intel_device_info *devinfo, + brw_reg_file file, const fs_inst *inst) +{ + if (is_copy_payload(devinfo, file, inst)) { brw_reg reg = inst->src[0]; for (unsigned i = 0; i < inst->sources; i++) { @@ -513,7 +516,7 @@ is_identity_payload(brw_reg_file file, const fs_inst *inst) { if (!inst->src[i].equals(reg)) return false; - reg = byte_offset(reg, inst->size_read(i)); + reg = byte_offset(reg, inst->size_read(devinfo, i)); } return true; @@ -533,8 +536,10 @@ is_identity_payload(brw_reg_file file, const fs_inst *inst) { * instructions. */ inline bool -is_multi_copy_payload(const fs_inst *inst) { - if (is_copy_payload(VGRF, inst)) { +is_multi_copy_payload(const struct intel_device_info *devinfo, + const fs_inst *inst) +{ + if (is_copy_payload(devinfo, VGRF, inst)) { for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].nr != inst->src[0].nr) return true; @@ -557,9 +562,10 @@ is_multi_copy_payload(const fs_inst *inst) { * instruction. */ inline bool -is_coalescing_payload(const brw::simple_allocator &alloc, const fs_inst *inst) +is_coalescing_payload(const struct intel_device_info *devinfo, + const brw::simple_allocator &alloc, const fs_inst *inst) { - return is_identity_payload(VGRF, inst) && + return is_identity_payload(devinfo, VGRF, inst) && inst->src[0].offset == 0 && alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written; } diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index ffe9bc963e4..9c33d5b8b44 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -128,11 +128,11 @@ namespace { * messages which require the total size. */ if (inst->opcode == SHADER_OPCODE_SEND) { - ss = DIV_ROUND_UP(inst->size_read(2), REG_SIZE) + - DIV_ROUND_UP(inst->size_read(3), REG_SIZE); + ss = DIV_ROUND_UP(inst->size_read(devinfo, 2), REG_SIZE) + + DIV_ROUND_UP(inst->size_read(devinfo, 3), REG_SIZE); } else { for (unsigned i = 0; i < inst->sources; i++) - ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(i), REG_SIZE)); + ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE)); } /* Convert the execution size to GRF units. */ @@ -878,7 +878,7 @@ namespace { /* Stall on any source dependencies. */ for (unsigned i = 0; i < inst->sources; i++) { - for (unsigned j = 0; j < regs_read(inst, i); j++) + for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) stall_on_dependency( st, reg_dependency_id(devinfo, inst->src[i], j)); } @@ -935,7 +935,7 @@ namespace { if (inst->is_send_from_grf()) { for (unsigned i = 0; i < inst->sources; i++) { if (inst->is_payload(i)) { - for (unsigned j = 0; j < regs_read(inst, i); j++) + for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) mark_read_dependency( st, perf, reg_dependency_id(devinfo, inst->src[i], j)); } diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp index b6fe7fa28da..ee2340bec09 100644 --- a/src/intel/compiler/brw_print.cpp +++ b/src/intel/compiler/brw_print.cpp @@ -583,7 +583,7 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons fprintf(file, ".%d", inst->src[i].subnr / brw_type_size_bytes(inst->src[i].type)); } else if (inst->src[i].offset || (!s.grf_used && inst->src[i].file == VGRF && - s.alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) { + s.alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(s.devinfo, i))) { const unsigned reg_size = (inst->src[i].file == UNIFORM ? 4 : REG_SIZE); fprintf(file, "+%d.%d", inst->src[i].offset / reg_size, inst->src[i].offset % reg_size); diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 4e27c856fc2..17d2e4d55ea 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -800,7 +800,7 @@ instruction_scheduler::count_reads_remaining(const fs_inst *inst) if (inst->src[i].nr >= hw_reg_count) continue; - for (unsigned j = 0; j < regs_read(inst, i); j++) + for (unsigned j = 0; j < regs_read(s->devinfo, inst, i); j++) hw_reads_remaining[inst->src[i].nr + j]++; } } @@ -881,7 +881,7 @@ instruction_scheduler::update_register_pressure(const fs_inst *inst) reads_remaining[inst->src[i].nr]--; } else if (inst->src[i].file == FIXED_GRF && inst->src[i].nr < hw_reg_count) { - for (unsigned off = 0; off < regs_read(inst, i); off++) + for (unsigned off = 0; off < regs_read(s->devinfo, inst, i); off++) hw_reads_remaining[inst->src[i].nr + off]--; } } @@ -910,7 +910,7 @@ instruction_scheduler::get_register_pressure_benefit(const fs_inst *inst) if (inst->src[i].file == FIXED_GRF && inst->src[i].nr < hw_reg_count) { - for (unsigned off = 0; off < regs_read(inst, i); off++) { + for (unsigned off = 0; off < regs_read(s->devinfo, inst, i); off++) { int reg = inst->src[i].nr + off; if (!BITSET_TEST(hw_liveout[block_idx], reg) && hw_reads_remaining[reg] == 1) { @@ -1212,11 +1212,11 @@ instruction_scheduler::calculate_deps() /* read-after-write deps. */ for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { - for (unsigned r = 0; r < regs_read(inst, i); r++) + for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++) add_dep(last_grf_write[grf_index(inst->src[i]) + r], n); } else if (inst->src[i].file == FIXED_GRF) { if (post_reg_alloc) { - for (unsigned r = 0; r < regs_read(inst, i); r++) + for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++) add_dep(last_grf_write[inst->src[i].nr + r], n); } else { add_dep(last_fixed_grf_write, n); @@ -1297,11 +1297,11 @@ instruction_scheduler::calculate_deps() /* write-after-read deps. */ for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { - for (unsigned r = 0; r < regs_read(inst, i); r++) + for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++) add_dep(n, last_grf_write[grf_index(inst->src[i]) + r], 0); } else if (inst->src[i].file == FIXED_GRF) { if (post_reg_alloc) { - for (unsigned r = 0; r < regs_read(inst, i); r++) + for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++) add_dep(n, last_grf_write[inst->src[i].nr + r], 0); } else { add_dep(n, last_fixed_grf_write, 0);