mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 15:50:17 +01:00
brw: Add devinfo parameter to fs_inst::regs_read
This isn't used now, but future commits will add uses. Doing this as a separate commit removes a lot of "just typing" churn from commits that have real changes to review.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29884>
This commit is contained in:
parent
e129d242f6
commit
ef3dc401da
19 changed files with 85 additions and 73 deletions
|
|
@ -647,7 +647,7 @@ fs_inst::components_read(unsigned i) const
|
|||
}
|
||||
|
||||
unsigned
|
||||
fs_inst::size_read(int arg) const
|
||||
fs_inst::size_read(const struct intel_device_info *devinfo, int arg) const
|
||||
{
|
||||
switch (opcode) {
|
||||
case SHADER_OPCODE_SEND:
|
||||
|
|
@ -775,7 +775,7 @@ fs_inst::flags_read(const intel_device_info *devinfo) const
|
|||
} else {
|
||||
unsigned mask = 0;
|
||||
for (int i = 0; i < sources; i++) {
|
||||
mask |= brw_fs_flag_mask(src[i], size_read(i));
|
||||
mask |= brw_fs_flag_mask(src[i], size_read(devinfo, i));
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -517,7 +517,8 @@ namespace {
|
|||
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (is_grf(inst->src[i]))
|
||||
p.require_contiguous(reg_of(inst->src[i]), regs_read(inst, i));
|
||||
p.require_contiguous(reg_of(inst->src[i]),
|
||||
regs_read(v->devinfo, inst, i));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -180,7 +180,7 @@ cmod_propagate_not(const intel_device_info *devinfo, bblock_t *block,
|
|||
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
|
||||
inst->src[0], inst->size_read(0))) {
|
||||
inst->src[0], inst->size_read(devinfo, 0))) {
|
||||
if (scan_inst->opcode != BRW_OPCODE_OR &&
|
||||
scan_inst->opcode != BRW_OPCODE_AND)
|
||||
break;
|
||||
|
|
@ -288,7 +288,7 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block)
|
|||
const unsigned flags_written = inst->flags_written(devinfo);
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
|
||||
inst->src[0], inst->size_read(0))) {
|
||||
inst->src[0], inst->size_read(devinfo, 0))) {
|
||||
/* If the scan instruction writes a different flag register than
|
||||
* the instruction we're trying to propagate from, bail.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -685,7 +685,8 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
* temporaries which should match is_coalescing_payload().
|
||||
*/
|
||||
if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
|
||||
(is_coalescing_payload(alloc, inst) || is_multi_copy_payload(inst)))
|
||||
(is_coalescing_payload(devinfo, alloc, inst) ||
|
||||
is_multi_copy_payload(devinfo, inst)))
|
||||
return false;
|
||||
|
||||
assert(entry->dst.file == VGRF);
|
||||
|
|
@ -695,7 +696,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
/* Bail if inst is reading a range that isn't contained in the range
|
||||
* that entry is writing.
|
||||
*/
|
||||
if (!region_contained_in(inst->src[arg], inst->size_read(arg),
|
||||
if (!region_contained_in(inst->src[arg], inst->size_read(devinfo, arg),
|
||||
entry->dst, entry->size_written))
|
||||
return false;
|
||||
|
||||
|
|
@ -718,7 +719,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
int other_src = arg == 2 ? 3 : 2;
|
||||
unsigned other_size = inst->src[other_src].file == VGRF ?
|
||||
alloc.sizes[inst->src[other_src].nr] :
|
||||
inst->size_read(other_src);
|
||||
inst->size_read(devinfo, other_src);
|
||||
unsigned prop_src_size = alloc.sizes[entry->src.nr];
|
||||
if (other_size + prop_src_size > 15)
|
||||
return false;
|
||||
|
|
@ -1208,7 +1209,8 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type,
|
|||
|
||||
|
||||
static bool
|
||||
try_constant_propagate(fs_inst *inst, acp_entry *entry, int arg)
|
||||
try_constant_propagate(const struct intel_device_info *devinfo,
|
||||
fs_inst *inst, acp_entry *entry, int arg)
|
||||
{
|
||||
if (inst->src[arg].file != VGRF)
|
||||
return false;
|
||||
|
|
@ -1220,7 +1222,7 @@ try_constant_propagate(fs_inst *inst, acp_entry *entry, int arg)
|
|||
/* Bail if inst is reading a range that isn't contained in the range
|
||||
* that entry is writing.
|
||||
*/
|
||||
if (!region_contained_in(inst->src[arg], inst->size_read(arg),
|
||||
if (!region_contained_in(inst->src[arg], inst->size_read(devinfo, arg),
|
||||
entry->dst, entry->size_written))
|
||||
return false;
|
||||
|
||||
|
|
@ -1236,13 +1238,13 @@ try_constant_propagate(fs_inst *inst, acp_entry *entry, int arg)
|
|||
}
|
||||
|
||||
static bool
|
||||
can_propagate_from(fs_inst *inst)
|
||||
can_propagate_from(const struct intel_device_info *devinfo, fs_inst *inst)
|
||||
{
|
||||
return (inst->opcode == BRW_OPCODE_MOV &&
|
||||
inst->dst.file == VGRF &&
|
||||
((inst->src[0].file == VGRF &&
|
||||
!grf_regions_overlap(inst->dst, inst->size_written,
|
||||
inst->src[0], inst->size_read(0))) ||
|
||||
inst->src[0], inst->size_read(devinfo, 0))) ||
|
||||
inst->src[0].file == ATTR ||
|
||||
inst->src[0].file == UNIFORM ||
|
||||
inst->src[0].file == IMM ||
|
||||
|
|
@ -1256,7 +1258,7 @@ can_propagate_from(fs_inst *inst)
|
|||
inst->is_raw_move()) &&
|
||||
/* Subset of !is_partial_write() conditions. */
|
||||
!inst->predicate && inst->dst.is_contiguous()) ||
|
||||
is_identity_payload(FIXED_GRF, inst);
|
||||
is_identity_payload(devinfo, FIXED_GRF, inst);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1310,6 +1312,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
|
|||
const brw::simple_allocator &alloc,
|
||||
uint8_t max_polygons)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
bool progress = false;
|
||||
|
||||
foreach_inst_in_block(fs_inst, inst, block) {
|
||||
|
|
@ -1323,7 +1326,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
|
|||
iter != acp.end() && (*iter)->dst.nr == inst->src[i].nr;
|
||||
++iter) {
|
||||
if ((*iter)->src.file == IMM) {
|
||||
if (try_constant_propagate(inst, *iter, i)) {
|
||||
if (try_constant_propagate(devinfo, inst, *iter, i)) {
|
||||
constant_progress = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -1368,13 +1371,13 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
|
|||
/* If this instruction's source could potentially be folded into the
|
||||
* operand of another instruction, add it to the ACP.
|
||||
*/
|
||||
if (can_propagate_from(inst)) {
|
||||
if (can_propagate_from(devinfo, inst)) {
|
||||
acp_entry *entry = linear_zalloc(lin_ctx, acp_entry);
|
||||
entry->dst = inst->dst;
|
||||
entry->src = inst->src[0];
|
||||
entry->size_written = inst->size_written;
|
||||
for (unsigned i = 0; i < inst->sources; i++)
|
||||
entry->size_read += inst->size_read(i);
|
||||
entry->size_read += inst->size_read(devinfo, i);
|
||||
entry->opcode = inst->opcode;
|
||||
entry->is_partial_write = inst->is_partial_write();
|
||||
entry->force_writemask_all = inst->force_writemask_all;
|
||||
|
|
@ -1397,7 +1400,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
|
|||
entry->dst = dst;
|
||||
entry->src = retype(inst->src[i], t);
|
||||
entry->size_written = size_written;
|
||||
entry->size_read = inst->size_read(i);
|
||||
entry->size_read = inst->size_read(devinfo, i);
|
||||
entry->opcode = inst->opcode;
|
||||
entry->force_writemask_all = inst->force_writemask_all;
|
||||
acp.add(entry);
|
||||
|
|
@ -1544,7 +1547,7 @@ try_copy_propagate_def(const brw_compiler *compiler,
|
|||
int other_src = arg == 2 ? 3 : 2;
|
||||
unsigned other_size = inst->src[other_src].file == VGRF ?
|
||||
alloc.sizes[inst->src[other_src].nr] :
|
||||
inst->size_read(other_src);
|
||||
inst->size_read(devinfo, other_src);
|
||||
unsigned prop_src_size = alloc.sizes[val.nr];
|
||||
if (other_size + prop_src_size > 15)
|
||||
return false;
|
||||
|
|
@ -1717,10 +1720,11 @@ try_copy_propagate_def(const brw_compiler *compiler,
|
|||
}
|
||||
|
||||
static bool
|
||||
try_constant_propagate_def(fs_inst *def, brw_reg val, fs_inst *inst, int arg)
|
||||
try_constant_propagate_def(const struct intel_device_info *devinfo,
|
||||
fs_inst *def, brw_reg val, fs_inst *inst, int arg)
|
||||
{
|
||||
/* Bail if inst is reading more than a single vector component of entry */
|
||||
if (inst->size_read(arg) > def->dst.component_size(inst->exec_size))
|
||||
if (inst->size_read(devinfo, arg) > def->dst.component_size(inst->exec_size))
|
||||
return false;
|
||||
|
||||
return try_constant_propagate_value(val, def->dst.type, inst, arg);
|
||||
|
|
@ -1815,9 +1819,9 @@ brw_fs_opt_copy_propagation_defs(fs_visitor &s)
|
|||
bool source_progress = false;
|
||||
|
||||
if (def->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
|
||||
if (inst->size_read(i) == def->size_written &&
|
||||
if (inst->size_read(s.devinfo, i) == def->size_written &&
|
||||
def->src[0].file != BAD_FILE && def->src[0].file != IMM &&
|
||||
is_identity_payload(def->src[0].file, def)) {
|
||||
is_identity_payload(s.devinfo, def->src[0].file, def)) {
|
||||
source_progress =
|
||||
try_copy_propagate_def(s.compiler, s.alloc, def, def->src[0],
|
||||
inst, i, s.max_polygons);
|
||||
|
|
@ -1834,10 +1838,10 @@ brw_fs_opt_copy_propagation_defs(fs_visitor &s)
|
|||
}
|
||||
|
||||
brw_reg val =
|
||||
find_value_for_offset(def, inst->src[i], inst->size_read(i));
|
||||
find_value_for_offset(def, inst->src[i], inst->size_read(s.devinfo, i));
|
||||
|
||||
if (val.file == IMM) {
|
||||
if (try_constant_propagate_def(def, val, inst, i)) {
|
||||
if (try_constant_propagate_def(s.devinfo, def, val, inst, i)) {
|
||||
source_progress = true;
|
||||
constant_progress = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
|
|||
case SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION:
|
||||
return true;
|
||||
case SHADER_OPCODE_LOAD_PAYLOAD:
|
||||
return !is_coalescing_payload(v->alloc, inst);
|
||||
return !is_coalescing_payload(v->devinfo, v->alloc, inst);
|
||||
default:
|
||||
return inst->is_send_from_grf() && !inst->has_side_effects() &&
|
||||
!inst->is_volatile();
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ brw_fs_opt_dead_code_eliminate(fs_visitor &s)
|
|||
if (inst->src[i].file == VGRF) {
|
||||
int var = live_vars.var_from_reg(inst->src[i]);
|
||||
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++) {
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) {
|
||||
BITSET_SET(live, var + j);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ fs_live_variables::setup_def_use()
|
|||
if (reg.file != VGRF)
|
||||
continue;
|
||||
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++) {
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) {
|
||||
setup_one_read(bd, ip, reg);
|
||||
reg.offset += REG_SIZE;
|
||||
}
|
||||
|
|
@ -342,7 +342,7 @@ fs_live_variables::validate(const fs_visitor *s) const
|
|||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF &&
|
||||
!check_register_live_range(this, ip,
|
||||
inst->src[i], regs_read(inst, i)))
|
||||
inst->src[i], regs_read(devinfo, inst, i)))
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -217,9 +217,9 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
|||
brw_reg low = inst->dst;
|
||||
if (orig_dst.is_null() ||
|
||||
regions_overlap(inst->dst, inst->size_written,
|
||||
inst->src[0], inst->size_read(0)) ||
|
||||
inst->src[0], inst->size_read(devinfo, 0)) ||
|
||||
regions_overlap(inst->dst, inst->size_written,
|
||||
inst->src[1], inst->size_read(1)) ||
|
||||
inst->src[1], inst->size_read(devinfo, 1)) ||
|
||||
inst->dst.stride >= 4) {
|
||||
needs_mov = true;
|
||||
low = brw_vgrf(s.alloc.allocate(regs_written(inst)),
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ get_fpu_lowered_simd_width(const fs_visitor *shader,
|
|||
unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
|
||||
|
||||
for (unsigned i = 0; i < inst->sources; i++)
|
||||
reg_count = MAX3(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE),
|
||||
reg_count = MAX3(reg_count, DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE),
|
||||
(inst->src[i].file == ATTR ? attr_reg_count : 0));
|
||||
|
||||
/* Calculate the maximum execution size of the instruction based on the
|
||||
|
|
@ -559,7 +559,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
|
|||
* the data read from the same source by other lowered instructions.
|
||||
*/
|
||||
if (regions_overlap(inst->dst, inst->size_written,
|
||||
inst->src[i], inst->size_read(i)) &&
|
||||
inst->src[i], inst->size_read(lbld.shader->devinfo, i)) &&
|
||||
!inst->dst.equals(inst->src[i]))
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ brw_fs_opt_split_virtual_grfs(fs_visitor &s)
|
|||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
unsigned reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
|
||||
for (unsigned j = 1; j < regs_read(inst, i); j++)
|
||||
for (unsigned j = 1; j < regs_read(s.devinfo, inst, i); j++)
|
||||
split_points[reg + j] = false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -199,7 +199,7 @@ void fs_visitor::calculate_payload_ranges(bool allow_spilling,
|
|||
continue;
|
||||
|
||||
for (unsigned j = reg_nr / reg_unit(devinfo);
|
||||
j < DIV_ROUND_UP(reg_nr + regs_read(inst, i),
|
||||
j < DIV_ROUND_UP(reg_nr + regs_read(devinfo, inst, i),
|
||||
reg_unit(devinfo));
|
||||
j++) {
|
||||
payload_last_use_ip[j] = use_ip;
|
||||
|
|
@ -933,7 +933,7 @@ fs_reg_alloc::set_spill_costs()
|
|||
foreach_block_and_inst(block, fs_inst, inst, fs->cfg) {
|
||||
for (unsigned int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF)
|
||||
spill_costs[inst->src[i].nr] += regs_read(inst, i) * block_scale;
|
||||
spill_costs[inst->src[i].nr] += regs_read(devinfo, inst, i) * block_scale;
|
||||
}
|
||||
|
||||
if (inst->dst.file == VGRF)
|
||||
|
|
@ -1079,7 +1079,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||
if (inst->src[i].file == VGRF &&
|
||||
inst->src[i].nr == spill_reg) {
|
||||
/* Count registers needed in units of physical registers */
|
||||
int count = align(regs_read(inst, i), reg_unit(devinfo));
|
||||
int count = align(regs_read(devinfo, inst, i), reg_unit(devinfo));
|
||||
/* Align the spilling offset the physical register size */
|
||||
int subset_spill_offset = spill_offset +
|
||||
ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE * reg_unit(devinfo));
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
|
|||
return false;
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
|
||||
if (!is_coalescing_payload(v->alloc, inst)) {
|
||||
if (!is_coalescing_payload(v->devinfo, v->alloc, inst)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -97,7 +97,8 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
|
|||
}
|
||||
|
||||
static bool
|
||||
can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg,
|
||||
can_coalesce_vars(const intel_device_info *devinfo,
|
||||
const fs_live_variables &live, const cfg_t *cfg,
|
||||
const bblock_t *block, const fs_inst *inst,
|
||||
int dst_var, int src_var)
|
||||
{
|
||||
|
|
@ -161,7 +162,7 @@ can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg,
|
|||
* copy. This effectively moves the write from the copy up.
|
||||
*/
|
||||
for (int j = 0; j < scan_inst->sources; j++) {
|
||||
if (regions_overlap(scan_inst->src[j], scan_inst->size_read(j),
|
||||
if (regions_overlap(scan_inst->src[j], scan_inst->size_read(devinfo, j),
|
||||
inst->dst, inst->size_written))
|
||||
return false; /* registers interfere */
|
||||
}
|
||||
|
|
@ -176,7 +177,7 @@ can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg,
|
|||
|
||||
/* See the big comment above */
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
|
||||
inst->src[0], inst->size_read(0))) {
|
||||
inst->src[0], inst->size_read(devinfo, 0))) {
|
||||
if (seen_copy || scan_block != block ||
|
||||
(scan_inst->force_writemask_all && !inst->force_writemask_all))
|
||||
return false;
|
||||
|
|
@ -303,7 +304,7 @@ brw_fs_opt_register_coalesce(fs_visitor &s)
|
|||
dst_var[i] = live.var_from_vgrf[dst_reg] + dst_reg_offset[i];
|
||||
src_var[i] = live.var_from_vgrf[src_reg] + i;
|
||||
|
||||
if (!can_coalesce_vars(live, s.cfg, block, inst, dst_var[i], src_var[i]) ||
|
||||
if (!can_coalesce_vars(devinfo, live, s.cfg, block, inst, dst_var[i], src_var[i]) ||
|
||||
would_violate_eot_restriction(s.alloc, s.cfg, dst_reg, src_reg)) {
|
||||
can_coalesce = false;
|
||||
src_reg = ~0u;
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ opt_saturate_propagation_local(fs_visitor &s, bblock_t *block)
|
|||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (scan_inst->exec_size == inst->exec_size &&
|
||||
regions_overlap(scan_inst->dst, scan_inst->size_written,
|
||||
inst->src[0], inst->size_read(0))) {
|
||||
inst->src[0], inst->size_read(s.devinfo, 0))) {
|
||||
if (scan_inst->is_partial_write() ||
|
||||
(scan_inst->dst.type != inst->dst.type &&
|
||||
!scan_inst->can_change_types()))
|
||||
|
|
@ -167,8 +167,8 @@ opt_saturate_propagation_local(fs_visitor &s, bblock_t *block)
|
|||
if (scan_inst->src[i].file == VGRF &&
|
||||
scan_inst->src[i].nr == inst->src[0].nr &&
|
||||
regions_overlap(
|
||||
scan_inst->src[i], scan_inst->size_read(i),
|
||||
inst->src[0], inst->size_read(0))) {
|
||||
scan_inst->src[i], scan_inst->size_read(s.devinfo, i),
|
||||
inst->src[0], inst->size_read(s.devinfo, 0))) {
|
||||
if (scan_inst->opcode != BRW_OPCODE_MOV ||
|
||||
!scan_inst->saturate ||
|
||||
scan_inst->src[0].abs ||
|
||||
|
|
|
|||
|
|
@ -1049,7 +1049,7 @@ namespace {
|
|||
is_ordered ? dependency(TGL_REGDIST_SRC, jp, exec_all) :
|
||||
dependency::done;
|
||||
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++) {
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) {
|
||||
const brw_reg r = byte_offset(inst->src[i], REG_SIZE * j);
|
||||
sb.set(r, shadow(sb.get(r), rd_dep));
|
||||
}
|
||||
|
|
@ -1163,7 +1163,7 @@ namespace {
|
|||
scoreboard &sb = sbs[block->num];
|
||||
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++)
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++)
|
||||
add_dependency(ids, deps[ip], dependency_for_read(
|
||||
sb.get(byte_offset(inst->src[i], REG_SIZE * j))));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -381,7 +381,7 @@ brw_fs_validate(const fs_visitor &s)
|
|||
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(inst, i),
|
||||
fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i),
|
||||
s.alloc.sizes[inst->src[i].nr]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ public:
|
|||
bool is_payload(unsigned arg) const;
|
||||
bool is_partial_write() const;
|
||||
unsigned components_read(unsigned i) const;
|
||||
unsigned size_read(int arg) const;
|
||||
unsigned size_read(const struct intel_device_info *devinfo, int arg) const;
|
||||
bool can_do_source_mods(const struct intel_device_info *devinfo) const;
|
||||
bool can_do_cmod() const;
|
||||
bool can_change_types() const;
|
||||
|
|
@ -295,15 +295,15 @@ regs_written(const fs_inst *inst)
|
|||
* UNIFORM files and 32B for all other files.
|
||||
*/
|
||||
inline unsigned
|
||||
regs_read(const fs_inst *inst, unsigned i)
|
||||
regs_read(const struct intel_device_info *devinfo, const fs_inst *inst, unsigned i)
|
||||
{
|
||||
if (inst->src[i].file == IMM)
|
||||
return 1;
|
||||
|
||||
const unsigned reg_size = inst->src[i].file == UNIFORM ? 4 : REG_SIZE;
|
||||
return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
|
||||
inst->size_read(i) -
|
||||
MIN2(inst->size_read(i), reg_padding(inst->src[i])),
|
||||
inst->size_read(devinfo, i) -
|
||||
MIN2(inst->size_read(devinfo, i), reg_padding(inst->src[i])),
|
||||
reg_size);
|
||||
}
|
||||
|
||||
|
|
@ -475,7 +475,8 @@ has_subdword_integer_region_restriction(const intel_device_info *devinfo,
|
|||
* multiple virtual registers in any order is allowed.
|
||||
*/
|
||||
inline bool
|
||||
is_copy_payload(brw_reg_file file, const fs_inst *inst)
|
||||
is_copy_payload(const struct intel_device_info *devinfo,
|
||||
brw_reg_file file, const fs_inst *inst)
|
||||
{
|
||||
if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD ||
|
||||
inst->is_partial_write() || inst->saturate ||
|
||||
|
|
@ -491,7 +492,7 @@ is_copy_payload(brw_reg_file file, const fs_inst *inst)
|
|||
return false;
|
||||
|
||||
if (regions_overlap(inst->dst, inst->size_written,
|
||||
inst->src[i], inst->size_read(i)))
|
||||
inst->src[i], inst->size_read(devinfo, i)))
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -504,8 +505,10 @@ is_copy_payload(brw_reg_file file, const fs_inst *inst)
|
|||
* destination without any reordering.
|
||||
*/
|
||||
inline bool
|
||||
is_identity_payload(brw_reg_file file, const fs_inst *inst) {
|
||||
if (is_copy_payload(file, inst)) {
|
||||
is_identity_payload(const struct intel_device_info *devinfo,
|
||||
brw_reg_file file, const fs_inst *inst)
|
||||
{
|
||||
if (is_copy_payload(devinfo, file, inst)) {
|
||||
brw_reg reg = inst->src[0];
|
||||
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
|
|
@ -513,7 +516,7 @@ is_identity_payload(brw_reg_file file, const fs_inst *inst) {
|
|||
if (!inst->src[i].equals(reg))
|
||||
return false;
|
||||
|
||||
reg = byte_offset(reg, inst->size_read(i));
|
||||
reg = byte_offset(reg, inst->size_read(devinfo, i));
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -533,8 +536,10 @@ is_identity_payload(brw_reg_file file, const fs_inst *inst) {
|
|||
* instructions.
|
||||
*/
|
||||
inline bool
|
||||
is_multi_copy_payload(const fs_inst *inst) {
|
||||
if (is_copy_payload(VGRF, inst)) {
|
||||
is_multi_copy_payload(const struct intel_device_info *devinfo,
|
||||
const fs_inst *inst)
|
||||
{
|
||||
if (is_copy_payload(devinfo, VGRF, inst)) {
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].nr != inst->src[0].nr)
|
||||
return true;
|
||||
|
|
@ -557,9 +562,10 @@ is_multi_copy_payload(const fs_inst *inst) {
|
|||
* instruction.
|
||||
*/
|
||||
inline bool
|
||||
is_coalescing_payload(const brw::simple_allocator &alloc, const fs_inst *inst)
|
||||
is_coalescing_payload(const struct intel_device_info *devinfo,
|
||||
const brw::simple_allocator &alloc, const fs_inst *inst)
|
||||
{
|
||||
return is_identity_payload(VGRF, inst) &&
|
||||
return is_identity_payload(devinfo, VGRF, inst) &&
|
||||
inst->src[0].offset == 0 &&
|
||||
alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -128,11 +128,11 @@ namespace {
|
|||
* messages which require the total size.
|
||||
*/
|
||||
if (inst->opcode == SHADER_OPCODE_SEND) {
|
||||
ss = DIV_ROUND_UP(inst->size_read(2), REG_SIZE) +
|
||||
DIV_ROUND_UP(inst->size_read(3), REG_SIZE);
|
||||
ss = DIV_ROUND_UP(inst->size_read(devinfo, 2), REG_SIZE) +
|
||||
DIV_ROUND_UP(inst->size_read(devinfo, 3), REG_SIZE);
|
||||
} else {
|
||||
for (unsigned i = 0; i < inst->sources; i++)
|
||||
ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(i), REG_SIZE));
|
||||
ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE));
|
||||
}
|
||||
|
||||
/* Convert the execution size to GRF units. */
|
||||
|
|
@ -878,7 +878,7 @@ namespace {
|
|||
|
||||
/* Stall on any source dependencies. */
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++)
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++)
|
||||
stall_on_dependency(
|
||||
st, reg_dependency_id(devinfo, inst->src[i], j));
|
||||
}
|
||||
|
|
@ -935,7 +935,7 @@ namespace {
|
|||
if (inst->is_send_from_grf()) {
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->is_payload(i)) {
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++)
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++)
|
||||
mark_read_dependency(
|
||||
st, perf, reg_dependency_id(devinfo, inst->src[i], j));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -583,7 +583,7 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons
|
|||
fprintf(file, ".%d", inst->src[i].subnr / brw_type_size_bytes(inst->src[i].type));
|
||||
} else if (inst->src[i].offset ||
|
||||
(!s.grf_used && inst->src[i].file == VGRF &&
|
||||
s.alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) {
|
||||
s.alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(s.devinfo, i))) {
|
||||
const unsigned reg_size = (inst->src[i].file == UNIFORM ? 4 : REG_SIZE);
|
||||
fprintf(file, "+%d.%d", inst->src[i].offset / reg_size,
|
||||
inst->src[i].offset % reg_size);
|
||||
|
|
|
|||
|
|
@ -800,7 +800,7 @@ instruction_scheduler::count_reads_remaining(const fs_inst *inst)
|
|||
if (inst->src[i].nr >= hw_reg_count)
|
||||
continue;
|
||||
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++)
|
||||
for (unsigned j = 0; j < regs_read(s->devinfo, inst, i); j++)
|
||||
hw_reads_remaining[inst->src[i].nr + j]++;
|
||||
}
|
||||
}
|
||||
|
|
@ -881,7 +881,7 @@ instruction_scheduler::update_register_pressure(const fs_inst *inst)
|
|||
reads_remaining[inst->src[i].nr]--;
|
||||
} else if (inst->src[i].file == FIXED_GRF &&
|
||||
inst->src[i].nr < hw_reg_count) {
|
||||
for (unsigned off = 0; off < regs_read(inst, i); off++)
|
||||
for (unsigned off = 0; off < regs_read(s->devinfo, inst, i); off++)
|
||||
hw_reads_remaining[inst->src[i].nr + off]--;
|
||||
}
|
||||
}
|
||||
|
|
@ -910,7 +910,7 @@ instruction_scheduler::get_register_pressure_benefit(const fs_inst *inst)
|
|||
|
||||
if (inst->src[i].file == FIXED_GRF &&
|
||||
inst->src[i].nr < hw_reg_count) {
|
||||
for (unsigned off = 0; off < regs_read(inst, i); off++) {
|
||||
for (unsigned off = 0; off < regs_read(s->devinfo, inst, i); off++) {
|
||||
int reg = inst->src[i].nr + off;
|
||||
if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
|
||||
hw_reads_remaining[reg] == 1) {
|
||||
|
|
@ -1212,11 +1212,11 @@ instruction_scheduler::calculate_deps()
|
|||
/* read-after-write deps. */
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
for (unsigned r = 0; r < regs_read(inst, i); r++)
|
||||
for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++)
|
||||
add_dep(last_grf_write[grf_index(inst->src[i]) + r], n);
|
||||
} else if (inst->src[i].file == FIXED_GRF) {
|
||||
if (post_reg_alloc) {
|
||||
for (unsigned r = 0; r < regs_read(inst, i); r++)
|
||||
for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++)
|
||||
add_dep(last_grf_write[inst->src[i].nr + r], n);
|
||||
} else {
|
||||
add_dep(last_fixed_grf_write, n);
|
||||
|
|
@ -1297,11 +1297,11 @@ instruction_scheduler::calculate_deps()
|
|||
/* write-after-read deps. */
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
for (unsigned r = 0; r < regs_read(inst, i); r++)
|
||||
for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++)
|
||||
add_dep(n, last_grf_write[grf_index(inst->src[i]) + r], 0);
|
||||
} else if (inst->src[i].file == FIXED_GRF) {
|
||||
if (post_reg_alloc) {
|
||||
for (unsigned r = 0; r < regs_read(inst, i); r++)
|
||||
for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++)
|
||||
add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
|
||||
} else {
|
||||
add_dep(n, last_fixed_grf_write, 0);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue