mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 17:50:11 +01:00
intel/brw: Pass fs_visitor around instead of the simple_allocator
In preparation for getting rid of the simple_allocator.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33334>
This commit is contained in:
parent
75b77382b8
commit
5c717e68ce
6 changed files with 46 additions and 57 deletions
|
|
@ -4,6 +4,7 @@
|
|||
*/
|
||||
|
||||
#include "brw_eu.h"
|
||||
#include "brw_fs.h"
|
||||
#include "brw_cfg.h"
|
||||
#include "brw_compiler.h"
|
||||
#include "brw_inst.h"
|
||||
|
|
@ -1253,11 +1254,10 @@ is_multi_copy_payload(const struct intel_device_info *devinfo,
|
|||
* instruction.
|
||||
*/
|
||||
bool
|
||||
is_coalescing_payload(const struct intel_device_info *devinfo,
|
||||
const brw::simple_allocator &alloc, const brw_inst *inst)
|
||||
is_coalescing_payload(const fs_visitor &s, const brw_inst *inst)
|
||||
{
|
||||
return is_identity_payload(devinfo, VGRF, inst) &&
|
||||
return is_identity_payload(s.devinfo, VGRF, inst) &&
|
||||
inst->src[0].offset == 0 &&
|
||||
alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
|
||||
s.alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include "brw_ir_allocator.h"
|
||||
#include "brw_reg.h"
|
||||
#include "compiler/glsl/list.h"
|
||||
|
||||
|
|
@ -378,8 +377,7 @@ bool is_identity_payload(const struct intel_device_info *devinfo,
|
|||
bool is_multi_copy_payload(const struct intel_device_info *devinfo,
|
||||
const brw_inst *inst);
|
||||
|
||||
bool is_coalescing_payload(const struct intel_device_info *devinfo,
|
||||
const brw::simple_allocator &alloc, const brw_inst *inst);
|
||||
bool is_coalescing_payload(const struct fs_visitor &s, const brw_inst *inst);
|
||||
|
||||
bool has_bank_conflict(const struct brw_isa_info *isa, const brw_inst *inst);
|
||||
|
||||
|
|
|
|||
|
|
@ -1183,16 +1183,15 @@ struct register_allocation {
|
|||
};
|
||||
|
||||
static brw_reg
|
||||
allocate_slots(const intel_device_info *devinfo,
|
||||
allocate_slots(fs_visitor &s,
|
||||
struct register_allocation *regs, unsigned num_regs,
|
||||
unsigned bytes, unsigned align_bytes,
|
||||
brw::simple_allocator &alloc)
|
||||
unsigned bytes, unsigned align_bytes)
|
||||
{
|
||||
assert(bytes == 2 || bytes == 4 || bytes == 8);
|
||||
assert(align_bytes == 2 || align_bytes == 4 || align_bytes == 8);
|
||||
|
||||
const unsigned slots_per_reg =
|
||||
REG_SIZE * reg_unit(devinfo) / sizeof(uint16_t);
|
||||
REG_SIZE * reg_unit(s.devinfo) / sizeof(uint16_t);
|
||||
|
||||
const unsigned words = bytes / 2;
|
||||
const unsigned align_words = align_bytes / 2;
|
||||
|
|
@ -1204,7 +1203,7 @@ allocate_slots(const intel_device_info *devinfo,
|
|||
|
||||
if ((x & mask) == mask) {
|
||||
if (regs[i].nr == UINT_MAX)
|
||||
regs[i].nr = alloc.allocate(reg_unit(devinfo));
|
||||
regs[i].nr = s.alloc.allocate(reg_unit(s.devinfo));
|
||||
|
||||
regs[i].avail &= ~(mask << j);
|
||||
|
||||
|
|
@ -1243,10 +1242,9 @@ deallocate_slots(const struct intel_device_info *devinfo,
|
|||
}
|
||||
|
||||
static void
|
||||
parcel_out_registers(const intel_device_info *devinfo,
|
||||
parcel_out_registers(fs_visitor &s,
|
||||
struct imm *imm, unsigned len, const bblock_t *cur_block,
|
||||
struct register_allocation *regs, unsigned num_regs,
|
||||
brw::simple_allocator &alloc)
|
||||
struct register_allocation *regs, unsigned num_regs)
|
||||
{
|
||||
/* Each basic block has two distinct set of constants. There is the set of
|
||||
* constants that only have uses in that block, and there is the set of
|
||||
|
|
@ -1267,10 +1265,9 @@ parcel_out_registers(const intel_device_info *devinfo,
|
|||
for (unsigned i = 0; i < len; i++) {
|
||||
if (imm[i].block == cur_block &&
|
||||
imm[i].used_in_single_block == used_in_single_block) {
|
||||
const brw_reg reg = allocate_slots(devinfo, regs, num_regs,
|
||||
const brw_reg reg = allocate_slots(s, regs, num_regs,
|
||||
imm[i].size,
|
||||
get_alignment_for_imm(&imm[i]),
|
||||
alloc);
|
||||
get_alignment_for_imm(&imm[i]));
|
||||
|
||||
imm[i].nr = reg.nr;
|
||||
imm[i].subreg_offset = reg.offset;
|
||||
|
|
@ -1280,7 +1277,7 @@ parcel_out_registers(const intel_device_info *devinfo,
|
|||
|
||||
for (unsigned i = 0; i < len; i++) {
|
||||
if (imm[i].block == cur_block && imm[i].used_in_single_block) {
|
||||
deallocate_slots(devinfo, regs, num_regs, imm[i].nr,
|
||||
deallocate_slots(s.devinfo, regs, num_regs, imm[i].nr,
|
||||
imm[i].subreg_offset, imm[i].size);
|
||||
}
|
||||
}
|
||||
|
|
@ -1529,8 +1526,7 @@ brw_opt_combine_constants(fs_visitor &s)
|
|||
}
|
||||
|
||||
foreach_block(block, s.cfg) {
|
||||
parcel_out_registers(devinfo, table.imm, table.len, block, regs,
|
||||
table.len, s.alloc);
|
||||
parcel_out_registers(s, table.imm, table.len, block, regs, table.len);
|
||||
}
|
||||
|
||||
free(regs);
|
||||
|
|
|
|||
|
|
@ -657,15 +657,14 @@ instruction_requires_packed_data(brw_inst *inst)
|
|||
}
|
||||
|
||||
static bool
|
||||
try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
|
||||
try_copy_propagate(fs_visitor &s, brw_inst *inst,
|
||||
acp_entry *entry, int arg,
|
||||
const brw::simple_allocator &alloc,
|
||||
uint8_t max_polygons)
|
||||
{
|
||||
if (inst->src[arg].file != VGRF)
|
||||
return false;
|
||||
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
const struct intel_device_info *devinfo = s.devinfo;
|
||||
|
||||
assert(entry->src.file == VGRF || entry->src.file == UNIFORM ||
|
||||
entry->src.file == ATTR || entry->src.file == FIXED_GRF);
|
||||
|
|
@ -685,7 +684,7 @@ try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
|
|||
* temporaries which should match is_coalescing_payload().
|
||||
*/
|
||||
if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
|
||||
(is_coalescing_payload(devinfo, alloc, inst) ||
|
||||
(is_coalescing_payload(s, inst) ||
|
||||
is_multi_copy_payload(devinfo, inst)))
|
||||
return false;
|
||||
|
||||
|
|
@ -718,9 +717,9 @@ try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
|
|||
entry->src.file == VGRF) {
|
||||
int other_src = arg == 2 ? 3 : 2;
|
||||
unsigned other_size = inst->src[other_src].file == VGRF ?
|
||||
alloc.sizes[inst->src[other_src].nr] :
|
||||
s.alloc.sizes[inst->src[other_src].nr] :
|
||||
inst->size_read(devinfo, other_src);
|
||||
unsigned prop_src_size = alloc.sizes[entry->src.nr];
|
||||
unsigned prop_src_size = s.alloc.sizes[entry->src.nr];
|
||||
if (other_size + prop_src_size > 15)
|
||||
return false;
|
||||
}
|
||||
|
|
@ -765,7 +764,7 @@ try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
|
|||
*/
|
||||
if (!can_take_stride(inst, dst_type, arg,
|
||||
entry_stride * inst->src[arg].stride,
|
||||
compiler))
|
||||
s.compiler))
|
||||
return false;
|
||||
|
||||
/* From the Cherry Trail/Braswell PRMs, Volume 7: 3D Media GPGPU:
|
||||
|
|
@ -806,7 +805,7 @@ try_copy_propagate(const brw_compiler *compiler, brw_inst *inst,
|
|||
if (entry->src.file == ATTR && max_polygons > 1 &&
|
||||
(has_dst_aligned_region_restriction(devinfo, inst, dst_type) ||
|
||||
instruction_requires_packed_data(inst) ||
|
||||
(inst->is_3src(compiler) && arg == 2) ||
|
||||
(inst->is_3src(s.compiler) && arg == 2) ||
|
||||
entry->dst.type != inst->src[arg].type))
|
||||
return false;
|
||||
|
||||
|
|
@ -1307,12 +1306,11 @@ commute_immediates(brw_inst *inst)
|
|||
* list.
|
||||
*/
|
||||
static bool
|
||||
opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
|
||||
opt_copy_propagation_local(fs_visitor &s, linear_ctx *lin_ctx,
|
||||
bblock_t *block, struct acp &acp,
|
||||
const brw::simple_allocator &alloc,
|
||||
uint8_t max_polygons)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
const struct intel_device_info *devinfo = s.devinfo;
|
||||
bool progress = false;
|
||||
|
||||
foreach_inst_in_block(brw_inst, inst, block) {
|
||||
|
|
@ -1331,8 +1329,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
|
|||
break;
|
||||
}
|
||||
} else {
|
||||
if (try_copy_propagate(compiler, inst, *iter, i, alloc,
|
||||
max_polygons)) {
|
||||
if (try_copy_propagate(s, inst, *iter, i, max_polygons)) {
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -1342,7 +1339,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
|
|||
|
||||
if (constant_progress) {
|
||||
commute_immediates(inst);
|
||||
brw_opt_constant_fold_instruction(compiler->devinfo, inst);
|
||||
brw_opt_constant_fold_instruction(devinfo, inst);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
|
|
@ -1428,8 +1425,8 @@ brw_opt_copy_propagation(fs_visitor &s)
|
|||
* the set of copies available at the end of the block.
|
||||
*/
|
||||
foreach_block (block, s.cfg) {
|
||||
progress = opt_copy_propagation_local(s.compiler, lin_ctx, block,
|
||||
out_acp[block->num], s.alloc,
|
||||
progress = opt_copy_propagation_local(s, lin_ctx, block,
|
||||
out_acp[block->num],
|
||||
s.max_polygons) || progress;
|
||||
|
||||
/* If the destination of an ACP entry exists only within this block,
|
||||
|
|
@ -1469,8 +1466,8 @@ brw_opt_copy_propagation(fs_visitor &s)
|
|||
}
|
||||
}
|
||||
|
||||
progress = opt_copy_propagation_local(s.compiler, lin_ctx, block,
|
||||
in_acp, s.alloc, s.max_polygons) ||
|
||||
progress = opt_copy_propagation_local(s, lin_ctx, block,
|
||||
in_acp, s.max_polygons) ||
|
||||
progress;
|
||||
}
|
||||
|
||||
|
|
@ -1484,13 +1481,12 @@ brw_opt_copy_propagation(fs_visitor &s)
|
|||
}
|
||||
|
||||
static bool
|
||||
try_copy_propagate_def(const brw_compiler *compiler,
|
||||
const brw::simple_allocator &alloc,
|
||||
try_copy_propagate_def(fs_visitor &s,
|
||||
brw_inst *def, const brw_reg &val,
|
||||
brw_inst *inst, int arg,
|
||||
uint8_t max_polygons)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
const struct intel_device_info *devinfo = s.devinfo;
|
||||
|
||||
assert(val.file != BAD_FILE);
|
||||
|
||||
|
|
@ -1546,9 +1542,9 @@ try_copy_propagate_def(const brw_compiler *compiler,
|
|||
val.file == VGRF) {
|
||||
int other_src = arg == 2 ? 3 : 2;
|
||||
unsigned other_size = inst->src[other_src].file == VGRF ?
|
||||
alloc.sizes[inst->src[other_src].nr] :
|
||||
s.alloc.sizes[inst->src[other_src].nr] :
|
||||
inst->size_read(devinfo, other_src);
|
||||
unsigned prop_src_size = alloc.sizes[val.nr];
|
||||
unsigned prop_src_size = s.alloc.sizes[val.nr];
|
||||
if (other_size + prop_src_size > 15)
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1582,7 +1578,7 @@ try_copy_propagate_def(const brw_compiler *compiler,
|
|||
*/
|
||||
if (!can_take_stride(inst, dst_type, arg,
|
||||
entry_stride * inst->src[arg].stride,
|
||||
compiler))
|
||||
s.compiler))
|
||||
return false;
|
||||
|
||||
/* Bail if the source FIXED_GRF region of the copy cannot be trivially
|
||||
|
|
@ -1644,7 +1640,7 @@ try_copy_propagate_def(const brw_compiler *compiler,
|
|||
if (max_polygons > 1 && val.file == ATTR &&
|
||||
(has_dst_aligned_region_restriction(devinfo, inst, dst_type) ||
|
||||
instruction_requires_packed_data(inst) ||
|
||||
(inst->is_3src(compiler) && arg == 2) ||
|
||||
(inst->is_3src(s.compiler) && arg == 2) ||
|
||||
def->dst.type != inst->src[arg].type))
|
||||
return false;
|
||||
|
||||
|
|
@ -1831,7 +1827,7 @@ brw_opt_copy_propagation_defs(fs_visitor &s)
|
|||
def->src[0].file != BAD_FILE && def->src[0].file != IMM &&
|
||||
is_identity_payload(s.devinfo, def->src[0].file, def)) {
|
||||
source_progress =
|
||||
try_copy_propagate_def(s.compiler, s.alloc, def, def->src[0],
|
||||
try_copy_propagate_def(s, def, def->src[0],
|
||||
inst, i, s.max_polygons);
|
||||
|
||||
if (source_progress) {
|
||||
|
|
@ -1857,8 +1853,7 @@ brw_opt_copy_propagation_defs(fs_visitor &s)
|
|||
val.file == ATTR || val.file == UNIFORM ||
|
||||
(val.file == FIXED_GRF && val.is_contiguous())) {
|
||||
source_progress =
|
||||
try_copy_propagate_def(s.compiler, s.alloc, def, val, inst, i,
|
||||
s.max_polygons);
|
||||
try_copy_propagate_def(s, def, val, inst, i, s.max_polygons);
|
||||
}
|
||||
|
||||
if (source_progress) {
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ is_expression(const fs_visitor *v, const brw_inst *const inst)
|
|||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
return inst->src[MEMORY_LOGICAL_MODE].ud == MEMORY_MODE_CONSTANT;
|
||||
case SHADER_OPCODE_LOAD_PAYLOAD:
|
||||
return !is_coalescing_payload(v->devinfo, v->alloc, inst);
|
||||
return !is_coalescing_payload(*v, inst);
|
||||
default:
|
||||
return inst->is_send_from_grf() && !inst->has_side_effects() &&
|
||||
!inst->is_volatile();
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ is_coalesce_candidate(const fs_visitor *v, const brw_inst *inst)
|
|||
return false;
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
|
||||
if (!is_coalescing_payload(v->devinfo, v->alloc, inst)) {
|
||||
if (!is_coalescing_payload(*v, inst)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -192,11 +192,11 @@ can_coalesce_vars(const intel_device_info *devinfo,
|
|||
* SEND instruction's payload to more than would fit in g112-g127.
|
||||
*/
|
||||
static bool
|
||||
would_violate_eot_restriction(const brw::simple_allocator &alloc,
|
||||
would_violate_eot_restriction(fs_visitor &s,
|
||||
const cfg_t *cfg,
|
||||
unsigned dst_reg, unsigned src_reg)
|
||||
{
|
||||
if (alloc.sizes[dst_reg] > alloc.sizes[src_reg]) {
|
||||
if (s.alloc.sizes[dst_reg] > s.alloc.sizes[src_reg]) {
|
||||
foreach_inst_in_block_reverse(brw_inst, send, cfg->last_block()) {
|
||||
if (send->opcode != SHADER_OPCODE_SEND || !send->eot)
|
||||
continue;
|
||||
|
|
@ -205,13 +205,13 @@ would_violate_eot_restriction(const brw::simple_allocator &alloc,
|
|||
(send->sources >= 4 &&
|
||||
send->src[3].file == VGRF && send->src[3].nr == src_reg)) {
|
||||
const unsigned s2 =
|
||||
send->src[2].file == VGRF ? alloc.sizes[send->src[2].nr] : 0;
|
||||
send->src[2].file == VGRF ? s.alloc.sizes[send->src[2].nr] : 0;
|
||||
const unsigned s3 = send->sources >= 4 &&
|
||||
send->src[3].file == VGRF ?
|
||||
alloc.sizes[send->src[3].nr] : 0;
|
||||
s.alloc.sizes[send->src[3].nr] : 0;
|
||||
|
||||
const unsigned increase =
|
||||
alloc.sizes[dst_reg] - alloc.sizes[src_reg];
|
||||
s.alloc.sizes[dst_reg] - s.alloc.sizes[src_reg];
|
||||
|
||||
if (s2 + s3 + increase > 15)
|
||||
return true;
|
||||
|
|
@ -303,7 +303,7 @@ brw_opt_register_coalesce(fs_visitor &s)
|
|||
src_var[i] = live.var_from_vgrf[src_reg] + i;
|
||||
|
||||
if (!can_coalesce_vars(devinfo, live, s.cfg, block, inst, dst_var[i], src_var[i]) ||
|
||||
would_violate_eot_restriction(s.alloc, s.cfg, dst_reg, src_reg)) {
|
||||
would_violate_eot_restriction(s, s.cfg, dst_reg, src_reg)) {
|
||||
can_coalesce = false;
|
||||
src_reg = ~0u;
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue