aco: Add call info

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34531>
This commit is contained in:
Natalie Vock 2025-02-17 18:42:48 +01:00 committed by Marge Bot
parent af812862b7
commit 3667a7b687
4 changed files with 320 additions and 0 deletions

View file

@ -2324,6 +2324,8 @@ public:
bool is_callee = false;
bool has_call = false;
bool bypass_reg_preservation = false;
ABI callee_abi = {};
RegisterDemand callee_param_demand = RegisterDemand();
struct {
monotonic_buffer_resource memory;

View file

@ -18,6 +18,31 @@
namespace aco {
/* Describes where one call parameter (or system value such as the return address) lives:
 * either in a register, described by "def", or in scratch memory at "scratch_offset".
 */
struct parameter_info {
/* Set from ACO_NIR_PARAM_ATTRIB_DISCARDABLE for NIR parameters. Register assignment places
 * non-discardable, non-return parameters in ABI-preserved registers when the ABI has any.
 */
bool discardable;
/* Selects the active union member: true -> def, false -> scratch_offset. */
bool is_reg;
union {
/* Definition holding the parameter; precolored to the assigned register by find_param_regs. */
Definition def;
/* Byte offset of the parameter inside the scratch parameter area. */
unsigned scratch_offset;
};
};
/* Per-call bookkeeping; isel_context keeps one entry per call in call_infos.
 * NOTE(review): the members are not used in the code visible here, so the descriptions below
 * are inferred from names and types - confirm against the users of this struct.
 */
struct call_info {
/* Presumably the NIR call instruction this entry describes. */
nir_call_instr* nir_instr;
/* Presumably the ACO instruction emitted for that call. */
Instruction* aco_instr;
/* Presumably the locations of the values returned by the call. */
std::vector<parameter_info> return_info;
/* Presumably the size of the scratch parameter area used by this call - units to be confirmed. */
unsigned scratch_param_size;
};
/* Result of parameter assignment for a callee, as produced by get_callee_info(). */
struct callee_info {
/* One entry per NIR parameter, in parameter order. */
std::vector<parameter_info> param_infos;
/* Location of the return address (an s2 value that is always assigned a register). */
parameter_info return_address;
/* Location of the stack pointer: an s1 value on GFX9+, an s4 value (named scratch_rsrc in
 * get_callee_info) on older generations. Always assigned a register.
 */
parameter_info stack_ptr;
/* Number of non-system parameters that were assigned to registers. */
unsigned reg_param_count = 0;
/* Subset of reg_param_count that is discardable (this includes return parameters). */
unsigned reg_discardable_param_count = 0;
/* Total size in bytes of all parameters that were placed in scratch memory. */
unsigned scratch_param_size = 0;
};
enum aco_color_output_type {
ACO_TYPE_ANY32,
ACO_TYPE_FLOAT16,
@ -135,6 +160,13 @@ struct isel_context {
uint32_t wqm_instruction_idx;
BITSET_DECLARE(output_args, AC_MAX_ARGS);
/* Function information */
ABI callee_abi;
struct callee_info callee_info;
std::vector<call_info> call_infos;
Temp next_divergent_pc;
Temp next_pc;
};
inline Temp
@ -257,6 +289,10 @@ void build_end_with_regs(isel_context* ctx, std::vector<Operand>& regs);
Instruction* add_startpgm(struct isel_context* ctx);
void finish_program(isel_context* ctx);
/* Computes the location (register or scratch offset) of the return address, the stack pointer
 * and each of the "param_count" entries of "parameters" for a callee using the given ABI.
 * "program" may be NULL: in that case no temporaries are allocated and the parameter register
 * demand is not recorded in the program. "reg_limit" is forwarded to the ABI when querying its
 * preserved registers.
 */
struct callee_info get_callee_info(amd_gfx_level gfx_level, const ABI& abi, unsigned param_count,
const nir_parameter* parameters, Program* program,
RegisterDemand reg_limit);
#define isel_err(...) _isel_err(ctx, __FILE__, __LINE__, __VA_ARGS__)
void _isel_err(isel_context* ctx, const char* file, unsigned line, const nir_instr* instr,

View file

@ -8,9 +8,12 @@
#include "aco_builder.h"
#include "aco_instruction_selection.h"
#include "aco_ir.h"
#include "aco_nir_call_attribs.h"
#include "util/memstream.h"
#include <optional>
namespace aco {
void
@ -820,4 +823,275 @@ finish_program(isel_context* ctx)
}
}
/* Work item for find_param_regs(): describes one value that still needs a location. */
struct param_assignment_info {
/* The index of the assigned register must be a multiple of this value. */
uint16_t required_alignment;
/* Alignment implied by the parameter's size. Parameters with larger provided alignment are
 * assigned first so that smaller ones can be used as padding.
 */
uint16_t provided_alignment;
/* Register class (type and size) of the parameter. */
RegClass rc;
/* The parameter_info that receives the assigned register or scratch offset. */
parameter_info* dst_info;
/* Return parameters are treated like discardable parameters when choosing a register set. */
bool is_return_param;
/* If true, this parameter shouldn't count toward the callee info's reg_param_count because it
 * receives special handling (e.g. the call return address being a definition instead of an
 * operand).
 */
bool is_system_param;
/* This parameter must reside in a register. Used for stack pointers as well as s_swappc
 * operands.
 */
bool force_reg;
};
/* Searches "regs" for the first run of rc.size() consecutive set bits and returns the register
 * at the start of that run, or an empty optional if there is none. A set bit marks a register
 * that may be handed out. VGPR classes search bits [256, 512), every other class searches
 * bits [0, 128). No alignment is taken into account here.
 */
std::optional<PhysReg>
find_reg(BITSET_WORD* regs, RegClass rc)
{
   const bool is_vgpr = rc.type() == RegType::vgpr;
   const uint16_t first = is_vgpr ? 256 : 0;
   const uint16_t count = is_vgpr ? 256 : 128;

   uint16_t run_length = 0;
   for (uint16_t offset = 0; offset < count; ++offset) {
      if (BITSET_TEST(regs, first + offset)) {
         ++run_length;
         /* The run ends at the current bit, so it started run_length - 1 bits earlier. */
         if (run_length >= rc.size())
            return PhysReg{(unsigned)(first + offset - run_length + 1)};
      } else {
         run_length = 0;
      }
   }

   return std::nullopt;
}
/* Assigns a location to every entry of "params" and writes it to the entry's dst_info.
 *
 * Each parameter either gets a physical register (dst_info->def is precolored) or, if no
 * suitable register is available, a byte offset in the scratch parameter area
 * (dst_info->is_reg is cleared and dst_info->scratch_offset is set).
 *
 * Non-discardable parameters are taken from the ABI's preserved registers if the ABI has any.
 * Discardable parameters and return parameters are taken from the clobbered registers.
 *
 * On return, "params" is empty, info.reg_param_count, info.reg_discardable_param_count and
 * info.scratch_param_size are updated, and - if "program" is non-NULL - the total register
 * demand of all register parameters is stored in program->callee_param_demand.
 */
void
find_param_regs(Program* program, const ABI& abi, callee_info& info,
                std::vector<struct param_assignment_info>& params, RegisterDemand reg_limit)
{
   unsigned scratch_param_bytes = 0;
   RegisterDemand param_demand = RegisterDemand();

   /* In both sets, a set bit means that the register is still available. */
   BITSET_DECLARE(preserved_regs, 512);
   BITSET_DECLARE(clobbered_regs, 512);
   abi.preservedRegisters(preserved_regs, reg_limit);
   BITSET_COPY(clobbered_regs, preserved_regs);
   BITSET_NOT(clobbered_regs);
   bool has_preserved_regs = !BITSET_IS_EMPTY(preserved_regs);

   std::stable_sort(params.begin(), params.end(),
                    [](const param_assignment_info& first, const param_assignment_info& second)
                    {
                       /* Assign parameters with larger alignments first so we can use parameters
                        * with smaller alignments as padding
                        */
                       return first.provided_alignment > second.provided_alignment;
                    });
   std::stable_sort(params.begin(), params.end(),
                    [](const param_assignment_info& first, const param_assignment_info& second)
                    {
                       /* Move parameters forced into registers to the very front so we assign
                        * them first.
                        */
                       return first.force_reg && !second.force_reg;
                    });
   for (size_t i = 1; i < params.size(); ++i) {
      assert(!params[i].force_reg || params[i - 1].force_reg);
   }

   /* Reverse parameters and start from the end, to make erasing elements cheap */
   std::reverse(params.begin(), params.end());

   while (!params.empty()) {
      /* Note: params.back() is re-evaluated on every use instead of being cached in a reference,
       * because erasing a padding parameter below shifts the last element.
       */
      RegClass rc = params.back().rc;
      bool discardable = params.back().dst_info->discardable || params.back().is_return_param;

      BITSET_WORD* regs;
      if (has_preserved_regs && !discardable)
         regs = preserved_regs;
      else
         regs = clobbered_regs;

      auto next_reg = find_reg(regs, rc);

      /* Force parameter into scratch if it exceeds the ABI's maximum parameter demand */
      if (abi.max_param_demand != RegisterDemand() &&
          (param_demand + Temp(0, rc)).exceeds(abi.max_param_demand))
         next_reg = {};

      if (next_reg && next_reg->reg() % params.back().required_alignment) {
         /* We found a register, but it's not aligned properly. Check if we can add some padding
          * (and ideally stuff a different parameter in there).
          */
         uint16_t required_padding =
            params.back().required_alignment - (next_reg->reg() % params.back().required_alignment);
         uint16_t aligned_size = rc.size() + required_padding;
         for (unsigned i = 0; i < aligned_size; ++i) {
            /* The added padding exceeds the size of the register range. Just bail out at this
             * point.
             * TODO: we could probably try finding a new register, but then we'd need to reevaluate
             * alignment etc...
             */
            if (!BITSET_TEST(regs, next_reg->advance(i * 4).reg())) {
               next_reg = {};
               break;
            }
         }

         /* Try finding a small parameter to put inside the padding space */
         for (auto it2 = std::next(params.rbegin()); next_reg && it2 != params.rend(); ++it2) {
            /* Use the same notion of discardability as for the parameter being assigned, so
             * that a return parameter never ends up in the preserved register set and is
             * counted consistently.
             */
            bool filler_discardable = it2->dst_info->discardable || it2->is_return_param;
            if (it2->rc.type() != params.back().rc.type() || filler_discardable != discardable)
               continue;
            /* The filler has to fit into the padding, and the (unaligned) start register has to
             * satisfy the filler's own alignment requirement.
             */
            if (it2->rc.size() > required_padding ||
                (next_reg->reg() % it2->required_alignment))
               continue;

            param_demand += Temp(0, it2->rc);
            it2->dst_info->def.setPrecolored(*next_reg);
            for (unsigned i = 0; i < it2->rc.size(); ++i)
               BITSET_CLEAR(regs, next_reg->reg() + i);
            if (!it2->is_system_param) {
               ++info.reg_param_count;
               if (discardable)
                  ++info.reg_discardable_param_count;
            }
            /* it2 is a reverse iterator: the element it refers to sits right before base(). */
            params.erase(std::prev(it2.base()));
            break;
         }

         /* Skip the padding (advance() takes a byte count). */
         if (next_reg)
            next_reg = next_reg->advance(required_padding * 4);
      }

      if (next_reg) {
         param_demand += Temp(0, params.back().rc);
         params.back().dst_info->def.setPrecolored(*next_reg);
         BITSET_CLEAR_RANGE(regs, next_reg->reg(), next_reg->reg() + params.back().rc.size() - 1);
         if (!params.back().is_system_param) {
            ++info.reg_param_count;
            if (discardable)
               ++info.reg_discardable_param_count;
         }
      } else {
         /* No register available: spill the parameter to the scratch parameter area. */
         assert(!params.back().force_reg);
         params.back().dst_info->is_reg = false;
         params.back().dst_info->scratch_offset = scratch_param_bytes;
         scratch_param_bytes += rc.size() * 4;
      }
      params.pop_back();
   }

   info.scratch_param_size = scratch_param_bytes;
   if (program)
      program->callee_param_demand = param_demand;
}
/* Builds the callee_info for a function with the given NIR parameters.
 *
 * Besides the NIR parameters, two system values are assigned a register: the return address
 * (s2) and the stack pointer (s1 on GFX9+, an s4 value otherwise). If "program" is NULL, no
 * temporaries are allocated and default-constructed Temps are used instead.
 */
struct callee_info
get_callee_info(amd_gfx_level gfx_level, const ABI& abi, unsigned param_count,
                const nir_parameter* parameters, Program* program, RegisterDemand reg_limit)
{
   struct callee_info info = {};
   info.param_infos.reserve(param_count);

   std::vector<param_assignment_info> assignment_infos;
   assignment_infos.reserve(param_count + 2);

   /* Initializes "slot" as a non-discardable register value of class "rc" and queues it for
    * assignment as a system parameter that must end up in a register. Required and provided
    * alignment are identical for all system values.
    */
   const auto add_system_param = [&](parameter_info& slot, RegClass rc, uint16_t alignment)
   {
      Temp tmp = program ? program->allocateTmp(rc) : Temp();

      slot = {};
      slot.discardable = false;
      slot.is_reg = true;
      slot.def = Definition(tmp);

      param_assignment_info assignment = {};
      assignment.required_alignment = alignment;
      assignment.provided_alignment = alignment;
      assignment.rc = rc;
      assignment.dst_info = &slot;
      assignment.is_return_param = false;
      assignment.is_system_param = true;
      assignment.force_reg = true;
      assignment_infos.push_back(assignment);
   };

   add_system_param(info.return_address, s2, 2);
   if (gfx_level >= GFX9)
      add_system_param(info.stack_ptr, s1, 1);
   else
      add_system_param(info.stack_ptr, s4, 4);

   const size_t first_user_param = assignment_infos.size();

   for (unsigned idx = 0; idx < param_count; ++idx) {
      const nir_parameter& param = parameters[idx];

      /* Every component occupies a whole number of dwords. */
      const RegType type = param.is_uniform ? RegType::sgpr : RegType::vgpr;
      const unsigned byte_size = align(param.bit_size, 32) / 8 * param.num_components;
      const RegClass rc = RegClass(type, byte_size / 4);

      Temp dst = program ? program->allocateTmp(rc) : Temp();

      parameter_info param_info = {};
      param_info.discardable = (param.driver_attributes & ACO_NIR_PARAM_ATTRIB_DISCARDABLE) != 0;
      param_info.is_reg = true;
      param_info.def = Definition(dst);
      info.param_infos.push_back(param_info);

      /* Only SGPR parameters larger than one dword have an alignment requirement. */
      uint16_t required_alignment = 1;
      if (rc.type() == RegType::sgpr && rc.size() > 1)
         required_alignment = rc.size() > 2 ? 4 : 2;

      const uint16_t provided_alignment = rc.size() % 4 == 0 ? 4 : (rc.size() % 2 == 0 ? 2 : 1);

      param_assignment_info assignment = {};
      assignment.required_alignment = required_alignment;
      assignment.provided_alignment = provided_alignment;
      assignment.rc = rc;
      assignment.is_return_param = param.is_return;
      /* Force the first two parameters (callee addresses) into registers - they're assumed to be
       * accessible through a temp.
       */
      assignment.force_reg = idx <= 1;
      assignment_infos.push_back(assignment);
   }

   /* Link the destinations only now that param_infos has reached its final size. */
   for (unsigned idx = 0; idx < param_count; ++idx)
      assignment_infos[first_user_param + idx].dst_info = &info.param_infos[idx];

   find_param_regs(program, abi, info, assignment_infos, reg_limit);

   return info;
}
} // namespace aco

View file

@ -414,6 +414,8 @@ init_context(isel_context* ctx, nir_shader* shader)
ctx->program->allocateRange(impl->ssa_alloc);
RegClass* regclasses = ctx->program->temp_rc.data() + ctx->first_temp_id;
unsigned call_count = 0;
/* TODO: make this recursive to improve compile times */
bool done = false;
while (!done) {
@ -702,12 +704,18 @@ init_context(isel_context* ctx, nir_shader* shader)
regclasses[phi->def.index] = rc;
break;
}
case nir_instr_type_call: {
++call_count;
break;
}
default: break;
}
}
}
}
ctx->call_infos.reserve(call_count);
ctx->program->config->spi_ps_input_ena = ctx->program->info.ps.spi_ps_input_ena;
ctx->program->config->spi_ps_input_addr = ctx->program->info.ps.spi_ps_input_addr;