mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 04:30:10 +01:00
aco: Add call info
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34531>
This commit is contained in:
parent
af812862b7
commit
3667a7b687
4 changed files with 320 additions and 0 deletions
|
|
@ -2324,6 +2324,8 @@ public:
|
|||
bool is_callee = false;
|
||||
bool has_call = false;
|
||||
bool bypass_reg_preservation = false;
|
||||
ABI callee_abi = {};
|
||||
RegisterDemand callee_param_demand = RegisterDemand();
|
||||
|
||||
struct {
|
||||
monotonic_buffer_resource memory;
|
||||
|
|
|
|||
|
|
@ -18,6 +18,31 @@
|
|||
|
||||
namespace aco {
|
||||
|
||||
struct parameter_info {
|
||||
bool discardable;
|
||||
bool is_reg;
|
||||
union {
|
||||
Definition def;
|
||||
unsigned scratch_offset;
|
||||
};
|
||||
};
|
||||
|
||||
struct call_info {
|
||||
nir_call_instr* nir_instr;
|
||||
Instruction* aco_instr;
|
||||
std::vector<parameter_info> return_info;
|
||||
unsigned scratch_param_size;
|
||||
};
|
||||
|
||||
struct callee_info {
|
||||
std::vector<parameter_info> param_infos;
|
||||
parameter_info return_address;
|
||||
parameter_info stack_ptr;
|
||||
unsigned reg_param_count = 0;
|
||||
unsigned reg_discardable_param_count = 0;
|
||||
unsigned scratch_param_size = 0;
|
||||
};
|
||||
|
||||
enum aco_color_output_type {
|
||||
ACO_TYPE_ANY32,
|
||||
ACO_TYPE_FLOAT16,
|
||||
|
|
@ -135,6 +160,13 @@ struct isel_context {
|
|||
uint32_t wqm_instruction_idx;
|
||||
|
||||
BITSET_DECLARE(output_args, AC_MAX_ARGS);
|
||||
|
||||
/* Function information */
|
||||
ABI callee_abi;
|
||||
struct callee_info callee_info;
|
||||
std::vector<call_info> call_infos;
|
||||
Temp next_divergent_pc;
|
||||
Temp next_pc;
|
||||
};
|
||||
|
||||
inline Temp
|
||||
|
|
@ -257,6 +289,10 @@ void build_end_with_regs(isel_context* ctx, std::vector<Operand>& regs);
|
|||
Instruction* add_startpgm(struct isel_context* ctx);
|
||||
void finish_program(isel_context* ctx);
|
||||
|
||||
struct callee_info get_callee_info(amd_gfx_level gfx_level, const ABI& abi, unsigned param_count,
|
||||
const nir_parameter* parameters, Program* program,
|
||||
RegisterDemand reg_limit);
|
||||
|
||||
#define isel_err(...) _isel_err(ctx, __FILE__, __LINE__, __VA_ARGS__)
|
||||
|
||||
void _isel_err(isel_context* ctx, const char* file, unsigned line, const nir_instr* instr,
|
||||
|
|
|
|||
|
|
@ -8,9 +8,12 @@
|
|||
#include "aco_builder.h"
|
||||
#include "aco_instruction_selection.h"
|
||||
#include "aco_ir.h"
|
||||
#include "aco_nir_call_attribs.h"
|
||||
|
||||
#include "util/memstream.h"
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace aco {
|
||||
|
||||
void
|
||||
|
|
@ -820,4 +823,275 @@ finish_program(isel_context* ctx)
|
|||
}
|
||||
}
|
||||
|
||||
struct param_assignment_info {
|
||||
uint16_t required_alignment;
|
||||
uint16_t provided_alignment;
|
||||
RegClass rc;
|
||||
parameter_info* dst_info;
|
||||
bool is_return_param;
|
||||
/* If true, this parameter shouldn't count toward the callee info's reg_param_count because it
|
||||
* receives special handling (e.g. the call return address being a definition instead of an
|
||||
* operand).
|
||||
*/
|
||||
bool is_system_param;
|
||||
/* This parameter must reside in a register. Used for stack pointers as well as s_swappc
|
||||
* operands.
|
||||
*/
|
||||
bool force_reg;
|
||||
};
|
||||
|
||||
std::optional<PhysReg>
|
||||
find_reg(BITSET_WORD* regs, RegClass rc)
|
||||
{
|
||||
uint16_t start = 0;
|
||||
uint16_t size = 128;
|
||||
if (rc.type() == RegType::vgpr) {
|
||||
start = 256;
|
||||
size = 256;
|
||||
}
|
||||
|
||||
uint16_t contiguous_size = 0;
|
||||
for (uint16_t i = 0; i < size; ++i) {
|
||||
if (!BITSET_TEST(regs, start + i)) {
|
||||
contiguous_size = 0;
|
||||
continue;
|
||||
}
|
||||
if (++contiguous_size >= rc.size())
|
||||
return PhysReg{(unsigned)(start + i - contiguous_size + 1)};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
void
|
||||
find_param_regs(Program* program, const ABI& abi, callee_info& info,
|
||||
std::vector<struct param_assignment_info>& params, RegisterDemand reg_limit)
|
||||
{
|
||||
unsigned scratch_param_bytes = 0;
|
||||
RegisterDemand param_demand = RegisterDemand();
|
||||
|
||||
BITSET_DECLARE(preserved_regs, 512);
|
||||
BITSET_DECLARE(clobbered_regs, 512);
|
||||
abi.preservedRegisters(preserved_regs, reg_limit);
|
||||
BITSET_COPY(clobbered_regs, preserved_regs);
|
||||
BITSET_NOT(clobbered_regs);
|
||||
bool has_preserved_regs = !BITSET_IS_EMPTY(preserved_regs);
|
||||
|
||||
std::stable_sort(params.begin(), params.end(),
|
||||
[](const param_assignment_info& first, const param_assignment_info& second)
|
||||
{
|
||||
/* Assign parameters with larger alignments first so we can use parameters
|
||||
* with smaller alignments as padding
|
||||
*/
|
||||
return first.provided_alignment > second.provided_alignment;
|
||||
});
|
||||
std::stable_sort(params.begin(), params.end(),
|
||||
[](const param_assignment_info& first, const param_assignment_info& second)
|
||||
{
|
||||
/* Move parameters forced into registers to the very front so we assign
|
||||
* them first.
|
||||
*/
|
||||
return first.force_reg && !second.force_reg;
|
||||
});
|
||||
for (size_t i = 1; i < params.size(); ++i) {
|
||||
assert(!params[i].force_reg || params[i - 1].force_reg);
|
||||
}
|
||||
/* Reverse parameters and start from the end, to make erasing elements cheap */
|
||||
std::reverse(params.begin(), params.end());
|
||||
|
||||
while (!params.empty()) {
|
||||
RegClass rc = params.back().rc;
|
||||
bool discardable = params.back().dst_info->discardable || params.back().is_return_param;
|
||||
|
||||
BITSET_WORD* regs;
|
||||
if (has_preserved_regs && !discardable)
|
||||
regs = preserved_regs;
|
||||
else
|
||||
regs = clobbered_regs;
|
||||
|
||||
auto next_reg = find_reg(regs, rc);
|
||||
/* Force parameter into scratch if it exceeds the ABI's maximum parameter demand */
|
||||
if (abi.max_param_demand != RegisterDemand() &&
|
||||
(param_demand + Temp(0, rc)).exceeds(abi.max_param_demand))
|
||||
next_reg = {};
|
||||
|
||||
if (next_reg && next_reg->reg() % params.back().required_alignment) {
|
||||
/* We found a register, but it's not aligned properly. Check if we can add some padding
|
||||
* (and ideally stuff a different parameter in there).
|
||||
*/
|
||||
uint16_t required_padding =
|
||||
params.back().required_alignment - (next_reg->reg() % params.back().required_alignment);
|
||||
uint16_t aligned_size = rc.size() + required_padding;
|
||||
for (unsigned i = 0; i < aligned_size; ++i) {
|
||||
/* The added padding exceeds the size of the register range. Just bail out at this
|
||||
* point.
|
||||
* TODO: we could probably try finding a new register, but then we'd need to reevaluate
|
||||
* alignment etc...
|
||||
*/
|
||||
if (!BITSET_TEST(regs, next_reg->advance(i * 4).reg())) {
|
||||
next_reg = {};
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Try finding a small parameter to put inside the padding space */
|
||||
for (auto it2 = std::next(params.rbegin()); next_reg && it2 != params.rend(); ++it2) {
|
||||
if (it2->rc.type() != params.back().rc.type() ||
|
||||
it2->dst_info->discardable != discardable)
|
||||
continue;
|
||||
if (it2->rc.size() > required_padding || (it2->required_alignment % next_reg->reg()))
|
||||
continue;
|
||||
|
||||
param_demand += Temp(0, it2->rc);
|
||||
|
||||
it2->dst_info->def.setPrecolored(*next_reg);
|
||||
for (unsigned i = 0; i < it2->rc.size(); ++i)
|
||||
BITSET_CLEAR(regs, next_reg->reg() + i);
|
||||
if (!it2->is_system_param) {
|
||||
++info.reg_param_count;
|
||||
if (discardable)
|
||||
++info.reg_discardable_param_count;
|
||||
}
|
||||
params.erase(std::prev(it2.base()));
|
||||
break;
|
||||
}
|
||||
if (next_reg)
|
||||
next_reg = next_reg->advance(required_padding * 4);
|
||||
}
|
||||
if (next_reg) {
|
||||
param_demand += Temp(0, params.back().rc);
|
||||
params.back().dst_info->def.setPrecolored(*next_reg);
|
||||
BITSET_CLEAR_RANGE(regs, next_reg->reg(), next_reg->reg() + params.back().rc.size() - 1);
|
||||
if (!params.back().is_system_param) {
|
||||
++info.reg_param_count;
|
||||
if (discardable)
|
||||
++info.reg_discardable_param_count;
|
||||
}
|
||||
} else {
|
||||
assert(!params.back().force_reg);
|
||||
params.back().dst_info->is_reg = false;
|
||||
params.back().dst_info->scratch_offset = scratch_param_bytes;
|
||||
scratch_param_bytes += rc.size() * 4;
|
||||
}
|
||||
params.pop_back();
|
||||
}
|
||||
|
||||
info.scratch_param_size = scratch_param_bytes;
|
||||
if (program)
|
||||
program->callee_param_demand = param_demand;
|
||||
}
|
||||
|
||||
struct callee_info
|
||||
get_callee_info(amd_gfx_level gfx_level, const ABI& abi, unsigned param_count,
|
||||
const nir_parameter* parameters, Program* program, RegisterDemand reg_limit)
|
||||
{
|
||||
struct callee_info info = {};
|
||||
info.param_infos.reserve(param_count);
|
||||
|
||||
std::vector<param_assignment_info> assignment_infos;
|
||||
assignment_infos.reserve(param_count + 2);
|
||||
|
||||
Temp return_addr = program ? program->allocateTmp(s2) : Temp();
|
||||
Definition return_def = Definition(return_addr);
|
||||
info.return_address = {};
|
||||
info.return_address.discardable = false;
|
||||
info.return_address.is_reg = true;
|
||||
info.return_address.def = return_def;
|
||||
|
||||
param_assignment_info return_def_info = {};
|
||||
return_def_info.required_alignment = 2;
|
||||
return_def_info.provided_alignment = 2;
|
||||
return_def_info.rc = s2;
|
||||
return_def_info.dst_info = &info.return_address;
|
||||
return_def_info.is_return_param = false;
|
||||
return_def_info.is_system_param = true;
|
||||
return_def_info.force_reg = true;
|
||||
assignment_infos.push_back(return_def_info);
|
||||
|
||||
if (gfx_level >= GFX9) {
|
||||
Temp stack_ptr = program ? program->allocateTmp(s1) : Temp();
|
||||
Definition stack_def = Definition(stack_ptr);
|
||||
info.stack_ptr = {};
|
||||
info.stack_ptr.discardable = false;
|
||||
info.stack_ptr.is_reg = true;
|
||||
info.stack_ptr.def = stack_def;
|
||||
|
||||
param_assignment_info stack_ptr_info = {};
|
||||
stack_ptr_info.required_alignment = 1;
|
||||
stack_ptr_info.provided_alignment = 1;
|
||||
stack_ptr_info.rc = s1;
|
||||
stack_ptr_info.dst_info = &info.stack_ptr;
|
||||
stack_ptr_info.is_return_param = false;
|
||||
stack_ptr_info.is_system_param = true;
|
||||
stack_ptr_info.force_reg = true;
|
||||
assignment_infos.push_back(stack_ptr_info);
|
||||
} else {
|
||||
Temp scratch_rsrc = program ? program->allocateTmp(s4) : Temp();
|
||||
Definition rsrc_def = Definition(scratch_rsrc);
|
||||
info.stack_ptr = {};
|
||||
info.stack_ptr.discardable = false;
|
||||
info.stack_ptr.is_reg = true;
|
||||
info.stack_ptr.def = rsrc_def;
|
||||
|
||||
param_assignment_info rsrc_info = {};
|
||||
rsrc_info.required_alignment = 4;
|
||||
rsrc_info.provided_alignment = 4;
|
||||
rsrc_info.rc = s4;
|
||||
rsrc_info.dst_info = &info.stack_ptr;
|
||||
rsrc_info.is_return_param = false;
|
||||
rsrc_info.is_system_param = true;
|
||||
rsrc_info.force_reg = true;
|
||||
assignment_infos.push_back(rsrc_info);
|
||||
}
|
||||
|
||||
size_t info_base = assignment_infos.size();
|
||||
|
||||
for (unsigned i = 0; i < param_count; ++i) {
|
||||
RegType type = parameters[i].is_uniform ? RegType::sgpr : RegType::vgpr;
|
||||
unsigned byte_size = align(parameters[i].bit_size, 32) / 8 * parameters[i].num_components;
|
||||
RegClass rc = RegClass(type, byte_size / 4);
|
||||
|
||||
Temp dst = program ? program->allocateTmp(rc) : Temp();
|
||||
Definition def = Definition(dst);
|
||||
|
||||
parameter_info param_info = {};
|
||||
param_info.discardable =
|
||||
!!(parameters[i].driver_attributes & ACO_NIR_PARAM_ATTRIB_DISCARDABLE);
|
||||
param_info.is_reg = true;
|
||||
param_info.def = def;
|
||||
info.param_infos.push_back(param_info);
|
||||
|
||||
uint16_t required_alignment = 1;
|
||||
uint16_t provided_alignment = 1;
|
||||
|
||||
if (rc.type() == RegType::sgpr) {
|
||||
if (rc.size() > 2)
|
||||
required_alignment = 4;
|
||||
else if (rc.size() > 1)
|
||||
required_alignment = 2;
|
||||
}
|
||||
if (rc.size() % 4 == 0)
|
||||
provided_alignment = 4;
|
||||
else if (rc.size() % 2 == 0)
|
||||
provided_alignment = 2;
|
||||
|
||||
param_assignment_info assignment_info = {};
|
||||
assignment_info.required_alignment = required_alignment;
|
||||
assignment_info.provided_alignment = provided_alignment;
|
||||
assignment_info.rc = rc;
|
||||
assignment_info.is_return_param = parameters[i].is_return;
|
||||
/* Force the first two parameters (callee addresses) into registers - they're assumed to be
|
||||
* accessible through a temp.
|
||||
*/
|
||||
assignment_info.force_reg = i <= 1;
|
||||
assignment_infos.push_back(assignment_info);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < param_count; ++i)
|
||||
assignment_infos[info_base + i].dst_info = &info.param_infos[i];
|
||||
|
||||
find_param_regs(program, abi, info, assignment_infos, reg_limit);
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
} // namespace aco
|
||||
|
|
|
|||
|
|
@ -414,6 +414,8 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
ctx->program->allocateRange(impl->ssa_alloc);
|
||||
RegClass* regclasses = ctx->program->temp_rc.data() + ctx->first_temp_id;
|
||||
|
||||
unsigned call_count = 0;
|
||||
|
||||
/* TODO: make this recursive to improve compile times */
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
|
|
@ -702,12 +704,18 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
regclasses[phi->def.index] = rc;
|
||||
break;
|
||||
}
|
||||
case nir_instr_type_call: {
|
||||
++call_count;
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx->call_infos.reserve(call_count);
|
||||
|
||||
ctx->program->config->spi_ps_input_ena = ctx->program->info.ps.spi_ps_input_ena;
|
||||
ctx->program->config->spi_ps_input_addr = ctx->program->info.ps.spi_ps_input_addr;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue