aco: add more anonymous namespaces

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29740>
This commit is contained in:
Georg Lehmann 2024-06-15 16:17:29 +02:00 committed by Marge Bot
parent c9c483bf02
commit 046414e061
6 changed files with 186 additions and 177 deletions

View file

@ -15,6 +15,8 @@
namespace aco {
namespace {
enum class pred_defined : uint8_t {
undef = 0,
const_1 = 1,
@ -378,6 +380,8 @@ lower_subdword_phis(Program* program, Block* block, aco_ptr<Instruction>& phi)
return;
}
} /* end namespace */
void
lower_phis(Program* program)
{

View file

@ -14,6 +14,8 @@
namespace aco {
namespace {
struct lower_context {
Program* program;
Block* block;
@ -2088,37 +2090,6 @@ emit_set_mode_from_block(Builder& bld, Program& program, Block* block)
emit_set_mode(bld, block->fp_mode, set_round, set_denorm);
}
/* Emit the instructions that compute the 64-bit flat scratch value.
 *
 * The low dword is scratch_addr's low register plus scratch_offset; the high
 * dword is scratch_addr's high register plus a constant plus the carry.
 * On GFX10+ the sum is produced in the register pair starting at def and then
 * written out with s_setreg_b32; on older levels it is produced directly in
 * flat_scr_lo/flat_scr_hi.
 */
void
hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset)
{
   /* The input address occupies two consecutive SGPRs. */
   Operand addr_lo(scratch_addr.physReg(), s1);
   Operand addr_hi(addr_lo.physReg().advance(4), s1);

   /* The high 16 bits of scratch_hi are known, so they are folded into the
    * same instruction that adds the carry.
    */
   Operand carry_add = Operand::c32(0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1));

   /* Pick where the 64-bit sum is accumulated. */
   const bool via_setreg = bld.program->gfx_level >= GFX10;
   PhysReg dst_lo = flat_scr_lo;
   PhysReg dst_hi = flat_scr_hi;
   if (via_setreg) {
      dst_lo = def.physReg();
      dst_hi = def.physReg().advance(4);
   }

   bld.sop2(aco_opcode::s_add_u32, Definition(dst_lo, s1), Definition(scc, s1), addr_lo,
            scratch_offset);
   bld.sop2(aco_opcode::s_addc_u32, Definition(dst_hi, s1), Definition(scc, s1), addr_hi,
            carry_add, Operand(scc, s1));

   if (!via_setreg)
      return;

   /* The immediate is "((size - 1) << 11) | register"; FLAT_SCRATCH_LO/HI are
    * encoded as registers 20/21.
    */
   bld.sopk(aco_opcode::s_setreg_b32, Operand(dst_lo, s1), (31 << 11) | 20);
   bld.sopk(aco_opcode::s_setreg_b32, Operand(dst_hi, s1), (31 << 11) | 21);
}
void
lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
{
@ -2179,6 +2150,39 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
std::copy(vaddr, vaddr + num_vaddr, std::next(instr->operands.begin(), 3));
}
} /* end namespace */
/* Emit the instructions that compute the 64-bit flat scratch value.
 *
 * The low dword is scratch_addr's low register plus scratch_offset; the high
 * dword is scratch_addr's high register plus a constant plus the carry.
 * On GFX10+ the sum is produced in the register pair starting at def and then
 * written out with s_setreg_b32; on older levels it is produced directly in
 * flat_scr_lo/flat_scr_hi.
 */
void
hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset)
{
   /* The input address occupies two consecutive SGPRs. */
   Operand addr_lo(scratch_addr.physReg(), s1);
   Operand addr_hi(addr_lo.physReg().advance(4), s1);

   /* The high 16 bits of scratch_hi are known, so they are folded into the
    * same instruction that adds the carry.
    */
   Operand carry_add = Operand::c32(0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1));

   /* Pick where the 64-bit sum is accumulated. */
   const bool via_setreg = bld.program->gfx_level >= GFX10;
   PhysReg dst_lo = flat_scr_lo;
   PhysReg dst_hi = flat_scr_hi;
   if (via_setreg) {
      dst_lo = def.physReg();
      dst_hi = def.physReg().advance(4);
   }

   bld.sop2(aco_opcode::s_add_u32, Definition(dst_lo, s1), Definition(scc, s1), addr_lo,
            scratch_offset);
   bld.sop2(aco_opcode::s_addc_u32, Definition(dst_hi, s1), Definition(scc, s1), addr_hi,
            carry_add, Operand(scc, s1));

   if (!via_setreg)
      return;

   /* The immediate is "((size - 1) << 11) | register"; FLAT_SCRATCH_LO/HI are
    * encoded as registers 20/21.
    */
   bld.sopk(aco_opcode::s_setreg_b32, Operand(dst_lo, s1), (31 << 11) | 20);
   bld.sopk(aco_opcode::s_setreg_b32, Operand(dst_hi, s1), (31 << 11) | 21);
}
void
lower_to_hw_instr(Program* program)
{

View file

@ -16,6 +16,7 @@
namespace aco {
namespace {
/**
* The optimizer works in 4 phases:
* (1) The first pass collects information for each ssa-def,
@ -600,12 +601,6 @@ can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
instr->opcode != aco_opcode::v_wmma_i32_16x16x16_iu4;
}
/* Returns true only for operands that hold a temporary of register type vgpr. */
bool
is_operand_vgpr(Operand op)
{
   /* Non-temporary operands never count as VGPR operands here. */
   if (!op.isTemp())
      return false;

   return op.getTemp().type() == RegType::vgpr;
}
/* only covers special cases */
bool
alu_can_accept_constant(const aco_ptr<Instruction>& instr, unsigned operand)
@ -5287,6 +5282,8 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
ctx.instructions.emplace_back(std::move(instr));
}
} /* end namespace */
void
optimize(Program* program)
{

View file

@ -14,6 +14,8 @@
namespace aco {
namespace {
const std::array<const char*, num_reduce_ops> reduce_ops = []()
{
std::array<const char*, num_reduce_ops> ret{};
@ -141,39 +143,6 @@ print_constant(uint8_t reg, FILE* output)
}
}
/* Write a textual form of a single operand to `output`.
 *
 * Literals and 1-byte constants are printed as hex padded to their byte width,
 * other constants go through print_constant(), undefined operands print their
 * register class followed by "undef", and everything else prints its flag
 * annotations, SSA id (unless print_no_ssa is set) and fixed register.
 */
void
aco_print_operand(const Operand* operand, FILE* output, unsigned flags)
{
   const bool print_as_hex =
      operand->isLiteral() || (operand->isConstant() && operand->bytes() == 1);

   if (print_as_hex) {
      /* Zero-pad to the width implied by the operand size. */
      switch (operand->bytes()) {
      case 1: fprintf(output, "0x%.2x", operand->constantValue()); break;
      case 2: fprintf(output, "0x%.4x", operand->constantValue()); break;
      default: fprintf(output, "0x%x", operand->constantValue()); break;
      }
      return;
   }

   if (operand->isConstant()) {
      print_constant(operand->physReg().reg(), output);
      return;
   }

   if (operand->isUndefined()) {
      print_reg_class(operand->regClass(), output);
      fprintf(output, "undef");
      return;
   }

   /* Remaining operands: flag annotations first. */
   if (operand->isLateKill())
      fprintf(output, "(latekill)");
   if (operand->is16bit())
      fprintf(output, "(is16bit)");
   if (operand->is24bit())
      fprintf(output, "(is24bit)");
   if ((flags & print_kill) && operand->isKill())
      fprintf(output, "(kill)");

   /* The ":" separator is only emitted when a fixed register follows the id. */
   const bool fixed = operand->isFixed();
   if (!(flags & print_no_ssa))
      fprintf(output, "%%%d%s", operand->tempId(), fixed ? ":" : "");

   if (fixed)
      print_physReg(operand->physReg(), operand->bytes(), output, flags);
}
static void
print_definition(const Definition* definition, FILE* output, unsigned flags)
{
@ -842,95 +811,6 @@ print_vopd_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* o
}
}
void
aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output,
unsigned flags)
{
if (instr->isVOPD()) {
print_vopd_instr(gfx_level, instr, output, flags);
return;
}
if (!instr->definitions.empty()) {
for (unsigned i = 0; i < instr->definitions.size(); ++i) {
print_definition(&instr->definitions[i], output, flags);
if (i + 1 != instr->definitions.size())
fprintf(output, ", ");
}
fprintf(output, " = ");
}
fprintf(output, "%s", instr_info.name[(int)instr->opcode]);
if (instr->operands.size()) {
const unsigned num_operands = instr->operands.size();
bitarray8 abs = 0;
bitarray8 neg = 0;
bitarray8 neg_lo = 0;
bitarray8 neg_hi = 0;
bitarray8 opsel = 0;
bitarray8 f2f32 = 0;
bitarray8 opsel_lo = 0;
bitarray8 opsel_hi = -1;
if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
instr->opcode == aco_opcode::v_fma_mixhi_f16) {
const VALU_instruction& vop3p = instr->valu();
abs = vop3p.abs;
neg = vop3p.neg;
f2f32 = vop3p.opsel_hi;
opsel = f2f32 & vop3p.opsel_lo;
} else if (instr->isVOP3P()) {
const VALU_instruction& vop3p = instr->valu();
neg = vop3p.neg_lo & vop3p.neg_hi;
neg_lo = vop3p.neg_lo & ~neg;
neg_hi = vop3p.neg_hi & ~neg;
opsel_lo = vop3p.opsel_lo;
opsel_hi = vop3p.opsel_hi;
} else if (instr->isVALU() && instr->opcode != aco_opcode::v_permlane16_b32 &&
instr->opcode != aco_opcode::v_permlanex16_b32) {
const VALU_instruction& valu = instr->valu();
abs = valu.abs;
neg = valu.neg;
opsel = valu.opsel;
}
for (unsigned i = 0; i < num_operands; ++i) {
if (i)
fprintf(output, ", ");
else
fprintf(output, " ");
if (i < 3) {
if (neg[i])
fprintf(output, "-");
if (abs[i])
fprintf(output, "|");
if (opsel[i])
fprintf(output, "hi(");
else if (f2f32[i])
fprintf(output, "lo(");
}
aco_print_operand(&instr->operands[i], output, flags);
if (i < 3) {
if (f2f32[i] || opsel[i])
fprintf(output, ")");
if (abs[i])
fprintf(output, "|");
if (opsel_lo[i] || !opsel_hi[i])
fprintf(output, ".%c%c", opsel_lo[i] ? 'y' : 'x', opsel_hi[i] ? 'y' : 'x');
if (neg_lo[i])
fprintf(output, "*[-1,1]");
if (neg_hi[i])
fprintf(output, "*[1,-1]");
}
}
}
print_instr_format_specific(gfx_level, instr, output);
}
static void
print_block_kind(uint16_t kind, FILE* output)
{
@ -1046,6 +926,130 @@ aco_print_block(enum amd_gfx_level gfx_level, const Block* block, FILE* output,
}
}
} /* end namespace */
/* Write a textual form of a single operand to `output`.
 *
 * Literals and 1-byte constants are printed as hex padded to their byte width,
 * other constants go through print_constant(), undefined operands print their
 * register class followed by "undef", and everything else prints its flag
 * annotations, SSA id (unless print_no_ssa is set) and fixed register.
 */
void
aco_print_operand(const Operand* operand, FILE* output, unsigned flags)
{
   const bool print_as_hex =
      operand->isLiteral() || (operand->isConstant() && operand->bytes() == 1);

   if (print_as_hex) {
      /* Zero-pad to the width implied by the operand size. */
      switch (operand->bytes()) {
      case 1: fprintf(output, "0x%.2x", operand->constantValue()); break;
      case 2: fprintf(output, "0x%.4x", operand->constantValue()); break;
      default: fprintf(output, "0x%x", operand->constantValue()); break;
      }
      return;
   }

   if (operand->isConstant()) {
      print_constant(operand->physReg().reg(), output);
      return;
   }

   if (operand->isUndefined()) {
      print_reg_class(operand->regClass(), output);
      fprintf(output, "undef");
      return;
   }

   /* Remaining operands: flag annotations first. */
   if (operand->isLateKill())
      fprintf(output, "(latekill)");
   if (operand->is16bit())
      fprintf(output, "(is16bit)");
   if (operand->is24bit())
      fprintf(output, "(is24bit)");
   if ((flags & print_kill) && operand->isKill())
      fprintf(output, "(kill)");

   /* The ":" separator is only emitted when a fixed register follows the id. */
   const bool fixed = operand->isFixed();
   if (!(flags & print_no_ssa))
      fprintf(output, "%%%d%s", operand->tempId(), fixed ? ":" : "");

   if (fixed)
      print_physReg(operand->physReg(), operand->bytes(), output, flags);
}
void
aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output,
unsigned flags)
{
if (instr->isVOPD()) {
print_vopd_instr(gfx_level, instr, output, flags);
return;
}
if (!instr->definitions.empty()) {
for (unsigned i = 0; i < instr->definitions.size(); ++i) {
print_definition(&instr->definitions[i], output, flags);
if (i + 1 != instr->definitions.size())
fprintf(output, ", ");
}
fprintf(output, " = ");
}
fprintf(output, "%s", instr_info.name[(int)instr->opcode]);
if (instr->operands.size()) {
const unsigned num_operands = instr->operands.size();
bitarray8 abs = 0;
bitarray8 neg = 0;
bitarray8 neg_lo = 0;
bitarray8 neg_hi = 0;
bitarray8 opsel = 0;
bitarray8 f2f32 = 0;
bitarray8 opsel_lo = 0;
bitarray8 opsel_hi = -1;
if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
instr->opcode == aco_opcode::v_fma_mixhi_f16) {
const VALU_instruction& vop3p = instr->valu();
abs = vop3p.abs;
neg = vop3p.neg;
f2f32 = vop3p.opsel_hi;
opsel = f2f32 & vop3p.opsel_lo;
} else if (instr->isVOP3P()) {
const VALU_instruction& vop3p = instr->valu();
neg = vop3p.neg_lo & vop3p.neg_hi;
neg_lo = vop3p.neg_lo & ~neg;
neg_hi = vop3p.neg_hi & ~neg;
opsel_lo = vop3p.opsel_lo;
opsel_hi = vop3p.opsel_hi;
} else if (instr->isVALU() && instr->opcode != aco_opcode::v_permlane16_b32 &&
instr->opcode != aco_opcode::v_permlanex16_b32) {
const VALU_instruction& valu = instr->valu();
abs = valu.abs;
neg = valu.neg;
opsel = valu.opsel;
}
for (unsigned i = 0; i < num_operands; ++i) {
if (i)
fprintf(output, ", ");
else
fprintf(output, " ");
if (i < 3) {
if (neg[i])
fprintf(output, "-");
if (abs[i])
fprintf(output, "|");
if (opsel[i])
fprintf(output, "hi(");
else if (f2f32[i])
fprintf(output, "lo(");
}
aco_print_operand(&instr->operands[i], output, flags);
if (i < 3) {
if (f2f32[i] || opsel[i])
fprintf(output, ")");
if (abs[i])
fprintf(output, "|");
if (opsel_lo[i] || !opsel_hi[i])
fprintf(output, ".%c%c", opsel_lo[i] ? 'y' : 'x', opsel_hi[i] ? 'y' : 'x');
if (neg_lo[i])
fprintf(output, "*[-1,1]");
if (neg_hi[i])
fprintf(output, "*[1,-1]");
}
}
}
print_instr_format_specific(gfx_level, instr, output);
}
void
aco_print_program(const Program* program, FILE* output, unsigned flags)
{

View file

@ -28,6 +28,8 @@
namespace aco {
namespace {
enum MoveResult {
move_success,
move_fail_ssa,
@ -1228,6 +1230,8 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block)
}
}
} /* end namespace */
void
schedule_program(Program* program)
{

View file

@ -15,16 +15,7 @@
namespace aco {
/* Fills the sgpr_presched/vgpr_presched statistics: every block's register
 * demand is folded into one RegisterDemand via update(), and its sgpr/vgpr
 * fields are stored in the program statistics.
 */
void
collect_presched_stats(Program* program)
{
   RegisterDemand combined;

   for (auto it = program->blocks.begin(); it != program->blocks.end(); ++it)
      combined.update(it->register_demand);

   program->statistics[aco_statistic_sgpr_presched] = combined.sgpr;
   program->statistics[aco_statistic_vgpr_presched] = combined.vgpr;
}
namespace {
class BlockCycleEstimator {
public:
@ -50,7 +41,6 @@ public:
int32_t reg_available[512] = {0};
std::deque<int32_t> mem_ops[wait_type_num];
unsigned predict_cost(aco_ptr<Instruction>& instr);
void add(aco_ptr<Instruction>& instr);
void join(const BlockCycleEstimator& other);
@ -377,13 +367,6 @@ BlockCycleEstimator::get_dependency_cost(aco_ptr<Instruction>& instr)
return deps_available - cur_cycle;
}
/* Estimated cost of `instr`: its dependency cost plus any additional cycles
 * (never negative) until the resource it needs becomes available.
 */
unsigned
BlockCycleEstimator::predict_cost(aco_ptr<Instruction>& instr)
{
   const int32_t dep_cost = get_dependency_cost(instr);

   /* Only resource wait that extends past the dependency wait adds to the cost. */
   const int32_t extra_wait = std::max(cycles_until_res_available(instr) - dep_cost, 0);

   return dep_cost + extra_wait;
}
static bool
is_vector(aco_opcode op)
{
@ -476,6 +459,19 @@ BlockCycleEstimator::join(const BlockCycleEstimator& pred)
}
}
} /* end namespace */
/* Fills the sgpr_presched/vgpr_presched statistics: every block's register
 * demand is folded into one RegisterDemand via update(), and its sgpr/vgpr
 * fields are stored in the program statistics.
 */
void
collect_presched_stats(Program* program)
{
   RegisterDemand combined;

   for (auto it = program->blocks.begin(); it != program->blocks.end(); ++it)
      combined.update(it->register_demand);

   program->statistics[aco_statistic_sgpr_presched] = combined.sgpr;
   program->statistics[aco_statistic_vgpr_presched] = combined.vgpr;
}
/* instructions/branches/vmem_clauses/smem_clauses/cycles */
void
collect_preasm_stats(Program* program)