mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
aco: add more anonymous namespaces
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29740>
This commit is contained in:
parent
c9c483bf02
commit
046414e061
6 changed files with 186 additions and 177 deletions
|
|
@ -15,6 +15,8 @@
|
|||
|
||||
namespace aco {
|
||||
|
||||
namespace {
|
||||
|
||||
enum class pred_defined : uint8_t {
|
||||
undef = 0,
|
||||
const_1 = 1,
|
||||
|
|
@ -378,6 +380,8 @@ lower_subdword_phis(Program* program, Block* block, aco_ptr<Instruction>& phi)
|
|||
return;
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
void
|
||||
lower_phis(Program* program)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@
|
|||
|
||||
namespace aco {
|
||||
|
||||
namespace {
|
||||
|
||||
struct lower_context {
|
||||
Program* program;
|
||||
Block* block;
|
||||
|
|
@ -2088,37 +2090,6 @@ emit_set_mode_from_block(Builder& bld, Program& program, Block* block)
|
|||
emit_set_mode(bld, block->fp_mode, set_round, set_denorm);
|
||||
}
|
||||
|
||||
void
|
||||
hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset)
|
||||
{
|
||||
/* Since we know what the high 16 bits of scratch_hi is, we can set all the high 16
|
||||
* bits in the same instruction that we add the carry.
|
||||
*/
|
||||
Operand hi_add = Operand::c32(0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1));
|
||||
Operand scratch_addr_lo(scratch_addr.physReg(), s1);
|
||||
Operand scratch_addr_hi(scratch_addr_lo.physReg().advance(4), s1);
|
||||
|
||||
if (bld.program->gfx_level >= GFX10) {
|
||||
PhysReg scratch_lo = def.physReg();
|
||||
PhysReg scratch_hi = def.physReg().advance(4);
|
||||
|
||||
bld.sop2(aco_opcode::s_add_u32, Definition(scratch_lo, s1), Definition(scc, s1),
|
||||
scratch_addr_lo, scratch_offset);
|
||||
bld.sop2(aco_opcode::s_addc_u32, Definition(scratch_hi, s1), Definition(scc, s1),
|
||||
scratch_addr_hi, hi_add, Operand(scc, s1));
|
||||
|
||||
/* "((size - 1) << 11) | register" (FLAT_SCRATCH_LO/HI is encoded as register
|
||||
* 20/21) */
|
||||
bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_lo, s1), (31 << 11) | 20);
|
||||
bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_hi, s1), (31 << 11) | 21);
|
||||
} else {
|
||||
bld.sop2(aco_opcode::s_add_u32, Definition(flat_scr_lo, s1), Definition(scc, s1),
|
||||
scratch_addr_lo, scratch_offset);
|
||||
bld.sop2(aco_opcode::s_addc_u32, Definition(flat_scr_hi, s1), Definition(scc, s1),
|
||||
scratch_addr_hi, hi_add, Operand(scc, s1));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
|
|
@ -2179,6 +2150,39 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
|
|||
std::copy(vaddr, vaddr + num_vaddr, std::next(instr->operands.begin(), 3));
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
void
|
||||
hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset)
|
||||
{
|
||||
/* Since we know what the high 16 bits of scratch_hi is, we can set all the high 16
|
||||
* bits in the same instruction that we add the carry.
|
||||
*/
|
||||
Operand hi_add = Operand::c32(0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1));
|
||||
Operand scratch_addr_lo(scratch_addr.physReg(), s1);
|
||||
Operand scratch_addr_hi(scratch_addr_lo.physReg().advance(4), s1);
|
||||
|
||||
if (bld.program->gfx_level >= GFX10) {
|
||||
PhysReg scratch_lo = def.physReg();
|
||||
PhysReg scratch_hi = def.physReg().advance(4);
|
||||
|
||||
bld.sop2(aco_opcode::s_add_u32, Definition(scratch_lo, s1), Definition(scc, s1),
|
||||
scratch_addr_lo, scratch_offset);
|
||||
bld.sop2(aco_opcode::s_addc_u32, Definition(scratch_hi, s1), Definition(scc, s1),
|
||||
scratch_addr_hi, hi_add, Operand(scc, s1));
|
||||
|
||||
/* "((size - 1) << 11) | register" (FLAT_SCRATCH_LO/HI is encoded as register
|
||||
* 20/21) */
|
||||
bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_lo, s1), (31 << 11) | 20);
|
||||
bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_hi, s1), (31 << 11) | 21);
|
||||
} else {
|
||||
bld.sop2(aco_opcode::s_add_u32, Definition(flat_scr_lo, s1), Definition(scc, s1),
|
||||
scratch_addr_lo, scratch_offset);
|
||||
bld.sop2(aco_opcode::s_addc_u32, Definition(flat_scr_hi, s1), Definition(scc, s1),
|
||||
scratch_addr_hi, hi_add, Operand(scc, s1));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
lower_to_hw_instr(Program* program)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
namespace aco {
|
||||
|
||||
namespace {
|
||||
/**
|
||||
* The optimizer works in 4 phases:
|
||||
* (1) The first pass collects information for each ssa-def,
|
||||
|
|
@ -600,12 +601,6 @@ can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
instr->opcode != aco_opcode::v_wmma_i32_16x16x16_iu4;
|
||||
}
|
||||
|
||||
bool
|
||||
is_operand_vgpr(Operand op)
|
||||
{
|
||||
return op.isTemp() && op.getTemp().type() == RegType::vgpr;
|
||||
}
|
||||
|
||||
/* only covers special cases */
|
||||
bool
|
||||
alu_can_accept_constant(const aco_ptr<Instruction>& instr, unsigned operand)
|
||||
|
|
@ -5287,6 +5282,8 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
ctx.instructions.emplace_back(std::move(instr));
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
void
|
||||
optimize(Program* program)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@
|
|||
|
||||
namespace aco {
|
||||
|
||||
namespace {
|
||||
|
||||
const std::array<const char*, num_reduce_ops> reduce_ops = []()
|
||||
{
|
||||
std::array<const char*, num_reduce_ops> ret{};
|
||||
|
|
@ -141,39 +143,6 @@ print_constant(uint8_t reg, FILE* output)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
aco_print_operand(const Operand* operand, FILE* output, unsigned flags)
|
||||
{
|
||||
if (operand->isLiteral() || (operand->isConstant() && operand->bytes() == 1)) {
|
||||
if (operand->bytes() == 1)
|
||||
fprintf(output, "0x%.2x", operand->constantValue());
|
||||
else if (operand->bytes() == 2)
|
||||
fprintf(output, "0x%.4x", operand->constantValue());
|
||||
else
|
||||
fprintf(output, "0x%x", operand->constantValue());
|
||||
} else if (operand->isConstant()) {
|
||||
print_constant(operand->physReg().reg(), output);
|
||||
} else if (operand->isUndefined()) {
|
||||
print_reg_class(operand->regClass(), output);
|
||||
fprintf(output, "undef");
|
||||
} else {
|
||||
if (operand->isLateKill())
|
||||
fprintf(output, "(latekill)");
|
||||
if (operand->is16bit())
|
||||
fprintf(output, "(is16bit)");
|
||||
if (operand->is24bit())
|
||||
fprintf(output, "(is24bit)");
|
||||
if ((flags & print_kill) && operand->isKill())
|
||||
fprintf(output, "(kill)");
|
||||
|
||||
if (!(flags & print_no_ssa))
|
||||
fprintf(output, "%%%d%s", operand->tempId(), operand->isFixed() ? ":" : "");
|
||||
|
||||
if (operand->isFixed())
|
||||
print_physReg(operand->physReg(), operand->bytes(), output, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_definition(const Definition* definition, FILE* output, unsigned flags)
|
||||
{
|
||||
|
|
@ -842,95 +811,6 @@ print_vopd_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* o
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output,
|
||||
unsigned flags)
|
||||
{
|
||||
if (instr->isVOPD()) {
|
||||
print_vopd_instr(gfx_level, instr, output, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!instr->definitions.empty()) {
|
||||
for (unsigned i = 0; i < instr->definitions.size(); ++i) {
|
||||
print_definition(&instr->definitions[i], output, flags);
|
||||
if (i + 1 != instr->definitions.size())
|
||||
fprintf(output, ", ");
|
||||
}
|
||||
fprintf(output, " = ");
|
||||
}
|
||||
fprintf(output, "%s", instr_info.name[(int)instr->opcode]);
|
||||
if (instr->operands.size()) {
|
||||
const unsigned num_operands = instr->operands.size();
|
||||
bitarray8 abs = 0;
|
||||
bitarray8 neg = 0;
|
||||
bitarray8 neg_lo = 0;
|
||||
bitarray8 neg_hi = 0;
|
||||
bitarray8 opsel = 0;
|
||||
bitarray8 f2f32 = 0;
|
||||
bitarray8 opsel_lo = 0;
|
||||
bitarray8 opsel_hi = -1;
|
||||
|
||||
if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
|
||||
instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
|
||||
instr->opcode == aco_opcode::v_fma_mixhi_f16) {
|
||||
const VALU_instruction& vop3p = instr->valu();
|
||||
abs = vop3p.abs;
|
||||
neg = vop3p.neg;
|
||||
f2f32 = vop3p.opsel_hi;
|
||||
opsel = f2f32 & vop3p.opsel_lo;
|
||||
} else if (instr->isVOP3P()) {
|
||||
const VALU_instruction& vop3p = instr->valu();
|
||||
neg = vop3p.neg_lo & vop3p.neg_hi;
|
||||
neg_lo = vop3p.neg_lo & ~neg;
|
||||
neg_hi = vop3p.neg_hi & ~neg;
|
||||
opsel_lo = vop3p.opsel_lo;
|
||||
opsel_hi = vop3p.opsel_hi;
|
||||
} else if (instr->isVALU() && instr->opcode != aco_opcode::v_permlane16_b32 &&
|
||||
instr->opcode != aco_opcode::v_permlanex16_b32) {
|
||||
const VALU_instruction& valu = instr->valu();
|
||||
abs = valu.abs;
|
||||
neg = valu.neg;
|
||||
opsel = valu.opsel;
|
||||
}
|
||||
for (unsigned i = 0; i < num_operands; ++i) {
|
||||
if (i)
|
||||
fprintf(output, ", ");
|
||||
else
|
||||
fprintf(output, " ");
|
||||
|
||||
if (i < 3) {
|
||||
if (neg[i])
|
||||
fprintf(output, "-");
|
||||
if (abs[i])
|
||||
fprintf(output, "|");
|
||||
if (opsel[i])
|
||||
fprintf(output, "hi(");
|
||||
else if (f2f32[i])
|
||||
fprintf(output, "lo(");
|
||||
}
|
||||
|
||||
aco_print_operand(&instr->operands[i], output, flags);
|
||||
|
||||
if (i < 3) {
|
||||
if (f2f32[i] || opsel[i])
|
||||
fprintf(output, ")");
|
||||
if (abs[i])
|
||||
fprintf(output, "|");
|
||||
|
||||
if (opsel_lo[i] || !opsel_hi[i])
|
||||
fprintf(output, ".%c%c", opsel_lo[i] ? 'y' : 'x', opsel_hi[i] ? 'y' : 'x');
|
||||
|
||||
if (neg_lo[i])
|
||||
fprintf(output, "*[-1,1]");
|
||||
if (neg_hi[i])
|
||||
fprintf(output, "*[1,-1]");
|
||||
}
|
||||
}
|
||||
}
|
||||
print_instr_format_specific(gfx_level, instr, output);
|
||||
}
|
||||
|
||||
static void
|
||||
print_block_kind(uint16_t kind, FILE* output)
|
||||
{
|
||||
|
|
@ -1046,6 +926,130 @@ aco_print_block(enum amd_gfx_level gfx_level, const Block* block, FILE* output,
|
|||
}
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
void
|
||||
aco_print_operand(const Operand* operand, FILE* output, unsigned flags)
|
||||
{
|
||||
if (operand->isLiteral() || (operand->isConstant() && operand->bytes() == 1)) {
|
||||
if (operand->bytes() == 1)
|
||||
fprintf(output, "0x%.2x", operand->constantValue());
|
||||
else if (operand->bytes() == 2)
|
||||
fprintf(output, "0x%.4x", operand->constantValue());
|
||||
else
|
||||
fprintf(output, "0x%x", operand->constantValue());
|
||||
} else if (operand->isConstant()) {
|
||||
print_constant(operand->physReg().reg(), output);
|
||||
} else if (operand->isUndefined()) {
|
||||
print_reg_class(operand->regClass(), output);
|
||||
fprintf(output, "undef");
|
||||
} else {
|
||||
if (operand->isLateKill())
|
||||
fprintf(output, "(latekill)");
|
||||
if (operand->is16bit())
|
||||
fprintf(output, "(is16bit)");
|
||||
if (operand->is24bit())
|
||||
fprintf(output, "(is24bit)");
|
||||
if ((flags & print_kill) && operand->isKill())
|
||||
fprintf(output, "(kill)");
|
||||
|
||||
if (!(flags & print_no_ssa))
|
||||
fprintf(output, "%%%d%s", operand->tempId(), operand->isFixed() ? ":" : "");
|
||||
|
||||
if (operand->isFixed())
|
||||
print_physReg(operand->physReg(), operand->bytes(), output, flags);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output,
|
||||
unsigned flags)
|
||||
{
|
||||
if (instr->isVOPD()) {
|
||||
print_vopd_instr(gfx_level, instr, output, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!instr->definitions.empty()) {
|
||||
for (unsigned i = 0; i < instr->definitions.size(); ++i) {
|
||||
print_definition(&instr->definitions[i], output, flags);
|
||||
if (i + 1 != instr->definitions.size())
|
||||
fprintf(output, ", ");
|
||||
}
|
||||
fprintf(output, " = ");
|
||||
}
|
||||
fprintf(output, "%s", instr_info.name[(int)instr->opcode]);
|
||||
if (instr->operands.size()) {
|
||||
const unsigned num_operands = instr->operands.size();
|
||||
bitarray8 abs = 0;
|
||||
bitarray8 neg = 0;
|
||||
bitarray8 neg_lo = 0;
|
||||
bitarray8 neg_hi = 0;
|
||||
bitarray8 opsel = 0;
|
||||
bitarray8 f2f32 = 0;
|
||||
bitarray8 opsel_lo = 0;
|
||||
bitarray8 opsel_hi = -1;
|
||||
|
||||
if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
|
||||
instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
|
||||
instr->opcode == aco_opcode::v_fma_mixhi_f16) {
|
||||
const VALU_instruction& vop3p = instr->valu();
|
||||
abs = vop3p.abs;
|
||||
neg = vop3p.neg;
|
||||
f2f32 = vop3p.opsel_hi;
|
||||
opsel = f2f32 & vop3p.opsel_lo;
|
||||
} else if (instr->isVOP3P()) {
|
||||
const VALU_instruction& vop3p = instr->valu();
|
||||
neg = vop3p.neg_lo & vop3p.neg_hi;
|
||||
neg_lo = vop3p.neg_lo & ~neg;
|
||||
neg_hi = vop3p.neg_hi & ~neg;
|
||||
opsel_lo = vop3p.opsel_lo;
|
||||
opsel_hi = vop3p.opsel_hi;
|
||||
} else if (instr->isVALU() && instr->opcode != aco_opcode::v_permlane16_b32 &&
|
||||
instr->opcode != aco_opcode::v_permlanex16_b32) {
|
||||
const VALU_instruction& valu = instr->valu();
|
||||
abs = valu.abs;
|
||||
neg = valu.neg;
|
||||
opsel = valu.opsel;
|
||||
}
|
||||
for (unsigned i = 0; i < num_operands; ++i) {
|
||||
if (i)
|
||||
fprintf(output, ", ");
|
||||
else
|
||||
fprintf(output, " ");
|
||||
|
||||
if (i < 3) {
|
||||
if (neg[i])
|
||||
fprintf(output, "-");
|
||||
if (abs[i])
|
||||
fprintf(output, "|");
|
||||
if (opsel[i])
|
||||
fprintf(output, "hi(");
|
||||
else if (f2f32[i])
|
||||
fprintf(output, "lo(");
|
||||
}
|
||||
|
||||
aco_print_operand(&instr->operands[i], output, flags);
|
||||
|
||||
if (i < 3) {
|
||||
if (f2f32[i] || opsel[i])
|
||||
fprintf(output, ")");
|
||||
if (abs[i])
|
||||
fprintf(output, "|");
|
||||
|
||||
if (opsel_lo[i] || !opsel_hi[i])
|
||||
fprintf(output, ".%c%c", opsel_lo[i] ? 'y' : 'x', opsel_hi[i] ? 'y' : 'x');
|
||||
|
||||
if (neg_lo[i])
|
||||
fprintf(output, "*[-1,1]");
|
||||
if (neg_hi[i])
|
||||
fprintf(output, "*[1,-1]");
|
||||
}
|
||||
}
|
||||
}
|
||||
print_instr_format_specific(gfx_level, instr, output);
|
||||
}
|
||||
|
||||
void
|
||||
aco_print_program(const Program* program, FILE* output, unsigned flags)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@
|
|||
|
||||
namespace aco {
|
||||
|
||||
namespace {
|
||||
|
||||
enum MoveResult {
|
||||
move_success,
|
||||
move_fail_ssa,
|
||||
|
|
@ -1228,6 +1230,8 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block)
|
|||
}
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
void
|
||||
schedule_program(Program* program)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -15,16 +15,7 @@
|
|||
|
||||
namespace aco {
|
||||
|
||||
/* sgpr_presched/vgpr_presched */
|
||||
void
|
||||
collect_presched_stats(Program* program)
|
||||
{
|
||||
RegisterDemand presched_demand;
|
||||
for (Block& block : program->blocks)
|
||||
presched_demand.update(block.register_demand);
|
||||
program->statistics[aco_statistic_sgpr_presched] = presched_demand.sgpr;
|
||||
program->statistics[aco_statistic_vgpr_presched] = presched_demand.vgpr;
|
||||
}
|
||||
namespace {
|
||||
|
||||
class BlockCycleEstimator {
|
||||
public:
|
||||
|
|
@ -50,7 +41,6 @@ public:
|
|||
int32_t reg_available[512] = {0};
|
||||
std::deque<int32_t> mem_ops[wait_type_num];
|
||||
|
||||
unsigned predict_cost(aco_ptr<Instruction>& instr);
|
||||
void add(aco_ptr<Instruction>& instr);
|
||||
void join(const BlockCycleEstimator& other);
|
||||
|
||||
|
|
@ -377,13 +367,6 @@ BlockCycleEstimator::get_dependency_cost(aco_ptr<Instruction>& instr)
|
|||
return deps_available - cur_cycle;
|
||||
}
|
||||
|
||||
unsigned
|
||||
BlockCycleEstimator::predict_cost(aco_ptr<Instruction>& instr)
|
||||
{
|
||||
int32_t dep = get_dependency_cost(instr);
|
||||
return dep + std::max(cycles_until_res_available(instr) - dep, 0);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_vector(aco_opcode op)
|
||||
{
|
||||
|
|
@ -476,6 +459,19 @@ BlockCycleEstimator::join(const BlockCycleEstimator& pred)
|
|||
}
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
/* sgpr_presched/vgpr_presched */
|
||||
void
|
||||
collect_presched_stats(Program* program)
|
||||
{
|
||||
RegisterDemand presched_demand;
|
||||
for (Block& block : program->blocks)
|
||||
presched_demand.update(block.register_demand);
|
||||
program->statistics[aco_statistic_sgpr_presched] = presched_demand.sgpr;
|
||||
program->statistics[aco_statistic_vgpr_presched] = presched_demand.vgpr;
|
||||
}
|
||||
|
||||
/* instructions/branches/vmem_clauses/smem_clauses/cycles */
|
||||
void
|
||||
collect_preasm_stats(Program* program)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue