aco: add type information for operands/definitions

This makes per-operand and per-definition type information available for use in the optimizer.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29695>
This commit is contained in:
Georg Lehmann 2025-04-29 15:55:47 +02:00 committed by Marge Bot
parent 6c1c116a0f
commit 3f70433ff0
7 changed files with 944 additions and 852 deletions

View file

@ -528,11 +528,7 @@ can_use_input_modifiers(amd_gfx_level gfx_level, aco_opcode op, int idx)
if (op == aco_opcode::v_mov_b32)
return gfx_level >= GFX10;
if (op == aco_opcode::v_ldexp_f16 || op == aco_opcode::v_ldexp_f32 ||
op == aco_opcode::v_ldexp_f64)
return idx == 0;
return instr_info.can_use_input_modifiers[(int)op];
return instr_info.alu_opcode_infos[(int)op].input_modifiers & BITFIELD_BIT(idx);
}
bool
@ -842,21 +838,12 @@ get_operand_size(aco_ptr<Instruction>& instr, unsigned index)
{
if (instr->isPseudo())
return instr->operands[index].bytes() * 8u;
else if (instr->opcode == aco_opcode::v_mad_u64_u32 ||
instr->opcode == aco_opcode::v_mad_i64_i32)
return index == 2 ? 64 : 32;
else if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
instr->opcode == aco_opcode::v_fma_mixhi_f16)
return instr->valu().opsel_hi[index] ? 16 : 32;
else if (instr->opcode == aco_opcode::v_interp_p10_f16_f32_inreg ||
instr->opcode == aco_opcode::v_interp_p10_rtz_f16_f32_inreg)
return index == 1 ? 32 : 16;
else if (instr->opcode == aco_opcode::v_interp_p2_f16_f32_inreg ||
instr->opcode == aco_opcode::v_interp_p2_rtz_f16_f32_inreg)
return index == 0 ? 16 : 32;
else if (instr->isVALU() || instr->isSALU())
return instr_info.operand_size[(int)instr->opcode];
return instr_info.alu_opcode_infos[(int)instr->opcode].op_types[index].constant_bits();
else
return 0;
}

View file

@ -2362,22 +2362,67 @@ dominates_linear(const Block& parent, const Block& child)
child.linear_dom_post_index <= parent.linear_dom_post_index;
}
struct aco_type {
   aco_base_type base_type : 4;
   uint8_t num_components : 4;
   uint8_t bit_size;

   /* Size of the whole type in bytes: bit_size * num_components, divided by 8
    * with truncation.
    */
   inline unsigned bytes() const
   {
      const unsigned total_bits = bit_size * num_components;
      return total_bits / 8;
   }

   /* Size of the whole type in dwords: bytes() divided by 4 via DIV_ROUND_UP. */
   inline unsigned dwords() const { return DIV_ROUND_UP(bytes(), 4); }

   /* Constant size used by Operand::c16/c32/c64/get_const.
    * A result of 0 means no inline constants are supported for this type.
    */
   inline unsigned constant_bits() const
   {
      const bool single = num_components == 1;
      /* 16-bit types qualify with either one or two components. */
      const bool is_16bit = bit_size == 16 && (single || num_components == 2);
      /* Single-component 32/64-bit types use a constant of their own width. */
      const unsigned wide_bits = (single && (bit_size == 32 || bit_size == 64)) ? bit_size : 0u;

      switch (base_type) {
      case aco_base_type_float: return is_16bit ? 16u : wide_bits;
      case aco_base_type_uint:
         /* 16bit int alu uses 32bit float constants. */
         return is_16bit ? 32u : wide_bits;
      case aco_base_type_int:
         assert(bit_size == 64 && num_components == 1);
         return 64;
      case aco_base_type_none:
      case aco_base_type_lanemask:
      case aco_base_type_bfloat: /* XXX might be useful some day. */
         break;
      }

      return 0;
   }
};
/* Static, per-opcode description of an ALU instruction's operands and
 * definitions. One entry per opcode is stored in Info::alu_opcode_infos and
 * is filled in by the generated opcode table.
 */
struct aco_alu_opcode_info {
/* Number of operands listed for the opcode. The IR validator compares this
 * against instr->operands.size() when it is non-zero.
 */
uint8_t num_operands : 3;
/* Number of definitions listed for the opcode. The IR validator compares this
 * against instr->definitions.size() when it is non-zero.
 */
uint8_t num_defs : 2;
/* Bitmask of operands that can use input modifiers: bit i corresponds to
 * operand i (tested with BITFIELD_BIT(idx)). The field is 3 bits wide, so
 * only operands 0-2 can be represented.
 */
uint8_t input_modifiers : 3;
/* Non-zero if the opcode can use output modifiers; generated from the first
 * definition of the opcode.
 */
uint8_t output_modifiers : 1;
/* Type of each operand, indexed by operand position. */
aco_type op_types[4];
/* Type of each definition, indexed by definition position. */
aco_type def_types[3];
/* Fixed-register requirement of each operand (e.g. fixed_m0, fixed_scc,
 * fixed_exec, fixed_imm, or not_fixed), indexed by operand position.
 */
fixed_reg op_fixed_reg[4];
/* Fixed-register requirement of each definition, indexed by definition
 * position.
 */
fixed_reg def_fixed_reg[3];
};
typedef struct {
const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx11[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx12[static_cast<int>(aco_opcode::num_opcodes)];
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
const char* name[static_cast<int>(aco_opcode::num_opcodes)];
const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
/* sizes used for input/output modifiers and constants */
const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)];
const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)];
const uint32_t definitions[static_cast<int>(aco_opcode::num_opcodes)];
const uint32_t operands[static_cast<int>(aco_opcode::num_opcodes)];
const aco_alu_opcode_info alu_opcode_infos[static_cast<int>(aco_opcode::num_opcodes)];
} Info;
extern const Info instr_info;

File diff suppressed because it is too large Load diff

View file

@ -14,9 +14,15 @@ namespace aco {
<%
opcode_names = sorted(instructions.keys())
can_use_input_modifiers = "".join([instructions[name].input_mod for name in reversed(opcode_names)])
can_use_output_modifiers = "".join([instructions[name].output_mod for name in reversed(opcode_names)])
is_atomic = "".join([instructions[name].is_atomic for name in reversed(opcode_names)])
# Record which operand of each instruction can use modifiers.
operand_mods = dict();
for name in opcode_names:
op_mods = 0
for operand in reversed(instructions[name].operands):
op_mods <<= 1
op_mods |= int(operand.mods)
operand_mods[name] = op_mods
%>
extern const aco::Info instr_info = {
@ -45,8 +51,6 @@ extern const aco::Info instr_info = {
${instructions[name].op.gfx12},
% endfor
},
std::bitset<${len(opcode_names)}>("${can_use_input_modifiers}"),
std::bitset<${len(opcode_names)}>("${can_use_output_modifiers}"),
std::bitset<${len(opcode_names)}>("${is_atomic}"),
{
% for name in opcode_names:
@ -58,11 +62,6 @@ extern const aco::Info instr_info = {
aco::Format::${str(instructions[name].format.name)},
% endfor
},
{
% for name in opcode_names:
${instructions[name].operand_size},
% endfor
},
{
% for name in opcode_names:
instr_class::${instructions[name].cls.value},
@ -70,12 +69,40 @@ extern const aco::Info instr_info = {
},
{
% for name in opcode_names:
${hex(instructions[name].definitions)},
% endfor
},
{
% for name in opcode_names:
${hex(instructions[name].operands)},
{ // ${name}
${len(instructions[name].operands)},
${len(instructions[name].definitions)},
${operand_mods[name]},
${int(instructions[name].definitions[0].mods) if len(instructions[name].definitions) > 0 else 0},
{
% for operand in instructions[name].operands:
{
${operand.base_type.name},
${operand.num_components},
${operand.bit_size},
},
% endfor
},
{
% for definition in instructions[name].definitions:
{
${definition.base_type.name},
${definition.num_components},
${definition.bit_size},
},
% endfor
},
{
% for operand in instructions[name].operands:
${operand.fixed_reg.name},
% endfor
},
{
% for definition in instructions[name].definitions:
${definition.fixed_reg.name},
% endfor
},
},
% endfor
},
};

View file

@ -15,6 +15,18 @@ template = """\
namespace aco {
enum aco_base_type : uint8_t {
% for e in AcoBaseType:
${e.name} = ${hex(e.value)},
% endfor
};
enum fixed_reg : uint8_t {
% for e in FixedReg:
${e.name} = ${hex(e.value)},
% endfor
};
enum class Format : uint16_t {
% for e in Format:
${e.name} = ${hex(e.value)},
@ -41,7 +53,7 @@ enum class aco_opcode : uint16_t {
}
#endif /* _ACO_OPCODES_ */"""
from aco_opcodes import instructions, InstrClass, Format
from aco_opcodes import instructions, InstrClass, Format, AcoBaseType, FixedReg
from mako.template import Template
print(Template(template).render(instructions=instructions, InstrClass=InstrClass, Format=Format))
print(Template(template).render(instructions=instructions, InstrClass=InstrClass, Format=Format, AcoBaseType=AcoBaseType, FixedReg=FixedReg))

View file

@ -1554,7 +1554,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
if (instr->isVALU() || (instr->isVINTRP() && instr->opcode != aco_opcode::v_interp_mov_f32)) {
if (instr_info.can_use_output_modifiers[(int)instr->opcode] || instr->isVINTRP() ||
if (instr_info.alu_opcode_infos[(int)instr->opcode].output_modifiers || instr->isVINTRP() ||
instr->opcode == aco_opcode::v_cndmask_b32) {
bool canonicalized = true;
if (!does_fp_op_flush_denorms(ctx, instr->opcode)) {
@ -2924,7 +2924,7 @@ bool
apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1 ||
!instr_info.can_use_output_modifiers[(int)instr->opcode])
!instr_info.alu_opcode_infos[(int)instr->opcode].output_modifiers)
return false;
bool can_vop3 = can_use_VOP3(ctx, instr);
@ -3300,7 +3300,8 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
!vop3p->opsel_lo[1] && !vop3p->opsel_hi[1]) {
Instruction* op_instr = ctx.info[instr->operands[0].tempId()].parent_instr;
if (op_instr->isVOP3P() && instr_info.can_use_output_modifiers[(int)op_instr->opcode]) {
if (op_instr->isVOP3P() &&
instr_info.alu_opcode_infos[(int)op_instr->opcode].output_modifiers) {
op_instr->valu().clamp = true;
propagate_swizzles(&op_instr->valu(), vop3p->opsel_lo[0], vop3p->opsel_hi[0]);
instr->definitions[0].swapTemp(op_instr->definitions[0]);

View file

@ -144,43 +144,45 @@ validate_ir(Program* program)
"Definition RC not consistent.", instr.get());
}
unsigned pck_defs = instr_info.definitions[(int)instr->opcode];
unsigned pck_ops = instr_info.operands[(int)instr->opcode];
const aco_alu_opcode_info& opcode_info = instr_info.alu_opcode_infos[(int)instr->opcode];
if (pck_defs != 0) {
if (opcode_info.num_defs) {
unsigned num_defs = opcode_info.num_defs;
/* Before GFX10 v_cmpx also writes VCC. */
if (instr->isVOPC() && program->gfx_level < GFX10 && pck_defs == exec_hi)
pck_defs = vcc | (exec_hi << 8);
if (instr->isVOPC() && program->gfx_level < GFX10 &&
opcode_info.def_fixed_reg[0] == fixed_exec)
num_defs = 2;
for (unsigned i = 0; i < 4; i++) {
uint32_t def = (pck_defs >> (i * 8)) & 0xff;
if (def == 0) {
check(i == instr->definitions.size(), "Too many definitions", instr.get());
break;
check(num_defs >= instr->definitions.size(), "Too many definitions", instr.get());
check(num_defs <= instr->definitions.size(), "Too few definitions", instr.get());
num_defs = MIN2(num_defs, instr->definitions.size());
for (unsigned i = 0; i < num_defs; i++) {
aco_type type;
fixed_reg fixed_reg;
if (instr->isVOPC() && program->gfx_level < GFX10 &&
opcode_info.def_fixed_reg[0] == fixed_exec) {
type = opcode_info.def_types[0];
fixed_reg = i == 0 ? not_fixed : fixed_exec;
} else {
check(i < instr->definitions.size(), "Too few definitions", instr.get());
if (i >= instr->definitions.size())
break;
type = opcode_info.def_types[i];
fixed_reg = opcode_info.def_fixed_reg[i];
}
if (def == m0) {
if (fixed_reg == fixed_m0) {
check(instr->definitions[i].isFixed() && instr->definitions[i].physReg() == m0,
"Definition needs m0", instr.get());
} else if (def == scc) {
} else if (fixed_reg == fixed_scc) {
check(instr->definitions[i].isFixed() && instr->definitions[i].physReg() == scc,
"Definition needs scc", instr.get());
} else if (def == exec_hi) {
RegClass rc = instr->isSALU() ? s2 : program->lane_mask;
} else if (fixed_reg == fixed_exec) {
RegClass rc = type.bit_size == 1 ? program->lane_mask
: RegClass::get(RegType::sgpr, type.bytes());
check(instr->definitions[i].isFixed() &&
instr->definitions[i].physReg() == exec &&
instr->definitions[i].regClass() == rc,
"Definition needs exec", instr.get());
} else if (def == exec_lo) {
check(instr->definitions[i].isFixed() &&
instr->definitions[i].physReg() == exec_lo &&
instr->definitions[i].regClass() == s1,
"Definition needs exec_lo", instr.get());
} else if (def == vcc) {
} else if (type.bit_size == 1) {
check(instr->definitions[i].regClass() == program->lane_mask,
"Definition has to be lane mask", instr.get());
check(!instr->definitions[i].isFixed() ||
@ -188,48 +190,45 @@ validate_ir(Program* program)
instr->isSDWA(),
"Definition has to be vcc", instr.get());
} else {
check(instr->definitions[i].size() == def, "Definition has wrong size",
check(instr->definitions[i].size() == type.dwords(), "Definition has wrong size",
instr.get());
}
}
}
if (pck_ops != 0) {
for (unsigned i = 0; i < 4; i++) {
uint32_t op = (pck_ops >> (i * 8)) & 0xff;
if (op == 0) {
check(i == instr->operands.size(), "Too many operands", instr.get());
break;
} else {
check(i < instr->operands.size(), "Too few operands", instr.get());
if (i >= instr->operands.size())
break;
}
if (opcode_info.num_operands) {
unsigned num_ops = opcode_info.num_operands;
check(num_ops >= instr->operands.size(), "Too many operands", instr.get());
check(num_ops <= instr->operands.size(), "Too few operands", instr.get());
num_ops = MIN2(num_ops, instr->operands.size());
if (op == m0) {
for (unsigned i = 0; i < num_ops; i++) {
aco_type type = opcode_info.op_types[i];
fixed_reg fixed_reg = opcode_info.op_fixed_reg[i];
if (fixed_reg == fixed_m0) {
check(instr->operands[i].isFixed() && instr->operands[i].physReg() == m0,
"Operand needs m0", instr.get());
} else if (op == scc) {
} else if (fixed_reg == fixed_scc) {
check(instr->operands[i].isFixed() && instr->operands[i].physReg() == scc,
"Operand needs scc", instr.get());
} else if (op == exec_hi) {
RegClass rc = instr->isSALU() ? s2 : program->lane_mask;
} else if (fixed_reg == fixed_exec) {
RegClass rc = type.bit_size == 1 ? program->lane_mask
: RegClass::get(RegType::sgpr, type.bytes());
check(instr->operands[i].isFixed() && instr->operands[i].physReg() == exec &&
instr->operands[i].hasRegClass() && instr->operands[i].regClass() == rc,
"Operand needs exec", instr.get());
} else if (op == exec_lo) {
check(instr->operands[i].isFixed() && instr->operands[i].physReg() == exec_lo &&
instr->operands[i].hasRegClass() && instr->operands[i].regClass() == s1,
"Operand needs exec_lo", instr.get());
} else if (op == vcc) {
} else if (type.bit_size == 1) {
check(instr->operands[i].hasRegClass() &&
instr->operands[i].regClass() == program->lane_mask,
"Operand has to be lane mask", instr.get());
check(!instr->operands[i].isFixed() || instr->operands[i].physReg() == vcc ||
instr->isVOP3(),
"Operand has to be vcc", instr.get());
} else if (fixed_reg == fixed_imm) {
check(instr->operands[i].isLiteral(), "Operand has to be literal", instr.get());
} else {
check(instr->operands[i].size() == op ||
check(instr->operands[i].size() == type.dwords() ||
(instr->operands[i].isFixed() && instr->operands[i].physReg() >= 128 &&
instr->operands[i].physReg() < 256),
"Operand has wrong size", instr.get());