mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
aco: add type information for operands/definitions
More information available for use in the optimizer.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29695>
This commit is contained in:
parent
6c1c116a0f
commit
3f70433ff0
7 changed files with 944 additions and 852 deletions
|
|
@ -528,11 +528,7 @@ can_use_input_modifiers(amd_gfx_level gfx_level, aco_opcode op, int idx)
|
|||
if (op == aco_opcode::v_mov_b32)
|
||||
return gfx_level >= GFX10;
|
||||
|
||||
if (op == aco_opcode::v_ldexp_f16 || op == aco_opcode::v_ldexp_f32 ||
|
||||
op == aco_opcode::v_ldexp_f64)
|
||||
return idx == 0;
|
||||
|
||||
return instr_info.can_use_input_modifiers[(int)op];
|
||||
return instr_info.alu_opcode_infos[(int)op].input_modifiers & BITFIELD_BIT(idx);
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -842,21 +838,12 @@ get_operand_size(aco_ptr<Instruction>& instr, unsigned index)
|
|||
{
|
||||
if (instr->isPseudo())
|
||||
return instr->operands[index].bytes() * 8u;
|
||||
else if (instr->opcode == aco_opcode::v_mad_u64_u32 ||
|
||||
instr->opcode == aco_opcode::v_mad_i64_i32)
|
||||
return index == 2 ? 64 : 32;
|
||||
else if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
|
||||
instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
|
||||
instr->opcode == aco_opcode::v_fma_mixhi_f16)
|
||||
return instr->valu().opsel_hi[index] ? 16 : 32;
|
||||
else if (instr->opcode == aco_opcode::v_interp_p10_f16_f32_inreg ||
|
||||
instr->opcode == aco_opcode::v_interp_p10_rtz_f16_f32_inreg)
|
||||
return index == 1 ? 32 : 16;
|
||||
else if (instr->opcode == aco_opcode::v_interp_p2_f16_f32_inreg ||
|
||||
instr->opcode == aco_opcode::v_interp_p2_rtz_f16_f32_inreg)
|
||||
return index == 0 ? 16 : 32;
|
||||
else if (instr->isVALU() || instr->isSALU())
|
||||
return instr_info.operand_size[(int)instr->opcode];
|
||||
return instr_info.alu_opcode_infos[(int)instr->opcode].op_types[index].constant_bits();
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2362,22 +2362,67 @@ dominates_linear(const Block& parent, const Block& child)
|
|||
child.linear_dom_post_index <= parent.linear_dom_post_index;
|
||||
}
|
||||
|
||||
struct aco_type {
|
||||
aco_base_type base_type : 4;
|
||||
uint8_t num_components : 4;
|
||||
uint8_t bit_size;
|
||||
|
||||
inline unsigned bytes() const { return (bit_size * num_components) / 8; }
|
||||
inline unsigned dwords() const { return DIV_ROUND_UP(bytes(), 4); }
|
||||
|
||||
/* Constant size used by Operand::c16/c32/c64/get_const.
|
||||
* 0 means no inline constants are supported for this type.
|
||||
*/
|
||||
inline unsigned constant_bits() const
|
||||
{
|
||||
switch (base_type) {
|
||||
case aco_base_type_bfloat: /* XXX might be useful some day. */
|
||||
case aco_base_type_none:
|
||||
case aco_base_type_lanemask: return 0;
|
||||
case aco_base_type_float:
|
||||
if (bit_size == 16 && (num_components == 1 || num_components == 2))
|
||||
return 16;
|
||||
else if (bit_size == 32 && num_components == 1)
|
||||
return 32;
|
||||
else if (bit_size == 64 && num_components == 1)
|
||||
return 64;
|
||||
return 0;
|
||||
case aco_base_type_uint:
|
||||
if (bit_size == 16 && (num_components == 1 || num_components == 2))
|
||||
return 32; /* 16bit int alu uses 32bit float constants. */
|
||||
else if (bit_size == 32 && num_components == 1)
|
||||
return 32;
|
||||
else if (bit_size == 64 && num_components == 1)
|
||||
return 64;
|
||||
return 0;
|
||||
case aco_base_type_int: assert(bit_size == 64 && num_components == 1); return 64;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct aco_alu_opcode_info {
|
||||
uint8_t num_operands : 3;
|
||||
uint8_t num_defs : 2;
|
||||
uint8_t input_modifiers : 3;
|
||||
uint8_t output_modifiers : 1;
|
||||
aco_type op_types[4];
|
||||
aco_type def_types[3];
|
||||
fixed_reg op_fixed_reg[4];
|
||||
fixed_reg def_fixed_reg[3];
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const int16_t opcode_gfx11[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const int16_t opcode_gfx12[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
|
||||
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
|
||||
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
|
||||
const char* name[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
/* sizes used for input/output modifiers and constants */
|
||||
const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const uint32_t definitions[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const uint32_t operands[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const aco_alu_opcode_info alu_opcode_infos[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
} Info;
|
||||
|
||||
extern const Info instr_info;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -14,9 +14,15 @@ namespace aco {
|
|||
|
||||
<%
|
||||
opcode_names = sorted(instructions.keys())
|
||||
can_use_input_modifiers = "".join([instructions[name].input_mod for name in reversed(opcode_names)])
|
||||
can_use_output_modifiers = "".join([instructions[name].output_mod for name in reversed(opcode_names)])
|
||||
is_atomic = "".join([instructions[name].is_atomic for name in reversed(opcode_names)])
|
||||
# Record which operand of each instruction can use modifiers.
|
||||
operand_mods = dict();
|
||||
for name in opcode_names:
|
||||
op_mods = 0
|
||||
for operand in reversed(instructions[name].operands):
|
||||
op_mods <<= 1
|
||||
op_mods |= int(operand.mods)
|
||||
operand_mods[name] = op_mods
|
||||
%>
|
||||
|
||||
extern const aco::Info instr_info = {
|
||||
|
|
@ -45,8 +51,6 @@ extern const aco::Info instr_info = {
|
|||
${instructions[name].op.gfx12},
|
||||
% endfor
|
||||
},
|
||||
std::bitset<${len(opcode_names)}>("${can_use_input_modifiers}"),
|
||||
std::bitset<${len(opcode_names)}>("${can_use_output_modifiers}"),
|
||||
std::bitset<${len(opcode_names)}>("${is_atomic}"),
|
||||
{
|
||||
% for name in opcode_names:
|
||||
|
|
@ -58,11 +62,6 @@ extern const aco::Info instr_info = {
|
|||
aco::Format::${str(instructions[name].format.name)},
|
||||
% endfor
|
||||
},
|
||||
{
|
||||
% for name in opcode_names:
|
||||
${instructions[name].operand_size},
|
||||
% endfor
|
||||
},
|
||||
{
|
||||
% for name in opcode_names:
|
||||
instr_class::${instructions[name].cls.value},
|
||||
|
|
@ -70,12 +69,40 @@ extern const aco::Info instr_info = {
|
|||
},
|
||||
{
|
||||
% for name in opcode_names:
|
||||
${hex(instructions[name].definitions)},
|
||||
% endfor
|
||||
},
|
||||
{
|
||||
% for name in opcode_names:
|
||||
${hex(instructions[name].operands)},
|
||||
{ // ${name}
|
||||
${len(instructions[name].operands)},
|
||||
${len(instructions[name].definitions)},
|
||||
${operand_mods[name]},
|
||||
${int(instructions[name].definitions[0].mods) if len(instructions[name].definitions) > 0 else 0},
|
||||
{
|
||||
% for operand in instructions[name].operands:
|
||||
{
|
||||
${operand.base_type.name},
|
||||
${operand.num_components},
|
||||
${operand.bit_size},
|
||||
},
|
||||
% endfor
|
||||
},
|
||||
{
|
||||
% for definition in instructions[name].definitions:
|
||||
{
|
||||
${definition.base_type.name},
|
||||
${definition.num_components},
|
||||
${definition.bit_size},
|
||||
},
|
||||
% endfor
|
||||
},
|
||||
{
|
||||
% for operand in instructions[name].operands:
|
||||
${operand.fixed_reg.name},
|
||||
% endfor
|
||||
},
|
||||
{
|
||||
% for definition in instructions[name].definitions:
|
||||
${definition.fixed_reg.name},
|
||||
% endfor
|
||||
},
|
||||
},
|
||||
% endfor
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -15,6 +15,18 @@ template = """\
|
|||
|
||||
namespace aco {
|
||||
|
||||
enum aco_base_type : uint8_t {
|
||||
% for e in AcoBaseType:
|
||||
${e.name} = ${hex(e.value)},
|
||||
% endfor
|
||||
};
|
||||
|
||||
enum fixed_reg : uint8_t {
|
||||
% for e in FixedReg:
|
||||
${e.name} = ${hex(e.value)},
|
||||
% endfor
|
||||
};
|
||||
|
||||
enum class Format : uint16_t {
|
||||
% for e in Format:
|
||||
${e.name} = ${hex(e.value)},
|
||||
|
|
@ -41,7 +53,7 @@ enum class aco_opcode : uint16_t {
|
|||
}
|
||||
#endif /* _ACO_OPCODES_ */"""
|
||||
|
||||
from aco_opcodes import instructions, InstrClass, Format
|
||||
from aco_opcodes import instructions, InstrClass, Format, AcoBaseType, FixedReg
|
||||
from mako.template import Template
|
||||
|
||||
print(Template(template).render(instructions=instructions, InstrClass=InstrClass, Format=Format))
|
||||
print(Template(template).render(instructions=instructions, InstrClass=InstrClass, Format=Format, AcoBaseType=AcoBaseType, FixedReg=FixedReg))
|
||||
|
|
|
|||
|
|
@ -1554,7 +1554,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
|
||||
if (instr->isVALU() || (instr->isVINTRP() && instr->opcode != aco_opcode::v_interp_mov_f32)) {
|
||||
if (instr_info.can_use_output_modifiers[(int)instr->opcode] || instr->isVINTRP() ||
|
||||
if (instr_info.alu_opcode_infos[(int)instr->opcode].output_modifiers || instr->isVINTRP() ||
|
||||
instr->opcode == aco_opcode::v_cndmask_b32) {
|
||||
bool canonicalized = true;
|
||||
if (!does_fp_op_flush_denorms(ctx, instr->opcode)) {
|
||||
|
|
@ -2924,7 +2924,7 @@ bool
|
|||
apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1 ||
|
||||
!instr_info.can_use_output_modifiers[(int)instr->opcode])
|
||||
!instr_info.alu_opcode_infos[(int)instr->opcode].output_modifiers)
|
||||
return false;
|
||||
|
||||
bool can_vop3 = can_use_VOP3(ctx, instr);
|
||||
|
|
@ -3300,7 +3300,8 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
!vop3p->opsel_lo[1] && !vop3p->opsel_hi[1]) {
|
||||
|
||||
Instruction* op_instr = ctx.info[instr->operands[0].tempId()].parent_instr;
|
||||
if (op_instr->isVOP3P() && instr_info.can_use_output_modifiers[(int)op_instr->opcode]) {
|
||||
if (op_instr->isVOP3P() &&
|
||||
instr_info.alu_opcode_infos[(int)op_instr->opcode].output_modifiers) {
|
||||
op_instr->valu().clamp = true;
|
||||
propagate_swizzles(&op_instr->valu(), vop3p->opsel_lo[0], vop3p->opsel_hi[0]);
|
||||
instr->definitions[0].swapTemp(op_instr->definitions[0]);
|
||||
|
|
|
|||
|
|
@ -144,43 +144,45 @@ validate_ir(Program* program)
|
|||
"Definition RC not consistent.", instr.get());
|
||||
}
|
||||
|
||||
unsigned pck_defs = instr_info.definitions[(int)instr->opcode];
|
||||
unsigned pck_ops = instr_info.operands[(int)instr->opcode];
|
||||
const aco_alu_opcode_info& opcode_info = instr_info.alu_opcode_infos[(int)instr->opcode];
|
||||
|
||||
if (pck_defs != 0) {
|
||||
if (opcode_info.num_defs) {
|
||||
unsigned num_defs = opcode_info.num_defs;
|
||||
/* Before GFX10 v_cmpx also writes VCC. */
|
||||
if (instr->isVOPC() && program->gfx_level < GFX10 && pck_defs == exec_hi)
|
||||
pck_defs = vcc | (exec_hi << 8);
|
||||
if (instr->isVOPC() && program->gfx_level < GFX10 &&
|
||||
opcode_info.def_fixed_reg[0] == fixed_exec)
|
||||
num_defs = 2;
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
uint32_t def = (pck_defs >> (i * 8)) & 0xff;
|
||||
if (def == 0) {
|
||||
check(i == instr->definitions.size(), "Too many definitions", instr.get());
|
||||
break;
|
||||
check(num_defs >= instr->definitions.size(), "Too many definitions", instr.get());
|
||||
check(num_defs <= instr->definitions.size(), "Too few definitions", instr.get());
|
||||
num_defs = MIN2(num_defs, instr->definitions.size());
|
||||
|
||||
for (unsigned i = 0; i < num_defs; i++) {
|
||||
aco_type type;
|
||||
fixed_reg fixed_reg;
|
||||
if (instr->isVOPC() && program->gfx_level < GFX10 &&
|
||||
opcode_info.def_fixed_reg[0] == fixed_exec) {
|
||||
type = opcode_info.def_types[0];
|
||||
fixed_reg = i == 0 ? not_fixed : fixed_exec;
|
||||
} else {
|
||||
check(i < instr->definitions.size(), "Too few definitions", instr.get());
|
||||
if (i >= instr->definitions.size())
|
||||
break;
|
||||
type = opcode_info.def_types[i];
|
||||
fixed_reg = opcode_info.def_fixed_reg[i];
|
||||
}
|
||||
|
||||
if (def == m0) {
|
||||
if (fixed_reg == fixed_m0) {
|
||||
check(instr->definitions[i].isFixed() && instr->definitions[i].physReg() == m0,
|
||||
"Definition needs m0", instr.get());
|
||||
} else if (def == scc) {
|
||||
} else if (fixed_reg == fixed_scc) {
|
||||
check(instr->definitions[i].isFixed() && instr->definitions[i].physReg() == scc,
|
||||
"Definition needs scc", instr.get());
|
||||
} else if (def == exec_hi) {
|
||||
RegClass rc = instr->isSALU() ? s2 : program->lane_mask;
|
||||
} else if (fixed_reg == fixed_exec) {
|
||||
RegClass rc = type.bit_size == 1 ? program->lane_mask
|
||||
: RegClass::get(RegType::sgpr, type.bytes());
|
||||
check(instr->definitions[i].isFixed() &&
|
||||
instr->definitions[i].physReg() == exec &&
|
||||
instr->definitions[i].regClass() == rc,
|
||||
"Definition needs exec", instr.get());
|
||||
} else if (def == exec_lo) {
|
||||
check(instr->definitions[i].isFixed() &&
|
||||
instr->definitions[i].physReg() == exec_lo &&
|
||||
instr->definitions[i].regClass() == s1,
|
||||
"Definition needs exec_lo", instr.get());
|
||||
} else if (def == vcc) {
|
||||
} else if (type.bit_size == 1) {
|
||||
check(instr->definitions[i].regClass() == program->lane_mask,
|
||||
"Definition has to be lane mask", instr.get());
|
||||
check(!instr->definitions[i].isFixed() ||
|
||||
|
|
@ -188,48 +190,45 @@ validate_ir(Program* program)
|
|||
instr->isSDWA(),
|
||||
"Definition has to be vcc", instr.get());
|
||||
} else {
|
||||
check(instr->definitions[i].size() == def, "Definition has wrong size",
|
||||
check(instr->definitions[i].size() == type.dwords(), "Definition has wrong size",
|
||||
instr.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pck_ops != 0) {
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
uint32_t op = (pck_ops >> (i * 8)) & 0xff;
|
||||
if (op == 0) {
|
||||
check(i == instr->operands.size(), "Too many operands", instr.get());
|
||||
break;
|
||||
} else {
|
||||
check(i < instr->operands.size(), "Too few operands", instr.get());
|
||||
if (i >= instr->operands.size())
|
||||
break;
|
||||
}
|
||||
if (opcode_info.num_operands) {
|
||||
unsigned num_ops = opcode_info.num_operands;
|
||||
check(num_ops >= instr->operands.size(), "Too many operands", instr.get());
|
||||
check(num_ops <= instr->operands.size(), "Too few operands", instr.get());
|
||||
num_ops = MIN2(num_ops, instr->operands.size());
|
||||
|
||||
if (op == m0) {
|
||||
for (unsigned i = 0; i < num_ops; i++) {
|
||||
aco_type type = opcode_info.op_types[i];
|
||||
fixed_reg fixed_reg = opcode_info.op_fixed_reg[i];
|
||||
|
||||
if (fixed_reg == fixed_m0) {
|
||||
check(instr->operands[i].isFixed() && instr->operands[i].physReg() == m0,
|
||||
"Operand needs m0", instr.get());
|
||||
} else if (op == scc) {
|
||||
} else if (fixed_reg == fixed_scc) {
|
||||
check(instr->operands[i].isFixed() && instr->operands[i].physReg() == scc,
|
||||
"Operand needs scc", instr.get());
|
||||
} else if (op == exec_hi) {
|
||||
RegClass rc = instr->isSALU() ? s2 : program->lane_mask;
|
||||
} else if (fixed_reg == fixed_exec) {
|
||||
RegClass rc = type.bit_size == 1 ? program->lane_mask
|
||||
: RegClass::get(RegType::sgpr, type.bytes());
|
||||
check(instr->operands[i].isFixed() && instr->operands[i].physReg() == exec &&
|
||||
instr->operands[i].hasRegClass() && instr->operands[i].regClass() == rc,
|
||||
"Operand needs exec", instr.get());
|
||||
} else if (op == exec_lo) {
|
||||
check(instr->operands[i].isFixed() && instr->operands[i].physReg() == exec_lo &&
|
||||
instr->operands[i].hasRegClass() && instr->operands[i].regClass() == s1,
|
||||
"Operand needs exec_lo", instr.get());
|
||||
} else if (op == vcc) {
|
||||
} else if (type.bit_size == 1) {
|
||||
check(instr->operands[i].hasRegClass() &&
|
||||
instr->operands[i].regClass() == program->lane_mask,
|
||||
"Operand has to be lane mask", instr.get());
|
||||
check(!instr->operands[i].isFixed() || instr->operands[i].physReg() == vcc ||
|
||||
instr->isVOP3(),
|
||||
"Operand has to be vcc", instr.get());
|
||||
} else if (fixed_reg == fixed_imm) {
|
||||
check(instr->operands[i].isLiteral(), "Operand has to be literal", instr.get());
|
||||
} else {
|
||||
check(instr->operands[i].size() == op ||
|
||||
check(instr->operands[i].size() == type.dwords() ||
|
||||
(instr->operands[i].isFixed() && instr->operands[i].physReg() >= 128 &&
|
||||
instr->operands[i].physReg() < 256),
|
||||
"Operand has wrong size", instr.get());
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue