mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 22:30:12 +01:00
aco: compact various Instruction classes
No pipelinedb changes. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3332>
This commit is contained in:
parent
ebaab89761
commit
ffb4790279
7 changed files with 99 additions and 100 deletions
|
|
@ -516,10 +516,9 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
||||||
encoding |= opcode << 16;
|
encoding |= opcode << 16;
|
||||||
encoding |= (vop3->clamp ? 1 : 0) << 15;
|
encoding |= (vop3->clamp ? 1 : 0) << 15;
|
||||||
}
|
}
|
||||||
|
encoding |= vop3->opsel << 11;
|
||||||
for (unsigned i = 0; i < 3; i++)
|
for (unsigned i = 0; i < 3; i++)
|
||||||
encoding |= vop3->abs[i] << (8+i);
|
encoding |= vop3->abs[i] << (8+i);
|
||||||
for (unsigned i = 0; i < 4; i++)
|
|
||||||
encoding |= vop3->opsel[i] << (11+i);
|
|
||||||
if (instr->definitions.size() == 2)
|
if (instr->definitions.size() == 2)
|
||||||
encoding |= instr->definitions[1].physReg() << 8;
|
encoding |= instr->definitions[1].physReg() << 8;
|
||||||
encoding |= (0xFF & instr->definitions[0].physReg());
|
encoding |= (0xFF & instr->definitions[0].physReg());
|
||||||
|
|
|
||||||
|
|
@ -4424,7 +4424,7 @@ void visit_image_size(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||||
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 2, 1)};
|
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 2, 1)};
|
||||||
mimg->operands[0] = Operand(lod);
|
mimg->operands[0] = Operand(lod);
|
||||||
mimg->operands[1] = Operand(resource);
|
mimg->operands[1] = Operand(resource);
|
||||||
unsigned& dmask = mimg->dmask;
|
uint8_t& dmask = mimg->dmask;
|
||||||
mimg->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array);
|
mimg->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array);
|
||||||
mimg->dmask = (1 << instr->dest.ssa.num_components) - 1;
|
mimg->dmask = (1 << instr->dest.ssa.num_components) - 1;
|
||||||
mimg->da = glsl_sampler_type_is_array(type);
|
mimg->da = glsl_sampler_type_is_array(type);
|
||||||
|
|
|
||||||
|
|
@ -102,7 +102,7 @@ enum class Format : std::uint16_t {
|
||||||
SDWA = 1 << 15,
|
SDWA = 1 << 15,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum barrier_interaction {
|
enum barrier_interaction : uint8_t {
|
||||||
barrier_none = 0,
|
barrier_none = 0,
|
||||||
barrier_buffer = 0x1,
|
barrier_buffer = 0x1,
|
||||||
barrier_image = 0x2,
|
barrier_image = 0x2,
|
||||||
|
|
@ -701,11 +701,11 @@ struct SOP2_instruction : public Instruction {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
struct SMEM_instruction : public Instruction {
|
struct SMEM_instruction : public Instruction {
|
||||||
bool glc; /* VI+: globally coherent */
|
bool glc : 1; /* VI+: globally coherent */
|
||||||
bool dlc; /* NAVI: device level coherent */
|
bool dlc : 1; /* NAVI: device level coherent */
|
||||||
bool nv; /* VEGA only: Non-volatile */
|
bool nv : 1; /* VEGA only: Non-volatile */
|
||||||
bool can_reorder;
|
bool can_reorder : 1;
|
||||||
bool disable_wqm;
|
bool disable_wqm : 1;
|
||||||
barrier_interaction barrier;
|
barrier_interaction barrier;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -720,10 +720,10 @@ struct VOPC_instruction : public Instruction {
|
||||||
|
|
||||||
struct VOP3A_instruction : public Instruction {
|
struct VOP3A_instruction : public Instruction {
|
||||||
bool abs[3];
|
bool abs[3];
|
||||||
bool opsel[4];
|
|
||||||
bool clamp;
|
|
||||||
unsigned omod;
|
|
||||||
bool neg[3];
|
bool neg[3];
|
||||||
|
uint8_t opsel : 4;
|
||||||
|
uint8_t omod : 2;
|
||||||
|
bool clamp : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -733,17 +733,17 @@ struct VOP3A_instruction : public Instruction {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
struct DPP_instruction : public Instruction {
|
struct DPP_instruction : public Instruction {
|
||||||
uint16_t dpp_ctrl;
|
|
||||||
uint8_t row_mask;
|
|
||||||
uint8_t bank_mask;
|
|
||||||
bool abs[2];
|
bool abs[2];
|
||||||
bool neg[2];
|
bool neg[2];
|
||||||
bool bound_ctrl;
|
uint16_t dpp_ctrl;
|
||||||
|
uint8_t row_mask : 4;
|
||||||
|
uint8_t bank_mask : 4;
|
||||||
|
bool bound_ctrl : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Interp_instruction : public Instruction {
|
struct Interp_instruction : public Instruction {
|
||||||
unsigned attribute;
|
uint8_t attribute;
|
||||||
unsigned component;
|
uint8_t component;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -770,16 +770,16 @@ struct DS_instruction : public Instruction {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
struct MUBUF_instruction : public Instruction {
|
struct MUBUF_instruction : public Instruction {
|
||||||
unsigned offset; /* Unsigned byte offset - 12 bit */
|
uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
|
||||||
bool offen; /* Supply an offset from VGPR (VADDR) */
|
bool offen : 1; /* Supply an offset from VGPR (VADDR) */
|
||||||
bool idxen; /* Supply an index from VGPR (VADDR) */
|
bool idxen : 1; /* Supply an index from VGPR (VADDR) */
|
||||||
bool glc; /* globally coherent */
|
bool glc : 1; /* globally coherent */
|
||||||
bool dlc; /* NAVI: device level coherent */
|
bool dlc : 1; /* NAVI: device level coherent */
|
||||||
bool slc; /* system level coherent */
|
bool slc : 1; /* system level coherent */
|
||||||
bool tfe; /* texture fail enable */
|
bool tfe : 1; /* texture fail enable */
|
||||||
bool lds; /* Return read-data to LDS instead of VGPRs */
|
bool lds : 1; /* Return read-data to LDS instead of VGPRs */
|
||||||
bool disable_wqm; /* Require an exec mask without helper invocations */
|
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||||
bool can_reorder;
|
bool can_reorder : 1;
|
||||||
barrier_interaction barrier;
|
barrier_interaction barrier;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -792,17 +792,17 @@ struct MUBUF_instruction : public Instruction {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
struct MTBUF_instruction : public Instruction {
|
struct MTBUF_instruction : public Instruction {
|
||||||
|
uint16_t offset; /* Unsigned byte offset - 12 bit */
|
||||||
uint8_t dfmt : 4; /* Data Format of data in memory buffer */
|
uint8_t dfmt : 4; /* Data Format of data in memory buffer */
|
||||||
uint8_t nfmt : 3; /* Numeric format of data in memory */
|
uint8_t nfmt : 3; /* Numeric format of data in memory */
|
||||||
unsigned offset; /* Unsigned byte offset - 12 bit */
|
bool offen : 1; /* Supply an offset from VGPR (VADDR) */
|
||||||
bool offen; /* Supply an offset from VGPR (VADDR) */
|
bool idxen : 1; /* Supply an index from VGPR (VADDR) */
|
||||||
bool idxen; /* Supply an index from VGPR (VADDR) */
|
bool glc : 1; /* globally coherent */
|
||||||
bool glc; /* globally coherent */
|
bool dlc : 1; /* NAVI: device level coherent */
|
||||||
bool dlc; /* NAVI: device level coherent */
|
bool slc : 1; /* system level coherent */
|
||||||
bool slc; /* system level coherent */
|
bool tfe : 1; /* texture fail enable */
|
||||||
bool tfe; /* texture fail enable */
|
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||||
bool disable_wqm; /* Require an exec mask without helper invocations */
|
bool can_reorder : 1;
|
||||||
bool can_reorder;
|
|
||||||
barrier_interaction barrier;
|
barrier_interaction barrier;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -815,20 +815,20 @@ struct MTBUF_instruction : public Instruction {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
struct MIMG_instruction : public Instruction {
|
struct MIMG_instruction : public Instruction {
|
||||||
unsigned dmask; /* Data VGPR enable mask */
|
uint8_t dmask; /* Data VGPR enable mask */
|
||||||
unsigned dim; /* NAVI: dimensionality */
|
uint8_t dim : 3; /* NAVI: dimensionality */
|
||||||
bool unrm; /* Force address to be un-normalized */
|
bool unrm : 1; /* Force address to be un-normalized */
|
||||||
bool dlc; /* NAVI: device level coherent */
|
bool dlc : 1; /* NAVI: device level coherent */
|
||||||
bool glc; /* globally coherent */
|
bool glc : 1; /* globally coherent */
|
||||||
bool slc; /* system level coherent */
|
bool slc : 1; /* system level coherent */
|
||||||
bool tfe; /* texture fail enable */
|
bool tfe : 1; /* texture fail enable */
|
||||||
bool da; /* declare an array */
|
bool da : 1; /* declare an array */
|
||||||
bool lwe; /* Force data to be un-normalized */
|
bool lwe : 1; /* Force data to be un-normalized */
|
||||||
bool r128; /* NAVI: Texture resource size */
|
bool r128 : 1; /* NAVI: Texture resource size */
|
||||||
bool a16; /* VEGA, NAVI: Address components are 16-bits */
|
bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
|
||||||
bool d16; /* Convert 32-bit data to 16-bit data */
|
bool d16 : 1; /* Convert 32-bit data to 16-bit data */
|
||||||
bool disable_wqm; /* Require an exec mask without helper invocations */
|
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||||
bool can_reorder;
|
bool can_reorder : 1;
|
||||||
barrier_interaction barrier;
|
barrier_interaction barrier;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -841,22 +841,22 @@ struct MIMG_instruction : public Instruction {
|
||||||
*/
|
*/
|
||||||
struct FLAT_instruction : public Instruction {
|
struct FLAT_instruction : public Instruction {
|
||||||
uint16_t offset; /* Vega/Navi only */
|
uint16_t offset; /* Vega/Navi only */
|
||||||
bool slc; /* system level coherent */
|
bool slc : 1; /* system level coherent */
|
||||||
bool glc; /* globally coherent */
|
bool glc : 1; /* globally coherent */
|
||||||
bool dlc; /* NAVI: device level coherent */
|
bool dlc : 1; /* NAVI: device level coherent */
|
||||||
bool lds;
|
bool lds : 1;
|
||||||
bool nv;
|
bool nv : 1;
|
||||||
bool disable_wqm; /* Require an exec mask without helper invocations */
|
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||||
bool can_reorder;
|
bool can_reorder : 1;
|
||||||
barrier_interaction barrier;
|
barrier_interaction barrier;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Export_instruction : public Instruction {
|
struct Export_instruction : public Instruction {
|
||||||
unsigned enabled_mask;
|
uint8_t enabled_mask;
|
||||||
unsigned dest;
|
uint8_t dest;
|
||||||
bool compressed;
|
bool compressed : 1;
|
||||||
bool done;
|
bool done : 1;
|
||||||
bool valid_mask;
|
bool valid_mask : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Pseudo_instruction : public Instruction {
|
struct Pseudo_instruction : public Instruction {
|
||||||
|
|
@ -943,10 +943,10 @@ constexpr bool Instruction::usesModifiers() const noexcept
|
||||||
return false;
|
return false;
|
||||||
const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
|
const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
|
||||||
for (unsigned i = 0; i < operands.size(); i++) {
|
for (unsigned i = 0; i < operands.size(); i++) {
|
||||||
if (vop3->abs[i] || vop3->opsel[i] || vop3->neg[i])
|
if (vop3->abs[i] || vop3->neg[i])
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return vop3->opsel[3] || vop3->clamp || vop3->omod;
|
return vop3->opsel || vop3->clamp || vop3->omod;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool is_phi(Instruction* instr)
|
constexpr bool is_phi(Instruction* instr)
|
||||||
|
|
|
||||||
|
|
@ -533,7 +533,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
|
||||||
Definition(PhysReg{vtmp+i}, v1),
|
Definition(PhysReg{vtmp+i}, v1),
|
||||||
Operand(PhysReg{tmp+i}, v1),
|
Operand(PhysReg{tmp+i}, v1),
|
||||||
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
|
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
|
||||||
static_cast<VOP3A_instruction*>(perm)->opsel[0] = true; /* FI (Fetch Inactive) */
|
static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
|
||||||
}
|
}
|
||||||
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
|
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
|
||||||
|
|
||||||
|
|
@ -644,7 +644,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
|
||||||
Definition(PhysReg{vtmp+i}, v1),
|
Definition(PhysReg{vtmp+i}, v1),
|
||||||
Operand(PhysReg{tmp+i}, v1),
|
Operand(PhysReg{tmp+i}, v1),
|
||||||
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
|
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
|
||||||
static_cast<VOP3A_instruction*>(perm)->opsel[0] = true; /* FI (Fetch Inactive) */
|
static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
|
||||||
}
|
}
|
||||||
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
|
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,9 +47,9 @@ struct InstrHash {
|
||||||
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr);
|
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr);
|
||||||
for (unsigned i = 0; i < 3; i++) {
|
for (unsigned i = 0; i < 3; i++) {
|
||||||
hash ^= vop3->abs[i] << (i*3 + 0);
|
hash ^= vop3->abs[i] << (i*3 + 0);
|
||||||
hash ^= vop3->opsel[i] << (i*3 + 1);
|
|
||||||
hash ^= vop3->neg[i] << (i*3 + 2);
|
hash ^= vop3->neg[i] << (i*3 + 2);
|
||||||
}
|
}
|
||||||
|
hash ^= vop3->opsel * 13;
|
||||||
hash ^= (vop3->clamp << 28) * 13;
|
hash ^= (vop3->clamp << 28) * 13;
|
||||||
hash += vop3->omod << 19;
|
hash += vop3->omod << 19;
|
||||||
}
|
}
|
||||||
|
|
@ -134,12 +134,12 @@ struct InstrPred {
|
||||||
VOP3A_instruction* b3 = static_cast<VOP3A_instruction*>(b);
|
VOP3A_instruction* b3 = static_cast<VOP3A_instruction*>(b);
|
||||||
for (unsigned i = 0; i < 3; i++) {
|
for (unsigned i = 0; i < 3; i++) {
|
||||||
if (a3->abs[i] != b3->abs[i] ||
|
if (a3->abs[i] != b3->abs[i] ||
|
||||||
a3->opsel[i] != b3->opsel[i] ||
|
|
||||||
a3->neg[i] != b3->neg[i])
|
a3->neg[i] != b3->neg[i])
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return a3->clamp == b3->clamp &&
|
return a3->clamp == b3->clamp &&
|
||||||
a3->omod == b3->omod;
|
a3->omod == b3->omod &&
|
||||||
|
a3->opsel == b3->opsel;
|
||||||
}
|
}
|
||||||
if (a->isDPP()) {
|
if (a->isDPP()) {
|
||||||
DPP_instruction* aDPP = static_cast<DPP_instruction*>(a);
|
DPP_instruction* aDPP = static_cast<DPP_instruction*>(a);
|
||||||
|
|
|
||||||
|
|
@ -948,10 +948,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
||||||
}
|
}
|
||||||
case aco_opcode::v_med3_f32: { /* clamp */
|
case aco_opcode::v_med3_f32: { /* clamp */
|
||||||
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
|
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
|
||||||
if (vop3->abs[0] || vop3->neg[0] || vop3->opsel[0] ||
|
if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
|
||||||
vop3->abs[1] || vop3->neg[1] || vop3->opsel[1] ||
|
vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
|
||||||
vop3->abs[2] || vop3->neg[2] || vop3->opsel[2] ||
|
vop3->omod != 0 || vop3->opsel != 0)
|
||||||
vop3->omod != 0)
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
unsigned idx = 0;
|
unsigned idx = 0;
|
||||||
|
|
@ -1173,7 +1172,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||||
|
|
||||||
bool neg[2] = {false, false};
|
bool neg[2] = {false, false};
|
||||||
bool abs[2] = {false, false};
|
bool abs[2] = {false, false};
|
||||||
bool opsel[2] = {false, false};
|
uint8_t opsel = 0;
|
||||||
Instruction *op_instr[2];
|
Instruction *op_instr[2];
|
||||||
Temp op[2];
|
Temp op[2];
|
||||||
|
|
||||||
|
|
@ -1191,11 +1190,11 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||||
|
|
||||||
if (op_instr[i]->isVOP3()) {
|
if (op_instr[i]->isVOP3()) {
|
||||||
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(op_instr[i]);
|
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(op_instr[i]);
|
||||||
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel[0] != vop3->opsel[1])
|
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
|
||||||
return false;
|
return false;
|
||||||
neg[i] = vop3->neg[0];
|
neg[i] = vop3->neg[0];
|
||||||
abs[i] = vop3->abs[0];
|
abs[i] = vop3->abs[0];
|
||||||
opsel[i] = vop3->opsel[0];
|
opsel |= (vop3->opsel & 1) << i;
|
||||||
}
|
}
|
||||||
|
|
||||||
Temp op0 = op_instr[i]->operands[0].getTemp();
|
Temp op0 = op_instr[i]->operands[0].getTemp();
|
||||||
|
|
@ -1216,13 +1215,13 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||||
|
|
||||||
aco_opcode new_op = is_or ? aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32;
|
aco_opcode new_op = is_or ? aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32;
|
||||||
Instruction *new_instr;
|
Instruction *new_instr;
|
||||||
if (neg[0] || neg[1] || abs[0] || abs[1] || opsel[0] || opsel[1]) {
|
if (neg[0] || neg[1] || abs[0] || abs[1] || opsel) {
|
||||||
VOP3A_instruction *vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
VOP3A_instruction *vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||||
for (unsigned i = 0; i < 2; i++) {
|
for (unsigned i = 0; i < 2; i++) {
|
||||||
vop3->neg[i] = neg[i];
|
vop3->neg[i] = neg[i];
|
||||||
vop3->abs[i] = abs[i];
|
vop3->abs[i] = abs[i];
|
||||||
vop3->opsel[i] = opsel[i];
|
|
||||||
}
|
}
|
||||||
|
vop3->opsel = opsel;
|
||||||
new_instr = static_cast<Instruction *>(vop3);
|
new_instr = static_cast<Instruction *>(vop3);
|
||||||
} else {
|
} else {
|
||||||
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
|
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
|
||||||
|
|
@ -1289,10 +1288,10 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||||
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||||
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
|
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
|
||||||
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
||||||
memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel));
|
|
||||||
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
||||||
new_vop3->clamp = cmp_vop3->clamp;
|
new_vop3->clamp = cmp_vop3->clamp;
|
||||||
new_vop3->omod = cmp_vop3->omod;
|
new_vop3->omod = cmp_vop3->omod;
|
||||||
|
new_vop3->opsel = cmp_vop3->opsel;
|
||||||
new_instr = new_vop3;
|
new_instr = new_vop3;
|
||||||
} else {
|
} else {
|
||||||
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
|
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
|
||||||
|
|
@ -1385,10 +1384,10 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
|
||||||
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||||
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
|
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
|
||||||
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
||||||
memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel));
|
|
||||||
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
||||||
new_vop3->clamp = cmp_vop3->clamp;
|
new_vop3->clamp = cmp_vop3->clamp;
|
||||||
new_vop3->omod = cmp_vop3->omod;
|
new_vop3->omod = cmp_vop3->omod;
|
||||||
|
new_vop3->opsel = cmp_vop3->opsel;
|
||||||
new_instr = new_vop3;
|
new_instr = new_vop3;
|
||||||
} else {
|
} else {
|
||||||
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
|
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
|
||||||
|
|
@ -1434,10 +1433,10 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||||
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
|
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
|
||||||
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
|
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
|
||||||
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
||||||
memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel));
|
|
||||||
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
||||||
new_vop3->clamp = cmp_vop3->clamp;
|
new_vop3->clamp = cmp_vop3->clamp;
|
||||||
new_vop3->omod = cmp_vop3->omod;
|
new_vop3->omod = cmp_vop3->omod;
|
||||||
|
new_vop3->opsel = cmp_vop3->opsel;
|
||||||
new_instr = new_vop3;
|
new_instr = new_vop3;
|
||||||
} else {
|
} else {
|
||||||
new_instr = create_instruction<VOPC_instruction>(new_opcode, Format::VOPC, 2, 1);
|
new_instr = create_instruction<VOPC_instruction>(new_opcode, Format::VOPC, 2, 1);
|
||||||
|
|
@ -1458,8 +1457,8 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||||
* op1(0, op2(1, 2)) if swap = true */
|
* op1(0, op2(1, 2)) if swap = true */
|
||||||
bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
|
bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
|
||||||
Instruction* op1_instr, bool swap, const char *shuffle_str,
|
Instruction* op1_instr, bool swap, const char *shuffle_str,
|
||||||
Operand operands[3], bool neg[3], bool abs[3], bool opsel[3],
|
Operand operands[3], bool neg[3], bool abs[3], uint8_t *opsel,
|
||||||
bool *op1_clamp, unsigned *op1_omod,
|
bool *op1_clamp, uint8_t *op1_omod,
|
||||||
bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel)
|
bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel)
|
||||||
{
|
{
|
||||||
/* checks */
|
/* checks */
|
||||||
|
|
@ -1492,8 +1491,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (inbetween_opsel)
|
if (inbetween_opsel)
|
||||||
*inbetween_opsel = op1_vop3 ? op1_vop3->opsel[swap] : false;
|
*inbetween_opsel = op1_vop3 ? op1_vop3->opsel & (1 << swap) : false;
|
||||||
else if (op1_vop3 && op1_vop3->opsel[swap])
|
else if (op1_vop3 && op1_vop3->opsel & (1 << swap))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int shuffle[3];
|
int shuffle[3];
|
||||||
|
|
@ -1504,13 +1503,15 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
|
||||||
operands[shuffle[0]] = op1_instr->operands[!swap];
|
operands[shuffle[0]] = op1_instr->operands[!swap];
|
||||||
neg[shuffle[0]] = op1_vop3 ? op1_vop3->neg[!swap] : false;
|
neg[shuffle[0]] = op1_vop3 ? op1_vop3->neg[!swap] : false;
|
||||||
abs[shuffle[0]] = op1_vop3 ? op1_vop3->abs[!swap] : false;
|
abs[shuffle[0]] = op1_vop3 ? op1_vop3->abs[!swap] : false;
|
||||||
opsel[shuffle[0]] = op1_vop3 ? op1_vop3->opsel[!swap] : false;
|
if (op1_vop3 && op1_vop3->opsel & (1 << !swap))
|
||||||
|
*opsel |= 1 << shuffle[0];
|
||||||
|
|
||||||
for (unsigned i = 0; i < 2; i++) {
|
for (unsigned i = 0; i < 2; i++) {
|
||||||
operands[shuffle[i + 1]] = op2_instr->operands[i];
|
operands[shuffle[i + 1]] = op2_instr->operands[i];
|
||||||
neg[shuffle[i + 1]] = op2_vop3 ? op2_vop3->neg[i] : false;
|
neg[shuffle[i + 1]] = op2_vop3 ? op2_vop3->neg[i] : false;
|
||||||
abs[shuffle[i + 1]] = op2_vop3 ? op2_vop3->abs[i] : false;
|
abs[shuffle[i + 1]] = op2_vop3 ? op2_vop3->abs[i] : false;
|
||||||
opsel[shuffle[i + 1]] = op2_vop3 ? op2_vop3->opsel[i] : false;
|
if (op2_vop3 && op2_vop3->opsel & (1 << i))
|
||||||
|
*opsel |= 1 << shuffle[i + 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check operands */
|
/* check operands */
|
||||||
|
|
@ -1530,15 +1531,15 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
|
||||||
}
|
}
|
||||||
|
|
||||||
void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr,
|
void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr,
|
||||||
Operand operands[3], bool neg[3], bool abs[3], bool opsel[3],
|
Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel,
|
||||||
bool clamp, unsigned omod)
|
bool clamp, unsigned omod)
|
||||||
{
|
{
|
||||||
VOP3A_instruction *new_instr = create_instruction<VOP3A_instruction>(opcode, Format::VOP3A, 3, 1);
|
VOP3A_instruction *new_instr = create_instruction<VOP3A_instruction>(opcode, Format::VOP3A, 3, 1);
|
||||||
memcpy(new_instr->abs, abs, sizeof(bool[3]));
|
memcpy(new_instr->abs, abs, sizeof(bool[3]));
|
||||||
memcpy(new_instr->opsel, opsel, sizeof(bool[3]));
|
|
||||||
memcpy(new_instr->neg, neg, sizeof(bool[3]));
|
memcpy(new_instr->neg, neg, sizeof(bool[3]));
|
||||||
new_instr->clamp = clamp;
|
new_instr->clamp = clamp;
|
||||||
new_instr->omod = omod;
|
new_instr->omod = omod;
|
||||||
|
new_instr->opsel = opsel;
|
||||||
new_instr->operands[0] = operands[0];
|
new_instr->operands[0] = operands[0];
|
||||||
new_instr->operands[1] = operands[1];
|
new_instr->operands[1] = operands[1];
|
||||||
new_instr->operands[2] = operands[2];
|
new_instr->operands[2] = operands[2];
|
||||||
|
|
@ -1558,11 +1559,11 @@ bool combine_three_valu_op(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Operand operands[3];
|
Operand operands[3];
|
||||||
bool neg[3], abs[3], opsel[3], clamp;
|
bool neg[3], abs[3], clamp;
|
||||||
unsigned omod;
|
uint8_t opsel = 0, omod = 0;
|
||||||
if (match_op3_for_vop3(ctx, instr->opcode, op2,
|
if (match_op3_for_vop3(ctx, instr->opcode, op2,
|
||||||
instr.get(), swap, shuffle,
|
instr.get(), swap, shuffle,
|
||||||
operands, neg, abs, opsel,
|
operands, neg, abs, &opsel,
|
||||||
&clamp, &omod, NULL, NULL, NULL)) {
|
&clamp, &omod, NULL, NULL, NULL)) {
|
||||||
ctx.uses[instr->operands[swap].tempId()]--;
|
ctx.uses[instr->operands[swap].tempId()]--;
|
||||||
create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
|
create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
|
||||||
|
|
@ -1751,10 +1752,10 @@ bool combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr,
|
||||||
|
|
||||||
for (unsigned swap = 0; swap < 2; swap++) {
|
for (unsigned swap = 0; swap < 2; swap++) {
|
||||||
Operand operands[3];
|
Operand operands[3];
|
||||||
bool neg[3], abs[3], opsel[3], clamp, inbetween_neg, inbetween_abs;
|
bool neg[3], abs[3], clamp, inbetween_neg, inbetween_abs;
|
||||||
unsigned omod;
|
uint8_t opsel = 0, omod = 0;
|
||||||
if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap,
|
if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap,
|
||||||
"012", operands, neg, abs, opsel,
|
"012", operands, neg, abs, &opsel,
|
||||||
&clamp, &omod, &inbetween_neg, &inbetween_abs, NULL)) {
|
&clamp, &omod, &inbetween_neg, &inbetween_abs, NULL)) {
|
||||||
int const0_idx = -1, const1_idx = -1;
|
int const0_idx = -1, const1_idx = -1;
|
||||||
uint32_t const0 = 0, const1 = 0;
|
uint32_t const0 = 0, const1 = 0;
|
||||||
|
|
@ -1779,9 +1780,9 @@ bool combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr,
|
||||||
if (const0_idx < 0 || const1_idx < 0)
|
if (const0_idx < 0 || const1_idx < 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (opsel[const0_idx])
|
if (opsel & (1 << const0_idx))
|
||||||
const0 >>= 16;
|
const0 >>= 16;
|
||||||
if (opsel[const1_idx])
|
if (opsel & (1 << const1_idx))
|
||||||
const1 >>= 16;
|
const1 >>= 16;
|
||||||
|
|
||||||
int lower_idx = const0_idx;
|
int lower_idx = const0_idx;
|
||||||
|
|
|
||||||
|
|
@ -1497,9 +1497,8 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
||||||
instr->operands[1].getTemp().type() == RegType::vgpr) { /* TODO: swap src0 and src1 in this case */
|
instr->operands[1].getTemp().type() == RegType::vgpr) { /* TODO: swap src0 and src1 in this case */
|
||||||
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
|
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
|
||||||
bool can_use_mac = !(vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
|
bool can_use_mac = !(vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
|
||||||
vop3->opsel[0] || vop3->opsel[1] || vop3->opsel[2] ||
|
|
||||||
vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
|
vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
|
||||||
vop3->clamp || vop3->omod);
|
vop3->clamp || vop3->omod || vop3->opsel);
|
||||||
if (can_use_mac) {
|
if (can_use_mac) {
|
||||||
instr->format = Format::VOP2;
|
instr->format = Format::VOP2;
|
||||||
instr->opcode = aco_opcode::v_mac_f32;
|
instr->opcode = aco_opcode::v_mac_f32;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue