mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-16 19:10:43 +01:00
aco: change return type of create_instruction() to Instruction*
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28370>
This commit is contained in:
parent
cd62f97719
commit
9b0ebcc39b
17 changed files with 298 additions and 297 deletions
|
|
@ -611,9 +611,9 @@ handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& i
|
|||
// TODO: try to schedule the NOP-causing instruction up to reduce the number of stall cycles
|
||||
if (NOPs) {
|
||||
/* create NOP */
|
||||
aco_ptr<SALU_instruction> nop{
|
||||
aco_ptr<Instruction> nop{
|
||||
create_instruction<SALU_instruction>(aco_opcode::s_nop, Format::SOPP, 0, 0)};
|
||||
nop->imm = NOPs - 1;
|
||||
nop->salu().imm = NOPs - 1;
|
||||
new_instructions.emplace_back(std::move(nop));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -237,7 +237,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
|||
|
||||
/* create ssa names for outer exec masks */
|
||||
if (info.has_discard && preds.size() > 1) {
|
||||
aco_ptr<Pseudo_instruction> phi;
|
||||
aco_ptr<Instruction> phi;
|
||||
for (int i = 0; i < info.num_exec_masks - 1; i++) {
|
||||
phi.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
|
||||
Format::PSEUDO, preds.size(), 1));
|
||||
|
|
@ -251,7 +251,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
|||
|
||||
if (info.has_divergent_continue) {
|
||||
/* create ssa name for loop active mask */
|
||||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
||||
phi->definitions[0] = bld.def(bld.lm);
|
||||
phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first);
|
||||
|
|
@ -312,7 +312,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
|||
ctx.info[idx].exec.emplace_back(same, type);
|
||||
} else {
|
||||
/* create phi for loop footer */
|
||||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
||||
phi->definitions[0] = bld.def(bld.lm);
|
||||
for (unsigned i = 0; i < phi->operands.size(); i++)
|
||||
|
|
|
|||
|
|
@ -993,17 +993,17 @@ emit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wai
|
|||
{
|
||||
if (imm.vs != wait_imm::unset_counter) {
|
||||
assert(ctx.gfx_level >= GFX10);
|
||||
SALU_instruction* waitcnt_vs =
|
||||
Instruction* waitcnt_vs =
|
||||
create_instruction<SALU_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 1, 0);
|
||||
waitcnt_vs->operands[0] = Operand(sgpr_null, s1);
|
||||
waitcnt_vs->imm = imm.vs;
|
||||
waitcnt_vs->salu().imm = imm.vs;
|
||||
instructions.emplace_back(waitcnt_vs);
|
||||
imm.vs = wait_imm::unset_counter;
|
||||
}
|
||||
if (!imm.empty()) {
|
||||
SALU_instruction* waitcnt =
|
||||
Instruction* waitcnt =
|
||||
create_instruction<SALU_instruction>(aco_opcode::s_waitcnt, Format::SOPP, 0, 0);
|
||||
waitcnt->imm = imm.pack(ctx.gfx_level);
|
||||
waitcnt->salu().imm = imm.pack(ctx.gfx_level);
|
||||
instructions.emplace_back(waitcnt);
|
||||
}
|
||||
imm = wait_imm();
|
||||
|
|
@ -1030,9 +1030,9 @@ emit_delay_alu(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions,
|
|||
imm |= ((uint32_t)alu_delay_wait::SALU_CYCLE_1 + cycles - 1) << (imm ? 7 : 0);
|
||||
}
|
||||
|
||||
SALU_instruction* inst =
|
||||
Instruction* inst =
|
||||
create_instruction<SALU_instruction>(aco_opcode::s_delay_alu, Format::SOPP, 0, 0);
|
||||
inst->imm = imm;
|
||||
inst->salu().imm = imm;
|
||||
inst->pass_flags = (delay.valu_cycles | (delay.trans_cycles << 16));
|
||||
instructions.emplace_back(inst);
|
||||
delay = alu_delay_info();
|
||||
|
|
|
|||
|
|
@ -380,7 +380,7 @@ emit_split_vector(isel_context* ctx, Temp vec_src, unsigned num_components)
|
|||
} else {
|
||||
rc = RegClass(vec_src.type(), vec_src.size() / num_components);
|
||||
}
|
||||
aco_ptr<Pseudo_instruction> split{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> split{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_split_vector, Format::PSEUDO, 1, num_components)};
|
||||
split->operands[0] = Operand(vec_src);
|
||||
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
|
||||
|
|
@ -432,7 +432,7 @@ expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components
|
|||
if (zero_padding)
|
||||
padding = bld.copy(bld.def(dst_rc), Operand::zero(component_bytes));
|
||||
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
|
||||
vec->definitions[0] = Definition(dst);
|
||||
unsigned k = 0;
|
||||
|
|
@ -553,7 +553,7 @@ byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigne
|
|||
if (dst.type() == RegType::vgpr) {
|
||||
/* if dst is vgpr - split the src and create a shrunk version according to the mask. */
|
||||
num_components = dst.bytes() / component_size;
|
||||
aco_ptr<Pseudo_instruction> create_vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> create_vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
|
||||
for (unsigned i = 0; i < num_components; i++)
|
||||
create_vec->operands[i] = Operand(elems[i]);
|
||||
|
|
@ -749,7 +749,7 @@ get_alu_src(struct isel_context* ctx, nir_alu_src src, unsigned size = 1)
|
|||
} else {
|
||||
assert(size <= 4);
|
||||
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
|
||||
aco_ptr<Pseudo_instruction> vec_instr{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec_instr{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, size, 1)};
|
||||
for (unsigned i = 0; i < size; ++i) {
|
||||
elems[i] = emit_extract_vector(ctx, vec, src.swizzle[i], elem_rc);
|
||||
|
|
@ -823,7 +823,7 @@ void
|
|||
emit_sop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
|
||||
bool writes_scc, uint8_t uses_ub = 0)
|
||||
{
|
||||
aco_ptr<SALU_instruction> sop2{
|
||||
aco_ptr<Instruction> sop2{
|
||||
create_instruction<SALU_instruction>(op, Format::SOP2, 2, writes_scc ? 2 : 1)};
|
||||
sop2->operands[0] = Operand(get_alu_src(ctx, instr->src[0]));
|
||||
sop2->operands[1] = Operand(get_alu_src(ctx, instr->src[1]));
|
||||
|
|
@ -1407,7 +1407,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
elems[i] = get_alu_src(ctx, instr->src[i]);
|
||||
|
||||
if (instr->def.bit_size >= 32 || dst.type() == RegType::vgpr) {
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, instr->def.num_components, 1)};
|
||||
RegClass elem_rc = RegClass::get(RegType::vgpr, instr->def.bit_size / 8u);
|
||||
for (unsigned i = 0; i < num; ++i) {
|
||||
|
|
@ -1484,7 +1484,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
if (dst.size() == 1)
|
||||
bld.copy(Definition(dst), packed[0]);
|
||||
else {
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
|
||||
vec->definitions[0] = Definition(dst);
|
||||
for (unsigned i = 0; i < dst.size(); ++i)
|
||||
|
|
@ -3954,7 +3954,7 @@ visit_load_const(isel_context* ctx, nir_load_const_instr* instr)
|
|||
bld.copy(Definition(dst), Operand::c32(instr->value[0].u32));
|
||||
} else {
|
||||
assert(dst.size() != 1);
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
|
||||
if (instr->def.bit_size == 64)
|
||||
for (unsigned i = 0; i < dst.size(); i++)
|
||||
|
|
@ -3978,7 +3978,7 @@ emit_readfirstlane(isel_context* ctx, Temp src, Temp dst)
|
|||
} else if (src.size() == 1) {
|
||||
bld.vop1(aco_opcode::v_readfirstlane_b32, Definition(dst), src);
|
||||
} else {
|
||||
aco_ptr<Pseudo_instruction> split{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> split{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_split_vector, Format::PSEUDO, 1, src.size())};
|
||||
split->operands[0] = Operand(src);
|
||||
|
||||
|
|
@ -3990,7 +3990,7 @@ emit_readfirstlane(isel_context* ctx, Temp src, Temp dst)
|
|||
Instruction* split_raw = split.get();
|
||||
ctx->block->instructions.emplace_back(std::move(split));
|
||||
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, src.size(), 1)};
|
||||
vec->definitions[0] = Definition(dst);
|
||||
for (unsigned i = 0; i < src.size(); i++) {
|
||||
|
|
@ -4246,7 +4246,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
|||
tmp[num_tmps++] = vals[i++];
|
||||
}
|
||||
if (num_tmps > 1) {
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, num_tmps, 1)};
|
||||
for (unsigned j = 0; j < num_tmps; j++)
|
||||
vec->operands[j] = Operand(tmp[j]);
|
||||
|
|
@ -4272,7 +4272,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
|||
allocated_vec[components_split++] = tmp[0];
|
||||
} else {
|
||||
assert(tmp_size % elem_rc.bytes() == 0);
|
||||
aco_ptr<Pseudo_instruction> split{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> split{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_split_vector, Format::PSEUDO, 1, tmp_size / elem_rc.bytes())};
|
||||
for (auto& def : split->definitions) {
|
||||
Temp component = bld.tmp(elem_rc);
|
||||
|
|
@ -4305,7 +4305,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
|||
int padding_bytes =
|
||||
MAX2((int)info.dst.bytes() - int(allocated_vec[0].bytes() * info.num_components), 0);
|
||||
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, info.num_components + !!padding_bytes, 1)};
|
||||
for (unsigned i = 0; i < info.num_components; i++)
|
||||
vec->operands[i] = Operand(allocated_vec[i]);
|
||||
|
|
@ -4440,7 +4440,7 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
|
|||
op = buffer ? aco_opcode::s_buffer_load_dwordx16 : aco_opcode::s_load_dwordx16;
|
||||
}
|
||||
|
||||
aco_ptr<SMEM_instruction> load{create_instruction<SMEM_instruction>(op, Format::SMEM, 2, 1)};
|
||||
aco_ptr<Instruction> load{create_instruction<SMEM_instruction>(op, Format::SMEM, 2, 1)};
|
||||
if (buffer) {
|
||||
if (const_offset)
|
||||
offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
|
||||
|
|
@ -4460,9 +4460,10 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
|
|||
RegClass rc(RegType::sgpr, DIV_ROUND_UP(bytes_needed, 4u));
|
||||
Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc);
|
||||
load->definitions[0] = Definition(val);
|
||||
load->glc = info.glc;
|
||||
load->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
load->sync = info.sync;
|
||||
load->smem().glc = info.glc;
|
||||
load->smem().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
load->smem().sync = info.sync;
|
||||
bld.insert(std::move(load));
|
||||
return val;
|
||||
}
|
||||
|
|
@ -4514,18 +4515,19 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
|
|||
bytes_size = 16;
|
||||
op = aco_opcode::buffer_load_dwordx4;
|
||||
}
|
||||
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
|
||||
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
|
||||
mubuf->operands[0] = Operand(info.resource);
|
||||
mubuf->operands[1] = vaddr;
|
||||
mubuf->operands[2] = soffset;
|
||||
mubuf->offen = offen;
|
||||
mubuf->idxen = idxen;
|
||||
mubuf->glc = info.glc;
|
||||
mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->slc = info.slc;
|
||||
mubuf->sync = info.sync;
|
||||
mubuf->offset = const_offset;
|
||||
mubuf->swizzled = info.swizzle_component_size != 0;
|
||||
mubuf->mubuf().offen = offen;
|
||||
mubuf->mubuf().idxen = idxen;
|
||||
mubuf->mubuf().glc = info.glc;
|
||||
mubuf->mubuf().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->mubuf().slc = info.slc;
|
||||
mubuf->mubuf().sync = info.sync;
|
||||
mubuf->mubuf().offset = const_offset;
|
||||
mubuf->mubuf().swizzled = info.swizzle_component_size != 0;
|
||||
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
|
||||
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
|
||||
mubuf->definitions[0] = Definition(val);
|
||||
|
|
@ -4581,17 +4583,18 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
|
|||
}
|
||||
}
|
||||
|
||||
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
|
||||
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
|
||||
mubuf->operands[0] = Operand(info.resource);
|
||||
mubuf->operands[1] = vaddr;
|
||||
mubuf->operands[2] = soffset;
|
||||
mubuf->offen = offen;
|
||||
mubuf->idxen = idxen;
|
||||
mubuf->glc = info.glc;
|
||||
mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->slc = info.slc;
|
||||
mubuf->sync = info.sync;
|
||||
mubuf->offset = const_offset;
|
||||
mubuf->mubuf().offen = offen;
|
||||
mubuf->mubuf().idxen = idxen;
|
||||
mubuf->mubuf().glc = info.glc;
|
||||
mubuf->mubuf().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->mubuf().slc = info.slc;
|
||||
mubuf->mubuf().sync = info.sync;
|
||||
mubuf->mubuf().offset = const_offset;
|
||||
RegClass rc = RegClass::get(RegType::vgpr, bytes_needed);
|
||||
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
|
||||
mubuf->definitions[0] = Definition(val);
|
||||
|
|
@ -4629,11 +4632,11 @@ scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsig
|
|||
}
|
||||
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
|
||||
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
|
||||
aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(op, Format::SCRATCH, 2, 1)};
|
||||
aco_ptr<Instruction> flat{create_instruction<FLAT_instruction>(op, Format::SCRATCH, 2, 1)};
|
||||
flat->operands[0] = offset.regClass() == s1 ? Operand(v1) : Operand(offset);
|
||||
flat->operands[1] = offset.regClass() == s1 ? Operand(offset) : Operand(s1);
|
||||
flat->sync = info.sync;
|
||||
flat->offset = const_offset;
|
||||
flat->scratch().sync = info.sync;
|
||||
flat->scratch().offset = const_offset;
|
||||
flat->definitions[0] = Definition(val);
|
||||
bld.insert(std::move(flat));
|
||||
|
||||
|
|
@ -4793,21 +4796,20 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
|
|||
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
|
||||
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
|
||||
if (use_mubuf) {
|
||||
aco_ptr<MUBUF_instruction> mubuf{
|
||||
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
|
||||
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
|
||||
mubuf->operands[0] = Operand(get_gfx6_global_rsrc(bld, addr));
|
||||
mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
|
||||
mubuf->operands[2] = Operand(offset);
|
||||
mubuf->glc = info.glc;
|
||||
mubuf->dlc = false;
|
||||
mubuf->offset = const_offset;
|
||||
mubuf->addr64 = addr.type() == RegType::vgpr;
|
||||
mubuf->disable_wqm = false;
|
||||
mubuf->sync = info.sync;
|
||||
mubuf->mubuf().glc = info.glc;
|
||||
mubuf->mubuf().dlc = false;
|
||||
mubuf->mubuf().offset = const_offset;
|
||||
mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
|
||||
mubuf->mubuf().disable_wqm = false;
|
||||
mubuf->mubuf().sync = info.sync;
|
||||
mubuf->definitions[0] = Definition(val);
|
||||
bld.insert(std::move(mubuf));
|
||||
} else {
|
||||
aco_ptr<FLAT_instruction> flat{
|
||||
aco_ptr<Instruction> flat{
|
||||
create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 2, 1)};
|
||||
if (addr.regClass() == s2) {
|
||||
assert(global && offset.id() && offset.type() == RegType::vgpr);
|
||||
|
|
@ -4818,12 +4820,12 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
|
|||
flat->operands[0] = Operand(addr);
|
||||
flat->operands[1] = Operand(s1);
|
||||
}
|
||||
flat->glc = info.glc;
|
||||
flat->dlc =
|
||||
flat->flatlike().glc = info.glc;
|
||||
flat->flatlike().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
flat->sync = info.sync;
|
||||
flat->flatlike().sync = info.sync;
|
||||
assert(global || !const_offset);
|
||||
flat->offset = const_offset;
|
||||
flat->flatlike().offset = const_offset;
|
||||
flat->definitions[0] = Definition(val);
|
||||
bld.insert(std::move(flat));
|
||||
}
|
||||
|
|
@ -5178,7 +5180,7 @@ create_vec_from_array(isel_context* ctx, Temp arr[], unsigned cnt, RegType reg_t
|
|||
dst = bld.tmp(RegClass(reg_type, cnt * dword_size));
|
||||
|
||||
std::array<Temp, NIR_MAX_VEC_COMPONENTS> allocated_vec;
|
||||
aco_ptr<Pseudo_instruction> instr{
|
||||
aco_ptr<Instruction> instr{
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, cnt, 1)};
|
||||
instr->definitions[0] = Definition(dst);
|
||||
|
||||
|
|
@ -5553,7 +5555,7 @@ emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
|
|||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
aco_ptr<Pseudo_instruction> vec(create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec(create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1));
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
if (ctx->args->frag_pos[i].used)
|
||||
|
|
@ -5617,7 +5619,7 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
if (instr->def.num_components == 1) {
|
||||
emit_interp_instr(ctx, idx, component, coords, dst, prim_mask);
|
||||
} else {
|
||||
aco_ptr<Pseudo_instruction> vec(create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec(create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, instr->def.num_components, 1));
|
||||
for (unsigned i = 0; i < instr->def.num_components; i++) {
|
||||
Temp tmp = ctx->program->allocateTmp(instr->def.bit_size == 16 ? v2b : v1);
|
||||
|
|
@ -5709,19 +5711,20 @@ mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
|
|||
abort();
|
||||
}
|
||||
|
||||
aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(op, Format::MTBUF, 3, 1)};
|
||||
aco_ptr<Instruction> mtbuf{create_instruction<MTBUF_instruction>(op, Format::MTBUF, 3, 1)};
|
||||
mtbuf->operands[0] = Operand(info.resource);
|
||||
mtbuf->operands[1] = vaddr;
|
||||
mtbuf->operands[2] = soffset;
|
||||
mtbuf->offen = offen;
|
||||
mtbuf->idxen = idxen;
|
||||
mtbuf->glc = info.glc;
|
||||
mtbuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mtbuf->slc = info.slc;
|
||||
mtbuf->sync = info.sync;
|
||||
mtbuf->offset = const_offset;
|
||||
mtbuf->dfmt = fetch_fmt & 0xf;
|
||||
mtbuf->nfmt = fetch_fmt >> 4;
|
||||
mtbuf->mtbuf().offen = offen;
|
||||
mtbuf->mtbuf().idxen = idxen;
|
||||
mtbuf->mtbuf().glc = info.glc;
|
||||
mtbuf->mtbuf().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mtbuf->mtbuf().slc = info.slc;
|
||||
mtbuf->mtbuf().sync = info.sync;
|
||||
mtbuf->mtbuf().offset = const_offset;
|
||||
mtbuf->mtbuf().dfmt = fetch_fmt & 0xf;
|
||||
mtbuf->mtbuf().nfmt = fetch_fmt >> 4;
|
||||
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
|
||||
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
|
||||
mtbuf->definitions[0] = Definition(val);
|
||||
|
|
@ -5757,7 +5760,7 @@ visit_load_fs_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
unsigned num_components = instr->def.num_components;
|
||||
if (instr->def.bit_size == 64)
|
||||
num_components *= 2;
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
unsigned chan_component = (component + i) % 4;
|
||||
|
|
@ -5876,7 +5879,7 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
if ((ctx->args->inline_push_const_mask | mask) == ctx->args->inline_push_const_mask &&
|
||||
start + count <= (sizeof(ctx->args->inline_push_const_mask) * 8u)) {
|
||||
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, count, 1)};
|
||||
unsigned arg_index =
|
||||
util_bitcount64(ctx->args->inline_push_const_mask & BITFIELD64_MASK(start));
|
||||
|
|
@ -6066,7 +6069,7 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
|
|||
if (nsa_size < coords.size()) {
|
||||
Temp coord = coords[nsa_size];
|
||||
if (coords.size() - nsa_size > 1) {
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, coords.size() - nsa_size, 1)};
|
||||
|
||||
unsigned coord_size = 0;
|
||||
|
|
@ -6088,7 +6091,7 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
|
|||
|
||||
bool has_dst = dst.id() != 0;
|
||||
|
||||
aco_ptr<MIMG_instruction> mimg{
|
||||
aco_ptr<Instruction> mimg{
|
||||
create_instruction<MIMG_instruction>(op, Format::MIMG, 3 + coords.size(), has_dst)};
|
||||
if (has_dst)
|
||||
mimg->definitions[0] = Definition(dst);
|
||||
|
|
@ -6100,11 +6103,9 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
|
|||
if (coords[i].regClass().is_linear_vgpr())
|
||||
mimg->operands[3 + i].setLateKill(true);
|
||||
}
|
||||
mimg->strict_wqm = strict_wqm;
|
||||
mimg->mimg().strict_wqm = strict_wqm;
|
||||
|
||||
MIMG_instruction* res = mimg.get();
|
||||
bld.insert(std::move(mimg));
|
||||
return res;
|
||||
return &bld.insert(std::move(mimg))->mimg();
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -6257,8 +6258,8 @@ emit_tfe_init(Builder& bld, Temp dst)
|
|||
{
|
||||
Temp tmp = bld.tmp(dst.regClass());
|
||||
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector,
|
||||
Format::PSEUDO, dst.size(), 1)};
|
||||
for (unsigned i = 0; i < dst.size(); i++)
|
||||
vec->operands[i] = Operand::zero();
|
||||
vec->definitions[0] = Definition(tmp);
|
||||
|
|
@ -6332,19 +6333,19 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
default: unreachable(">4 channel buffer image load");
|
||||
}
|
||||
}
|
||||
aco_ptr<MUBUF_instruction> load{
|
||||
aco_ptr<Instruction> load{
|
||||
create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 3 + is_sparse, 1)};
|
||||
load->operands[0] = Operand(resource);
|
||||
load->operands[1] = Operand(vindex);
|
||||
load->operands[2] = Operand::c32(0);
|
||||
load->definitions[0] = Definition(tmp);
|
||||
load->idxen = true;
|
||||
load->glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
|
||||
load->dlc =
|
||||
load->glc && (ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
|
||||
load->sync = sync;
|
||||
load->tfe = is_sparse;
|
||||
if (load->tfe)
|
||||
load->mubuf().idxen = true;
|
||||
load->mubuf().glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
|
||||
load->mubuf().dlc = load->mubuf().glc &&
|
||||
(ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
|
||||
load->mubuf().sync = sync;
|
||||
load->mubuf().tfe = is_sparse;
|
||||
if (load->mubuf().tfe)
|
||||
load->operands[3] = emit_tfe_init(bld, tmp);
|
||||
ctx->block->instructions.emplace_back(std::move(load));
|
||||
} else {
|
||||
|
|
@ -6446,7 +6447,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
if (dmask_count == 1) {
|
||||
data = emit_extract_vector(ctx, data, ffs(dmask) - 1, rc);
|
||||
} else {
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, dmask_count, 1)};
|
||||
uint32_t index = 0;
|
||||
u_foreach_bit (bit, dmask) {
|
||||
|
|
@ -6480,17 +6481,17 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
default: unreachable(">4 channel buffer image store");
|
||||
}
|
||||
}
|
||||
aco_ptr<MUBUF_instruction> store{
|
||||
aco_ptr<Instruction> store{
|
||||
create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)};
|
||||
store->operands[0] = Operand(rsrc);
|
||||
store->operands[1] = Operand(vindex);
|
||||
store->operands[2] = Operand::c32(0);
|
||||
store->operands[3] = Operand(data);
|
||||
store->idxen = true;
|
||||
store->glc = glc;
|
||||
store->dlc = false;
|
||||
store->disable_wqm = true;
|
||||
store->sync = sync;
|
||||
store->mubuf().idxen = true;
|
||||
store->mubuf().glc = glc;
|
||||
store->mubuf().dlc = false;
|
||||
store->mubuf().disable_wqm = true;
|
||||
store->mubuf().sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(store));
|
||||
return;
|
||||
|
|
@ -6634,7 +6635,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
|
||||
// assert(ctx->options->gfx_level < GFX9 && "GFX9 stride size workaround not yet
|
||||
// implemented.");
|
||||
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(
|
||||
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(
|
||||
is_64bit ? buf_op64 : buf_op, Format::MUBUF, 4, return_previous ? 1 : 0)};
|
||||
mubuf->operands[0] = Operand(resource);
|
||||
mubuf->operands[1] = Operand(vindex);
|
||||
|
|
@ -6644,12 +6645,12 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
|
||||
if (return_previous)
|
||||
mubuf->definitions[0] = def;
|
||||
mubuf->offset = 0;
|
||||
mubuf->idxen = true;
|
||||
mubuf->glc = return_previous;
|
||||
mubuf->dlc = false; /* Not needed for atomics */
|
||||
mubuf->disable_wqm = true;
|
||||
mubuf->sync = sync;
|
||||
mubuf->mubuf().offset = 0;
|
||||
mubuf->mubuf().idxen = true;
|
||||
mubuf->mubuf().glc = return_previous;
|
||||
mubuf->mubuf().dlc = false; /* Not needed for atomics */
|
||||
mubuf->mubuf().disable_wqm = true;
|
||||
mubuf->mubuf().sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(mubuf));
|
||||
if (return_previous && cmpswap)
|
||||
|
|
@ -6728,18 +6729,17 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
for (unsigned i = 0; i < write_count; i++) {
|
||||
aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
|
||||
|
||||
aco_ptr<MUBUF_instruction> store{
|
||||
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
|
||||
aco_ptr<Instruction> store{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
|
||||
store->operands[0] = Operand(rsrc);
|
||||
store->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
|
||||
store->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand::c32(0);
|
||||
store->operands[3] = Operand(write_datas[i]);
|
||||
store->offset = offsets[i];
|
||||
store->offen = (offset.type() == RegType::vgpr);
|
||||
store->glc = glc;
|
||||
store->dlc = false;
|
||||
store->disable_wqm = true;
|
||||
store->sync = sync;
|
||||
store->mubuf().offset = offsets[i];
|
||||
store->mubuf().offen = (offset.type() == RegType::vgpr);
|
||||
store->mubuf().glc = glc;
|
||||
store->mubuf().dlc = false;
|
||||
store->mubuf().disable_wqm = true;
|
||||
store->mubuf().sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(store));
|
||||
}
|
||||
|
|
@ -6767,7 +6767,7 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
|
||||
aco_opcode op = instr->def.bit_size == 32 ? op32 : op64;
|
||||
aco_ptr<MUBUF_instruction> mubuf{
|
||||
aco_ptr<Instruction> mubuf{
|
||||
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
|
||||
mubuf->operands[0] = Operand(rsrc);
|
||||
mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
|
||||
|
|
@ -6777,12 +6777,12 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
|
||||
if (return_previous)
|
||||
mubuf->definitions[0] = def;
|
||||
mubuf->offset = 0;
|
||||
mubuf->offen = (offset.type() == RegType::vgpr);
|
||||
mubuf->glc = return_previous;
|
||||
mubuf->dlc = false; /* Not needed for atomics */
|
||||
mubuf->disable_wqm = true;
|
||||
mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
|
||||
mubuf->mubuf().offset = 0;
|
||||
mubuf->mubuf().offen = (offset.type() == RegType::vgpr);
|
||||
mubuf->mubuf().glc = return_previous;
|
||||
mubuf->mubuf().dlc = false; /* Not needed for atomics */
|
||||
mubuf->mubuf().disable_wqm = true;
|
||||
mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(mubuf));
|
||||
if (return_previous && cmpswap)
|
||||
|
|
@ -6901,7 +6901,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
default: unreachable("store_global not implemented for this size.");
|
||||
}
|
||||
|
||||
aco_ptr<FLAT_instruction> flat{
|
||||
aco_ptr<Instruction> flat{
|
||||
create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 3, 0)};
|
||||
if (write_address.regClass() == s2) {
|
||||
assert(global && write_offset.id() && write_offset.type() == RegType::vgpr);
|
||||
|
|
@ -6913,12 +6913,12 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
flat->operands[1] = Operand(s1);
|
||||
}
|
||||
flat->operands[2] = Operand(write_datas[i]);
|
||||
flat->glc = glc;
|
||||
flat->dlc = false;
|
||||
flat->flatlike().glc = glc;
|
||||
flat->flatlike().dlc = false;
|
||||
assert(global || !write_const_offset);
|
||||
flat->offset = write_const_offset;
|
||||
flat->disable_wqm = true;
|
||||
flat->sync = sync;
|
||||
flat->flatlike().offset = write_const_offset;
|
||||
flat->flatlike().disable_wqm = true;
|
||||
flat->flatlike().sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(flat));
|
||||
} else {
|
||||
|
|
@ -6928,19 +6928,18 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
|
||||
Temp rsrc = get_gfx6_global_rsrc(bld, write_address);
|
||||
|
||||
aco_ptr<MUBUF_instruction> mubuf{
|
||||
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
|
||||
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
|
||||
mubuf->operands[0] = Operand(rsrc);
|
||||
mubuf->operands[1] =
|
||||
write_address.type() == RegType::vgpr ? Operand(write_address) : Operand(v1);
|
||||
mubuf->operands[2] = Operand(write_offset);
|
||||
mubuf->operands[3] = Operand(write_datas[i]);
|
||||
mubuf->glc = glc;
|
||||
mubuf->dlc = false;
|
||||
mubuf->offset = write_const_offset;
|
||||
mubuf->addr64 = write_address.type() == RegType::vgpr;
|
||||
mubuf->disable_wqm = true;
|
||||
mubuf->sync = sync;
|
||||
mubuf->mubuf().glc = glc;
|
||||
mubuf->mubuf().dlc = false;
|
||||
mubuf->mubuf().offset = write_const_offset;
|
||||
mubuf->mubuf().addr64 = write_address.type() == RegType::vgpr;
|
||||
mubuf->mubuf().disable_wqm = true;
|
||||
mubuf->mubuf().sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(mubuf));
|
||||
}
|
||||
|
|
@ -7029,7 +7028,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
}
|
||||
|
||||
aco_opcode op = instr->def.bit_size == 32 ? op32 : op64;
|
||||
aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(
|
||||
aco_ptr<Instruction> flat{create_instruction<FLAT_instruction>(
|
||||
op, global ? Format::GLOBAL : Format::FLAT, 3, return_previous ? 1 : 0)};
|
||||
if (addr.regClass() == s2) {
|
||||
assert(global && offset.id() && offset.type() == RegType::vgpr);
|
||||
|
|
@ -7043,12 +7042,12 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
flat->operands[2] = Operand(data);
|
||||
if (return_previous)
|
||||
flat->definitions[0] = Definition(dst);
|
||||
flat->glc = return_previous;
|
||||
flat->dlc = false; /* Not needed for atomics */
|
||||
flat->flatlike().glc = return_previous;
|
||||
flat->flatlike().dlc = false; /* Not needed for atomics */
|
||||
assert(global || !const_offset);
|
||||
flat->offset = const_offset;
|
||||
flat->disable_wqm = true;
|
||||
flat->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
|
||||
flat->flatlike().offset = const_offset;
|
||||
flat->flatlike().disable_wqm = true;
|
||||
flat->flatlike().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(flat));
|
||||
} else {
|
||||
|
|
@ -7061,7 +7060,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
|
||||
aco_opcode op = instr->def.bit_size == 32 ? op32 : op64;
|
||||
|
||||
aco_ptr<MUBUF_instruction> mubuf{
|
||||
aco_ptr<Instruction> mubuf{
|
||||
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
|
||||
mubuf->operands[0] = Operand(rsrc);
|
||||
mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
|
||||
|
|
@ -7071,12 +7070,12 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
|
||||
if (return_previous)
|
||||
mubuf->definitions[0] = def;
|
||||
mubuf->glc = return_previous;
|
||||
mubuf->dlc = false;
|
||||
mubuf->offset = const_offset;
|
||||
mubuf->addr64 = addr.type() == RegType::vgpr;
|
||||
mubuf->disable_wqm = true;
|
||||
mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
|
||||
mubuf->mubuf().glc = return_previous;
|
||||
mubuf->mubuf().dlc = false;
|
||||
mubuf->mubuf().offset = const_offset;
|
||||
mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
|
||||
mubuf->mubuf().disable_wqm = true;
|
||||
mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(mubuf));
|
||||
if (return_previous && cmpswap)
|
||||
|
|
@ -7473,7 +7472,7 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
offset = 0;
|
||||
}
|
||||
|
||||
aco_ptr<DS_instruction> ds;
|
||||
aco_ptr<Instruction> ds;
|
||||
ds.reset(
|
||||
create_instruction<DS_instruction>(op, Format::DS, num_operands, return_previous ? 1 : 0));
|
||||
ds->operands[0] = Operand(address);
|
||||
|
|
@ -7485,10 +7484,10 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
std::swap(ds->operands[1], ds->operands[2]);
|
||||
}
|
||||
ds->operands[num_operands - 1] = m;
|
||||
ds->offset0 = offset;
|
||||
ds->ds().offset0 = offset;
|
||||
if (return_previous)
|
||||
ds->definitions[0] = Definition(get_ssa_temp(ctx, &instr->def));
|
||||
ds->sync = memory_sync_info(storage_shared, semantic_atomicrmw);
|
||||
ds->ds().sync = memory_sync_info(storage_shared, semantic_atomicrmw);
|
||||
|
||||
if (m.isUndefined())
|
||||
ds->operands.pop_back();
|
||||
|
|
@ -7916,7 +7915,7 @@ emit_reduction_instr(isel_context* ctx, aco_opcode aco_op, ReduceOp op, unsigned
|
|||
if (clobber_vcc)
|
||||
defs[num_defs++] = bld.def(bld.lm, vcc);
|
||||
|
||||
Pseudo_reduction_instruction* reduce = create_instruction<Pseudo_reduction_instruction>(
|
||||
Instruction* reduce = create_instruction<Pseudo_reduction_instruction>(
|
||||
aco_op, Format::PSEUDO_REDUCTION, 3, num_defs);
|
||||
reduce->operands[0] = Operand(src);
|
||||
/* setup_reduce_temp will update these undef operands if needed */
|
||||
|
|
@ -7924,8 +7923,8 @@ emit_reduction_instr(isel_context* ctx, aco_opcode aco_op, ReduceOp op, unsigned
|
|||
reduce->operands[2] = Operand(v1.as_linear());
|
||||
std::copy(defs, defs + num_defs, reduce->definitions.begin());
|
||||
|
||||
reduce->reduce_op = op;
|
||||
reduce->cluster_size = cluster_size;
|
||||
reduce->reduction().reduce_op = op;
|
||||
reduce->reduction().cluster_size = cluster_size;
|
||||
bld.insert(std::move(reduce));
|
||||
|
||||
return dst.getTemp();
|
||||
|
|
@ -8111,7 +8110,7 @@ create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt*
|
|||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
aco_ptr<Pseudo_instruction> exp{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> exp{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)};
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1);
|
||||
|
|
@ -9149,7 +9148,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
|
||||
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
|
||||
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)};
|
||||
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||
|
||||
|
|
@ -9209,36 +9208,37 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
|
||||
const bool row_en = instr->intrinsic == nir_intrinsic_export_row_amd;
|
||||
|
||||
aco_ptr<Export_instruction> exp{
|
||||
aco_ptr<Instruction> exp{
|
||||
create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4 + row_en, 0)};
|
||||
|
||||
exp->dest = target;
|
||||
exp->enabled_mask = write_mask;
|
||||
exp->compressed = flags & AC_EXP_FLAG_COMPRESSED;
|
||||
exp->exp().dest = target;
|
||||
exp->exp().enabled_mask = write_mask;
|
||||
exp->exp().compressed = flags & AC_EXP_FLAG_COMPRESSED;
|
||||
|
||||
/* ACO may reorder position/mrt export instructions, then mark done for last
|
||||
* export instruction. So don't respect the nir AC_EXP_FLAG_DONE for position/mrt
|
||||
* exports here and leave it to ACO.
|
||||
*/
|
||||
if (target == V_008DFC_SQ_EXP_PRIM)
|
||||
exp->done = flags & AC_EXP_FLAG_DONE;
|
||||
exp->exp().done = flags & AC_EXP_FLAG_DONE;
|
||||
else
|
||||
exp->done = false;
|
||||
exp->exp().done = false;
|
||||
|
||||
/* ACO may reorder mrt export instructions, then mark valid mask for last
|
||||
* export instruction. So don't respect the nir AC_EXP_FLAG_VALID_MASK for mrt
|
||||
* exports here and leave it to ACO.
|
||||
*/
|
||||
if (target > V_008DFC_SQ_EXP_NULL)
|
||||
exp->valid_mask = flags & AC_EXP_FLAG_VALID_MASK;
|
||||
exp->exp().valid_mask = flags & AC_EXP_FLAG_VALID_MASK;
|
||||
else
|
||||
exp->valid_mask = false;
|
||||
exp->exp().valid_mask = false;
|
||||
|
||||
exp->row_en = row_en;
|
||||
exp->exp().row_en = row_en;
|
||||
|
||||
/* Compressed export uses two bits for a channel. */
|
||||
uint32_t channel_mask =
|
||||
exp->compressed ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) : write_mask;
|
||||
uint32_t channel_mask = exp->exp().compressed
|
||||
? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0)
|
||||
: write_mask;
|
||||
|
||||
Temp value = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
|
|
@ -9287,7 +9287,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
if (it != ctx->allocated_vec.end())
|
||||
num_src = src.bytes() / it->second[0].bytes();
|
||||
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, num_src + !!begin_size, 1)};
|
||||
|
||||
if (begin_size)
|
||||
|
|
@ -9730,15 +9730,15 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
|||
}
|
||||
}
|
||||
|
||||
aco_ptr<MUBUF_instruction> mubuf{
|
||||
aco_ptr<Instruction> mubuf{
|
||||
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3 + instr->is_sparse, 1)};
|
||||
mubuf->operands[0] = Operand(resource);
|
||||
mubuf->operands[1] = Operand(coords[0]);
|
||||
mubuf->operands[2] = Operand::c32(0);
|
||||
mubuf->definitions[0] = Definition(tmp_dst);
|
||||
mubuf->idxen = true;
|
||||
mubuf->tfe = instr->is_sparse;
|
||||
if (mubuf->tfe)
|
||||
mubuf->mubuf().idxen = true;
|
||||
mubuf->mubuf().tfe = instr->is_sparse;
|
||||
if (mubuf->mubuf().tfe)
|
||||
mubuf->operands[3] = emit_tfe_init(bld, tmp_dst);
|
||||
ctx->block->instructions.emplace_back(std::move(mubuf));
|
||||
|
||||
|
|
@ -10008,7 +10008,7 @@ get_phi_operand(isel_context* ctx, nir_def* ssa, RegClass rc, bool logical)
|
|||
void
|
||||
visit_phi(isel_context* ctx, nir_phi_instr* instr)
|
||||
{
|
||||
aco_ptr<Pseudo_instruction> phi;
|
||||
aco_ptr<Instruction> phi;
|
||||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
assert(instr->def.bit_size != 1 || dst.regClass() == ctx->program->lane_mask);
|
||||
|
||||
|
|
@ -10111,7 +10111,7 @@ visit_undef(isel_context* ctx, nir_undef_instr* instr)
|
|||
if (dst.size() == 1) {
|
||||
Builder(ctx->program, ctx->block).copy(Definition(dst), Operand::zero());
|
||||
} else {
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
|
||||
for (unsigned i = 0; i < dst.size(); i++)
|
||||
vec->operands[i] = Operand::zero();
|
||||
|
|
@ -10210,7 +10210,7 @@ end_loop(isel_context* ctx, loop_context* lc)
|
|||
/* trim linear phis in loop header */
|
||||
for (auto&& instr : loop_entry->instructions) {
|
||||
if (instr->opcode == aco_opcode::p_linear_phi) {
|
||||
aco_ptr<Pseudo_instruction> new_phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, loop_entry->linear_predecessors.size(), 1)};
|
||||
aco_ptr<Instruction> new_phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, loop_entry->linear_predecessors.size(), 1)};
|
||||
new_phi->definitions[0] = instr->definitions[0];
|
||||
for (unsigned i = 0; i < new_phi->operands.size(); i++)
|
||||
new_phi->operands[i] = instr->operands[i];
|
||||
|
|
@ -10484,13 +10484,14 @@ begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond,
|
|||
|
||||
/* branch to linear then block */
|
||||
assert(cond.regClass() == ctx->program->lane_mask);
|
||||
aco_ptr<Pseudo_branch_instruction> branch;
|
||||
aco_ptr<Instruction> branch;
|
||||
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z,
|
||||
Format::PSEUDO_BRANCH, 1, 1));
|
||||
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
|
||||
branch->operands[0] = Operand(cond);
|
||||
branch->selection_control_remove = sel_ctrl == nir_selection_control_flatten ||
|
||||
sel_ctrl == nir_selection_control_divergent_always_taken;
|
||||
branch->branch().selection_control_remove =
|
||||
sel_ctrl == nir_selection_control_flatten ||
|
||||
sel_ctrl == nir_selection_control_divergent_always_taken;
|
||||
ctx->block->instructions.push_back(std::move(branch));
|
||||
|
||||
ic->BB_if_idx = ctx->block->index;
|
||||
|
|
@ -10528,7 +10529,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
|
|||
Block* BB_then_logical = ctx->block;
|
||||
append_logical_end(BB_then_logical);
|
||||
/* branch from logical then block to invert block */
|
||||
aco_ptr<Pseudo_branch_instruction> branch;
|
||||
aco_ptr<Instruction> branch;
|
||||
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
|
||||
Format::PSEUDO_BRANCH, 0, 1));
|
||||
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
|
||||
|
|
@ -10561,8 +10562,9 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
|
|||
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
|
||||
Format::PSEUDO_BRANCH, 0, 1));
|
||||
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
|
||||
branch->selection_control_remove = sel_ctrl == nir_selection_control_flatten ||
|
||||
sel_ctrl == nir_selection_control_divergent_always_taken;
|
||||
branch->branch().selection_control_remove =
|
||||
sel_ctrl == nir_selection_control_flatten ||
|
||||
sel_ctrl == nir_selection_control_divergent_always_taken;
|
||||
ctx->block->instructions.push_back(std::move(branch));
|
||||
|
||||
ic->exec_potentially_empty_discard_old |= ctx->cf_info.exec_potentially_empty_discard;
|
||||
|
|
@ -10593,7 +10595,7 @@ end_divergent_if(isel_context* ctx, if_context* ic)
|
|||
append_logical_end(BB_else_logical);
|
||||
|
||||
/* branch from logical else block to endif block */
|
||||
aco_ptr<Pseudo_branch_instruction> branch;
|
||||
aco_ptr<Instruction> branch;
|
||||
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
|
||||
Format::PSEUDO_BRANCH, 0, 1));
|
||||
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
|
||||
|
|
@ -10650,7 +10652,7 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
|
|||
append_logical_end(ctx->block);
|
||||
ctx->block->kind |= block_kind_uniform;
|
||||
|
||||
aco_ptr<Pseudo_branch_instruction> branch;
|
||||
aco_ptr<Instruction> branch;
|
||||
aco_opcode branch_opcode = aco_opcode::p_cbranch_z;
|
||||
branch.reset(
|
||||
create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 1));
|
||||
|
|
@ -10687,7 +10689,7 @@ begin_uniform_if_else(isel_context* ctx, if_context* ic)
|
|||
if (!ic->uniform_has_then_branch) {
|
||||
append_logical_end(BB_then);
|
||||
/* branch from then block to endif block */
|
||||
aco_ptr<Pseudo_branch_instruction> branch;
|
||||
aco_ptr<Instruction> branch;
|
||||
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
|
||||
Format::PSEUDO_BRANCH, 0, 1));
|
||||
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
|
||||
|
|
@ -10719,7 +10721,7 @@ end_uniform_if(isel_context* ctx, if_context* ic)
|
|||
if (!ctx->cf_info.has_branch) {
|
||||
append_logical_end(BB_else);
|
||||
/* branch from then block to endif block */
|
||||
aco_ptr<Pseudo_branch_instruction> branch;
|
||||
aco_ptr<Instruction> branch;
|
||||
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
|
||||
Format::PSEUDO_BRANCH, 0, 1));
|
||||
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
|
||||
|
|
@ -10747,7 +10749,7 @@ visit_if(isel_context* ctx, nir_if* if_stmt)
|
|||
{
|
||||
Temp cond = get_ssa_temp(ctx, if_stmt->condition.ssa);
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
aco_ptr<Pseudo_branch_instruction> branch;
|
||||
aco_ptr<Instruction> branch;
|
||||
if_context ic;
|
||||
|
||||
if (!nir_src_is_divergent(if_stmt->condition)) { /* uniform condition */
|
||||
|
|
@ -11138,7 +11140,7 @@ create_fs_jump_to_epilog(isel_context* ctx)
|
|||
|
||||
Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.epilog_pc));
|
||||
|
||||
aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> jump{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + exports.size(), 0)};
|
||||
jump->operands[0] = Operand(continue_pc);
|
||||
for (unsigned i = 0; i < exports.size(); i++) {
|
||||
|
|
@ -11192,8 +11194,8 @@ passthrough_all_args(isel_context* ctx, std::vector<Operand>& regs)
|
|||
static void
|
||||
build_end_with_regs(isel_context* ctx, std::vector<Operand>& regs)
|
||||
{
|
||||
aco_ptr<Pseudo_instruction> end{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_end_with_regs, Format::PSEUDO, regs.size(), 0)};
|
||||
aco_ptr<Instruction> end{create_instruction<Pseudo_instruction>(aco_opcode::p_end_with_regs,
|
||||
Format::PSEUDO, regs.size(), 0)};
|
||||
|
||||
for (unsigned i = 0; i < regs.size(); i++)
|
||||
end->operands[i] = regs[i];
|
||||
|
|
@ -11240,7 +11242,7 @@ create_tcs_jump_to_epilog(isel_context* ctx)
|
|||
|
||||
Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.epilog_pc));
|
||||
|
||||
aco_ptr<Pseudo_instruction> jump{
|
||||
aco_ptr<Instruction> jump{
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_jump_to_epilog, Format::PSEUDO, 14, 0)};
|
||||
jump->operands[0] = Operand(continue_pc);
|
||||
jump->operands[1] = ring_offsets;
|
||||
|
|
@ -11374,7 +11376,7 @@ create_fs_end_for_epilog(isel_context* ctx)
|
|||
ctx->program->needs_exact = true;
|
||||
}
|
||||
|
||||
Pseudo_instruction*
|
||||
Instruction*
|
||||
add_startpgm(struct isel_context* ctx)
|
||||
{
|
||||
unsigned def_count = 0;
|
||||
|
|
@ -11388,7 +11390,7 @@ add_startpgm(struct isel_context* ctx)
|
|||
def_count++;
|
||||
}
|
||||
|
||||
Pseudo_instruction* startpgm =
|
||||
Instruction* startpgm =
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, def_count);
|
||||
ctx->block->instructions.emplace_back(startpgm);
|
||||
for (unsigned i = 0, arg = 0; i < ctx->args->arg_count; i++) {
|
||||
|
|
@ -11478,7 +11480,7 @@ fix_ls_vgpr_init_bug(isel_context* ctx)
|
|||
}
|
||||
|
||||
void
|
||||
split_arguments(isel_context* ctx, Pseudo_instruction* startpgm)
|
||||
split_arguments(isel_context* ctx, Instruction* startpgm)
|
||||
{
|
||||
/* Split all arguments except for the first (ring_offsets) and the last
|
||||
* (exec) so that the dead channels don't stay live throughout the program.
|
||||
|
|
@ -11645,7 +11647,7 @@ insert_rt_jump_next(isel_context& ctx, const struct ac_shader_args* args)
|
|||
for (unsigned i = 0; i < ctx.args->arg_count; i++)
|
||||
src_count += !!BITSET_TEST(ctx.output_args, i);
|
||||
|
||||
Pseudo_instruction* ret =
|
||||
Instruction* ret =
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_return, Format::PSEUDO, src_count, 0);
|
||||
ctx.block->instructions.emplace_back(ret);
|
||||
|
||||
|
|
@ -11682,7 +11684,7 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c
|
|||
init_context(&ctx, nir);
|
||||
setup_fp_mode(&ctx, nir);
|
||||
|
||||
Pseudo_instruction* startpgm = add_startpgm(&ctx);
|
||||
Instruction* startpgm = add_startpgm(&ctx);
|
||||
append_logical_start(ctx.block);
|
||||
split_arguments(&ctx, startpgm);
|
||||
visit_cf_list(&ctx, &nir_shader_get_entrypoint(nir)->body);
|
||||
|
|
@ -11839,7 +11841,7 @@ create_merged_jump_to_epilog(isel_context* ctx)
|
|||
Temp continue_pc =
|
||||
convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.next_stage_pc));
|
||||
|
||||
aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> jump{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + regs.size(), 0)};
|
||||
jump->operands[0] = Operand(continue_pc);
|
||||
for (unsigned i = 0; i < regs.size(); i++) {
|
||||
|
|
@ -11884,7 +11886,7 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons
|
|||
|
||||
if (need_startpgm) {
|
||||
/* Needs to be after init_context() for FS. */
|
||||
Pseudo_instruction* startpgm = add_startpgm(&ctx);
|
||||
Instruction* startpgm = add_startpgm(&ctx);
|
||||
append_logical_start(ctx.block);
|
||||
|
||||
if (ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs &&
|
||||
|
|
|
|||
|
|
@ -1671,7 +1671,7 @@ struct instr_deleter_functor {
|
|||
template <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
|
||||
|
||||
template <typename T>
|
||||
T*
|
||||
Instruction*
|
||||
create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
|
||||
uint32_t num_definitions)
|
||||
{
|
||||
|
|
@ -1679,7 +1679,7 @@ create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
|
|||
sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
|
||||
void* data = instruction_buffer->allocate(size, alignof(uint32_t));
|
||||
memset(data, 0, size);
|
||||
T* inst = (T*)data;
|
||||
Instruction* inst = (Instruction*)data;
|
||||
|
||||
inst->opcode = opcode;
|
||||
inst->format = format;
|
||||
|
|
|
|||
|
|
@ -108,8 +108,8 @@ get_output(Program* program, unsigned block_idx, ssa_state* state)
|
|||
}
|
||||
|
||||
/* create phi */
|
||||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_linear_phi, Format::PSEUDO, num_preds, 1)};
|
||||
aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
|
||||
Format::PSEUDO, num_preds, 1)};
|
||||
for (unsigned i = 0; i < num_preds; i++)
|
||||
phi->operands[i] = state->outputs[block.linear_preds[i]];
|
||||
phi->definitions[0] = Definition(output.getTemp());
|
||||
|
|
@ -347,8 +347,8 @@ lower_divergent_bool_phi(Program* program, ssa_state* state, Block* block,
|
|||
|
||||
unsigned num_preds = block->linear_preds.size();
|
||||
if (phi->operands.size() != num_preds) {
|
||||
Pseudo_instruction* new_phi{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_linear_phi, Format::PSEUDO, num_preds, 1)};
|
||||
Instruction* new_phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
|
||||
Format::PSEUDO, num_preds, 1)};
|
||||
new_phi->definitions[0] = phi->definitions[0];
|
||||
phi.reset(new_phi);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -424,8 +424,8 @@ emit_copies_block(Builder& bld, std::map<uint32_t, ltg_node>& ltg, RegType type)
|
|||
// TODO: this should be restricted to a feasible number of registers
|
||||
// and otherwise use a temporary to avoid having to reload more (spilled)
|
||||
// variables than we have registers.
|
||||
aco_ptr<Pseudo_instruction> copy{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_parallelcopy, Format::PSEUDO, num, num)};
|
||||
aco_ptr<Instruction> copy{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy,
|
||||
Format::PSEUDO, num, num)};
|
||||
it = ltg.begin();
|
||||
for (unsigned i = 0; i < num; i++) {
|
||||
while (it->second.cp.def.regClass().type() != type)
|
||||
|
|
|
|||
|
|
@ -601,13 +601,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
|
||||
if (src.regClass() == v1b) {
|
||||
if (ctx->program->gfx_level >= GFX8 && ctx->program->gfx_level < GFX11) {
|
||||
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
|
||||
aco_ptr<Instruction> sdwa{create_instruction<SDWA_instruction>(
|
||||
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
|
||||
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
|
||||
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
|
||||
bool sext = reduce_op == imin8 || reduce_op == imax8;
|
||||
sdwa->sel[0] = SubdwordSel(1, 0, sext);
|
||||
sdwa->dst_sel = SubdwordSel::dword;
|
||||
sdwa->sdwa().sel[0] = SubdwordSel(1, 0, sext);
|
||||
sdwa->sdwa().dst_sel = SubdwordSel::dword;
|
||||
bld.insert(std::move(sdwa));
|
||||
} else {
|
||||
aco_opcode opcode;
|
||||
|
|
@ -624,13 +624,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
bool is_add_cmp = reduce_op == iadd16 || reduce_op == imax16 || reduce_op == imin16 ||
|
||||
reduce_op == umin16 || reduce_op == umax16;
|
||||
if (ctx->program->gfx_level >= GFX10 && ctx->program->gfx_level < GFX11 && is_add_cmp) {
|
||||
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
|
||||
aco_ptr<Instruction> sdwa{create_instruction<SDWA_instruction>(
|
||||
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
|
||||
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
|
||||
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
|
||||
bool sext = reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16;
|
||||
sdwa->sel[0] = SubdwordSel(2, 0, sext);
|
||||
sdwa->dst_sel = SubdwordSel::dword;
|
||||
sdwa->sdwa().sel[0] = SubdwordSel(2, 0, sext);
|
||||
sdwa->sdwa().dst_sel = SubdwordSel::dword;
|
||||
bld.insert(std::move(sdwa));
|
||||
} else if (ctx->program->gfx_level <= GFX7 ||
|
||||
(ctx->program->gfx_level >= GFX11 && is_add_cmp)) {
|
||||
|
|
@ -2259,7 +2259,7 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
|
|||
instr->mimg().strict_wqm = false;
|
||||
|
||||
if ((3 + num_vaddr) > instr->operands.size()) {
|
||||
MIMG_instruction* new_instr = create_instruction<MIMG_instruction>(
|
||||
Instruction* new_instr = create_instruction<MIMG_instruction>(
|
||||
instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
|
||||
std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
|
||||
new_instr->definitions.begin());
|
||||
|
|
|
|||
|
|
@ -905,7 +905,7 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
smem.operands.back() = Operand(base);
|
||||
}
|
||||
} else {
|
||||
SMEM_instruction* new_instr = create_instruction<SMEM_instruction>(
|
||||
Instruction* new_instr = create_instruction<SMEM_instruction>(
|
||||
smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
|
||||
new_instr->operands[0] = smem.operands[0];
|
||||
new_instr->operands[1] = Operand::c32(offset);
|
||||
|
|
@ -914,11 +914,11 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
new_instr->operands.back() = Operand(base);
|
||||
if (!smem.definitions.empty())
|
||||
new_instr->definitions[0] = smem.definitions[0];
|
||||
new_instr->sync = smem.sync;
|
||||
new_instr->glc = smem.glc;
|
||||
new_instr->dlc = smem.dlc;
|
||||
new_instr->nv = smem.nv;
|
||||
new_instr->disable_wqm = smem.disable_wqm;
|
||||
new_instr->smem().sync = smem.sync;
|
||||
new_instr->smem().glc = smem.glc;
|
||||
new_instr->smem().dlc = smem.dlc;
|
||||
new_instr->smem().nv = smem.nv;
|
||||
new_instr->smem().disable_wqm = smem.disable_wqm;
|
||||
instr.reset(new_instr);
|
||||
}
|
||||
}
|
||||
|
|
@ -2312,10 +2312,10 @@ combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
case 64: new_op = is_or ? aco_opcode::v_cmp_u_f64 : aco_opcode::v_cmp_o_f64; break;
|
||||
}
|
||||
bool needs_vop3 = num_sgprs > 1 || (opsel[0] && op[0].type() != RegType::vgpr);
|
||||
VALU_instruction* new_instr = create_instruction<VALU_instruction>(
|
||||
Instruction* new_instr = create_instruction<VALU_instruction>(
|
||||
new_op, needs_vop3 ? asVOP3(Format::VOPC) : Format::VOPC, 2, 1);
|
||||
|
||||
new_instr->opsel = opsel;
|
||||
new_instr->valu().opsel = opsel;
|
||||
new_instr->operands[0] = copy_operand(ctx, Operand(op[0]));
|
||||
new_instr->operands[1] = copy_operand(ctx, Operand(op[1]));
|
||||
new_instr->definitions[0] = instr->definitions[0];
|
||||
|
|
@ -2381,13 +2381,13 @@ combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
return false;
|
||||
|
||||
aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
|
||||
VALU_instruction* new_instr = create_instruction<VALU_instruction>(
|
||||
Instruction* new_instr = create_instruction<VALU_instruction>(
|
||||
new_op, cmp->isVOP3() ? asVOP3(Format::VOPC) : Format::VOPC, 2, 1);
|
||||
new_instr->neg = cmp_valu.neg;
|
||||
new_instr->abs = cmp_valu.abs;
|
||||
new_instr->clamp = cmp_valu.clamp;
|
||||
new_instr->omod = cmp_valu.omod;
|
||||
new_instr->opsel = cmp_valu.opsel;
|
||||
new_instr->valu().neg = cmp_valu.neg;
|
||||
new_instr->valu().abs = cmp_valu.abs;
|
||||
new_instr->valu().clamp = cmp_valu.clamp;
|
||||
new_instr->valu().omod = cmp_valu.omod;
|
||||
new_instr->valu().opsel = cmp_valu.opsel;
|
||||
new_instr->operands[0] = copy_operand(ctx, cmp->operands[0]);
|
||||
new_instr->operands[1] = copy_operand(ctx, cmp->operands[1]);
|
||||
new_instr->definitions[0] = instr->definitions[0];
|
||||
|
|
@ -2701,12 +2701,12 @@ create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr
|
|||
Operand operands[3], uint8_t neg, uint8_t abs, uint8_t opsel, bool clamp,
|
||||
unsigned omod)
|
||||
{
|
||||
VALU_instruction* new_instr = create_instruction<VALU_instruction>(opcode, Format::VOP3, 3, 1);
|
||||
new_instr->neg = neg;
|
||||
new_instr->abs = abs;
|
||||
new_instr->clamp = clamp;
|
||||
new_instr->omod = omod;
|
||||
new_instr->opsel = opsel;
|
||||
Instruction* new_instr = create_instruction<VALU_instruction>(opcode, Format::VOP3, 3, 1);
|
||||
new_instr->valu().neg = neg;
|
||||
new_instr->valu().abs = abs;
|
||||
new_instr->valu().clamp = clamp;
|
||||
new_instr->valu().omod = omod;
|
||||
new_instr->valu().opsel = opsel;
|
||||
new_instr->operands[0] = operands[0];
|
||||
new_instr->operands[1] = operands[1];
|
||||
new_instr->operands[2] = operands[2];
|
||||
|
|
@ -3746,7 +3746,7 @@ combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub)
|
|||
ctx.uses[instr->operands[i].tempId()]--;
|
||||
|
||||
aco_opcode mad_op = is_sub ? aco_opcode::v_mad_i32_i24 : aco_opcode::v_mad_u32_u24;
|
||||
aco_ptr<VALU_instruction> new_instr{
|
||||
aco_ptr<Instruction> new_instr{
|
||||
create_instruction<VALU_instruction>(mad_op, Format::VOP3, 3, 1)};
|
||||
for (unsigned op_idx = 0; op_idx < 3; ++op_idx)
|
||||
new_instr->operands[op_idx] = ops[op_idx];
|
||||
|
|
@ -3930,23 +3930,23 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
|
||||
/* turn mul + packed add into v_pk_fma_f16 */
|
||||
aco_opcode mad = fadd ? aco_opcode::v_pk_fma_f16 : aco_opcode::v_pk_mad_u16;
|
||||
aco_ptr<VALU_instruction> fma{create_instruction<VALU_instruction>(mad, Format::VOP3P, 3, 1)};
|
||||
aco_ptr<Instruction> fma{create_instruction<VALU_instruction>(mad, Format::VOP3P, 3, 1)};
|
||||
fma->operands[0] = copy_operand(ctx, mul_instr->operands[0]);
|
||||
fma->operands[1] = copy_operand(ctx, mul_instr->operands[1]);
|
||||
fma->operands[2] = instr->operands[add_op_idx];
|
||||
fma->clamp = vop3p->clamp;
|
||||
fma->neg_lo = mul_neg_lo;
|
||||
fma->neg_hi = mul_neg_hi;
|
||||
fma->opsel_lo = mul_opsel_lo;
|
||||
fma->opsel_hi = mul_opsel_hi;
|
||||
propagate_swizzles(fma.get(), vop3p->opsel_lo[1 - add_op_idx],
|
||||
fma->valu().clamp = vop3p->clamp;
|
||||
fma->valu().neg_lo = mul_neg_lo;
|
||||
fma->valu().neg_hi = mul_neg_hi;
|
||||
fma->valu().opsel_lo = mul_opsel_lo;
|
||||
fma->valu().opsel_hi = mul_opsel_hi;
|
||||
propagate_swizzles(&fma->valu(), vop3p->opsel_lo[1 - add_op_idx],
|
||||
vop3p->opsel_hi[1 - add_op_idx]);
|
||||
fma->opsel_lo[2] = vop3p->opsel_lo[add_op_idx];
|
||||
fma->opsel_hi[2] = vop3p->opsel_hi[add_op_idx];
|
||||
fma->neg_lo[2] = vop3p->neg_lo[add_op_idx];
|
||||
fma->neg_hi[2] = vop3p->neg_hi[add_op_idx];
|
||||
fma->neg_lo[1] = fma->neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx];
|
||||
fma->neg_hi[1] = fma->neg_hi[1] ^ vop3p->neg_hi[1 - add_op_idx];
|
||||
fma->valu().opsel_lo[2] = vop3p->opsel_lo[add_op_idx];
|
||||
fma->valu().opsel_hi[2] = vop3p->opsel_hi[add_op_idx];
|
||||
fma->valu().neg_lo[2] = vop3p->neg_lo[add_op_idx];
|
||||
fma->valu().neg_hi[2] = vop3p->neg_hi[add_op_idx];
|
||||
fma->valu().neg_lo[1] = fma->valu().neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx];
|
||||
fma->valu().neg_hi[1] = fma->valu().neg_hi[1] ^ vop3p->neg_hi[1 - add_op_idx];
|
||||
fma->definitions[0] = instr->definitions[0];
|
||||
fma->pass_flags = instr->pass_flags;
|
||||
instr = std::move(fma);
|
||||
|
|
@ -3995,26 +3995,26 @@ to_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
|
||||
bool is_add = instr->opcode != aco_opcode::v_mul_f32;
|
||||
|
||||
aco_ptr<VALU_instruction> vop3p{
|
||||
aco_ptr<Instruction> vop3p{
|
||||
create_instruction<VALU_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1)};
|
||||
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
vop3p->operands[is_add + i] = instr->operands[i];
|
||||
vop3p->neg_lo[is_add + i] = instr->valu().neg[i];
|
||||
vop3p->neg_hi[is_add + i] = instr->valu().abs[i];
|
||||
vop3p->valu().neg_lo[is_add + i] = instr->valu().neg[i];
|
||||
vop3p->valu().neg_hi[is_add + i] = instr->valu().abs[i];
|
||||
}
|
||||
if (instr->opcode == aco_opcode::v_mul_f32) {
|
||||
vop3p->operands[2] = Operand::zero();
|
||||
vop3p->neg_lo[2] = true;
|
||||
vop3p->valu().neg_lo[2] = true;
|
||||
} else if (is_add) {
|
||||
vop3p->operands[0] = Operand::c32(0x3f800000);
|
||||
if (instr->opcode == aco_opcode::v_sub_f32)
|
||||
vop3p->neg_lo[2] ^= true;
|
||||
vop3p->valu().neg_lo[2] ^= true;
|
||||
else if (instr->opcode == aco_opcode::v_subrev_f32)
|
||||
vop3p->neg_lo[1] ^= true;
|
||||
vop3p->valu().neg_lo[1] ^= true;
|
||||
}
|
||||
vop3p->definitions[0] = instr->definitions[0];
|
||||
vop3p->clamp = instr->valu().clamp;
|
||||
vop3p->valu().clamp = instr->valu().clamp;
|
||||
vop3p->pass_flags = instr->pass_flags;
|
||||
instr = std::move(vop3p);
|
||||
|
||||
|
|
@ -4418,7 +4418,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
neg[2 - add_op_idx] = neg[2 - add_op_idx] ^ true;
|
||||
|
||||
aco_ptr<Instruction> add_instr = std::move(instr);
|
||||
aco_ptr<VALU_instruction> mad;
|
||||
aco_ptr<Instruction> mad;
|
||||
if (add_instr->isVOP3P() || mul_instr->isVOP3P()) {
|
||||
assert(!omod);
|
||||
assert(!opsel);
|
||||
|
|
@ -4448,14 +4448,14 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
mad->operands[i] = op[i];
|
||||
mad->neg[i] = neg[i];
|
||||
mad->abs[i] = abs[i];
|
||||
mad->valu().neg[i] = neg[i];
|
||||
mad->valu().abs[i] = abs[i];
|
||||
}
|
||||
mad->omod = omod;
|
||||
mad->clamp = clamp;
|
||||
mad->opsel_lo = opsel_lo;
|
||||
mad->opsel_hi = opsel_hi;
|
||||
mad->opsel = opsel;
|
||||
mad->valu().omod = omod;
|
||||
mad->valu().clamp = clamp;
|
||||
mad->valu().opsel_lo = opsel_lo;
|
||||
mad->valu().opsel_hi = opsel_hi;
|
||||
mad->valu().opsel = opsel;
|
||||
mad->definitions[0] = add_instr->definitions[0];
|
||||
mad->definitions[0].setPrecise(add_instr->definitions[0].isPrecise() ||
|
||||
mul_instr->definitions[0].isPrecise());
|
||||
|
|
@ -4481,7 +4481,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
ctx.uses[instr->operands[i].tempId()]--;
|
||||
ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++;
|
||||
|
||||
aco_ptr<VALU_instruction> new_instr{
|
||||
aco_ptr<Instruction> new_instr{
|
||||
create_instruction<VALU_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
|
||||
new_instr->operands[0] = Operand::zero();
|
||||
new_instr->operands[1] = instr->operands[!i];
|
||||
|
|
@ -4805,7 +4805,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (op.isTemp())
|
||||
ctx.uses[op.tempId()]++;
|
||||
|
||||
aco_ptr<Pseudo_instruction> extract{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> extract{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, 1, 1)};
|
||||
extract->operands[0] = op;
|
||||
extract->definitions[0] = instr->definitions[idx];
|
||||
|
|
@ -4818,7 +4818,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (!done && num_used == 1 &&
|
||||
instr->operands[0].bytes() % instr->definitions[idx].bytes() == 0 &&
|
||||
split_offset % instr->definitions[idx].bytes() == 0) {
|
||||
aco_ptr<Pseudo_instruction> extract{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> extract{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_extract_vector, Format::PSEUDO, 2, 1)};
|
||||
extract->operands[0] = instr->operands[0];
|
||||
extract->operands[1] =
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ setup_reduce_temp(Program* program)
|
|||
|
||||
if ((int)last_top_level_block_idx != inserted_at) {
|
||||
reduceTmp = program->allocateTmp(reduceTmp.regClass());
|
||||
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
|
||||
create->definitions[0] = Definition(reduceTmp);
|
||||
/* find the right place to insert this definition */
|
||||
|
|
@ -154,7 +154,7 @@ setup_reduce_temp(Program* program)
|
|||
|
||||
if (need_vtmp && (int)last_top_level_block_idx != vtmp_inserted_at) {
|
||||
vtmp = program->allocateTmp(vtmp.regClass());
|
||||
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
|
||||
create->definitions[0] = Definition(vtmp);
|
||||
if (last_top_level_block_idx == block.index) {
|
||||
|
|
|
|||
|
|
@ -2885,7 +2885,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
|
|||
if (parallelcopy.empty())
|
||||
return;
|
||||
|
||||
aco_ptr<Pseudo_instruction> pc;
|
||||
aco_ptr<Instruction> pc;
|
||||
pc.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO,
|
||||
parallelcopy.size(), parallelcopy.size()));
|
||||
bool linear_vgpr = false;
|
||||
|
|
@ -2935,8 +2935,8 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
|
|||
|
||||
handle_pseudo(ctx, tmp_file, pc.get());
|
||||
} else {
|
||||
pc->needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
|
||||
pc->tmp_in_scc = false;
|
||||
pc->pseudo().needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
|
||||
pc->pseudo().tmp_in_scc = false;
|
||||
}
|
||||
|
||||
instructions.emplace_back(std::move(pc));
|
||||
|
|
|
|||
|
|
@ -686,9 +686,8 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx)
|
|||
get_vopd_opcode_operands(x, x_info, swap_x, &x_op, &num_operands, operands);
|
||||
get_vopd_opcode_operands(y, y_info, swap_y, &y_op, &num_operands, operands + num_operands);
|
||||
|
||||
VOPD_instruction* instr =
|
||||
create_instruction<VOPD_instruction>(x_op, Format::VOPD, num_operands, 2);
|
||||
instr->opy = y_op;
|
||||
Instruction* instr = create_instruction<VOPD_instruction>(x_op, Format::VOPD, num_operands, 2);
|
||||
instr->vopd().opy = y_op;
|
||||
instr->definitions[0] = x->definitions[0];
|
||||
instr->definitions[1] = y->definitions[0];
|
||||
std::copy(operands, operands + num_operands, instr->operands.begin());
|
||||
|
|
|
|||
|
|
@ -376,7 +376,7 @@ do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
|
|||
res->definitions[0] = Definition(new_name);
|
||||
return res;
|
||||
} else {
|
||||
aco_ptr<Pseudo_instruction> reload{
|
||||
aco_ptr<Instruction> reload{
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
|
||||
reload->operands[0] = Operand::c32(spill_id);
|
||||
reload->definitions[0] = Definition(new_name);
|
||||
|
|
@ -845,7 +845,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
|
|||
for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx])
|
||||
ctx.add_interference(def_spill_id, pair.second);
|
||||
|
||||
aco_ptr<Pseudo_instruction> spill{
|
||||
aco_ptr<Instruction> spill{
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
|
||||
spill->operands[0] = spill_op;
|
||||
spill->operands[1] = Operand::c32(def_spill_id);
|
||||
|
|
@ -915,7 +915,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
|
|||
ctx.renames[pred_idx].erase(rename_it);
|
||||
}
|
||||
|
||||
aco_ptr<Pseudo_instruction> spill{
|
||||
aco_ptr<Instruction> spill{
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
|
||||
spill->operands[0] = Operand(var);
|
||||
spill->operands[1] = Operand::c32(pair.second);
|
||||
|
|
@ -1054,7 +1054,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
|
|||
if (!is_same) {
|
||||
/* the variable was renamed differently in the predecessors: we have to create a phi */
|
||||
aco_opcode opcode = pair.first.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
|
||||
aco_ptr<Pseudo_instruction> phi{
|
||||
aco_ptr<Instruction> phi{
|
||||
create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)};
|
||||
rename = ctx.program->allocateTmp(pair.first.regClass());
|
||||
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
||||
|
|
@ -1229,7 +1229,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s
|
|||
}
|
||||
|
||||
/* add spill to new instructions */
|
||||
aco_ptr<Pseudo_instruction> spill{
|
||||
aco_ptr<Instruction> spill{
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
|
||||
spill->operands[0] = Operand(to_spill);
|
||||
spill->operands[1] = Operand::c32(spill_id);
|
||||
|
|
@ -1757,7 +1757,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
|
|||
if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
|
||||
Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
|
||||
vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
|
||||
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
|
||||
create->definitions[0] = Definition(linear_vgpr);
|
||||
/* find the right place to insert this definition */
|
||||
|
|
@ -1774,7 +1774,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
|
|||
}
|
||||
|
||||
/* spill sgpr: just add the vgpr temp to operands */
|
||||
Pseudo_instruction* spill =
|
||||
Instruction* spill =
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
|
||||
spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
|
||||
spill->operands[0].setLateKill(true);
|
||||
|
|
@ -1798,7 +1798,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
|
|||
if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
|
||||
Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
|
||||
vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
|
||||
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
|
||||
aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
|
||||
create->definitions[0] = Definition(linear_vgpr);
|
||||
/* find the right place to insert this definition */
|
||||
|
|
@ -1815,8 +1815,8 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
|
|||
}
|
||||
|
||||
/* reload sgpr: just add the vgpr temp to operands */
|
||||
Pseudo_instruction* reload = create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_reload, Format::PSEUDO, 2, 1);
|
||||
Instruction* reload = create_instruction<Pseudo_instruction>(aco_opcode::p_reload,
|
||||
Format::PSEUDO, 2, 1);
|
||||
reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
|
||||
reload->operands[0].setLateKill(true);
|
||||
reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
|
|||
}
|
||||
|
||||
std::vector<aco_ptr<Instruction>>::iterator it = std::next(block.instructions.begin(), idx);
|
||||
aco_ptr<Pseudo_instruction> pc{
|
||||
aco_ptr<Instruction> pc{
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO,
|
||||
logical_phi_info.size(), logical_phi_info.size())};
|
||||
unsigned i = 0;
|
||||
|
|
@ -107,7 +107,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
|
|||
i++;
|
||||
}
|
||||
/* this shouldn't be needed since we're only copying vgprs */
|
||||
pc->tmp_in_scc = false;
|
||||
pc->pseudo().tmp_in_scc = false;
|
||||
block.instructions.insert(it, std::move(pc));
|
||||
}
|
||||
|
||||
|
|
@ -122,7 +122,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
|
|||
--it;
|
||||
assert((*it)->isBranch());
|
||||
PhysReg scratch_sgpr = (*it)->definitions[0].physReg();
|
||||
aco_ptr<Pseudo_instruction> pc{
|
||||
aco_ptr<Instruction> pc{
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO,
|
||||
linear_phi_info.size(), linear_phi_info.size())};
|
||||
unsigned i = 0;
|
||||
|
|
@ -131,9 +131,9 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
|
|||
pc->operands[i] = phi_info.op;
|
||||
i++;
|
||||
}
|
||||
pc->tmp_in_scc = block.scc_live_out;
|
||||
pc->scratch_sgpr = scratch_sgpr;
|
||||
pc->needs_scratch_reg = true;
|
||||
pc->pseudo().tmp_in_scc = block.scc_live_out;
|
||||
pc->pseudo().scratch_sgpr = scratch_sgpr;
|
||||
pc->pseudo().needs_scratch_reg = true;
|
||||
block.instructions.insert(it, std::move(pc));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -269,7 +269,7 @@ BEGIN_TEST(assembler.v_add3)
|
|||
|
||||
//~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
|
||||
//~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
|
||||
aco_ptr<VALU_instruction> add3{
|
||||
aco_ptr<Instruction> add3{
|
||||
create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
|
||||
add3->operands[0] = Operand::zero();
|
||||
add3->operands[1] = Operand::zero();
|
||||
|
|
@ -288,13 +288,13 @@ BEGIN_TEST(assembler.v_add3_clamp)
|
|||
|
||||
//~gfx9>> integer addition + clamp ; d1ff8000 02010080
|
||||
//~gfx10>> integer addition + clamp ; d76d8000 02010080
|
||||
aco_ptr<VALU_instruction> add3{
|
||||
aco_ptr<Instruction> add3{
|
||||
create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
|
||||
add3->operands[0] = Operand::zero();
|
||||
add3->operands[1] = Operand::zero();
|
||||
add3->operands[2] = Operand::zero();
|
||||
add3->definitions[0] = Definition(PhysReg(0), v1);
|
||||
add3->clamp = 1;
|
||||
add3->valu().clamp = 1;
|
||||
bld.insert(std::move(add3));
|
||||
|
||||
finish_assembler_test();
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ create_global()
|
|||
static void
|
||||
create_mimg(bool nsa, Temp desc = Temp(0, s8))
|
||||
{
|
||||
aco_ptr<MIMG_instruction> mimg{
|
||||
aco_ptr<Instruction> mimg{
|
||||
create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 5, 1)};
|
||||
mimg->definitions[0] = Definition(PhysReg(256), v1);
|
||||
mimg->operands[0] = Operand(desc);
|
||||
|
|
@ -78,8 +78,8 @@ create_mimg(bool nsa, Temp desc = Temp(0, s8))
|
|||
mimg->operands[2] = Operand(v1);
|
||||
for (unsigned i = 0; i < 2; i++)
|
||||
mimg->operands[3 + i] = Operand(PhysReg(256 + (nsa ? i * 2 : i)), v1);
|
||||
mimg->dmask = 0x1;
|
||||
mimg->dim = ac_image_2d;
|
||||
mimg->mimg().dmask = 0x1;
|
||||
mimg->mimg().dim = ac_image_2d;
|
||||
|
||||
bld.insert(std::move(mimg));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ create_mubuf_store(PhysReg src = PhysReg(256))
|
|||
void
|
||||
create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
|
||||
{
|
||||
aco_ptr<MIMG_instruction> mimg{
|
||||
aco_ptr<Instruction> mimg{
|
||||
create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
|
||||
mimg->definitions[0] = Definition(PhysReg(256), v1);
|
||||
mimg->operands[0] = Operand(PhysReg(0), s8);
|
||||
|
|
@ -50,8 +50,8 @@ create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
|
|||
mimg->operands[2] = Operand(v1);
|
||||
for (unsigned i = 0; i < addrs; i++)
|
||||
mimg->operands[3 + i] = Operand(PhysReg(256 + (nsa ? i * 2 : i)), v1);
|
||||
mimg->dmask = 0x1;
|
||||
mimg->dim = ac_image_2d;
|
||||
mimg->mimg().dmask = 0x1;
|
||||
mimg->mimg().dim = ac_image_2d;
|
||||
|
||||
assert(get_mimg_nsa_dwords(mimg.get()) + 2 == instr_dwords);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue