mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 04:20:18 +01:00
r600/sfn: Switch to register intrinsics
Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24212>
This commit is contained in:
parent
886a7ae786
commit
10938cddd7
9 changed files with 291 additions and 189 deletions
|
|
@ -503,7 +503,7 @@ bool AluInstr::can_replace_source(PRegister old_src, PVirtualValue new_src)
|
|||
/* If the old or new source is an array element, we assume that there
|
||||
* might have been an (untracked) indirect access, so don't replace
|
||||
* this source */
|
||||
if (old_src->pin() == pin_array || new_src->pin() == pin_array)
|
||||
if (old_src->pin() == pin_array && new_src->pin() == pin_array)
|
||||
return false;
|
||||
|
||||
auto [addr, dummy, index] = indirect_addr();
|
||||
|
|
|
|||
|
|
@ -934,8 +934,8 @@ r600_shader_from_nir(struct r600_context *rctx,
|
|||
|
||||
NIR_PASS_V(sh, nir_lower_bool_to_int32);
|
||||
|
||||
NIR_PASS_V(sh, nir_lower_locals_to_regs, 32);
|
||||
NIR_PASS_V(sh, nir_convert_from_ssa, true, false);
|
||||
NIR_PASS_V(sh, nir_lower_locals_to_reg_intrinsics, 32);
|
||||
NIR_PASS_V(sh, nir_convert_from_ssa, true, true);
|
||||
NIR_PASS_V(sh, nir_opt_dce);
|
||||
|
||||
if (rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
|
||||
|
|
|
|||
|
|
@ -373,14 +373,18 @@ CopyPropFwdVisitor::visit(AluInstr *instr)
|
|||
auto ii = dest->uses().begin();
|
||||
auto ie = dest->uses().end();
|
||||
|
||||
auto mov_block_id = instr->block_id();
|
||||
|
||||
while(ii != ie) {
|
||||
auto i = *ii;
|
||||
auto target_block_id = i->block_id();
|
||||
|
||||
++ii;
|
||||
/* SSA can always be propagated, registers only in the same block
|
||||
* and only if they are assigned in the same block */
|
||||
bool can_propagate = dest->has_flag(Register::ssa);
|
||||
bool dest_can_propagate = dest->has_flag(Register::ssa);
|
||||
|
||||
if (!can_propagate) {
|
||||
if (!dest_can_propagate) {
|
||||
|
||||
/* Register can propagate if the assignment was in the same
|
||||
* block, and we don't have a second assignment coming later
|
||||
|
|
@ -391,12 +395,12 @@ CopyPropFwdVisitor::visit(AluInstr *instr)
|
|||
* 3: MOV SN.x, R0.x
|
||||
*
|
||||
* Here we can't propagate the move in 1 to SN.x in 3 */
|
||||
if ((instr->block_id() == i->block_id() && instr->index() < i->index())) {
|
||||
can_propagate = true;
|
||||
if ((mov_block_id == target_block_id && instr->index() < i->index())) {
|
||||
dest_can_propagate = true;
|
||||
if (dest->parents().size() > 1) {
|
||||
for (auto p : dest->parents()) {
|
||||
if (p->block_id() == i->block_id() && p->index() > instr->index()) {
|
||||
can_propagate = false;
|
||||
dest_can_propagate = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -404,7 +408,32 @@ CopyPropFwdVisitor::visit(AluInstr *instr)
|
|||
}
|
||||
}
|
||||
|
||||
if (can_propagate) {
|
||||
bool src_can_propagate = false;
|
||||
if (auto rsrc = src->as_register()) {
|
||||
if (rsrc->has_flag(Register::ssa)) {
|
||||
src_can_propagate = true;
|
||||
} else if (mov_block_id == target_block_id) {
|
||||
if (rsrc->addr()) {
|
||||
if (i->block_id() == mov_block_id &&
|
||||
i->index() == instr->index() + 1)
|
||||
src_can_propagate = true;
|
||||
} else {
|
||||
src_can_propagate = true;
|
||||
for (auto p : rsrc->parents()) {
|
||||
if (p->block_id() == mov_block_id &&
|
||||
p->index() > instr->index() &&
|
||||
p->index() < i->index()) {
|
||||
src_can_propagate = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
src_can_propagate = true;
|
||||
}
|
||||
|
||||
if (dest_can_propagate && src_can_propagate) {
|
||||
sfn_log << SfnLog::opt << " Try replace in " << i->block_id() << ":"
|
||||
<< i->index() << *i << "\n";
|
||||
|
||||
|
|
|
|||
|
|
@ -552,7 +552,8 @@ Shader::process(nir_shader *nir)
|
|||
|
||||
allocate_reserved_registers();
|
||||
|
||||
allocate_local_registers(&func->impl->registers);
|
||||
value_factory().allocate_registers(m_register_allocations);
|
||||
m_required_registers = value_factory().array_registers();
|
||||
|
||||
sfn_log << SfnLog::trans << "Process shader \n";
|
||||
foreach_list_typed(nir_cf_node, node, node, &func->impl->body)
|
||||
|
|
@ -566,14 +567,6 @@ Shader::process(nir_shader *nir)
|
|||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
Shader::allocate_local_registers(const exec_list *registers)
|
||||
{
|
||||
if (value_factory().allocate_registers(registers))
|
||||
m_indirect_files |= 1 << TGSI_FILE_TEMPORARY;
|
||||
m_required_registers = value_factory().array_registers();
|
||||
}
|
||||
|
||||
bool
|
||||
Shader::scan_shader(const nir_function *func)
|
||||
{
|
||||
|
|
@ -682,6 +675,9 @@ Shader::scan_instruction(nir_instr *instr)
|
|||
(nir_var_mem_ssbo | nir_var_mem_global | nir_var_image) &&
|
||||
nir_intrinsic_memory_scope(intr) != SCOPE_NONE);
|
||||
break;
|
||||
case nir_intrinsic_decl_reg:
|
||||
m_register_allocations.push_back(intr);
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
return true;
|
||||
|
|
@ -918,7 +914,18 @@ Shader::process_intrinsic(nir_intrinsic_instr *intr)
|
|||
return emit_atomic_local_shared(intr);
|
||||
case nir_intrinsic_shader_clock:
|
||||
return emit_shader_clock(intr);
|
||||
|
||||
case nir_intrinsic_load_reg:
|
||||
return emit_load_reg(intr);
|
||||
case nir_intrinsic_load_reg_indirect:
|
||||
return emit_load_reg_indirect(intr);
|
||||
case nir_intrinsic_store_reg:
|
||||
return emit_store_reg(intr);
|
||||
case nir_intrinsic_store_reg_indirect:
|
||||
return emit_store_reg_indirect(intr);
|
||||
case nir_intrinsic_decl_reg:
|
||||
// Registers and arrays are allocated at
|
||||
// conversion startup time
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
@ -966,6 +973,130 @@ Shader::emit_load_to_register(PVirtualValue src)
|
|||
return dest;
|
||||
}
|
||||
|
||||
// add visitor to resolve array and register
|
||||
class RegisterAccessHandler : public RegisterVisitor {
|
||||
|
||||
public:
|
||||
RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr);
|
||||
|
||||
void visit(LocalArrayValue& value) override {(void)value; assert(0);}
|
||||
void visit(UniformValue& value) override {(void)value; assert(0);}
|
||||
void visit(LiteralConstant& value) override {(void)value; assert(0);}
|
||||
void visit(InlineConstant& value) override {(void)value; assert(0);}
|
||||
|
||||
Shader& sh;
|
||||
nir_intrinsic_instr *ir;
|
||||
PVirtualValue addr{nullptr};
|
||||
bool success{true};
|
||||
};
|
||||
|
||||
class RegisterReadHandler : public RegisterAccessHandler {
|
||||
|
||||
public:
|
||||
using RegisterAccessHandler::RegisterAccessHandler;
|
||||
using RegisterAccessHandler::visit;
|
||||
|
||||
void visit(LocalArray& value) override;
|
||||
void visit(Register& value) override;
|
||||
};
|
||||
|
||||
bool Shader::emit_load_reg(nir_intrinsic_instr *intr)
|
||||
{
|
||||
RegisterReadHandler visitor(*this, intr);
|
||||
auto handle = value_factory().src(intr->src[0], 0);
|
||||
handle->accept(visitor);
|
||||
return visitor.success;
|
||||
}
|
||||
|
||||
bool Shader::emit_load_reg_indirect(nir_intrinsic_instr *intr)
|
||||
{
|
||||
RegisterReadHandler visitor(*this, intr);
|
||||
visitor.addr = value_factory().src(intr->src[1], 0);
|
||||
auto handle = value_factory().src(intr->src[0], 0);
|
||||
handle->accept(visitor);
|
||||
return visitor.success;
|
||||
}
|
||||
|
||||
class RegisterWriteHandler : public RegisterAccessHandler {
|
||||
|
||||
public:
|
||||
using RegisterAccessHandler::RegisterAccessHandler;
|
||||
using RegisterAccessHandler::visit;
|
||||
|
||||
void visit(LocalArray& value) override;
|
||||
void visit(Register& value) override;
|
||||
};
|
||||
|
||||
|
||||
bool Shader::emit_store_reg(nir_intrinsic_instr *intr)
|
||||
{
|
||||
RegisterWriteHandler visitor(*this, intr);
|
||||
auto handle = value_factory().src(intr->src[1], 0);
|
||||
handle->accept(visitor);
|
||||
return visitor.success;
|
||||
}
|
||||
|
||||
bool Shader::emit_store_reg_indirect(nir_intrinsic_instr *intr)
|
||||
{
|
||||
RegisterWriteHandler visitor(*this, intr);
|
||||
visitor.addr = value_factory().src(intr->src[2], 0);
|
||||
|
||||
auto handle = value_factory().src(intr->src[1], 0);
|
||||
handle->accept(visitor);
|
||||
return visitor.success;
|
||||
}
|
||||
|
||||
RegisterAccessHandler::RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr):
|
||||
sh(shader),
|
||||
ir(intr)
|
||||
{}
|
||||
|
||||
void RegisterReadHandler::visit(LocalArray& array)
|
||||
{
|
||||
int slots = ir->dest.ssa.bit_size / 32;
|
||||
auto pin = ir->dest.ssa.num_components > 1 ? pin_none : pin_free;
|
||||
for (int i = 0; i < ir->dest.ssa.num_components; ++i) {
|
||||
for (int s = 0; s < slots; ++s) {
|
||||
int chan = i * slots + s;
|
||||
auto dest = sh.value_factory().dest(ir->dest, chan, pin);
|
||||
auto src = array.element(nir_intrinsic_base(ir), addr, chan);
|
||||
sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterReadHandler::visit(Register& reg)
|
||||
{
|
||||
auto dest = sh.value_factory().dest(ir->dest, 0, pin_free);
|
||||
sh.emit_instruction(new AluInstr(op1_mov, dest, &reg, AluInstr::write));
|
||||
}
|
||||
|
||||
void RegisterWriteHandler::visit(LocalArray& array)
|
||||
{
|
||||
int writemask = nir_intrinsic_write_mask(ir);
|
||||
int slots = ir->src->ssa->bit_size / 32;
|
||||
|
||||
for (int i = 0; i < ir->num_components; ++i) {
|
||||
if (!(writemask & (1 << i)))
|
||||
continue;
|
||||
for (int s = 0; s < slots; ++s) {
|
||||
int chan = i * slots + s;
|
||||
|
||||
auto dest = array.element(nir_intrinsic_base(ir), addr, chan);
|
||||
auto src = sh.value_factory().src(ir->src[0], chan);
|
||||
sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterWriteHandler::visit(Register& dest)
|
||||
{
|
||||
int writemask = nir_intrinsic_write_mask(ir);
|
||||
assert(writemask == 1);
|
||||
auto src = sh.value_factory().src(ir->src[0], 0);
|
||||
sh.emit_instruction(new AluInstr(op1_mov, &dest, src, AluInstr::write));
|
||||
}
|
||||
|
||||
bool
|
||||
Shader::emit_atomic_local_shared(nir_intrinsic_instr *instr)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -298,8 +298,6 @@ private:
|
|||
bool scan_uniforms(nir_variable *uniform);
|
||||
void allocate_reserved_registers();
|
||||
|
||||
void allocate_local_registers(const exec_list *registers);
|
||||
|
||||
virtual int do_allocate_reserved_registers() = 0;
|
||||
|
||||
bool scan_instruction(nir_instr *instr);
|
||||
|
|
@ -323,6 +321,10 @@ private:
|
|||
bool emit_shader_clock(nir_intrinsic_instr *instr);
|
||||
bool emit_wait_ack();
|
||||
bool emit_scoped_barrier(nir_intrinsic_instr *instr);
|
||||
bool emit_load_reg(nir_intrinsic_instr *intr);
|
||||
bool emit_load_reg_indirect(nir_intrinsic_instr *intr);
|
||||
bool emit_store_reg(nir_intrinsic_instr *intr);
|
||||
bool emit_store_reg_indirect(nir_intrinsic_instr *intr);
|
||||
|
||||
bool equal_to(const Shader& other) const;
|
||||
void finalize();
|
||||
|
|
@ -400,6 +402,8 @@ private:
|
|||
InstructionChain m_chain_instr;
|
||||
std::list<Instr *, Allocator<Instr *>> m_loops;
|
||||
int m_control_flow_depth{0};
|
||||
std::list<nir_intrinsic_instr*> m_register_allocations;
|
||||
|
||||
};
|
||||
|
||||
std::pair<unsigned, unsigned>
|
||||
|
|
|
|||
|
|
@ -53,37 +53,42 @@ ValueFactory::set_virtual_register_base(int base)
|
|||
}
|
||||
|
||||
bool
|
||||
ValueFactory::allocate_registers(const exec_list *registers)
|
||||
ValueFactory::allocate_registers(const std::list<nir_intrinsic_instr *>& regs)
|
||||
{
|
||||
bool has_arrays = false;
|
||||
struct array_entry {
|
||||
unsigned index;
|
||||
unsigned length;
|
||||
unsigned ncomponents;
|
||||
int ncomponents;
|
||||
|
||||
bool operator()(const array_entry& a, const array_entry& b) const
|
||||
{
|
||||
return a.length < b.length ||
|
||||
(a.length == b.length && a.ncomponents > b.ncomponents);
|
||||
return a.ncomponents < b.ncomponents ||
|
||||
(a.ncomponents == b.ncomponents && a.length < b.length);
|
||||
}
|
||||
};
|
||||
|
||||
using array_list =
|
||||
std::priority_queue<array_entry, std::vector<array_entry>, array_entry>;
|
||||
|
||||
std::list<unsigned> non_array;
|
||||
array_list arrays;
|
||||
for(auto intr : regs) {
|
||||
unsigned num_elms = nir_intrinsic_num_array_elems(intr);
|
||||
int num_comp = nir_intrinsic_num_components(intr);
|
||||
int bit_size = nir_intrinsic_bit_size(intr);
|
||||
|
||||
foreach_list_typed(nir_register, reg, node, registers)
|
||||
{
|
||||
if (reg->num_array_elems) {
|
||||
if (num_elms > 0 || num_comp > 1 || bit_size > 32) {
|
||||
array_entry ae = {
|
||||
reg->index, reg->num_array_elems, reg->bit_size / 32 * reg->num_components};
|
||||
intr->dest.ssa.index,
|
||||
num_elms ? num_elms : 1,
|
||||
bit_size / 32 * num_comp};
|
||||
arrays.push(ae);
|
||||
has_arrays = true;
|
||||
} else {
|
||||
non_array.push_back(intr->dest.ssa.index);
|
||||
}
|
||||
}
|
||||
|
||||
int ncomponents = 0;
|
||||
int free_components = 4;
|
||||
int sel = m_next_register_index;
|
||||
unsigned length = 0;
|
||||
|
||||
|
|
@ -94,48 +99,39 @@ ValueFactory::allocate_registers(const exec_list *registers)
|
|||
/* This is a bit hackish, return an id that encodes the array merge. To
|
||||
* make sure that the mapping doesn't go wrong we have to make sure the
|
||||
* array is longer than the number of instances in this array's slot */
|
||||
if (a.ncomponents + ncomponents > 4 || a.length > length) {
|
||||
if (a.ncomponents > free_components || a.length > length) {
|
||||
sel = m_next_register_index;
|
||||
ncomponents = 0;
|
||||
length = 0;
|
||||
free_components = 4;
|
||||
m_next_register_index += a.length;
|
||||
}
|
||||
|
||||
if (ncomponents == 0)
|
||||
m_next_register_index += a.length;
|
||||
uint32_t frac = free_components - a.ncomponents;
|
||||
|
||||
uint32_t frac = ncomponents;
|
||||
auto array = new LocalArray(sel, a.ncomponents, a.length, frac);
|
||||
|
||||
for (unsigned i = 0; i < a.ncomponents; ++i) {
|
||||
for (int i = 0; i < a.ncomponents; ++i) {
|
||||
RegisterKey key(a.index, i, vp_array);
|
||||
m_channel_counts.inc_count(i);
|
||||
m_channel_counts.inc_count(frac + i, a.length);
|
||||
m_registers[key] = array;
|
||||
sfn_log << SfnLog::reg << __func__ << ": Allocate array " << key << ":" << *array
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
ncomponents += a.ncomponents;
|
||||
free_components -= a.ncomponents;
|
||||
length = a.length;
|
||||
}
|
||||
|
||||
m_required_array_registers = m_next_register_index ? m_next_register_index : 0;
|
||||
|
||||
foreach_list_typed(nir_register, reg, node, registers)
|
||||
{
|
||||
if (!reg->num_array_elems) {
|
||||
uint32_t sel = m_next_register_index++;
|
||||
unsigned num_components = reg->num_components * reg->bit_size / 32;
|
||||
for (auto chan = 0u; chan < num_components; ++chan) {
|
||||
RegisterKey key(reg->index, chan, vp_register);
|
||||
m_channel_counts.inc_count(chan);
|
||||
m_registers[key] =
|
||||
new Register(sel, chan, num_components > 1 ? pin_none : pin_free);
|
||||
sfn_log << SfnLog::reg << "allocate register " << key << ":"
|
||||
<< *m_registers[key] << "\n";
|
||||
}
|
||||
}
|
||||
for (auto index : non_array) {
|
||||
RegisterKey key(index, 0, vp_register);
|
||||
auto chan = m_channel_counts.least_used(0xf);
|
||||
m_registers[key] = new Register(m_next_register_index++,
|
||||
chan, pin_free);
|
||||
m_channel_counts.inc_count(chan);
|
||||
}
|
||||
return has_arrays;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int ValueFactory::new_register_index()
|
||||
|
|
@ -212,44 +208,11 @@ public:
|
|||
int m_chan;
|
||||
};
|
||||
|
||||
PRegister
|
||||
ValueFactory::resolve_array(nir_register *reg,
|
||||
nir_src *indirect,
|
||||
int base_offset,
|
||||
int chan)
|
||||
{
|
||||
PVirtualValue addr = nullptr;
|
||||
auto type = reg->num_array_elems ? vp_array : vp_register;
|
||||
RegisterKey key(reg->index, chan, type);
|
||||
auto ireg = m_registers.find(key);
|
||||
if (ireg == m_registers.end()) {
|
||||
std::cerr << "Key " << key << " not found\n";
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if (reg->num_array_elems) {
|
||||
|
||||
if (indirect)
|
||||
addr = src(*indirect, 0);
|
||||
|
||||
TranslateRegister array_resolution(base_offset, addr, chan);
|
||||
|
||||
ireg->second->accept(array_resolution);
|
||||
assert(array_resolution.m_value);
|
||||
return array_resolution.m_value;
|
||||
} else {
|
||||
return ireg->second;
|
||||
}
|
||||
}
|
||||
|
||||
PRegister
|
||||
ValueFactory::dest(const nir_dest& dst, int chan, Pin pin_channel, uint8_t chan_mask)
|
||||
{
|
||||
if (dst.is_ssa) {
|
||||
return dest(dst.ssa, chan, pin_channel, chan_mask);
|
||||
} else {
|
||||
return resolve_array(dst.reg.reg, dst.reg.indirect, dst.reg.base_offset, chan);
|
||||
}
|
||||
assert(dst.is_ssa);
|
||||
return dest(dst.ssa, chan, pin_channel, chan_mask);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -320,30 +283,12 @@ ValueFactory::dest_vec4(const nir_dest& dst, Pin pin)
|
|||
{
|
||||
if (pin != pin_group && pin != pin_chgr)
|
||||
pin = pin_chan;
|
||||
if (dst.is_ssa) {
|
||||
PRegister x = dest(dst, 0, pin);
|
||||
PRegister y = dest(dst, 1, pin);
|
||||
PRegister z = dest(dst, 2, pin);
|
||||
PRegister w = dest(dst, 3, pin);
|
||||
return RegisterVec4(x, y, z, w, pin);
|
||||
} else {
|
||||
assert(!dst.reg.indirect);
|
||||
PRegister v[4];
|
||||
int sel = -1;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
RegisterKey key(dst.reg.reg->index, i, vp_register);
|
||||
v[i] = m_registers[key];
|
||||
assert(sel >= 0 || v[i]);
|
||||
if (sel < 0)
|
||||
sel = v[i]->sel();
|
||||
|
||||
if (!v[i]) {
|
||||
v[i] = m_registers[key] = new Register(sel, i, pin_group);
|
||||
}
|
||||
}
|
||||
return RegisterVec4(v[0], v[1], v[2], v[3], pin);
|
||||
}
|
||||
unreachable("unsupported");
|
||||
assert(dst.is_ssa);
|
||||
PRegister x = dest(dst, 0, pin);
|
||||
PRegister y = dest(dst, 1, pin);
|
||||
PRegister z = dest(dst, 2, pin);
|
||||
PRegister w = dest(dst, 3, pin);
|
||||
return RegisterVec4(x, y, z, w, pin);
|
||||
}
|
||||
|
||||
PRegister ValueFactory::addr()
|
||||
|
|
@ -386,16 +331,12 @@ ValueFactory::src(const nir_src& src, int chan)
|
|||
{
|
||||
sfn_log << SfnLog::reg << "search (ref) " << (void *)&src << "\n";
|
||||
|
||||
if (src.is_ssa) {
|
||||
sfn_log << SfnLog::reg << "search ssa " << src.ssa->index << " c:" << chan
|
||||
<< " got ";
|
||||
auto val = ssa_src(*src.ssa, chan);
|
||||
sfn_log << *val << "\n";
|
||||
return val;
|
||||
} else {
|
||||
sfn_log << SfnLog::reg << "search reg " << src.reg.reg->index << "\n";
|
||||
return local_register(src.reg, chan);
|
||||
}
|
||||
assert(src.is_ssa);
|
||||
sfn_log << SfnLog::reg << "search ssa " << src.ssa->index << " c:" << chan
|
||||
<< " got ";
|
||||
auto val = ssa_src(*src.ssa, chan);
|
||||
sfn_log << *val << "\n";
|
||||
return val;
|
||||
}
|
||||
|
||||
PVirtualValue
|
||||
|
|
@ -486,22 +427,23 @@ ValueFactory::ssa_src(const nir_ssa_def& ssa, int chan)
|
|||
if (ival != m_values.end())
|
||||
return ival->second;
|
||||
|
||||
RegisterKey rkey(ssa.index, chan, vp_register);
|
||||
sfn_log << SfnLog::reg << "search src with key" << rkey << "\n";
|
||||
|
||||
ireg = m_registers.find(rkey);
|
||||
if (ireg != m_registers.end())
|
||||
return ireg->second;
|
||||
|
||||
RegisterKey array_key(ssa.index, chan, vp_array);
|
||||
sfn_log << SfnLog::reg << "search array with key" << array_key << "\n";
|
||||
auto iarray = m_registers.find(array_key);
|
||||
if (iarray != m_registers.end())
|
||||
return iarray->second;
|
||||
|
||||
std::cerr << "Didn't find source with key " << key << "\n";
|
||||
unreachable("Source values should always exist");
|
||||
}
|
||||
|
||||
PRegister
|
||||
ValueFactory::local_register(const nir_register_dest& dst, int chan)
|
||||
{
|
||||
return resolve_array(dst.reg, dst.indirect, dst.base_offset, chan);
|
||||
}
|
||||
|
||||
PRegister
|
||||
ValueFactory::local_register(const nir_register_src& src, int chan)
|
||||
{
|
||||
return resolve_array(src.reg, src.indirect, src.base_offset, chan);
|
||||
}
|
||||
|
||||
PVirtualValue
|
||||
ValueFactory::literal(uint32_t value)
|
||||
{
|
||||
|
|
@ -1040,11 +982,9 @@ ValueFactory::prepare_live_range_map()
|
|||
continue;
|
||||
|
||||
if (key.value.pool == vp_array) {
|
||||
if (key.value.chan == 0) {
|
||||
auto array = static_cast<LocalArray *>(reg);
|
||||
for (auto& a : *array) {
|
||||
result.append_register(a);
|
||||
}
|
||||
auto array = static_cast<LocalArray *>(reg);
|
||||
for (auto& a : *array) {
|
||||
result.append_register(a);
|
||||
}
|
||||
} else {
|
||||
if (reg->chan() < 4)
|
||||
|
|
|
|||
|
|
@ -178,6 +178,7 @@ struct register_key_hash {
|
|||
class ChannelCounts {
|
||||
public:
|
||||
void inc_count(int chan) { ++m_counts[chan]; }
|
||||
void inc_count(int chan, int n) { m_counts[chan] += n; }
|
||||
int least_used(uint8_t mask) const
|
||||
{
|
||||
int least_used = 0;
|
||||
|
|
@ -222,8 +223,9 @@ public:
|
|||
|
||||
int new_register_index();
|
||||
|
||||
bool allocate_registers(const exec_list *registers);
|
||||
bool allocate_registers(const std::list<nir_intrinsic_instr *>& regs);
|
||||
PRegister allocate_pinned_register(int sel, int chan);
|
||||
|
||||
RegisterVec4 allocate_pinned_vec4(int sel, bool is_ssa);
|
||||
|
||||
void inject_value(const nir_dest& dest, int chan, PVirtualValue value);
|
||||
|
|
@ -292,11 +294,6 @@ public:
|
|||
private:
|
||||
PVirtualValue ssa_src(const nir_ssa_def& dest, int chan);
|
||||
|
||||
PRegister local_register(const nir_register_dest& dest, int chan);
|
||||
PRegister local_register(const nir_register_src& dest, int chan);
|
||||
PRegister
|
||||
resolve_array(nir_register *reg, nir_src *indirect, int base_offset, int chan);
|
||||
|
||||
int m_next_register_index;
|
||||
int m_next_temp_channel{0};
|
||||
|
||||
|
|
|
|||
|
|
@ -873,9 +873,10 @@ LocalArray::LocalArray(int base_sel, int nchannels, int size, int frac):
|
|||
sfn_log << SfnLog::reg << "Allocate array A" << base_sel << "(" << size << ", " << frac
|
||||
<< ", " << nchannels << ")\n";
|
||||
|
||||
auto pin = m_size > 1 ? pin_array : (nchannels > 1 ? pin_none : pin_free);
|
||||
for (int c = 0; c < nchannels; ++c) {
|
||||
for (unsigned i = 0; i < m_size; ++i) {
|
||||
PRegister reg = new Register(base_sel + i, c + frac, pin_array);
|
||||
PRegister reg = new Register(base_sel + i, c + frac, pin);
|
||||
m_values[m_size * c + i] = new LocalArrayValue(reg, *this);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1912,27 +1912,27 @@ ELSE
|
|||
ALU MOV A2[S37.x].x : I[0] {W}
|
||||
ALU MOV A2[S37.x].y : L[0x3dcccccd] {WL}
|
||||
ENDIF
|
||||
ALU MOV S1025.x@group{s} : A2[0].x {W}
|
||||
ALU MOV S1025.y@group{s} : A2[0].y {WL}
|
||||
ALU MOV S1025.z@group{s} : A2[1].x {W}
|
||||
ALU MOV S1025.w@group{s} : A2[1].y {WL}
|
||||
ALU MOV S1027.x@group{s} : A2[2].x {W}
|
||||
ALU MOV S1027.y@group{s} : A2[2].y {WL}
|
||||
ALU MOV S1027.z@group{s} : A2[3].x {W}
|
||||
ALU MOV S1027.w@group{s} : A2[3].y {WL}
|
||||
ALU MOV S1029.x@group{s} : A2[0].z {W}
|
||||
ALU MOV S1029.y@group{s} : A2[0].w {WL}
|
||||
ALU MOV S1029.z@group{s} : A2[1].z {W}
|
||||
ALU MOV S1029.w@group{s} : A2[1].w {WL}
|
||||
ALU MOV S1031.x@group{s} : A2[2].z {W}
|
||||
ALU MOV S1031.y@group{s} : A2[2].w {WL}
|
||||
ALU MOV S1031.z@group{s} : A2[3].z {W}
|
||||
ALU MOV S1031.w@group{s} : A2[3].w {WL}
|
||||
EXPORT_DONE POS 0 S19.xyzw
|
||||
EXPORT PARAM 0 S1025.xyzw
|
||||
EXPORT PARAM 1 S1027.xyzw
|
||||
EXPORT PARAM 2 S1029.xyzw
|
||||
EXPORT_DONE PARAM 3 S1031.xyzw
|
||||
ALU MOV S46.x@group{s} : A2[0].x {W}
|
||||
ALU MOV S46.y@group{s} : A2[0].y {W}
|
||||
ALU MOV S46.z@group{s} : A2[1].x {W}
|
||||
ALU MOV S46.w@group{s} : A2[1].y {WL}
|
||||
EXPORT PARAM 0 S46.xyzw
|
||||
ALU MOV S47.x@group{s} : A2[2].x {W}
|
||||
ALU MOV S47.y@group{s} : A2[2].y {W}
|
||||
ALU MOV S47.z@group{s} : A2[3].x {W}
|
||||
ALU MOV S47.w@group{s} : A2[3].y {WL}
|
||||
EXPORT PARAM 1 S47.xyzw
|
||||
ALU MOV S48.x@group{s} : A2[0].z {W}
|
||||
ALU MOV S48.y@group{s} : A2[0].w {W}
|
||||
ALU MOV S48.z@group{s} : A2[1].z {W}
|
||||
ALU MOV S48.w@group{s} : A2[1].w {WL}
|
||||
EXPORT PARAM 2 S48.xyzw
|
||||
ALU MOV S49.x@group{s} : A2[2].z {W}
|
||||
ALU MOV S49.y@group{s} : A2[2].w {W}
|
||||
ALU MOV S49.z@group{s} : A2[3].z {W}
|
||||
ALU MOV S49.w@group{s} : A2[3].w {WL}
|
||||
EXPORT_DONE PARAM 3 S49.xyzw
|
||||
)";
|
||||
|
||||
const char *shader_with_dest_array_opt_scheduled =
|
||||
|
|
@ -2022,36 +2022,36 @@ ELSE
|
|||
ALU_GROUP_END
|
||||
ENDIF
|
||||
ALU_GROUP_BEGIN
|
||||
ALU MOV S1025.x@chgr : A2[0].x {W}
|
||||
ALU MOV S1025.y@chgr : A2[0].y {W}
|
||||
ALU MOV S1025.z@chgr : A2[1].x {W}
|
||||
ALU MOV S1025.w@chgr : A2[1].y {W}
|
||||
ALU MOV S1027.x@group : A2[2].x {WL}
|
||||
ALU MOV S46.x@chgr : A2[0].x {W}
|
||||
ALU MOV S46.y@chgr : A2[0].y {W}
|
||||
ALU MOV S46.z@chgr : A2[1].x {W}
|
||||
ALU MOV S46.w@chgr : A2[1].y {W}
|
||||
ALU MOV S47.x@group : A2[2].x {WL}
|
||||
ALU_GROUP_END
|
||||
ALU_GROUP_BEGIN
|
||||
ALU MOV S1029.x@chgr : A2[0].z {W}
|
||||
ALU MOV S1027.y@chgr : A2[2].y {W}
|
||||
ALU MOV S1027.z@chgr : A2[3].x {W}
|
||||
ALU MOV S1027.w@chgr : A2[3].y {W}
|
||||
ALU MOV S1029.y@group : A2[0].w {WL}
|
||||
ALU MOV S48.x@chgr : A2[0].z {W}
|
||||
ALU MOV S47.y@chgr : A2[2].y {W}
|
||||
ALU MOV S47.z@chgr : A2[3].x {W}
|
||||
ALU MOV S47.w@chgr : A2[3].y {W}
|
||||
ALU MOV S48.y@group : A2[0].w {WL}
|
||||
ALU_GROUP_END
|
||||
ALU_GROUP_BEGIN
|
||||
ALU MOV S1031.x@chgr : A2[2].z {W}
|
||||
ALU MOV S1031.y@chgr : A2[2].w {W}
|
||||
ALU MOV S1029.z@chgr : A2[1].z {W}
|
||||
ALU MOV S1029.w@chgr : A2[1].w {W}
|
||||
ALU MOV S1031.z@group : A2[3].z {WL}
|
||||
ALU MOV S49.x@chgr : A2[2].z {W}
|
||||
ALU MOV S49.y@chgr : A2[2].w {W}
|
||||
ALU MOV S48.z@chgr : A2[1].z {W}
|
||||
ALU MOV S48.w@chgr : A2[1].w {W}
|
||||
ALU MOV S49.z@group : A2[3].z {WL}
|
||||
ALU_GROUP_END
|
||||
ALU_GROUP_BEGIN
|
||||
ALU MOV S1031.w@chgr : A2[3].w {WL}
|
||||
ALU MOV S49.w@chgr : A2[3].w {WL}
|
||||
ALU_GROUP_END
|
||||
BLOCK_END
|
||||
BLOCK_START
|
||||
EXPORT_DONE POS 0 S19.xyzw
|
||||
EXPORT PARAM 0 S1025.xyzw
|
||||
EXPORT PARAM 1 S1027.xyzw
|
||||
EXPORT PARAM 2 S1029.xyzw
|
||||
EXPORT_DONE PARAM 3 S1031.xyzw
|
||||
EXPORT PARAM 0 S46.xyzw
|
||||
EXPORT PARAM 1 S47.xyzw
|
||||
EXPORT PARAM 2 S48.xyzw
|
||||
EXPORT_DONE PARAM 3 S49.xyzw
|
||||
BLOCK END\n
|
||||
)";
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue