r600/sfn: Switch to register intrinsics

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24212>
This commit is contained in:
Gert Wollny 2023-07-06 16:23:21 +02:00 committed by Marge Bot
parent 886a7ae786
commit 10938cddd7
9 changed files with 291 additions and 189 deletions

View file

@ -503,7 +503,7 @@ bool AluInstr::can_replace_source(PRegister old_src, PVirtualValue new_src)
/* If the old or new source is an array element, we assume that there
* might have been an (untracked) indirect access, so don't replace
* this source */
if (old_src->pin() == pin_array || new_src->pin() == pin_array)
if (old_src->pin() == pin_array && new_src->pin() == pin_array)
return false;
auto [addr, dummy, index] = indirect_addr();

View file

@ -934,8 +934,8 @@ r600_shader_from_nir(struct r600_context *rctx,
NIR_PASS_V(sh, nir_lower_bool_to_int32);
NIR_PASS_V(sh, nir_lower_locals_to_regs, 32);
NIR_PASS_V(sh, nir_convert_from_ssa, true, false);
NIR_PASS_V(sh, nir_lower_locals_to_reg_intrinsics, 32);
NIR_PASS_V(sh, nir_convert_from_ssa, true, true);
NIR_PASS_V(sh, nir_opt_dce);
if (rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {

View file

@ -373,14 +373,18 @@ CopyPropFwdVisitor::visit(AluInstr *instr)
auto ii = dest->uses().begin();
auto ie = dest->uses().end();
auto mov_block_id = instr->block_id();
while(ii != ie) {
auto i = *ii;
auto target_block_id = i->block_id();
++ii;
/* SSA can always be propagated, registers only in the same block
* and only if they are assigned in the same block */
bool can_propagate = dest->has_flag(Register::ssa);
bool dest_can_propagate = dest->has_flag(Register::ssa);
if (!can_propagate) {
if (!dest_can_propagate) {
/* Register can propagate if the assignment was in the same
* block, and we don't have a second assignment coming later
@ -391,12 +395,12 @@ CopyPropFwdVisitor::visit(AluInstr *instr)
* 3: MOV SN.x, R0.x
*
* Here we can't propagate the move in 1 to SN.x in 3 */
if ((instr->block_id() == i->block_id() && instr->index() < i->index())) {
can_propagate = true;
if ((mov_block_id == target_block_id && instr->index() < i->index())) {
dest_can_propagate = true;
if (dest->parents().size() > 1) {
for (auto p : dest->parents()) {
if (p->block_id() == i->block_id() && p->index() > instr->index()) {
can_propagate = false;
dest_can_propagate = false;
break;
}
}
@ -404,7 +408,32 @@ CopyPropFwdVisitor::visit(AluInstr *instr)
}
}
if (can_propagate) {
bool src_can_propagate = false;
if (auto rsrc = src->as_register()) {
if (rsrc->has_flag(Register::ssa)) {
src_can_propagate = true;
} else if (mov_block_id == target_block_id) {
if (rsrc->addr()) {
if (i->block_id() == mov_block_id &&
i->index() == instr->index() + 1)
src_can_propagate = true;
} else {
src_can_propagate = true;
for (auto p : rsrc->parents()) {
if (p->block_id() == mov_block_id &&
p->index() > instr->index() &&
p->index() < i->index()) {
src_can_propagate = false;
break;
}
}
}
}
} else {
src_can_propagate = true;
}
if (dest_can_propagate && src_can_propagate) {
sfn_log << SfnLog::opt << " Try replace in " << i->block_id() << ":"
<< i->index() << *i << "\n";

View file

@ -552,7 +552,8 @@ Shader::process(nir_shader *nir)
allocate_reserved_registers();
allocate_local_registers(&func->impl->registers);
value_factory().allocate_registers(m_register_allocations);
m_required_registers = value_factory().array_registers();
sfn_log << SfnLog::trans << "Process shader \n";
foreach_list_typed(nir_cf_node, node, node, &func->impl->body)
@ -566,14 +567,6 @@ Shader::process(nir_shader *nir)
return true;
}
void
Shader::allocate_local_registers(const exec_list *registers)
{
if (value_factory().allocate_registers(registers))
m_indirect_files |= 1 << TGSI_FILE_TEMPORARY;
m_required_registers = value_factory().array_registers();
}
bool
Shader::scan_shader(const nir_function *func)
{
@ -682,6 +675,9 @@ Shader::scan_instruction(nir_instr *instr)
(nir_var_mem_ssbo | nir_var_mem_global | nir_var_image) &&
nir_intrinsic_memory_scope(intr) != SCOPE_NONE);
break;
case nir_intrinsic_decl_reg:
m_register_allocations.push_back(intr);
break;
default:;
}
return true;
@ -918,7 +914,18 @@ Shader::process_intrinsic(nir_intrinsic_instr *intr)
return emit_atomic_local_shared(intr);
case nir_intrinsic_shader_clock:
return emit_shader_clock(intr);
case nir_intrinsic_load_reg:
return emit_load_reg(intr);
case nir_intrinsic_load_reg_indirect:
return emit_load_reg_indirect(intr);
case nir_intrinsic_store_reg:
return emit_store_reg(intr);
case nir_intrinsic_store_reg_indirect:
return emit_store_reg_indirect(intr);
case nir_intrinsic_decl_reg:
// Registers and arrays are allocated at
// conversion startup time
return true;
default:
return false;
}
@ -966,6 +973,130 @@ Shader::emit_load_to_register(PVirtualValue src)
return dest;
}
/* Common state for the visitors that translate the NIR register access
 * intrinsics (load_reg, store_reg and their indirect variants). The value
 * looked up for the register handle is dispatched through this visitor; the
 * derived read and write handlers implement the Register and LocalArray
 * cases. */
class RegisterAccessHandler : public RegisterVisitor {
public:
   RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr);

   /* A register handle is not expected to resolve to any of the following
    * value types. Debug builds abort on the assertion; when the assertion is
    * compiled out the access is reported as failed through 'success' instead
    * of being dropped silently while still signalling success. */
   void visit(LocalArrayValue& value) override
   {
      (void)value;
      assert(0);
      success = false;
   }
   void visit(UniformValue& value) override
   {
      (void)value;
      assert(0);
      success = false;
   }
   void visit(LiteralConstant& value) override
   {
      (void)value;
      assert(0);
      success = false;
   }
   void visit(InlineConstant& value) override
   {
      (void)value;
      assert(0);
      success = false;
   }

   /* Shader that receives the emitted instructions. */
   Shader& sh;
   /* The register access intrinsic that is being translated. */
   nir_intrinsic_instr *ir;
   /* Indirect address; stays null unless the caller sets it for the
    * *_indirect intrinsics. */
   PVirtualValue addr{nullptr};
   /* Result that the Shader::emit_*_reg functions return to their caller. */
   bool success{true};
};
/* Visitor used for load_reg and load_reg_indirect: it emits the moves that
 * copy the visited register, or the addressed array element, to the
 * destination of the intrinsic. */
class RegisterReadHandler : public RegisterAccessHandler {
public:
   /* Keep the visit() overloads of the base class visible next to the two
    * overrides declared below, and reuse the base class constructor. */
   using RegisterAccessHandler::visit;
   using RegisterAccessHandler::RegisterAccessHandler;

   void visit(Register& value) override;
   void visit(LocalArray& value) override;
};
/* Emit a direct register read. src[0] of the intrinsic is the register
 * handle; the value it resolves to is dispatched to the read handler, which
 * emits the actual move(s). Returns the handler's success flag. */
bool Shader::emit_load_reg(nir_intrinsic_instr *intr)
{
   RegisterReadHandler reader(*this, intr);
   value_factory().src(intr->src[0], 0)->accept(reader);
   return reader.success;
}
/* Emit an indirectly addressed register read. src[1] provides the address
 * value that is handed to the read handler, src[0] is the register handle
 * the handler is dispatched on. Returns the handler's success flag. */
bool Shader::emit_load_reg_indirect(nir_intrinsic_instr *intr)
{
   RegisterReadHandler reader(*this, intr);

   /* The address is resolved before the register handle is looked up. */
   reader.addr = value_factory().src(intr->src[1], 0);

   auto reg_handle = value_factory().src(intr->src[0], 0);
   reg_handle->accept(reader);

   return reader.success;
}
/* Visitor used for store_reg and store_reg_indirect: it emits the moves that
 * copy the stored value into the visited register, or into the addressed
 * array element. */
class RegisterWriteHandler : public RegisterAccessHandler {
public:
   /* Keep the visit() overloads of the base class visible next to the two
    * overrides declared below, and reuse the base class constructor. */
   using RegisterAccessHandler::visit;
   using RegisterAccessHandler::RegisterAccessHandler;

   void visit(Register& value) override;
   void visit(LocalArray& value) override;
};
/* Emit a direct register write. src[1] of the intrinsic is the register
 * handle the write handler is dispatched on; the handler itself fetches the
 * value to store from src[0]. Returns the handler's success flag. */
bool Shader::emit_store_reg(nir_intrinsic_instr *intr)
{
   RegisterWriteHandler writer(*this, intr);
   value_factory().src(intr->src[1], 0)->accept(writer);
   return writer.success;
}
/* Emit an indirectly addressed register write. src[2] provides the address
 * value that is handed to the write handler, src[1] is the register handle
 * the handler is dispatched on. Returns the handler's success flag. */
bool Shader::emit_store_reg_indirect(nir_intrinsic_instr *intr)
{
   RegisterWriteHandler writer(*this, intr);

   /* The address is resolved before the register handle is looked up. */
   writer.addr = value_factory().src(intr->src[2], 0);

   auto reg_handle = value_factory().src(intr->src[1], 0);
   reg_handle->accept(writer);

   return writer.success;
}
/* Only records the shader and the intrinsic; 'addr' and 'success' keep their
 * in-class default values. */
RegisterAccessHandler::RegisterAccessHandler(Shader& shader,
                                             nir_intrinsic_instr *intr):
    sh(shader),
    ir(intr)
{
}
/* Read from an array: emit one MOV per 32-bit channel of the destination.
 * Every component occupies bit_size / 32 consecutive channels, so the
 * channels are simply walked in ascending order. */
void RegisterReadHandler::visit(LocalArray& array)
{
   const auto& def = ir->dest.ssa;

   const int dwords_per_component = def.bit_size / 32;
   const int total_channels = def.num_components * dwords_per_component;

   /* A single component destination is free to be placed anywhere. */
   const auto pin = def.num_components > 1 ? pin_none : pin_free;

   for (int chan = 0; chan < total_channels; ++chan) {
      auto dest = sh.value_factory().dest(ir->dest, chan, pin);
      auto src = array.element(nir_intrinsic_base(ir), addr, chan);
      sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
   }
}
/* Read from a plain register: copy it to channel 0 of the intrinsic's
 * destination with a single MOV. */
void RegisterReadHandler::visit(Register& reg)
{
   auto target = sh.value_factory().dest(ir->dest, 0, pin_free);
   auto mov = new AluInstr(op1_mov, target, &reg, AluInstr::write);
   sh.emit_instruction(mov);
}
/* Write to an array: for every component enabled in the write mask emit one
 * MOV per 32-bit channel that the component occupies. The stored value is
 * src[0] of the intrinsic. */
void RegisterWriteHandler::visit(LocalArray& array)
{
   const int mask = nir_intrinsic_write_mask(ir);
   const int dwords_per_component = ir->src[0].ssa->bit_size / 32;

   for (int comp = 0; comp < ir->num_components; ++comp) {
      const bool enabled = (mask & (1 << comp)) != 0;
      if (enabled) {
         const int first_chan = comp * dwords_per_component;
         const int end_chan = first_chan + dwords_per_component;

         for (int chan = first_chan; chan < end_chan; ++chan) {
            auto dest = array.element(nir_intrinsic_base(ir), addr, chan);
            auto src = sh.value_factory().src(ir->src[0], chan);
            sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
         }
      }
   }
}
/* Write to a plain register: copy channel 0 of the stored value (src[0] of
 * the intrinsic) into the register with a single MOV. Only a write mask of
 * exactly one channel is expected here. */
void RegisterWriteHandler::visit(Register& dest)
{
   int writemask = nir_intrinsic_write_mask(ir);
   assert(writemask == 1);
   /* The mask is only checked by the assertion; reference it explicitly so
    * that builds with NDEBUG don't warn about an unused variable. */
   (void)writemask;

   auto src = sh.value_factory().src(ir->src[0], 0);
   sh.emit_instruction(new AluInstr(op1_mov, &dest, src, AluInstr::write));
}
bool
Shader::emit_atomic_local_shared(nir_intrinsic_instr *instr)
{

View file

@ -298,8 +298,6 @@ private:
bool scan_uniforms(nir_variable *uniform);
void allocate_reserved_registers();
void allocate_local_registers(const exec_list *registers);
virtual int do_allocate_reserved_registers() = 0;
bool scan_instruction(nir_instr *instr);
@ -323,6 +321,10 @@ private:
bool emit_shader_clock(nir_intrinsic_instr *instr);
bool emit_wait_ack();
bool emit_scoped_barrier(nir_intrinsic_instr *instr);
bool emit_load_reg(nir_intrinsic_instr *intr);
bool emit_load_reg_indirect(nir_intrinsic_instr *intr);
bool emit_store_reg(nir_intrinsic_instr *intr);
bool emit_store_reg_indirect(nir_intrinsic_instr *intr);
bool equal_to(const Shader& other) const;
void finalize();
@ -400,6 +402,8 @@ private:
InstructionChain m_chain_instr;
std::list<Instr *, Allocator<Instr *>> m_loops;
int m_control_flow_depth{0};
std::list<nir_intrinsic_instr*> m_register_allocations;
};
std::pair<unsigned, unsigned>

View file

@ -53,37 +53,42 @@ ValueFactory::set_virtual_register_base(int base)
}
bool
ValueFactory::allocate_registers(const exec_list *registers)
ValueFactory::allocate_registers(const std::list<nir_intrinsic_instr *>& regs)
{
bool has_arrays = false;
struct array_entry {
unsigned index;
unsigned length;
unsigned ncomponents;
int ncomponents;
bool operator()(const array_entry& a, const array_entry& b) const
{
return a.length < b.length ||
(a.length == b.length && a.ncomponents > b.ncomponents);
return a.ncomponents < b.ncomponents ||
(a.ncomponents == b.ncomponents && a.length < b.length);
}
};
using array_list =
std::priority_queue<array_entry, std::vector<array_entry>, array_entry>;
std::list<unsigned> non_array;
array_list arrays;
for(auto intr : regs) {
unsigned num_elms = nir_intrinsic_num_array_elems(intr);
int num_comp = nir_intrinsic_num_components(intr);
int bit_size = nir_intrinsic_bit_size(intr);
foreach_list_typed(nir_register, reg, node, registers)
{
if (reg->num_array_elems) {
if (num_elms > 0 || num_comp > 1 || bit_size > 32) {
array_entry ae = {
reg->index, reg->num_array_elems, reg->bit_size / 32 * reg->num_components};
intr->dest.ssa.index,
num_elms ? num_elms : 1,
bit_size / 32 * num_comp};
arrays.push(ae);
has_arrays = true;
} else {
non_array.push_back(intr->dest.ssa.index);
}
}
int ncomponents = 0;
int free_components = 4;
int sel = m_next_register_index;
unsigned length = 0;
@ -94,48 +99,39 @@ ValueFactory::allocate_registers(const exec_list *registers)
/* This is a bit hackish, return an id that encodes the array merge. To
* make sure that the mapping doesn't go wrong we have to make sure the
* array is longer than the number of instances in this array's slot */
if (a.ncomponents + ncomponents > 4 || a.length > length) {
if (a.ncomponents > free_components || a.length > length) {
sel = m_next_register_index;
ncomponents = 0;
length = 0;
free_components = 4;
m_next_register_index += a.length;
}
if (ncomponents == 0)
m_next_register_index += a.length;
uint32_t frac = free_components - a.ncomponents;
uint32_t frac = ncomponents;
auto array = new LocalArray(sel, a.ncomponents, a.length, frac);
for (unsigned i = 0; i < a.ncomponents; ++i) {
for (int i = 0; i < a.ncomponents; ++i) {
RegisterKey key(a.index, i, vp_array);
m_channel_counts.inc_count(i);
m_channel_counts.inc_count(frac + i, a.length);
m_registers[key] = array;
sfn_log << SfnLog::reg << __func__ << ": Allocate array " << key << ":" << *array
<< "\n";
}
ncomponents += a.ncomponents;
free_components -= a.ncomponents;
length = a.length;
}
m_required_array_registers = m_next_register_index ? m_next_register_index : 0;
foreach_list_typed(nir_register, reg, node, registers)
{
if (!reg->num_array_elems) {
uint32_t sel = m_next_register_index++;
unsigned num_components = reg->num_components * reg->bit_size / 32;
for (auto chan = 0u; chan < num_components; ++chan) {
RegisterKey key(reg->index, chan, vp_register);
m_channel_counts.inc_count(chan);
m_registers[key] =
new Register(sel, chan, num_components > 1 ? pin_none : pin_free);
sfn_log << SfnLog::reg << "allocate register " << key << ":"
<< *m_registers[key] << "\n";
}
}
for (auto index : non_array) {
RegisterKey key(index, 0, vp_register);
auto chan = m_channel_counts.least_used(0xf);
m_registers[key] = new Register(m_next_register_index++,
chan, pin_free);
m_channel_counts.inc_count(chan);
}
return has_arrays;
return true;
}
int ValueFactory::new_register_index()
@ -212,44 +208,11 @@ public:
int m_chan;
};
PRegister
ValueFactory::resolve_array(nir_register *reg,
nir_src *indirect,
int base_offset,
int chan)
{
PVirtualValue addr = nullptr;
auto type = reg->num_array_elems ? vp_array : vp_register;
RegisterKey key(reg->index, chan, type);
auto ireg = m_registers.find(key);
if (ireg == m_registers.end()) {
std::cerr << "Key " << key << " not found\n";
assert(0);
}
if (reg->num_array_elems) {
if (indirect)
addr = src(*indirect, 0);
TranslateRegister array_resolution(base_offset, addr, chan);
ireg->second->accept(array_resolution);
assert(array_resolution.m_value);
return array_resolution.m_value;
} else {
return ireg->second;
}
}
PRegister
ValueFactory::dest(const nir_dest& dst, int chan, Pin pin_channel, uint8_t chan_mask)
{
if (dst.is_ssa) {
return dest(dst.ssa, chan, pin_channel, chan_mask);
} else {
return resolve_array(dst.reg.reg, dst.reg.indirect, dst.reg.base_offset, chan);
}
assert(dst.is_ssa);
return dest(dst.ssa, chan, pin_channel, chan_mask);
}
void
@ -320,30 +283,12 @@ ValueFactory::dest_vec4(const nir_dest& dst, Pin pin)
{
if (pin != pin_group && pin != pin_chgr)
pin = pin_chan;
if (dst.is_ssa) {
PRegister x = dest(dst, 0, pin);
PRegister y = dest(dst, 1, pin);
PRegister z = dest(dst, 2, pin);
PRegister w = dest(dst, 3, pin);
return RegisterVec4(x, y, z, w, pin);
} else {
assert(!dst.reg.indirect);
PRegister v[4];
int sel = -1;
for (int i = 0; i < 4; ++i) {
RegisterKey key(dst.reg.reg->index, i, vp_register);
v[i] = m_registers[key];
assert(sel >= 0 || v[i]);
if (sel < 0)
sel = v[i]->sel();
if (!v[i]) {
v[i] = m_registers[key] = new Register(sel, i, pin_group);
}
}
return RegisterVec4(v[0], v[1], v[2], v[3], pin);
}
unreachable("unsupported");
assert(dst.is_ssa);
PRegister x = dest(dst, 0, pin);
PRegister y = dest(dst, 1, pin);
PRegister z = dest(dst, 2, pin);
PRegister w = dest(dst, 3, pin);
return RegisterVec4(x, y, z, w, pin);
}
PRegister ValueFactory::addr()
@ -386,16 +331,12 @@ ValueFactory::src(const nir_src& src, int chan)
{
sfn_log << SfnLog::reg << "search (ref) " << (void *)&src << "\n";
if (src.is_ssa) {
sfn_log << SfnLog::reg << "search ssa " << src.ssa->index << " c:" << chan
<< " got ";
auto val = ssa_src(*src.ssa, chan);
sfn_log << *val << "\n";
return val;
} else {
sfn_log << SfnLog::reg << "search reg " << src.reg.reg->index << "\n";
return local_register(src.reg, chan);
}
assert(src.is_ssa);
sfn_log << SfnLog::reg << "search ssa " << src.ssa->index << " c:" << chan
<< " got ";
auto val = ssa_src(*src.ssa, chan);
sfn_log << *val << "\n";
return val;
}
PVirtualValue
@ -486,22 +427,23 @@ ValueFactory::ssa_src(const nir_ssa_def& ssa, int chan)
if (ival != m_values.end())
return ival->second;
RegisterKey rkey(ssa.index, chan, vp_register);
sfn_log << SfnLog::reg << "search src with key" << rkey << "\n";
ireg = m_registers.find(rkey);
if (ireg != m_registers.end())
return ireg->second;
RegisterKey array_key(ssa.index, chan, vp_array);
sfn_log << SfnLog::reg << "search array with key" << array_key << "\n";
auto iarray = m_registers.find(array_key);
if (iarray != m_registers.end())
return iarray->second;
std::cerr << "Didn't find source with key " << key << "\n";
unreachable("Source values should always exist");
}
PRegister
ValueFactory::local_register(const nir_register_dest& dst, int chan)
{
return resolve_array(dst.reg, dst.indirect, dst.base_offset, chan);
}
PRegister
ValueFactory::local_register(const nir_register_src& src, int chan)
{
return resolve_array(src.reg, src.indirect, src.base_offset, chan);
}
PVirtualValue
ValueFactory::literal(uint32_t value)
{
@ -1040,11 +982,9 @@ ValueFactory::prepare_live_range_map()
continue;
if (key.value.pool == vp_array) {
if (key.value.chan == 0) {
auto array = static_cast<LocalArray *>(reg);
for (auto& a : *array) {
result.append_register(a);
}
auto array = static_cast<LocalArray *>(reg);
for (auto& a : *array) {
result.append_register(a);
}
} else {
if (reg->chan() < 4)

View file

@ -178,6 +178,7 @@ struct register_key_hash {
class ChannelCounts {
public:
void inc_count(int chan) { ++m_counts[chan]; }
void inc_count(int chan, int n) { m_counts[chan] += n; }
int least_used(uint8_t mask) const
{
int least_used = 0;
@ -222,8 +223,9 @@ public:
int new_register_index();
bool allocate_registers(const exec_list *registers);
bool allocate_registers(const std::list<nir_intrinsic_instr *>& regs);
PRegister allocate_pinned_register(int sel, int chan);
RegisterVec4 allocate_pinned_vec4(int sel, bool is_ssa);
void inject_value(const nir_dest& dest, int chan, PVirtualValue value);
@ -292,11 +294,6 @@ public:
private:
PVirtualValue ssa_src(const nir_ssa_def& dest, int chan);
PRegister local_register(const nir_register_dest& dest, int chan);
PRegister local_register(const nir_register_src& dest, int chan);
PRegister
resolve_array(nir_register *reg, nir_src *indirect, int base_offset, int chan);
int m_next_register_index;
int m_next_temp_channel{0};

View file

@ -873,9 +873,10 @@ LocalArray::LocalArray(int base_sel, int nchannels, int size, int frac):
sfn_log << SfnLog::reg << "Allocate array A" << base_sel << "(" << size << ", " << frac
<< ", " << nchannels << ")\n";
auto pin = m_size > 1 ? pin_array : (nchannels > 1 ? pin_none : pin_free);
for (int c = 0; c < nchannels; ++c) {
for (unsigned i = 0; i < m_size; ++i) {
PRegister reg = new Register(base_sel + i, c + frac, pin_array);
PRegister reg = new Register(base_sel + i, c + frac, pin);
m_values[m_size * c + i] = new LocalArrayValue(reg, *this);
}
}

View file

@ -1912,27 +1912,27 @@ ELSE
ALU MOV A2[S37.x].x : I[0] {W}
ALU MOV A2[S37.x].y : L[0x3dcccccd] {WL}
ENDIF
ALU MOV S1025.x@group{s} : A2[0].x {W}
ALU MOV S1025.y@group{s} : A2[0].y {WL}
ALU MOV S1025.z@group{s} : A2[1].x {W}
ALU MOV S1025.w@group{s} : A2[1].y {WL}
ALU MOV S1027.x@group{s} : A2[2].x {W}
ALU MOV S1027.y@group{s} : A2[2].y {WL}
ALU MOV S1027.z@group{s} : A2[3].x {W}
ALU MOV S1027.w@group{s} : A2[3].y {WL}
ALU MOV S1029.x@group{s} : A2[0].z {W}
ALU MOV S1029.y@group{s} : A2[0].w {WL}
ALU MOV S1029.z@group{s} : A2[1].z {W}
ALU MOV S1029.w@group{s} : A2[1].w {WL}
ALU MOV S1031.x@group{s} : A2[2].z {W}
ALU MOV S1031.y@group{s} : A2[2].w {WL}
ALU MOV S1031.z@group{s} : A2[3].z {W}
ALU MOV S1031.w@group{s} : A2[3].w {WL}
EXPORT_DONE POS 0 S19.xyzw
EXPORT PARAM 0 S1025.xyzw
EXPORT PARAM 1 S1027.xyzw
EXPORT PARAM 2 S1029.xyzw
EXPORT_DONE PARAM 3 S1031.xyzw
ALU MOV S46.x@group{s} : A2[0].x {W}
ALU MOV S46.y@group{s} : A2[0].y {W}
ALU MOV S46.z@group{s} : A2[1].x {W}
ALU MOV S46.w@group{s} : A2[1].y {WL}
EXPORT PARAM 0 S46.xyzw
ALU MOV S47.x@group{s} : A2[2].x {W}
ALU MOV S47.y@group{s} : A2[2].y {W}
ALU MOV S47.z@group{s} : A2[3].x {W}
ALU MOV S47.w@group{s} : A2[3].y {WL}
EXPORT PARAM 1 S47.xyzw
ALU MOV S48.x@group{s} : A2[0].z {W}
ALU MOV S48.y@group{s} : A2[0].w {W}
ALU MOV S48.z@group{s} : A2[1].z {W}
ALU MOV S48.w@group{s} : A2[1].w {WL}
EXPORT PARAM 2 S48.xyzw
ALU MOV S49.x@group{s} : A2[2].z {W}
ALU MOV S49.y@group{s} : A2[2].w {W}
ALU MOV S49.z@group{s} : A2[3].z {W}
ALU MOV S49.w@group{s} : A2[3].w {WL}
EXPORT_DONE PARAM 3 S49.xyzw
)";
const char *shader_with_dest_array_opt_scheduled =
@ -2022,36 +2022,36 @@ ELSE
ALU_GROUP_END
ENDIF
ALU_GROUP_BEGIN
ALU MOV S1025.x@chgr : A2[0].x {W}
ALU MOV S1025.y@chgr : A2[0].y {W}
ALU MOV S1025.z@chgr : A2[1].x {W}
ALU MOV S1025.w@chgr : A2[1].y {W}
ALU MOV S1027.x@group : A2[2].x {WL}
ALU MOV S46.x@chgr : A2[0].x {W}
ALU MOV S46.y@chgr : A2[0].y {W}
ALU MOV S46.z@chgr : A2[1].x {W}
ALU MOV S46.w@chgr : A2[1].y {W}
ALU MOV S47.x@group : A2[2].x {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV S1029.x@chgr : A2[0].z {W}
ALU MOV S1027.y@chgr : A2[2].y {W}
ALU MOV S1027.z@chgr : A2[3].x {W}
ALU MOV S1027.w@chgr : A2[3].y {W}
ALU MOV S1029.y@group : A2[0].w {WL}
ALU MOV S48.x@chgr : A2[0].z {W}
ALU MOV S47.y@chgr : A2[2].y {W}
ALU MOV S47.z@chgr : A2[3].x {W}
ALU MOV S47.w@chgr : A2[3].y {W}
ALU MOV S48.y@group : A2[0].w {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV S1031.x@chgr : A2[2].z {W}
ALU MOV S1031.y@chgr : A2[2].w {W}
ALU MOV S1029.z@chgr : A2[1].z {W}
ALU MOV S1029.w@chgr : A2[1].w {W}
ALU MOV S1031.z@group : A2[3].z {WL}
ALU MOV S49.x@chgr : A2[2].z {W}
ALU MOV S49.y@chgr : A2[2].w {W}
ALU MOV S48.z@chgr : A2[1].z {W}
ALU MOV S48.w@chgr : A2[1].w {W}
ALU MOV S49.z@group : A2[3].z {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV S1031.w@chgr : A2[3].w {WL}
ALU MOV S49.w@chgr : A2[3].w {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE POS 0 S19.xyzw
EXPORT PARAM 0 S1025.xyzw
EXPORT PARAM 1 S1027.xyzw
EXPORT PARAM 2 S1029.xyzw
EXPORT_DONE PARAM 3 S1031.xyzw
EXPORT PARAM 0 S46.xyzw
EXPORT PARAM 1 S47.xyzw
EXPORT PARAM 2 S48.xyzw
EXPORT_DONE PARAM 3 S49.xyzw
BLOCK END\n
)";