diff --git a/src/panfrost/compiler/bifrost/bi_ra.c b/src/panfrost/compiler/bifrost/bi_ra.c
index e10ee92bfa3..f8579f8c983 100644
--- a/src/panfrost/compiler/bifrost/bi_ra.c
+++ b/src/panfrost/compiler/bifrost/bi_ra.c
@@ -8,6 +8,7 @@
#include "bi_builder.h"
#include "compiler.h"
#include "nodearray.h"
+#include "valhall.h"
struct lcra_state {
unsigned node_count;
@@ -380,6 +381,9 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
bi_foreach_ssa_src(ins, s) {
if (bi_count_read_registers(ins, s) >= 2)
l->affinity[ins->src[s].value] &= EVEN_BITS_MASK;
+ else if (s < valhall_opcodes[ins->op].nr_srcs &&
+ va_src_info(ins->op, s).size > VA_SIZE_32)
+ l->affinity[ins->src[s].value] &= EVEN_BITS_MASK;
}
}
diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c
index 1690a43292d..98312dd6d06 100644
--- a/src/panfrost/compiler/bifrost/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.c
@@ -2310,6 +2310,15 @@ bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps)
{
unsigned bitsize = nir_src_bit_size(src.src);
+ if (b->shader->arch >= 9 && bitsize == 64) {
+ /* On Valhall, 64-bit instructions encode only one register but also read
+ * the register immediately following it. Therefore we do not need to
+ * extract a single register here.
+ */
+ assert(comps == 1);
+ return bi_src_index(&src.src);
+ }
+
/* the bi_index carries the 32-bit (word) offset separate from the
* subword swizzle, first handle the offset */
@@ -3373,7 +3382,14 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
break;
case nir_op_iadd:
- bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, false);
+ if (sz == 64) {
+ assert(b->shader->arch >= 9);
+ bi_shaddx_s64_to(b, dst, s0, s1, 0);
+ bi_index dsts[4] = {bi_null(), bi_null(), bi_null(), bi_null()};
+ bi_emit_split_i32(b, dsts, dst, 2);
+ bi_cache_collect(b, dst, dsts, 2);
+ } else
+ bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, false);
break;
case nir_op_iadd_sat:
diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.h b/src/panfrost/compiler/bifrost/bifrost_compile.h
index 70f6221d9b7..811f5f4339b 100644
--- a/src/panfrost/compiler/bifrost/bifrost_compile.h
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.h
@@ -116,7 +116,7 @@ bool valhall_can_merge_workgroups(nir_shader *nir);
\
.lower_doubles_options = \
nir_lower_dmod, /* TODO: Don't lower supported 64-bit operations */ \
- .lower_int64_options = ~0, /* TODO: Use IMULD on v7 */ \
+ .lower_int64_options = arch >= 9 ? ~(nir_lower_iadd64) : ~0, \
.lower_mul_high = true, \
.lower_fisnormal = true, \
.lower_uadd_carry = true, \
diff --git a/src/panfrost/compiler/bifrost/bir.c b/src/panfrost/compiler/bifrost/bir.c
index 366a0e5d064..8da950f32ae 100644
--- a/src/panfrost/compiler/bifrost/bir.c
+++ b/src/panfrost/compiler/bifrost/bir.c
@@ -77,6 +77,8 @@ bi_count_read_registers(const bi_instr *ins, unsigned s)
return ins->sr_count_2; /* Dual source blending */
else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
return ins->nr_dests;
+ else if (ins->op == BI_OPCODE_SHADDX_S64 || ins->op == BI_OPCODE_SHADDX_U64)
+ return 2;
else
return 1;
}
@@ -123,7 +125,9 @@ bi_count_write_registers(const bi_instr *ins, unsigned d)
default:
return bi_count_staging_registers(ins);
}
- } else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
+ } else if (ins->op == BI_OPCODE_SEG_ADD_I64 ||
+ ins->op == BI_OPCODE_SHADDX_S64 ||
+ ins->op == BI_OPCODE_SHADDX_U64) {
return 2;
} else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) {
return ins->sr_count_2;
diff --git a/src/panfrost/compiler/bifrost/valhall/ISA.xml b/src/panfrost/compiler/bifrost/valhall/ISA.xml
index f4d53389c79..43b292f2c57 100644
--- a/src/panfrost/compiler/bifrost/valhall/ISA.xml
+++ b/src/panfrost/compiler/bifrost/valhall/ISA.xml
@@ -2626,7 +2626,7 @@
B
-
+
Sign or zero extend B to 64-bits, left-shift by `shift`, and add the
64-bit value A. These instructions accelerate address arithmetic, but may
diff --git a/src/panfrost/compiler/bifrost/valhall/va_lower_split_64bit.c b/src/panfrost/compiler/bifrost/valhall/va_lower_split_64bit.c
index 06a0bb05200..6b81346845c 100644
--- a/src/panfrost/compiler/bifrost/valhall/va_lower_split_64bit.c
+++ b/src/panfrost/compiler/bifrost/valhall/va_lower_split_64bit.c
@@ -26,14 +26,18 @@ lower_split_src(bi_context *ctx, bi_instr *I, unsigned s, bi_instr** lut)
return;
}
- /* Check if the source regs are already coming from a split. */
+ /* Check if the source regs are already coming from a split/collect pair. */
bi_index *src_a = &I->src[s];
bi_index *src_b = &I->src[s + 1];
if (bi_is_ssa(*src_a) && bi_is_ssa(*src_b)) {
bi_instr *src_ins_a = lut[src_a->value];
bi_instr *src_ins_b = lut[src_b->value];
- if (src_ins_a->op == BI_OPCODE_SPLIT_I32 && src_ins_a == src_ins_b)
- return;
+ if (src_ins_a->op == BI_OPCODE_SPLIT_I32 && src_ins_a == src_ins_b) {
+ bi_index split_src = src_ins_a->src[0];
+ if (!bi_is_ssa(split_src) ||
+ lut[split_src.value]->op == BI_OPCODE_COLLECT_I32)
+ return;
+ }
}
/* Allocate temporary before the instruction */
@@ -76,7 +80,9 @@ va_lower_split_64bit(bi_context *ctx)
struct va_src_info info = va_src_info(I->op, s);
- if (info.size == VA_SIZE_64)
+ /* Only split if the instruction expects 64-bit inputs as two separate
+ * sources. */
+ if (info.size == VA_SIZE_64 && bi_count_read_registers(I, s) == 1)
lower_split_src(ctx, I, s, lut);
}
}
diff --git a/src/panfrost/compiler/bifrost/valhall/va_pack.c b/src/panfrost/compiler/bifrost/valhall/va_pack.c
index 5eefb9b5ff8..9665cc1cfd5 100644
--- a/src/panfrost/compiler/bifrost/valhall/va_pack.c
+++ b/src/panfrost/compiler/bifrost/valhall/va_pack.c
@@ -325,6 +325,25 @@ va_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size)
default:
invalid_instruction(I, "32-bit widen");
}
+ } else if (size == VA_SIZE_64) {
+ switch (swz) {
+ case BI_SWIZZLE_H01:
+ return VA_SWIZZLES_64_BIT_NONE;
+ case BI_SWIZZLE_H0:
+ return VA_SWIZZLES_64_BIT_H0;
+ case BI_SWIZZLE_H1:
+ return VA_SWIZZLES_64_BIT_H1;
+ case BI_SWIZZLE_B0:
+ return VA_SWIZZLES_64_BIT_B0;
+ case BI_SWIZZLE_B1:
+ return VA_SWIZZLES_64_BIT_B1;
+ case BI_SWIZZLE_B2:
+ return VA_SWIZZLES_64_BIT_B2;
+ case BI_SWIZZLE_B3:
+ return VA_SWIZZLES_64_BIT_B3;
+ default:
+ invalid_instruction(I, "64-bit widen");
+ }
} else {
invalid_instruction(I, "type size for widen");
}
diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.c.py b/src/panfrost/compiler/bifrost/valhall/valhall.c.py
index 3645092b836..81e9a2ba523 100644
--- a/src/panfrost/compiler/bifrost/valhall/valhall.c.py
+++ b/src/panfrost/compiler/bifrost/valhall/valhall.c.py
@@ -34,8 +34,6 @@ SKIP = set([
"ISUB.u64",
"ISUB.s64",
"IMULD.u64",
- "SHADDX.u64",
- "SHADDX.s64",
"IMULD.u64",
"LSHIFT_AND.i64",
"RSHIFT_AND.i64",