diff --git a/src/panfrost/compiler/bifrost/valhall/ISA.xml b/src/panfrost/compiler/bifrost/valhall/ISA.xml
index 53ddbc06856..772deb1e485 100644
--- a/src/panfrost/compiler/bifrost/valhall/ISA.xml
+++ b/src/panfrost/compiler/bifrost/valhall/ISA.xml
@@ -459,6 +459,20 @@
clamp_0_1
+
+
+ Encoded clamp/special/round for v15.
+
+ none
+ clamp_0_inf
+ clamp_m1_1
+ clamp_0_1
+ n_round_zero
+ n
+ left
+ n_add
+
+
Condition code. Type must be inferred from the instruction. IEEE 754 total
@@ -790,6 +804,83 @@
quiet_nan
+
+
+ Make rshift_and instructions signed.
+
+ none
+
+
+
+
+
+
+
+
+
+
+
+ signed
+
+
+
+
+
+
+
+ Make rshift_or instructions signed.
+
+
+
+
+
+ none
+
+
+
+
+
+
+
+
+ signed
+
+
+
+
+
+
+ Make rshift_xor instructions signed.
+
+
+
+
+
+
+
+
+
+ none
+
+
+ signed
+
+
+
+
+
+
+
+
+ Make atomic instructions compare.
+
+
+ none
+
+ compare
+
+
+
Address to load from after adding offset
Mode descriptor
@@ -1507,7 +1604,9 @@
+
+
Byte offset
Mode descriptor
@@ -1536,6 +1635,7 @@
+
Byte offset
Mode descriptor
@@ -1564,6 +1664,7 @@
+
Byte offset
Mode descriptor
@@ -1592,6 +1693,7 @@
+
Byte offset
Mode descriptor
@@ -1620,6 +1722,7 @@
+
Byte offset
Mode descriptor
@@ -1648,6 +1751,7 @@
+
Byte offset
Mode descriptor
@@ -1676,6 +1780,7 @@
+
Byte offset
Mode descriptor
@@ -1741,7 +1846,9 @@
+
+
Address to load from after adding offset
@@ -1766,7 +1873,9 @@
+
+
Address to load from after adding offset
@@ -1792,6 +1901,7 @@
+
Address to load from after adding offset
@@ -1817,6 +1927,7 @@
+
Address to load from after adding offset
@@ -1842,6 +1953,7 @@
+
Address to load from after adding offset
@@ -1867,6 +1979,7 @@
+
Address to load from after adding offset
@@ -1892,6 +2005,7 @@
+
Address to load from after adding offset
@@ -1917,6 +2031,7 @@
+
Address to load from after adding offset
@@ -2089,6 +2204,7 @@
+
@@ -2118,6 +2234,7 @@
+
@@ -2263,7 +2380,9 @@
modifiers are set. Used to implement gl_FragDepth and gl_FragStencil.
+
+
Updated coverage mask
Depth value
Stencil value
@@ -2790,8 +2909,11 @@
to zero or signaling NaNs to quiet NaNs depending on the mode.
+
+
+
@@ -2840,7 +2962,9 @@
and square root computation respectively.
+
+
@@ -3217,6 +3341,7 @@
`.second` is set (indicating the FATAN_TABLE.f32 instruction).
+
A
B
@@ -3440,7 +3565,9 @@
+
+
A
B
@@ -3476,6 +3603,7 @@
+
A
B
@@ -3785,6 +3913,7 @@
it performs an unsigned right shift.
+
A
shift
@@ -3906,6 +4035,7 @@
it performs an unsigned right shift.
+
A
shift
@@ -4027,6 +4157,7 @@
it performs an unsigned right shift.
+
A
shift
@@ -4051,6 +4182,7 @@
`(A & mask) | (B & ~mask)`.
+
A
B
Mask
@@ -4074,6 +4206,7 @@
`(A & mask) | (B & ~mask)`.
+
A
B
Mask
@@ -4097,6 +4230,7 @@
`(A & mask) | (B & ~mask)`.
+
A
B
Mask
@@ -4716,6 +4850,7 @@
+
@@ -4743,6 +4878,7 @@
+
@@ -5180,7 +5316,6 @@
-
First calculates $A \cdot B + C$ and then biases the exponent by D. Used in
@@ -5189,7 +5324,8 @@
`FMA.f32` operations. Equivalent to `FMA.f32` back-to-back with
`LDEXP.f32`
-
+
+
A
B
C
@@ -5200,11 +5336,6 @@
-
-
-
-
-
First calculates $A \cdot B + C$ and then biases the exponent by D. If $A
= 0$ or $B = 0$, the multiply $A \cdot B$ is treated as zero even if an
@@ -5224,11 +5355,6 @@
-
-
-
-
-
First calculates $A \cdot B + C$ and then biases the exponent by D. If $A
= 0$ or $B = 0$, the multiply is treated as $A$ even if an
@@ -5248,11 +5374,6 @@
-
-
-
-
-
First calculates $A \cdot B + C$ and then biases the exponent by D,
interpreted as a 16-bit value. Used in special transcendental function
diff --git a/src/panfrost/compiler/bifrost/valhall/disasm.py b/src/panfrost/compiler/bifrost/valhall/disasm.py
index d744d6bf45e..05e0facb9a1 100644
--- a/src/panfrost/compiler/bifrost/valhall/disasm.py
+++ b/src/panfrost/compiler/bifrost/valhall/disasm.py
@@ -28,6 +28,10 @@ template = """
#define VA_SRC_UNIFORM_TYPE 0x2
#define VA_SRC_IMM_TYPE 0x3
+#define VA_SRC_V15_MODE1 BIT(8)
+#define VA_SRC_V15_MODE2 BIT(7)
+#define VA_SRC_V15_MODE4 BIT(5)
+
% for name, en in ENUMS.items():
UNUSED static const char *valhall_${name}[] = {
% for v in en.values:
@@ -91,22 +95,84 @@ va_print_float_src(FILE *fp, unsigned type, unsigned value, unsigned size, unsig
fprintf(fp, ".abs");
}
+/*
+ * Print a v15 source operand. The operand is 9 bits wide: high1 supplies
+ * bit 8 and low8 supplies bits 7:0. Decoding, as implemented below:
+ *
+ *   bit 8 clear                -> register rN, N = bits 6:0; bit 7 set
+ *                                 appends the "^" (discard) marker
+ *   bit 8 set, bit 7 clear     -> uniform, index from bits 6:1 and fau_page,
+ *                                 word select from bit 0
+ *   bits 8 and 7 set, 5 clear  -> inline constant, va_immediates[bits 4:0]
+ *   bits 8, 7 and 5 set        -> special FAU value named per fau_page,
+ *                                 word select from bit 0
+ *
+ * size is the operand width in bits: anything above 32 prints a register
+ * pair, and suppresses the word selector on uniforms.
+ */
+static inline void
+va_print_src_v15(FILE *fp, unsigned high1, unsigned low8, unsigned size, unsigned fau_page)
+{
+    unsigned src = (high1 << 8) | low8;
+
+    /* Reg */
+    if (!(src & VA_SRC_V15_MODE1)) {
+        unsigned reg = src & MASK(7);
+        /* const: the marker always points at a string literal */
+        const char *dmark = (src & BIT(7)) ? "^" : "";
+
+        if (size > 32)
+            fprintf(fp, "[r%u%s:r%u%s]", reg, dmark, reg + 1, dmark);
+        else
+            fprintf(fp, "r%u%s", reg, dmark);
+        return;
+    }
+
+    /* Uniform */
+    if (!(src & VA_SRC_V15_MODE2)) {
+        unsigned value = src & MASK(7);
+
+        fprintf(fp, "u%u", (value >> 1) | (fau_page << 6));
+        if (size <= 32)
+            fprintf(fp, ".w%u", value & 1);
+        return;
+    }
+
+    /* Both remaining encodings carry a 5-bit payload */
+    unsigned value = src & MASK(5);
+
+    /* Imm */
+    if (!(src & VA_SRC_V15_MODE4)) {
+        assert(value < 32 && "overflow in LUT");
+        fprintf(fp, "0x%X", va_immediates[value]);
+        return;
+    }
+
+    /* FAU special. The "+ 1" skips the first character of the table entry
+     * (presumably a leading '.' in the enum string; confirm against the
+     * generated valhall_fau_special_page_* tables). */
+    if (fau_page == 0)
+        fputs(valhall_fau_special_page_0[value >> 1] + 1, fp);
+    else if (fau_page == 1)
+        fputs(valhall_fau_special_page_1[value >> 1] + 1, fp);
+    else if (fau_page == 3)
+        fputs(valhall_fau_special_page_3[value >> 1] + 1, fp);
+    else
+        fprintf(fp, "reserved_page2");
+
+    fprintf(fp, ".w%u", value & 1);
+}
+
+
+/*
+ * Print a v15 source operand via va_print_src_v15, then append the float
+ * modifier suffixes: ".neg" when neg is set, followed by ".abs" when abs is
+ * set.
+ */
+static inline void
+va_print_float_src_v15(FILE *fp, unsigned high1, unsigned low8, unsigned size, unsigned fau_page, bool neg, bool abs)
+{
+    va_print_src_v15(fp, high1, low8, size, fau_page);
+
+    /* Suffix order is fixed: negate first, absolute value second */
+    fprintf(fp, "%s%s", neg ? ".neg" : "", abs ? ".abs" : "");
+}
+
+
+/* Print a destination: a register pair when size > 32, otherwise a single
+ * register followed by an optional half selector derived from mask. */
static inline void
va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size)
{
if (size > 32)
fprintf(fp, "[r%u:r%u]", value, value + 1);
- else
+ else {
fprintf(fp, "r%u", value);
-
- if (mask != 0x3)
- fprintf(fp, ".h%u", (mask == 1) ? 0 : 1);
+ /* The half suffix is emitted for the single-register form only:
+  * mask 0x3 prints nothing, mask 1 prints .h0, any other mask .h1 */
+ if (mask != 0x3)
+ fprintf(fp, ".h%u", (mask == 1) ? 0 : 1);
+ }
}
-<%def name="print_instr(op)">
+<%def name="print_instr(op, v15)">
<% no_comma = True %>
fputs("${op.name}", fp);
-% for mod in op.modifiers:
+% for mod in (op.modifiers_v15 if v15 else op.modifiers):
% if mod.name not in ["staging_register_count", "staging_register_write_count"]:
% if mod.is_enum:
fputs(valhall_${safe_name(mod.enum)}[(instr >> ${mod.start}) & ${hex((1 << mod.size) - 1)}], fp);
@@ -115,10 +181,18 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size)
% endif
% endif
% endfor
+% if v15:
+ fprintf(fp, "%s ", valhall_flow[(instr >> ${op.offset['flow_v15']}) & ${hex(op.mask['flow_v15'])}]);
+% else:
fprintf(fp, "%s ", valhall_flow[(instr >> ${op.offset['flow']}) & ${hex(op.mask['flow'])}]);
+% endif
% for i, dest in enumerate(op.dests):
<% no_comma = False %>
+% if v15:
+ va_print_dest(fp, (instr >> ${dest.offset['mode_v15']}) & ${hex(dest.mask['mode_v15'])}, (instr >> ${dest.offset['value_v15']}) & ${hex(dest.mask['value_v15'])}, ${dest.size});
+% else:
va_print_dest(fp, (instr >> ${dest.offset['mode']}) & ${hex(dest.mask['mode'])}, (instr >> ${dest.offset['value']}) & ${hex(dest.mask['value'])}, ${dest.size});
+% endif
% endfor
% for index, sr in enumerate(op.staging):
% if not no_comma:
@@ -130,13 +204,12 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size)
if sr.count != 0:
sr_count = sr.count;
else:
- for mod in op.modifiers:
+ for mod in (op.modifiers_v15 if v15 else op.modifiers):
if mod.name == "staging_register_write_count" and sr.write:
sr_count = f"(((instr >> {mod.start}) & {hex((1 << mod.size) - 1)}) + 1)";
elif mod.name == "staging_register_count":
sr_count = f"((instr >> {mod.start}) & {hex((1 << mod.size) - 1)})";
%>
-// assert(((instr >> ${sr.start}) & 0xC0) == ${sr.encoded_flags});
fprintf(fp, "@");
for (unsigned i = 0; i < ${sr_count}; ++i) {
fprintf(fp, "%sr%u", (i == 0) ? "" : ":",
@@ -148,6 +221,28 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size)
fputs(", ", fp);
% endif
<% no_comma = False %>
+% if v15:
+% if src.absneg:
+ va_print_float_src_v15(fp, (instr >> ${src.offset['high1_v15']}) & ${hex(src.mask['high1_v15'])}, (instr >> ${src.offset['low8_v15']}) & ${hex(src.mask['low8_v15'])},
+ ${src.size}, (instr >> ${op.offset['fau_page_v15']}) & ${hex(op.mask['fau_page_v15'])},
+% if op.name[:4] == "FMA." and i == 0:
+ false,
+ instr & BIT(${src.offset['abs']}));
+% elif op.name[:10] == "FMA_RSCALE" and i == 2:
+ instr & BIT(${src.offset['neg'] + 1}),
+ false);
+% else:
+ instr & BIT(${src.offset['neg']}),
+ instr & BIT(${src.offset['abs']}));
+% endif
+% elif src.is_float:
+ va_print_float_src_v15(fp, (instr >> ${src.offset['high1_v15']}) & ${src.mask['high1_v15']}, (instr >> ${src.offset['low8_v15']}) & ${hex(src.mask['low8_v15'])},
+ ${src.size}, (instr >> ${op.offset['fau_page_v15']}) & ${hex(op.mask['fau_page_v15'])}, false, false);
+% else:
+ va_print_src_v15(fp, (instr >> ${src.offset['high1_v15']}) & ${src.mask['high1_v15']}, (instr >> ${src.offset['low8_v15']}) & ${hex(src.mask['low8_v15'])},
+ ${src.size}, (instr >> ${op.offset['fau_page_v15']}) & ${hex(op.mask['fau_page_v15'])});
+% endif
+% else:
% if src.absneg:
va_print_float_src(fp, (instr >> ${src.offset['mode']}) & ${hex(src.mask['mode'])}, (instr >> ${src.offset['value']}) & ${hex(src.mask['value'])},
${src.size}, (instr >> ${op.offset['fau_page']}) & ${hex(op.mask['fau_page'])},
@@ -160,6 +255,7 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size)
va_print_src(fp, (instr >> ${src.offset['mode']}) & ${src.mask['mode']}, (instr >> ${src.offset['value']}) & ${hex(src.mask['value'])},
${src.size}, (instr >> ${op.offset['fau_page']}) & ${hex(op.mask['fau_page'])});
% endif
+% endif
% if src.swizzle:
% if src.size == 32:
fputs(valhall_widen[(instr >> ${src.offset['swizzle']}) & ${hex(src.mask['swizzle'])}], fp);
@@ -183,7 +279,7 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size)
if (instr & BIT(${src.offset['not']})) fputs(".not", fp);
% endif
% endfor
-% for imm in op.immediates:
+% for imm in (op.immediates_v15 if v15 else op.immediates):
<%
prefix = "#" if imm.name == "constant" else imm.name + ":"
fmt = "%d" if imm.signed else "0x%X"
@@ -192,16 +288,16 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size)
% endfor
%def>
-<%def name="recurse_subcodes(op_bucket)">
+<%def name="recurse_subcodes(op_bucket, v15)">
%if op_bucket.instr:
-${print_instr(op_bucket.instr)}
+${print_instr(op_bucket.instr, v15)}
%else:
opcode = (instr >> ${op_bucket.start}) & ${hex(op_bucket.mask)};
switch (opcode) {
%for op in op_bucket.children:
case ${hex(op)}:
{
-${recurse_subcodes(op_bucket.children[op])}
+${recurse_subcodes(op_bucket.children[op], v15)}
break;
}
%endfor
@@ -215,7 +311,7 @@ va_disasm_instr(FILE *fp, uint64_t instr)
{
unsigned opcode;
-${recurse_subcodes(OPCODES)}
+${recurse_subcodes(OPCODES, False)}
}
void
@@ -223,7 +319,7 @@ va_disasm_instr_v15(FILE *fp, uint64_t instr)
{
unsigned opcode;
-${recurse_subcodes(OPCODES_V15)}
+${recurse_subcodes(OPCODES_V15, True)}
}
static bool is_branch(uint64_t instr)
diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.c.py b/src/panfrost/compiler/bifrost/valhall/valhall.c.py
index b8808bd30e4..31e00f34e31 100644
--- a/src/panfrost/compiler/bifrost/valhall/valhall.c.py
+++ b/src/panfrost/compiler/bifrost/valhall/valhall.c.py
@@ -97,7 +97,7 @@ valhall_opcodes[BI_NUM_OPCODES] = {
sr_control = 0
if len(op.staging) > 0:
- sr_control = op.staging[0].encoded_flags >> 6
+ sr_control = op.staging[0].encoded_flags
%>
[BI_OPCODE_${name.replace('.', '_').upper()}] = {
.exact = ${hex(exact(op.opcode))}ULL,
@@ -154,7 +154,7 @@ valhall_v15_opcodes[BI_NUM_OPCODES] = {
sr_control = 0
if len(op.staging) > 0:
- sr_control = op.staging[0].encoded_flags >> 6
+ sr_control = op.staging[0].encoded_flags
%>
[BI_OPCODE_${name.replace('.', '_').upper()}] = {
.exact = ${hex(exact(op.opcode_v15))}ULL,
diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.py b/src/panfrost/compiler/bifrost/valhall/valhall.py
index c6cbf31ed86..b6a10fb3077 100644
--- a/src/panfrost/compiler/bifrost/valhall/valhall.py
+++ b/src/panfrost/compiler/bifrost/valhall/valhall.py
@@ -14,6 +14,7 @@ import sys
instructions = []
MODIFIERS = {}
+MODIFIERS_V15 = {}
enums = {}
immediates = []
@@ -102,6 +103,11 @@ class Source:
self.offset['value'] = self.start
self.mask['value'] = bitmask(6)
+ self.offset['high1_v15'] = (index + 48)
+ self.mask['high1_v15'] = bitmask(1)
+ self.offset['low8_v15'] = self.start
+ self.mask['low8_v15'] = bitmask(8)
+
if absneg:
self.offset['neg'] = 32 + 2 + ((2 - index) * 2)
self.offset['abs'] = 33 + 2 + ((2 - index) * 2)
@@ -137,6 +143,11 @@ class Dest:
self.offset['value'] = self.start
self.mask['value'] = bitmask(6)
+ self.offset['mode_v15'] = self.start + 13
+ self.mask['mode_v15'] = bitmask(2)
+ self.offset['value_v15'] = self.start
+ self.mask['value_v15'] = bitmask(8)
+
class Staging:
def __init__(self, read = False, write = False, count = 0, flags = 'true', name = ""):
self.name = name
@@ -152,6 +163,14 @@ class Staging:
self.offset['value'] = self.start
self.mask['value'] = bitmask(6)
+ self.offset['flags'] = self.start + 6
+ self.mask['flags'] = bitmask(2)
+
+ self.offset['value_v15'] = self.start
+ self.mask['value_v15'] = bitmask(8)
+ self.offset['flags_v15'] = 38
+ self.mask['flags_v15'] = bitmask(2)
+
# For compatibility
self.absneg = False
@@ -166,11 +185,14 @@ class Staging:
if not self.flags:
self.encoded_flags = 0
+ self.encoded_flags_v15 = 0
elif flags == 'rw':
- self.encoded_flags = 0xc0
+ self.encoded_flags = 0b11
+ self.encoded_flags_v15 = 0b11
else:
assert(flags == 'true')
- self.encoded_flags = (0x80 if write else 0) | (0x40 if read else 0)
+ self.encoded_flags = (0b10 if write else 0) | (0b01 if read else 0)
+ self.encoded_flags_v15 = (0b10 if read else 0) | (0b01 if read and write else 0)
class Immediate:
def __init__(self, name, start, size, signed):
@@ -186,14 +208,16 @@ class Opcode:
self.mask = mask
class Instruction:
- def __init__(self, name, opcode, opcode_v15, srcs = [], dests = [], immediates = [], modifiers = [], staging = None, unit = None):
+ def __init__(self, name, opcode, opcode_v15, srcs = [], dests = [], immediates = [], immediates_v15 = [], modifiers = [], modifiers_v15 = [], staging = None, unit = None):
self.name = name
self.srcs = srcs
self.dests = dests
self.opcode = opcode
self.opcode_v15 = opcode_v15
self.immediates = immediates
+ self.immediates_v15 = immediates_v15
self.modifiers = modifiers
+ self.modifiers_v15 = modifiers_v15
self.staging = staging
self.unit = unit
self.is_signed = len(name.split(".")) > 1 and ('s' in name.split(".")[1])
@@ -206,6 +230,11 @@ class Instruction:
self.offset['fau_page'] = 57
self.mask['fau_page'] = bitmask(2)
+ self.offset['flow_v15'] = 58
+ self.mask['flow_v15'] = bitmask(4)
+ self.offset['fau_page_v15'] = 62
+ self.mask['fau_page_v15'] = bitmask(2)
+
# Message-passing instruction <===> not ALU instruction
self.message = unit not in ["FMA", "CVT", "SFU"]
@@ -306,15 +335,25 @@ def build_instr(el, overrides = {}):
# Get immediates
imms = [build_imm(imm) for imm in el.findall('imm')]
+ imms_v15 = [build_imm(imm) for imm in el.findall('imm_v15_override')]
+ for imm in imms:
+ if imm.name not in {imm.name for imm in imms_v15}:
+ imms_v15.append(imm)
modifiers = []
+ modifiers_v15 = []
for mod in el:
if (mod.tag in MODIFIERS) and not (mod.attrib.get('pseudo', False)):
modifiers.append(MODIFIERS[mod.tag])
+ modifiers_v15.append(MODIFIERS_V15[mod.tag])
elif mod.tag =='va_mod':
modifiers.append(build_modifier(mod))
+ elif mod.tag =='va_mod_v15':
+ modifiers_v15.append(build_modifier(mod))
- instr = Instruction(name, opcode, opcode_v15, srcs = sources, dests = dests, immediates = imms, modifiers = modifiers, staging = staging, unit = unit)
+
+ instr = Instruction(name, opcode, opcode_v15, srcs = sources, dests = dests, immediates = imms, immediates_v15 = imms_v15,
+ modifiers = modifiers, modifiers_v15 = modifiers_v15, staging = staging, unit = unit)
instructions.append(instr)
@@ -380,6 +419,7 @@ def typesize(name):
# Parse the ISA
def valhall_parse_isa(xmlfile):
global MODIFIERS
+ global MODIFIERS_V15
global enums
global immediates
global root
@@ -440,6 +480,52 @@ def valhall_parse_isa(xmlfile):
"sample": Modifier("sample_mode", 38, 2),
}
+ MODIFIERS_V15 = {
+ # Texture instructions share a common encoding
+ "wide_indices": Flag("wide_indices", 8),
+ "array_enable": Flag("array_enable", 10),
+ "texel_offset": Flag("texel_offset", 11),
+ "shadow": Flag("shadow", 12),
+ "integer_coordinates": Flag("integer_coordinates", 13),
+ "fetch_component": Modifier("fetch_component", 14, 2),
+ "lod_mode": Modifier("lod_mode", 13, 3),
+ "lod_bias_disable": Modifier("lod_mode", 13, 1),
+ "lod_clamp_disable": Modifier("lod_mode", 14, 1),
+ "write_mask": Modifier("write_mask", 24, 4),
+ "dimension": Modifier("dimension", 28, 2),
+ "skip": Flag("skip", 39),
+ "register_width": Modifier("register_width", 38, 1, force_enum = "register_width"),
+ "secondary_register_width": Modifier("secondary_register_width", 54, 1, force_enum = "register_width"),
+ "vartex_register_width": Modifier("varying_texture_register_width", 24, 2),
+
+ "atom_opc": Modifier("atomic_operation", 24, 4),
+ "atom_opc_1": Modifier("atomic_operation_with_1", 24, 3),
+ "inactive_result": Modifier("inactive_result", 22, 4),
+ "memory_access": Modifier("memory_access", 24, 2),
+ "regfmt": Modifier("register_format", 24, 3),
+ "source_format": Modifier("source_format", 24, 2),
+ "vecsize": Modifier("vector_size", 28, 2),
+
+ "slot": Modifier("slot_v15", 30, 2),
+ "roundmode": Modifier("round_mode", 32, 2),
+ "result_type": Modifier("result_type", 24, 2),
+ "saturate": Flag("saturate", 25),
+ "not_result": Flag("not_result", 34),
+
+ "lane_op": Modifier("lane_operation", 32, 4),
+ "cmp": Modifier("condition", 33, 3),
+ "clamp": Modifier("clamp", 30, 2),
+ "sr_count": Modifier("staging_register_count", 32, 3, implied = True),
+ "sample_and_update": Modifier("sample_and_update_mode", 32, 3),
+ "sr_write_count": Modifier("staging_register_write_count", 35, 3, implied = True),
+
+ "conservative": Flag("conservative", 35),
+ "subgroup": Modifier("subgroup_size", 36, 4),
+ "update": Modifier("update_mode", 35, 2),
+ "sample": Modifier("sample_mode", 37, 2),
+ }
+
+
for child in root:
if child.tag == 'group':
build_group(child)