diff --git a/src/panfrost/compiler/bifrost/valhall/ISA.xml b/src/panfrost/compiler/bifrost/valhall/ISA.xml index 53ddbc06856..772deb1e485 100644 --- a/src/panfrost/compiler/bifrost/valhall/ISA.xml +++ b/src/panfrost/compiler/bifrost/valhall/ISA.xml @@ -459,6 +459,20 @@ clamp_0_1 + + + Encoded clamp/special/round for v15. + + none + clamp_0_inf + clamp_m1_1 + clamp_0_1 + n_round_zero + n + left + n_add + + Condition code. Type must be inferred from the instruction. IEEE 754 total @@ -790,6 +804,83 @@ quiet_nan + + + Make rshift_and instructions signed. + + none + + + + + + + + + + + + signed + + + + + + + + Make rshift_or instructions signed. + + + + + + none + + + + + + + + + signed + + + + + + + Make rshift_xor instructions signed. + + + + + + + + + + none + + + signed + + + + + + + + + Make atomic instructions compare. + + + none + + compare + + + Address to load from after adding offset Mode descriptor @@ -1507,7 +1604,9 @@ + + Byte offset Mode descriptor @@ -1536,6 +1635,7 @@ + Byte offset Mode descriptor @@ -1564,6 +1664,7 @@ + Byte offset Mode descriptor @@ -1592,6 +1693,7 @@ + Byte offset Mode descriptor @@ -1620,6 +1722,7 @@ + Byte offset Mode descriptor @@ -1648,6 +1751,7 @@ + Byte offset Mode descriptor @@ -1676,6 +1780,7 @@ + Byte offset Mode descriptor @@ -1741,7 +1846,9 @@ + + Address to load from after adding offset @@ -1766,7 +1873,9 @@ + + Address to load from after adding offset @@ -1792,6 +1901,7 @@ + Address to load from after adding offset @@ -1817,6 +1927,7 @@ + Address to load from after adding offset @@ -1842,6 +1953,7 @@ + Address to load from after adding offset @@ -1867,6 +1979,7 @@ + Address to load from after adding offset @@ -1892,6 +2005,7 @@ + Address to load from after adding offset @@ -1917,6 +2031,7 @@ + Address to load from after adding offset @@ -2089,6 +2204,7 @@ + @@ -2118,6 +2234,7 @@ + @@ -2263,7 +2380,9 @@ modifiers are set. Used to implement gl_FragDepth and gl_FragStencil. + + Updated coverage mask Depth value Stencil value @@ -2790,8 +2909,11 @@ to zero or signaling NaNs to quiet NaNs depending on the mode. + + + @@ -2840,7 +2962,9 @@ and square root computation respectively. + + @@ -3217,6 +3341,7 @@ `.second` is set (indicating the FATAN_TABLE.f32 instruction). + A B @@ -3440,7 +3565,9 @@ + + A B @@ -3476,6 +3603,7 @@ + A B @@ -3785,6 +3913,7 @@ it performs an unsigned right shift. + A shift @@ -3906,6 +4035,7 @@ it performs an unsigned right shift. + A shift @@ -4027,6 +4157,7 @@ it performs an unsigned right shift. + A shift @@ -4051,6 +4182,7 @@ `(A & mask) | (B & ~mask)`. + A B Mask @@ -4074,6 +4206,7 @@ `(A & mask) | (B & ~mask)`. + A B Mask @@ -4097,6 +4230,7 @@ `(A & mask) | (B & ~mask)`. + A B Mask @@ -4716,6 +4850,7 @@ + @@ -4743,6 +4878,7 @@ + @@ -5180,7 +5316,6 @@ - First calculates $A \cdot B + C$ and then biases the exponent by D. Used in @@ -5189,7 +5324,8 @@ `FMA.f32` operations. Equivalent to `FMA.f32` back-to-back with `LDEXP.f32` - + + A B C @@ -5200,11 +5336,6 @@ - - - - - First calculates $A \cdot B + C$ and then biases the exponent by D. If $A = 0$ or $B = 0$, the multiply $A \cdot B$ is treated as zero even if an @@ -5224,11 +5355,6 @@ - - - - - First calculates $A \cdot B + C$ and then biases the exponent by D. If $A = 0$ or $B = 0$, the multiply is treated as $A$ even if an @@ -5248,11 +5374,6 @@ - - - - - First calculates $A \cdot B + C$ and then biases the exponent by D, interpreted as a 16-bit value. Used in special transcendental function diff --git a/src/panfrost/compiler/bifrost/valhall/disasm.py b/src/panfrost/compiler/bifrost/valhall/disasm.py index d744d6bf45e..05e0facb9a1 100644 --- a/src/panfrost/compiler/bifrost/valhall/disasm.py +++ b/src/panfrost/compiler/bifrost/valhall/disasm.py @@ -28,6 +28,10 @@ template = """ #define VA_SRC_UNIFORM_TYPE 0x2 #define VA_SRC_IMM_TYPE 0x3 +#define VA_SRC_V15_MODE1 BIT(8) +#define VA_SRC_V15_MODE2 BIT(7) +#define VA_SRC_V15_MODE4 BIT(5) + % for name, en in ENUMS.items(): UNUSED static const char *valhall_${name}[] = { % for v in en.values: @@ -91,22 +95,84 @@ va_print_float_src(FILE *fp, unsigned type, unsigned value, unsigned size, unsig fprintf(fp, ".abs"); } +static inline void +va_print_src_v15(FILE *fp, unsigned high1, unsigned low8, unsigned size, unsigned fau_page) +{ + unsigned src = (high1 << 8) | low8; + + /* Not reg */ + if (src & VA_SRC_V15_MODE1) { + /* Not uniform */ + if (src & VA_SRC_V15_MODE2) { + /* FAU special */ + if (src & VA_SRC_V15_MODE4) { + unsigned value = src & MASK(5); + if (fau_page == 0) + fputs(valhall_fau_special_page_0[value >> 1] + 1, fp); + else if (fau_page == 1) + fputs(valhall_fau_special_page_1[value >> 1] + 1, fp); + else if (fau_page == 3) + fputs(valhall_fau_special_page_3[value >> 1] + 1, fp); + else + fprintf(fp, "reserved_page2"); + + fprintf(fp, ".w%u", value & 1); + } + /* Imm */ + else { + unsigned value = src & MASK(5); + assert(value < 32 && "overflow in LUT"); + fprintf(fp, "0x%X", va_immediates[value]); + } + } + /* Uniform */ + else { + unsigned value = src & MASK(7); + fprintf(fp, "u%u", value >> 1 | (fau_page << 6)); + if (size <= 32) + fprintf(fp, ".w%u", value & 1); + } + } + /* Reg */ + else { + unsigned value = src & MASK(7); + bool discard = (src & BIT(7)); + char *dmark = discard ? "^" : ""; + if (size > 32) + fprintf(fp, "[r%u%s:r%u%s]", value, dmark, value + 1, dmark); + else + fprintf(fp, "r%u%s", value, dmark); + } +} + +static inline void +va_print_float_src_v15(FILE *fp, unsigned high1, unsigned low8, unsigned size, unsigned fau_page, bool neg, bool abs) +{ + va_print_src_v15(fp, high1, low8, size, fau_page); + + if (neg) + fprintf(fp, ".neg"); + + if (abs) + fprintf(fp, ".abs"); +} + static inline void va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size) { if (size > 32) fprintf(fp, "[r%u:r%u]", value, value + 1); - else + else { fprintf(fp, "r%u", value); - - if (mask != 0x3) - fprintf(fp, ".h%u", (mask == 1) ? 0 : 1); + if (mask != 0x3) + fprintf(fp, ".h%u", (mask == 1) ? 0 : 1); + } } -<%def name="print_instr(op)"> +<%def name="print_instr(op, v15)"> <% no_comma = True %> fputs("${op.name}", fp); -% for mod in op.modifiers: +% for mod in (op.modifiers_v15 if v15 else op.modifiers): % if mod.name not in ["staging_register_count", "staging_register_write_count"]: % if mod.is_enum: fputs(valhall_${safe_name(mod.enum)}[(instr >> ${mod.start}) & ${hex((1 << mod.size) - 1)}], fp); @@ -115,10 +181,18 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size) % endif % endif % endfor +% if v15: + fprintf(fp, "%s ", valhall_flow[(instr >> ${op.offset['flow_v15']}) & ${hex(op.mask['flow_v15'])}]); +% else: fprintf(fp, "%s ", valhall_flow[(instr >> ${op.offset['flow']}) & ${hex(op.mask['flow'])}]); +% endif % for i, dest in enumerate(op.dests): <% no_comma = False %> +% if v15: + va_print_dest(fp, (instr >> ${dest.offset['mode_v15']}) & ${hex(dest.mask['mode_v15'])}, (instr >> ${dest.offset['value_v15']}) & ${hex(dest.mask['value_v15'])}, ${dest.size}); +% else: va_print_dest(fp, (instr >> ${dest.offset['mode']}) & ${hex(dest.mask['mode'])}, (instr >> ${dest.offset['value']}) & ${hex(dest.mask['value'])}, ${dest.size}); +% endif % endfor % for index, sr in enumerate(op.staging): % if not no_comma: @@ -130,13 +204,12 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size) if sr.count != 0: sr_count = sr.count; else: - for mod in op.modifiers: + for mod in (op.modifiers_v15 if v15 else op.modifiers): if mod.name == "staging_register_write_count" and sr.write: sr_count = f"(((instr >> {mod.start}) & {hex((1 << mod.size) - 1)}) + 1)"; elif mod.name == "staging_register_count": sr_count = f"((instr >> {mod.start}) & {hex((1 << mod.size) - 1)})"; %> -// assert(((instr >> ${sr.start}) & 0xC0) == ${sr.encoded_flags}); fprintf(fp, "@"); for (unsigned i = 0; i < ${sr_count}; ++i) { fprintf(fp, "%sr%u", (i == 0) ? "" : ":", @@ -148,6 +221,28 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size) fputs(", ", fp); % endif <% no_comma = False %> +% if v15: +% if src.absneg: + va_print_float_src_v15(fp, (instr >> ${src.offset['high1_v15']}) & ${hex(src.mask['high1_v15'])}, (instr >> ${src.offset['low8_v15']}) & ${hex(src.mask['low8_v15'])}, + ${src.size}, (instr >> ${op.offset['fau_page_v15']}) & ${hex(op.mask['fau_page_v15'])}, +% if op.name[:4] == "FMA." and i == 0: + false, + instr & BIT(${src.offset['abs']})); +% elif op.name[:10] == "FMA_RSCALE" and i == 2: + instr & BIT(${src.offset['neg'] + 1}), + false); +% else: + instr & BIT(${src.offset['neg']}), + instr & BIT(${src.offset['abs']})); +% endif +% elif src.is_float: + va_print_float_src_v15(fp, (instr >> ${src.offset['high1_v15']}) & ${src.mask['high1_v15']}, (instr >> ${src.offset['low8_v15']}) & ${hex(src.mask['low8_v15'])}, + ${src.size}, (instr >> ${op.offset['fau_page_v15']}) & ${hex(op.mask['fau_page_v15'])}, false, false); +% else: + va_print_src_v15(fp, (instr >> ${src.offset['high1_v15']}) & ${src.mask['high1_v15']}, (instr >> ${src.offset['low8_v15']}) & ${hex(src.mask['low8_v15'])}, + ${src.size}, (instr >> ${op.offset['fau_page_v15']}) & ${hex(op.mask['fau_page_v15'])}); +% endif +% else: % if src.absneg: va_print_float_src(fp, (instr >> ${src.offset['mode']}) & ${hex(src.mask['mode'])}, (instr >> ${src.offset['value']}) & ${hex(src.mask['value'])}, ${src.size}, (instr >> ${op.offset['fau_page']}) & ${hex(op.mask['fau_page'])}, @@ -160,6 +255,7 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size) va_print_src(fp, (instr >> ${src.offset['mode']}) & ${src.mask['mode']}, (instr >> ${src.offset['value']}) & ${hex(src.mask['value'])}, ${src.size}, (instr >> ${op.offset['fau_page']}) & ${hex(op.mask['fau_page'])}); % endif +% endif % if src.swizzle: % if src.size == 32: fputs(valhall_widen[(instr >> ${src.offset['swizzle']}) & ${hex(src.mask['swizzle'])}], fp); @@ -183,7 +279,7 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size) if (instr & BIT(${src.offset['not']})) fputs(".not", fp); % endif % endfor -% for imm in op.immediates: +% for imm in (op.immediates_v15 if v15 else op.immediates): <% prefix = "#" if imm.name == "constant" else imm.name + ":" fmt = "%d" if imm.signed else "0x%X" @@ -192,16 +288,16 @@ va_print_dest(FILE *fp, unsigned mask, unsigned value, unsigned size) % endfor -<%def name="recurse_subcodes(op_bucket)"> +<%def name="recurse_subcodes(op_bucket, v15)"> %if op_bucket.instr: -${print_instr(op_bucket.instr)} +${print_instr(op_bucket.instr, v15)} %else: opcode = (instr >> ${op_bucket.start}) & ${hex(op_bucket.mask)}; switch (opcode) { %for op in op_bucket.children: case ${hex(op)}: { -${recurse_subcodes(op_bucket.children[op])} +${recurse_subcodes(op_bucket.children[op], v15)} break; } %endfor @@ -215,7 +311,7 @@ va_disasm_instr(FILE *fp, uint64_t instr) { unsigned opcode; -${recurse_subcodes(OPCODES)} +${recurse_subcodes(OPCODES, False)} } void @@ -223,7 +319,7 @@ va_disasm_instr_v15(FILE *fp, uint64_t instr) { unsigned opcode; -${recurse_subcodes(OPCODES_V15)} +${recurse_subcodes(OPCODES_V15, True)} } static bool is_branch(uint64_t instr) diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.c.py b/src/panfrost/compiler/bifrost/valhall/valhall.c.py index b8808bd30e4..31e00f34e31 100644 --- a/src/panfrost/compiler/bifrost/valhall/valhall.c.py +++ b/src/panfrost/compiler/bifrost/valhall/valhall.c.py @@ -97,7 +97,7 @@ valhall_opcodes[BI_NUM_OPCODES] = { sr_control = 0 if len(op.staging) > 0: - sr_control = op.staging[0].encoded_flags >> 6 + sr_control = op.staging[0].encoded_flags %> [BI_OPCODE_${name.replace('.', '_').upper()}] = { .exact = ${hex(exact(op.opcode))}ULL, @@ -154,7 +154,7 @@ valhall_v15_opcodes[BI_NUM_OPCODES] = { sr_control = 0 if len(op.staging) > 0: - sr_control = op.staging[0].encoded_flags >> 6 + sr_control = op.staging[0].encoded_flags %> [BI_OPCODE_${name.replace('.', '_').upper()}] = { .exact = ${hex(exact(op.opcode_v15))}ULL, diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.py b/src/panfrost/compiler/bifrost/valhall/valhall.py index c6cbf31ed86..b6a10fb3077 100644 --- a/src/panfrost/compiler/bifrost/valhall/valhall.py +++ b/src/panfrost/compiler/bifrost/valhall/valhall.py @@ -14,6 +14,7 @@ import sys instructions = [] MODIFIERS = {} +MODIFIERS_V15 = {} enums = {} immediates = [] @@ -102,6 +103,11 @@ class Source: self.offset['value'] = self.start self.mask['value'] = bitmask(6) + self.offset['high1_v15'] = (index + 48) + self.mask['high1_v15'] = bitmask(1) + self.offset['low8_v15'] = self.start + self.mask['low8_v15'] = bitmask(8) + if absneg: self.offset['neg'] = 32 + 2 + ((2 - index) * 2) self.offset['abs'] = 33 + 2 + ((2 - index) * 2) @@ -137,6 +143,11 @@ class Dest: self.offset['value'] = self.start self.mask['value'] = bitmask(6) + self.offset['mode_v15'] = self.start + 13 + self.mask['mode_v15'] = bitmask(2) + self.offset['value_v15'] = self.start + self.mask['value_v15'] = bitmask(8) + class Staging: def __init__(self, read = False, write = False, count = 0, flags = 'true', name = ""): self.name = name @@ -152,6 +163,14 @@ class Staging: self.offset['value'] = self.start self.mask['value'] = bitmask(6) + self.offset['flags'] = self.start + 6 + self.mask['flags'] = bitmask(2) + + self.offset['value_v15'] = self.start + self.mask['value_v15'] = bitmask(8) + self.offset['flags_v15'] = 38 + self.mask['flags_v15'] = bitmask(2) + # For compatibility self.absneg = False @@ -166,11 +185,14 @@ class Staging: if not self.flags: self.encoded_flags = 0 + self.encoded_flags_v15 = 0 elif flags == 'rw': - self.encoded_flags = 0xc0 + self.encoded_flags = 0b11 + self.encoded_flags_v15 = 0b11 else: assert(flags == 'true') - self.encoded_flags = (0x80 if write else 0) | (0x40 if read else 0) + self.encoded_flags = (0b10 if write else 0) | (0b01 if read else 0) + self.encoded_flags_v15 = (0b10 if read else 0) | (0b01 if read and write else 0) class Immediate: def __init__(self, name, start, size, signed): @@ -186,14 +208,16 @@ class Opcode: self.mask = mask class Instruction: - def __init__(self, name, opcode, opcode_v15, srcs = [], dests = [], immediates = [], modifiers = [], staging = None, unit = None): + def __init__(self, name, opcode, opcode_v15, srcs = [], dests = [], immediates = [], immediates_v15 = [], modifiers = [], modifiers_v15 = [], staging = None, unit = None): self.name = name self.srcs = srcs self.dests = dests self.opcode = opcode self.opcode_v15 = opcode_v15 self.immediates = immediates + self.immediates_v15 = immediates_v15 self.modifiers = modifiers + self.modifiers_v15 = modifiers_v15 self.staging = staging self.unit = unit self.is_signed = len(name.split(".")) > 1 and ('s' in name.split(".")[1]) @@ -206,6 +230,11 @@ class Instruction: self.offset['fau_page'] = 57 self.mask['fau_page'] = bitmask(2) + self.offset['flow_v15'] = 58 + self.mask['flow_v15'] = bitmask(4) + self.offset['fau_page_v15'] = 62 + self.mask['fau_page_v15'] = bitmask(2) + # Message-passing instruction <===> not ALU instruction self.message = unit not in ["FMA", "CVT", "SFU"] @@ -306,15 +335,25 @@ def build_instr(el, overrides = {}): # Get immediates imms = [build_imm(imm) for imm in el.findall('imm')] + imms_v15 = [build_imm(imm) for imm in el.findall('imm_v15_override')] + for imm in imms: + if imm.name not in {imm.name for imm in imms_v15}: + imms_v15.append(imm) modifiers = [] + modifiers_v15 = [] for mod in el: if (mod.tag in MODIFIERS) and not (mod.attrib.get('pseudo', False)): modifiers.append(MODIFIERS[mod.tag]) + modifiers_v15.append(MODIFIERS_V15[mod.tag]) elif mod.tag =='va_mod': modifiers.append(build_modifier(mod)) + elif mod.tag =='va_mod_v15': + modifiers_v15.append(build_modifier(mod)) - instr = Instruction(name, opcode, opcode_v15, srcs = sources, dests = dests, immediates = imms, modifiers = modifiers, staging = staging, unit = unit) + + instr = Instruction(name, opcode, opcode_v15, srcs = sources, dests = dests, immediates = imms, immediates_v15 = imms_v15, + modifiers = modifiers, modifiers_v15 = modifiers_v15, staging = staging, unit = unit) instructions.append(instr) @@ -380,6 +419,7 @@ def typesize(name): # Parse the ISA def valhall_parse_isa(xmlfile): global MODIFIERS + global MODIFIERS_V15 global enums global immediates global root @@ -440,6 +480,52 @@ def valhall_parse_isa(xmlfile): "sample": Modifier("sample_mode", 38, 2), } + MODIFIERS_V15 = { + # Texture instructions share a common encoding + "wide_indices": Flag("wide_indices", 8), + "array_enable": Flag("array_enable", 10), + "texel_offset": Flag("texel_offset", 11), + "shadow": Flag("shadow", 12), + "integer_coordinates": Flag("integer_coordinates", 13), + "fetch_component": Modifier("fetch_component", 14, 2), + "lod_mode": Modifier("lod_mode", 13, 3), + "lod_bias_disable": Modifier("lod_mode", 13, 1), + "lod_clamp_disable": Modifier("lod_mode", 14, 1), + "write_mask": Modifier("write_mask", 24, 4), + "dimension": Modifier("dimension", 28, 2), + "skip": Flag("skip", 39), + "register_width": Modifier("register_width", 38, 1, force_enum = "register_width"), + "secondary_register_width": Modifier("secondary_register_width", 54, 1, force_enum = "register_width"), + "vartex_register_width": Modifier("varying_texture_register_width", 24, 2), + + "atom_opc": Modifier("atomic_operation", 24, 4), + "atom_opc_1": Modifier("atomic_operation_with_1", 24, 3), + "inactive_result": Modifier("inactive_result", 22, 4), + "memory_access": Modifier("memory_access", 24, 2), + "regfmt": Modifier("register_format", 24, 3), + "source_format": Modifier("source_format", 24, 2), + "vecsize": Modifier("vector_size", 28, 2), + + "slot": Modifier("slot_v15", 30, 2), + "roundmode": Modifier("round_mode", 32, 2), + "result_type": Modifier("result_type", 24, 2), + "saturate": Flag("saturate", 25), + "not_result": Flag("not_result", 34), + + "lane_op": Modifier("lane_operation", 32, 4), + "cmp": Modifier("condition", 33, 3), + "clamp": Modifier("clamp", 30, 2), + "sr_count": Modifier("staging_register_count", 32, 3, implied = True), + "sample_and_update": Modifier("sample_and_update_mode", 32, 3), + "sr_write_count": Modifier("staging_register_write_count", 35, 3, implied = True), + + "conservative": Flag("conservative", 35), + "subgroup": Modifier("subgroup_size", 36, 4), + "update": Modifier("update_mode", 35, 2), + "sample": Modifier("sample_mode", 37, 2), + } + + for child in root: if child.tag == 'group': build_group(child)