From cf8f79a9fcfcca1e4337ed22c02595516024c22c Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 14 Sep 2020 13:48:28 -0400 Subject: [PATCH] pan/bi: Add packing generator From the ISA definition, we can generate a function for each instruction that looks at the bi_instruction in the intermediate representation and emits a 20- or 23-bit word (for ADD/FMA respectively) containing that instruction with all of its modifiers. These will approximate the old packing routines, although the mapping of bi_instruction to machine instructions will be hardcoded (at least for now). Signed-off-by: Alyssa Rosenzweig Reviewed-by: Daniel Stone Part-of: --- src/panfrost/bifrost/gen_pack.py | 584 +++++++++++++++++++++++++++++++ 1 file changed, 584 insertions(+) create mode 100644 src/panfrost/bifrost/gen_pack.py diff --git a/src/panfrost/bifrost/gen_pack.py b/src/panfrost/bifrost/gen_pack.py new file mode 100644 index 00000000000..9eb191d2145 --- /dev/null +++ b/src/panfrost/bifrost/gen_pack.py @@ -0,0 +1,584 @@ +# +# Copyright (C) 2020 Collabora, Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import sys
from isa_parse import parse_instructions, opname_to_c
from mako.template import Template

instructions = parse_instructions(sys.argv[1])

# Packs sources into an argument. Offset argument to work around a quirk of our
# compiler IR when dealing with staging registers (TODO: reorder in the IR to
# fix this)
#
# For every entry of `sources` (indexed as src[0] = bit position, src[1] =
# mask of permitted values) this appends to `body` a C declaration of
# `src<N>` read through bi_get_src() at IR index N + offset, plus a C assert
# against the mask unless the mask is 0xFF. The shifted source expression is
# appended to every per-state list in `pack_exprs`. A blank line is appended
# to `body` at the end.
def pack_sources(sources, body, pack_exprs, offset):
    for index, src in enumerate(sources):
        declaration = 'unsigned src{} = bi_get_src(ins, regs, {});'.format(index, index + offset)
        body.append(declaration)

        # Validate the source; a mask of 0xFF needs no check
        mask = src[1]
        if mask != 0xFF:
            body.append('assert((1 << src{}) & {});'.format(index, hex(mask)))

        # Sources are state-invariant, so every state gets the same term
        for state_exprs in pack_exprs:
            state_exprs.append('(src{} << {})'.format(index, src[0]))

    body.append('')

# Gets the argument that the source modifier applies to from the name if
# applicable, otherwise defaults to the first argument

def mod_arg(mod):
    suffix = mod[-1]

    if suffix in "0123":
        return int(suffix)

    return 0

# Widen/lane/swz/swap/replicate modifiers conceptually act as a combined extend
# + swizzle. We look at the size of the argument to determine if we apply
# them, and look at the swizzle to pick which one.
# Emits C into `body` that pattern matches the source size (and, for the
# half/byte options, the swizzle) against each option in `opts`, storing the
# index of the matching option in `<mod>_temp`. Returns the name of that C
# temporary. Options that are None or 'reserved' are skipped.
#
# Raises AssertionError for an option that is not none/w0/d0/h*/b*, or when no
# option is encodable at all (which would otherwise leave a dangling `else`
# in the generated C).
def pack_widen(mod, opts, body, pack_exprs):
    marg = mod_arg(mod)

    body.append('unsigned {}_sz = nir_alu_type_get_type_size(ins->src_types[{}]);'.format(mod, marg))
    body.append('unsigned {}_temp = 0;'.format(mod))

    first = True
    for i, op in enumerate(opts):
        if op is None or op == 'reserved':
            continue

        t_else = 'else ' if not first else ''
        first = False

        if op in ['none', 'w0']:
            body.append('{}if ({}_sz == 32) {}_temp = {};'.format(t_else, mod, mod, i))
        elif op == 'd0':
            body.append('{}if ({}_sz == 64) {}_temp = {};'.format(t_else, mod, mod, i))
        else:
            if op[0] not in ['h', 'b']:
                raise AssertionError('unexpected widen option {!r} for {}'.format(op, mod))

            sz = 16 if op[0] == 'h' else 8

            # Condition on the swizzle: one comparison per lane character
            conds = ['ins->swizzle[{}][{}] == {}'.format(marg, idx, lane) for idx, lane in enumerate(op[1:])]
            cond = " && ".join(conds)

            body.append('{}if ({}_sz == {} && {}) {}_temp = {};'.format(t_else, mod, sz, cond, mod, i))

    # Without at least one branch the trailing `else` would not be valid C
    if first:
        raise AssertionError('no encodable option for widen modifier {}'.format(mod))

    body.append('else unreachable("Could not pattern match widen");')

    return mod + '_temp'

# abs/neg are stored in ins->src_{abs,neg}[src] arrays. The trailing source
# digit of the modifier name (if any) selects the array index.
def pack_absneg(mod, opts, body, pack_exprs):
    return 'ins->src_{}[{}]'.format(mod[0:-1] if mod[-1] in "0123" else mod, mod_arg(mod))

# ins->roundmode is the native format (RTE/RTP/RTN/RTZ) for most ops. But there
# are some others we might encounter that we don't support in the IR at this
# point, and there are a few that force a subset of round modes.
#
# Returns a C expression for the encoded round mode; for the restricted
# subsets a C assert on ins->roundmode is appended to `body`. Raises
# AssertionError for an option list that is not recognised.

def pack_round(mod, opts, body, pack_exprs):
    if opts == ['none', 'rtz']:
        body.append('assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ);')
        return '(ins->roundmode == BIFROST_RTZ) ? 1 : 0'
    elif opts == ['rtn', 'rtp']:
        body.append('assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);')
        return '(ins->roundmode == BIFROST_RTP) ? 1 : 0'
    elif opts[0:4] == ['none', 'rtp', 'rtn', 'rtz']:
        return 'ins->roundmode'
    else:
        # Explicit raise so the check survives `python -O`
        raise AssertionError('unsupported round options {!r}'.format(opts))

# Likewise, matches our native format. Raises AssertionError for an option
# list that is not recognised.

def pack_clamp(mod, opts, body, pack_exprs):
    if opts == ['none', 'clamp_0_inf', 'clamp_m1_1', 'clamp_0_1']:
        return 'ins->outmod'
    elif opts == ['none', 'clamp_0_1']:
        body.append('assert(ins->outmod == BIFROST_NONE || ins->outmod == BIFROST_SAT);')
        return '(ins->outmod == BIFROST_SAT) ? 1 : 0'
    else:
        raise AssertionError('unsupported clamp options {!r}'.format(opts))

# Our modifiers match up in name, but there is no shortage of orders. So just
# emit a table on the fly for it, since you won't get something much better.
# ENUM_BI_COND must be kept synced with `enum bi_cond` in compiler.h

ENUM_BI_COND = [
    "al",
    "lt",
    "le",
    "ge",
    "gt",
    "eq",
    "ne",
]

def pack_cmpf(mod, opts, body, pack_exprs):
    # Generate a table mapping ENUM_BI_COND to opts, or an invalid
    # sentinel if not used (which will then be asserted out in a debug build).
    table = [str(opts.index(x)) if x in opts else '~0' for x in ENUM_BI_COND]

    body.append('unsigned cmpf_table[] = {')
    body.append(' ' + ', '.join(table))
    body.append('};')

    return 'cmpf_table[ins->cond]'

# Since our IR is explicitly typed, we look at the size/sign to determine sign
# extension behaviour. Appends the `<mod>_small` / `<mod>_signed` C locals to
# `body` and returns the C expression selecting the encoding. Raises
# AssertionError for an option list that is not recognised.
def pack_extend(mod, opts, body, pack_exprs):
    body.append('ASSERTED bool {}_small = nir_alu_type_get_type_size(ins->src_types[{}]) <= 16;'.format(mod, mod_arg(mod)))
    body.append('bool {}_signed = nir_alu_type_get_base_type(ins->src_types[{}]) == nir_type_int;'.format(mod, mod_arg(mod)))

    if opts == ['none', 'sext', 'zext', 'reserved']:
        return '{}_small ? ({}_signed ? 1 : 2) : 0'.format(mod, mod)

    if opts != ['zext', 'sext']:
        raise AssertionError('unsupported extend options {!r}'.format(opts))

    # This encoding has no "none", so the source must actually be small
    body.append('assert({}_small);'.format(mod))
    return '{}_signed ? 1 : 0'.format(mod)

# Packs special varying loads.
# Our BIFROST_FRAGZ etc defines match the hw in
# the bottom two bits (TODO drop upper bits)
def pack_varying_name(mod, opts, body, pack_exprs):
    # Explicit raise so the layout check survives `python -O`
    if not (opts[0] == 'point' and opts[2] == 'frag_w' and opts[3] == 'frag_z'):
        raise AssertionError('unexpected varying_name options {!r}'.format(opts))

    return 'ins->constant.u64 & 0x3'

# Selects the 'not'/'none' encoding from the IR's src1 invert flag
def pack_not_src1(mod, opts, body, pack_exprs):
    return 'ins->bitwise.src1_invert ? {} : {}'.format(opts.index('not'), opts.index('none'))

# Selects the 'not'/'none' encoding from the IR's destination invert flag
def pack_not_result(mod, opts, body, pack_exprs):
    return 'ins->bitwise.dest_invert ? {} : {}'.format(opts.index('not'), opts.index('none'))

# Maps register format option names to the NIR type compared against
# ins->format in the generated C
REGISTER_FORMATS = {
    'f64': 'nir_type_float64',
    'f32': 'nir_type_float32',
    'f16': 'nir_type_float16',
    'u64': 'nir_type_uint64',
    'u32': 'nir_type_uint32',
    'u16': 'nir_type_uint16',
    'i64': 'nir_type_int64',
    's32': 'nir_type_int32',
    's16': 'nir_type_int16'
}

# Emits an if/else-if chain into `body` that sets `<mod>_temp` to the index of
# the option whose NIR type equals ins->format, and returns the name of that
# temporary. Options that are None, 'reserved' or missing from
# REGISTER_FORMATS produce no branch. Raises AssertionError if no branch at
# all could be emitted.
def pack_register_format(mod, opts, body, pack_exprs):
    body.append('unsigned {}_temp = 0;'.format(mod))

    first = True
    for i, op in enumerate(opts):
        if op is None or op == 'reserved':
            continue

        nir_type = REGISTER_FORMATS.get(op)
        if not nir_type:
            # No IR equivalent; skipping must not consume the leading `if`,
            # or the next branch would start with a dangling `else`
            continue

        t_else = 'else ' if not first else ''
        first = False

        body.append('{}if (ins->format == {}) {}_temp = {};'.format(t_else, nir_type, mod, i))

    if first:
        raise AssertionError('no encodable register format for {}'.format(mod))

    body.append('else unreachable("Could not pattern match register format");')
    return mod + '_temp'

# Packs the memory segment. The 8-option form passes ins->segment through
# (with a C assert that it is non-zero); the two-option form maps it to 0/1.
# Raises AssertionError for any other option list.
def pack_seg(mod, opts, body, pack_exprs):
    if len(opts) == 8:
        body.append('assert(ins->segment);')
        return 'ins->segment'
    elif opts == ['none', 'wgl']:
        body.append('assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);')
        return 'ins->segment == BI_SEGMENT_WLS ? 1 : 0'
    else:
        raise AssertionError('unsupported seg options {!r}'.format(opts))

# TODO: Update modes (perf / slow) For now just force store, except for special
# varyings for which we force clobber
def pack_update(mod, opts, body, pack_exprs):
    if opts == ['store', 'retrieve', 'conditional', 'clobber']:
        return '(ins->constant.u64 >= 20) ? 3 : 0'

    if opts[0] != 'store':
        raise AssertionError('unsupported update options {!r}'.format(opts))

    return '0'

# Processes modifiers. If used directly, emits a pack. Otherwise, just
# processes the value (grabbing it from the IR). This must sync with the IR.
#
# Every handler is called as fn(mod, opts, body, pack_exprs) and returns a
# string holding a C expression.

modifier_map = {
    "widen": pack_widen,
    "widen0": pack_widen,
    "widen1": pack_widen,
    "lane": pack_widen,
    "lane0": pack_widen,
    "lane1": pack_widen,
    "lane2": pack_widen,
    "lane3": pack_widen,
    "lanes0": pack_widen,
    "lanes1": pack_widen,
    "lanes2": pack_widen,
    "swz": pack_widen,
    "swz0": pack_widen,
    "swz1": pack_widen,
    "swz2": pack_widen,
    "swap0": pack_widen,
    "swap1": pack_widen,
    "swap2": pack_widen,
    "replicate0": pack_widen,
    "replicate1": pack_widen,

    "abs": pack_absneg,
    "abs0": pack_absneg,
    "abs1": pack_absneg,
    "abs2": pack_absneg,
    "neg": pack_absneg,
    "neg0": pack_absneg,
    "neg1": pack_absneg,
    "neg2": pack_absneg,

    "extend": pack_extend,
    "extend0": pack_extend,
    "extend1": pack_extend,
    "extend2": pack_extend,
    "sign0": pack_extend,
    "sign1": pack_extend,

    "clamp": pack_clamp,
    "round": pack_round,
    "cmpf": pack_cmpf,
    "varying_name": pack_varying_name,
    "not1": pack_not_src1,
    "not_result": pack_not_result,
    "register_format": pack_register_format,
    "seg": pack_seg,
    "update": pack_update,

    # Just a minus one modifier
    "vecsize": lambda a,b,c,d: 'ins->vector_channels - 1',

    # 0: compute 1: zero
    "lod_mode": lambda a,b,c,d: '1 - ins->texture.compute_lod',

    # Not much choice in the matter...
    "divzero": lambda a,b,c,d: '0',
    "sem": lambda a,b,c,d: '0', # IEEE 754 compliant NaN rules

    # We don't support these in the IR yet (TODO)
    "saturate": lambda a,b,c,d: '0', # clamp to min/max int
    "mask": lambda a,b,c,d: '0', # clz(~0) = ~0
    "result_type": lambda a,opts,c,d: str(opts.index('m1')), # #1, #1.0, ~0 for cmp
    "special": lambda a,b,c,d: '0', # none, which source wins..
    "offset": lambda a,b,c,d: '0', # sin/cos thing
    "adj": lambda a,b,c,d: '0', # sin/cos thing
    "sqrt": lambda a,b,c,d: '0', # sin/cos thing
    "log": lambda a,b,c,d: '1', # frexpe mode -- TODO: other transcendentals for g71
    "scale": lambda a,b,c,d: '0', # sin/cos thing
    "precision": lambda a,b,c,d: '0', # log thing
    "mode": lambda a,b,c,d: '0', # log thing
    "func": lambda a,b,c,d: '0', # pow special case thing
    "h": lambda a,b,c,d: '0', # VN_ASST1.f16
    "l": lambda a,b,c,d: '0', # VN_ASST1.f16
    "sample": lambda a,b,c,d: '0', # LD_VAR center
    "function": lambda a,b,c,d: '3', # LD_VAR_FLAT none
    "preserve_null": lambda a,b,c,d: '0', # SEG_ADD none
    "skip": lambda a,b,c,d: '0', # texturing (no skip)
    "bytes2": lambda a,b,c,d: '0', # NIR shifts are in bits
    "result_word": lambda a,b,c,d: '0', # 32-bit only shifts for now (TODO)
    "source": lambda a,b,c,d: '7', # cycle_counter for LD_GCLK
    "lane_op": lambda a,b,c,d: '0', # CLPER none
    "subgroup": lambda a,b,c,d: '1', # CLPER subgroup4
    "inactive_result": lambda a,b,c,d: '0', # CLPER zero
    "threads": lambda a,b,c,d: '0', # IMULD odd
    "stencil": lambda a,b,c,d: '1', # ZS_EMIT stencil
    "z": lambda a,b,c,d: '1', # ZS_EMIT z
    "combine": lambda a,b,c,d: '0', # BRANCHC any
    "format": lambda a,b,c,d: '1', # LEA_TEX_IMM u32
    "test_mode": lambda a,b,c,d: '0', # JUMP_EX z
    "stack_mode": lambda a,b,c,d: '2', # JUMP_EX none
    "atom_opc": lambda a,b,c,d: '2', # ATOM_C aadd
    "mux": lambda a,b,c,d: '1', # MUX int_zero
}

# Emits the C local `unsigned <mod> = <expr>;` for one modifier, using the
# handler registered in modifier_map. Returns True on success, or None when
# the modifier has no handler (the caller treats that as "cannot pack").
def pack_modifier(mod, width, default, opts, body, pack_exprs):
    # Invoke the specific one
    fn = modifier_map.get(mod)

    if fn is None:
        return None

    expr = fn(mod, opts, body, pack_exprs)
    body.append('unsigned {} = {};'.format(mod, expr))

    # Validate we don't overflow. A constant expression can be checked right
    # here; anything else (or a constant that does not fit) is checked by a C
    # assert in the generated code instead.
    limit = 1 << width
    try:
        fits = int(expr) < limit
    except ValueError:
        # Not a literal integer: only the generated code can know the value
        fits = False

    if not fits:
        body.append('assert({} < {});'.format(mod, limit))

    body.append('')

    return True

# Compiles an S-expression (and/or/eq/neq, modifiers,
# `ordering`, immediates)
# into a C boolean expression suitable to stick in an if-statement. Takes an
# imm_map to map modifiers to immediate values, parametrized by the ctx that
# we're looking up in (the first, non-immediate argument of the equality)

SEXPR_BINARY = {
    "and": "&&",
    "or": "||",
    "eq": "==",
    "neq": "!="
}

# Comparisons against the pseudo-modifier `ordering` turn into a direct
# comparison of src0 and src1. Keyed by (operator, immediate).
_SEXPR_ORDERING = {
    ("eq", "#gt"): ">",
    ("neq", "#lt"): ">=",
    ("neq", "#gt"): "<=",
    ("eq", "#lt"): "<",
    ("eq", "#eq"): "==",
    ("neq", "#eq"): "!=",
}

# expr is either a string (a modifier name, or '#name' for an immediate that
# is looked up as imm_map[ctx][name]) or a list [operator, operands...].
# ['alias', e] compiles to whatever e compiles to. Returns the C expression as
# a string. An operator missing from SEXPR_BINARY raises KeyError.
def compile_s_expr(expr, imm_map, ctx):
    if not isinstance(expr, list):
        if expr.startswith('#'):
            return str(imm_map[ctx][expr[1:]])

        return expr

    head = expr[0]

    if head == 'alias':
        return compile_s_expr(expr[1], imm_map, ctx)

    # [op, 'ordering', '#imm'] compares the two sources directly
    if len(expr) == 3 and expr[1] == 'ordering' and isinstance(head, str) and isinstance(expr[2], str):
        comparison = _SEXPR_ORDERING.get((head, expr[2]))

        if comparison is not None:
            return '(src0 {} src1)'.format(comparison)

    # Generic n-ary operator. Immediates among the operands are resolved in
    # the context of the first operand.
    sep = " {} ".format(SEXPR_BINARY[head])
    operands = [compile_s_expr(s, imm_map, expr[1]) for s in expr[1:]]

    return "(" + sep.join(operands) + ")"

# Packs a derived value. We just iterate through the possible choices and test
# whether the encoding matches, and if so we use it.
# Emits into `body` an if/else-if chain that sets `derived_<pos>` to the index
# of the first entry of `exprs` whose compiled condition holds, then appends
# the shifted value to `pack_exprs`. Entries that are None are skipped.
#
# Raises AssertionError, before touching `body` or `pack_exprs`, when `pos` is
# None or when every entry of `exprs` is None.
def pack_derived(pos, exprs, imm_map, body, pack_exprs):
    if pos is None:
        raise AssertionError('derived value has no bit position')

    candidates = [(value, expr) for value, expr in enumerate(exprs) if expr is not None]

    if not candidates:
        raise AssertionError('derived value at pos {} has no encodings'.format(pos))

    body.append('unsigned derived_{} = 0;'.format(pos))

    for n, (value, expr) in enumerate(candidates):
        cond = compile_s_expr(expr, imm_map, None)
        body.append('{}if {} derived_{} = {};'.format('' if n == 0 else 'else ', cond, pos, value))

    body.append('else unreachable("No pattern match at pos {}");'.format(pos))
    body.append('')

    pack_exprs.append('(derived_{} << {})'.format(pos, pos))

# Table mapping immediate names in the machine to expressions of `ins` to
# lookup the value in the IR, performing adjustments as needed

IMMEDIATE_TABLE = {
    'attribute_index': 'bi_get_immediate(ins, 0)',
    'varying_index': 'bi_get_immediate(ins, 0)',
    'index': 'bi_get_immediate(ins, 0)',
    'image_index': 'ins->texture.texture_index',
    'sampler_index': 'ins->texture.sampler_index',
    'table': '63', # Bindless (flat addressing) mode for DTSEL_IMM

    # Not supported in the IR (TODO)
    'shift': '0',
    'fill': '0', # WMASK
}

# Generates a routine to pack a single variant of a single instruction.
# Template applies the needed formatting and combine to OR together all the
# pack_exprs to avoid bit fields.
#
# Argument swapping is sensitive to the order of operations. Dependencies:
# sources (RW), modifiers (RW), derived values (W). Hence we emit sources and
# modifiers first, then perform a swap if necessary overwriting
# sources/modifiers, and last calculate derived values and pack.
variant_template = Template("""static inline unsigned
pan_pack_${name}(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
${"\\n".join([(" " + x) for x in common_body])}
% if single_state:
% for (pack_exprs, s_body, _) in states:
${"\\n".join([" " + x for x in s_body + ["return {};".format( " | ".join(pack_exprs))]])}
% endfor
% else:
% for i, (pack_exprs, s_body, cond) in enumerate(states):
 ${'} else ' if i > 0 else ''}if ${cond} {
${"\\n".join([" " + x for x in s_body + ["return {};".format(" | ".join(pack_exprs))]])}
% endfor
 } else {
 unreachable("No matching state found in ${name}");
 }
% endif
}
""")

# Renders the C packing routine for one instruction.
#
# `states` is a list of (condition, description) pairs; each description is a
# dict read here through the keys "exact", "staging", "immediates", "srcs",
# "modifiers", "swaps" and "derived". Returns the rendered C source, or None
# when the instruction uses a modifier or immediate this generator has no
# mapping for.
def pack_variant(opname, states):
    # Expressions to be ORed together for the final pack, an array per state
    pack_exprs = [[hex(state[1]["exact"][1])] for state in states]

    # Computations which need to be done to encode first, across states
    common_body = []

    # Map from modifier names to a map from modifier values to encoded values
    # String -> { String -> Uint }. This can be shared across states since
    # modifiers are (except the pos values) constant across state.
    imm_map = {}

    # Pack sources. Offset over to deal with staging/immediate weirdness in our
    # IR (TODO: reorder sources upstream so this goes away). Note sources are
    # constant across states.
    first_state = states[0][1]
    staging = first_state.get("staging", "")
    offset = 0
    if staging in ["r", "rw"]:
        offset += 1

    offset += len(set(["attribute_index", "varying_index", "index"]) & set([x[0] for x in first_state.get("immediates", [])]))

    if opname == '+LD_VAR_SPECIAL':
        offset += 1

    pack_sources(first_state.get("srcs", []), common_body, pack_exprs, offset)

    # Each modifier is read from the IR once, no matter how many states use it
    modifiers_handled = set()
    for _, desc in states:
        for ((mod, _, width), default, opts) in desc.get("modifiers", []):
            if mod in modifiers_handled:
                continue

            modifiers_handled.add(mod)

            if pack_modifier(mod, width, default, opts, common_body, pack_exprs) is None:
                return None

            imm_map[mod] = { x: y for y, x in enumerate(opts) }

    # Modifier positions are the one thing that may differ between states
    for state_exprs, (_, desc) in zip(pack_exprs, states):
        for ((mod, pos, width), default, opts) in desc.get("modifiers", []):
            if pos is not None:
                state_exprs.append('({} << {})'.format(mod, pos))

    # Swaps and immediates are emitted once into the common body. They are
    # taken from the last state, which is what the loop variable left over
    # from the per-state loops referred to.
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm these two loops are not meant to run per state.
    shared = states[-1][1]

    for ((src_a, src_b), cond, remap) in shared.get("swaps", []):
        # Figure out which vars to swap, in order to swap the arguments. This
        # always includes the sources themselves, and may include source
        # modifiers (with the same source indices). We swap based on which
        # matches A, this is arbitrary but if we swapped both nothing would end
        # up swapping at all since it would swap back.

        vars_to_swap = ['src']
        for ((mod, _, width), default, opts) in shared.get("modifiers", []):
            if mod[-1] in str(src_a):
                vars_to_swap.append(mod[0:-1])

        common_body.append('if {}'.format(compile_s_expr(cond, imm_map, None)) + ' {')

        # Emit the swaps. We use a temp, and wrap in a block to avoid naming
        # collisions with multiple swaps. {{Doubling}} to escape the format.

        for v in vars_to_swap:
            common_body.append(' {{ unsigned temp = {}{}; {}{} = {}{}; {}{} = temp; }}'.format(v, src_a, v, src_a, v, src_b, v, src_b))

        # Also, remap. Bidirectional swaps are explicit in the XML.
        for v in remap:
            maps = remap[v]
            imm = imm_map[v]

            for n, old in enumerate(maps):
                common_body.append(' {}if ({} == {}) {} = {};'.format('' if n == 0 else 'else ', v, imm[old], v, imm[maps[old]]))

        common_body.append('}')
        common_body.append('')

    for (name, pos, width) in shared.get("immediates", []):
        if name not in IMMEDIATE_TABLE:
            return None

        common_body.append('unsigned {} = {};'.format(name, IMMEDIATE_TABLE[name]))

        for state_exprs in pack_exprs:
            state_exprs.append('({} << {})'.format(name, pos))

    if staging == 'r':
        common_body.append('bi_read_data_register(clause, ins);')
    elif staging == 'w':
        common_body.append('bi_write_data_register(clause, ins);')
    elif staging == 'rw':
        # XXX: register allocation requirement (!)
        common_body.append('bi_read_data_register(clause, ins);')
        common_body.append('assert(ins->src[0] == ins->dest);')
    elif staging != '':
        raise AssertionError('unknown staging mode {!r}'.format(staging))

    # After this, we have to branch off, since deriveds *do* vary based on state.
    state_body = [[] for s in states]

    for i, (_, desc) in enumerate(states):
        for ((pos, width), exprs) in desc.get("derived", []):
            pack_derived(pos, exprs, imm_map, state_body[i], pack_exprs[i])

    # How do we pick a state? Accumulate the conditions. A lone state needs no
    # selection, so its condition is not compiled.
    if len(states) > 1:
        state_conds = [compile_s_expr(cond, imm_map, None) for cond, _ in states]
    else:
        state_conds = [None]

    # Finally, we'll collect everything together
    return variant_template.render(name = opname_to_c(opname), states = zip(pack_exprs, state_body, state_conds), common_body = common_body, single_state = (len(states) == 1))

HEADER = """/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* Autogenerated file, do not edit */

#ifndef _BI_GENERATED_PACK_H
#define _BI_GENERATED_PACK_H

#include "compiler.h"
#include "bi_pack_helpers.h"
"""

print(HEADER)

packs = [pack_variant(e, instructions[e]) for e in instructions]
for p in packs:
    # pack_variant returns None for instructions it cannot pack; printing
    # that would put the text "None" into the generated C header
    if p is not None:
        print(p)

print("#endif")