From 6a12ea02feeebac4be8986616327db31926b3ce7 Mon Sep 17 00:00:00 2001 From: Italo Nicola Date: Fri, 16 Apr 2021 10:23:48 +0000 Subject: [PATCH] pan/mdg: properly encode/decode ldst instructions Signed-off-by: Italo Nicola Reviewed-by: Alyssa Rosenzweig Part-of: --- src/panfrost/midgard/compiler.h | 13 +- src/panfrost/midgard/disassemble.c | 295 ++++++++++++------ src/panfrost/midgard/helpers.h | 43 ++- src/panfrost/midgard/midgard.h | 84 +++-- src/panfrost/midgard/midgard_address.c | 39 ++- src/panfrost/midgard/midgard_compile.c | 113 ++++--- src/panfrost/midgard/midgard_emit.c | 87 ++++-- .../midgard/midgard_opt_perspective.c | 11 +- src/panfrost/midgard/midgard_ra.c | 6 +- src/panfrost/midgard/midgard_schedule.c | 17 +- src/panfrost/midgard/mir_promote_uniforms.c | 4 +- 11 files changed, 465 insertions(+), 247 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 5a5623c2cd3..828464a26a5 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -516,9 +516,9 @@ void mir_insert_instruction_after_scheduled(compiler_context *ctx, midgard_block void mir_flip(midgard_instruction *ins); void mir_compute_temp_count(compiler_context *ctx); -#define LDST_GLOBAL 0x3E -#define LDST_SHARED 0x2E -#define LDST_SCRATCH 0x2A +#define LDST_GLOBAL (REGISTER_LDST_ZERO << 2) +#define LDST_SHARED ((REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z) +#define LDST_SCRATCH ((REGISTER_LDST_PC_SP << 2) | COMPONENT_Z) void mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg); void mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias); @@ -573,8 +573,11 @@ v_load_store_scratch( .op = is_store ? 
midgard_op_st_128 : midgard_op_ld_128, .load_store = { /* For register spilling - to thread local storage */ - .arg_1 = 0xEA, - .arg_2 = 0x1E, + .arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR, + .arg_comp = COMPONENT_Z, + .bitsize_toggle = true, + .index_format = midgard_index_address_u32, + .index_reg = REGISTER_LDST_ZERO, }, /* If we spill an unspill, RA goes into an infinite loop */ diff --git a/src/panfrost/midgard/disassemble.c b/src/panfrost/midgard/disassemble.c index b784f99f014..1e8e8a48e42 100644 --- a/src/panfrost/midgard/disassemble.c +++ b/src/panfrost/midgard/disassemble.c @@ -304,6 +304,13 @@ static char *argmod_names[3] = { ".x2", }; +static char *index_format_names[4] = { + "", + ".u64", + ".u32", + ".s32" +}; + static void print_outmod(FILE *fp, unsigned outmod, bool is_int) { @@ -760,6 +767,21 @@ print_alu_mask(FILE *fp, uint8_t mask, unsigned bits, midgard_shrink_mode shrink fprintf(fp, " /* %X */", mask); } +/* TODO: 16-bit mode */ +static void +print_ldst_mask(FILE *fp, unsigned mask, unsigned swizzle) { + fprintf(fp, "."); + + for (unsigned i = 0; i < 4; ++i) { + bool write = (mask & (1 << i)) != 0; + unsigned c = (swizzle >> (i * 2)) & 3; + /* We can't omit the swizzle here since many ldst ops have a + * combined swizzle/writemask, and it would be ambiguous to not + * print the masked-out components. */ + fprintf(fp, "%c", write ? components[c] : '~'); + } +} + /* Prints the 4-bit masks found in texture and load/store ops, as opposed to * the 8-bit masks found in (vector) ALU ops. Supports texture-style 16-bit * mode as well, but not load/store-style 16-bit mode. */ @@ -1235,41 +1257,40 @@ print_alu_word(FILE *fp, uint32_t *words, unsigned num_quad_words, return branch_forward; } -static void +/* TODO: how can we use this now that we know that these params can't be known + * before run time in every single case? Maybe just use it in the cases we can? 
*/ +UNUSED static void print_varying_parameters(FILE *fp, midgard_load_store_word *word) { - midgard_varying_parameter param; - unsigned v = word->varying_parameters; - memcpy(¶m, &v, sizeof(param)); + midgard_varying_params p = midgard_unpack_varying_params(*word); - if (param.is_varying) { - /* If a varying, there are qualifiers */ - if (param.flat) - fprintf(fp, ".flat"); + /* If a varying, there are qualifiers */ + if (p.flat_shading) + fprintf(fp, ".flat"); - if (param.interpolation != midgard_interp_default) { - if (param.interpolation == midgard_interp_centroid) - fprintf(fp, ".centroid"); - else if (param.interpolation == midgard_interp_sample) - fprintf(fp, ".sample"); - else - fprintf(fp, ".interp%d", param.interpolation); - } + if (p.perspective_correction) + fprintf(fp, ".correction"); - if (param.modifier != midgard_varying_mod_none) { - if (param.modifier == midgard_varying_mod_perspective_w) - fprintf(fp, ".perspectivew"); - else if (param.modifier == midgard_varying_mod_perspective_z) - fprintf(fp, ".perspectivez"); - else - fprintf(fp, ".mod%d", param.modifier); - } - } else if (param.flat || param.interpolation || param.modifier) { - fprintf(fp, " /* is_varying not set but varying metadata attached */"); + if (p.centroid_mapping) + fprintf(fp, ".centroid"); + + if (p.interpolate_sample) + fprintf(fp, ".sample"); + + switch (p.modifier) { + case midgard_varying_mod_perspective_y: + fprintf(fp, ".perspectivey"); + break; + case midgard_varying_mod_perspective_z: + fprintf(fp, ".perspectivez"); + break; + case midgard_varying_mod_perspective_w: + fprintf(fp, ".perspectivew"); + break; + default: + unreachable("invalid varying modifier"); + break; } - - if (param.zero0 || param.zero1 || param.zero2) - fprintf(fp, " /* zero tripped, %u %u %u */ ", param.zero0, param.zero1, param.zero2); } static bool @@ -1304,33 +1325,14 @@ is_op_attribute(unsigned op) return false; } +/* Helper to print integer well-formatted, but only when non-zero. 
*/ static void -print_load_store_arg(FILE *fp, uint8_t arg, unsigned index) +midgard_print_sint(FILE *fp, int n) { - /* Try to interpret as a register */ - midgard_ldst_register_select sel; - memcpy(&sel, &arg, sizeof(arg)); - - /* If unknown is set, we're not sure what this is or how to - * interpret it. But if it's zero, we get it. */ - - if (sel.unknown) { - fprintf(fp, "0x%02X", arg); - return; - } - - print_ldst_read_reg(fp, sel.select); - fprintf(fp, ".%c", components[sel.component]); - - /* Only print a shift if it's non-zero. Shifts only make sense for the - * second index. For the first, we're not sure what it means yet */ - - if (index == 1) { - if (sel.shift) - fprintf(fp, " << %u", sel.shift); - } else { - fprintf(fp, " /* %X */", sel.shift); - } + if (n > 0) + fprintf(fp, " + 0x%X", n); + else if (n < 0) + fprintf(fp, " - 0x%X", -n); } static void @@ -1347,63 +1349,160 @@ print_load_store_instr(FILE *fp, uint64_t data) print_ld_st_opcode(fp, word->op); - unsigned address = word->address; + if (word->op == midgard_op_trap) { + fprintf(fp, " 0x%X\n", word->signed_offset); + return; + } + + /* Print opcode modifiers */ + + if (OP_USES_ATTRIB(word->op)) /* which attrib table? */ + fprintf(fp, ".%s", (word->index_format >> 1) ? "secondary" : "primary"); + else if (word->op == midgard_op_ld_cubemap_coords || OP_IS_PROJECTION(word->op)) + fprintf(fp, ".%s", word->bitsize_toggle ? "f32" : "f16"); + + fprintf(fp, " "); + + /* src/dest register */ + + if (!OP_IS_STORE(word->op)) { + print_ldst_write_reg(fp, word->reg); + + /* Some opcodes don't have a swizzable src register, and + * instead the swizzle is applied before the result is written + * to the dest reg. For these ops, we combine the writemask + * with the swizzle to display them in the disasm compactly. 
*/ + unsigned swizzle = word->swizzle; + if ((OP_IS_REG2REG_LDST(word->op) && + word->op != midgard_op_lea && + word->op != midgard_op_lea_image) || OP_IS_ATOMIC(word->op)) + swizzle = 0xE4; + print_ldst_mask(fp, word->mask, swizzle); + } else { + print_ldst_read_reg(fp, word->reg); + print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, + midgard_reg_mode_32, 0xFF); + } + + /* ld_ubo args */ + if (OP_IS_UBO_READ(word->op)) { + if (word->signed_offset & 1) { /* buffer index imm */ + unsigned imm = midgard_unpack_ubo_index_imm(*word); + fprintf(fp, ", %u", imm); + } else { /* buffer index from reg */ + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->arg_reg); + fprintf(fp, ".%c", components[word->arg_comp]); + } + + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift); + midgard_print_sint(fp, UNPACK_LDST_UBO_OFS(word->signed_offset)); + } + + /* mem addr expression */ + if (OP_HAS_ADDRESS(word->op)) { + fprintf(fp, ", [ "); + print_ldst_read_reg(fp, word->arg_reg); + fprintf(fp, ".u%d.%c", + word->bitsize_toggle ? 64 : 32, components[word->arg_comp]); + + if ((word->op < midgard_op_atomic_cmpxchg || + word->op > midgard_op_atomic_cmpxchg64_be) && + word->index_reg != 0x7) { + fprintf(fp, " + ("); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, "%s.%c << %u)", + index_format_names[word->index_format], + components[word->index_comp], word->index_shift); + } + + midgard_print_sint(fp, word->signed_offset); + + fprintf(fp, " ]"); + } + + /* src reg for reg2reg ldst opcodes */ + if (OP_IS_REG2REG_LDST(word->op)) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->arg_reg); + print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, + midgard_reg_mode_32, 0xFF); + } + + /* atomic ops encode the source arg where the ldst swizzle would be. 
*/ + if (OP_IS_ATOMIC(word->op)) { + unsigned src = (word->swizzle >> 2) & 0x7; + unsigned src_comp = word->swizzle & 0x3; + fprintf(fp, ", "); + print_ldst_read_reg(fp, src); + fprintf(fp, ".%c", components[src_comp]); + } + + /* CMPXCHG encodes the extra comparison arg where the index reg would be. */ + if (word->op >= midgard_op_atomic_cmpxchg && + word->op <= midgard_op_atomic_cmpxchg64_be) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, ".%c", components[word->index_comp]); + } + + /* index reg for attr/vary/images, selector for ld/st_special */ + if (OP_IS_SPECIAL(word->op) || OP_USES_ATTRIB(word->op)) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift); + midgard_print_sint(fp, UNPACK_LDST_ATTRIB_OFS(word->signed_offset)); + } + + /* vertex reg for attrib/varying ops, coord reg for image ops */ + if (OP_USES_ATTRIB(word->op)) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->arg_reg); + + if (OP_IS_IMAGE(word->op)) + fprintf(fp, ".u%d", word->bitsize_toggle ? 
64 : 32); + + fprintf(fp, ".%c", components[word->arg_comp]); + + if (word->bitsize_toggle && !OP_IS_IMAGE(word->op)) + midgard_print_sint(fp, UNPACK_LDST_VERTEX_OFS(word->signed_offset)); + } + + /* TODO: properly decode format specifier for PACK/UNPACK ops */ + if (OP_IS_PACK_COLOUR(word->op) || OP_IS_UNPACK_COLOUR(word->op)) { + fprintf(fp, ", "); + unsigned format_specifier = (word->signed_offset << 4) | word->index_shift; + fprintf(fp, "0x%X", format_specifier); + } + + fprintf(fp, "\n"); + + /* Debugging stuff */ if (is_op_varying(word->op)) { - print_varying_parameters(fp, word); + /* Do some analysis: check if direct access */ - /* Do some analysis: check if direct cacess */ - - if ((word->arg_2 == 0x1E) && midg_stats.varying_count >= 0) - update_stats(&midg_stats.varying_count, address); + if (word->index_reg == 0x7 && midg_stats.varying_count >= 0) + update_stats(&midg_stats.varying_count, + UNPACK_LDST_ATTRIB_OFS(word->signed_offset)); else midg_stats.varying_count = -16; } else if (is_op_attribute(word->op)) { - if ((word->arg_2 == 0x1E) && midg_stats.attribute_count >= 0) - update_stats(&midg_stats.attribute_count, address); + if (word->index_reg == 0x7 && midg_stats.attribute_count >= 0) + update_stats(&midg_stats.attribute_count, + UNPACK_LDST_ATTRIB_OFS(word->signed_offset)); else midg_stats.attribute_count = -16; } - fprintf(fp, " "); - - if (!OP_IS_STORE(word->op)) - print_ldst_write_reg(fp, word->reg); - else - print_ldst_read_reg(fp, word->reg); - - print_mask_4(fp, word->mask, false); - if (!OP_IS_STORE(word->op)) update_dest(word->reg); - bool is_ubo = OP_IS_UBO_READ(word->op); - - if (is_ubo) { - /* UBOs use their own addressing scheme */ - - int lo = word->varying_parameters >> 7; - int hi = word->address; - - /* TODO: Combine fields logically */ - address = (hi << 3) | lo; - } - - fprintf(fp, ", %u", address); - - print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, midgard_reg_mode_32, 0xFF); - - fprintf(fp, ", "); - - if (is_ubo) 
{ - fprintf(fp, "ubo%u", word->arg_1); - update_stats(&midg_stats.uniform_buffer_count, word->arg_1); - } else - print_load_store_arg(fp, word->arg_1, 0); - - fprintf(fp, ", "); - print_load_store_arg(fp, word->arg_2, 1); - fprintf(fp, " /* %X */\n", word->varying_parameters); + if (OP_IS_UBO_READ(word->op)) + update_stats(&midg_stats.uniform_buffer_count, + UNPACK_LDST_UBO_OFS(word->signed_offset)); midg_stats.instruction_count++; } diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h index fffe0d21142..c8efbf60ca9 100644 --- a/src/panfrost/midgard/helpers.h +++ b/src/panfrost/midgard/helpers.h @@ -355,9 +355,9 @@ mir_is_simple_swizzle(unsigned *swizzle, unsigned mask) /* Packs a load/store argument */ static inline uint8_t -midgard_ldst_reg(unsigned reg, unsigned component, unsigned size) +midgard_ldst_comp(unsigned reg, unsigned component, unsigned size) { - assert((reg == REGISTER_LDST_BASE) || (reg == REGISTER_LDST_BASE + 1)); + assert((reg & ~1) == 0); assert(size == 16 || size == 32 || size == 64); /* Shift so everything is in terms of 32-bit units */ @@ -369,17 +369,38 @@ midgard_ldst_reg(unsigned reg, unsigned component, unsigned size) component >>= 1; } - midgard_ldst_register_select sel = { - .component = component, - .select = reg - 26 - }; - - uint8_t packed; - memcpy(&packed, &sel, sizeof(packed)); - - return packed; + return component; } +/* Packs/unpacks a ubo index immediate */ + +void midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index); +unsigned midgard_unpack_ubo_index_imm(midgard_load_store_word word); + +/* Packs/unpacks varying parameters. + * FIXME: IMPORTANT: We currently handle varying mode weirdly, by passing all + * parameters via an offset and using REGISTER_LDST_ZERO as base. This works + * for most parameters, but does not allow us to encode/decode direct sample + * position. 
*/ +void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p); +midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word); + +/* Load/store ops' displacement helpers. + * This is useful because different types of load/store ops have different + * displacement bitsize. */ + +#define UNPACK_LDST_ATTRIB_OFS(a) ((a) >> 9) +#define UNPACK_LDST_VERTEX_OFS(a) util_sign_extend((a) & 0x1FF, 9) +#define UNPACK_LDST_SELECTOR_OFS(a) ((a) >> 9) +#define UNPACK_LDST_UBO_OFS(a) ((a) >> 2) +#define UNPACK_LDST_MEM_OFS(a) ((a)) + +#define PACK_LDST_ATTRIB_OFS(a) ((a) << 9) +#define PACK_LDST_VERTEX_OFS(a) ((a) & 0x1FF) +#define PACK_LDST_SELECTOR_OFS(a) ((a) << 9) +#define PACK_LDST_UBO_OFS(a) ((a) << 2) +#define PACK_LDST_MEM_OFS(a) ((a)) + static inline bool midgard_is_branch_unit(unsigned unit) { diff --git a/src/panfrost/midgard/midgard.h b/src/panfrost/midgard/midgard.h index e9559659c1c..1d873de5325 100644 --- a/src/panfrost/midgard/midgard.h +++ b/src/panfrost/midgard/midgard.h @@ -664,31 +664,37 @@ typedef enum { typedef enum { midgard_varying_mod_none = 0, - /* Other values unknown */ - - /* Take the would-be result and divide all components by its z/w + /* Take the would-be result and divide all components by its y/z/w * (perspective division baked in with the load) */ + midgard_varying_mod_perspective_y = 1, midgard_varying_mod_perspective_z = 2, midgard_varying_mod_perspective_w = 3, + + /* The result is a 64-bit cubemap descriptor to use with + * midgard_tex_op_normal or midgard_tex_op_gradient */ + midgard_varying_mod_cubemap = 4, } midgard_varying_modifier; typedef struct __attribute__((__packed__)) { - unsigned zero0 : 1; /* Always zero */ + midgard_varying_modifier modifier : 3; - midgard_varying_modifier modifier : 2; + bool flat_shading : 1; - unsigned zero1: 1; /* Always zero */ + /* These are ignored if flat_shading is enabled. 
*/ + bool perspective_correction : 1; + bool centroid_mapping : 1; - /* Varying qualifiers, zero if not a varying */ - unsigned flat : 1; - unsigned is_varying : 1; /* Always one for varying, but maybe something else? */ - midgard_interpolation interpolation : 2; + /* This is ignored if the shader only runs once per pixel. */ + bool interpolate_sample : 1; - unsigned zero2 : 2; /* Always zero */ + bool zero0 : 1; /* Always zero */ + + unsigned direct_sample_pos_x : 4; + unsigned direct_sample_pos_y : 4; } -midgard_varying_parameter; +midgard_varying_params; /* 8-bit register/etc selector for load/store ops */ typedef struct @@ -711,26 +717,56 @@ __attribute__((__packed__)) } midgard_ldst_register_select; +typedef enum { + /* 0 is reserved */ + midgard_index_address_u64 = 1, + midgard_index_address_u32 = 2, + midgard_index_address_s32 = 3, +} midgard_index_address_format; + typedef struct __attribute__((__packed__)) { midgard_load_store_op op : 8; - unsigned reg : 5; - unsigned mask : 4; + + /* Source/dest reg */ + unsigned reg : 5; + + /* Generally is a writemask. + * For ST_ATTR and ST_TEX, unused. + * For other stores, each bit masks 1/4th of the output. */ + unsigned mask : 4; + + /* Swizzle for stores, but for atomics it encodes also the source + * register. This fits because atomics dont need a swizzle since they + * are not vectorized instructions. */ unsigned swizzle : 8; - /* Load/store ops can take two additional registers as arguments, but - * these are limited to load/store registers with only a few supported - * mask/swizzle combinations. The tradeoff is these are much more - * compact, requiring 8-bits each rather than 17-bits for a full - * reg/mask/swizzle. Usually (?) encoded as - * midgard_ldst_register_select. 
*/ - unsigned arg_1 : 8; - unsigned arg_2 : 8; + /* Arg reg, meaning changes according to each opcode */ + unsigned arg_comp : 2; + unsigned arg_reg : 3; - unsigned varying_parameters : 10; + /* 64-bit address enable + * 32-bit data type enable for CUBEMAP and perspective div. + * Explicit indexing enable for LD_ATTR. + * 64-bit coordinate enable for LD_IMAGE. */ + bool bitsize_toggle : 1; - unsigned address : 9; + /* These are mainly used for opcodes that have addresses. + * For cmpxchg, index_reg is used for the comparison value. + * For ops that access the attrib table, bit 1 encodes which table. + * For LD_VAR and LD/ST_ATTR, bit 0 enables dest/src type inferral. */ + midgard_index_address_format index_format : 2; + unsigned index_comp : 2; + unsigned index_reg : 3; + unsigned index_shift : 4; + + /* Generaly is a signed offset, but has different bitsize and starts at + * different bits depending on the opcode, LDST_*_DISPLACEMENT helpers + * are recommended when packing/unpacking this attribute. + * For LD_UBO, bit 0 enables ubo index immediate. + * For LD_TILEBUFFER_RAW, bit 0 disables sample index immediate. */ + int signed_offset : 18; } midgard_load_store_word; diff --git a/src/panfrost/midgard/midgard_address.c b/src/panfrost/midgard/midgard_address.c index c55e0fbe2c5..beb661ec50c 100644 --- a/src/panfrost/midgard/midgard_address.c +++ b/src/panfrost/midgard/midgard_address.c @@ -36,17 +36,11 @@ * This allows for fast indexing into arrays. This file tries to pattern match the offset in NIR with this form to reduce pressure on the ALU pipe. 
*/ -enum index_type { - ITYPE_U64 = 1 << 6, - ITYPE_U32 = 2 << 6, // zero-extend - ITYPE_I32 = 3 << 6, // sign-extend -}; - struct mir_address { nir_ssa_scalar A; nir_ssa_scalar B; - enum index_type type; + midgard_index_address_format type; unsigned shift; unsigned bias; }; @@ -136,7 +130,7 @@ mir_match_u2u64(struct mir_address *address) nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); address->B = arg; - address->type = ITYPE_U32; + address->type = midgard_index_address_u32; } /* Matches i2i64 and sets type */ @@ -155,7 +149,7 @@ mir_match_i2i64(struct mir_address *address) nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); address->B = arg; - address->type = ITYPE_I32; + address->type = midgard_index_address_s32; } /* Matches ishl to shift */ @@ -210,7 +204,7 @@ mir_match_offset(nir_ssa_def *offset, bool first_free, bool extend) { struct mir_address address = { .B = { .def = offset }, - .type = extend ? ITYPE_U64 : ITYPE_U32, + .type = extend ? midgard_index_address_u64 : midgard_index_address_u32, }; mir_match_mov(&address); @@ -243,14 +237,16 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, bool force_sext = (nir_src_bit_size(*offset) < 64); if (!offset->is_ssa) { - ins->load_store.arg_1 |= seg; + ins->load_store.bitsize_toggle = true; + ins->load_store.arg_comp = seg & 0x3; + ins->load_store.arg_reg = (seg >> 2) & 0x7; ins->src[2] = nir_src_index(ctx, offset); ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset); if (force_sext) - ins->load_store.arg_1 |= ITYPE_I32; + ins->load_store.index_format = midgard_index_address_s32; else - ins->load_store.arg_1 |= ITYPE_U64; + ins->load_store.index_format = midgard_index_address_u64; return; } @@ -263,23 +259,26 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, ins->src[1] = nir_ssa_index(match.A.def); ins->swizzle[1][0] = match.A.comp; ins->src_types[1] = nir_type_uint | match.A.def->bit_size; - } else - 
ins->load_store.arg_1 |= seg; + } else { + ins->load_store.bitsize_toggle = true; + ins->load_store.arg_comp = seg & 0x3; + ins->load_store.arg_reg = (seg >> 2) & 0x7; + } if (match.B.def) { ins->src[2] = nir_ssa_index(match.B.def); ins->swizzle[2][0] = match.B.comp; ins->src_types[2] = nir_type_uint | match.B.def->bit_size; } else - ins->load_store.arg_2 = 0x1E; + ins->load_store.index_reg = REGISTER_LDST_ZERO; if (force_sext) - match.type = ITYPE_I32; + match.type = midgard_index_address_s32; - ins->load_store.arg_1 |= match.type; + ins->load_store.index_format = match.type; assert(match.shift <= 7); - ins->load_store.arg_2 |= (match.shift) << 5; + ins->load_store.index_shift = match.shift; ins->constants.u32[0] = match.bias; } @@ -298,6 +297,6 @@ mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias) ins->swizzle[2][i] = match.B.comp; } - ins->load_store.arg_2 |= (match.shift) << 5; + ins->load_store.index_shift = match.shift; ins->constants.u32[0] = match.bias + bias; } diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 9f945d0dc93..1e17f6ceef5 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -112,7 +112,7 @@ schedule_barrier(compiler_context *ctx) .swizzle = SWIZZLE_IDENTITY_4, \ .op = midgard_op_##name, \ .load_store = { \ - .address = address \ + .signed_offset = address \ } \ }; \ \ @@ -1164,20 +1164,20 @@ emit_ubo_read( if (indirect_offset) { ins.src[2] = nir_src_index(ctx, indirect_offset); ins.src_types[2] = nir_type_uint32; - ins.load_store.arg_2 = (indirect_shift << 5); + ins.load_store.index_shift = indirect_shift; /* X component for the whole swizzle to prevent register * pressure from ballooning from the extra components */ for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[2]); ++i) ins.swizzle[2][i] = 0; } else { - ins.load_store.arg_2 = 0x1E; + ins.load_store.index_reg = REGISTER_LDST_ZERO; } if (indirect_offset && 
indirect_offset->is_ssa && !indirect_shift) mir_set_ubo_offset(&ins, indirect_offset, offset); - ins.load_store.arg_1 = index; + midgard_pack_ubo_index_imm(&ins.load_store, index); return emit_mir_instruction(ctx, ins); } @@ -1274,12 +1274,6 @@ emit_atomic( nir_src *src_offset = nir_get_io_offset_src(instr); if (op == midgard_op_atomic_cmpxchg) { - for(unsigned i = 0; i < 2; ++i) - ins.swizzle[1][i] = i; - - ins.src[1] = is_image ? image_direct_address : nir_src_index(ctx, src_offset); - ins.src_types[1] = nir_type_uint64; - unsigned xchg_val_src = is_image ? 4 : 2; unsigned xchg_val = nir_src_index(ctx, &instr->src[xchg_val_src]); emit_explicit_constant(ctx, xchg_val, xchg_val); @@ -1288,8 +1282,18 @@ emit_atomic( ins.src_types[2] = type | bitsize; ins.src[3] = xchg_val; - if (is_shared) - ins.load_store.arg_1 |= 0x6E; + if (is_shared) { + ins.load_store.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR; + ins.load_store.arg_comp = COMPONENT_Z; + ins.load_store.bitsize_toggle = true; + } else { + for(unsigned i = 0; i < 2; ++i) + ins.swizzle[1][i] = i; + + ins.src[1] = is_image ? image_direct_address : + nir_src_index(ctx, src_offset); + ins.src_types[1] = nir_type_uint64; + } } else if (is_image) { for(unsigned i = 0; i < 2; ++i) ins.swizzle[2][i] = i; @@ -1297,7 +1301,9 @@ emit_atomic( ins.src[2] = image_direct_address; ins.src_types[2] = nir_type_uint64; - ins.load_store.arg_1 |= 0x7E; + ins.load_store.arg_reg = REGISTER_LDST_ZERO; + ins.load_store.bitsize_toggle = true; + ins.load_store.index_format = midgard_index_address_u64; } else mir_set_offset(ctx, &ins, src_offset, is_shared ? LDST_SHARED : LDST_GLOBAL); @@ -1316,7 +1322,7 @@ emit_varying_read( /* XXX: Half-floats? 
*/ /* TODO: swizzle, mask */ - midgard_instruction ins = m_ld_vary_32(dest, offset); + midgard_instruction ins = m_ld_vary_32(dest, PACK_LDST_ATTRIB_OFS(offset)); ins.mask = mask_of(nr_comp); ins.dest_type = type; @@ -1328,23 +1334,22 @@ emit_varying_read( for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W); - midgard_varying_parameter p = { - .is_varying = 1, - .interpolation = midgard_interp_default, - .flat = flat, - }; - unsigned u; - memcpy(&u, &p, sizeof(p)); - ins.load_store.varying_parameters = u; + midgard_varying_params p = { + .flat_shading = flat, + .perspective_correction = 1, + .interpolate_sample = true, + }; + midgard_pack_varying_params(&ins.load_store, p); if (indirect_offset) { ins.src[2] = nir_src_index(ctx, indirect_offset); ins.src_types[2] = nir_type_uint32; } else - ins.load_store.arg_2 = 0x1E; + ins.load_store.index_reg = REGISTER_LDST_ZERO; - ins.load_store.arg_1 = 0x9E; + ins.load_store.arg_reg = REGISTER_LDST_ZERO; + ins.load_store.index_format = midgard_index_address_u32; /* Use the type appropriate load */ switch (type) { @@ -1402,16 +1407,16 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic) emit_explicit_constant(ctx, val, val); nir_alu_type type = nir_intrinsic_src_type(instr); - ins = st_image(type, val, address); + ins = st_image(type, val, PACK_LDST_ATTRIB_OFS(address)); nir_alu_type base_type = nir_alu_type_get_base_type(type); ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]); } else if (is_atomic) { /* emit lea_image */ unsigned dest = make_compiler_temp_reg(ctx); - ins = m_lea_image(dest, address); + ins = m_lea_image(dest, PACK_LDST_ATTRIB_OFS(address)); ins.mask = mask_of(2); /* 64-bit memory address */ } else { /* emit ld_image_* */ nir_alu_type type = nir_intrinsic_dest_type(instr); - ins = ld_image(type, nir_dest_index(&instr->dest), address); + ins = ld_image(type, nir_dest_index(&instr->dest), 
PACK_LDST_ATTRIB_OFS(address)); ins.mask = mask_of(nir_intrinsic_dest_components(instr)); ins.dest_type = type; } @@ -1420,7 +1425,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic) ins.src[1] = coord_reg; ins.src_types[1] = nir_type_uint16; if (nr_dim == 3 || is_array) { - ins.load_store.arg_1 |= 0x20; + ins.load_store.bitsize_toggle = true; } /* Image index reg */ @@ -1428,7 +1433,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic) ins.src[2] = nir_src_index(ctx, index); ins.src_types[2] = nir_type_uint32; } else - ins.load_store.arg_2 = 0x1E; + ins.load_store.index_reg = REGISTER_LDST_ZERO; emit_mir_instruction(ctx, ins); @@ -1441,9 +1446,9 @@ emit_attr_read( unsigned dest, unsigned offset, unsigned nr_comp, nir_alu_type t) { - midgard_instruction ins = m_ld_attr_32(dest, offset); - ins.load_store.arg_1 = 0x1E; - ins.load_store.arg_2 = 0x1E; + midgard_instruction ins = m_ld_attr_32(dest, PACK_LDST_ATTRIB_OFS(offset)); + ins.load_store.arg_reg = REGISTER_LDST_ZERO; + ins.load_store.index_reg = REGISTER_LDST_ZERO; ins.mask = mask_of(nr_comp); /* Use the type appropriate load */ @@ -1493,12 +1498,12 @@ compute_builtin_arg(nir_op op) { switch (op) { case nir_intrinsic_load_work_group_id: - return 0x14; + return REGISTER_LDST_GROUP_ID; case nir_intrinsic_load_local_invocation_id: - return 0x10; + return REGISTER_LDST_LOCAL_THREAD_ID; case nir_intrinsic_load_global_invocation_id: case nir_intrinsic_load_global_invocation_id_zero_base: - return 0x18; + return REGISTER_LDST_GLOBAL_THREAD_ID; default: unreachable("Invalid compute paramater loaded"); } @@ -1567,7 +1572,7 @@ emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) midgard_instruction ins = m_ldst_mov(reg, 0); ins.mask = mask_of(3); ins.swizzle[0][3] = COMPONENT_X; /* xyzx */ - ins.load_store.arg_1 = compute_builtin_arg(instr->intrinsic); + ins.load_store.arg_reg = compute_builtin_arg(instr->intrinsic); 
emit_mir_instruction(ctx, ins); } @@ -1598,8 +1603,8 @@ emit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx) midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); ld.op = midgard_op_ld_special_32u; - ld.load_store.address = idx; - ld.load_store.arg_2 = 0x1E; + ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(idx); + ld.load_store.index_reg = REGISTER_LDST_ZERO; for (int i = 0; i < 4; ++i) ld.swizzle[0][i] = COMPONENT_X; @@ -1790,20 +1795,25 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); - ld.load_store.arg_2 = output_load_rt_addr(ctx, instr); + unsigned target = output_load_rt_addr(ctx, instr); + ld.load_store.index_comp = target & 0x3; + ld.load_store.index_reg = target >> 2; if (nir_src_is_const(instr->src[0])) { - ld.load_store.arg_1 = nir_src_as_uint(instr->src[0]); + unsigned sample = nir_src_as_uint(instr->src[0]); + ld.load_store.arg_comp = sample & 0x3; + ld.load_store.arg_reg = sample >> 2; } else { - ld.load_store.varying_parameters = 2; + /* Enable sample index via register. 
*/ + ld.load_store.signed_offset |= 1; ld.src[1] = nir_src_index(ctx, &instr->src[0]); ld.src_types[1] = nir_type_int32; } if (ctx->quirks & MIDGARD_OLD_BLEND) { ld.op = midgard_op_ld_special_32u; - ld.load_store.address = 16; - ld.load_store.arg_2 = 0x1E; + ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(16); + ld.load_store.index_reg = REGISTER_LDST_ZERO; } emit_mir_instruction(ctx, ld); @@ -1821,7 +1831,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) else ld = m_ld_tilebuffer_32f(reg, 0); - ld.load_store.arg_2 = output_load_rt_addr(ctx, instr); + unsigned index = output_load_rt_addr(ctx, instr); + ld.load_store.index_comp = index & 0x3; + ld.load_store.index_reg = index >> 2; for (unsigned c = 4; c < 16; ++c) ld.swizzle[0][c] = 0; @@ -1831,8 +1843,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) ld.op = midgard_op_ld_special_16f; else ld.op = midgard_op_ld_special_32f; - ld.load_store.address = 1; - ld.load_store.arg_2 = 0x1E; + ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(1); + ld.load_store.index_reg = REGISTER_LDST_ZERO; } emit_mir_instruction(ctx, ld); @@ -1924,9 +1936,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) unsigned dst_component = nir_intrinsic_component(instr); unsigned nr_comp = nir_src_num_components(instr->src[0]); - midgard_instruction st = m_st_vary_32(reg, offset); - st.load_store.arg_1 = 0x9E; - st.load_store.arg_2 = 0x1E; + midgard_instruction st = m_st_vary_32(reg, PACK_LDST_ATTRIB_OFS(offset)); + st.load_store.arg_reg = REGISTER_LDST_ZERO; + st.load_store.index_format = midgard_index_address_u32; + st.load_store.index_reg = REGISTER_LDST_ZERO; switch (nir_alu_type_get_base_type(nir_intrinsic_src_type(instr))) { case nir_type_uint: @@ -2211,7 +2224,7 @@ set_tex_coord(compiler_context *ctx, nir_tex_instr *instr, ld.src[1] = coords; ld.src_types[1] = ins->src_types[1]; ld.mask = 0x3; /* xy */ - ld.load_store.arg_1 = 0x20; + ld.load_store.bitsize_toggle = true; 
ld.swizzle[1][3] = COMPONENT_X; emit_mir_instruction(ctx, ld); diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c index 47bd57e1018..057a1f49dca 100644 --- a/src/panfrost/midgard/midgard_emit.c +++ b/src/panfrost/midgard/midgard_emit.c @@ -44,6 +44,47 @@ mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar) return midgard_int_zero_extend; } +void +midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index) +{ + word->arg_comp = index & 0x3; + word->arg_reg = (index >> 2) & 0x7; + word->bitsize_toggle = (index >> 5) & 0x1; + word->index_format = (index >> 6) & 0x3; +} + +unsigned +midgard_unpack_ubo_index_imm(midgard_load_store_word word) +{ + unsigned ubo = word.arg_comp | + (word.arg_reg << 2) | + (word.bitsize_toggle << 5) | + (word.index_format << 6); + + return ubo; +} + +void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p) +{ + /* Currently these parameters are not supported. */ + assert(p.direct_sample_pos_x == 0 && p.direct_sample_pos_y == 0); + + unsigned u; + memcpy(&u, &p, sizeof(p)); + + word->signed_offset |= u & 0x1FF; +} + +midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word) +{ + unsigned params = word.signed_offset & 0x1FF; + + midgard_varying_params p; + memcpy(&p, &params, sizeof(p)); + + return p; +} + unsigned mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar) { @@ -578,15 +619,15 @@ load_store_from_instr(midgard_instruction *ins) } if (ins->src[1] != ~0) { - unsigned src = SSA_REG_FROM_FIXED(ins->src[1]); + ldst.arg_reg = SSA_REG_FROM_FIXED(ins->src[1]) - REGISTER_LDST_BASE; unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]); - ldst.arg_1 |= midgard_ldst_reg(src, ins->swizzle[1][0], sz); + ldst.arg_comp = midgard_ldst_comp(ldst.arg_reg, ins->swizzle[1][0], sz); } if (ins->src[2] != ~0) { - unsigned src = SSA_REG_FROM_FIXED(ins->src[2]); + ldst.index_reg = SSA_REG_FROM_FIXED(ins->src[2]) -
REGISTER_LDST_BASE; unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]); - ldst.arg_2 |= midgard_ldst_reg(src, ins->swizzle[2][0], sz); + ldst.index_comp = midgard_ldst_comp(ldst.index_reg, ins->swizzle[2][0], sz); } return ldst; @@ -876,13 +917,22 @@ emit_alu_bundle(compiler_context *ctx, * over some other semantic distinction else well, but it unifies things in the * compiler so I don't mind. */ -static unsigned -mir_ldst_imm_shift(midgard_load_store_op op) +static void +mir_ldst_pack_offset(midgard_instruction *ins, int offset) { - if (OP_IS_UBO_READ(op)) - return 3; + /* These opcodes don't support offsets */ + assert(!OP_IS_REG2REG_LDST(ins->op) || + ins->op == midgard_op_lea || + ins->op == midgard_op_lea_image); + + if (OP_IS_UBO_READ(ins->op)) + ins->load_store.signed_offset |= PACK_LDST_UBO_OFS(offset); + else if (OP_IS_IMAGE(ins->op)) + ins->load_store.signed_offset |= PACK_LDST_ATTRIB_OFS(offset); + else if (OP_IS_SPECIAL(ins->op)) + ins->load_store.signed_offset |= PACK_LDST_SELECTOR_OFS(offset); else - return 1; + ins->load_store.signed_offset |= PACK_LDST_MEM_OFS(offset); } static enum mali_sampler_type @@ -931,22 +981,17 @@ emit_binary_bundle(compiler_context *ctx, /* Copy masks */ for (unsigned i = 0; i < bundle->instruction_count; ++i) { - mir_pack_ldst_mask(bundle->instructions[i]); + midgard_instruction *ins = bundle->instructions[i]; + mir_pack_ldst_mask(ins); /* Atomic ops don't use this swizzle the same way as other ops */ - if (!OP_IS_ATOMIC(bundle->instructions[i]->op)) - mir_pack_swizzle_ldst(bundle->instructions[i]); + if (!OP_IS_ATOMIC(ins->op)) + mir_pack_swizzle_ldst(ins); /* Apply a constant offset */ - unsigned offset = bundle->instructions[i]->constants.u32[0]; - - if (offset) { - unsigned shift = mir_ldst_imm_shift(bundle->instructions[i]->op); - unsigned upper_shift = 10 - shift; - - bundle->instructions[i]->load_store.varying_parameters |= (offset & ((1 << upper_shift) - 1)) << shift; - 
bundle->instructions[i]->load_store.address |= (offset >> upper_shift); - } + unsigned offset = ins->constants.u32[0]; + if (offset) + mir_ldst_pack_offset(ins, offset); } midgard_load_store_word ldst0 = diff --git a/src/panfrost/midgard/midgard_opt_perspective.c b/src/panfrost/midgard/midgard_opt_perspective.c index 97e9d338519..9a034ac7fbd 100644 --- a/src/panfrost/midgard/midgard_opt_perspective.c +++ b/src/panfrost/midgard/midgard_opt_perspective.c @@ -125,7 +125,7 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) midgard_op_ldst_perspective_div_w : midgard_op_ldst_perspective_div_z, .load_store = { - .arg_1 = 0x20 + .bitsize_toggle = true, } }; @@ -167,9 +167,8 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) /* We found it, so rewrite it to project. Grab the * modifier */ - unsigned param = v->load_store.varying_parameters; - midgard_varying_parameter p; - memcpy(&p, &param, sizeof(p)); + midgard_varying_params p = + midgard_unpack_varying_params(v->load_store); if (p.modifier != midgard_varying_mod_none) break; @@ -181,9 +180,7 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) midgard_varying_mod_perspective_w : midgard_varying_mod_perspective_z; - /* Aliasing rules are annoying */ - memcpy(&param, &p, sizeof(p)); - v->load_store.varying_parameters = param; + midgard_pack_varying_params(&v->load_store, p); /* Use the new destination */ v->dest = to; diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index f28e2a6919d..9a537270bbe 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -988,12 +988,14 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) .swizzle = SWIZZLE_IDENTITY_4, .op = midgard_op_ld_ubo_128, .load_store = { - .arg_1 = ctx->info->push.words[idx].ubo, - .arg_2 = 0x1E, + .index_reg = REGISTER_LDST_ZERO, }, .constants.u32[0] = ctx->info->push.words[idx].offset }; +
midgard_pack_ubo_index_imm(&ld.load_store, + ctx->info->push.words[idx].ubo); + mir_insert_instruction_before_scheduled(ctx, block, before, ld); mir_rewrite_index_src_single(ins, ins->src[i], temp); diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index fb7c4283637..17ad243d68c 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -140,14 +140,17 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count, if (instructions[i]->type == TAG_LOAD_STORE_4 && load_store_opcode_props[instructions[i]->op].props & LDST_ADDRESS) { - unsigned type; - switch (instructions[i]->load_store.arg_1 & 0x3E) { - case LDST_SHARED: type = 0; break; - case LDST_SCRATCH: type = 1; break; - default: type = 2; break; + unsigned type = instructions[i]->load_store.arg_reg | + instructions[i]->load_store.arg_comp; + + unsigned idx; + switch (type) { + case LDST_SHARED: idx = 0; break; + case LDST_SCRATCH: idx = 1; break; + default: idx = 2; break; } - unsigned prev = prev_ldst[type]; + unsigned prev = prev_ldst[idx]; if (prev != ~0) { BITSET_WORD *dependents = instructions[prev]->dependents; @@ -160,7 +163,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count, instructions[i]->nr_dependencies++; } - prev_ldst[type] = i; + prev_ldst[idx] = i; } if (dest < node_count) { diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index 6a02eaadd15..5a8dbafbd8f 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -73,7 +73,7 @@ mir_analyze_ranges(compiler_context *ctx) mir_foreach_instr_global(ctx, ins) { if (!mir_is_direct_aligned_ubo(ins)) continue; - unsigned ubo = ins->load_store.arg_1; + unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store); unsigned offset = ins->constants.u32[0] / 16; assert(ubo < res.nr_blocks); @@ -276,7 +276,7 @@ 
midgard_promote_uniforms(compiler_context *ctx) mir_foreach_instr_global_safe(ctx, ins) { if (!mir_is_direct_aligned_ubo(ins)) continue; - unsigned ubo = ins->load_store.arg_1; + unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store); unsigned qword = ins->constants.u32[0] / 16; /* Check if we decided to push this */