pan/mdg: properly encode/decode ldst instructions

Signed-off-by: Italo Nicola <italonicola@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9461>
Italo Nicola 2021-04-16 10:23:48 +00:00 committed by Marge Bot
parent 4b933260d9
commit 6a12ea02fe
11 changed files with 465 additions and 247 deletions


@ -516,9 +516,9 @@ void mir_insert_instruction_after_scheduled(compiler_context *ctx, midgard_block
void mir_flip(midgard_instruction *ins);
void mir_compute_temp_count(compiler_context *ctx);
#define LDST_GLOBAL 0x3E
#define LDST_SHARED 0x2E
#define LDST_SCRATCH 0x2A
#define LDST_GLOBAL (REGISTER_LDST_ZERO << 2)
#define LDST_SHARED ((REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z)
#define LDST_SCRATCH ((REGISTER_LDST_PC_SP << 2) | COMPONENT_Z)
void mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg);
void mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias);
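A minimal sketch, not part of the patch, of how these segment selectors decompose into the new arg_reg/arg_comp fields, the same split mir_set_offset performs further down in this commit:

unsigned seg = LDST_SHARED;            /* (REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z */
unsigned arg_comp = seg & 0x3;         /* COMPONENT_Z */
unsigned arg_reg  = (seg >> 2) & 0x7;  /* REGISTER_LDST_LOCAL_STORAGE_PTR */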
@ -573,8 +573,11 @@ v_load_store_scratch(
.op = is_store ? midgard_op_st_128 : midgard_op_ld_128,
.load_store = {
/* For register spilling - to thread local storage */
.arg_1 = 0xEA,
.arg_2 = 0x1E,
.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR,
.arg_comp = COMPONENT_Z,
.bitsize_toggle = true,
.index_format = midgard_index_address_u32,
.index_reg = REGISTER_LDST_ZERO,
},
/* If we spill an unspill, RA goes into an infinite loop */


@ -304,6 +304,13 @@ static char *argmod_names[3] = {
".x2",
};
static char *index_format_names[4] = {
"",
".u64",
".u32",
".s32"
};
static void
print_outmod(FILE *fp, unsigned outmod, bool is_int)
{
@ -760,6 +767,21 @@ print_alu_mask(FILE *fp, uint8_t mask, unsigned bits, midgard_shrink_mode shrink
fprintf(fp, " /* %X */", mask);
}
/* TODO: 16-bit mode */
static void
print_ldst_mask(FILE *fp, unsigned mask, unsigned swizzle) {
fprintf(fp, ".");
for (unsigned i = 0; i < 4; ++i) {
bool write = (mask & (1 << i)) != 0;
unsigned c = (swizzle >> (i * 2)) & 3;
/* We can't omit the swizzle here since many ldst ops have a
* combined swizzle/writemask, and it would be ambiguous to not
* print the masked-out components. */
fprintf(fp, "%c", write ? components[c] : '~');
}
}
/* Prints the 4-bit masks found in texture and load/store ops, as opposed to
* the 8-bit masks found in (vector) ALU ops. Supports texture-style 16-bit
* mode as well, but not load/store-style 16-bit mode. */
@ -1235,41 +1257,40 @@ print_alu_word(FILE *fp, uint32_t *words, unsigned num_quad_words,
return branch_forward;
}
static void
/* TODO: how can we use this now that we know that these params can't be known
* before run time in every single case? Maybe just use it in the cases we can? */
UNUSED static void
print_varying_parameters(FILE *fp, midgard_load_store_word *word)
{
midgard_varying_parameter param;
unsigned v = word->varying_parameters;
memcpy(&param, &v, sizeof(param));
midgard_varying_params p = midgard_unpack_varying_params(*word);
if (param.is_varying) {
/* If a varying, there are qualifiers */
if (param.flat)
fprintf(fp, ".flat");
/* If a varying, there are qualifiers */
if (p.flat_shading)
fprintf(fp, ".flat");
if (param.interpolation != midgard_interp_default) {
if (param.interpolation == midgard_interp_centroid)
fprintf(fp, ".centroid");
else if (param.interpolation == midgard_interp_sample)
fprintf(fp, ".sample");
else
fprintf(fp, ".interp%d", param.interpolation);
}
if (p.perspective_correction)
fprintf(fp, ".correction");
if (param.modifier != midgard_varying_mod_none) {
if (param.modifier == midgard_varying_mod_perspective_w)
fprintf(fp, ".perspectivew");
else if (param.modifier == midgard_varying_mod_perspective_z)
fprintf(fp, ".perspectivez");
else
fprintf(fp, ".mod%d", param.modifier);
}
} else if (param.flat || param.interpolation || param.modifier) {
fprintf(fp, " /* is_varying not set but varying metadata attached */");
if (p.centroid_mapping)
fprintf(fp, ".centroid");
if (p.interpolate_sample)
fprintf(fp, ".sample");
switch (p.modifier) {
case midgard_varying_mod_perspective_y:
fprintf(fp, ".perspectivey");
break;
case midgard_varying_mod_perspective_z:
fprintf(fp, ".perspectivez");
break;
case midgard_varying_mod_perspective_w:
fprintf(fp, ".perspectivew");
break;
default:
unreachable("invalid varying modifier");
break;
}
if (param.zero0 || param.zero1 || param.zero2)
fprintf(fp, " /* zero tripped, %u %u %u */ ", param.zero0, param.zero1, param.zero2);
}
static bool
@ -1304,33 +1325,14 @@ is_op_attribute(unsigned op)
return false;
}
/* Helper to print integer well-formatted, but only when non-zero. */
static void
print_load_store_arg(FILE *fp, uint8_t arg, unsigned index)
midgard_print_sint(FILE *fp, int n)
{
/* Try to interpret as a register */
midgard_ldst_register_select sel;
memcpy(&sel, &arg, sizeof(arg));
/* If unknown is set, we're not sure what this is or how to
* interpret it. But if it's zero, we get it. */
if (sel.unknown) {
fprintf(fp, "0x%02X", arg);
return;
}
print_ldst_read_reg(fp, sel.select);
fprintf(fp, ".%c", components[sel.component]);
/* Only print a shift if it's non-zero. Shifts only make sense for the
* second index. For the first, we're not sure what it means yet */
if (index == 1) {
if (sel.shift)
fprintf(fp, " << %u", sel.shift);
} else {
fprintf(fp, " /* %X */", sel.shift);
}
if (n > 0)
fprintf(fp, " + 0x%X", n);
else if (n < 0)
fprintf(fp, " - 0x%X", -n);
}
static void
@ -1347,63 +1349,160 @@ print_load_store_instr(FILE *fp, uint64_t data)
print_ld_st_opcode(fp, word->op);
unsigned address = word->address;
if (word->op == midgard_op_trap) {
fprintf(fp, " 0x%X\n", word->signed_offset);
return;
}
/* Print opcode modifiers */
if (OP_USES_ATTRIB(word->op)) /* which attrib table? */
fprintf(fp, ".%s", (word->index_format >> 1) ? "secondary" : "primary");
else if (word->op == midgard_op_ld_cubemap_coords || OP_IS_PROJECTION(word->op))
fprintf(fp, ".%s", word->bitsize_toggle ? "f32" : "f16");
fprintf(fp, " ");
/* src/dest register */
if (!OP_IS_STORE(word->op)) {
print_ldst_write_reg(fp, word->reg);
/* Some opcodes don't have a swizzable src register, and
* instead the swizzle is applied before the result is written
* to the dest reg. For these ops, we combine the writemask
* with the swizzle to display them in the disasm compactly. */
unsigned swizzle = word->swizzle;
if ((OP_IS_REG2REG_LDST(word->op) &&
word->op != midgard_op_lea &&
word->op != midgard_op_lea_image) || OP_IS_ATOMIC(word->op))
swizzle = 0xE4;
print_ldst_mask(fp, word->mask, swizzle);
} else {
print_ldst_read_reg(fp, word->reg);
print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough,
midgard_reg_mode_32, 0xFF);
}
/* ld_ubo args */
if (OP_IS_UBO_READ(word->op)) {
if (word->signed_offset & 1) { /* buffer index imm */
unsigned imm = midgard_unpack_ubo_index_imm(*word);
fprintf(fp, ", %u", imm);
} else { /* buffer index from reg */
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->arg_reg);
fprintf(fp, ".%c", components[word->arg_comp]);
}
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->index_reg);
fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift);
midgard_print_sint(fp, UNPACK_LDST_UBO_OFS(word->signed_offset));
}
/* mem addr expression */
if (OP_HAS_ADDRESS(word->op)) {
fprintf(fp, ", [ ");
print_ldst_read_reg(fp, word->arg_reg);
fprintf(fp, ".u%d.%c",
word->bitsize_toggle ? 64 : 32, components[word->arg_comp]);
if ((word->op < midgard_op_atomic_cmpxchg ||
word->op > midgard_op_atomic_cmpxchg64_be) &&
word->index_reg != 0x7) {
fprintf(fp, " + (");
print_ldst_read_reg(fp, word->index_reg);
fprintf(fp, "%s.%c << %u)",
index_format_names[word->index_format],
components[word->index_comp], word->index_shift);
}
midgard_print_sint(fp, word->signed_offset);
fprintf(fp, " ]");
}
/* src reg for reg2reg ldst opcodes */
if (OP_IS_REG2REG_LDST(word->op)) {
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->arg_reg);
print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough,
midgard_reg_mode_32, 0xFF);
}
/* atomic ops encode the source arg where the ldst swizzle would be. */
if (OP_IS_ATOMIC(word->op)) {
unsigned src = (word->swizzle >> 2) & 0x7;
unsigned src_comp = word->swizzle & 0x3;
fprintf(fp, ", ");
print_ldst_read_reg(fp, src);
fprintf(fp, ".%c", components[src_comp]);
}
/* CMPXCHG encodes the extra comparison arg where the index reg would be. */
if (word->op >= midgard_op_atomic_cmpxchg &&
word->op <= midgard_op_atomic_cmpxchg64_be) {
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->index_reg);
fprintf(fp, ".%c", components[word->index_comp]);
}
/* index reg for attr/vary/images, selector for ld/st_special */
if (OP_IS_SPECIAL(word->op) || OP_USES_ATTRIB(word->op)) {
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->index_reg);
fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift);
midgard_print_sint(fp, UNPACK_LDST_ATTRIB_OFS(word->signed_offset));
}
/* vertex reg for attrib/varying ops, coord reg for image ops */
if (OP_USES_ATTRIB(word->op)) {
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->arg_reg);
if (OP_IS_IMAGE(word->op))
fprintf(fp, ".u%d", word->bitsize_toggle ? 64 : 32);
fprintf(fp, ".%c", components[word->arg_comp]);
if (word->bitsize_toggle && !OP_IS_IMAGE(word->op))
midgard_print_sint(fp, UNPACK_LDST_VERTEX_OFS(word->signed_offset));
}
/* TODO: properly decode format specifier for PACK/UNPACK ops */
if (OP_IS_PACK_COLOUR(word->op) || OP_IS_UNPACK_COLOUR(word->op)) {
fprintf(fp, ", ");
unsigned format_specifier = (word->signed_offset << 4) | word->index_shift;
fprintf(fp, "0x%X", format_specifier);
}
fprintf(fp, "\n");
/* Debugging stuff */
if (is_op_varying(word->op)) {
print_varying_parameters(fp, word);
/* Do some analysis: check if direct access */
/* Do some analysis: check if direct access */
if ((word->arg_2 == 0x1E) && midg_stats.varying_count >= 0)
update_stats(&midg_stats.varying_count, address);
if (word->index_reg == 0x7 && midg_stats.varying_count >= 0)
update_stats(&midg_stats.varying_count,
UNPACK_LDST_ATTRIB_OFS(word->signed_offset));
else
midg_stats.varying_count = -16;
} else if (is_op_attribute(word->op)) {
if ((word->arg_2 == 0x1E) && midg_stats.attribute_count >= 0)
update_stats(&midg_stats.attribute_count, address);
if (word->index_reg == 0x7 && midg_stats.attribute_count >= 0)
update_stats(&midg_stats.attribute_count,
UNPACK_LDST_ATTRIB_OFS(word->signed_offset));
else
midg_stats.attribute_count = -16;
}
fprintf(fp, " ");
if (!OP_IS_STORE(word->op))
print_ldst_write_reg(fp, word->reg);
else
print_ldst_read_reg(fp, word->reg);
print_mask_4(fp, word->mask, false);
if (!OP_IS_STORE(word->op))
update_dest(word->reg);
bool is_ubo = OP_IS_UBO_READ(word->op);
if (is_ubo) {
/* UBOs use their own addressing scheme */
int lo = word->varying_parameters >> 7;
int hi = word->address;
/* TODO: Combine fields logically */
address = (hi << 3) | lo;
}
fprintf(fp, ", %u", address);
print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, midgard_reg_mode_32, 0xFF);
fprintf(fp, ", ");
if (is_ubo) {
fprintf(fp, "ubo%u", word->arg_1);
update_stats(&midg_stats.uniform_buffer_count, word->arg_1);
} else
print_load_store_arg(fp, word->arg_1, 0);
fprintf(fp, ", ");
print_load_store_arg(fp, word->arg_2, 1);
fprintf(fp, " /* %X */\n", word->varying_parameters);
if (OP_IS_UBO_READ(word->op))
update_stats(&midg_stats.uniform_buffer_count,
UNPACK_LDST_UBO_OFS(word->signed_offset));
midg_stats.instruction_count++;
}


@ -355,9 +355,9 @@ mir_is_simple_swizzle(unsigned *swizzle, unsigned mask)
/* Packs a load/store argument */
static inline uint8_t
midgard_ldst_reg(unsigned reg, unsigned component, unsigned size)
midgard_ldst_comp(unsigned reg, unsigned component, unsigned size)
{
assert((reg == REGISTER_LDST_BASE) || (reg == REGISTER_LDST_BASE + 1));
assert((reg & ~1) == 0);
assert(size == 16 || size == 32 || size == 64);
/* Shift so everything is in terms of 32-bit units */
@ -369,17 +369,38 @@ midgard_ldst_reg(unsigned reg, unsigned component, unsigned size)
component >>= 1;
}
midgard_ldst_register_select sel = {
.component = component,
.select = reg - 26
};
uint8_t packed;
memcpy(&packed, &sel, sizeof(packed));
return packed;
return component;
}
/* Packs/unpacks a ubo index immediate */
void midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index);
unsigned midgard_unpack_ubo_index_imm(midgard_load_store_word word);
/* Packs/unpacks varying parameters.
* FIXME: IMPORTANT: We currently handle varying mode weirdly, by passing all
* parameters via an offset and using REGISTER_LDST_ZERO as base. This works
* for most parameters, but does not allow us to encode/decode direct sample
* position. */
void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p);
midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word);
/* Load/store ops' displacement helpers.
* This is useful because different types of load/store ops have different
* displacement bitsizes. */
#define UNPACK_LDST_ATTRIB_OFS(a) ((a) >> 9)
#define UNPACK_LDST_VERTEX_OFS(a) util_sign_extend((a) & 0x1FF, 9)
#define UNPACK_LDST_SELECTOR_OFS(a) ((a) >> 9)
#define UNPACK_LDST_UBO_OFS(a) ((a) >> 2)
#define UNPACK_LDST_MEM_OFS(a) ((a))
#define PACK_LDST_ATTRIB_OFS(a) ((a) << 9)
#define PACK_LDST_VERTEX_OFS(a) ((a) & 0x1FF)
#define PACK_LDST_SELECTOR_OFS(a) ((a) << 9)
#define PACK_LDST_UBO_OFS(a) ((a) << 2)
#define PACK_LDST_MEM_OFS(a) ((a))
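A short sketch of the helpers above, not part of the patch and assuming assert() from <assert.h>: attribute-style offsets live in the top bits of signed_offset, while UBO offsets are shifted by two so that bit 0 stays free for the immediate-index flag described in the load/store word comments below.

int ofs = PACK_LDST_ATTRIB_OFS(4);        /* 4 << 9 */
assert(UNPACK_LDST_ATTRIB_OFS(ofs) == 4);

ofs = PACK_LDST_UBO_OFS(8) | 1;           /* bit 0: ubo index is an immediate */
assert(UNPACK_LDST_UBO_OFS(ofs) == 8);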
static inline bool
midgard_is_branch_unit(unsigned unit)
{


@ -664,31 +664,37 @@ typedef enum {
typedef enum {
midgard_varying_mod_none = 0,
/* Other values unknown */
/* Take the would-be result and divide all components by its z/w
/* Take the would-be result and divide all components by its y/z/w
* (perspective division baked in with the load) */
midgard_varying_mod_perspective_y = 1,
midgard_varying_mod_perspective_z = 2,
midgard_varying_mod_perspective_w = 3,
/* The result is a 64-bit cubemap descriptor to use with
* midgard_tex_op_normal or midgard_tex_op_gradient */
midgard_varying_mod_cubemap = 4,
} midgard_varying_modifier;
typedef struct
__attribute__((__packed__))
{
unsigned zero0 : 1; /* Always zero */
midgard_varying_modifier modifier : 3;
midgard_varying_modifier modifier : 2;
bool flat_shading : 1;
unsigned zero1: 1; /* Always zero */
/* These are ignored if flat_shading is enabled. */
bool perspective_correction : 1;
bool centroid_mapping : 1;
/* Varying qualifiers, zero if not a varying */
unsigned flat : 1;
unsigned is_varying : 1; /* Always one for varying, but maybe something else? */
midgard_interpolation interpolation : 2;
/* This is ignored if the shader only runs once per pixel. */
bool interpolate_sample : 1;
unsigned zero2 : 2; /* Always zero */
bool zero0 : 1; /* Always zero */
unsigned direct_sample_pos_x : 4;
unsigned direct_sample_pos_y : 4;
}
midgard_varying_parameter;
midgard_varying_params;
/* 8-bit register/etc selector for load/store ops */
typedef struct
@ -711,26 +717,56 @@ __attribute__((__packed__))
}
midgard_ldst_register_select;
typedef enum {
/* 0 is reserved */
midgard_index_address_u64 = 1,
midgard_index_address_u32 = 2,
midgard_index_address_s32 = 3,
} midgard_index_address_format;
typedef struct
__attribute__((__packed__))
{
midgard_load_store_op op : 8;
unsigned reg : 5;
unsigned mask : 4;
/* Source/dest reg */
unsigned reg : 5;
/* Generally a writemask.
* For ST_ATTR and ST_TEX, unused.
* For other stores, each bit masks 1/4th of the output. */
unsigned mask : 4;
/* Swizzle for stores, but for atomics it also encodes the source
* register. This fits because atomics don't need a swizzle, since they
* are not vectorized instructions. */
unsigned swizzle : 8;
/* Load/store ops can take two additional registers as arguments, but
* these are limited to load/store registers with only a few supported
* mask/swizzle combinations. The tradeoff is these are much more
* compact, requiring 8-bits each rather than 17-bits for a full
* reg/mask/swizzle. Usually (?) encoded as
* midgard_ldst_register_select. */
unsigned arg_1 : 8;
unsigned arg_2 : 8;
/* Arg reg, meaning changes according to each opcode */
unsigned arg_comp : 2;
unsigned arg_reg : 3;
unsigned varying_parameters : 10;
/* 64-bit address enable
* 32-bit data type enable for CUBEMAP and perspective div.
* Explicit indexing enable for LD_ATTR.
* 64-bit coordinate enable for LD_IMAGE. */
bool bitsize_toggle : 1;
unsigned address : 9;
/* These are mainly used for opcodes that have addresses.
* For cmpxchg, index_reg is used for the comparison value.
* For ops that access the attrib table, bit 1 encodes which table.
* For LD_VAR and LD/ST_ATTR, bit 0 enables dest/src type inference. */
midgard_index_address_format index_format : 2;
unsigned index_comp : 2;
unsigned index_reg : 3;
unsigned index_shift : 4;
/* Generally a signed offset, but its bitsize and starting bit differ by
* opcode; the PACK_LDST_*_OFS/UNPACK_LDST_*_OFS helpers are recommended
* when packing/unpacking this field.
* For LD_UBO, bit 0 enables ubo index immediate.
* For LD_TILEBUFFER_RAW, bit 0 disables sample index immediate. */
int signed_offset : 18;
}
midgard_load_store_word;
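Since atomics are not vectorized, the swizzle field doubles as their source operand, matching the disassembler change above. A sketch of that decode, with a hypothetical helper name, not part of the patch:

static void
decode_atomic_src(midgard_load_store_word word, unsigned *src, unsigned *src_comp)
{
        /* Register select in swizzle bits 4:2, component in bits 1:0 */
        *src = (word.swizzle >> 2) & 0x7;
        *src_comp = word.swizzle & 0x3;
}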


@ -36,17 +36,11 @@
* This allows for fast indexing into arrays. This file tries to pattern match the offset in NIR with this form to reduce pressure on the ALU pipe.
*/
enum index_type {
ITYPE_U64 = 1 << 6,
ITYPE_U32 = 2 << 6, // zero-extend
ITYPE_I32 = 3 << 6, // sign-extend
};
struct mir_address {
nir_ssa_scalar A;
nir_ssa_scalar B;
enum index_type type;
midgard_index_address_format type;
unsigned shift;
unsigned bias;
};
@ -136,7 +130,7 @@ mir_match_u2u64(struct mir_address *address)
nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);
address->B = arg;
address->type = ITYPE_U32;
address->type = midgard_index_address_u32;
}
/* Matches i2i64 and sets type */
@ -155,7 +149,7 @@ mir_match_i2i64(struct mir_address *address)
nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);
address->B = arg;
address->type = ITYPE_I32;
address->type = midgard_index_address_s32;
}
/* Matches ishl to shift */
@ -210,7 +204,7 @@ mir_match_offset(nir_ssa_def *offset, bool first_free, bool extend)
{
struct mir_address address = {
.B = { .def = offset },
.type = extend ? ITYPE_U64 : ITYPE_U32,
.type = extend ? midgard_index_address_u64 : midgard_index_address_u32,
};
mir_match_mov(&address);
@ -243,14 +237,16 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset,
bool force_sext = (nir_src_bit_size(*offset) < 64);
if (!offset->is_ssa) {
ins->load_store.arg_1 |= seg;
ins->load_store.bitsize_toggle = true;
ins->load_store.arg_comp = seg & 0x3;
ins->load_store.arg_reg = (seg >> 2) & 0x7;
ins->src[2] = nir_src_index(ctx, offset);
ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset);
if (force_sext)
ins->load_store.arg_1 |= ITYPE_I32;
ins->load_store.index_format = midgard_index_address_s32;
else
ins->load_store.arg_1 |= ITYPE_U64;
ins->load_store.index_format = midgard_index_address_u64;
return;
}
@ -263,23 +259,26 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset,
ins->src[1] = nir_ssa_index(match.A.def);
ins->swizzle[1][0] = match.A.comp;
ins->src_types[1] = nir_type_uint | match.A.def->bit_size;
} else
ins->load_store.arg_1 |= seg;
} else {
ins->load_store.bitsize_toggle = true;
ins->load_store.arg_comp = seg & 0x3;
ins->load_store.arg_reg = (seg >> 2) & 0x7;
}
if (match.B.def) {
ins->src[2] = nir_ssa_index(match.B.def);
ins->swizzle[2][0] = match.B.comp;
ins->src_types[2] = nir_type_uint | match.B.def->bit_size;
} else
ins->load_store.arg_2 = 0x1E;
ins->load_store.index_reg = REGISTER_LDST_ZERO;
if (force_sext)
match.type = ITYPE_I32;
match.type = midgard_index_address_s32;
ins->load_store.arg_1 |= match.type;
ins->load_store.index_format = match.type;
assert(match.shift <= 7);
ins->load_store.arg_2 |= (match.shift) << 5;
ins->load_store.index_shift = match.shift;
ins->constants.u32[0] = match.bias;
}
@ -298,6 +297,6 @@ mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias)
ins->swizzle[2][i] = match.B.comp;
}
ins->load_store.arg_2 |= (match.shift) << 5;
ins->load_store.index_shift = match.shift;
ins->constants.u32[0] = match.bias + bias;
}


@ -112,7 +112,7 @@ schedule_barrier(compiler_context *ctx)
.swizzle = SWIZZLE_IDENTITY_4, \
.op = midgard_op_##name, \
.load_store = { \
.address = address \
.signed_offset = address \
} \
}; \
\
@ -1164,20 +1164,20 @@ emit_ubo_read(
if (indirect_offset) {
ins.src[2] = nir_src_index(ctx, indirect_offset);
ins.src_types[2] = nir_type_uint32;
ins.load_store.arg_2 = (indirect_shift << 5);
ins.load_store.index_shift = indirect_shift;
/* X component for the whole swizzle to prevent register
* pressure from ballooning from the extra components */
for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[2]); ++i)
ins.swizzle[2][i] = 0;
} else {
ins.load_store.arg_2 = 0x1E;
ins.load_store.index_reg = REGISTER_LDST_ZERO;
}
if (indirect_offset && indirect_offset->is_ssa && !indirect_shift)
mir_set_ubo_offset(&ins, indirect_offset, offset);
ins.load_store.arg_1 = index;
midgard_pack_ubo_index_imm(&ins.load_store, index);
return emit_mir_instruction(ctx, ins);
}
@ -1274,12 +1274,6 @@ emit_atomic(
nir_src *src_offset = nir_get_io_offset_src(instr);
if (op == midgard_op_atomic_cmpxchg) {
for(unsigned i = 0; i < 2; ++i)
ins.swizzle[1][i] = i;
ins.src[1] = is_image ? image_direct_address : nir_src_index(ctx, src_offset);
ins.src_types[1] = nir_type_uint64;
unsigned xchg_val_src = is_image ? 4 : 2;
unsigned xchg_val = nir_src_index(ctx, &instr->src[xchg_val_src]);
emit_explicit_constant(ctx, xchg_val, xchg_val);
@ -1288,8 +1282,18 @@ emit_atomic(
ins.src_types[2] = type | bitsize;
ins.src[3] = xchg_val;
if (is_shared)
ins.load_store.arg_1 |= 0x6E;
if (is_shared) {
ins.load_store.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR;
ins.load_store.arg_comp = COMPONENT_Z;
ins.load_store.bitsize_toggle = true;
} else {
for(unsigned i = 0; i < 2; ++i)
ins.swizzle[1][i] = i;
ins.src[1] = is_image ? image_direct_address :
nir_src_index(ctx, src_offset);
ins.src_types[1] = nir_type_uint64;
}
} else if (is_image) {
for(unsigned i = 0; i < 2; ++i)
ins.swizzle[2][i] = i;
@ -1297,7 +1301,9 @@ emit_atomic(
ins.src[2] = image_direct_address;
ins.src_types[2] = nir_type_uint64;
ins.load_store.arg_1 |= 0x7E;
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
ins.load_store.bitsize_toggle = true;
ins.load_store.index_format = midgard_index_address_u64;
} else
mir_set_offset(ctx, &ins, src_offset, is_shared ? LDST_SHARED : LDST_GLOBAL);
@ -1316,7 +1322,7 @@ emit_varying_read(
/* XXX: Half-floats? */
/* TODO: swizzle, mask */
midgard_instruction ins = m_ld_vary_32(dest, offset);
midgard_instruction ins = m_ld_vary_32(dest, PACK_LDST_ATTRIB_OFS(offset));
ins.mask = mask_of(nr_comp);
ins.dest_type = type;
@ -1328,23 +1334,22 @@ emit_varying_read(
for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i)
ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W);
midgard_varying_parameter p = {
.is_varying = 1,
.interpolation = midgard_interp_default,
.flat = flat,
};
unsigned u;
memcpy(&u, &p, sizeof(p));
ins.load_store.varying_parameters = u;
midgard_varying_params p = {
.flat_shading = flat,
.perspective_correction = 1,
.interpolate_sample = true,
};
midgard_pack_varying_params(&ins.load_store, p);
if (indirect_offset) {
ins.src[2] = nir_src_index(ctx, indirect_offset);
ins.src_types[2] = nir_type_uint32;
} else
ins.load_store.arg_2 = 0x1E;
ins.load_store.index_reg = REGISTER_LDST_ZERO;
ins.load_store.arg_1 = 0x9E;
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
ins.load_store.index_format = midgard_index_address_u32;
/* Use the type appropriate load */
switch (type) {
@ -1402,16 +1407,16 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
emit_explicit_constant(ctx, val, val);
nir_alu_type type = nir_intrinsic_src_type(instr);
ins = st_image(type, val, address);
ins = st_image(type, val, PACK_LDST_ATTRIB_OFS(address));
nir_alu_type base_type = nir_alu_type_get_base_type(type);
ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]);
} else if (is_atomic) { /* emit lea_image */
unsigned dest = make_compiler_temp_reg(ctx);
ins = m_lea_image(dest, address);
ins = m_lea_image(dest, PACK_LDST_ATTRIB_OFS(address));
ins.mask = mask_of(2); /* 64-bit memory address */
} else { /* emit ld_image_* */
nir_alu_type type = nir_intrinsic_dest_type(instr);
ins = ld_image(type, nir_dest_index(&instr->dest), address);
ins = ld_image(type, nir_dest_index(&instr->dest), PACK_LDST_ATTRIB_OFS(address));
ins.mask = mask_of(nir_intrinsic_dest_components(instr));
ins.dest_type = type;
}
@ -1420,7 +1425,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
ins.src[1] = coord_reg;
ins.src_types[1] = nir_type_uint16;
if (nr_dim == 3 || is_array) {
ins.load_store.arg_1 |= 0x20;
ins.load_store.bitsize_toggle = true;
}
/* Image index reg */
@ -1428,7 +1433,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
ins.src[2] = nir_src_index(ctx, index);
ins.src_types[2] = nir_type_uint32;
} else
ins.load_store.arg_2 = 0x1E;
ins.load_store.index_reg = REGISTER_LDST_ZERO;
emit_mir_instruction(ctx, ins);
@ -1441,9 +1446,9 @@ emit_attr_read(
unsigned dest, unsigned offset,
unsigned nr_comp, nir_alu_type t)
{
midgard_instruction ins = m_ld_attr_32(dest, offset);
ins.load_store.arg_1 = 0x1E;
ins.load_store.arg_2 = 0x1E;
midgard_instruction ins = m_ld_attr_32(dest, PACK_LDST_ATTRIB_OFS(offset));
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
ins.load_store.index_reg = REGISTER_LDST_ZERO;
ins.mask = mask_of(nr_comp);
/* Use the type appropriate load */
@ -1493,12 +1498,12 @@ compute_builtin_arg(nir_op op)
{
switch (op) {
case nir_intrinsic_load_work_group_id:
return 0x14;
return REGISTER_LDST_GROUP_ID;
case nir_intrinsic_load_local_invocation_id:
return 0x10;
return REGISTER_LDST_LOCAL_THREAD_ID;
case nir_intrinsic_load_global_invocation_id:
case nir_intrinsic_load_global_invocation_id_zero_base:
return 0x18;
return REGISTER_LDST_GLOBAL_THREAD_ID;
default:
unreachable("Invalid compute paramater loaded");
}
@ -1567,7 +1572,7 @@ emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr)
midgard_instruction ins = m_ldst_mov(reg, 0);
ins.mask = mask_of(3);
ins.swizzle[0][3] = COMPONENT_X; /* xyzx */
ins.load_store.arg_1 = compute_builtin_arg(instr->intrinsic);
ins.load_store.arg_reg = compute_builtin_arg(instr->intrinsic);
emit_mir_instruction(ctx, ins);
}
@ -1598,8 +1603,8 @@ emit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx)
midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0);
ld.op = midgard_op_ld_special_32u;
ld.load_store.address = idx;
ld.load_store.arg_2 = 0x1E;
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(idx);
ld.load_store.index_reg = REGISTER_LDST_ZERO;
for (int i = 0; i < 4; ++i)
ld.swizzle[0][i] = COMPONENT_X;
@ -1790,20 +1795,25 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0);
ld.load_store.arg_2 = output_load_rt_addr(ctx, instr);
unsigned target = output_load_rt_addr(ctx, instr);
ld.load_store.index_comp = target & 0x3;
ld.load_store.index_reg = target >> 2;
if (nir_src_is_const(instr->src[0])) {
ld.load_store.arg_1 = nir_src_as_uint(instr->src[0]);
unsigned sample = nir_src_as_uint(instr->src[0]);
ld.load_store.arg_comp = sample & 0x3;
ld.load_store.arg_reg = sample >> 2;
} else {
ld.load_store.varying_parameters = 2;
/* Enable sample index via register. */
ld.load_store.signed_offset |= 1;
ld.src[1] = nir_src_index(ctx, &instr->src[0]);
ld.src_types[1] = nir_type_int32;
}
if (ctx->quirks & MIDGARD_OLD_BLEND) {
ld.op = midgard_op_ld_special_32u;
ld.load_store.address = 16;
ld.load_store.arg_2 = 0x1E;
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(16);
ld.load_store.index_reg = REGISTER_LDST_ZERO;
}
emit_mir_instruction(ctx, ld);
@ -1821,7 +1831,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
else
ld = m_ld_tilebuffer_32f(reg, 0);
ld.load_store.arg_2 = output_load_rt_addr(ctx, instr);
unsigned index = output_load_rt_addr(ctx, instr);
ld.load_store.index_comp = index & 0x3;
ld.load_store.index_reg = index >> 2;
for (unsigned c = 4; c < 16; ++c)
ld.swizzle[0][c] = 0;
@ -1831,8 +1843,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
ld.op = midgard_op_ld_special_16f;
else
ld.op = midgard_op_ld_special_32f;
ld.load_store.address = 1;
ld.load_store.arg_2 = 0x1E;
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(1);
ld.load_store.index_reg = REGISTER_LDST_ZERO;
}
emit_mir_instruction(ctx, ld);
@ -1924,9 +1936,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
unsigned dst_component = nir_intrinsic_component(instr);
unsigned nr_comp = nir_src_num_components(instr->src[0]);
midgard_instruction st = m_st_vary_32(reg, offset);
st.load_store.arg_1 = 0x9E;
st.load_store.arg_2 = 0x1E;
midgard_instruction st = m_st_vary_32(reg, PACK_LDST_ATTRIB_OFS(offset));
st.load_store.arg_reg = REGISTER_LDST_ZERO;
st.load_store.index_format = midgard_index_address_u32;
st.load_store.index_reg = REGISTER_LDST_ZERO;
switch (nir_alu_type_get_base_type(nir_intrinsic_src_type(instr))) {
case nir_type_uint:
@ -2211,7 +2224,7 @@ set_tex_coord(compiler_context *ctx, nir_tex_instr *instr,
ld.src[1] = coords;
ld.src_types[1] = ins->src_types[1];
ld.mask = 0x3; /* xy */
ld.load_store.arg_1 = 0x20;
ld.load_store.bitsize_toggle = true;
ld.swizzle[1][3] = COMPONENT_X;
emit_mir_instruction(ctx, ld);


@ -44,6 +44,47 @@ mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar)
return midgard_int_zero_extend;
}
void
midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index)
{
word->arg_comp = index & 0x3;
word->arg_reg = (index >> 2) & 0x7;
word->bitsize_toggle = (index >> 5) & 0x1;
word->index_format = (index >> 6) & 0x3;
}
unsigned
midgard_unpack_ubo_index_imm(midgard_load_store_word word)
{
unsigned ubo = word.arg_comp |
(word.arg_reg << 2) |
(word.bitsize_toggle << 5) |
(word.index_format << 6);
return ubo;
}
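A round-trip of the helpers above, illustrative only and not part of the patch: the 8-bit UBO index is spread across arg_comp, arg_reg, bitsize_toggle and index_format, then reassembled on unpack.

midgard_load_store_word w = { 0 };
midgard_pack_ubo_index_imm(&w, 5);            /* arg_comp = 1, arg_reg = 1 */
assert(midgard_unpack_ubo_index_imm(w) == 5);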
void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p)
{
/* Currently these parameters are not supported. */
assert(p.direct_sample_pos_x == 0 && p.direct_sample_pos_y == 0);
unsigned u;
memcpy(&u, &p, sizeof(p));
word->signed_offset |= u & 0x1FF;
}
midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word)
{
unsigned params = word.signed_offset & 0x1FF;
midgard_varying_params p;
memcpy(&p, &params, sizeof(p));
return p;
}
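Likewise for the varying parameters, a sketch (not part of the patch) mirroring how emit_varying_read constructs them later in this commit:

midgard_load_store_word w = { 0 };
midgard_varying_params p = {
        .flat_shading = true,
        .perspective_correction = true,
        .interpolate_sample = true,
};
midgard_pack_varying_params(&w, p);
assert(midgard_unpack_varying_params(w).flat_shading);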
unsigned
mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar)
{
@ -578,15 +619,15 @@ load_store_from_instr(midgard_instruction *ins)
}
if (ins->src[1] != ~0) {
unsigned src = SSA_REG_FROM_FIXED(ins->src[1]);
ldst.arg_reg = SSA_REG_FROM_FIXED(ins->src[1]) - REGISTER_LDST_BASE;
unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]);
ldst.arg_1 |= midgard_ldst_reg(src, ins->swizzle[1][0], sz);
ldst.arg_comp = midgard_ldst_comp(ldst.arg_reg, ins->swizzle[1][0], sz);
}
if (ins->src[2] != ~0) {
unsigned src = SSA_REG_FROM_FIXED(ins->src[2]);
ldst.index_reg = SSA_REG_FROM_FIXED(ins->src[2]) - REGISTER_LDST_BASE;
unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]);
ldst.arg_2 |= midgard_ldst_reg(src, ins->swizzle[2][0], sz);
ldst.index_comp = midgard_ldst_comp(ldst.index_reg, ins->swizzle[2][0], sz);
}
return ldst;
@ -876,13 +917,22 @@ emit_alu_bundle(compiler_context *ctx,
* over some other semantic distinction else well, but it unifies things in the
* compiler so I don't mind. */
static unsigned
mir_ldst_imm_shift(midgard_load_store_op op)
static void
mir_ldst_pack_offset(midgard_instruction *ins, int offset)
{
if (OP_IS_UBO_READ(op))
return 3;
/* These opcodes don't support offsets */
assert(!OP_IS_REG2REG_LDST(ins->op) ||
ins->op == midgard_op_lea ||
ins->op == midgard_op_lea_image);
if (OP_IS_UBO_READ(ins->op))
ins->load_store.signed_offset |= PACK_LDST_UBO_OFS(offset);
else if (OP_IS_IMAGE(ins->op))
ins->load_store.signed_offset |= PACK_LDST_ATTRIB_OFS(offset);
else if (OP_IS_SPECIAL(ins->op))
ins->load_store.signed_offset |= PACK_LDST_SELECTOR_OFS(offset);
else
return 1;
ins->load_store.signed_offset |= PACK_LDST_MEM_OFS(offset);
}
static enum mali_sampler_type
@ -931,22 +981,17 @@ emit_binary_bundle(compiler_context *ctx,
/* Copy masks */
for (unsigned i = 0; i < bundle->instruction_count; ++i) {
mir_pack_ldst_mask(bundle->instructions[i]);
midgard_instruction *ins = bundle->instructions[i];
mir_pack_ldst_mask(ins);
/* Atomic ops don't use this swizzle the same way as other ops */
if (!OP_IS_ATOMIC(bundle->instructions[i]->op))
mir_pack_swizzle_ldst(bundle->instructions[i]);
if (!OP_IS_ATOMIC(ins->op))
mir_pack_swizzle_ldst(ins);
/* Apply a constant offset */
unsigned offset = bundle->instructions[i]->constants.u32[0];
if (offset) {
unsigned shift = mir_ldst_imm_shift(bundle->instructions[i]->op);
unsigned upper_shift = 10 - shift;
bundle->instructions[i]->load_store.varying_parameters |= (offset & ((1 << upper_shift) - 1)) << shift;
bundle->instructions[i]->load_store.address |= (offset >> upper_shift);
}
unsigned offset = ins->constants.u32[0];
if (offset)
mir_ldst_pack_offset(ins, offset);
}
midgard_load_store_word ldst0 =


@ -125,7 +125,7 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
midgard_op_ldst_perspective_div_w :
midgard_op_ldst_perspective_div_z,
.load_store = {
.arg_1 = 0x20
.bitsize_toggle = true,
}
};
@ -167,9 +167,8 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
/* We found it, so rewrite it to project. Grab the
* modifier */
unsigned param = v->load_store.varying_parameters;
midgard_varying_parameter p;
memcpy(&p, &param, sizeof(p));
midgard_varying_params p =
midgard_unpack_varying_params(v->load_store);
if (p.modifier != midgard_varying_mod_none)
break;
@ -181,9 +180,7 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
midgard_varying_mod_perspective_w :
midgard_varying_mod_perspective_z;
/* Aliasing rules are annoying */
memcpy(&param, &p, sizeof(p));
v->load_store.varying_parameters = param;
midgard_pack_varying_params(&v->load_store, p);
/* Use the new destination */
v->dest = to;


@ -988,12 +988,14 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
.swizzle = SWIZZLE_IDENTITY_4,
.op = midgard_op_ld_ubo_128,
.load_store = {
.arg_1 = ctx->info->push.words[idx].ubo,
.arg_2 = 0x1E,
.index_reg = REGISTER_LDST_ZERO,
},
.constants.u32[0] = ctx->info->push.words[idx].offset
};
midgard_pack_ubo_index_imm(&ld.load_store,
ctx->info->push.words[idx].ubo);
mir_insert_instruction_before_scheduled(ctx, block, before, ld);
mir_rewrite_index_src_single(ins, ins->src[i], temp);


@ -140,14 +140,17 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
if (instructions[i]->type == TAG_LOAD_STORE_4 &&
load_store_opcode_props[instructions[i]->op].props & LDST_ADDRESS) {
unsigned type;
switch (instructions[i]->load_store.arg_1 & 0x3E) {
case LDST_SHARED: type = 0; break;
case LDST_SCRATCH: type = 1; break;
default: type = 2; break;
unsigned type = instructions[i]->load_store.arg_reg |
instructions[i]->load_store.arg_comp;
unsigned idx;
switch (type) {
case LDST_SHARED: idx = 0; break;
case LDST_SCRATCH: idx = 1; break;
default: idx = 2; break;
}
unsigned prev = prev_ldst[type];
unsigned prev = prev_ldst[idx];
if (prev != ~0) {
BITSET_WORD *dependents = instructions[prev]->dependents;
@ -160,7 +163,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
instructions[i]->nr_dependencies++;
}
prev_ldst[type] = i;
prev_ldst[idx] = i;
}
if (dest < node_count) {


@ -73,7 +73,7 @@ mir_analyze_ranges(compiler_context *ctx)
mir_foreach_instr_global(ctx, ins) {
if (!mir_is_direct_aligned_ubo(ins)) continue;
unsigned ubo = ins->load_store.arg_1;
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
unsigned offset = ins->constants.u32[0] / 16;
assert(ubo < res.nr_blocks);
@ -276,7 +276,7 @@ midgard_promote_uniforms(compiler_context *ctx)
mir_foreach_instr_global_safe(ctx, ins) {
if (!mir_is_direct_aligned_ubo(ins)) continue;
unsigned ubo = ins->load_store.arg_1;
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
unsigned qword = ins->constants.u32[0] / 16;
/* Check if we decided to push this */