mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 20:10:17 +01:00
pan/mdg: properly encode/decode ldst instructions
Signed-off-by: Italo Nicola <italonicola@collabora.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9461>
This commit is contained in:
parent
4b933260d9
commit
6a12ea02fe
11 changed files with 465 additions and 247 deletions
|
|
@ -516,9 +516,9 @@ void mir_insert_instruction_after_scheduled(compiler_context *ctx, midgard_block
|
|||
void mir_flip(midgard_instruction *ins);
|
||||
void mir_compute_temp_count(compiler_context *ctx);
|
||||
|
||||
#define LDST_GLOBAL 0x3E
|
||||
#define LDST_SHARED 0x2E
|
||||
#define LDST_SCRATCH 0x2A
|
||||
#define LDST_GLOBAL (REGISTER_LDST_ZERO << 2)
|
||||
#define LDST_SHARED ((REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z)
|
||||
#define LDST_SCRATCH ((REGISTER_LDST_PC_SP << 2) | COMPONENT_Z)
|
||||
|
||||
void mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg);
|
||||
void mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias);
|
||||
|
|
@ -573,8 +573,11 @@ v_load_store_scratch(
|
|||
.op = is_store ? midgard_op_st_128 : midgard_op_ld_128,
|
||||
.load_store = {
|
||||
/* For register spilling - to thread local storage */
|
||||
.arg_1 = 0xEA,
|
||||
.arg_2 = 0x1E,
|
||||
.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR,
|
||||
.arg_comp = COMPONENT_Z,
|
||||
.bitsize_toggle = true,
|
||||
.index_format = midgard_index_address_u32,
|
||||
.index_reg = REGISTER_LDST_ZERO,
|
||||
},
|
||||
|
||||
/* If we spill an unspill, RA goes into an infinite loop */
|
||||
|
|
|
|||
|
|
@ -304,6 +304,13 @@ static char *argmod_names[3] = {
|
|||
".x2",
|
||||
};
|
||||
|
||||
static char *index_format_names[4] = {
|
||||
"",
|
||||
".u64",
|
||||
".u32",
|
||||
".s32"
|
||||
};
|
||||
|
||||
static void
|
||||
print_outmod(FILE *fp, unsigned outmod, bool is_int)
|
||||
{
|
||||
|
|
@ -760,6 +767,21 @@ print_alu_mask(FILE *fp, uint8_t mask, unsigned bits, midgard_shrink_mode shrink
|
|||
fprintf(fp, " /* %X */", mask);
|
||||
}
|
||||
|
||||
/* TODO: 16-bit mode */
|
||||
static void
|
||||
print_ldst_mask(FILE *fp, unsigned mask, unsigned swizzle) {
|
||||
fprintf(fp, ".");
|
||||
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
bool write = (mask & (1 << i)) != 0;
|
||||
unsigned c = (swizzle >> (i * 2)) & 3;
|
||||
/* We can't omit the swizzle here since many ldst ops have a
|
||||
* combined swizzle/writemask, and it would be ambiguous to not
|
||||
* print the masked-out components. */
|
||||
fprintf(fp, "%c", write ? components[c] : '~');
|
||||
}
|
||||
}
|
||||
|
||||
/* Prints the 4-bit masks found in texture and load/store ops, as opposed to
|
||||
* the 8-bit masks found in (vector) ALU ops. Supports texture-style 16-bit
|
||||
* mode as well, but not load/store-style 16-bit mode. */
|
||||
|
|
@ -1235,41 +1257,40 @@ print_alu_word(FILE *fp, uint32_t *words, unsigned num_quad_words,
|
|||
return branch_forward;
|
||||
}
|
||||
|
||||
static void
|
||||
/* TODO: how can we use this now that we know that these params can't be known
|
||||
* before run time in every single case? Maybe just use it in the cases we can? */
|
||||
UNUSED static void
|
||||
print_varying_parameters(FILE *fp, midgard_load_store_word *word)
|
||||
{
|
||||
midgard_varying_parameter param;
|
||||
unsigned v = word->varying_parameters;
|
||||
memcpy(¶m, &v, sizeof(param));
|
||||
midgard_varying_params p = midgard_unpack_varying_params(*word);
|
||||
|
||||
if (param.is_varying) {
|
||||
/* If a varying, there are qualifiers */
|
||||
if (param.flat)
|
||||
fprintf(fp, ".flat");
|
||||
/* If a varying, there are qualifiers */
|
||||
if (p.flat_shading)
|
||||
fprintf(fp, ".flat");
|
||||
|
||||
if (param.interpolation != midgard_interp_default) {
|
||||
if (param.interpolation == midgard_interp_centroid)
|
||||
fprintf(fp, ".centroid");
|
||||
else if (param.interpolation == midgard_interp_sample)
|
||||
fprintf(fp, ".sample");
|
||||
else
|
||||
fprintf(fp, ".interp%d", param.interpolation);
|
||||
}
|
||||
if (p.perspective_correction)
|
||||
fprintf(fp, ".correction");
|
||||
|
||||
if (param.modifier != midgard_varying_mod_none) {
|
||||
if (param.modifier == midgard_varying_mod_perspective_w)
|
||||
fprintf(fp, ".perspectivew");
|
||||
else if (param.modifier == midgard_varying_mod_perspective_z)
|
||||
fprintf(fp, ".perspectivez");
|
||||
else
|
||||
fprintf(fp, ".mod%d", param.modifier);
|
||||
}
|
||||
} else if (param.flat || param.interpolation || param.modifier) {
|
||||
fprintf(fp, " /* is_varying not set but varying metadata attached */");
|
||||
if (p.centroid_mapping)
|
||||
fprintf(fp, ".centroid");
|
||||
|
||||
if (p.interpolate_sample)
|
||||
fprintf(fp, ".sample");
|
||||
|
||||
switch (p.modifier) {
|
||||
case midgard_varying_mod_perspective_y:
|
||||
fprintf(fp, ".perspectivey");
|
||||
break;
|
||||
case midgard_varying_mod_perspective_z:
|
||||
fprintf(fp, ".perspectivez");
|
||||
break;
|
||||
case midgard_varying_mod_perspective_w:
|
||||
fprintf(fp, ".perspectivew");
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid varying modifier");
|
||||
break;
|
||||
}
|
||||
|
||||
if (param.zero0 || param.zero1 || param.zero2)
|
||||
fprintf(fp, " /* zero tripped, %u %u %u */ ", param.zero0, param.zero1, param.zero2);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -1304,33 +1325,14 @@ is_op_attribute(unsigned op)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Helper to print integer well-formatted, but only when non-zero. */
|
||||
static void
|
||||
print_load_store_arg(FILE *fp, uint8_t arg, unsigned index)
|
||||
midgard_print_sint(FILE *fp, int n)
|
||||
{
|
||||
/* Try to interpret as a register */
|
||||
midgard_ldst_register_select sel;
|
||||
memcpy(&sel, &arg, sizeof(arg));
|
||||
|
||||
/* If unknown is set, we're not sure what this is or how to
|
||||
* interpret it. But if it's zero, we get it. */
|
||||
|
||||
if (sel.unknown) {
|
||||
fprintf(fp, "0x%02X", arg);
|
||||
return;
|
||||
}
|
||||
|
||||
print_ldst_read_reg(fp, sel.select);
|
||||
fprintf(fp, ".%c", components[sel.component]);
|
||||
|
||||
/* Only print a shift if it's non-zero. Shifts only make sense for the
|
||||
* second index. For the first, we're not sure what it means yet */
|
||||
|
||||
if (index == 1) {
|
||||
if (sel.shift)
|
||||
fprintf(fp, " << %u", sel.shift);
|
||||
} else {
|
||||
fprintf(fp, " /* %X */", sel.shift);
|
||||
}
|
||||
if (n > 0)
|
||||
fprintf(fp, " + 0x%X", n);
|
||||
else if (n < 0)
|
||||
fprintf(fp, " - 0x%X", -n);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1347,63 +1349,160 @@ print_load_store_instr(FILE *fp, uint64_t data)
|
|||
|
||||
print_ld_st_opcode(fp, word->op);
|
||||
|
||||
unsigned address = word->address;
|
||||
if (word->op == midgard_op_trap) {
|
||||
fprintf(fp, " 0x%X\n", word->signed_offset);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Print opcode modifiers */
|
||||
|
||||
if (OP_USES_ATTRIB(word->op)) /* which attrib table? */
|
||||
fprintf(fp, ".%s", (word->index_format >> 1) ? "secondary" : "primary");
|
||||
else if (word->op == midgard_op_ld_cubemap_coords || OP_IS_PROJECTION(word->op))
|
||||
fprintf(fp, ".%s", word->bitsize_toggle ? "f32" : "f16");
|
||||
|
||||
fprintf(fp, " ");
|
||||
|
||||
/* src/dest register */
|
||||
|
||||
if (!OP_IS_STORE(word->op)) {
|
||||
print_ldst_write_reg(fp, word->reg);
|
||||
|
||||
/* Some opcodes don't have a swizzable src register, and
|
||||
* instead the swizzle is applied before the result is written
|
||||
* to the dest reg. For these ops, we combine the writemask
|
||||
* with the swizzle to display them in the disasm compactly. */
|
||||
unsigned swizzle = word->swizzle;
|
||||
if ((OP_IS_REG2REG_LDST(word->op) &&
|
||||
word->op != midgard_op_lea &&
|
||||
word->op != midgard_op_lea_image) || OP_IS_ATOMIC(word->op))
|
||||
swizzle = 0xE4;
|
||||
print_ldst_mask(fp, word->mask, swizzle);
|
||||
} else {
|
||||
print_ldst_read_reg(fp, word->reg);
|
||||
print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough,
|
||||
midgard_reg_mode_32, 0xFF);
|
||||
}
|
||||
|
||||
/* ld_ubo args */
|
||||
if (OP_IS_UBO_READ(word->op)) {
|
||||
if (word->signed_offset & 1) { /* buffer index imm */
|
||||
unsigned imm = midgard_unpack_ubo_index_imm(*word);
|
||||
fprintf(fp, ", %u", imm);
|
||||
} else { /* buffer index from reg */
|
||||
fprintf(fp, ", ");
|
||||
print_ldst_read_reg(fp, word->arg_reg);
|
||||
fprintf(fp, ".%c", components[word->arg_comp]);
|
||||
}
|
||||
|
||||
fprintf(fp, ", ");
|
||||
print_ldst_read_reg(fp, word->index_reg);
|
||||
fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift);
|
||||
midgard_print_sint(fp, UNPACK_LDST_UBO_OFS(word->signed_offset));
|
||||
}
|
||||
|
||||
/* mem addr expression */
|
||||
if (OP_HAS_ADDRESS(word->op)) {
|
||||
fprintf(fp, ", [ ");
|
||||
print_ldst_read_reg(fp, word->arg_reg);
|
||||
fprintf(fp, ".u%d.%c",
|
||||
word->bitsize_toggle ? 64 : 32, components[word->arg_comp]);
|
||||
|
||||
if ((word->op < midgard_op_atomic_cmpxchg ||
|
||||
word->op > midgard_op_atomic_cmpxchg64_be) &&
|
||||
word->index_reg != 0x7) {
|
||||
fprintf(fp, " + (");
|
||||
print_ldst_read_reg(fp, word->index_reg);
|
||||
fprintf(fp, "%s.%c << %u)",
|
||||
index_format_names[word->index_format],
|
||||
components[word->index_comp], word->index_shift);
|
||||
}
|
||||
|
||||
midgard_print_sint(fp, word->signed_offset);
|
||||
|
||||
fprintf(fp, " ]");
|
||||
}
|
||||
|
||||
/* src reg for reg2reg ldst opcodes */
|
||||
if (OP_IS_REG2REG_LDST(word->op)) {
|
||||
fprintf(fp, ", ");
|
||||
print_ldst_read_reg(fp, word->arg_reg);
|
||||
print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough,
|
||||
midgard_reg_mode_32, 0xFF);
|
||||
}
|
||||
|
||||
/* atomic ops encode the source arg where the ldst swizzle would be. */
|
||||
if (OP_IS_ATOMIC(word->op)) {
|
||||
unsigned src = (word->swizzle >> 2) & 0x7;
|
||||
unsigned src_comp = word->swizzle & 0x3;
|
||||
fprintf(fp, ", ");
|
||||
print_ldst_read_reg(fp, src);
|
||||
fprintf(fp, ".%c", components[src_comp]);
|
||||
}
|
||||
|
||||
/* CMPXCHG encodes the extra comparison arg where the index reg would be. */
|
||||
if (word->op >= midgard_op_atomic_cmpxchg &&
|
||||
word->op <= midgard_op_atomic_cmpxchg64_be) {
|
||||
fprintf(fp, ", ");
|
||||
print_ldst_read_reg(fp, word->index_reg);
|
||||
fprintf(fp, ".%c", components[word->index_comp]);
|
||||
}
|
||||
|
||||
/* index reg for attr/vary/images, selector for ld/st_special */
|
||||
if (OP_IS_SPECIAL(word->op) || OP_USES_ATTRIB(word->op)) {
|
||||
fprintf(fp, ", ");
|
||||
print_ldst_read_reg(fp, word->index_reg);
|
||||
fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift);
|
||||
midgard_print_sint(fp, UNPACK_LDST_ATTRIB_OFS(word->signed_offset));
|
||||
}
|
||||
|
||||
/* vertex reg for attrib/varying ops, coord reg for image ops */
|
||||
if (OP_USES_ATTRIB(word->op)) {
|
||||
fprintf(fp, ", ");
|
||||
print_ldst_read_reg(fp, word->arg_reg);
|
||||
|
||||
if (OP_IS_IMAGE(word->op))
|
||||
fprintf(fp, ".u%d", word->bitsize_toggle ? 64 : 32);
|
||||
|
||||
fprintf(fp, ".%c", components[word->arg_comp]);
|
||||
|
||||
if (word->bitsize_toggle && !OP_IS_IMAGE(word->op))
|
||||
midgard_print_sint(fp, UNPACK_LDST_VERTEX_OFS(word->signed_offset));
|
||||
}
|
||||
|
||||
/* TODO: properly decode format specifier for PACK/UNPACK ops */
|
||||
if (OP_IS_PACK_COLOUR(word->op) || OP_IS_UNPACK_COLOUR(word->op)) {
|
||||
fprintf(fp, ", ");
|
||||
unsigned format_specifier = (word->signed_offset << 4) | word->index_shift;
|
||||
fprintf(fp, "0x%X", format_specifier);
|
||||
}
|
||||
|
||||
fprintf(fp, "\n");
|
||||
|
||||
/* Debugging stuff */
|
||||
|
||||
if (is_op_varying(word->op)) {
|
||||
print_varying_parameters(fp, word);
|
||||
/* Do some analysis: check if direct access */
|
||||
|
||||
/* Do some analysis: check if direct cacess */
|
||||
|
||||
if ((word->arg_2 == 0x1E) && midg_stats.varying_count >= 0)
|
||||
update_stats(&midg_stats.varying_count, address);
|
||||
if (word->index_reg == 0x7 && midg_stats.varying_count >= 0)
|
||||
update_stats(&midg_stats.varying_count,
|
||||
UNPACK_LDST_ATTRIB_OFS(word->signed_offset));
|
||||
else
|
||||
midg_stats.varying_count = -16;
|
||||
} else if (is_op_attribute(word->op)) {
|
||||
if ((word->arg_2 == 0x1E) && midg_stats.attribute_count >= 0)
|
||||
update_stats(&midg_stats.attribute_count, address);
|
||||
if (word->index_reg == 0x7 && midg_stats.attribute_count >= 0)
|
||||
update_stats(&midg_stats.attribute_count,
|
||||
UNPACK_LDST_ATTRIB_OFS(word->signed_offset));
|
||||
else
|
||||
midg_stats.attribute_count = -16;
|
||||
}
|
||||
|
||||
fprintf(fp, " ");
|
||||
|
||||
if (!OP_IS_STORE(word->op))
|
||||
print_ldst_write_reg(fp, word->reg);
|
||||
else
|
||||
print_ldst_read_reg(fp, word->reg);
|
||||
|
||||
print_mask_4(fp, word->mask, false);
|
||||
|
||||
if (!OP_IS_STORE(word->op))
|
||||
update_dest(word->reg);
|
||||
|
||||
bool is_ubo = OP_IS_UBO_READ(word->op);
|
||||
|
||||
if (is_ubo) {
|
||||
/* UBOs use their own addressing scheme */
|
||||
|
||||
int lo = word->varying_parameters >> 7;
|
||||
int hi = word->address;
|
||||
|
||||
/* TODO: Combine fields logically */
|
||||
address = (hi << 3) | lo;
|
||||
}
|
||||
|
||||
fprintf(fp, ", %u", address);
|
||||
|
||||
print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, midgard_reg_mode_32, 0xFF);
|
||||
|
||||
fprintf(fp, ", ");
|
||||
|
||||
if (is_ubo) {
|
||||
fprintf(fp, "ubo%u", word->arg_1);
|
||||
update_stats(&midg_stats.uniform_buffer_count, word->arg_1);
|
||||
} else
|
||||
print_load_store_arg(fp, word->arg_1, 0);
|
||||
|
||||
fprintf(fp, ", ");
|
||||
print_load_store_arg(fp, word->arg_2, 1);
|
||||
fprintf(fp, " /* %X */\n", word->varying_parameters);
|
||||
if (OP_IS_UBO_READ(word->op))
|
||||
update_stats(&midg_stats.uniform_buffer_count,
|
||||
UNPACK_LDST_UBO_OFS(word->signed_offset));
|
||||
|
||||
midg_stats.instruction_count++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -355,9 +355,9 @@ mir_is_simple_swizzle(unsigned *swizzle, unsigned mask)
|
|||
/* Packs a load/store argument */
|
||||
|
||||
static inline uint8_t
|
||||
midgard_ldst_reg(unsigned reg, unsigned component, unsigned size)
|
||||
midgard_ldst_comp(unsigned reg, unsigned component, unsigned size)
|
||||
{
|
||||
assert((reg == REGISTER_LDST_BASE) || (reg == REGISTER_LDST_BASE + 1));
|
||||
assert((reg & ~1) == 0);
|
||||
assert(size == 16 || size == 32 || size == 64);
|
||||
|
||||
/* Shift so everything is in terms of 32-bit units */
|
||||
|
|
@ -369,17 +369,38 @@ midgard_ldst_reg(unsigned reg, unsigned component, unsigned size)
|
|||
component >>= 1;
|
||||
}
|
||||
|
||||
midgard_ldst_register_select sel = {
|
||||
.component = component,
|
||||
.select = reg - 26
|
||||
};
|
||||
|
||||
uint8_t packed;
|
||||
memcpy(&packed, &sel, sizeof(packed));
|
||||
|
||||
return packed;
|
||||
return component;
|
||||
}
|
||||
|
||||
/* Packs/unpacks a ubo index immediate */
|
||||
|
||||
void midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index);
|
||||
unsigned midgard_unpack_ubo_index_imm(midgard_load_store_word word);
|
||||
|
||||
/* Packs/unpacks varying parameters.
|
||||
* FIXME: IMPORTANT: We currently handle varying mode weirdly, by passing all
|
||||
* parameters via an offset and using REGISTER_LDST_ZERO as base. This works
|
||||
* for most parameters, but does not allow us to encode/decode direct sample
|
||||
* position. */
|
||||
void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p);
|
||||
midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word);
|
||||
|
||||
/* Load/store ops' displacement helpers.
|
||||
* This is useful because different types of load/store ops have different
|
||||
* displacement bitsize. */
|
||||
|
||||
#define UNPACK_LDST_ATTRIB_OFS(a) ((a) >> 9)
|
||||
#define UNPACK_LDST_VERTEX_OFS(a) util_sign_extend((a) & 0x1FF, 9)
|
||||
#define UNPACK_LDST_SELECTOR_OFS(a) ((a) >> 9)
|
||||
#define UNPACK_LDST_UBO_OFS(a) ((a) >> 2)
|
||||
#define UNPACK_LDST_MEM_OFS(a) ((a))
|
||||
|
||||
#define PACK_LDST_ATTRIB_OFS(a) ((a) << 9)
|
||||
#define PACK_LDST_VERTEX_OFS(a) ((a) & 0x1FF)
|
||||
#define PACK_LDST_SELECTOR_OFS(a) ((a) << 9)
|
||||
#define PACK_LDST_UBO_OFS(a) ((a) << 2)
|
||||
#define PACK_LDST_MEM_OFS(a) ((a))
|
||||
|
||||
static inline bool
|
||||
midgard_is_branch_unit(unsigned unit)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -664,31 +664,37 @@ typedef enum {
|
|||
typedef enum {
|
||||
midgard_varying_mod_none = 0,
|
||||
|
||||
/* Other values unknown */
|
||||
|
||||
/* Take the would-be result and divide all components by its z/w
|
||||
/* Take the would-be result and divide all components by its y/z/w
|
||||
* (perspective division baked in with the load) */
|
||||
midgard_varying_mod_perspective_y = 1,
|
||||
midgard_varying_mod_perspective_z = 2,
|
||||
midgard_varying_mod_perspective_w = 3,
|
||||
|
||||
/* The result is a 64-bit cubemap descriptor to use with
|
||||
* midgard_tex_op_normal or midgard_tex_op_gradient */
|
||||
midgard_varying_mod_cubemap = 4,
|
||||
} midgard_varying_modifier;
|
||||
|
||||
typedef struct
|
||||
__attribute__((__packed__))
|
||||
{
|
||||
unsigned zero0 : 1; /* Always zero */
|
||||
midgard_varying_modifier modifier : 3;
|
||||
|
||||
midgard_varying_modifier modifier : 2;
|
||||
bool flat_shading : 1;
|
||||
|
||||
unsigned zero1: 1; /* Always zero */
|
||||
/* These are ignored if flat_shading is enabled. */
|
||||
bool perspective_correction : 1;
|
||||
bool centroid_mapping : 1;
|
||||
|
||||
/* Varying qualifiers, zero if not a varying */
|
||||
unsigned flat : 1;
|
||||
unsigned is_varying : 1; /* Always one for varying, but maybe something else? */
|
||||
midgard_interpolation interpolation : 2;
|
||||
/* This is ignored if the shader only runs once per pixel. */
|
||||
bool interpolate_sample : 1;
|
||||
|
||||
unsigned zero2 : 2; /* Always zero */
|
||||
bool zero0 : 1; /* Always zero */
|
||||
|
||||
unsigned direct_sample_pos_x : 4;
|
||||
unsigned direct_sample_pos_y : 4;
|
||||
}
|
||||
midgard_varying_parameter;
|
||||
midgard_varying_params;
|
||||
|
||||
/* 8-bit register/etc selector for load/store ops */
|
||||
typedef struct
|
||||
|
|
@ -711,26 +717,56 @@ __attribute__((__packed__))
|
|||
}
|
||||
midgard_ldst_register_select;
|
||||
|
||||
typedef enum {
|
||||
/* 0 is reserved */
|
||||
midgard_index_address_u64 = 1,
|
||||
midgard_index_address_u32 = 2,
|
||||
midgard_index_address_s32 = 3,
|
||||
} midgard_index_address_format;
|
||||
|
||||
typedef struct
|
||||
__attribute__((__packed__))
|
||||
{
|
||||
midgard_load_store_op op : 8;
|
||||
unsigned reg : 5;
|
||||
unsigned mask : 4;
|
||||
|
||||
/* Source/dest reg */
|
||||
unsigned reg : 5;
|
||||
|
||||
/* Generally is a writemask.
|
||||
* For ST_ATTR and ST_TEX, unused.
|
||||
* For other stores, each bit masks 1/4th of the output. */
|
||||
unsigned mask : 4;
|
||||
|
||||
/* Swizzle for stores, but for atomics it encodes also the source
|
||||
* register. This fits because atomics dont need a swizzle since they
|
||||
* are not vectorized instructions. */
|
||||
unsigned swizzle : 8;
|
||||
|
||||
/* Load/store ops can take two additional registers as arguments, but
|
||||
* these are limited to load/store registers with only a few supported
|
||||
* mask/swizzle combinations. The tradeoff is these are much more
|
||||
* compact, requiring 8-bits each rather than 17-bits for a full
|
||||
* reg/mask/swizzle. Usually (?) encoded as
|
||||
* midgard_ldst_register_select. */
|
||||
unsigned arg_1 : 8;
|
||||
unsigned arg_2 : 8;
|
||||
/* Arg reg, meaning changes according to each opcode */
|
||||
unsigned arg_comp : 2;
|
||||
unsigned arg_reg : 3;
|
||||
|
||||
unsigned varying_parameters : 10;
|
||||
/* 64-bit address enable
|
||||
* 32-bit data type enable for CUBEMAP and perspective div.
|
||||
* Explicit indexing enable for LD_ATTR.
|
||||
* 64-bit coordinate enable for LD_IMAGE. */
|
||||
bool bitsize_toggle : 1;
|
||||
|
||||
unsigned address : 9;
|
||||
/* These are mainly used for opcodes that have addresses.
|
||||
* For cmpxchg, index_reg is used for the comparison value.
|
||||
* For ops that access the attrib table, bit 1 encodes which table.
|
||||
* For LD_VAR and LD/ST_ATTR, bit 0 enables dest/src type inferral. */
|
||||
midgard_index_address_format index_format : 2;
|
||||
unsigned index_comp : 2;
|
||||
unsigned index_reg : 3;
|
||||
unsigned index_shift : 4;
|
||||
|
||||
/* Generaly is a signed offset, but has different bitsize and starts at
|
||||
* different bits depending on the opcode, LDST_*_DISPLACEMENT helpers
|
||||
* are recommended when packing/unpacking this attribute.
|
||||
* For LD_UBO, bit 0 enables ubo index immediate.
|
||||
* For LD_TILEBUFFER_RAW, bit 0 disables sample index immediate. */
|
||||
int signed_offset : 18;
|
||||
}
|
||||
midgard_load_store_word;
|
||||
|
||||
|
|
|
|||
|
|
@ -36,17 +36,11 @@
|
|||
* This allows for fast indexing into arrays. This file tries to pattern match the offset in NIR with this form to reduce pressure on the ALU pipe.
|
||||
*/
|
||||
|
||||
enum index_type {
|
||||
ITYPE_U64 = 1 << 6,
|
||||
ITYPE_U32 = 2 << 6, // zero-extend
|
||||
ITYPE_I32 = 3 << 6, // sign-extend
|
||||
};
|
||||
|
||||
struct mir_address {
|
||||
nir_ssa_scalar A;
|
||||
nir_ssa_scalar B;
|
||||
|
||||
enum index_type type;
|
||||
midgard_index_address_format type;
|
||||
unsigned shift;
|
||||
unsigned bias;
|
||||
};
|
||||
|
|
@ -136,7 +130,7 @@ mir_match_u2u64(struct mir_address *address)
|
|||
nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);
|
||||
|
||||
address->B = arg;
|
||||
address->type = ITYPE_U32;
|
||||
address->type = midgard_index_address_u32;
|
||||
}
|
||||
|
||||
/* Matches i2i64 and sets type */
|
||||
|
|
@ -155,7 +149,7 @@ mir_match_i2i64(struct mir_address *address)
|
|||
nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);
|
||||
|
||||
address->B = arg;
|
||||
address->type = ITYPE_I32;
|
||||
address->type = midgard_index_address_s32;
|
||||
}
|
||||
|
||||
/* Matches ishl to shift */
|
||||
|
|
@ -210,7 +204,7 @@ mir_match_offset(nir_ssa_def *offset, bool first_free, bool extend)
|
|||
{
|
||||
struct mir_address address = {
|
||||
.B = { .def = offset },
|
||||
.type = extend ? ITYPE_U64 : ITYPE_U32,
|
||||
.type = extend ? midgard_index_address_u64 : midgard_index_address_u32,
|
||||
};
|
||||
|
||||
mir_match_mov(&address);
|
||||
|
|
@ -243,14 +237,16 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset,
|
|||
bool force_sext = (nir_src_bit_size(*offset) < 64);
|
||||
|
||||
if (!offset->is_ssa) {
|
||||
ins->load_store.arg_1 |= seg;
|
||||
ins->load_store.bitsize_toggle = true;
|
||||
ins->load_store.arg_comp = seg & 0x3;
|
||||
ins->load_store.arg_reg = (seg >> 2) & 0x7;
|
||||
ins->src[2] = nir_src_index(ctx, offset);
|
||||
ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset);
|
||||
|
||||
if (force_sext)
|
||||
ins->load_store.arg_1 |= ITYPE_I32;
|
||||
ins->load_store.index_format = midgard_index_address_s32;
|
||||
else
|
||||
ins->load_store.arg_1 |= ITYPE_U64;
|
||||
ins->load_store.index_format = midgard_index_address_u64;
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
@ -263,23 +259,26 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset,
|
|||
ins->src[1] = nir_ssa_index(match.A.def);
|
||||
ins->swizzle[1][0] = match.A.comp;
|
||||
ins->src_types[1] = nir_type_uint | match.A.def->bit_size;
|
||||
} else
|
||||
ins->load_store.arg_1 |= seg;
|
||||
} else {
|
||||
ins->load_store.bitsize_toggle = true;
|
||||
ins->load_store.arg_comp = seg & 0x3;
|
||||
ins->load_store.arg_reg = (seg >> 2) & 0x7;
|
||||
}
|
||||
|
||||
if (match.B.def) {
|
||||
ins->src[2] = nir_ssa_index(match.B.def);
|
||||
ins->swizzle[2][0] = match.B.comp;
|
||||
ins->src_types[2] = nir_type_uint | match.B.def->bit_size;
|
||||
} else
|
||||
ins->load_store.arg_2 = 0x1E;
|
||||
ins->load_store.index_reg = REGISTER_LDST_ZERO;
|
||||
|
||||
if (force_sext)
|
||||
match.type = ITYPE_I32;
|
||||
match.type = midgard_index_address_s32;
|
||||
|
||||
ins->load_store.arg_1 |= match.type;
|
||||
ins->load_store.index_format = match.type;
|
||||
|
||||
assert(match.shift <= 7);
|
||||
ins->load_store.arg_2 |= (match.shift) << 5;
|
||||
ins->load_store.index_shift = match.shift;
|
||||
|
||||
ins->constants.u32[0] = match.bias;
|
||||
}
|
||||
|
|
@ -298,6 +297,6 @@ mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias)
|
|||
ins->swizzle[2][i] = match.B.comp;
|
||||
}
|
||||
|
||||
ins->load_store.arg_2 |= (match.shift) << 5;
|
||||
ins->load_store.index_shift = match.shift;
|
||||
ins->constants.u32[0] = match.bias + bias;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ schedule_barrier(compiler_context *ctx)
|
|||
.swizzle = SWIZZLE_IDENTITY_4, \
|
||||
.op = midgard_op_##name, \
|
||||
.load_store = { \
|
||||
.address = address \
|
||||
.signed_offset = address \
|
||||
} \
|
||||
}; \
|
||||
\
|
||||
|
|
@ -1164,20 +1164,20 @@ emit_ubo_read(
|
|||
if (indirect_offset) {
|
||||
ins.src[2] = nir_src_index(ctx, indirect_offset);
|
||||
ins.src_types[2] = nir_type_uint32;
|
||||
ins.load_store.arg_2 = (indirect_shift << 5);
|
||||
ins.load_store.index_shift = indirect_shift;
|
||||
|
||||
/* X component for the whole swizzle to prevent register
|
||||
* pressure from ballooning from the extra components */
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[2]); ++i)
|
||||
ins.swizzle[2][i] = 0;
|
||||
} else {
|
||||
ins.load_store.arg_2 = 0x1E;
|
||||
ins.load_store.index_reg = REGISTER_LDST_ZERO;
|
||||
}
|
||||
|
||||
if (indirect_offset && indirect_offset->is_ssa && !indirect_shift)
|
||||
mir_set_ubo_offset(&ins, indirect_offset, offset);
|
||||
|
||||
ins.load_store.arg_1 = index;
|
||||
midgard_pack_ubo_index_imm(&ins.load_store, index);
|
||||
|
||||
return emit_mir_instruction(ctx, ins);
|
||||
}
|
||||
|
|
@ -1274,12 +1274,6 @@ emit_atomic(
|
|||
nir_src *src_offset = nir_get_io_offset_src(instr);
|
||||
|
||||
if (op == midgard_op_atomic_cmpxchg) {
|
||||
for(unsigned i = 0; i < 2; ++i)
|
||||
ins.swizzle[1][i] = i;
|
||||
|
||||
ins.src[1] = is_image ? image_direct_address : nir_src_index(ctx, src_offset);
|
||||
ins.src_types[1] = nir_type_uint64;
|
||||
|
||||
unsigned xchg_val_src = is_image ? 4 : 2;
|
||||
unsigned xchg_val = nir_src_index(ctx, &instr->src[xchg_val_src]);
|
||||
emit_explicit_constant(ctx, xchg_val, xchg_val);
|
||||
|
|
@ -1288,8 +1282,18 @@ emit_atomic(
|
|||
ins.src_types[2] = type | bitsize;
|
||||
ins.src[3] = xchg_val;
|
||||
|
||||
if (is_shared)
|
||||
ins.load_store.arg_1 |= 0x6E;
|
||||
if (is_shared) {
|
||||
ins.load_store.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR;
|
||||
ins.load_store.arg_comp = COMPONENT_Z;
|
||||
ins.load_store.bitsize_toggle = true;
|
||||
} else {
|
||||
for(unsigned i = 0; i < 2; ++i)
|
||||
ins.swizzle[1][i] = i;
|
||||
|
||||
ins.src[1] = is_image ? image_direct_address :
|
||||
nir_src_index(ctx, src_offset);
|
||||
ins.src_types[1] = nir_type_uint64;
|
||||
}
|
||||
} else if (is_image) {
|
||||
for(unsigned i = 0; i < 2; ++i)
|
||||
ins.swizzle[2][i] = i;
|
||||
|
|
@ -1297,7 +1301,9 @@ emit_atomic(
|
|||
ins.src[2] = image_direct_address;
|
||||
ins.src_types[2] = nir_type_uint64;
|
||||
|
||||
ins.load_store.arg_1 |= 0x7E;
|
||||
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
|
||||
ins.load_store.bitsize_toggle = true;
|
||||
ins.load_store.index_format = midgard_index_address_u64;
|
||||
} else
|
||||
mir_set_offset(ctx, &ins, src_offset, is_shared ? LDST_SHARED : LDST_GLOBAL);
|
||||
|
||||
|
|
@ -1316,7 +1322,7 @@ emit_varying_read(
|
|||
/* XXX: Half-floats? */
|
||||
/* TODO: swizzle, mask */
|
||||
|
||||
midgard_instruction ins = m_ld_vary_32(dest, offset);
|
||||
midgard_instruction ins = m_ld_vary_32(dest, PACK_LDST_ATTRIB_OFS(offset));
|
||||
ins.mask = mask_of(nr_comp);
|
||||
ins.dest_type = type;
|
||||
|
||||
|
|
@ -1328,23 +1334,22 @@ emit_varying_read(
|
|||
for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i)
|
||||
ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W);
|
||||
|
||||
midgard_varying_parameter p = {
|
||||
.is_varying = 1,
|
||||
.interpolation = midgard_interp_default,
|
||||
.flat = flat,
|
||||
};
|
||||
|
||||
unsigned u;
|
||||
memcpy(&u, &p, sizeof(p));
|
||||
ins.load_store.varying_parameters = u;
|
||||
midgard_varying_params p = {
|
||||
.flat_shading = flat,
|
||||
.perspective_correction = 1,
|
||||
.interpolate_sample = true,
|
||||
};
|
||||
midgard_pack_varying_params(&ins.load_store, p);
|
||||
|
||||
if (indirect_offset) {
|
||||
ins.src[2] = nir_src_index(ctx, indirect_offset);
|
||||
ins.src_types[2] = nir_type_uint32;
|
||||
} else
|
||||
ins.load_store.arg_2 = 0x1E;
|
||||
ins.load_store.index_reg = REGISTER_LDST_ZERO;
|
||||
|
||||
ins.load_store.arg_1 = 0x9E;
|
||||
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
|
||||
ins.load_store.index_format = midgard_index_address_u32;
|
||||
|
||||
/* Use the type appropriate load */
|
||||
switch (type) {
|
||||
|
|
@ -1402,16 +1407,16 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
|
|||
emit_explicit_constant(ctx, val, val);
|
||||
|
||||
nir_alu_type type = nir_intrinsic_src_type(instr);
|
||||
ins = st_image(type, val, address);
|
||||
ins = st_image(type, val, PACK_LDST_ATTRIB_OFS(address));
|
||||
nir_alu_type base_type = nir_alu_type_get_base_type(type);
|
||||
ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]);
|
||||
} else if (is_atomic) { /* emit lea_image */
|
||||
unsigned dest = make_compiler_temp_reg(ctx);
|
||||
ins = m_lea_image(dest, address);
|
||||
ins = m_lea_image(dest, PACK_LDST_ATTRIB_OFS(address));
|
||||
ins.mask = mask_of(2); /* 64-bit memory address */
|
||||
} else { /* emit ld_image_* */
|
||||
nir_alu_type type = nir_intrinsic_dest_type(instr);
|
||||
ins = ld_image(type, nir_dest_index(&instr->dest), address);
|
||||
ins = ld_image(type, nir_dest_index(&instr->dest), PACK_LDST_ATTRIB_OFS(address));
|
||||
ins.mask = mask_of(nir_intrinsic_dest_components(instr));
|
||||
ins.dest_type = type;
|
||||
}
|
||||
|
|
@ -1420,7 +1425,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
|
|||
ins.src[1] = coord_reg;
|
||||
ins.src_types[1] = nir_type_uint16;
|
||||
if (nr_dim == 3 || is_array) {
|
||||
ins.load_store.arg_1 |= 0x20;
|
||||
ins.load_store.bitsize_toggle = true;
|
||||
}
|
||||
|
||||
/* Image index reg */
|
||||
|
|
@ -1428,7 +1433,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
|
|||
ins.src[2] = nir_src_index(ctx, index);
|
||||
ins.src_types[2] = nir_type_uint32;
|
||||
} else
|
||||
ins.load_store.arg_2 = 0x1E;
|
||||
ins.load_store.index_reg = REGISTER_LDST_ZERO;
|
||||
|
||||
emit_mir_instruction(ctx, ins);
|
||||
|
||||
|
|
@ -1441,9 +1446,9 @@ emit_attr_read(
|
|||
unsigned dest, unsigned offset,
|
||||
unsigned nr_comp, nir_alu_type t)
|
||||
{
|
||||
midgard_instruction ins = m_ld_attr_32(dest, offset);
|
||||
ins.load_store.arg_1 = 0x1E;
|
||||
ins.load_store.arg_2 = 0x1E;
|
||||
midgard_instruction ins = m_ld_attr_32(dest, PACK_LDST_ATTRIB_OFS(offset));
|
||||
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
|
||||
ins.load_store.index_reg = REGISTER_LDST_ZERO;
|
||||
ins.mask = mask_of(nr_comp);
|
||||
|
||||
/* Use the type appropriate load */
|
||||
|
|
@ -1493,12 +1498,12 @@ compute_builtin_arg(nir_op op)
|
|||
{
|
||||
switch (op) {
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
return 0x14;
|
||||
return REGISTER_LDST_GROUP_ID;
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
return 0x10;
|
||||
return REGISTER_LDST_LOCAL_THREAD_ID;
|
||||
case nir_intrinsic_load_global_invocation_id:
|
||||
case nir_intrinsic_load_global_invocation_id_zero_base:
|
||||
return 0x18;
|
||||
return REGISTER_LDST_GLOBAL_THREAD_ID;
|
||||
default:
|
||||
unreachable("Invalid compute paramater loaded");
|
||||
}
|
||||
|
|
@ -1567,7 +1572,7 @@ emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
midgard_instruction ins = m_ldst_mov(reg, 0);
|
||||
ins.mask = mask_of(3);
|
||||
ins.swizzle[0][3] = COMPONENT_X; /* xyzx */
|
||||
ins.load_store.arg_1 = compute_builtin_arg(instr->intrinsic);
|
||||
ins.load_store.arg_reg = compute_builtin_arg(instr->intrinsic);
|
||||
emit_mir_instruction(ctx, ins);
|
||||
}
|
||||
|
||||
|
|
@ -1598,8 +1603,8 @@ emit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx)
|
|||
|
||||
midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0);
|
||||
ld.op = midgard_op_ld_special_32u;
|
||||
ld.load_store.address = idx;
|
||||
ld.load_store.arg_2 = 0x1E;
|
||||
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(idx);
|
||||
ld.load_store.index_reg = REGISTER_LDST_ZERO;
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
ld.swizzle[0][i] = COMPONENT_X;
|
||||
|
|
@ -1790,20 +1795,25 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
|
||||
midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0);
|
||||
|
||||
ld.load_store.arg_2 = output_load_rt_addr(ctx, instr);
|
||||
unsigned target = output_load_rt_addr(ctx, instr);
|
||||
ld.load_store.index_comp = target & 0x3;
|
||||
ld.load_store.index_reg = target >> 2;
|
||||
|
||||
if (nir_src_is_const(instr->src[0])) {
|
||||
ld.load_store.arg_1 = nir_src_as_uint(instr->src[0]);
|
||||
unsigned sample = nir_src_as_uint(instr->src[0]);
|
||||
ld.load_store.arg_comp = sample & 0x3;
|
||||
ld.load_store.arg_reg = sample >> 2;
|
||||
} else {
|
||||
ld.load_store.varying_parameters = 2;
|
||||
/* Enable sample index via register. */
|
||||
ld.load_store.signed_offset |= 1;
|
||||
ld.src[1] = nir_src_index(ctx, &instr->src[0]);
|
||||
ld.src_types[1] = nir_type_int32;
|
||||
}
|
||||
|
||||
if (ctx->quirks & MIDGARD_OLD_BLEND) {
|
||||
ld.op = midgard_op_ld_special_32u;
|
||||
ld.load_store.address = 16;
|
||||
ld.load_store.arg_2 = 0x1E;
|
||||
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(16);
|
||||
ld.load_store.index_reg = REGISTER_LDST_ZERO;
|
||||
}
|
||||
|
||||
emit_mir_instruction(ctx, ld);
|
||||
|
|
@ -1821,7 +1831,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
else
|
||||
ld = m_ld_tilebuffer_32f(reg, 0);
|
||||
|
||||
ld.load_store.arg_2 = output_load_rt_addr(ctx, instr);
|
||||
unsigned index = output_load_rt_addr(ctx, instr);
|
||||
ld.load_store.index_comp = index & 0x3;
|
||||
ld.load_store.index_reg = index >> 2;
|
||||
|
||||
for (unsigned c = 4; c < 16; ++c)
|
||||
ld.swizzle[0][c] = 0;
|
||||
|
|
@ -1831,8 +1843,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
ld.op = midgard_op_ld_special_16f;
|
||||
else
|
||||
ld.op = midgard_op_ld_special_32f;
|
||||
ld.load_store.address = 1;
|
||||
ld.load_store.arg_2 = 0x1E;
|
||||
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(1);
|
||||
ld.load_store.index_reg = REGISTER_LDST_ZERO;
|
||||
}
|
||||
|
||||
emit_mir_instruction(ctx, ld);
|
||||
|
|
@ -1924,9 +1936,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
unsigned dst_component = nir_intrinsic_component(instr);
|
||||
unsigned nr_comp = nir_src_num_components(instr->src[0]);
|
||||
|
||||
midgard_instruction st = m_st_vary_32(reg, offset);
|
||||
st.load_store.arg_1 = 0x9E;
|
||||
st.load_store.arg_2 = 0x1E;
|
||||
midgard_instruction st = m_st_vary_32(reg, PACK_LDST_ATTRIB_OFS(offset));
|
||||
st.load_store.arg_reg = REGISTER_LDST_ZERO;
|
||||
st.load_store.index_format = midgard_index_address_u32;
|
||||
st.load_store.index_reg = REGISTER_LDST_ZERO;
|
||||
|
||||
switch (nir_alu_type_get_base_type(nir_intrinsic_src_type(instr))) {
|
||||
case nir_type_uint:
|
||||
|
|
@ -2211,7 +2224,7 @@ set_tex_coord(compiler_context *ctx, nir_tex_instr *instr,
|
|||
ld.src[1] = coords;
|
||||
ld.src_types[1] = ins->src_types[1];
|
||||
ld.mask = 0x3; /* xy */
|
||||
ld.load_store.arg_1 = 0x20;
|
||||
ld.load_store.bitsize_toggle = true;
|
||||
ld.swizzle[1][3] = COMPONENT_X;
|
||||
emit_mir_instruction(ctx, ld);
|
||||
|
||||
|
|
|
|||
|
|
@ -44,6 +44,47 @@ mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar)
|
|||
return midgard_int_zero_extend;
|
||||
}
|
||||
|
||||
void
|
||||
midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index)
|
||||
{
|
||||
word->arg_comp = index & 0x3;
|
||||
word->arg_reg = (index >> 2) & 0x7;
|
||||
word->bitsize_toggle = (index >> 5) & 0x1;
|
||||
word->index_format = (index >> 6) & 0x3;
|
||||
}
|
||||
|
||||
unsigned
|
||||
midgard_unpack_ubo_index_imm(midgard_load_store_word word)
|
||||
{
|
||||
unsigned ubo = word.arg_comp |
|
||||
(word.arg_reg << 2) |
|
||||
(word.bitsize_toggle << 5) |
|
||||
(word.index_format << 6);
|
||||
|
||||
return ubo;
|
||||
}
|
||||
|
||||
void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p)
|
||||
{
|
||||
/* Currently these parameters are not supported. */
|
||||
assert(p.direct_sample_pos_x == 0 && p.direct_sample_pos_y == 0);
|
||||
|
||||
unsigned u;
|
||||
memcpy(&u, &p, sizeof(p));
|
||||
|
||||
word->signed_offset |= u & 0x1FF;
|
||||
}
|
||||
|
||||
midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word)
|
||||
{
|
||||
unsigned params = word.signed_offset & 0x1FF;
|
||||
|
||||
midgard_varying_params p;
|
||||
memcpy(&p, ¶ms, sizeof(p));
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
unsigned
|
||||
mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar)
|
||||
{
|
||||
|
|
@ -578,15 +619,15 @@ load_store_from_instr(midgard_instruction *ins)
|
|||
}
|
||||
|
||||
if (ins->src[1] != ~0) {
|
||||
unsigned src = SSA_REG_FROM_FIXED(ins->src[1]);
|
||||
ldst.arg_reg = SSA_REG_FROM_FIXED(ins->src[1]) - REGISTER_LDST_BASE;
|
||||
unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]);
|
||||
ldst.arg_1 |= midgard_ldst_reg(src, ins->swizzle[1][0], sz);
|
||||
ldst.arg_comp = midgard_ldst_comp(ldst.arg_reg, ins->swizzle[1][0], sz);
|
||||
}
|
||||
|
||||
if (ins->src[2] != ~0) {
|
||||
unsigned src = SSA_REG_FROM_FIXED(ins->src[2]);
|
||||
ldst.index_reg = SSA_REG_FROM_FIXED(ins->src[2]) - REGISTER_LDST_BASE;
|
||||
unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]);
|
||||
ldst.arg_2 |= midgard_ldst_reg(src, ins->swizzle[2][0], sz);
|
||||
ldst.index_comp = midgard_ldst_comp(ldst.index_reg, ins->swizzle[2][0], sz);
|
||||
}
|
||||
|
||||
return ldst;
|
||||
|
|
@ -876,13 +917,22 @@ emit_alu_bundle(compiler_context *ctx,
|
|||
* over some other semantic distinction else well, but it unifies things in the
|
||||
* compiler so I don't mind. */
|
||||
|
||||
static unsigned
|
||||
mir_ldst_imm_shift(midgard_load_store_op op)
|
||||
static void
|
||||
mir_ldst_pack_offset(midgard_instruction *ins, int offset)
|
||||
{
|
||||
if (OP_IS_UBO_READ(op))
|
||||
return 3;
|
||||
/* These opcodes don't support offsets */
|
||||
assert(!OP_IS_REG2REG_LDST(ins->op) ||
|
||||
ins->op == midgard_op_lea ||
|
||||
ins->op == midgard_op_lea_image);
|
||||
|
||||
if (OP_IS_UBO_READ(ins->op))
|
||||
ins->load_store.signed_offset |= PACK_LDST_UBO_OFS(offset);
|
||||
else if (OP_IS_IMAGE(ins->op))
|
||||
ins->load_store.signed_offset |= PACK_LDST_ATTRIB_OFS(offset);
|
||||
else if (OP_IS_SPECIAL(ins->op))
|
||||
ins->load_store.signed_offset |= PACK_LDST_SELECTOR_OFS(offset);
|
||||
else
|
||||
return 1;
|
||||
ins->load_store.signed_offset |= PACK_LDST_MEM_OFS(offset);
|
||||
}
|
||||
|
||||
static enum mali_sampler_type
|
||||
|
|
@ -931,22 +981,17 @@ emit_binary_bundle(compiler_context *ctx,
|
|||
/* Copy masks */
|
||||
|
||||
for (unsigned i = 0; i < bundle->instruction_count; ++i) {
|
||||
mir_pack_ldst_mask(bundle->instructions[i]);
|
||||
midgard_instruction *ins = bundle->instructions[i];
|
||||
mir_pack_ldst_mask(ins);
|
||||
|
||||
/* Atomic ops don't use this swizzle the same way as other ops */
|
||||
if (!OP_IS_ATOMIC(bundle->instructions[i]->op))
|
||||
mir_pack_swizzle_ldst(bundle->instructions[i]);
|
||||
if (!OP_IS_ATOMIC(ins->op))
|
||||
mir_pack_swizzle_ldst(ins);
|
||||
|
||||
/* Apply a constant offset */
|
||||
unsigned offset = bundle->instructions[i]->constants.u32[0];
|
||||
|
||||
if (offset) {
|
||||
unsigned shift = mir_ldst_imm_shift(bundle->instructions[i]->op);
|
||||
unsigned upper_shift = 10 - shift;
|
||||
|
||||
bundle->instructions[i]->load_store.varying_parameters |= (offset & ((1 << upper_shift) - 1)) << shift;
|
||||
bundle->instructions[i]->load_store.address |= (offset >> upper_shift);
|
||||
}
|
||||
unsigned offset = ins->constants.u32[0];
|
||||
if (offset)
|
||||
mir_ldst_pack_offset(ins, offset);
|
||||
}
|
||||
|
||||
midgard_load_store_word ldst0 =
|
||||
|
|
|
|||
|
|
@ -125,7 +125,7 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
|
|||
midgard_op_ldst_perspective_div_w :
|
||||
midgard_op_ldst_perspective_div_z,
|
||||
.load_store = {
|
||||
.arg_1 = 0x20
|
||||
.bitsize_toggle = true,
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -167,9 +167,8 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
|
|||
/* We found it, so rewrite it to project. Grab the
|
||||
* modifier */
|
||||
|
||||
unsigned param = v->load_store.varying_parameters;
|
||||
midgard_varying_parameter p;
|
||||
memcpy(&p, ¶m, sizeof(p));
|
||||
midgard_varying_params p =
|
||||
midgard_unpack_varying_params(v->load_store);
|
||||
|
||||
if (p.modifier != midgard_varying_mod_none)
|
||||
break;
|
||||
|
|
@ -181,9 +180,7 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
|
|||
midgard_varying_mod_perspective_w :
|
||||
midgard_varying_mod_perspective_z;
|
||||
|
||||
/* Aliasing rules are annoying */
|
||||
memcpy(¶m, &p, sizeof(p));
|
||||
v->load_store.varying_parameters = param;
|
||||
midgard_pack_varying_params(&v->load_store, p);
|
||||
|
||||
/* Use the new destination */
|
||||
v->dest = to;
|
||||
|
|
|
|||
|
|
@ -988,12 +988,14 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
|
|||
.swizzle = SWIZZLE_IDENTITY_4,
|
||||
.op = midgard_op_ld_ubo_128,
|
||||
.load_store = {
|
||||
.arg_1 = ctx->info->push.words[idx].ubo,
|
||||
.arg_2 = 0x1E,
|
||||
.index_reg = REGISTER_LDST_ZERO,
|
||||
},
|
||||
.constants.u32[0] = ctx->info->push.words[idx].offset
|
||||
};
|
||||
|
||||
midgard_pack_ubo_index_imm(&ld.load_store,
|
||||
ctx->info->push.words[idx].ubo);
|
||||
|
||||
mir_insert_instruction_before_scheduled(ctx, block, before, ld);
|
||||
|
||||
mir_rewrite_index_src_single(ins, ins->src[i], temp);
|
||||
|
|
|
|||
|
|
@ -140,14 +140,17 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
|
|||
if (instructions[i]->type == TAG_LOAD_STORE_4 &&
|
||||
load_store_opcode_props[instructions[i]->op].props & LDST_ADDRESS) {
|
||||
|
||||
unsigned type;
|
||||
switch (instructions[i]->load_store.arg_1 & 0x3E) {
|
||||
case LDST_SHARED: type = 0; break;
|
||||
case LDST_SCRATCH: type = 1; break;
|
||||
default: type = 2; break;
|
||||
unsigned type = instructions[i]->load_store.arg_reg |
|
||||
instructions[i]->load_store.arg_comp;
|
||||
|
||||
unsigned idx;
|
||||
switch (type) {
|
||||
case LDST_SHARED: idx = 0; break;
|
||||
case LDST_SCRATCH: idx = 1; break;
|
||||
default: idx = 2; break;
|
||||
}
|
||||
|
||||
unsigned prev = prev_ldst[type];
|
||||
unsigned prev = prev_ldst[idx];
|
||||
|
||||
if (prev != ~0) {
|
||||
BITSET_WORD *dependents = instructions[prev]->dependents;
|
||||
|
|
@ -160,7 +163,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
|
|||
instructions[i]->nr_dependencies++;
|
||||
}
|
||||
|
||||
prev_ldst[type] = i;
|
||||
prev_ldst[idx] = i;
|
||||
}
|
||||
|
||||
if (dest < node_count) {
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ mir_analyze_ranges(compiler_context *ctx)
|
|||
mir_foreach_instr_global(ctx, ins) {
|
||||
if (!mir_is_direct_aligned_ubo(ins)) continue;
|
||||
|
||||
unsigned ubo = ins->load_store.arg_1;
|
||||
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
|
||||
unsigned offset = ins->constants.u32[0] / 16;
|
||||
|
||||
assert(ubo < res.nr_blocks);
|
||||
|
|
@ -276,7 +276,7 @@ midgard_promote_uniforms(compiler_context *ctx)
|
|||
mir_foreach_instr_global_safe(ctx, ins) {
|
||||
if (!mir_is_direct_aligned_ubo(ins)) continue;
|
||||
|
||||
unsigned ubo = ins->load_store.arg_1;
|
||||
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
|
||||
unsigned qword = ins->constants.u32[0] / 16;
|
||||
|
||||
/* Check if we decided to push this */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue