mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-19 17:58:09 +02:00
hw matches NIR well - just an extra destination on the texture instruction. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33682>
1208 lines
41 KiB
C
1208 lines
41 KiB
C
/*
|
|
* Copyright 2021 Alyssa Rosenzweig
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "agx_compiler.h"
|
|
#include "agx_opcodes.h"
|
|
|
|
/* Binary patches needed for branch offsets */
|
|
struct agx_branch_fixup {
|
|
/* Offset into the binary to patch */
|
|
off_t offset;
|
|
|
|
/* Value to patch with will be block->offset */
|
|
agx_block *block;
|
|
|
|
/* If true, skips to the last instruction of the target block */
|
|
bool skip_to_end;
|
|
};
|
|
|
|
static void
|
|
pack_assert_internal(const agx_instr *I, bool condition, const char *msg)
|
|
{
|
|
if (!condition) {
|
|
fprintf(stderr, "Packing assertion failed for instruction:\n\n");
|
|
agx_print_instr(I, stderr);
|
|
fprintf(stderr, "\n%s\n", msg);
|
|
abort();
|
|
}
|
|
}
|
|
|
|
/*
 * Packing assertion with a custom message. `msg` must be a string literal,
 * since the stringified condition is appended by literal concatenation.
 */
#define pack_assert_msg(I, cond, msg)                                          \
   pack_assert_internal(I, cond, msg ": " #cond)

/* Packing assertion whose message is just the stringified condition */
#define pack_assert(I, cond) pack_assert_internal(I, cond, #cond)
|
|
|
|
static void
|
|
assert_register_is_aligned(const agx_instr *I, agx_index reg)
|
|
{
|
|
pack_assert_msg(I, reg.type == AGX_INDEX_REGISTER, "expecting a register");
|
|
|
|
switch (reg.size) {
|
|
case AGX_SIZE_16:
|
|
return;
|
|
case AGX_SIZE_32:
|
|
pack_assert_msg(I, (reg.value & 1) == 0, "unaligned reg");
|
|
return;
|
|
case AGX_SIZE_64:
|
|
pack_assert_msg(I, (reg.value & 3) == 0, "unaligned reg");
|
|
return;
|
|
}
|
|
|
|
unreachable("Invalid register size");
|
|
}
|
|
|
|
/* Texturing has its own operands */
|
|
static unsigned
|
|
agx_pack_sample_coords(const agx_instr *I, agx_index index, bool *flag,
|
|
bool *is_16)
|
|
{
|
|
/* TODO: Do we have a use case for 16-bit coords? */
|
|
pack_assert_msg(I, index.size == AGX_SIZE_32, "32-bit coordinates");
|
|
pack_assert_msg(I, index.value < 0x100, "coordinate register bound");
|
|
|
|
*is_16 = false;
|
|
*flag = index.discard;
|
|
return index.value;
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_texture(const agx_instr *I, agx_index base, agx_index index,
|
|
unsigned *packed_base, unsigned *flag)
|
|
{
|
|
if (base.type == AGX_INDEX_IMMEDIATE) {
|
|
pack_assert(I, base.value == 0);
|
|
|
|
/* Texture state registers */
|
|
*packed_base = 0;
|
|
|
|
if (index.type == AGX_INDEX_REGISTER) {
|
|
pack_assert(I, index.size == AGX_SIZE_16);
|
|
*flag = 1;
|
|
} else {
|
|
pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
|
|
*flag = 0;
|
|
}
|
|
} else {
|
|
pack_assert(I, base.type == AGX_INDEX_UNIFORM);
|
|
pack_assert(I, base.size == AGX_SIZE_64);
|
|
pack_assert(I, (base.value & 3) == 0);
|
|
pack_assert(I, index.size == AGX_SIZE_32);
|
|
|
|
/* Bindless */
|
|
*packed_base = base.value >> 2;
|
|
*flag = 3;
|
|
}
|
|
|
|
return index.value;
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_sampler(const agx_instr *I, agx_index index, bool *flag)
|
|
{
|
|
if (index.type == AGX_INDEX_REGISTER) {
|
|
pack_assert(I, index.size == AGX_SIZE_16);
|
|
*flag = 1;
|
|
} else {
|
|
pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
|
|
*flag = 0;
|
|
}
|
|
|
|
return index.value;
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_sample_compare_offset(const agx_instr *I, agx_index index)
|
|
{
|
|
if (index.type == AGX_INDEX_NULL)
|
|
return 0;
|
|
|
|
pack_assert(I, index.size == AGX_SIZE_32);
|
|
pack_assert(I, index.value < 0x100);
|
|
assert_register_is_aligned(I, index);
|
|
return index.value;
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_lod(const agx_instr *I, agx_index index, unsigned *lod_mode)
|
|
{
|
|
/* For automatic LOD, the LOD field is unused. Assert as much. */
|
|
if ((*lod_mode) == AGX_LOD_MODE_AUTO_LOD) {
|
|
pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
|
|
pack_assert(I, index.value == 0);
|
|
return 0;
|
|
}
|
|
|
|
if (index.type == AGX_INDEX_UNIFORM) {
|
|
/* Translate LOD mode from register mode to uniform mode */
|
|
pack_assert(I,
|
|
((*lod_mode) & BITFIELD_BIT(2)) && "must start as reg mode");
|
|
*lod_mode = (*lod_mode) & ~BITFIELD_BIT(2);
|
|
pack_assert(I, index.value < 0x200);
|
|
} else {
|
|
/* Otherwise must be registers */
|
|
pack_assert(I, index.type == AGX_INDEX_REGISTER);
|
|
pack_assert(I, index.value < 0x100);
|
|
}
|
|
|
|
return index.value;
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_pbe_source(const agx_instr *I, agx_index index, bool *flag)
|
|
{
|
|
pack_assert(I, index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);
|
|
assert_register_is_aligned(I, index);
|
|
|
|
*flag = (index.size == AGX_SIZE_32);
|
|
return index.value;
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_pbe_lod(const agx_instr *I, agx_index index, bool *flag)
|
|
{
|
|
pack_assert(I, index.size == AGX_SIZE_16);
|
|
|
|
if (index.type == AGX_INDEX_IMMEDIATE)
|
|
*flag = true;
|
|
else if (index.type == AGX_INDEX_REGISTER)
|
|
*flag = false;
|
|
else
|
|
unreachable("Invalid PBE LOD type");
|
|
|
|
return index.value;
|
|
}
|
|
|
|
/* Load/stores have their own operands */
|
|
|
|
static unsigned
|
|
agx_pack_memory_reg(const agx_instr *I, agx_index index, bool *flag)
|
|
{
|
|
assert_register_is_aligned(I, index);
|
|
|
|
*flag = (index.size >= AGX_SIZE_32);
|
|
return index.value;
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_memory_base(const agx_instr *I, agx_index index, bool *flag)
|
|
{
|
|
pack_assert(I, index.size == AGX_SIZE_64);
|
|
pack_assert(I, (index.value & 1) == 0);
|
|
|
|
/* Can't seem to access high uniforms from memory instructions */
|
|
pack_assert(I, index.value < 0x100);
|
|
|
|
if (index.type == AGX_INDEX_UNIFORM) {
|
|
*flag = 1;
|
|
} else {
|
|
pack_assert(I, index.type == AGX_INDEX_REGISTER);
|
|
*flag = 0;
|
|
}
|
|
|
|
return index.value;
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_memory_index(const agx_instr *I, agx_index index, bool *flag)
|
|
{
|
|
if (index.type == AGX_INDEX_IMMEDIATE) {
|
|
pack_assert(I, index.value < 0x10000);
|
|
*flag = 1;
|
|
|
|
return index.value;
|
|
} else {
|
|
pack_assert(I, index.type == AGX_INDEX_REGISTER);
|
|
pack_assert(I, index.size == AGX_SIZE_32);
|
|
pack_assert(I, (index.value & 1) == 0);
|
|
pack_assert(I, index.value < 0x100);
|
|
|
|
*flag = 0;
|
|
return index.value;
|
|
}
|
|
}
|
|
|
|
static uint16_t
|
|
agx_pack_local_base(const agx_instr *I, agx_index index, unsigned *flags)
|
|
{
|
|
pack_assert(I, index.size == AGX_SIZE_16);
|
|
|
|
if (index.type == AGX_INDEX_IMMEDIATE) {
|
|
pack_assert(I, index.value == 0);
|
|
*flags = 2;
|
|
return 0;
|
|
} else if (index.type == AGX_INDEX_UNIFORM) {
|
|
*flags = 1 | ((index.value >> 8) << 1);
|
|
return index.value & BITFIELD_MASK(8);
|
|
} else {
|
|
assert_register_is_aligned(I, index);
|
|
*flags = 0;
|
|
return index.value;
|
|
}
|
|
}
|
|
|
|
static uint16_t
|
|
agx_pack_local_index(const agx_instr *I, agx_index index, bool *flag)
|
|
{
|
|
pack_assert(I, index.size == AGX_SIZE_16);
|
|
|
|
if (index.type == AGX_INDEX_IMMEDIATE) {
|
|
pack_assert(I, index.value < 0x10000);
|
|
*flag = 1;
|
|
return index.value;
|
|
} else {
|
|
assert_register_is_aligned(I, index);
|
|
*flag = 0;
|
|
return index.value;
|
|
}
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_atomic_source(const agx_instr *I, agx_index index)
|
|
{
|
|
pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
|
|
assert_register_is_aligned(I, index);
|
|
return index.value;
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_atomic_dest(const agx_instr *I, agx_index index, bool *flag)
|
|
{
|
|
/* Atomic destinstions are optional (e.g. for update with no return) */
|
|
if (index.type == AGX_INDEX_NULL) {
|
|
*flag = 0;
|
|
return 0;
|
|
}
|
|
|
|
/* But are otherwise registers */
|
|
pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
|
|
assert_register_is_aligned(I, index);
|
|
*flag = 1;
|
|
return index.value;
|
|
}
|
|
|
|
/* ALU goes through a common path */
|
|
|
|
static unsigned
|
|
agx_pack_alu_dst(const agx_instr *I, agx_index dest)
|
|
{
|
|
assert_register_is_aligned(I, dest);
|
|
unsigned reg = dest.value;
|
|
enum agx_size size = dest.size;
|
|
pack_assert(I, reg < 0x100);
|
|
|
|
return (dest.cache ? (1 << 0) : 0) | ((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
|
|
((size == AGX_SIZE_64) ? (1 << 2) : 0) | ((reg << 2));
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_alu_src(const agx_instr *I, agx_index src)
|
|
{
|
|
unsigned value = src.value;
|
|
enum agx_size size = src.size;
|
|
|
|
if (src.type == AGX_INDEX_IMMEDIATE) {
|
|
/* Flags 0 for an 8-bit immediate */
|
|
pack_assert(I, value < 0x100);
|
|
|
|
return (value & BITFIELD_MASK(6)) | ((value >> 6) << 10);
|
|
} else if (src.type == AGX_INDEX_UNIFORM) {
|
|
pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
|
|
pack_assert(I, value < AGX_NUM_UNIFORMS);
|
|
|
|
return (value & BITFIELD_MASK(6)) |
|
|
((value & BITFIELD_BIT(8)) ? (1 << 6) : 0) |
|
|
((size == AGX_SIZE_32) ? (1 << 7) : 0) | (0x1 << 8) |
|
|
(((value >> 6) & BITFIELD_MASK(2)) << 10);
|
|
} else {
|
|
assert_register_is_aligned(I, src);
|
|
pack_assert(I, !(src.cache && src.discard));
|
|
|
|
unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
|
|
unsigned size_flag = (size == AGX_SIZE_64) ? 0x3
|
|
: (size == AGX_SIZE_32) ? 0x2
|
|
: (size == AGX_SIZE_16) ? 0x0
|
|
: 0x0;
|
|
|
|
return (value & BITFIELD_MASK(6)) | (hint << 6) | (size_flag << 8) |
|
|
(((value >> 6) & BITFIELD_MASK(2)) << 10);
|
|
}
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_cmpsel_src(const agx_instr *I, agx_index src, enum agx_size dest_size)
|
|
{
|
|
unsigned value = src.value;
|
|
ASSERTED enum agx_size size = src.size;
|
|
|
|
if (src.type == AGX_INDEX_IMMEDIATE) {
|
|
/* Flags 0x4 for an 8-bit immediate */
|
|
pack_assert(I, value < 0x100);
|
|
|
|
return (value & BITFIELD_MASK(6)) | (0x4 << 6) | ((value >> 6) << 10);
|
|
} else if (src.type == AGX_INDEX_UNIFORM) {
|
|
pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
|
|
pack_assert(I, size == dest_size);
|
|
pack_assert(I, value < 0x200);
|
|
|
|
return (value & BITFIELD_MASK(6)) | ((value >> 8) << 6) | (0x3 << 7) |
|
|
(((value >> 6) & BITFIELD_MASK(2)) << 10);
|
|
} else {
|
|
pack_assert(I, src.type == AGX_INDEX_REGISTER);
|
|
pack_assert(I, !(src.cache && src.discard));
|
|
pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
|
|
pack_assert(I, size == dest_size);
|
|
assert_register_is_aligned(I, src);
|
|
|
|
unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
|
|
|
|
return (value & BITFIELD_MASK(6)) | (hint << 6) |
|
|
(((value >> 6) & BITFIELD_MASK(2)) << 10);
|
|
}
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_sample_mask_src(const agx_instr *I, agx_index src)
|
|
{
|
|
unsigned value = src.value;
|
|
unsigned packed_value =
|
|
(value & BITFIELD_MASK(6)) | (((value >> 6) & BITFIELD_MASK(2)) << 10);
|
|
|
|
if (src.type == AGX_INDEX_IMMEDIATE) {
|
|
pack_assert(I, value < 0x100);
|
|
return packed_value | (1 << 7);
|
|
} else {
|
|
pack_assert(I, src.type == AGX_INDEX_REGISTER);
|
|
assert_register_is_aligned(I, src);
|
|
pack_assert(I, !(src.cache && src.discard));
|
|
|
|
return packed_value;
|
|
}
|
|
}
|
|
|
|
static unsigned
|
|
agx_pack_float_mod(agx_index src)
|
|
{
|
|
return (src.abs ? (1 << 0) : 0) | (src.neg ? (1 << 1) : 0);
|
|
}
|
|
|
|
static bool
|
|
agx_all_16(agx_instr *I)
|
|
{
|
|
agx_foreach_dest(I, d) {
|
|
if (!agx_is_null(I->dest[d]) && I->dest[d].size != AGX_SIZE_16)
|
|
return false;
|
|
}
|
|
|
|
agx_foreach_src(I, s) {
|
|
if (!agx_is_null(I->src[s]) && I->src[s].size != AGX_SIZE_16)
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Generic pack for ALU instructions, which are quite regular */
|
|
|
|
static void
|
|
agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
|
|
{
|
|
struct agx_opcode_info info = agx_opcodes_info[I->op];
|
|
struct agx_encoding encoding = info.encoding;
|
|
|
|
bool is_f16 = (I->op == AGX_OPCODE_HMUL || I->op == AGX_OPCODE_HFMA ||
|
|
I->op == AGX_OPCODE_HADD);
|
|
|
|
pack_assert_msg(I, encoding.exact, "invalid encoding");
|
|
|
|
uint64_t raw = encoding.exact;
|
|
uint16_t extend = 0;
|
|
|
|
// TODO: assert saturable
|
|
if (I->saturate)
|
|
raw |= (1 << 6);
|
|
|
|
if (info.nr_dests) {
|
|
pack_assert(I, info.nr_dests == 1);
|
|
unsigned D = agx_pack_alu_dst(I, I->dest[0]);
|
|
unsigned extend_offset = (sizeof(extend) * 8) - 4;
|
|
|
|
raw |= (D & BITFIELD_MASK(8)) << 7;
|
|
extend |= ((D >> 8) << extend_offset);
|
|
|
|
if (info.immediates & AGX_IMMEDIATE_INVERT_COND) {
|
|
raw |= (uint64_t)(I->invert_cond) << 47;
|
|
}
|
|
} else if (info.immediates & AGX_IMMEDIATE_NEST) {
|
|
raw |= (I->invert_cond << 8);
|
|
raw |= (I->nest << 11);
|
|
raw |= (I->icond << 13);
|
|
}
|
|
|
|
for (unsigned s = 0; s < info.nr_srcs; ++s) {
|
|
bool is_cmpsel = (s >= 2) && (I->op == AGX_OPCODE_ICMPSEL ||
|
|
I->op == AGX_OPCODE_FCMPSEL);
|
|
|
|
unsigned src = is_cmpsel
|
|
? agx_pack_cmpsel_src(I, I->src[s], I->dest[0].size)
|
|
: agx_pack_alu_src(I, I->src[s]);
|
|
|
|
unsigned src_short = (src & BITFIELD_MASK(10));
|
|
unsigned src_extend = (src >> 10);
|
|
|
|
/* Size bit always zero and so omitted for 16-bit */
|
|
if (is_f16 && !is_cmpsel)
|
|
pack_assert(I, (src_short & (1 << 9)) == 0);
|
|
|
|
if (info.is_float || (I->op == AGX_OPCODE_FCMPSEL && !is_cmpsel)) {
|
|
unsigned fmod = agx_pack_float_mod(I->src[s]);
|
|
unsigned fmod_offset = is_f16 ? 9 : 10;
|
|
src_short |= (fmod << fmod_offset);
|
|
} else if (I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) {
|
|
/* Force unsigned for immediates so uadd_sat works properly */
|
|
bool zext = I->src[s].abs || I->src[s].type == AGX_INDEX_IMMEDIATE;
|
|
bool extends = I->src[s].size < AGX_SIZE_64;
|
|
|
|
unsigned sxt = (extends && !zext) ? (1 << 10) : 0;
|
|
|
|
unsigned negate_src = (I->op == AGX_OPCODE_IMAD) ? 2 : 1;
|
|
pack_assert(I, !I->src[s].neg || s == negate_src);
|
|
src_short |= sxt;
|
|
}
|
|
|
|
/* Sources come at predictable offsets */
|
|
unsigned offset = 16 + (12 * s);
|
|
raw |= (((uint64_t)src_short) << offset);
|
|
|
|
/* Destination and each source get extended in reverse order */
|
|
unsigned extend_offset = (sizeof(extend) * 8) - ((s + 3) * 2);
|
|
extend |= (src_extend << extend_offset);
|
|
}
|
|
|
|
if ((I->op == AGX_OPCODE_IMAD && I->src[2].neg) ||
|
|
(I->op == AGX_OPCODE_IADD && I->src[1].neg))
|
|
raw |= (1 << 27);
|
|
|
|
if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {
|
|
raw |= (I->truth_table & 0x3) << 26;
|
|
raw |= (uint64_t)(I->truth_table >> 2) << 38;
|
|
} else if (info.immediates & AGX_IMMEDIATE_SHIFT) {
|
|
pack_assert(I, I->shift <= 4);
|
|
raw |= (uint64_t)(I->shift & 1) << 39;
|
|
raw |= (uint64_t)(I->shift >> 1) << 52;
|
|
} else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {
|
|
raw |= (uint64_t)(I->bfi_mask & 0x3) << 38;
|
|
raw |= (uint64_t)((I->bfi_mask >> 2) & 0x3) << 50;
|
|
raw |= (uint64_t)((I->bfi_mask >> 4) & 0x1) << 63;
|
|
} else if (info.immediates & AGX_IMMEDIATE_SIMD_OP) {
|
|
raw |= (uint64_t)(I->simd_op & 0x1) << 28;
|
|
raw |= (uint64_t)((I->simd_op >> 1) & 0x7) << 38;
|
|
raw |= (uint64_t)((I->simd_op >> 4) & 0x1) << 47;
|
|
} else if (info.immediates & AGX_IMMEDIATE_SR) {
|
|
raw |= (uint64_t)(I->sr & 0x3F) << 16;
|
|
raw |= (uint64_t)(I->sr >> 6) << 26;
|
|
} else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)
|
|
raw |= (uint64_t)(I->imm) << 8;
|
|
else if (info.immediates & AGX_IMMEDIATE_IMM)
|
|
raw |= (uint64_t)(I->imm) << 16;
|
|
else if (info.immediates & AGX_IMMEDIATE_ROUND)
|
|
raw |= (uint64_t)(I->imm) << 26;
|
|
else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))
|
|
raw |= (uint64_t)(I->fcond) << 61;
|
|
|
|
/* Determine length bit */
|
|
unsigned length = encoding.length_short;
|
|
if (I->op == AGX_OPCODE_MOV_IMM && I->dest[0].size == AGX_SIZE_16)
|
|
length -= 2;
|
|
|
|
uint64_t short_mask = BITFIELD64_MASK(8 * length);
|
|
bool length_bit = (extend || (raw & ~short_mask));
|
|
|
|
if (encoding.extensible && length_bit) {
|
|
raw |= (1 << 15);
|
|
length += (length > 8) ? 4 : 2;
|
|
}
|
|
|
|
/* Pack! */
|
|
if (length <= sizeof(uint64_t)) {
|
|
unsigned extend_offset = ((length - sizeof(extend)) * 8);
|
|
|
|
/* XXX: Encode these special cases better */
|
|
switch (I->op) {
|
|
case AGX_OPCODE_IADD:
|
|
case AGX_OPCODE_ICMP_BALLOT:
|
|
case AGX_OPCODE_ICMP_QUAD_BALLOT:
|
|
case AGX_OPCODE_FCMP_BALLOT:
|
|
case AGX_OPCODE_FCMP_QUAD_BALLOT:
|
|
extend_offset -= 16;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
raw |= (uint64_t)extend << extend_offset;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);
|
|
} else {
|
|
/* So far, >8 byte ALU is only to store the extend bits */
|
|
unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);
|
|
unsigned hi = ((uint64_t)extend) << extend_offset;
|
|
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi,
|
|
length - 8);
|
|
}
|
|
}
|
|
|
|
static void
|
|
agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
|
|
agx_instr *I, enum u_tristate needs_g13x_coherency)
|
|
{
|
|
switch (I->op) {
|
|
case AGX_OPCODE_LD_TILE:
|
|
case AGX_OPCODE_ST_TILE: {
|
|
bool load = (I->op == AGX_OPCODE_LD_TILE);
|
|
unsigned D = agx_pack_alu_dst(I, load ? I->dest[0] : I->src[0]);
|
|
pack_assert(I, I->mask < 0x10);
|
|
pack_assert(I, I->pixel_offset < 0x200);
|
|
|
|
agx_index sample_index = load ? I->src[0] : I->src[1];
|
|
agx_index coords = load ? I->src[1] : I->src[2];
|
|
pack_assert(I, sample_index.type == AGX_INDEX_REGISTER ||
|
|
sample_index.type == AGX_INDEX_IMMEDIATE);
|
|
pack_assert(I, sample_index.size == AGX_SIZE_16);
|
|
unsigned St = (sample_index.type == AGX_INDEX_REGISTER) ? 1 : 0;
|
|
unsigned S = sample_index.value;
|
|
pack_assert(I, S < 0x100);
|
|
|
|
pack_assert(I, I->explicit_coords == (coords.type == AGX_INDEX_REGISTER));
|
|
unsigned C = I->explicit_coords ? coords.value : 0;
|
|
|
|
uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
|
|
((uint64_t)(D & BITFIELD_MASK(8)) << 7) | (St << 22) |
|
|
((uint64_t)(I->format) << 24) |
|
|
((uint64_t)(C & BITFIELD_MASK(6)) << 16) |
|
|
((uint64_t)(I->pixel_offset & BITFIELD_MASK(7)) << 28) |
|
|
(load || I->explicit_coords ? (1ull << 35) : 0) |
|
|
((uint64_t)(I->mask) << 36) |
|
|
((uint64_t)(I->pixel_offset >> 7) << 40) |
|
|
((uint64_t)(S & BITFIELD_MASK(6)) << 42) |
|
|
(I->explicit_coords ? (1ull << 55) : 0) |
|
|
((uint64_t)(S >> 6) << 56) | ((uint64_t)(C >> 6) << 58) |
|
|
(((uint64_t)(D >> 8)) << 60);
|
|
|
|
unsigned size = 8;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_SAMPLE_MASK: {
|
|
unsigned S = agx_pack_sample_mask_src(I, I->src[1]);
|
|
unsigned T = I->src[0].value;
|
|
bool Tt = I->src[0].type == AGX_INDEX_IMMEDIATE;
|
|
pack_assert(I, Tt || I->src[0].type == AGX_INDEX_REGISTER);
|
|
uint32_t raw = 0xc1 | (Tt ? BITFIELD_BIT(8) : 0) |
|
|
((T & BITFIELD_MASK(6)) << 9) | ((S & 0xff) << 16) |
|
|
((T >> 6) << 24) | ((S >> 8) << 26);
|
|
|
|
unsigned size = 4;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_WAIT: {
|
|
uint64_t raw =
|
|
agx_opcodes_info[I->op].encoding.exact | (I->scoreboard << 8);
|
|
|
|
unsigned size = 2;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_ITER:
|
|
case AGX_OPCODE_ITERPROJ:
|
|
case AGX_OPCODE_LDCF: {
|
|
bool flat = (I->op == AGX_OPCODE_LDCF);
|
|
bool perspective = (I->op == AGX_OPCODE_ITERPROJ);
|
|
unsigned D = agx_pack_alu_dst(I, I->dest[0]);
|
|
unsigned channels = (I->channels & 0x3);
|
|
|
|
agx_index src_I = I->src[0];
|
|
pack_assert(I, src_I.type == AGX_INDEX_IMMEDIATE ||
|
|
src_I.type == AGX_INDEX_REGISTER);
|
|
|
|
unsigned cf_I = src_I.value;
|
|
unsigned cf_J = 0;
|
|
|
|
if (perspective) {
|
|
agx_index src_J = I->src[1];
|
|
pack_assert(I, src_J.type == AGX_INDEX_IMMEDIATE);
|
|
cf_J = src_J.value;
|
|
}
|
|
|
|
pack_assert(I, cf_I < 0x100);
|
|
pack_assert(I, cf_J < 0x100);
|
|
|
|
enum agx_interpolation interp = I->interpolation;
|
|
agx_index sample_index = flat ? agx_null() : I->src[perspective ? 2 : 1];
|
|
|
|
/* Fix up the interpolation enum to distinguish the sample index source */
|
|
if (interp == AGX_INTERPOLATION_SAMPLE) {
|
|
if (sample_index.type == AGX_INDEX_REGISTER)
|
|
interp = AGX_INTERPOLATION_SAMPLE_REGISTER;
|
|
else
|
|
pack_assert(I, sample_index.type == AGX_INDEX_IMMEDIATE);
|
|
} else {
|
|
sample_index = agx_zero();
|
|
}
|
|
|
|
bool kill = false; // TODO: optimize
|
|
bool forward = false; // TODO: optimize
|
|
|
|
uint64_t raw =
|
|
0x21 | (flat ? (1 << 7) : 0) | (perspective ? (1 << 6) : 0) |
|
|
((D & 0xFF) << 7) | (1ull << 15) | /* XXX */
|
|
((cf_I & BITFIELD_MASK(6)) << 16) |
|
|
((src_I.type == AGX_INDEX_REGISTER) ? (1 << 23) : 0) |
|
|
((cf_J & BITFIELD_MASK(6)) << 24) | (((uint64_t)channels) << 30) |
|
|
(((uint64_t)sample_index.value) << 32) | (forward ? (1ull << 46) : 0) |
|
|
(((uint64_t)interp) << 48) | (kill ? (1ull << 52) : 0) |
|
|
(((uint64_t)(D >> 8)) << 56) | ((uint64_t)(cf_I >> 6) << 58) |
|
|
((uint64_t)(cf_J >> 6) << 60);
|
|
|
|
unsigned size = 8;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_ST_VARY: {
|
|
agx_index index_src = I->src[0];
|
|
agx_index value = I->src[1];
|
|
|
|
pack_assert(I, index_src.type == AGX_INDEX_IMMEDIATE ||
|
|
index_src.type == AGX_INDEX_REGISTER);
|
|
pack_assert(I, index_src.value < BITFIELD_MASK(8));
|
|
pack_assert(I, value.type == AGX_INDEX_REGISTER);
|
|
pack_assert(I, value.size == AGX_SIZE_32);
|
|
|
|
uint64_t raw = 0x11 | (I->last ? (1 << 7) : 0) |
|
|
((value.value & 0x3F) << 9) |
|
|
(((uint64_t)(index_src.value & 0x3F)) << 16) |
|
|
(index_src.type == AGX_INDEX_IMMEDIATE ? (1 << 23) : 0) |
|
|
((value.value >> 6) << 24) |
|
|
((index_src.value >> 6) << 26) | (0x8u << 28); /* XXX */
|
|
|
|
unsigned size = 4;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_DEVICE_LOAD:
|
|
case AGX_OPCODE_DEVICE_STORE:
|
|
case AGX_OPCODE_UNIFORM_STORE: {
|
|
bool is_device_store = I->op == AGX_OPCODE_DEVICE_STORE;
|
|
bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
|
|
bool is_store = is_device_store || is_uniform_store;
|
|
bool has_base = !is_uniform_store;
|
|
|
|
/* Uniform stores are required to be 16-bit. The encoding that should be
|
|
* 32-bit annoyingly doesn't work. Fix up the format and size so we can
|
|
* use scalar 32-bit values in the IR and avoid special casing earlier in
|
|
* the compiler.
|
|
*/
|
|
enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format;
|
|
agx_index reg = is_store ? I->src[0] : I->dest[0];
|
|
unsigned mask = I->mask;
|
|
|
|
if (is_uniform_store && reg.size != AGX_SIZE_16) {
|
|
if (reg.size == AGX_SIZE_64) {
|
|
assert(mask == 1);
|
|
mask = BITFIELD_MASK(4);
|
|
} else {
|
|
assert(reg.size == AGX_SIZE_32);
|
|
assert(mask == 1 || mask == 3);
|
|
mask = BITFIELD_MASK(mask == 3 ? 4 : 2);
|
|
}
|
|
|
|
reg.size = AGX_SIZE_16;
|
|
}
|
|
|
|
unsigned offset_src = (has_base ? 1 : 0) + (is_store ? 1 : 0);
|
|
|
|
bool Rt, At = false, Ot;
|
|
unsigned R = agx_pack_memory_reg(I, reg, &Rt);
|
|
unsigned A =
|
|
has_base ? agx_pack_memory_base(I, I->src[is_store ? 1 : 0], &At) : 0;
|
|
unsigned O = agx_pack_memory_index(I, I->src[offset_src], &Ot);
|
|
unsigned u1 = is_uniform_store ? 0 : 1; // XXX
|
|
unsigned u3 = 0;
|
|
unsigned u4 = is_uniform_store ? 0 : I->coherent ? 7 : 4;
|
|
unsigned u5 = 0;
|
|
bool L = true; /* TODO: when would you want short? */
|
|
|
|
pack_assert(I, mask != 0);
|
|
pack_assert(I, format <= 0x10);
|
|
|
|
uint64_t raw =
|
|
agx_opcodes_info[I->op].encoding.exact |
|
|
((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) |
|
|
((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
|
|
(Ot ? (1 << 24) : 0) | (I->src[offset_src].abs ? (1 << 25) : 0) |
|
|
(is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) |
|
|
(u3 << 28) | (I->scoreboard << 30) |
|
|
(((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
|
|
(((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
|
|
(((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
|
|
(((uint64_t)I->shift) << 42) | (((uint64_t)u4) << 44) |
|
|
(L ? (1ull << 47) : 0) | (((uint64_t)(format >> 3)) << 48) |
|
|
(((uint64_t)Rt) << 49) | (((uint64_t)u5) << 50) |
|
|
(((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);
|
|
|
|
unsigned size = L ? 8 : 6;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_LOCAL_LOAD:
|
|
case AGX_OPCODE_LOCAL_STORE: {
|
|
bool is_load = I->op == AGX_OPCODE_LOCAL_LOAD;
|
|
bool L = true; /* TODO: when would you want short? */
|
|
unsigned At;
|
|
bool Rt, Ot;
|
|
|
|
unsigned R =
|
|
agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
|
|
unsigned A = agx_pack_local_base(I, is_load ? I->src[0] : I->src[1], &At);
|
|
unsigned O =
|
|
agx_pack_local_index(I, is_load ? I->src[1] : I->src[2], &Ot);
|
|
|
|
uint64_t raw =
|
|
agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
|
|
((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
|
|
((A & BITFIELD_MASK(6)) << 16) | (At << 22) | (I->format << 24) |
|
|
((O & BITFIELD64_MASK(6)) << 28) | (Ot ? BITFIELD64_BIT(34) : 0) |
|
|
(((uint64_t)I->mask) << 36) | (((uint64_t)(O >> 6)) << 48) |
|
|
(((uint64_t)(A >> 6)) << 58) | (((uint64_t)(R >> 6)) << 60);
|
|
|
|
unsigned size = L ? 8 : 6;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_ATOMIC: {
|
|
bool At, Ot, Rt;
|
|
unsigned A = agx_pack_memory_base(I, I->src[1], &At);
|
|
unsigned O = agx_pack_memory_index(I, I->src[2], &Ot);
|
|
unsigned R = agx_pack_atomic_dest(I, I->dest[0], &Rt);
|
|
unsigned S = agx_pack_atomic_source(I, I->src[0]);
|
|
|
|
/* Due to a hardware quirk, there is a bit in the atomic instruction that
|
|
* differs based on the target GPU. So, if we're packing an atomic, the
|
|
* shader must be keyed to a particular GPU (either needs_g13x_coherency
|
|
* or not needs_g13x_coherency). Assert that here.
|
|
*
|
|
* needs_g13x_coherency == U_TRISTATE_UNSET is only allowed for shaders
|
|
* that do not use atomics and are therefore portable across devices.
|
|
*/
|
|
assert(needs_g13x_coherency != U_TRISTATE_UNSET);
|
|
|
|
uint64_t raw =
|
|
agx_opcodes_info[I->op].encoding.exact |
|
|
(((uint64_t)I->atomic_opc) << 6) | ((R & BITFIELD_MASK(6)) << 10) |
|
|
((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
|
|
(Ot ? (1 << 24) : 0) | (I->src[2].abs ? (1 << 25) : 0) | (At << 27) |
|
|
(I->scoreboard << 30) |
|
|
(((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
|
|
(((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
|
|
(((uint64_t)(R >> 6)) << 40) |
|
|
(needs_g13x_coherency == U_TRISTATE_YES ? BITFIELD64_BIT(45) : 0) |
|
|
(Rt ? BITFIELD64_BIT(47) : 0) | (((uint64_t)S) << 48) |
|
|
(((uint64_t)(O >> 8)) << 56);
|
|
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_LOCAL_ATOMIC: {
|
|
bool L = true; /* TODO: Don't force */
|
|
|
|
unsigned At;
|
|
bool Rt = false, Ot;
|
|
|
|
bool Ra = I->dest[0].type != AGX_INDEX_NULL;
|
|
unsigned R = Ra ? agx_pack_memory_reg(I, I->dest[0], &Rt) : 0;
|
|
unsigned S = agx_pack_atomic_source(I, I->src[0]);
|
|
unsigned A = agx_pack_local_base(I, I->src[1], &At);
|
|
unsigned O = agx_pack_local_index(I, I->src[2], &Ot);
|
|
|
|
uint64_t raw =
|
|
agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
|
|
((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
|
|
((A & BITFIELD_MASK(6)) << 16) | (At << 22) |
|
|
(((uint64_t)I->atomic_opc) << 24) | ((O & BITFIELD64_MASK(6)) << 28) |
|
|
(Ot ? BITFIELD64_BIT(34) : 0) | (Ra ? BITFIELD64_BIT(38) : 0) |
|
|
(((uint64_t)(O >> 6)) << 48) | (((uint64_t)(A >> 6)) << 58) |
|
|
(((uint64_t)(R >> 6)) << 60);
|
|
|
|
uint64_t raw2 = S;
|
|
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 2), &raw2, 2);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_TEXTURE_LOAD:
|
|
case AGX_OPCODE_IMAGE_LOAD:
|
|
case AGX_OPCODE_TEXTURE_SAMPLE: {
|
|
pack_assert(I, I->mask != 0);
|
|
pack_assert(I, I->format <= 0x10);
|
|
|
|
bool Rt, Ct, St, Cs;
|
|
unsigned Tt;
|
|
unsigned U;
|
|
enum agx_lod_mode lod_mode = I->lod_mode;
|
|
|
|
unsigned R = agx_pack_memory_reg(I, I->dest[0], &Rt);
|
|
unsigned C = agx_pack_sample_coords(I, I->src[0], &Ct, &Cs);
|
|
unsigned T = agx_pack_texture(I, I->src[2], I->src[3], &U, &Tt);
|
|
unsigned S = agx_pack_sampler(I, I->src[4], &St);
|
|
unsigned O = agx_pack_sample_compare_offset(I, I->src[5]);
|
|
unsigned D = agx_pack_lod(I, I->src[1], &lod_mode);
|
|
|
|
unsigned q1 = I->shadow;
|
|
unsigned q2 = I->query_lod ? 2 : 0;
|
|
unsigned q3 = 0xc; // XXX
|
|
unsigned kill = 0; // helper invocation kill bit
|
|
|
|
/* Set bit 43 for image loads. This seems to make sure that image loads
|
|
* get the value written by the latest image store, not some other image
|
|
* store that was already in flight, fixing
|
|
*
|
|
* KHR-GLES31.core.shader_image_load_store.basic-glsl-misc-fs
|
|
*
|
|
* Apple seems to set this bit unconditionally for read/write image loads
|
|
* and never for readonly image loads. Some sort of cache control.
|
|
*/
|
|
if (I->op == AGX_OPCODE_IMAGE_LOAD) {
|
|
q3 |= 1;
|
|
|
|
/* Cache bypass for multidie coherency */
|
|
if (I->coherent) {
|
|
q3 |= 2;
|
|
}
|
|
}
|
|
|
|
uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) |
|
|
(I->sparse ? (1 << 6) : 0) | ((I->dim >> 3) << 7) |
|
|
((R >> 6) << 8) | ((C >> 6) << 10) | ((D >> 6) << 12) |
|
|
((T >> 6) << 14) | ((O & BITFIELD_MASK(6)) << 16) |
|
|
(I->gather << 23) | (I->offset << 27) |
|
|
((S >> 6) << 28) | ((O >> 6) << 30);
|
|
|
|
bool L = (extend != 0);
|
|
|
|
uint64_t raw =
|
|
0x31 | ((I->op != AGX_OPCODE_TEXTURE_SAMPLE) ? (1 << 6) : 0) |
|
|
(Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
|
|
(L ? (1 << 15) : 0) | ((C & BITFIELD_MASK(6)) << 16) |
|
|
(Ct ? (1 << 22) : 0) | (q1 << 23) | ((D & BITFIELD_MASK(6)) << 24) |
|
|
(q2 << 30) | (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
|
|
(((uint64_t)Tt) << 38) |
|
|
(((uint64_t)(I->dim & BITFIELD_MASK(3))) << 40) |
|
|
(((uint64_t)q3) << 43) | (((uint64_t)I->mask) << 48) |
|
|
(((uint64_t)lod_mode) << 52) |
|
|
(((uint64_t)(S & BITFIELD_MASK(6))) << 56) | (((uint64_t)St) << 62) |
|
|
(((uint64_t)I->scoreboard) << 63);
|
|
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
|
|
if (L)
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 4), &extend, 4);
|
|
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_IMAGE_WRITE: {
|
|
bool Ct, Dt, Rt, Cs;
|
|
unsigned Tt;
|
|
unsigned U;
|
|
|
|
unsigned R = agx_pack_pbe_source(I, I->src[0], &Rt);
|
|
unsigned C = agx_pack_sample_coords(I, I->src[1], &Ct, &Cs);
|
|
unsigned D = agx_pack_pbe_lod(I, I->src[2], &Dt);
|
|
unsigned T = agx_pack_texture(I, I->src[3], I->src[4], &U, &Tt);
|
|
bool rtz = false;
|
|
|
|
pack_assert(I, U < (1 << 5));
|
|
pack_assert(I, D < (1 << 8));
|
|
pack_assert(I, R < (1 << 8));
|
|
pack_assert(I, C < (1 << 8));
|
|
pack_assert(I, T < (1 << 8));
|
|
pack_assert(I, Tt < (1 << 2));
|
|
|
|
unsigned coherency = I->coherent ? 0xf : 0x9;
|
|
|
|
uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
|
|
(Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
|
|
((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
|
|
((D & BITFIELD_MASK(6)) << 24) | (Dt ? (1u << 31) : 0) |
|
|
(((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
|
|
(((uint64_t)Tt) << 38) |
|
|
(((uint64_t)I->dim & BITFIELD_MASK(3)) << 40) |
|
|
(((uint64_t)coherency) << 43) | (Cs ? (1ull << 47) : 0) |
|
|
(((uint64_t)U) << 48) | (rtz ? (1ull << 53) : 0) |
|
|
((I->dim & BITFIELD_BIT(4)) ? (1ull << 55) : 0) |
|
|
(((uint64_t)R >> 6) << 56) | (((uint64_t)C >> 6) << 58) |
|
|
(((uint64_t)D >> 6) << 60) | (((uint64_t)T >> 6) << 62);
|
|
|
|
if (raw >> 48) {
|
|
raw |= BITFIELD_BIT(15);
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
|
|
} else {
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_BLOCK_IMAGE_STORE: {
|
|
enum agx_format F = I->format;
|
|
pack_assert(I, F < 0x10);
|
|
|
|
unsigned Tt = 0;
|
|
pack_assert(I, Tt < 0x4);
|
|
|
|
unsigned U;
|
|
unsigned T = agx_pack_texture(I, I->src[0], I->src[1], &U, &Tt);
|
|
pack_assert(I, T < 0x100);
|
|
pack_assert(I, U < (1 << 5));
|
|
|
|
bool Cs = false;
|
|
bool Ct = I->src[3].discard;
|
|
unsigned C = I->src[3].value;
|
|
|
|
agx_index offset = I->src[2];
|
|
pack_assert(I, offset.size == AGX_SIZE_32);
|
|
assert_register_is_aligned(I, offset);
|
|
unsigned R = offset.value;
|
|
|
|
bool unk1 = true;
|
|
|
|
/* This bit has weird behaviour with the interaction of the texture state
|
|
* index and the tilebuffer offset. Probably best not to use it for now.
|
|
*/
|
|
unsigned unk3 = 1;
|
|
|
|
uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
|
|
(1 << 15) /* we always set length bit for now */ |
|
|
((F & 1) << 8) | ((R & BITFIELD_MASK(6)) << 9) |
|
|
((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
|
|
(I->explicit_coords ? (1 << 23) : 0) |
|
|
(unk1 ? (1u << 31) : 0);
|
|
|
|
uint32_t word1 = (T & BITFIELD_MASK(6)) | (Tt << 6) |
|
|
((I->dim & BITFIELD_MASK(3)) << 8) | (9 << 11) |
|
|
(Cs ? (1 << 15) : 0) | (((uint64_t)U) << 16) |
|
|
((I->dim & BITFIELD_BIT(3)) ? (1u << 23) : 0) |
|
|
((R >> 6) << 24) | ((C >> 6) << 26);
|
|
|
|
uint32_t word2 = (F >> 1) | (unk3 ? (1 << 3) : 0) | ((T >> 6) << 14);
|
|
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word1, 4);
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 2), &word2, 2);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_ZS_EMIT: {
|
|
agx_index S = I->src[0];
|
|
if (S.type == AGX_INDEX_IMMEDIATE)
|
|
pack_assert(I, S.value < BITFIELD_BIT(8));
|
|
else
|
|
assert_register_is_aligned(I, S);
|
|
|
|
agx_index T = I->src[1];
|
|
assert_register_is_aligned(I, T);
|
|
|
|
pack_assert(I, I->zs >= 1 && I->zs <= 3);
|
|
|
|
uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
|
|
((S.type == AGX_INDEX_IMMEDIATE) ? (1 << 8) : 0) |
|
|
((S.value & BITFIELD_MASK(6)) << 9) |
|
|
((T.value & BITFIELD_MASK(6)) << 16) |
|
|
((T.value >> 6) << 26) | ((S.value >> 6) << 24) |
|
|
(I->zs << 29);
|
|
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_JMP_EXEC_ANY:
|
|
case AGX_OPCODE_JMP_EXEC_NONE:
|
|
case AGX_OPCODE_JMP_EXEC_NONE_AFTER: {
|
|
/* We don't implement indirect branches */
|
|
pack_assert(I, I->target != NULL);
|
|
|
|
/* We'll fix the offset later. */
|
|
struct agx_branch_fixup fixup = {
|
|
.block = I->target,
|
|
.offset = emission->size,
|
|
.skip_to_end = I->op == AGX_OPCODE_JMP_EXEC_NONE_AFTER,
|
|
};
|
|
|
|
util_dynarray_append(fixups, struct agx_branch_fixup, fixup);
|
|
|
|
/* The rest of the instruction is fixed */
|
|
struct agx_opcode_info info = agx_opcodes_info[I->op];
|
|
uint64_t raw = info.encoding.exact;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_DOORBELL: {
|
|
pack_assert(I, I->imm < BITFIELD_MASK(8));
|
|
struct agx_opcode_info info = agx_opcodes_info[I->op];
|
|
uint64_t raw = info.encoding.exact | (I->imm << 40);
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_STACK_UNMAP:
|
|
case AGX_OPCODE_STACK_MAP: {
|
|
agx_index value = I->op == AGX_OPCODE_STACK_MAP ? I->src[0] : I->dest[0];
|
|
|
|
pack_assert(I, value.type == AGX_INDEX_REGISTER);
|
|
pack_assert(I, value.size == AGX_SIZE_32);
|
|
pack_assert(I, I->imm < BITFIELD_MASK(16));
|
|
|
|
unsigned q1 = 0; // XXX
|
|
unsigned q2 = 0; // XXX
|
|
unsigned q3 = 0; // XXX
|
|
unsigned q4 = 16; // XXX
|
|
unsigned q5 = 16; // XXX
|
|
|
|
struct agx_opcode_info info = agx_opcodes_info[I->op];
|
|
uint64_t raw =
|
|
info.encoding.exact | (q1 << 8) | ((value.value & 0x3F) << 10) |
|
|
((I->imm & 0xF) << 20) | (1ull << 24) | // XXX
|
|
(1ull << 26) | // XXX
|
|
(q2 << 30) | ((uint64_t)((I->imm >> 4) & 0xF) << 32) |
|
|
((uint64_t)q3 << 37) | ((uint64_t)(value.value >> 6) << 40) |
|
|
((uint64_t)q4 << 42) | (1ull << 47) | // XXX
|
|
((uint64_t)q5 << 48) | ((uint64_t)(I->imm >> 8) << 56);
|
|
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
|
|
break;
|
|
}
|
|
|
|
case AGX_OPCODE_STACK_LOAD:
|
|
case AGX_OPCODE_STACK_STORE: {
|
|
enum agx_format format = I->format;
|
|
unsigned mask = I->mask;
|
|
|
|
bool is_load = I->op == AGX_OPCODE_STACK_LOAD;
|
|
bool L = true; /* TODO: when would you want short? */
|
|
|
|
pack_assert(I, mask != 0);
|
|
pack_assert(I, format <= 0x10);
|
|
|
|
bool Rt, Ot;
|
|
unsigned R =
|
|
agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
|
|
unsigned O =
|
|
agx_pack_memory_index(I, is_load ? I->src[0] : I->src[1], &Ot);
|
|
|
|
unsigned i1 = 1; // XXX
|
|
unsigned i2 = 0; // XXX
|
|
unsigned i5 = 4; // XXX
|
|
|
|
uint64_t raw =
|
|
agx_opcodes_info[I->op].encoding.exact |
|
|
((format & BITFIELD_MASK(2)) << 8) | ((R & BITFIELD_MASK(6)) << 10) |
|
|
((O & BITFIELD_MASK(4)) << 20) | (Ot ? (1 << 24) : 0) |
|
|
((uint64_t)i1 << 26) | ((uint64_t)I->scoreboard << 30) |
|
|
(((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
|
|
((uint64_t)i2 << 36) |
|
|
(((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
|
|
((uint64_t)i5 << 44) | (L ? (1ull << 47) : 0) |
|
|
(((uint64_t)(format >> 2)) << 50) | (((uint64_t)Rt) << 49) |
|
|
(((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);
|
|
|
|
unsigned size = L ? 8 : 6;
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
|
|
break;
|
|
}
|
|
case AGX_OPCODE_STACK_ADJUST: {
|
|
struct agx_opcode_info info = agx_opcodes_info[I->op];
|
|
|
|
unsigned i0 = 0; // XXX
|
|
unsigned i1 = 1; // XXX
|
|
unsigned i2 = 2; // XXX
|
|
unsigned i3 = 0; // XXX
|
|
unsigned i4 = 0; // XXX
|
|
|
|
uint64_t raw =
|
|
info.encoding.exact | ((uint64_t)i0 << 8) | ((uint64_t)i1 << 26) |
|
|
((uint64_t)i2 << 36) | ((uint64_t)i3 << 44) | ((uint64_t)i4 << 50) |
|
|
((I->stack_size & 0xF) << 20) |
|
|
((uint64_t)((I->stack_size >> 4) & 0xF) << 32) | (1ull << 47) | // XXX
|
|
((uint64_t)(I->stack_size >> 8) << 56);
|
|
|
|
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
agx_pack_alu(emission, I);
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* Relative branches may be emitted before their targets, so we patch the
|
|
* binary to fix up the branch offsets after the main emit */
|
|
|
|
static void
|
|
agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)
|
|
{
|
|
/* Branch offset is 2 bytes into the jump instruction */
|
|
uint8_t *location = ((uint8_t *)emission->data) + fix.offset + 2;
|
|
|
|
off_t target = fix.skip_to_end ? fix.block->last_offset : fix.block->offset;
|
|
|
|
/* Offsets are relative to the jump instruction */
|
|
int32_t patch = (int32_t)target - (int32_t)fix.offset;
|
|
|
|
/* Patch the binary */
|
|
memcpy(location, &patch, sizeof(patch));
|
|
}
|
|
|
|
void
|
|
agx_pack_binary(agx_context *ctx, struct util_dynarray *emission)
|
|
{
|
|
struct util_dynarray fixups;
|
|
util_dynarray_init(&fixups, ctx);
|
|
|
|
agx_foreach_block(ctx, block) {
|
|
/* Relative to the start of the binary, the block begins at the current
|
|
* number of bytes emitted */
|
|
block->offset = emission->size;
|
|
|
|
agx_foreach_instr_in_block(block, ins) {
|
|
block->last_offset = emission->size;
|
|
agx_pack_instr(emission, &fixups, ins,
|
|
ctx->key->dev.needs_g13x_coherency);
|
|
}
|
|
}
|
|
|
|
util_dynarray_foreach(&fixups, struct agx_branch_fixup, fixup)
|
|
agx_fixup_branch(emission, *fixup);
|
|
|
|
util_dynarray_fini(&fixups);
|
|
|
|
/* Dougall calls the instruction in this footer "trap". Match the blob. */
|
|
if (!ctx->key->no_stop || ctx->is_preamble) {
|
|
for (unsigned i = 0; i < 8; ++i) {
|
|
uint16_t trap = agx_opcodes_info[AGX_OPCODE_TRAP].encoding.exact;
|
|
util_dynarray_append(emission, uint16_t, trap);
|
|
}
|
|
}
|
|
}
|