pan/bi: Drop on-board packing tests

These tests were designed before having access to canonical information
about the hardware and thus had two purposes:

* Validating that our understanding of an instruction (as defined by IR
  semantics) matches hardware behaviour -- obsoleted by new information.

* Validating that the IR packing code is correct -- obsoleted by
  rewriting the IR and rewriting the packing.

I dislike removing tests as much as the next person, but the value of
these will be nil by the end of the series, and they will prove burdensome.
Proper unit tests will be useful, however.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8213>
This commit is contained in:
Alyssa Rosenzweig 2020-11-27 10:24:27 -05:00 committed by Marge Bot
parent df09ada411
commit bcd0a285bb
6 changed files with 0 additions and 1768 deletions

View file

@@ -30,7 +30,6 @@
#include "compiler/nir_types.h"
#include "util/u_dynarray.h"
#include "bifrost_compile.h"
#include "test/bit.h"
static panfrost_program *
compile_shader(char **argv, bool vertex_only)
@@ -101,71 +100,6 @@ disassemble(const char *filename, bool verbose)
free(code);
}
/* Compiles the shader named on the command line and runs it once on the
 * hardware as a vertex job, checking the output varyings against a known
 * answer: each expected component is ubo[i] + attr[i]. */
static void
test_vertex(char **argv)
{
        void *memctx = NULL; /* TODO */
        struct panfrost_device *dev = bit_initialize(memctx);

        float ubo_data[] = {
                0.1, 0.2, 0.3, 0.4
        };

        float attr_data[] = {
                0.5, 0.6, 0.7, 0.8
        };

        float want[] = {
                0.6, 0.8, 1.0, 1.2
        };

        bit_vertex(dev, compile_shader(argv, true),
                   (uint32_t *) ubo_data, sizeof(ubo_data),
                   (uint32_t *) attr_data, sizeof(attr_data),
                   (uint32_t *) want, sizeof(want),
                   BIT_DEBUG_ALL);
}
/* Entry point for the automated packing tests: bring up the device and
 * sweep the packing suite, reporting failures only. */
static void
tests(void)
{
        struct panfrost_device *device = bit_initialize(NULL /* TODO: memctx */);
        bit_packing(device, BIT_DEBUG_FAIL);
}
/* Loads a pre-compiled shader binary from disk and submits it to the
 * hardware as a vertex job with no inputs and no expected outputs, purely
 * to observe its execution under BIT_DEBUG_ALL.
 *
 * Hardened over the original: the file size, the allocation and the read
 * are all checked, and we bail out instead of executing a partially-read
 * (garbage) shader blob. */
static void
run(const char *filename)
{
        FILE *fp = fopen(filename, "rb");
        assert(fp);

        fseek(fp, 0, SEEK_END);
        long filesize = ftell(fp);

        if (filesize < 0) {
                printf("Couldn't determine file size\n");
                fclose(fp);
                return;
        }

        rewind(fp);

        unsigned char *code = malloc(filesize);

        if (!code) {
                printf("Couldn't allocate %ld bytes\n", filesize);
                fclose(fp);
                return;
        }

        size_t res = fread(code, 1, filesize, fp);
        fclose(fp);

        if (res != (size_t) filesize) {
                /* Don't hand a truncated blob to the GPU */
                printf("Couldn't read full file\n");
                free(code);
                return;
        }

        void *memctx = NULL; /* TODO */
        struct panfrost_device *dev = bit_initialize(memctx);

        panfrost_program prog = {
                .compiled = {
                        .data = code,
                        .size = filesize
                },
        };

        bit_vertex(dev, &prog, NULL, 0, NULL, 0, NULL, 0, BIT_DEBUG_ALL);
        free(code);
}
int
main(int argc, char **argv)
{
@@ -180,12 +114,6 @@ main(int argc, char **argv)
disassemble(argv[2], false);
else if (strcmp(argv[1], "disasm-verbose") == 0)
disassemble(argv[2], true);
else if (strcmp(argv[1], "tests") == 0)
tests();
else if (strcmp(argv[1], "test-vertex") == 0)
test_vertex(&argv[2]);
else if (strcmp(argv[1], "run") == 0)
run(argv[2]);
else
unreachable("Unknown command. Valid: compile/disasm");

View file

@@ -1,711 +0,0 @@
/*
* Copyright (C) 2020 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors (Collabora):
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include <math.h>
#include "bit.h"
#include "util/half_float.h"
/* Reinterpretation union for one 64-bit register value: lets the
 * interpreter view the same bits as (un)signed integers of every lane
 * width or as IEEE floats. The f16 lanes hold raw half-float bit patterns,
 * converted with _mesa_half_to_float/_mesa_float_to_half as needed. */
typedef union {
        uint64_t u64;
        uint32_t u32;
        uint16_t u16[2];
        uint8_t u8[4];

        int64_t i64;
        int32_t i32;
        int16_t i16[2];
        int8_t i8[4];

        double f64;
        float f32;
        uint16_t f16[2]; /* raw half bits, not arithmetic uint16s */
} bit_t;
/* Interprets a subset of Bifrost IR required for automated testing */

/* Fetches one IR source operand for simulation. `index` encodes the
 * operand kind in its high bits (register, uniform, inline constant, zero,
 * or a pipeline passthrough); FMA selects which execution unit is reading.
 * Returns the raw operand bits, zero-extended to 64 bits.
 *
 * NOTE(review): the passthrough cases use `index & (BIR_INDEX_PASS | src)`,
 * a bitwise-overlap test rather than an equality test, so the first PASS
 * branch matches any PASS-tagged index whose low bits overlap. Presumably
 * only correct for the specific BIR_INDEX_*/BIFROST_SRC_* bit patterns in
 * use -- verify against those encodings. */
static uint64_t
bit_read(struct bit_state *s, bi_instruction *ins, unsigned index, nir_alu_type T, bool FMA)
{
        if (index & BIR_INDEX_REGISTER) {
                uint32_t reg = index & ~BIR_INDEX_REGISTER;
                assert(reg < 64);
                return s->r[reg];
        } else if (index & BIR_INDEX_UNIFORM) {
                unreachable("Uniform registers to be implemented");
        } else if (index & BIR_INDEX_CONSTANT) {
                /* Low bits of the index select a bit offset into the
                 * clause-wide 64-bit constant */
                return ins->constant.u64 >> (index & ~BIR_INDEX_CONSTANT);
        } else if (index & BIR_INDEX_ZERO) {
                return 0;
        } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_STAGE)) {
                /* Stage passthrough: ADD reads what FMA just produced */
                return FMA ? 0 : s->T;
        } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_FMA)) {
                return s->T0;
        } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_ADD)) {
                return s->T1;
        } else if (!index) {
                /* Placeholder */
                return 0;
        } else {
                unreachable("Invalid source");
        }
}
/* Retires one simulated result. FMA results always latch into the stage
 * passthrough (so ADD can consume them within the bundle); a register
 * destination additionally commits to the register file. A null index
 * means the instruction produced no architectural result. */
static void
bit_write(struct bit_state *s, unsigned index, nir_alu_type T, bit_t value, bool FMA)
{
        /* Stage passthrough updates unconditionally for FMA */
        if (FMA)
                s->T = value.u32;

        if (!index)
                return;

        if (index & BIR_INDEX_REGISTER) {
                uint32_t reg = index & ~BIR_INDEX_REGISTER;
                assert(reg < 64);
                s->r[reg] = value.u32;
        } else {
                unreachable("Invalid destination");
        }
}
/* Shorthands for converting between float and half-float bit patterns */
#define bh _mesa_float_to_half
#define bf _mesa_half_to_float

/* Lanewise appliers: run a 4-argument scalar helper over each lane of the
 * (up to four) sources, honouring per-source swizzles, writing `dest`.
 * They expect `ins`, `srcs` and `dest` in scope at the expansion site
 * (only used inside bit_step). */

#define bv2f16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.f16[c] = bh(fxn(bf(srcs[0].f16[ins->swizzle[0][c]]), \
                                        bf(srcs[1].f16[ins->swizzle[1][c]]), \
                                        bf(srcs[2].f16[ins->swizzle[2][c]]), \
                                        bf(srcs[3].f16[ins->swizzle[3][c]]))); \
        }

/* Writes the f16 lanes, which share storage with u16 in bit_t */
#define bv2i16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.f16[c] = fxn(srcs[0].u16[ins->swizzle[0][c]], \
                                srcs[1].u16[ins->swizzle[1][c]], \
                                srcs[2].u16[ins->swizzle[2][c]], \
                                srcs[3].u16[ins->swizzle[3][c]]); \
        }

#define bv4i8(fxn) \
        for (unsigned c = 0; c < 4; ++c) { \
                dest.u8[c] = fxn(srcs[0].u8[ins->swizzle[0][c]], \
                                srcs[1].u8[ins->swizzle[1][c]], \
                                srcs[2].u8[ins->swizzle[2][c]], \
                                srcs[3].u8[ins->swizzle[3][c]]); \
        }

#define bf32(fxn) dest.f32 = fxn(srcs[0].f32, srcs[1].f32, srcs[2].f32, srcs[3].f32)

/* NOTE(review): the fourth argument is read signed (.i32) while the rest
 * are unsigned -- presumably deliberate for ops whose 4th source is a
 * signed scale; verify per-op. */
#define bi32(fxn) dest.i32 = fxn(srcs[0].u32, srcs[1].u32, srcs[2].u32, srcs[3].i32)

/* Type dispatchers: pick the helper matching ins->dest_type and `break`
 * out of the enclosing switch in bit_step on a match; fall through to the
 * caller's trailing code (usually unreachable()) otherwise.
 *
 * NOTE(review): the f32 path dispatches to the first (double-precision)
 * helper and the f16 path to the second (single-precision) one --
 * presumably to model wider internal precision; confirm the intent. */

#define bfloat(fxn64, fxn32) \
        if (ins->dest_type == nir_type_float64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_float32) { \
                bf32(fxn64); \
                break; \
        } else if (ins->dest_type == nir_type_float16) { \
                bv2f16(fxn32); \
                break; \
        }

#define bint(fxn64, fxn32, fxn16, fxn8) \
        if (ins->dest_type == nir_type_int64 || ins->dest_type == nir_type_uint64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_int32 || ins->dest_type == nir_type_uint32) { \
                bi32(fxn32); \
                break; \
        } else if (ins->dest_type == nir_type_int16 || ins->dest_type == nir_type_uint16) { \
                bv2i16(fxn16); \
                break; \
        } else if (ins->dest_type == nir_type_int8 || ins->dest_type == nir_type_uint8) { \
                bv4i8(fxn8); \
                break; \
        }

/* Tries float dispatch, then integer dispatch, then aborts */
#define bpoly(name) \
        bfloat(bit_f64 ## name, bit_f32 ## name); \
        bint(bit_i64 ## name, bit_i32 ## name, bit_i16 ## name, bit_i8 ## name); \
        unreachable("Invalid type");

/* Generators for the per-type scalar helpers the dispatchers call:
 * bit_make_float_2 takes separate f32/f64 expressions (for libm variants),
 * bit_make_float uses one expression for both, bit_make_int instantiates
 * all four integer widths, and bit_make_poly does float + int. */

#define bit_make_float_2(name, expr32, expr64) \
        static inline double \
        bit_f64 ## name(double a, double b, double c, double d) \
        { \
                return expr64; \
        } \
        static inline float \
        bit_f32 ## name(float a, float b, float c, float d) \
        { \
                return expr32; \
        } \

#define bit_make_float(name, expr) \
        bit_make_float_2(name, expr, expr)

#define bit_make_int(name, expr) \
        static inline int64_t \
        bit_i64 ## name (int64_t a, int64_t b, int64_t c, int64_t d) \
        { \
                return expr; \
        } \
        \
        static inline int32_t \
        bit_i32 ## name (int32_t a, int32_t b, int32_t c, int32_t d) \
        { \
                return expr; \
        } \
        \
        static inline int16_t \
        bit_i16 ## name (int16_t a, int16_t b, int16_t c, int16_t d) \
        { \
                return expr; \
        } \
        \
        static inline int8_t \
        bit_i8 ## name (int8_t a, int8_t b, int8_t c, int8_t d) \
        { \
                return expr; \
        } \

#define bit_make_poly(name, expr) \
        bit_make_float(name, expr) \
        bit_make_int(name, expr) \

/* Scalar reference implementations of the simulated ALU ops */
bit_make_poly(add, a + b);
bit_make_int(sub, a - b);
bit_make_float(fma, (a * b) + c);
bit_make_poly(mov, a);
bit_make_poly(min, MIN2(a, b));
bit_make_poly(max, MAX2(a, b));
bit_make_float_2(floor, floorf(a), floor(a));
bit_make_float_2(ceil, ceilf(a), ceil(a));
bit_make_float_2(trunc, truncf(a), trunc(a));
bit_make_float_2(nearbyint, nearbyintf(a), nearbyint(a));
/* Modifiers */
/* Applies a Bifrost output modifier to a float result: .pos clamps below
 * at zero, .sat_signed clamps to [-1, 1], .sat clamps to [0, 1], and any
 * other value (i.e. no modifier) passes the result through untouched. */
static float
bit_outmod(float raw, enum bifrost_outmod mod)
{
        if (mod == BIFROST_POS)
                return MAX2(raw, 0.0);
        else if (mod == BIFROST_SAT_SIGNED)
                return CLAMP(raw, -1.0, 1.0);
        else if (mod == BIFROST_SAT)
                return SATURATE(raw);
        else
                return raw;
}
/* Applies Bifrost source modifiers to a float operand: absolute value
 * first, then negation, so abs+neg together yield -|x|.
 *
 * Fixed to call fabsf() rather than fabs(): the double-precision call
 * forced a pointless float->double->float round trip on a float-only
 * path. */
static float
bit_srcmod(float raw, bool abs, bool neg)
{
        if (abs)
                raw = fabsf(raw);

        if (neg)
                raw = -raw;

        return raw;
}
/* Expands to a chain of `return` statements evaluating comparison `cond`
 * on (left, right) -- returns from the *enclosing function*. Conditions
 * outside the six relational ops (e.g. ALWAYS) fall into `return true`. */
#define BIT_COND(cond, left, right) \
        if (cond == BI_COND_LT) return left < right; \
        else if (cond == BI_COND_LE) return left <= right; \
        else if (cond == BI_COND_GE) return left >= right; \
        else if (cond == BI_COND_GT) return left > right; \
        else if (cond == BI_COND_EQ) return left == right; \
        else if (cond == BI_COND_NE) return left != right; \
        else { return true; }

/* Evaluates comparison `cond` between lanes cl of l and cr of r, with the
 * operands interpreted per type T. f16 lanes are widened to float; signed
 * 32-bit reads go through .u32 into int32_t locals (same bit pattern, so
 * sign is preserved). Every path returns via BIT_COND. */
static bool
bit_eval_cond(enum bi_cond cond, bit_t l, bit_t r, nir_alu_type T, unsigned cl, unsigned cr)
{
        if (T == nir_type_float32) {
                BIT_COND(cond, l.f32, r.f32);
        } else if (T == nir_type_float16) {
                float left = bf(l.f16[cl]);
                float right = bf(r.f16[cr]);
                BIT_COND(cond, left, right);
        } else if (T == nir_type_int32) {
                int32_t left = l.u32;
                int32_t right = r.u32;
                BIT_COND(cond, left, right);
        } else if (T == nir_type_int16) {
                int16_t left = l.i16[cl];
                int16_t right = r.i16[cr];
                BIT_COND(cond, left, right);
        } else if (T == nir_type_uint32) {
                BIT_COND(cond, l.u32, r.u32);
        } else if (T == nir_type_uint16) {
                BIT_COND(cond, l.u16[cl], r.u16[cr]);
        } else {
                unreachable("Unknown type evaluated");
        }
}
/* Evaluates a comparison and materializes the boolean in the destination
 * lane format: 0 when false; when true, either 1 (GL convention) or an
 * all-ones mask of the lane width (D3D convention, `d3d`). Lane widths
 * above 32 bits are clamped to 32 -- 64-bit booleans are only partially
 * representable here. */
static unsigned
bit_cmp(enum bi_cond cond, bit_t l, bit_t r, nir_alu_type T, unsigned cl, unsigned cr, bool d3d)
{
        unsigned sz = MIN2(nir_alu_type_get_type_size(T), 32);
        unsigned all_ones = (sz == 32) ? (~0) : ((1 << sz) - 1);

        if (!bit_eval_cond(cond, l, r, T, cl, cr))
                return 0;

        return d3d ? all_ones : 1;
}
/* Reference implementations of the special-function-unit ops modeled by
 * the interpreter: reciprocal and reciprocal square root, both computed in
 * double precision and rounded to float on return. */
static float
biti_special(float Q, enum bi_special_op op)
{
        if (op == BI_SPECIAL_FRCP)
                return 1.0 / Q;

        if (op == BI_SPECIAL_FRSQ) {
                double recip_root = 1.0 / sqrt(Q);
                return recip_root;
        }

        unreachable("Invalid special");
}
/* For BI_CONVERT. */

/* Maps a Bifrost roundmode onto the fromfp()-style rounding-direction
 * constants consumed by the TS 18661 conversion calls below. */
#define _AS_ROUNDMODE(mode) \
        ((mode == BIFROST_RTZ) ? FP_INT_TOWARDZERO : \
        (mode == BIFROST_RTE) ? FP_INT_TONEAREST : \
        (mode == BIFROST_RTN) ? FP_INT_DOWNWARD : \
        FP_INT_UPWARD)

/* Converts lane C of src (interpreted per T) to float32 */
static float
bit_as_float32(nir_alu_type T, bit_t src, unsigned C)
{
        switch (T) {
        case nir_type_int32: return src.i32;
        case nir_type_uint32: return src.u32;
        case nir_type_float16: return bf(src.u16[C]);
        default: unreachable("Invalid");
        }
}

/* Converts lane C of src to uint32, honouring rm on the f32 path via
 * ufromfpf. NOTE(review): the f16 path widens to float and relies on the
 * implicit float->uint conversion (truncation), ignoring rm -- confirm
 * this matches hardware for non-RTZ roundmodes. */
static uint32_t
bit_as_uint32(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
{
        switch (T) {
        case nir_type_float16: return bf(src.u16[C]);
        case nir_type_float32: return ufromfpf(src.f32, _AS_ROUNDMODE(rm), 32);
        default: unreachable("Invalid");
        }
}

/* Converts lane C of src to int32; same roundmode caveat as above for the
 * f16 path */
static int32_t
bit_as_int32(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
{
        switch (T) {
        case nir_type_float16: return bf(src.u16[C]);
        case nir_type_float32: return fromfpf(src.f32, _AS_ROUNDMODE(rm), 32);
        default: unreachable("Invalid");
        }
}

/* Converts lane C of src to a half-float bit pattern (round-to-nearest via
 * _mesa_float_to_half; no explicit roundmode parameter) */
static uint16_t
bit_as_float16(nir_alu_type T, bit_t src, unsigned C)
{
        switch (T) {
        case nir_type_int32: return bh(src.i32);
        case nir_type_uint32: return bh(src.u32);
        case nir_type_float32: return bh(src.f32);
        case nir_type_int16: return bh(src.i16[C]);
        case nir_type_uint16: return bh(src.u16[C]);
        default: unreachable("Invalid");
        }
}

/* Converts lane C of src to uint16. NOTE(review): the f32 path is an
 * implicit truncating conversion that ignores rm; integer paths narrow by
 * implicit conversion (modular wrap). Confirm against hardware. */
static uint16_t
bit_as_uint16(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
{
        switch (T) {
        case nir_type_int32: return src.i32;
        case nir_type_uint32: return src.u32;
        case nir_type_float16: return ufromfpf(bf(src.u16[C]), _AS_ROUNDMODE(rm), 16);
        case nir_type_float32: return src.f32;
        default: unreachable("Invalid");
        }
}

/* Converts lane C of src to int16; same caveats as bit_as_uint16 */
static int16_t
bit_as_int16(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
{
        switch (T) {
        case nir_type_int32: return src.i32;
        case nir_type_uint32: return src.u32;
        case nir_type_float16: return fromfpf(bf(src.u16[C]), _AS_ROUNDMODE(rm), 16);
        case nir_type_float32: return src.f32;
        default: unreachable("Invalid");
        }
}
/* Decomposes x into f * 2^e with |f| in [0.75, 1.5) -- the frexp variant
 * used by the Bifrost log tables. Standard frexpf reduces |x| to [0.5, 1);
 * when f lands below 0.75 we double it and decrement the exponent.
 *
 * Bug fix: the sign was previously reattached by testing `xa < 0.0`, but
 * xa = fabs(x) is never negative, so negative inputs silently lost their
 * sign. Test the original argument x instead. (Also use fabsf/float
 * consistently on this float-only path.) */
static float
frexp_log(float x, int *e)
{
        /* Ignore sign until end */
        float xa = fabsf(x);

        /* frexp reduces to [0.5, 1) */
        float f = frexpf(xa, e);

        /* reduce to [0.75, 1.5) */
        if (f < 0.75) {
                f *= 2.0;
                (*e)--;
        }

        /* Reattach sign */
        if (x < 0.0)
                f = -f;

        return f;
}
/* Simulates one scheduled Bifrost instruction against interpreter state s,
 * reading and writing registers and pipeline passthroughs as the FMA or
 * ADD unit (selected by the FMA flag) would. Only ALU-style ops are
 * modeled; fragment and memory ops abort via unreachable(). The bpoly /
 * bfloat / bint dispatch macros either `break` out of the op switch on a
 * type match or fall through to an unreachable(). */
void
bit_step(struct bit_state *s, bi_instruction *ins, bool FMA)
{
        /* First, load sources */
        bit_t srcs[BIR_SRC_COUNT] = { 0 };

        bi_foreach_src(ins, src)
                srcs[src].u64 = bit_read(s, ins, ins->src[src], ins->src_types[src], FMA);

        /* Apply source modifiers if we need to */
        if (bi_has_source_mods(ins)) {
                bi_foreach_src(ins, src) {
                        if (ins->src_types[src] == nir_type_float16) {
                                for (unsigned c = 0; c < 2; ++c) {
                                        srcs[src].f16[c] = bh(bit_srcmod(bf(srcs[src].f16[c]),
                                                        ins->src_abs[src],
                                                        ins->src_neg[src]));
                                }
                        } else if (ins->src_types[src] == nir_type_float32) {
                                srcs[src].f32 = bit_srcmod(srcs[src].f32,
                                                ins->src_abs[src],
                                                ins->src_neg[src]);
                        }
                }
        }

        /* Next, do the action of the instruction */
        bit_t dest = { 0 };

        switch (ins->type) {
        case BI_ADD:
                bpoly(add);

        case BI_BRANCH:
                unreachable("Unsupported op");

        case BI_CMP: {
                nir_alu_type T = ins->src_types[0];
                unsigned sz = nir_alu_type_get_type_size(T);

                /* D3D-style all-ones booleans throughout (`true`) */
                if (sz == 32 || sz == 64) {
                        dest.u32 = bit_cmp(ins->cond, srcs[0], srcs[1], T, 0, 0, true);
                } else if (sz == 16) {
                        for (unsigned c = 0; c < 2; ++c) {
                                dest.u16[c] = bit_cmp(ins->cond, srcs[0], srcs[1],
                                                T, ins->swizzle[0][c], ins->swizzle[1][c],
                                                true);
                        }
                } else if (sz == 8) {
                        for (unsigned c = 0; c < 4; ++c) {
                                dest.u8[c] = bit_cmp(ins->cond, srcs[0], srcs[1],
                                                T, ins->swizzle[0][c], ins->swizzle[1][c],
                                                true);
                        }
                } else {
                        unreachable("Invalid");
                }

                break;
        }

        case BI_BITWISE: {
                /* Apply inverts first */
                if (ins->bitwise.src1_invert)
                        srcs[1].u64 = ~srcs[1].u64;

                /* TODO: Shifting */
                assert(srcs[2].u32 == 0);

                if (ins->op.bitwise == BI_BITWISE_AND)
                        dest.u64 = srcs[0].u64 & srcs[1].u64;
                else if (ins->op.bitwise == BI_BITWISE_OR)
                        dest.u64 = srcs[0].u64 | srcs[1].u64;
                else if (ins->op.bitwise == BI_BITWISE_XOR)
                        dest.u64 = srcs[0].u64 ^ srcs[1].u64;
                else
                        unreachable("Unsupported op");

                if (ins->bitwise.dest_invert)
                        dest.u64 = ~dest.u64;

                break;
        }

        case BI_CONVERT: {
                /* If it exists */
                unsigned comp = ins->swizzle[0][1];

                if (ins->dest_type == nir_type_float32)
                        dest.f32 = bit_as_float32(ins->src_types[0], srcs[0], comp);
                else if (ins->dest_type == nir_type_uint32)
                        dest.u32 = bit_as_uint32(ins->src_types[0], srcs[0], comp, ins->roundmode);
                else if (ins->dest_type == nir_type_int32)
                        dest.i32 = bit_as_int32(ins->src_types[0], srcs[0], comp, ins->roundmode);
                else if (ins->dest_type == nir_type_float16) {
                        /* 16-bit destinations convert both lanes */
                        dest.u16[0] = bit_as_float16(ins->src_types[0], srcs[0], ins->swizzle[0][0]);
                        dest.u16[1] = bit_as_float16(ins->src_types[0], srcs[0], ins->swizzle[0][1]);
                } else if (ins->dest_type == nir_type_uint16) {
                        dest.u16[0] = bit_as_uint16(ins->src_types[0], srcs[0], ins->swizzle[0][0], ins->roundmode);
                        dest.u16[1] = bit_as_uint16(ins->src_types[0], srcs[0], ins->swizzle[0][1], ins->roundmode);
                } else if (ins->dest_type == nir_type_int16) {
                        dest.i16[0] = bit_as_int16(ins->src_types[0], srcs[0], ins->swizzle[0][0], ins->roundmode);
                        dest.i16[1] = bit_as_int16(ins->src_types[0], srcs[0], ins->swizzle[0][1], ins->roundmode);
                } else {
                        unreachable("Unknown convert type");
                }

                break;
        }

        case BI_CSEL: {
                /* ALWAYS means srcs[0] itself is the boolean; otherwise the
                 * condition is evaluated on srcs[0] vs srcs[1] */
                bool direct = ins->cond == BI_COND_ALWAYS;
                unsigned sz = nir_alu_type_get_type_size(ins->src_types[0]);

                if (sz == 32) {
                        bool cond = direct ? srcs[0].u32 :
                                bit_eval_cond(ins->cond, srcs[0], srcs[1], ins->src_types[0], 0, 0);

                        dest = cond ? srcs[2] : srcs[3];
                } else if (sz == 16) {
                        for (unsigned c = 0; c < 2; ++c) {
                                bool cond = direct ? srcs[0].u16[c] :
                                        bit_eval_cond(ins->cond, srcs[0], srcs[1], ins->src_types[0], c, c);

                                dest.u16[c] = cond ? srcs[2].u16[c] : srcs[3].u16[c];
                        }
                } else {
                        unreachable("Remaining types todo");
                }

                break;
        }

        case BI_FMA: {
                bfloat(bit_f64fma, bit_f32fma);
                unreachable("Unknown type");
        }

        case BI_FREXP: {
                if (ins->src_types[0] != nir_type_float32)
                        unreachable("Unknown frexp type");

                /* FREXPE_LOG keeps only the exponent, written as i32 */
                if (ins->op.frexp == BI_FREXPE_LOG)
                        frexp_log(srcs[0].f32, &dest.i32);
                else
                        unreachable("Unknown frexp");

                break;
        }

        case BI_IMATH: {
                if (ins->op.imath == BI_IMATH_ADD) {
                        bint(bit_i64add, bit_i32add, bit_i16add, bit_i8add);
                } else if (ins->op.imath == BI_IMATH_SUB) {
                        bint(bit_i64sub, bit_i32sub, bit_i16sub, bit_i8sub);
                } else {
                        unreachable("Unsupported op");
                }

                break;
        }

        case BI_MINMAX: {
                /* bpoly either breaks out of the switch or aborts, so there
                 * is no fallthrough into BI_MOV in practice */
                if (ins->op.minmax == BI_MINMAX_MIN) {
                        bpoly(min);
                } else {
                        bpoly(max);
                }
        }

        case BI_MOV:
                bpoly(mov);

        case BI_REDUCE_FMA: {
                if (ins->src_types[0] != nir_type_float32)
                        unreachable("Unknown reduce type");

                if (ins->op.reduce == BI_REDUCE_ADD_FREXPM) {
                        /* Only the mantissa is wanted; the exponent output
                         * is discarded */
                        int _nop = 0;
                        float f = frexp_log(srcs[1].f32, &_nop);
                        dest.f32 = srcs[0].f32 + f;
                } else {
                        unreachable("Unknown reduce");
                }

                break;
        }

        case BI_SPECIAL_FMA:
        case BI_SPECIAL_ADD: {
                assert(nir_alu_type_get_base_type(ins->dest_type) == nir_type_float);
                assert(ins->dest_type != nir_type_float64);

                if (ins->op.special == BI_SPECIAL_EXP2_LOW) {
                        assert(ins->dest_type == nir_type_float32);
                        dest.f32 = exp2f(srcs[1].f32);
                        break;
                }

                float Q = (ins->dest_type == nir_type_float16) ?
                        bf(srcs[0].u16[ins->swizzle[0][0]]) :
                        srcs[0].f32;

                float R = biti_special(Q, ins->op.special);

                if (ins->dest_type == nir_type_float16) {
                        dest.f16[0] = bh(R);

                        /* Empirical off-by-one fixup against hardware */
                        if (!ins->swizzle[0][0] && ins->op.special == BI_SPECIAL_FRSQ) {
                                /* Sorry. */
                                dest.f16[0]++;
                        }
                } else {
                        dest.f32 = R;
                }
                break;
        }

        case BI_TABLE: {
                if (ins->op.table == BI_TABLE_LOG2_U_OVER_U_1_LOW) {
                        assert(ins->dest_type == nir_type_float32);
                        int _nop = 0;
                        float f = frexp_log(srcs[0].f32, &_nop);
                        dest.f32 = log2f(f) / (f - 1.0);
                        /* Empirical ULP fixup against hardware */
                        dest.u32++; /* Sorry. */
                } else {
                        unreachable("Unknown table op");
                }

                break;
        }

        case BI_SELECT: {
                /* Builds the destination from lane `swizzle[c][0]` of each
                 * successive source */
                if (ins->src_types[0] == nir_type_uint16) {
                        for (unsigned c = 0; c < 2; ++c)
                                dest.u16[c] = srcs[c].u16[ins->swizzle[c][0]];
                } else if (ins->src_types[0] == nir_type_uint8) {
                        for (unsigned c = 0; c < 4; ++c)
                                dest.u8[c] = srcs[c].u8[ins->swizzle[c][0]];
                } else {
                        unreachable("Unknown type");
                }
                break;
        }

        case BI_ROUND: {
                if (ins->roundmode == BIFROST_RTP) {
                        bfloat(bit_f64ceil, bit_f32ceil);
                } else if (ins->roundmode == BIFROST_RTN) {
                        bfloat(bit_f64floor, bit_f32floor);
                } else if (ins->roundmode == BIFROST_RTE) {
                        bfloat(bit_f64nearbyint, bit_f32nearbyint);
                } else if (ins->roundmode == BIFROST_RTZ) {
                        bfloat(bit_f64trunc, bit_f32trunc);
                } else
                        unreachable("Invalid");

                break;
        }

        /* We only interpret vertex shaders */
        case BI_DISCARD:
        case BI_LOAD_VAR:
        case BI_ATEST:
        case BI_BLEND:
                unreachable("Fragment op used in interpreter");

        /* Modeling main memory is more than I bargained for */
        case BI_LOAD_UNIFORM:
        case BI_LOAD_ATTR:
        case BI_LOAD_VAR_ADDRESS:
        case BI_LOAD:
        case BI_STORE:
        case BI_STORE_VAR:
        case BI_TEXS:
        case BI_TEXC:
        case BI_TEXC_DUAL:
                unreachable("Unsupported I/O in interpreter");

        default:
                unreachable("Unsupported op");
        }

        /* Apply _MSCALE */
        if ((ins->type == BI_FMA || ins->type == BI_ADD) && ins->op.mscale) {
                unsigned idx = (ins->type == BI_FMA) ? 3 : 2;

                assert(ins->src_types[idx] == nir_type_int32);
                assert(ins->dest_type == nir_type_float32);

                int32_t scale = srcs[idx].i32;
                dest.f32 *= exp2f(scale);
        }

        /* Apply outmod */
        if (bi_has_outmod(ins) && ins->outmod != BIFROST_NONE) {
                if (ins->dest_type == nir_type_float16) {
                        for (unsigned c = 0; c < 2; ++c)
                                dest.f16[c] = bh(bit_outmod(bf(dest.f16[c]), ins->outmod));
                } else {
                        dest.f32 = bit_outmod(dest.f32, ins->outmod);
                }
        }

        /* Finally, store the result */
        bit_write(s, ins->dest, ins->dest_type, dest, FMA);

        /* For ADD - change out the passthrough */
        if (!FMA) {
                s->T0 = s->T;
                s->T1 = dest.u32;
        }
}
#undef bh
#undef bf

View file

@@ -1,254 +0,0 @@
/*
* Copyright (C) 2020 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors (Collabora):
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include "bit.h"
#include "panfrost/lib/decode.h"
#include "drm-uapi/panfrost_drm.h"
#include "panfrost/lib/pan_encoder.h"
/* Standalone compiler tests submitting jobs directly to the hardware. Uses the
* `bit` prefix for `BIfrost Tests` and because bit sounds wicked cool. */
/* Allocates an executable BO of the requested size and registers its CPU
 * mapping with pandecode, so later GPU-visible contents can be decoded. */
static struct panfrost_bo *
bit_bo_create(struct panfrost_device *dev, size_t size)
{
        struct panfrost_bo *buffer = panfrost_bo_create(dev, size, PAN_BO_EXECUTE);

        pandecode_inject_mmap(buffer->ptr.gpu, buffer->ptr.cpu,
                              buffer->size, NULL);

        return buffer;
}
/* Opens the panfrost render node and brings up a device handle for test
 * submission, initializing pandecode and printing the probed GPU ID.
 * Aborts (via unreachable) if no accessible panfrost device exists. */
struct panfrost_device *
bit_initialize(void *memctx)
{
        int drm_fd = drmOpenWithType("panfrost", NULL, DRM_NODE_RENDER);
        if (drm_fd < 0)
                unreachable("No panfrost device found. Try chmod?");

        struct panfrost_device *device =
                rzalloc(memctx, struct panfrost_device);

        panfrost_open_device(memctx, drm_fd, device);
        pandecode_initialize(true);

        printf("%X\n", device->gpu_id);
        return device;
}
/* Wraps a single job payload in a generic job header, submits it to the
 * kernel with the caller-supplied BO list, and blocks until the syncobj
 * signals. Returns true unconditionally on reaching the end; the ioctls
 * themselves are assert-checked.
 *
 * NOTE(review): calloc's arguments are (size, count) rather than the
 * conventional (count, size) -- harmless since the product is the same,
 * but worth normalizing. The job BO and syncobj are never released;
 * acceptable for a short-lived test process.
 * NOTE(review): the job descriptor BO itself is not listed in bo_handles;
 * verify against the panfrost UAPI whether the kernel maps it via `jc`
 * before reusing this pattern. */
static bool
bit_submit(struct panfrost_device *dev,
                enum mali_job_type T,
                void *payload, size_t payload_size,
                struct panfrost_bo **bos, size_t bo_count, enum bit_debug debug)
{
        /* Job descriptor: generic header followed by the type-specific
         * payload */
        struct panfrost_bo *job = bit_bo_create(dev, 4096);

        pan_pack(job->ptr.cpu, JOB_HEADER, cfg) {
                cfg.type = T;
                cfg.index = 1;
        }

        memcpy(job->ptr.cpu + MALI_JOB_HEADER_LENGTH, payload, payload_size);

        uint32_t *bo_handles = calloc(sizeof(uint32_t), bo_count);

        for (unsigned i = 0; i < bo_count; ++i)
                bo_handles[i] = bos[i]->gem_handle;

        uint32_t syncobj = 0;
        int ret = 0;

        ret = drmSyncobjCreate(dev->fd, DRM_SYNCOBJ_CREATE_SIGNALED, &syncobj);
        assert(!ret);

        struct drm_panfrost_submit submit = {
                .jc = job->ptr.gpu,
                .bo_handles = (uintptr_t) bo_handles,
                .bo_handle_count = bo_count,
                .out_sync = syncobj,
        };

        ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
        assert(!ret);
        free(bo_handles);

        /* Block until the job retires before the caller inspects results */
        drmSyncobjWait(dev->fd, &syncobj, 1, INT64_MAX, 0, NULL);

        if (debug >= BIT_DEBUG_ALL)
                pandecode_jc(submit.jc, true, dev->gpu_id, false);

        return true;
}
/* Checks that the device is alive and responding to basic jobs as a sanity
 * check - prerequisite to running code on the device. We test this via a
 * WRITE_VALUE job */

bool
bit_sanity_check(struct panfrost_device *dev)
{
        struct panfrost_bo *scratch = bit_bo_create(dev, 65536);

        /* Seed a nonzero byte so we can tell the job actually executed */
        ((uint32_t *) scratch->ptr.cpu)[0] = 0xAA;

        struct mali_write_value_job_payload_packed payload;

        pan_pack(&payload, WRITE_VALUE_JOB_PAYLOAD, cfg) {
                cfg.address = scratch->ptr.gpu;
                cfg.type = MALI_WRITE_VALUE_TYPE_ZERO;
        };

        struct panfrost_bo *bos[] = { scratch };
        /* NOTE(review): `false` is passed for the enum bit_debug argument;
         * presumably equivalent to the quietest level -- confirm. */
        bool success = bit_submit(dev, MALI_JOB_TYPE_WRITE_VALUE,
                        &payload, sizeof(payload), bos, 1, false);

        /* The WRITE_VALUE job should have zeroed the seeded byte */
        return success && (((uint8_t *) scratch->ptr.cpu)[0] == 0x0);
}
/* Constructs a vertex job */

/* Runs a compiled program once as a single-invocation vertex/compute job.
 * UBO 0 is loaded with iubo, attribute 0 with iattr; after completion the
 * varying output buffer is memcmp'd against `expected` (when sz_expected
 * is nonzero). Returns true on submit success and, when checked, matching
 * output. Debug dumps go to stderr per `debug`.
 *
 * Fix over the original: the failure/debug dumps opened a "[" list but
 * terminated it with a bare "\n", never printing the closing bracket;
 * both dump paths now emit "]\n". */
bool
bit_vertex(struct panfrost_device *dev, panfrost_program *prog,
                uint32_t *iubo, size_t sz_ubo,
                uint32_t *iattr, size_t sz_attr,
                uint32_t *expected, size_t sz_expected, enum bit_debug debug)
{
        /* Descriptor BOs; inputs live at offset 1024 within their BO and
         * the attribute/varying buffer descriptors at offset 256 */
        struct panfrost_bo *shader = bit_bo_create(dev, prog->compiled.size);
        struct panfrost_bo *shader_desc = bit_bo_create(dev, 4096);
        struct panfrost_bo *ubo = bit_bo_create(dev, 4096);
        struct panfrost_bo *var = bit_bo_create(dev, 4096);
        struct panfrost_bo *attr = bit_bo_create(dev, 4096);

        pan_pack(attr->ptr.cpu, ATTRIBUTE, cfg) {
                cfg.format = (MALI_RGBA32UI << 12);
                cfg.offset_enable = true;
        }

        pan_pack(var->ptr.cpu, ATTRIBUTE, cfg) {
                cfg.format = (MALI_RGBA32UI << 12);
                cfg.offset_enable = false;
        }

        pan_pack(var->ptr.cpu + 256, ATTRIBUTE_BUFFER, cfg) {
                cfg.pointer = (var->ptr.gpu + 1024);
                cfg.size = 1024;
        }

        pan_pack(attr->ptr.cpu + 256, ATTRIBUTE_BUFFER, cfg) {
                cfg.pointer = (attr->ptr.gpu + 1024);
                cfg.size = 1024;
        }

        pan_pack(ubo->ptr.cpu, UNIFORM_BUFFER, cfg) {
                cfg.entries = sz_ubo / 16;
                cfg.pointer = ubo->ptr.gpu + 1024;
        }

        if (sz_ubo)
                memcpy(ubo->ptr.cpu + 1024, iubo, sz_ubo);

        if (sz_attr)
                memcpy(attr->ptr.cpu + 1024, iattr, sz_attr);

        struct panfrost_bo *shmem = bit_bo_create(dev, 4096);

        pan_pack(shmem->ptr.cpu, LOCAL_STORAGE, cfg) {
                cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
        }

        pan_pack(shader_desc->ptr.cpu, RENDERER_STATE, cfg) {
                cfg.shader.shader = shader->ptr.gpu;
                cfg.shader.attribute_count = cfg.shader.varying_count = 1;
                cfg.properties.uniform_buffer_count = 1;
                cfg.properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
                cfg.preload.vertex.vertex_id = true;
                cfg.preload.vertex.instance_id = true;
                cfg.preload.uniform_count = (sz_ubo / 16);
        }

        memcpy(shader->ptr.cpu, prog->compiled.data, prog->compiled.size);

        struct mali_compute_job_packed job;

        pan_section_pack(&job, COMPUTE_JOB, PARAMETERS, cfg) {
                cfg.job_task_split = 5;
        }

        pan_section_pack(&job, COMPUTE_JOB, DRAW, cfg) {
                cfg.draw_descriptor_is_64b = true;
                cfg.thread_storage = shmem->ptr.gpu;
                cfg.state = shader_desc->ptr.gpu;
                cfg.push_uniforms = ubo->ptr.gpu + 1024;
                cfg.uniform_buffers = ubo->ptr.gpu;
                cfg.attributes = attr->ptr.gpu;
                cfg.attribute_buffers = attr->ptr.gpu + 256;
                cfg.varyings = var->ptr.gpu;
                cfg.varying_buffers = var->ptr.gpu + 256;
        }

        /* Single 1x1x1 invocation */
        void *invocation = pan_section_ptr(&job, COMPUTE_JOB, INVOCATION);
        panfrost_pack_work_groups_compute(invocation,
                        1, 1, 1,
                        1, 1, 1,
                        true);

        struct panfrost_bo *bos[] = {
                shmem, shader, shader_desc, ubo, var, attr
        };

        /* bit_submit writes the job header itself, so pass the payload
         * starting just past it */
        bool succ = bit_submit(dev, MALI_JOB_TYPE_VERTEX,
                        ((void *)&job) + MALI_JOB_HEADER_LENGTH,
                        MALI_COMPUTE_JOB_LENGTH - MALI_JOB_HEADER_LENGTH,
                        bos, ARRAY_SIZE(bos), debug);

        /* Check the output varyings */
        uint32_t *output = (uint32_t *) (var->ptr.cpu + 1024);
        float *foutput = (float *) output;
        float *fexpected = (float *) expected;

        if (sz_expected) {
                unsigned comp = memcmp(output, expected, sz_expected);
                succ &= (comp == 0);

                if (comp && (debug >= BIT_DEBUG_FAIL)) {
                        fprintf(stderr, "expected [");

                        for (unsigned i = 0; i < (sz_expected >> 2); ++i)
                                fprintf(stderr, "%08X /* %f */ ", expected[i], fexpected[i]);

                        fprintf(stderr, "], got [");

                        for (unsigned i = 0; i < (sz_expected >> 2); ++i)
                                fprintf(stderr, "%08X /* %f */ ", output[i], foutput[i]);

                        fprintf(stderr, "]\n");
                }
        } else if (debug == BIT_DEBUG_ALL) {
                fprintf(stderr, "got [");

                for (unsigned i = 0; i < 4; ++i)
                        fprintf(stderr, "%08X /* %f */ ", output[i], foutput[i]);

                fprintf(stderr, "]\n");
        }

        return succ;
}

View file

@@ -1,654 +0,0 @@
/*
* Copyright (C) 2020 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors (Collabora):
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include "bit.h"
#include "bi_print.h"
#include "util/half_float.h"
#include "bifrost/disassemble.h"
/* Instruction packing tests */
/* Packs a single instruction into a complete executable clause stream and
 * runs it on the hardware, comparing the actual register results against
 * the bit_step software simulation of the same instruction.
 *
 * The generated shader is four single-bundle clauses:
 *   0. LOAD_UNIFORM     -- preload r0-r3 with the test input from UBO 0
 *   1. the instruction under test (FMA or ADD slot per `fma`)
 *   2. LOAD_VAR_ADDRESS -- compute the varying store address into r32+
 *   3. STORE_VAR        -- write r0-r3 back so the CPU can compare
 *
 * NOTE(review): the ralloc'd ctx is never freed -- a per-test leak that is
 * tolerable for a short-lived test binary. */
static void
bit_test_single(struct panfrost_device *dev,
                bi_instruction *ins,
                uint32_t input[4],
                bool fma, enum bit_debug debug)
{
        /* First, simulate the instruction */
        struct bit_state s = { 0 };
        memcpy(s.r, input, 16);
        bit_step(&s, ins, fma);

        /* Next, wrap it up and pack it */

        bi_instruction ldubo = {
                .type = BI_LOAD_UNIFORM,
                .segment = BI_SEGMENT_UBO,
                .src = {
                        BIR_INDEX_CONSTANT,
                        BIR_INDEX_ZERO
                },
                .src_types = {
                        nir_type_uint32,
                        nir_type_uint32,
                },
                .dest = BIR_INDEX_REGISTER | 0,
                .dest_type = nir_type_uint32,
                .vector_channels = 4,
        };

        bi_instruction ldva = {
                .type = BI_LOAD_VAR_ADDRESS,
                .vector_channels = 3,
                .dest = BIR_INDEX_REGISTER | 32,
                .dest_type = nir_type_uint32,
                .format = nir_type_uint32,
                .src = {
                        BIR_INDEX_CONSTANT,
                        /* r61/r62 are the preloaded vertex/instance IDs */
                        BIR_INDEX_REGISTER | 61,
                        BIR_INDEX_REGISTER | 62,
                        0,
                },
                .src_types = {
                        nir_type_uint32,
                        nir_type_uint32,
                        nir_type_uint32,
                        nir_type_uint32,
                }
        };

        bi_instruction st = {
                .type = BI_STORE_VAR,
                .src = {
                        BIR_INDEX_REGISTER | 0,
                        ldva.dest, ldva.dest + 1, ldva.dest + 2,
                },
                .src_types = {
                        nir_type_uint32,
                        nir_type_uint32, nir_type_uint32, nir_type_uint32,
                },
                .vector_channels = 4
        };

        /* Minimal compiler context/block so bi_pack accepts the
         * hand-scheduled clauses */
        bi_context *ctx = rzalloc(NULL, bi_context);
        ctx->stage = MESA_SHADER_VERTEX;

        bi_block *blk = rzalloc(ctx, bi_block);
        blk->scheduled = true;

        blk->base.predecessors = _mesa_set_create(blk,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

        list_inithead(&ctx->blocks);
        list_addtail(&blk->base.link, &ctx->blocks);
        list_inithead(&blk->clauses);

        bi_clause *clauses[4] = {
                rzalloc(ctx, bi_clause),
                rzalloc(ctx, bi_clause),
                rzalloc(ctx, bi_clause),
                rzalloc(ctx, bi_clause)
        };

        for (unsigned i = 0; i < 4; ++i) {
                clauses[i]->bundle_count = 1;
                list_addtail(&clauses[i]->link, &blk->clauses);
                /* Alternate scoreboard slots; each clause waits on the
                 * previous one so the sequence executes in order */
                clauses[i]->scoreboard_id = (i & 1);

                if (i) {
                        clauses[i]->dependencies = 1 << (~i & 1);
                        clauses[i]->staging_barrier = true;
                }
        }

        clauses[0]->bundles[0].add = &ldubo;
        clauses[0]->message_type = BIFROST_MESSAGE_ATTRIBUTE;

        if (fma)
                clauses[1]->bundles[0].fma = ins;
        else
                clauses[1]->bundles[0].add = ins;

        clauses[0]->constant_count = 1;
        clauses[1]->constant_count = 1;
        clauses[1]->constants[0] = ins->constant.u64;

        clauses[2]->bundles[0].add = &ldva;
        clauses[3]->bundles[0].add = &st;

        clauses[2]->message_type = BIFROST_MESSAGE_ATTRIBUTE;
        clauses[3]->message_type = BIFROST_MESSAGE_STORE;

        panfrost_program prog = { 0 };
        util_dynarray_init(&prog.compiled, NULL);
        bi_pack(ctx, &prog.compiled);

        /* Run on hardware with the same input, expecting the simulated
         * register file as output */
        bool succ = bit_vertex(dev, &prog, input, 16, NULL, 0,
                        s.r, 16, debug);

        if (debug >= BIT_DEBUG_ALL || (!succ && debug >= BIT_DEBUG_FAIL)) {
                bi_print_shader(ctx, stderr);
                disassemble_bifrost(stderr, prog.compiled.data, prog.compiled.size, true);
        }

        if (!succ)
                fprintf(stderr, "FAIL\n");
}
/* Utilities for generating tests */
/* Fills mem with four pseudo-random floats in [-7.9375, 8.0], quantized to
 * multiples of 1/16 so simple arithmetic on them stays exactly
 * representable. */
static void
bit_generate_float4(float *mem)
{
        for (unsigned i = 0; i < 4; ++i) {
                int quantized = (rand() & 255) - 127;
                mem[i] = (float) quantized / 16.0;
        }
}
/* Fills mem with eight pseudo-random half-precision values, drawn from the
 * same quantized [-7.9375, 8.0] distribution as bit_generate_float4 and
 * stored as half-float bit patterns. */
static void
bit_generate_half8(uint16_t *mem)
{
        for (unsigned i = 0; i < 8; ++i) {
                float value = ((float) (rand() & 255) - 127) / 16.0;
                mem[i] = _mesa_float_to_half(value);
        }
}
/* Builds a skeleton instruction of class C with `argc` register sources:
 * destination is r0, sources are r0..r(argc-1), and every type (destination
 * and sources alike) is (base | size). */
static bi_instruction
bit_ins(enum bi_class C, unsigned argc, nir_alu_type base, unsigned size)
{
        nir_alu_type T = base | size;

        bi_instruction ins = {
                .type = C,
                .dest = BIR_INDEX_REGISTER | 0,
                .dest_type = T,
        };

        unsigned i;
        for (i = 0; i < argc; ++i) {
                ins.src_types[i] = T;
                ins.src[i] = BIR_INDEX_REGISTER | i;
        }

        return ins;
}
/* Iterates `swz` over the half-precision swizzle selections for an
 * instruction with `args` sources: one bit per 16-bit channel, so two bits
 * per source and 2^(2*args) combinations in total. For other sizes there is
 * nothing to enumerate, so the loop body runs exactly once with swz == 0.
 * Decode `swz` into an instruction with bit_apply_swizzle(). */
#define BIT_FOREACH_SWIZZLE(swz, args, sz) \
        for (unsigned swz = 0; swz < ((sz == 16) ? (1 << (2 * args)) : 1); ++swz)
/* Decodes the packed swizzle selection `swz` (as enumerated by
 * BIT_FOREACH_SWIZZLE) into the first `args` sources of `ins`. For 16-bit
 * types each source has two channels with a one-bit lo/hi selector each, so
 * each source consumes two consecutive bits of `swz`; for other sizes there
 * is no swizzle and every selector is forced to 0.
 *
 * Fix: the per-source stride was 4 bits, but BIT_FOREACH_SWIZZLE only
 * enumerates 2 bits per source, so for two-source instructions the second
 * source's swizzle bits always read as zero and were never exercised. Use a
 * 2-bit stride to match the enumeration. */
static void
bit_apply_swizzle(bi_instruction *ins, unsigned swz, unsigned args, unsigned sz)
{
        unsigned slots_per_arg = (sz == 16) ? 2 : 1;
        unsigned slots_per_chan = (sz == 16) ? 1 : 0;
        unsigned mask = (sz == 16) ? 1 : 0;

        for (unsigned i = 0; i < args; ++i) {
                for (unsigned j = 0; j < (32 / sz); ++j) {
                        ins->swizzle[i][j] = ((swz >> (slots_per_arg * i)) >> (slots_per_chan * j)) & mask;
                }
        }
}
/* Exhaustively exercises a two-source floating-point instruction (class c,
 * sub-op `op`) at the given size on the chosen unit: every output modifier
 * (where available), every fp16 swizzle, and all 16 abs/neg combinations. */
static void
bit_fmod_helper(struct panfrost_device *dev,
                enum bi_class c, unsigned size, bool fma,
                uint32_t *input, enum bit_debug debug, unsigned op)
{
        bi_instruction ins = bit_ins(c, 2, nir_type_float, size);

        /* fp16 outside the FMA unit has no output modifiers to sweep */
        bool fp16 = (size == 16);
        unsigned outmod_count = (fma || !fp16) ? 4 : 1;

        for (unsigned om = 0; om < outmod_count; ++om) {
                BIT_FOREACH_SWIZZLE(swz, 2, size) {
                        for (unsigned mod = 0; mod < 16; ++mod) {
                                ins.outmod = om;
                                ins.op.minmax = op;

                                /* mod bits 0-1: abs, bits 2-3: neg */
                                ins.src_abs[0] = !!(mod & 0x1);
                                ins.src_abs[1] = !!(mod & 0x2);
                                ins.src_neg[0] = !!(mod & 0x4);
                                ins.src_neg[1] = !!(mod & 0x8);

                                bit_apply_swizzle(&ins, swz, 2, size);
                                bit_test_single(dev, &ins, input, fma, debug);
                        }
                }
        }
}
/* Exercises FMA at the given float size on the FMA unit, sweeping every
 * output modifier and all eight negate combinations of the three sources. */
static void
bit_fma_helper(struct panfrost_device *dev,
               unsigned size, uint32_t *input, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_FMA, 3, nir_type_float, size);

        for (unsigned om = 0; om < 4; ++om) {
                for (unsigned neg = 0; neg < 8; ++neg) {
                        ins.outmod = om;
                        ins.src_neg[0] = !!(neg & 0x1);
                        ins.src_neg[1] = !!(neg & 0x2);
                        ins.src_neg[2] = !!(neg & 0x4);
                        bit_test_single(dev, &ins, input, true, debug);
                }
        }
}
/* Exercises FMA_MSCALE (fused multiply-add with an int32 scale source),
 * sweeping output modifiers and the abs/neg modifier combinations below. */
static void
bit_fma_mscale_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_FMA, 4, nir_type_float, 32);
        ins.op.mscale = true;
        ins.src_types[3] = nir_type_int32;
        ins.src[2] = ins.src[3]; /* Not enough ports! */

        for (unsigned om = 0; om < 4; ++om) {
                for (unsigned mod = 0; mod < 8; ++mod) {
                        ins.outmod = om;
                        ins.src_abs[0] = !!(mod & 0x1);
                        ins.src_neg[1] = !!(mod & 0x2);
                        ins.src_neg[2] = !!(mod & 0x4);
                        bit_test_single(dev, &ins, input, true, debug);
                }
        }
}
/* Exercises CSEL (conditional select) over every comparison condition from
 * LT through NE at the given size. */
static void
bit_csel_helper(struct panfrost_device *dev,
                unsigned size, uint32_t *input, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_CSEL, 4, nir_type_uint, size);

        /* SCHEDULER: We can only read 3 registers at once. */
        ins.src[2] = ins.src[0];

        enum bi_cond cond;
        for (cond = BI_COND_LT; cond <= BI_COND_NE; cond++) {
                ins.cond = cond;
                bit_test_single(dev, &ins, input, true, debug);
        }
}
/* Exercises the special ops (FRCP through EXP2_LOW) at the given size.
 * EXP2_LOW is fp32-only and takes a pre-scaled fixed-point value alongside
 * the original float, so its input buffer is constructed specially. For
 * fp16, both channel selections of the first source are exercised. */
static void
bit_special_helper(struct panfrost_device *dev,
                unsigned size, uint32_t *input, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_SPECIAL_ADD, 2, nir_type_float, size);
        uint32_t exp_input[4];

        for (enum bi_special_op op = BI_SPECIAL_FRCP; op <= BI_SPECIAL_EXP2_LOW; ++op) {
                if (op == BI_SPECIAL_EXP2_LOW) {
                        /* exp2 only supported in fp32 mode */
                        if (size != 32)
                                continue;

                        /* Give expected input: src1 keeps the raw float,
                         * src0 carries it scaled by 2^24 as an integer
                         * (fixed-point form — presumably what the low-part
                         * op consumes; confirm against the ISA docs). */
                        exp_input[1] = input[0];
                        float *ff = (float *) input;
                        exp_input[0] = (int) (ff[0] * (1 << 24));
                }

                /* fp16 selects which half-word channel the op reads */
                for (unsigned c = 0; c < ((size == 16) ? 2 : 1); ++c) {
                        ins.op.special = op;
                        ins.swizzle[0][0] = c;
                        bit_test_single(dev, &ins,
                                        op == BI_SPECIAL_EXP2_LOW ? exp_input : input,
                                        false, debug);
                }
        }
}
/* Exercises every table op from 0 through LOG2_U_OVER_U_1_LOW (fma=false). */
static void
bit_table_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_TABLE, 1, nir_type_float, 32);

        enum bi_table_op op;
        for (op = 0; op <= BI_TABLE_LOG2_U_OVER_U_1_LOW; op++) {
                ins.op.table = op;
                bit_test_single(dev, &ins, input, false, debug);
        }
}
/* Exercises every FREXP variant through FREXPE_LOG; the destination is an
 * int32 (the extracted exponent form). */
static void
bit_frexp_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_FREXP, 1, nir_type_float, 32);
        ins.dest_type = nir_type_int32;

        enum bi_frexp_op op;
        for (op = 0; op <= BI_FREXPE_LOG; op++) {
                ins.op.frexp = op;
                bit_test_single(dev, &ins, input, true, debug);
        }
}
/* Exercises ROUND at the given size under all four rounding modes, sweeping
 * every fp16 swizzle of its single source. */
static void
bit_round_helper(struct panfrost_device *dev, uint32_t *input, unsigned sz, bool FMA, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_ROUND, 1, nir_type_float, sz);

        enum bifrost_roundmode rm;
        for (rm = 0; rm <= 3; rm++) {
                BIT_FOREACH_SWIZZLE(swz, 1, sz) {
                        ins.roundmode = rm;
                        bit_apply_swizzle(&ins, swz, 1, sz);
                        bit_test_single(dev, &ins, input, FMA, debug);
                }
        }
}
/* Exercises every REDUCE_FMA op through ADD_FREXPM on the FMA unit. */
static void
bit_reduce_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_REDUCE_FMA, 2, nir_type_float, 32);

        enum bi_reduce_op op;
        for (op = 0; op <= BI_REDUCE_ADD_FREXPM; op++) {
                ins.op.reduce = op;
                bit_test_single(dev, &ins, input, true, debug);
        }
}
/* Exercises SELECT: assembles a 32-bit result from C = 32/size channels,
 * one per source, sweeping every lo/hi swizzle combination. For fp16 the
 * sweep runs on both units (add == 0 tests with fma=true, add == 1 with
 * fma=false). */
static void
bit_select_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug)
{
        unsigned C = 32 / size;
        bi_instruction ins = bit_ins(BI_SELECT, C, nir_type_uint, 32);

        /* Each source supplies one size-bit channel of the result */
        for (unsigned c = 0; c < C; ++c)
                ins.src_types[c] = nir_type_uint | size;

        if (size == 8) {
                /* SCHEDULER: We can only read 3 registers at once. */
                ins.src[2] = ins.src[0];
        }

        /* Each argument has swizzle {lo, hi} so 2^C options */
        unsigned hi = (size == 16) ? 1 : 2;

        for (unsigned add = 0; add < ((size == 16) ? 2 : 1); ++add) {
                for (unsigned swizzle = 0; swizzle < (1 << C); ++swizzle) {
                        /* Bit i of `swizzle` picks lo/hi for channel i */
                        for (unsigned i = 0; i < C; ++i)
                                ins.swizzle[i][0] = ((swizzle >> i) & 1) ? hi : 0;

                        bit_test_single(dev, &ins, input, !add, debug);
                }
        }
}
/* Exercises float compares at the given size over every condition, sweeping
 * the size-specific modifier space encoded in `mods` (see the per-size
 * branches below). The destination is a uint of the same size. */
static void
bit_fcmp_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug, bool FMA)
{
        bi_instruction ins = bit_ins(BI_CMP, 2, nir_type_float, size);
        ins.dest_type = nir_type_uint | size;

        /* 16-bit has swizzles and abs. 32-bit has abs/neg mods. */
        unsigned max_mods = (size == 16) ? 64 : (size == 32) ? 16 : 1;

        for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
                for (unsigned mods = 0; mods < max_mods; ++mods) {
                        ins.cond = cond;

                        if (size == 16) {
                                /* mods bits 0-3: two swizzle bits per source
                                 * (channel 0 and channel 1 lo/hi selects);
                                 * bits 4-5: per-source abs */
                                for (unsigned i = 0; i < 2; ++i) {
                                        ins.swizzle[i][0] = ((mods >> (i * 2)) & 1) ? 1 : 0;
                                        ins.swizzle[i][1] = ((mods >> (i * 2)) & 2) ? 1 : 0;
                                }

                                ins.src_abs[0] = (mods & 16) ? true : false;
                                ins.src_abs[1] = (mods & 32) ? true : false;
                        } else if (size == 8) {
                                /* 8-bit: identity swizzle only, no mods */
                                for (unsigned i = 0; i < 2; ++i) {
                                        for (unsigned j = 0; j < 4; ++j)
                                                ins.swizzle[i][j] = j;
                                }
                        } else if (size == 32) {
                                /* mods bits 0-1: abs, bits 2-3: neg */
                                ins.src_abs[0] = (mods & 1) ? true : false;
                                ins.src_abs[1] = (mods & 2) ? true : false;
                                ins.src_neg[0] = (mods & 4) ? true : false;
                                ins.src_neg[1] = (mods & 8) ? true : false;
                        }

                        bit_test_single(dev, &ins, input, FMA, debug);
                }
        }
}
/* Exercises integer compares of base type T at the given size over every
 * condition and fp16-style swizzle (fma=false). Result is a same-size uint. */
static void
bit_icmp_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, nir_alu_type T, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_CMP, 2, T, size);
        ins.dest_type = nir_type_uint | size;

        enum bi_cond cond;
        for (cond = BI_COND_LT; cond <= BI_COND_NE; cond++) {
                BIT_FOREACH_SWIZZLE(swz, 2, size) {
                        bit_apply_swizzle(&ins, swz, 2, size);
                        ins.cond = cond;
                        bit_test_single(dev, &ins, input, false, debug);
                }
        }
}
/* Exercises CONVERT for every valid (float/uint/int) x (float/uint/int)
 * base-type pair at the given from/to sizes, with source channel selects
 * (cx, cy) and the given rounding mode. Invalid or redundant combinations
 * are filtered with the guards below. */
static void
bit_convert_helper(struct panfrost_device *dev, unsigned from_size,
                unsigned to_size, unsigned cx, unsigned cy, bool FMA,
                enum bifrost_roundmode roundmode,
                uint32_t *input, enum bit_debug debug)
{
        bi_instruction ins = {
                .type = BI_CONVERT,
                .dest = BIR_INDEX_REGISTER | 0,
                .src = { BIR_INDEX_REGISTER | 0 }
        };

        /* Index 0 = float, 1 = uint, 2 = int (so nonzero base => integer) */
        nir_alu_type Ts[3] = { nir_type_float, nir_type_uint, nir_type_int };

        for (unsigned from_base = 0; from_base < 3; ++from_base) {
                for (unsigned to_base = 0; to_base < 3; ++to_base) {
                        /* Discard invalid combinations.. */
                        if ((from_size == to_size) && (from_base == to_base))
                                continue;

                        /* Can't switch signedness */
                        if (from_base && to_base)
                                continue;

                        /* No F16_TO_I32, etc */
                        if (from_size != to_size && from_base == 0 && to_base)
                                continue;

                        if (from_size != to_size && from_base && to_base == 0)
                                continue;

                        /* No need, just ignore the upper half */
                        if (from_size > to_size && from_base == to_base && from_base)
                                continue;

                        ins.dest_type = Ts[to_base] | to_size;
                        ins.src_types[0] = Ts[from_base] | from_size;
                        ins.roundmode = roundmode;
                        ins.swizzle[0][0] = cx;
                        ins.swizzle[0][1] = cy;

                        /* 32->16 consumes a second source; mirror the first
                         * so both halves of the packed result are fed */
                        if (to_size == 16 && from_size == 32) {
                                ins.src_types[1] = ins.src_types[0];
                                ins.src[1] = ins.src[0];
                        } else {
                                ins.src[1] = ins.src_types[1] = 0;
                        }

                        bit_test_single(dev, &ins, input, FMA, debug);
                }
        }
}
/* Exercises embedded-constant sources: MOV/ADD/FMA with 1..3 sources, where
 * source 0 (and optionally source 1) reads the inline 64-bit constant —
 * either a single fp32 value or two distinct halves when "doubled". */
static void
bit_constant_helper(struct panfrost_device *dev,
                uint32_t *input, enum bit_debug debug)
{
        enum bi_class classes[3] = { BI_MOV, BI_ADD, BI_FMA };

        for (unsigned doubled = 0; doubled < 2; ++doubled) {
                for (unsigned count = 1; count <= 3; ++count) {
                        bi_instruction ins =
                                bit_ins(classes[count - 1], count, nir_type_float, 32);

                        ins.src[0] = BIR_INDEX_CONSTANT | 0;
                        ins.src[1] = (count >= 2) ?
                                (BIR_INDEX_CONSTANT | (doubled ? 32 : 0)) : 0;
                        ins.src[2] = (count >= 3) ? BIR_INDEX_ZERO : 0;

                        ins.constant.u64 = doubled ?
                                (0x3f800000ull | (0x3f000000ull << 32ull)) :
                                0x3f800000ull;

                        bit_test_single(dev, &ins, input, true, debug);
                }
        }
}
/* Writes the identity swizzle (channel j reads component j) into the first
 * `args` sources of `ins` for the given type size.
 *
 * Fix: the outer loop was hard-coded to 2 iterations and ignored the `args`
 * parameter; honor it so the function generalizes beyond two sources.
 * (Behavior is unchanged for the existing callers, which all pass 2.) */
static void
bit_swizzle_identity(bi_instruction *ins, unsigned args, unsigned size)
{
        for (unsigned i = 0; i < args; ++i) {
                for (unsigned j = 0; j < (32 / size); ++j)
                        ins->swizzle[i][j] = j;
        }
}
/* Exercises AND/OR/XOR at the given size with the dest-invert and
 * src1-invert modifier combinations; shift amount is pinned to zero. */
static void
bit_bitwise_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug)
{
        bi_instruction ins = bit_ins(BI_BITWISE, 3, nir_type_uint, size);
        bit_swizzle_identity(&ins, 2, size);

        /* TODO: shifts */
        ins.src[2] = BIR_INDEX_ZERO;
        ins.src_types[2] = nir_type_uint8;

        for (unsigned op = BI_BITWISE_AND; op <= BI_BITWISE_XOR; ++op) {
                for (unsigned mods = 0; mods < 4; ++mods) {
                        bool dest_inv = (mods & 1) != 0;
                        bool src1_inv = (mods & 2) != 0;

                        /* Skip out-of-spec combinations */
                        if (src1_inv && op == BI_BITWISE_XOR)
                                continue;

                        ins.op.bitwise = op;
                        ins.bitwise.dest_invert = dest_inv;
                        ins.bitwise.src1_invert = src1_inv;
                        bit_test_single(dev, &ins, input, true, debug);
                }
        }
}
/* Exercises integer ADD and SUB at the given size on the chosen unit, with
 * the carry/borrow source pinned to zero. */
static void
bit_imath_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug, bool FMA)
{
        bi_instruction ins = bit_ins(BI_IMATH, 2, nir_type_uint, size);
        bit_swizzle_identity(&ins, 2, size);
        ins.src[2] = BIR_INDEX_ZERO; /* carry/borrow for FMA */

        unsigned op;
        for (op = BI_IMATH_ADD; op <= BI_IMATH_SUB; op++) {
                ins.op.imath = op;
                bit_test_single(dev, &ins, input, FMA, debug);
        }
}
/* Entry point for the on-board packing tests: generates random fp16/fp32
 * input vectors and drives every per-class helper above against them.
 * `debug` controls how much each helper prints on failure/success. */
void
bit_packing(struct panfrost_device *dev, enum bit_debug debug)
{
        float input32[4];
        uint16_t input16[8];

        bit_generate_float4(input32);
        bit_generate_half8(input16);

        bit_constant_helper(dev, (uint32_t *) input32, debug);

        /* Float ALU sweep at fp16 and fp32 */
        for (unsigned sz = 16; sz <= 32; sz *= 2) {
                uint32_t *input =
                        (sz == 16) ? (uint32_t *) input16 :
                        (uint32_t *) input32;

                bit_fmod_helper(dev, BI_ADD, sz, true, input, debug, 0);
                bit_fmod_helper(dev, BI_ADD, sz, false, input, debug, 0);
                bit_round_helper(dev, (uint32_t *) input32, sz, true, debug);
                bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MIN);
                bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MAX);
                bit_fma_helper(dev, sz, input, debug);
                bit_icmp_helper(dev, input, sz, nir_type_uint, debug);
                bit_icmp_helper(dev, input, sz, nir_type_int, debug);
        }

        for (unsigned sz = 16; sz <= 32; sz *= 2)
                bit_csel_helper(dev, sz, (uint32_t *) input32, debug);

        /* Special/table ops get a fixed positive input instead of a
         * random one (presumably to stay in the ops' domain) */
        float special[4] = { 0.9 };
        uint32_t special16[4] = { _mesa_float_to_half(special[0]) | (_mesa_float_to_half(0.2) << 16) };

        bit_table_helper(dev, (uint32_t *) special, debug);

        for (unsigned sz = 16; sz <= 32; sz *= 2) {
                uint32_t *input =
                        (sz == 16) ? special16 :
                        (uint32_t *) special;

                bit_special_helper(dev, sz, input, debug);
        }

        /* Conversions: every rounding mode and channel select */
        for (unsigned rm = 0; rm < 4; ++rm) {
                bit_convert_helper(dev, 32, 32, 0, 0, false, rm, (uint32_t *) input32, debug);

                for (unsigned c = 0; c < 2; ++c)
                        bit_convert_helper(dev, 32, 16, c, 0, false, rm, (uint32_t *) input32, debug);

                bit_convert_helper(dev, 16, 32, 0, 0, false, rm, (uint32_t *) input16, debug);

                for (unsigned c = 0; c < 4; ++c)
                        bit_convert_helper(dev, 16, 16, c & 1, c >> 1, false, rm, (uint32_t *) input16, debug);
        }

        bit_frexp_helper(dev, (uint32_t *) input32, debug);
        bit_reduce_helper(dev, (uint32_t *) input32, debug);

        /* FMA_MSCALE reads its integer scale from the last component */
        uint32_t mscale_input[4];
        memcpy(mscale_input, input32, sizeof(input32));
        mscale_input[3] = 0x7;
        bit_fma_mscale_helper(dev, mscale_input, debug);

        for (unsigned sz = 8; sz <= 16; sz *= 2) {
                bit_select_helper(dev, (uint32_t *) input32, sz, debug);
        }

        bit_fcmp_helper(dev, (uint32_t *) input32, 32, debug, true);
        bit_fcmp_helper(dev, (uint32_t *) input32, 16, debug, true);

        /* Bitwise and integer math at 8/16/32-bit */
        for (unsigned sz = 8; sz <= 32; sz *= 2) {
                bit_bitwise_helper(dev, (uint32_t *) input32, sz, debug);
                bit_imath_helper(dev, (uint32_t *) input32, sz, debug, false);
        }

        bit_imath_helper(dev, (uint32_t *) input32, 32, debug, true);
}

View file

@ -1,74 +0,0 @@
/*
* Copyright (C) 2020 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors (Collabora):
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#ifndef __BIFROST_TEST_H
#define __BIFROST_TEST_H
#include "panfrost/lib/midgard_pack.h"
#include "panfrost/lib/pan_device.h"
#include "panfrost/lib/pan_bo.h"
#include "bifrost_compile.h"
#include "bifrost/compiler.h"
/* Opens a Panfrost device for testing, owned by memctx. NOTE(review):
 * semantics inferred from usage in cmdline.c — confirm against bi_submit.c */
struct panfrost_device *
bit_initialize(void *memctx);

/* Presumably verifies the device can execute at all — TODO confirm */
bool bit_sanity_check(struct panfrost_device *dev);

/* How much the test harness prints: nothing, failures only, or everything */
enum bit_debug {
        BIT_DEBUG_NONE = 0,
        BIT_DEBUG_FAIL,
        BIT_DEBUG_ALL
};

/* Runs prog as a vertex shader with the given UBO and attribute contents,
 * comparing the output buffer against `expected` (sizes in bytes) */
bool
bit_vertex(struct panfrost_device *dev, panfrost_program *prog,
                uint32_t *iubo, size_t sz_ubo,
                uint32_t *iattr, size_t sz_attr,
                uint32_t *expected, size_t sz_expected, enum bit_debug debug);

/* BIT interpreter */

/* Register/passthrough state threaded through bit_step() */
struct bit_state {
        /* Work registers */
        uint32_t r[64];

        /* Passthrough within the bundle */
        uint32_t T;

        /* Passthrough from last bundle */
        uint32_t T0;
        uint32_t T1;
};

/* Interprets a single instruction on the FMA or ADD unit, updating s */
void
bit_step(struct bit_state *s, bi_instruction *ins, bool FMA);

/* Runs the full packing test suite (see bi_test_pack.c) */
void bit_packing(struct panfrost_device *dev, enum bit_debug debug);

#endif

View file

@ -35,9 +35,6 @@ subdir('lib')
files_bifrost = files(
'bifrost/cmdline.c',
'bifrost/test/bi_submit.c',
'bifrost/test/bi_interpret.c',
'bifrost/test/bi_test_pack.c',
)
bifrost_compiler = executable(