mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-09 17:00:13 +01:00
pan/bi: Drop on-board packing tests
These tests were designed before having access to canonical information about the hardware and thus had two purposes:

* Validating that our understanding of an instruction (as defined by IR semantics) matches hardware behaviour -- obsoleted by new information.
* Validating that the IR packing code is correct -- obsoleted by rewriting the IR and rewriting the packing.

I dislike removing tests as much as the next person, but the value of these will be nil by the end of the series, and they will prove burdensome. Proper unit tests will be useful, however.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8213>
This commit is contained in:
parent
df09ada411
commit
bcd0a285bb
6 changed files with 0 additions and 1768 deletions
|
|
@ -30,7 +30,6 @@
|
|||
#include "compiler/nir_types.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "bifrost_compile.h"
|
||||
#include "test/bit.h"
|
||||
|
||||
static panfrost_program *
|
||||
compile_shader(char **argv, bool vertex_only)
|
||||
|
|
@ -101,71 +100,6 @@ disassemble(const char *filename, bool verbose)
|
|||
free(code);
|
||||
}
|
||||
|
||||
static void
|
||||
test_vertex(char **argv)
|
||||
{
|
||||
void *memctx = NULL; /* TODO */
|
||||
struct panfrost_device *dev = bit_initialize(memctx);
|
||||
|
||||
float iubo[] = {
|
||||
0.1, 0.2, 0.3, 0.4
|
||||
};
|
||||
|
||||
float iattr[] = {
|
||||
0.5, 0.6, 0.7, 0.8
|
||||
};
|
||||
|
||||
float expected[] = {
|
||||
0.6, 0.8, 1.0, 1.2
|
||||
};
|
||||
|
||||
bit_vertex(dev, compile_shader(argv, true),
|
||||
(uint32_t *) iubo, sizeof(iubo),
|
||||
(uint32_t *) iattr, sizeof(iattr),
|
||||
(uint32_t *) expected, sizeof(expected),
|
||||
BIT_DEBUG_ALL);
|
||||
}
|
||||
|
||||
static void
|
||||
tests(void)
|
||||
{
|
||||
void *memctx = NULL; /* TODO */
|
||||
struct panfrost_device *dev = bit_initialize(memctx);
|
||||
bit_packing(dev, BIT_DEBUG_FAIL);
|
||||
}
|
||||
|
||||
static void
|
||||
run(const char *filename)
|
||||
{
|
||||
FILE *fp = fopen(filename, "rb");
|
||||
assert(fp);
|
||||
|
||||
fseek(fp, 0, SEEK_END);
|
||||
unsigned filesize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
unsigned char *code = malloc(filesize);
|
||||
unsigned res = fread(code, 1, filesize, fp);
|
||||
if (res != filesize) {
|
||||
printf("Couldn't read full file\n");
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
void *memctx = NULL; /* TODO */
|
||||
struct panfrost_device *dev = bit_initialize(memctx);
|
||||
|
||||
panfrost_program prog = {
|
||||
.compiled = {
|
||||
.data = code,
|
||||
.size = filesize
|
||||
},
|
||||
};
|
||||
|
||||
bit_vertex(dev, &prog, NULL, 0, NULL, 0, NULL, 0, BIT_DEBUG_ALL);
|
||||
|
||||
free(code);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
|
|
@ -180,12 +114,6 @@ main(int argc, char **argv)
|
|||
disassemble(argv[2], false);
|
||||
else if (strcmp(argv[1], "disasm-verbose") == 0)
|
||||
disassemble(argv[2], true);
|
||||
else if (strcmp(argv[1], "tests") == 0)
|
||||
tests();
|
||||
else if (strcmp(argv[1], "test-vertex") == 0)
|
||||
test_vertex(&argv[2]);
|
||||
else if (strcmp(argv[1], "run") == 0)
|
||||
run(argv[2]);
|
||||
else
|
||||
unreachable("Unknown command. Valid: compile/disasm");
|
||||
|
||||
|
|
|
|||
|
|
@ -1,711 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2020 Collabora Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors (Collabora):
|
||||
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "bit.h"
|
||||
#include "util/half_float.h"
|
||||
|
||||
/* One interpreter value, viewable as any of the scalar or packed-vector types
 * the interpreter handles. u64 stays the first member so that `= { 0 }`
 * clears the full 64 bits. */
typedef union {
        /* 64-bit views */
        uint64_t u64;
        int64_t i64;
        double f64;

        /* 32-bit views */
        uint32_t u32;
        int32_t i32;
        float f32;

        /* 2 x 16-bit views; f16 holds raw half-float bit patterns */
        uint16_t u16[2];
        int16_t i16[2];
        uint16_t f16[2];

        /* 4 x 8-bit views */
        uint8_t u8[4];
        int8_t i8[4];
} bit_t;
|
||||
|
||||
/* Interprets a subset of Bifrost IR required for automated testing */
|
||||
|
||||
static uint64_t
|
||||
bit_read(struct bit_state *s, bi_instruction *ins, unsigned index, nir_alu_type T, bool FMA)
|
||||
{
|
||||
if (index & BIR_INDEX_REGISTER) {
|
||||
uint32_t reg = index & ~BIR_INDEX_REGISTER;
|
||||
assert(reg < 64);
|
||||
return s->r[reg];
|
||||
} else if (index & BIR_INDEX_UNIFORM) {
|
||||
unreachable("Uniform registers to be implemented");
|
||||
} else if (index & BIR_INDEX_CONSTANT) {
|
||||
return ins->constant.u64 >> (index & ~BIR_INDEX_CONSTANT);
|
||||
} else if (index & BIR_INDEX_ZERO) {
|
||||
return 0;
|
||||
} else if (index & (BIR_INDEX_PASS | BIFROST_SRC_STAGE)) {
|
||||
return FMA ? 0 : s->T;
|
||||
} else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_FMA)) {
|
||||
return s->T0;
|
||||
} else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_ADD)) {
|
||||
return s->T1;
|
||||
} else if (!index) {
|
||||
/* Placeholder */
|
||||
return 0;
|
||||
} else {
|
||||
unreachable("Invalid source");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_write(struct bit_state *s, unsigned index, nir_alu_type T, bit_t value, bool FMA)
|
||||
{
|
||||
/* Always write stage passthrough */
|
||||
if (FMA)
|
||||
s->T = value.u32;
|
||||
|
||||
if (index & BIR_INDEX_REGISTER) {
|
||||
uint32_t reg = index & ~BIR_INDEX_REGISTER;
|
||||
assert(reg < 64);
|
||||
s->r[reg] = value.u32;
|
||||
} else if (!index) {
|
||||
/* Nothing to do */
|
||||
} else {
|
||||
unreachable("Invalid destination");
|
||||
}
|
||||
}
|
||||
|
||||
/* Shorthand for the half <-> single precision conversions */
#define bh _mesa_float_to_half
#define bf _mesa_half_to_float

/* The helpers below expand inside bit_step() and rely on its locals `ins`,
 * `srcs` and `dest`. Each applies a four-argument function `fxn` to the
 * sources, per component for the packed types, honouring the swizzle. */

/* 2 x f16: compute in float, convert back to half */
#define bv2f16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.f16[c] = bh(fxn(bf(srcs[0].f16[ins->swizzle[0][c]]), \
                                     bf(srcs[1].f16[ins->swizzle[1][c]]), \
                                     bf(srcs[2].f16[ins->swizzle[2][c]]), \
                                     bf(srcs[3].f16[ins->swizzle[3][c]]))); \
        }

/* 2 x 16-bit integer */
#define bv2i16(fxn) \
        for (unsigned c = 0; c < 2; ++c) { \
                dest.u16[c] = fxn(srcs[0].u16[ins->swizzle[0][c]], \
                                  srcs[1].u16[ins->swizzle[1][c]], \
                                  srcs[2].u16[ins->swizzle[2][c]], \
                                  srcs[3].u16[ins->swizzle[3][c]]); \
        }

/* 4 x 8-bit integer */
#define bv4i8(fxn) \
        for (unsigned c = 0; c < 4; ++c) { \
                dest.u8[c] = fxn(srcs[0].u8[ins->swizzle[0][c]], \
                                 srcs[1].u8[ins->swizzle[1][c]], \
                                 srcs[2].u8[ins->swizzle[2][c]], \
                                 srcs[3].u8[ins->swizzle[3][c]]); \
        }

/* Scalar 32-bit float and integer */
#define bf32(fxn) dest.f32 = fxn(srcs[0].f32, srcs[1].f32, srcs[2].f32, srcs[3].f32)
#define bi32(fxn) dest.i32 = fxn(srcs[0].u32, srcs[1].u32, srcs[2].u32, srcs[3].i32)

/* Dispatch on a float destination type. Note the argument usage: a 32-bit
 * destination is evaluated with fxn64 and a 16-bit destination with fxn32.
 * The `break` statements leave the enclosing switch in bit_step(), so these
 * macros must not be wrapped in a loop or nested switch. When no type
 * matches, control falls out of the bottom of the macro. */
#define bfloat(fxn64, fxn32) \
        if (ins->dest_type == nir_type_float64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_float32) { \
                bf32(fxn64); \
                break; \
        } else if (ins->dest_type == nir_type_float16) { \
                bv2f16(fxn32); \
                break; \
        }

/* Dispatch on an integer destination type (signed or unsigned); same `break`
 * caveat as bfloat. fxn64 is currently unused. */
#define bint(fxn64, fxn32, fxn16, fxn8) \
        if (ins->dest_type == nir_type_int64 || ins->dest_type == nir_type_uint64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_int32 || ins->dest_type == nir_type_uint32) { \
                bi32(fxn32); \
                break; \
        } else if (ins->dest_type == nir_type_int16 || ins->dest_type == nir_type_uint16) { \
                bv2i16(fxn16); \
                break; \
        } else if (ins->dest_type == nir_type_int8 || ins->dest_type == nir_type_uint8) { \
                bv4i8(fxn8); \
                break; \
        }

/* Dispatch an operation defined for both float and integer types */
#define bpoly(name) \
        bfloat(bit_f64 ## name, bit_f32 ## name); \
        bint(bit_i64 ## name, bit_i32 ## name, bit_i16 ## name, bit_i8 ## name); \
        unreachable("Invalid type");
|
||||
|
||||
/* Generators for the per-type helper functions consumed by bfloat/bint/bpoly.
 * Every generated function takes four operands a, b, c, d and evaluates the
 * given expression over them. */

/* Float helpers with distinct expressions for the float and double variant */
#define bit_make_float_2(name, expr32, expr64) \
        static inline double \
        bit_f64 ## name(double a, double b, double c, double d) \
        { \
                return expr64; \
        } \
        static inline float \
        bit_f32 ## name(float a, float b, float c, float d) \
        { \
                return expr32; \
        }

/* Float helpers sharing one expression */
#define bit_make_float(name, expr) \
        bit_make_float_2(name, expr, expr)

/* Integer helpers for 64/32/16/8-bit signed operands */
#define bit_make_int(name, expr) \
        static inline int64_t \
        bit_i64 ## name (int64_t a, int64_t b, int64_t c, int64_t d) \
        { \
                return expr; \
        } \
        \
        static inline int32_t \
        bit_i32 ## name (int32_t a, int32_t b, int32_t c, int32_t d) \
        { \
                return expr; \
        } \
        \
        static inline int16_t \
        bit_i16 ## name (int16_t a, int16_t b, int16_t c, int16_t d) \
        { \
                return expr; \
        } \
        \
        static inline int8_t \
        bit_i8 ## name (int8_t a, int8_t b, int8_t c, int8_t d) \
        { \
                return expr; \
        }

/* Both float and integer helpers from one expression */
#define bit_make_poly(name, expr) \
        bit_make_float(name, expr) \
        bit_make_int(name, expr)

/* Arithmetic and moves */
bit_make_poly(add, a + b);
bit_make_int(sub, a - b);
bit_make_float(fma, (a * b) + c);
bit_make_poly(mov, a);
bit_make_poly(min, MIN2(a, b));
bit_make_poly(max, MAX2(a, b));

/* Rounding, one helper pair per mode */
bit_make_float_2(floor, floorf(a), floor(a));
bit_make_float_2(ceil, ceilf(a), ceil(a));
bit_make_float_2(trunc, truncf(a), trunc(a));
bit_make_float_2(nearbyint, nearbyintf(a), nearbyint(a));
|
||||
|
||||
/* Modifiers */
|
||||
|
||||
static float
|
||||
bit_outmod(float raw, enum bifrost_outmod mod)
|
||||
{
|
||||
switch (mod) {
|
||||
case BIFROST_POS:
|
||||
return MAX2(raw, 0.0);
|
||||
case BIFROST_SAT_SIGNED:
|
||||
return CLAMP(raw, -1.0, 1.0);
|
||||
case BIFROST_SAT:
|
||||
return SATURATE(raw);
|
||||
default:
|
||||
return raw;
|
||||
}
|
||||
}
|
||||
|
||||
/* Applies float source modifiers: absolute value first, then negation, so
 * abs + neg together yields -|raw|. */
static float
bit_srcmod(float raw, bool abs, bool neg)
{
        /* fabsf keeps the computation in single precision instead of
         * promoting to double and truncating back */
        if (abs)
                raw = fabsf(raw);

        if (neg)
                raw = -raw;

        return raw;
}
|
||||
|
||||
#define BIT_COND(cond, left, right) \
|
||||
if (cond == BI_COND_LT) return left < right; \
|
||||
else if (cond == BI_COND_LE) return left <= right; \
|
||||
else if (cond == BI_COND_GE) return left >= right; \
|
||||
else if (cond == BI_COND_GT) return left > right; \
|
||||
else if (cond == BI_COND_EQ) return left == right; \
|
||||
else if (cond == BI_COND_NE) return left != right; \
|
||||
else { return true; }
|
||||
|
||||
static bool
|
||||
bit_eval_cond(enum bi_cond cond, bit_t l, bit_t r, nir_alu_type T, unsigned cl, unsigned cr)
|
||||
{
|
||||
if (T == nir_type_float32) {
|
||||
BIT_COND(cond, l.f32, r.f32);
|
||||
} else if (T == nir_type_float16) {
|
||||
float left = bf(l.f16[cl]);
|
||||
float right = bf(r.f16[cr]);
|
||||
BIT_COND(cond, left, right);
|
||||
} else if (T == nir_type_int32) {
|
||||
int32_t left = l.u32;
|
||||
int32_t right = r.u32;
|
||||
BIT_COND(cond, left, right);
|
||||
} else if (T == nir_type_int16) {
|
||||
int16_t left = l.i16[cl];
|
||||
int16_t right = r.i16[cr];
|
||||
BIT_COND(cond, left, right);
|
||||
} else if (T == nir_type_uint32) {
|
||||
BIT_COND(cond, l.u32, r.u32);
|
||||
} else if (T == nir_type_uint16) {
|
||||
BIT_COND(cond, l.u16[cl], r.u16[cr]);
|
||||
} else {
|
||||
unreachable("Unknown type evaluated");
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bit_cmp(enum bi_cond cond, bit_t l, bit_t r, nir_alu_type T, unsigned cl, unsigned cr, bool d3d)
|
||||
{
|
||||
bool v = bit_eval_cond(cond, l, r, T, cl, cr);
|
||||
|
||||
/* Fill for D3D but only up to 32-bit... 64-bit is only partial
|
||||
* (although we probably need a cleverer representation for 64-bit) */
|
||||
|
||||
unsigned sz = MIN2(nir_alu_type_get_type_size(T), 32);
|
||||
unsigned max = (sz == 32) ? (~0) : ((1 << sz) - 1);
|
||||
|
||||
return v ? (d3d ? max : 1) : 0;
|
||||
}
|
||||
|
||||
static float
|
||||
biti_special(float Q, enum bi_special_op op)
|
||||
{
|
||||
switch (op) {
|
||||
case BI_SPECIAL_FRCP: return 1.0 / Q;
|
||||
case BI_SPECIAL_FRSQ: {
|
||||
double Qf = 1.0 / sqrt(Q);
|
||||
return Qf;
|
||||
}
|
||||
default: unreachable("Invalid special");
|
||||
}
|
||||
}
|
||||
|
||||
/* For BI_CONVERT. */
|
||||
|
||||
/* Maps a Bifrost round mode to the matching FP_INT_* rounding direction
 * expected by fromfpf()/ufromfpf(); anything other than RTZ/RTE/RTN maps to
 * FP_INT_UPWARD. */
#define _AS_ROUNDMODE(mode) \
        (((mode) == BIFROST_RTZ) ? FP_INT_TOWARDZERO : \
         ((mode) == BIFROST_RTE) ? FP_INT_TONEAREST : \
         ((mode) == BIFROST_RTN) ? FP_INT_DOWNWARD : \
         FP_INT_UPWARD)
|
||||
|
||||
static float
|
||||
bit_as_float32(nir_alu_type T, bit_t src, unsigned C)
|
||||
{
|
||||
switch (T) {
|
||||
case nir_type_int32: return src.i32;
|
||||
case nir_type_uint32: return src.u32;
|
||||
case nir_type_float16: return bf(src.u16[C]);
|
||||
default: unreachable("Invalid");
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
bit_as_uint32(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
|
||||
{
|
||||
switch (T) {
|
||||
case nir_type_float16: return bf(src.u16[C]);
|
||||
case nir_type_float32: return ufromfpf(src.f32, _AS_ROUNDMODE(rm), 32);
|
||||
default: unreachable("Invalid");
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t
|
||||
bit_as_int32(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
|
||||
{
|
||||
switch (T) {
|
||||
case nir_type_float16: return bf(src.u16[C]);
|
||||
case nir_type_float32: return fromfpf(src.f32, _AS_ROUNDMODE(rm), 32);
|
||||
default: unreachable("Invalid");
|
||||
}
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
bit_as_float16(nir_alu_type T, bit_t src, unsigned C)
|
||||
{
|
||||
switch (T) {
|
||||
case nir_type_int32: return bh(src.i32);
|
||||
case nir_type_uint32: return bh(src.u32);
|
||||
case nir_type_float32: return bh(src.f32);
|
||||
case nir_type_int16: return bh(src.i16[C]);
|
||||
case nir_type_uint16: return bh(src.u16[C]);
|
||||
default: unreachable("Invalid");
|
||||
}
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
bit_as_uint16(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
|
||||
{
|
||||
switch (T) {
|
||||
case nir_type_int32: return src.i32;
|
||||
case nir_type_uint32: return src.u32;
|
||||
case nir_type_float16: return ufromfpf(bf(src.u16[C]), _AS_ROUNDMODE(rm), 16);
|
||||
case nir_type_float32: return src.f32;
|
||||
default: unreachable("Invalid");
|
||||
}
|
||||
}
|
||||
|
||||
static int16_t
|
||||
bit_as_int16(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
|
||||
{
|
||||
switch (T) {
|
||||
case nir_type_int32: return src.i32;
|
||||
case nir_type_uint32: return src.u32;
|
||||
case nir_type_float16: return fromfpf(bf(src.u16[C]), _AS_ROUNDMODE(rm), 16);
|
||||
case nir_type_float32: return src.f32;
|
||||
default: unreachable("Invalid");
|
||||
}
|
||||
}
|
||||
|
||||
/* Variant of frexpf() that splits x into mantissa * 2^(*e) with the mantissa
 * magnitude reduced to [0.75, 1.5) instead of [0.5, 1).
 *
 * x  value to decompose
 * e  receives the (adjusted) exponent
 *
 * Returns the mantissa, carrying the sign of x. */
static float
frexp_log(float x, int *e)
{
        /* Work on the magnitude; frexpf reduces it to [0.5, 1) */
        float f = frexpf(fabsf(x), e);

        /* Shift [0.5, 0.75) up to [1.0, 1.5), giving [0.75, 1.5) overall */
        if (f < 0.75f) {
                f *= 2.0f;
                (*e)--;
        }

        /* Reattach the sign. This must test the original x: the magnitude is
         * never negative, so testing it would silently drop the sign. */
        if (x < 0.0f)
                f = -f;

        return f;
}
|
||||
|
||||
void
|
||||
bit_step(struct bit_state *s, bi_instruction *ins, bool FMA)
|
||||
{
|
||||
/* First, load sources */
|
||||
bit_t srcs[BIR_SRC_COUNT] = { 0 };
|
||||
|
||||
bi_foreach_src(ins, src)
|
||||
srcs[src].u64 = bit_read(s, ins, ins->src[src], ins->src_types[src], FMA);
|
||||
|
||||
/* Apply source modifiers if we need to */
|
||||
if (bi_has_source_mods(ins)) {
|
||||
bi_foreach_src(ins, src) {
|
||||
if (ins->src_types[src] == nir_type_float16) {
|
||||
for (unsigned c = 0; c < 2; ++c) {
|
||||
srcs[src].f16[c] = bh(bit_srcmod(bf(srcs[src].f16[c]),
|
||||
ins->src_abs[src],
|
||||
ins->src_neg[src]));
|
||||
}
|
||||
} else if (ins->src_types[src] == nir_type_float32) {
|
||||
srcs[src].f32 = bit_srcmod(srcs[src].f32,
|
||||
ins->src_abs[src],
|
||||
ins->src_neg[src]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Next, do the action of the instruction */
|
||||
bit_t dest = { 0 };
|
||||
|
||||
switch (ins->type) {
|
||||
case BI_ADD:
|
||||
bpoly(add);
|
||||
|
||||
case BI_BRANCH:
|
||||
unreachable("Unsupported op");
|
||||
|
||||
case BI_CMP: {
|
||||
nir_alu_type T = ins->src_types[0];
|
||||
unsigned sz = nir_alu_type_get_type_size(T);
|
||||
|
||||
if (sz == 32 || sz == 64) {
|
||||
dest.u32 = bit_cmp(ins->cond, srcs[0], srcs[1], T, 0, 0, true);
|
||||
} else if (sz == 16) {
|
||||
for (unsigned c = 0; c < 2; ++c) {
|
||||
dest.u16[c] = bit_cmp(ins->cond, srcs[0], srcs[1],
|
||||
T, ins->swizzle[0][c], ins->swizzle[1][c],
|
||||
true);
|
||||
}
|
||||
} else if (sz == 8) {
|
||||
for (unsigned c = 0; c < 4; ++c) {
|
||||
dest.u8[c] = bit_cmp(ins->cond, srcs[0], srcs[1],
|
||||
T, ins->swizzle[0][c], ins->swizzle[1][c],
|
||||
true);
|
||||
}
|
||||
} else {
|
||||
unreachable("Invalid");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_BITWISE: {
|
||||
/* Apply inverts first */
|
||||
if (ins->bitwise.src1_invert)
|
||||
srcs[1].u64 = ~srcs[1].u64;
|
||||
|
||||
/* TODO: Shifting */
|
||||
assert(srcs[2].u32 == 0);
|
||||
|
||||
if (ins->op.bitwise == BI_BITWISE_AND)
|
||||
dest.u64 = srcs[0].u64 & srcs[1].u64;
|
||||
else if (ins->op.bitwise == BI_BITWISE_OR)
|
||||
dest.u64 = srcs[0].u64 | srcs[1].u64;
|
||||
else if (ins->op.bitwise == BI_BITWISE_XOR)
|
||||
dest.u64 = srcs[0].u64 ^ srcs[1].u64;
|
||||
else
|
||||
unreachable("Unsupported op");
|
||||
|
||||
if (ins->bitwise.dest_invert)
|
||||
dest.u64 = ~dest.u64;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_CONVERT: {
|
||||
/* If it exists */
|
||||
unsigned comp = ins->swizzle[0][1];
|
||||
|
||||
if (ins->dest_type == nir_type_float32)
|
||||
dest.f32 = bit_as_float32(ins->src_types[0], srcs[0], comp);
|
||||
else if (ins->dest_type == nir_type_uint32)
|
||||
dest.u32 = bit_as_uint32(ins->src_types[0], srcs[0], comp, ins->roundmode);
|
||||
else if (ins->dest_type == nir_type_int32)
|
||||
dest.i32 = bit_as_int32(ins->src_types[0], srcs[0], comp, ins->roundmode);
|
||||
else if (ins->dest_type == nir_type_float16) {
|
||||
dest.u16[0] = bit_as_float16(ins->src_types[0], srcs[0], ins->swizzle[0][0]);
|
||||
dest.u16[1] = bit_as_float16(ins->src_types[0], srcs[0], ins->swizzle[0][1]);
|
||||
} else if (ins->dest_type == nir_type_uint16) {
|
||||
dest.u16[0] = bit_as_uint16(ins->src_types[0], srcs[0], ins->swizzle[0][0], ins->roundmode);
|
||||
dest.u16[1] = bit_as_uint16(ins->src_types[0], srcs[0], ins->swizzle[0][1], ins->roundmode);
|
||||
} else if (ins->dest_type == nir_type_int16) {
|
||||
dest.i16[0] = bit_as_int16(ins->src_types[0], srcs[0], ins->swizzle[0][0], ins->roundmode);
|
||||
dest.i16[1] = bit_as_int16(ins->src_types[0], srcs[0], ins->swizzle[0][1], ins->roundmode);
|
||||
} else {
|
||||
unreachable("Unknown convert type");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_CSEL: {
|
||||
bool direct = ins->cond == BI_COND_ALWAYS;
|
||||
unsigned sz = nir_alu_type_get_type_size(ins->src_types[0]);
|
||||
|
||||
if (sz == 32) {
|
||||
bool cond = direct ? srcs[0].u32 :
|
||||
bit_eval_cond(ins->cond, srcs[0], srcs[1], ins->src_types[0], 0, 0);
|
||||
|
||||
dest = cond ? srcs[2] : srcs[3];
|
||||
} else if (sz == 16) {
|
||||
for (unsigned c = 0; c < 2; ++c) {
|
||||
bool cond = direct ? srcs[0].u16[c] :
|
||||
bit_eval_cond(ins->cond, srcs[0], srcs[1], ins->src_types[0], c, c);
|
||||
|
||||
dest.u16[c] = cond ? srcs[2].u16[c] : srcs[3].u16[c];
|
||||
}
|
||||
} else {
|
||||
unreachable("Remaining types todo");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_FMA: {
|
||||
bfloat(bit_f64fma, bit_f32fma);
|
||||
unreachable("Unknown type");
|
||||
}
|
||||
|
||||
case BI_FREXP: {
|
||||
if (ins->src_types[0] != nir_type_float32)
|
||||
unreachable("Unknown frexp type");
|
||||
|
||||
|
||||
if (ins->op.frexp == BI_FREXPE_LOG)
|
||||
frexp_log(srcs[0].f32, &dest.i32);
|
||||
else
|
||||
unreachable("Unknown frexp");
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_IMATH: {
|
||||
if (ins->op.imath == BI_IMATH_ADD) {
|
||||
bint(bit_i64add, bit_i32add, bit_i16add, bit_i8add);
|
||||
} else if (ins->op.imath == BI_IMATH_SUB) {
|
||||
bint(bit_i64sub, bit_i32sub, bit_i16sub, bit_i8sub);
|
||||
} else {
|
||||
unreachable("Unsupported op");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_MINMAX: {
|
||||
if (ins->op.minmax == BI_MINMAX_MIN) {
|
||||
bpoly(min);
|
||||
} else {
|
||||
bpoly(max);
|
||||
}
|
||||
}
|
||||
|
||||
case BI_MOV:
|
||||
bpoly(mov);
|
||||
|
||||
case BI_REDUCE_FMA: {
|
||||
if (ins->src_types[0] != nir_type_float32)
|
||||
unreachable("Unknown reduce type");
|
||||
|
||||
if (ins->op.reduce == BI_REDUCE_ADD_FREXPM) {
|
||||
int _nop = 0;
|
||||
float f = frexp_log(srcs[1].f32, &_nop);
|
||||
dest.f32 = srcs[0].f32 + f;
|
||||
} else {
|
||||
unreachable("Unknown reduce");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_SPECIAL_FMA:
|
||||
case BI_SPECIAL_ADD: {
|
||||
assert(nir_alu_type_get_base_type(ins->dest_type) == nir_type_float);
|
||||
assert(ins->dest_type != nir_type_float64);
|
||||
|
||||
if (ins->op.special == BI_SPECIAL_EXP2_LOW) {
|
||||
assert(ins->dest_type == nir_type_float32);
|
||||
dest.f32 = exp2f(srcs[1].f32);
|
||||
break;
|
||||
}
|
||||
|
||||
float Q = (ins->dest_type == nir_type_float16) ?
|
||||
bf(srcs[0].u16[ins->swizzle[0][0]]) :
|
||||
srcs[0].f32;
|
||||
|
||||
float R = biti_special(Q, ins->op.special);
|
||||
|
||||
if (ins->dest_type == nir_type_float16) {
|
||||
dest.f16[0] = bh(R);
|
||||
|
||||
if (!ins->swizzle[0][0] && ins->op.special == BI_SPECIAL_FRSQ) {
|
||||
/* Sorry. */
|
||||
dest.f16[0]++;
|
||||
}
|
||||
} else {
|
||||
dest.f32 = R;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_TABLE: {
|
||||
if (ins->op.table == BI_TABLE_LOG2_U_OVER_U_1_LOW) {
|
||||
assert(ins->dest_type == nir_type_float32);
|
||||
int _nop = 0;
|
||||
float f = frexp_log(srcs[0].f32, &_nop);
|
||||
dest.f32 = log2f(f) / (f - 1.0);
|
||||
dest.u32++; /* Sorry. */
|
||||
} else {
|
||||
unreachable("Unknown table op");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_SELECT: {
|
||||
if (ins->src_types[0] == nir_type_uint16) {
|
||||
for (unsigned c = 0; c < 2; ++c)
|
||||
dest.u16[c] = srcs[c].u16[ins->swizzle[c][0]];
|
||||
} else if (ins->src_types[0] == nir_type_uint8) {
|
||||
for (unsigned c = 0; c < 4; ++c)
|
||||
dest.u8[c] = srcs[c].u8[ins->swizzle[c][0]];
|
||||
} else {
|
||||
unreachable("Unknown type");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_ROUND: {
|
||||
if (ins->roundmode == BIFROST_RTP) {
|
||||
bfloat(bit_f64ceil, bit_f32ceil);
|
||||
} else if (ins->roundmode == BIFROST_RTN) {
|
||||
bfloat(bit_f64floor, bit_f32floor);
|
||||
} else if (ins->roundmode == BIFROST_RTE) {
|
||||
bfloat(bit_f64nearbyint, bit_f32nearbyint);
|
||||
} else if (ins->roundmode == BIFROST_RTZ) {
|
||||
bfloat(bit_f64trunc, bit_f32trunc);
|
||||
} else
|
||||
unreachable("Invalid");
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/* We only interpret vertex shaders */
|
||||
case BI_DISCARD:
|
||||
case BI_LOAD_VAR:
|
||||
case BI_ATEST:
|
||||
case BI_BLEND:
|
||||
unreachable("Fragment op used in interpreter");
|
||||
|
||||
/* Modeling main memory is more than I bargained for */
|
||||
case BI_LOAD_UNIFORM:
|
||||
case BI_LOAD_ATTR:
|
||||
case BI_LOAD_VAR_ADDRESS:
|
||||
case BI_LOAD:
|
||||
case BI_STORE:
|
||||
case BI_STORE_VAR:
|
||||
case BI_TEXS:
|
||||
case BI_TEXC:
|
||||
case BI_TEXC_DUAL:
|
||||
unreachable("Unsupported I/O in interpreter");
|
||||
|
||||
default:
|
||||
unreachable("Unsupported op");
|
||||
}
|
||||
|
||||
/* Apply _MSCALE */
|
||||
if ((ins->type == BI_FMA || ins->type == BI_ADD) && ins->op.mscale) {
|
||||
unsigned idx = (ins->type == BI_FMA) ? 3 : 2;
|
||||
|
||||
assert(ins->src_types[idx] == nir_type_int32);
|
||||
assert(ins->dest_type == nir_type_float32);
|
||||
|
||||
int32_t scale = srcs[idx].i32;
|
||||
dest.f32 *= exp2f(scale);
|
||||
}
|
||||
|
||||
/* Apply outmod */
|
||||
if (bi_has_outmod(ins) && ins->outmod != BIFROST_NONE) {
|
||||
if (ins->dest_type == nir_type_float16) {
|
||||
for (unsigned c = 0; c < 2; ++c)
|
||||
dest.f16[c] = bh(bit_outmod(bf(dest.f16[c]), ins->outmod));
|
||||
} else {
|
||||
dest.f32 = bit_outmod(dest.f32, ins->outmod);
|
||||
}
|
||||
}
|
||||
|
||||
/* Finally, store the result */
|
||||
bit_write(s, ins->dest, ins->dest_type, dest, FMA);
|
||||
|
||||
/* For ADD - change out the passthrough */
|
||||
if (!FMA) {
|
||||
s->T0 = s->T;
|
||||
s->T1 = dest.u32;
|
||||
}
|
||||
}
|
||||
|
||||
#undef bh
|
||||
#undef bf
|
||||
|
|
@ -1,254 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2020 Collabora Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors (Collabora):
|
||||
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
|
||||
*/
|
||||
|
||||
#include "bit.h"
|
||||
#include "panfrost/lib/decode.h"
|
||||
#include "drm-uapi/panfrost_drm.h"
|
||||
#include "panfrost/lib/pan_encoder.h"
|
||||
|
||||
/* Standalone compiler tests submitting jobs directly to the hardware. Uses the
|
||||
* `bit` prefix for `BIfrost Tests` and because bit sounds wicked cool. */
|
||||
|
||||
static struct panfrost_bo *
|
||||
bit_bo_create(struct panfrost_device *dev, size_t size)
|
||||
{
|
||||
struct panfrost_bo *bo = panfrost_bo_create(dev, size, PAN_BO_EXECUTE);
|
||||
pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
|
||||
return bo;
|
||||
}
|
||||
|
||||
struct panfrost_device *
|
||||
bit_initialize(void *memctx)
|
||||
{
|
||||
int fd = drmOpenWithType("panfrost", NULL, DRM_NODE_RENDER);
|
||||
|
||||
if (fd < 0)
|
||||
unreachable("No panfrost device found. Try chmod?");
|
||||
|
||||
struct panfrost_device *dev = rzalloc(memctx, struct panfrost_device);
|
||||
panfrost_open_device(memctx, fd, dev);
|
||||
|
||||
pandecode_initialize(true);
|
||||
printf("%X\n", dev->gpu_id);
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
static bool
|
||||
bit_submit(struct panfrost_device *dev,
|
||||
enum mali_job_type T,
|
||||
void *payload, size_t payload_size,
|
||||
struct panfrost_bo **bos, size_t bo_count, enum bit_debug debug)
|
||||
{
|
||||
struct panfrost_bo *job = bit_bo_create(dev, 4096);
|
||||
pan_pack(job->ptr.cpu, JOB_HEADER, cfg) {
|
||||
cfg.type = T;
|
||||
cfg.index = 1;
|
||||
}
|
||||
memcpy(job->ptr.cpu + MALI_JOB_HEADER_LENGTH, payload, payload_size);
|
||||
|
||||
uint32_t *bo_handles = calloc(sizeof(uint32_t), bo_count);
|
||||
|
||||
for (unsigned i = 0; i < bo_count; ++i)
|
||||
bo_handles[i] = bos[i]->gem_handle;
|
||||
|
||||
uint32_t syncobj = 0;
|
||||
int ret = 0;
|
||||
|
||||
ret = drmSyncobjCreate(dev->fd, DRM_SYNCOBJ_CREATE_SIGNALED, &syncobj);
|
||||
assert(!ret);
|
||||
|
||||
struct drm_panfrost_submit submit = {
|
||||
.jc = job->ptr.gpu,
|
||||
.bo_handles = (uintptr_t) bo_handles,
|
||||
.bo_handle_count = bo_count,
|
||||
.out_sync = syncobj,
|
||||
};
|
||||
|
||||
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
|
||||
assert(!ret);
|
||||
free(bo_handles);
|
||||
|
||||
drmSyncobjWait(dev->fd, &syncobj, 1, INT64_MAX, 0, NULL);
|
||||
if (debug >= BIT_DEBUG_ALL)
|
||||
pandecode_jc(submit.jc, true, dev->gpu_id, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Checks that the device is alive and responding to basic jobs as a sanity
|
||||
* check - prerequisite to running code on the device. We test this via a
|
||||
* WRITE_VALUE job */
|
||||
|
||||
bool
|
||||
bit_sanity_check(struct panfrost_device *dev)
|
||||
{
|
||||
struct panfrost_bo *scratch = bit_bo_create(dev, 65536);
|
||||
((uint32_t *) scratch->ptr.cpu)[0] = 0xAA;
|
||||
|
||||
struct mali_write_value_job_payload_packed payload;
|
||||
|
||||
pan_pack(&payload, WRITE_VALUE_JOB_PAYLOAD, cfg) {
|
||||
cfg.address = scratch->ptr.gpu;
|
||||
cfg.type = MALI_WRITE_VALUE_TYPE_ZERO;
|
||||
};
|
||||
|
||||
struct panfrost_bo *bos[] = { scratch };
|
||||
bool success = bit_submit(dev, MALI_JOB_TYPE_WRITE_VALUE,
|
||||
&payload, sizeof(payload), bos, 1, false);
|
||||
|
||||
return success && (((uint8_t *) scratch->ptr.cpu)[0] == 0x0);
|
||||
}
|
||||
|
||||
/* Constructs a vertex job */
|
||||
|
||||
bool
|
||||
bit_vertex(struct panfrost_device *dev, panfrost_program *prog,
|
||||
uint32_t *iubo, size_t sz_ubo,
|
||||
uint32_t *iattr, size_t sz_attr,
|
||||
uint32_t *expected, size_t sz_expected, enum bit_debug debug)
|
||||
{
|
||||
struct panfrost_bo *shader = bit_bo_create(dev, prog->compiled.size);
|
||||
struct panfrost_bo *shader_desc = bit_bo_create(dev, 4096);
|
||||
struct panfrost_bo *ubo = bit_bo_create(dev, 4096);
|
||||
struct panfrost_bo *var = bit_bo_create(dev, 4096);
|
||||
struct panfrost_bo *attr = bit_bo_create(dev, 4096);
|
||||
|
||||
pan_pack(attr->ptr.cpu, ATTRIBUTE, cfg) {
|
||||
cfg.format = (MALI_RGBA32UI << 12);
|
||||
cfg.offset_enable = true;
|
||||
}
|
||||
|
||||
pan_pack(var->ptr.cpu, ATTRIBUTE, cfg) {
|
||||
cfg.format = (MALI_RGBA32UI << 12);
|
||||
cfg.offset_enable = false;
|
||||
}
|
||||
|
||||
pan_pack(var->ptr.cpu + 256, ATTRIBUTE_BUFFER, cfg) {
|
||||
cfg.pointer = (var->ptr.gpu + 1024);
|
||||
cfg.size = 1024;
|
||||
}
|
||||
|
||||
pan_pack(attr->ptr.cpu + 256, ATTRIBUTE_BUFFER, cfg) {
|
||||
cfg.pointer = (attr->ptr.gpu + 1024);
|
||||
cfg.size = 1024;
|
||||
}
|
||||
|
||||
pan_pack(ubo->ptr.cpu, UNIFORM_BUFFER, cfg) {
|
||||
cfg.entries = sz_ubo / 16;
|
||||
cfg.pointer = ubo->ptr.gpu + 1024;
|
||||
}
|
||||
|
||||
if (sz_ubo)
|
||||
memcpy(ubo->ptr.cpu + 1024, iubo, sz_ubo);
|
||||
|
||||
if (sz_attr)
|
||||
memcpy(attr->ptr.cpu + 1024, iattr, sz_attr);
|
||||
|
||||
struct panfrost_bo *shmem = bit_bo_create(dev, 4096);
|
||||
|
||||
pan_pack(shmem->ptr.cpu, LOCAL_STORAGE, cfg) {
|
||||
cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
|
||||
}
|
||||
|
||||
pan_pack(shader_desc->ptr.cpu, RENDERER_STATE, cfg) {
|
||||
cfg.shader.shader = shader->ptr.gpu;
|
||||
cfg.shader.attribute_count = cfg.shader.varying_count = 1;
|
||||
cfg.properties.uniform_buffer_count = 1;
|
||||
cfg.properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
|
||||
cfg.preload.vertex.vertex_id = true;
|
||||
cfg.preload.vertex.instance_id = true;
|
||||
cfg.preload.uniform_count = (sz_ubo / 16);
|
||||
}
|
||||
|
||||
memcpy(shader->ptr.cpu, prog->compiled.data, prog->compiled.size);
|
||||
|
||||
struct mali_compute_job_packed job;
|
||||
|
||||
pan_section_pack(&job, COMPUTE_JOB, PARAMETERS, cfg) {
|
||||
cfg.job_task_split = 5;
|
||||
}
|
||||
|
||||
pan_section_pack(&job, COMPUTE_JOB, DRAW, cfg) {
|
||||
cfg.draw_descriptor_is_64b = true;
|
||||
cfg.thread_storage = shmem->ptr.gpu;
|
||||
cfg.state = shader_desc->ptr.gpu;
|
||||
cfg.push_uniforms = ubo->ptr.gpu + 1024;
|
||||
cfg.uniform_buffers = ubo->ptr.gpu;
|
||||
cfg.attributes = attr->ptr.gpu;
|
||||
cfg.attribute_buffers = attr->ptr.gpu + 256;
|
||||
cfg.varyings = var->ptr.gpu;
|
||||
cfg.varying_buffers = var->ptr.gpu + 256;
|
||||
}
|
||||
|
||||
void *invocation = pan_section_ptr(&job, COMPUTE_JOB, INVOCATION);
|
||||
panfrost_pack_work_groups_compute(invocation,
|
||||
1, 1, 1,
|
||||
1, 1, 1,
|
||||
true);
|
||||
|
||||
struct panfrost_bo *bos[] = {
|
||||
shmem, shader, shader_desc, ubo, var, attr
|
||||
};
|
||||
|
||||
bool succ = bit_submit(dev, MALI_JOB_TYPE_VERTEX,
|
||||
((void *)&job) + MALI_JOB_HEADER_LENGTH,
|
||||
MALI_COMPUTE_JOB_LENGTH - MALI_JOB_HEADER_LENGTH,
|
||||
bos, ARRAY_SIZE(bos), debug);
|
||||
|
||||
/* Check the output varyings */
|
||||
|
||||
uint32_t *output = (uint32_t *) (var->ptr.cpu + 1024);
|
||||
float *foutput = (float *) output;
|
||||
float *fexpected = (float *) expected;
|
||||
|
||||
if (sz_expected) {
|
||||
unsigned comp = memcmp(output, expected, sz_expected);
|
||||
succ &= (comp == 0);
|
||||
|
||||
if (comp && (debug >= BIT_DEBUG_FAIL)) {
|
||||
fprintf(stderr, "expected [");
|
||||
|
||||
for (unsigned i = 0; i < (sz_expected >> 2); ++i)
|
||||
fprintf(stderr, "%08X /* %f */ ", expected[i], fexpected[i]);
|
||||
|
||||
fprintf(stderr, "], got [");
|
||||
|
||||
for (unsigned i = 0; i < (sz_expected >> 2); ++i)
|
||||
fprintf(stderr, "%08X /* %f */ ", output[i], foutput[i]);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
} else if (debug == BIT_DEBUG_ALL) {
|
||||
fprintf(stderr, "got [");
|
||||
|
||||
for (unsigned i = 0; i < 4; ++i)
|
||||
fprintf(stderr, "%08X /* %f */ ", output[i], foutput[i]);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
return succ;
|
||||
}
|
||||
|
|
@ -1,654 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2020 Collabora Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors (Collabora):
|
||||
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
|
||||
*/
|
||||
|
||||
#include "bit.h"
|
||||
#include "bi_print.h"
|
||||
#include "util/half_float.h"
|
||||
#include "bifrost/disassemble.h"
|
||||
|
||||
/* Instruction packing tests */
|
||||
|
||||
static void
|
||||
bit_test_single(struct panfrost_device *dev,
|
||||
bi_instruction *ins,
|
||||
uint32_t input[4],
|
||||
bool fma, enum bit_debug debug)
|
||||
{
|
||||
/* First, simulate the instruction */
|
||||
struct bit_state s = { 0 };
|
||||
memcpy(s.r, input, 16);
|
||||
bit_step(&s, ins, fma);
|
||||
|
||||
/* Next, wrap it up and pack it */
|
||||
|
||||
bi_instruction ldubo = {
|
||||
.type = BI_LOAD_UNIFORM,
|
||||
.segment = BI_SEGMENT_UBO,
|
||||
.src = {
|
||||
BIR_INDEX_CONSTANT,
|
||||
BIR_INDEX_ZERO
|
||||
},
|
||||
.src_types = {
|
||||
nir_type_uint32,
|
||||
nir_type_uint32,
|
||||
},
|
||||
.dest = BIR_INDEX_REGISTER | 0,
|
||||
.dest_type = nir_type_uint32,
|
||||
.vector_channels = 4,
|
||||
};
|
||||
|
||||
bi_instruction ldva = {
|
||||
.type = BI_LOAD_VAR_ADDRESS,
|
||||
.vector_channels = 3,
|
||||
.dest = BIR_INDEX_REGISTER | 32,
|
||||
.dest_type = nir_type_uint32,
|
||||
.format = nir_type_uint32,
|
||||
.src = {
|
||||
BIR_INDEX_CONSTANT,
|
||||
BIR_INDEX_REGISTER | 61,
|
||||
BIR_INDEX_REGISTER | 62,
|
||||
0,
|
||||
},
|
||||
.src_types = {
|
||||
nir_type_uint32,
|
||||
nir_type_uint32,
|
||||
nir_type_uint32,
|
||||
nir_type_uint32,
|
||||
}
|
||||
};
|
||||
|
||||
bi_instruction st = {
|
||||
.type = BI_STORE_VAR,
|
||||
.src = {
|
||||
BIR_INDEX_REGISTER | 0,
|
||||
ldva.dest, ldva.dest + 1, ldva.dest + 2,
|
||||
},
|
||||
.src_types = {
|
||||
nir_type_uint32,
|
||||
nir_type_uint32, nir_type_uint32, nir_type_uint32,
|
||||
},
|
||||
.vector_channels = 4
|
||||
};
|
||||
|
||||
bi_context *ctx = rzalloc(NULL, bi_context);
|
||||
ctx->stage = MESA_SHADER_VERTEX;
|
||||
|
||||
bi_block *blk = rzalloc(ctx, bi_block);
|
||||
blk->scheduled = true;
|
||||
|
||||
blk->base.predecessors = _mesa_set_create(blk,
|
||||
_mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
|
||||
list_inithead(&ctx->blocks);
|
||||
list_addtail(&blk->base.link, &ctx->blocks);
|
||||
list_inithead(&blk->clauses);
|
||||
|
||||
bi_clause *clauses[4] = {
|
||||
rzalloc(ctx, bi_clause),
|
||||
rzalloc(ctx, bi_clause),
|
||||
rzalloc(ctx, bi_clause),
|
||||
rzalloc(ctx, bi_clause)
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
clauses[i]->bundle_count = 1;
|
||||
list_addtail(&clauses[i]->link, &blk->clauses);
|
||||
clauses[i]->scoreboard_id = (i & 1);
|
||||
|
||||
if (i) {
|
||||
clauses[i]->dependencies = 1 << (~i & 1);
|
||||
clauses[i]->staging_barrier = true;
|
||||
}
|
||||
}
|
||||
|
||||
clauses[0]->bundles[0].add = &ldubo;
|
||||
clauses[0]->message_type = BIFROST_MESSAGE_ATTRIBUTE;
|
||||
|
||||
if (fma)
|
||||
clauses[1]->bundles[0].fma = ins;
|
||||
else
|
||||
clauses[1]->bundles[0].add = ins;
|
||||
|
||||
clauses[0]->constant_count = 1;
|
||||
clauses[1]->constant_count = 1;
|
||||
clauses[1]->constants[0] = ins->constant.u64;
|
||||
|
||||
clauses[2]->bundles[0].add = &ldva;
|
||||
clauses[3]->bundles[0].add = &st;
|
||||
|
||||
clauses[2]->message_type = BIFROST_MESSAGE_ATTRIBUTE;
|
||||
clauses[3]->message_type = BIFROST_MESSAGE_STORE;
|
||||
|
||||
panfrost_program prog = { 0 };
|
||||
util_dynarray_init(&prog.compiled, NULL);
|
||||
bi_pack(ctx, &prog.compiled);
|
||||
|
||||
bool succ = bit_vertex(dev, &prog, input, 16, NULL, 0,
|
||||
s.r, 16, debug);
|
||||
|
||||
if (debug >= BIT_DEBUG_ALL || (!succ && debug >= BIT_DEBUG_FAIL)) {
|
||||
bi_print_shader(ctx, stderr);
|
||||
disassemble_bifrost(stderr, prog.compiled.data, prog.compiled.size, true);
|
||||
}
|
||||
|
||||
if (!succ)
|
||||
fprintf(stderr, "FAIL\n");
|
||||
}
|
||||
|
||||
/* Utilities for generating tests */
|
||||
|
||||
/* Fills mem[0..3] with pseudo-random floats drawn from rand(). Each value is
 * an integer in [-127, 128] divided by 16, i.e. a multiple of 1/16. */
static void
bit_generate_float4(float *mem)
{
        float *const end = mem + 4;

        for (float *out = mem; out != end; ++out) {
                /* Low 8 bits of rand(), recentred around zero */
                int sixteenths = (rand() & 255) - 127;

                *out = (float) sixteenths / 16.0;
        }
}
|
||||
|
||||
/* Fills mem[0..7] with pseudo-random half-floats. Each one is a rand()-derived
 * integer in [-127, 128] divided by 16, converted with _mesa_float_to_half(). */
static void
bit_generate_half8(uint16_t *mem)
{
        unsigned i = 0;

        while (i < 8) {
                float value = ((float) (rand() & 255) - 127) / 16.0;

                mem[i++] = _mesa_float_to_half(value);
        }
}
|
||||
|
||||
static bi_instruction
|
||||
bit_ins(enum bi_class C, unsigned argc, nir_alu_type base, unsigned size)
|
||||
{
|
||||
nir_alu_type T = base | size;
|
||||
|
||||
bi_instruction ins = {
|
||||
.type = C,
|
||||
.dest = BIR_INDEX_REGISTER | 0,
|
||||
.dest_type = T,
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < argc; ++i) {
|
||||
ins.src[i] = BIR_INDEX_REGISTER | i;
|
||||
ins.src_types[i] = T;
|
||||
}
|
||||
|
||||
return ins;
|
||||
}
|
||||
|
||||
/* Iterates `swz` over every swizzle selector for `args` sources of width
 * `sz`: 2 bits per source for 16-bit (4^args values), a single iteration
 * otherwise. The arguments are parenthesized so that expressions such as
 * `n + 1` expand with the intended precedence. */
#define BIT_FOREACH_SWIZZLE(swz, args, sz) \
        for (unsigned swz = 0; swz < (((sz) == 16) ? (1u << (2 * (args))) : 1u); ++swz)
|
||||
|
||||
static void
|
||||
bit_apply_swizzle(bi_instruction *ins, unsigned swz, unsigned args, unsigned sz)
|
||||
{
|
||||
unsigned slots_per_arg = (sz == 16) ? 4 : 1;
|
||||
unsigned slots_per_chan = (sz == 16) ? 1 : 0;
|
||||
unsigned mask = (sz == 16) ? 1 : 0;
|
||||
|
||||
for (unsigned i = 0; i < args; ++i) {
|
||||
for (unsigned j = 0; j < (32 / sz); ++j) {
|
||||
ins->swizzle[i][j] = ((swz >> (slots_per_arg * i)) >> (slots_per_chan * j)) & mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Tests all 64 combinations of floating point modifiers for a given
|
||||
* instruction / floating-type / test type */
|
||||
|
||||
static void
|
||||
bit_fmod_helper(struct panfrost_device *dev,
|
||||
enum bi_class c, unsigned size, bool fma,
|
||||
uint32_t *input, enum bit_debug debug, unsigned op)
|
||||
{
|
||||
bi_instruction ins = bit_ins(c, 2, nir_type_float, size);
|
||||
|
||||
bool fp16 = (size == 16);
|
||||
bool has_outmods = fma || !fp16;
|
||||
|
||||
for (unsigned outmod = 0; outmod < (has_outmods ? 4 : 1); ++outmod) {
|
||||
BIT_FOREACH_SWIZZLE(swz, 2, size) {
|
||||
for (unsigned inmod = 0; inmod < 16; ++inmod) {
|
||||
ins.outmod = outmod;
|
||||
ins.op.minmax = op;
|
||||
ins.src_abs[0] = (inmod & 0x1);
|
||||
ins.src_abs[1] = (inmod & 0x2);
|
||||
ins.src_neg[0] = (inmod & 0x4);
|
||||
ins.src_neg[1] = (inmod & 0x8);
|
||||
bit_apply_swizzle(&ins, swz, 2, size);
|
||||
bit_test_single(dev, &ins, input, fma, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_fma_helper(struct panfrost_device *dev,
|
||||
unsigned size, uint32_t *input, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_FMA, 3, nir_type_float, size);
|
||||
|
||||
for (unsigned outmod = 0; outmod < 4; ++outmod) {
|
||||
for (unsigned inmod = 0; inmod < 8; ++inmod) {
|
||||
ins.outmod = outmod;
|
||||
ins.src_neg[0] = (inmod & 0x1);
|
||||
ins.src_neg[1] = (inmod & 0x2);
|
||||
ins.src_neg[2] = (inmod & 0x4);
|
||||
bit_test_single(dev, &ins, input, true, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_fma_mscale_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_FMA, 4, nir_type_float, 32);
|
||||
ins.op.mscale = true;
|
||||
ins.src_types[3] = nir_type_int32;
|
||||
ins.src[2] = ins.src[3]; /* Not enough ports! */
|
||||
|
||||
for (unsigned outmod = 0; outmod < 4; ++outmod) {
|
||||
for (unsigned inmod = 0; inmod < 8; ++inmod) {
|
||||
ins.outmod = outmod;
|
||||
ins.src_abs[0] = (inmod & 0x1);
|
||||
ins.src_neg[1] = (inmod & 0x2);
|
||||
ins.src_neg[2] = (inmod & 0x4);
|
||||
bit_test_single(dev, &ins, input, true, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_csel_helper(struct panfrost_device *dev,
|
||||
unsigned size, uint32_t *input, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_CSEL, 4, nir_type_uint, size);
|
||||
|
||||
/* SCHEDULER: We can only read 3 registers at once. */
|
||||
ins.src[2] = ins.src[0];
|
||||
|
||||
for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
|
||||
ins.cond = cond;
|
||||
bit_test_single(dev, &ins, input, true, debug);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_special_helper(struct panfrost_device *dev,
|
||||
unsigned size, uint32_t *input, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_SPECIAL_ADD, 2, nir_type_float, size);
|
||||
uint32_t exp_input[4];
|
||||
|
||||
for (enum bi_special_op op = BI_SPECIAL_FRCP; op <= BI_SPECIAL_EXP2_LOW; ++op) {
|
||||
if (op == BI_SPECIAL_EXP2_LOW) {
|
||||
/* exp2 only supported in fp32 mode */
|
||||
if (size != 32)
|
||||
continue;
|
||||
|
||||
/* Give expected input */
|
||||
exp_input[1] = input[0];
|
||||
float *ff = (float *) input;
|
||||
exp_input[0] = (int) (ff[0] * (1 << 24));
|
||||
}
|
||||
|
||||
for (unsigned c = 0; c < ((size == 16) ? 2 : 1); ++c) {
|
||||
ins.op.special = op;
|
||||
ins.swizzle[0][0] = c;
|
||||
bit_test_single(dev, &ins,
|
||||
op == BI_SPECIAL_EXP2_LOW ? exp_input : input,
|
||||
false, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_table_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_TABLE, 1, nir_type_float, 32);
|
||||
|
||||
for (enum bi_table_op op = 0; op <= BI_TABLE_LOG2_U_OVER_U_1_LOW; ++op) {
|
||||
ins.op.table = op;
|
||||
bit_test_single(dev, &ins, input, false, debug);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_frexp_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_FREXP, 1, nir_type_float, 32);
|
||||
ins.dest_type = nir_type_int32;
|
||||
|
||||
for (enum bi_frexp_op op = 0; op <= BI_FREXPE_LOG; ++op) {
|
||||
ins.op.frexp = op;
|
||||
bit_test_single(dev, &ins, input, true, debug);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_round_helper(struct panfrost_device *dev, uint32_t *input, unsigned sz, bool FMA, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_ROUND, 1, nir_type_float, sz);
|
||||
|
||||
for (enum bifrost_roundmode mode = 0; mode <= 3; ++mode) {
|
||||
BIT_FOREACH_SWIZZLE(swz, 1, sz) {
|
||||
bit_apply_swizzle(&ins, swz, 1, sz);
|
||||
ins.roundmode = mode;
|
||||
bit_test_single(dev, &ins, input, FMA, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_reduce_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_REDUCE_FMA, 2, nir_type_float, 32);
|
||||
|
||||
for (enum bi_reduce_op op = 0; op <= BI_REDUCE_ADD_FREXPM; ++op) {
|
||||
ins.op.reduce = op;
|
||||
bit_test_single(dev, &ins, input, true, debug);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_select_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug)
|
||||
{
|
||||
unsigned C = 32 / size;
|
||||
bi_instruction ins = bit_ins(BI_SELECT, C, nir_type_uint, 32);
|
||||
|
||||
for (unsigned c = 0; c < C; ++c)
|
||||
ins.src_types[c] = nir_type_uint | size;
|
||||
|
||||
if (size == 8) {
|
||||
/* SCHEDULER: We can only read 3 registers at once. */
|
||||
ins.src[2] = ins.src[0];
|
||||
}
|
||||
|
||||
/* Each argument has swizzle {lo, hi} so 2^C options */
|
||||
unsigned hi = (size == 16) ? 1 : 2;
|
||||
|
||||
for (unsigned add = 0; add < ((size == 16) ? 2 : 1); ++add) {
|
||||
for (unsigned swizzle = 0; swizzle < (1 << C); ++swizzle) {
|
||||
for (unsigned i = 0; i < C; ++i)
|
||||
ins.swizzle[i][0] = ((swizzle >> i) & 1) ? hi : 0;
|
||||
|
||||
bit_test_single(dev, &ins, input, !add, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_fcmp_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug, bool FMA)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_CMP, 2, nir_type_float, size);
|
||||
ins.dest_type = nir_type_uint | size;
|
||||
|
||||
/* 16-bit has swizzles and abs. 32-bit has abs/neg mods. */
|
||||
unsigned max_mods = (size == 16) ? 64 : (size == 32) ? 16 : 1;
|
||||
|
||||
for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
|
||||
for (unsigned mods = 0; mods < max_mods; ++mods) {
|
||||
ins.cond = cond;
|
||||
|
||||
if (size == 16) {
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
ins.swizzle[i][0] = ((mods >> (i * 2)) & 1) ? 1 : 0;
|
||||
ins.swizzle[i][1] = ((mods >> (i * 2)) & 2) ? 1 : 0;
|
||||
}
|
||||
|
||||
ins.src_abs[0] = (mods & 16) ? true : false;
|
||||
ins.src_abs[1] = (mods & 32) ? true : false;
|
||||
} else if (size == 8) {
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
for (unsigned j = 0; j < 4; ++j)
|
||||
ins.swizzle[i][j] = j;
|
||||
}
|
||||
} else if (size == 32) {
|
||||
ins.src_abs[0] = (mods & 1) ? true : false;
|
||||
ins.src_abs[1] = (mods & 2) ? true : false;
|
||||
ins.src_neg[0] = (mods & 4) ? true : false;
|
||||
ins.src_neg[1] = (mods & 8) ? true : false;
|
||||
}
|
||||
|
||||
bit_test_single(dev, &ins, input, FMA, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_icmp_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, nir_alu_type T, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_CMP, 2, T, size);
|
||||
ins.dest_type = nir_type_uint | size;
|
||||
|
||||
for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
|
||||
BIT_FOREACH_SWIZZLE(swz, 2, size) {
|
||||
ins.cond = cond;
|
||||
bit_apply_swizzle(&ins, swz, 2, size);
|
||||
bit_test_single(dev, &ins, input, false, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
bit_convert_helper(struct panfrost_device *dev, unsigned from_size,
|
||||
unsigned to_size, unsigned cx, unsigned cy, bool FMA,
|
||||
enum bifrost_roundmode roundmode,
|
||||
uint32_t *input, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = {
|
||||
.type = BI_CONVERT,
|
||||
.dest = BIR_INDEX_REGISTER | 0,
|
||||
.src = { BIR_INDEX_REGISTER | 0 }
|
||||
};
|
||||
|
||||
nir_alu_type Ts[3] = { nir_type_float, nir_type_uint, nir_type_int };
|
||||
|
||||
for (unsigned from_base = 0; from_base < 3; ++from_base) {
|
||||
for (unsigned to_base = 0; to_base < 3; ++to_base) {
|
||||
/* Discard invalid combinations.. */
|
||||
if ((from_size == to_size) && (from_base == to_base))
|
||||
continue;
|
||||
|
||||
/* Can't switch signedness */
|
||||
if (from_base && to_base)
|
||||
continue;
|
||||
|
||||
/* No F16_TO_I32, etc */
|
||||
if (from_size != to_size && from_base == 0 && to_base)
|
||||
continue;
|
||||
|
||||
if (from_size != to_size && from_base && to_base == 0)
|
||||
continue;
|
||||
|
||||
/* No need, just ignore the upper half */
|
||||
if (from_size > to_size && from_base == to_base && from_base)
|
||||
continue;
|
||||
|
||||
ins.dest_type = Ts[to_base] | to_size;
|
||||
ins.src_types[0] = Ts[from_base] | from_size;
|
||||
ins.roundmode = roundmode;
|
||||
ins.swizzle[0][0] = cx;
|
||||
ins.swizzle[0][1] = cy;
|
||||
|
||||
if (to_size == 16 && from_size == 32) {
|
||||
ins.src_types[1] = ins.src_types[0];
|
||||
ins.src[1] = ins.src[0];
|
||||
} else {
|
||||
ins.src[1] = ins.src_types[1] = 0;
|
||||
}
|
||||
|
||||
bit_test_single(dev, &ins, input, FMA, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_constant_helper(struct panfrost_device *dev,
|
||||
uint32_t *input, enum bit_debug debug)
|
||||
{
|
||||
enum bi_class C[3] = { BI_MOV, BI_ADD, BI_FMA };
|
||||
|
||||
for (unsigned doubled = 0; doubled < 2; ++doubled) {
|
||||
for (unsigned count = 1; count <= 3; ++count) {
|
||||
bi_instruction ins = bit_ins(C[count - 1], count, nir_type_float, 32);
|
||||
|
||||
ins.src[0] = BIR_INDEX_CONSTANT | 0;
|
||||
ins.src[1] = (count >= 2) ? BIR_INDEX_CONSTANT | (doubled ? 32 : 0) : 0;
|
||||
ins.src[2] = (count >= 3) ? BIR_INDEX_ZERO : 0;
|
||||
|
||||
ins.constant.u64 = doubled ?
|
||||
0x3f800000ull | (0x3f000000ull << 32ull) :
|
||||
0x3f800000ull;
|
||||
|
||||
bit_test_single(dev, &ins, input, true, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_swizzle_identity(bi_instruction *ins, unsigned args, unsigned size)
|
||||
{
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
for (unsigned j = 0; j < (32 / size); ++j)
|
||||
ins->swizzle[i][j] = j;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_bitwise_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_BITWISE, 3, nir_type_uint, size);
|
||||
bit_swizzle_identity(&ins, 2, size);
|
||||
|
||||
/* TODO: shifts */
|
||||
ins.src[2] = BIR_INDEX_ZERO;
|
||||
ins.src_types[2] = nir_type_uint8;
|
||||
|
||||
for (unsigned op = BI_BITWISE_AND; op <= BI_BITWISE_XOR; ++op) {
|
||||
ins.op.bitwise = op;
|
||||
|
||||
for (unsigned mods = 0; mods < 4; ++mods) {
|
||||
ins.bitwise.dest_invert = mods & 1;
|
||||
ins.bitwise.src1_invert = mods & 2;
|
||||
|
||||
/* Skip out-of-spec combinations */
|
||||
if (ins.bitwise.src1_invert && op == BI_BITWISE_XOR)
|
||||
continue;
|
||||
|
||||
bit_test_single(dev, &ins, input, true, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bit_imath_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug, bool FMA)
|
||||
{
|
||||
bi_instruction ins = bit_ins(BI_IMATH, 2, nir_type_uint, size);
|
||||
bit_swizzle_identity(&ins, 2, size);
|
||||
ins.src[2] = BIR_INDEX_ZERO; /* carry/borrow for FMA */
|
||||
|
||||
for (unsigned op = BI_IMATH_ADD; op <= BI_IMATH_SUB; ++op) {
|
||||
ins.op.imath = op;
|
||||
bit_test_single(dev, &ins, input, FMA, debug);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
bit_packing(struct panfrost_device *dev, enum bit_debug debug)
|
||||
{
|
||||
float input32[4];
|
||||
uint16_t input16[8];
|
||||
|
||||
bit_generate_float4(input32);
|
||||
bit_generate_half8(input16);
|
||||
|
||||
bit_constant_helper(dev, (uint32_t *) input32, debug);
|
||||
|
||||
for (unsigned sz = 16; sz <= 32; sz *= 2) {
|
||||
uint32_t *input =
|
||||
(sz == 16) ? (uint32_t *) input16 :
|
||||
(uint32_t *) input32;
|
||||
|
||||
bit_fmod_helper(dev, BI_ADD, sz, true, input, debug, 0);
|
||||
bit_fmod_helper(dev, BI_ADD, sz, false, input, debug, 0);
|
||||
bit_round_helper(dev, (uint32_t *) input32, sz, true, debug);
|
||||
|
||||
bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MIN);
|
||||
bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MAX);
|
||||
|
||||
bit_fma_helper(dev, sz, input, debug);
|
||||
bit_icmp_helper(dev, input, sz, nir_type_uint, debug);
|
||||
bit_icmp_helper(dev, input, sz, nir_type_int, debug);
|
||||
}
|
||||
|
||||
for (unsigned sz = 16; sz <= 32; sz *= 2)
|
||||
bit_csel_helper(dev, sz, (uint32_t *) input32, debug);
|
||||
|
||||
float special[4] = { 0.9 };
|
||||
uint32_t special16[4] = { _mesa_float_to_half(special[0]) | (_mesa_float_to_half(0.2) << 16) };
|
||||
|
||||
bit_table_helper(dev, (uint32_t *) special, debug);
|
||||
|
||||
for (unsigned sz = 16; sz <= 32; sz *= 2) {
|
||||
uint32_t *input =
|
||||
(sz == 16) ? special16 :
|
||||
(uint32_t *) special;
|
||||
|
||||
bit_special_helper(dev, sz, input, debug);
|
||||
}
|
||||
|
||||
for (unsigned rm = 0; rm < 4; ++rm) {
|
||||
bit_convert_helper(dev, 32, 32, 0, 0, false, rm, (uint32_t *) input32, debug);
|
||||
|
||||
for (unsigned c = 0; c < 2; ++c)
|
||||
bit_convert_helper(dev, 32, 16, c, 0, false, rm, (uint32_t *) input32, debug);
|
||||
|
||||
bit_convert_helper(dev, 16, 32, 0, 0, false, rm, (uint32_t *) input16, debug);
|
||||
|
||||
for (unsigned c = 0; c < 4; ++c)
|
||||
bit_convert_helper(dev, 16, 16, c & 1, c >> 1, false, rm, (uint32_t *) input16, debug);
|
||||
}
|
||||
|
||||
bit_frexp_helper(dev, (uint32_t *) input32, debug);
|
||||
bit_reduce_helper(dev, (uint32_t *) input32, debug);
|
||||
|
||||
uint32_t mscale_input[4];
|
||||
memcpy(mscale_input, input32, sizeof(input32));
|
||||
mscale_input[3] = 0x7;
|
||||
bit_fma_mscale_helper(dev, mscale_input, debug);
|
||||
|
||||
for (unsigned sz = 8; sz <= 16; sz *= 2) {
|
||||
bit_select_helper(dev, (uint32_t *) input32, sz, debug);
|
||||
}
|
||||
|
||||
bit_fcmp_helper(dev, (uint32_t *) input32, 32, debug, true);
|
||||
bit_fcmp_helper(dev, (uint32_t *) input32, 16, debug, true);
|
||||
|
||||
for (unsigned sz = 8; sz <= 32; sz *= 2) {
|
||||
bit_bitwise_helper(dev, (uint32_t *) input32, sz, debug);
|
||||
bit_imath_helper(dev, (uint32_t *) input32, sz, debug, false);
|
||||
}
|
||||
|
||||
bit_imath_helper(dev, (uint32_t *) input32, 32, debug, true);
|
||||
}
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2020 Collabora Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors (Collabora):
|
||||
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
|
||||
*/
|
||||
|
||||
#ifndef __BIFROST_TEST_H
|
||||
#define __BIFROST_TEST_H
|
||||
|
||||
#include "panfrost/lib/midgard_pack.h"
|
||||
#include "panfrost/lib/pan_device.h"
|
||||
#include "panfrost/lib/pan_bo.h"
|
||||
#include "bifrost_compile.h"
|
||||
#include "bifrost/compiler.h"
|
||||
|
||||
struct panfrost_device *
|
||||
bit_initialize(void *memctx);
|
||||
|
||||
bool bit_sanity_check(struct panfrost_device *dev);
|
||||
|
||||
/* Verbosity of the test harness. The values are ordered, so callers may
 * compare with >= to mean "at least this verbose". */
enum bit_debug {
        /* Print nothing extra */
        BIT_DEBUG_NONE = 0,

        /* Print details for failing tests */
        BIT_DEBUG_FAIL = 1,

        /* Print details for every test */
        BIT_DEBUG_ALL = 2
};
|
||||
|
||||
bool
|
||||
bit_vertex(struct panfrost_device *dev, panfrost_program *prog,
|
||||
uint32_t *iubo, size_t sz_ubo,
|
||||
uint32_t *iattr, size_t sz_attr,
|
||||
uint32_t *expected, size_t sz_expected, enum bit_debug debug);
|
||||
|
||||
/* BIT interpreter */
|
||||
|
||||
/* Machine state tracked by the BIT interpreter between instructions */
struct bit_state {
        uint32_t r[64]; /* Work registers */

        uint32_t T;     /* Passthrough within the bundle */

        /* Passthrough from last bundle */
        uint32_t T0;
        uint32_t T1;
};
|
||||
|
||||
void
|
||||
bit_step(struct bit_state *s, bi_instruction *ins, bool FMA);
|
||||
|
||||
void bit_packing(struct panfrost_device *dev, enum bit_debug debug);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -35,9 +35,6 @@ subdir('lib')
|
|||
|
||||
files_bifrost = files(
|
||||
'bifrost/cmdline.c',
|
||||
'bifrost/test/bi_submit.c',
|
||||
'bifrost/test/bi_interpret.c',
|
||||
'bifrost/test/bi_test_pack.c',
|
||||
)
|
||||
|
||||
bifrost_compiler = executable(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue