nv50/ir: use sched control codes for gm107 builtins

Yes, IMUL/IMAD require dependency barriers, and we should
definitely replace these instructions with XMAD, but the
different flags need to be figured out first. Note that XMAD
only supports 16-bit integers.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Pierre Moreau <pierre.morrow@free.fr>
This commit is contained in:
Samuel Pitoiset 2016-12-20 00:11:33 +01:00
parent f519c47f7d
commit 90537d6a89
2 changed files with 40 additions and 40 deletions

View file

@ -11,39 +11,39 @@
// SIZE: 22 / 14 * 8 bytes
//
gm107_div_u32:
// NOTE(review): judging by the label this is the unsigned 32-bit divide
// builtin. The register contract lives in the header comment above this
// hunk, which is only partly visible here; presumably $r0 = dividend and
// $r1 = divisor on entry -- confirm against the full file.
//
// Each group of three instructions below is introduced by TWO sched lines in
// this view: the all-zero "(st 0x0)" form and one carrying per-slot fields.
// Together with the "40 additions and 40 deletions" summary this looks like a
// diff whose +/- markers were stripped (first line removed, second added) --
// confirm before feeding this text to an assembler.
// The st/wr/rd/wt fields are not defined in this file; presumably stall
// count, write/read barrier to set, and barrier wait mask -- TODO confirm.
//
// Seed: $r2 = flo($r1) xor 0x1f, and $r3 = 1 as the value to be shifted.
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0xd wr 0x0 wt 0x3f) (st 0x1 wt 0x1) (st 0x6)
flo u32 $r2 $r1
lop xor 1 $r2 $r2 0x1f
mov $r3 0x1 0xf
// $r2 = $r3 << $r2 gives the initial estimate; $r1 is replaced by its
// negation; the imul starts the first of five imul/imad refinement pairs.
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x1) (st 0xf wr 0x0) (st 0x6 wr 0x0 wt 0x1)
shl $r2 $r3 $r2
i2i u32 u32 $r1 neg $r1
imul u32 u32 $r3 $r1 $r2
// Refinement pairs: $r3 = $r1 * $r2, then $r2 = hi($r2 * $r3) + $r2.
// The same pair is repeated verbatim; in the second sched line every slot of
// these groups carries identical fields (st 0x6 wr 0x0 wt 0x1).
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1)
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1)
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1)
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
// Keep the original $r0 in $r3, form $r0 = hi($r0 * $r2) as the quotient
// estimate, and put the negation of (the already negated) $r1 into $r2.
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6) (st 0x6 wr 0x0 rd 0x1 wt 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x2)
mov $r3 $r0 0xf
imul u32 u32 hi $r0 $r0 $r2
i2i u32 u32 $r2 neg $r1
// $r1 = $r1 * $r0 + $r3 (remainder candidate), $p0 = ($r1 >= $r2) unsigned.
// First correction step, predicated on $p0: $r1 -= $r2 ...
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6 wr 0x0 wt 0x3) (st 0xd wt 0x1) (st 0x1)
imad u32 u32 $r1 $r1 $r0 $r3
isetp ge u32 and $p0 1 $r1 $r2 1
$p0 iadd $r1 $r1 neg $r2
// ... and $r0 += 1. $p0 is then recomputed (itself under $p0) and a second
// predicated $r1 -= $r2 follows.
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x5) (st 0xd) (st 0x1)
$p0 iadd $r0 $r0 0x1
$p0 isetp ge u32 and $p0 1 $r1 $r2 1
$p0 iadd $r1 $r1 neg $r2
// Second predicated $r0 += 1, then return; the trailing nop fills the third
// slot of this sched group.
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x1) (st 0xf) (st 0xf)
$p0 iadd $r0 $r0 0x1
ret
nop 0
@ -55,47 +55,47 @@ gm107_div_u32:
// CLOBBER: $r2 - $r3, $p0 - $p3
//
gm107_div_s32:
// NOTE(review): judging by the label this is the signed 32-bit divide
// builtin. The input/output register contract is in the header comment above
// this hunk, which is only partly visible; presumably $r0 = dividend and
// $r1 = divisor on entry -- confirm against the full file.
//
// Each group of three instructions below is introduced by TWO sched lines in
// this view: the all-zero "(st 0x0)" form and one carrying per-slot fields.
// Together with the "40 additions and 40 deletions" summary this looks like a
// diff whose +/- markers were stripped (first line removed, second added) --
// confirm before feeding this text to an assembler.
// The st/wr/rd/wt fields are not defined in this file; presumably stall
// count, write/read barrier to set, and barrier wait mask -- TODO confirm.
//
// Record the signs before taking magnitudes:
//   $p2 = ($r0 < 0)
//   $p3 = ($r1 < 0) xor $p2
// then $r0 = abs($r0).
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0xd wt 0x3f) (st 0x1) (st 0x1 wr 0x0)
isetp lt and $p2 0x1 $r0 0 1
isetp lt xor $p3 1 $r1 0 $p2
i2i s32 s32 $r0 abs $r0
// $r1 = abs($r1); from here the sequence mirrors the unsigned routine:
// $r2 = flo($r1) xor 0x1f.
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0xf wr 0x1) (st 0xd wr 0x1 wt 0x2) (st 0x1 wt 0x2)
i2i s32 s32 $r1 abs $r1
flo u32 $r2 $r1
lop xor 1 $r2 $r2 0x1f
// $r3 = 1, $r2 = $r3 << $r2 (initial estimate), $r1 replaced by its negation.
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6) (st 0x1) (st 0xf wr 0x1)
mov $r3 0x1 0xf
shl $r2 $r3 $r2
i2i u32 u32 $r1 neg $r1
// Five refinement pairs: $r3 = $r1 * $r2, then $r2 = hi($r2 * $r3) + $r2.
// In the second sched line every slot of the next three groups carries
// identical fields (st 0x6 wr 0x1 wt 0x2).
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2)
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2)
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2)
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
// Last imad of the refinement, then keep the (absolute) $r0 in $r3 and form
// the quotient estimate $r0 = hi($r0 * $r2).
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x6 wr 0x1 rd 0x2 wt 0x2) (st 0x2 wt 0x5) (st 0x6 wr 0x0 rd 0x1 wt 0x2)
imad u32 u32 hi $r2 $r2 $r3 $r2
mov $r3 $r0 0xf
imul u32 u32 hi $r0 $r0 $r2
// $r2 = negation of the already negated $r1; $r1 = $r1 * $r0 + $r3
// (remainder candidate); $p0 = ($r1 >= $r2) unsigned.
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0xf wr 0x1 rd 0x2 wt 0x2) (st 0x6 wr 0x0 wt 0x5) (st 0xd wt 0x3)
i2i u32 u32 $r2 neg $r1
imad u32 u32 $r1 $r1 $r0 $r3
isetp ge u32 and $p0 1 $r1 $r2 1
// First correction step, predicated on $p0: $r1 -= $r2, $r0 += 1, then $p0
// is recomputed (itself under $p0).
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x1) (st 0x5) (st 0xd)
$p0 iadd $r1 $r1 neg $r2
$p0 iadd $r0 $r0 0x1
$p0 isetp ge u32 and $p0 1 $r1 $r2 1
// Second predicated correction step, then reapply the signs recorded at the
// top: $r0 is negated when $p3 is set ...
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0x1) (st 0x2) (st 0xf wr 0x0)
$p0 iadd $r1 $r1 neg $r2
$p0 iadd $r0 $r0 0x1
$p3 i2i s32 s32 $r0 neg $r0
// ... and $r1 is negated when $p2 is set. Return; the trailing nop fills the
// third slot of this sched group.
sched (st 0x0) (st 0x0) (st 0x0)
sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
$p2 i2i s32 s32 $r1 neg $r1
ret
nop 0

View file

@ -1,83 +1,83 @@
uint64_t gm107_builtin_code[] = {
/* 0x0000: gm107_div_u32 */
0x001f8000fc0007e0,
0x001f9801fc21ff0d,
0x5c30000000170002,
0x3847040001f70202,
0x3898078000170003,
0x001f8000fc0007e0,
0x003c1800e1e007e1,
0x5c48000000270302,
0x5ce0200000170a01,
0x5c38000000270103,
0x001f8000fc0007e0,
0x003c1801e0c00f06,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
0x001f8000fc0007e0,
0x003c1801e0c00f06,
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
0x001f8000fc0007e0,
0x003c1801e0c00f06,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
0x001f8000fc0007e0,
0x00443c0120c007e6,
0x5c98078000070003,
0x5c38008000270000,
0x5ce0200000170a02,
0x001f8000fc0007e0,
0x001f8401fda01f06,
0x5a00018000070101,
0x5b6c038000270107,
0x5c11000000200101,
0x001f8000fc0007e0,
0x001f8400fda007e5,
0x3810000000100000,
0x5b6c038000200107,
0x5c11000000200101,
0x001f8000fc0007e0,
0x001fbc00fde007e1,
0x3810000000100000,
0xe32000000007000f,
0x50b0000000070f00,
/* 0x0120: gm107_div_s32 */
0x001f8000fc0007e0,
0x001c0400fc21ffed,
0x5b6303800ff70017,
0x5b6341000ff7011f,
0x5ce2000000073a00,
0x001f8000fc0007e0,
0x005f8402e5a0072f,
0x5ce2000000173a01,
0x5c30000000170002,
0x3847040001f70202,
0x001f8000fc0007e0,
0x001cbc00fc2007e6,
0x3898078000170003,
0x5c48000000270302,
0x5ce0200000170a01,
0x001f8000fc0007e0,
0x005c9802e4c01726,
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
0x001f8000fc0007e0,
0x005c9802e4c01726,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
0x001f8000fc0007e0,
0x005c9802e4c01726,
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
0x001f8000fc0007e0,
0x00441805fc401226,
0x5a40010000370202,
0x5c98078000070003,
0x5c38008000270000,
0x001f8000fc0007e0,
0x007fb405e0c0122f,
0x5ce0200000170a02,
0x5a00018000070101,
0x5b6c038000270107,
0x001f8000fc0007e0,
0x001fb400fca007e1,
0x5c11000000200101,
0x3810000000100000,
0x5b6c038000200107,
0x001f8000fc0007e0,
0x001c3c00fc4007e1,
0x5c11000000200101,
0x3810000000100000,
0x5ce0200000033a00,
0x001f8000fc0007e0,
0x001fbc03fde0072f,
0x5ce0200000123a01,
0xe32000000007000f,
0x50b0000000070f00,