mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-06 20:50:31 +01:00
nv50/ir: use sched control codes for gm107 builtins
Yes, IMUL/IMAD require dependency barriers, and we should definitely replace these instructions with XMAD, but the different flags still need to be figured out. Note that XMAD only supports 16-bit integers. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Pierre Moreau <pierre.morrow@free.fr>
This commit is contained in:
parent
f519c47f7d
commit
90537d6a89
2 changed files with 40 additions and 40 deletions
|
|
@ -11,39 +11,39 @@
|
|||
// SIZE: 22 / 14 * 8 bytes
|
||||
//
|
||||
gm107_div_u32:
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0xd wr 0x0 wt 0x3f) (st 0x1 wt 0x1) (st 0x6)
|
||||
flo u32 $r2 $r1
|
||||
lop xor 1 $r2 $r2 0x1f
|
||||
mov $r3 0x1 0xf
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x1) (st 0xf wr 0x0) (st 0x6 wr 0x0 wt 0x1)
|
||||
shl $r2 $r3 $r2
|
||||
i2i u32 u32 $r1 neg $r1
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1)
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1)
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1)
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6) (st 0x6 wr 0x0 rd 0x1 wt 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x2)
|
||||
mov $r3 $r0 0xf
|
||||
imul u32 u32 hi $r0 $r0 $r2
|
||||
i2i u32 u32 $r2 neg $r1
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6 wr 0x0 wt 0x3) (st 0xd wt 0x1) (st 0x1)
|
||||
imad u32 u32 $r1 $r1 $r0 $r3
|
||||
isetp ge u32 and $p0 1 $r1 $r2 1
|
||||
$p0 iadd $r1 $r1 neg $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x5) (st 0xd) (st 0x1)
|
||||
$p0 iadd $r0 $r0 0x1
|
||||
$p0 isetp ge u32 and $p0 1 $r1 $r2 1
|
||||
$p0 iadd $r1 $r1 neg $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x1) (st 0xf) (st 0xf)
|
||||
$p0 iadd $r0 $r0 0x1
|
||||
ret
|
||||
nop 0
|
||||
|
|
@ -55,47 +55,47 @@ gm107_div_u32:
|
|||
// CLOBBER: $r2 - $r3, $p0 - $p3
|
||||
//
|
||||
gm107_div_s32:
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0xd wt 0x3f) (st 0x1) (st 0x1 wr 0x0)
|
||||
isetp lt and $p2 0x1 $r0 0 1
|
||||
isetp lt xor $p3 1 $r1 0 $p2
|
||||
i2i s32 s32 $r0 abs $r0
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0xf wr 0x1) (st 0xd wr 0x1 wt 0x2) (st 0x1 wt 0x2)
|
||||
i2i s32 s32 $r1 abs $r1
|
||||
flo u32 $r2 $r1
|
||||
lop xor 1 $r2 $r2 0x1f
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6) (st 0x1) (st 0xf wr 0x1)
|
||||
mov $r3 0x1 0xf
|
||||
shl $r2 $r3 $r2
|
||||
i2i u32 u32 $r1 neg $r1
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2)
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2)
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2)
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
imul u32 u32 $r3 $r1 $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x6 wr 0x1 rd 0x2 wt 0x2) (st 0x2 wt 0x5) (st 0x6 wr 0x0 rd 0x1 wt 0x2)
|
||||
imad u32 u32 hi $r2 $r2 $r3 $r2
|
||||
mov $r3 $r0 0xf
|
||||
imul u32 u32 hi $r0 $r0 $r2
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0xf wr 0x1 rd 0x2 wt 0x2) (st 0x6 wr 0x0 wt 0x5) (st 0xd wt 0x3)
|
||||
i2i u32 u32 $r2 neg $r1
|
||||
imad u32 u32 $r1 $r1 $r0 $r3
|
||||
isetp ge u32 and $p0 1 $r1 $r2 1
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x1) (st 0x5) (st 0xd)
|
||||
$p0 iadd $r1 $r1 neg $r2
|
||||
$p0 iadd $r0 $r0 0x1
|
||||
$p0 isetp ge u32 and $p0 1 $r1 $r2 1
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0x1) (st 0x2) (st 0xf wr 0x0)
|
||||
$p0 iadd $r1 $r1 neg $r2
|
||||
$p0 iadd $r0 $r0 0x1
|
||||
$p3 i2i s32 s32 $r0 neg $r0
|
||||
sched (st 0x0) (st 0x0) (st 0x0)
|
||||
sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
|
||||
$p2 i2i s32 s32 $r1 neg $r1
|
||||
ret
|
||||
nop 0
|
||||
|
|
|
|||
|
|
@ -1,83 +1,83 @@
|
|||
uint64_t gm107_builtin_code[] = {
|
||||
/* 0x0000: gm107_div_u32 */
|
||||
0x001f8000fc0007e0,
|
||||
0x001f9801fc21ff0d,
|
||||
0x5c30000000170002,
|
||||
0x3847040001f70202,
|
||||
0x3898078000170003,
|
||||
0x001f8000fc0007e0,
|
||||
0x003c1800e1e007e1,
|
||||
0x5c48000000270302,
|
||||
0x5ce0200000170a01,
|
||||
0x5c38000000270103,
|
||||
0x001f8000fc0007e0,
|
||||
0x003c1801e0c00f06,
|
||||
0x5a40010000370202,
|
||||
0x5c38000000270103,
|
||||
0x5a40010000370202,
|
||||
0x001f8000fc0007e0,
|
||||
0x003c1801e0c00f06,
|
||||
0x5c38000000270103,
|
||||
0x5a40010000370202,
|
||||
0x5c38000000270103,
|
||||
0x001f8000fc0007e0,
|
||||
0x003c1801e0c00f06,
|
||||
0x5a40010000370202,
|
||||
0x5c38000000270103,
|
||||
0x5a40010000370202,
|
||||
0x001f8000fc0007e0,
|
||||
0x00443c0120c007e6,
|
||||
0x5c98078000070003,
|
||||
0x5c38008000270000,
|
||||
0x5ce0200000170a02,
|
||||
0x001f8000fc0007e0,
|
||||
0x001f8401fda01f06,
|
||||
0x5a00018000070101,
|
||||
0x5b6c038000270107,
|
||||
0x5c11000000200101,
|
||||
0x001f8000fc0007e0,
|
||||
0x001f8400fda007e5,
|
||||
0x3810000000100000,
|
||||
0x5b6c038000200107,
|
||||
0x5c11000000200101,
|
||||
0x001f8000fc0007e0,
|
||||
0x001fbc00fde007e1,
|
||||
0x3810000000100000,
|
||||
0xe32000000007000f,
|
||||
0x50b0000000070f00,
|
||||
/* 0x0120: gm107_div_s32 */
|
||||
0x001f8000fc0007e0,
|
||||
0x001c0400fc21ffed,
|
||||
0x5b6303800ff70017,
|
||||
0x5b6341000ff7011f,
|
||||
0x5ce2000000073a00,
|
||||
0x001f8000fc0007e0,
|
||||
0x005f8402e5a0072f,
|
||||
0x5ce2000000173a01,
|
||||
0x5c30000000170002,
|
||||
0x3847040001f70202,
|
||||
0x001f8000fc0007e0,
|
||||
0x001cbc00fc2007e6,
|
||||
0x3898078000170003,
|
||||
0x5c48000000270302,
|
||||
0x5ce0200000170a01,
|
||||
0x001f8000fc0007e0,
|
||||
0x005c9802e4c01726,
|
||||
0x5c38000000270103,
|
||||
0x5a40010000370202,
|
||||
0x5c38000000270103,
|
||||
0x001f8000fc0007e0,
|
||||
0x005c9802e4c01726,
|
||||
0x5a40010000370202,
|
||||
0x5c38000000270103,
|
||||
0x5a40010000370202,
|
||||
0x001f8000fc0007e0,
|
||||
0x005c9802e4c01726,
|
||||
0x5c38000000270103,
|
||||
0x5a40010000370202,
|
||||
0x5c38000000270103,
|
||||
0x001f8000fc0007e0,
|
||||
0x00441805fc401226,
|
||||
0x5a40010000370202,
|
||||
0x5c98078000070003,
|
||||
0x5c38008000270000,
|
||||
0x001f8000fc0007e0,
|
||||
0x007fb405e0c0122f,
|
||||
0x5ce0200000170a02,
|
||||
0x5a00018000070101,
|
||||
0x5b6c038000270107,
|
||||
0x001f8000fc0007e0,
|
||||
0x001fb400fca007e1,
|
||||
0x5c11000000200101,
|
||||
0x3810000000100000,
|
||||
0x5b6c038000200107,
|
||||
0x001f8000fc0007e0,
|
||||
0x001c3c00fc4007e1,
|
||||
0x5c11000000200101,
|
||||
0x3810000000100000,
|
||||
0x5ce0200000033a00,
|
||||
0x001f8000fc0007e0,
|
||||
0x001fbc03fde0072f,
|
||||
0x5ce0200000123a01,
|
||||
0xe32000000007000f,
|
||||
0x50b0000000070f00,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue