nvc0: allow for easier modification of compiler library routines

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Ben Skeggs 2014-05-09 15:55:47 +10:00
parent 737477dac3
commit 0079a375a5
13 changed files with 1057 additions and 1057 deletions

View file

@ -0,0 +1,10 @@
# Regenerates the C headers that hold the assembled built-in library routines.
# Every <chip>.asm.h is produced from the <chip>.asm next to it by envyas.
# The assembler binary can be overridden: make ENVYAS=/path/to/envyas
ENVYAS ?= envyas

# "all" does not name a file; never let a stray file called "all" satisfy it.
.PHONY: all
# If envyas fails part-way, delete the partially written header so that the
# next run does not mistake it for an up-to-date target.
.DELETE_ON_ERROR:

all: gf100.asm.h gk104.asm.h gk110.asm.h

# Static pattern rules: each header depends only on its own .asm source.
# Recipe lines must start with a TAB character.
gf100.asm.h: %.asm.h: %.asm
	$(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@

gk104.asm.h: %.asm.h: %.asm
	$(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@

gk110.asm.h: %.asm.h: %.asm
	$(ENVYAS) -a -W -mgk110 $< -o $@

View file

@ -0,0 +1,107 @@
.section #gf100_builtin_code
// DIV U32
//
// Unsigned 32-bit divide that returns both quotient and remainder. The
// quotient is obtained by multiplying the dividend with an iteratively
// refined estimate z, followed by up to two correction steps.
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1 (the code below only ever writes $p0)
// SIZE: 22 / 14 * 8 bytes (the gf100 build is the 22 * 8 = 0xb0 byte case:
// gf100_div_s32 starts at 0x00b0 in the generated gf100.asm.h)
//
gf100_div_u32:
// initial estimate: $r2 = 1 << (bfind($r1) ^ 0x1f), with a clamped shift
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -b; it stays negated until the remainder has been formed
cvt u32 $r1 neg u32 $r1
// five identical refinement steps on z ($r2):
// $r3 = (-b) * z, then z += high half of (z * $r3)
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
// save the dividend in $r3, then q ($r0) = high half of (a * z)
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
// $r2 = b again, recovered by negating the already negated $r1
cvt u32 $r2 neg u32 $r1
// remainder candidate: $r1 = (-b) * q + a
add $r1 (mul u32 $r1 u32 $r0) $r3
// first correction: if remainder >= b, subtract b once and bump the quotient
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
// second correction: every instruction is predicated on $p0, so this only
// runs (and only re-tests remainder >= b) when the first correction fired
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
ret
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// The unsigned sequence of gf100_div_u32 is repeated inline here (it is not
// called), bracketed by sign handling:
// $p2 = dividend < 0 -> the modulus is negated on exit
// $p3 = (divisor < 0) xor $p2 -> the result is negated on exit
// i.e. the modulus takes the sign of the dividend, and the result is negative
// when the operand signs differ.
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
//
gf100_div_s32:
// record the signs before they are discarded by the abs conversions
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
cvt s32 $r0 abs s32 $r0
cvt s32 $r1 abs s32 $r1
// from here to the second correction: same instructions as gf100_div_u32
// initial estimate: $r2 = 1 << (bfind($r1) ^ 0x1f), with a clamped shift
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -b; it stays negated until the remainder has been formed
cvt u32 $r1 neg u32 $r1
// five identical refinement steps on z ($r2)
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
// save |a| in $r3, q ($r0) = high half of (|a| * z), $r2 = |b| again
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
cvt u32 $r2 neg u32 $r1
// remainder candidate: $r1 = (-|b|) * q + |a|
add $r1 (mul u32 $r1 u32 $r0) $r3
// up to two corrections; the second group runs only if the first one fired
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
// re-apply the signs recorded on entry
$p3 cvt s32 $r0 neg s32 $r0
$p2 cvt s32 $r1 neg s32 $r1
ret
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
// NOTE: not implemented. The body below is a placeholder (nop + ret) that
// returns without touching any register, so $r0d still holds x on return.
// The INPUT/OUTPUT/CLOBBER/SIZE lines describe the intended routine, not
// this stub (which assembles to a single 8-byte word in gf100.asm.h).
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rcp(x))
// CLOBBER: $r2 - $r7
// SIZE: 9 * 8 bytes
//
gf100_rcp_f64:
nop
ret
// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
//
// NOTE: not implemented. The body below is a placeholder (nop + ret) that
// returns without touching any register, so $r0d still holds x on return.
// The INPUT/OUTPUT/CLOBBER/SIZE lines describe the intended routine, not
// this stub (which assembles to a single 8-byte word in gf100.asm.h).
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rsqrt(x))
// CLOBBER: $r2 - $r7
// SIZE: 14 * 8 bytes
//
gf100_rsq_f64:
nop
ret
// Offset table: one 64-bit entry per routine, holding the routine's byte
// offset inside the gf100_builtin_code section. It is emitted as
// gf100_builtin_offsets[] in the generated gf100.asm.h, and
// TargetNVC0::getBuiltinOffset() indexes that array directly with the builtin
// number. The entry order is therefore part of the interface: keep it in step
// with the compiler's builtin numbering (presumably the NVC0_BUILTIN_*
// constants - confirm before reordering or adding entries).
.section #gf100_builtin_offsets
.b64 #gf100_div_u32
.b64 #gf100_div_s32
.b64 #gf100_rcp_f64
.b64 #gf100_rsq_f64

View file

@ -0,0 +1,63 @@
uint64_t gf100_builtin_code[] = {
/* 0x0000: gf100_div_u32 */
0x7800000004009c03,
0x0010dd187c209cdd,
0x6000000008309c03,
0x0810dc2a05605c18,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x280000000000dde4,
0x5000000008001c43,
0x0010430d05609c18,
0x1b0e00000811dc03,
0x4800000008104103,
0x0800000004000002,
0x1b0e00000811c003,
0x4800000008104103,
0x90001dff040000ac,
/* 0x00b0: gf100_div_s32 */
0x188e0000fc05dc23,
0x18c40000fc17dc23,
0x07305e1803301e18,
0x7800000004009c03,
0x0010dd187c209cdd,
0x6000000008309c03,
0x0810dc2a05605c18,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x280000000000dde4,
0x5000000008001c43,
0x0010430d05609c18,
0x1b0e00000811dc03,
0x4800000008104103,
0x0800000004000002,
0x1b0e00000811c003,
0x4800000008104103,
0x01700e18040000ac,
0x90001dff05704a18,
/* 0x0180: gf100_rcp_f64 */
0x90001dff00001c08,
/* 0x0188: gf100_rsq_f64 */
0x90001dff00001c08,
};
uint64_t gf100_builtin_offsets[] = {
0x0000000000000000,
0x00000000000000b0,
0x0000000000000180,
0x0000000000000188,
};

View file

@ -1,4 +1,4 @@
//
.section #gk104_builtin_code
// DIV U32
//
// UNR recurrence (q = a / b):
@ -10,81 +10,83 @@
// CLOBBER: $r2 - $r3, $p0 - $p1
// SIZE: 22 / 14 * 8 bytes
//
sched 0x28 0x4 0x28 0x4 0x28 0x28 0x28
bfind u32 $r2 $r1
long xor b32 $r2 $r2 0x1f
long mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
long cvt u32 $r1 neg u32 $r1
long mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x4 0x28 0x4 0x28 0x28 0x2c 0x4
add $r2 (mul high u32 $r2 u32 $r3) $r2
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
long cvt u32 $r2 neg u32 $r1
long add $r1 (mul u32 $r1 u32 $r0) $r3
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
sched 0x28 0x2c 0x4 0x20 0x2e 0x28 0x20
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
long ret
//
gk104_div_u32:
sched 0x28 0x4 0x28 0x4 0x28 0x28 0x28
bfind u32 $r2 $r1
long xor b32 $r2 $r2 0x1f
long mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
long cvt u32 $r1 neg u32 $r1
long mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x4 0x28 0x4 0x28 0x28 0x2c 0x4
add $r2 (mul high u32 $r2 u32 $r3) $r2
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
long cvt u32 $r2 neg u32 $r1
long add $r1 (mul u32 $r1 u32 $r0) $r3
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
sched 0x28 0x2c 0x4 0x20 0x2e 0x28 0x20
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
long ret
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
//
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
sched 0x20 0x28 0x28 0x4 0x28 0x04 0x28
long cvt s32 $r0 abs s32 $r0
long cvt s32 $r1 abs s32 $r1
bfind u32 $r2 $r1
long xor b32 $r2 $r2 0x1f
long mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
cvt u32 $r1 neg u32 $r1
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x28 0x28 0x4 0x28 0x04 0x28 0x28
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
long cvt u32 $r2 neg u32 $r1
long add $r1 (mul u32 $r1 u32 $r0) $r3
sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
long $p0 add b32 $r0 $r0 0x1
long $p3 cvt s32 $r0 neg s32 $r0
sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c
$p2 cvt s32 $r1 neg s32 $r1
long ret
//
gk104_div_s32:
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
sched 0x20 0x28 0x28 0x4 0x28 0x04 0x28
long cvt s32 $r0 abs s32 $r0
long cvt s32 $r1 abs s32 $r1
bfind u32 $r2 $r1
long xor b32 $r2 $r2 0x1f
long mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
cvt u32 $r1 neg u32 $r1
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x28 0x28 0x4 0x28 0x04 0x28 0x28
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
long cvt u32 $r2 neg u32 $r1
long add $r1 (mul u32 $r1 u32 $r0) $r3
sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
long $p0 add b32 $r0 $r0 0x1
long $p3 cvt s32 $r0 neg s32 $r0
sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c
$p2 cvt s32 $r1 neg s32 $r1
long ret
// SULDP [for each format]
// $r4d: address
// $r2: surface info (format)
@ -542,7 +544,8 @@ $p1 suldgb b32 $r3 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x3f800000
long nop
long ret
//
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
// INPUT: $r0d (x)
@ -550,8 +553,10 @@ long ret
// CLOBBER: $r2 - $r7
// SIZE: 9 * 8 bytes
//
long nop
long ret
gk104_rcp_f64:
long nop
long ret
// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
//
// INPUT: $r0d (x)
@ -559,8 +564,10 @@ long ret
// CLOBBER: $r2 - $r7
// SIZE: 14 * 8 bytes
//
long nop
long ret
gk104_rsq_f64:
long nop
long ret
//
// Trap handler.
// Requires at least 4 GPRs and 32 bytes of l[] memory to temporarily save GPRs.
@ -696,3 +703,9 @@ bpt pause 0x0
mov $flags $r2 mask 0xffff
ld b128 $r0q cs l[0x00]
rtt
.section #gk104_builtin_offsets
.b64 #gk104_div_u32
.b64 #gk104_div_s32
.b64 #gk104_rcp_f64
.b64 #gk104_rsq_f64

View file

@ -0,0 +1,598 @@
uint64_t gk104_builtin_code[] = {
/* 0x0000: gk104_div_u32 */
0x2282828042804287,
0x7800000004009c03,
0x380000007c209c82,
0x180000000400dde2,
0x6000000008309c03,
0x1c00000005205d04,
0x500000000810dc03,
0x200400000c209c43,
0x2282828282828287,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x2042c28280428047,
0x200400000c209c43,
0x280000000000dde4,
0x5000000008001c43,
0x1c00000005209d04,
0x2006000000105c03,
0x1b0e00000811dc03,
0x4800000008104103,
0x220282e20042c287,
0x0800000004000002,
0x1b0e00000811c003,
0x4800000008104103,
0x0800000004000002,
0x9000000000001de7,
/* 0x00f0: gk104_div_s32 */
0x188e0000fc05dc23,
0x18c40000fc17dc23,
0x2280428042828207,
0x1c00000001201ec4,
0x1c00000005205ec4,
0x7800000004009c03,
0x380000007c209c82,
0x180000000400dde2,
0x6000000008309c03,
0x1c00000005205d04,
0x2282828282828287,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x2282804280428287,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x280000000000dde4,
0x5000000008001c43,
0x1c00000005209d04,
0x2006000000105c03,
0x22028042c28042c7,
0x1b0e00000811dc03,
0x4800000008104103,
0x0800000004000002,
0x1b0e00000811c003,
0x4800000008104103,
0x0800000004000002,
0x1c00000001200f84,
0x22c200428042e047,
0x1c00000005204b84,
0x9000000000001de7,
0xd4004000084004c5,
0x0c5400000013dc04,
0xd4004000084009c5,
0xd4004000084007c5,
0x9000000000001de7,
0x2000000000000007,
0xd4004000084004c5,
0x0c5400000013dc04,
0xd4004000084009c5,
0xd4004000084007c5,
0x1900000004a0dc04,
0x1800000004a09c04,
0x30de0001d030dc02,
0x2000000000000007,
0x1900000000a05c04,
0x30de0001d0209c02,
0x1800000000a01c04,
0x30de0001d0105c02,
0x30de0001d0001c02,
0x9000000000001de7,
0xd4004000084004a5,
0x2000000000000007,
0x0c5400000013dc04,
0xd4004000084009a5,
0xd4004000084007a5,
0x1900000004a0de04,
0x1800000004a09e04,
0x30e000061c30dc02,
0x1900000000a05e04,
0x2000000000000007,
0x30e000061c209c02,
0x1800000000a01e04,
0x30e000061c105c02,
0x30e000061c001c02,
0x9000000000001de7,
0xd4004000084004a5,
0x0c5400000013dc04,
0x2000000000000007,
0xd4004000084009a5,
0xd4004000084007a5,
0x1d00000004a0de84,
0x1c00000004a09e84,
0x1d00000000a05e84,
0x1c00000000a01e84,
0x9000000000001de7,
0x2000000000000007,
0xd4004000084004a5,
0x0c5400000013dc04,
0xd4004000084009a5,
0xd4004000084007a5,
0x1d00000004a0dc04,
0x1c00000004a09c04,
0x1d00000000a05c04,
0x2000000000000007,
0x1c00000000a01c04,
0x9000000000001de7,
0xd4004000084004a5,
0x0c5400000013dc04,
0xd4004000084009a5,
0xd4004000084007a5,
0x1100000004a0dc04,
0x2000000000000007,
0x1000000004a09c04,
0x1100000000a05c04,
0x1000000000a01c04,
0x9000000000001de7,
0xd4004000084004a5,
0x0c5400000013dc04,
0xd4004000084009a5,
0x2000000000000007,
0xd4004000084007a5,
0x1800000000009de2,
0x18fe00000000dde2,
0x9000000000001de7,
0xd4004000084004a5,
0x0c5400000013dc04,
0xd4004000084009a5,
0x2000000000000007,
0xd4004000084007a5,
0x1800000000009de2,
0x180000000400dde2,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0x2000000000000007,
0xd400400008400785,
0x7000c02828005c03,
0x18fe00000000dde2,
0x7000c02850009c03,
0x3800000ffc001c02,
0x1800000008a09c04,
0x1800000004a05c04,
0x2000000000000007,
0x30ea00801c209c02,
0x1800000000a01c04,
0x30ea00801c105c02,
0x30ea00801c001c02,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400985,
0xd400400008400785,
0x7000c02828005c03,
0x180000000400dde2,
0x7000c02850009c03,
0x3800000ffc001c02,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x198000000020dc04,
0x1900000000209c04,
0x30ee02020430dc02,
0x2000000000000007,
0x1880000000205c04,
0x30ee020204209c02,
0x1800000000201c04,
0x30ee020204105c02,
0x30ee020204001c02,
0x9000000000001de7,
0xd400400008400485,
0x2000000000000007,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x198000000020de04,
0x1900000000209e04,
0x30f004081030dc02,
0x1880000000205e04,
0x2000000000000007,
0x30f0040810209c02,
0x1800000000201e04,
0x30f0040810105c02,
0x30f0040810001c02,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400985,
0xd400400008400785,
0x1d8000000020de84,
0x1d00000000209e84,
0x1c80000000205e84,
0x1c00000000201e84,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x1d8000000020dc04,
0x1d00000000209c04,
0x1c80000000205c04,
0x2000000000000007,
0x1c00000000201c04,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x7000c01814005c03,
0x2000000000000007,
0x18fe00000000dde2,
0x7000c0142c009c03,
0x380000007c001c02,
0x1800000008209c04,
0x1800000004205c04,
0x30f4108420209c02,
0x1800000000201c04,
0x2000000000000007,
0x30f2082084105c02,
0x30f4108420001c02,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x2000000000000007,
0x7000c01414005c03,
0x7000c01428009c03,
0x380000007c001c02,
0x18fe00000000dde2,
0x1800000008209c04,
0x1800000004205c04,
0x1800000000201c04,
0x2000000000000007,
0x30f4108420209c02,
0x30f4108420105c02,
0x30f4108420001c02,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0x2000000000000007,
0xd400400008400785,
0x1900000000a05c04,
0x1800000000a01c04,
0x30de0001d0105c02,
0x30de0001d0001c02,
0x1800000000009de2,
0x18fe00000000dde2,
0x2000000000000007,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x18fe00000000dde2,
0x1900000000a05e04,
0x2000000000000007,
0x1800000000009de2,
0x1800000000a01e04,
0x30e000061c105c02,
0x30e000061c001c02,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400985,
0xd400400008400785,
0x180000000400dde2,
0x1d00000000a05e84,
0x1800000000009de2,
0x1c00000000a01e84,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x180000000400dde2,
0x1d00000000a05c04,
0x1800000000009de2,
0x2000000000000007,
0x1c00000000a01c04,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x18fe00000000dde2,
0x2000000000000007,
0x1100000000a05c04,
0x1800000000009de2,
0x1000000000a01c04,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0x2000000000000007,
0xd400400008400785,
0x18fe00000000dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400985,
0xd400400008400785,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400445,
0x2000000000000007,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x18fe00000000dde2,
0x1880000000205c04,
0x1800000000009de2,
0x1800000000201c04,
0x2000000000000007,
0x30ee020204105c02,
0x30ee020204001c02,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x2000000000000007,
0x18fe00000000dde2,
0x1880000000205e04,
0x1800000000009de2,
0x1800000000201e04,
0x30f0040810105c02,
0x30f0040810001c02,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x180000000400dde2,
0x1c80000000205c04,
0x1800000000009de2,
0x2000000000000007,
0x1c00000000201c04,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x180000000400dde2,
0x2000000000000007,
0x1c80000000205e84,
0x1800000000009de2,
0x1c00000000201e84,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0x2000000000000007,
0xd400400008400745,
0x18fe00000000dde2,
0x1800000000a01c04,
0x1800000000009de2,
0x1800000000005de2,
0x30de0001d0001c02,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x18fe00000000dde2,
0x1800000000a01e04,
0x1800000000009de2,
0x2000000000000007,
0x1800000000005de2,
0x30e000061c001c02,
0x9000000000001de7,
0xd400400008400465,
0x0c5400000013dc04,
0xd400400008400965,
0xd400400008400765,
0x2000000000000007,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0x2000000000000007,
0xd400400008400745,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400945,
0xd400400008400745,
0x18fe00000000dde2,
0x1800000000009de2,
0x1000000000a01c04,
0x1800000000005de2,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400405,
0x0c5400000013dc04,
0xd400400008400905,
0xd400400008400705,
0x18fe00000000dde2,
0x1800000000201c04,
0x1800000000009de2,
0x2000000000000007,
0x30ee020204001c02,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400405,
0x0c5400000013dc04,
0xd400400008400905,
0xd400400008400705,
0x2000000000000007,
0x18fe00000000dde2,
0x1800000000201e04,
0x1800000000009de2,
0x30f0040810001c02,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400425,
0x2000000000000007,
0x0c5400000013dc04,
0xd400400008400925,
0xd400400008400725,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400405,
0x0c5400000013dc04,
0xd400400008400905,
0xd400400008400705,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x2000000000000007,
0x9000000000001de7,
0xd40040000840c485,
0x0c5400000013dc04,
0xd40040000840c985,
0xd40040000840c785,
0x18fe00000000dde2,
0x4000000000001de4,
0x9000000000001de7,
/* 0x0f08: gk104_rcp_f64 */
0x4000000000001de4,
0x9000000000001de7,
/* 0x0f18: gk104_rsq_f64 */
0x4000000000001de4,
0x9000000000001de7,
0xc800000003f01cc5,
0x2c00000100005c04,
0x2c0000010800dc04,
0x3000c3fffff09c04,
0x680100000c1fdc03,
0x4000000a60001c47,
0x180000004000dde2,
/* 0x0f60: spill_cfstack */
0x78000009c0000007,
0x0c0000000430dd02,
0x4003ffffa0001ca7,
0x2800406400001de4,
0x2800406410005de4,
0x180000000400dde2,
0x547e18000000dd05,
0x60000008e0000007,
0x190ec0000431dc03,
0x40000000000001f4,
0x94000004c0009c85,
0x2c00000100009c04,
0x2c0000010800dc04,
0x9400000020009ca5,
0x9400000100011cc5,
0x9400000140021cc5,
0x9400000180031cc5,
0x94000001c0041cc5,
0x9400000200051cc5,
0x9400000240061cc5,
0x9400000280071cc5,
0x94000002c0081cc5,
0x9400000300091cc5,
0x94000003400a1cc5,
0x94000003800b1cc5,
0x94000003c00c1cc5,
0x94000004000d1cc5,
0x94000004400e1cc5,
0x94000004800f1cc5,
0xc000000003f09ea5,
0x94000000c0009ca5,
0xc000000023f09ea5,
0x94000000e0009ca5,
0x2c00000084009c04,
0x2c0000008800dc04,
0x9400000040009ca5,
0x2c0000008c009c04,
0x2c0000009400dc04,
0x9400000060009ca5,
0x2c00000098009c04,
0x2c0000009c00dc04,
0x9400000080009ca5,
0x2c000000c800dc04,
0x0c0000001030dd02,
0x4000000100001ea7,
0x480100000c001c03,
0x0800000000105c42,
/* 0x10d8: shared_loop */
0xc100000000309c85,
0x9400000500009c85,
0x0c00000010001d02,
0x0800000000105d42,
0x0c0000001030dd02,
0x4003ffff40001ca7,
/* 0x1108: shared_done */
0x2800406420001de4,
0x2800406430005de4,
0xe000000000001c45,
0xd000000003ffdcc5,
0x9c000000000fdcc5,
0x2c0000000c009c04,
0x7000c0205020dc03,
0x7000c01820209c03,
0x5000406450209c03,
0x500040644030dc03,
0x480000000c209c03,
0x4801000008001c03,
0x0800000000105c42,
/* 0x1170: search_cstack */
0x280040646000dde4,
0x8400000020009f05,
0x190ec0002821dc03,
0x40000000800001e7,
0x0c00000040001c02,
0x0800000000105c42,
0x0c0000004030dd02,
0x00029dff0ffc5cbf,
/* 0x11b0: entry_found */
0x8400000000009f85,
0x2800406400001de4,
0x2800406410005de4,
0x9400000010009c85,
0x4000000000001df4,
/* 0x11d8: end_exit */
0x9800000003ffdcc5,
0xd000000000008007,
0xa000000000004007,
/* 0x11f0: end_cont */
0xd000000000008007,
0x3400c3fffc201c04,
0xc000000003f01ec5,
0xa000000000000007,
};
uint64_t gk104_builtin_offsets[] = {
0x0000000000000000,
0x00000000000000f0,
0x0000000000000f08,
0x0000000000000f18,
};

View file

@ -0,0 +1,98 @@
.section #gk110_builtin_code
// DIV U32
//
// Unsigned 32-bit divide that returns both quotient and remainder. The
// quotient is obtained by multiplying the dividend with an iteratively
// refined estimate z, followed by up to two correction steps.
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1 (the code below only ever writes $p0)
// SIZE: 22 / 14 * 8 bytes (does not describe the gk110 build: with its four
// sched words this routine occupies 0xf0 bytes, see the gk110_div_s32 offset
// in the generated gk110.asm.h)
//
// Layout note: in this file every sched line is followed by exactly seven
// instructions, and the grouping does not restart at labels. The last sched
// of this routine covers its final five instructions plus the first two of
// gk110_div_s32. Adding or removing an instruction therefore means revisiting
// every sched line after it, including those of the following routines.
//
gk110_div_u32:
sched 0x28282804280428
// initial estimate: $r2 = 1 << (bfind($r1) ^ 0x1f), with a clamped shift
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -b; it stays negated until the remainder has been formed
cvt u32 $r1 neg u32 $r1
// five identical refinement steps on z ($r2):
// $r3 = (-b) * z, then z += high half of (z * $r3)
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
sched 0x28282828282828
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x042c2828042804
add $r2 (mul high u32 $r2 u32 $r3) $r2
// save the dividend in $r3, then q ($r0) = high half of (a * z)
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
// $r2 = b again, recovered by negating the already negated $r1
cvt u32 $r2 neg u32 $r1
// remainder candidate: $r1 = (-b) * q + a
add $r1 (mul u32 $r1 u32 $r0) $r3
// first correction: if remainder >= b, subtract b once and bump the quotient
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
sched 0x20282e20042c28
$p0 add b32 $r0 $r0 0x1
// second correction: every instruction is predicated on $p0, so this only
// runs (and only re-tests remainder >= b) when the first correction fired
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
ret
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// The unsigned sequence of gk110_div_u32 is repeated inline here (it is not
// called), bracketed by sign handling:
// $p2 = dividend < 0 -> the modulus is negated on exit
// $p3 = (divisor < 0) xor $p2 -> the result is negated on exit
// i.e. the modulus takes the sign of the dividend, and the result is negative
// when the operand signs differ.
//
// Layout note: the first two instructions below still belong to the sched
// group opened by the last sched line of gk110_div_u32, which is why the
// first sched of this routine only appears after them. Each sched line is
// followed by seven instructions; the last one here covers the final two
// instructions of this routine plus the ret shared by gk110_rcp_f64 and
// gk110_rsq_f64.
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
//
gk110_div_s32:
// record the signs before they are discarded by the abs conversions
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
sched 0x28042804282820
cvt s32 $r0 abs s32 $r0
cvt s32 $r1 abs s32 $r1
// initial estimate: $r2 = 1 << (bfind($r1) ^ 0x1f), with a clamped shift
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -|b|; it stays negated until the remainder has been formed
cvt u32 $r1 neg u32 $r1
sched 0x28282828282828
// five identical refinement steps on z ($r2)
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x28280428042828
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
// save |a| in $r3, q ($r0) = high half of (|a| * z), $r2 = |b| again
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
cvt u32 $r2 neg u32 $r1
// remainder candidate: $r1 = (-|b|) * q + |a|
add $r1 (mul u32 $r1 u32 $r0) $r3
sched 0x2028042c28042c
// up to two corrections; the second group runs only if the first one fired
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
// re-apply the signs recorded on entry
$p3 cvt s32 $r0 neg s32 $r0
sched 0x2c200428042e04
$p2 cvt s32 $r1 neg s32 $r1
ret
// RCP F64 / RSQ F64: not implemented for gk110.
//
// Both labels mark the same single ret, so the two table entries in
// gk110_builtin_offsets resolve to the same offset and either routine returns
// immediately without modifying any register.
//
// This ret has no sched line of its own: it is covered by the sched group
// opened near the end of gk110_div_s32. Any real implementation added here
// has to bring its own sched lines.
gk110_rcp_f64:
gk110_rsq_f64:
ret
// Offset table: one 64-bit entry per routine, holding the routine's byte
// offset inside the gk110_builtin_code section. It is emitted as
// gk110_builtin_offsets[] in the generated gk110.asm.h, and
// TargetNVC0::getBuiltinOffset() indexes that array directly with the builtin
// number. The entry order is therefore part of the interface: keep it in step
// with the compiler's builtin numbering (presumably the NVC0_BUILTIN_*
// constants - confirm before reordering or adding entries).
// The last two entries intentionally resolve to the same address because
// gk110_rcp_f64 and gk110_rsq_f64 label the same instruction.
.section #gk110_builtin_offsets
.b64 #gk110_div_u32
.b64 #gk110_div_s32
.b64 #gk110_rcp_f64
.b64 #gk110_rsq_f64

View file

@ -0,0 +1,81 @@
uint64_t gk110_builtin_code[] = {
/* 0x0000: gk110_div_u32 */
0x08a0a0a010a010a0,
0xe1800000009c000a,
0x220000000f9c0808,
0x74000000009fc00e,
0xe2400000011c0c0a,
0xe6010000009c2806,
0xe1c00000011c040e,
0xd2000800019c080a,
0x08a0a0a0a0a0a0a0,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0x0810b0a0a010a010,
0xd2000800019c080a,
0xe4c03c00001c000e,
0xe1c00400011c0002,
0xe6010000009c280a,
0xd0000c00001c0406,
0xdb601c00011c041e,
0xe088000001000406,
0x0880a0b88010b0a0,
0x4000000000800001,
0xdb601c000100041e,
0xe088000001000406,
0x4000000000800001,
0x19000000001c003c,
/* 0x00f0: gk110_div_s32 */
0xdb181c007f9c005e,
0xdb1a08007f9c047e,
0x08a010a010a0a080,
0xe6100000001ce802,
0xe6100000009ce806,
0xe1800000009c000a,
0x220000000f9c0808,
0x74000000009fc00e,
0xe2400000011c0c0a,
0xe6010000009c2806,
0x08a0a0a0a0a0a0a0,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0x08a0a010a010a0a0,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe4c03c00001c000e,
0xe1c00400011c0002,
0xe6010000009c280a,
0xd0000c00001c0406,
0x0880a010b0a010b0,
0xdb601c00011c041e,
0xe088000001000406,
0x4000000000800001,
0xdb601c000100041e,
0xe088000001000406,
0x4000000000800001,
0xe6010000000ce802,
0x08b08010a010b810,
0xe60100000088e806,
0x19000000001c003c,
/* 0x0218: gk110_rcp_f64 */
/* 0x0218: gk110_rsq_f64 */
0x19000000001c003c,
};
uint64_t gk110_builtin_offsets[] = {
0x0000000000000000,
0x00000000000000f0,
0x0000000000000218,
0x0000000000000218,
};

View file

@ -39,26 +39,26 @@ TargetNVC0::TargetNVC0(unsigned int card) : Target(false, card >= 0xe4)
// lazyness -> will just hardcode everything for the time being
#include "target_lib_nvc0.asm.h"
#include "target_lib_nve4.asm.h"
#include "target_lib_nvf0.asm.h"
#include "lib/gf100.asm.h"
#include "lib/gk104.asm.h"
#include "lib/gk110.asm.h"
void
TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
{
switch (chipset & ~0xf) {
case 0xe0:
*code = (const uint32_t *)&nve4_builtin_code[0];
*size = sizeof(nve4_builtin_code);
*code = (const uint32_t *)&gk104_builtin_code[0];
*size = sizeof(gk104_builtin_code);
break;
case 0xf0:
case 0x100:
*code = (const uint32_t *)&nvf0_builtin_code[0];
*size = sizeof(nvf0_builtin_code);
*code = (const uint32_t *)&gk110_builtin_code[0];
*size = sizeof(gk110_builtin_code);
break;
default:
*code = (const uint32_t *)&nvc0_builtin_code[0];
*size = sizeof(nvc0_builtin_code);
*code = (const uint32_t *)&gf100_builtin_code[0];
*size = sizeof(gf100_builtin_code);
break;
}
}
@ -70,12 +70,12 @@ TargetNVC0::getBuiltinOffset(int builtin) const
switch (chipset & ~0xf) {
case 0xe0:
return nve4_builtin_offsets[builtin];
return gk104_builtin_offsets[builtin];
case 0xf0:
case 0x100:
return nvf0_builtin_offsets[builtin];
return gk110_builtin_offsets[builtin];
default:
return nvc0_builtin_offsets[builtin];
return gf100_builtin_offsets[builtin];
}
}

View file

@ -1,96 +0,0 @@
//
// DIV U32
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// SIZE: 22 / 14 * 8 bytes
//
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
cvt u32 $r1 neg u32 $r1
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
cvt u32 $r2 neg u32 $r1
add $r1 (mul u32 $r1 u32 $r0) $r3
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
ret
//
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
//
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
cvt s32 $r0 abs s32 $r0
cvt s32 $r1 abs s32 $r1
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
cvt u32 $r1 neg u32 $r1
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
cvt u32 $r2 neg u32 $r1
add $r1 (mul u32 $r1 u32 $r0) $r3
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p3 cvt s32 $r0 neg s32 $r0
$p2 cvt s32 $r1 neg s32 $r1
ret
//
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rcp(x))
// CLOBBER: $r2 - $r7
// SIZE: 9 * 8 bytes
//
nop
ret
// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rsqrt(x))
// CLOBBER: $r2 - $r7
// SIZE: 14 * 8 bytes
//
nop
ret

View file

@ -1,112 +0,0 @@
static const uint32_t nvc0_builtin_code[] =
{
0x04009c03,
0x78000000,
0x7c209cdd,
0x0010dd18,
0x08309c03,
0x60000000,
0x05605c18,
0x0810dc2a,
0x0c209c43,
0x20040000,
0x0810dc03,
0x50000000,
0x0c209c43,
0x20040000,
0x0810dc03,
0x50000000,
0x0c209c43,
0x20040000,
0x0810dc03,
0x50000000,
0x0c209c43,
0x20040000,
0x0810dc03,
0x50000000,
0x0c209c43,
0x20040000,
0x0000dde4,
0x28000000,
0x08001c43,
0x50000000,
0x05609c18,
0x0010430d,
0x0811dc03,
0x1b0e0000,
0x08104103,
0x48000000,
0x04000002,
0x08000000,
0x0811c003,
0x1b0e0000,
0x08104103,
0x48000000,
0x040000ac,
0x90001dff,
0xfc05dc23,
0x188e0000,
0xfc17dc23,
0x18c40000,
0x03301e18,
0x07305e18,
0x04009c03,
0x78000000,
0x7c209cdd,
0x0010dd18,
0x08309c03,
0x60000000,
0x05605c18,
0x0810dc2a,
0x0c209c43,
0x20040000,
0x0810dc03,
0x50000000,
0x0c209c43,
0x20040000,
0x0810dc03,
0x50000000,
0x0c209c43,
0x20040000,
0x0810dc03,
0x50000000,
0x0c209c43,
0x20040000,
0x0810dc03,
0x50000000,
0x0c209c43,
0x20040000,
0x0000dde4,
0x28000000,
0x08001c43,
0x50000000,
0x05609c18,
0x0010430d,
0x0811dc03,
0x1b0e0000,
0x08104103,
0x48000000,
0x04000002,
0x08000000,
0x0811c003,
0x1b0e0000,
0x08104103,
0x48000000,
0x040000ac,
0x01700e18,
0x05704a18,
0x90001dff,
0x00001c08,
0x90001dff,
0x00001c08,
0x90001dff,
};
static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
{
0x0000,
0x00b0,
0x0180,
0x0188
};

View file

@ -1,592 +0,0 @@
// Assembled from target_lib_nve4.asm by envyas -m nvc0 -V nve4 -W.
static const uint64_t nve4_builtin_code[] =
{
0x2282828042804287ULL,
0x7800000004009c03ULL,
0x380000007c209c82ULL,
0x180000000400dde2ULL,
0x6000000008309c03ULL,
0x1c00000005205d04ULL,
0x500000000810dc03ULL,
0x200400000c209c43ULL,
0x2282828282828287ULL,
0x500000000810dc03ULL,
0x200400000c209c43ULL,
0x500000000810dc03ULL,
0x200400000c209c43ULL,
0x500000000810dc03ULL,
0x200400000c209c43ULL,
0x500000000810dc03ULL,
0x2042c28280428047ULL,
0x200400000c209c43ULL,
0x280000000000dde4ULL,
0x5000000008001c43ULL,
0x1c00000005209d04ULL,
0x2006000000105c03ULL,
0x1b0e00000811dc03ULL,
0x4800000008104103ULL,
0x220282e20042c287ULL,
0x0800000004000002ULL,
0x1b0e00000811c003ULL,
0x4800000008104103ULL,
0x0800000004000002ULL,
0x9000000000001de7ULL,
0x188e0000fc05dc23ULL,
0x18c40000fc17dc23ULL,
0x2280428042828207ULL,
0x1c00000001201ec4ULL,
0x1c00000005205ec4ULL,
0x7800000004009c03ULL,
0x380000007c209c82ULL,
0x180000000400dde2ULL,
0x6000000008309c03ULL,
0x1c00000005205d04ULL,
0x2282828282828287ULL,
0x500000000810dc03ULL,
0x200400000c209c43ULL,
0x500000000810dc03ULL,
0x200400000c209c43ULL,
0x500000000810dc03ULL,
0x200400000c209c43ULL,
0x500000000810dc03ULL,
0x2282804280428287ULL,
0x200400000c209c43ULL,
0x500000000810dc03ULL,
0x200400000c209c43ULL,
0x280000000000dde4ULL,
0x5000000008001c43ULL,
0x1c00000005209d04ULL,
0x2006000000105c03ULL,
0x22028042c28042c7ULL,
0x1b0e00000811dc03ULL,
0x4800000008104103ULL,
0x0800000004000002ULL,
0x1b0e00000811c003ULL,
0x4800000008104103ULL,
0x0800000004000002ULL,
0x1c00000001200f84ULL,
0x22c200428042e047ULL,
0x1c00000005204b84ULL,
0x9000000000001de7ULL,
0xd4004000084004c5ULL,
0x0c5400000013dc04ULL,
0xd4004000084009c5ULL,
0xd4004000084007c5ULL,
0x9000000000001de7ULL,
0x2000000000000007ULL,
0xd4004000084004c5ULL,
0x0c5400000013dc04ULL,
0xd4004000084009c5ULL,
0xd4004000084007c5ULL,
0x1900000004a0dc04ULL,
0x1800000004a09c04ULL,
0x30de0001d030dc02ULL,
0x2000000000000007ULL,
0x1900000000a05c04ULL,
0x30de0001d0209c02ULL,
0x1800000000a01c04ULL,
0x30de0001d0105c02ULL,
0x30de0001d0001c02ULL,
0x9000000000001de7ULL,
0xd4004000084004a5ULL,
0x2000000000000007ULL,
0x0c5400000013dc04ULL,
0xd4004000084009a5ULL,
0xd4004000084007a5ULL,
0x1900000004a0de04ULL,
0x1800000004a09e04ULL,
0x30e000061c30dc02ULL,
0x1900000000a05e04ULL,
0x2000000000000007ULL,
0x30e000061c209c02ULL,
0x1800000000a01e04ULL,
0x30e000061c105c02ULL,
0x30e000061c001c02ULL,
0x9000000000001de7ULL,
0xd4004000084004a5ULL,
0x0c5400000013dc04ULL,
0x2000000000000007ULL,
0xd4004000084009a5ULL,
0xd4004000084007a5ULL,
0x1d00000004a0de84ULL,
0x1c00000004a09e84ULL,
0x1d00000000a05e84ULL,
0x1c00000000a01e84ULL,
0x9000000000001de7ULL,
0x2000000000000007ULL,
0xd4004000084004a5ULL,
0x0c5400000013dc04ULL,
0xd4004000084009a5ULL,
0xd4004000084007a5ULL,
0x1d00000004a0dc04ULL,
0x1c00000004a09c04ULL,
0x1d00000000a05c04ULL,
0x2000000000000007ULL,
0x1c00000000a01c04ULL,
0x9000000000001de7ULL,
0xd4004000084004a5ULL,
0x0c5400000013dc04ULL,
0xd4004000084009a5ULL,
0xd4004000084007a5ULL,
0x1100000004a0dc04ULL,
0x2000000000000007ULL,
0x1000000004a09c04ULL,
0x1100000000a05c04ULL,
0x1000000000a01c04ULL,
0x9000000000001de7ULL,
0xd4004000084004a5ULL,
0x0c5400000013dc04ULL,
0xd4004000084009a5ULL,
0x2000000000000007ULL,
0xd4004000084007a5ULL,
0x1800000000009de2ULL,
0x18fe00000000dde2ULL,
0x9000000000001de7ULL,
0xd4004000084004a5ULL,
0x0c5400000013dc04ULL,
0xd4004000084009a5ULL,
0x2000000000000007ULL,
0xd4004000084007a5ULL,
0x1800000000009de2ULL,
0x180000000400dde2ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0xd400400008400985ULL,
0x2000000000000007ULL,
0xd400400008400785ULL,
0x7000c02828005c03ULL,
0x18fe00000000dde2ULL,
0x7000c02850009c03ULL,
0x3800000ffc001c02ULL,
0x1800000008a09c04ULL,
0x1800000004a05c04ULL,
0x2000000000000007ULL,
0x30ea00801c209c02ULL,
0x1800000000a01c04ULL,
0x30ea00801c105c02ULL,
0x30ea00801c001c02ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0x2000000000000007ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x7000c02828005c03ULL,
0x180000000400dde2ULL,
0x7000c02850009c03ULL,
0x3800000ffc001c02ULL,
0x9000000000001de7ULL,
0x2000000000000007ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x198000000020dc04ULL,
0x1900000000209c04ULL,
0x30ee02020430dc02ULL,
0x2000000000000007ULL,
0x1880000000205c04ULL,
0x30ee020204209c02ULL,
0x1800000000201c04ULL,
0x30ee020204105c02ULL,
0x30ee020204001c02ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x2000000000000007ULL,
0x0c5400000013dc04ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x198000000020de04ULL,
0x1900000000209e04ULL,
0x30f004081030dc02ULL,
0x1880000000205e04ULL,
0x2000000000000007ULL,
0x30f0040810209c02ULL,
0x1800000000201e04ULL,
0x30f0040810105c02ULL,
0x30f0040810001c02ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0x2000000000000007ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x1d8000000020de84ULL,
0x1d00000000209e84ULL,
0x1c80000000205e84ULL,
0x1c00000000201e84ULL,
0x9000000000001de7ULL,
0x2000000000000007ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x1d8000000020dc04ULL,
0x1d00000000209c04ULL,
0x1c80000000205c04ULL,
0x2000000000000007ULL,
0x1c00000000201c04ULL,
0x9000000000001de7ULL,
0xd400400008400445ULL,
0x0c5400000013dc04ULL,
0xd400400008400945ULL,
0xd400400008400745ULL,
0x7000c01814005c03ULL,
0x2000000000000007ULL,
0x18fe00000000dde2ULL,
0x7000c0142c009c03ULL,
0x380000007c001c02ULL,
0x1800000008209c04ULL,
0x1800000004205c04ULL,
0x30f4108420209c02ULL,
0x1800000000201c04ULL,
0x2000000000000007ULL,
0x30f2082084105c02ULL,
0x30f4108420001c02ULL,
0x9000000000001de7ULL,
0xd400400008400445ULL,
0x0c5400000013dc04ULL,
0xd400400008400945ULL,
0xd400400008400745ULL,
0x2000000000000007ULL,
0x7000c01414005c03ULL,
0x7000c01428009c03ULL,
0x380000007c001c02ULL,
0x18fe00000000dde2ULL,
0x1800000008209c04ULL,
0x1800000004205c04ULL,
0x1800000000201c04ULL,
0x2000000000000007ULL,
0x30f4108420209c02ULL,
0x30f4108420105c02ULL,
0x30f4108420001c02ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0xd400400008400985ULL,
0x2000000000000007ULL,
0xd400400008400785ULL,
0x1900000000a05c04ULL,
0x1800000000a01c04ULL,
0x30de0001d0105c02ULL,
0x30de0001d0001c02ULL,
0x1800000000009de2ULL,
0x18fe00000000dde2ULL,
0x2000000000000007ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x18fe00000000dde2ULL,
0x1900000000a05e04ULL,
0x2000000000000007ULL,
0x1800000000009de2ULL,
0x1800000000a01e04ULL,
0x30e000061c105c02ULL,
0x30e000061c001c02ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0x2000000000000007ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x180000000400dde2ULL,
0x1d00000000a05e84ULL,
0x1800000000009de2ULL,
0x1c00000000a01e84ULL,
0x9000000000001de7ULL,
0x2000000000000007ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x180000000400dde2ULL,
0x1d00000000a05c04ULL,
0x1800000000009de2ULL,
0x2000000000000007ULL,
0x1c00000000a01c04ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x18fe00000000dde2ULL,
0x2000000000000007ULL,
0x1100000000a05c04ULL,
0x1800000000009de2ULL,
0x1000000000a01c04ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0xd400400008400985ULL,
0x2000000000000007ULL,
0xd400400008400785ULL,
0x18fe00000000dde2ULL,
0x1800000000009de2ULL,
0x1800000000005de2ULL,
0x9000000000001de7ULL,
0xd400400008400485ULL,
0x0c5400000013dc04ULL,
0x2000000000000007ULL,
0xd400400008400985ULL,
0xd400400008400785ULL,
0x180000000400dde2ULL,
0x1800000000009de2ULL,
0x1800000000005de2ULL,
0x9000000000001de7ULL,
0xd400400008400445ULL,
0x2000000000000007ULL,
0x0c5400000013dc04ULL,
0xd400400008400945ULL,
0xd400400008400745ULL,
0x18fe00000000dde2ULL,
0x1880000000205c04ULL,
0x1800000000009de2ULL,
0x1800000000201c04ULL,
0x2000000000000007ULL,
0x30ee020204105c02ULL,
0x30ee020204001c02ULL,
0x9000000000001de7ULL,
0xd400400008400445ULL,
0x0c5400000013dc04ULL,
0xd400400008400945ULL,
0xd400400008400745ULL,
0x2000000000000007ULL,
0x18fe00000000dde2ULL,
0x1880000000205e04ULL,
0x1800000000009de2ULL,
0x1800000000201e04ULL,
0x30f0040810105c02ULL,
0x30f0040810001c02ULL,
0x9000000000001de7ULL,
0x2000000000000007ULL,
0xd400400008400445ULL,
0x0c5400000013dc04ULL,
0xd400400008400945ULL,
0xd400400008400745ULL,
0x180000000400dde2ULL,
0x1c80000000205c04ULL,
0x1800000000009de2ULL,
0x2000000000000007ULL,
0x1c00000000201c04ULL,
0x9000000000001de7ULL,
0xd400400008400445ULL,
0x0c5400000013dc04ULL,
0xd400400008400945ULL,
0xd400400008400745ULL,
0x180000000400dde2ULL,
0x2000000000000007ULL,
0x1c80000000205e84ULL,
0x1800000000009de2ULL,
0x1c00000000201e84ULL,
0x9000000000001de7ULL,
0xd400400008400445ULL,
0x0c5400000013dc04ULL,
0xd400400008400945ULL,
0x2000000000000007ULL,
0xd400400008400745ULL,
0x18fe00000000dde2ULL,
0x1800000000a01c04ULL,
0x1800000000009de2ULL,
0x1800000000005de2ULL,
0x30de0001d0001c02ULL,
0x9000000000001de7ULL,
0x2000000000000007ULL,
0xd400400008400445ULL,
0x0c5400000013dc04ULL,
0xd400400008400945ULL,
0xd400400008400745ULL,
0x18fe00000000dde2ULL,
0x1800000000a01e04ULL,
0x1800000000009de2ULL,
0x2000000000000007ULL,
0x1800000000005de2ULL,
0x30e000061c001c02ULL,
0x9000000000001de7ULL,
0xd400400008400465ULL,
0x0c5400000013dc04ULL,
0xd400400008400965ULL,
0xd400400008400765ULL,
0x2000000000000007ULL,
0x180000000400dde2ULL,
0x1800000000009de2ULL,
0x1800000000005de2ULL,
0x9000000000001de7ULL,
0xd400400008400445ULL,
0x0c5400000013dc04ULL,
0xd400400008400945ULL,
0x2000000000000007ULL,
0xd400400008400745ULL,
0x180000000400dde2ULL,
0x1800000000009de2ULL,
0x1800000000005de2ULL,
0x9000000000001de7ULL,
0xd400400008400445ULL,
0x0c5400000013dc04ULL,
0x2000000000000007ULL,
0xd400400008400945ULL,
0xd400400008400745ULL,
0x18fe00000000dde2ULL,
0x1800000000009de2ULL,
0x1000000000a01c04ULL,
0x1800000000005de2ULL,
0x9000000000001de7ULL,
0x2000000000000007ULL,
0xd400400008400405ULL,
0x0c5400000013dc04ULL,
0xd400400008400905ULL,
0xd400400008400705ULL,
0x18fe00000000dde2ULL,
0x1800000000201c04ULL,
0x1800000000009de2ULL,
0x2000000000000007ULL,
0x30ee020204001c02ULL,
0x1800000000005de2ULL,
0x9000000000001de7ULL,
0xd400400008400405ULL,
0x0c5400000013dc04ULL,
0xd400400008400905ULL,
0xd400400008400705ULL,
0x2000000000000007ULL,
0x18fe00000000dde2ULL,
0x1800000000201e04ULL,
0x1800000000009de2ULL,
0x30f0040810001c02ULL,
0x1800000000005de2ULL,
0x9000000000001de7ULL,
0xd400400008400425ULL,
0x2000000000000007ULL,
0x0c5400000013dc04ULL,
0xd400400008400925ULL,
0xd400400008400725ULL,
0x180000000400dde2ULL,
0x1800000000009de2ULL,
0x1800000000005de2ULL,
0x9000000000001de7ULL,
0x2000000000000007ULL,
0xd400400008400405ULL,
0x0c5400000013dc04ULL,
0xd400400008400905ULL,
0xd400400008400705ULL,
0x180000000400dde2ULL,
0x1800000000009de2ULL,
0x1800000000005de2ULL,
0x2000000000000007ULL,
0x9000000000001de7ULL,
0xd40040000840c485ULL,
0x0c5400000013dc04ULL,
0xd40040000840c985ULL,
0xd40040000840c785ULL,
0x18fe00000000dde2ULL,
0x4000000000001de4ULL,
0x9000000000001de7ULL,
0x4000000000001de4ULL,
0x9000000000001de7ULL,
0x4000000000001de4ULL,
0x9000000000001de7ULL,
0xc800000003f01cc5ULL,
0x2c00000100005c04ULL,
0x2c0000010800dc04ULL,
0x3000c3fffff09c04ULL,
0x680100000c1fdc03ULL,
0x4000000a60001c47ULL,
0x180000004000dde2ULL,
0x78000009c0000007ULL,
0x0c0000000430dd02ULL,
0x4003ffffa0001ca7ULL,
0x2800406400001de4ULL,
0x2800406410005de4ULL,
0x180000000400dde2ULL,
0x547e18000000dd05ULL,
0x60000008e0000007ULL,
0x190ec0000431dc03ULL,
0x40000000000001f4ULL,
0x94000004c0009c85ULL,
0x2c00000100009c04ULL,
0x2c0000010800dc04ULL,
0x9400000020009ca5ULL,
0x9400000100011cc5ULL,
0x9400000140021cc5ULL,
0x9400000180031cc5ULL,
0x94000001c0041cc5ULL,
0x9400000200051cc5ULL,
0x9400000240061cc5ULL,
0x9400000280071cc5ULL,
0x94000002c0081cc5ULL,
0x9400000300091cc5ULL,
0x94000003400a1cc5ULL,
0x94000003800b1cc5ULL,
0x94000003c00c1cc5ULL,
0x94000004000d1cc5ULL,
0x94000004400e1cc5ULL,
0x94000004800f1cc5ULL,
0xc000000003f09ea5ULL,
0x94000000c0009ca5ULL,
0xc000000023f09ea5ULL,
0x94000000e0009ca5ULL,
0x2c00000084009c04ULL,
0x2c0000008800dc04ULL,
0x9400000040009ca5ULL,
0x2c0000008c009c04ULL,
0x2c0000009400dc04ULL,
0x9400000060009ca5ULL,
0x2c00000098009c04ULL,
0x2c0000009c00dc04ULL,
0x9400000080009ca5ULL,
0x2c000000c800dc04ULL,
0x0c0000001030dd02ULL,
0x4000000100001ea7ULL,
0x480100000c001c03ULL,
0x0800000000105c42ULL,
0xc100000000309c85ULL,
0x9400000500009c85ULL,
0x0c00000010001d02ULL,
0x0800000000105d42ULL,
0x0c0000001030dd02ULL,
0x4003ffff40001ca7ULL,
0x2800406420001de4ULL,
0x2800406430005de4ULL,
0xe000000000001c45ULL,
0xd000000003ffdcc5ULL,
0x9c000000000fdcc5ULL,
0x2c0000000c009c04ULL,
0x7000c0205020dc03ULL,
0x7000c01820209c03ULL,
0x5000406450209c03ULL,
0x500040644030dc03ULL,
0x480000000c209c03ULL,
0x4801000008001c03ULL,
0x0800000000105c42ULL,
0x280040646000dde4ULL,
0x8400000020009f05ULL,
0x190ec0002821dc03ULL,
0x40000000800001e7ULL,
0x0c00000040001c02ULL,
0x0800000000105c42ULL,
0x0c0000004030dd02ULL,
0x00029dff0ffc5cbfULL,
0x8400000000009f85ULL,
0x2800406400001de4ULL,
0x2800406410005de4ULL,
0x9400000010009c85ULL,
0x4000000000001df4ULL,
0x9800000003ffdcc5ULL,
0xd000000000008007ULL,
0xa000000000004007ULL,
0xd000000000008007ULL,
0x3400c3fffc201c04ULL,
0xc000000003f01ec5ULL,
0xa000000000000007ULL
};
/*
 * Byte offsets of the builtin routines inside nve4_builtin_code[]. That array
 * holds 64-bit entries, so entry index = offset / 8.
 * Presumably indexed by the NVC0_BUILTIN_* ids in the order DIV U32, DIV S32,
 * RCP F64, RSQ F64 -- TODO confirm against the enum.
 * Only these four entry points are listed here; the code between 0x00f0 and
 * 0x0f08 that is not part of the second routine is reached some other way
 * (not visible from this table).
 */
static const uint16_t nve4_builtin_offsets[NVC0_BUILTIN_COUNT] =
{
0x0000, /* entry 0: first routine, 30 entries ending in 0x9000000000001de7 */
0x00f0, /* entry 30: second routine, begins with 0x188e0000fc05dc23 */
0x0f08, /* entry 481: pair 0x4000000000001de4, 0x9000000000001de7 */
0x0f18, /* entry 483: another identical two-entry pair */
};

View file

@ -1,86 +0,0 @@
//
// DIV U32
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// SIZE: 22 / 14 * 8 bytes
//
// NOTE(review): this listing is 26 instructions plus 4 sched words, i.e.
// 30 * 8 = 0xf0 bytes, and only $p0 is written ($p1 is never referenced).
// The SIZE and CLOBBER lines above look stale -- confirm before relying on
// them.
//
// A "sched" line appears before every group of (up to) seven instructions;
// presumably it carries scheduling hints for exactly that group, so
// instructions must not be added, removed or reordered without regenerating
// these words.
//
// Initial estimate z = 1 << (31 - bfind(b)); bfind presumably yields the
// index of the highest set bit of the divisor.
sched 0x28282804280428
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// From here on $r1 holds -b, so the low 32 bits of $r1 * z are
// 2^32 - b * z (mod 2^32).
cvt u32 $r1 neg u32 $r1
// Five identical refinement steps: $r3 = -b * z, then z += high32(z * $r3).
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
sched 0x28282828282828
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x042c2828042804
add $r2 (mul high u32 $r2 u32 $r3) $r2
// Keep a copy of the dividend a in $r3, then q = high32(a * z).
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
// $r2 = b again (negation of -b); remainder r = a + (-b) * q.
cvt u32 $r2 neg u32 $r1
add $r1 (mul u32 $r1 u32 $r0) $r3
// First correction: if r >= b then r -= b and q += 1.
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
sched 0x20282e20042c28
$p0 add b32 $r0 $r0 0x1
// Second correction, only evaluated when the first one fired ($p0 guards
// the compare itself as well as the two updates).
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
ret
//
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
//
// NOTE(review): $p0, $p2 and $p3 are the only predicates written in this
// listing; $p1 is never referenced -- confirm the CLOBBER line.
//
// The listing is 32 instructions plus 5 sched words (37 * 8 = 0x128 bytes).
// A "sched" line appears before every group of (up to) seven instructions;
// presumably it carries scheduling hints for exactly that group, so
// instructions must not be added, removed or reordered without regenerating
// these words.
//
// Record the signs before they are discarded:
// $p2 = dividend < 0
// $p3 = (divisor < 0) xor $p2, i.e. the operands have opposite signs
// (assuming the trailing "xor $p2" combines the compare result with $p2).
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
sched 0x28042804282820
// Continue with |a| and |b|; the rest mirrors the unsigned routine.
cvt s32 $r0 abs s32 $r0
cvt s32 $r1 abs s32 $r1
// Initial estimate z = 1 << (31 - bfind(b)); bfind presumably yields the
// index of the highest set bit of the divisor.
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// From here on $r1 holds -b.
cvt u32 $r1 neg u32 $r1
sched 0x28282828282828
// Five identical refinement steps: $r3 = -b * z, then z += high32(z * $r3).
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x28280428042828
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
// Keep a copy of the dividend in $r3, then q = high32(a * z).
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
// $r2 = b again (negation of -b); remainder r = a + (-b) * q.
cvt u32 $r2 neg u32 $r1
add $r1 (mul u32 $r1 u32 $r0) $r3
sched 0x2028042c28042c
// First correction: if r >= b then r -= b and q += 1.
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
// Second correction, only evaluated when the first one fired.
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
// Re-apply the signs: the quotient is negated when $p3 is set, the
// remainder when $p2 is set (it follows the sign of the dividend).
$p3 cvt s32 $r0 neg s32 $r0
sched 0x2c200428042e04
$p2 cvt s32 $r1 neg s32 $r1
ret

View file

@ -1,84 +0,0 @@
// Assembled from target_lib_nvf0.asm by envyas -m gk110 -W.
static const uint64_t nvf0_builtin_code[] =
{
// DIV U32
0x08a0a0a010a010a0ULL,
0xe1800000009c000aULL,
0x220000000f9c0808ULL,
0x74000000009fc00eULL,
0xe2400000011c0c0aULL,
0xe6010000009c2806ULL,
0xe1c00000011c040eULL,
0xd2000800019c080aULL,
0x08a0a0a0a0a0a0a0ULL,
0xe1c00000011c040eULL,
0xd2000800019c080aULL,
0xe1c00000011c040eULL,
0xd2000800019c080aULL,
0xe1c00000011c040eULL,
0xd2000800019c080aULL,
0xe1c00000011c040eULL,
0x0810b0a0a010a010ULL,
0xd2000800019c080aULL,
0xe4c03c00001c000eULL,
0xe1c00400011c0002ULL,
0xe6010000009c280aULL,
0xd0000c00001c0406ULL,
0xdb601c00011c041eULL,
0xe088000001000406ULL,
0x0880a0b88010b0a0ULL,
0x4000000000800001ULL,
0xdb601c000100041eULL,
0xe088000001000406ULL,
0x4000000000800001ULL,
0x19000000001c003cULL,
// DIV S32
0xdb181c007f9c005eULL,
0xdb1a08007f9c047eULL,
0x08a010a010a0a080ULL,
0xe6100000001ce802ULL,
0xe6100000009ce806ULL,
0xe1800000009c000aULL,
0x220000000f9c0808ULL,
0x74000000009fc00eULL,
0xe2400000011c0c0aULL,
0xe6010000009c2806ULL,
0x08a0a0a0a0a0a0a0ULL,
0xe1c00000011c040eULL,
0xd2000800019c080aULL,
0xe1c00000011c040eULL,
0xd2000800019c080aULL,
0xe1c00000011c040eULL,
0xd2000800019c080aULL,
0xe1c00000011c040eULL,
0x08a0a010a010a0a0ULL,
0xd2000800019c080aULL,
0xe1c00000011c040eULL,
0xd2000800019c080aULL,
0xe4c03c00001c000eULL,
0xe1c00400011c0002ULL,
0xe6010000009c280aULL,
0xd0000c00001c0406ULL,
0x0880a010b0a010b0ULL,
0xdb601c00011c041eULL,
0xe088000001000406ULL,
0x4000000000800001ULL,
0xdb601c000100041eULL,
0xe088000001000406ULL,
0x4000000000800001ULL,
0xe6010000000ce802ULL,
0x08b08010a010b810ULL,
0xe60100000088e806ULL,
0x19000000001c003cULL,
};
/*
 * Byte offsets of the builtin routines inside nvf0_builtin_code[]. That array
 * holds 64-bit entries, so entry index = offset / 8.
 * Presumably indexed by the NVC0_BUILTIN_* ids in the order DIV U32, DIV S32,
 * RCP F64, RSQ F64 -- TODO confirm against the enum.
 */
static const uint16_t nvf0_builtin_offsets[NVC0_BUILTIN_COUNT] =
{
0x0000, /* entry 0: start of the "DIV U32" section of the array */
0x00f0, /* entry 30: start of the "DIV S32" section of the array */
/* Just point at a ret instruction for now. */
0x00f0 - 8, /* = 0x00e8, entry 29: last entry of the "DIV U32" section */
0x00f0 - 8 /* same entry as above; the two slots share one target */
};