freedreno/computer: add script to test widening/narrowing

Just something I hacked together to help figure out which instructions can fold in a wideing/narrowing conversion. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4780>
2026-05-09 06:48:06 +02:00 · 2020-04-26 10:51:50 -07:00 · 2020-04-26 10:51:50 -07:00 · 997828e31b
commit 997828e31b
parent 6b551d9f36
1 changed files with 297 additions and 0 deletions
--- a/src/freedreno/computerator/examples/test-opcodes.sh
+++ b/src/freedreno/computerator/examples/test-opcodes.sh
@ -0,0 +1,297 @@
 #!/bin/bash
 #
 # Test various instructions to check whether half<->full widening/narrowing
 # works.  The basic premise is to perform the same instruction with and
 # without the widening/narrowing folded in and check if the results match.
 #
 # Note this doesn't currently diferentiate between signed/unsigned/bool,
 # and just assumes int is signed (since unsigned is basically(ish) like
 # signed but without sign extension)
 #
 # TODO probably good pick numeric src values that are better at triggering
 # edge cases, while still not loosing precision in a full->half->full
 # seqeuence.. but some instructions like absneg don't even appear to be
 # subtlely wrong when you try to fold in a precision conversion.
 #
 # add '-v' arg to see the result values
 set -e
 #
 # Templates for float->float instructions:
 #
 f2f_instrs=(
 	'add.f $dst, $src1, $src2'
 	'min.f $dst, $src1, $src2'
 	'min.f $dst, $src2, $src1'
 	'max.f $dst, $src1, $src2'
 	'max.f $dst, $src2, $src1'
 	'mul.f $dst, $src1, $src2'
 	'sign.f $dst, $src1'
 	'absneg.f $dst, \(neg\)$src1'
 	'absneg.f $dst, \(abs\)$src1'
 	'floor.f $dst, $src1'
 	'ceil.f $dst, $src1'
 	'rndne.f $dst, $src1'
 	'rndaz.f $dst, $src1'
 	'trunc.f $dst, $src1'
 )
 #
 # Templates for float->int instructions:
 #
 f2i_instrs=(
 	'cmps.f.gt $dst, $src1, $src2'
 	'cmps.f.lt $dst, $src1, $src2'
 	'cmpv.f.gt $dst, $src1, $src2'
 	'cmpv.f.lt $dst, $src1, $src2'
 )
 #
 # Templates for int->int instructions:
 #
 i2i_instrs=(
 	'add.u $dst, $src1, $src2'
 	'add.s $dst, $src1, $src2'
 	'sub.u $dst, $src1, $src2'
 	'sub.s $dst, $src1, $src2'
 	'cmps.f.gt $dst, $src1, $src2'
 	'cmps.f.lt $dst, $src1, $src2'
 	'min.u $dst, $src1, $src2'
 	'min.u $dst, $src2, $src1'
 	'min.s $dst, $src1, $src2'
 	'min.s $dst, $src2, $src1'
 	'max.u $dst, $src1, $src2'
 	'max.u $dst, $src2, $src1'
 	'max.s $dst, $src1, $src2'
 	'max.s $dst, $src2, $src1'
 	'absneg.s $dst, \(neg\)$src1'
 	'absneg.s $dst, \(abs\)$src1'
 	'and.b $dst, $src2, $src3'
 	'or.b $dst, $src1, $src2'
 	'not.b $dst, $src1'
 	'xor.b $dst, $src1, $src2'
 	'cmpv.u.gt $dst, $src1, $src2'
 	'cmpv.u.lt $dst, $src1, $src2'
 	'cmpv.s.gt $dst, $src1, $src2'
 	'cmpv.s.lt $dst, $src1, $src2'
 	'mul.u24 $dst, $src1, $src2'
 	'mul.s24 $dst, $src1, $src2'
 	'mull.u $dst, $src1, $src2'
 	'bfrev.b $dst, $src1'
 	'clz.s $dst, $src2'
 	'clz.b $dst, $src2'
 	'shl.b $dst, $src1, $src2'
 	'shr.b $dst, $src3, $src1'
 	'ashr.b $dst, $src3, $src1'
 	'mgen.b $dst, $src1, $src2'
 	'getbit.b $dst, $src3, $src2'
 	'setrm $dst, $src1'
 	'cbits.b $dst, $src3'
 	'shb $dst, $src1, $src2'
 	'msad $dst, $src1, $src2'
 )
 #
 # Helper to expand instruction template:
 #
 expand() {
 	instr=$1
 	dst=$2
 	src1=$3
 	src2=$4
 	src3=$5
 	eval echo $instr
 }
 expand_test() {
 	instr=$1
 	echo '; control, half->half:'
 	expand $instr "hr1.x" "hr0.x" "hr0.y" "hr0.z"
 	echo '; test, full->half:'
 	expand $instr "hr1.y" "r1.x" "r1.y" "r1.z"
 	echo '; control, full->full:'
 	expand $instr "r2.x" "r1.x" "r1.y" "r1.z"
 	echo '; test, half->full:'
 	expand $instr "r2.y" "hr0.x" "hr0.y" "hr0.z"
 	echo "(rpt5)nop"
 }
 #
 # Helpers to construct test program assembly:
 #
 header_asm() {
 	cat <<EOF
@localsize 1, 1, 1
@buf 4  ; g[0]
 EOF
 }
 footer_asm() {
 	cat <<EOF
 ; dest offsets:
 mov.u32u32 r3.x, 0
 mov.u32u32 r3.y, 1
 mov.u32u32 r3.z, 2
 mov.u32u32 r3.w, 3
 (rpt5)nop
 ; and store results:
 stib.untyped.1d.u32.1 g[0] + r3.x, r2.x   ; control: full->full
 stib.untyped.1d.u32.1 g[0] + r3.y, r2.y   ; test:    half->full
 stib.untyped.1d.u32.1 g[0] + r3.z, r2.z   ; control: half->half
 stib.untyped.1d.u32.1 g[0] + r3.w, r2.w   ; test:    full->half
 (sy)nop
 end
 EOF
 }
 setup_asm_float() {
 	cat <<EOF
 ; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
 ; r1->r2 avail for full, r1 for src, r2 for dst
 cov.f32f16 hr0.x, (1.0)
 cov.f32f16 hr0.y, (2.0)
 cov.f32f16 hr0.z, (3.0)
 mov.f32f32 r1.x,  (1.0)
 mov.f32f32 r1.y,  (2.0)
 mov.f32f32 r1.z,  (3.0)
 (rpt5)nop
 EOF
 }
 setup_asm_int() {
 	cat <<EOF
 ; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
 ; r1->r2 avail for full, r1 for src, r2 for dst
 cov.s32s16 hr0.x,  1
 cov.s32s16 hr0.y, -2
 cov.s32s16 hr0.z,  3
 mov.s32s32 r1.x,   1
 mov.s32s32 r1.y,  -2
 mov.s32s32 r1.z,   3
 (rpt5)nop
 EOF
 }
 #
 # Generate assembly code to test float->float opcode
 #
 f2f_asm() {
 	instr=$1
 	header_asm
 	setup_asm_float
 	expand_test $instr
 	cat <<EOF
 ; convert half results back to full:
 cov.f16f32 r2.z, hr1.x
 cov.f16f32 r2.w, hr1.y
 EOF
 	footer_asm
 }
 #
 # Generate assembly code to test float->int opcode
 #
 f2i_asm() {
 	instr=$1
 	header_asm
 	setup_asm_float
 	expand_test $instr
 	cat <<EOF
 ; convert half results back to full:
 cov.s16s32 r2.z, hr1.x
 cov.s16s32 r2.w, hr1.y
 EOF
 	footer_asm
 }
 #
 # Generate assembly code to test int->int opcode
 #
 i2i_asm() {
 	instr=$1
 	header_asm
 	setup_asm_int
 	expand_test $instr
 	cat <<EOF
 ; convert half results back to full:
 cov.s16s32 r2.z, hr1.x
 cov.s16s32 r2.w, hr1.y
 EOF
 	footer_asm
 }
 #
 # Helper to parse computerator output and print results:
 #
 check_results() {
 	str=`cat - | grep "	" | head -1 | xargs`
 	if [ "$verbose" = "true" ]; then
 		echo $str
 	fi
 	# Split components of result buffer:
 	cf=$(echo $str | cut -f1 -d' ')
 	tf=$(echo $str | cut -f2 -d' ')
 	ch=$(echo $str | cut -f3 -d' ')
 	th=$(echo $str | cut -f4 -d' ')
 	# Sanity test, make sure the control results match:
 	if [ $cf != $ch ]; then
 		echo "    FAIL: control results do not match!  Half vs full op is not equivalent!"
 		echo "    full=$cf half=$ch"
 	fi
 	# Compare test (with conversion folded) to control:
 	if [ $cf != $tf ]; then
 		echo "    FAIL: half -> full widening result does not match control!"
 		echo "    control=$cf result=$tf"
 	fi
 	if [ $ch != $th ]; then
 		echo "    FAIL: full -> half narrowing result does not match control!"
 		echo "    control=$ch result=$th"
 	fi
 	# HACK without a delay different invocations
 	# of computerator seem to somehow clobber each
 	# other.. which isn't great..
 	sleep 0.1
 }
 #
 # Run the tests!
 #
 if [ "$1" = "-v" ]; then
 	verbose="true"
 fi
 IFS=""
 for instr in ${f2f_instrs[@]}; do
 	echo "TEST: $instr"
 	f2f_asm $instr | ./computerator -g 1,1,1 | check_results
 done
 for instr in ${f2i_instrs[@]}; do
 	echo "TEST: $instr"
 	f2i_asm $instr | ./computerator -g 1,1,1 | check_results
 done
 for instr in ${i2i_instrs[@]}; do
 	echo "TEST: $instr"
 	i2i_asm $instr | ./computerator -g 1,1,1 | check_results
 done