From 1a8113fdee508ca8f880e74cc1adb616d4f35c9b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 8 Jan 2021 14:32:33 -0800 Subject: [PATCH] freedreno/ir3/decode: Switch over to new disasm Signed-off-by: Rob Clark Part-of: --- src/freedreno/.gitlab-ci/reference/crash.log | 1060 +---------- ...exed.indirect_draw_count.triangle_list.log | 54 +- .../.gitlab-ci/reference/fd-clouds.log | 120 +- .../.gitlab-ci/reference/glxgears-a420.log | 198 +- src/freedreno/common/disasm.h | 2 + src/freedreno/decode/redump.h | 4 +- src/freedreno/ir3/disasm-a3xx.c | 1639 +++-------------- 7 files changed, 292 insertions(+), 2785 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log index e628320ff17..463fc49be8d 100644 --- a/src/freedreno/.gitlab-ci/reference/crash.log +++ b/src/freedreno/.gitlab-ci/reference/crash.log @@ -3432,22 +3432,12 @@ shader-blocks: - type: A6XX_SP_INST_DATA - bank: 0 size: 2048 - :0:0000:0000[0600e824x_a018c54ax] unknown(0,12) - :7:0001:0001[edc6145bx_11fa09c3x] (ss)(jp)unknown(7,11).g - :2:0002:0002[41440087x_008c504ax] ceil.f hr33.w, (neg)hc18.z - :0:0003:0003[14183488x_d5c04509x] (sy)(ss)(rpt4)cut - :5:0004:0008[a52373bdx_8ff7c071x] samgp0.3d.a.p (s8)(xy)hr47.y, r14.x, s#15, t#71 - :1:0005:0009[39301c43x_1d826d16x] (sy)(ss)(jp)(rpt4)cov.s16f16 (pos_infinity)hr16.w, (r)hc325.z - :3:0006:0014[7f64a39ax_609f35bcx] (sy)(jp)(rpt3)(ul)sad.s16 hr38.z, hc367.x, (neg)hc50.y, (r)hr39.w - :7:0007:0018[f352cfcbx_ecad502bx] (sy)unknown(7,6).g - :4:0008:0019[818209d0x_74021646x] (rpt1)unknown(4,12) hr52.x, (r)hc401.z - :6:0009:0021[c90972c0x_8e905e80x] (jp)stl.s16 l[hr48.x], hr16.x, 142 - :5:0010:0022[a4827242x_46248300x] gather4b.a (s8)(y)hr16.z, hr32.x, s#1, t#35 - :4:0011:0023[82342205x_cd064d21x] (rpt2)(ul)unknown(4,17) r1.y, (neg)c - :5:0012:0026[a923bf8bx_81f95908x] (jp)samb.3d.a.p (u32)(xyzw)r34.w, hr33.x, hr43.x, s#15, t#64 - :1:0013:0027[3dda8123x_a0d91ccdx] (sy)(jp)(rpt1)unknown(1,2) - -Assertion `instr->cat6.opc == 0' failed. + :0:0000:0000[0600e824x_a018c54ax] no match: 0600e824a018c54a + :7:0001:0001[edc6145bx_11fa09c3x] no match: edc6145b11fa09c3 + :2:0002:0002[41440087x_008c504ax] ceil.f hr33.w, (neg)hc18.z ; dontcare bits in ceil.f: 40000008c0000 + :0:0003:0003[14183488x_d5c04509x] no match: 14183488d5c04509 + :5:0004:0004[a52373bdx_8ff7c071x] no match: a52373bd8ff7c071 + :1:0005:0005[39301c43x_1d826d16x] no match: 39301c431d826d16 ----------------------------------------------- 8192 (0x2000) bytes 000000: a018c54a 0600e824 11fa09c3 edc6145b |J...$.......[...| @@ -4123,10 +4113,12 @@ Assertion `instr->cat6.opc == 0' failed. - type: A6XX_HLSQ_INST_RAM - bank: 0 size: 2048 - :2:0000:0000[40846422x_d81251c5x] (sat)(ul)sign.f r8.z, (neg)hc113.y - :4:0001:0001[938a16e2x_520c369ax] (sy)(ss)(sat)(rpt2)unknown(4,28) hr56.z, h(-358) - :1:0002:0004[200a00c1x_094864d2x] cov.u16f16 hr, -Assertion `num < MAX_REG' failed. + :2:0000:0000[40846422x_d81251c5x] (sat)(ul)sign.f r8.z, (neg)hc113.y ; dontcare bits in sign.f: 40000d8120000 + :4:0001:0001[938a16e2x_520c369ax] no match: 938a16e2520c369a + :1:0002:0002[200a00c1x_094864d2x] no match: 200a00c1094864d2 + :2:0003:0003[44109084x_4a201507x] no match: 441090844a201507 + :4:0004:0004[882fadabx_14a391b1x] (jp)(sat)(rpt1)(ul)rsq hr42.w, (abs)(r)hc108.y ; dontcare bits in rsq: f800014a30000 + :3:0005:0006[6060f068x_7106601ax] (ss)(ul)mad.u16 r26.x, (neg)hr6.z, (neg)hr48.y, (r)hc65.z ; dontcare bits in mad.u16: 2000 ----------------------------------------------- 8192 (0x2000) bytes 000000: d81251c5 40846422 520c369a 938a16e2 |.Q.."d.@.6.R....| @@ -4645,1028 +4637,12 @@ Assertion `num < MAX_REG' failed. size: 2048 :0:0000:0000[00000000x_00003002x] nop :0:0001:0001[00000000x_00000000x] nop - :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0008:0008[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0009:0009[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0010:0010[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0011:0011[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0012:0012[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0013:0013[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0014:0014[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0015:0015[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0016:0016[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0017:0017[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0018:0018[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0019:0019[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0020:0020[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0021:0021[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0022:0022[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0023:0023[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0024:0024[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0025:0025[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0026:0026[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0027:0027[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0028:0028[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0029:0029[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0030:0030[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0031:0031[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0032:0032[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0033:0033[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0034:0034[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0035:0035[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0036:0036[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0037:0037[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0038:0038[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0039:0039[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0040:0040[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0041:0041[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0042:0042[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0043:0043[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0044:0044[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0045:0045[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0046:0046[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0047:0047[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0048:0048[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0049:0049[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0050:0050[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0051:0051[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0052:0052[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0053:0053[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0054:0054[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0055:0055[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0056:0056[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0057:0057[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0058:0058[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0059:0059[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0060:0060[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0061:0061[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0062:0062[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0063:0063[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0064:0064[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0065:0065[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0066:0066[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0067:0067[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0068:0068[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0069:0069[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0070:0070[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0071:0071[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0072:0072[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0073:0073[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0074:0074[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0075:0075[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0076:0076[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0077:0077[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0078:0078[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0079:0079[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0080:0080[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0081:0081[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0082:0082[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0083:0083[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0084:0084[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0085:0085[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0086:0086[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0087:0087[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0088:0088[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0089:0089[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0090:0090[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0091:0091[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0092:0092[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0093:0093[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0094:0094[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0095:0095[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0096:0096[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0097:0097[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0098:0098[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0099:0099[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0100:0100[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0101:0101[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0102:0102[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0103:0103[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0104:0104[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0105:0105[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0106:0106[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0107:0107[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0108:0108[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0109:0109[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0110:0110[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0111:0111[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0112:0112[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0113:0113[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0114:0114[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0115:0115[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0116:0116[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0117:0117[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0118:0118[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0119:0119[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0120:0120[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0121:0121[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0122:0122[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0123:0123[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0124:0124[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0125:0125[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0126:0126[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0127:0127[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0128:0128[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0129:0129[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0130:0130[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0131:0131[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0132:0132[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0133:0133[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0134:0134[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0135:0135[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0136:0136[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0137:0137[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0138:0138[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0139:0139[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0140:0140[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0141:0141[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0142:0142[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0143:0143[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0144:0144[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0145:0145[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0146:0146[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0147:0147[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0148:0148[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0149:0149[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0150:0150[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0151:0151[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0152:0152[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0153:0153[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0154:0154[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0155:0155[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0156:0156[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0157:0157[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0158:0158[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0159:0159[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0160:0160[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0161:0161[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0162:0162[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0163:0163[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0164:0164[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0165:0165[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0166:0166[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0167:0167[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0168:0168[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0169:0169[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0170:0170[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0171:0171[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0172:0172[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0173:0173[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0174:0174[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0175:0175[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0176:0176[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0177:0177[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0178:0178[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0179:0179[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0180:0180[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0181:0181[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0182:0182[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0183:0183[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0184:0184[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0185:0185[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0186:0186[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0187:0187[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0188:0188[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0189:0189[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0190:0190[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0191:0191[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0192:0192[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0193:0193[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0194:0194[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0195:0195[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0196:0196[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0197:0197[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0198:0198[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0199:0199[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0200:0200[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0201:0201[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0202:0202[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0203:0203[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0204:0204[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0205:0205[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0206:0206[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0207:0207[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0208:0208[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0209:0209[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0210:0210[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0211:0211[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0212:0212[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0213:0213[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0214:0214[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0215:0215[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0216:0216[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0217:0217[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0218:0218[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0219:0219[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0220:0220[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0221:0221[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0222:0222[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0223:0223[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0224:0224[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0225:0225[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0226:0226[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0227:0227[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0228:0228[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0229:0229[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0230:0230[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0231:0231[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0232:0232[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0233:0233[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0234:0234[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0235:0235[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0236:0236[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0237:0237[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0238:0238[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0239:0239[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0240:0240[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0241:0241[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0242:0242[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0243:0243[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0244:0244[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0245:0245[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0246:0246[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0247:0247[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0248:0248[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0249:0249[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0250:0250[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0251:0251[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0252:0252[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0253:0253[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0254:0254[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0255:0255[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0256:0256[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0257:0257[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0258:0258[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0259:0259[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0260:0260[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0261:0261[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0262:0262[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0263:0263[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0264:0264[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0265:0265[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0266:0266[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0267:0267[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0268:0268[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0269:0269[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0270:0270[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0271:0271[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0272:0272[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0273:0273[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0274:0274[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0275:0275[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0276:0276[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0277:0277[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0278:0278[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0279:0279[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0280:0280[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0281:0281[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0282:0282[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0283:0283[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0284:0284[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0285:0285[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0286:0286[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0287:0287[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0288:0288[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0289:0289[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0290:0290[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0291:0291[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0292:0292[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0293:0293[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0294:0294[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0295:0295[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0296:0296[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0297:0297[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0298:0298[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0299:0299[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0300:0300[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0301:0301[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0302:0302[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0303:0303[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0304:0304[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0305:0305[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0306:0306[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0307:0307[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0308:0308[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0309:0309[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0310:0310[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0311:0311[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0312:0312[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0313:0313[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0314:0314[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0315:0315[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0316:0316[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0317:0317[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0318:0318[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0319:0319[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0320:0320[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0321:0321[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0322:0322[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0323:0323[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0324:0324[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0325:0325[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0326:0326[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0327:0327[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0328:0328[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0329:0329[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0330:0330[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0331:0331[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0332:0332[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0333:0333[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0334:0334[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0335:0335[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0336:0336[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0337:0337[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0338:0338[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0339:0339[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0340:0340[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0341:0341[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0342:0342[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0343:0343[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0344:0344[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0345:0345[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0346:0346[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0347:0347[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0348:0348[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0349:0349[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0350:0350[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0351:0351[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0352:0352[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0353:0353[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0354:0354[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0355:0355[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0356:0356[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0357:0357[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0358:0358[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0359:0359[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0360:0360[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0361:0361[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0362:0362[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0363:0363[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0364:0364[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0365:0365[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0366:0366[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0367:0367[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0368:0368[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0369:0369[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0370:0370[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0371:0371[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0372:0372[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0373:0373[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0374:0374[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0375:0375[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0376:0376[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0377:0377[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0378:0378[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0379:0379[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0380:0380[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0381:0381[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0382:0382[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0383:0383[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0384:0384[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0385:0385[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0386:0386[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0387:0387[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0388:0388[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0389:0389[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0390:0390[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0391:0391[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0392:0392[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0393:0393[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0394:0394[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0395:0395[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0396:0396[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0397:0397[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0398:0398[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0399:0399[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0400:0400[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0401:0401[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0402:0402[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0403:0403[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0404:0404[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0405:0405[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0406:0406[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0407:0407[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0408:0408[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0409:0409[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0410:0410[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0411:0411[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0412:0412[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0413:0413[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0414:0414[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0415:0415[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0416:0416[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0417:0417[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0418:0418[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0419:0419[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0420:0420[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0421:0421[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0422:0422[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0423:0423[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0424:0424[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0425:0425[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0426:0426[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0427:0427[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0428:0428[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0429:0429[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0430:0430[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0431:0431[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0432:0432[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0433:0433[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0434:0434[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0435:0435[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0436:0436[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0437:0437[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0438:0438[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0439:0439[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0440:0440[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0441:0441[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0442:0442[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0443:0443[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0444:0444[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0445:0445[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0446:0446[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0447:0447[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0448:0448[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0449:0449[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0450:0450[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0451:0451[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0452:0452[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0453:0453[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0454:0454[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0455:0455[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0456:0456[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0457:0457[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0458:0458[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0459:0459[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0460:0460[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0461:0461[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0462:0462[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0463:0463[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0464:0464[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0465:0465[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0466:0466[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0467:0467[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0468:0468[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0469:0469[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0470:0470[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0471:0471[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0472:0472[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0473:0473[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0474:0474[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0475:0475[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0476:0476[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0477:0477[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0478:0478[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0479:0479[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0480:0480[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0481:0481[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0482:0482[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0483:0483[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0484:0484[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0485:0485[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0486:0486[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0487:0487[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0488:0488[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0489:0489[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0490:0490[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0491:0491[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0492:0492[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0493:0493[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0494:0494[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0495:0495[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0496:0496[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0497:0497[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0498:0498[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0499:0499[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0500:0500[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0501:0501[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0502:0502[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0503:0503[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0504:0504[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0505:0505[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0506:0506[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0507:0507[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0508:0508[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0509:0509[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0510:0510[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0511:0511[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :0:0512:0512[00500240x_00024000x] (rpt2)nop - :0:0513:0515[00402020x_00000000x] nop - :0:0514:0516[00000040x_00001000x] nop - :0:0515:0517[00510401x_00024020x] (eq)(rpt4)nop - :0:0516:0522[00100080x_00000008x] nop - :0:0517:0523[00000044x_00002080x] nop - :0:0518:0524[00001000x_00000000x] (ss)nop - :0:0519:0525[00200000x_00000008x] nop - :0:0520:0526[00000044x_00048110x] nop - :0:0521:0527[00000040x_00508000x] nop - :0:0522:0528[00010200x_00020044x] (eq)(rpt2)nop - :0:0523:0531[00000000x_00201014x] nop - :0:0524:0532[00012100x_00101100x] (eq)(rpt1)nop - :0:0525:0534[00000012x_00005000x] nop - :0:0526:0535[00000010x_00005000x] nop - :0:0527:0536[00040000x_00000020x] nop - :0:0528:0537[00002101x_00082514x] (rpt1)nop - :0:0529:0539[00000000x_00210020x] nop - :0:0530:0540[00440004x_00010002x] nop - :0:0531:0541[00000002x_00000250x] nop - :0:0532:0542[00000040x_00100000x] nop - :0:0533:0543[00000000x_00020014x] nop - :0:0534:0544[000400a0x_00050020x] nop - :0:0535:0545[00100000x_00000000x] nop - :0:0536:0546[00000000x_00044081x] nop - :0:0537:0547[00000000x_00000000x] nop - :0:0538:0548[00200048x_00000100x] nop - :0:0539:0549[00080020x_00000000x] nop - :0:0540:0550[00200002x_00200001x] nop - :0:0541:0551[002000a4x_00000404x] nop - :0:0542:0552[00440246x_00000004x] (rpt2)nop - :0:0543:0555[0008c040x_00442000x] nop - :0:0544:0556[002112a0x_00200000x] (ss)(eq)(rpt2)nop - :0:0545:0559[00000000x_00000000x] nop - :0:0546:0560[00000240x_00400001x] (rpt2)nop - :0:0547:0563[00000000x_00040400x] nop - :0:0548:0564[0000a100x_00104010x] (rpt1)nop - :0:0549:0566[00008480x_00002001x] (rpt4)nop - :0:0550:0571[00000001x_00000040x] nop - :0:0551:0572[00040001x_00040400x] nop - :0:0552:0573[00200000x_00040600x] nop - :0:0553:0574[00000100x_00100000x] (rpt1)nop - :0:0554:0576[00504180x_0020a200x] (rpt1)nop - :0:0555:0578[00000000x_00000000x] nop - :0:0556:0579[00000024x_00004000x] nop - :0:0557:0580[00200000x_00100008x] nop - :0:0558:0581[00010080x_00000000x] (eq)nop - :0:0559:0582[00080000x_00000000x] nop - :0:0560:0583[00084000x_00500400x] nop - :0:0561:0584[00004000x_00008000x] nop - :0:0562:0585[00200000x_00000300x] nop - :0:0563:0586[00000042x_00020001x] nop - :0:0564:0587[00005600x_00400088x] (ss)(rpt6)nop - :0:0565:0594[00000002x_00000000x] nop - :0:0566:0595[0002005ex_00400008x] bkt #8 - :0:0567:0596[00020020x_00200000x] bkt #0 - :0:0568:0597[001e0414x_00055480x] (rpt4)bkt #21632 - :0:0569:0602[00000000x_00000000x] nop - :0:0570:0603[00000442x_00000480x] (rpt4)nop - :0:0571:0608[00000200x_00080000x] (rpt2)nop - :0:0572:0611[00520000x_00600400x] bkt #1024 - :0:0573:0612[00001200x_00000008x] (ss)(rpt2)nop - :0:0574:0615[00400114x_00201000x] (rpt1)nop - :0:0575:0617[00110100x_00100002x] (eq)(rpt1)nop - :0:0576:0619[00404200x_00200683x] (rpt2)nop - :0:0577:0622[00000090x_00000004x] nop - :0:0578:0623[00502000x_00002000x] nop - :0:0579:0624[00000004x_00000020x] nop - :0:0580:0625[00103100x_00600010x] (ss)(rpt1)nop - :0:0581:0627[00000002x_00000010x] nop - :0:0582:0628[00004000x_00021200x] nop - :0:0583:0629[00000000x_00000000x] nop - :0:0584:0630[00201400x_0010220ax] (ss)(rpt4)nop - :0:0585:0635[00000000x_00030000x] nop - :0:0586:0636[00080040x_00400000x] nop - :0:0587:0637[00000080x_00000002x] nop - :0:0588:0638[00000580x_00000400x] (rpt5)nop - :0:0589:0644[00000200x_00000022x] (rpt2)nop - :0:0590:0647[00080000x_00300042x] nop - :0:0591:0648[00008000x_00040200x] nop - :0:0592:0649[00000000x_00040000x] nop - :0:0593:0650[0012008ax_00000010x] bkt #16 - :0:0594:0651[00000100x_00000000x] (rpt1)nop - :0:0595:0653[00010000x_00010018x] (eq)nop - :0:0596:0654[00500011x_00440020x] nop - :0:0597:0655[00100000x_00000000x] nop - :0:0598:0656[00008200x_0004020cx] (rpt2)nop - :0:0599:0659[00000400x_00100010x] (rpt4)nop - :0:0600:0664[00000004x_00118000x] nop - :0:0601:0665[00000002x_00004200x] nop - :0:0602:0666[00026300x_00000210x] (rpt3)bkt #528 - :0:0603:0670[0000a002x_00000040x] nop - :0:0604:0671[00081100x_00004082x] (ss)(rpt1)nop - :0:0605:0673[00000008x_00210000x] nop - :0:0606:0674[00020004x_00020000x] bkt #0 - :0:0607:0675[00020000x_00064108x] bkt #16648 - :0:0608:0676[00000084x_00020000x] nop - :0:0609:0677[00000181x_00000430x] (rpt1)nop - :0:0610:0679[001c8100x_00100002x] (rpt1)nop - :0:0611:0681[00000000x_00200020x] nop - :0:0612:0682[00100081x_00002000x] nop - :0:0613:0683[00000000x_00000008x] nop - :0:0614:0684[00009420x_00000024x] (ss)(rpt4)nop - :0:0615:0689[00000100x_00002010x] (rpt1)nop - :0:0616:0691[00004188x_00000000x] (rpt1)nop - :0:0617:0693[00100000x_00002000x] nop - :0:0618:0694[00120102x_00040000x] (rpt1)bkt #0 - :0:0619:0696[00040002x_00000000x] nop - :0:0620:0697[00224200x_00210201x] (rpt2)bkt #513 - :0:0621:0700[00000200x_00040000x] (rpt2)nop - :0:0622:0703[0000000cx_00000000x] nop - :0:0623:0704[00000000x_00005000x] nop - :0:0624:0705[00082208x_00010200x] (rpt2)nop - :0:0625:0708[00194011x_00000000x] (eq)nop - :0:0626:0709[00012100x_00000502x] (eq)(rpt1)nop - :0:0627:0711[00000240x_00040050x] (rpt2)nop - :0:0628:0714[00080211x_00004180x] (rpt2)nop - :0:0629:0717[00000000x_00001008x] nop - :0:0630:0718[00020490x_002004a0x] (rpt4)bkt #1184 - :0:0631:0723[00210004x_00001080x] (eq)nop - :0:0632:0724[00000000x_00300040x] nop - :0:0633:0725[00008002x_00000020x] nop - :0:0634:0726[00000000x_00041098x] nop - :0:0635:0727[002000a0x_00000000x] nop - :0:0636:0728[00000000x_000c0400x] nop - :0:0637:0729[00000401x_00000402x] (rpt4)nop - :0:0638:0734[00002000x_00200400x] nop - :0:0639:0735[00000101x_00001000x] (rpt1)nop - :0:0640:0737[00500240x_00024000x] (rpt2)nop - :0:0641:0740[00402020x_00000000x] nop - :0:0642:0741[00000040x_00001000x] nop - :0:0643:0742[00510401x_00024020x] (eq)(rpt4)nop - :0:0644:0747[00100080x_00000008x] nop - :0:0645:0748[00000044x_00002080x] nop - :0:0646:0749[00001000x_00000000x] (ss)nop - :0:0647:0750[00200000x_00000008x] nop - :0:0648:0751[00000044x_00048110x] nop - :0:0649:0752[00000040x_00508000x] nop - :0:0650:0753[00010200x_00020044x] (eq)(rpt2)nop - :0:0651:0756[00000000x_00201014x] nop - :0:0652:0757[00012100x_00101100x] (eq)(rpt1)nop - :0:0653:0759[00000012x_00005000x] nop - :0:0654:0760[00000010x_00005000x] nop - :0:0655:0761[00040000x_00000020x] nop - :0:0656:0762[00002101x_00082514x] (rpt1)nop - :0:0657:0764[00000000x_00210020x] nop - :0:0658:0765[00440004x_00010002x] nop - :0:0659:0766[00000002x_00000250x] nop - :0:0660:0767[00000040x_00100000x] nop - :0:0661:0768[00000000x_00020014x] nop - :0:0662:0769[000400a0x_00050020x] nop - :0:0663:0770[00100000x_00000000x] nop - :0:0664:0771[00000000x_00044081x] nop - :0:0665:0772[00000000x_00000000x] nop - :0:0666:0773[00200048x_00000100x] nop - :0:0667:0774[00080020x_00000000x] nop - :0:0668:0775[00200002x_00200001x] nop - :0:0669:0776[002000a4x_00000404x] nop - :0:0670:0777[00440246x_00000004x] (rpt2)nop - :0:0671:0780[0008c040x_00442000x] nop - :0:0672:0781[002112a0x_00200000x] (ss)(eq)(rpt2)nop - :0:0673:0784[00000000x_00000000x] nop - :0:0674:0785[00000240x_00400001x] (rpt2)nop - :0:0675:0788[00000000x_00040400x] nop - :0:0676:0789[0000a100x_00104010x] (rpt1)nop - :0:0677:0791[00008480x_00002001x] (rpt4)nop - :0:0678:0796[00000001x_00000040x] nop - :0:0679:0797[00040001x_00040400x] nop - :0:0680:0798[00200000x_00040600x] nop - :0:0681:0799[00000100x_00100000x] (rpt1)nop - :0:0682:0801[00504180x_0020a200x] (rpt1)nop - :0:0683:0803[00000000x_00000000x] nop - :0:0684:0804[00000024x_00004000x] nop - :0:0685:0805[00200000x_00100008x] nop - :0:0686:0806[00010080x_00000000x] (eq)nop - :0:0687:0807[00080000x_00000000x] nop - :0:0688:0808[00084000x_00500400x] nop - :0:0689:0809[00004000x_00008000x] nop - :0:0690:0810[00200000x_00000300x] nop - :0:0691:0811[00000042x_00020001x] nop - :0:0692:0812[00005600x_00400088x] (ss)(rpt6)nop - :0:0693:0819[00000002x_00000000x] nop - :0:0694:0820[0002005ex_00400008x] bkt #8 - :0:0695:0821[00020020x_00200000x] bkt #0 - :0:0696:0822[001e0414x_00055480x] (rpt4)bkt #21632 - :0:0697:0827[00000000x_00000000x] nop - :0:0698:0828[00000442x_00000480x] (rpt4)nop - :0:0699:0833[00000200x_00080000x] (rpt2)nop - :0:0700:0836[00520000x_00600400x] bkt #1024 - :0:0701:0837[00001200x_00000008x] (ss)(rpt2)nop - :0:0702:0840[00400114x_00201000x] (rpt1)nop - :0:0703:0842[00110100x_00100002x] (eq)(rpt1)nop - :0:0704:0844[00404200x_00200683x] (rpt2)nop - :0:0705:0847[00000090x_00000004x] nop - :0:0706:0848[00502000x_00002000x] nop - :0:0707:0849[00000004x_00000020x] nop - :0:0708:0850[00103100x_00600010x] (ss)(rpt1)nop - :0:0709:0852[00000002x_00000010x] nop - :0:0710:0853[00004000x_00021200x] nop - :0:0711:0854[00000000x_00000000x] nop - :0:0712:0855[00201400x_0010220ax] (ss)(rpt4)nop - :0:0713:0860[00000000x_00030000x] nop - :0:0714:0861[00080040x_00400000x] nop - :0:0715:0862[00000080x_00000002x] nop - :0:0716:0863[00000580x_00000400x] (rpt5)nop - :0:0717:0869[00000200x_00000022x] (rpt2)nop - :0:0718:0872[00080000x_00300042x] nop - :0:0719:0873[00008000x_00040200x] nop - :0:0720:0874[00000000x_00040000x] nop - :0:0721:0875[0012008ax_00000010x] bkt #16 - :0:0722:0876[00000100x_00000000x] (rpt1)nop - :0:0723:0878[00010000x_00010018x] (eq)nop - :0:0724:0879[00500011x_00440020x] nop - :0:0725:0880[00100000x_00000000x] nop - :0:0726:0881[00008200x_0004020cx] (rpt2)nop - :0:0727:0884[00000400x_00100010x] (rpt4)nop - :0:0728:0889[00000004x_00118000x] nop - :0:0729:0890[00000002x_00004200x] nop - :0:0730:0891[00026300x_00000210x] (rpt3)bkt #528 - :0:0731:0895[0000a002x_00000040x] nop - :0:0732:0896[00081100x_00004082x] (ss)(rpt1)nop - :0:0733:0898[00000008x_00210000x] nop - :0:0734:0899[00020004x_00020000x] bkt #0 - :0:0735:0900[00020000x_00064108x] bkt #16648 - :0:0736:0901[00000084x_00020000x] nop - :0:0737:0902[00000181x_00000430x] (rpt1)nop - :0:0738:0904[001c8100x_00100002x] (rpt1)nop - :0:0739:0906[00000000x_00200020x] nop - :0:0740:0907[00100081x_00002000x] nop - :0:0741:0908[00000000x_00000008x] nop - :0:0742:0909[00009420x_00000024x] (ss)(rpt4)nop - :0:0743:0914[00000100x_00002010x] (rpt1)nop - :0:0744:0916[00004188x_00000000x] (rpt1)nop - :0:0745:0918[00100000x_00002000x] nop - :0:0746:0919[00120102x_00040000x] (rpt1)bkt #0 - :0:0747:0921[00040002x_00000000x] nop - :0:0748:0922[00224200x_00210201x] (rpt2)bkt #513 - :0:0749:0925[00000200x_00040000x] (rpt2)nop - :0:0750:0928[0000000cx_00000000x] nop - :0:0751:0929[00000000x_00005000x] nop - :0:0752:0930[00082208x_00010200x] (rpt2)nop - :0:0753:0933[00194011x_00000000x] (eq)nop - :0:0754:0934[00012100x_00000502x] (eq)(rpt1)nop - :0:0755:0936[00000240x_00040050x] (rpt2)nop - :0:0756:0939[00080211x_00004180x] (rpt2)nop - :0:0757:0942[00000000x_00001008x] nop - :0:0758:0943[00020490x_002004a0x] (rpt4)bkt #1184 - :0:0759:0948[00210004x_00001080x] (eq)nop - :0:0760:0949[00000000x_00300040x] nop - :0:0761:0950[00008002x_00000020x] nop - :0:0762:0951[00000000x_00041098x] nop - :0:0763:0952[002000a0x_00000000x] nop - :0:0764:0953[00000000x_000c0400x] nop - :0:0765:0954[00000401x_00000402x] (rpt4)nop - :0:0766:0959[00002000x_00200400x] nop - :0:0767:0960[00000101x_00001000x] (rpt1)nop - :0:0768:0962[00000000x_00000000x] nop - :0:0769:0963[00000000x_00000000x] nop - :0:0770:0964[00000000x_00000000x] nop - :0:0771:0965[00000000x_00000000x] nop - :0:0772:0966[00000000x_00000000x] nop - :0:0773:0967[00000000x_00000000x] nop - :0:0774:0968[00000000x_00000000x] nop - :0:0775:0969[00000000x_00000000x] nop - :0:0776:0970[00000000x_00000000x] nop - :0:0777:0971[00000000x_00000000x] nop - :0:0778:0972[00000000x_00000000x] nop - :0:0779:0973[00000000x_00000000x] nop - :0:0780:0974[00000000x_00000000x] nop - :0:0781:0975[00000000x_00000000x] nop - :0:0782:0976[00000000x_00000000x] nop - :0:0783:0977[00000000x_00000000x] nop - :0:0784:0978[00000000x_00000000x] nop - :0:0785:0979[00000000x_00000000x] nop - :0:0786:0980[00000000x_00000000x] nop - :0:0787:0981[00000000x_00000000x] nop - :0:0788:0982[00000000x_00000000x] nop - :0:0789:0983[00000000x_00000000x] nop - :0:0790:0984[00000000x_00000000x] nop - :0:0791:0985[00000000x_00000000x] nop - :0:0792:0986[00000000x_00000000x] nop - :0:0793:0987[00000000x_00000000x] nop - :0:0794:0988[00000000x_00000000x] nop - :0:0795:0989[00000000x_00000000x] nop - :0:0796:0990[00000000x_00000000x] nop - :0:0797:0991[00000000x_00000000x] nop - :0:0798:0992[00000000x_00000000x] nop - :0:0799:0993[00000000x_00000000x] nop - :0:0800:0994[00000000x_00000000x] nop - :0:0801:0995[00000000x_00000000x] nop - :0:0802:0996[00000000x_00000000x] nop - :0:0803:0997[00000000x_00000000x] nop - :0:0804:0998[00000000x_00000000x] nop - :0:0805:0999[00000000x_00000000x] nop - :0:0806:1000[00000000x_00000000x] nop - :0:0807:1001[00000000x_00000000x] nop - :0:0808:1002[00000000x_00000000x] nop - :0:0809:1003[00000000x_00000000x] nop - :0:0810:1004[00000000x_00000000x] nop - :0:0811:1005[00000000x_00000000x] nop - :0:0812:1006[00000000x_00000000x] nop - :0:0813:1007[00000000x_00000000x] nop - :0:0814:1008[00000000x_00000000x] nop - :0:0815:1009[00000000x_00000000x] nop - :0:0816:1010[00000000x_00000000x] nop - :0:0817:1011[00000000x_00000000x] nop - :0:0818:1012[00000000x_00000000x] nop - :0:0819:1013[00000000x_00000000x] nop - :0:0820:1014[00000000x_00000000x] nop - :0:0821:1015[00000000x_00000000x] nop - :0:0822:1016[00000000x_00000000x] nop - :0:0823:1017[00000000x_00000000x] nop - :0:0824:1018[00000000x_00000000x] nop - :0:0825:1019[00000000x_00000000x] nop - :0:0826:1020[00000000x_00000000x] nop - :0:0827:1021[00000000x_00000000x] nop - :0:0828:1022[00000000x_00000000x] nop - :0:0829:1023[00000000x_00000000x] nop - :0:0830:1024[00000000x_00000000x] nop - :0:0831:1025[00000000x_00000000x] nop - :0:0832:1026[00000000x_00000000x] nop - :0:0833:1027[00000000x_00000000x] nop - :0:0834:1028[00000000x_00000000x] nop - :0:0835:1029[00000000x_00000000x] nop - :0:0836:1030[00000000x_00000000x] nop - :0:0837:1031[00000000x_00000000x] nop - :0:0838:1032[00000000x_00000000x] nop - :0:0839:1033[00000000x_00000000x] nop - :0:0840:1034[00000000x_00000000x] nop - :0:0841:1035[00000000x_00000000x] nop - :0:0842:1036[00000000x_00000000x] nop - :0:0843:1037[00000000x_00000000x] nop - :0:0844:1038[00000000x_00000000x] nop - :0:0845:1039[00000000x_00000000x] nop - :0:0846:1040[00000000x_00000000x] nop - :0:0847:1041[00000000x_00000000x] nop - :0:0848:1042[00000000x_00000000x] nop - :0:0849:1043[00000000x_00000000x] nop - :0:0850:1044[00000000x_00000000x] nop - :0:0851:1045[00000000x_00000000x] nop - :0:0852:1046[00000000x_00000000x] nop - :0:0853:1047[00000000x_00000000x] nop - :0:0854:1048[00000000x_00000000x] nop - :0:0855:1049[00000000x_00000000x] nop - :0:0856:1050[00000000x_00000000x] nop - :0:0857:1051[00000000x_00000000x] nop - :0:0858:1052[00000000x_00000000x] nop - :0:0859:1053[00000000x_00000000x] nop - :0:0860:1054[00000000x_00000000x] nop - :0:0861:1055[00000000x_00000000x] nop - :0:0862:1056[00000000x_00000000x] nop - :0:0863:1057[00000000x_00000000x] nop - :0:0864:1058[00000000x_00000000x] nop - :0:0865:1059[00000000x_00000000x] nop - :0:0866:1060[00000000x_00000000x] nop - :0:0867:1061[00000000x_00000000x] nop - :0:0868:1062[00000000x_00000000x] nop - :0:0869:1063[00000000x_00000000x] nop - :0:0870:1064[00000000x_00000000x] nop - :0:0871:1065[00000000x_00000000x] nop - :0:0872:1066[00000000x_00000000x] nop - :0:0873:1067[00000000x_00000000x] nop - :0:0874:1068[00000000x_00000000x] nop - :0:0875:1069[00000000x_00000000x] nop - :0:0876:1070[00000000x_00000000x] nop - :0:0877:1071[00000000x_00000000x] nop - :0:0878:1072[00000000x_00000000x] nop - :0:0879:1073[00000000x_00000000x] nop - :0:0880:1074[00000000x_00000000x] nop - :0:0881:1075[00000000x_00000000x] nop - :0:0882:1076[00000000x_00000000x] nop - :0:0883:1077[00000000x_00000000x] nop - :0:0884:1078[00000000x_00000000x] nop - :0:0885:1079[00000000x_00000000x] nop - :0:0886:1080[00000000x_00000000x] nop - :0:0887:1081[00000000x_00000000x] nop - :0:0888:1082[00000000x_00000000x] nop - :0:0889:1083[00000000x_00000000x] nop - :0:0890:1084[00000000x_00000000x] nop - :0:0891:1085[00000000x_00000000x] nop - :0:0892:1086[00000000x_00000000x] nop - :0:0893:1087[00000000x_00000000x] nop - :0:0894:1088[00000000x_00000000x] nop - :0:0895:1089[00000000x_00000000x] nop - :0:0896:1090[00000000x_00000000x] nop - :0:0897:1091[00000000x_00000000x] nop - :0:0898:1092[00000000x_00000000x] nop - :0:0899:1093[00000000x_00000000x] nop - :0:0900:1094[00000000x_00000000x] nop - :0:0901:1095[00000000x_00000000x] nop - :0:0902:1096[00000000x_00000000x] nop - :0:0903:1097[00000000x_00000000x] nop - :0:0904:1098[00000000x_00000000x] nop - :0:0905:1099[00000000x_00000000x] nop - :0:0906:1100[00000000x_00000000x] nop - :0:0907:1101[00000000x_00000000x] nop - :0:0908:1102[00000000x_00000000x] nop - :0:0909:1103[00000000x_00000000x] nop - :0:0910:1104[00000000x_00000000x] nop - :0:0911:1105[00000000x_00000000x] nop - :0:0912:1106[00000000x_00000000x] nop - :0:0913:1107[00000000x_00000000x] nop - :0:0914:1108[00000000x_00000000x] nop - :0:0915:1109[00000000x_00000000x] nop - :0:0916:1110[00000000x_00000000x] nop - :0:0917:1111[00000000x_00000000x] nop - :0:0918:1112[00000000x_00000000x] nop - :0:0919:1113[00000000x_00000000x] nop - :0:0920:1114[00000000x_00000000x] nop - :0:0921:1115[00000000x_00000000x] nop - :0:0922:1116[00000000x_00000000x] nop - :0:0923:1117[00000000x_00000000x] nop - :0:0924:1118[00000000x_00000000x] nop - :0:0925:1119[00000000x_00000000x] nop - :0:0926:1120[00000000x_00000000x] nop - :0:0927:1121[00000000x_00000000x] nop - :0:0928:1122[00000000x_00000000x] nop - :0:0929:1123[00000000x_00000000x] nop - :0:0930:1124[00000000x_00000000x] nop - :0:0931:1125[00000000x_00000000x] nop - :0:0932:1126[00000000x_00000000x] nop - :0:0933:1127[00000000x_00000000x] nop - :0:0934:1128[00000000x_00000000x] nop - :0:0935:1129[00000000x_00000000x] nop - :0:0936:1130[00000000x_00000000x] nop - :0:0937:1131[00000000x_00000000x] nop - :0:0938:1132[00000000x_00000000x] nop - :0:0939:1133[00000000x_00000000x] nop - :0:0940:1134[00000000x_00000000x] nop - :0:0941:1135[00000000x_00000000x] nop - :0:0942:1136[00000000x_00000000x] nop - :0:0943:1137[00000000x_00000000x] nop - :0:0944:1138[00000000x_00000000x] nop - :0:0945:1139[00000000x_00000000x] nop - :0:0946:1140[00000000x_00000000x] nop - :0:0947:1141[00000000x_00000000x] nop - :0:0948:1142[00000000x_00000000x] nop - :0:0949:1143[00000000x_00000000x] nop - :0:0950:1144[00000000x_00000000x] nop - :0:0951:1145[00000000x_00000000x] nop - :0:0952:1146[00000000x_00000000x] nop - :0:0953:1147[00000000x_00000000x] nop - :0:0954:1148[00000000x_00000000x] nop - :0:0955:1149[00000000x_00000000x] nop - :0:0956:1150[00000000x_00000000x] nop - :0:0957:1151[00000000x_00000000x] nop - :0:0958:1152[00000000x_00000000x] nop - :0:0959:1153[00000000x_00000000x] nop - :0:0960:1154[00000000x_00000000x] nop - :0:0961:1155[00000000x_00000000x] nop - :0:0962:1156[00000000x_00000000x] nop - :0:0963:1157[00000000x_00000000x] nop - :0:0964:1158[00000000x_00000000x] nop - :0:0965:1159[00000000x_00000000x] nop - :0:0966:1160[00000000x_00000000x] nop - :0:0967:1161[00000000x_00000000x] nop - :0:0968:1162[00000000x_00000000x] nop - :0:0969:1163[00000000x_00000000x] nop - :0:0970:1164[00000000x_00000000x] nop - :0:0971:1165[00000000x_00000000x] nop - :0:0972:1166[00000000x_00000000x] nop - :0:0973:1167[00000000x_00000000x] nop - :0:0974:1168[00000000x_00000000x] nop - :0:0975:1169[00000000x_00000000x] nop - :0:0976:1170[00000000x_00000000x] nop - :0:0977:1171[00000000x_00000000x] nop - :0:0978:1172[00000000x_00000000x] nop - :0:0979:1173[00000000x_00000000x] nop - :0:0980:1174[00000000x_00000000x] nop - :0:0981:1175[00000000x_00000000x] nop - :0:0982:1176[00000000x_00000000x] nop - :0:0983:1177[00000000x_00000000x] nop - :0:0984:1178[00000000x_00000000x] nop - :0:0985:1179[00000000x_00000000x] nop - :0:0986:1180[00000000x_00000000x] nop - :0:0987:1181[00000000x_00000000x] nop - :0:0988:1182[00000000x_00000000x] nop - :0:0989:1183[00000000x_00000000x] nop - :0:0990:1184[00000000x_00000000x] nop - :0:0991:1185[00000000x_00000000x] nop - :0:0992:1186[00000000x_00000000x] nop - :0:0993:1187[00000000x_00000000x] nop - :0:0994:1188[00000000x_00000000x] nop - :0:0995:1189[00000000x_00000000x] nop - :0:0996:1190[00000000x_00000000x] nop - :0:0997:1191[00000000x_00000000x] nop - :0:0998:1192[00000000x_00000000x] nop - :0:0999:1193[00000000x_00000000x] nop - :0:1000:1194[00000000x_00000000x] nop - :0:1001:1195[00000000x_00000000x] nop - :0:1002:1196[00000000x_00000000x] nop - :0:1003:1197[00000000x_00000000x] nop - :0:1004:1198[00000000x_00000000x] nop - :0:1005:1199[00000000x_00000000x] nop - :0:1006:1200[00000000x_00000000x] nop - :0:1007:1201[00000000x_00000000x] nop - :0:1008:1202[00000000x_00000000x] nop - :0:1009:1203[00000000x_00000000x] nop - :0:1010:1204[00000000x_00000000x] nop - :0:1011:1205[00000000x_00000000x] nop - :0:1012:1206[00000000x_00000000x] nop - :0:1013:1207[00000000x_00000000x] nop - :0:1014:1208[00000000x_00000000x] nop - :0:1015:1209[00000000x_00000000x] nop - :0:1016:1210[00000000x_00000000x] nop - :0:1017:1211[00000000x_00000000x] nop - :0:1018:1212[00000000x_00000000x] nop - :0:1019:1213[00000000x_00000000x] nop - :0:1020:1214[00000000x_00000000x] nop - :0:1021:1215[00000000x_00000000x] nop - :0:1022:1216[00000000x_00000000x] nop - :0:1023:1217[00000000x_00000000x] nop + :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], -34 ; dontcare bits in atomic.xor: ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 0xdf vs 0x0, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0x1 vs 0x0 + :6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], -34 ; dontcare bits in atomic.xor: ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 0xdf vs 0x0, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0x1 vs 0x0 + :6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], -34 ; dontcare bits in atomic.xor: ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 0xdf vs 0x0, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0x1 vs 0x0 + :6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], -34 ; dontcare bits in atomic.xor: ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 0xdf vs 0x0, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0x1 vs 0x0 + :6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], -34 ; dontcare bits in atomic.xor: ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 0xdf vs 0x0, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0x1 vs 0x0 + :6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], -34 ; dontcare bits in atomic.xor: ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 0xdf vs 0x0, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0x1 vs 0x0 ----------------------------------------------- 8192 (0x2000) bytes 000000: 00003002 00000000 00000000 00000000 |.0..............| diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log index 3c4e006b4a1..9c09f98e83e 100644 --- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log +++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log @@ -811,14 +811,7 @@ t4 write SP_VS_OBJ_START_LO (a81c) :0:0007:0010[00000000x_00000000x] nop :0:0008:0011[00000000x_00000000x] nop :0:0009:0012[00000000x_00000000x] nop - Register Stats: - - used (half): 8-23 (cnt=16, max=23) - - used (full): 4-11 (cnt=8, max=11) - - input (half): 8-19 (cnt=12, max=19) - - input (full): 4-9 (cnt=6, max=9) - - output (half): 16-23 (cnt=8, max=23) (estimated) - - output (full): 8-11 (cnt=4, max=11) (estimated) - + Stats: - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -838,14 +831,7 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) :0:0007:0010[00000000x_00000000x] nop :0:0008:0011[00000000x_00000000x] nop :0:0009:0012[00000000x_00000000x] nop - Register Stats: - - used (half): 8-23 (cnt=16, max=23) - - used (full): 4-11 (cnt=8, max=11) - - input (half): 8-19 (cnt=12, max=19) - - input (full): 4-9 (cnt=6, max=9) - - output (half): 16-23 (cnt=8, max=23) (estimated) - - output (full): 8-11 (cnt=4, max=11) (estimated) - + Stats: - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -903,14 +889,7 @@ t4 write SP_FS_OBJ_START_LO (a983) :0:0006:0006[00000000x_00000000x] nop :0:0007:0007[00000000x_00000000x] nop :0:0008:0008[00000000x_00000000x] nop - Register Stats: - - used (half): 0-1 4-11 (cnt=10, max=11) - - used (full): 0 2-5 (cnt=5, max=5) - - input (half): 0-1 (cnt=2, max=1) - - input (full): 0 (cnt=1, max=0) - - output (half): 4-11 (cnt=8, max=11) (estimated) - - output (full): 2-5 (cnt=4, max=5) (estimated) - + Stats: - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -929,14 +908,7 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) :0:0006:0006[00000000x_00000000x] nop :0:0007:0007[00000000x_00000000x] nop :0:0008:0008[00000000x_00000000x] nop - Register Stats: - - used (half): 0-1 4-11 (cnt=10, max=11) - - used (full): 0 2-5 (cnt=5, max=5) - - input (half): 0-1 (cnt=2, max=1) - - input (full): 0 (cnt=1, max=0) - - output (half): 4-11 (cnt=8, max=11) (estimated) - - output (full): 2-5 (cnt=4, max=5) (estimated) - + Stats: - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -1497,14 +1469,7 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) :0:0007:0010[00000000x_00000000x] nop :0:0008:0011[00000000x_00000000x] nop :0:0009:0012[00000000x_00000000x] nop - Register Stats: - - used (half): 8-23 (cnt=16, max=23) - - used (full): 4-11 (cnt=8, max=11) - - input (half): 8-19 (cnt=12, max=19) - - input (full): 4-9 (cnt=6, max=9) - - output (half): 16-23 (cnt=8, max=23) (estimated) - - output (full): 8-11 (cnt=4, max=11) (estimated) - + Stats: - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -1532,14 +1497,7 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) :0:0006:0006[00000000x_00000000x] nop :0:0007:0007[00000000x_00000000x] nop :0:0008:0008[00000000x_00000000x] nop - Register Stats: - - used (half): 0-1 4-11 (cnt=10, max=11) - - used (full): 0 2-5 (cnt=5, max=5) - - input (half): 0-1 (cnt=2, max=1) - - input (full): 0 (cnt=1, max=0) - - output (half): 4-11 (cnt=8, max=11) (estimated) - - output (full): 2-5 (cnt=4, max=5) (estimated) - + Stats: - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log index a3cfde19efa..74dcc31c4e4 100644 --- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log +++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log @@ -634,14 +634,7 @@ t4 write SP_VS_OBJ_START_LO (a81c) :0:0002:0002[00000000x_00000000x] nop :0:0003:0003[00000000x_00000000x] nop :0:0004:0004[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): (cnt=0, max=0) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -656,14 +649,7 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) :0:0002:0002[00000000x_00000000x] nop :0:0003:0003[00000000x_00000000x] nop :0:0004:0004[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): (cnt=0, max=0) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -1104,14 +1090,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) :0:0002:0002[00000000x_00000000x] nop :0:0003:0003[00000000x_00000000x] nop :0:0004:0004[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): (cnt=0, max=0) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -1949,14 +1928,7 @@ t4 write SP_VS_OBJ_START_LO (a81c) :0:0002:0002[00000000x_00000000x] nop :0:0003:0003[00000000x_00000000x] nop :0:0004:0004[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): (cnt=0, max=0) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -1971,14 +1943,7 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) :0:0002:0002[00000000x_00000000x] nop :0:0003:0003[00000000x_00000000x] nop :0:0004:0004[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): (cnt=0, max=0) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -2260,12 +2225,12 @@ t4 write SP_FS_OBJ_START_LO (a983) :0:0171:0249[00000200x_00000000x] (rpt2)nop :2:0172:0252[42bb08f8x_20820008x] (nop3) cmps.s.ge p0.x, r2.x, 130 :0:0173:0256[00000200x_00000000x] (rpt2)nop - :0:0174:0259[00800000x_00000464x] br p0.x, #1124 + :0:0174:0259[00800000x_00000464x] br p0.x, #l1298 :1:0175:0260[280cc008x_00000003x] (jp)mov.u32u32 r2.x, r0.w :0:0176:0261[00000200x_00000000x] (rpt2)nop :2:0177:0264[40b808f8x_0008106ex] (nop3) cmps.f.lt p0.x, c27.z, r2.x :0:0178:0268[00000200x_00000000x] (rpt2)nop - :0:0179:0271[00800000x_0000045fx] br p0.x, #1119 + :0:0179:0271[00800000x_0000045fx] br p0.x, #l1298 :1:0180:0272[280cc012x_00000004x] (jp)mov.u32u32 r4.z, r1.x :1:0181:0273[200cc018x_00000004x] mov.u32u32 r6.x, r1.x :1:0182:0274[200cc019x_00000004x] mov.u32u32 r6.y, r1.x @@ -2617,7 +2582,7 @@ t4 write SP_FS_OBJ_START_LO (a983) :2:0528:0937[40580023x_10700023x] (nop2) max.f r8.w, r8.w, c28.x :2:0529:0940[40700026x_103e0025x] mul.f r9.z, r9.y, c15.z :2:0530:0941[40300023x_10290023x] min.f r8.w, r8.w, c10.y - :0:0531:0942[00900000x_000002cdx] br !p0.x, #717 + :0:0531:0942[00900000x_000002cdx] br !p0.x, #l1248 :2:0532:0943[48100028x_10550019x] (jp)add.f r10.x, r6.y, c21.y :2:0533:0944[40700029x_10270019x] mul.f r10.y, r6.y, c9.w :2:0534:0945[40100019x_10590019x] add.f r6.y, r6.y, c22.y @@ -3333,7 +3298,8 @@ t4 write SP_FS_OBJ_START_LO (a983) :2:1244:2198[40700808x_0010001ax] (nop1) mul.f r2.x, r6.z, r4.x :2:1245:2200[40700009x_00180029x] mul.f r2.y, r10.y, r6.x :2:1246:2201[4070000ax_0012001dx] mul.f r2.z, r7.y, r4.z - :0:1247:2202[01000000x_0000000ex] jump #14 + :0:1247:2202[01000000x_0000000ex] jump #l1261 + :1:1248:2203[284cc007x_00000000x] l1248: :1:1248:2203[284cc007x_00000000x] (jp)mov.u32u32 r1.w, 0 :0:1249:2204[00000200x_00000000x] (rpt2)nop :1:1250:2207[200cc010x_00000007x] mov.u32u32 r4.x, r1.w @@ -3347,6 +3313,7 @@ t4 write SP_FS_OBJ_START_LO (a983) :1:1258:2223[200cc010x_00000009x] mov.u32u32 r4.x, r2.y :0:1259:2224[00000200x_00000000x] (rpt2)nop :1:1260:2227[200cc008x_00000010x] mov.u32u32 r2.x, r4.x + :1:1261:2228[280cc012x_00000004x] l1261: :1:1261:2228[280cc012x_00000004x] (jp)mov.u32u32 r4.z, r1.x :1:1262:2229[200cc019x_00000009x] mov.u32u32 r6.y, r2.y :1:1263:2230[200cc01ax_0000000ax] mov.u32u32 r6.z, r2.z @@ -3383,7 +3350,8 @@ t4 write SP_FS_OBJ_START_LO (a983) :1:1294:2266[200cc024x_0000000bx] mov.u32u32 r9.x, r2.w :2:1295:2267[42180006x_20010023x] (nop2) add.u r1.z, r8.w, 1 :1:1296:2270[200cc005x_00000024x] mov.u32u32 r1.y, r9.x - :0:1297:2271[01000000x_fffffb99x] jump #-1127 + :0:1297:2271[01000000x_fffffb99x] jump #l170 + :2:1298:2272[48500401x_00010001x] l1298: :2:1298:2272[48500401x_00010001x] (jp)(sat)max.f r0.y, r0.y, r0.y :2:1299:2273[40500402x_00020002x] (sat)max.f r0.z, r0.z, r0.z :2:1300:2274[40100003x_400e1022x] add.f r0.w, c8.z, (neg)r3.z @@ -3492,14 +3460,7 @@ t4 write SP_FS_OBJ_START_LO (a983) :0:1403:2411[00000000x_00000000x] nop :0:1404:2412[00000000x_00000000x] nop :0:1405:2413[00000000x_00000000x] nop - Register Stats: - - used (half): 0-147 (cnt=148, max=147) - - used (full): 0-73 (cnt=74, max=73) - - input (half): 38-41 (cnt=4, max=41) - - input (full): 19-20 (cnt=2, max=20) - - output (half): 8-15 (cnt=8, max=15) (estimated) - - output (full): 4-7 (cnt=4, max=7) (estimated) - + Stats: - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -3683,12 +3644,12 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) :0:0171:0249[00000200x_00000000x] (rpt2)nop :2:0172:0252[42bb08f8x_20820008x] (nop3) cmps.s.ge p0.x, r2.x, 130 :0:0173:0256[00000200x_00000000x] (rpt2)nop - :0:0174:0259[00800000x_00000464x] br p0.x, #1124 + :0:0174:0259[00800000x_00000464x] br p0.x, #l1298 :1:0175:0260[280cc008x_00000003x] (jp)mov.u32u32 r2.x, r0.w :0:0176:0261[00000200x_00000000x] (rpt2)nop :2:0177:0264[40b808f8x_0008106ex] (nop3) cmps.f.lt p0.x, c27.z, r2.x :0:0178:0268[00000200x_00000000x] (rpt2)nop - :0:0179:0271[00800000x_0000045fx] br p0.x, #1119 + :0:0179:0271[00800000x_0000045fx] br p0.x, #l1298 :1:0180:0272[280cc012x_00000004x] (jp)mov.u32u32 r4.z, r1.x :1:0181:0273[200cc018x_00000004x] mov.u32u32 r6.x, r1.x :1:0182:0274[200cc019x_00000004x] mov.u32u32 r6.y, r1.x @@ -4040,7 +4001,7 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) :2:0528:0937[40580023x_10700023x] (nop2) max.f r8.w, r8.w, c28.x :2:0529:0940[40700026x_103e0025x] mul.f r9.z, r9.y, c15.z :2:0530:0941[40300023x_10290023x] min.f r8.w, r8.w, c10.y - :0:0531:0942[00900000x_000002cdx] br !p0.x, #717 + :0:0531:0942[00900000x_000002cdx] br !p0.x, #l1248 :2:0532:0943[48100028x_10550019x] (jp)add.f r10.x, r6.y, c21.y :2:0533:0944[40700029x_10270019x] mul.f r10.y, r6.y, c9.w :2:0534:0945[40100019x_10590019x] add.f r6.y, r6.y, c22.y @@ -4756,7 +4717,8 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) :2:1244:2198[40700808x_0010001ax] (nop1) mul.f r2.x, r6.z, r4.x :2:1245:2200[40700009x_00180029x] mul.f r2.y, r10.y, r6.x :2:1246:2201[4070000ax_0012001dx] mul.f r2.z, r7.y, r4.z - :0:1247:2202[01000000x_0000000ex] jump #14 + :0:1247:2202[01000000x_0000000ex] jump #l1261 + :1:1248:2203[284cc007x_00000000x] l1248: :1:1248:2203[284cc007x_00000000x] (jp)mov.u32u32 r1.w, 0 :0:1249:2204[00000200x_00000000x] (rpt2)nop :1:1250:2207[200cc010x_00000007x] mov.u32u32 r4.x, r1.w @@ -4770,6 +4732,7 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) :1:1258:2223[200cc010x_00000009x] mov.u32u32 r4.x, r2.y :0:1259:2224[00000200x_00000000x] (rpt2)nop :1:1260:2227[200cc008x_00000010x] mov.u32u32 r2.x, r4.x + :1:1261:2228[280cc012x_00000004x] l1261: :1:1261:2228[280cc012x_00000004x] (jp)mov.u32u32 r4.z, r1.x :1:1262:2229[200cc019x_00000009x] mov.u32u32 r6.y, r2.y :1:1263:2230[200cc01ax_0000000ax] mov.u32u32 r6.z, r2.z @@ -4806,7 +4769,8 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) :1:1294:2266[200cc024x_0000000bx] mov.u32u32 r9.x, r2.w :2:1295:2267[42180006x_20010023x] (nop2) add.u r1.z, r8.w, 1 :1:1296:2270[200cc005x_00000024x] mov.u32u32 r1.y, r9.x - :0:1297:2271[01000000x_fffffb99x] jump #-1127 + :0:1297:2271[01000000x_fffffb99x] jump #l170 + :2:1298:2272[48500401x_00010001x] l1298: :2:1298:2272[48500401x_00010001x] (jp)(sat)max.f r0.y, r0.y, r0.y :2:1299:2273[40500402x_00020002x] (sat)max.f r0.z, r0.z, r0.z :2:1300:2274[40100003x_400e1022x] add.f r0.w, c8.z, (neg)r3.z @@ -4915,14 +4879,7 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) :0:1403:2411[00000000x_00000000x] nop :0:1404:2412[00000000x_00000000x] nop :0:1405:2413[00000000x_00000000x] nop - Register Stats: - - used (half): 0-147 (cnt=148, max=147) - - used (full): 0-73 (cnt=74, max=73) - - input (half): 38-41 (cnt=4, max=41) - - input (full): 19-20 (cnt=2, max=20) - - output (half): 8-15 (cnt=8, max=15) (estimated) - - output (full): 4-7 (cnt=4, max=7) (estimated) - + Stats: - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -5329,14 +5286,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) :0:0002:0002[00000000x_00000000x] nop :0:0003:0003[00000000x_00000000x] nop :0:0004:0004[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): (cnt=0, max=0) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -5535,12 +5485,12 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) :0:0171:0249[00000200x_00000000x] (rpt2)nop :2:0172:0252[42bb08f8x_20820008x] (nop3) cmps.s.ge p0.x, r2.x, 130 :0:0173:0256[00000200x_00000000x] (rpt2)nop - :0:0174:0259[00800000x_00000464x] br p0.x, #1124 + :0:0174:0259[00800000x_00000464x] br p0.x, #l1298 :1:0175:0260[280cc008x_00000003x] (jp)mov.u32u32 r2.x, r0.w :0:0176:0261[00000200x_00000000x] (rpt2)nop :2:0177:0264[40b808f8x_0008106ex] (nop3) cmps.f.lt p0.x, c27.z, r2.x :0:0178:0268[00000200x_00000000x] (rpt2)nop - :0:0179:0271[00800000x_0000045fx] br p0.x, #1119 + :0:0179:0271[00800000x_0000045fx] br p0.x, #l1298 :1:0180:0272[280cc012x_00000004x] (jp)mov.u32u32 r4.z, r1.x :1:0181:0273[200cc018x_00000004x] mov.u32u32 r6.x, r1.x :1:0182:0274[200cc019x_00000004x] mov.u32u32 r6.y, r1.x @@ -5892,7 +5842,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) :2:0528:0937[40580023x_10700023x] (nop2) max.f r8.w, r8.w, c28.x :2:0529:0940[40700026x_103e0025x] mul.f r9.z, r9.y, c15.z :2:0530:0941[40300023x_10290023x] min.f r8.w, r8.w, c10.y - :0:0531:0942[00900000x_000002cdx] br !p0.x, #717 + :0:0531:0942[00900000x_000002cdx] br !p0.x, #l1248 :2:0532:0943[48100028x_10550019x] (jp)add.f r10.x, r6.y, c21.y :2:0533:0944[40700029x_10270019x] mul.f r10.y, r6.y, c9.w :2:0534:0945[40100019x_10590019x] add.f r6.y, r6.y, c22.y @@ -6608,7 +6558,8 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) :2:1244:2198[40700808x_0010001ax] (nop1) mul.f r2.x, r6.z, r4.x :2:1245:2200[40700009x_00180029x] mul.f r2.y, r10.y, r6.x :2:1246:2201[4070000ax_0012001dx] mul.f r2.z, r7.y, r4.z - :0:1247:2202[01000000x_0000000ex] jump #14 + :0:1247:2202[01000000x_0000000ex] jump #l1261 + :1:1248:2203[284cc007x_00000000x] l1248: :1:1248:2203[284cc007x_00000000x] (jp)mov.u32u32 r1.w, 0 :0:1249:2204[00000200x_00000000x] (rpt2)nop :1:1250:2207[200cc010x_00000007x] mov.u32u32 r4.x, r1.w @@ -6622,6 +6573,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) :1:1258:2223[200cc010x_00000009x] mov.u32u32 r4.x, r2.y :0:1259:2224[00000200x_00000000x] (rpt2)nop :1:1260:2227[200cc008x_00000010x] mov.u32u32 r2.x, r4.x + :1:1261:2228[280cc012x_00000004x] l1261: :1:1261:2228[280cc012x_00000004x] (jp)mov.u32u32 r4.z, r1.x :1:1262:2229[200cc019x_00000009x] mov.u32u32 r6.y, r2.y :1:1263:2230[200cc01ax_0000000ax] mov.u32u32 r6.z, r2.z @@ -6658,7 +6610,8 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) :1:1294:2266[200cc024x_0000000bx] mov.u32u32 r9.x, r2.w :2:1295:2267[42180006x_20010023x] (nop2) add.u r1.z, r8.w, 1 :1:1296:2270[200cc005x_00000024x] mov.u32u32 r1.y, r9.x - :0:1297:2271[01000000x_fffffb99x] jump #-1127 + :0:1297:2271[01000000x_fffffb99x] jump #l170 + :2:1298:2272[48500401x_00010001x] l1298: :2:1298:2272[48500401x_00010001x] (jp)(sat)max.f r0.y, r0.y, r0.y :2:1299:2273[40500402x_00020002x] (sat)max.f r0.z, r0.z, r0.z :2:1300:2274[40100003x_400e1022x] add.f r0.w, c8.z, (neg)r3.z @@ -6767,14 +6720,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) :0:1403:2411[00000000x_00000000x] nop :0:1404:2412[00000000x_00000000x] nop :0:1405:2413[00000000x_00000000x] nop - Register Stats: - - used (half): 0-147 (cnt=148, max=147) - - used (full): 0-73 (cnt=74, max=73) - - input (half): 38-41 (cnt=4, max=41) - - input (full): 19-20 (cnt=2, max=20) - - output (half): 8-15 (cnt=8, max=15) (estimated) - - output (full): 4-7 (cnt=4, max=7) (estimated) - + Stats: - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7 diff --git a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log index 576aa98a301..0b02312d015 100644 --- a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log +++ b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log @@ -424,14 +424,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0002:0002[00000000x_00000000x] nop :0:0003:0003[00000000x_00000000x] nop :0:0004:0004[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): (cnt=0, max=0) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -450,14 +443,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0006:0006[00000000x_00000000x] nop :0:0007:0007[00000000x_00000000x] nop :0:0008:0008[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-3 (cnt=4, max=3) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 0-3 (cnt=4, max=3) (estimated) - + Stats: - shaderdb: 9 instr, 4 nops, 5 non-nops, 4 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 1 full, 1 constlen - shaderdb: 5 cat0, 4 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -1038,14 +1024,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) :0:0058:0071[00000000x_00000000x] nop :0:0059:0072[00000000x_00000000x] nop :0:0060:0073[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-13 (cnt=14, max=13) - - input (half): (cnt=0, max=0) - - input (full): 2-5 (cnt=4, max=5) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 6-13 (cnt=8, max=13) (estimated) - + Stats: - shaderdb: 74 instr, 27 nops, 47 non-nops, 7 mov, 1 cov - shaderdb: 0 last-baryf, 0 half, 4 full, 13 constlen - shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -1080,14 +1059,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0008:0008[00000000x_00000000x] nop :0:0009:0009[00000000x_00000000x] nop :0:0010:0010[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-3 (cnt=4, max=3) - - input (half): (cnt=0, max=0) - - input (full): 0-3 (cnt=4, max=3) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov - shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 @@ -1670,14 +1642,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) :0:0053:0064[00000000x_00000000x] nop :0:0054:0065[00000000x_00000000x] nop :0:0055:0066[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-8 10-17 (cnt=17, max=17) - - input (half): (cnt=0, max=0) - - input (full): 2-8 (cnt=7, max=8) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 10-17 (cnt=8, max=17) (estimated) - + Stats: - shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov - shaderdb: 0 last-baryf, 0 half, 5 full, 13 constlen - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -1711,14 +1676,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0008:0008[00000000x_00000000x] nop :0:0009:0009[00000000x_00000000x] nop :0:0010:0010[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-3 (cnt=4, max=3) - - input (half): (cnt=0, max=0) - - input (full): 0-3 (cnt=4, max=3) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov - shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 @@ -2103,14 +2061,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) :0:0053:0064[00000000x_00000000x] nop :0:0054:0065[00000000x_00000000x] nop :0:0055:0066[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-8 10-17 (cnt=17, max=17) - - input (half): (cnt=0, max=0) - - input (full): 2-8 (cnt=7, max=8) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 10-17 (cnt=8, max=17) (estimated) - + Stats: - shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov - shaderdb: 0 last-baryf, 0 half, 5 full, 13 constlen - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -2142,14 +2093,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0006:0006[00000000x_00000000x] nop :0:0007:0007[00000000x_00000000x] nop :0:0008:0008[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0 2-5 (cnt=5, max=5) - - input (half): (cnt=0, max=0) - - input (full): 0 (cnt=1, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 2-5 (cnt=4, max=5) (estimated) - + Stats: - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -2497,14 +2441,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) :0:0058:0071[00000000x_00000000x] nop :0:0059:0072[00000000x_00000000x] nop :0:0060:0073[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-13 (cnt=14, max=13) - - input (half): (cnt=0, max=0) - - input (full): 2-5 (cnt=4, max=5) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 6-13 (cnt=8, max=13) (estimated) - + Stats: - shaderdb: 74 instr, 27 nops, 47 non-nops, 7 mov, 1 cov - shaderdb: 0 last-baryf, 0 half, 4 full, 13 constlen - shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -2539,14 +2476,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0008:0008[00000000x_00000000x] nop :0:0009:0009[00000000x_00000000x] nop :0:0010:0010[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-3 (cnt=4, max=3) - - input (half): (cnt=0, max=0) - - input (full): 0-3 (cnt=4, max=3) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov - shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 @@ -3052,14 +2982,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) :0:0053:0064[00000000x_00000000x] nop :0:0054:0065[00000000x_00000000x] nop :0:0055:0066[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-8 10-17 (cnt=17, max=17) - - input (half): (cnt=0, max=0) - - input (full): 2-8 (cnt=7, max=8) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 10-17 (cnt=8, max=17) (estimated) - + Stats: - shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov - shaderdb: 0 last-baryf, 0 half, 5 full, 13 constlen - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -3093,14 +3016,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0008:0008[00000000x_00000000x] nop :0:0009:0009[00000000x_00000000x] nop :0:0010:0010[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-3 (cnt=4, max=3) - - input (half): (cnt=0, max=0) - - input (full): 0-3 (cnt=4, max=3) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov - shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 @@ -3485,14 +3401,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) :0:0053:0064[00000000x_00000000x] nop :0:0054:0065[00000000x_00000000x] nop :0:0055:0066[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-8 10-17 (cnt=17, max=17) - - input (half): (cnt=0, max=0) - - input (full): 2-8 (cnt=7, max=8) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 10-17 (cnt=8, max=17) (estimated) - + Stats: - shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov - shaderdb: 0 last-baryf, 0 half, 5 full, 13 constlen - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -3524,14 +3433,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0006:0006[00000000x_00000000x] nop :0:0007:0007[00000000x_00000000x] nop :0:0008:0008[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0 2-5 (cnt=5, max=5) - - input (half): (cnt=0, max=0) - - input (full): 0 (cnt=1, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 2-5 (cnt=4, max=5) (estimated) - + Stats: - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -3879,14 +3781,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) :0:0058:0071[00000000x_00000000x] nop :0:0059:0072[00000000x_00000000x] nop :0:0060:0073[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-13 (cnt=14, max=13) - - input (half): (cnt=0, max=0) - - input (full): 2-5 (cnt=4, max=5) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 6-13 (cnt=8, max=13) (estimated) - + Stats: - shaderdb: 74 instr, 27 nops, 47 non-nops, 7 mov, 1 cov - shaderdb: 0 last-baryf, 0 half, 4 full, 13 constlen - shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -3921,14 +3816,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0008:0008[00000000x_00000000x] nop :0:0009:0009[00000000x_00000000x] nop :0:0010:0010[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-3 (cnt=4, max=3) - - input (half): (cnt=0, max=0) - - input (full): 0-3 (cnt=4, max=3) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov - shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 @@ -4434,14 +4322,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) :0:0053:0064[00000000x_00000000x] nop :0:0054:0065[00000000x_00000000x] nop :0:0055:0066[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-8 10-17 (cnt=17, max=17) - - input (half): (cnt=0, max=0) - - input (full): 2-8 (cnt=7, max=8) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 10-17 (cnt=8, max=17) (estimated) - + Stats: - shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov - shaderdb: 0 last-baryf, 0 half, 5 full, 13 constlen - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -4475,14 +4356,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0008:0008[00000000x_00000000x] nop :0:0009:0009[00000000x_00000000x] nop :0:0010:0010[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-3 (cnt=4, max=3) - - input (half): (cnt=0, max=0) - - input (full): 0-3 (cnt=4, max=3) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov - shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 @@ -4867,14 +4741,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) :0:0053:0064[00000000x_00000000x] nop :0:0054:0065[00000000x_00000000x] nop :0:0055:0066[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-8 10-17 (cnt=17, max=17) - - input (half): (cnt=0, max=0) - - input (full): 2-8 (cnt=7, max=8) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 10-17 (cnt=8, max=17) (estimated) - + Stats: - shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov - shaderdb: 0 last-baryf, 0 half, 5 full, 13 constlen - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -4906,14 +4773,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0006:0006[00000000x_00000000x] nop :0:0007:0007[00000000x_00000000x] nop :0:0008:0008[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0 2-5 (cnt=5, max=5) - - input (half): (cnt=0, max=0) - - input (full): 0 (cnt=1, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 2-5 (cnt=4, max=5) (estimated) - + Stats: - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -5211,14 +5071,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0002:0002[00000000x_00000000x] nop :0:0003:0003[00000000x_00000000x] nop :0:0004:0004[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): (cnt=0, max=0) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - + Stats: - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 @@ -5237,14 +5090,7 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0006:0006[00000000x_00000000x] nop :0:0007:0007[00000000x_00000000x] nop :0:0008:0008[00000000x_00000000x] nop - Register Stats: - - used (half): (cnt=0, max=0) - - used (full): 0-3 (cnt=4, max=3) - - input (half): (cnt=0, max=0) - - input (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): 0-3 (cnt=4, max=3) (estimated) - + Stats: - shaderdb: 9 instr, 4 nops, 5 non-nops, 4 mov, 0 cov - shaderdb: 0 last-baryf, 0 half, 1 full, 1 constlen - shaderdb: 5 cat0, 4 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 diff --git a/src/freedreno/common/disasm.h b/src/freedreno/common/disasm.h index 82f2135fbe9..45be3d8422b 100644 --- a/src/freedreno/common/disasm.h +++ b/src/freedreno/common/disasm.h @@ -44,6 +44,8 @@ struct shader_stats { int nops; int ss, sy; int constlen; + int halfreg; + int fullreg; uint16_t sstall; uint16_t mov_count; uint16_t cov_count; diff --git a/src/freedreno/decode/redump.h b/src/freedreno/decode/redump.h index c77344e69c1..1b5d3e1af51 100644 --- a/src/freedreno/decode/redump.h +++ b/src/freedreno/decode/redump.h @@ -66,7 +66,9 @@ void rd_write_section(enum rd_sect_type type, const void *buf, int sz) __attribu #define RD_END() do { if (rd_end) rd_end(); } while (0) #define RD_WRITE_SECTION(t,b,s) do { if (rd_write_section) rd_write_section(t,b,s); } while (0) -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#endif #undef ALIGN #define ALIGN(v,a) (((v) + (a) - 1) & ~((a) - 1)) diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index 184e814fb4c..575a6d49ff8 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -29,17 +29,14 @@ #include #include -#include -#include + +#include "isa/isa.h" #include "disasm.h" #include "instr-a3xx.h" -#include "regmask.h" static enum debug_t debug; -#define printf debug_printf - static const char *levels[] = { "", "\t", @@ -59,235 +56,81 @@ static const char *levels[] = { "x", }; -static const char *component = "xyzw"; - -static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", -}; - struct disasm_ctx { FILE *out; - int level; - unsigned gpu_id; + struct isa_decode_options *options; + unsigned level; + unsigned extra_cycles; - struct shader_stats *stats; - - /* we have to process the dst register after src to avoid tripping up - * the read-before-write detection + /** + * nop_count/has_end used to detect the real end of shader. Since + * in some cases there can be a epilogue following an `end` we look + * for a sequence of `nop`s following the `end` */ - unsigned last_dst; - bool last_dst_full; - bool last_dst_valid; + int nop_count; /* number of nop's since non-nop instruction: */ + bool has_end; /* have we seen end instruction */ - /* current instruction repeat flag: */ - unsigned repeat; - /* current instruction repeat indx/offset (for --expand): */ - unsigned repeatidx; + int cur_n; /* current instr # */ + int cur_opc_cat; /* current opc_cat */ int sfu_delay; - /* tracking for register usage */ - struct { - regmask_t used; - regmask_t rbw; /* read before write */ - regmask_t war; /* write after read */ - } regs; -}; - -static const char *float_imms[] = { - "0.0", - "0.5", - "1.0", - "2.0", - "e", - "pi", - "1/pi", - "1/log2(e)", - "log2(e)", - "1/log2(10)", - "log2(10)", - "4.0", -}; - -static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, - bool is_float, bool r, - bool c, bool im, bool neg, bool abs, bool addr_rel) -{ - const char type = c ? 'c' : 'r'; - - // XXX I prefer - and || for neg/abs, but preserving format used - // by libllvm-a3xx for easy diffing.. - - if (abs && neg) - fprintf(ctx->out, "(absneg)"); - else if (neg) - fprintf(ctx->out, "(neg)"); - else if (abs) - fprintf(ctx->out, "(abs)"); - - if (r) - fprintf(ctx->out, "(r)"); - - if (im) { - unsigned flut_idx = reg.iim_val & 0x3f; - if (is_float && full && flut_idx < ARRAY_SIZE(float_imms)) { - fprintf(ctx->out, "(%s)", float_imms[flut_idx]); - } else if (is_float && flut_idx < ARRAY_SIZE(float_imms)) { - fprintf(ctx->out, "h(%s)", float_imms[flut_idx]); - } else if (full) { - fprintf(ctx->out, "%d", reg.iim_val); - } else { - fprintf(ctx->out, "h(%d)", reg.iim_val); - } - } else if (addr_rel) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - if (reg.iim_val < 0) - fprintf(ctx->out, "%s%c", full ? "" : "h", type, -reg.iim_val); - else if (reg.iim_val > 0) - fprintf(ctx->out, "%s%c", full ? "" : "h", type, reg.iim_val); - else - fprintf(ctx->out, "%s%c", full ? "" : "h", type); - } else if ((reg.num == REG_A0) && !c) { - /* This matches libllvm output, the second (scalar) address register - * seems to be called a1.x instead of a0.y. - */ - fprintf(ctx->out, "a%d.x", reg.comp); - } else if ((reg.num == REG_P0) && !c) { - fprintf(ctx->out, "p0.%c", component[reg.comp]); - } else { - fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); - if (0 && full && !c) { - reg_t hr0 = reg; - hr0.iim_val *= 2; - reg_t hr1 = hr0; - hr1.iim_val += 1; - fprintf(ctx->out, " (hr%d.%c,hr%d.%c)", hr0.num, component[hr0.comp], hr1.num, component[hr1.comp]); - } - } -} - -static void regmask_set(regmask_t *regmask, unsigned num, bool full) -{ - ir3_assert(num < MAX_REG); - __regmask_set(regmask, !full, num); -} - -static void regmask_clear(regmask_t *regmask, unsigned num, bool full) -{ - ir3_assert(num < MAX_REG); - __regmask_clear(regmask, !full, num); -} - -static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full) -{ - ir3_assert(num < MAX_REG); - return __regmask_get(regmask, !full, num); -} - -static unsigned regidx(reg_t reg) -{ - return (4 * reg.num) + reg.comp; -} - -static reg_t idxreg(unsigned idx) -{ - return (reg_t){ - .comp = idx & 0x3, - .num = idx >> 2, - }; -} - -static void print_sequence(struct disasm_ctx *ctx, int first, int last) -{ - if (first != MAX_REG) { - if (first == last) { - fprintf(ctx->out, " %d", first); - } else { - fprintf(ctx->out, " %d-%d", first, last); - } - } -} - -static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full) -{ - int num, max = 0, cnt = 0; - int first, last; - - first = last = MAX_REG; - - for (num = 0; num < MAX_REG; num++) { - if (regmask_get(regmask, num, full)) { - if (num != (last + 1)) { - print_sequence(ctx, first, last); - first = num; - } - last = num; - if (num < (48*4)) - max = num; - cnt++; - } - } - - print_sequence(ctx, first, last); - - fprintf(ctx->out, " (cnt=%d, max=%d)", cnt, max); - - return max; -} - -static void print_reg_stats(struct disasm_ctx *ctx) -{ - int fullreg, halfreg; - - fprintf(ctx->out, "%sRegister Stats:\n", levels[ctx->level]); - fprintf(ctx->out, "%s- used (half):", levels[ctx->level]); - halfreg = print_regs(ctx, &ctx->regs.used, false); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- used (full):", levels[ctx->level]); - fullreg = print_regs(ctx, &ctx->regs.used, true); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- input (half):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.rbw, false); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- input (full):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.rbw, true); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- output (half):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.war, false); - fprintf(ctx->out, " (estimated)\n"); - fprintf(ctx->out, "%s- output (full):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.war, true); - fprintf(ctx->out, " (estimated)\n"); - fprintf(ctx->out, "\n"); - - /* convert to vec4, which is the granularity that registers are - * assigned to shader: + /** + * State accumulated decoding fields of the current instruction, + * handled after decoding is complete (ie. at start of next instr) */ - fullreg = (fullreg + 3) / 4; - halfreg = ctx->regs.used.mergedregs ? 0 : (halfreg + 3) / 4; + struct { + bool ss; + uint8_t nop; + uint8_t repeat; + } last; + /** + * State accumulated decoding fields of src or dst register + */ + struct { + bool half; + bool r; + enum { + FILE_GPR = 1, + FILE_CONST = 2, + } file; + unsigned num; + } reg; + + struct shader_stats *stats; +}; + +static void print_stats(struct disasm_ctx *ctx) +{ + if (ctx->options->gpu_id >= 600) { + /* handle MERGEREGS case.. this isn't *entirely* accurate, as + * you can have shader stages not using merged register file, + * but it is good enough for a guestimate: + */ + unsigned n = (ctx->stats->halfreg + 1) / 2; + + ctx->stats->halfreg = 0; + ctx->stats->fullreg = MAX2(ctx->stats->fullreg, n); + } + + unsigned instructions = ctx->cur_n + ctx->extra_cycles + 1; + + fprintf(ctx->out, "%sStats:\n", levels[ctx->level]); fprintf(ctx->out, "%s- shaderdb: %u instr, %u nops, %u non-nops, %u mov, %u cov\n", levels[ctx->level], - ctx->stats->instructions, + instructions, ctx->stats->nops, - ctx->stats->instructions - ctx->stats->nops, - ctx->stats->mov_count, ctx->stats->cov_count); + instructions - ctx->stats->nops, + ctx->stats->mov_count, + ctx->stats->cov_count); fprintf(ctx->out, "%s- shaderdb: %u last-baryf, %d half, %d full, %u constlen\n", levels[ctx->level], ctx->stats->last_baryf, - halfreg, - fullreg, - ctx->stats->constlen); + DIV_ROUND_UP(ctx->stats->halfreg, 4), + DIV_ROUND_UP(ctx->stats->fullreg, 4), + DIV_ROUND_UP(ctx->stats->constlen, 4)); fprintf(ctx->out, "%s- shaderdb: %u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7\n", levels[ctx->level], @@ -307,1091 +150,13 @@ static void print_reg_stats(struct disasm_ctx *ctx) ctx->stats->sy); } -static void process_reg_dst(struct disasm_ctx *ctx) -{ - if (!ctx->last_dst_valid) - return; - - /* ignore dummy writes (ie. r63.x): */ - if (!VALIDREG(ctx->last_dst)) - return; - - for (unsigned i = 0; i <= ctx->repeat; i++) { - unsigned dst = ctx->last_dst + i; - - regmask_set(&ctx->regs.war, dst, ctx->last_dst_full); - regmask_set(&ctx->regs.used, dst, ctx->last_dst_full); - } - - ctx->last_dst_valid = false; -} -static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) -{ - /* presumably the special registers a0.c and p0.c don't count.. */ - if (!(addr_rel || (reg.num == REG_A0) || (reg.num == REG_P0))) { - ctx->last_dst = regidx(reg); - ctx->last_dst_full = full; - ctx->last_dst_valid = true; - } - reg = idxreg(regidx(reg) + ctx->repeatidx); - print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel); -} - -/* TODO switch to using reginfo struct everywhere, since more readable - * than passing a bunch of bools to print_reg_src - */ - -struct reginfo { - reg_t reg; - bool full; - bool r; - bool c; - bool f; /* src reg is interpreted as float, used for printing immediates */ - bool im; - bool neg; - bool abs; - bool addr_rel; -}; - -static void print_src(struct disasm_ctx *ctx, struct reginfo *info) -{ - reg_t reg = info->reg; - - /* presumably the special registers a0.c and p0.c don't count.. */ - if (!(info->addr_rel || info->c || info->im || - (reg.num == REG_A0) || (reg.num == REG_P0))) { - int i, num = regidx(reg); - for (i = 0; i <= ctx->repeat; i++) { - unsigned src = num + i; - - if (!regmask_get(&ctx->regs.used, src, info->full)) - regmask_set(&ctx->regs.rbw, src, info->full); - - regmask_clear(&ctx->regs.war, src, info->full); - regmask_set(&ctx->regs.used, src, info->full); - - if (!info->r) - break; - } - } else if (info->c) { - unsigned num = regidx(reg); - if (info->r) - num += ctx->repeat; - num = DIV_ROUND_UP(num, 4); - ctx->stats->constlen = MAX2(ctx->stats->constlen, num); - } - - if (info->r) - reg = idxreg(regidx(info->reg) + ctx->repeatidx); - - print_reg(ctx, reg, info->full, info->f, info->r, info->c, info->im, - info->neg, info->abs, info->addr_rel); -} - -//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info) -//{ -// print_reg_dst(ctx, info->reg, info->full, info->addr_rel); -//} - -static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr) -{ - static const struct { - const char *suffix; - int nsrc; - bool idx; - } brinfo[7] = { - [BRANCH_PLAIN] = { "r", 1, false }, - [BRANCH_OR] = { "rao", 2, false }, - [BRANCH_AND] = { "raa", 2, false }, - [BRANCH_CONST] = { "rac", 0, true }, - [BRANCH_ANY] = { "any", 1, false }, - [BRANCH_ALL] = { "all", 1, false }, - [BRANCH_X] = { "rax", 0, false }, - }; - instr_cat0_t *cat0 = &instr->cat0; - - switch (instr_opc(instr, ctx->gpu_id)) { - case OPC_KILL: - case OPC_PREDT: - case OPC_PREDF: - fprintf(ctx->out, " %sp0.%c", cat0->inv1 ? "!" : "", - component[cat0->comp1]); - break; - case OPC_B: - fprintf(ctx->out, "%s", brinfo[cat0->brtype].suffix); - if (brinfo[cat0->brtype].idx) { - fprintf(ctx->out, ".%u", cat0->idx); - } - if (brinfo[cat0->brtype].nsrc >= 1) { - fprintf(ctx->out, " %sp0.%c,", cat0->inv1 ? "!" : "", - component[cat0->comp1]); - } - if (brinfo[cat0->brtype].nsrc >= 2) { - fprintf(ctx->out, " %sp0.%c,", cat0->inv2 ? "!" : "", - component[cat0->comp2]); - } - fprintf(ctx->out, " #%d", cat0->a3xx.immed); - break; - case OPC_JUMP: - case OPC_CALL: - case OPC_BKT: - case OPC_GETONE: - case OPC_SHPS: - fprintf(ctx->out, " #%d", cat0->a3xx.immed); - break; - } - - if ((debug & PRINT_VERBOSE) && (cat0->dummy3|cat0->dummy4)) - fprintf(ctx->out, "\t{0: %x,%x}", cat0->dummy3, cat0->dummy4); -} - -static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat1_t *cat1 = &instr->cat1; - - switch (_OPC(1, cat1->opc)) { - case OPC_MOV: - if (cat1->src_type == cat1->dst_type) { - reg_t dst = (reg_t)cat1->dst; - if ((cat1->src_type == TYPE_S16) && (dst.num == REG_A0) && (dst.comp == 0)) { - /* special case (nmemonic?): */ - fprintf(ctx->out, "mova"); - } else if ((cat1->src_type == TYPE_U16) && (dst.num == REG_A0) && (dst.comp == 1)) { - fprintf(ctx->out, "mova1"); - } else { - fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - } else { - fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - - fprintf(ctx->out, " "); - - if (cat1->even) - fprintf(ctx->out, "(even)"); - - if (cat1->pos_inf) - fprintf(ctx->out, "(pos_infinity)"); - - print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, - cat1->dst_rel); - - fprintf(ctx->out, ", "); - - /* ugg, have to special case this.. vs print_reg().. */ - if (cat1->src_im) { - if (type_float(cat1->src_type)) { - if (type_size(cat1->src_type) < 32) { - fprintf(ctx->out, "h(%f)", _mesa_half_to_float(cat1->uim_val)); - } else { - fprintf(ctx->out, "(%f)", cat1->fim_val); - } - } else if (type_uint(cat1->src_type) && (cat1->uim_val > 0x1000)) { - /* Print large uint as hex, which differs from blob, but ir3 - * will generate mov.u32u32 for floats sometimes, and this is - * easier to see in hex than dec - */ - fprintf(ctx->out, "0x%08x", cat1->uim_val); - } else { - if ((type_size(cat1->src_type) < 32) && (cat1->uim_val & 0x8000)) { - /* need sign extension for signed half immed: */ - fprintf(ctx->out, "-%d", 0x10000 - (cat1->uim_val & 0xffff)); - } else { - fprintf(ctx->out, "%d", cat1->iim_val); - } - } - } else if (cat1->src_rel && !cat1->src_c) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - char type = cat1->src_rel_c ? 'c' : 'r'; - const char *full = (type_size(cat1->src_type) == 32) ? "" : "h"; - if (cat1->off < 0) - fprintf(ctx->out, "%s%c", full, type, -cat1->off); - else if (cat1->off > 0) - fprintf(ctx->out, "%s%c", full, type, cat1->off); - else - fprintf(ctx->out, "%s%c", full, type); - } else { - struct reginfo src = { - .reg = (reg_t)cat1->src, - .full = type_size(cat1->src_type) == 32, - .r = cat1->src_r, - .c = cat1->src_c, - .im = cat1->src_im, - }; - print_src(ctx, &src); - } - break; - case OPC_MOVMSK: - fprintf(ctx->out, ".w%u", (cat1->repeat + 1) * 32); - fprintf(ctx->out, " "); - print_reg_dst(ctx, (reg_t)(cat1->dst), true, cat1->dst_rel); - break; - } -} - -static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat2_t *cat2 = &instr->cat2; - int opc = _OPC(2, cat2->opc); - static const char *cond[] = { - "lt", - "le", - "gt", - "ge", - "eq", - "ne", - "?6?", - }; - - switch (opc) { - case OPC_CMPS_F: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_CMPV_F: - case OPC_CMPV_U: - case OPC_CMPV_S: - fprintf(ctx->out, ".%s", cond[cat2->cond]); - break; - } - - fprintf(ctx->out, " "); - if (cat2->ei) - fprintf(ctx->out, "(ei)"); - print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); - fprintf(ctx->out, ", "); - - struct reginfo src1 = { - .full = cat2->full, - .r = cat2->repeat ? cat2->src1_r : 0, - .f = is_cat2_float(opc), - .im = cat2->src1_im, - .abs = cat2->src1_abs, - .neg = cat2->src1_neg, - }; - - if (cat2->c1.src1_c) { - src1.reg = (reg_t)(cat2->c1.src1); - src1.c = true; - } else if (cat2->rel1.src1_rel) { - src1.reg = (reg_t)(cat2->rel1.src1); - src1.c = cat2->rel1.src1_c; - src1.addr_rel = true; - } else { - src1.reg = (reg_t)(cat2->src1); - } - print_src(ctx, &src1); - - struct reginfo src2 = { - .r = cat2->repeat ? cat2->src2_r : 0, - .full = cat2->full, - .f = is_cat2_float(opc), - .abs = cat2->src2_abs, - .neg = cat2->src2_neg, - .im = cat2->src2_im, - }; - switch (opc) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - break; - default: - fprintf(ctx->out, ", "); - if (cat2->c2.src2_c) { - src2.reg = (reg_t)(cat2->c2.src2); - src2.c = true; - } else if (cat2->rel2.src2_rel) { - src2.reg = (reg_t)(cat2->rel2.src2); - src2.c = cat2->rel2.src2_c; - src2.addr_rel = true; - } else { - src2.reg = (reg_t)(cat2->src2); - } - print_src(ctx, &src2); - break; - } -} - -static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat3_t *cat3 = &instr->cat3; - bool full = instr_cat3_full(cat3); - - fprintf(ctx->out, " "); - print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false); - fprintf(ctx->out, ", "); - - struct reginfo src1 = { - .r = cat3->repeat ? cat3->src1_r : 0, - .full = full, - .neg = cat3->src1_neg, - }; - if (cat3->c1.src1_c) { - src1.reg = (reg_t)(cat3->c1.src1); - src1.c = true; - } else if (cat3->rel1.src1_rel) { - src1.reg = (reg_t)(cat3->rel1.src1); - src1.c = cat3->rel1.src1_c; - src1.addr_rel = true; - } else { - src1.reg = (reg_t)(cat3->src1); - } - print_src(ctx, &src1); - - fprintf(ctx->out, ", "); - struct reginfo src2 = { - .reg = (reg_t)cat3->src2, - .full = full, - .r = cat3->repeat ? cat3->src2_r : 0, - .c = cat3->src2_c, - .neg = cat3->src2_neg, - }; - print_src(ctx, &src2); - - fprintf(ctx->out, ", "); - struct reginfo src3 = { - .r = cat3->src3_r, - .full = full, - .neg = cat3->src3_neg, - }; - if (cat3->c2.src3_c) { - src3.reg = (reg_t)(cat3->c2.src3); - src3.c = true; - } else if (cat3->rel2.src3_rel) { - src3.reg = (reg_t)(cat3->rel2.src3); - src3.c = cat3->rel2.src3_c; - src3.addr_rel = true; - } else { - src3.reg = (reg_t)(cat3->src3); - } - print_src(ctx, &src3); -} - -static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat4_t *cat4 = &instr->cat4; - - fprintf(ctx->out, " "); - print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); - fprintf(ctx->out, ", "); - - struct reginfo src = { - .r = cat4->src_r, - .im = cat4->src_im, - .full = cat4->full, - .neg = cat4->src_neg, - .abs = cat4->src_abs, - }; - if (cat4->c.src_c) { - src.reg = (reg_t)(cat4->c.src); - src.c = true; - } else if (cat4->rel.src_rel) { - src.reg = (reg_t)(cat4->rel.src); - src.c = cat4->rel.src_c; - src.addr_rel = true; - } else { - src.reg = (reg_t)(cat4->src); - } - print_src(ctx, &src); - - if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) - fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); -} - -static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr) -{ - static const struct { - bool src1, src2, samp, tex; - } info[0x1f] = { - [opc_op(OPC_ISAM)] = { true, false, true, true, }, - [opc_op(OPC_ISAML)] = { true, true, true, true, }, - [opc_op(OPC_ISAMM)] = { true, false, true, true, }, - [opc_op(OPC_SAM)] = { true, false, true, true, }, - [opc_op(OPC_SAMB)] = { true, true, true, true, }, - [opc_op(OPC_SAML)] = { true, true, true, true, }, - [opc_op(OPC_SAMGQ)] = { true, false, true, true, }, - [opc_op(OPC_GETLOD)] = { true, false, true, true, }, - [opc_op(OPC_CONV)] = { true, true, true, true, }, - [opc_op(OPC_CONVM)] = { true, true, true, true, }, - [opc_op(OPC_GETSIZE)] = { true, false, false, true, }, - [opc_op(OPC_GETBUF)] = { false, false, false, true, }, - [opc_op(OPC_GETPOS)] = { true, false, false, true, }, - [opc_op(OPC_GETINFO)] = { false, false, false, true, }, - [opc_op(OPC_DSX)] = { true, false, false, false, }, - [opc_op(OPC_DSY)] = { true, false, false, false, }, - [opc_op(OPC_GATHER4R)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP0)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, - [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, - [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, - [opc_op(OPC_RGETPOS)] = { true, false, false, false, }, - [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, - }; - - static const struct { - bool indirect; - bool bindless; - bool use_a1; - bool uniform; - } desc_features[8] = { - [CAT5_NONUNIFORM] = { .indirect = true, }, - [CAT5_UNIFORM] = { .indirect = true, .uniform = true, }, - [CAT5_BINDLESS_IMM] = { .bindless = true, }, - [CAT5_BINDLESS_UNIFORM] = { - .bindless = true, - .indirect = true, - .uniform = true, - }, - [CAT5_BINDLESS_NONUNIFORM] = { - .bindless = true, - .indirect = true, - }, - [CAT5_BINDLESS_A1_IMM] = { - .bindless = true, - .use_a1 = true, - }, - [CAT5_BINDLESS_A1_UNIFORM] = { - .bindless = true, - .indirect = true, - .uniform = true, - .use_a1 = true, - }, - [CAT5_BINDLESS_A1_NONUNIFORM] = { - .bindless = true, - .indirect = true, - .use_a1 = true, - }, - }; - - instr_cat5_t *cat5 = &instr->cat5; - int i; - - bool desc_indirect = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].indirect; - bool bindless = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].bindless; - bool use_a1 = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].use_a1; - bool uniform = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].uniform; - - if (cat5->is_3d) fprintf(ctx->out, ".3d"); - if (cat5->is_a) fprintf(ctx->out, ".a"); - if (cat5->is_o) fprintf(ctx->out, ".o"); - if (cat5->is_p) fprintf(ctx->out, ".p"); - if (cat5->is_s) fprintf(ctx->out, ".s"); - if (desc_indirect) fprintf(ctx->out, ".s2en"); - if (uniform) fprintf(ctx->out, ".uniform"); - - if (bindless) { - unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo; - fprintf(ctx->out, ".base%d", base); - } - - fprintf(ctx->out, " "); - - switch (_OPC(5, cat5->opc)) { - case OPC_DSXPP_1: - case OPC_DSYPP_1: - break; - default: - fprintf(ctx->out, "(%s)", type[cat5->type]); - break; - } - - fprintf(ctx->out, "("); - for (i = 0; i < 4; i++) - if (cat5->wrmask & (1 << i)) - fprintf(ctx->out, "%c", "xyzw"[i]); - fprintf(ctx->out, ")"); - - print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false); - - if (info[cat5->opc].src1) { - fprintf(ctx->out, ", "); - struct reginfo src = { .reg = (reg_t)(cat5->src1), .full = cat5->full }; - print_src(ctx, &src); - } - - if (cat5->is_o || info[cat5->opc].src2) { - fprintf(ctx->out, ", "); - struct reginfo src = { .reg = (reg_t)(cat5->src2), .full = cat5->full }; - print_src(ctx, &src); - } - if (cat5->is_s2en_bindless) { - if (!desc_indirect) { - if (info[cat5->opc].samp) { - if (use_a1) - fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3); - else - fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf); - } - - if (info[cat5->opc].tex && !use_a1) { - fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4); - } - } - } else { - if (info[cat5->opc].samp) - fprintf(ctx->out, ", s#%d", cat5->norm.samp); - if (info[cat5->opc].tex) - fprintf(ctx->out, ", t#%d", cat5->norm.tex); - } - - if (desc_indirect) { - fprintf(ctx->out, ", "); - struct reginfo src = { .reg = (reg_t)(cat5->s2en_bindless.src3), .full = bindless }; - print_src(ctx, &src); - } - - if (use_a1) - fprintf(ctx->out, ", a1.x"); - - if (debug & PRINT_VERBOSE) { - if (cat5->is_s2en_bindless) { - if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1) - fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1); - } else { - if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1) - fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1); - } - } -} - -static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat6_t *cat6 = &instr->cat6; - char sd = 0, ss = 0; /* dst/src address space */ - bool nodst = false; - struct reginfo dst, src1, src2, ssbo; - int src1off = 0; - - memset(&dst, 0, sizeof(dst)); - memset(&src1, 0, sizeof(src1)); - memset(&src2, 0, sizeof(src2)); - memset(&ssbo, 0, sizeof(ssbo)); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - dst.full = type_size(cat6->type) == 32; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - case OPC_L2G: - case OPC_G2L: - dst.full = true; - src1.full = true; - src2.full = true; - break; - case OPC_STG: - case OPC_STL: - case OPC_STP: - case OPC_STLW: - case OPC_STIB: - dst.full = type_size(cat6->type) == 32; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - default: - dst.full = type_size(cat6->type) == 32; - src1.full = true; - src2.full = true; - break; - } - - switch (_OPC(6, cat6->opc)) { - case OPC_PREFETCH: - break; - case OPC_RESINFO: - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - break; - case OPC_LDGB: - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - break; - case OPC_STGB: - case OPC_STIB: - fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->stgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1); - break; - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - ss = cat6->g ? 'g' : 'l'; - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - fprintf(ctx->out, ".%c", ss); - break; - case OPC_LDG: - fprintf(ctx->out, ".%s", type[cat6->type]); - break; - default: - dst.im = cat6->g && !cat6->dst_off; - if (dst.im) - dst.full = true; - fprintf(ctx->out, ".%s", type[cat6->type]); - break; - } - fprintf(ctx->out, " "); - - switch (_OPC(6, cat6->opc)) { - case OPC_STG: - sd = 'g'; - break; - case OPC_STP: - sd = 'p'; - break; - case OPC_STL: - case OPC_STLW: - sd = 'l'; - break; - - case OPC_LDG: - case OPC_LDC: - ss = 'g'; - break; - case OPC_LDP: - ss = 'p'; - break; - case OPC_LDL: - case OPC_LDLW: - case OPC_LDLV: - ss = 'l'; - break; - - case OPC_L2G: - ss = 'l'; - sd = 'g'; - break; - - case OPC_G2L: - ss = 'g'; - sd = 'l'; - break; - - case OPC_PREFETCH: - ss = 'g'; - nodst = true; - break; - } - - if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) { - struct reginfo src3; - - memset(&src3, 0, sizeof(src3)); - - src1.reg = (reg_t)(cat6->stgb.src1); - src2.reg = (reg_t)(cat6->stgb.src2); - src2.im = cat6->stgb.src2_im; - if (src2.im) - src2.full = true; - src3.reg = (reg_t)(cat6->stgb.src3); - src3.im = cat6->stgb.src3_im; - src3.full = true; - - fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo); - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - fprintf(ctx->out, ", "); - print_src(ctx, &src3); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3); - - return; - } - - if (is_atomic(_OPC(6, cat6->opc))) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - if (src1.im) - src1.full = true; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - if (src2.im) - src2.full = true; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - if (ss == 'g') { - struct reginfo src3; - memset(&src3, 0, sizeof(src3)); - - src3.reg = (reg_t)(cat6->ldgb.src3); - src3.full = true; - - /* For images, the ".typed" variant is used and src2 is - * the ivecN coordinates, ie ivec2 for 2d. - * - * For SSBOs, the ".untyped" variant is used and src2 is - * a simple dword offset.. src3 appears to be - * uvec2(offset * 4, 0). Not sure the point of that. - */ - - fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); - print_src(ctx, &src1); /* value */ - fprintf(ctx->out, ", "); - print_src(ctx, &src2); /* offset/coords */ - fprintf(ctx->out, ", "); - print_src(ctx, &src3); /* 64b byte offset.. */ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (pad0=%x, mustbe0=%x)", cat6->ldgb.pad0, - cat6->ldgb.mustbe0); - } - } else { /* ss == 'l' */ - fprintf(ctx->out, "l["); - print_src(ctx, &src1); /* simple byte offset */ - fprintf(ctx->out, "], "); - print_src(ctx, &src2); /* value */ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (src3=%x, pad0=%x, src_ssbo_im=%x, mustbe0=%x)", - cat6->ldgb.src3, cat6->ldgb.pad0, - cat6->ldgb.src_ssbo_im, cat6->ldgb.mustbe0); - } - } - - return; - } else if (_OPC(6, cat6->opc) == OPC_RESINFO) { - dst.reg = (reg_t)(cat6->ldgb.dst); - ssbo.reg = (reg_t)(cat6->ldgb.src_ssbo); - ssbo.im = cat6->ldgb.src_ssbo_im; - if (ssbo.im) - ssbo.full = true; - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - - fprintf(ctx->out, "g["); - print_src(ctx, &ssbo); - fprintf(ctx->out, "]"); - - return; - } else if (_OPC(6, cat6->opc) == OPC_LDGB) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - if (src1.im) - src1.full = true; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - if (src2.im) - src2.full = true; - ssbo.full = true; - ssbo.reg = (reg_t)(cat6->ldgb.src_ssbo); - ssbo.im = cat6->ldgb.src_ssbo_im; - if (ssbo.im) - ssbo.full = true; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - - fprintf(ctx->out, "g["); - print_src(ctx, &ssbo); - fprintf(ctx->out, "], "); - - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, ssbo_im=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.src_ssbo_im, cat6->ldgb.mustbe0); - - return; - } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src3_im) { - struct reginfo src3; - - memset(&src3, 0, sizeof(src3)); - src1.reg = (reg_t)(cat6->a.src1); - src3.reg = (reg_t)(cat6->a.src3); - src3.im = cat6->a.src3_im; - if (src3.im) - src3.full = true; - src2.reg = (reg_t)(cat6->a.off); - src2.full = true; - dst.reg = (reg_t)(cat6->d.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", g["); - print_src(ctx, &src1); - fprintf(ctx->out, "+"); - print_src(ctx, &src2); - fprintf(ctx->out, "], "); - print_src(ctx, &src3); - - return; - } - - if (cat6->src_off) { - src1.reg = (reg_t)(cat6->a.src1); - src1.im = cat6->a.src1_im; - src2.reg = (reg_t)(cat6->a.src3); - src2.im = cat6->a.src3_im; - src1off = cat6->a.off; - } else { - src1.reg = (reg_t)(cat6->b.src1); - src1.im = cat6->b.src1_im; - src2.reg = (reg_t)(cat6->b.src2); - src2.im = cat6->b.src2_im; - } - - if (src1.im) - src1.full = true; - if (src2.im) - src2.full = true; - - if (!nodst) { - if (sd) - fprintf(ctx->out, "%c[", sd); - /* note: dst might actually be a src (ie. address to store to) */ - if (cat6->dst_off) { - dst.reg = (reg_t)(cat6->c.dst); - print_src(ctx, &dst); - if (cat6->g) { - struct reginfo dstoff_reg = { - .reg = (reg_t) cat6->c.off, - .full = true - }; - fprintf(ctx->out, "+"); - print_src(ctx, &dstoff_reg); - } else if (cat6->c.off || cat6->c.off_high) { - fprintf(ctx->out, "%+d", ((uint32_t)cat6->c.off_high << 8) | cat6->c.off); - } - } else { - dst.reg = (reg_t)(cat6->d.dst); - print_src(ctx, &dst); - } - if (sd) - fprintf(ctx->out, "]"); - fprintf(ctx->out, ", "); - } - - if (ss) - fprintf(ctx->out, "%c[", ss); - - /* can have a larger than normal immed, so hack: */ - if (src1.im) { - fprintf(ctx->out, "%u", src1.reg.dummy13); - } else { - print_src(ctx, &src1); - } - - if (cat6->src_off && cat6->g) - print_src(ctx, &src2); - else if (src1off) - fprintf(ctx->out, "%+d", src1off); - if (ss) - fprintf(ctx->out, "]"); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - break; - default: - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - break; - } -} - -static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx; - struct reginfo src1, src2, ssbo; - uint32_t opc = _OPC(6, cat6->opc); - bool is_id = opc == OPC_GETSPID || opc == OPC_GETWID; - bool uses_type = opc != OPC_LDC; - - static const struct { - bool indirect; - bool bindless; - const char *name; - } desc_features[8] = { - [CAT6_IMM] = { - .name = "imm" - }, - [CAT6_UNIFORM] = { - .indirect = true, - .name = "uniform" - }, - [CAT6_NONUNIFORM] = { - .indirect = true, - .name = "nonuniform" - }, - [CAT6_BINDLESS_IMM] = { - .bindless = true, - .name = "imm" - }, - [CAT6_BINDLESS_UNIFORM] = { - .bindless = true, - .indirect = true, - .name = "uniform" - }, - [CAT6_BINDLESS_NONUNIFORM] = { - .bindless = true, - .indirect = true, - .name = "nonuniform" - }, - }; - - bool indirect_ssbo = desc_features[cat6->desc_mode].indirect; - bool bindless = desc_features[cat6->desc_mode].bindless; - bool type_full = cat6->type != TYPE_U16; - - - memset(&src1, 0, sizeof(src1)); - memset(&src2, 0, sizeof(src2)); - memset(&ssbo, 0, sizeof(ssbo)); - - /* disambiguate from pre-bindless variants: */ - switch (opc) { - case OPC_RESINFO: - case OPC_LDIB: - case OPC_STIB: - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - fprintf(ctx->out, ".b"); - break; - default: - break; - } - - if (uses_type) { - if (!is_id) { - fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->d + 1); - } - fprintf(ctx->out, ".%s", type[cat6->type]); - } else { - fprintf(ctx->out, ".offset%d", cat6->d); - } - - if (!is_id) { - fprintf(ctx->out, ".%u", cat6->type_size + 1); - fprintf(ctx->out, ".%s", desc_features[cat6->desc_mode].name); - - if (bindless) - fprintf(ctx->out, ".base%d", cat6->base); - } - - fprintf(ctx->out, " "); - - src2.reg = (reg_t)(cat6->src2); - src2.full = type_full; - print_src(ctx, &src2); - - if (!is_id) { - fprintf(ctx->out, ", "); - - if (opc != OPC_RESINFO) { - src1.reg = (reg_t)(cat6->src1); - src1.full = true; // XXX - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - } - - ssbo.reg = (reg_t)(cat6->ssbo); - ssbo.im = !indirect_ssbo; - ssbo.full = true; - print_src(ctx, &ssbo); - } - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)", - cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5); - } -} - -static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr) -{ - if (!is_cat6_legacy(instr, ctx->gpu_id)) { - print_instr_cat6_a6xx(ctx, instr); - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " NEW"); - } else { - print_instr_cat6_a3xx(ctx, instr); - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " LEGACY"); - } -} -static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat7_t *cat7 = &instr->cat7; - - if (cat7->g) - fprintf(ctx->out, ".g"); - if (cat7->l) - fprintf(ctx->out, ".l"); - - if (_OPC(7, cat7->opc) == OPC_FENCE) { - if (cat7->r) - fprintf(ctx->out, ".r"); - if (cat7->w) - fprintf(ctx->out, ".w"); - } -} - /* size of largest OPC field of all the instruction categories: */ #define NOPC_BITS 6 static const struct opc_info { - uint16_t cat; - uint16_t opc; const char *name; - void (*print)(struct disasm_ctx *ctx, instr_t *instr); } opcs[1 << (3+NOPC_BITS)] = { -#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } +#define OPC(cat, opc, name) [(opc)] = { #name } /* category 0: */ OPC(0, OPC_NOP, nop), OPC(0, OPC_B, b), @@ -1586,175 +351,187 @@ const char *disasm_a3xx_instr_name(opc_t opc) return opcs[opc].name; } -static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr) + +static void +disasm_field_cb(void *d, const char *field_name, struct isa_decode_value *val) { - const char *name = GETINFO(instr)->name; - uint32_t opc = instr_opc(instr, ctx->gpu_id); + struct disasm_ctx *ctx = d; - if (name) { - fprintf(ctx->out, "%s", name); - GETINFO(instr)->print(ctx, instr); - } else { - fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc); - - switch (instr->opc_cat) { - case 0: print_instr_cat0(ctx, instr); break; - case 1: print_instr_cat1(ctx, instr); break; - case 2: print_instr_cat2(ctx, instr); break; - case 3: print_instr_cat3(ctx, instr); break; - case 4: print_instr_cat4(ctx, instr); break; - case 5: print_instr_cat5(ctx, instr); break; - case 6: print_instr_cat6(ctx, instr); break; - case 7: print_instr_cat7(ctx, instr); break; + if (!strcmp(field_name, "NAME")) { + if (!strcmp("nop", val->str)) { + if (ctx->has_end) { + ctx->nop_count++; + if (ctx->nop_count > 3) { + ctx->options->stop = true; + } + } + ctx->stats->nops += 1 + ctx->last.repeat; + } else { + ctx->nop_count = 0; } + + if (!strcmp("end", val->str)) { + ctx->has_end = true; + ctx->nop_count = 0; + } else if (!strcmp("chsh", val->str)) { + ctx->options->stop = true; + } else if (!strcmp("bary.f", val->str)) { + ctx->stats->last_baryf = ctx->cur_n; + } + } else if (!strcmp(field_name, "REPEAT")) { + ctx->extra_cycles += val->num; + ctx->stats->instrs_per_cat[ctx->cur_opc_cat] += val->num; + ctx->last.repeat = val->num; + } else if (!strcmp(field_name, "NOP")) { + ctx->extra_cycles += val->num; + ctx->stats->instrs_per_cat[0] += val->num; + ctx->stats->nops += val->num; + ctx->last.nop = val->num; + } else if (!strcmp(field_name, "SY")) { + ctx->stats->sy += val->num; + } else if (!strcmp(field_name, "SS")) { + ctx->stats->ss += val->num; + ctx->last.ss = !!val->num; + } else if (!strcmp(field_name, "CONST")) { + ctx->reg.num = val->num; + ctx->reg.file = FILE_CONST; + } else if (!strcmp(field_name, "GPR")) { + /* don't count GPR regs r48.x (shared) or higher: */ + if (val->num < 48) { + ctx->reg.num = val->num; + ctx->reg.file = FILE_GPR; + } + } else if (!strcmp(field_name, "SRC_R") || + !strcmp(field_name, "SRC1_R") || + !strcmp(field_name, "SRC2_R") || + !strcmp(field_name, "SRC3_R")) { + ctx->reg.r = val->num; + } else if (!strcmp(field_name, "DST")) { + /* Dest register is always repeated + * + * Note that this doesn't really properly handle instructions + * that write multiple components.. the old disasm didn't handle + * that case either. + */ + ctx->reg.r = true; + } else if (strstr(field_name, "HALF")) { + ctx->reg.half = val->num; + } else if (!strcmp(field_name, "SWIZ")) { + unsigned num = (ctx->reg.num << 2) | val->num; + if (ctx->reg.r) + num += ctx->last.repeat; + + if (ctx->reg.file == FILE_CONST) { + ctx->stats->constlen = MAX2(ctx->stats->constlen, num); + } else if (ctx->reg.file == FILE_GPR) { + if (ctx->reg.half) { + ctx->stats->halfreg = MAX2(ctx->stats->halfreg, num); + } else { + ctx->stats->fullreg = MAX2(ctx->stats->fullreg, num); + } + } + + memset(&ctx->reg, 0, sizeof(ctx->reg)); } } -static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) +/** + * Handle stat updates dealt with at the end of instruction decoding, + * ie. before beginning of next instruction + */ +static void +disasm_handle_last(struct disasm_ctx *ctx) { - instr_t *instr = (instr_t *)dwords; - opc_t opc = _OPC(instr->opc_cat, instr_opc(instr, ctx->gpu_id)); - unsigned nop = 0; - unsigned cycles = ctx->stats->instructions; - - if (debug & PRINT_RAW) { - fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level], - instr->opc_cat, n, cycles++, dwords[1], dwords[0]); - } - - if (opc == OPC_BARY_F) - ctx->stats->last_baryf = ctx->stats->instructions; - - ctx->repeat = instr_repeat(instr); - ctx->stats->instructions += 1 + ctx->repeat; - ctx->stats->instlen++; - - /* NOTE: order flags are printed is a bit fugly.. but for now I - * try to match the order in llvm-a3xx disassembler for easy - * diff'ing.. - */ - - if (instr->sync) { - fprintf(ctx->out, "(sy)"); - ctx->stats->sy++; - } - if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) { - fprintf(ctx->out, "(ss)"); - ctx->stats->ss++; + if (ctx->last.ss) { ctx->stats->sstall += ctx->sfu_delay; ctx->sfu_delay = 0; } - if (instr->jmp_tgt) - fprintf(ctx->out, "(jp)"); - if ((instr->opc_cat == 0) && instr->cat0.eq) - fprintf(ctx->out, "(eq)"); - if (instr_sat(instr)) - fprintf(ctx->out, "(sat)"); - if (instr->opc_cat == 1 && instr->cat1.ul) - fprintf(ctx->out, "(ul)"); - if (ctx->repeat && opc != OPC_MOVMSK) - fprintf(ctx->out, "(rpt%d)", ctx->repeat); - else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r)) - nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; - else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) - nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r; - if (nop) - fprintf(ctx->out, "(nop%d) ", nop); - if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) - fprintf(ctx->out, "(ul)"); - - if (instr->opc_cat == 4) { + if (ctx->cur_opc_cat == 4) { ctx->sfu_delay = 10; } else { - int n = MIN2(ctx->sfu_delay, 1 + ctx->repeat + nop); + int n = MIN2(ctx->sfu_delay, 1 + ctx->last.repeat + ctx->last.nop); ctx->sfu_delay -= n; } - ctx->stats->instructions += nop; - ctx->stats->nops += nop; - if (opc == OPC_NOP) { - ctx->stats->nops += 1 + ctx->repeat; - ctx->stats->instrs_per_cat[0] += 1 + ctx->repeat; - } else { - ctx->stats->instrs_per_cat[instr->opc_cat] += 1 + ctx->repeat; - ctx->stats->instrs_per_cat[0] += nop; - } + memset(&ctx->last, 0, sizeof(ctx->last)); +} - if (opc == OPC_MOV) { - if (instr->cat1.src_type == instr->cat1.dst_type) { - ctx->stats->mov_count += 1 + ctx->repeat; - } else { - ctx->stats->cov_count += 1 + ctx->repeat; +static void +disasm_instr_cb(void *d, unsigned n, uint64_t instr) +{ + struct disasm_ctx *ctx = d; + uint32_t *dwords = (uint32_t *)&instr; + unsigned opc_cat = instr >> 61; + + /* There are some cases where we can get instr_cb called multiple + * times per instruction (like when we need an extra line for branch + * target labels), don't update stats in these cases: + */ + if (n != ctx->cur_n) { + if (n > 0) { + disasm_handle_last(ctx); } - } + ctx->stats->instrs_per_cat[opc_cat]++; + ctx->cur_n = n; - print_single_instr(ctx, instr); - fprintf(ctx->out, "\n"); + /* mov vs cov stats are a bit harder to fish out of the field + * names, because current ir3-cat1.xml doesn't use {NAME} for + * this distinction. So for now just handle this case with + * some hand-coded parsing: + */ + if (opc_cat == 1) { + unsigned opc = (instr >> 57) & 0x3; + unsigned src_type = (instr >> 50) & 0x7; + unsigned dst_type = (instr >> 46) & 0x7; - process_reg_dst(ctx); - - if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) { - int i; - for (i = 0; i < nop; i++) { - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", - levels[ctx->level], instr->opc_cat, n, cycles++); + if (opc == 0) { + if (src_type == dst_type) { + ctx->stats->mov_count++; + } else { + ctx->stats->cov_count++; + } } - fprintf(ctx->out, "nop\n"); } - for (i = 0; i < ctx->repeat; i++) { - ctx->repeatidx = i + 1; - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", - levels[ctx->level], instr->opc_cat, n, cycles++); - } - print_single_instr(ctx, instr); - fprintf(ctx->out, "\n"); - } - ctx->repeatidx = 0; } - return (instr->opc_cat == 0) && - ((opc == OPC_END) || (opc == OPC_CHSH)); + ctx->cur_opc_cat = opc_cat; + + if (debug & PRINT_RAW) { + fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level], + opc_cat, n, ctx->extra_cycles + n, dwords[1], dwords[0]); + } } int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id, struct shader_stats *stats) { - struct disasm_ctx ctx; - int i; - int nop_count = 0; - bool has_end = false; + struct isa_decode_options decode_options = { + .gpu_id = gpu_id, + .show_errors = true, + .max_errors = 5, + .branch_labels = true, + .field_cb = disasm_field_cb, + .instr_cb = disasm_instr_cb, + }; + struct disasm_ctx ctx = { + .out = out, + .level = level, + .options = &decode_options, + .stats = stats, + .cur_n = -1, + }; - ir3_assert((sizedwords % 2) == 0); + memset(stats, 0, sizeof(*stats)); - memset(&ctx, 0, sizeof(ctx)); - ctx.out = out; - ctx.level = level; - ctx.gpu_id = gpu_id; - ctx.stats = stats; - if (gpu_id >= 600) { - ctx.regs.used.mergedregs = true; - ctx.regs.rbw.mergedregs = true; - ctx.regs.war.mergedregs = true; - } - memset(ctx.stats, 0, sizeof(*ctx.stats)); + decode_options.cbdata = &ctx; - for (i = 0; i < sizedwords; i += 2) { - has_end |= print_instr(&ctx, &dwords[i], i/2); - if (!has_end) - continue; - if (dwords[i] == 0 && dwords[i + 1] == 0) - nop_count++; - else - nop_count = 0; - if (nop_count > 3) - break; - } + isa_decode(dwords, sizedwords * 4, out, &decode_options); + + disasm_handle_last(&ctx); if (debug & PRINT_STATS) - print_reg_stats(&ctx); + print_stats(&ctx); return 0; }