diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log index 34a06da730e..b1a79865cf3 100644 --- a/src/freedreno/.gitlab-ci/reference/crash.log +++ b/src/freedreno/.gitlab-ci/reference/crash.log @@ -3441,12 +3441,12 @@ shader-blocks: :3:0006:0014[7f64a39ax_609f35bcx] (sy)(jp)(rpt3)(ul)sad.s16 hr38.z, hc367.x, (neg)hc50.y, (r)hr39.w :7:0007:0018[f352cfcbx_ecad502bx] (sy)unknown(7,6).g :4:0008:0019[818209d0x_74021646x] (rpt1)unknown(4,12) hr52.x, (r)hc401.z - :6:0009:0021[c90972c0x_8e905e80x] (jp)stl.s16 l[hr48.x], hr976.x, 142 + :6:0009:0021[c90972c0x_8e905e80x] (jp)stl.s16 l[hr48.x], hr16.x, 142 :5:0010:0022[a4827242x_46248300x] gather4b.a (s8)(y)hr16.z, hr32.x, s#1, t#35 :4:0011:0023[82342205x_cd064d21x] (rpt2)(ul)unknown(4,17) r1.y, (neg)c :5:0012:0026[a923bf8bx_81f95908x] (jp)samb.3d.a.p (u32)(xyzw)r34.w, hr33.x, hr43.x, s#15, t#64 - :1:0013:0027[3dda8123x_a0d91ccdx] (sy)(jp)(rpt1)cov.u8u16 (even)(pos_infinity)hr, 0xa0d91ccd -../src/freedreno/decode/instr-a3xx.h:979: is_cat6_legacy: Assertion `instr->cat6.opc == 0' failed. + +../src/freedreno/ir3/disasm-a3xx.c:173: regmask_set: Assertion `num < MAX_REG' failed. ----------------------------------------------- 8192 (0x2000) bytes 000000: a018c54a 0600e824 11fa09c3 edc6145b |J...$.......[...| @@ -4124,17 +4124,8 @@ shader-blocks: size: 2048 :2:0000:0000[40846422x_d81251c5x] (sat)(ul)sign.f r8.z, (neg)hc113.y :4:0001:0001[938a16e2x_520c369ax] (sy)(ss)(sat)(rpt2)unknown(4,28) hr56.z, -358 - :1:0002:0004[200a00c1x_094864d2x] cov.u16f16 hr, hr308.z - :2:0003:0005[44109084x_4a201507x] (ss)unknown(2,32) (ei)r33.x, c321.w, (neg)r - :4:0004:0006[882fadabx_14a391b1x] (jp)(sat)(rpt1)(ul)rsq hr42.w, (abs)(r)hc108.y - :3:0005:0008[6060f068x_7106601ax] (ss)(ul)mad.u16 r26.x, (neg)hr6.z, (neg)hc48.y, (r)hc65.z - :3:0006:0009[60ed4212x_02900201x] (rpt2)madsh.u16 hr4.z, r128.y, r54.z, r164.x - :0:0007:0012[005b6589x_8a054280x] (eq)(rpt5)bkt #17024 - :3:0008:0018[7cebfff7x_dbae7deex] (sy)(ss)(jp)(sat)(rpt3)(ul)sel.b32 a3.x, (neg)(r)c891.z, (neg)c53.w, (neg)c747.z - :5:0009:0022[aff86b27x_fd7472ffx] (jp)unknown(5,31).o.p.base4 (u8)(xyw)hr9.w, r14.y, a1.x - :0:0010:0023[0ed959d7x_6d7a21a4x] (ss)(jp)(eq)(rpt1)unknown(0,13) - :2:0011:0025[445a8ebex_8d6e703bx] (sat)(rpt2)cmpv.s.gt (ei)r47.z, (neg)(r)59, (abs)(r)c -../src/freedreno/decode/instr-a3xx.h:979: is_cat6_legacy: Assertion `instr->cat6.opc == 0' failed. + :1:0002:0004[200a00c1x_094864d2x] cov.u16f16 hr, +../src/freedreno/ir3/disasm-a3xx.c:185: regmask_get: Assertion `num < MAX_REG' failed. ----------------------------------------------- 8192 (0x2000) bytes 000000: d81251c5 40846422 520c369a 938a16e2 |.Q.."d.@.6.R....| @@ -4653,1040 +4644,8 @@ shader-blocks: size: 2048 :0:0000:0000[00000000x_00003002x] nop :0:0001:0001[00000000x_00000000x] nop - :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0008:0008[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0009:0009[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0010:0010[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0011:0011[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0012:0012[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0013:0013[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0014:0014[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0015:0015[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0016:0016[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0017:0017[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0018:0018[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0019:0019[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0020:0020[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0021:0021[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0022:0022[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0023:0023[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0024:0024[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0025:0025[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0026:0026[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0027:0027[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0028:0028[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0029:0029[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0030:0030[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0031:0031[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0032:0032[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0033:0033[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0034:0034[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0035:0035[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0036:0036[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0037:0037[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0038:0038[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0039:0039[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0040:0040[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0041:0041[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0042:0042[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0043:0043[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0044:0044[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0045:0045[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0046:0046[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0047:0047[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0048:0048[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0049:0049[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0050:0050[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0051:0051[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0052:0052[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0053:0053[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0054:0054[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0055:0055[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0056:0056[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0057:0057[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0058:0058[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0059:0059[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0060:0060[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0061:0061[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0062:0062[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0063:0063[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0064:0064[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0065:0065[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0066:0066[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0067:0067[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0068:0068[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0069:0069[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0070:0070[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0071:0071[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0072:0072[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0073:0073[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0074:0074[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0075:0075[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0076:0076[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0077:0077[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0078:0078[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0079:0079[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0080:0080[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0081:0081[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0082:0082[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0083:0083[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0084:0084[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0085:0085[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0086:0086[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0087:0087[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0088:0088[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0089:0089[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0090:0090[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0091:0091[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0092:0092[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0093:0093[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0094:0094[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0095:0095[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0096:0096[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0097:0097[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0098:0098[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0099:0099[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0100:0100[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0101:0101[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0102:0102[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0103:0103[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0104:0104[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0105:0105[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0106:0106[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0107:0107[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0108:0108[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0109:0109[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0110:0110[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0111:0111[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0112:0112[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0113:0113[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0114:0114[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0115:0115[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0116:0116[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0117:0117[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0118:0118[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0119:0119[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0120:0120[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0121:0121[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0122:0122[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0123:0123[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0124:0124[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0125:0125[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0126:0126[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0127:0127[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0128:0128[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0129:0129[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0130:0130[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0131:0131[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0132:0132[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0133:0133[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0134:0134[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0135:0135[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0136:0136[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0137:0137[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0138:0138[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0139:0139[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0140:0140[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0141:0141[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0142:0142[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0143:0143[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0144:0144[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0145:0145[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0146:0146[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0147:0147[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0148:0148[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0149:0149[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0150:0150[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0151:0151[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0152:0152[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0153:0153[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0154:0154[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0155:0155[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0156:0156[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0157:0157[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0158:0158[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0159:0159[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0160:0160[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0161:0161[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0162:0162[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0163:0163[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0164:0164[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0165:0165[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0166:0166[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0167:0167[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0168:0168[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0169:0169[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0170:0170[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0171:0171[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0172:0172[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0173:0173[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0174:0174[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0175:0175[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0176:0176[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0177:0177[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0178:0178[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0179:0179[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0180:0180[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0181:0181[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0182:0182[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0183:0183[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0184:0184[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0185:0185[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0186:0186[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0187:0187[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0188:0188[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0189:0189[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0190:0190[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0191:0191[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0192:0192[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0193:0193[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0194:0194[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0195:0195[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0196:0196[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0197:0197[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0198:0198[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0199:0199[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0200:0200[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0201:0201[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0202:0202[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0203:0203[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0204:0204[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0205:0205[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0206:0206[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0207:0207[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0208:0208[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0209:0209[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0210:0210[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0211:0211[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0212:0212[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0213:0213[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0214:0214[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0215:0215[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0216:0216[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0217:0217[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0218:0218[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0219:0219[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0220:0220[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0221:0221[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0222:0222[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0223:0223[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0224:0224[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0225:0225[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0226:0226[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0227:0227[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0228:0228[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0229:0229[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0230:0230[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0231:0231[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0232:0232[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0233:0233[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0234:0234[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0235:0235[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0236:0236[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0237:0237[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0238:0238[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0239:0239[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0240:0240[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0241:0241[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0242:0242[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0243:0243[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0244:0244[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0245:0245[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0246:0246[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0247:0247[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0248:0248[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0249:0249[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0250:0250[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0251:0251[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0252:0252[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0253:0253[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0254:0254[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0255:0255[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222 - :6:0256:0256[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0257:0257[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0258:0258[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0259:0259[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0260:0260[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0261:0261[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0262:0262[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0263:0263[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0264:0264[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0265:0265[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0266:0266[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0267:0267[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0268:0268[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0269:0269[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0270:0270[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0271:0271[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0272:0272[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0273:0273[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0274:0274[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0275:0275[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0276:0276[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0277:0277[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0278:0278[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0279:0279[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0280:0280[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0281:0281[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0282:0282[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0283:0283[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0284:0284[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0285:0285[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0286:0286[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0287:0287[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0288:0288[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0289:0289[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0290:0290[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0291:0291[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0292:0292[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0293:0293[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0294:0294[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0295:0295[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0296:0296[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0297:0297[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0298:0298[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0299:0299[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0300:0300[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0301:0301[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0302:0302[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0303:0303[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0304:0304[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0305:0305[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0306:0306[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0307:0307[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0308:0308[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0309:0309[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0310:0310[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0311:0311[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0312:0312[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0313:0313[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0314:0314[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0315:0315[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0316:0316[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0317:0317[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0318:0318[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0319:0319[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0320:0320[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0321:0321[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0322:0322[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0323:0323[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0324:0324[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0325:0325[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0326:0326[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0327:0327[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0328:0328[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0329:0329[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0330:0330[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0331:0331[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0332:0332[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0333:0333[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0334:0334[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0335:0335[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0336:0336[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0337:0337[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0338:0338[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0339:0339[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0340:0340[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0341:0341[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0342:0342[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0343:0343[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0344:0344[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0345:0345[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0346:0346[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0347:0347[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0348:0348[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0349:0349[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0350:0350[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0351:0351[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0352:0352[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0353:0353[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0354:0354[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0355:0355[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0356:0356[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0357:0357[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0358:0358[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0359:0359[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0360:0360[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0361:0361[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0362:0362[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0363:0363[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0364:0364[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0365:0365[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0366:0366[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0367:0367[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0368:0368[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0369:0369[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0370:0370[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0371:0371[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0372:0372[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0373:0373[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0374:0374[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0375:0375[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0376:0376[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0377:0377[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0378:0378[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0379:0379[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0380:0380[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0381:0381[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0382:0382[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0383:0383[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0384:0384[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0385:0385[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0386:0386[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0387:0387[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0388:0388[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0389:0389[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0390:0390[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0391:0391[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0392:0392[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0393:0393[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0394:0394[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0395:0395[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0396:0396[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0397:0397[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0398:0398[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0399:0399[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0400:0400[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0401:0401[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0402:0402[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0403:0403[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0404:0404[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0405:0405[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0406:0406[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0407:0407[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0408:0408[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0409:0409[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0410:0410[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0411:0411[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0412:0412[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0413:0413[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0414:0414[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0415:0415[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0416:0416[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0417:0417[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0418:0418[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0419:0419[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0420:0420[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0421:0421[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0422:0422[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0423:0423[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0424:0424[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0425:0425[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0426:0426[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0427:0427[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0428:0428[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0429:0429[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0430:0430[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0431:0431[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0432:0432[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0433:0433[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0434:0434[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0435:0435[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0436:0436[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0437:0437[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0438:0438[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0439:0439[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0440:0440[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0441:0441[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0442:0442[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0443:0443[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0444:0444[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0445:0445[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0446:0446[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0447:0447[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0448:0448[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0449:0449[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0450:0450[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0451:0451[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0452:0452[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0453:0453[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0454:0454[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0455:0455[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0456:0456[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0457:0457[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0458:0458[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0459:0459[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0460:0460[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0461:0461[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0462:0462[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0463:0463[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0464:0464[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0465:0465[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0466:0466[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0467:0467[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0468:0468[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0469:0469[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0470:0470[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0471:0471[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0472:0472[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0473:0473[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0474:0474[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0475:0475[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0476:0476[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0477:0477[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0478:0478[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0479:0479[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0480:0480[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0481:0481[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0482:0482[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0483:0483[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0484:0484[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0485:0485[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0486:0486[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0487:0487[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0488:0488[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0489:0489[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0490:0490[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0491:0491[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0492:0492[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0493:0493[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0494:0494[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0495:0495[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0496:0496[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0497:0497[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0498:0498[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0499:0499[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0500:0500[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0501:0501[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0502:0502[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0503:0503[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0504:0504[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0505:0505[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0506:0506[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0507:0507[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0508:0508[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0509:0509[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0510:0510[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :6:0511:0511[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222 - :0:0512:0512[00500240x_00024000x] (rpt2)nop - :0:0513:0515[00402020x_00000000x] nop - :0:0514:0516[00000040x_00001000x] nop - :0:0515:0517[00510401x_00024020x] (eq)(rpt4)nop - :0:0516:0522[00100080x_00000008x] nop - :0:0517:0523[00000044x_00002080x] nop - :0:0518:0524[00001000x_00000000x] (ss)nop - :0:0519:0525[00200000x_00000008x] nop - :0:0520:0526[00000044x_00048110x] nop - :0:0521:0527[00000040x_00508000x] nop - :0:0522:0528[00010200x_00020044x] (eq)(rpt2)nop - :0:0523:0531[00000000x_00201014x] nop - :0:0524:0532[00012100x_00101100x] (eq)(rpt1)nop - :0:0525:0534[00000012x_00005000x] nop - :0:0526:0535[00000010x_00005000x] nop - :0:0527:0536[00040000x_00000020x] nop - :0:0528:0537[00002101x_00082514x] (rpt1)nop - :0:0529:0539[00000000x_00210020x] nop - :0:0530:0540[00440004x_00010002x] nop - :0:0531:0541[00000002x_00000250x] nop - :0:0532:0542[00000040x_00100000x] nop - :0:0533:0543[00000000x_00020014x] nop - :0:0534:0544[000400a0x_00050020x] nop - :0:0535:0545[00100000x_00000000x] nop - :0:0536:0546[00000000x_00044081x] nop - :0:0537:0547[00000000x_00000000x] nop - :0:0538:0548[00200048x_00000100x] nop - :0:0539:0549[00080020x_00000000x] nop - :0:0540:0550[00200002x_00200001x] nop - :0:0541:0551[002000a4x_00000404x] nop - :0:0542:0552[00440246x_00000004x] (rpt2)nop - :0:0543:0555[0008c040x_00442000x] nop - :0:0544:0556[002112a0x_00200000x] (ss)(eq)(rpt2)nop - :0:0545:0559[00000000x_00000000x] nop - :0:0546:0560[00000240x_00400001x] (rpt2)nop - :0:0547:0563[00000000x_00040400x] nop - :0:0548:0564[0000a100x_00104010x] (rpt1)nop - :0:0549:0566[00008480x_00002001x] (rpt4)nop - :0:0550:0571[00000001x_00000040x] nop - :0:0551:0572[00040001x_00040400x] nop - :0:0552:0573[00200000x_00040600x] nop - :0:0553:0574[00000100x_00100000x] (rpt1)nop - :0:0554:0576[00504180x_0020a200x] (rpt1)nop - :0:0555:0578[00000000x_00000000x] nop - :0:0556:0579[00000024x_00004000x] nop - :0:0557:0580[00200000x_00100008x] nop - :0:0558:0581[00010080x_00000000x] (eq)nop - :0:0559:0582[00080000x_00000000x] nop - :0:0560:0583[00084000x_00500400x] nop - :0:0561:0584[00004000x_00008000x] nop - :0:0562:0585[00200000x_00000300x] nop - :0:0563:0586[00000042x_00020001x] nop - :0:0564:0587[00005600x_00400088x] (ss)(rpt6)nop - :0:0565:0594[00000002x_00000000x] nop - :0:0566:0595[0002005ex_00400008x] bkt #8 - :0:0567:0596[00020020x_00200000x] bkt #0 - :0:0568:0597[001e0414x_00055480x] (rpt4)bkt #21632 - :0:0569:0602[00000000x_00000000x] nop - :0:0570:0603[00000442x_00000480x] (rpt4)nop - :0:0571:0608[00000200x_00080000x] (rpt2)nop - :0:0572:0611[00520000x_00600400x] bkt #1024 - :0:0573:0612[00001200x_00000008x] (ss)(rpt2)nop - :0:0574:0615[00400114x_00201000x] (rpt1)nop - :0:0575:0617[00110100x_00100002x] (eq)(rpt1)nop - :0:0576:0619[00404200x_00200683x] (rpt2)nop - :0:0577:0622[00000090x_00000004x] nop - :0:0578:0623[00502000x_00002000x] nop - :0:0579:0624[00000004x_00000020x] nop - :0:0580:0625[00103100x_00600010x] (ss)(rpt1)nop - :0:0581:0627[00000002x_00000010x] nop - :0:0582:0628[00004000x_00021200x] nop - :0:0583:0629[00000000x_00000000x] nop - :0:0584:0630[00201400x_0010220ax] (ss)(rpt4)nop - :0:0585:0635[00000000x_00030000x] nop - :0:0586:0636[00080040x_00400000x] nop - :0:0587:0637[00000080x_00000002x] nop - :0:0588:0638[00000580x_00000400x] (rpt5)nop - :0:0589:0644[00000200x_00000022x] (rpt2)nop - :0:0590:0647[00080000x_00300042x] nop - :0:0591:0648[00008000x_00040200x] nop - :0:0592:0649[00000000x_00040000x] nop - :0:0593:0650[0012008ax_00000010x] bkt #16 - :0:0594:0651[00000100x_00000000x] (rpt1)nop - :0:0595:0653[00010000x_00010018x] (eq)nop - :0:0596:0654[00500011x_00440020x] nop - :0:0597:0655[00100000x_00000000x] nop - :0:0598:0656[00008200x_0004020cx] (rpt2)nop - :0:0599:0659[00000400x_00100010x] (rpt4)nop - :0:0600:0664[00000004x_00118000x] nop - :0:0601:0665[00000002x_00004200x] nop - :0:0602:0666[00026300x_00000210x] (rpt3)bkt #528 - :0:0603:0670[0000a002x_00000040x] nop - :0:0604:0671[00081100x_00004082x] (ss)(rpt1)nop - :0:0605:0673[00000008x_00210000x] nop - :0:0606:0674[00020004x_00020000x] bkt #0 - :0:0607:0675[00020000x_00064108x] bkt #16648 - :0:0608:0676[00000084x_00020000x] nop - :0:0609:0677[00000181x_00000430x] (rpt1)nop - :0:0610:0679[001c8100x_00100002x] (rpt1)nop - :0:0611:0681[00000000x_00200020x] nop - :0:0612:0682[00100081x_00002000x] nop - :0:0613:0683[00000000x_00000008x] nop - :0:0614:0684[00009420x_00000024x] (ss)(rpt4)nop - :0:0615:0689[00000100x_00002010x] (rpt1)nop - :0:0616:0691[00004188x_00000000x] (rpt1)nop - :0:0617:0693[00100000x_00002000x] nop - :0:0618:0694[00120102x_00040000x] (rpt1)bkt #0 - :0:0619:0696[00040002x_00000000x] nop - :0:0620:0697[00224200x_00210201x] (rpt2)bkt #513 - :0:0621:0700[00000200x_00040000x] (rpt2)nop - :0:0622:0703[0000000cx_00000000x] nop - :0:0623:0704[00000000x_00005000x] nop - :0:0624:0705[00082208x_00010200x] (rpt2)nop - :0:0625:0708[00194011x_00000000x] (eq)nop - :0:0626:0709[00012100x_00000502x] (eq)(rpt1)nop - :0:0627:0711[00000240x_00040050x] (rpt2)nop - :0:0628:0714[00080211x_00004180x] (rpt2)nop - :0:0629:0717[00000000x_00001008x] nop - :0:0630:0718[00020490x_002004a0x] (rpt4)bkt #1184 - :0:0631:0723[00210004x_00001080x] (eq)nop - :0:0632:0724[00000000x_00300040x] nop - :0:0633:0725[00008002x_00000020x] nop - :0:0634:0726[00000000x_00041098x] nop - :0:0635:0727[002000a0x_00000000x] nop - :0:0636:0728[00000000x_000c0400x] nop - :0:0637:0729[00000401x_00000402x] (rpt4)nop - :0:0638:0734[00002000x_00200400x] nop - :0:0639:0735[00000101x_00001000x] (rpt1)nop - :0:0640:0737[00500240x_00024000x] (rpt2)nop - :0:0641:0740[00402020x_00000000x] nop - :0:0642:0741[00000040x_00001000x] nop - :0:0643:0742[00510401x_00024020x] (eq)(rpt4)nop - :0:0644:0747[00100080x_00000008x] nop - :0:0645:0748[00000044x_00002080x] nop - :0:0646:0749[00001000x_00000000x] (ss)nop - :0:0647:0750[00200000x_00000008x] nop - :0:0648:0751[00000044x_00048110x] nop - :0:0649:0752[00000040x_00508000x] nop - :0:0650:0753[00010200x_00020044x] (eq)(rpt2)nop - :0:0651:0756[00000000x_00201014x] nop - :0:0652:0757[00012100x_00101100x] (eq)(rpt1)nop - :0:0653:0759[00000012x_00005000x] nop - :0:0654:0760[00000010x_00005000x] nop - :0:0655:0761[00040000x_00000020x] nop - :0:0656:0762[00002101x_00082514x] (rpt1)nop - :0:0657:0764[00000000x_00210020x] nop - :0:0658:0765[00440004x_00010002x] nop - :0:0659:0766[00000002x_00000250x] nop - :0:0660:0767[00000040x_00100000x] nop - :0:0661:0768[00000000x_00020014x] nop - :0:0662:0769[000400a0x_00050020x] nop - :0:0663:0770[00100000x_00000000x] nop - :0:0664:0771[00000000x_00044081x] nop - :0:0665:0772[00000000x_00000000x] nop - :0:0666:0773[00200048x_00000100x] nop - :0:0667:0774[00080020x_00000000x] nop - :0:0668:0775[00200002x_00200001x] nop - :0:0669:0776[002000a4x_00000404x] nop - :0:0670:0777[00440246x_00000004x] (rpt2)nop - :0:0671:0780[0008c040x_00442000x] nop - :0:0672:0781[002112a0x_00200000x] (ss)(eq)(rpt2)nop - :0:0673:0784[00000000x_00000000x] nop - :0:0674:0785[00000240x_00400001x] (rpt2)nop - :0:0675:0788[00000000x_00040400x] nop - :0:0676:0789[0000a100x_00104010x] (rpt1)nop - :0:0677:0791[00008480x_00002001x] (rpt4)nop - :0:0678:0796[00000001x_00000040x] nop - :0:0679:0797[00040001x_00040400x] nop - :0:0680:0798[00200000x_00040600x] nop - :0:0681:0799[00000100x_00100000x] (rpt1)nop - :0:0682:0801[00504180x_0020a200x] (rpt1)nop - :0:0683:0803[00000000x_00000000x] nop - :0:0684:0804[00000024x_00004000x] nop - :0:0685:0805[00200000x_00100008x] nop - :0:0686:0806[00010080x_00000000x] (eq)nop - :0:0687:0807[00080000x_00000000x] nop - :0:0688:0808[00084000x_00500400x] nop - :0:0689:0809[00004000x_00008000x] nop - :0:0690:0810[00200000x_00000300x] nop - :0:0691:0811[00000042x_00020001x] nop - :0:0692:0812[00005600x_00400088x] (ss)(rpt6)nop - :0:0693:0819[00000002x_00000000x] nop - :0:0694:0820[0002005ex_00400008x] bkt #8 - :0:0695:0821[00020020x_00200000x] bkt #0 - :0:0696:0822[001e0414x_00055480x] (rpt4)bkt #21632 - :0:0697:0827[00000000x_00000000x] nop - :0:0698:0828[00000442x_00000480x] (rpt4)nop - :0:0699:0833[00000200x_00080000x] (rpt2)nop - :0:0700:0836[00520000x_00600400x] bkt #1024 - :0:0701:0837[00001200x_00000008x] (ss)(rpt2)nop - :0:0702:0840[00400114x_00201000x] (rpt1)nop - :0:0703:0842[00110100x_00100002x] (eq)(rpt1)nop - :0:0704:0844[00404200x_00200683x] (rpt2)nop - :0:0705:0847[00000090x_00000004x] nop - :0:0706:0848[00502000x_00002000x] nop - :0:0707:0849[00000004x_00000020x] nop - :0:0708:0850[00103100x_00600010x] (ss)(rpt1)nop - :0:0709:0852[00000002x_00000010x] nop - :0:0710:0853[00004000x_00021200x] nop - :0:0711:0854[00000000x_00000000x] nop - :0:0712:0855[00201400x_0010220ax] (ss)(rpt4)nop - :0:0713:0860[00000000x_00030000x] nop - :0:0714:0861[00080040x_00400000x] nop - :0:0715:0862[00000080x_00000002x] nop - :0:0716:0863[00000580x_00000400x] (rpt5)nop - :0:0717:0869[00000200x_00000022x] (rpt2)nop - :0:0718:0872[00080000x_00300042x] nop - :0:0719:0873[00008000x_00040200x] nop - :0:0720:0874[00000000x_00040000x] nop - :0:0721:0875[0012008ax_00000010x] bkt #16 - :0:0722:0876[00000100x_00000000x] (rpt1)nop - :0:0723:0878[00010000x_00010018x] (eq)nop - :0:0724:0879[00500011x_00440020x] nop - :0:0725:0880[00100000x_00000000x] nop - :0:0726:0881[00008200x_0004020cx] (rpt2)nop - :0:0727:0884[00000400x_00100010x] (rpt4)nop - :0:0728:0889[00000004x_00118000x] nop - :0:0729:0890[00000002x_00004200x] nop - :0:0730:0891[00026300x_00000210x] (rpt3)bkt #528 - :0:0731:0895[0000a002x_00000040x] nop - :0:0732:0896[00081100x_00004082x] (ss)(rpt1)nop - :0:0733:0898[00000008x_00210000x] nop - :0:0734:0899[00020004x_00020000x] bkt #0 - :0:0735:0900[00020000x_00064108x] bkt #16648 - :0:0736:0901[00000084x_00020000x] nop - :0:0737:0902[00000181x_00000430x] (rpt1)nop - :0:0738:0904[001c8100x_00100002x] (rpt1)nop - :0:0739:0906[00000000x_00200020x] nop - :0:0740:0907[00100081x_00002000x] nop - :0:0741:0908[00000000x_00000008x] nop - :0:0742:0909[00009420x_00000024x] (ss)(rpt4)nop - :0:0743:0914[00000100x_00002010x] (rpt1)nop - :0:0744:0916[00004188x_00000000x] (rpt1)nop - :0:0745:0918[00100000x_00002000x] nop - :0:0746:0919[00120102x_00040000x] (rpt1)bkt #0 - :0:0747:0921[00040002x_00000000x] nop - :0:0748:0922[00224200x_00210201x] (rpt2)bkt #513 - :0:0749:0925[00000200x_00040000x] (rpt2)nop - :0:0750:0928[0000000cx_00000000x] nop - :0:0751:0929[00000000x_00005000x] nop - :0:0752:0930[00082208x_00010200x] (rpt2)nop - :0:0753:0933[00194011x_00000000x] (eq)nop - :0:0754:0934[00012100x_00000502x] (eq)(rpt1)nop - :0:0755:0936[00000240x_00040050x] (rpt2)nop - :0:0756:0939[00080211x_00004180x] (rpt2)nop - :0:0757:0942[00000000x_00001008x] nop - :0:0758:0943[00020490x_002004a0x] (rpt4)bkt #1184 - :0:0759:0948[00210004x_00001080x] (eq)nop - :0:0760:0949[00000000x_00300040x] nop - :0:0761:0950[00008002x_00000020x] nop - :0:0762:0951[00000000x_00041098x] nop - :0:0763:0952[002000a0x_00000000x] nop - :0:0764:0953[00000000x_000c0400x] nop - :0:0765:0954[00000401x_00000402x] (rpt4)nop - :0:0766:0959[00002000x_00200400x] nop - :0:0767:0960[00000101x_00001000x] (rpt1)nop - :0:0768:0962[00000000x_00000000x] nop - :0:0769:0963[00000000x_00000000x] nop - :0:0770:0964[00000000x_00000000x] nop - :0:0771:0965[00000000x_00000000x] nop - :0:0772:0966[00000000x_00000000x] nop - :0:0773:0967[00000000x_00000000x] nop - :0:0774:0968[00000000x_00000000x] nop - :0:0775:0969[00000000x_00000000x] nop - :0:0776:0970[00000000x_00000000x] nop - :0:0777:0971[00000000x_00000000x] nop - :0:0778:0972[00000000x_00000000x] nop - :0:0779:0973[00000000x_00000000x] nop - :0:0780:0974[00000000x_00000000x] nop - :0:0781:0975[00000000x_00000000x] nop - :0:0782:0976[00000000x_00000000x] nop - :0:0783:0977[00000000x_00000000x] nop - :0:0784:0978[00000000x_00000000x] nop - :0:0785:0979[00000000x_00000000x] nop - :0:0786:0980[00000000x_00000000x] nop - :0:0787:0981[00000000x_00000000x] nop - :0:0788:0982[00000000x_00000000x] nop - :0:0789:0983[00000000x_00000000x] nop - :0:0790:0984[00000000x_00000000x] nop - :0:0791:0985[00000000x_00000000x] nop - :0:0792:0986[00000000x_00000000x] nop - :0:0793:0987[00000000x_00000000x] nop - :0:0794:0988[00000000x_00000000x] nop - :0:0795:0989[00000000x_00000000x] nop - :0:0796:0990[00000000x_00000000x] nop - :0:0797:0991[00000000x_00000000x] nop - :0:0798:0992[00000000x_00000000x] nop - :0:0799:0993[00000000x_00000000x] nop - :0:0800:0994[00000000x_00000000x] nop - :0:0801:0995[00000000x_00000000x] nop - :0:0802:0996[00000000x_00000000x] nop - :0:0803:0997[00000000x_00000000x] nop - :0:0804:0998[00000000x_00000000x] nop - :0:0805:0999[00000000x_00000000x] nop - :0:0806:1000[00000000x_00000000x] nop - :0:0807:1001[00000000x_00000000x] nop - :0:0808:1002[00000000x_00000000x] nop - :0:0809:1003[00000000x_00000000x] nop - :0:0810:1004[00000000x_00000000x] nop - :0:0811:1005[00000000x_00000000x] nop - :0:0812:1006[00000000x_00000000x] nop - :0:0813:1007[00000000x_00000000x] nop - :0:0814:1008[00000000x_00000000x] nop - :0:0815:1009[00000000x_00000000x] nop - :0:0816:1010[00000000x_00000000x] nop - :0:0817:1011[00000000x_00000000x] nop - :0:0818:1012[00000000x_00000000x] nop - :0:0819:1013[00000000x_00000000x] nop - :0:0820:1014[00000000x_00000000x] nop - :0:0821:1015[00000000x_00000000x] nop - :0:0822:1016[00000000x_00000000x] nop - :0:0823:1017[00000000x_00000000x] nop - :0:0824:1018[00000000x_00000000x] nop - :0:0825:1019[00000000x_00000000x] nop - :0:0826:1020[00000000x_00000000x] nop - :0:0827:1021[00000000x_00000000x] nop - :0:0828:1022[00000000x_00000000x] nop - :0:0829:1023[00000000x_00000000x] nop - :0:0830:1024[00000000x_00000000x] nop - :0:0831:1025[00000000x_00000000x] nop - :0:0832:1026[00000000x_00000000x] nop - :0:0833:1027[00000000x_00000000x] nop - :0:0834:1028[00000000x_00000000x] nop - :0:0835:1029[00000000x_00000000x] nop - :0:0836:1030[00000000x_00000000x] nop - :0:0837:1031[00000000x_00000000x] nop - :0:0838:1032[00000000x_00000000x] nop - :0:0839:1033[00000000x_00000000x] nop - :0:0840:1034[00000000x_00000000x] nop - :0:0841:1035[00000000x_00000000x] nop - :0:0842:1036[00000000x_00000000x] nop - :0:0843:1037[00000000x_00000000x] nop - :0:0844:1038[00000000x_00000000x] nop - :0:0845:1039[00000000x_00000000x] nop - :0:0846:1040[00000000x_00000000x] nop - :0:0847:1041[00000000x_00000000x] nop - :0:0848:1042[00000000x_00000000x] nop - :0:0849:1043[00000000x_00000000x] nop - :0:0850:1044[00000000x_00000000x] nop - :0:0851:1045[00000000x_00000000x] nop - :0:0852:1046[00000000x_00000000x] nop - :0:0853:1047[00000000x_00000000x] nop - :0:0854:1048[00000000x_00000000x] nop - :0:0855:1049[00000000x_00000000x] nop - :0:0856:1050[00000000x_00000000x] nop - :0:0857:1051[00000000x_00000000x] nop - :0:0858:1052[00000000x_00000000x] nop - :0:0859:1053[00000000x_00000000x] nop - :0:0860:1054[00000000x_00000000x] nop - :0:0861:1055[00000000x_00000000x] nop - :0:0862:1056[00000000x_00000000x] nop - :0:0863:1057[00000000x_00000000x] nop - :0:0864:1058[00000000x_00000000x] nop - :0:0865:1059[00000000x_00000000x] nop - :0:0866:1060[00000000x_00000000x] nop - :0:0867:1061[00000000x_00000000x] nop - :0:0868:1062[00000000x_00000000x] nop - :0:0869:1063[00000000x_00000000x] nop - :0:0870:1064[00000000x_00000000x] nop - :0:0871:1065[00000000x_00000000x] nop - :0:0872:1066[00000000x_00000000x] nop - :0:0873:1067[00000000x_00000000x] nop - :0:0874:1068[00000000x_00000000x] nop - :0:0875:1069[00000000x_00000000x] nop - :0:0876:1070[00000000x_00000000x] nop - :0:0877:1071[00000000x_00000000x] nop - :0:0878:1072[00000000x_00000000x] nop - :0:0879:1073[00000000x_00000000x] nop - :0:0880:1074[00000000x_00000000x] nop - :0:0881:1075[00000000x_00000000x] nop - :0:0882:1076[00000000x_00000000x] nop - :0:0883:1077[00000000x_00000000x] nop - :0:0884:1078[00000000x_00000000x] nop - :0:0885:1079[00000000x_00000000x] nop - :0:0886:1080[00000000x_00000000x] nop - :0:0887:1081[00000000x_00000000x] nop - :0:0888:1082[00000000x_00000000x] nop - :0:0889:1083[00000000x_00000000x] nop - :0:0890:1084[00000000x_00000000x] nop - :0:0891:1085[00000000x_00000000x] nop - :0:0892:1086[00000000x_00000000x] nop - :0:0893:1087[00000000x_00000000x] nop - :0:0894:1088[00000000x_00000000x] nop - :0:0895:1089[00000000x_00000000x] nop - :0:0896:1090[00000000x_00000000x] nop - :0:0897:1091[00000000x_00000000x] nop - :0:0898:1092[00000000x_00000000x] nop - :0:0899:1093[00000000x_00000000x] nop - :0:0900:1094[00000000x_00000000x] nop - :0:0901:1095[00000000x_00000000x] nop - :0:0902:1096[00000000x_00000000x] nop - :0:0903:1097[00000000x_00000000x] nop - :0:0904:1098[00000000x_00000000x] nop - :0:0905:1099[00000000x_00000000x] nop - :0:0906:1100[00000000x_00000000x] nop - :0:0907:1101[00000000x_00000000x] nop - :0:0908:1102[00000000x_00000000x] nop - :0:0909:1103[00000000x_00000000x] nop - :0:0910:1104[00000000x_00000000x] nop - :0:0911:1105[00000000x_00000000x] nop - :0:0912:1106[00000000x_00000000x] nop - :0:0913:1107[00000000x_00000000x] nop - :0:0914:1108[00000000x_00000000x] nop - :0:0915:1109[00000000x_00000000x] nop - :0:0916:1110[00000000x_00000000x] nop - :0:0917:1111[00000000x_00000000x] nop - :0:0918:1112[00000000x_00000000x] nop - :0:0919:1113[00000000x_00000000x] nop - :0:0920:1114[00000000x_00000000x] nop - :0:0921:1115[00000000x_00000000x] nop - :0:0922:1116[00000000x_00000000x] nop - :0:0923:1117[00000000x_00000000x] nop - :0:0924:1118[00000000x_00000000x] nop - :0:0925:1119[00000000x_00000000x] nop - :0:0926:1120[00000000x_00000000x] nop - :0:0927:1121[00000000x_00000000x] nop - :0:0928:1122[00000000x_00000000x] nop - :0:0929:1123[00000000x_00000000x] nop - :0:0930:1124[00000000x_00000000x] nop - :0:0931:1125[00000000x_00000000x] nop - :0:0932:1126[00000000x_00000000x] nop - :0:0933:1127[00000000x_00000000x] nop - :0:0934:1128[00000000x_00000000x] nop - :0:0935:1129[00000000x_00000000x] nop - :0:0936:1130[00000000x_00000000x] nop - :0:0937:1131[00000000x_00000000x] nop - :0:0938:1132[00000000x_00000000x] nop - :0:0939:1133[00000000x_00000000x] nop - :0:0940:1134[00000000x_00000000x] nop - :0:0941:1135[00000000x_00000000x] nop - :0:0942:1136[00000000x_00000000x] nop - :0:0943:1137[00000000x_00000000x] nop - :0:0944:1138[00000000x_00000000x] nop - :0:0945:1139[00000000x_00000000x] nop - :0:0946:1140[00000000x_00000000x] nop - :0:0947:1141[00000000x_00000000x] nop - :0:0948:1142[00000000x_00000000x] nop - :0:0949:1143[00000000x_00000000x] nop - :0:0950:1144[00000000x_00000000x] nop - :0:0951:1145[00000000x_00000000x] nop - :0:0952:1146[00000000x_00000000x] nop - :0:0953:1147[00000000x_00000000x] nop - :0:0954:1148[00000000x_00000000x] nop - :0:0955:1149[00000000x_00000000x] nop - :0:0956:1150[00000000x_00000000x] nop - :0:0957:1151[00000000x_00000000x] nop - :0:0958:1152[00000000x_00000000x] nop - :0:0959:1153[00000000x_00000000x] nop - :0:0960:1154[00000000x_00000000x] nop - :0:0961:1155[00000000x_00000000x] nop - :0:0962:1156[00000000x_00000000x] nop - :0:0963:1157[00000000x_00000000x] nop - :0:0964:1158[00000000x_00000000x] nop - :0:0965:1159[00000000x_00000000x] nop - :0:0966:1160[00000000x_00000000x] nop - :0:0967:1161[00000000x_00000000x] nop - :0:0968:1162[00000000x_00000000x] nop - :0:0969:1163[00000000x_00000000x] nop - :0:0970:1164[00000000x_00000000x] nop - :0:0971:1165[00000000x_00000000x] nop - :0:0972:1166[00000000x_00000000x] nop - :0:0973:1167[00000000x_00000000x] nop - :0:0974:1168[00000000x_00000000x] nop - :0:0975:1169[00000000x_00000000x] nop - :0:0976:1170[00000000x_00000000x] nop - :0:0977:1171[00000000x_00000000x] nop - :0:0978:1172[00000000x_00000000x] nop - :0:0979:1173[00000000x_00000000x] nop - :0:0980:1174[00000000x_00000000x] nop - :0:0981:1175[00000000x_00000000x] nop - :0:0982:1176[00000000x_00000000x] nop - :0:0983:1177[00000000x_00000000x] nop - :0:0984:1178[00000000x_00000000x] nop - :0:0985:1179[00000000x_00000000x] nop - :0:0986:1180[00000000x_00000000x] nop - :0:0987:1181[00000000x_00000000x] nop - :0:0988:1182[00000000x_00000000x] nop - :0:0989:1183[00000000x_00000000x] nop - :0:0990:1184[00000000x_00000000x] nop - :0:0991:1185[00000000x_00000000x] nop - :0:0992:1186[00000000x_00000000x] nop - :0:0993:1187[00000000x_00000000x] nop - :0:0994:1188[00000000x_00000000x] nop - :0:0995:1189[00000000x_00000000x] nop - :0:0996:1190[00000000x_00000000x] nop - :0:0997:1191[00000000x_00000000x] nop - :0:0998:1192[00000000x_00000000x] nop - :0:0999:1193[00000000x_00000000x] nop - :0:1000:1194[00000000x_00000000x] nop - :0:1001:1195[00000000x_00000000x] nop - :0:1002:1196[00000000x_00000000x] nop - :0:1003:1197[00000000x_00000000x] nop - :0:1004:1198[00000000x_00000000x] nop - :0:1005:1199[00000000x_00000000x] nop - :0:1006:1200[00000000x_00000000x] nop - :0:1007:1201[00000000x_00000000x] nop - :0:1008:1202[00000000x_00000000x] nop - :0:1009:1203[00000000x_00000000x] nop - :0:1010:1204[00000000x_00000000x] nop - :0:1011:1205[00000000x_00000000x] nop - :0:1012:1206[00000000x_00000000x] nop - :0:1013:1207[00000000x_00000000x] nop - :0:1014:1208[00000000x_00000000x] nop - :0:1015:1209[00000000x_00000000x] nop - :0:1016:1210[00000000x_00000000x] nop - :0:1017:1211[00000000x_00000000x] nop - :0:1018:1212[00000000x_00000000x] nop - :0:1019:1213[00000000x_00000000x] nop - :0:1020:1214[00000000x_00000000x] nop - :0:1021:1215[00000000x_00000000x] nop - :0:1022:1216[00000000x_00000000x] nop - :0:1023:1217[00000000x_00000000x] nop - Register Stats: - - used (half): 173 239 (cnt=2, max=173) - - used (full): 182 190 (cnt=2, max=190) - - used (merged): 173 239 364-365 380-381 (cnt=6, max=173) - - input (half): 173 239 (cnt=2, max=173) - - input (full): 182 190 (cnt=2, max=190) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) - - output (half): (cnt=0, max=0) (estimated) - - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 1218 instructions, 658 nops, 560 non-nops, (1024 instlen), 44 half, 48 full - - shaderdb: 16 (ss), 510 (sy) + :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[ +../src/freedreno/ir3/disasm-a3xx.c:173: regmask_set: Assertion `num < MAX_REG' failed. ----------------------------------------------- 8192 (0x2000) bytes 000000: 00003002 00000000 00000000 00000000 |.0..............| diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log index 798c0673438..afb8e7f899d 100644 --- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log +++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log @@ -640,8 +640,8 @@ t4 write SP_VS_OBJ_START_LO (a81c) - used (merged): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full @@ -662,8 +662,8 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) - used (merged): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full @@ -1110,8 +1110,8 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) - used (merged): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full @@ -1955,8 +1955,8 @@ t4 write SP_VS_OBJ_START_LO (a81c) - used (merged): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full @@ -1977,8 +1977,8 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) - used (merged): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full @@ -3498,8 +3498,8 @@ t4 write SP_FS_OBJ_START_LO (a983) - used (merged): 0-147 (cnt=148, max=147) - input (half): (cnt=0, max=0) - input (full): 19-20 (cnt=2, max=20) - - const (half): (cnt=0, max=0) - - const (full): 0-1 3-5 8-9 32-113 (cnt=89, max=113) + - max const: 113 + - output (half): (cnt=0, max=0) (estimated) - output (full): 4-7 (cnt=4, max=7) (estimated) - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full @@ -4921,8 +4921,8 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) - used (merged): 0-147 (cnt=148, max=147) - input (half): (cnt=0, max=0) - input (full): 19-20 (cnt=2, max=20) - - const (half): (cnt=0, max=0) - - const (full): 0-1 3-5 8-9 32-113 (cnt=89, max=113) + - max const: 113 + - output (half): (cnt=0, max=0) (estimated) - output (full): 4-7 (cnt=4, max=7) (estimated) - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full @@ -5335,8 +5335,8 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) - used (merged): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full @@ -6773,8 +6773,8 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) - used (merged): 0-147 (cnt=148, max=147) - input (half): (cnt=0, max=0) - input (full): 19-20 (cnt=2, max=20) - - const (half): (cnt=0, max=0) - - const (full): 0-1 3-5 8-9 32-113 (cnt=89, max=113) + - max const: 113 + - output (half): (cnt=0, max=0) (estimated) - output (full): 4-7 (cnt=4, max=7) (estimated) - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full diff --git a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log index 78930e4cacc..2abe7e8d2c5 100644 --- a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log +++ b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log @@ -427,11 +427,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): (cnt=0, max=0) - - used (merged): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full @@ -453,11 +452,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-3 (cnt=4, max=3) - - used (merged): 0-7 (cnt=8, max=7) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): 0-3 (cnt=4, max=3) + - max const: 3 + - output (half): (cnt=0, max=0) (estimated) - output (full): 0-3 (cnt=4, max=3) (estimated) - shaderdb: 9 instructions, 8 nops, 1 non-nops, (9 instlen), 0 half, 1 full @@ -1041,11 +1039,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-13 (cnt=14, max=13) - - used (merged): 0-27 (cnt=28, max=27) - input (half): (cnt=0, max=0) - input (full): 2-5 (cnt=4, max=5) - - const (half): (cnt=0, max=0) - - const (full): 0-18 20-26 32-34 36-38 40-42 52 (cnt=36, max=52) + - max const: 52 + - output (half): (cnt=0, max=0) (estimated) - output (full): 6-13 (cnt=8, max=13) (estimated) - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full @@ -1082,14 +1079,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0010:0010[00000000x_00000000x] nop Register Stats: - used (half): (cnt=0, max=0) - - used (full): 0-3 252 (cnt=5, max=3) - - used (merged): 0-7 504-505 (cnt=10, max=7) + - used (full): 0-3 (cnt=4, max=3) - input (half): (cnt=0, max=0) - input (full): 0-3 (cnt=4, max=3) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - - output (full): 252 (cnt=1, max=0) (estimated) + - output (full): (cnt=0, max=0) (estimated) - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full - shaderdb: 1 (ss), 0 (sy) 109ce878: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 @@ -1673,11 +1669,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-8 10-17 (cnt=17, max=17) - - used (merged): 0-17 20-35 (cnt=34, max=35) - input (half): (cnt=0, max=0) - input (full): 2-8 (cnt=7, max=8) - - const (half): (cnt=0, max=0) - - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52) + - max const: 52 + - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full @@ -1713,14 +1708,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0010:0010[00000000x_00000000x] nop Register Stats: - used (half): (cnt=0, max=0) - - used (full): 0-3 252 (cnt=5, max=3) - - used (merged): 0-7 504-505 (cnt=10, max=7) + - used (full): 0-3 (cnt=4, max=3) - input (half): (cnt=0, max=0) - input (full): 0-3 (cnt=4, max=3) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - - output (full): 252 (cnt=1, max=0) (estimated) + - output (full): (cnt=0, max=0) (estimated) - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full - shaderdb: 1 (ss), 0 (sy) 109cf040: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 @@ -2106,11 +2100,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-8 10-17 (cnt=17, max=17) - - used (merged): 0-17 20-35 (cnt=34, max=35) - input (half): (cnt=0, max=0) - input (full): 2-8 (cnt=7, max=8) - - const (half): (cnt=0, max=0) - - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52) + - max const: 52 + - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full @@ -2145,11 +2138,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0 2-5 (cnt=5, max=5) - - used (merged): 0-1 4-11 (cnt=10, max=11) - input (half): (cnt=0, max=0) - input (full): 0 (cnt=1, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full @@ -2500,11 +2492,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-13 (cnt=14, max=13) - - used (merged): 0-27 (cnt=28, max=27) - input (half): (cnt=0, max=0) - input (full): 2-5 (cnt=4, max=5) - - const (half): (cnt=0, max=0) - - const (full): 0-18 20-26 32-34 36-38 40-42 52 (cnt=36, max=52) + - max const: 52 + - output (half): (cnt=0, max=0) (estimated) - output (full): 6-13 (cnt=8, max=13) (estimated) - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full @@ -2541,14 +2532,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0010:0010[00000000x_00000000x] nop Register Stats: - used (half): (cnt=0, max=0) - - used (full): 0-3 252 (cnt=5, max=3) - - used (merged): 0-7 504-505 (cnt=10, max=7) + - used (full): 0-3 (cnt=4, max=3) - input (half): (cnt=0, max=0) - input (full): 0-3 (cnt=4, max=3) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - - output (full): 252 (cnt=1, max=0) (estimated) + - output (full): (cnt=0, max=0) (estimated) - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full - shaderdb: 1 (ss), 0 (sy) 109cfb78: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 @@ -3055,11 +3045,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-8 10-17 (cnt=17, max=17) - - used (merged): 0-17 20-35 (cnt=34, max=35) - input (half): (cnt=0, max=0) - input (full): 2-8 (cnt=7, max=8) - - const (half): (cnt=0, max=0) - - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52) + - max const: 52 + - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full @@ -3095,14 +3084,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0010:0010[00000000x_00000000x] nop Register Stats: - used (half): (cnt=0, max=0) - - used (full): 0-3 252 (cnt=5, max=3) - - used (merged): 0-7 504-505 (cnt=10, max=7) + - used (full): 0-3 (cnt=4, max=3) - input (half): (cnt=0, max=0) - input (full): 0-3 (cnt=4, max=3) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - - output (full): 252 (cnt=1, max=0) (estimated) + - output (full): (cnt=0, max=0) (estimated) - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full - shaderdb: 1 (ss), 0 (sy) 109d02c0: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 @@ -3488,11 +3476,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-8 10-17 (cnt=17, max=17) - - used (merged): 0-17 20-35 (cnt=34, max=35) - input (half): (cnt=0, max=0) - input (full): 2-8 (cnt=7, max=8) - - const (half): (cnt=0, max=0) - - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52) + - max const: 52 + - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full @@ -3527,11 +3514,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0 2-5 (cnt=5, max=5) - - used (merged): 0-1 4-11 (cnt=10, max=11) - input (half): (cnt=0, max=0) - input (full): 0 (cnt=1, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full @@ -3882,11 +3868,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-13 (cnt=14, max=13) - - used (merged): 0-27 (cnt=28, max=27) - input (half): (cnt=0, max=0) - input (full): 2-5 (cnt=4, max=5) - - const (half): (cnt=0, max=0) - - const (full): 0-18 20-26 32-34 36-38 40-42 52 (cnt=36, max=52) + - max const: 52 + - output (half): (cnt=0, max=0) (estimated) - output (full): 6-13 (cnt=8, max=13) (estimated) - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full @@ -3923,14 +3908,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0010:0010[00000000x_00000000x] nop Register Stats: - used (half): (cnt=0, max=0) - - used (full): 0-3 252 (cnt=5, max=3) - - used (merged): 0-7 504-505 (cnt=10, max=7) + - used (full): 0-3 (cnt=4, max=3) - input (half): (cnt=0, max=0) - input (full): 0-3 (cnt=4, max=3) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - - output (full): 252 (cnt=1, max=0) (estimated) + - output (full): (cnt=0, max=0) (estimated) - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full - shaderdb: 1 (ss), 0 (sy) 109d0df8: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 @@ -4437,11 +4421,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-8 10-17 (cnt=17, max=17) - - used (merged): 0-17 20-35 (cnt=34, max=35) - input (half): (cnt=0, max=0) - input (full): 2-8 (cnt=7, max=8) - - const (half): (cnt=0, max=0) - - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52) + - max const: 52 + - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full @@ -4477,14 +4460,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) :0:0010:0010[00000000x_00000000x] nop Register Stats: - used (half): (cnt=0, max=0) - - used (full): 0-3 252 (cnt=5, max=3) - - used (merged): 0-7 504-505 (cnt=10, max=7) + - used (full): 0-3 (cnt=4, max=3) - input (half): (cnt=0, max=0) - input (full): 0-3 (cnt=4, max=3) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - - output (full): 252 (cnt=1, max=0) (estimated) + - output (full): (cnt=0, max=0) (estimated) - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full - shaderdb: 1 (ss), 0 (sy) 109d1540: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 @@ -4870,11 +4852,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-8 10-17 (cnt=17, max=17) - - used (merged): 0-17 20-35 (cnt=34, max=35) - input (half): (cnt=0, max=0) - input (full): 2-8 (cnt=7, max=8) - - const (half): (cnt=0, max=0) - - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52) + - max const: 52 + - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full @@ -4909,11 +4890,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0 2-5 (cnt=5, max=5) - - used (merged): 0-1 4-11 (cnt=10, max=11) - input (half): (cnt=0, max=0) - input (full): 0 (cnt=1, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full @@ -5214,11 +5194,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): (cnt=0, max=0) - - used (merged): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): (cnt=0, max=0) + - max const: 0 + - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full @@ -5240,11 +5219,10 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) Register Stats: - used (half): (cnt=0, max=0) - used (full): 0-3 (cnt=4, max=3) - - used (merged): 0-7 (cnt=8, max=7) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - const (half): (cnt=0, max=0) - - const (full): 0-3 (cnt=4, max=3) + - max const: 3 + - output (half): (cnt=0, max=0) (estimated) - output (full): 0-3 (cnt=4, max=3) (estimated) - shaderdb: 9 instructions, 8 nops, 1 non-nops, (9 instlen), 0 half, 1 full diff --git a/src/freedreno/decode/disasm.h b/src/freedreno/common/disasm.h similarity index 90% rename from src/freedreno/decode/disasm.h rename to src/freedreno/common/disasm.h index de89ab49e90..1a2993e6f42 100644 --- a/src/freedreno/decode/disasm.h +++ b/src/freedreno/common/disasm.h @@ -24,6 +24,8 @@ #ifndef DISASM_H_ #define DISASM_H_ +#include +#include #include #include "compiler/shader_enums.h" @@ -32,7 +34,8 @@ enum debug_t { PRINT_RAW = 0x1, /* dump raw hexdump */ PRINT_VERBOSE = 0x2, - EXPAND_REPEAT = 0x4, + PRINT_STATS = 0x4, + EXPAND_REPEAT = 0x8, }; struct shader_stats { @@ -47,6 +50,8 @@ int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage typ int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id, struct shader_stats *stats); -void disasm_set_debug(enum debug_t debug); + +void disasm_a2xx_set_debug(enum debug_t debug); +void disasm_a3xx_set_debug(enum debug_t debug); #endif /* DISASM_H_ */ diff --git a/src/freedreno/common/meson.build b/src/freedreno/common/meson.build index a79fe8e9fe2..32b3f08f8ac 100644 --- a/src/freedreno/common/meson.build +++ b/src/freedreno/common/meson.build @@ -21,6 +21,7 @@ libfreedreno_common = static_library( 'freedreno_common', [ + 'disasm.h', 'freedreno_uuid.c', 'freedreno_uuid.h', 'freedreno_guardband.h', diff --git a/src/freedreno/decode/cffdump.c b/src/freedreno/decode/cffdump.c index 7fec7dcbd10..a9ceacdee4e 100644 --- a/src/freedreno/decode/cffdump.c +++ b/src/freedreno/decode/cffdump.c @@ -139,6 +139,7 @@ static const struct option opts[] = { int main(int argc, char **argv) { + enum debug_t debug = PRINT_RAW | PRINT_STATS; int ret = -1; int start = 0, end = 0x7ffffff, draw = -1; int c; @@ -153,7 +154,7 @@ int main(int argc, char **argv) /* option that set a flag, nothing to do */ break; case 'v': - disasm_set_debug(PRINT_RAW | EXPAND_REPEAT | PRINT_VERBOSE); + debug |= (PRINT_RAW | EXPAND_REPEAT | PRINT_VERBOSE); break; case 's': options.summary = true; @@ -192,6 +193,9 @@ int main(int argc, char **argv) } } + disasm_a2xx_set_debug(debug); + disasm_a3xx_set_debug(debug); + if (interactive) { pager_open(); } diff --git a/src/freedreno/decode/crashdec.c b/src/freedreno/decode/crashdec.c index 3b17d831d57..8c5ccb562c7 100644 --- a/src/freedreno/decode/crashdec.c +++ b/src/freedreno/decode/crashdec.c @@ -55,7 +55,7 @@ #include "pager.h" #include "rnnutil.h" #include "util.h" -#include "instr-a3xx.h" +#include "ir3/instr-a3xx.h" static FILE *in; @@ -223,7 +223,7 @@ void ir3_assert_handler(const char *expr, const char *file, int line, const char *func) { - printf("%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr); + printf("\n%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr); if (jmp_env_valid) longjmp(jmp_env, 1); abort(); @@ -1103,6 +1103,8 @@ main(int argc, char **argv) } } + disasm_a3xx_set_debug(PRINT_RAW); + if (interactive) { pager_open(); } diff --git a/src/freedreno/decode/disasm-a2xx.c b/src/freedreno/decode/disasm-a2xx.c index 80b8a00d348..e6d7ba30c88 100644 --- a/src/freedreno/decode/disasm-a2xx.c +++ b/src/freedreno/decode/disasm-a2xx.c @@ -49,7 +49,7 @@ static const char *levels[] = { "x", }; -enum debug_t debug; +static enum debug_t debug; static struct rnn *rnn; @@ -618,7 +618,7 @@ int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage typ return 0; } -void disasm_set_debug(enum debug_t d) +void disasm_a2xx_set_debug(enum debug_t d) { debug = d; } diff --git a/src/freedreno/decode/disasm-a3xx.c b/src/freedreno/decode/disasm-a3xx.c deleted file mode 100644 index 9645dc5f41b..00000000000 --- a/src/freedreno/decode/disasm-a3xx.c +++ /dev/null @@ -1,1641 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include "disasm.h" -#include "instr-a3xx.h" - -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - -extern enum debug_t debug; - -static const char *levels[] = { - "", - "\t", - "\t\t", - "\t\t\t", - "\t\t\t\t", - "\t\t\t\t\t", - "\t\t\t\t\t\t", - "\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t\t", - "x", - "x", - "x", - "x", - "x", - "x", -}; - -static const char *component = "xyzw"; - -static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", -}; - - -#define MAX_REG 4096 - -typedef struct { - uint8_t full[MAX_REG/8]; - uint8_t half[MAX_REG/8]; -} regmask_t; - -struct disasm_ctx { - FILE *out; - int level; - unsigned gpu_id; - - struct shader_stats *stats; - - /* we have to process the dst register after src to avoid tripping up - * the read-before-write detection - */ - unsigned last_dst; - bool last_dst_full; - bool last_dst_valid; - - /* current instruction repeat flag: */ - unsigned repeat; - /* current instruction repeat indx/offset (for --expand): */ - unsigned repeatidx; - - /* tracking for register usage */ - struct { - regmask_t used; - regmask_t used_merged; - regmask_t rbw; /* read before write */ - regmask_t war; /* write after read */ - regmask_t cnst; /* used consts */ - } regs; -}; - -static const char *float_imms[] = { - "0.0", - "0.5", - "1.0", - "2.0", - "e", - "pi", - "1/pi", - "1/log2(e)", - "log2(e)", - "1/log2(10)", - "log2(10)", - "4.0", -}; - -static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, - bool is_float, bool r, - bool c, bool im, bool neg, bool abs, bool addr_rel) -{ - const char type = c ? 'c' : 'r'; - - // XXX I prefer - and || for neg/abs, but preserving format used - // by libllvm-a3xx for easy diffing.. - - if (abs && neg) - fprintf(ctx->out, "(absneg)"); - else if (neg) - fprintf(ctx->out, "(neg)"); - else if (abs) - fprintf(ctx->out, "(abs)"); - - if (r) - fprintf(ctx->out, "(r)"); - - if (im) { - if (is_float && full && reg.iim_val < ARRAY_SIZE(float_imms)) { - fprintf(ctx->out, "(%s)", float_imms[reg.iim_val]); - } else { - fprintf(ctx->out, "%d", reg.iim_val); - } - } else if (addr_rel) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - if (reg.iim_val < 0) - fprintf(ctx->out, "%s%c", full ? "" : "h", type, -reg.iim_val); - else if (reg.iim_val > 0) - fprintf(ctx->out, "%s%c", full ? "" : "h", type, reg.iim_val); - else - fprintf(ctx->out, "%s%c", full ? "" : "h", type); - } else if ((reg.num == REG_A0) && !c) { - /* This matches libllvm output, the second (scalar) address register - * seems to be called a1.x instead of a0.y. - */ - fprintf(ctx->out, "a%d.x", reg.comp); - } else if ((reg.num == REG_P0) && !c) { - fprintf(ctx->out, "p0.%c", component[reg.comp]); - } else { - fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); - } -} - -/* Tracking for registers used, read-before-write (input), and - * write-after-read (output.. but not 100%).. - */ - -static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val) -{ - unsigned i = num / 8; - unsigned j = num % 8; - ir3_assert(num < MAX_REG); - if (full) { - regmask->full[i] = (regmask->full[i] & ~(1 << j)) | (val << j); - } else { - regmask->half[i] = (regmask->half[i] & ~(1 << j)) | (val << j); - } -} - -static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full) -{ - unsigned i = num / 8; - unsigned j = num % 8; - ir3_assert(num < MAX_REG); - if (full) { - return (regmask->full[i] >> j) & 0x1; - } else { - return (regmask->half[i] >> j) & 0x1; - } -} - -static unsigned regidx(reg_t reg) -{ - return (4 * reg.num) + reg.comp; -} - -static reg_t idxreg(unsigned idx) -{ - return (reg_t){ - .comp = idx & 0x3, - .num = idx >> 2, - }; -} - -static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full) -{ - int num, max = 0, cnt = 0; - int first, last; - - void print_sequence(void) - { - if (first != MAX_REG) { - if (first == last) { - fprintf(ctx->out, " %d", first); - } else { - fprintf(ctx->out, " %d-%d", first, last); - } - } - } - - first = last = MAX_REG; - - for (num = 0; num < MAX_REG; num++) { - if (regmask_get(regmask, num, full)) { - if (num != (last + 1)) { - print_sequence(); - first = num; - } - last = num; - if (num < (48*4)) - max = num; - cnt++; - } - } - - print_sequence(); - - fprintf(ctx->out, " (cnt=%d, max=%d)", cnt, max); - - return max; -} - -static void print_reg_stats(struct disasm_ctx *ctx) -{ - int fullreg, halfreg; - - fprintf(ctx->out, "%sRegister Stats:\n", levels[ctx->level]); - fprintf(ctx->out, "%s- used (half):", levels[ctx->level]); - halfreg = print_regs(ctx, &ctx->regs.used, false); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- used (full):", levels[ctx->level]); - fullreg = print_regs(ctx, &ctx->regs.used, true); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.used_merged, false); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- input (half):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.rbw, false); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- input (full):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.rbw, true); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- const (half):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.cnst, false); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- const (full):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.cnst, true); - fprintf(ctx->out, "\n"); - fprintf(ctx->out, "%s- output (half):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.war, false); - fprintf(ctx->out, " (estimated)\n"); - fprintf(ctx->out, "%s- output (full):", levels[ctx->level]); - print_regs(ctx, &ctx->regs.war, true); - fprintf(ctx->out, " (estimated)\n"); - - /* convert to vec4, which is the granularity that registers are - * assigned to shader: - */ - fullreg = (fullreg + 3) / 4; - halfreg = (halfreg + 3) / 4; - - // Note this count of instructions includes rptN, which matches - // up to how mesa prints this: - fprintf(ctx->out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, " - "(%d instlen), %d half, %d full\n", - levels[ctx->level], ctx->stats->instructions, ctx->stats->nops, - ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen, - halfreg, fullreg); - fprintf(ctx->out, "%s- shaderdb: %d (ss), %d (sy)\n", levels[ctx->level], - ctx->stats->ss, ctx->stats->sy); -} - -static void process_reg_dst(struct disasm_ctx *ctx) -{ - int i; - - if (!ctx->last_dst_valid) - return; - - for (i = 0; i <= ctx->repeat; i++) { - unsigned dst = ctx->last_dst + i; - - regmask_set(&ctx->regs.war, dst, ctx->last_dst_full, 1); - regmask_set(&ctx->regs.used, dst, ctx->last_dst_full, 1); - - if (ctx->last_dst_full) { - regmask_set(&ctx->regs.used_merged, (dst*2)+0, false, 1); - regmask_set(&ctx->regs.used_merged, (dst*2)+1, false, 1); - } else { - regmask_set(&ctx->regs.used_merged, dst, false, 1); - } - } - - ctx->last_dst_valid = false; -} - -static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) -{ - /* presumably the special registers a0.c and p0.c don't count.. */ - if (!(addr_rel || (reg.num == 61) || (reg.num == 62))) { - ctx->last_dst = regidx(reg); - ctx->last_dst_full = full; - ctx->last_dst_valid = true; - } - reg = idxreg(regidx(reg) + ctx->repeatidx); - print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel); -} - -static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool f, bool r, - bool c, bool im, bool neg, bool abs, bool addr_rel) -{ - /* presumably the special registers a0.c and p0.c don't count.. */ - if (!(addr_rel || c || im || (reg.num == 61) || (reg.num == 62))) { - int i, num = regidx(reg); - for (i = 0; i <= ctx->repeat; i++) { - unsigned src = num + i; - - if (!regmask_get(&ctx->regs.used, src, full)) - regmask_set(&ctx->regs.rbw, src, full, 1); - - regmask_set(&ctx->regs.war, src, full, 0); - regmask_set(&ctx->regs.used, src, full, 1); - - if (full) { - regmask_set(&ctx->regs.used_merged, (src*2)+0, false, 1); - regmask_set(&ctx->regs.used_merged, (src*2)+1, false, 1); - } else { - regmask_set(&ctx->regs.used_merged, src, false, 1); - } - - if (!r) - break; - } - } else if (c) { - int i, num = regidx(reg); - for (i = 0; i <= ctx->repeat; i++) { - unsigned src = num + i; - - regmask_set(&ctx->regs.cnst, src, full, 1); - - if (!r) - break; - } - - unsigned max = (num + ctx->repeat + 1 + 3) / 4; - if (max > ctx->stats->constlen) - ctx->stats->constlen = max; - } - - if (r) - reg = idxreg(regidx(reg) + ctx->repeatidx); - - print_reg(ctx, reg, full, f, r, c, im, neg, abs, addr_rel); -} - -/* TODO switch to using reginfo struct everywhere, since more readable - * than passing a bunch of bools to print_reg_src - */ - -struct reginfo { - reg_t reg; - bool full; - bool r; - bool c; - bool f; /* src reg is interpreted as float, used for printing immediates */ - bool im; - bool neg; - bool abs; - bool addr_rel; -}; - -static void print_src(struct disasm_ctx *ctx, struct reginfo *info) -{ - reg_t reg = info->reg; - - if (info->r) - reg = idxreg(regidx(info->reg) + ctx->repeatidx); - - print_reg_src(ctx, reg, info->full, info->f, info->r, info->c, info->im, - info->neg, info->abs, info->addr_rel); -} - -//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info) -//{ -// print_reg_dst(ctx, info->reg, info->full, info->addr_rel); -//} - -static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr) -{ - static const struct { - const char *suffix; - int nsrc; - bool idx; - } brinfo[7] = { - [BRANCH_PLAIN] = { "r", 1, false }, - [BRANCH_OR] = { "rao", 2, false }, - [BRANCH_AND] = { "raa", 2, false }, - [BRANCH_CONST] = { "rac", 0, true }, - [BRANCH_ANY] = { "any", 1, false }, - [BRANCH_ALL] = { "all", 1, false }, - [BRANCH_X] = { "rax", 0, false }, - }; - instr_cat0_t *cat0 = &instr->cat0; - - switch (instr_opc(instr, ctx->gpu_id)) { - case OPC_KILL: - case OPC_PREDT: - case OPC_PREDF: - fprintf(ctx->out, " %sp0.%c", cat0->inv0 ? "!" : "", - component[cat0->comp0]); - break; - case OPC_B: - fprintf(ctx->out, "%s", brinfo[cat0->brtype].suffix); - if (brinfo[cat0->brtype].idx) { - fprintf(ctx->out, ".%u", cat0->idx); - } - if (brinfo[cat0->brtype].nsrc >= 1) { - fprintf(ctx->out, " %sp0.%c,", cat0->inv0 ? "!" : "", - component[cat0->comp0]); - } - if (brinfo[cat0->brtype].nsrc >= 2) { - fprintf(ctx->out, " %sp0.%c,", cat0->inv1 ? "!" : "", - component[cat0->comp1]); - } - fprintf(ctx->out, " #%d", cat0->a3xx.immed); - break; - case OPC_JUMP: - case OPC_CALL: - case OPC_BKT: - case OPC_GETONE: - case OPC_SHPS: - fprintf(ctx->out, " #%d", cat0->a3xx.immed); - break; - } - - if ((debug & PRINT_VERBOSE) && (cat0->dummy3|cat0->dummy4)) - fprintf(ctx->out, "\t{0: %x,%x}", cat0->dummy3, cat0->dummy4); -} - -static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat1_t *cat1 = &instr->cat1; - - if (cat1->ul) - fprintf(ctx->out, "(ul)"); - - if (cat1->src_type == cat1->dst_type) { - if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { - /* special case (nmemonic?): */ - fprintf(ctx->out, "mova"); - } else { - fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - } else { - fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - - fprintf(ctx->out, " "); - - if (cat1->even) - fprintf(ctx->out, "(even)"); - - if (cat1->pos_inf) - fprintf(ctx->out, "(pos_infinity)"); - - print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, - cat1->dst_rel); - - fprintf(ctx->out, ", "); - - /* ugg, have to special case this.. vs print_reg().. */ - if (cat1->src_im) { - if (type_float(cat1->src_type)) - fprintf(ctx->out, "(%f)", cat1->fim_val); - else if (type_uint(cat1->src_type)) - fprintf(ctx->out, "0x%08x", cat1->uim_val); - else - fprintf(ctx->out, "%d", cat1->iim_val); - } else if (cat1->src_rel && !cat1->src_c) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - char type = cat1->src_rel_c ? 'c' : 'r'; - const char *full = (type_size(cat1->src_type) == 32) ? "" : "h"; - if (cat1->off < 0) - fprintf(ctx->out, "%s%c", full, type, -cat1->off); - else if (cat1->off > 0) - fprintf(ctx->out, "%s%c", full, type, cat1->off); - else - fprintf(ctx->out, "%s%c", full, type); - } else { - struct reginfo src = { - .reg = (reg_t)cat1->src, - .full = type_size(cat1->src_type) == 32, - .r = cat1->src_r, - .c = cat1->src_c, - .im = cat1->src_im, - }; - print_src(ctx, &src); - } - - if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) - fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0); -} - -static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat2_t *cat2 = &instr->cat2; - int opc = _OPC(2, cat2->opc); - static const char *cond[] = { - "lt", - "le", - "gt", - "ge", - "eq", - "ne", - "?6?", - }; - - switch (opc) { - case OPC_CMPS_F: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_CMPV_F: - case OPC_CMPV_U: - case OPC_CMPV_S: - fprintf(ctx->out, ".%s", cond[cat2->cond]); - break; - } - - fprintf(ctx->out, " "); - if (cat2->ei) - fprintf(ctx->out, "(ei)"); - print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); - fprintf(ctx->out, ", "); - - struct reginfo src1 = { - .full = cat2->full, - .r = cat2->repeat ? cat2->src1_r : 0, - .f = is_cat2_float(opc), - .im = cat2->src1_im, - .abs = cat2->src1_abs, - .neg = cat2->src1_neg, - }; - - if (cat2->c1.src1_c) { - src1.reg = (reg_t)(cat2->c1.src1); - src1.c = true; - } else if (cat2->rel1.src1_rel) { - src1.reg = (reg_t)(cat2->rel1.src1); - src1.c = cat2->rel1.src1_c; - src1.addr_rel = true; - } else { - src1.reg = (reg_t)(cat2->src1); - } - print_src(ctx, &src1); - - struct reginfo src2 = { - .r = cat2->repeat ? cat2->src2_r : 0, - .full = cat2->full, - .f = is_cat2_float(opc), - .abs = cat2->src2_abs, - .neg = cat2->src2_neg, - .im = cat2->src2_im, - }; - switch (opc) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - break; - default: - fprintf(ctx->out, ", "); - if (cat2->c2.src2_c) { - src2.reg = (reg_t)(cat2->c2.src2); - src2.c = true; - } else if (cat2->rel2.src2_rel) { - src2.reg = (reg_t)(cat2->rel2.src2); - src2.c = cat2->rel2.src2_c; - src2.addr_rel = true; - } else { - src2.reg = (reg_t)(cat2->src2); - } - print_src(ctx, &src2); - break; - } -} - -static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat3_t *cat3 = &instr->cat3; - bool full = instr_cat3_full(cat3); - - fprintf(ctx->out, " "); - print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false); - fprintf(ctx->out, ", "); - - struct reginfo src1 = { - .r = cat3->repeat ? cat3->src1_r : 0, - .full = full, - .neg = cat3->src1_neg, - }; - if (cat3->c1.src1_c) { - src1.reg = (reg_t)(cat3->c1.src1); - src1.c = true; - } else if (cat3->rel1.src1_rel) { - src1.reg = (reg_t)(cat3->rel1.src1); - src1.c = cat3->rel1.src1_c; - src1.addr_rel = true; - } else { - src1.reg = (reg_t)(cat3->src1); - } - print_src(ctx, &src1); - - fprintf(ctx->out, ", "); - struct reginfo src2 = { - .reg = (reg_t)cat3->src2, - .full = full, - .r = cat3->repeat ? cat3->src2_r : 0, - .c = cat3->src2_c, - .neg = cat3->src2_neg, - }; - print_src(ctx, &src2); - - fprintf(ctx->out, ", "); - struct reginfo src3 = { - .r = cat3->src3_r, - .full = full, - .neg = cat3->src3_neg, - }; - if (cat3->c2.src3_c) { - src3.reg = (reg_t)(cat3->c2.src3); - src3.c = true; - } else if (cat3->rel2.src3_rel) { - src3.reg = (reg_t)(cat3->rel2.src3); - src3.c = cat3->rel2.src3_c; - src3.addr_rel = true; - } else { - src3.reg = (reg_t)(cat3->src3); - } - print_src(ctx, &src3); -} - -static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat4_t *cat4 = &instr->cat4; - - fprintf(ctx->out, " "); - print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); - fprintf(ctx->out, ", "); - - struct reginfo src = { - .r = cat4->src_r, - .im = cat4->src_im, - .full = cat4->full, - .neg = cat4->src_neg, - .abs = cat4->src_abs, - }; - if (cat4->c.src_c) { - src.reg = (reg_t)(cat4->c.src); - src.c = true; - } else if (cat4->rel.src_rel) { - src.reg = (reg_t)(cat4->rel.src); - src.c = cat4->rel.src_c; - src.addr_rel = true; - } else { - src.reg = (reg_t)(cat4->src); - } - print_src(ctx, &src); - - if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) - fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); -} - -static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr) -{ - static const struct { - bool src1, src2, samp, tex; - } info[0x1f] = { - [opc_op(OPC_ISAM)] = { true, false, true, true, }, - [opc_op(OPC_ISAML)] = { true, true, true, true, }, - [opc_op(OPC_ISAMM)] = { true, false, true, true, }, - [opc_op(OPC_SAM)] = { true, false, true, true, }, - [opc_op(OPC_SAMB)] = { true, true, true, true, }, - [opc_op(OPC_SAML)] = { true, true, true, true, }, - [opc_op(OPC_SAMGQ)] = { true, false, true, true, }, - [opc_op(OPC_GETLOD)] = { true, false, true, true, }, - [opc_op(OPC_CONV)] = { true, true, true, true, }, - [opc_op(OPC_CONVM)] = { true, true, true, true, }, - [opc_op(OPC_GETSIZE)] = { true, false, false, true, }, - [opc_op(OPC_GETBUF)] = { false, false, false, true, }, - [opc_op(OPC_GETPOS)] = { true, false, false, true, }, - [opc_op(OPC_GETINFO)] = { false, false, false, true, }, - [opc_op(OPC_DSX)] = { true, false, false, false, }, - [opc_op(OPC_DSY)] = { true, false, false, false, }, - [opc_op(OPC_GATHER4R)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP0)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, - [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, - [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, - [opc_op(OPC_RGETPOS)] = { true, false, false, false, }, - [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, - }; - - static const struct { - bool indirect; - bool bindless; - bool use_a1; - bool uniform; - } desc_features[8] = { - [CAT5_NONUNIFORM] = { .indirect = true, }, - [CAT5_UNIFORM] = { .indirect = true, .uniform = true, }, - [CAT5_BINDLESS_IMM] = { .bindless = true, }, - [CAT5_BINDLESS_UNIFORM] = { - .bindless = true, - .indirect = true, - .uniform = true, - }, - [CAT5_BINDLESS_NONUNIFORM] = { - .bindless = true, - .indirect = true, - }, - [CAT5_BINDLESS_A1_IMM] = { - .bindless = true, - .use_a1 = true, - }, - [CAT5_BINDLESS_A1_UNIFORM] = { - .bindless = true, - .indirect = true, - .uniform = true, - .use_a1 = true, - }, - [CAT5_BINDLESS_A1_NONUNIFORM] = { - .bindless = true, - .indirect = true, - .use_a1 = true, - }, - }; - - instr_cat5_t *cat5 = &instr->cat5; - int i; - - bool desc_indirect = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].indirect; - bool bindless = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].bindless; - bool use_a1 = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].use_a1; - bool uniform = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].uniform; - - if (cat5->is_3d) fprintf(ctx->out, ".3d"); - if (cat5->is_a) fprintf(ctx->out, ".a"); - if (cat5->is_o) fprintf(ctx->out, ".o"); - if (cat5->is_p) fprintf(ctx->out, ".p"); - if (cat5->is_s) fprintf(ctx->out, ".s"); - if (desc_indirect) fprintf(ctx->out, ".s2en"); - if (uniform) fprintf(ctx->out, ".uniform"); - - if (bindless) { - unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo; - fprintf(ctx->out, ".base%d", base); - } - - fprintf(ctx->out, " "); - - switch (_OPC(5, cat5->opc)) { - case OPC_DSXPP_1: - case OPC_DSYPP_1: - break; - default: - fprintf(ctx->out, "(%s)", type[cat5->type]); - break; - } - - fprintf(ctx->out, "("); - for (i = 0; i < 4; i++) - if (cat5->wrmask & (1 << i)) - fprintf(ctx->out, "%c", "xyzw"[i]); - fprintf(ctx->out, ")"); - - print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false); - - if (info[cat5->opc].src1) { - fprintf(ctx->out, ", "); - struct reginfo src = { .reg = (reg_t)(cat5->src1), .full = cat5->full }; - print_src(ctx, &src); - } - - if (cat5->is_o || info[cat5->opc].src2) { - fprintf(ctx->out, ", "); - struct reginfo src = { .reg = (reg_t)(cat5->src2), .full = cat5->full }; - print_src(ctx, &src); - } - if (cat5->is_s2en_bindless) { - if (!desc_indirect) { - if (info[cat5->opc].samp) { - if (use_a1) - fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3); - else - fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf); - } - - if (info[cat5->opc].tex && !use_a1) { - fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4); - } - } - } else { - if (info[cat5->opc].samp) - fprintf(ctx->out, ", s#%d", cat5->norm.samp); - if (info[cat5->opc].tex) - fprintf(ctx->out, ", t#%d", cat5->norm.tex); - } - - if (desc_indirect) { - fprintf(ctx->out, ", "); - struct reginfo src = { .reg = (reg_t)(cat5->s2en_bindless.src3), .full = bindless }; - print_src(ctx, &src); - } - - if (use_a1) - fprintf(ctx->out, ", a1.x"); - - if (debug & PRINT_VERBOSE) { - if (cat5->is_s2en_bindless) { - if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1) - fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1); - } else { - if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1) - fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1); - } - } -} - -static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat6_t *cat6 = &instr->cat6; - char sd = 0, ss = 0; /* dst/src address space */ - bool nodst = false; - struct reginfo dst, src1, src2; - int src1off = 0, dstoff = 0; - - memset(&dst, 0, sizeof(dst)); - memset(&src1, 0, sizeof(src1)); - memset(&src2, 0, sizeof(src2)); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - dst.full = type_size(cat6->type) == 32; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - case OPC_L2G: - case OPC_G2L: - dst.full = true; - src1.full = true; - src2.full = true; - break; - case OPC_STG: - case OPC_STL: - case OPC_STP: - case OPC_STLW: - case OPC_STIB: - dst.full = type_size(cat6->type) == 32; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - default: - dst.full = type_size(cat6->type) == 32; - src1.full = true; - src2.full = true; - break; - } - - switch (_OPC(6, cat6->opc)) { - case OPC_PREFETCH: - break; - case OPC_RESINFO: - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - break; - case OPC_LDGB: - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - break; - case OPC_STGB: - case OPC_STIB: - fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->stgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1); - break; - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - ss = cat6->g ? 'g' : 'l'; - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - fprintf(ctx->out, ".%c", ss); - break; - default: - dst.im = cat6->g && !cat6->dst_off; - fprintf(ctx->out, ".%s", type[cat6->type]); - break; - } - fprintf(ctx->out, " "); - - switch (_OPC(6, cat6->opc)) { - case OPC_STG: - sd = 'g'; - break; - case OPC_STP: - sd = 'p'; - break; - case OPC_STL: - case OPC_STLW: - sd = 'l'; - break; - - case OPC_LDG: - case OPC_LDC: - ss = 'g'; - break; - case OPC_LDP: - ss = 'p'; - break; - case OPC_LDL: - case OPC_LDLW: - case OPC_LDLV: - ss = 'l'; - break; - - case OPC_L2G: - ss = 'l'; - sd = 'g'; - break; - - case OPC_G2L: - ss = 'g'; - sd = 'l'; - break; - - case OPC_PREFETCH: - ss = 'g'; - nodst = true; - break; - } - - if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) { - struct reginfo src3; - - memset(&src3, 0, sizeof(src3)); - - src1.reg = (reg_t)(cat6->stgb.src1); - src2.reg = (reg_t)(cat6->stgb.src2); - src2.im = cat6->stgb.src2_im; - src3.reg = (reg_t)(cat6->stgb.src3); - src3.im = cat6->stgb.src3_im; - src3.full = true; - - fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo); - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - fprintf(ctx->out, ", "); - print_src(ctx, &src3); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3); - - return; - } - - if (is_atomic(_OPC(6, cat6->opc))) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - if (ss == 'g') { - struct reginfo src3; - memset(&src3, 0, sizeof(src3)); - - src3.reg = (reg_t)(cat6->ldgb.src3); - src3.full = true; - - /* For images, the ".typed" variant is used and src2 is - * the ivecN coordinates, ie ivec2 for 2d. - * - * For SSBOs, the ".untyped" variant is used and src2 is - * a simple dword offset.. src3 appears to be - * uvec2(offset * 4, 0). Not sure the point of that. - */ - - fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); - print_src(ctx, &src1); /* value */ - fprintf(ctx->out, ", "); - print_src(ctx, &src2); /* offset/coords */ - fprintf(ctx->out, ", "); - print_src(ctx, &src3); /* 64b byte offset.. */ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, - cat6->ldgb.pad3, cat6->ldgb.mustbe0); - } - } else { /* ss == 'l' */ - fprintf(ctx->out, "l["); - print_src(ctx, &src1); /* simple byte offset */ - fprintf(ctx->out, "], "); - print_src(ctx, &src2); /* value */ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)", - cat6->ldgb.src3, cat6->ldgb.pad0, - cat6->ldgb.pad3, cat6->ldgb.mustbe0); - } - } - - return; - } else if (_OPC(6, cat6->opc) == OPC_RESINFO) { - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo); - - return; - } else if (_OPC(6, cat6->opc) == OPC_LDGB) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0); - - return; - } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src2_im) { - struct reginfo src3; - - memset(&src3, 0, sizeof(src3)); - src1.reg = (reg_t)(cat6->a.src1); - src2.reg = (reg_t)(cat6->a.src2); - src2.im = cat6->a.src2_im; - src3.reg = (reg_t)(cat6->a.off); - src3.full = true; - dst.reg = (reg_t)(cat6->d.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", g["); - print_src(ctx, &src1); - fprintf(ctx->out, "+"); - print_src(ctx, &src3); - fprintf(ctx->out, "], "); - print_src(ctx, &src2); - - return; - } - if (cat6->dst_off) { - dst.reg = (reg_t)(cat6->c.dst); - dstoff = cat6->c.off; - } else { - dst.reg = (reg_t)(cat6->d.dst); - } - - if (cat6->src_off) { - src1.reg = (reg_t)(cat6->a.src1); - src1.im = cat6->a.src1_im; - src2.reg = (reg_t)(cat6->a.src2); - src2.im = cat6->a.src2_im; - src1off = cat6->a.off; - } else { - src1.reg = (reg_t)(cat6->b.src1); - src1.im = cat6->b.src1_im; - src2.reg = (reg_t)(cat6->b.src2); - src2.im = cat6->b.src2_im; - } - - if (!nodst) { - if (sd) - fprintf(ctx->out, "%c[", sd); - /* note: dst might actually be a src (ie. address to store to) */ - print_src(ctx, &dst); - if (cat6->dst_off && cat6->g) { - struct reginfo dstoff_reg = {0}; - dstoff_reg.reg = (reg_t) cat6->c.off; - dstoff_reg.full = true; - fprintf(ctx->out, "+"); - print_src(ctx, &dstoff_reg); - } else if (dstoff) - fprintf(ctx->out, "%+d", dstoff); - if (sd) - fprintf(ctx->out, "]"); - fprintf(ctx->out, ", "); - } - - if (ss) - fprintf(ctx->out, "%c[", ss); - - /* can have a larger than normal immed, so hack: */ - if (src1.im) { - fprintf(ctx->out, "%u", src1.reg.dummy13); - } else { - print_src(ctx, &src1); - } - - if (cat6->src_off && cat6->g) - print_src(ctx, &src2); - else if (src1off) - fprintf(ctx->out, "%+d", src1off); - if (ss) - fprintf(ctx->out, "]"); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - break; - default: - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - break; - } -} - -static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx; - struct reginfo src1, src2, ssbo; - bool uses_type = _OPC(6, cat6->opc) != OPC_LDC; - - static const struct { - bool indirect; - bool bindless; - const char *name; - } desc_features[8] = { - [CAT6_IMM] = { - .name = "imm" - }, - [CAT6_UNIFORM] = { - .indirect = true, - .name = "uniform" - }, - [CAT6_NONUNIFORM] = { - .indirect = true, - .name = "nonuniform" - }, - [CAT6_BINDLESS_IMM] = { - .bindless = true, - .name = "imm" - }, - [CAT6_BINDLESS_UNIFORM] = { - .bindless = true, - .indirect = true, - .name = "uniform" - }, - [CAT6_BINDLESS_NONUNIFORM] = { - .bindless = true, - .indirect = true, - .name = "nonuniform" - }, - }; - - bool indirect_ssbo = desc_features[cat6->desc_mode].indirect; - bool bindless = desc_features[cat6->desc_mode].bindless; - bool type_full = cat6->type != TYPE_U16; - - - memset(&src1, 0, sizeof(src1)); - memset(&src2, 0, sizeof(src2)); - memset(&ssbo, 0, sizeof(ssbo)); - - if (uses_type) { - fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - } else { - fprintf(ctx->out, ".offset%d", cat6->d); - } - fprintf(ctx->out, ".%u", cat6->type_size + 1); - - fprintf(ctx->out, ".%s", desc_features[cat6->desc_mode].name); - if (bindless) - fprintf(ctx->out, ".base%d", cat6->base); - fprintf(ctx->out, " "); - - src2.reg = (reg_t)(cat6->src2); - src2.full = type_full; - print_src(ctx, &src2); - fprintf(ctx->out, ", "); - - src1.reg = (reg_t)(cat6->src1); - src1.full = true; // XXX - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - ssbo.reg = (reg_t)(cat6->ssbo); - ssbo.im = !indirect_ssbo; - ssbo.full = true; - print_src(ctx, &ssbo); - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)", - cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5); - } -} - -static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr) -{ - if (!is_cat6_legacy(instr, ctx->gpu_id)) { - print_instr_cat6_a6xx(ctx, instr); - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " NEW"); - } else { - print_instr_cat6_a3xx(ctx, instr); - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " LEGACY"); - } -} -static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat7_t *cat7 = &instr->cat7; - - if (cat7->g) - fprintf(ctx->out, ".g"); - if (cat7->l) - fprintf(ctx->out, ".l"); - - if (_OPC(7, cat7->opc) == OPC_FENCE) { - if (cat7->r) - fprintf(ctx->out, ".r"); - if (cat7->w) - fprintf(ctx->out, ".w"); - } -} - -/* size of largest OPC field of all the instruction categories: */ -#define NOPC_BITS 6 - -static const struct opc_info { - uint16_t cat; - uint16_t opc; - const char *name; - void (*print)(struct disasm_ctx *ctx, instr_t *instr); -} opcs[1 << (3+NOPC_BITS)] = { -#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } - /* category 0: */ - OPC(0, OPC_NOP, nop), - OPC(0, OPC_B, b), - OPC(0, OPC_JUMP, jump), - OPC(0, OPC_CALL, call), - OPC(0, OPC_RET, ret), - OPC(0, OPC_KILL, kill), - OPC(0, OPC_END, end), - OPC(0, OPC_EMIT, emit), - OPC(0, OPC_CUT, cut), - OPC(0, OPC_CHMASK, chmask), - OPC(0, OPC_CHSH, chsh), - OPC(0, OPC_FLOW_REV, flow_rev), - OPC(0, OPC_PREDT, predt), - OPC(0, OPC_PREDF, predf), - OPC(0, OPC_PREDE, prede), - OPC(0, OPC_BKT, bkt), - OPC(0, OPC_STKS, stks), - OPC(0, OPC_STKR, stkr), - OPC(0, OPC_XSET, xset), - OPC(0, OPC_XCLR, xclr), - OPC(0, OPC_GETONE, getone), - OPC(0, OPC_DBG, dbg), - OPC(0, OPC_SHPS, shps), - OPC(0, OPC_SHPE, shpe), - - /* category 1: */ - OPC(1, OPC_MOV, ), - - /* category 2: */ - OPC(2, OPC_ADD_F, add.f), - OPC(2, OPC_MIN_F, min.f), - OPC(2, OPC_MAX_F, max.f), - OPC(2, OPC_MUL_F, mul.f), - OPC(2, OPC_SIGN_F, sign.f), - OPC(2, OPC_CMPS_F, cmps.f), - OPC(2, OPC_ABSNEG_F, absneg.f), - OPC(2, OPC_CMPV_F, cmpv.f), - OPC(2, OPC_FLOOR_F, floor.f), - OPC(2, OPC_CEIL_F, ceil.f), - OPC(2, OPC_RNDNE_F, rndne.f), - OPC(2, OPC_RNDAZ_F, rndaz.f), - OPC(2, OPC_TRUNC_F, trunc.f), - OPC(2, OPC_ADD_U, add.u), - OPC(2, OPC_ADD_S, add.s), - OPC(2, OPC_SUB_U, sub.u), - OPC(2, OPC_SUB_S, sub.s), - OPC(2, OPC_CMPS_U, cmps.u), - OPC(2, OPC_CMPS_S, cmps.s), - OPC(2, OPC_MIN_U, min.u), - OPC(2, OPC_MIN_S, min.s), - OPC(2, OPC_MAX_U, max.u), - OPC(2, OPC_MAX_S, max.s), - OPC(2, OPC_ABSNEG_S, absneg.s), - OPC(2, OPC_AND_B, and.b), - OPC(2, OPC_OR_B, or.b), - OPC(2, OPC_NOT_B, not.b), - OPC(2, OPC_XOR_B, xor.b), - OPC(2, OPC_CMPV_U, cmpv.u), - OPC(2, OPC_CMPV_S, cmpv.s), - OPC(2, OPC_MUL_U24, mul.u24), - OPC(2, OPC_MUL_S24, mul.s24), - OPC(2, OPC_MULL_U, mull.u), - OPC(2, OPC_BFREV_B, bfrev.b), - OPC(2, OPC_CLZ_S, clz.s), - OPC(2, OPC_CLZ_B, clz.b), - OPC(2, OPC_SHL_B, shl.b), - OPC(2, OPC_SHR_B, shr.b), - OPC(2, OPC_ASHR_B, ashr.b), - OPC(2, OPC_BARY_F, bary.f), - OPC(2, OPC_MGEN_B, mgen.b), - OPC(2, OPC_GETBIT_B, getbit.b), - OPC(2, OPC_SETRM, setrm), - OPC(2, OPC_CBITS_B, cbits.b), - OPC(2, OPC_SHB, shb), - OPC(2, OPC_MSAD, msad), - - /* category 3: */ - OPC(3, OPC_MAD_U16, mad.u16), - OPC(3, OPC_MADSH_U16, madsh.u16), - OPC(3, OPC_MAD_S16, mad.s16), - OPC(3, OPC_MADSH_M16, madsh.m16), - OPC(3, OPC_MAD_U24, mad.u24), - OPC(3, OPC_MAD_S24, mad.s24), - OPC(3, OPC_MAD_F16, mad.f16), - OPC(3, OPC_MAD_F32, mad.f32), - OPC(3, OPC_SEL_B16, sel.b16), - OPC(3, OPC_SEL_B32, sel.b32), - OPC(3, OPC_SEL_S16, sel.s16), - OPC(3, OPC_SEL_S32, sel.s32), - OPC(3, OPC_SEL_F16, sel.f16), - OPC(3, OPC_SEL_F32, sel.f32), - OPC(3, OPC_SAD_S16, sad.s16), - OPC(3, OPC_SAD_S32, sad.s32), - - /* category 4: */ - OPC(4, OPC_RCP, rcp), - OPC(4, OPC_RSQ, rsq), - OPC(4, OPC_LOG2, log2), - OPC(4, OPC_EXP2, exp2), - OPC(4, OPC_SIN, sin), - OPC(4, OPC_COS, cos), - OPC(4, OPC_SQRT, sqrt), - OPC(4, OPC_HRSQ, hrsq), - OPC(4, OPC_HLOG2, hlog2), - OPC(4, OPC_HEXP2, hexp2), - - /* category 5: */ - OPC(5, OPC_ISAM, isam), - OPC(5, OPC_ISAML, isaml), - OPC(5, OPC_ISAMM, isamm), - OPC(5, OPC_SAM, sam), - OPC(5, OPC_SAMB, samb), - OPC(5, OPC_SAML, saml), - OPC(5, OPC_SAMGQ, samgq), - OPC(5, OPC_GETLOD, getlod), - OPC(5, OPC_CONV, conv), - OPC(5, OPC_CONVM, convm), - OPC(5, OPC_GETSIZE, getsize), - OPC(5, OPC_GETBUF, getbuf), - OPC(5, OPC_GETPOS, getpos), - OPC(5, OPC_GETINFO, getinfo), - OPC(5, OPC_DSX, dsx), - OPC(5, OPC_DSY, dsy), - OPC(5, OPC_GATHER4R, gather4r), - OPC(5, OPC_GATHER4G, gather4g), - OPC(5, OPC_GATHER4B, gather4b), - OPC(5, OPC_GATHER4A, gather4a), - OPC(5, OPC_SAMGP0, samgp0), - OPC(5, OPC_SAMGP1, samgp1), - OPC(5, OPC_SAMGP2, samgp2), - OPC(5, OPC_SAMGP3, samgp3), - OPC(5, OPC_DSXPP_1, dsxpp.1), - OPC(5, OPC_DSYPP_1, dsypp.1), - OPC(5, OPC_RGETPOS, rgetpos), - OPC(5, OPC_RGETINFO, rgetinfo), - - - /* category 6: */ - OPC(6, OPC_LDG, ldg), - OPC(6, OPC_LDL, ldl), - OPC(6, OPC_LDP, ldp), - OPC(6, OPC_STG, stg), - OPC(6, OPC_STL, stl), - OPC(6, OPC_STP, stp), - OPC(6, OPC_LDIB, ldib), - OPC(6, OPC_G2L, g2l), - OPC(6, OPC_L2G, l2g), - OPC(6, OPC_PREFETCH, prefetch), - OPC(6, OPC_LDLW, ldlw), - OPC(6, OPC_STLW, stlw), - OPC(6, OPC_RESFMT, resfmt), - OPC(6, OPC_RESINFO, resinfo), - OPC(6, OPC_ATOMIC_ADD, atomic.add), - OPC(6, OPC_ATOMIC_SUB, atomic.sub), - OPC(6, OPC_ATOMIC_XCHG, atomic.xchg), - OPC(6, OPC_ATOMIC_INC, atomic.inc), - OPC(6, OPC_ATOMIC_DEC, atomic.dec), - OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg), - OPC(6, OPC_ATOMIC_MIN, atomic.min), - OPC(6, OPC_ATOMIC_MAX, atomic.max), - OPC(6, OPC_ATOMIC_AND, atomic.and), - OPC(6, OPC_ATOMIC_OR, atomic.or), - OPC(6, OPC_ATOMIC_XOR, atomic.xor), - OPC(6, OPC_LDGB, ldgb), - OPC(6, OPC_STGB, stgb), - OPC(6, OPC_STIB, stib), - OPC(6, OPC_LDC, ldc), - OPC(6, OPC_LDLV, ldlv), - - OPC(7, OPC_BAR, bar), - OPC(7, OPC_FENCE, fence), - - -#undef OPC -}; - -#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)])) - -static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr) -{ - const char *name = GETINFO(instr)->name; - uint32_t opc = instr_opc(instr, ctx->gpu_id); - - if (name) { - fprintf(ctx->out, "%s", name); - GETINFO(instr)->print(ctx, instr); - } else { - fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc); - - switch (instr->opc_cat) { - case 0: print_instr_cat0(ctx, instr); break; - case 1: print_instr_cat1(ctx, instr); break; - case 2: print_instr_cat2(ctx, instr); break; - case 3: print_instr_cat3(ctx, instr); break; - case 4: print_instr_cat4(ctx, instr); break; - case 5: print_instr_cat5(ctx, instr); break; - case 6: print_instr_cat6(ctx, instr); break; - case 7: print_instr_cat7(ctx, instr); break; - } - } -} - -static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) -{ - instr_t *instr = (instr_t *)dwords; - uint32_t opc = instr_opc(instr, ctx->gpu_id); - unsigned nop = 0; - unsigned cycles = ctx->stats->instructions; - - fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level], - instr->opc_cat, n, cycles++, dwords[1], dwords[0]); - -#if 0 - /* print unknown bits: */ - if (debug & PRINT_RAW) - fprintf(ctx->out, "[%08xx_%08xx] ", dwords[1] & 0x001ff800, dwords[0] & 0x00000000); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, "%d,%02d ", instr->opc_cat, opc); -#endif - - /* NOTE: order flags are printed is a bit fugly.. but for now I - * try to match the order in llvm-a3xx disassembler for easy - * diff'ing.. - */ - - ctx->repeat = instr_repeat(instr); - ctx->stats->instructions += 1 + ctx->repeat; - ctx->stats->instlen++; - - if (instr->sync) { - fprintf(ctx->out, "(sy)"); - ctx->stats->sy++; - } - if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) { - fprintf(ctx->out, "(ss)"); - ctx->stats->ss++; - } - if (instr->jmp_tgt) - fprintf(ctx->out, "(jp)"); - if ((instr->opc_cat == 0) && instr->cat0.eq) - fprintf(ctx->out, "(eq)"); - if (instr_sat(instr)) - fprintf(ctx->out, "(sat)"); - if (ctx->repeat) - fprintf(ctx->out, "(rpt%d)", ctx->repeat); - else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r)) - nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; - else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) - nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r; - ctx->stats->instructions += nop; - ctx->stats->nops += nop; - if (opc == OPC_NOP) - ctx->stats->nops += 1 + ctx->repeat; - if (nop) - fprintf(ctx->out, "(nop%d) ", nop); - - if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) - fprintf(ctx->out, "(ul)"); - - print_single_instr(ctx, instr); - fprintf(ctx->out, "\n"); - - process_reg_dst(ctx); - - if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) { - int i; - for (i = 0; i < nop; i++) { - fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", - levels[ctx->level], instr->opc_cat, n, cycles++); - fprintf(ctx->out, "nop\n"); - } - for (i = 0; i < ctx->repeat; i++) { - ctx->repeatidx = i + 1; - fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", - levels[ctx->level], instr->opc_cat, n, cycles++); - - print_single_instr(ctx, instr); - fprintf(ctx->out, "\n"); - } - ctx->repeatidx = 0; - } - - return (instr->opc_cat == 0) && - ((opc == OPC_END) || (opc == OPC_CHSH)); -} - -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id) -{ - struct shader_stats stats; - return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats); -} - -int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out, - unsigned gpu_id, struct shader_stats *stats) -{ - struct disasm_ctx ctx; - int i; - int nop_count = 0; - bool has_end = false; - -// ir3_assert((sizedwords % 2) == 0); - - memset(&ctx, 0, sizeof(ctx)); - ctx.out = out; - ctx.level = level; - ctx.gpu_id = gpu_id; - ctx.stats = stats; - memset(ctx.stats, 0, sizeof(*ctx.stats)); - - for (i = 0; i < sizedwords; i += 2) { - has_end |= print_instr(&ctx, &dwords[i], i/2); - if (!has_end) - continue; - if (dwords[i] == 0 && dwords[i + 1] == 0) - nop_count++; - else - nop_count = 0; - if (nop_count > 3) - break; - } - - print_reg_stats(&ctx); - - return 0; -} diff --git a/src/freedreno/decode/instr-a3xx.h b/src/freedreno/decode/instr-a3xx.h deleted file mode 100644 index 218bdc3e17c..00000000000 --- a/src/freedreno/decode/instr-a3xx.h +++ /dev/null @@ -1,1115 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef INSTR_A3XX_H_ -#define INSTR_A3XX_H_ - -#define PACKED __attribute__((__packed__)) - -#include -#include -#include - -void ir3_assert_handler(const char *expr, const char *file, int line, - const char *func) __attribute__((weak)) __attribute__ ((__noreturn__)); - -/* A wrapper for assert() that allows overriding handling of a failed - * assert. This is needed for tools like crashdec which can want to - * attempt to disassemble memory that might not actually be valid - * instructions. - */ -#define ir3_assert(expr) do { \ - if (!(expr)) { \ - if (ir3_assert_handler) { \ - ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \ - } \ - assert(expr); \ - } \ - } while (0) - -/* size of largest OPC field of all the instruction categories: */ -#define NOPC_BITS 6 - -#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc) - -typedef enum { - /* category 0: */ - OPC_NOP = _OPC(0, 0), - OPC_B = _OPC(0, 1), - OPC_JUMP = _OPC(0, 2), - OPC_CALL = _OPC(0, 3), - OPC_RET = _OPC(0, 4), - OPC_KILL = _OPC(0, 5), - OPC_END = _OPC(0, 6), - OPC_EMIT = _OPC(0, 7), - OPC_CUT = _OPC(0, 8), - OPC_CHMASK = _OPC(0, 9), - OPC_CHSH = _OPC(0, 10), - OPC_FLOW_REV = _OPC(0, 11), - - OPC_BKT = _OPC(0, 16), - OPC_STKS = _OPC(0, 17), - OPC_STKR = _OPC(0, 18), - OPC_XSET = _OPC(0, 19), - OPC_XCLR = _OPC(0, 20), - OPC_GETONE = _OPC(0, 21), - OPC_DBG = _OPC(0, 22), - OPC_SHPS = _OPC(0, 23), /* shader prologue start */ - OPC_SHPE = _OPC(0, 24), /* shader prologue end */ - - OPC_PREDT = _OPC(0, 29), /* predicated true */ - OPC_PREDF = _OPC(0, 30), /* predicated false */ - OPC_PREDE = _OPC(0, 31), /* predicated end */ - - /* category 1: */ - OPC_MOV = _OPC(1, 0), - - /* category 2: */ - OPC_ADD_F = _OPC(2, 0), - OPC_MIN_F = _OPC(2, 1), - OPC_MAX_F = _OPC(2, 2), - OPC_MUL_F = _OPC(2, 3), - OPC_SIGN_F = _OPC(2, 4), - OPC_CMPS_F = _OPC(2, 5), - OPC_ABSNEG_F = _OPC(2, 6), - OPC_CMPV_F = _OPC(2, 7), - /* 8 - invalid */ - OPC_FLOOR_F = _OPC(2, 9), - OPC_CEIL_F = _OPC(2, 10), - OPC_RNDNE_F = _OPC(2, 11), - OPC_RNDAZ_F = _OPC(2, 12), - OPC_TRUNC_F = _OPC(2, 13), - /* 14-15 - invalid */ - OPC_ADD_U = _OPC(2, 16), - OPC_ADD_S = _OPC(2, 17), - OPC_SUB_U = _OPC(2, 18), - OPC_SUB_S = _OPC(2, 19), - OPC_CMPS_U = _OPC(2, 20), - OPC_CMPS_S = _OPC(2, 21), - OPC_MIN_U = _OPC(2, 22), - OPC_MIN_S = _OPC(2, 23), - OPC_MAX_U = _OPC(2, 24), - OPC_MAX_S = _OPC(2, 25), - OPC_ABSNEG_S = _OPC(2, 26), - /* 27 - invalid */ - OPC_AND_B = _OPC(2, 28), - OPC_OR_B = _OPC(2, 29), - OPC_NOT_B = _OPC(2, 30), - OPC_XOR_B = _OPC(2, 31), - /* 32 - invalid */ - OPC_CMPV_U = _OPC(2, 33), - OPC_CMPV_S = _OPC(2, 34), - /* 35-47 - invalid */ - OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */ - OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */ - OPC_MULL_U = _OPC(2, 50), - OPC_BFREV_B = _OPC(2, 51), - OPC_CLZ_S = _OPC(2, 52), - OPC_CLZ_B = _OPC(2, 53), - OPC_SHL_B = _OPC(2, 54), - OPC_SHR_B = _OPC(2, 55), - OPC_ASHR_B = _OPC(2, 56), - OPC_BARY_F = _OPC(2, 57), - OPC_MGEN_B = _OPC(2, 58), - OPC_GETBIT_B = _OPC(2, 59), - OPC_SETRM = _OPC(2, 60), - OPC_CBITS_B = _OPC(2, 61), - OPC_SHB = _OPC(2, 62), - OPC_MSAD = _OPC(2, 63), - - /* category 3: */ - OPC_MAD_U16 = _OPC(3, 0), - OPC_MADSH_U16 = _OPC(3, 1), - OPC_MAD_S16 = _OPC(3, 2), - OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */ - OPC_MAD_U24 = _OPC(3, 4), - OPC_MAD_S24 = _OPC(3, 5), - OPC_MAD_F16 = _OPC(3, 6), - OPC_MAD_F32 = _OPC(3, 7), - OPC_SEL_B16 = _OPC(3, 8), - OPC_SEL_B32 = _OPC(3, 9), - OPC_SEL_S16 = _OPC(3, 10), - OPC_SEL_S32 = _OPC(3, 11), - OPC_SEL_F16 = _OPC(3, 12), - OPC_SEL_F32 = _OPC(3, 13), - OPC_SAD_S16 = _OPC(3, 14), - OPC_SAD_S32 = _OPC(3, 15), - - /* category 4: */ - OPC_RCP = _OPC(4, 0), - OPC_RSQ = _OPC(4, 1), - OPC_LOG2 = _OPC(4, 2), - OPC_EXP2 = _OPC(4, 3), - OPC_SIN = _OPC(4, 4), - OPC_COS = _OPC(4, 5), - OPC_SQRT = _OPC(4, 6), - /* NOTE that these are 8+opc from their highp equivs, so it's possible - * that the high order bit in the opc field has been repurposed for - * half-precision use? But note that other ops (rcp/lsin/cos/sqrt) - * still use the same opc as highp - */ - OPC_HRSQ = _OPC(4, 9), - OPC_HLOG2 = _OPC(4, 10), - OPC_HEXP2 = _OPC(4, 11), - - /* category 5: */ - OPC_ISAM = _OPC(5, 0), - OPC_ISAML = _OPC(5, 1), - OPC_ISAMM = _OPC(5, 2), - OPC_SAM = _OPC(5, 3), - OPC_SAMB = _OPC(5, 4), - OPC_SAML = _OPC(5, 5), - OPC_SAMGQ = _OPC(5, 6), - OPC_GETLOD = _OPC(5, 7), - OPC_CONV = _OPC(5, 8), - OPC_CONVM = _OPC(5, 9), - OPC_GETSIZE = _OPC(5, 10), - OPC_GETBUF = _OPC(5, 11), - OPC_GETPOS = _OPC(5, 12), - OPC_GETINFO = _OPC(5, 13), - OPC_DSX = _OPC(5, 14), - OPC_DSY = _OPC(5, 15), - OPC_GATHER4R = _OPC(5, 16), - OPC_GATHER4G = _OPC(5, 17), - OPC_GATHER4B = _OPC(5, 18), - OPC_GATHER4A = _OPC(5, 19), - OPC_SAMGP0 = _OPC(5, 20), - OPC_SAMGP1 = _OPC(5, 21), - OPC_SAMGP2 = _OPC(5, 22), - OPC_SAMGP3 = _OPC(5, 23), - OPC_DSXPP_1 = _OPC(5, 24), - OPC_DSYPP_1 = _OPC(5, 25), - OPC_RGETPOS = _OPC(5, 26), - OPC_RGETINFO = _OPC(5, 27), - - /* category 6: */ - OPC_LDG = _OPC(6, 0), /* load-global */ - OPC_LDL = _OPC(6, 1), - OPC_LDP = _OPC(6, 2), - OPC_STG = _OPC(6, 3), /* store-global */ - OPC_STL = _OPC(6, 4), - OPC_STP = _OPC(6, 5), - OPC_LDIB = _OPC(6, 6), - OPC_G2L = _OPC(6, 7), - OPC_L2G = _OPC(6, 8), - OPC_PREFETCH = _OPC(6, 9), - OPC_LDLW = _OPC(6, 10), - OPC_STLW = _OPC(6, 11), - OPC_RESFMT = _OPC(6, 14), - OPC_RESINFO = _OPC(6, 15), - OPC_ATOMIC_ADD = _OPC(6, 16), - OPC_ATOMIC_SUB = _OPC(6, 17), - OPC_ATOMIC_XCHG = _OPC(6, 18), - OPC_ATOMIC_INC = _OPC(6, 19), - OPC_ATOMIC_DEC = _OPC(6, 20), - OPC_ATOMIC_CMPXCHG = _OPC(6, 21), - OPC_ATOMIC_MIN = _OPC(6, 22), - OPC_ATOMIC_MAX = _OPC(6, 23), - OPC_ATOMIC_AND = _OPC(6, 24), - OPC_ATOMIC_OR = _OPC(6, 25), - OPC_ATOMIC_XOR = _OPC(6, 26), - OPC_LDGB = _OPC(6, 27), - OPC_STGB = _OPC(6, 28), - OPC_STIB = _OPC(6, 29), - OPC_LDC = _OPC(6, 30), - OPC_LDLV = _OPC(6, 31), - - /* category 7: */ - OPC_BAR = _OPC(7, 0), - OPC_FENCE = _OPC(7, 1), -} opc_t; - -#define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) -#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) - -typedef enum { - TYPE_F16 = 0, - TYPE_F32 = 1, - TYPE_U16 = 2, - TYPE_U32 = 3, - TYPE_S16 = 4, - TYPE_S32 = 5, - TYPE_U8 = 6, - TYPE_S8 = 7, // XXX I assume? -} type_t; - -static inline uint32_t type_size(type_t type) -{ - switch (type) { - case TYPE_F32: - case TYPE_U32: - case TYPE_S32: - return 32; - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return 16; - case TYPE_U8: - case TYPE_S8: - return 8; - default: - ir3_assert(0); /* invalid type */ - return 0; - } -} - -static inline int type_float(type_t type) -{ - return (type == TYPE_F32) || (type == TYPE_F16); -} - -static inline int type_uint(type_t type) -{ - return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8); -} - -static inline int type_sint(type_t type) -{ - return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8); -} - -typedef union PACKED { - /* normal gpr or const src register: */ - struct PACKED { - uint32_t comp : 2; - uint32_t num : 10; - }; - /* for immediate val: */ - int32_t iim_val : 11; - /* to make compiler happy: */ - uint32_t dummy32; - uint32_t dummy10 : 10; - int32_t idummy10 : 10; - uint32_t dummy11 : 11; - uint32_t dummy12 : 12; - uint32_t dummy13 : 13; - uint32_t dummy8 : 8; - int32_t idummy13 : 13; - int32_t idummy8 : 8; -} reg_t; - -/* special registers: */ -#define REG_A0 61 /* address register */ -#define REG_P0 62 /* predicate register */ - -static inline int reg_special(reg_t reg) -{ - return (reg.num == REG_A0) || (reg.num == REG_P0); -} - -typedef enum { - BRANCH_PLAIN = 0, /* br */ - BRANCH_OR = 1, /* brao */ - BRANCH_AND = 2, /* braa */ - BRANCH_CONST = 3, /* brac */ - BRANCH_ANY = 4, /* bany */ - BRANCH_ALL = 5, /* ball */ - BRANCH_X = 6, /* brax ??? */ -} brtype_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - int16_t immed : 16; - uint32_t dummy1 : 16; - } a3xx; - struct PACKED { - int32_t immed : 20; - uint32_t dummy1 : 12; - } a4xx; - struct PACKED { - int32_t immed : 32; - } a5xx; - }; - - /* dword1: */ - uint32_t idx : 5; /* brac.N index */ - uint32_t brtype : 3; /* branch type, see brtype_t */ - uint32_t repeat : 3; - uint32_t dummy3 : 1; - uint32_t ss : 1; - uint32_t inv1 : 1; - uint32_t comp1 : 2; - uint32_t eq : 1; - uint32_t opc_hi : 1; /* at least one bit */ - uint32_t dummy4 : 2; - uint32_t inv0 : 1; - uint32_t comp0 : 2; /* component for first src */ - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat0_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* for normal src register: */ - struct PACKED { - uint32_t src : 11; - /* at least low bit of pad must be zero or it will - * look like a address relative src - */ - uint32_t pad : 21; - }; - /* for address relative: */ - struct PACKED { - int32_t off : 10; - uint32_t src_rel_c : 1; - uint32_t src_rel : 1; - uint32_t unknown : 20; - }; - /* for immediate: */ - int32_t iim_val; - uint32_t uim_val; - float fim_val; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 3; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_type : 3; - uint32_t dst_rel : 1; - uint32_t src_type : 3; - uint32_t src_c : 1; - uint32_t src_im : 1; - uint32_t even : 1; - uint32_t pos_inf : 1; - uint32_t must_be_0 : 2; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat1_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src1_im : 1; /* immediate */ - uint32_t src1_neg : 1; /* negate */ - uint32_t src1_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; /* relative-const */ - uint32_t src1_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; /* const */ - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src2 : 11; - uint32_t must_be_zero2: 2; - uint32_t src2_im : 1; /* immediate */ - uint32_t src2_neg : 1; /* negate */ - uint32_t src2_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src2 : 10; - uint32_t src2_c : 1; /* relative-const */ - uint32_t src2_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src2 : 12; - uint32_t src2_c : 1; /* const */ - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */ - uint32_t ss : 1; - uint32_t ul : 1; /* dunno */ - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t ei : 1; - uint32_t cond : 3; - uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */ - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat2_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src2_c : 1; - uint32_t src1_neg : 1; - uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */ - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; - uint32_t src1_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src3 : 11; - uint32_t must_be_zero2: 2; - uint32_t src3_r : 1; - uint32_t src2_neg : 1; - uint32_t src3_neg : 1; - }; - struct PACKED { - uint32_t src3 : 10; - uint32_t src3_c : 1; - uint32_t src3_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src3 : 12; - uint32_t src3_c : 1; - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */ - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t src2 : 8; - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat3_t; - -static inline bool instr_cat3_full(instr_cat3_t *cat3) -{ - switch (_OPC(3, cat3->opc)) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? - return false; - default: - return true; - } -} - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src : 11; - uint32_t must_be_zero1: 2; - uint32_t src_im : 1; /* immediate */ - uint32_t src_neg : 1; /* negate */ - uint32_t src_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src : 10; - uint32_t src_c : 1; /* relative-const */ - uint32_t src_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel; - struct PACKED { - uint32_t src : 12; - uint32_t src_c : 1; /* const */ - uint32_t dummy : 3; - } c; - }; - uint32_t dummy1 : 16; /* seem to be ignored */ - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t dummy2 : 5; /* seem to be ignored */ - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat4_t; - -/* With is_bindless_s2en = 1, this determines whether bindless is enabled and - * if so, how to get the (base, index) pair for both sampler and texture. - * There is a single base embedded in the instruction, which is always used - * for the texture. - */ -typedef enum { - /* Use traditional GL binding model, get texture and sampler index - * from src3 which is not presumed to be uniform. This is - * backwards-compatible with earlier generations, where this field was - * always 0 and nonuniform-indexed sampling always worked. - */ - CAT5_NONUNIFORM = 0, - - /* The sampler base comes from the low 3 bits of a1.x, and the sampler - * and texture index come from src3 which is presumed to be uniform. - */ - CAT5_BINDLESS_A1_UNIFORM = 1, - - /* The texture and sampler share the same base, and the sampler and - * texture index come from src3 which is *not* presumed to be uniform. - */ - CAT5_BINDLESS_NONUNIFORM = 2, - - /* The sampler base comes from the low 3 bits of a1.x, and the sampler - * and texture index come from src3 which is *not* presumed to be - * uniform. - */ - CAT5_BINDLESS_A1_NONUNIFORM = 3, - - /* Use traditional GL binding model, get texture and sampler index - * from src3 which is presumed to be uniform. - */ - CAT5_UNIFORM = 4, - - /* The texture and sampler share the same base, and the sampler and - * texture index come from src3 which is presumed to be uniform. - */ - CAT5_BINDLESS_UNIFORM = 5, - - /* The texture and sampler share the same base, get sampler index from low - * 4 bits of src3 and texture index from high 4 bits. - */ - CAT5_BINDLESS_IMM = 6, - - /* The sampler base comes from the low 3 bits of a1.x, and the texture - * index comes from the next 8 bits of a1.x. The sampler index is an - * immediate in src3. - */ - CAT5_BINDLESS_A1_IMM = 7, -} cat5_desc_mode_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* normal case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 8; - uint32_t dummy1 : 4; /* seem to be ignored */ - uint32_t samp : 4; - uint32_t tex : 7; - } norm; - /* s2en case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 8; - uint32_t dummy1 : 2; - uint32_t base_hi : 2; - uint32_t src3 : 8; - uint32_t desc_mode : 3; - } s2en_bindless; - /* same in either case: */ - // XXX I think, confirm this - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 8; - uint32_t pad : 15; - }; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t wrmask : 4; /* write-mask */ - uint32_t type : 3; - uint32_t base_lo : 1; /* used with bindless */ - uint32_t is_3d : 1; - - uint32_t is_a : 1; - uint32_t is_s : 1; - uint32_t is_s2en_bindless : 1; - uint32_t is_o : 1; - uint32_t is_p : 1; - - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat5_t; - -/* dword0 encoding for src_off: [src1 + off], src2: */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe1 : 1; - int32_t off : 13; - uint32_t src1 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dword1; -} instr_cat6a_t; - -/* dword0 encoding for !src_off: [src1], src2 */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe0 : 1; - uint32_t src1 : 13; - uint32_t ignore0 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dword1; -} instr_cat6b_t; - -/* dword1 encoding for dst_off: */ -typedef struct PACKED { - /* dword0: */ - uint32_t dword0; - - /* note: there is some weird stuff going on where sometimes - * cat6->a.off is involved.. but that seems like a bug in - * the blob, since it is used even if !cat6->src_off - * It would make sense for there to be some more bits to - * bring us to 11 bits worth of offset, but not sure.. - */ - int32_t off : 8; - uint32_t mustbe1 : 1; - uint32_t dst : 8; - uint32_t pad1 : 15; -} instr_cat6c_t; - -/* dword1 encoding for !dst_off: */ -typedef struct PACKED { - /* dword0: */ - uint32_t dword0; - - uint32_t dst : 8; - uint32_t mustbe0 : 1; - uint32_t idx : 8; - uint32_t pad0 : 15; -} instr_cat6d_t; - -/* ldgb and atomics.. - * - * ldgb: pad0=0, pad3=1 - * atomic .g: pad0=1, pad3=1 - * .l: pad0=1, pad3=0 - */ -typedef struct PACKED { - /* dword0: */ - uint32_t pad0 : 1; - uint32_t src3 : 8; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t src1 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dst : 8; - uint32_t mustbe0 : 1; - uint32_t src_ssbo : 8; - uint32_t pad2 : 3; // type - uint32_t g : 1; - uint32_t pad3 : 1; - uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat -} instr_cat6ldgb_t; - -/* stgb, pad0=0, pad3=2 - */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe1 : 1; // ??? - uint32_t src1 : 8; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t pad0 : 9; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t src3 : 8; - uint32_t src3_im : 1; - uint32_t dst_ssbo : 8; - uint32_t pad2 : 3; // type - uint32_t pad3 : 2; - uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat -} instr_cat6stgb_t; - -typedef union PACKED { - instr_cat6a_t a; - instr_cat6b_t b; - instr_cat6c_t c; - instr_cat6d_t d; - instr_cat6ldgb_t ldgb; - instr_cat6stgb_t stgb; - struct PACKED { - /* dword0: */ - uint32_t src_off : 1; - uint32_t pad1 : 31; - - /* dword1: */ - uint32_t pad2 : 8; - uint32_t dst_off : 1; - uint32_t pad3 : 8; - uint32_t type : 3; - uint32_t g : 1; /* or in some cases it means dst immed */ - uint32_t pad4 : 1; - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - }; -} instr_cat6_t; - -/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded. - */ -typedef enum { - /* Use old GL binding model with an immediate index. */ - CAT6_IMM = 0, - - CAT6_UNIFORM = 1, - - CAT6_NONUNIFORM = 2, - - /* Use the bindless model, with an immediate index. - */ - CAT6_BINDLESS_IMM = 4, - - /* Use the bindless model, with a uniform register index. - */ - CAT6_BINDLESS_UNIFORM = 5, - - /* Use the bindless model, with a register index that isn't guaranteed - * to be uniform. This presumably checks if the indices are equal and - * splits up the load/store, because it works the way you would - * expect. - */ - CAT6_BINDLESS_NONUNIFORM = 6, -} cat6_desc_mode_t; - -/** - * For atomic ops (which return a value): - * - * pad1=1, pad3=c, pad5=3 - * src1 - vecN offset/coords - * src2.x - is actually dest register - * src2.y - is 'data' except for cmpxchg where src2.y is 'compare' - * and src2.z is 'data' - * - * For stib (which does not return a value): - * pad1=0, pad3=c, pad5=2 - * src1 - vecN offset/coords - * src2 - value to store - * - * For ldib: - * pad1=1, pad3=c, pad5=2 - * src1 - vecN offset/coords - * - * for ldc (load from UBO using descriptor): - * pad1=0, pad3=8, pad5=2 - * - * pad2 and pad5 are only observed to be 0. - */ -typedef struct PACKED { - /* dword0: */ - uint32_t pad1 : 1; - uint32_t base : 3; - uint32_t pad2 : 2; - uint32_t desc_mode : 3; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t opc : 5; - uint32_t pad3 : 5; - uint32_t src1 : 8; /* coordinate/offset */ - - /* dword1: */ - uint32_t src2 : 8; /* or the dst for load instructions */ - uint32_t pad4 : 1; //mustbe0 ?? - uint32_t ssbo : 8; /* ssbo/image binding point */ - uint32_t type : 3; - uint32_t pad5 : 7; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat6_a6xx_t; - -typedef struct PACKED { - /* dword0: */ - uint32_t pad1 : 32; - - /* dword1: */ - uint32_t pad2 : 12; - uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */ - uint32_t pad3 : 6; - uint32_t w : 1; /* write */ - uint32_t r : 1; /* read */ - uint32_t l : 1; /* local */ - uint32_t g : 1; /* global */ - uint32_t opc : 4; /* presumed, but only a couple known OPCs */ - uint32_t jmp_tgt : 1; /* (jp) */ - uint32_t sync : 1; /* (sy) */ - uint32_t opc_cat : 3; -} instr_cat7_t; - -typedef union PACKED { - instr_cat0_t cat0; - instr_cat1_t cat1; - instr_cat2_t cat2; - instr_cat3_t cat3; - instr_cat4_t cat4; - instr_cat5_t cat5; - instr_cat6_t cat6; - instr_cat6_a6xx_t cat6_a6xx; - instr_cat7_t cat7; - struct PACKED { - /* dword0: */ - uint32_t pad1 : 32; - - /* dword1: */ - uint32_t pad2 : 12; - uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */ - uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */ - uint32_t pad3 : 13; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - - }; -} instr_t; - -static inline uint32_t instr_repeat(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.repeat; - case 1: return instr->cat1.repeat; - case 2: return instr->cat2.repeat; - case 3: return instr->cat3.repeat; - case 4: return instr->cat4.repeat; - default: return 0; - } -} - -static inline bool instr_sat(instr_t *instr) -{ - switch (instr->opc_cat) { - case 2: return instr->cat2.sat; - case 3: return instr->cat3.sat; - case 4: return instr->cat4.sat; - default: return false; - } -} - -/* We can probably drop the gpu_id arg, but keeping it for now so we can - * assert if we see something we think should be new encoding on an older - * gpu. - */ -static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id) -{ - instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx; - - /* At least one of these two bits is pad in all the possible - * "legacy" cat6 encodings, and a analysis of all the pre-a6xx - * cmdstream traces I have indicates that the pad bit is zero - * in all cases. So we can use this to detect new encoding: - */ - if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) { - ir3_assert(gpu_id >= 600); - ir3_assert(instr->cat6.opc == 0); - return false; - } - - return true; -} - -static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.opc | instr->cat0.opc_hi << 4; - case 1: return 0; - case 2: return instr->cat2.opc; - case 3: return instr->cat3.opc; - case 4: return instr->cat4.opc; - case 5: return instr->cat5.opc; - case 6: - if (!is_cat6_legacy(instr, gpu_id)) - return instr->cat6_a6xx.opc; - return instr->cat6.opc; - case 7: return instr->cat7.opc; - default: return 0; - } -} - -static inline bool is_mad(opc_t opc) -{ - switch (opc) { - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_MAD_U24: - case OPC_MAD_S24: - case OPC_MAD_F16: - case OPC_MAD_F32: - return true; - default: - return false; - } -} - -static inline bool is_madsh(opc_t opc) -{ - switch (opc) { - case OPC_MADSH_U16: - case OPC_MADSH_M16: - return true; - default: - return false; - } -} - -static inline bool is_atomic(opc_t opc) -{ - switch (opc) { - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - return true; - default: - return false; - } -} - -static inline bool is_ssbo(opc_t opc) -{ - switch (opc) { - case OPC_RESFMT: - case OPC_RESINFO: - case OPC_LDGB: - case OPC_STGB: - case OPC_STIB: - return true; - default: - return false; - } -} - -static inline bool is_isam(opc_t opc) -{ - switch (opc) { - case OPC_ISAM: - case OPC_ISAML: - case OPC_ISAMM: - return true; - default: - return false; - } -} - - -static inline bool is_cat2_float(opc_t opc) -{ - switch (opc) { - case OPC_ADD_F: - case OPC_MIN_F: - case OPC_MAX_F: - case OPC_MUL_F: - case OPC_SIGN_F: - case OPC_CMPS_F: - case OPC_ABSNEG_F: - case OPC_CMPV_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - return true; - - default: - return false; - } -} - -static inline bool is_cat3_float(opc_t opc) -{ - switch (opc) { - case OPC_MAD_F16: - case OPC_MAD_F32: - case OPC_SEL_F16: - case OPC_SEL_F32: - return true; - default: - return false; - } -} - -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); - -#endif /* INSTR_A3XX_H_ */ diff --git a/src/freedreno/decode/meson.build b/src/freedreno/decode/meson.build index 74b3fd690d6..b666dfc80c4 100644 --- a/src/freedreno/decode/meson.build +++ b/src/freedreno/decode/meson.build @@ -37,10 +37,7 @@ libfreedreno_cffdec = static_library( 'cffdec.c', 'cffdec.h', 'disasm-a2xx.c', - 'disasm-a3xx.c', - 'disasm.h', 'instr-a2xx.h', - 'instr-a3xx.h', 'pager.c', 'pager.h', 'rnnutil.c', @@ -48,6 +45,7 @@ libfreedreno_cffdec = static_library( 'util.h', ], include_directories: [ + inc_freedreno, inc_freedreno_rnn, inc_include, inc_src, @@ -55,7 +53,10 @@ libfreedreno_cffdec = static_library( c_args : [ no_override_init_args ], gnu_symbol_visibility: 'hidden', dependencies: [], - link_with: libfreedreno_rnn, + link_with: [ + libfreedreno_rnn, + libfreedreno_ir3, # for disasm_a3xx + ], build_by_default: false, ) @@ -85,6 +86,7 @@ if dep_lua.found() and dep_libarchive.found() 'script.h' ], include_directories: [ + inc_freedreno, inc_freedreno_rnn, inc_include, inc_src, @@ -107,6 +109,7 @@ crashdec = executable( 'crashdec', 'crashdec.c', include_directories: [ + inc_freedreno, inc_freedreno_rnn, inc_include, inc_src, @@ -125,6 +128,7 @@ if dep_libarchive.found() 'pgmdump', 'pgmdump.c', include_directories: [ + inc_freedreno, inc_include, inc_src, ], @@ -133,6 +137,7 @@ if dep_libarchive.found() link_with: [ libfreedreno_cffdec, libfreedreno_io, + libfreedreno_ir3, # for disasm_a3xx ], build_by_default: with_tools.contains('freedreno'), install: false, @@ -141,6 +146,7 @@ if dep_libarchive.found() 'pgmdump2', 'pgmdump2.c', include_directories: [ + inc_freedreno, inc_include, inc_src, ], @@ -149,6 +155,7 @@ if dep_libarchive.found() link_with: [ libfreedreno_cffdec, libfreedreno_io, + libfreedreno_ir3, # for disasm_a3xx ], build_by_default: with_tools.contains('freedreno'), install: false, diff --git a/src/freedreno/decode/pgmdump.c b/src/freedreno/decode/pgmdump.c index ab5deae7335..932fdb55f7e 100644 --- a/src/freedreno/decode/pgmdump.c +++ b/src/freedreno/decode/pgmdump.c @@ -891,7 +891,7 @@ static void dump_program(struct state *state) int main(int argc, char **argv) { enum rd_sect_type type = RD_NONE; - enum debug_t debug = 0; + enum debug_t debug = PRINT_RAW | PRINT_STATS; void *buf = NULL; int sz; struct io *io; @@ -945,7 +945,8 @@ int main(int argc, char **argv) return -1; } - disasm_set_debug(debug); + disasm_a2xx_set_debug(debug); + disasm_a3xx_set_debug(debug); infile = argv[1]; diff --git a/src/freedreno/decode/pgmdump2.c b/src/freedreno/decode/pgmdump2.c index 7410bcd3179..94db08db503 100644 --- a/src/freedreno/decode/pgmdump2.c +++ b/src/freedreno/decode/pgmdump2.c @@ -440,7 +440,7 @@ static void dump_program(struct state *state) int main(int argc, char **argv) { enum rd_sect_type type = RD_NONE; - enum debug_t debug = 0; + enum debug_t debug = PRINT_RAW | PRINT_STATS; void *buf = NULL; int sz; struct io *io; @@ -494,7 +494,7 @@ int main(int argc, char **argv) return -1; } - disasm_set_debug(debug); + disasm_a3xx_set_debug(debug); infile = argv[1]; diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index d6a1c15e09c..b84163106fb 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -30,14 +30,9 @@ #include +#include "disasm.h" #include "instr-a3xx.h" - -/* bitmask of debug flags */ -enum debug_t { - PRINT_RAW = 0x1, /* dump raw hexdump */ - PRINT_VERBOSE = 0x2, - EXPAND_REPEAT = 0x4, -}; +#include "regmask.h" static enum debug_t debug; @@ -80,12 +75,28 @@ struct disasm_ctx { int level; unsigned gpu_id; + struct shader_stats *stats; + + /* we have to process the dst register after src to avoid tripping up + * the read-before-write detection + */ + unsigned last_dst; + bool last_dst_full; + bool last_dst_valid; + /* current instruction repeat flag: */ unsigned repeat; /* current instruction repeat indx/offset (for --expand): */ unsigned repeatidx; - unsigned instructions; + /* tracking for register usage */ + struct { + regmask_t used; + regmask_t used_merged; + regmask_t rbw; /* read before write */ + regmask_t war; /* write after read */ + unsigned max_const; + } regs; }; static const char *float_imms[] = { @@ -157,6 +168,24 @@ static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, } } +static void regmask_set(regmask_t *regmask, unsigned num, bool full) +{ + ir3_assert(num < MAX_REG); + __regmask_set(regmask, !full, num); +} + +static void regmask_clear(regmask_t *regmask, unsigned num, bool full) +{ + ir3_assert(num < MAX_REG); + __regmask_clear(regmask, !full, num); +} + +static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full) +{ + ir3_assert(num < MAX_REG); + return __regmask_get(regmask, !full, num); +} + static unsigned regidx(reg_t reg) { return (4 * reg.num) + reg.comp; @@ -170,8 +199,127 @@ static reg_t idxreg(unsigned idx) }; } +static void print_sequence(struct disasm_ctx *ctx, int first, int last) +{ + if (first != MAX_REG) { + if (first == last) { + fprintf(ctx->out, " %d", first); + } else { + fprintf(ctx->out, " %d-%d", first, last); + } + } +} + +static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full) +{ + int num, max = 0, cnt = 0; + int first, last; + + first = last = MAX_REG; + + for (num = 0; num < MAX_REG; num++) { + if (regmask_get(regmask, num, full)) { + if (num != (last + 1)) { + print_sequence(ctx, first, last); + first = num; + } + last = num; + if (num < (48*4)) + max = num; + cnt++; + } + } + + print_sequence(ctx, first, last); + + fprintf(ctx->out, " (cnt=%d, max=%d)", cnt, max); + + return max; +} + +static void print_reg_stats(struct disasm_ctx *ctx) +{ + int fullreg, halfreg; + + fprintf(ctx->out, "%sRegister Stats:\n", levels[ctx->level]); + fprintf(ctx->out, "%s- used (half):", levels[ctx->level]); + halfreg = print_regs(ctx, &ctx->regs.used, false); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- used (full):", levels[ctx->level]); + fullreg = print_regs(ctx, &ctx->regs.used, true); + fprintf(ctx->out, "\n"); + if (ctx->gpu_id >= 600) { + fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.used_merged, false); + fprintf(ctx->out, "\n"); + } + fprintf(ctx->out, "%s- input (half):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.rbw, false); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- input (full):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.rbw, true); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- max const: %u\n", levels[ctx->level], ctx->regs.max_const); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- output (half):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.war, false); + fprintf(ctx->out, " (estimated)\n"); + fprintf(ctx->out, "%s- output (full):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.war, true); + fprintf(ctx->out, " (estimated)\n"); + + /* convert to vec4, which is the granularity that registers are + * assigned to shader: + */ + fullreg = (fullreg + 3) / 4; + halfreg = (halfreg + 3) / 4; + + // Note this count of instructions includes rptN, which matches + // up to how mesa prints this: + fprintf(ctx->out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, " + "(%d instlen), %d half, %d full\n", + levels[ctx->level], ctx->stats->instructions, ctx->stats->nops, + ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen, + halfreg, fullreg); + fprintf(ctx->out, "%s- shaderdb: %d (ss), %d (sy)\n", levels[ctx->level], + ctx->stats->ss, ctx->stats->sy); +} + +static void process_reg_dst(struct disasm_ctx *ctx) +{ + if (!ctx->last_dst_valid) + return; + + /* ignore dummy writes (ie. r63.x): */ + if (!VALIDREG(ctx->last_dst)) + return; + + for (unsigned i = 0; i <= ctx->repeat; i++) { + unsigned dst = ctx->last_dst + i; + + regmask_set(&ctx->regs.war, dst, ctx->last_dst_full); + regmask_set(&ctx->regs.used, dst, ctx->last_dst_full); + + if (ctx->gpu_id >= 600) { + if (ctx->last_dst_full) { + regmask_set(&ctx->regs.used_merged, (dst*2)+0, false); + regmask_set(&ctx->regs.used_merged, (dst*2)+1, false); + } else { + regmask_set(&ctx->regs.used_merged, dst, false); + } + } + } + + ctx->last_dst_valid = false; +} static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) { + /* presumably the special registers a0.c and p0.c don't count.. */ + if (!(addr_rel || (reg.num == REG_A0) || (reg.num == REG_P0))) { + ctx->last_dst = regidx(reg); + ctx->last_dst_full = full; + ctx->last_dst_valid = true; + } reg = idxreg(regidx(reg) + ctx->repeatidx); print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel); } @@ -196,6 +344,45 @@ static void print_src(struct disasm_ctx *ctx, struct reginfo *info) { reg_t reg = info->reg; + /* presumably the special registers a0.c and p0.c don't count.. */ + if (!(info->addr_rel || info->c || info->im || + (reg.num == REG_A0) || (reg.num == REG_P0))) { + int i, num = regidx(reg); + for (i = 0; i <= ctx->repeat; i++) { + unsigned src = num + i; + + if (!regmask_get(&ctx->regs.used, src, info->full)) + regmask_set(&ctx->regs.rbw, src, info->full); + + regmask_clear(&ctx->regs.war, src, info->full); + regmask_set(&ctx->regs.used, src, info->full); + + if (info->full) { + regmask_set(&ctx->regs.used_merged, (src*2)+0, false); + regmask_set(&ctx->regs.used_merged, (src*2)+1, false); + } else { + regmask_set(&ctx->regs.used_merged, src, false); + } + + if (!info->r) + break; + } + } else if (info->c) { + int i, num = regidx(reg); + for (i = 0; i <= ctx->repeat; i++) { + unsigned src = num + i; + + ctx->regs.max_const = MAX2(ctx->regs.max_const, src); + + if (!info->r) + break; + } + + unsigned max = (num + ctx->repeat + 1 + 3) / 4; + if (max > ctx->stats->constlen) + ctx->stats->constlen = max; + } + if (info->r) reg = idxreg(regidx(info->reg) + ctx->repeatidx); @@ -1309,12 +1496,10 @@ static const struct opc_info { #define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)])) -// XXX hack.. probably should move this table somewhere common: -#include "ir3.h" -const char *ir3_instr_name(struct ir3_instruction *instr) +const char *disasm_a3xx_instr_name(opc_t opc) { - if (opc_cat(instr->opc) == -1) return "??meta??"; - return opcs[instr->opc].name; + if (opc_cat(opc) == -1) return "??meta??"; + return opcs[opc].name; } static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr) @@ -1346,11 +1531,11 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) instr_t *instr = (instr_t *)dwords; uint32_t opc = instr_opc(instr, ctx->gpu_id); unsigned nop = 0; - unsigned cycles = ctx->instructions; + unsigned cycles = ctx->stats->instructions; - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s%04d:%04d[%08xx_%08xx] ", levels[ctx->level], - n, cycles++, dwords[1], dwords[0]); + if (debug & PRINT_RAW) { + fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level], + instr->opc_cat, n, cycles++, dwords[1], dwords[0]); } /* NOTE: order flags are printed is a bit fugly.. but for now I @@ -1359,13 +1544,16 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) */ ctx->repeat = instr_repeat(instr); - ctx->instructions += 1 + ctx->repeat; + ctx->stats->instructions += 1 + ctx->repeat; + ctx->stats->instlen++; if (instr->sync) { fprintf(ctx->out, "(sy)"); + ctx->stats->sy++; } if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) { fprintf(ctx->out, "(ss)"); + ctx->stats->ss++; } if (instr->jmp_tgt) fprintf(ctx->out, "(jp)"); @@ -1379,7 +1567,10 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r; - ctx->instructions += nop; + ctx->stats->instructions += nop; + ctx->stats->nops += nop; + if (opc == OPC_NOP) + ctx->stats->nops += 1 + ctx->repeat; if (nop) fprintf(ctx->out, "(nop%d) ", nop); @@ -1389,20 +1580,22 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) print_single_instr(ctx, instr); fprintf(ctx->out, "\n"); + process_reg_dst(ctx); + if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) { int i; for (i = 0; i < nop; i++) { if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s%04d:%04d[ ] ", - levels[ctx->level], n, cycles++); + fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", + levels[ctx->level], instr->opc_cat, n, cycles++); } fprintf(ctx->out, "nop\n"); } for (i = 0; i < ctx->repeat; i++) { ctx->repeatidx = i + 1; if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s%04d:%04d[ ] ", - levels[ctx->level], n, cycles++); + fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", + levels[ctx->level], instr->opc_cat, n, cycles++); } print_single_instr(ctx, instr); fprintf(ctx->out, "\n"); @@ -1410,24 +1603,37 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) ctx->repeatidx = 0; } - return (instr->opc_cat == 0) && (opc == OPC_END); + return (instr->opc_cat == 0) && + ((opc == OPC_END) || (opc == OPC_CHSH)); } int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id) +{ + struct shader_stats stats; + return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats); +} + +int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out, + unsigned gpu_id, struct shader_stats *stats) { struct disasm_ctx ctx; int i; int nop_count = 0; + bool has_end = false; - assert((sizedwords % 2) == 0); + ir3_assert((sizedwords % 2) == 0); memset(&ctx, 0, sizeof(ctx)); ctx.out = out; ctx.level = level; ctx.gpu_id = gpu_id; + ctx.stats = stats; + memset(ctx.stats, 0, sizeof(*ctx.stats)); for (i = 0; i < sizedwords; i += 2) { - print_instr(&ctx, &dwords[i], i/2); + has_end |= print_instr(&ctx, &dwords[i], i/2); + if (!has_end) + continue; if (dwords[i] == 0 && dwords[i + 1] == 0) nop_count++; else @@ -1436,5 +1642,13 @@ int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned break; } + if (debug & PRINT_STATS) + print_reg_stats(&ctx); + return 0; } + +void disasm_a3xx_set_debug(enum debug_t d) +{ + debug = d; +} diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index 4ffcb7accae..1b0f7089c18 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -31,6 +31,22 @@ #include #include +void ir3_assert_handler(const char *expr, const char *file, int line, + const char *func) __attribute__((weak)) __attribute__ ((__noreturn__)); + +/* A wrapper for assert() that allows overriding handling of a failed + * assert. This is needed for tools like crashdec which can want to + * attempt to disassemble memory that might not actually be valid + * instructions. + */ +#define ir3_assert(expr) do { \ + if (!(expr)) { \ + if (ir3_assert_handler) { \ + ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \ + } \ + assert(expr); \ + } \ + } while (0) /* size of largest OPC field of all the instruction categories: */ #define NOPC_BITS 6 @@ -249,6 +265,8 @@ typedef enum { #define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) #define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) +const char *disasm_a3xx_instr_name(opc_t opc); + typedef enum { TYPE_F16 = 0, TYPE_F32 = 1, @@ -275,7 +293,7 @@ static inline uint32_t type_size(type_t type) case TYPE_S8: return 8; default: - assert(0); /* invalid type */ + ir3_assert(0); /* invalid type */ return 0; } } @@ -315,6 +333,21 @@ typedef union PACKED { int32_t idummy8 : 8; } reg_t; +/* comp: + * 0 - x + * 1 - y + * 2 - z + * 3 - w + */ +static inline uint32_t regid(int num, int comp) +{ + return (num << 2) | (comp & 0x3); +} + +#define INVALID_REG regid(63, 0) +#define VALIDREG(r) ((r) != INVALID_REG) +#define CONDREG(r, val) COND(VALIDREG(r), (val)) + /* special registers: */ #define REG_A0 61 /* address register */ #define REG_P0 62 /* predicate register */ @@ -979,8 +1012,8 @@ static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id) * in all cases. So we can use this to detect new encoding: */ if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) { - assert(gpu_id >= 600); - assert(instr->cat6.opc == 0); + ir3_assert(gpu_id >= 600); + ir3_assert(instr->cat6.opc == 0); return false; } @@ -1114,6 +1147,4 @@ static inline bool is_cat3_float(opc_t opc) } } -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); - #endif /* INSTR_A3XX_H_ */ diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 1c0799f912e..0a7ab731643 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -631,19 +631,6 @@ bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags); set_foreach ((__instr)->uses, __entry) \ if ((__use = (void *)__entry->key)) -#define MAX_ARRAYS 16 - -/* comp: - * 0 - x - * 1 - y - * 2 - z - * 3 - w - */ -static inline uint32_t regid(int num, int comp) -{ - return (num << 2) | (comp & 0x3); -} - static inline uint32_t reg_num(struct ir3_register *reg) { return reg->num >> 2; @@ -654,10 +641,6 @@ static inline uint32_t reg_comp(struct ir3_register *reg) return reg->num & 0x3; } -#define INVALID_REG regid(63, 0) -#define VALIDREG(r) ((r) != INVALID_REG) -#define CONDREG(r, val) COND(VALIDREG(r), (val)) - static inline bool is_flow(struct ir3_instruction *instr) { return (opc_cat(instr->opc) == 0); diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index 458e3d9a713..115cea0c8c6 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -114,7 +114,7 @@ static void print_instr_name(struct ir3_instruction *instr, bool flags) printf(".%s%s", type_name(instr->cat1.src_type), type_name(instr->cat1.dst_type)); } else { - printf("%s", ir3_instr_name(instr)); + printf("%s", disasm_a3xx_instr_name(instr->opc)); if (instr->flags & IR3_INSTR_3D) printf(".3d"); if (instr->flags & IR3_INSTR_A) diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 146cc352f1c..99cacbf3301 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -36,6 +36,8 @@ #include "ir3_compiler.h" #include "ir3_nir.h" +#include "disasm.h" + int ir3_glsl_type_size(const struct glsl_type *type, bool bindless) { diff --git a/src/freedreno/ir3/regmask.h b/src/freedreno/ir3/regmask.h index 6eade8daf01..4dded900b4d 100644 --- a/src/freedreno/ir3/regmask.h +++ b/src/freedreno/ir3/regmask.h @@ -86,6 +86,31 @@ __regmask_set(regmask_t *regmask, bool half, unsigned n) } } +static inline void +__regmask_clear(regmask_t *regmask, bool half, unsigned n) +{ + if (regmask->mergedregs) { + /* a6xx+ case, with merged register file, we track things in terms + * of half-precision registers, with a full precisions register + * using two half-precision slots: + */ + if (half) { + BITSET_CLEAR(regmask->mask, n); + } else { + n *= 2; + BITSET_CLEAR(regmask->mask, n); + BITSET_CLEAR(regmask->mask, n+1); + } + } else { + /* pre a6xx case, with separate register file for half and full + * precision: + */ + if (half) + n += MAX_REG; + BITSET_CLEAR(regmask->mask, n); + } +} + static inline void regmask_init(regmask_t *regmask, bool mergedregs) { diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index e6f8f7b2861..1b16fad6957 100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -35,7 +35,7 @@ #include #include #include "util/macros.h" -#include "instr-a3xx.h" +#include "disasm.h" #define INSTR_5XX(i, d) { .gpu_id = 540, .instr = #i, .expected = d } #define INSTR_6XX(i, d) { .gpu_id = 630, .instr = #i, .expected = d } diff --git a/src/freedreno/meson.build b/src/freedreno/meson.build index 3df6dfb16c6..6a1a16ea84a 100644 --- a/src/freedreno/meson.build +++ b/src/freedreno/meson.build @@ -18,7 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -inc_freedreno = include_directories(['.', './registers']) +inc_freedreno = include_directories(['.', './registers', './common']) inc_freedreno_rnn = include_directories('rnn') subdir('common') diff --git a/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c b/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c index b5647307038..43c6333fb56 100644 --- a/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c +++ b/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c @@ -112,7 +112,7 @@ static void print_export_comment(uint32_t num, gl_shader_stage type) } break; default: - unreachable("not reached"); + assert(!"not reached"); } /* if we had a symbol table here, we could look * up the name of the varying.. @@ -629,7 +629,7 @@ int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage typ return 0; } -void disasm_set_debug(enum debug_t d) +void disasm_a2xx_set_debug(enum debug_t d) { debug = d; } diff --git a/src/gallium/drivers/freedreno/disasm.h b/src/gallium/drivers/freedreno/disasm.h deleted file mode 100644 index dc29b2fbd2a..00000000000 --- a/src/gallium/drivers/freedreno/disasm.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright © 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef DISASM_H_ -#define DISASM_H_ - -#include -#include - -#include "compiler/shader_enums.h" -#include "util/u_debug.h" - -/* bitmask of debug flags */ -enum debug_t { - PRINT_RAW = 0x1, /* dump raw hexdump */ - PRINT_VERBOSE = 0x2, -}; - -int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type); -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); -void disasm_set_debug(enum debug_t debug); - -#endif /* DISASM_H_ */ diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index bb1ffa67cb0..8179e78ffc5 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -19,7 +19,6 @@ # SOFTWARE. files_libfreedreno = files( - 'disasm.h', 'freedreno_batch.c', 'freedreno_batch.h', 'freedreno_batch_cache.c',