;------------------------------------------------------------------------------
; lr_lstm -- offset-annotated disassembly of one LSTM cell step (x86-64, Intel
; syntax, AVX2/FMA, single-precision `ps` arithmetic throughout).
;
; This is a listing, not assembler input: every `NNN:` prefix is the byte
; offset of the instruction from lr_lstm, branch operands (e.g. `jl 0x233`)
; are those same offsets, and `# 0x...` is the resolved target of a
; rip-relative load from the constant pool that follows the code.
;
; In:    rdi = base of the cell's data block.  It is copied to rbp and every
;        input, scratch buffer and output pointer is reached as [rbp+disp].
;        vmovaps is used on rbp+0x60, rbp+0x220, ... and on the pointers held
;        at [rbp+0x20] and [rbp+0x28], so all of these must be 32-byte aligned.
; Out:   results are written into the block and through pointers stored in
;        it (see add_4 / add_7); no return register is set up before ret.
; Clobb: rax, rcx, rdx, rsi, rdi, r8, r9, r10, ymm0-ymm15, flags.
;        rbp is pushed on entry and popped before ret; vzeroupper is issued
;        both on entry and on exit.
;
; Steps, in listing order (each is labelled `lr_lstm/<step>:`):
;
; 1. <feature>/Lookup (0x7 - 0x159)
;    Load a sign-extended 32-bit feature index, replace a negative index with
;    a fixed fallback row (cmovs), scale by the row size (shl) and add the
;    table base.  Only the resulting row POINTER is stored in the block.
;
;      feature         index       fallback  row bytes  table base      pointer
;      punctuation     [rbp+0xc]   0x2       0x20       0x5bec00        [rbp+0x30]
;      quote           [rbp+0x48]  0x3       0x20       0x5beb00        [rbp+0x50]
;      capitalization  [rbp+0x1c]  0x4       0x20       0x5ccf00        [rbp+0x58]
;      digit           [rbp+0x38]  0x2       0x20       0x5bea00        [rbp+0xa0]
;      hyphen          [rbp+0x8]   0x1       0x20       0x5c0240        [rbp+0xa8]
;      words           [rbp+0x3c]  0xd008    0x80       0x7ff69c4f2040  [rbp+0xb0]
;
;    suffix/Lookup (0x2e) is different: it reads THREE indices starting at
;    [rbp+0x10] and sums the selected 0x40-byte rows of the table at
;    0x7ff69c3c6040 into ymm0:ymm1, storing the sum itself at [rbp+0x60].
;    An index of -1 selects row 0x208d; any other negative index contributes
;    nothing (the two vaddps are jumped over).
;
; 2. concat (0x15a)
;    rep movs copies the looked-up rows into one contiguous vector at
;    rbp+0xc0: words (0x80 bytes), suffix sum (0x40), then capitalization,
;    hyphen, punctuation, quote, digit (0x20 each) -- 0x160 bytes in total.
;
; 3. MatMul* -- all eight share one loop shape.
;    The outer loop runs rcx = 0, 0x100, 0x200, 0x300; each pass produces
;    0x100 bytes of output in ymm0-ymm7 and advances the weight base rsi by
;    0x100.  The inner loop walks the input 4 bytes at a time (vbroadcastss
;    into ymm12), FMAs it against eight consecutive ymm words at rdx and
;    advances rdx by 0x400 per input element.  The accumulators start either
;    at zero (vxorps) or from a previously computed buffer (loaded via r9),
;    which is how partial products are summed.
;
;      step (offset)     input vector          bytes  weights         acc. start  result
;      MatMul_3 (0x1f2)  rbp+0xc0              0x160  0x5f8ec0        zero        rbp+0x220
;      MatMul   (0x2e5)  rbp+0xc0              0x160  0x624fc0        zero        rbp+0x620
;      MatMul_4 (0x3d8)  pointer [rbp+0x0]     0x400  0x7ff6a039d040  rbp+0x220   rbp+0xa20
;      MatMul_1 (0x4f2)  pointer [rbp+0x0]     0x400  0x7ff6a031b040  rbp+0x620   rbp+0xe20
;      MatMul_2 (0x60c)  pointer [rbp+0x40]    0x400  0x7ff6a041f040  rbp+0xe20   rbp+0x1220
;      MatMul_6 (0x990)  pointer [rbp+0x20]    0x400  0x7ff6a03de040  zero        rbp+0x1620
;      MatMul_5 (0xa80)  rbp+0xc0              0x160  0x60ef40        rbp+0x1620  rbp+0x1a20
;      MatMul_7 (0xb9d)  pointer [rbp+0x0]     0x400  0x7ff6a035c040  rbp+0x1a20  rbp+0x1e20
;
; 4. add_4 (0x726) -- element-wise pass over 0x400 bytes, 0x20 per iteration.
;    ymm0-ymm10 are preloaded from the constant pool (0xde0 - 0xf20).
;      u = [rbp+0x1220+i] + [0x5c1dc0+i]
;      f = ymm0 / (ymm0 + E(-u))
;          E: clamp to the bounds at 0xf40/0xf60, range reduction with
;          vroundps (floor), a polynomial in the remainder, and a scale
;          factor built by vcvttps2dq + vpslld 0x17; the result is max'ed
;          with -u.  This has the shape of an exp() approximation, and if
;          the constant at 0xde0 is 1.0 then f = sigmoid(u) -- the constant
;          pool is not visible here, TODO confirm.
;      v = R(clamp([rbp+0xa20+i] + [0x5c2fc0+i], ymm1, ymm2))
;          R(x) = x*P(x*x) / Q(x*x), a ratio of two polynomials with
;          coefficients ymm3-ymm9 and ymm10/0x10a0/0x10c0/0x10e0
;          (presumably a tanh approximation -- TODO confirm).
;      c = f*v + (ymm0 - f) * [ptr(rbp+0x40) + i]
;          stored through the pointer held at [rbp+0x20]
;      [rbp+0xa20+i] = R(clamp(c, ymm1, ymm2))
;          r8 and r10 both point at rbp+0xa20: the buffer is read at the top
;          of the iteration and overwritten at the bottom.
;
; 5. add_7 (0xcbf; its first load, of ymm0 from 0xde0, is listed at 0xcb7
;    just ahead of the label) -- element-wise pass over 0x400 bytes.
;      t   = [rbp+0x1e20+i] + [0x5bedc0+i]
;      s   = ymm0 / (ymm0 + E(-t))          same E as in add_4
;      out = s * [rbp+0xa20+i]              stored through the pointer at
;                                           [rbp+0x28]
;------------------------------------------------------------------------------
;============================================================================== ; LR LSTM ;============================================================================== lr_lstm: 0: vzeroupper 3: push rbp 4: mov rbp,rdi lr_lstm/punctuation/Lookup: 7: movsxd rax,DWORD PTR [rbp+0xc] b: mov rcx,0x2 12: test rax,rax 15: cmovs rax,rcx 19: shl rax,0x5 1d: movabs rdx,0x5bec00 27: add rax,rdx 2a: mov QWORD PTR [rbp+0x30],rax lr_lstm/suffix/Lookup: 2e: lea rcx,[rbp+0x10] 32: movabs rdx,0x7ff69c3c6040 3c: lea rsi,[rbp+0x60] 40: vxorps ymm0,ymm0,ymm0 44: vxorps ymm1,ymm1,ymm1 48: mov r8,0x208d 4f: xor rdi,rdi 52: movsxd rax,DWORD PTR [rcx+rdi*4] 56: test rax,rax 59: jns 0x6c 5f: cmp rax,0xffffffffffffffff 63: jne 0x7c 69: mov rax,r8 6c: shl rax,0x6 70: add rax,rdx 73: vaddps ymm0,ymm0,YMMWORD PTR [rax] 77: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20] 7c: inc rdi 7f: cmp rdi,0x3 83: jne 0x52 85: vmovaps YMMWORD PTR [rsi],ymm0 89: vmovaps YMMWORD PTR [rsi+0x20],ymm1 lr_lstm/quote/Lookup: 8e: movsxd rax,DWORD PTR [rbp+0x48] 92: mov rcx,0x3 99: test rax,rax 9c: cmovs rax,rcx a0: shl rax,0x5 a4: movabs rdx,0x5beb00 ae: add rax,rdx b1: mov QWORD PTR [rbp+0x50],rax lr_lstm/capitalization/Lookup: b5: movsxd rax,DWORD PTR [rbp+0x1c] b9: mov rcx,0x4 c0: test rax,rax c3: cmovs rax,rcx c7: shl rax,0x5 cb: movabs rdx,0x5ccf00 d5: add rax,rdx d8: mov QWORD PTR [rbp+0x58],rax lr_lstm/digit/Lookup: dc: movsxd rax,DWORD PTR [rbp+0x38] e0: mov rcx,0x2 e7: test rax,rax ea: cmovs rax,rcx ee: shl rax,0x5 f2: movabs rdx,0x5bea00 fc: add rax,rdx ff: mov QWORD PTR [rbp+0xa0],rax lr_lstm/hyphen/Lookup: 106: movsxd rax,DWORD PTR [rbp+0x8] 10a: mov rcx,0x1 111: test rax,rax 114: cmovs rax,rcx 118: shl rax,0x5 11c: movabs rdx,0x5c0240 126: add rax,rdx 129: mov QWORD PTR [rbp+0xa8],rax lr_lstm/words/Lookup: 130: movsxd rax,DWORD PTR [rbp+0x3c] 134: mov rcx,0xd008 13b: test rax,rax 13e: cmovs rax,rcx 142: shl rax,0x7 146: movabs rdx,0x7ff69c4f2040 150: add rax,rdx 153: mov QWORD PTR [rbp+0xb0],rax 
; concat + first input projections: the rows located by the lookups above are gathered into the 0x160-byte input vector at rbp+0xc0 (layout in the header), which MatMul_3 then multiplies by the weights at 0x5f8ec0.
lr_lstm/concat: 15a: lea r8,[rbp+0xc0] 161: mov rsi,QWORD PTR [rbp+0xb0] 168: lea rdi,[r8] 16b: mov rcx,0x80 172: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 174: lea rsi,[rbp+0x60] 178: lea rdi,[r8+0x80] 17f: mov rcx,0x40 186: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 188: mov rsi,QWORD PTR [rbp+0x58] 18c: lea rdi,[r8+0xc0] 193: mov rcx,0x20 19a: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 19c: mov rsi,QWORD PTR [rbp+0xa8] 1a3: lea rdi,[r8+0xe0] 1aa: mov rcx,0x20 1b1: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 1b3: mov rsi,QWORD PTR [rbp+0x30] 1b7: lea rdi,[r8+0x100] 1be: mov rcx,0x20 1c5: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 1c7: mov rsi,QWORD PTR [rbp+0x50] 1cb: lea rdi,[r8+0x120] 1d2: mov rcx,0x20 1d9: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 1db: mov rsi,QWORD PTR [rbp+0xa0] 1e2: lea rdi,[r8+0x140] 1e9: mov rcx,0x20 1f0: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] lr_lstm/MatMul_3: 1f2: lea rdi,[rbp+0xc0] 1f9: movabs rsi,0x5f8ec0 203: lea r8,[rbp+0x220] 20a: xor rcx,rcx 20d: vxorps ymm0,ymm0,ymm0 211: vxorps ymm1,ymm1,ymm1 215: vxorps ymm2,ymm2,ymm2 219: vxorps ymm3,ymm3,ymm3 21d: vxorps ymm4,ymm4,ymm4 221: vxorps ymm5,ymm5,ymm5 225: vxorps ymm6,ymm6,ymm6 229: vxorps ymm7,ymm7,ymm7 22d: mov rdx,rsi 230: xor rax,rax 233: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 239: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 23e: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 244: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 24a: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 250: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 259: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 262: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 26b: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 274: add rdx,0x400 27b: add rax,0x4 27f: cmp rax,0x160 285: jl 0x233 287: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 28d: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 294: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 29b: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 2a2: vmovaps YMMWORD PTR 
[r8+rcx*1+0x80],ymm4 2ac: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 2b6: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 2c0: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 2ca: add rsi,0x100 2d1: add rcx,0x100 2d8: cmp rcx,0x400 2df: jl 0x20d lr_lstm/MatMul: 2e5: lea rdi,[rbp+0xc0] 2ec: movabs rsi,0x624fc0 2f6: lea r8,[rbp+0x620] 2fd: xor rcx,rcx 300: vxorps ymm0,ymm0,ymm0 304: vxorps ymm1,ymm1,ymm1 308: vxorps ymm2,ymm2,ymm2 30c: vxorps ymm3,ymm3,ymm3 310: vxorps ymm4,ymm4,ymm4 314: vxorps ymm5,ymm5,ymm5 318: vxorps ymm6,ymm6,ymm6 31c: vxorps ymm7,ymm7,ymm7 320: mov rdx,rsi 323: xor rax,rax 326: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 32c: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 331: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 337: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 33d: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 343: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 34c: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 355: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 35e: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 367: add rdx,0x400 36e: add rax,0x4 372: cmp rax,0x160 378: jl 0x326 37a: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 380: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 387: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 38e: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 395: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 39f: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 3a9: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 3b3: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 3bd: add rsi,0x100 3c4: add rcx,0x100 3cb: cmp rcx,0x400 3d2: jl 0x300 lr_lstm/MatMul_4: 3d8: mov rdi,QWORD PTR [rbp+0x0] 3dc: movabs rsi,0x7ff6a039d040 3e6: lea r9,[rbp+0x220] 3ed: lea r8,[rbp+0xa20] 3f4: xor rcx,rcx 3f7: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] 3fd: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] 404: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] 40b: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] 412: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] 41c: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] 426: vmovaps ymm6,YMMWORD PTR 
[r9+rcx*1+0xc0] 430: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] 43a: mov rdx,rsi 43d: xor rax,rax 440: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 446: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 44b: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 451: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 457: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 45d: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 466: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 46f: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 478: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 481: add rdx,0x400 488: add rax,0x4 48c: cmp rax,0x400 492: jl 0x440 494: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 49a: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 4a1: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 4a8: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 4af: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 4b9: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 4c3: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 4cd: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 4d7: add rsi,0x100 4de: add rcx,0x100 4e5: cmp rcx,0x400 4ec: jl 0x3f7 lr_lstm/MatMul_1: 4f2: mov rdi,QWORD PTR [rbp+0x0] 4f6: movabs rsi,0x7ff6a031b040 500: lea r9,[rbp+0x620] 507: lea r8,[rbp+0xe20] 50e: xor rcx,rcx 511: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] 517: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] 51e: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] 525: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] 52c: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] 536: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] 540: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] 54a: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] 554: mov rdx,rsi 557: xor rax,rax 55a: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 560: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 565: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 56b: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 571: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 577: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 580: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 589: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 592: 
vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 59b: add rdx,0x400 5a2: add rax,0x4 5a6: cmp rax,0x400 5ac: jl 0x55a 5ae: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 5b4: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 5bb: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 5c2: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 5c9: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 5d3: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 5dd: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 5e7: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 5f1: add rsi,0x100 5f8: add rcx,0x100 5ff: cmp rcx,0x400 606: jl 0x511 lr_lstm/MatMul_2: 60c: mov rdi,QWORD PTR [rbp+0x40] 610: movabs rsi,0x7ff6a041f040 61a: lea r9,[rbp+0xe20] 621: lea r8,[rbp+0x1220] 628: xor rcx,rcx 62b: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] 631: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] 638: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] 63f: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] 646: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] 650: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] 65a: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] 664: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] 66e: mov rdx,rsi 671: xor rax,rax 674: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 67a: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 67f: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 685: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 68b: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 691: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 69a: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 6a3: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 6ac: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 6b5: add rdx,0x400 6bc: add rax,0x4 6c0: cmp rax,0x400 6c6: jl 0x674 6c8: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 6ce: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 6d5: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 6dc: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 6e3: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 6ed: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 6f7: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 701: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 70b: add rsi,0x100 712: 
add rcx,0x100 719: cmp rcx,0x400 720: jl 0x62b lr_lstm/add_4: 726: vmovaps ymm0,YMMWORD PTR [rip+0x6b2] # 0xde0 72e: vmovaps ymm1,YMMWORD PTR [rip+0x6ca] # 0xe00 736: vmovaps ymm2,YMMWORD PTR [rip+0x6e2] # 0xe20 73e: vmovaps ymm3,YMMWORD PTR [rip+0x6fa] # 0xe40 746: vmovaps ymm4,YMMWORD PTR [rip+0x712] # 0xe60 74e: vmovaps ymm5,YMMWORD PTR [rip+0x72a] # 0xe80 756: vmovaps ymm6,YMMWORD PTR [rip+0x742] # 0xea0 75e: vmovaps ymm7,YMMWORD PTR [rip+0x75a] # 0xec0 766: vmovaps ymm8,YMMWORD PTR [rip+0x772] # 0xee0 76e: vmovaps ymm9,YMMWORD PTR [rip+0x78a] # 0xf00 776: vmovaps ymm10,YMMWORD PTR [rip+0x7a2] # 0xf20 77e: lea r8,[rbp+0xa20] 785: movabs rcx,0x5c2fc0 78f: lea r9,[rbp+0x1220] 796: movabs rdx,0x5c1dc0 7a0: mov rsi,QWORD PTR [rbp+0x40] 7a4: mov rdi,QWORD PTR [rbp+0x20] 7a8: lea r10,[rbp+0xa20] 7af: xor rax,rax 7b2: vmovaps ymm11,YMMWORD PTR [r9+rax*1] 7b8: vaddps ymm12,ymm11,YMMWORD PTR [rdx+rax*1] 7bd: vxorps ymm11,ymm11,ymm11 7c2: vsubps ymm13,ymm11,ymm12 7c7: vminps ymm11,ymm13,YMMWORD PTR [rip+0x771] # 0xf40 7cf: vmaxps ymm12,ymm11,YMMWORD PTR [rip+0x789] # 0xf60 7d7: vmovaps ymm11,ymm12 7dc: vmovaps ymm14,YMMWORD PTR [rip+0x79c] # 0xf80 7e4: vfmadd213ps ymm11,ymm14,YMMWORD PTR [rip+0x7b3] # 0xfa0 7ed: vroundps ymm14,ymm11,0x1 7f3: vmovaps ymm11,ymm14 7f8: vfmadd132ps ymm11,ymm12,YMMWORD PTR [rip+0x7bf] # 0xfc0 801: vmulps ymm12,ymm11,ymm11 806: vmovaps ymm15,YMMWORD PTR [rip+0x7d2] # 0xfe0 80e: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x7e9] # 0x1000 817: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x800] # 0x1020 820: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x817] # 0x1040 829: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x82e] # 0x1060 832: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x765] # 0xfa0 83b: vfmadd213ps ymm15,ymm12,ymm11 840: vaddps ymm11,ymm15,ymm0 844: vaddps ymm12,ymm14,YMMWORD PTR [rip+0x834] # 0x1080 84c: vcvttps2dq ymm14,ymm12 851: vpslld ymm12,ymm14,0x17 857: vmulps ymm14,ymm11,ymm12 85c: vmaxps ymm11,ymm14,ymm13 861: vaddps ymm12,ymm0,ymm11 866: 
vdivps ymm11,ymm0,ymm12 86b: vmovaps ymm12,YMMWORD PTR [r8+rax*1] 871: vaddps ymm13,ymm12,YMMWORD PTR [rcx+rax*1] 876: vminps ymm12,ymm13,ymm2 87a: vmaxps ymm13,ymm12,ymm1 87e: vmulps ymm12,ymm13,ymm13 883: vmovaps ymm14,ymm12 888: vfmadd213ps ymm14,ymm3,ymm4 88d: vmovaps ymm15,ymm12 892: vfmadd213ps ymm15,ymm14,ymm5 897: vmovaps ymm14,ymm12 89c: vfmadd213ps ymm14,ymm15,ymm6 8a1: vmovaps ymm15,ymm12 8a6: vfmadd213ps ymm15,ymm14,ymm7 8ab: vmovaps ymm14,ymm12 8b0: vfmadd213ps ymm14,ymm15,ymm8 8b5: vmovaps ymm15,ymm12 8ba: vfmadd213ps ymm15,ymm14,ymm9 8bf: vmulps ymm14,ymm13,ymm15 8c4: vmovaps ymm13,ymm12 8c9: vfmadd213ps ymm13,ymm10,YMMWORD PTR [rip+0x7ce] # 0x10a0 8d2: vmovaps ymm15,ymm12 8d7: vfmadd213ps ymm15,ymm13,YMMWORD PTR [rip+0x7e0] # 0x10c0 8e0: vfmadd213ps ymm12,ymm15,YMMWORD PTR [rip+0x7f7] # 0x10e0 8e9: vdivps ymm13,ymm14,ymm12 8ee: vsubps ymm12,ymm0,ymm11 8f3: vmulps ymm14,ymm12,YMMWORD PTR [rsi+rax*1] 8f8: vfmadd213ps ymm11,ymm13,ymm14 8fd: vmovaps YMMWORD PTR [rdi+rax*1],ymm11 902: vminps ymm12,ymm11,ymm2 906: vmaxps ymm11,ymm12,ymm1 90a: vmulps ymm12,ymm11,ymm11 90f: vmovaps ymm13,ymm12 914: vfmadd213ps ymm13,ymm3,ymm4 919: vmovaps ymm14,ymm12 91e: vfmadd213ps ymm14,ymm13,ymm5 923: vmovaps ymm13,ymm12 928: vfmadd213ps ymm13,ymm14,ymm6 92d: vmovaps ymm14,ymm12 932: vfmadd213ps ymm14,ymm13,ymm7 937: vmovaps ymm13,ymm12 93c: vfmadd213ps ymm13,ymm14,ymm8 941: vmovaps ymm14,ymm12 946: vfmadd213ps ymm14,ymm13,ymm9 94b: vmulps ymm13,ymm11,ymm14 950: vmovaps ymm11,ymm12 955: vfmadd213ps ymm11,ymm10,YMMWORD PTR [rip+0x742] # 0x10a0 95e: vmovaps ymm14,ymm12 963: vfmadd213ps ymm14,ymm11,YMMWORD PTR [rip+0x754] # 0x10c0 96c: vfmadd213ps ymm12,ymm14,YMMWORD PTR [rip+0x76b] # 0x10e0 975: vdivps ymm11,ymm13,ymm12 97a: vmovaps YMMWORD PTR [r10+rax*1],ymm11 980: add rax,0x20 984: cmp rax,0x400 98a: jl 0x7b2 lr_lstm/MatMul_6: 990: mov rdi,QWORD PTR [rbp+0x20] 994: movabs rsi,0x7ff6a03de040 99e: lea r8,[rbp+0x1620] 9a5: xor rcx,rcx 9a8: vxorps ymm0,ymm0,ymm0 9ac: 
vxorps ymm1,ymm1,ymm1 9b0: vxorps ymm2,ymm2,ymm2 9b4: vxorps ymm3,ymm3,ymm3 9b8: vxorps ymm4,ymm4,ymm4 9bc: vxorps ymm5,ymm5,ymm5 9c0: vxorps ymm6,ymm6,ymm6 9c4: vxorps ymm7,ymm7,ymm7 9c8: mov rdx,rsi 9cb: xor rax,rax 9ce: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 9d4: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 9d9: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 9df: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 9e5: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 9eb: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 9f4: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 9fd: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] a06: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] a0f: add rdx,0x400 a16: add rax,0x4 a1a: cmp rax,0x400 a20: jl 0x9ce a22: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 a28: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 a2f: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 a36: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 a3d: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 a47: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 a51: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 a5b: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 a65: add rsi,0x100 a6c: add rcx,0x100 a73: cmp rcx,0x400 a7a: jl 0x9a8 lr_lstm/MatMul_5: a80: lea rdi,[rbp+0xc0] a87: movabs rsi,0x60ef40 a91: lea r9,[rbp+0x1620] a98: lea r8,[rbp+0x1a20] a9f: xor rcx,rcx aa2: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] aa8: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] aaf: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] ab6: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] abd: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] ac7: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] ad1: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] adb: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] ae5: mov rdx,rsi ae8: xor rax,rax aeb: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] af1: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] af6: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] afc: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] b02: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] b08: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 
b11: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] b1a: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] b23: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] b2c: add rdx,0x400 b33: add rax,0x4 b37: cmp rax,0x160 b3d: jl 0xaeb b3f: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 b45: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 b4c: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 b53: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 b5a: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 b64: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 b6e: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 b78: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 b82: add rsi,0x100 b89: add rcx,0x100 b90: cmp rcx,0x400 b97: jl 0xaa2 lr_lstm/MatMul_7: b9d: mov rdi,QWORD PTR [rbp+0x0] ba1: movabs rsi,0x7ff6a035c040 bab: lea r9,[rbp+0x1a20] bb2: lea r8,[rbp+0x1e20] bb9: xor rcx,rcx bbc: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] bc2: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] bc9: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] bd0: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] bd7: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] be1: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] beb: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] bf5: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] bff: mov rdx,rsi c02: xor rax,rax c05: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] c0b: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] c10: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] c16: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] c1c: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] c22: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] c2b: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] c34: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] c3d: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] c46: add rdx,0x400 c4d: add rax,0x4 c51: cmp rax,0x400 c57: jl 0xc05 c59: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 c5f: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 c66: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 c6d: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 c74: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 c7e: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 c88: 
vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 c92: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 c9c: add rsi,0x100 ca3: add rcx,0x100 caa: cmp rcx,0x400 cb1: jl 0xbbc cb7: vmovaps ymm0,YMMWORD PTR [rip+0x121] # 0xde0 lr_lstm/add_7: cbf: vxorps ymm1,ymm1,ymm1 cc3: vmovaps ymm2,YMMWORD PTR [rip+0x295] # 0xf60 ccb: vmovaps ymm3,YMMWORD PTR [rip+0x26d] # 0xf40 cd3: vmovaps ymm4,YMMWORD PTR [rip+0x2c5] # 0xfa0 cdb: vmovaps ymm5,YMMWORD PTR [rip+0x29d] # 0xf80 ce3: vmovaps ymm6,YMMWORD PTR [rip+0x2d5] # 0xfc0 ceb: vmovaps ymm7,YMMWORD PTR [rip+0x2ed] # 0xfe0 cf3: vmovaps ymm8,YMMWORD PTR [rip+0x305] # 0x1000 cfb: vmovaps ymm9,YMMWORD PTR [rip+0x31d] # 0x1020 d03: vmovaps ymm10,YMMWORD PTR [rip+0x335] # 0x1040 d0b: lea rsi,[rbp+0x1e20] d12: movabs rcx,0x5bedc0 d1c: lea rdi,[rbp+0xa20] d23: mov rdx,QWORD PTR [rbp+0x28] d27: xor rax,rax d2a: vmovaps ymm11,YMMWORD PTR [rsi+rax*1] d2f: vaddps ymm12,ymm11,YMMWORD PTR [rcx+rax*1] d34: vsubps ymm11,ymm1,ymm12 d39: vminps ymm12,ymm11,ymm3 d3d: vmaxps ymm13,ymm12,ymm2 d41: vmovaps ymm12,ymm13 d46: vfmadd213ps ymm12,ymm5,ymm4 d4b: vroundps ymm14,ymm12,0x1 d51: vmovaps ymm12,ymm14 d56: vfmadd213ps ymm12,ymm6,ymm13 d5b: vmulps ymm13,ymm12,ymm12 d60: vmovaps ymm15,ymm7 d64: vfmadd213ps ymm15,ymm12,ymm8 d69: vfmadd213ps ymm15,ymm12,ymm9 d6e: vfmadd213ps ymm15,ymm12,ymm10 d73: vfmadd213ps ymm15,ymm12,YMMWORD PTR [rip+0x2e4] # 0x1060 d7c: vfmadd213ps ymm15,ymm12,YMMWORD PTR [rip+0x21b] # 0xfa0 d85: vfmadd213ps ymm15,ymm13,ymm12 d8a: vaddps ymm12,ymm15,ymm0 d8e: vaddps ymm13,ymm14,YMMWORD PTR [rip+0x2ea] # 0x1080 d96: vcvttps2dq ymm14,ymm13 d9b: vpslld ymm13,ymm14,0x17 da1: vmulps ymm14,ymm12,ymm13 da6: vmaxps ymm12,ymm14,ymm11 dab: vaddps ymm11,ymm0,ymm12 db0: vdivps ymm12,ymm0,ymm11 db5: vmulps ymm11,ymm12,YMMWORD PTR [rdi+rax*1] dba: vmovaps YMMWORD PTR [rdx+rax*1],ymm11 dbf: add rax,0x20 dc3: cmp rax,0x400 dc9: jl 0xd2a dcf: pop rbp dd0: vzeroupper dd3: ret ;============================================================================== ; RL 
LSTM ;============================================================================== rl_lstm: 0: vzeroupper 3: push rbp 4: mov rbp,rdi rl_lstm/hyphen/Lookup: 7: movsxd rax,DWORD PTR [rbp+0x0] b: mov rcx,0x1 12: test rax,rax 15: cmovs rax,rcx 19: shl rax,0x5 1d: movabs rdx,0x5daf80 27: add rax,rdx 2a: mov QWORD PTR [rbp+0x48],rax rl_lstm/capitalization/Lookup: 2e: movsxd rax,DWORD PTR [rbp+0x4] 32: mov rcx,0x4 39: test rax,rax 3c: cmovs rax,rcx 40: shl rax,0x5 44: movabs rdx,0x5db040 4e: add rax,rdx 51: mov QWORD PTR [rbp+0x50],rax rl_lstm/quote/Lookup: 55: movsxd rax,DWORD PTR [rbp+0x40] 59: mov rcx,0x3 60: test rax,rax 63: cmovs rax,rcx 67: shl rax,0x5 6b: movabs rdx,0x5c0300 75: add rax,rdx 78: mov QWORD PTR [rbp+0x58],rax rl_lstm/words/Lookup: 7c: movsxd rax,DWORD PTR [rbp+0x1c] 80: mov rcx,0xd008 87: test rax,rax 8a: cmovs rax,rcx 8e: shl rax,0x7 92: movabs rdx,0x7ff69bcc2040 9c: add rax,rdx 9f: mov QWORD PTR [rbp+0x60],rax rl_lstm/digit/Lookup: a3: movsxd rax,DWORD PTR [rbp+0x18] a7: mov rcx,0x2 ae: test rax,rax b1: cmovs rax,rcx b5: shl rax,0x5 b9: movabs rdx,0x5becc0 c3: add rax,rdx c6: mov QWORD PTR [rbp+0x68],rax rl_lstm/punctuation/Lookup: ca: movsxd rax,DWORD PTR [rbp+0x30] ce: mov rcx,0x2 d5: test rax,rax d8: cmovs rax,rcx dc: shl rax,0x5 e0: movabs rdx,0x5dae80 ea: add rax,rdx ed: mov QWORD PTR [rbp+0x70],rax rl_lstm/suffix/Lookup: f1: lea rcx,[rbp+0x34] f5: movabs rdx,0x7ff69c343040 ff: lea rsi,[rbp+0x80] 106: vxorps ymm0,ymm0,ymm0 10a: vxorps ymm1,ymm1,ymm1 10e: mov r8,0x208d 115: xor rdi,rdi 118: movsxd rax,DWORD PTR [rcx+rdi*4] 11c: test rax,rax 11f: jns 0x132 125: cmp rax,0xffffffffffffffff 129: jne 0x142 12f: mov rax,r8 132: shl rax,0x6 136: add rax,rdx 139: vaddps ymm0,ymm0,YMMWORD PTR [rax] 13d: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20] 142: inc rdi 145: cmp rdi,0x3 149: jne 0x118 14b: vmovaps YMMWORD PTR [rsi],ymm0 14f: vmovaps YMMWORD PTR [rsi+0x20],ymm1 rl_lstm/concat: 154: lea r8,[rbp+0xc0] 15b: mov rsi,QWORD PTR [rbp+0x60] 15f: lea rdi,[r8] 
162: mov rcx,0x80 169: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 16b: lea rsi,[rbp+0x80] 172: lea rdi,[r8+0x80] 179: mov rcx,0x40 180: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 182: mov rsi,QWORD PTR [rbp+0x50] 186: lea rdi,[r8+0xc0] 18d: mov rcx,0x20 194: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 196: mov rsi,QWORD PTR [rbp+0x48] 19a: lea rdi,[r8+0xe0] 1a1: mov rcx,0x20 1a8: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 1aa: mov rsi,QWORD PTR [rbp+0x70] 1ae: lea rdi,[r8+0x100] 1b5: mov rcx,0x20 1bc: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 1be: mov rsi,QWORD PTR [rbp+0x58] 1c2: lea rdi,[r8+0x120] 1c9: mov rcx,0x20 1d0: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 1d2: mov rsi,QWORD PTR [rbp+0x68] 1d6: lea rdi,[r8+0x140] 1dd: mov rcx,0x20 1e4: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] rl_lstm/MatMul: 1e6: lea rdi,[rbp+0xc0] 1ed: movabs rsi,0x63b040 1f7: lea r8,[rbp+0x220] 1fe: xor rcx,rcx 201: vxorps ymm0,ymm0,ymm0 205: vxorps ymm1,ymm1,ymm1 209: vxorps ymm2,ymm2,ymm2 20d: vxorps ymm3,ymm3,ymm3 211: vxorps ymm4,ymm4,ymm4 215: vxorps ymm5,ymm5,ymm5 219: vxorps ymm6,ymm6,ymm6 21d: vxorps ymm7,ymm7,ymm7 221: mov rdx,rsi 224: xor rax,rax 227: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 22d: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 232: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 238: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 23e: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 244: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 24d: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 256: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 25f: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 268: add rdx,0x400 26f: add rax,0x4 273: cmp rax,0x160 279: jl 0x227 27b: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 281: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 288: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 28f: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 296: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 2a0: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 2aa: vmovaps YMMWORD PTR 
[r8+rcx*1+0xc0],ymm6 2b4: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 2be: add rsi,0x100 2c5: add rcx,0x100 2cc: cmp rcx,0x400 2d3: jl 0x201 rl_lstm/MatMul_3: 2d9: lea rdi,[rbp+0xc0] 2e0: movabs rsi,0x6510c0 2ea: lea r8,[rbp+0x620] 2f1: xor rcx,rcx 2f4: vxorps ymm0,ymm0,ymm0 2f8: vxorps ymm1,ymm1,ymm1 2fc: vxorps ymm2,ymm2,ymm2 300: vxorps ymm3,ymm3,ymm3 304: vxorps ymm4,ymm4,ymm4 308: vxorps ymm5,ymm5,ymm5 30c: vxorps ymm6,ymm6,ymm6 310: vxorps ymm7,ymm7,ymm7 314: mov rdx,rsi 317: xor rax,rax 31a: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 320: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 325: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 32b: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 331: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 337: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 340: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 349: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 352: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 35b: add rdx,0x400 362: add rax,0x4 366: cmp rax,0x160 36c: jl 0x31a 36e: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 374: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 37b: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 382: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 389: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 393: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 39d: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 3a7: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 3b1: add rsi,0x100 3b8: add rcx,0x100 3bf: cmp rcx,0x400 3c6: jl 0x2f4 rl_lstm/MatMul_1: 3cc: mov rdi,QWORD PTR [rbp+0x8] 3d0: movabs rsi,0x7ff69cbf5040 3da: lea r9,[rbp+0x220] 3e1: lea r8,[rbp+0xa20] 3e8: xor rcx,rcx 3eb: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] 3f1: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] 3f8: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] 3ff: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] 406: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] 410: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] 41a: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] 424: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] 42e: mov rdx,rsi 431: xor rax,rax 434: 
vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 43a: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 43f: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 445: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 44b: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 451: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 45a: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 463: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 46c: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 475: add rdx,0x400 47c: add rax,0x4 480: cmp rax,0x400 486: jl 0x434 488: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 48e: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 495: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 49c: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 4a3: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 4ad: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 4b7: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 4c1: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 4cb: add rsi,0x100 4d2: add rcx,0x100 4d9: cmp rcx,0x400 4e0: jl 0x3eb rl_lstm/MatMul_4: 4e6: mov rdi,QWORD PTR [rbp+0x8] 4ea: movabs rsi,0x7ff69cb73040 4f4: lea r9,[rbp+0x620] 4fb: lea r8,[rbp+0xe20] 502: xor rcx,rcx 505: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] 50b: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] 512: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] 519: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] 520: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] 52a: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] 534: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] 53e: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] 548: mov rdx,rsi 54b: xor rax,rax 54e: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 554: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 559: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 55f: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 565: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 56b: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 574: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 57d: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 586: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 58f: add rdx,0x400 596: add rax,0x4 59a: cmp rax,0x400 5a0: 
jl 0x54e 5a2: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 5a8: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 5af: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 5b6: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 5bd: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 5c7: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 5d1: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 5db: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 5e5: add rsi,0x100 5ec: add rcx,0x100 5f3: cmp rcx,0x400 5fa: jl 0x505 rl_lstm/MatMul_2: 600: mov rdi,QWORD PTR [rbp+0x10] 604: movabs rsi,0x7ff6a0299040 60e: lea r9,[rbp+0xa20] 615: lea r8,[rbp+0x1220] 61c: xor rcx,rcx 61f: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] 625: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] 62c: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] 633: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] 63a: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] 644: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] 64e: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] 658: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] 662: mov rdx,rsi 665: xor rax,rax 668: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 66e: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 673: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 679: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 67f: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 685: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 68e: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 697: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 6a0: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 6a9: add rdx,0x400 6b0: add rax,0x4 6b4: cmp rax,0x400 6ba: jl 0x668 6bc: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 6c2: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 6c9: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 6d0: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 6d7: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 6e1: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 6eb: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 6f5: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 6ff: add rsi,0x100 706: add rcx,0x100 70d: cmp rcx,0x400 714: jl 0x61f rl_lstm/add_4: 71a: vmovaps ymm0,YMMWORD PTR [rip+0x6be] # 
0xde0 722: vmovaps ymm1,YMMWORD PTR [rip+0x6d6] # 0xe00 72a: vmovaps ymm2,YMMWORD PTR [rip+0x6ee] # 0xe20 732: vmovaps ymm3,YMMWORD PTR [rip+0x706] # 0xe40 73a: vmovaps ymm4,YMMWORD PTR [rip+0x71e] # 0xe60 742: vmovaps ymm5,YMMWORD PTR [rip+0x736] # 0xe80 74a: vmovaps ymm6,YMMWORD PTR [rip+0x74e] # 0xea0 752: vmovaps ymm7,YMMWORD PTR [rip+0x766] # 0xec0 75a: vmovaps ymm8,YMMWORD PTR [rip+0x77e] # 0xee0 762: vmovaps ymm9,YMMWORD PTR [rip+0x796] # 0xf00 76a: vmovaps ymm10,YMMWORD PTR [rip+0x7ae] # 0xf20 772: lea r8,[rbp+0xe20] 779: movabs rcx,0x5c8b00 783: lea r9,[rbp+0x1220] 78a: movabs rdx,0x5dbd00 794: mov rsi,QWORD PTR [rbp+0x10] 798: mov rdi,QWORD PTR [rbp+0x20] 79c: lea r10,[rbp+0xe20] 7a3: xor rax,rax 7a6: vmovaps ymm11,YMMWORD PTR [r9+rax*1] 7ac: vaddps ymm12,ymm11,YMMWORD PTR [rdx+rax*1] 7b1: vxorps ymm11,ymm11,ymm11 7b6: vsubps ymm13,ymm11,ymm12 7bb: vminps ymm11,ymm13,YMMWORD PTR [rip+0x77d] # 0xf40 7c3: vmaxps ymm12,ymm11,YMMWORD PTR [rip+0x795] # 0xf60 7cb: vmovaps ymm11,ymm12 7d0: vmovaps ymm14,YMMWORD PTR [rip+0x7a8] # 0xf80 7d8: vfmadd213ps ymm11,ymm14,YMMWORD PTR [rip+0x7bf] # 0xfa0 7e1: vroundps ymm14,ymm11,0x1 7e7: vmovaps ymm11,ymm14 7ec: vfmadd132ps ymm11,ymm12,YMMWORD PTR [rip+0x7cb] # 0xfc0 7f5: vmulps ymm12,ymm11,ymm11 7fa: vmovaps ymm15,YMMWORD PTR [rip+0x7de] # 0xfe0 802: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x7f5] # 0x1000 80b: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x80c] # 0x1020 814: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x823] # 0x1040 81d: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x83a] # 0x1060 826: vfmadd213ps ymm15,ymm11,YMMWORD PTR [rip+0x771] # 0xfa0 82f: vfmadd213ps ymm15,ymm12,ymm11 834: vaddps ymm11,ymm15,ymm0 838: vaddps ymm12,ymm14,YMMWORD PTR [rip+0x840] # 0x1080 840: vcvttps2dq ymm14,ymm12 845: vpslld ymm12,ymm14,0x17 84b: vmulps ymm14,ymm11,ymm12 850: vmaxps ymm11,ymm14,ymm13 855: vaddps ymm12,ymm0,ymm11 85a: vdivps ymm11,ymm0,ymm12 85f: vmovaps ymm12,YMMWORD PTR [r8+rax*1] 865: vaddps ymm13,ymm12,YMMWORD PTR 
[rcx+rax*1] 86a: vminps ymm12,ymm13,ymm2 86e: vmaxps ymm13,ymm12,ymm1 872: vmulps ymm12,ymm13,ymm13 877: vmovaps ymm14,ymm12 87c: vfmadd213ps ymm14,ymm3,ymm4 881: vmovaps ymm15,ymm12 886: vfmadd213ps ymm15,ymm14,ymm5 88b: vmovaps ymm14,ymm12 890: vfmadd213ps ymm14,ymm15,ymm6 895: vmovaps ymm15,ymm12 89a: vfmadd213ps ymm15,ymm14,ymm7 89f: vmovaps ymm14,ymm12 8a4: vfmadd213ps ymm14,ymm15,ymm8 8a9: vmovaps ymm15,ymm12 8ae: vfmadd213ps ymm15,ymm14,ymm9 8b3: vmulps ymm14,ymm13,ymm15 8b8: vmovaps ymm13,ymm12 8bd: vfmadd213ps ymm13,ymm10,YMMWORD PTR [rip+0x7da] # 0x10a0 8c6: vmovaps ymm15,ymm12 8cb: vfmadd213ps ymm15,ymm13,YMMWORD PTR [rip+0x7ec] # 0x10c0 8d4: vfmadd213ps ymm12,ymm15,YMMWORD PTR [rip+0x803] # 0x10e0 8dd: vdivps ymm13,ymm14,ymm12 8e2: vsubps ymm12,ymm0,ymm11 8e7: vmulps ymm14,ymm12,YMMWORD PTR [rsi+rax*1] 8ec: vfmadd213ps ymm11,ymm13,ymm14 8f1: vmovaps YMMWORD PTR [rdi+rax*1],ymm11 8f6: vminps ymm12,ymm11,ymm2 8fa: vmaxps ymm11,ymm12,ymm1 8fe: vmulps ymm12,ymm11,ymm11 903: vmovaps ymm13,ymm12 908: vfmadd213ps ymm13,ymm3,ymm4 90d: vmovaps ymm14,ymm12 912: vfmadd213ps ymm14,ymm13,ymm5 917: vmovaps ymm13,ymm12 91c: vfmadd213ps ymm13,ymm14,ymm6 921: vmovaps ymm14,ymm12 926: vfmadd213ps ymm14,ymm13,ymm7 92b: vmovaps ymm13,ymm12 930: vfmadd213ps ymm13,ymm14,ymm8 935: vmovaps ymm14,ymm12 93a: vfmadd213ps ymm14,ymm13,ymm9 93f: vmulps ymm13,ymm11,ymm14 944: vmovaps ymm11,ymm12 949: vfmadd213ps ymm11,ymm10,YMMWORD PTR [rip+0x74e] # 0x10a0 952: vmovaps ymm14,ymm12 957: vfmadd213ps ymm14,ymm11,YMMWORD PTR [rip+0x760] # 0x10c0 960: vfmadd213ps ymm12,ymm14,YMMWORD PTR [rip+0x777] # 0x10e0 969: vdivps ymm11,ymm13,ymm12 96e: vmovaps YMMWORD PTR [r10+rax*1],ymm11 974: add rax,0x20 978: cmp rax,0x400 97e: jl 0x7a6 rl_lstm/MatMul_6: 984: mov rdi,QWORD PTR [rbp+0x20] 988: movabs rsi,0x7ff6a02da040 992: lea r8,[rbp+0x1620] 999: xor rcx,rcx 99c: vxorps ymm0,ymm0,ymm0 9a0: vxorps ymm1,ymm1,ymm1 9a4: vxorps ymm2,ymm2,ymm2 9a8: vxorps ymm3,ymm3,ymm3 9ac: vxorps ymm4,ymm4,ymm4 9b0: 
vxorps ymm5,ymm5,ymm5 9b4: vxorps ymm6,ymm6,ymm6 9b8: vxorps ymm7,ymm7,ymm7 9bc: mov rdx,rsi 9bf: xor rax,rax 9c2: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 9c8: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 9cd: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 9d3: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 9d9: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 9df: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 9e8: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 9f1: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 9fa: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] a03: add rdx,0x400 a0a: add rax,0x4 a0e: cmp rax,0x400 a14: jl 0x9c2 a16: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 a1c: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 a23: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 a2a: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 a31: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 a3b: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 a45: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 a4f: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 a59: add rsi,0x100 a60: add rcx,0x100 a67: cmp rcx,0x400 a6e: jl 0x99c rl_lstm/MatMul_5: a74: lea rdi,[rbp+0xc0] a7b: movabs rsi,0x676300 a85: lea r9,[rbp+0x1620] a8c: lea r8,[rbp+0x1a20] a93: xor rcx,rcx a96: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] a9c: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] aa3: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] aaa: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] ab1: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] abb: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] ac5: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] acf: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] ad9: mov rdx,rsi adc: xor rax,rax adf: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] ae5: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] aea: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] af0: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] af6: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] afc: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] b05: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] b0e: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] b17: 
vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] b20: add rdx,0x400 b27: add rax,0x4 b2b: cmp rax,0x160 b31: jl 0xadf b33: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 b39: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 b40: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 b47: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 b4e: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 b58: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 b62: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 b6c: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 b76: add rsi,0x100 b7d: add rcx,0x100 b84: cmp rcx,0x400 b8b: jl 0xa96 rl_lstm/MatMul_7: b91: mov rdi,QWORD PTR [rbp+0x8] b95: movabs rsi,0x7ff69cbb4040 b9f: lea r9,[rbp+0x1a20] ba6: lea r8,[rbp+0x1e20] bad: xor rcx,rcx bb0: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] bb6: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] bbd: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] bc4: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] bcb: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] bd5: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] bdf: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] be9: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] bf3: mov rdx,rsi bf6: xor rax,rax bf9: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] bff: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] c04: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] c0a: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] c10: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] c16: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] c1f: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] c28: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] c31: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] c3a: add rdx,0x400 c41: add rax,0x4 c45: cmp rax,0x400 c4b: jl 0xbf9 c4d: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 c53: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 c5a: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 c61: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 c68: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 c72: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 c7c: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 c86: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 c90: add rsi,0x100 c97: 
add rcx,0x100 c9e: cmp rcx,0x400 ca5: jl 0xbb0 rl_lstm/add_7: cab: vmovaps ymm0,YMMWORD PTR [rip+0x12d] # 0xde0 cb3: vxorps ymm1,ymm1,ymm1 cb7: vmovaps ymm2,YMMWORD PTR [rip+0x2a1] # 0xf60 cbf: vmovaps ymm3,YMMWORD PTR [rip+0x279] # 0xf40 cc7: vmovaps ymm4,YMMWORD PTR [rip+0x2d1] # 0xfa0 ccf: vmovaps ymm5,YMMWORD PTR [rip+0x2a9] # 0xf80 cd7: vmovaps ymm6,YMMWORD PTR [rip+0x2e1] # 0xfc0 cdf: vmovaps ymm7,YMMWORD PTR [rip+0x2f9] # 0xfe0 ce7: vmovaps ymm8,YMMWORD PTR [rip+0x311] # 0x1000 cef: vmovaps ymm9,YMMWORD PTR [rip+0x329] # 0x1020 cf7: vmovaps ymm10,YMMWORD PTR [rip+0x341] # 0x1040 cff: lea rsi,[rbp+0x1e20] d06: movabs rcx,0x5c3ec0 d10: lea rdi,[rbp+0xe20] d17: mov rdx,QWORD PTR [rbp+0x28] d1b: xor rax,rax d1e: vmovaps ymm11,YMMWORD PTR [rsi+rax*1] d23: vaddps ymm12,ymm11,YMMWORD PTR [rcx+rax*1] d28: vsubps ymm11,ymm1,ymm12 d2d: vminps ymm12,ymm11,ymm3 d31: vmaxps ymm13,ymm12,ymm2 d35: vmovaps ymm12,ymm13 d3a: vfmadd213ps ymm12,ymm5,ymm4 d3f: vroundps ymm14,ymm12,0x1 d45: vmovaps ymm12,ymm14 d4a: vfmadd213ps ymm12,ymm6,ymm13 d4f: vmulps ymm13,ymm12,ymm12 d54: vmovaps ymm15,ymm7 d58: vfmadd213ps ymm15,ymm12,ymm8 d5d: vfmadd213ps ymm15,ymm12,ymm9 d62: vfmadd213ps ymm15,ymm12,ymm10 d67: vfmadd213ps ymm15,ymm12,YMMWORD PTR [rip+0x2f0] # 0x1060 d70: vfmadd213ps ymm15,ymm12,YMMWORD PTR [rip+0x227] # 0xfa0 d79: vfmadd213ps ymm15,ymm13,ymm12 d7e: vaddps ymm12,ymm15,ymm0 d82: vaddps ymm13,ymm14,YMMWORD PTR [rip+0x2f6] # 0x1080 d8a: vcvttps2dq ymm14,ymm13 d8f: vpslld ymm13,ymm14,0x17 d95: vmulps ymm14,ymm12,ymm13 d9a: vmaxps ymm12,ymm14,ymm11 d9f: vaddps ymm11,ymm0,ymm12 da4: vdivps ymm12,ymm0,ymm11 da9: vmulps ymm11,ymm12,YMMWORD PTR [rdi+rax*1] dae: vmovaps YMMWORD PTR [rdx+rax*1],ymm11 db3: add rax,0x20 db7: cmp rax,0x400 dbd: jl 0xd1e dc3: pop rbp dc4: vzeroupper dc7: ret ;============================================================================== ; FF ;============================================================================== ff: 0: vzeroupper 3: push rbp 4: 
;------------------------------------------------------------------------------
; ff -- reader's guide to the listing that follows.
;
; The listing is wrapped at arbitrary points, so these notes are keyed by the
; hexadecimal offsets printed in front of each instruction, not by line.
; The label "ff:" and the instructions at 0x0 / 0x3 sit at the end of the
; preceding physical line, after the tail of rl_lstm.
;
; Entry : 0x0 vzeroupper, 0x3 push rbp, 0x4 mov rbp,rdi.
;         Every later memory operand is either rbp-relative, reached through a
;         pointer loaded from an rbp-relative slot, or based on a movabs
;         constant, so rbp is the base of one data block for the whole body.
;         (Presumably rdi is the first SysV argument -- confirm at the caller.)
; Exit  : 0xac4 pop rbp, 0xac5 vzeroupper, 0xac8 ret.
; Uses  : rax rcx rdx rsi rdi r8 r9 r10 r11d, ymm0-ymm8, ymm12, ymm13, flags.
;         There is no call instruction and no rsp-relative access.
; Align : vmovaps and ymm memory operands are applied to rbp-relative and
;         movabs-based addresses; NOTE(review): this presumes the data block
;         and the constant tables are 32-byte aligned -- confirm.
;
; Two recurring shapes in the first half:
;
;  "Collect"  Reads a sign-extended 32-bit index.
;             index >= 0 : copies one row (ROW bytes) with rep movs from
;                          table_ptr + index*ROW to the destination.
;             index == -1: copies nothing and stores 0x3f800000 (the bit
;                          pattern of 1.0f) in the dword just past the row.
;             other < 0  : leaves the destination untouched.
;             Whether the destination was cleared beforehand is not visible.
;
;  "Lookup"   Sums table rows. For each of 0x20 sign-extended 32-bit indices:
;             index >= 0 selects the 0x40-byte row at table + (index << 6);
;             index == -1 selects the fallback row number held in r8;
;             any other negative index is skipped. The two ymm accumulators
;             (zeroed first) are stored to dest and dest+0x20.
;
; Sections starting on the next physical line:
;   0x007 ff/rl/Collect            1 index at rbp+0x12c, table_ptr=[rbp+0x200],
;                                  dest rbp+0x2a0, ROW 0x400.
;   0x05a ff/frame-end-lr/Collect  5 indices at rbp+0x164, table_ptr=[rbp+0x138],
;                                  dest rbp+0x6c0, ROW 0x400, dest stride 0x420.
;   0x0c1 ff/frame-end-rl/Collect  5 indices at rbp+0x140, table_ptr=[rbp+0x200],
;                                  dest rbp+0x1b60, ROW 0x400, dest stride 0x420.
;   0x128 ff/in-roles/Lookup       indices at rbp+0x208, table 0x5ed880,
;                                  fallback row 0x7c, dest rbp+0x3000.
;   0x18e ff/unlabeled-roles/Lookup (body continues on the line after next)
;                                  indices at rbp+0x180, table 0x5ce4c0,
;                                  fallback row 0x18, dest rbp+0x3040.
;------------------------------------------------------------------------------
mov rbp,rdi ff/rl/Collect: 7: lea rdx,[rbp+0x12c] e: mov r8,QWORD PTR [rbp+0x200] 15: lea r9,[rbp+0x2a0] 1c: movsxd rax,DWORD PTR [rdx] 1f: test rax,rax 22: js 0x43 28: shl rax,0xa 2c: add rax,r8 2f: mov rdi,r9 32: mov rsi,rax 35: mov rcx,0x400 3c: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 3e: jmp 0x5a 43: cmp rax,0xffffffffffffffff 47: jne 0x5a 4d: mov r11d,0x3f800000 53: mov DWORD PTR [r9+0x400],r11d ff/frame-end-lr/Collect: 5a: lea rdx,[rbp+0x164] 61: mov r8,QWORD PTR [rbp+0x138] 68: lea r9,[rbp+0x6c0] 6f: xor r10,r10 72: movsxd rax,DWORD PTR [rdx+r10*4] 76: test rax,rax 79: js 0x9a 7f: shl rax,0xa 83: add rax,r8 86: mov rdi,r9 89: mov rsi,rax 8c: mov rcx,0x400 93: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 95: jmp 0xb1 9a: cmp rax,0xffffffffffffffff 9e: jne 0xb1 a4: mov r11d,0x3f800000 aa: mov DWORD PTR [r9+0x400],r11d b1: add r9,0x420 b8: inc r10 bb: cmp r10,0x5 bf: jne 0x72 ff/frame-end-rl/Collect: c1: lea rdx,[rbp+0x140] c8: mov r8,QWORD PTR [rbp+0x200] cf: lea r9,[rbp+0x1b60] d6: xor r10,r10 d9: movsxd rax,DWORD PTR [rdx+r10*4] dd: test rax,rax e0: js 0x101 e6: shl rax,0xa ea: add rax,r8 ed: mov rdi,r9 f0: mov rsi,rax f3: mov rcx,0x400 fa: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] fc: jmp 0x118 101: cmp rax,0xffffffffffffffff 105: jne 0x118 10b: mov r11d,0x3f800000 111: mov DWORD PTR [r9+0x400],r11d 118: add r9,0x420 11f: inc r10 122: cmp r10,0x5 126: jne 0xd9 ff/in-roles/Lookup: 128: lea rcx,[rbp+0x208] 12f: movabs rdx,0x5ed880 139: lea rsi,[rbp+0x3000] 140: vxorps ymm0,ymm0,ymm0 144: vxorps ymm1,ymm1,ymm1 148: mov r8,0x7c 14f: xor rdi,rdi 152: movsxd rax,DWORD PTR [rcx+rdi*4] 156: test rax,rax 159: jns 0x16c 15f: cmp rax,0xffffffffffffffff 163: jne 0x17c 169: mov rax,r8 16c: shl rax,0x6 170: add rax,rdx 173: vaddps ymm0,ymm0,YMMWORD PTR [rax] 177: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20] 17c: inc rdi 17f: cmp rdi,0x20 183: jne 0x152 185: vmovaps YMMWORD PTR [rsi],ymm0 189: vmovaps YMMWORD PTR [rsi+0x20],ymm1 ff/unlabeled-roles/Lookup: 18e: lea 
; Sections starting on the next physical line (same Lookup / Collect shapes):
;   0x1f4 ff/labeled-roles/Lookup      indices at rbp+0x0 (rcx = rbp),
;                                      table 0x6be8c0, fallback row 0x270,
;                                      dest rbp+0x3080.
;   0x256 ff/out-roles/Lookup          indices at rbp+0xac, table 0x5f69c0,
;                                      fallback row 0x7c, dest rbp+0x30c0.
;   0x2bc ff/frame-focus-steps/Collect 5 indices at rbp+0x98,
;                                      table_ptr=[rbp+0x178], dest rbp+0x3100,
;                                      ROW 0x200 (index << 9), marker dword at
;                                      row+0x200, dest stride 0x220.
rcx,[rbp+0x180] 195: movabs rdx,0x5ce4c0 19f: lea rsi,[rbp+0x3040] 1a6: vxorps ymm0,ymm0,ymm0 1aa: vxorps ymm1,ymm1,ymm1 1ae: mov r8,0x18 1b5: xor rdi,rdi 1b8: movsxd rax,DWORD PTR [rcx+rdi*4] 1bc: test rax,rax 1bf: jns 0x1d2 1c5: cmp rax,0xffffffffffffffff 1c9: jne 0x1e2 1cf: mov rax,r8 1d2: shl rax,0x6 1d6: add rax,rdx 1d9: vaddps ymm0,ymm0,YMMWORD PTR [rax] 1dd: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20] 1e2: inc rdi 1e5: cmp rdi,0x20 1e9: jne 0x1b8 1eb: vmovaps YMMWORD PTR [rsi],ymm0 1ef: vmovaps YMMWORD PTR [rsi+0x20],ymm1 ff/labeled-roles/Lookup: 1f4: mov rcx,rbp 1f7: movabs rdx,0x6be8c0 201: lea rsi,[rbp+0x3080] 208: vxorps ymm0,ymm0,ymm0 20c: vxorps ymm1,ymm1,ymm1 210: mov r8,0x270 217: xor rdi,rdi 21a: movsxd rax,DWORD PTR [rcx+rdi*4] 21e: test rax,rax 221: jns 0x234 227: cmp rax,0xffffffffffffffff 22b: jne 0x244 231: mov rax,r8 234: shl rax,0x6 238: add rax,rdx 23b: vaddps ymm0,ymm0,YMMWORD PTR [rax] 23f: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20] 244: inc rdi 247: cmp rdi,0x20 24b: jne 0x21a 24d: vmovaps YMMWORD PTR [rsi],ymm0 251: vmovaps YMMWORD PTR [rsi+0x20],ymm1 ff/out-roles/Lookup: 256: lea rcx,[rbp+0xac] 25d: movabs rdx,0x5f69c0 267: lea rsi,[rbp+0x30c0] 26e: vxorps ymm0,ymm0,ymm0 272: vxorps ymm1,ymm1,ymm1 276: mov r8,0x7c 27d: xor rdi,rdi 280: movsxd rax,DWORD PTR [rcx+rdi*4] 284: test rax,rax 287: jns 0x29a 28d: cmp rax,0xffffffffffffffff 291: jne 0x2aa 297: mov rax,r8 29a: shl rax,0x6 29e: add rax,rdx 2a1: vaddps ymm0,ymm0,YMMWORD PTR [rax] 2a5: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20] 2aa: inc rdi 2ad: cmp rdi,0x20 2b1: jne 0x280 2b3: vmovaps YMMWORD PTR [rsi],ymm0 2b7: vmovaps YMMWORD PTR [rsi+0x20],ymm1 ff/frame-focus-steps/Collect: 2bc: lea rdx,[rbp+0x98] 2c3: mov r8,QWORD PTR [rbp+0x178] 2ca: lea r9,[rbp+0x3100] 2d1: xor r10,r10 2d4: movsxd rax,DWORD PTR [rdx+r10*4] 2d8: test rax,rax 2db: js 0x2fc 2e1: shl rax,0x9 2e5: add rax,r8 2e8: mov rdi,r9 2eb: mov rsi,rax 2ee: mov rcx,0x200 2f5: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 2f7: jmp 0x313 
; Sections starting on the next physical line:
;   0x323 ff/frame-creation-steps/Collect 5 indices at rbp+0x84,
;                                  table_ptr=[rbp+0x178], dest rbp+0x3ba0,
;                                  ROW 0x200, dest stride 0x220.
;   0x38a ff/lr/Collect            1 index at rbp+0x80, table_ptr=[rbp+0x138],
;                                  dest rbp+0x4640, ROW 0x400.
;   0x3dd ff/history/Collect       4 indices at rbp+0x154 (loop ends at r10==4),
;                                  table_ptr=[rbp+0x178], dest rbp+0x4a60,
;                                  ROW 0x200, dest stride 0x220.
;   0x444 ff/rl/MatMul             "broadcast" form: input rbp+0x2a0, weights
;                                  0x68c340, output rbp+0x52e0. rax walks the
;                                  input in 4-byte steps while rax < 0x404
;                                  (0x101 floats: the 0x400-byte row plus the
;                                  marker dword); each float is broadcast and
;                                  FMA'd into 4 ymm accumulators, the weight
;                                  pointer advancing 0x80 per input float.
;                                  0x80 bytes are stored; rcx stays 0 (there is
;                                  no outer loop in this section).
2fc: cmp rax,0xffffffffffffffff 300: jne 0x313 306: mov r11d,0x3f800000 30c: mov DWORD PTR [r9+0x200],r11d 313: add r9,0x220 31a: inc r10 31d: cmp r10,0x5 321: jne 0x2d4 ff/frame-creation-steps/Collect: 323: lea rdx,[rbp+0x84] 32a: mov r8,QWORD PTR [rbp+0x178] 331: lea r9,[rbp+0x3ba0] 338: xor r10,r10 33b: movsxd rax,DWORD PTR [rdx+r10*4] 33f: test rax,rax 342: js 0x363 348: shl rax,0x9 34c: add rax,r8 34f: mov rdi,r9 352: mov rsi,rax 355: mov rcx,0x200 35c: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 35e: jmp 0x37a 363: cmp rax,0xffffffffffffffff 367: jne 0x37a 36d: mov r11d,0x3f800000 373: mov DWORD PTR [r9+0x200],r11d 37a: add r9,0x220 381: inc r10 384: cmp r10,0x5 388: jne 0x33b ff/lr/Collect: 38a: lea rdx,[rbp+0x80] 391: mov r8,QWORD PTR [rbp+0x138] 398: lea r9,[rbp+0x4640] 39f: movsxd rax,DWORD PTR [rdx] 3a2: test rax,rax 3a5: js 0x3c6 3ab: shl rax,0xa 3af: add rax,r8 3b2: mov rdi,r9 3b5: mov rsi,rax 3b8: mov rcx,0x400 3bf: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 3c1: jmp 0x3dd 3c6: cmp rax,0xffffffffffffffff 3ca: jne 0x3dd 3d0: mov r11d,0x3f800000 3d6: mov DWORD PTR [r9+0x400],r11d ff/history/Collect: 3dd: lea rdx,[rbp+0x154] 3e4: mov r8,QWORD PTR [rbp+0x178] 3eb: lea r9,[rbp+0x4a60] 3f2: xor r10,r10 3f5: movsxd rax,DWORD PTR [rdx+r10*4] 3f9: test rax,rax 3fc: js 0x41d 402: shl rax,0x9 406: add rax,r8 409: mov rdi,r9 40c: mov rsi,rax 40f: mov rcx,0x200 416: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 418: jmp 0x434 41d: cmp rax,0xffffffffffffffff 421: jne 0x434 427: mov r11d,0x3f800000 42d: mov DWORD PTR [r9+0x200],r11d 434: add r9,0x220 43b: inc r10 43e: cmp r10,0x4 442: jne 0x3f5 ff/rl/MatMul: 444: lea rdi,[rbp+0x2a0] 44b: movabs rsi,0x68c340 455: lea r8,[rbp+0x52e0] 45c: xor rcx,rcx 45f: vxorps ymm0,ymm0,ymm0 463: vxorps ymm1,ymm1,ymm1 467: vxorps ymm2,ymm2,ymm2 46b: vxorps ymm3,ymm3,ymm3 46f: mov rdx,rsi 472: xor rax,rax 475: vbroadcastss ymm8,DWORD PTR [rdi+rax*1] 47b: vfmadd231ps ymm0,ymm8,YMMWORD PTR [rdx] 480: vfmadd231ps ymm1,ymm8,YMMWORD PTR 
; Sections starting on the next physical line ("dot-product" form):
;   0x4c0 ff/frame-end-lr/MatMul   input rows at rbp+0x6c0 (stride 0x420),
;                                  weight rows at 0x667140 (stride 0x420, end
;                                  marker rsi = base+0x8400, i.e. 0x20 rows),
;                                  outputs at rbp+0x5360, end marker
;                                  r8 = base+0x280 bytes.
;       Innermost loop: r9 counts floats in steps of 0x18 while r9 < 0x101,
;       which is 11 passes covering 0x108 floats = one whole 0x420-byte row.
;       Three accumulators (ymm3-ymm5) are summed, then vperm2f128 + three
;       vhaddps leave the sum of all eight lanes in the low element, which
;       vmovss stores as one output float.
;       Middle loop: next weight row, same input row (jl 0x4ef).
;       Outer loop : next input row, weight pointer reset to base (jl 0x4ec).
;       The loop exits compare pointers with signed jl.
;   0x56c ff/frame-end-rl/MatMul   same code shape: input rbp+0x1b60, weights
;                                  0x6a5400, outputs rbp+0x55e0 (0x280 bytes).
[rdx+0x20] 486: vfmadd231ps ymm2,ymm8,YMMWORD PTR [rdx+0x40] 48c: vfmadd231ps ymm3,ymm8,YMMWORD PTR [rdx+0x60] 492: add rdx,0x80 499: add rax,0x4 49d: cmp rax,0x404 4a3: jl 0x475 4a5: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 4ab: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 4b2: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 4b9: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 ff/frame-end-lr/MatMul: 4c0: lea rax,[rbp+0x6c0] 4c7: movabs rcx,0x667140 4d1: lea rdi,[rbp+0x5360] 4d8: mov rsi,rcx 4db: add rsi,0x8400 4e2: mov r8,rdi 4e5: add r8,0x280 4ec: mov rdx,rcx 4ef: xor r9,r9 4f2: vxorps ymm3,ymm3,ymm3 4f6: vxorps ymm4,ymm4,ymm4 4fa: vxorps ymm5,ymm5,ymm5 4fe: vmovaps ymm0,YMMWORD PTR [rax+r9*4] 504: vmovaps ymm1,YMMWORD PTR [rax+r9*4+0x20] 50b: vmovaps ymm2,YMMWORD PTR [rax+r9*4+0x40] 512: vfmadd231ps ymm3,ymm0,YMMWORD PTR [rdx+r9*4] 518: vfmadd231ps ymm4,ymm1,YMMWORD PTR [rdx+r9*4+0x20] 51f: vfmadd231ps ymm5,ymm2,YMMWORD PTR [rdx+r9*4+0x40] 526: add r9,0x18 52a: cmp r9,0x101 531: jl 0x4fe 533: vaddps ymm3,ymm3,ymm4 537: vaddps ymm3,ymm3,ymm5 53b: vperm2f128 ymm6,ymm3,ymm3,0x1 541: vhaddps ymm3,ymm3,ymm6 545: vhaddps ymm3,ymm3,ymm3 549: vhaddps ymm3,ymm3,ymm3 54d: vmovss DWORD PTR [rdi],xmm3 551: add rdi,0x4 555: add rdx,0x420 55c: cmp rdx,rsi 55f: jl 0x4ef 561: add rax,0x420 567: cmp rdi,r8 56a: jl 0x4ec ff/frame-end-rl/MatMul: 56c: lea rax,[rbp+0x1b60] 573: movabs rcx,0x6a5400 57d: lea rdi,[rbp+0x55e0] 584: mov rsi,rcx 587: add rsi,0x8400 58e: mov r8,rdi 591: add r8,0x280 598: mov rdx,rcx 59b: xor r9,r9 59e: vxorps ymm3,ymm3,ymm3 5a2: vxorps ymm4,ymm4,ymm4 5a6: vxorps ymm5,ymm5,ymm5 5aa: vmovaps ymm0,YMMWORD PTR [rax+r9*4] 5b0: vmovaps ymm1,YMMWORD PTR [rax+r9*4+0x20] 5b7: vmovaps ymm2,YMMWORD PTR [rax+r9*4+0x40] 5be: vfmadd231ps ymm3,ymm0,YMMWORD PTR [rdx+r9*4] 5c4: vfmadd231ps ymm4,ymm1,YMMWORD PTR [rdx+r9*4+0x20] 5cb: vfmadd231ps ymm5,ymm2,YMMWORD PTR [rdx+r9*4+0x40] 5d2: add r9,0x18 5d6: cmp r9,0x101 5dd: jl 0x5aa 5df: vaddps ymm3,ymm3,ymm4 5e3: vaddps ymm3,ymm3,ymm5 5e7: 
; Sections starting on the next physical line:
;   0x618 ff/frame-focus-steps/MatMul    dot-product form with one accumulator
;                                  (ymm1). r9 steps by 8 floats while
;                                  r9 < 0x81: 17 passes = 0x88 floats = one
;                                  0x220-byte row. Input rows at rbp+0x3100
;                                  (stride 0x220), weight rows at 0x6ad840
;                                  (stride 0x220, end base+0x8800, i.e. 0x40
;                                  rows), outputs at rbp+0x5860 (0x500 bytes).
;   0x698 ff/frame-creation-steps/MatMul same shape: input rbp+0x3ba0, weights
;                                  0x6b6080, outputs rbp+0x5d60 (0x500 bytes).
;   0x718 ff/lr/MatMul             broadcast form, 0x101 input floats x 4 ymm:
;                                  input rbp+0x4640, weights 0x694780 (0x80
;                                  bytes per input float), output rbp+0x6260
;                                  (0x80 bytes); rcx stays 0.
vperm2f128 ymm6,ymm3,ymm3,0x1 5ed: vhaddps ymm3,ymm3,ymm6 5f1: vhaddps ymm3,ymm3,ymm3 5f5: vhaddps ymm3,ymm3,ymm3 5f9: vmovss DWORD PTR [rdi],xmm3 5fd: add rdi,0x4 601: add rdx,0x420 608: cmp rdx,rsi 60b: jl 0x59b 60d: add rax,0x420 613: cmp rdi,r8 616: jl 0x598 ff/frame-focus-steps/MatMul: 618: lea rax,[rbp+0x3100] 61f: movabs rcx,0x6ad840 629: lea rdi,[rbp+0x5860] 630: mov rsi,rcx 633: add rsi,0x8800 63a: mov r8,rdi 63d: add r8,0x500 644: mov rdx,rcx 647: xor r9,r9 64a: vxorps ymm1,ymm1,ymm1 64e: vmovaps ymm0,YMMWORD PTR [rax+r9*4] 654: vfmadd231ps ymm1,ymm0,YMMWORD PTR [rdx+r9*4] 65a: add r9,0x8 65e: cmp r9,0x81 665: jl 0x64e 667: vperm2f128 ymm2,ymm1,ymm1,0x1 66d: vhaddps ymm1,ymm1,ymm2 671: vhaddps ymm1,ymm1,ymm1 675: vhaddps ymm1,ymm1,ymm1 679: vmovss DWORD PTR [rdi],xmm1 67d: add rdi,0x4 681: add rdx,0x220 688: cmp rdx,rsi 68b: jl 0x647 68d: add rax,0x220 693: cmp rdi,r8 696: jl 0x644 ff/frame-creation-steps/MatMul: 698: lea rax,[rbp+0x3ba0] 69f: movabs rcx,0x6b6080 6a9: lea rdi,[rbp+0x5d60] 6b0: mov rsi,rcx 6b3: add rsi,0x8800 6ba: mov r8,rdi 6bd: add r8,0x500 6c4: mov rdx,rcx 6c7: xor r9,r9 6ca: vxorps ymm1,ymm1,ymm1 6ce: vmovaps ymm0,YMMWORD PTR [rax+r9*4] 6d4: vfmadd231ps ymm1,ymm0,YMMWORD PTR [rdx+r9*4] 6da: add r9,0x8 6de: cmp r9,0x81 6e5: jl 0x6ce 6e7: vperm2f128 ymm2,ymm1,ymm1,0x1 6ed: vhaddps ymm1,ymm1,ymm2 6f1: vhaddps ymm1,ymm1,ymm1 6f5: vhaddps ymm1,ymm1,ymm1 6f9: vmovss DWORD PTR [rdi],xmm1 6fd: add rdi,0x4 701: add rdx,0x220 708: cmp rdx,rsi 70b: jl 0x6c7 70d: add rax,0x220 713: cmp rdi,r8 716: jl 0x6c4 ff/lr/MatMul: 718: lea rdi,[rbp+0x4640] 71f: movabs rsi,0x694780 729: lea r8,[rbp+0x6260] 730: xor rcx,rcx 733: vxorps ymm0,ymm0,ymm0 737: vxorps ymm1,ymm1,ymm1 73b: vxorps ymm2,ymm2,ymm2 73f: vxorps ymm3,ymm3,ymm3 743: mov rdx,rsi 746: xor rax,rax 749: vbroadcastss ymm8,DWORD PTR [rdi+rax*1] 74f: vfmadd231ps ymm0,ymm8,YMMWORD PTR [rdx] 754: vfmadd231ps ymm1,ymm8,YMMWORD PTR [rdx+0x20] 75a: vfmadd231ps ymm2,ymm8,YMMWORD PTR [rdx+0x40] 760: 
; Sections starting on the next physical line:
;   0x794 ff/history/MatMul        dot-product form (one accumulator, 0x220-byte
;                                  rows): input rbp+0x4a60, weights 0x69cbc0
;                                  (end base+0x8800), outputs rbp+0x62e0
;                                  (0x400 bytes).
;   0x814 ff/concat                packs earlier results back to back into the
;                                  buffer at rbp+0x66e0 (r8) with rep movs.
;                                  Source -> destination offset, byte count:
;                                    rbp+0x3000 -> +0x0000, 0x40
;                                    rbp+0x30c0 -> +0x0040, 0x40
;                                    rbp+0x3080 -> +0x0080, 0x40
;                                    rbp+0x3040 -> +0x00c0, 0x40
;                                    rbp+0x5d60 -> +0x0100, 0x500
;                                    rbp+0x5860 -> +0x0600, 0x500
;                                    rbp+0x5360 -> +0x0b00, 0x280
;                                    rbp+0x55e0 -> +0x0d80, 0x280
;                                    rbp+0x62e0 -> +0x1000, 0x400
;                                    rbp+0x6260 -> +0x1400, 0x80
;                                    rbp+0x52e0 -> +0x1480, 0x80
;                                  The last copy ends at +0x1500, the length
;                                  that ff/MatMul consumes.
vfmadd231ps ymm3,ymm8,YMMWORD PTR [rdx+0x60] 766: add rdx,0x80 76d: add rax,0x4 771: cmp rax,0x404 777: jl 0x749 779: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 77f: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 786: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 78d: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 ff/history/MatMul: 794: lea rax,[rbp+0x4a60] 79b: movabs rcx,0x69cbc0 7a5: lea rdi,[rbp+0x62e0] 7ac: mov rsi,rcx 7af: add rsi,0x8800 7b6: mov r8,rdi 7b9: add r8,0x400 7c0: mov rdx,rcx 7c3: xor r9,r9 7c6: vxorps ymm1,ymm1,ymm1 7ca: vmovaps ymm0,YMMWORD PTR [rax+r9*4] 7d0: vfmadd231ps ymm1,ymm0,YMMWORD PTR [rdx+r9*4] 7d6: add r9,0x8 7da: cmp r9,0x81 7e1: jl 0x7ca 7e3: vperm2f128 ymm2,ymm1,ymm1,0x1 7e9: vhaddps ymm1,ymm1,ymm2 7ed: vhaddps ymm1,ymm1,ymm1 7f1: vhaddps ymm1,ymm1,ymm1 7f5: vmovss DWORD PTR [rdi],xmm1 7f9: add rdi,0x4 7fd: add rdx,0x220 804: cmp rdx,rsi 807: jl 0x7c3 809: add rax,0x220 80f: cmp rdi,r8 812: jl 0x7c0 ff/concat: 814: lea r8,[rbp+0x66e0] 81b: lea rsi,[rbp+0x3000] 822: lea rdi,[r8] 825: mov rcx,0x40 82c: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 82e: lea rsi,[rbp+0x30c0] 835: lea rdi,[r8+0x40] 839: mov rcx,0x40 840: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 842: lea rsi,[rbp+0x3080] 849: lea rdi,[r8+0x80] 850: mov rcx,0x40 857: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 859: lea rsi,[rbp+0x3040] 860: lea rdi,[r8+0xc0] 867: mov rcx,0x40 86e: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 870: lea rsi,[rbp+0x5d60] 877: lea rdi,[r8+0x100] 87e: mov rcx,0x500 885: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 887: lea rsi,[rbp+0x5860] 88e: lea rdi,[r8+0x600] 895: mov rcx,0x500 89c: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 89e: lea rsi,[rbp+0x5360] 8a5: lea rdi,[r8+0xb00] 8ac: mov rcx,0x280 8b3: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 8b5: lea rsi,[rbp+0x55e0] 8bc: lea rdi,[r8+0xd80] 8c3: mov rcx,0x280 8ca: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 8cc: lea rsi,[rbp+0x62e0] 8d3: lea rdi,[r8+0x1000] 8da: mov rcx,0x400 8e1: rep movs BYTE PTR 
; Section starting on the next physical line (after the last concat copies):
;   0x911 ff/MatMul                broadcast form with preloaded accumulators.
;                                  Input rbp+0x66e0 (rax < 0x1500, 4-byte
;                                  steps), weights 0x7ff69c449040 (0x200 bytes
;                                  per input float), output pointer loaded from
;                                  [rbp+0x130].
;       Outer loop: rcx = 0x0 and 0x100; each pass loads ymm0-ymm7 from
;       0x5d3a40+rcx instead of zeroing them (presumably a bias vector --
;       confirm), and advances the weight base by 0x100.
;       Before each store the result goes through vmaxps against ymm13, which
;       0x933 zeroed, so negative results are written as 0.
es:[rdi],BYTE PTR ds:[rsi] 8e3: lea rsi,[rbp+0x6260] 8ea: lea rdi,[r8+0x1400] 8f1: mov rcx,0x80 8f8: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] 8fa: lea rsi,[rbp+0x52e0] 901: lea rdi,[r8+0x1480] 908: mov rcx,0x80 90f: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi] ff/MatMul: 911: lea rdi,[rbp+0x66e0] 918: movabs rsi,0x7ff69c449040 922: movabs r9,0x5d3a40 92c: mov r8,QWORD PTR [rbp+0x130] 933: vxorps ymm13,ymm13,ymm13 938: xor rcx,rcx 93b: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] 941: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] 948: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] 94f: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] 956: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] 960: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] 96a: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] 974: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] 97e: mov rdx,rsi 981: xor rax,rax 984: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] 98a: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] 98f: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] 995: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] 99b: vfmadd231ps ymm3,ymm12,YMMWORD PTR [rdx+0x60] 9a1: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] 9aa: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] 9b3: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] 9bc: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] 9c5: add rdx,0x200 9cc: add rax,0x4 9d0: cmp rax,0x1500 9d6: jl 0x984 9d8: vmaxps ymm0,ymm0,ymm13 9dd: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 9e3: vmaxps ymm1,ymm1,ymm13 9e8: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 9ef: vmaxps ymm2,ymm2,ymm13 9f4: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 9fb: vmaxps ymm3,ymm3,ymm13 a00: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 a07: vmaxps ymm4,ymm4,ymm13 a0c: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 a16: vmaxps ymm5,ymm5,ymm13 a1b: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 a25: vmaxps ymm6,ymm6,ymm13 a2a: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 a34: vmaxps ymm7,ymm7,ymm13 a39: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 a43: add rsi,0x100 a4a: add rcx,0x100 a51: cmp rcx,0x200 
; Sections starting on the next physical line (after ff/MatMul's loop-back jl):
;   0xa5e ff/MatMul_1              broadcast form, one ymm per outer pass.
;                                  Input pointer loaded from [rbp+0x130] (the
;                                  slot ff/MatMul took its output pointer
;                                  from); 0x200 bytes of it are read. Weights
;                                  at 0x7ff69cc36040 with 0x6ce0 bytes per
;                                  input float, output rbp+0x7be0.
;       Outer loop: rcx steps by 0x20 while rcx < 0x6ce0; each pass starts
;       ymm0 from 0x66f5c0+rcx (presumably a bias vector -- confirm) and stores
;       one ymm. No clamp is applied here.
;   0xac4 epilogue                 pop rbp / vzeroupper / ret.
a58: jl 0x93b ff/MatMul_1: a5e: mov rdi,QWORD PTR [rbp+0x130] a65: movabs rsi,0x7ff69cc36040 a6f: movabs r9,0x66f5c0 a79: lea r8,[rbp+0x7be0] a80: xor rcx,rcx a83: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] a89: mov rdx,rsi a8c: xor rax,rax a8f: vbroadcastss ymm8,DWORD PTR [rdi+rax*1] a95: vfmadd231ps ymm0,ymm8,YMMWORD PTR [rdx] a9a: add rdx,0x6ce0 aa1: add rax,0x4 aa5: cmp rax,0x200 aab: jl 0xa8f aad: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 ab3: add rsi,0x20 ab7: add rcx,0x20 abb: cmp rcx,0x6ce0 ac2: jl 0xa83 ac4: pop rbp ac5: vzeroupper ac8: ret