-
Notifications
You must be signed in to change notification settings - Fork 182
/
lzma_dec_x86_64.S
1341 lines (1052 loc) · 31.4 KB
/
lzma_dec_x86_64.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# LzmaDecOpt.asm -- ASM version of LZMA_decodeReal_3() function
# 2018-02-06: Igor Pavlov : Public domain
#
# 3 - is the code compatibility version of LZMA_decodeReal_*()
# function for check at link time.
# That code is tightly coupled with LZMA_tryDummy()
# and with other functions in the lzma2_dec.c file.
# CLzmaDec structure, (probs) array layout, input and output of
# LZMA_decodeReal_*() must be equal in both versions (C / ASM).
.intel_syntax noprefix
# 7zAsm.asm -- ASM macros
# 2018-02-03 : Igor Pavlov : Public domain
# Portable register aliases carried over from the original MASM source:
#   r0..r7  = the 64-bit GPRs in encoding order (RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI)
#   x0..x15 = 32-bit views of the same registers (a 32-bit write zeroes the
#             upper half, so these are safe for values known to fit in 32 bits)
#   *_W / *_L / *_H = 16-bit / low-8 / high-8 views
.equ REG_SIZE, 8
.equ REG_LOGAR_SIZE, 3
.equ x0, EAX
.equ x1, ECX
.equ x2, EDX
.equ x3, EBX
.equ x4, ESP
.equ x5, EBP
.equ x6, ESI
.equ x7, EDI
.equ x0_W, AX
.equ x1_W, CX
.equ x2_W, DX
.equ x3_W, BX
.equ x5_W, BP
.equ x6_W, SI
.equ x7_W, DI
.equ x0_L, AL
.equ x1_L, CL
.equ x2_L, DL
.equ x3_L, BL
# high-byte views exist only for the four legacy registers
.equ x0_H, AH
.equ x1_H, CH
.equ x2_H, DH
.equ x3_H, BH
# low-byte views of BP/SI/DI require a REX prefix (BPL/SIL/DIL)
.equ x5_L, BPL
.equ x6_L, SIL
.equ x7_L, DIL
.equ r0, RAX
.equ r1, RCX
.equ r2, RDX
.equ r3, RBX
.equ r4, RSP
.equ r5, RBP
.equ r6, RSI
.equ r7, RDI
# x8..x15 are the 32-bit views of r8..r15
.equ x8, r8d
.equ x9, r9d
.equ x10, r10d
.equ x11, r11d
.equ x12, r12d
.equ x13, r13d
.equ x14, r14d
.equ x15, r15d
# ABI selection: MS_x64_CALL must be defined by the build (1 = Windows x64,
# 0 = System V AMD64). It fixes the parameter registers and which registers
# the prologue must preserve.
.if MS_x64_CALL
# for WIN64-x64 ABI:
# Windows x64 passes integer args in rcx,rdx,r8,r9 and treats
# rbx,rbp,rsi,rdi,r12-r15 as callee-saved, hence the extra rsi/rdi pushes.
.equ REG_PARAM_0, r1
.equ REG_PARAM_1, r2
.equ REG_PARAM_2, r8
.equ REG_PARAM_3, r9
.macro MY_PUSH_PRESERVED_REGS
push r3
push r5
push r6 # WIN64
push r7 # WIN64
push r12
push r13
push r14
push r15
.endm
# Must mirror MY_PUSH_PRESERVED_REGS exactly, in reverse order.
.macro MY_POP_PRESERVED_REGS
pop r15
pop r14
pop r13
pop r12
pop r7 # WIN64
pop r6 # WIN64
pop r5
pop r3
.endm
.else
# for System V AMD64 ABI:
# SysV passes integer args in rdi,rsi,rdx,rcx; rsi/rdi are caller-saved
# there, so only rbx,rbp,r12-r15 need preserving.
.equ REG_PARAM_0, r7
.equ REG_PARAM_1, r6
.equ REG_PARAM_2, r2
.equ REG_PARAM_3, r1
.macro MY_PUSH_PRESERVED_REGS
push r3
push r5
push r12
push r13
push r14
push r15
.endm
# Must mirror MY_PUSH_PRESERVED_REGS exactly, in reverse order.
.macro MY_POP_PRESERVED_REGS
pop r15
pop r14
pop r13
pop r12
pop r5
pop r3
.endm
.endif
# Alignment helpers: pad to a power-of-two boundary. Kept as macros so the
# call sites read the same as the original MASM source.
.macro MY_ALIGN num:req
.balign \num
.endm
# Fixed-width wrappers, used ahead of hot loop heads and branch targets.
.macro MY_ALIGN_16
.balign 16
.endm
.macro MY_ALIGN_32
.balign 32
.endm
.macro MY_ALIGN_64
.balign 64
.endm
# .equ _LZMA_SIZE_OPT, 1
# Probabilities are stored as 32-bit values: PSHIFT = log2(sizeof(prob)).
.equ PSHIFT, 2
# Load / store a single probability value (dword because PSHIFT == 2).
.macro PLOAD dest, mem
mov \dest, dword ptr [\mem]
.endm
.macro PSTORE src, mem
mov dword ptr [\mem], \src
.endm
# Byte strides between probability slots: 1x, 0.5x and 2x sizeof(prob).
# NOTE(review): 'SHL' is a MASM-style expression operator; plain GAS
# expressions normally use '<<' -- confirm this assembles with the
# project's toolchain.
.equ PMULT, (1 SHL PSHIFT)
.equ PMULT_HALF, (1 SHL (PSHIFT - 1))
.equ PMULT_2, (1 SHL (PSHIFT + 1))
# Global register allocation for the decoder loop. A register may hold
# different values in different phases (e.g. x2 is probBranch in the
# branch macros but prm/cnt elsewhere); the aliases below make each use
# explicit at the site.
# x0 range
# x1 pbPos / (prob) TREE
# x2 probBranch / prm (MATCHED) / pbPos / cnt
# x3 sym
#====== r4 === RSP
# x5 cod
# x6 t1 NORM_CALC / probs_state / dist
# x7 t0 NORM_CALC / prob2 IF_BIT_1
# x8 state
# x9 match (MATCHED) / sym2 / dist2 / lpMask_reg
# x10 kBitModelTotal_reg
# r11 probs
# x12 offs (MATCHED) / dic / len_temp
# x13 processedPos
# x14 bit (MATCHED) / dicPos
# r15 buf
# --- range coder state ---
.equ cod, x5
.equ cod_L, x5_L
.equ range, x0
# --- decoder state ---
.equ state, x8
.equ state_R, r8
.equ buf, r15
.equ processedPos, x13
.equ kBitModelTotal_reg, x10
# --- branch-macro scratch ---
.equ probBranch, x2
.equ probBranch_R, r2
.equ probBranch_W, x2_W
.equ pbPos, x1
.equ pbPos_R, r1
.equ cnt, x2
.equ cnt_R, r2
.equ lpMask_reg, x9
.equ dicPos, r14
.equ sym, x3
.equ sym_R, r3
.equ sym_L, x3_L
.equ probs, r11
.equ dic, r12
# t0/t1 are the NORM_CALC temporaries; prob2 reuses t0 in the branch path.
.equ t0, x7
.equ t0_W, x7_W
.equ t0_R, r7
.equ prob2, t0
.equ prob2_W, t0_W
.equ t1, x6
.equ t1_R, r6
.equ probs_state, t1
.equ probs_state_R, t1_R
# --- matched-literal phase aliases ---
.equ prm, r2
.equ match, x9
.equ match_R, r9
.equ offs, x12
.equ offs_R, r12
.equ bit, x14
.equ bit_R, r14
# --- distance / reverse-tree phase aliases ---
.equ sym2, x9
.equ sym2_R, r9
.equ len_temp, x12
.equ dist, sym
.equ dist2, x9
# --- LZMA range-coder model constants ---
.equ kNumBitModelTotalBits, 11
.equ kBitModelTotal, (1 SHL kNumBitModelTotalBits)
.equ kNumMoveBits, 5
.equ kBitModelOffset, ((1 SHL kNumMoveBits) - 1)
.equ kTopValue, (1 SHL 24)
# Range-coder renormalization step: shift one input byte into the low
# byte of cod and rescale range by 256. Caller must know range < kTopValue.
.macro NORM_2
# movzx t0, BYTE PTR [buf]
shl cod, 8
mov cod_L, BYTE PTR [buf]
shl range, 8
# or cod, t0
inc buf
.endm
# Conditional renormalization: only refill when range has dropped below
# kTopValue (2^24). Common case (no refill) falls straight through.
# NOTE(review): 'SHORT' is a MASM distance keyword; GAS usually spells
# this plain 'jae 1f' -- confirm it assembles with the project toolchain.
.macro NORM
cmp range, kTopValue
jae SHORT 1f
NORM_2
1:
.endm
# ---------- Branch MACROS ----------
# Bit decode implemented with conditional branches (used where the bit
# value selects genuinely different code paths). Contract: CMP_COD runs
# first and leaves  range = bound = (old_range >> 11) * prob,
# prob2 = old_range, probBranch = prob, flags = (cod - bound).
# Bit 0 path (cod < bound): keep range = bound, move prob toward kBitModelTotal:
#   prob += (kBitModelTotal - prob) >> kNumMoveBits
.macro UPDATE_0 probsArray:req, probOffset:req, probDisp:req
mov prob2, kBitModelTotal_reg
sub prob2, probBranch
shr prob2, kNumMoveBits
add probBranch, prob2
PSTORE probBranch, (\probOffset * 1 + \probsArray + \probDisp * PMULT)
.endm
# Bit 1 path (cod >= bound): range = old_range - bound, cod -= bound,
# move prob toward 0:  prob -= prob >> kNumMoveBits
.macro UPDATE_1 probsArray:req, probOffset:req, probDisp:req
sub prob2, range
sub cod, range
mov range, prob2
mov prob2, probBranch
shr probBranch, kNumMoveBits
sub prob2, probBranch
PSTORE prob2, (\probOffset * 1 + \probsArray + \probDisp * PMULT)
.endm
# Load the probability, renormalize, compute bound and compare cod to it.
.macro CMP_COD probsArray:req, probOffset:req, probDisp:req
PLOAD probBranch, (\probOffset * 1 + \probsArray + \probDisp * PMULT)
NORM
mov prob2, range
shr range, kNumBitModelTotalBits
imul range, probBranch
cmp cod, range
.endm
# Jump to \toLabel on bit 1, without committing the bit-0 model update.
.macro IF_BIT_1_NOUP probsArray:req, probOffset:req, probDisp:req, toLabel:req
CMP_COD \probsArray, \probOffset, \probDisp
jae \toLabel
.endm
# Jump to \toLabel on bit 1; fall through on bit 0 with the model updated.
.macro IF_BIT_1 probsArray:req, probOffset:req, probDisp:req, toLabel:req
IF_BIT_1_NOUP \probsArray, \probOffset, \probDisp, \toLabel
UPDATE_0 \probsArray, \probOffset, \probDisp
.endm
# Jump to \toLabel on bit 0, without committing any model update.
.macro IF_BIT_0_NOUP probsArray:req, probOffset:req, probDisp:req, toLabel:req
CMP_COD \probsArray, \probOffset, \probDisp
jb \toLabel
.endm
# ---------- CMOV MACROS ----------
# Branchless bit decode for bit-tree loops. After NORM_CALC:
#   range = bound, t0 = old_range - bound, t1 = old cod, cod = cod - bound,
# and CF (from the final sub) encodes the bit: CF=1 -> bit 0, CF=0 -> bit 1.
# The cmovae/cmovb pairs that follow in each BIT_* macro consume CF before
# any flag-clobbering instruction runs -- do not reorder.
.macro NORM_CALC prob:req
NORM
mov t0, range
shr range, kNumBitModelTotalBits
imul range, \prob
sub t0, range
mov t1, cod
sub cod, range
.endm
# Branchless probability update. On entry t0 is kBitModelTotal (bit 0)
# or kBitModelOffset (bit 1); then prob += (t0 - prob) >> kNumMoveBits,
# which merges both UPDATE_0/UPDATE_1 formulas into one expression.
.macro PUP prob:req, probPtr:req
sub t0, \prob
# only sar works for both 16/32 bit prob modes
sar t0, kNumMoveBits
add t0, \prob
PSTORE t0, \probPtr
.endm
# Append the decoded bit to sym via sbb: sym = sym - \symSub - CF
# (with \symSub = -1 this is sym += 1 - CF, i.e. sym = 2*sym + bit
# after the caller's preceding 'add sym, sym').
.macro PUP_SUB prob:req, probPtr:req, symSub:req
sbb sym, \symSub
PUP \prob, \probPtr
.endm
# Bit-0 fixups done branchlessly: restore cod (undo the subtraction) and
# select the prob-update constant, then fold the bit into sym.
.macro PUP_COD prob:req, probPtr:req, symSub:req
mov t0, kBitModelOffset
cmovb cod, t1
mov t1, sym
cmovb t0, kBitModelTotal_reg
PUP_SUB \prob, \probPtr, \symSub
.endm
# First bit-tree step: node index starts at 1; prefetch both children and
# keep the right one with cmov.
.macro BIT_0 prob:req, probNext:req
PLOAD \prob, (probs + 1 * PMULT)
PLOAD \probNext, (probs + 1 * PMULT_2)
NORM_CALC \prob
cmovae range, t0
PLOAD t0, (probs + 1 * PMULT_2 + PMULT)
cmovae \probNext, t0
mov t0, kBitModelOffset
cmovb cod, t1
cmovb t0, kBitModelTotal_reg
mov sym, 2
PUP_SUB \prob, (probs + 1 * PMULT), (0 - 1)
.endm
# Middle bit-tree step: sym holds the current node index; speculatively
# load both children of the next level.
.macro BIT_1 prob:req, probNext:req
PLOAD \probNext, (probs + sym_R * PMULT_2)
add sym, sym
NORM_CALC \prob
cmovae range, t0
PLOAD t0, (probs + sym_R * PMULT + PMULT)
cmovae \probNext, t0
PUP_COD \prob, (probs + t1_R * PMULT_HALF), (0 - 1)
.endm
# Last bit-tree step: \symSub removes the tree-index bias so sym becomes
# the plain decoded symbol.
.macro BIT_2 prob:req, symSub:req
add sym, sym
NORM_CALC \prob
cmovae range, t0
PUP_COD \prob, (probs + t1_R * PMULT_HALF), \symSub
.endm
# ---------- MATCHED LITERAL ----------
# Matched-literal decode: each step selects between two probability banks
# depending on the current top bit of the match byte ('match'). 'offs'
# holds the active bank offset (256 * PMULT or 0); 'bit' extracts the
# match byte's bit; once the decoded bit diverges from the match bit,
# offs collapses and the code degrades to the plain literal tree (LITM_2).
# First matched-literal step: initializes offs/bit from the match byte.
.macro LITM_0
mov offs, 256 * PMULT
shl match, (PSHIFT + 1)
mov bit, offs
and bit, match
PLOAD x1, (probs + 256 * PMULT + bit_R * 1 + 1 * PMULT)
lea prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
# lea prm, [probs + 256 * PMULT + 1 * PMULT]
# add prm, bit_R
xor offs, bit
add match, match
NORM_CALC x1
cmovae offs, bit
mov bit, match
cmovae range, t0
mov t0, kBitModelOffset
cmovb cod, t1
cmovb t0, kBitModelTotal_reg
mov sym, 0
PUP_SUB x1, prm, (-2-1)
.endm
# Middle matched-literal step: prm = probs + offs + bit is the bank base,
# indexed by the partial symbol in sym.
.macro LITM
and bit, offs
lea prm, [probs + offs_R * 1]
add prm, bit_R
PLOAD x1, (prm + sym_R * PMULT)
xor offs, bit
add sym, sym
add match, match
NORM_CALC x1
cmovae offs, bit
mov bit, match
cmovae range, t0
PUP_COD x1, (prm + t1_R * PMULT_HALF), (- 1)
.endm
# Final matched-literal step: (256 - 1) strips the tree-index bias,
# leaving the decoded byte in sym.
.macro LITM_2
and bit, offs
lea prm, [probs + offs_R * 1]
add prm, bit_R
PLOAD x1, (prm + sym_R * PMULT)
add sym, sym
NORM_CALC x1
cmovae range, t0
PUP_COD x1, (prm + t1_R * PMULT_HALF), (256 - 1)
.endm
# ---------- REVERSE BITS ----------
# Reverse bit-tree decode (bits come out LSB-first, as used for the low
# distance bits and the align bits). sym2_R walks the tree as a pointer
# into the probs array instead of a numeric index; cmovae advances it to
# the right child on bit 1.
# First reverse step. Caller must have set sym2_R = probs + 2 * PMULT and
# loaded \prob from probs + 1 * PMULT (see the commented-out lines).
.macro REV_0 prob:req, probNext:req
# PLOAD prob, probs + 1 * PMULT
# lea sym2_R, [probs + 2 * PMULT]
# PLOAD probNext, probs + 2 * PMULT
PLOAD \probNext, sym2_R
NORM_CALC \prob
cmovae range, t0
PLOAD t0, (probs + 3 * PMULT)
cmovae \probNext, t0
cmovb cod, t1
mov t0, kBitModelOffset
cmovb t0, kBitModelTotal_reg
lea t1_R, [probs + 3 * PMULT]
cmovae sym2_R, t1_R
PUP \prob, (probs + 1 * PMULT)
.endm
# Middle reverse step; \step doubles each level (2, then 4). The parent's
# prob slot is recovered as t1_R - step * PMULT_2 for the update.
.macro REV_1 prob:req, probNext:req, step:req
add sym2_R, \step * PMULT
PLOAD \probNext, sym2_R
NORM_CALC \prob
cmovae range, t0
PLOAD t0, (sym2_R + \step * PMULT)
cmovae \probNext, t0
cmovb cod, t1
mov t0, kBitModelOffset
cmovb t0, kBitModelTotal_reg
lea t1_R, [sym2_R + \step * PMULT]
cmovae sym2_R, t1_R
PUP \prob, (t1_R - \step * PMULT_2)
.endm
# Final reverse step: convert the pointer back to an index, fold the
# decoded bits into sym (dist), and update the last prob.
.macro REV_2 prob:req, step:req
sub sym2_R, probs
shr sym2, PSHIFT
or sym, sym2
NORM_CALC \prob
cmovae range, t0
lea t0, [sym - \step]
cmovb sym, t0
cmovb cod, t1
mov t0, kBitModelOffset
cmovb t0, kBitModelTotal_reg
PUP \prob, (probs + sym2_R * PMULT)
.endm
# Variable-length reverse step used where the tree size is not fixed:
# sym_R is the current node pointer, sym2 the doubling step.
.macro REV_1_VAR prob:req
PLOAD \prob, sym_R
mov probs, sym_R
add sym_R, sym2_R
NORM_CALC \prob
cmovae range, t0
lea t0_R, [sym_R + sym2_R]
cmovae sym_R, t0_R
mov t0, kBitModelOffset
cmovb cod, t1
# mov t1, kBitModelTotal
# cmovb t0, t1
cmovb t0, kBitModelTotal_reg
add sym2, sym2
PUP \prob, probs
.endm
# Compute the literal-coder probs base for the current position and
# finish the IsMatch bit-0 update (interleaved here to hide latency).
# On entry sym holds the previous dictionary byte; lc2 = lc + PSHIFT was
# precomputed in the prologue so one shift both applies lc and scales to
# prob-slot units. The *3 factor (lea sym, [sym + 2*sym]) matches the
# "(UInt32)3 *" in the C expression below.
.macro LIT_PROBS lpMaskParam:req
# prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc)#
mov t0, processedPos
shl t0, 8
add sym, t0
and sym, \lpMaskParam
add probs_state_R, pbPos_R
mov x1, [LOC + lc2]
lea sym, dword ptr[sym_R + 2 * sym_R]
add probs, Literal * PMULT
shl sym, x1_L
add probs, sym_R
UPDATE_0 probs_state_R, 0, IsMatch
inc processedPos
.endm
# LZMA model constants -- must stay identical to lzma2_dec.c (see header).
.equ kNumPosBitsMax, 4
.equ kNumPosStatesMax, (1 SHL kNumPosBitsMax)
.equ kLenNumLowBits, 3
.equ kLenNumLowSymbols, (1 SHL kLenNumLowBits)
.equ kLenNumHighBits, 8
.equ kLenNumHighSymbols, (1 SHL kLenNumHighBits)
.equ kNumLenProbs, (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)
# Length-coder sub-table offsets (LenChoice/LenChoice2 overlay LenLow).
.equ LenLow, 0
.equ LenChoice, LenLow
.equ LenChoice2, (LenLow + kLenNumLowSymbols)
.equ LenHigh, (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)
.equ kNumStates, 12
.equ kNumStates2, 16
.equ kNumLitStates, 7
.equ kStartPosModelIndex, 4
.equ kEndPosModelIndex, 14
.equ kNumFullDistances, (1 SHL (kEndPosModelIndex SHR 1))
.equ kNumPosSlotBits, 6
.equ kNumLenToPosStates, 4
.equ kNumAlignBits, 4
.equ kAlignTableSize, (1 SHL kNumAlignBits)
.equ kMatchMinLen, 2
.equ kMatchSpecLenStart, (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
# probs-array layout, expressed relative to a base biased by kStartOffset
# (the 'probs' register points 1664 slots into the real array, so kAlign
# lands at offset 0 -- verified by the .if check below).
.equ kStartOffset, 1664
.equ SpecPos, (-kStartOffset)
.equ IsRep0Long, (SpecPos + kNumFullDistances)
.equ RepLenCoder, (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))
.equ LenCoder, (RepLenCoder + kNumLenProbs)
.equ IsMatch, (LenCoder + kNumLenProbs)
.equ kAlign, (IsMatch + (kNumStates2 SHL kNumPosBitsMax))
.equ IsRep, (kAlign + kAlignTableSize)
.equ IsRepG0, (IsRep + kNumStates)
.equ IsRepG1, (IsRepG0 + kNumStates)
.equ IsRepG2, (IsRepG1 + kNumStates)
.equ PosSlot, (IsRepG2 + kNumStates)
.equ Literal, (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))
.equ NUM_BASE_PROBS, (Literal + kStartOffset)
# Compile-time layout checks against the C side.
# NOTE(review): '.if x ne 0' and '.err <...>' are MASM forms; GAS spells
# these '.if kAlign != 0' / '.error "..."' -- confirm they assemble.
.if kAlign ne 0
.err <Stop_Compiling_Bad_LZMA_kAlign>
.endif
.if NUM_BASE_PROBS ne 1984
.err <Stop_Compiling_Bad_LZMA_PROBS>
.endif
# CLzmaDec_Asm:
# Byte offsets into the CLzmaDec structure passed in by the caller.
# These must match the C struct layout in lzma2_dec.c exactly (see the
# file header); they are read/written through GLOB / GLOB_2.
.equ lc, 0
.equ lp, 1
.equ pb, 2
.equ dicSize, 4
.equ dic_Spec, 8
.equ dicPos_Spec, 16
.equ dicBufSize, 24
.equ buf_Spec, 32
.equ probs_1664, 40
.equ range_Spec, 48
.equ code_Spec, 52
.equ processedPos_Spec, 56
.equ checkDicSize, 60
.equ rep0, 64
.equ rep1, 68
.equ rep2, 72
.equ rep3, 76
.equ state_Spec, 80
.equ state2, 84
.equ remainLen, 88
# CLzmaDec_Asm_Loc:
# Byte offsets into the decoder's private stack frame, addressed via
# LOC (= RSP after the prologue switches to an aligned frame).
.equ Old_RSP, 0
.equ lzmaPtr, 8
.equ _pad0_, 16
.equ _pad1_, 24
.equ _pad2_, 32
.equ dicBufSize_Loc, 40
.equ probs_Spec, 48
.equ dic_Spec_Loc, 56
.equ limit, 64
.equ bufLimit, 72
.equ lc2, 80
.equ lpMask, 84
.equ pbMask, 88
.equ checkDicSize_Loc, 92
# .equ _pad_, 96
.equ remainLen_Loc, 100
.equ dicPos_Spec_Loc, 104
.equ rep0_Loc, 112
.equ rep1_Loc, 116
.equ rep2_Loc, 120
.equ rep3_Loc, 124
.equ Sizeof_CLzmaDec_Asm_Loc, 128
# Aliases for addressing the CLzmaDec struct (GLOB*) and local frame (LOC*).
.equ GLOB_2, sym_R
.equ GLOB, r1
.equ LOC_0, r0
.equ LOC, RSP
# Precompute the IsMatch addressing: pbPos = posState scaled to a
# kNumStates2-wide row of probs, probs_state_R = probs + scaled state.
# The 'reg' parameter is unused -- presumably kept for signature parity
# with the MASM original (TODO confirm).
.macro IsMatchBranch_Pre reg
# prob = probs + IsMatch + (state << kNumPosBitsMax) + posState#
mov pbPos, [LOC + pbMask]
and pbPos, processedPos
shl pbPos, (kLenNumLowBits + 1 + PSHIFT)
lea probs_state_R, [probs + state_R]
.endm
# Decode the IsMatch bit: fall through on literal (bit 0, model updated),
# jump to IsMatch_label on match.
.macro IsMatchBranch reg
IsMatchBranch_Pre
IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
.endm
# Exit the fast loop when either the input buffer or the dictionary
# output window reaches its limit (fin_OK is defined later in the file).
.macro CheckLimits reg
cmp buf, [LOC + bufLimit]
jae fin_OK
cmp dicPos, [LOC + limit]
jae fin_OK
.endm
# RSP is (16x + 8) bytes aligned in WIN64-x64
# .equ LocalSize, ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)
# Name the incoming ABI parameter registers (see REG_PARAM_* above):
#   arg0 = CLzmaDec*, arg1 = dic limit, arg2 = input buffer limit.
.equ PARAM_lzma, REG_PARAM_0
.equ PARAM_limit, REG_PARAM_1
.equ PARAM_bufLimit, REG_PARAM_2
.text
# MY_ALIGN_64
# Align the entry point to 16 bytes, padding with NOP (0x90) so the
# padding is executable.
.balign 16, 0x90
.global LZMA_decodeReal_3
LZMA_decodeReal_3:
MY_PUSH_PRESERVED_REGS
lea r0, [RSP - Sizeof_CLzmaDec_Asm_Loc]
and r0, -128
mov r5, RSP
mov RSP, r0
mov [LOC_0 + Old_RSP], r5
mov [LOC_0 + lzmaPtr], PARAM_lzma
mov dword ptr [LOC_0 + remainLen_Loc], 0 # remainLen must be ZERO
mov [LOC_0 + bufLimit], PARAM_bufLimit
mov sym_R, PARAM_lzma # CLzmaDec_Asm_Loc pointer for GLOB_2
mov dic, [GLOB_2 + dic_Spec]
add PARAM_limit, dic
mov [LOC_0 + limit], PARAM_limit
mov t0, [GLOB_2 + rep0]
mov [LOC_0 + rep0_Loc], t0
mov t0, [GLOB_2 + rep1]
mov [LOC_0 + rep1_Loc], t0
mov t0, [GLOB_2 + rep2]
mov [LOC_0 + rep2_Loc], t0
mov t0, [GLOB_2 + rep3]
mov [LOC_0 + rep3_Loc], t0
mov dicPos, [GLOB_2 + dicPos_Spec]
add dicPos, dic
mov [LOC_0 + dicPos_Spec_Loc], dicPos
mov [LOC_0 + dic_Spec_Loc], dic
mov x1_L, [GLOB_2 + pb]
mov t0, 1
shl t0, x1_L
dec t0
mov [LOC_0 + pbMask], t0
# unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1#
# unsigned lc = p->prop.lc#
# unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc)#
mov x1_L, [GLOB_2 + lc]
mov x2, 0x100
mov t0, x2
shr x2, x1_L
# inc x1
add x1_L, PSHIFT
mov [LOC_0 + lc2], x1
mov x1_L, [GLOB_2 + lp]
shl t0, x1_L
sub t0, x2
mov [LOC_0 + lpMask], t0
mov lpMask_reg, t0
mov probs, [GLOB_2 + probs_1664]
mov [LOC_0 + probs_Spec], probs
mov t0_R, [GLOB_2 + dicBufSize]
mov [LOC_0 + dicBufSize_Loc], t0_R
mov x1, [GLOB_2 + checkDicSize]
mov [LOC_0 + checkDicSize_Loc], x1
mov processedPos, [GLOB_2 + processedPos_Spec]
mov state, [GLOB_2 + state_Spec]
shl state, PSHIFT
mov buf, [GLOB_2 + buf_Spec]
mov range, [GLOB_2 + range_Spec]
mov cod, [GLOB_2 + code_Spec]
mov kBitModelTotal_reg, kBitModelTotal
xor sym, sym
## if (processedPos != 0 || checkDicSize != 0)
or x1, processedPos
jz 1f
add t0_R, dic
cmp dicPos, dic
cmovnz t0_R, dicPos
movzx sym, byte ptr[t0_R - 1]
1:
IsMatchBranch_Pre
cmp state, 4 * PMULT
jb lit_end
cmp state, kNumLitStates * PMULT
jb lit_matched_end
jmp lz_end
# ---------- LITERAL ----------
MY_ALIGN_64
lit_start:
xor state, state
lit_start_2:
LIT_PROBS lpMask_reg
.ifdef _LZMA_SIZE_OPT
PLOAD x1, probs + 1 * PMULT
mov sym, 1
MY_ALIGN_16
lit_loop:
BIT_1 x1, x2
mov x1, x2
cmp sym, 127
jbe lit_loop
.else
BIT_0 x1, x2
BIT_1 x2, x1
BIT_1 x1, x2
BIT_1 x2, x1
BIT_1 x1, x2
BIT_1 x2, x1
BIT_1 x1, x2
.endif
BIT_2 x2, (256 - 1)
# mov dic, [LOC + dic_Spec_Loc]
mov probs, [LOC + probs_Spec]
IsMatchBranch_Pre
mov byte ptr[dicPos], sym_L
inc dicPos
CheckLimits
lit_end:
IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start
# jmp IsMatch_label
# ---------- MATCHES ----------
# MY_ALIGN_32
IsMatch_label:
UPDATE_1 probs_state_R, pbPos_R, IsMatch
IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label
add probs, LenCoder * PMULT
add state, kNumStates * PMULT
# ---------- LEN DECODE ----------
len_decode:
mov len_temp, 8 - 1 - kMatchMinLen
IF_BIT_0_NOUP probs, 0, 0, len_mid_0
UPDATE_1 probs, 0, 0
add probs, (1 SHL (kLenNumLowBits + PSHIFT))
mov len_temp, -1 - kMatchMinLen
IF_BIT_0_NOUP probs, 0, 0, len_mid_0
UPDATE_1 probs, 0, 0
add probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))
mov sym, 1
PLOAD x1, (probs + 1 * PMULT)
MY_ALIGN_32
len8_loop:
BIT_1 x1, x2
mov x1, x2
cmp sym, 64
jb len8_loop
mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
jmp len_mid_2
MY_ALIGN_32
len_mid_0:
UPDATE_0 probs, 0, 0
add probs, pbPos_R
BIT_0 x2, x1
len_mid_2:
BIT_1 x1, x2
BIT_2 x2, len_temp
mov probs, [LOC + probs_Spec]
cmp state, kNumStates * PMULT
jb copy_match
# ---------- DECODE DISTANCE ----------
# probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits)#
mov t0, 3 + kMatchMinLen
cmp sym, 3 + kMatchMinLen
cmovb t0, sym
add probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))
shl t0, (kNumPosSlotBits + PSHIFT)
add probs, t0_R
# sym = Len
# mov [LOC + remainLen_Loc], sym
mov len_temp, sym
.ifdef _LZMA_SIZE_OPT
PLOAD x1, probs + 1 * PMULT
mov sym, 1
MY_ALIGN_16
slot_loop:
BIT_1 x1, x2
mov x1, x2
cmp sym, 32
jb slot_loop
.else
BIT_0 x1, x2
BIT_1 x2, x1
BIT_1 x1, x2
BIT_1 x2, x1
BIT_1 x1, x2
.endif
mov x1, sym
BIT_2 x2, 64-1
and sym, 3
mov probs, [LOC + probs_Spec]
cmp x1, 32 + kEndPosModelIndex / 2
jb short_dist
# unsigned numDirectBits = (unsigned)(((distance >> 1) - 1))#
sub x1, (32 + 1 + kNumAlignBits)
# distance = (2 | (distance & 1))#
or sym, 2
PLOAD x2, (probs + 1 * PMULT)
shl sym, kNumAlignBits + 1
lea sym2_R, [probs + 2 * PMULT]
jmp direct_norm
# lea t1, [sym_R + (1 SHL kNumAlignBits)]
# cmp range, kTopValue
# jb direct_norm
# ---------- DIRECT DISTANCE ----------
MY_ALIGN_32
direct_loop:
shr range, 1
mov t0, cod
sub cod, range
cmovs cod, t0
cmovns sym, t1
# sub cod, range
# mov x2, cod
# sar x2, 31
# lea sym, dword ptr [r2 + sym_R * 2 + 1]
# and x2, range
# add cod, x2
dec x1
je direct_end
add sym, sym
direct_norm:
lea t1, [sym_R + (1 SHL kNumAlignBits)]
cmp range, kTopValue
jae near ptr direct_loop
# we align for 32 here with "near ptr" command above
NORM_2
jmp direct_loop
MY_ALIGN_32
direct_end:
# prob = + kAlign#
# distance <<= kNumAlignBits#
REV_0 x2, x1
REV_1 x1, x2, 2
REV_1 x2, x1, 4
REV_2 x1, 8
decode_dist_end:
## if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
mov t0, [LOC + checkDicSize_Loc]
test t0, t0
cmove t0, processedPos
cmp sym, t0
jae end_of_payload
# rep3 = rep2#
# rep2 = rep1#
# rep1 = rep0#
# rep0 = distance + 1#
inc sym
mov t0, [LOC + rep0_Loc]
mov t1, [LOC + rep1_Loc]
mov x1, [LOC + rep2_Loc]
mov [LOC + rep0_Loc], sym
# mov sym, [LOC + remainLen_Loc]
mov sym, len_temp
mov [LOC + rep1_Loc], t0
mov [LOC + rep2_Loc], t1
mov [LOC + rep3_Loc], x1
# state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3#
cmp state, (kNumStates + kNumLitStates) * PMULT
mov state, kNumLitStates * PMULT
mov t0, (kNumLitStates + 3) * PMULT
cmovae state, t0