forked from kanaka/mal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreader.asm
1118 lines (896 loc) · 30 KB
/
reader.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
%include "macros.mac"
section .data
;; Reader macro strings
;; Names of the symbols that the reader macros ' ` ~ ~@ @ ^ expand to.
;; Each one is passed to raw_to_string together with its <name>.len value
;; (the `static` macro is presumably defined in macros.mac and provides
;; that .len constant -- confirm there).
static quote_symbol_string, db "quote"
static quasiquote_symbol_string, db "quasiquote"
static unquote_symbol_string, db "unquote"
static splice_unquote_symbol_string, db "splice-unquote"
static deref_symbol_string, db "deref"
static with_meta_symbol_string, db "with-meta"
;; Error message strings
;; Printed with print_rawstring (address in RSI, length in RDX).
;; The trailing 10 is a line feed.
static error_string_unexpected_end, db "Error: Unexpected end of input. Could be a missing )", 10
;; NOTE(review): no trailing line feed here, unlike the message above, and
;; no reference to this string in the part of the file reviewed -- confirm
;; whether it is still needed.
static error_string_bracket_not_brace, db "Error: Expecting '}' but got ')'"
;; Symbols for comparison
;; Compared against freshly read symbols with compare_char_array so that
;; nil, true and false can be turned into their own value types.
static_symbol nil_symbol, 'nil'
static_symbol true_symbol, 'true'
static_symbol false_symbol, 'false'
section .text
;; Read a string into memory as a form (nested lists and atoms)
;; Note: In this implementation the tokenizer is not done separately.
;; Tokens are fetched one at a time with tokenizer_next while the form
;; is being built.
;;
;; Input: Address of string (char array) in RSI
;;
;; Output: Address of object in RAX
;;
;;   If the input ends before a form is complete, an error message is
;;   printed, the stack is rewound to its value on entry, the first
;;   container (if any) is released, and a nil object is returned.
;;
;; Uses registers:
;;   R12 Address of the start of the current list (starts 0)
;;   R13 Address of the current list tail
;;   R14 Stack pointer at start. Used for unwinding on error
;;   R15 Address of first list. Used for unwinding on error
;;
;; In addition, the tokenizer uses
;;
;;   RAX (object return)
;;   RBX
;;   RCX (character return in CL)
;;   RDX
;;   R9  ** Tokenizer state (see tokenizer_init); must be preserved
;;   R10 **
;;   R11 **
;;
;; NOTE(review): only R8 and R9 are saved around raw_to_string, and nothing
;; is saved around alloc_cons / release_cons. This presumably relies on
;; those routines leaving R10 and R11 alone -- confirm.
;;
;; Internal protocol (.read_loop is called recursively):
;;   - .read_loop returns one object in RAX.
;;   - A closing delimiter ) } ] is reported to the enclosing container
;;     reader as a nil object in RAX with the delimiter still in CL.
;;   - When a container has been read completely, CL is set to zero
;;     before returning, so that the delimiter which closed it is not
;;     mistaken by the enclosing container for its own delimiter.
;;
read_str:
        ; Initialise tokenizer
        call tokenizer_init

        ; Set current list to zero
        mov r12, 0

        ; Set first list to zero
        mov r15, 0

        ; Save stack pointer for unwinding
        mov r14, rsp

.read_loop:
        call tokenizer_next
        cmp cl, 0
        jne .got_token

        ; Unexpected end of tokens
        mov rdx, error_string_unexpected_end.len
        mov rsi, error_string_unexpected_end
        jmp .error

.got_token:
        cmp cl, 'i'             ; An integer. Cons object in RAX
        je .finished

        cmp cl, '"'             ; A string. Array object in RAX
        je .finished

        cmp cl, 's'             ; A symbol
        je .symbol

        cmp cl, '('
        je .list_start

        cmp cl, ')'
        je .return_nil          ; Note: if reading a list, cl will be tested in the list reader

        cmp cl, '{'
        je .map_start

        cmp cl, '}'             ; cl tested in map reader
        je .return_nil

        cmp cl, '['
        je .vector_start

        cmp cl, ']'             ; cl tested in vector reader
        je .return_nil

        cmp cl, 39              ; quote '
        je .handle_quote

        cmp cl, '`'
        je .handle_quasiquote

        cmp cl, '~'
        je .handle_unquote

        cmp cl, 1               ; Token code 1 stands for the pair '~@'
        je .handle_splice_unquote

        cmp cl, '@'
        je .handle_deref

        cmp cl, '^'
        je .handle_with_meta

        ; Unknown
        jmp .return_nil

        ; --------------------------------

.list_start:
        ; Get the first value
        ; Note that we call rather than jmp because the first
        ; value needs to be treated differently. There's nothing
        ; to append to yet...
        call .read_loop

        ; rax now contains the first object
        cmp cl, ')'             ; Check if it was end of list
        jne .list_has_contents
        mov cl, 0               ; so ')' doesn't propagate to nested lists
        ; Turn the nil object returned for ')' into the empty list
        mov [rax], BYTE maltype_empty_list
        ret                     ; Returns the empty list given "()"

.list_has_contents:
        ; A plain value Cons (block and container bits both clear) is
        ; used directly as the first cell of the list.
        ; Anything else is wrapped in a newly allocated Cons.
        mov cl, BYTE [rax]
        mov ch, cl
        and ch, (block_mask + container_mask) ; Tests block and container type
        jz .list_is_value

        ; If here then not a simple value, so need to allocate
        ; a Cons object

        ; Start new list
        push rax
        call alloc_cons         ; Address in rax
        pop rbx
        mov [rax], BYTE (block_cons + container_list + content_pointer)
        mov [rax + Cons.car], rbx
        ; Now have Cons in RAX, containing pointer to object as car

.list_is_value:
        ; Cons in RAX
        ; Make sure it's marked as a list
        mov cl, BYTE [rax]
        or cl, container_list
        mov [rax], BYTE cl

        mov r12, rax            ; Start of current list
        mov r13, rax            ; Set current list
        cmp r15, 0              ; Test if first list
        jne .list_read_loop
        mov r15, rax            ; Save the first, for unwinding

.list_read_loop:
        ; Repeatedly get the next value in the list
        ; (which may be other lists)
        ; until we get a ')' token

        ; The recursive call may overwrite R12 / R13
        push r12
        push r13
        call .read_loop         ; object in rax
        pop r13
        pop r12

        cmp cl, ')'             ; Check if it was end of list
        je .list_done           ; Have nil object in rax

        ; Test if this is a Cons value
        mov cl, BYTE [rax]
        mov ch, cl
        and ch, (block_mask + container_mask) ; Tests block and container type
        jz .list_loop_is_value

        ; If here then not a simple value, so need to allocate
        ; a Cons object

        ; Start new list
        push rax
        call alloc_cons         ; Address in rax
        pop rbx
        mov [rax], BYTE (block_cons + container_list + content_pointer)
        mov [rax + Cons.car], rbx
        ; Now have Cons in RAX, containing pointer to object as car

.list_loop_is_value:
        ; Cons in RAX
        ; Make sure it's marked as a list
        mov cl, BYTE [rax]
        or cl, container_list
        mov [rax], BYTE cl

        ; Append to r13
        mov [r13 + Cons.typecdr], BYTE content_pointer
        mov [r13 + Cons.cdr], rax
        mov r13, rax            ; Set current list

        jmp .list_read_loop

.list_done:
        ; Release nil object in rax
        mov rsi, rax
        call release_cons

        ; Terminate the list
        mov [r13 + Cons.typecdr], BYTE content_nil
        mov QWORD [r13 + Cons.cdr], QWORD 0
        mov rax, r12            ; Start of current list

        ; The ')' that closed this list must not be seen by an enclosing
        ; list reader. Clear CL explicitly (as the empty-list path does)
        ; rather than depending on what release_cons leaves in RCX.
        mov cl, 0
        ret

        ; --------------------------------

.map_start:
        ; Get the first value
        ; Note that we call rather than jmp because the first
        ; value needs to be treated differently. There's nothing
        ; to append to yet...
        call .read_loop

        ; rax now contains the first object
        cmp cl, '}'             ; Check if it was end of map
        jne .map_has_contents
        mov cl, 0               ; so '}' doesn't propagate to nested maps
        ; Turn the nil object returned for '}' into the empty map
        mov [rax], BYTE maltype_empty_map
        ret                     ; Returns the empty map given "{}"

.map_has_contents:
        ; A plain value Cons (block and container bits both clear) is
        ; used directly as the first cell of the map.
        ; Anything else is wrapped in a newly allocated Cons.
        mov cl, BYTE [rax]
        mov ch, cl
        and ch, (block_mask + container_mask) ; Tests block and container type
        jz .map_is_value

        ; If here then not a simple value, so need to allocate
        ; a Cons object

        ; Start new map
        push rax
        call alloc_cons         ; Address in rax
        pop rbx
        mov [rax], BYTE (block_cons + container_map + content_pointer)
        mov [rax + Cons.car], rbx
        ; Now have Cons in RAX, containing pointer to object as car

.map_is_value:
        ; Cons in RAX
        ; Make sure it's marked as a map
        mov cl, BYTE [rax]
        or cl, container_map
        mov [rax], BYTE cl

        mov r12, rax            ; Start of current map
        mov r13, rax            ; Set current map
        cmp r15, 0              ; Test if first map
        jne .map_read_loop
        mov r15, rax            ; Save the first, for unwinding

.map_read_loop:
        ; Repeatedly get the next value in the map
        ; (which may be other maps)
        ; until we get a '}' token

        ; The recursive call may overwrite R12 / R13
        push r12
        push r13
        call .read_loop         ; object in rax
        pop r13
        pop r12

        cmp cl, '}'             ; Check if it was end of map
        je .map_done            ; Have nil object in rax

        ; Test if this is a Cons value
        mov cl, BYTE [rax]
        mov ch, cl
        and ch, (block_mask + container_mask) ; Tests block and container type
        jz .map_loop_is_value

        ; If here then not a simple value, so need to allocate
        ; a Cons object

        ; Start new map
        push rax
        call alloc_cons         ; Address in rax
        pop rbx
        mov [rax], BYTE (block_cons + container_map + content_pointer)
        mov [rax + Cons.car], rbx
        ; Now have Cons in RAX, containing pointer to object as car

.map_loop_is_value:
        ; Cons in RAX
        ; Make sure it's marked as a map
        mov cl, BYTE [rax]
        or cl, container_map
        mov [rax], BYTE cl

        ; Append to r13
        mov [r13 + Cons.typecdr], BYTE content_pointer
        mov [r13 + Cons.cdr], rax
        mov r13, rax            ; Set current map

        jmp .map_read_loop

.map_done:
        ; Release nil object in rax
        mov rsi, rax
        call release_cons

        ; Terminate the map
        mov [r13 + Cons.typecdr], BYTE content_nil
        mov QWORD [r13 + Cons.cdr], QWORD 0
        mov rax, r12            ; Start of current map

        ; The '}' that closed this map must not be seen by an enclosing
        ; map reader. Clear CL explicitly (as the empty-map path does)
        ; rather than depending on what release_cons leaves in RCX.
        mov cl, 0
        ret

        ; --------------------------------

.vector_start:
        ; Get the first value
        ; Note that we call rather than jmp because the first
        ; value needs to be treated differently. There's nothing
        ; to append to yet...
        call .read_loop

        ; rax now contains the first object
        cmp cl, ']'             ; Check if it was end of vector
        jne .vector_has_contents
        mov cl, 0               ; so ']' doesn't propagate to nested vectors
        ; Turn the nil object returned for ']' into the empty vector
        mov [rax], BYTE maltype_empty_vector
        ret                     ; Returns the empty vector given "[]"

.vector_has_contents:
        ; A plain value Cons (block and container bits both clear) is
        ; used directly as the first cell of the vector.
        ; Anything else is wrapped in a newly allocated Cons.
        mov cl, BYTE [rax]
        mov ch, cl
        and ch, (block_mask + container_mask) ; Tests block and container type
        jz .vector_is_value

        ; If here then not a simple value, so need to allocate
        ; a Cons object

        ; Start new vector
        push rax
        call alloc_cons         ; Address in rax
        pop rbx
        mov [rax], BYTE (block_cons + container_vector + content_pointer)
        mov [rax + Cons.car], rbx
        ; Now have Cons in RAX, containing pointer to object as car

.vector_is_value:
        ; Cons in RAX
        ; Make sure it's marked as a vector
        mov cl, BYTE [rax]
        or cl, container_vector
        mov [rax], BYTE cl

        mov r12, rax            ; Start of current vector
        mov r13, rax            ; Set current vector
        cmp r15, 0              ; Test if first vector
        jne .vector_read_loop
        mov r15, rax            ; Save the first, for unwinding

.vector_read_loop:
        ; Repeatedly get the next value in the vector
        ; (which may be other vectors)
        ; until we get a ']' token

        ; The recursive call may overwrite R12 / R13
        push r12
        push r13
        call .read_loop         ; object in rax
        pop r13
        pop r12

        cmp cl, ']'             ; Check if it was end of vector
        je .vector_done         ; Have nil object in rax

        ; Test if this is a Cons value
        mov cl, BYTE [rax]
        mov ch, cl
        and ch, (block_mask + container_mask) ; Tests block and container type
        jz .vector_loop_is_value

        ; If here then not a simple value, so need to allocate
        ; a Cons object

        ; Start new vector
        push rax
        call alloc_cons         ; Address in rax
        pop rbx
        mov [rax], BYTE (block_cons + container_vector + content_pointer)
        mov [rax + Cons.car], rbx
        ; Now have Cons in RAX, containing pointer to object as car

.vector_loop_is_value:
        ; Cons in RAX
        ; Make sure it's marked as a vector
        mov cl, BYTE [rax]
        or cl, container_vector
        mov [rax], BYTE cl

        ; Append to r13
        mov [r13 + Cons.typecdr], BYTE content_pointer
        mov [r13 + Cons.cdr], rax
        mov r13, rax            ; Set current vector

        jmp .vector_read_loop

.vector_done:
        ; Release nil object in rax
        mov rsi, rax
        call release_cons

        ; Terminate the vector
        mov [r13 + Cons.typecdr], BYTE content_nil
        mov QWORD [r13 + Cons.cdr], QWORD 0
        mov rax, r12            ; Start of current vector

        ; The ']' that closed this vector must not be seen by an enclosing
        ; vector reader. Clear CL explicitly (as the empty-vector path
        ; does) rather than depending on what release_cons leaves in RCX.
        mov cl, 0
        ret

        ; --------------------------------

.handle_quote:
        ; Turn 'a into (quote a)
        call alloc_cons         ; Address in rax
        mov r12, rax

        ; Get a symbol "quote"
        push r8
        push r9
        mov rsi, quote_symbol_string
        mov edx, quote_symbol_string.len
        call raw_to_string      ; Address in rax
        pop r9
        pop r8

.wrap_next_object:
        ; Shared tail of the one-argument reader macros.
        ; Here: RAX = string holding the symbol name,
        ;       R12 = Cons that becomes the head of the new list
        mov [rax], BYTE maltype_symbol
        mov [r12], BYTE (block_cons + container_list + content_pointer)
        mov [r12 + Cons.car], rax

        ; Get the next object
        push r12
        call .read_loop         ; object in rax
        pop r12

        mov r13, rax            ; Put object to be quoted in r13

        call alloc_cons         ; Address in rax
        mov [rax], BYTE (block_cons + container_list + content_pointer)
        mov [rax + Cons.car], r13
        mov [rax + Cons.typecdr], BYTE content_nil

        ; Cons object in rax. Append to object in r12
        mov [r12 + Cons.typecdr], BYTE content_pointer
        mov [r12 + Cons.cdr], rax

        mov rax, r12
        ret

        ; --------------------------------

.handle_quasiquote:
        ; Turn `a into (quasiquote a)
        call alloc_cons         ; Address in rax
        mov r12, rax

        ; Get a symbol "quasiquote"
        push r8
        push r9
        mov rsi, quasiquote_symbol_string
        mov edx, quasiquote_symbol_string.len
        call raw_to_string      ; Address in rax
        pop r9
        pop r8
        jmp .wrap_next_object   ; From there the same as handle_quote

        ; --------------------------------

.handle_unquote:
        ; Turn ~a into (unquote a)
        call alloc_cons         ; Address in rax
        mov r12, rax

        ; Get a symbol "unquote"
        push r8
        push r9
        mov rsi, unquote_symbol_string
        mov edx, unquote_symbol_string.len
        call raw_to_string      ; Address in rax
        pop r9
        pop r8
        jmp .wrap_next_object   ; From there the same as handle_quote

        ; --------------------------------

.handle_splice_unquote:
        ; Turn ~@a into (splice-unquote a)
        call alloc_cons         ; Address in rax
        mov r12, rax

        ; Get a symbol "splice-unquote"
        push r8
        push r9
        mov rsi, splice_unquote_symbol_string
        mov edx, splice_unquote_symbol_string.len
        call raw_to_string      ; Address in rax
        pop r9
        pop r8
        jmp .wrap_next_object   ; From there the same as handle_quote

        ; --------------------------------

.handle_deref:
        ; Turn @a into (deref a)
        call alloc_cons         ; Address in rax
        mov r12, rax

        ; Get a symbol "deref"
        push r8
        push r9
        mov rsi, deref_symbol_string
        mov edx, deref_symbol_string.len
        call raw_to_string      ; Address in rax
        pop r9
        pop r8
        jmp .wrap_next_object   ; From there the same as handle_quote

        ; --------------------------------

.handle_with_meta:
        ; Turn ^ a b into (with-meta b a)
        call alloc_cons         ; Address in rax
        mov r12, rax

        ; Get a symbol "with-meta"
        push r8
        push r9
        mov rsi, with_meta_symbol_string
        mov edx, with_meta_symbol_string.len
        call raw_to_string      ; Address in rax
        pop r9
        pop r8

        mov [rax], BYTE maltype_symbol
        mov [r12], BYTE (block_cons + container_list + content_pointer)
        mov [r12 + Cons.car], rax

        ; Get the next two objects
        push r12
        call .read_loop         ; object in rax
        pop r12
        push rax                ; First object (a), kept on the stack
        push r12
        call .read_loop         ; in RAX
        pop r12

        mov r13, rax            ; Second object (b)

        ; Second cell of the list holds b
        call alloc_cons         ; Address in rax
        mov [rax], BYTE (block_cons + container_list + content_pointer)
        mov [rax + Cons.car], r13

        ; Cons object in rax. Append to object in r12
        mov [r12 + Cons.typecdr], BYTE content_pointer
        mov [r12 + Cons.cdr], rax

        mov r13, rax

        ; Third and last cell of the list holds a
        call alloc_cons         ; Address in rax
        mov [rax], BYTE (block_cons + container_list + content_pointer)
        ; Terminate the list explicitly, as .wrap_next_object does
        mov [rax + Cons.typecdr], BYTE content_nil

        pop rdi                 ; First object
        mov [rax + Cons.car], rdi

        ; Append to object in R13
        mov [r13 + Cons.typecdr], BYTE content_pointer
        mov [r13 + Cons.cdr], rax

        mov rax, r12
        ret

        ; --------------------------------

.symbol:
        ; symbol is in RAX
        ; Some symbols have their own type
        ; - nil, true, false
        ;
        ; RSI is saved around each comparison so that the symbol is
        ; still available afterwards.

        mov rsi, rax
        mov rdi, nil_symbol
        push rsi
        call compare_char_array
        pop rsi
        cmp rax, 0              ; Zero means the arrays matched
        je .symbol_nil

        mov rdi, true_symbol
        push rsi
        call compare_char_array
        pop rsi
        cmp rax, 0
        je .symbol_true

        mov rdi, false_symbol
        push rsi
        call compare_char_array
        pop rsi
        cmp rax, 0
        je .symbol_false

        ; not a special symbol, so return
        mov rax, rsi
        ret

.symbol_nil:
        ; symbol in rsi not needed
        call release_array

        call alloc_cons
        mov [rax], BYTE maltype_nil ; a nil type
        ret

.symbol_true:
        ; symbol in rsi not needed
        call release_array

        call alloc_cons
        mov [rax], BYTE maltype_true
        ret

.symbol_false:
        ; symbol in rsi not needed
        call release_array

        call alloc_cons
        mov [rax], BYTE maltype_false
        ret

        ; --------------------------------
.finished:
        ; Object from the tokenizer is already in RAX
        ret

.error:
        ; Jump here on error with raw string in RSI
        ; and string length in rdx
        push r14
        push r15
        call print_rawstring
        pop r15
        pop r14

        ; fall through to unwind
.unwind:
        ; Jump to here cleans up

        mov rsp, r14            ; Rewind stack pointer
        cmp r15, 0              ; Check if there is a list
        je .return_nil
        mov rsi, r15
        call release_cons       ; releases everything recursively
        ; fall through to return_nil

.return_nil:
        ; Allocates a new Cons object with nil and returns
        ; Cleanup should happen before jumping here
        ; RCX is saved so that the token code in CL (for example a
        ; closing delimiter) is still there for the caller to test.
        push rcx
        call alloc_cons
        pop rcx
        mov [rax], BYTE maltype_nil
        mov [rax + Cons.typecdr], BYTE content_nil
        ret
;; Initialise the tokenizer to read from one Array chunk
;;
;; Input: Address of string (Array) in RSI
;;
;; Sets up the tokenizer state. These three registers must be preserved
;; between calls to tokenizer_next_char:
;;
;; R9   Address of string (the Array chunk being read)
;; R10  Position in data array (starts at the first data byte)
;; R11  End of data array (start of data + Array.length)
;;
;; RSI itself is not modified.
;;
tokenizer_init:
        mov     r9, rsi                         ; chunk being read
        lea     r10, [rsi + Array.data]         ; read position = start of data
        mov     r11d, [rsi + Array.length]      ; 32-bit length, zero-extended to 64 bits
        add     r11, r10                        ; end = start of data + length
        ret
;; Switch the tokenizer to the chunk that follows the current one
;;
;; Strings are not stored in one contiguous block of memory, but may
;; use multiple Array objects in a linked list. R9 holds the current
;; chunk; its Array.next field gives the following one.
;;
;; On return:
;;   - another chunk exists: R9, R10, R11 describe it (see tokenizer_init)
;;   - no chunks are left:   R10 = R11 = 0, R9 is unchanged
;;
;; RSI is preserved.
tokenizer_next_chunk:
        mov     r10, [r9 + Array.next]
        test    r10, r10
        jnz     .have_chunk

        ; End of the chain. R10 is zero, so make R11 equal to it
        mov     r11, r10
        ret

.have_chunk:
        ; tokenizer_init takes the chunk in RSI, but symbol reading
        ; keeps its output string there (tokenizer_next.handle_symbol)
        push    rsi
        mov     rsi, r10
        call    tokenizer_init
        pop     rsi
        ret
;; Moves the next char into CL
;; If no more, puts 0 into CL
;;
;; Uses the tokenizer state in R9, R10, R11 (see tokenizer_init) and
;; advances R10 past the character returned.
;;
;; When the current chunk is used up the following chunks are tried in
;; turn. Chunks holding no data are skipped, so end of input is only
;; reported once the whole chain has been walked.
tokenizer_next_char:
.check_chunk:
        ; Check if we have reached the end of this chunk
        cmp     r10, r11
        jne     .chars_remain

        ; Hit the end. See if there is another chunk
        call    tokenizer_next_chunk
        test    r10, r10        ; R10 is zero only when no chunk was left
        jnz     .check_chunk    ; Got another; it may itself be empty

        ; No more chunks
        mov     cl, 0           ; Null char signals end
        ret

.chars_remain:
        mov     cl, BYTE [r10]
        inc     r10             ; point to next byte
        ret
;; Get the next token
;; Token code is in CL register. Could be:
;; - 0 : Nil, finished
;; - Characters ()[]{}'`~^@
;; - Pair '~@', represented by code 1
;; - A string: " in CL, and address in RAX
;; - An integer: 'i' in CL
;; - A symbol: 's' in CL, address in RAX
;;
;; Address of object in RAX
;;
;; May use registers:
;; RBX
;; RCX
;; RDX
;;
tokenizer_next:
.next_char:
; Fetch the next char into CL
call tokenizer_next_char
cmp cl, 0
je .found ; End, no more tokens
; Here expect to have:
; - The current character in CL
; - Address of next data in r10
; - Address of data end in r11
; Skip whitespace or commas
cmp cl, ' ' ; Space
je .next_char
cmp cl, ',' ; Comma
je .next_char
cmp cl, 9 ; Tab
je .next_char
cmp cl, 10 ; Line Feed
je .next_char
cmp cl, 13 ; Carriage Return
je .next_char
; Special characters. These are returned in CL as-is
cmp cl, '('
je .found
cmp cl, ')'
je .found
cmp cl, '['
je .found
cmp cl, ']'
je .found
cmp cl, '{'
je .found
cmp cl, '}'
je .found
cmp cl, 39 ; character '
je .found
cmp cl, 96 ; character `
je .found
cmp cl, '^'
je .found
cmp cl, '@'
je .found
cmp cl, '~' ; Could be followed by '@'
je .handle_tilde
cmp cl, ';' ; Start of a comment
je .comment
cmp cl, 34 ; Opening string quotes
je .handle_string
; Could be number or symbol
cmp cl, '-' ; Minus sign
je .handle_minus
mov ch, 0
; Check for a character 0-9
cmp cl, '0'
jl .handle_symbol
cmp cl, '9'
jg .handle_symbol
; Here an integer
jmp .handle_integer
.comment:
; Start of a comment. Keep reading until a new line or end
; Fetch the next char into CL
call tokenizer_next_char
cmp cl, 0
je .found ; End, no more tokens
cmp cl, 10
je .next_char ; Next line, start reading again
jmp .comment
.handle_minus:
; Push current state of the tokenizer
push r9
push r10
push r11
; Get the next character
call tokenizer_next_char
; Check if it is a number
cmp cl, '0'
jl .minus_not_number
cmp cl, '9'
jg .minus_not_number
; Here is a number
mov ch, '-' ; Put '-' in ch for later
; Discard old state by moving stack pointer
add rsp, 24 ; 3 * 8 bytes
jmp .handle_integer
.minus_not_number:
; Restore state
pop r11
pop r10
pop r9
mov cl, '-' ; Put back
jmp .handle_symbol
.handle_integer:
; Start integer
; accumulate in EDX
xor edx, edx
.integer_loop:
; Here have a char 0-9 in CL
sub cl, '0' ; Convert to number between 0 and 9
movzx ebx, cl
add edx, ebx
; Push current state of the tokenizer
push r9
push r10
push r11
; Peek at next character
call tokenizer_next_char ; Next char in CL
cmp cl, '0'
jl .integer_finished
cmp cl, '9'
jg .integer_finished
; Discard old state by moving stack pointer
add rsp, 24 ; 3 * 8 bytes
imul edx, 10
jmp .integer_loop
.integer_finished:
; Next char not an int
; Restore state of the tokenizer
pop r11
pop r10
pop r9
push rdx ; Save the integer
; Get a Cons object to put the result into
call alloc_cons
pop rdx ; Restore integer
; Check if the number should be negative
cmp ch, '-'
jne .integer_store
neg rdx
.integer_store:
; Address of Cons now in RAX
mov [rax], BYTE maltype_integer
mov [rax + Cons.car], rdx
mov cl, 'i' ; Mark as an integer
ret
; -------------------------------------------
.handle_symbol:
; Read characters until reaching whitespace, special character or end
call string_new
mov rsi, rax ; Output string in rsi
.symbol_loop:
; Put the current character into the array
call string_append_char
; Push current state of the tokenizer
push r9
push r10
push r11
call tokenizer_next_char
cmp cl, 0 ; End of characters
je .symbol_finished
cmp cl, ' ' ; Space
je .symbol_finished
cmp cl, ',' ; Comma
je .symbol_finished
cmp cl, 9 ; Tab
je .symbol_finished
cmp cl, 10 ; Line Feed
je .symbol_finished
cmp cl, 13 ; Carriage Return
je .symbol_finished
cmp cl, '('
je .symbol_finished
cmp cl, ')'
je .symbol_finished
cmp cl, '['
je .symbol_finished
cmp cl, ']'
je .symbol_finished
cmp cl, '{'
je .symbol_finished
cmp cl, '}'
je .symbol_finished
cmp cl, 39 ; character '
je .symbol_finished
cmp cl, 96 ; character `
je .symbol_finished
cmp cl, '^'