-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest.rkt
1265 lines (1057 loc) · 27.9 KB
/
test.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#lang pisemble
(require (for-syntax syntax/parse racket/stxparam))
(require racket/stxparam)
(require "periph.rkt" "stack.rkt")
(define s-frame-size 256) ; size of all saved registers
(define SYNC_INVALID_EL1t 0)
(define IRQ_INVALID_EL1t 1)
(define FIQ_INVALID_EL1t 2)
(define ERROR_INVALID_EL1t 3)
(define SYNC_INVALID_EL1h 4)
(define IRQ_INVALID_EL1h 5)
(define FIQ_INVALID_EL1h 6)
(define ERROR_INVALID_EL1h 7)
(define SYNC_INVALID_EL0_64 8)
(define IRQ_INVALID_EL0_64 9)
(define FIQ_INVALID_EL0_64 10)
(define ERROR_INVALID_EL0_64 11)
(define SYNC_INVALID_EL0_32 12)
(define IRQ_INVALID_EL0_32 13)
(define FIQ_INVALID_EL0_32 14)
(define ERROR_INVALID_EL0_32 15)
(define SYSTEM_TIMER_IRQ_0 1)
(define SYSTEM_TIMER_IRQ_1 %10)
(define SCTLR_RESERVED
(bitwise-ior (arithmetic-shift 3 28)
(arithmetic-shift 3 22)
(arithmetic-shift 1 20)
(arithmetic-shift 1 11)))
(define SCTLR_EE_LITTLE_ENDIAN (arithmetic-shift 0 25))
(define SCTLR_EOE_LITTLE_ENDIAN (arithmetic-shift 0 24))
(define SCTLR_I_CACHE_DISABLED (arithmetic-shift 0 12))
(define SCTLR_D_CACHE_DISABLED (arithmetic-shift 0 2))
(define SCTLR_I_CACHE_ENABLED (arithmetic-shift 1 12))
(define SCTLR_D_CACHE_ENABLED (arithmetic-shift 1 2))
(define SCTLR_MMU_DISABLED (arithmetic-shift 0 0))
(define SCTLR_MMU_ENABLED (arithmetic-shift 1 0))
(define SCTLR_VALUE_MMU_DISABLED (bitwise-ior SCTLR_RESERVED SCTLR_EE_LITTLE_ENDIAN SCTLR_I_CACHE_DISABLED SCTLR_D_CACHE_DISABLED SCTLR_MMU_DISABLED))
(define SCTLR_VALUE_MMU_DISABLED2 (bitwise-ior SCTLR_RESERVED SCTLR_EE_LITTLE_ENDIAN SCTLR_I_CACHE_ENABLED SCTLR_D_CACHE_ENABLED SCTLR_MMU_DISABLED))
; ***************************************
; HCR_EL2, Hypervisor Configuration Register (EL2), Page 2487 of AArch64-Reference-Manual.
; ***************************************
(define HCR_RW (arithmetic-shift 1 31))
(define HCR_VALUE HCR_RW)
; ***************************************
; SCR_EL3, Secure Configuration Register (EL3), Page 2648 of AArch64-Reference-Manual.
; ***************************************
(define SCR_RESERVED (arithmetic-shift 3 4))
(define SCR_RW (arithmetic-shift 1 10))
(define SCR_NS (arithmetic-shift 1 0))
(define SCR_VALUE (bitwise-ior SCR_RESERVED SCR_RW SCR_NS))
; ***************************************
; SPSR_EL3, Saved Program Status Register (EL3) Page 389 of AArch64-Reference-Manual.
; ***************************************
(define SPSR_MASK_ALL (arithmetic-shift 7 6))
(define SPSR_EL1h (arithmetic-shift 5 0))
(define SPSR_VALUE (bitwise-ior SPSR_MASK_ALL SPSR_EL1h))
(define-syntax (kernel-entry stx)
(syntax-parse stx
[(_)
#'{
; (PUSH x0 x1 x2 x3 x4 x5 x6 x7 x8)
sub sp sp @s-frame-size
stp x0 x1 [sp @(* 16 0)]
stp x2 x3 [sp @(* 16 1)]
stp x4 x5 [sp @(* 16 2)]
stp x6 x7 [sp @(* 16 3)]
stp x8 x9 [sp @(* 16 4)]
stp x10 x11 [sp @(* 16 5)]
stp x12 x13 [sp @(* 16 6)]
stp x14 x15 [sp @(* 16 7)]
stp x16 x17 [sp @(* 16 8)]
stp x18 x19 [sp @(* 16 9)]
stp x20 x21 [sp @(* 16 10)]
stp x22 x23 [sp @(* 16 11)]
stp x24 x25 [sp @(* 16 12)]
stp x27 x27 [sp @(* 16 13)]
stp x28 x29 [sp @(* 16 14)]
str x30 [sp @(* 16 15)]
}]))
(define-syntax (kernel-exit stx)
(syntax-parse stx
[(_)
#'{
; (POP x8 x7 x6 x5 x4 x3 x2 x1 x0)
ldp x0 x1 [sp @(* 16 0)]
ldp x2 x3 [sp @(* 16 1)]
ldp x4 x5 [sp @(* 16 2)]
ldp x6 x7 [sp @(* 16 3)]
ldp x8 x9 [sp @(* 16 4)]
ldp x10 x11 [sp @(* 16 5)]
ldp x12 x13 [sp @(* 16 6)]
ldp x14 x15 [sp @(* 16 7)]
ldp x16 x17 [sp @(* 16 8)]
ldp x18 x19 [sp @(* 16 9)]
ldp x20 x21 [sp @(* 16 10)]
ldp x22 x23 [sp @(* 16 11)]
ldp x24 x25 [sp @(* 16 12)]
ldp x26 x27 [sp @(* 16 13)]
ldp x28 x29 [sp @(* 16 14)]
ldr x30 [sp @(* 16 15)]
add sp sp @s-frame-size
eret
}]))
(define-syntax (handle-invalid-entry stx)
(syntax-parse stx
[(_ type)
#'{
mov x0 @type
mrs x1 esr_el1
mrs x2 elr_el1
bl show-invalid-entry-message:
b err-hang:
}
]))
(define-syntax (wait stx)
(syntax-parse stx
[(_ value)
#'{ldr x2 value
:inner
sub x2 x2 @1
cbnz x2 inner- }]))
(define-syntax (debug-reg stx)
(syntax-parse stx
[(_ r:register)
#'{
; preserve x0, it is used for passing char to send-char
(PUSH x0)
(PUSH r)
; first send code indicating whether a 32 or 64 bit number will be sent
mov w0 @(if `r.is32 2 3)
bl send-char:
; restore r into x0 and send first char
(POP x0)
bl send-char:
; shift and send the remainig 3 or 7 bytes
(for ([_ (in-range (if `r.is32 3 7))])
{ lsr x0 x0 @8
bl send-char: })
; restore x0 original value
(POP x0) }]
[(_ r:register rn:register ...+)
#'(begin (debug-reg r)
(debug-reg rn ...))]))
(define-syntax (dump-all-regs stx)
(syntax-parse stx
[(_)
#:with (reg ...) (for/list ([i (in-range 0 31)])
(datum->syntax this-syntax (string->symbol (format "X~a"i))))
#:with (name ...) (for/list ([i (in-range 0 31)])
(datum->syntax this-syntax (format "X~a "i)))
#'(begin (begin (debug-str name #f) (debug-reg reg)) ...)]))
; (debug-reg x0 w1 w2 x3)
(define pi 'pi3)
;(define pi 'pi4)
; the vector table is layed out 0x80 bytes
; apart (
(define-syntax (ventry stx)
(syntax-parse stx
[(_ vector:label-targ)
#'{
/= $80
b vector
}]
[(_ vector:label-targ more ...+)
#'{
/= $80
b vector
(ventry more ...)
}]))
(define VCORE-MBOX (+ PERIPH-BASE $00B880))
(aarch64 "kernel8.img" [] {
width = 1024
height = 768
set-offset = $1C
clr-offset = $28
udn-offset = $E4
aux-enables = $4
aux-io = $40
aux-ier = $44
aux-iir = $48
aux-lcr = $4c
aux-mcr = $50
aux-lsr = $54
aux-cntrl = $60
aux-baud = $68
;mailbox offsets
mbox-read = $0
mbox-poll = $10
mbox-sender = $14
mbox-status = $18
mbox-config = $1c
mbox-write = $20
mbox-response = $80000000
mbox-full = $80000000
mbox-empty = $40000000
;Mailbox channels
mbox-ch-prop = $8 ; request to VC from ARM
; raspberry pi image size
image-width = 190
image-height = 240
mrs x0 mpidr_el1
mov x1 @$3
and x0 x0 x1
cbz x0 main:
b hang:
:err-hang
b err-hang-
:hang
wfe
b hang-
:main
;; msr cpacr_el1, x0 // Enable FP/SIMD at EL1
movz x0 @$30 LSL @16 ; 3 << 20
(write-value-32 $d5181040)
; get ready to switch from EL3 down to EL1
ldr x0 SCTLR-VALUE-MMU-DISABLED:
msr sctlr_el1 x0
ldr x0 HCR-VALUE:
msr hcr_el2 x0
ldr x0 SCR-VALUE:
msr scr_el3 x0
ldr x0 SPSR-VALUE:
msr spsr_el3 x0
adr x0 el1_entry:
msr elr_el3 x0
eret
:el1_entry
ldr x1 START:
mov sp x1
(init-uart)
(debug-str "init irq vector" #t)
bl irq-vector-init:
; (debug-str "init timer " #t)
bl timer-init:
; (debug-str "init ic" #t)
bl enable-interrupt-controller:
; (debug-str "enable irq" #t)
; bl enable-irq:
ldr x0 SMICS:
mov x1 @0
str w1 (x0 @0)
; bl dump-regs:
; (debug-str "HELLO WORLD!")
; (debug-str "BAREMETAL RPI4 MEETS RACKETLANG!")
;(debug-str "heres a number" #t)
mov x0 @$BAD
lsl x0 x0 @16
movk x0 @$F00D
lsl x0 x0 @16
movk x0 @$DEAD
lsl x0 x0 @16
movk x0 @$bEEf
(debug-reg x0 w0)
(debug-str "isqr 1" #t)
mov x0 @1
bl isqr:
(debug-reg x0 w0)
(debug-str "isqr 100" #t)
mov x0 @100
bl isqr:
(debug-reg x0 w0)
ldr x1 FAKE-ISR: ; SMI
(debug-reg x1)
lsl x0 x0 @32
(debug-reg x0 w0)
ldr x0 TEST:
(debug-reg x0 )
adr x0 TEST:
ldr x0 (x0 @0)
(debug-reg x0 )
lsr x0 x0 @8
(debug-reg x0 )
lsr x0 x0 @8
(debug-reg x0 )
lsr x0 x0 @8
(debug-reg x0 )
lsr x0 x0 @8
(debug-reg x0 )
:here adr x0 here-
(debug-reg w0)
(debug-str "value before:" #t)
adr x0 MBOX-MSG:
(debug-reg w0)
; add x0 x0 @24
ldrb w0 (x0 @0)
(debug-reg w0)
(debug-str "fb addr nefore" #t)
adr x0 fb:
ldr w0 (x0 @0)
(debug-reg x0)
adr x8 MBOX-MSG:
bl send-vcore-msg:
(debug-str "bpl" #t)
adr x0 bpl:
; add x0 x0 @20
ldr w0 (x0 @0)
(debug-reg x0)
(debug-str "fb addr" #t)
adr x0 fb:
; add x0 x0 @20
ldr w0 (x0 @0)
(debug-reg x0)
;convert to gpu address
ldr x1 CONVERT:
and x0 x0 x1
; draw pixels!
mov x8 x0 ; X8 holds base video memory
adr x9 VMEM:
str x8 (x9 @0) ; store memory pointer in VMEM
mov x1 @255
lsl x1 x1 @32
movk x1 @255
; lsl x1 x1 @32
; movk x1 @255
; lsl x1 x1 @8
:draw
mov x4 @height ; rows
:row
mov x3 @(/ width 8)
:col
str x1 (x0 @0)
str x1 (x0 @8)
str x1 (x0 @16)
str x1 (x0 @24)
add x0 x0 @32
; add w1 w1 @1
sub x3 x3 @1
cbnz x3 col-
sub x4 x4 @1
cbnz x4 row-
; second page
; mov x1 @$FF00 ; comment this line to keep colour same (stop flashing page flip!)
mov x1 @$FF00
lsl x1 x1 @16
:draw
mov x4 @height ; rows
:row
mov x3 @(/ width 8)
:col
str x1 (x0 @0)
str x1 (x0 @8)
str x1 (x0 @16)
str x1 (x0 @24)
add x0 x0 @32
; add w1 w1 @1
sub x3 x3 @1
cbnz x3 col-
sub x4 x4 @1
cbnz x4 row-
mov x0 x8
; now we can scroll using the virtual offset message
;; :flip
;; (debug-str "flip" #t)
;; ldr x0 TIMER_CLO:
;; ldr w0 (x0 @0)
;; (debug-reg x0)
;; mov x3 @0
;; bl page-flip:
;; (wait DELAY:)
;; mov x3 @height
;; bl page-flip:
;; (wait DELAY:)
;; b flip-
(debug-str "DONEDONE" #t)
bl enable-irq:
mov x0 @100
(write-value-32 $9E220000) ;scvtf s0 x0
(write-value-32 $1E21C000) ;fsqrt s0 s0
(write-value-32 $9E250000) ; fcvtau x0 s0
bl update-gfx-moire:
:loop
b loop-
(create-send-char)
:dump-regs
(PUSH x30)
(dump-all-regs)
(POP x30)
ret x30
(subr page-flip ([ptr x0]) [x0 x1 x2] {
;pass y value in x3
adr ptr MBOX-VOFFSET-MSG:
mov x2 @4
mov x1 @(* 8 4)
str w1 (ptr @0) ; msg->size
add ptr ptr x2 ; +=4
mov x1 @0
str w1 (ptr @0) ; msg->request/response
add ptr ptr x2 ; +=4
mov x1 @4
lsl x1 x1 @16
movk x1 @$8009
str w1 (ptr @0) ;msg->tag
add ptr ptr x2 ; +=4
mov x1 @8
str w1 (ptr @0) ; msg->value buffer size
add ptr ptr x2 ; +=4
mov x1 @0
str w1 (ptr @0) ; msg->tag request
add ptr ptr x2 ; +=4
str w1 (ptr @0) ; msg->x offset
add ptr ptr x2 ; +=4
mov x1 x3
str w1 (ptr @0) ; msg->y offset
add ptr ptr x2 ; +=4
mov x1 @0
str w1 (ptr @0) ; msg->end tag
adr x8 MBOX-VOFFSET-MSG:
bl send-vcore-msg:
})
; calculate integer square root with linear binary search
; X0 input; X0 output; no stack
(subr isqr ([y x0]
[L x1]
[M x2]
[R x3]) [] {
mov L @0 ; L = 0
add R y @1 ; R = y + 1
:loop
sub x4 R @1 ;r - 1 todo: need subs
cmp L x4
b.eq done+ ; while L != r -1
add M L R ; M = L + R
lsr M M @1 ; M /= 2
mul x4 M M
cmp x4 y
b.le lte:
mov R M
b loop-
:lte
mov L M
b loop-
:done
mov X0 L
})
; pass message address in x8
(subr send-vcore-msg ()[x0 x1 x2 x3 x8]{
ldr x0 VC_MBOX:
:wait
ldr w1 (x0 @mbox-status)
; and with 0x8000_0000
mov x2 @1
lsl x2 x2 @31
and x1 x1 x2
cbnz x1 wait-
; attempt to call the mailbox interface
; upper 28 bits are the address of the message
; expected to be in x8
ldr x0 VC_MBOX:
mov x2 x8
; lower four bits specify the mailbox channel,
; in this case mbox-ch-prop (8)
mov x3 @mbox-ch-prop
orr x2 x2 x3 ; we dont support orr reg-reg-imm yet
; send our message
str w2 (x0 @mbox-write)
; now wait for a response
:wait
ldr w1 (x0 @mbox-status)
; and with 0x4000_0000
mov x2 @1
lsl x2 x2 @30
and x1 x1 x2
cbnz x1 wait-
; check if mbox-read = channel
ldr w1 (x0 @mbox-read)
; (debug-reg x1)
mov x2 @%1111
and x1 x1 x2
sub x1 x1 @mbox-ch-prop
cbnz x1 wait-
})
:enable-irq
msr daifclr @2
ret x30
:disable-irq
msr daifset @2
ret x30
:show-invalid-entry-message
(debug-str "unhandled exception" #t)
(debug-reg x0 x1 x2)
ret x30
/= $800
:vectors
(ventry
sync_invalid_el1t:
irq_invalid_el1t:
fiq_invalid_el1t:
error_invalid_el1t:
sync_invalid_el1h:
el1_irq:
fiq_invalid_el1h:
error_invalid_el1h:
sync_invalid_el0_64:
irq_invalid_el0_64:
fiq_invalid_el0_64:
error_invalid_el0_64:
sync_invalid_el0_32:
irq_invalid_el0_32:
fiq_invalid_el0_32:
error_invalid_el0_32:
)
:sync_invalid_el1t
(handle-invalid-entry SYNC_INVALID_EL1t)
:irq_invalid_el1t
(handle-invalid-entry IRQ_INVALID_EL1t)
:fiq_invalid_el1t
(handle-invalid-entry FIQ_INVALID_EL1t)
:error_invalid_el1t
(handle-invalid-entry ERROR_INVALID_EL1t)
:sync_invalid_el1h
(handle-invalid-entry SYNC_INVALID_EL1h)
:fiq_invalid_el1h
(handle-invalid-entry FIQ_INVALID_EL1h)
:error_invalid_el1h
(handle-invalid-entry ERROR_INVALID_EL1h)
:sync_invalid_el0_64
(handle-invalid-entry SYNC_INVALID_EL0_64)
:irq_invalid_el0_64
(handle-invalid-entry IRQ_INVALID_EL0_64)
:fiq_invalid_el0_64
(handle-invalid-entry FIQ_INVALID_EL0_64)
:error_invalid_el0_64
(handle-invalid-entry ERROR_INVALID_EL0_64)
:sync_invalid_el0_32
(handle-invalid-entry SYNC_INVALID_EL0_32)
:irq_invalid_el0_32
(handle-invalid-entry IRQ_INVALID_EL0_32)
:fiq_invalid_el0_32
(handle-invalid-entry FIQ_INVALID_EL0_32)
:error_invalid_el0_32
(handle-invalid-entry ERROR_INVALID_EL0_32)
:el1_irq
(kernel-entry)
bl handle-irq:
(kernel-exit)
:irq-vector-init
adr x0 vectors:
msr vbar_el1 x0
ret x30
(subr enable-interrupt-controller () [x0 x1] {
;; ldr x0 ENABLE_IRQS_1:
;; mov x1 @SYSTEM_TIMER_IRQ_1
;; str w1 (x0 @0)
ldr x0 ENABLE_IRQS_2:
ldr x1 FAKE-ISR: ; SMI
str w1 (x0 @0)
})
(subr timer-init () [x0 x1] {
ldr x0 TIMER_CLO:
ldr w0 (x0 @0)
(debug-str "timer val" #t)
(debug-reg x0)
ldr x1 DELAY2:
add w1 w0 w1
(debug-reg w1)
ldr x0 TIMER_C1:
str w1 (x0 @0)
})
(subr get-back-buffer () [x1 x2] {
ldr x0 VMEM:
mov x1 x0
;x0 now has the base video memory pointer
; if CURRENT-PAGE is 0 then we want to send
; pointer + ((height / 2) * width)
ldr x0 CURRENT-PAGE:
cbz x0 done+
; (debug-str "HERE" #t)
ldr x2 FRAME-SIZE:
add x1 x1 x2
:done ; return in x0
mov x0 x1
})
; renders moire pattern
(subr update-gfx-moire (
[x x1] ; current x and y pixels
[y x2]
[colour w3]
[temp x4] ; temporary / intermediates
[dx x5]
[dy x6]
[dx2 x7]
[dy2 x8]
[vptr x9] ; video memory pointer
[pAX x10] ; point a and b co-ords
[pAY x11]
[pBX x12]
[pBY x13]
[row-size x14]
[fc x15] ; frame-count
) [x0 x1 x2 x3 x4]
{
; ldr row-size ROW-SIZE:
:loop
ldr w0 finished-rendering:
cbnz x0 loop-
; (debug-str "render" #t)
bl get-back-buffer:
mov vptr x0
ldr fc frame-count:
; lsr fc fc @1
mov x0 @$3FFF
and fc fc x0
mov x0 @4
mul fc fc x0
adr x0 cx1lookup:
add x0 x0 fc
ldr w10 (X0 @0)
adr x0 cy1lookup:
add x0 x0 fc
ldr w11 (X0 @0)
adr x0 cx2lookup:
add x0 x0 fc
ldr w12 (X0 @0)
adr x0 cy2lookup:
add x0 x0 fc
ldr w13 (X0 @0)
;; ldr pAX pointAX:
;; ldr pAY pointAY:
;; ldr pBX pointBX:
;; ldr pBY pointBY:
mov y @(/ height 2)
; mov colour @$FFFF
:y-loop
mov x @(/ width 2)
; calculate distance of this pixel to point a and b
mov dy y
mov temp pAY
sub dy dy temp
mul dy dy dy
mov dy2 y
mov temp pBY
sub dy2 dy2 temp
mul dy2 dy2 dy2
:x-loop
mov dx x
mov temp pAX
sub dx dx temp
mul dx dx dx
mov dx2 x
mov temp pBX
sub dx2 dx2 temp
mul dx2 dx2 dx2
add x0 dx dy ; sqrt dx + dy
;; (write-value-32 $9E220000) ;scvtf s0 x0
;; (write-value-32 $1E21C000) ;fsqrt s0 s0
;; (write-value-32 $9E250000) ; fcvtau x0 s0
mov temp x0
add x0 dx2 dy2 ; sqrt dx2 + dy2
;; (write-value-32 $9E220000) ;scvtf s0 x0
;; (write-value-32 $1E21C000) ;fsqrt s0 s0
;; (write-value-32 $9E250000) ; fcvtau x0 s0
eor colour temp x0 ; xor sqrts
lsr colour colour @1
mov temp @1
and colour colour temp
cbnz colour other+
mov colour @$ffFF
lsl x3 x3 @16
movk colour @$FFFF
b store+
:other
mov colour @$0000
:store
str colour (vptr @0)
sub x x @1
add vptr vptr @4
cbnz x x-loop-
sub y y @1
sub vptr vptr @(* (/ width 2) 4)
ldr temp ROW-SIZE:
add vptr vptr temp
cbnz y y-loop-
mov temp @1
adr x0 finished-rendering:
str temp (w0 @0)
ldr temp frame-count: ; frame-count++
add temp temp @1
adr x0 frame-count:
str temp (x0 @0)
b loop-
})
(subr update-gfx-moire-simd (
[x x1]
[y x2]
[colour w3]
[temp x4]
[dx x5]
[dy x6]
[dx2 x7]
[dy2 x8]
[vptr x9]
[pAX x10]
[pAY x11]
[pBX x12]
[pBY x13]
[row-size x14]
[fc x15]
) [x0 x1 x2 x3 x4]
{
; for the full-screen moire simd we need to
; do as many computations at once as possible
; eg calcuate many pixels at once. the upper bounds
; of possible values will affect how many lanes we can
; use in the registers
:temp
; the basic algorithm is as such
; for each pixel;
; calculate the distance to the two circles
; dy = (square (y - cy))
; dx = (square (x - cx))
; dy2 = (square (y - cy2))
; dx2 = (square (x - cx2))
; srt1 = (sqrt (dx + dy))
; srt2 = (sqrt (dx2 + dy2))
; then we xor together and shift the result by n bits
; (srt1 ^ srt2) >> 4
; and finally select colour based on bit 0
; the maximum value of the squares will be 1024 * 1024 = $100000 or 21 bits (24 bits)
; we can most easily fit 4 of these values into the 128 bit vector reg.
; this actually works out quite nicely since we have 4 values to calculate;
; - load v1 with 2x copies of y and 2x copies of x (FP)
; - load v2 with cy and cy2, cx and cx2
; - v1 = v1 - v2 (minus) FSUB
; - replicate v1 into v2
; - v1 = v1 * v2 (square) FMUL
; - now we need to add v1 using v1.0 + v1.1 (faddp, we don't care about the second vector)
; so, addp v1 v1 v1
; this results in a vector v1 of [(dx+dy);(dx2+dy2);(dx+dy);(dx2+dy2)]
; now we can sqrt them
; fsqrt v1 v1
; convert to unsigned ints
; FCVTPU v1.2s v1.2s
; we don't want to xor since we have 2 we aren't interested in
; at this point we can get them back itno non-fp land
; mov 64 bits out which is our two values
; fmov x0 d0
; replicate and shift
; mov x1 x0
; lsl x1 @32
; xor them
; eors w0 w1
; now csel out the coluor
; ldr row-size ROW-SIZE:
:loop
ldr w0 finished-rendering:
cbnz x0 loop-
; (debug-str "render" #t)
bl get-back-buffer:
mov vptr x0
ldr fc frame-count:
; lsr fc fc @1
mov x0 @$3FFF
and fc fc x0
mov x0 @4
mul fc fc x0
adr x0 cx1lookup:
add x0 x0 fc
ldr w10 (X0 @0)
adr x0 cy1lookup:
add x0 x0 fc
ldr w11 (X0 @0)
adr x0 cx2lookup:
add x0 x0 fc
ldr w12 (X0 @0)
adr x0 cy2lookup:
add x0 x0 fc
ldr w13 (X0 @0)
;; ldr pAX pointAX:
;; ldr pAY pointAY:
;; ldr pBX pointBX:
;; ldr pBY pointBY:
;mov y @(/ height 2)
mov y @height
; mov colour @$FFFF
:y-loop
;mov x @(/ width 2)
mov x @width
; calculate distance of this pixel to point a and b
mov dy y
mov temp pAY
sub dy dy temp
mul dy dy dy
mov dy2 y
mov temp pBY
sub dy2 dy2 temp
mul dy2 dy2 dy2
:x-loop
mov dx x
mov temp pAX
sub dx dx temp
mul dx dx dx
mov dx2 x
mov temp pBX
sub dx2 dx2 temp
mul dx2 dx2 dx2
add x0 dx dy ; sqrt dx + dy
(write-value-32 $9E220000) ;scvtf s0 x0
(write-value-32 $1E21C000) ;fsqrt s0 s0
(write-value-32 $9E250000) ; fcvtau x0 s0
mov temp x0
add x0 dx2 dy2 ; sqrt dx2 + dy2
(write-value-32 $9E220000) ;scvtf s0 x0
(write-value-32 $1E21C000) ;fsqrt s0 s0
(write-value-32 $9E250000) ; fcvtau x0 s0
eor colour temp x0 ; xor sqrts
lsr colour colour @4
mov temp @1
and colour colour temp
cbnz colour other+
mov colour @$ffFF
lsl x3 x3 @16
movk colour @$FFFF
b store+
:other
mov colour @$0000
:store
str colour (vptr @0)
sub x x @1
add vptr vptr @4
cbnz x x-loop-
sub y y @1
;sub vptr vptr @(* (/ width 2) 4)
mov temp @(* width 4)
sub vptr vptr temp
ldr temp ROW-SIZE:
add vptr vptr temp
cbnz y y-loop-
mov temp @1
adr x0 finished-rendering:
str temp (w0 @0)
ldr temp frame-count: ; frame-count++
add temp temp @1