forked from verilator/verilator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathverilated.cpp
3005 lines (2803 loc) · 117 KB
/
verilated.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// -*- mode: C++; c-file-style: "cc-mode" -*-
//*************************************************************************
//
// Code available from: https://verilator.org
//
// Copyright 2003-2022 by Wilson Snyder. This program is free software; you can
// redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//=========================================================================
///
/// \file
/// \brief Verilated general routine implementation code
///
/// This file must be compiled and linked against all Verilated objects
/// (all code created from Verilator).
///
/// Verilator always adds this file to the Makefile for the linker.
///
/// Those macro/function/variable starting or ending in _ are internal,
/// however many of the other function/macros here are also internal.
///
//=========================================================================
// Internal note:
//
// verilated.o may exist both in --lib-create (incrementally linked .a/.so)
// and the main module. Both refer to the same instance of static
// variables/VL_THREAD_LOCAL in verilated.o such as Verilated, or
// VerilatedImpData. This is important to share that state, but the
// sharing may cause a double-free error when shutting down because the
// loader will insert a constructor/destructor at each reference to
// verilated.o, resulting in at runtime constructors/destructors being
// called multiple times.
//
// To avoid the trouble:
// * Statics declared inside functions. The compiler will wrap
// the construction in must-be-one-time checks.
// * Or, use only C++20 constinit types. (TODO: Make a VL_CONSTINIT).
// * Or, use types that are multi-constructor safe.
// * Or, the static should be of a union, which will avoid compiler
// construction, and appropriately check for duplicate construction.
// * Or, code is not linked in protected library. e.g. the VPI
// and DPI libraries are not needed there.
//=========================================================================
#define VERILATOR_VERILATED_CPP_
#include "verilatedos.h"
#include "verilated_imp.h"
#include "verilated_config.h"
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <sstream>
#include <sys/stat.h> // mkdir
#include <list>
#include <limits>
#include <utility>
// clang-format off
#if defined(_WIN32) || defined(__MINGW32__)
# include <direct.h> // mkdir
#endif
// clang-format on
// Max characters in static char string for VL_VALUE_STRING
constexpr unsigned VL_VALUE_STRING_MAX_WIDTH = 8192;
//===========================================================================
// Static sanity checks
// Each message names the type actually being checked, so that a failure
// points at the right typedef.
static_assert(sizeof(vluint8_t) == 1, "vluint8_t is missized");
static_assert(sizeof(vluint16_t) == 2, "vluint16_t is missized");
static_assert(sizeof(vluint32_t) == 4, "vluint32_t is missized");
static_assert(sizeof(vluint64_t) == 8, "vluint64_t is missized");
//===========================================================================
// Global variables
// Internal note: Globals may multi-construct, see verilated.cpp top.
// Fast path, keep together
// Definition of the static debug-level member; starts at 0
int Verilated::s_debug = 0;
// Definition of the static "last context" pointer; starts as nullptr
VerilatedContext* Verilated::s_lastContextp = nullptr;
// Keep below together in one cache line
// Internal note: Globals may multi-construct, see verilated.cpp top.
// Definition of the per-thread (VL_THREAD_LOCAL) state member
VL_THREAD_LOCAL Verilated::ThreadLocal Verilated::t_s;
//===========================================================================
// User definable functions
// TODO: a future version of the API may pass a structure, so that the
// calling arguments can be extended without changing these signatures
#ifndef VL_USER_FINISH ///< Define this to override the vl_finish function
/// Default handler for Verilog $finish.
/// Prints a notice and marks the thread's context as finished; if the
/// context was already marked finished, runs the flush and exit callbacks
/// and terminates the process with exit code 0.
void vl_finish(const char* filename, int linenum, const char* hier) VL_MT_UNSAFE {
    static_cast<void>(hier);  // Unused
    // Not VL_PRINTF_MT, already on main thread
    VL_PRINTF("- %s:%d: Verilog $finish\n", filename, linenum);
    auto* const ctxp = Verilated::threadContextp();
    if (!ctxp->gotFinish()) {
        // First $finish: just record it
        ctxp->gotFinish(true);
        return;
    }
    // Not VL_PRINTF_MT, already on main thread
    VL_PRINTF("- %s:%d: Second verilog $finish, exiting\n", filename, linenum);
    Verilated::runFlushCallbacks();
    Verilated::runExitCallbacks();
    std::exit(0);
}
#endif
#ifndef VL_USER_STOP ///< Define this to override the vl_stop function
/// Default handler for Verilog $stop.
/// Marks the thread's context as errored and finished, then either escalates
/// to vl_fatal (when the context's fatalOnError is set) or prints an error
/// line and runs the flush callbacks.
void vl_stop(const char* filename, int linenum, const char* hier) VL_MT_UNSAFE {
    const char* const msg = "Verilog $stop";
    auto* const ctxp = Verilated::threadContextp();
    ctxp->gotError(true);
    ctxp->gotFinish(true);
    if (ctxp->fatalOnError()) {
        vl_fatal(filename, linenum, hier, msg);
        return;
    }
    // Not VL_PRINTF_MT, already on main thread
    const bool haveFile = filename && filename[0];
    if (haveFile) {
        VL_PRINTF("%%Error: %s:%d: %s\n", filename, linenum, msg);
    } else {
        VL_PRINTF("%%Error: %s\n", msg);
    }
    Verilated::runFlushCallbacks();
}
#endif
#ifndef VL_USER_FATAL ///< Define this to override the vl_fatal function
/// Default handler for fatal errors.
/// Marks the thread's context as errored and finished, prints the message
/// (with file:line when a non-empty filename is given), runs the flush and
/// exit callbacks, and aborts the process.
void vl_fatal(const char* filename, int linenum, const char* hier, const char* msg) VL_MT_UNSAFE {
    static_cast<void>(hier);  // Unused
    auto* const ctxp = Verilated::threadContextp();
    ctxp->gotError(true);
    ctxp->gotFinish(true);
    // Not VL_PRINTF_MT, already on main thread
    const bool haveFile = filename && filename[0];
    if (haveFile) {
        VL_PRINTF("%%Error: %s:%d: %s\n", filename, linenum, msg);
    } else {
        VL_PRINTF("%%Error: %s\n", msg);
    }
    Verilated::runFlushCallbacks();

    VL_PRINTF("Aborting...\n");  // Not VL_PRINTF_MT, already on main thread
    // Flush again, in case the VL_PRINTF above did something needing a flush
    Verilated::runFlushCallbacks();

    // Last chance for user callbacks before termination
    Verilated::runExitCallbacks();
    std::abort();
}
#endif
#ifndef VL_USER_STOP_MAYBE ///< Define this to override the vl_stop_maybe function
/// Count an error, then either ignore the $stop (when 'maybe' is set and the
/// context's error count is still below its error limit) or forward to vl_stop.
void vl_stop_maybe(const char* filename, int linenum, const char* hier, bool maybe) VL_MT_UNSAFE {
    auto* const ctxp = Verilated::threadContextp();
    ctxp->errorCountInc();
    const bool ignored = maybe && ctxp->errorCount() < ctxp->errorLimit();
    if (!ignored) {
        vl_stop(filename, linenum, hier);
        return;
    }
    // Not VL_PRINTF_MT, already on main thread
    VL_PRINTF("-Info: %s:%d: %s\n", filename, linenum,
              "Verilog $stop, ignored due to +verilator+error+limit");
}
#endif
#ifndef VL_USER_WARN ///< Define this to override the vl_warn function
/// Default handler for warnings.
/// Prints the message (with file:line when a non-empty filename is given)
/// and runs the flush callbacks.
void vl_warn(const char* filename, int linenum, const char* hier, const char* msg) VL_MT_UNSAFE {
    static_cast<void>(hier);  // Unused
    // Not VL_PRINTF_MT, already on main thread
    const bool haveFile = filename && filename[0];
    if (!haveFile) {
        VL_PRINTF("%%Warning: %s\n", msg);
    } else {
        VL_PRINTF("%%Warning: %s:%d: %s\n", filename, linenum, msg);
    }
    Verilated::runFlushCallbacks();
}
#endif
//===========================================================================
// Wrapper to call certain functions via messages when multithreaded
/// Thread-safe front end for vl_finish.
/// When VL_THREADED is defined the call is wrapped in a VerilatedMsg and
/// handed to VerilatedThreadMsgQueue::post; otherwise vl_finish is called directly.
void VL_FINISH_MT(const char* filename, int linenum, const char* hier) VL_MT_SAFE {
#ifdef VL_THREADED
    const auto deferred = [filename, linenum, hier]() { vl_finish(filename, linenum, hier); };
    VerilatedThreadMsgQueue::post(VerilatedMsg{deferred});
#else
    vl_finish(filename, linenum, hier);
#endif
}
/// Thread-safe front end for vl_stop_maybe.
/// When VL_THREADED is defined the call is wrapped in a VerilatedMsg and
/// handed to VerilatedThreadMsgQueue::post; otherwise vl_stop_maybe is called directly.
void VL_STOP_MT(const char* filename, int linenum, const char* hier, bool maybe) VL_MT_SAFE {
#ifdef VL_THREADED
    const auto deferred = [filename, linenum, hier, maybe]() {
        vl_stop_maybe(filename, linenum, hier, maybe);
    };
    VerilatedThreadMsgQueue::post(VerilatedMsg{deferred});
#else
    vl_stop_maybe(filename, linenum, hier, maybe);
#endif
}
/// Thread-safe front end for vl_fatal.
/// When VL_THREADED is defined the call is wrapped in a VerilatedMsg and
/// handed to VerilatedThreadMsgQueue::post; otherwise vl_fatal is called directly.
///
/// The message text is copied into the posted closure (as VL_PRINTF_MT does
/// for its text), because callers commonly pass the c_str() of a local
/// std::string that may be gone by the time the closure runs.
/// NOTE(review): filename and hier are still captured as raw pointers, on the
/// assumption they refer to storage that outlives the call - confirm.
void VL_FATAL_MT(const char* filename, int linenum, const char* hier, const char* msg) VL_MT_SAFE {
#ifdef VL_THREADED
    const bool hasMsg = (msg != nullptr);  // Preserve a null msg as null
    const std::string msgCopy{hasMsg ? msg : ""};
    VerilatedThreadMsgQueue::post(VerilatedMsg{[filename, linenum, hier, hasMsg, msgCopy]() {  //
        vl_fatal(filename, linenum, hier, hasMsg ? msgCopy.c_str() : nullptr);
    }});
#else
    vl_fatal(filename, linenum, hier, msg);
#endif
}
/// Thread-safe front end for vl_warn.
/// When VL_THREADED is defined the call is wrapped in a VerilatedMsg and
/// handed to VerilatedThreadMsgQueue::post; otherwise vl_warn is called directly.
///
/// The message text is copied into the posted closure (as VL_PRINTF_MT does
/// for its text), because callers may pass the c_str() of a local std::string
/// that may be gone by the time the closure runs.
/// NOTE(review): filename and hier are still captured as raw pointers, on the
/// assumption they refer to storage that outlives the call - confirm.
void VL_WARN_MT(const char* filename, int linenum, const char* hier, const char* msg) VL_MT_SAFE {
#ifdef VL_THREADED
    const bool hasMsg = (msg != nullptr);  // Preserve a null msg as null
    const std::string msgCopy{hasMsg ? msg : ""};
    VerilatedThreadMsgQueue::post(VerilatedMsg{[filename, linenum, hier, hasMsg, msgCopy]() {  //
        vl_warn(filename, linenum, hier, hasMsg ? msgCopy.c_str() : nullptr);
    }});
#else
    vl_warn(filename, linenum, hier, msg);
#endif
}
//===========================================================================
// Debug prints
// sprintf but return as string (this isn't fast, for print messages only)
/// vsprintf-style formatting that returns the result as a std::string.
/// @param formatp  printf-style format string
/// @param ap       arguments for the format; consumed by this call
/// @return the formatted text, or an empty string if the formatter reports
///         an error (negative result) or produces no characters
std::string _vl_string_vprintf(const char* formatp, va_list ap) VL_MT_SAFE {
    // First pass on a copy of the argument list, only to learn the length
    va_list aq;
    va_copy(aq, ap);
    // Keep the result signed: a negative (error) return must not wrap
    // around into a huge unsigned length
    const int len = VL_VSNPRINTF(nullptr, 0, formatp, aq);
    va_end(aq);
    if (VL_UNLIKELY(len < 1)) return "";

    // Format directly into the string's storage; one extra character is
    // reserved for the terminating NUL and trimmed afterwards
    std::string out(static_cast<size_t>(len) + 1, '\0');
    VL_VSNPRINTF(&out[0], out.size(), formatp, ap);
    out.resize(static_cast<size_t>(len));
    return out;
}
/// Return the next value of a process-wide, monotonically increasing counter
/// (first call returns 1). The counter is atomic when VL_THREADED is defined.
vluint64_t _vl_dbg_sequence_number() VL_MT_SAFE {
#ifdef VL_THREADED
    static std::atomic<vluint64_t> s_counter;
#else
    static vluint64_t s_counter = 0;
#endif
    const vluint64_t next = ++s_counter;
    return next;
}
/// Return a small integer identifying the calling thread.
/// With VL_THREADED each thread receives the next value of a shared counter
/// the first time it calls this function; without it the result is always 0.
vluint32_t VL_THREAD_ID() VL_MT_SAFE {
#ifndef VL_THREADED
    return 0;
#else
    // std::this_thread::get_id would also work, but it returns a
    // hard-to-read number and is very slow
    static std::atomic<vluint32_t> s_idCounter{0};
    static VL_THREAD_LOCAL vluint32_t t_thisThreadId = ++s_idCounter;
    return t_thisThreadId;
#endif
}
/// Print a printf-formatted debug message, prefixed with the calling thread's
/// id and a sequence number.
void VL_DBG_MSGF(const char* formatp, ...) VL_MT_SAFE {
    // C printf formats are used rather than operator<<, to avoid the heavy
    // includes that would otherwise be required in every Verilated module
    va_list args;
    va_start(args, formatp);
    const std::string text = _vl_string_vprintf(formatp, args);
    va_end(args);
    // Deliberately VL_PRINTF and not VL_PRINTF_MT, so that VL_DBG_MSGF may be
    // called from within the guts of the thread execution machinery (the
    // output goes to the screen and not into the queues being debugged)
    VL_PRINTF("-V{t%u,%" PRIu64 "}%s", VL_THREAD_ID(), _vl_dbg_sequence_number(), text.c_str());
}
#ifdef VL_THREADED
/// printf-style output for threaded builds: the text is formatted
/// immediately, then the actual VL_PRINTF is handed to
/// VerilatedThreadMsgQueue::post wrapped in a VerilatedMsg.
void VL_PRINTF_MT(const char* formatp, ...) VL_MT_SAFE {
    va_list args;
    va_start(args, formatp);
    const std::string text = _vl_string_vprintf(formatp, args);
    va_end(args);
    // The closure owns its own copy of the formatted text
    const auto deferred = [text]() { VL_PRINTF("%s", text.c_str()); };
    VerilatedThreadMsgQueue::post(VerilatedMsg{deferred});
}
#endif
//===========================================================================
// Random -- Mostly called at init time, so not inline.
/// Return 32 random bits obtained from the system library generator.
/// Used only to construct the seed for Verilator's PRNG.
/// Two library calls are combined; the shift and xor are performed on
/// unsigned 32-bit values so the left shift cannot overflow a signed type
/// on platforms where long/int is 32 bits. The low 32 bits of the result
/// are the same as those of the equivalent signed expression.
static vluint32_t vl_sys_rand32() VL_MT_UNSAFE {
    static VerilatedMutex s_mutex;
    const VerilatedLockGuard lock{s_mutex};  // Otherwise rand is unsafe
#if defined(_WIN32) && !defined(__CYGWIN__)
    // Windows doesn't have lrand48(), although Cygwin does.
    return (static_cast<vluint32_t>(std::rand()) << 16) ^ static_cast<vluint32_t>(std::rand());
#else
    return (static_cast<vluint32_t>(lrand48()) << 16) ^ static_cast<vluint32_t>(lrand48());
#endif
}
// Return the next 64-bit value from a per-thread pseudo-random generator.
// The two-word generator state is thread-local and is (re)seeded from the
// thread's context whenever the global seed epoch differs from the epoch
// this thread last seeded at.
vluint64_t vl_rand64() VL_MT_SAFE {
static VL_THREAD_LOCAL vluint64_t t_state[2];
static VL_THREAD_LOCAL vluint32_t t_seedEpoch = 0;
// For speed, we use a thread-local epoch number to know when to reseed
// A thread always belongs to a single context, so this works out ok
if (VL_UNLIKELY(t_seedEpoch != VerilatedContextImp::randSeedEpoch())) {
// Set epoch before state, to avoid race case with new seeding
t_seedEpoch = VerilatedContextImp::randSeedEpoch();
// Both state words start from the same 64-bit default seed
t_state[0] = Verilated::threadContextp()->impp()->randSeedDefault64();
t_state[1] = t_state[0];
// Fix state as algorithm is slow to randomize if many zeros
// This causes a loss of ~ 1 bit of seed entropy, no big deal
if (VL_COUNTONES_I(t_state[0]) < 10) t_state[0] = ~t_state[0];
if (VL_COUNTONES_I(t_state[1]) < 10) t_state[1] = ~t_state[1];
}
// Xoroshiro128+ algorithm
// The returned value is the sum of the two state words before they are updated
const vluint64_t result = t_state[0] + t_state[1];
t_state[1] ^= t_state[0];
// (x << 55) | (x >> 9) is a 64-bit rotate left by 55
t_state[0] = (((t_state[0] << 55) | (t_state[0] >> 9)) ^ t_state[1] ^ (t_state[1] << 14));
// (x << 36) | (x >> 28) is a 64-bit rotate left by 36
t_state[1] = (t_state[1] << 36) | (t_state[1] >> 28);
return result;
}
/// Fill every word of an obits-wide value with output from vl_rand64.
/// The most-significant word is not masked to obits (it is left unclean).
/// @return outwp
WDataOutP VL_RANDOM_W(int obits, WDataOutP outwp) VL_MT_SAFE {
    const int words = VL_WORDS_I(obits);
    int word = 0;
    while (word < words) {
        outwp[word] = vl_rand64();
        ++word;
    }
    return outwp;
}
/// $random with a seed argument: applies seedr as the thread context's seed,
/// stores a freshly drawn value back into seedr as the new seed, and returns
/// a second freshly drawn value.
IData VL_RANDOM_SEEDED_II(IData& seedr) VL_MT_SAFE {
    const int newSeed = static_cast<int>(seedr);
    Verilated::threadContextp()->randSeed(newSeed);
    const IData nextSeed = VL_RANDOM_I();
    seedr = nextSeed;
    return VL_RANDOM_I();
}
/// $urandom with a seed argument: applies seed as the thread context's seed
/// and returns a freshly drawn value.
IData VL_URANDOM_SEEDED_II(IData seed) VL_MT_SAFE {
    const int newSeed = static_cast<int>(seed);
    Verilated::threadContextp()->randSeed(newSeed);
    return VL_RANDOM_I();
}
/// Produce the reset value for a signal of obits bits, according to the
/// thread context's randReset setting:
/// 0 gives all zeros, 1 gives all ones, anything else gives a random value.
/// The result is masked to obits.
IData VL_RAND_RESET_I(int obits) VL_MT_SAFE {
    const auto mode = Verilated::threadContextp()->randReset();
    if (mode == 0) return 0;
    const IData raw = (mode == 1) ? ~IData{0} : VL_RANDOM_I();  // else randomize
    return raw & VL_MASK_I(obits);
}
/// Quad-width variant of VL_RAND_RESET_I: according to the thread context's
/// randReset setting, 0 gives all zeros, 1 gives all ones, anything else
/// gives a random value. The result is masked to obits.
QData VL_RAND_RESET_Q(int obits) VL_MT_SAFE {
    const auto mode = Verilated::threadContextp()->randReset();
    if (mode == 0) return 0;
    const QData raw = (mode == 1) ? ~QData{0} : VL_RANDOM_Q();  // else randomize
    return raw & VL_MASK_Q(obits);
}
/// Wide variant of VL_RAND_RESET_I: each word gets a 32-bit reset value, and
/// the most-significant word is additionally masked to obits.
/// @return outwp
WDataOutP VL_RAND_RESET_W(int obits, WDataOutP outwp) VL_MT_SAFE {
    const int topWord = VL_WORDS_I(obits) - 1;
    for (int word = 0; word < topWord; ++word) outwp[word] = VL_RAND_RESET_I(32);
    outwp[topWord] = VL_RAND_RESET_I(32) & VL_MASK_E(obits);
    return outwp;
}
/// Clear every word of an obits-wide value.
/// @return outwp
WDataOutP VL_ZERO_RESET_W(int obits, WDataOutP outwp) VL_MT_SAFE {
    const int words = VL_WORDS_I(obits);
    int word = 0;
    while (word < words) {
        outwp[word] = 0;
        ++word;
    }
    return outwp;
}
//===========================================================================
// Debug
/// Debug helper: print an lbits-wide value as hex words, most-significant
/// word first, on a single line.
void _vl_debug_print_w(int lbits, const WDataInP iwp) VL_MT_SAFE {
    VL_PRINTF_MT(" Data: w%d: ", lbits);
    int word = VL_WORDS_I(lbits);
    while (--word >= 0) VL_PRINTF_MT("%08x ", iwp[word]);
    VL_PRINTF_MT("\n");
}
//===========================================================================
// Slow math
// Wide unsigned division / modulus.
// Writes the quotient lwp / rwp into owp, or the remainder lwp % rwp when
// is_modulus is set, and returns owp. All lbits-wide operands are arrays of
// 32-bit words. If either the divisor or the dividend is zero, owp is
// returned as all zeros.
WDataOutP _vl_moddiv_w(int lbits, WDataOutP owp, const WDataInP lwp, const WDataInP rwp,
bool is_modulus) VL_MT_SAFE {
// See Knuth Algorithm D. Computes u/v = q.r
// This isn't massively tuned, as wide division is rare
// for debug see V3Number version
// Requires clean input
const int words = VL_WORDS_I(lbits);
// Start from a zero result; quotient digits are stored below as computed
for (int i = 0; i < words; ++i) owp[i] = 0;
// Find MSB and check for zero.
const int umsbp1 = VL_MOSTSETBITP1_W(words, lwp); // dividend
const int vmsbp1 = VL_MOSTSETBITP1_W(words, rwp); // divisor
if (VL_UNLIKELY(vmsbp1 == 0) // rwp==0 so division by zero. Return 0.
|| VL_UNLIKELY(umsbp1 == 0)) { // 0/x so short circuit and return 0
return owp;
}
const int uw = VL_WORDS_I(umsbp1); // aka "m" in the algorithm
const int vw = VL_WORDS_I(vmsbp1); // aka "n" in the algorithm
if (vw == 1) { // Single divisor word breaks rest of algorithm
// Schoolbook long division by one 32-bit word; k carries the running remainder
vluint64_t k = 0;
for (int j = uw - 1; j >= 0; --j) {
const vluint64_t unw64 = ((k << 32ULL) + static_cast<vluint64_t>(lwp[j]));
owp[j] = unw64 / static_cast<vluint64_t>(rwp[0]);
k = unw64 - static_cast<vluint64_t>(owp[j]) * static_cast<vluint64_t>(rwp[0]);
}
if (is_modulus) {
// Replace the quotient with the final remainder
owp[0] = k;
for (int i = 1; i < words; ++i) owp[i] = 0;
}
return owp;
}
// +1 word as we may shift during normalization
vluint32_t un[VL_MULS_MAX_WORDS + 1]; // Fixed size, as MSVC++ doesn't allow [words] here
vluint32_t vn[VL_MULS_MAX_WORDS + 1]; // v normalized
// Zero for ease of debugging and to save having to zero for shifts
// Note +1 as loop will use extra word
for (int i = 0; i < words + 1; ++i) { un[i] = vn[i] = 0; }
// Algorithm requires divisor MSB to be set
// Copy and shift to normalize divisor so MSB of vn[vw-1] is set
const int s = 31 - VL_BITBIT_I(vmsbp1 - 1); // shift amount (0...31)
const vluint32_t shift_mask = s ? 0xffffffff : 0; // otherwise >> 32 won't mask the value
for (int i = vw - 1; i > 0; --i) {
vn[i] = (rwp[i] << s) | (shift_mask & (rwp[i - 1] >> (32 - s)));
}
vn[0] = rwp[0] << s;
// Copy and shift dividend by same amount; may set new upper word
if (s) {
un[uw] = lwp[uw - 1] >> (32 - s);
} else {
un[uw] = 0;
}
for (int i = uw - 1; i > 0; --i) {
un[i] = (lwp[i] << s) | (shift_mask & (lwp[i - 1] >> (32 - s)));
}
un[0] = lwp[0] << s;
// Main loop
// Produces one quotient word per iteration, most-significant first
for (int j = uw - vw; j >= 0; --j) {
// Estimate
// qhat is a trial quotient digit from the top two dividend words and
// the top divisor word; rhat is the matching remainder
const vluint64_t unw64 = (static_cast<vluint64_t>(un[j + vw]) << 32ULL
| static_cast<vluint64_t>(un[j + vw - 1]));
vluint64_t qhat = unw64 / static_cast<vluint64_t>(vn[vw - 1]);
vluint64_t rhat = unw64 - qhat * static_cast<vluint64_t>(vn[vw - 1]);
again:
// Reduce the estimate while it does not fit in 32 bits, or is too large
// when checked against the second divisor word
if (qhat >= 0x100000000ULL || ((qhat * vn[vw - 2]) > ((rhat << 32ULL) + un[j + vw - 2]))) {
qhat = qhat - 1;
rhat = rhat + vn[vw - 1];
if (rhat < 0x100000000ULL) goto again;
}
// Multiply the divisor by qhat and subtract from the dividend window
vlsint64_t t = 0; // Must be signed
vluint64_t k = 0;
for (int i = 0; i < vw; ++i) {
const vluint64_t p = qhat * vn[i]; // Multiply by estimate
t = un[i + j] - k - (p & 0xFFFFFFFFULL); // Subtract
un[i + j] = t;
k = (p >> 32ULL) - (t >> 32ULL);
}
t = un[j + vw] - k;
un[j + vw] = t;
owp[j] = qhat; // Save quotient digit
if (t < 0) {
// Over subtracted; correct by adding back
owp[j]--;
k = 0;
for (int i = 0; i < vw; ++i) {
t = static_cast<vluint64_t>(un[i + j]) + static_cast<vluint64_t>(vn[i]) + k;
un[i + j] = t;
k = t >> 32ULL;
}
un[j + vw] = un[j + vw] + k;
}
}
if (is_modulus) { // modulus
// Need to reverse normalization on copy to output
// un[] now holds the (still shifted) remainder; this overwrites the quotient in owp
for (int i = 0; i < vw; ++i) {
owp[i] = (un[i] >> s) | (shift_mask & (un[i + 1] << (32 - s)));
}
for (int i = vw; i < words; ++i) owp[i] = 0;
return owp;
} else { // division
return owp;
}
}
/// Wide unsigned power: owp = lwp ** rwp, by square-and-multiply over the
/// rbits bits of the exponent.
/// obits==lbits, rbits can be different.
/// @return owp
WDataOutP VL_POW_WWW(int obits, int, int rbits, WDataOutP owp, const WDataInP lwp,
                     const WDataInP rwp) VL_MT_SAFE {
    const int words = VL_WORDS_I(obits);
    // Result starts as 1
    owp[0] = 1;
    for (int word = 1; word < words; ++word) owp[word] = 0;
    // Fixed sizes, as MSVC++ doesn't allow [words] here
    // cppcheck-suppress variableScope
    VlWide<VL_MULS_MAX_WORDS> powstore;  // lwp ** (2 ** bit)
    VlWide<VL_MULS_MAX_WORDS> lastpowstore;  // Copy of powstore used as multiply input
    VlWide<VL_MULS_MAX_WORDS> lastoutstore;  // Copy of owp used as multiply input
    // cppcheck-suppress variableScope
    VL_ASSIGN_W(obits, powstore, lwp);
    for (int bit = 0; bit < rbits; ++bit) {
        if (bit != 0) {
            // Square the running power
            VL_ASSIGN_W(obits, lastpowstore, powstore);
            VL_MUL_W(words, powstore, lastpowstore, lastpowstore);
        }
        if (VL_BITISSET_W(rwp, bit)) {
            // Exponent bit set: multiply the result by the running power
            VL_ASSIGN_W(obits, lastoutstore, owp);
            VL_MUL_W(words, owp, lastoutstore, powstore);
        }
    }
    return owp;
}
/// Wide power with a quad exponent: widens rhs into a wide temporary and
/// forwards to VL_POW_WWW.
WDataOutP VL_POW_WWQ(int obits, int lbits, int rbits, WDataOutP owp, const WDataInP lwp,
                     QData rhs) VL_MT_SAFE {
    VlWide<VL_WQ_WORDS_E> exponentw;
    VL_SET_WQ(exponentw, rhs);
    return VL_POW_WWW(obits, lbits, rbits, owp, lwp, exponentw);
}
/// Quad unsigned power with a wide exponent: returns lhs ** rwp computed by
/// square-and-multiply over the rbits bits of the exponent.
/// A zero base returns 0 immediately.
QData VL_POW_QQW(int, int, int rbits, QData lhs, const WDataInP rwp) VL_MT_SAFE {
    // Skip check for rhs == 0, as short-circuit doesn't save time
    if (VL_UNLIKELY(lhs == 0)) return 0;
    QData result = 1ULL;
    QData factor = lhs;  // lhs ** (2 ** bit)
    for (int bit = 0; bit < rbits; ++bit) {
        if (bit != 0) factor = factor * factor;
        if (VL_BITISSET_W(rwp, bit)) result *= factor;
    }
    return result;
}
/// Wide power with optional signedness of base (lsign) and exponent (rsign).
/// obits==lbits, rbits can be different. Requires clean input.
///
/// When the exponent is signed and negative the result is:
///   - 0 for base 0 (the "X" case),
///   - 1 for base 1,
///   - for a signed base of -1: -1 when the exponent is odd, 1 when even,
///   - 0 for any other base.
/// Otherwise the computation is forwarded to VL_POW_WWW.
/// @return owp
WDataOutP VL_POWSS_WWW(int obits, int, int rbits, WDataOutP owp, const WDataInP lwp,
                       const WDataInP rwp, bool lsign, bool rsign) VL_MT_SAFE {
    if (rsign && VL_SIGN_W(rbits, rwp)) {
        const int words = VL_WORDS_I(obits);
        const int topWord = words - 1;
        // Classify the base by examining every word above word 0. The top
        // word of an all-ones (-1) value is masked to the base's own width,
        // obits, not the exponent's width.
        bool upperZero = true;  // All words above word 0 are zero
        bool upperOnes = true;  // All words above word 0 are all-ones
        for (int i = 1; i < words; ++i) {
            const EData allOnes = (i == topWord) ? VL_MASK_E(obits) : ~VL_EUL(0);
            if (lwp[i] != 0) upperZero = false;
            if (lwp[i] != allOnes) upperOnes = false;
        }
        const EData lowOnes = (topWord == 0) ? VL_MASK_E(obits) : ~VL_EUL(0);
        // Capture everything needed from the inputs before owp is written,
        // so the result is correct even if owp overlaps an input
        const bool baseIsOne = upperZero && lwp[0] == 1;
        const bool baseIsMinusOne = lsign && upperOnes && lwp[0] == lowOnes;
        const bool exponentOdd = (rwp[0] & 1) != 0;
        VL_ZERO_W(obits, owp);
        if (baseIsOne) {
            owp[0] = 1;
        } else if (baseIsMinusOne) {
            if (exponentOdd) return VL_ALLONES_W(obits, owp);  // -1^odd=-1
            owp[0] = 1;  // -1^even=1
        }
        // Otherwise base is 0 ("X") or has magnitude > 1; both return 0
        return owp;
    }
    return VL_POW_WWW(obits, rbits, rbits, owp, lwp, rwp);
}
/// Signed-aware wide power with a quad exponent: widens rhs into a wide
/// temporary and forwards to VL_POWSS_WWW.
WDataOutP VL_POWSS_WWQ(int obits, int lbits, int rbits, WDataOutP owp, const WDataInP lwp,
                       QData rhs, bool lsign, bool rsign) VL_MT_SAFE {
    VlWide<VL_WQ_WORDS_E> exponentw;
    VL_SET_WQ(exponentw, rhs);
    return VL_POWSS_WWW(obits, lbits, rbits, owp, lwp, exponentw, lsign, rsign);
}
/// Signed-aware quad power with a wide exponent.
/// When the exponent is signed and negative: base 0 gives 0 ("X"), base 1
/// gives 1, a signed base of -1 gives -1 for an odd exponent and 1 for an
/// even one, and any other base gives 0. Otherwise forwards to VL_POW_QQW.
QData VL_POWSS_QQW(int obits, int, int rbits, QData lhs, const WDataInP rwp, bool lsign,
                   bool rsign) VL_MT_SAFE {
    // Skip check for rhs == 0, as short-circuit doesn't save time
    const bool negativeExponent = rsign && VL_SIGN_W(rbits, rwp);
    if (!negativeExponent) return VL_POW_QQW(obits, rbits, rbits, lhs, rwp);
    if (lhs == 0) return 0;  // "X"
    if (lhs == 1) return 1;
    if (lsign && lhs == VL_MASK_Q(obits)) {  // -1
        const bool exponentOdd = (rwp[0] & 1) != 0;
        if (exponentOdd) return VL_MASK_Q(obits);  // -1^odd=-1
        return 1;  // -1^even=1
    }
    return 0;
}
/// Convert an unsigned lbits-wide value to double.
/// Values whose most-significant nonzero word is word 0 or 1 are converted
/// directly; larger values are approximated from their top three words.
double VL_ITOR_D_W(int lbits, const WDataInP lwp) VL_PURE {
    // Find the most-significant nonzero word (stopping at word 0)
    int top = VL_WORDS_I(lbits) - 1;
    while (!lwp[top] && top > 0) --top;
    if (top == 0) return static_cast<double>(lwp[0]);
    if (top == 1) return static_cast<double>(VL_SET_QW(lwp));
    // We need 53 bits of mantissa, which might mean looking at 3 words
    // namely top, top-1 and top-2
    const double hi = static_cast<double>(lwp[top]) * std::exp2(2 * VL_EDATASIZE);
    const double mid = static_cast<double>(lwp[top - 1]) * std::exp2(VL_EDATASIZE);
    const double lo = static_cast<double>(lwp[top - 2]);
    // Scale by the weight of the lowest of the three words
    return (hi + mid + lo) * std::exp2(VL_EDATASIZE * (top - 2));
}
/// Convert a signed lbits-wide value to double.
/// Non-negative values go straight to VL_ITOR_D_W; negative values are
/// negated, cleaned, converted, and the result's sign flipped.
double VL_ISTOR_D_W(int lbits, const WDataInP lwp) VL_PURE {
    const bool negative = VL_SIGN_W(lbits, lwp);
    if (!negative) return VL_ITOR_D_W(lbits, lwp);
    vluint32_t magnitude[VL_MULS_MAX_WORDS + 1];  // Fixed size, as MSVC++ doesn't allow [words] here
    VL_NEGATE_W(VL_WORDS_I(lbits), magnitude, lwp);
    _vl_clean_inplace_w(lbits, magnitude);
    return -VL_ITOR_D_W(lbits, magnitude);
}
//===========================================================================
// Formatting
// Output a string representation of a wide number
// Return the decimal digits of the width-bit wide value lwp as a string.
// The value is converted to packed BCD (4 bits per digit) with the
// double-dabble algorithm, then the digits are emitted most-significant
// first. Leading zero digits are skipped, but the last digit is always
// emitted, so a zero value yields "0".
std::string VL_DECIMAL_NW(int width, const WDataInP lwp) VL_MT_SAFE {
// Number of BCD bits reserved for the result
const int maxdecwidth = (width + 3) * 4 / 3;
// Or (maxdecwidth+7)/8, but can't have more than 4 BCD bits per word
VlWide<VL_VALUE_STRING_MAX_WIDTH / 4 + 2> bcd;
VL_ZERO_RESET_W(maxdecwidth, bcd);
// Scratch values, needed because the wide add and shift take separate input and output operands
VlWide<VL_VALUE_STRING_MAX_WIDTH / 4 + 2> tmp;
VlWide<VL_VALUE_STRING_MAX_WIDTH / 4 + 2> tmp2;
int from_bit = width - 1;
// Skip all leading zeros
for (; from_bit >= 0 && !(VL_BITRSHIFT_W(lwp, from_bit) & 1); --from_bit) {}
// Double-dabble algorithm
for (; from_bit >= 0; --from_bit) {
// Any digits >= 5 need an add 3 (via tmp)
for (int nibble_bit = 0; nibble_bit < maxdecwidth; nibble_bit += 4) {
if ((VL_BITRSHIFT_W(bcd, nibble_bit) & 0xf) >= 5) {
// tmp2 = 3 positioned at this nibble; bcd = bcd + tmp2
VL_ZERO_RESET_W(maxdecwidth, tmp2);
tmp2[VL_BITWORD_E(nibble_bit)] |= VL_EUL(0x3) << VL_BITBIT_E(nibble_bit);
VL_ASSIGN_W(maxdecwidth, tmp, bcd);
VL_ADD_W(VL_WORDS_I(maxdecwidth), bcd, tmp, tmp2);
}
}
// Shift; bcd = bcd << 1
VL_ASSIGN_W(maxdecwidth, tmp, bcd);
VL_SHIFTL_WWI(maxdecwidth, maxdecwidth, 32, bcd, tmp, 1);
// bcd[0] = lwp[from_bit]
if (VL_BITISSET_W(lwp, from_bit)) bcd[0] |= 1;
}
std::string output;
// Bit position of the most-significant nibble within maxdecwidth
int lsb = (maxdecwidth - 1) & ~3;
for (; lsb > 0; lsb -= 4) { // Skip leading zeros
if (VL_BITRSHIFT_W(bcd, lsb) & 0xf) break;
}
// Emit the remaining nibbles as ASCII digits
for (; lsb >= 0; lsb -= 4) {
output += ('0' + (VL_BITRSHIFT_W(bcd, lsb) & 0xf)); // 0..9
}
return output;
}
// Format a time value according to the thread context's time-format
// settings (suffix, units and precision).
//   tmp       caller-provided scratch buffer; written with a size limit of
//             VL_VALUE_STRING_MAX_WIDTH characters
//   ld        time value; integer types use 128-bit wide arithmetic, other
//             types use double arithmetic
//   timeunit  added into the power-of-ten scaling exponent
//   left      if true, space padding goes after the text, else before it
//   width     minimum field width; shorter results are padded with spaces
// Returns the formatted (and padded) string.
template <typename T>
std::string _vl_vsformat_time(char* tmp, T ld, int timeunit, bool left, size_t width) {
const VerilatedContextImp* const ctxImpp = Verilated::threadContextp()->impp();
const std::string suffix = ctxImpp->timeFormatSuffix();
const int userUnits = ctxImpp->timeFormatUnits(); // 0..-15
const int fracDigits = ctxImpp->timeFormatPrecision(); // 0..N
const int shift = -userUnits + fracDigits + timeunit; // 0..-15
int digits = 0;
if (std::numeric_limits<T>::is_integer) {
// Integer path: scale in 128 bits so multiplying by a power of ten cannot
// overflow 64 bits
constexpr int b = 128;
constexpr int w = VL_WORDS_I(b);
// The four temporaries are reused below; the "breaks" comments mark
// where an earlier value is overwritten
VlWide<w> tmp0, tmp1, tmp2, tmp3;
WDataInP shifted = VL_EXTEND_WQ(b, 0, tmp0, static_cast<QData>(ld));
if (shift < 0) {
// Negative shift: divide by 10**(-shift)
const WDataInP pow10 = VL_EXTEND_WQ(b, 0, tmp1, vl_time_pow10(-shift));
shifted = VL_DIV_WWW(b, tmp2, shifted, pow10);
} else {
// Non-negative shift: multiply by 10**shift
const WDataInP pow10 = VL_EXTEND_WQ(b, 0, tmp1, vl_time_pow10(shift));
shifted = VL_MUL_W(w, tmp2, shifted, pow10);
}
// Split the scaled value into integer and fractional parts
const WDataInP fracDigitsPow10 = VL_EXTEND_WQ(b, 0, tmp3, vl_time_pow10(fracDigits));
const WDataInP integer = VL_DIV_WWW(b, tmp0, shifted, fracDigitsPow10);
const WDataInP frac = VL_MODDIV_WWW(b, tmp1, shifted, fracDigitsPow10);
const WDataInP max64Bit
= VL_EXTEND_WQ(b, 0, tmp2, std::numeric_limits<vluint64_t>::max()); // breaks shifted
if (VL_GT_W(w, integer, max64Bit)) {
// Integer part does not fit in 64 bits: build its decimal text by
// hand, one digit at a time from the least-significant end
WDataOutP v = VL_ASSIGN_W(b, tmp3, integer); // breaks fracDigitsPow10
VlWide<w> zero, ten;
VL_ZERO_W(b, zero);
VL_EXTEND_WI(b, 0, ten, 10);
char buf[128]; // 128B is obviously long enough to represent 128bit integer in decimal
// Digits are written backwards from the end of buf
char* ptr = buf + sizeof(buf) - 1;
*ptr = '\0';
while (VL_GT_W(w, v, zero)) {
--ptr;
const WDataInP mod = VL_MODDIV_WWW(b, tmp2, v, ten); // breaks max64Bit
*ptr = "0123456789"[VL_SET_QW(mod)];
VlWide<w> divided;
VL_DIV_WWW(b, divided, v, ten);
VL_ASSIGN_W(b, v, divided);
}
if (!fracDigits) {
digits = VL_SNPRINTF(tmp, VL_VALUE_STRING_MAX_WIDTH, "%s%s", ptr, suffix.c_str());
} else {
digits = VL_SNPRINTF(tmp, VL_VALUE_STRING_MAX_WIDTH, "%s.%0*" PRIu64 "%s", ptr,
fracDigits, VL_SET_QW(frac), suffix.c_str());
}
} else {
// Integer part fits in 64 bits: let the formatter print it directly
const vluint64_t integer64 = VL_SET_QW(integer);
if (!fracDigits) {
digits = VL_SNPRINTF(tmp, VL_VALUE_STRING_MAX_WIDTH, "%" PRIu64 "%s", integer64,
suffix.c_str());
} else {
digits = VL_SNPRINTF(tmp, VL_VALUE_STRING_MAX_WIDTH, "%" PRIu64 ".%0*" PRIu64 "%s",
integer64, fracDigits, VL_SET_QW(frac), suffix.c_str());
}
}
} else {
// Non-integer path: scale in double and let the formatter produce the
// fractional digits
const double shiftd = vl_time_multiplier(shift);
const double scaled = ld * shiftd;
const double fracDiv = vl_time_multiplier(fracDigits);
const double whole = scaled / fracDiv;
if (!fracDigits) {
digits = VL_SNPRINTF(tmp, VL_VALUE_STRING_MAX_WIDTH, "%.0f%s", whole, suffix.c_str());
} else {
digits = VL_SNPRINTF(tmp, VL_VALUE_STRING_MAX_WIDTH, "%.*f%s", fracDigits, whole,
suffix.c_str());
}
}
// digits is the length reported by the formatter; pad up to the requested width
const int needmore = width - digits;
std::string padding;
if (needmore > 0) padding.append(needmore, ' '); // Pad with spaces
return left ? (tmp + padding) : (padding + tmp);
}
// Do a va_arg returning a quad, assuming input argument is anything less than wide
// The argument is fetched as an IData when 'bits' is at most VL_IDATASIZE,
// and as a QData otherwise.
#define VL_VA_ARG_Q_(ap, bits) (((bits) <= VL_IDATASIZE) ? va_arg(ap, IData) : va_arg(ap, QData))
void _vl_vsformat(std::string& output, const char* formatp, va_list ap) VL_MT_SAFE {
// Format a Verilog $write style format into the output list
// The format must be pre-processed (and lower cased) by Verilator
// Arguments are in "width, arg-value (or WDataIn* if wide)" form
//
// Note uses a single buffer internally; presumes only one usage per printf
// Note also assumes variables < 64 are not wide, this assumption is
// sometimes not true in low-level routines written here in verilated.cpp
static VL_THREAD_LOCAL char t_tmp[VL_VALUE_STRING_MAX_WIDTH];
const char* pctp = nullptr; // Most recent %##.##g format
bool inPct = false;
bool widthSet = false;
bool left = false;
size_t width = 0;
for (const char* pos = formatp; *pos; ++pos) {
if (!inPct && pos[0] == '%') {
pctp = pos;
inPct = true;
widthSet = false;
width = 0;
} else if (!inPct) { // Normal text
// Fast-forward to next escape and add to output
const char* ep = pos;
while (ep[0] && ep[0] != '%') ++ep;
if (ep != pos) {
output.append(pos, ep - pos);
pos += ep - pos - 1;
}
} else { // Format character
inPct = false;
const char fmt = pos[0];
switch (fmt) {
case '0': // FALLTHRU
case '1': // FALLTHRU
case '2': // FALLTHRU
case '3': // FALLTHRU
case '4': // FALLTHRU
case '5': // FALLTHRU
case '6': // FALLTHRU
case '7': // FALLTHRU
case '8': // FALLTHRU
case '9':
inPct = true; // Get more digits
widthSet = true;
width = width * 10 + (fmt - '0');
break;
case '-':
left = true;
inPct = true; // Get more digits
break;
case '.':
inPct = true; // Get more digits
break;
case '%': //
output += '%';
break;
case 'N': { // "C" string with name of module, add . if needed
const char* const cstrp = va_arg(ap, const char*);
if (VL_LIKELY(*cstrp)) {
output += cstrp;
output += '.';
}
break;
}
case 'S': { // "C" string
const char* const cstrp = va_arg(ap, const char*);
output += cstrp;
break;
}
case '@': { // Verilog/C++ string
va_arg(ap, int); // # bits is ignored
const std::string* const cstrp = va_arg(ap, const std::string*);
std::string padding;
if (width > cstrp->size()) padding.append(width - cstrp->size(), ' ');
output += left ? (*cstrp + padding) : (padding + *cstrp);
break;
}
case 'e':
case 'f':
case 'g':
case '^': { // Realtime
const int lbits = va_arg(ap, int);
const double d = va_arg(ap, double);
if (lbits) {} // UNUSED - always 64
if (fmt == '^') { // Realtime
if (!widthSet) width = Verilated::threadContextp()->impp()->timeFormatWidth();
const int timeunit = va_arg(ap, int);
output += _vl_vsformat_time(t_tmp, d, timeunit, left, width);
} else {
const size_t len = pos - pctp + 1;
const std::string fmts{pctp, len};
VL_SNPRINTF(t_tmp, VL_VALUE_STRING_MAX_WIDTH, fmts.c_str(), d);
output += t_tmp;
}
break;
}
default: {
// Deal with all read-and-print somethings
const int lbits = va_arg(ap, int);
QData ld = 0;
VlWide<VL_WQ_WORDS_E> qlwp;
WDataInP lwp = nullptr;
if (lbits <= VL_QUADSIZE) {
ld = VL_VA_ARG_Q_(ap, lbits);
VL_SET_WQ(qlwp, ld);
lwp = qlwp;
} else {
lwp = va_arg(ap, WDataInP);
ld = lwp[0];
}
int lsb = lbits - 1;
if (widthSet && width == 0) {
while (lsb && !VL_BITISSET_W(lwp, lsb)) --lsb;
}
switch (fmt) {
case 'c': {
const IData charval = ld & 0xff;
output += static_cast<char>(charval);
break;
}
case 's': {
std::string field;
for (; lsb >= 0; --lsb) {
lsb = (lsb / 8) * 8; // Next digit
const IData charval = VL_BITRSHIFT_W(lwp, lsb) & 0xff;
field += (charval == 0) ? ' ' : charval;
}
std::string padding;
if (width > field.size()) padding.append(width - field.size(), ' ');
output += left ? (field + padding) : (padding + field);
break;
}
case 'd': { // Signed decimal
int digits = 0;
std::string append;
if (lbits <= VL_QUADSIZE) {
digits = VL_SNPRINTF(
t_tmp, VL_VALUE_STRING_MAX_WIDTH, "%" PRId64,
static_cast<vlsint64_t>(VL_EXTENDS_QQ(lbits, lbits, ld)));
append = t_tmp;
} else {
if (VL_SIGN_E(lbits, lwp[VL_WORDS_I(lbits) - 1])) {
VlWide<VL_VALUE_STRING_MAX_WIDTH / 4 + 2> neg;
VL_NEGATE_W(VL_WORDS_I(lbits), neg, lwp);
append = std::string{"-"} + VL_DECIMAL_NW(lbits, neg);
} else {
append = VL_DECIMAL_NW(lbits, lwp);
}
digits = append.length();
}
const int needmore = width - digits;
std::string padding;
if (needmore > 0) {
if (pctp && pctp[0] && pctp[1] == '0') { // %0
padding.append(needmore, '0'); // Pre-pad zero
} else {
padding.append(needmore, ' '); // Pre-pad spaces
}
}
output += left ? (append + padding) : (padding + append);
break;
}
case '#': { // Unsigned decimal
int digits = 0;
std::string append;
if (lbits <= VL_QUADSIZE) {
digits = VL_SNPRINTF(t_tmp, VL_VALUE_STRING_MAX_WIDTH, "%" PRIu64, ld);
append = t_tmp;
} else {
append = VL_DECIMAL_NW(lbits, lwp);
digits = append.length();
}
const int needmore = width - digits;
std::string padding;
if (needmore > 0) {
if (pctp && pctp[0] && pctp[1] == '0') { // %0
padding.append(needmore, '0'); // Pre-pad zero
} else {
padding.append(needmore, ' '); // Pre-pad spaces
}
}
output += left ? (append + padding) : (padding + append);
break;
}
case 't': { // Time
if (!widthSet) width = Verilated::threadContextp()->impp()->timeFormatWidth();
const int timeunit = va_arg(ap, int);
output += _vl_vsformat_time(t_tmp, ld, timeunit, left, width);
break;
}
case 'b':
for (; lsb >= 0; --lsb) output += (VL_BITRSHIFT_W(lwp, lsb) & 1) + '0';
break;
case 'o':
for (; lsb >= 0; --lsb) {
lsb = (lsb / 3) * 3; // Next digit
// Octal numbers may span more than one wide word,
// so we need to grab each bit separately and check for overrun
// Octal is rare, so we'll do it a slow simple way
output += static_cast<char>(
'0' + ((VL_BITISSETLIMIT_W(lwp, lbits, lsb + 0)) ? 1 : 0)
+ ((VL_BITISSETLIMIT_W(lwp, lbits, lsb + 1)) ? 2 : 0)
+ ((VL_BITISSETLIMIT_W(lwp, lbits, lsb + 2)) ? 4 : 0));
}
break;
case 'u':
case 'z': { // Packed 4-state
const bool is_4_state = (fmt == 'z');
output.reserve(output.size() + ((is_4_state ? 2 : 1) * VL_WORDS_I(lbits)));
int bytes_to_go = VL_BYTES_I(lbits);
int bit = 0;
while (bytes_to_go > 0) {
const int wr_bytes = std::min(4, bytes_to_go);
for (int byte = 0; byte < wr_bytes; byte++, bit += 8)
output += static_cast<char>(VL_BITRSHIFT_W(lwp, bit) & 0xff);
output.append(4 - wr_bytes, static_cast<char>(0));
if (is_4_state) output.append(4, static_cast<char>(0));
bytes_to_go -= wr_bytes;
}
break;
}
case 'v': // Strength; assume always strong
for (lsb = lbits - 1; lsb >= 0; --lsb) {
if (VL_BITRSHIFT_W(lwp, lsb) & 1) {
output += "St1 ";
} else {
output += "St0 ";
}
}
break;
case 'x':
for (; lsb >= 0; --lsb) {
lsb = (lsb / 4) * 4; // Next digit
const IData charval = VL_BITRSHIFT_W(lwp, lsb) & 0xf;
output += "0123456789abcdef"[charval];
}
break;
default: { // LCOV_EXCL_START
const std::string msg = std::string{"Unknown _vl_vsformat code: "} + pos[0];
VL_FATAL_MT(__FILE__, __LINE__, "", msg.c_str());
break;
} // LCOV_EXCL_STOP
} // switch
}
} // switch
}
}
}
// Return true once the scan input is exhausted.
// When a stream is supplied, its end-of-file indicator decides; with a null
// stream the answer is whether floc has gone negative.
static inline bool _vl_vsss_eof(FILE* fp, int floc) VL_MT_SAFE {
    if (!VL_LIKELY(fp)) return floc < 0;
    // Explicit comparison yields a real bool, which avoids the MSVC++ int-to-bool warning
    return std::feof(fp) != 0;
}