forked from geckom/ChatScript
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscriptCompile.cpp
5756 lines (5387 loc) · 214 KB
/
scriptCompile.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include "common.h"
//------------------------
// ALWAYS AVAILABLE
//------------------------
// ---- File-scope state of the script compiler ----
// Names without `static` have external linkage and may be referenced from other files.
static unsigned int undefinedCallThreadList = 0;
static int complexity = 0;
static int priorLine = 0;
bool autoset = false;
// Name of the macro being compiled; AddMapOutput skips map output when it starts with "^tbl:".
static char macroName[MAX_WORD_SIZE];
static uint64 macroid;
// Output data area: AddMapOutput reports offsets relative to dataBase and
// NUL-terminates at dataChunk before excerpting text starting at lineStart.
char* dataBase = NULL;
static char* dataChunk = NULL;
static char* outputStart = NULL;
static char* lineStart = NULL;
static bool globalBotScope = false;
// Secondary line buffer used by ReadNextSystemToken so a peek never disturbs readBuffer.
char* newBuffer = NULL;
static char* oldBuffer = NULL;
// Variable names collected by AddDisplay and emitted (then cleared) by WriteDisplay.
// Each slot holds at most 99 characters plus the terminator.
static char display[MAX_DISPLAY][100];
static int displayIndex = 0;
static char* incomingPtrSys = 0; // cache AFTER token find ptr when peeking.
static char lookaheadSys[MAX_WORD_SIZE]; // cache token found when peeking
static unsigned int hasWarnings; // number of warnings generated
// Number of errors; incremented by ScriptError while compiling.
unsigned int hasErrors;
uint64 grade = 0; // vocabulary warning
// Set by FindDeprecated to the location of the deprecated variable it found.
char* lastDeprecation = 0;
bool compiling = false; // script compiler in progress
bool patternContext = false; // current compiling a pattern
unsigned int buildId; // current build
// callingSystem and chunking are both reset by ScriptError.
static int callingSystem = 0;
static bool chunking = false;
// Per-category warning counters, bumped by AddWarning based on the warning text.
static unsigned int substitutes;
static unsigned int cases;
static unsigned int badword;
static unsigned int functionCall;
static bool isDescribe = false;
// Warning messages saved by AddWarning; once full, the last slot is overwritten.
#define MAX_WARNINGS 200
static char warnings[MAX_WARNINGS][MAX_WORD_SIZE];
static unsigned int warnIndex = 0;
static char baseName[SMALL_WORD_SIZE];
// Error messages saved by AddError; once full, the last slot is overwritten.
#define MAX_ERRORS 200
static char errors[MAX_ERRORS][MAX_WORD_SIZE];
static unsigned int errorIndex = 0;
// Argument names of the function being compiled; GetFunctionArgument maps a name to its index.
static char functionArguments[MAX_ARGUMENT_COUNT+1][500];
static int functionArgumentCount = 0;
// When non-empty, ScriptWarn includes this bot name in the warning prefix.
char botheader[MAX_WORD_SIZE];
static bool renameInProgress = false;
static bool endtopicSeen = false; // needed when ending a plan
// NOTE(review): buildID differs from buildId above only by case; confirm both are intended.
unsigned int buildID = 0;
// Base names of the generated files. EraseTopicFiles combines each entry with the
// topic directory and a build name to form "<topic>/<entry><name>.txt" and
// "<topic>/BUILD<name>/<entry><name>.txt". The list ends with a 0 sentinel.
static char* topicFiles[] = // files created by a topic refresh from scratch
{
(char*)"describe", // document variables functions concepts topics etc
(char*)"facts", // hold facts
(char*)"keywords", // holds topic and concepts keywords
(char*)"macros", // holds macro definitions
(char*)"map", // where things are defined
(char*)"script", // hold topic definitions
(char*)"plans", // hold plan definitions
(char*)"patternWords", // things we want to detect in patterns that may not be normal words
(char*)"dict", // dictionary changes
(char*)"private", // private substitutions changes
(char*)"canon", // private canonical values
0
};
// Forward declarations; the definitions are not in this part of the file.
static void WritePatternWord(char* word);
static void WriteKey(char* word);
// Map output stream written by AddMapOutput; NULL means no map output is produced.
static FILE* mapFile = NULL; // for IDE
// Resets the script system's file-scope pointers: the output-start marker and the
// map-file handle are both set to NULL. Nothing is closed or freed here.
void InitScriptSystem()
{
    outputStart = NULL;
    mapFile = NULL;
}
// Saves a warning message, prefixed with the current file and line, in the warnings
// table and bumps the matching category counter based on phrases in the text.
//   buffer: the already-formatted warning text.
// The write is bounded to one table slot, so an over-long message is truncated
// instead of overrunning the slot. Once the table is full the last slot is reused,
// so later warnings overwrite it.
void AddWarning(char* buffer)
{
    char* slot = warnings[warnIndex];
    snprintf(slot, sizeof(warnings[0]), (char*)"line %d of %s: %s", currentFileLine, currentFilename, buffer);

    // classify by phrase; the first match wins
    if (strstr(slot,(char*)"is not a known word")) {++badword;}
    else if (strstr(slot,(char*)" changes ")) {++substitutes;}
    else if (strstr(slot,(char*)"is unknown as a word")) {++badword;}
    else if (strstr(slot,(char*)"in opposite case")) {++cases;}
    else if (strstr(slot,(char*)"a function call")) {++functionCall;}

    // advance, but never past the final slot
    if (++warnIndex >= MAX_WARNINGS) --warnIndex;
}
// Counts a warning and logs its prefix (file, line, and bot name when known).
// Does nothing unless a compile is in progress. The caller supplies the message
// text separately; only the "*** Warning- ..." lead-in is written here.
void ScriptWarn()
{
    if (!compiling) return;

    ++hasWarnings;
    if (!*currentFilename) // no file context available
    {
        Log(STDTRACELOG, (char*)"*** Warning- ");
        return;
    }

    if (*botheader) Log(STDTRACELOG, (char*)"*** Warning- line %d of %s bot:%s : ", currentFileLine, currentFilename, botheader);
    else Log(STDTRACELOG, (char*)"*** Warning- line %d of %s: ", currentFileLine, currentFilename);
}
// Saves an error message, prefixed with the current file and line, in the errors table.
//   buffer: the already-formatted error text.
// The write is bounded to one table slot, so an over-long message is truncated
// instead of overrunning the slot. Once the table is full the last slot is reused.
void AddError(char* buffer)
{
    snprintf(errors[errorIndex], sizeof(errors[0]), (char*)"line %d of %s: %s", currentFileLine, currentFilename, buffer);
    if (++errorIndex >= MAX_ERRORS) --errorIndex; // never advance past the final slot
}
// Resets transient compiler state after an error. While compiling, it also counts
// the error, leaves pattern context, and logs the "*** Error- ..." lead-in with the
// current file and line. The caller supplies the message text separately.
void ScriptError()
{
    // these are cleared whether or not a compile is in progress
    renameInProgress = false;
    outputStart = NULL;
    chunking = false;
    callingSystem = 0;

    if (!compiling) return;

    patternContext = false;
    ++hasErrors;
    Log(STDTRACELOG,(char*)"*** Error- line %d of %s: ",currentFileLine,currentFilename);
}
// Adds a variable name to the pending display list, lower-casing it in place first.
// A name already in the list is ignored.
//   word: the variable name; modified in place by MakeLowerCase.
// Raises a script error when the list is full or the name does not fit in a slot.
static void AddDisplay(char* word)
{
    MakeLowerCase(word);
    for (int i = 0; i < displayIndex; ++i)
    {
        if (!strcmp(word,display[i])) return; // no duplicates needed
    }

    // An earlier limit error can leave displayIndex at MAX_DISPLAY; never write past the table.
    if (displayIndex >= MAX_DISPLAY)
    {
        BADSCRIPT("Display argument limited to %d: %s\r\n",MAX_DISPLAY,word)
        return; // only reached if BADSCRIPT does not transfer control
    }
    // Callers pass token buffers larger than one slot, so reject rather than overflow.
    if (strlen(word) >= sizeof(display[0]))
    {
        BADSCRIPT("Display argument name too long: %s\r\n",word)
        return; // only reached if BADSCRIPT does not transfer control
    }

    strcpy(display[displayIndex],word);
    if (++displayIndex >= MAX_DISPLAY) BADSCRIPT("Display argument limited to %d: %s\r\n",MAX_DISPLAY,word)
}
static char* ReadDisplay(FILE* in, char* ptr)
{
char word[SMALL_WORD_SIZE];
ptr = ReadNextSystemToken(in,ptr,word,false);
while (1)
{
ptr = ReadNextSystemToken(in,ptr,word,false);
if (*word == ')') break;
if (*word != USERVAR_PREFIX)
BADSCRIPT("Display argument must be uservar of $$ $ or $_: %s\r\n",word)
if (strchr(word,'.'))
BADSCRIPT("Display argument cannot be dot-selected %s\r\n",word)
AddDisplay(word); // explicit display
}
return ptr;
}
// Writes the pending display list as "( name1 name2 ... ) " at pack and clears the list.
// An empty list is written as "( ) ".
//   pack: destination; must have room for the whole list plus the terminator.
// Returns a pointer to the terminating NUL that was written.
static char* WriteDisplay(char* pack)
{
    strcpy(pack,"( ");
    pack += 2;

    for (int slot = 0; slot < displayIndex; ++slot)
    {
        strcpy(pack,display[slot]);
        pack += strlen(display[slot]);
        *pack++ = ' '; // separator after every name
    }
    displayIndex = 0; // list has been consumed

    strcpy(pack,") ");
    return pack + 2;
}
void EraseTopicFiles(unsigned int build,char* name)
{
int i = -1;
while (topicFiles[++i])
{
char file[SMALL_WORD_SIZE];
sprintf(file,(char*)"%s/%s%s.txt",topic,topicFiles[i],name);
remove(file);
sprintf(file,(char*)"%s/BUILD%s/%s%s.txt",topic,name,topicFiles[i],name);
remove(file);
}
}
// Locates the comparison operator embedded in a pattern token, if any.
//   word: NUL-terminated token; not modified.
// Returns a pointer to the operator character inside word, or NULL when the token
// has no comparison. The search starts at word+1, and operators are tried in
// priority order: '!' (unless the token itself starts with '!'), '<', '>', '&',
// '=', '?'. For each operator only its first occurrence is considered.
//
// The former trailing rescan for "!=" / "!?" has been removed: it ran only when no
// '=' or '?' existed after the first character, so it could never succeed.
// Results are unchanged for every input.
static char* FindComparison(char* word)
{
    if (!*word || !word[1] || !word[2]) return NULL; // too short to hold lhs, operator, rhs
    if (*word == '.' || *word == '\\') return NULL; // .<_3 and escaped tokens are not comparisons
    if ((*word == '!' || *word == '_') && word[1] == '?' && word[2] == '$') return NULL;
    if (*word == '?' && word[1] == '$') return NULL;

    char* rest = word + 1;
    char* at = (*word == '!') ? NULL : strchr(rest,'!'); // a leading ! makes a later ! not an operator
    if (!at) at = strchr(rest,'<');
    if (!at) at = strchr(rest,'>');
    if (!at)
    {
        at = strchr(rest,'&');
        if (at && (at[1] == '_' || at[1] == ' ')) at = NULL; // ignore & as part of a name
    }
    if (!at) at = strchr(rest,'=');
    if (!at) at = strchr(rest,'?'); // member of set
    return at;
}
// Writes a map-file entry for the output produced since the last call, then marks
// the current output position as the start of the next chunk.
//   line: source line number to record.
// An entry is written only when a map file is open, an output base exists, output
// has advanced since the last call, and the current macro name does not begin with
// "^tbl:". The entry holds the line number, the chunk's offset from the output
// base, and up to 30 characters of the chunk's text.
static void AddMapOutput(int line)
{
    if (mapFile && dataBase && lineStart != dataChunk)
    {
        if (strnicmp(macroName, "^tbl:", 5) != 0)
        {
            *dataChunk = 0; // terminate the output so the excerpt cannot run past it
            char excerpt[31];
            strncpy(excerpt, lineStart, 30);
            excerpt[30] = 0;
            fprintf(mapFile, (char*)" line: %d %d # %s\r\n", line, (int)(lineStart - dataBase),excerpt); // readBuffer
        }
    }
    lineStart = dataChunk; // used to detect new line needs tracking
}
// Returns the next script token, reading further lines from the file when the
// current buffer has none left, with support for a one-token peek.
//   in:   source file; may be NULL, in which case no new line can be read.
//   ptr:  current position in the read buffer; may be NULL.
//   word: receives the token text (empty when none is found).
//   separateUnderscore: passed through to ReadSystemToken.
//   peek: when true the token is cached rather than consumed, and the returned
//         pointer is the placeholder (char*)1, which must not be dereferenced.
// Returns the position after the token in readBuffer, (char*)1 for a peek, or NULL
// at end of file or for a clear-cache request.
// Passing both in and ptr as NULL clears any cached peek and returns NULL.
// NOTE(review): callers in this file pass four arguments, so a default for `peek`
// is presumably declared in a header; confirm.
char* ReadNextSystemToken(FILE* in,char* ptr, char* word, bool separateUnderscore, bool peek)
{
#ifdef INFORMATION
The outside can ask for the next real token or merely peek ahead one token. And sometimes the outside
after peeking, decides it wants to back up a real token (passing it to some other processor).
To support backing up a real token, the system must keep the current readBuffer filled with the data that
led to that token (to allow a ptr - strlen(word) backup).
To support peeking, the system may have to read a bunch of lines in to find a token. It is going to need to
track that buffer separately, so when it needs a real token which was the peek, it can both get the peek value
and be using contents of the new buffer thereafter.
So peeks must never touch the real readBuffer. And real reads must know whether the last token was peeked
and from which buffer it was peeked.
And, if someone wants to back up to allow the old token to be reread, they have to CANCEL any peek data, so the token
comes from the old buffer. Meanwhile the newbuffer continues to have content for when the old buffer runs out.
#endif
// remember the line number on entry so a peek can restore it afterwards
int line = currentFileLine;
// clear peek cache
if (!in && !ptr) // clear cache request, next get will be from main buffer (though secondary buffer may still have peek read data)
{
if (word) *word = 0;
incomingPtrSys = NULL; // no longer holding a PEEK value.
return NULL;
}
char* result = NULL;
if (incomingPtrSys ) // had a prior PEEK, now in cache. use up cached value, unless duplicate peeking
{
result = incomingPtrSys; // caller who is peeking will likely ignore this
if (!peek)
{
currentFileLine = maxFileLine; // revert to highest read
// he wants reality now...
if (newBuffer && *newBuffer) // prior peek was from this buffer, make it real data in real buffer
{
strcpy(readBuffer,newBuffer);
// the cached pointer referred to newBuffer; rebase it onto the same offset in readBuffer
result = (result - newBuffer) + readBuffer; // adjust pointer to current buffer
*newBuffer = 0;
}
strcpy(word,lookaheadSys);
// the peeked token is now consumed
incomingPtrSys = 0;
}
else
{
strcpy(word,lookaheadSys); // duplicate peek
result = (char*)1; // NO ONE SHOULD KEEP A PEEKed PTR
}
return result;
}
// no cached peek: try to take a token from the caller's position
*word = 0;
if (ptr) result = ReadSystemToken(ptr,word,separateUnderscore);
// newline becomes true once a fresh line has been read into newBuffer during this call
bool newline = false;
while (!*word) // found no token left in existing buffer - we have to juggle buffers now unless running overwrite
{
if (!newline && newBuffer && *newBuffer) // use pre-read buffer per normal, it will have a token
{
strcpy(readBuffer,newBuffer);
*newBuffer = 0;
result = ReadSystemToken(readBuffer,word,separateUnderscore);
break;
}
else // read new line into hypothetical buffer, not destroying old actual buffer yet
{
if (!in || ReadALine(newBuffer,in) < 0) return NULL; // end of file
if (!strnicmp(newBuffer,(char*)"#ignore",7)) // hit an ignore zone
{
// ignore zones nest: skip lines until every #ignore has its matching #endignore
unsigned int ignoreCount = 1;
while (ReadALine(newBuffer,in) >= 0)
{
if (!strnicmp(newBuffer,(char*)"#ignore",7)) ++ignoreCount;
else if (!strnicmp(newBuffer,(char*)"#endignore",10))
{
if (--ignoreCount == 0)
{
// fetch the first line after the ignore zone
if (ReadALine(newBuffer,in) < 0) return NULL; // EOF
break;
}
}
}
if (ignoreCount) return NULL; //EOF before finding closure
}
result = ReadSystemToken(newBuffer,word,separateUnderscore); // result is ptr into NEWBUFFER
newline = true;
}
}
if (peek) // save request - newBuffer has implied newline if any
{
incomingPtrSys = result; // next location in whatever buffer
strcpy(lookaheadSys,word); // save next token peeked
result = (char*)1; // NO ONE SHOULD KEEP A PEEKed PTR
currentFileLine = line; // claim old value
}
else if (newline && newBuffer) // live token from new buffer, adjust pointers and buffers to be fully up to date
{
strcpy(readBuffer,newBuffer);
result = (result - newBuffer) + readBuffer; // ptr into current readBuffer now
*newBuffer = 0;
}
return result; // ptr into READBUFFER or 1 if from peek zone
}
// Lower-cases, in place, the parts of a token that are always lower case:
// topics/sets, user variables, system variables, functions, function variables, and
// fact-set references. Plain words are left untouched.
//   word: NUL-terminated token; modified in place.
// A comparison token is split at its operator and each side is handled separately.
// Leading '_' and '\'' prefixes are skipped. A user variable is lower-cased only up
// to its first '.'.
static void InsureAppropriateCase(char* word)
{
    char* op = FindComparison(word);
    if (op) // a comparison has 2 sides
    {
        char saved = *op;
        *op = 0; // temporarily end the token at the operator
        InsureAppropriateCase(word);
        char* rhs = (op[1] == '=' || op[1] == '?') ? op + 2 : op + 1; // == or >= or such
        InsureAppropriateCase(rhs);
        *op = saved;
        return;
    }

    if (*word == '_' || *word == '\'')
    {
        InsureAppropriateCase(word+1);
        return;
    }

    if (*word == USERVAR_PREFIX)
    {
        char* dot = strchr(word,'.');
        if (dot) *dot = 0; // protect the field names after the dot
        MakeLowerCase(word);
        if (dot) *dot = '.';
        return;
    }

    if (*word == '^' && word[1] != '"') MakeLowerCase(word);
    else if (*word == '~' || *word == SYSVAR_PREFIX || *word == '|') MakeLowerCase(word);
    else if (*word == '@' && IsDigit(word[1])) MakeLowerCase(word); // potential factref like @2subject
}
// Looks up an argument name among the current function's declared arguments,
// ignoring case.
//   arg: the name to find.
// Returns the 0-based index of the first match, or -1 when there is none.
static int GetFunctionArgument(char* arg)
{
    int slot = 0;
    while (slot < functionArgumentCount && stricmp(arg,functionArguments[slot]) != 0) ++slot;
    return (slot < functionArgumentCount) ? slot : -1;
}
// Scans the text at ptr for a deprecated variable name and raises a script error
// when it appears as a whole token.
//   ptr:     text to scan (must be non-NULL).
//   value:   the deprecated name, e.g. "$bot".
//   message: the error text passed to BADSCRIPT.
// An occurrence is ignored when it is directly preceded by USERVAR_PREFIX (so $$xxx
// is skipped), and scanning stops at the first occurrence lying after a "# "
// comment marker. On a hit, lastDeprecation is set to the match location.
//
// The preceding-character test is skipped for a match at ptr itself, so nothing
// before the start of the supplied text is ever read.
static void FindDeprecated(char* ptr, char* value, char* message)
{
    char* comment = strstr(ptr,(char*)"# ");
    size_t len = strlen(value);
    char* at = ptr;
    while ((at = strstr(at,value)) != NULL)
    {
        if (at > ptr && *(at-1) == USERVAR_PREFIX) // $$xxx should be ignored
        {
            at += 2;
            continue;
        }
        if (comment && at > comment) return; // inside a comment

        char word[MAX_WORD_SIZE];
        ReadCompiledWord(at,word);
        if (!stricmp(value,word)) // whole-token match, not just a prefix of a longer name
        {
            lastDeprecation = at;
            BADSCRIPT(message);
        }
        at += len;
    }
}
char* ReadSystemToken(char* ptr, char* word, bool separateUnderscore) // how we tokenize system stuff (rules and topic system) words -remaps & to AND
{
*word = 0;
if (!ptr) return 0;
char tmp[MAX_WORD_SIZE];
char* start = word;
ptr = SkipWhitespace(ptr);
FindDeprecated(ptr,(char*)"$bot",(char*)"Deprecated $bot needs to be $cs_bot");
FindDeprecated(ptr,(char*)"$login",(char*)"Deprecated $login needs to be $cs_login");
FindDeprecated(ptr,(char*)"$userfactlimit",(char*)"Deprecated $userfactlimit needs to be $cs_userfactlimit");
FindDeprecated(ptr,(char*)"$crashmsg",(char*)"Deprecated $crashmsg needs to be $cs_crashmsg");
FindDeprecated(ptr,(char*)"$token",(char*)"Deprecated $token needs to be $cs_token");
FindDeprecated(ptr,(char*)"$response",(char*)"Deprecated $response needs to be $cs_response");
FindDeprecated(ptr,(char*)"$randindex",(char*)"Deprecated $randindex needs to be $cs_randindex");
FindDeprecated(ptr,(char*)"$wildcardseparator",(char*)"Deprecated $wildcardseparator needs to be $cs_wildcardseparator");
FindDeprecated(ptr,(char*)"$abstract",(char*)"Deprecated $abstract needs to be $cs_abstract");
FindDeprecated(ptr,(char*)"$prepass",(char*)"Deprecated $prepass needs to be $cs_prepass");
FindDeprecated(ptr,(char*)"$control_main",(char*)"Deprecated $control_main needs to be $cs_control_main");
FindDeprecated(ptr,(char*)"$control_pre",(char*)"Deprecated $control_pre needs to be $cs_control_pre");
FindDeprecated(ptr,(char*)"$control_post",(char*)"Deprecated $control_post needs to be $cs_control_post");
#ifdef INFORMATION
A token is nominally a contiguous collection of characters broken off by tab or space (since return and newline are stripped off).
Tokens to include whitespace are encased in doublequotes.
Characters with reserved status automatically also break into individual tokens and to include them you must put \ before them. These include:
[ ] ( ) { } always and separate into individual tokens except for _( _[ _{
< > and << >> are reserved, but only when at start or end of token. Allowed comparisons embedded. As is <= and >=
Tokens ending with ' or 's break off (possessive) in patterns.
Tokens starting with prefix characters ' or ! or _ keep together, except per reserved tokens. '$junk is one token.
Variables ending with punctuation separate the punctuation. $hello. is two tokens as is _0.
Reserved characters in a composite token with _ before or after are kept. E.g. This_(_story_is_)_done
You can include a reserved tokens by putting \ in front of them.
Some tokens revise their start, like the pattern tokens representing comparison. They do this in the script compiler.
#endif
// strings
if (*ptr == '"' || ( *ptr == '^' && ptr[1] == '"') || ( *ptr == '^' && ptr[1] == '\'') || (*ptr == '\\' && ptr[1] == '"')) // doublequote maybe with functional heading
{
// simple \"
if (*ptr == '\\' && (!ptr[2] || ptr[2] == ' ' || ptr[2] == '\t' || ptr[2] == ENDUNIT)) // legal
{
*word = '\\';
word[1] = '"';
word[2] = 0;
return ptr+2;
}
bool backslash = false;
bool noblank = true;
bool functionString = false;
if (*ptr == '^')
{
*word++ = *ptr++; // ^"script" swallows ^
noblank = false; // allowed blanks at start or rear
functionString = true;
}
else if (*ptr == '\\') // \"string is this"
{
backslash = true;
++ptr;
}
char* end = ReadQuote(ptr,word,backslash,noblank,MAX_WORD_SIZE); // swallow ending marker and points past
if (!callingSystem && !isDescribe && !chunking && !functionString && *word == '"' && word[1] != '^' && strstr(word,"$_"))
WARNSCRIPT((char*)"%s has potential local var $_ in it. This cannot be passed as argument to user macros. Is it intended to be?\r\n",word)
if (end)
{
if (*word == '"' && word[1] != FUNCTIONSTRING && !functionString) return end; // all legal within
// NOW WE SEE A FUNCTION STRING
// when seeing ^, see if it remaps as a function argument
// check for internal ^ also...
char* hat = word-1;
if ((*word == '"' || *word == '\'') && functionString) hat = word; // came before
else if (*word == '"' && word[1] == FUNCTIONSTRING) hat = word+1;
else if ((word[1] == '"' || word[1] == '\'') && *word == FUNCTIONSTRING) hat = word;
// locate any local variable references in active strings
char* at = word;
while ((at = strchr(at,USERVAR_PREFIX)))
{
if (at[1] == LOCALVAR_PREFIX)
{
char* start = at;
while (++at)
{
if (!IsAlphaUTF8OrDigit(*at) && *at != '_' && *at != '-')
{
char c = *at;
*at = 0;
AddDisplay(start);
*at = c;
break;
}
}
}
else ++at;
}
while ( (hat = strchr(hat+1,'^'))) // find a hat within
{
if (IsDigit(hat[1])) continue; // normal internal
if (*(hat-1) == '\\') continue; // escaped
char* at = hat;
while (*++at && (IsAlphaUTF8OrDigit(*at) || *at == '_')){;}
char c = *at;
*at = 0;
int index = GetFunctionArgument(hat);
WORDP D = FindWord(hat); // in case its a function name
*at = c;
if (index >= 0) // was a function argument
{
strcpy(tmp,at); // protect chunk
sprintf(hat,(char*)"^%d%s",index,tmp);
}
else if (D && D->internalBits & FUNCTION_NAME){;}
else if (!renameInProgress && !(hat[1] == USERVAR_PREFIX || hat[1] == '_'))
{
*at = 0;
WARNSCRIPT((char*)"%s is not a recognized function argument. Is it intended to be?\r\n",hat)
*at = c;
}
}
hat = word-1;
while ((hat = strchr(hat+1,'_'))) // rename _var?
{
if (IsAlphaUTF8OrDigit(*(hat-1) ) || *(hat-1) == '_' || *(hat-1) == '-') continue; // not a starter
if (IsDigit(hat[1])) continue; // normal _ var
if (*(hat-1) == '\\' || *(hat-1) == '"') continue; // escaped or quoted
char* at = hat;
while (*++at && (IsAlphaUTF8OrDigit(*at))){;} // find end
WORDP D = FindWord(hat,at-hat,LOWERCASE_LOOKUP);
if (D && D->internalBits & RENAMED) // remap matchvar inside string
{
strcpy(tmp,at); // protect chunk
sprintf(hat+1,(char*)"%d%s",(unsigned int)D->properties,tmp);
}
}
hat = word-1;
while ((hat = strchr(hat+1,'@'))) // rename @set?
{
if (IsAlphaUTF8OrDigit(*(hat-1) )) continue; // not a starter
if (IsDigit(hat[1]) || hat[1] == '_') continue; // normal @ var or @_marker
if (*(hat-1) == '\\') continue; // escaped
char* at = GetSetEnd(hat);
WORDP D = FindWord(hat,at-hat,LOWERCASE_LOOKUP);
if (D && D->internalBits & RENAMED) // rename @set inside string
{
strcpy(tmp,at); // protect chunk
sprintf(hat+1,(char*)"%d%s",(unsigned int)D->properties,tmp);
}
else if (!renameInProgress) // can do anything safely in a simple quoted string
{
char c = *at;
*at = 0;
WARNSCRIPT((char*)"%s is not a recognized @rename. Is it intended to be?\r\n",hat)
*at = c;
}
}
hat = word-1;
if (strstr(readBuffer, "rename:")) // accept rename of existing constant twice in a row
hat = " ";
while ((hat = strchr(hat+1,'#'))) // rename #constant or ##constant
{
if (*(hat-1) == '\\') continue; // escaped
if (IsAlphaUTF8OrDigit(*(hat-1) )) continue; // not a starter
char* at = hat;
if (at[1] == '#') ++at; // user constant
while (*++at && (IsAlphaUTF8OrDigit(*at) || *at == '_')){;} // find end
strcpy(tmp,at); // protect chunk
*at = 0;
uint64 n;
if (hat[1] == '#' && IsAlphaUTF8(hat[2])) // user constant
{
WORDP D = FindWord(hat,at-hat,LOWERCASE_LOOKUP);
if (D && D->internalBits & RENAMED) // remap #constant inside string
{
n = D->properties;
if (D->systemFlags & CONSTANT_IS_NEGATIVE)
{
int64 x = (int64) n;
x = -x;
#ifdef WIN32
sprintf(hat,(char*)"%I64d%s",(long long int) x,tmp);
#else
sprintf(hat,(char*)"%lld%s",(long long int) x,tmp);
#endif
}
else
{
#ifdef WIN32
sprintf(hat,(char*)"%I64d%s",(long long int) n,tmp);
#else
sprintf(hat,(char*)"%lld%s",(long long int) n,tmp);
#endif
}
}
}
else // system constant
{
n = FindValueByName(hat+1);
if (!n) n = FindSystemValueByName(hat+1);
if (!n) n = FindParseValueByName(hat+1);
if (!n) n = FindMiscValueByName(hat+1);
if (n)
{
#ifdef WIN32
sprintf(hat,(char*)"%I64d%s",(long long int) n,tmp);
#else
sprintf(hat,(char*)"%lld%s",(long long int) n,tmp);
#endif
}
}
if (!*hat)
{
*hat = '#';
BADSCRIPT((char*)"Bad # constant %s\r\n",hat)
}
}
return end; // if we did swallow a string
}
if (*ptr == '\\') // was this \"xxx with NO closing
{
memmove(word+1,word,strlen(word)+1);
*word = '\\';
}
else
{
word = start;
if (*start == '^') --ptr;
}
}
// the normal composite token
bool quote = false;
char* xxorig = ptr;
bool var = (*ptr == '$');
int brackets = 0;
while (*ptr)
{
if (*ptr == ENDUNIT) break;
if (patternContext && quote) {} // allow stuff in comparison quote
else if (*ptr == ' ' || *ptr == '\t') break; // legal
if (patternContext && *ptr == '"') quote = !quote;
char c = *ptr++;
*word++ = c;
*word = 0;
if ((word - start) > (MAX_WORD_SIZE - 2)) break; // avoid overflow
// want to leave array json notation alone but react to [...] touching a variable - $var]
if (var && c == '[') // ANY variable should be separated by space from a [ if not json array
{
++brackets; // this MUST then be a json array and brackets will balance
if (brackets > 1) BADSCRIPT("$var MUST be separated from [ unless you intend json array reference\r\n")
}
else if (var && c == ']')
{
if (--brackets < 0) // if brackets is set, we must be in json array
{
--ptr;
--word;
break;
}
}
else if (GetNestingData(c)) // break off nesting attached to a started token unless its an escaped token
{
size_t len = word - start;
if (len == 1) break; // automatically token by itself
if (len == 2)
{
if ((*start == '_' || *start == '!') && (c == '[' || c == '(' || c == '{')) break; // one token as _( or !(
if (*start == '\\') break; // one token escaped
}
// split off into two tokens
--ptr;
--word;
break;
}
}
*word = 0;
word = start;
size_t len = strlen(word);
if (len == 0) return ptr;
if (patternContext && word[len - 1] == '"' && word[len - 2] != '\\')
{
char* quote = strchr(word, '"');
if (quote == word+len-1) BADSCRIPT("Tailing quote without start: %s\r\n", word)
}
if (*word == '#' && !strstr(readBuffer,"rename:")) // is this a constant from dictionary.h? or user constant
{
uint64 n;
if (word[1] == '#' && IsAlphaUTF8(word[2])) // user constant
{
WORDP D = FindWord(word,0,LOWERCASE_LOOKUP);
if (D && D->internalBits & RENAMED) // remap #constant
{
n = D->properties;
if (D->systemFlags & CONSTANT_IS_NEGATIVE)
{
int64 x = (int64) n;
x = -x;
#ifdef WIN32
sprintf(word,(char*)"%I64d",(long long int) x);
#else
sprintf(word,(char*)"%lld",(long long int) x);
#endif
}
else
{
#ifdef WIN32
sprintf(word,(char*)"%I64d",(long long int) n);
#else
sprintf(word,(char*)"%lld",(long long int) n);
#endif
}
}
else if (renameInProgress) {;} // leave token alone, defining
else BADSCRIPT((char*)"Bad user constant %s\r\n",word)
}
else // system constant
{
n = FindValueByName(word+1);
if (!n) n = FindSystemValueByName(word+1);
if (!n) n = FindParseValueByName(word+1);
if (!n) n = FindMiscValueByName(word+1);
if (n)
{
#ifdef WIN32
sprintf(word,(char*)"%I64d",(long long int) n);
#else
sprintf(word,(char*)"%lld",(long long int) n);
#endif
}
else if (!IsDigit(word[1]) && word[1] != '!') //treat rest as a comment line (except if has number after it, which is user text OR internal arg reference for function
{
if (IsAlphaUTF8(word[1]))
BADSCRIPT((char*)"Bad numeric # constant %s\r\n",word)
*ptr = 0;
*word = 0;
}
}
}
if ( *word == '_' && (IsAlphaUTF8(word[1]) ) ) // is this a rename _
{
WORDP D = FindWord(word);
if (D && D->internalBits & RENAMED) sprintf(word+1,(char*)"%d",(unsigned int)D->properties); // remap match var convert to number
// patterns can underscore ANYTING
}
if (*word == '\'' && word[1] == '_' && (IsAlphaUTF8(word[2]) ) ) // is this a rename _ with '
{
WORDP D = FindWord(word+1);
if (D && D->internalBits & RENAMED) sprintf(word+2,(char*)"%d",(unsigned int)D->properties); // remap match var convert to number
else if (!renameInProgress && !patternContext) // patterns can underscore ANYTING
WARNSCRIPT((char*)"%s is not a recognized _rename. Should it be?\r\n",word+1)
}
if ( *word == '@' && IsAlphaUTF8(word[1]) ) // is this a rename @
{
char* at = GetSetEnd(word);
WORDP D = FindWord(word,at-word);
if (D && D->internalBits & RENAMED) // remap @set in string
{
strcpy(tmp,at); // protect chunk
sprintf(word+1,(char*)"%d%s",(unsigned int)D->properties,tmp);
}
else if (!renameInProgress) WARNSCRIPT((char*)"%s is not a recognized @rename. Is it intended to be?\r\n",word)
}
if ( *word == '@' && word[1] == '_' && IsAlphaUTF8(word[2])) // is this a rename @_0+
{
size_t len = strlen(word);
WORDP D = FindWord(word + 1, len - 1); // @_data marker
char c = 0;
if (!D)
{
c = word[len - 1];
word[len - 1] = 0;
D = FindWord(word + 1, len - 2);
word[len - 1] = c;
}
if (D && D->internalBits & RENAMED)
{
if (c) sprintf(word+2,(char*)"%d%c",(unsigned int)D->properties,c); // remap @set in string
else sprintf(word + 2, (char*)"%d", (unsigned int)D->properties); // remap @set in string
}
else if (!renameInProgress) WARNSCRIPT((char*)"%s is not a recognized @rename. Is it intended to be?\r\n",word)
}
// some tokens require special splitting
// break off starting << from <<hello
if (*word == '<' && word[1] != '=')
{
if (len == 3 && *word == word[1] && word[2] == '=') {;}
else if (word[1] == '<')
{
if (word[2]) // not assign operator
{
ptr -= strlen(word) - 2; // safe
word[2] = 0;
len -= 2;
}
}
}
// break off ending >> from hello>>
if (len > 2 && word[len-1] == '>')
{
if (len == 3 && *word == word[1] && word[2] == '=') {;}
else if (word[len-2] == '>')
{
ptr -= 2;
word[len-2] = 0;
len -= 2;
}
}
// break off punctuation from variable end
if (len > 2 && ((*word == USERVAR_PREFIX && !IsDigit(word[1])) || *word == '^' || (*word == '@' && IsDigit(word[1])) || *word == SYSVAR_PREFIX || (*word == '_' && IsDigit(word[1])) || (*word == '\'' && word[1] == '_'))) // not currency
{
if (!patternContext || word[len-1] != '?') // BUT NOT $$xxx? in pattern context
{
while (IsRealPunctuation(word[len-1])) // one would be enough, but $hello... needs to be addressed
{
--len;
--ptr;
}
word[len] = 0;
}
}
// break off opening < in pattern
if (patternContext && *word == '<' && word[1] != '<')
{
ptr -= len - 1;
len = 1;
word[1] = 0;
}
// break off closing > in pattern unless escaped or notted
if (len == 2 && (*word == '!' || *word == '\\')){;}
else if (patternContext && len > 1 && word[len-1] == '>' && word[len-2] != '>' && word[len-2] != '_' && word[len-2] != '!')
{
ptr -= len - 1;
--len;
word[len-1] = 0;
}
// find internal comparison op if any
char* at = (patternContext) ? FindComparison(word) : 0;
if (at && *word == '*' && !IsDigit(word[1]))
{
if (compiling) BADSCRIPT((char*)"TOKENS-1 Cannot do comparison on variable gap %s . Memorize and compare against _# instead later.\r\n",word)
}
if (at && *at == '!' && at[1] == '$') { ; } // allow !$xxx
else if (at) // revise comparison operators
{
if (*at == '!') ++at;
++at;
if (*at == '^' && at[1]) // remap function arg on right side.
{
int index = GetFunctionArgument(at);
if (index >= 0) sprintf(at,(char*)"^%d",index);
}
if (*at == '_' && IsAlphaUTF8(word[1]) ) // remap rename matchvar arg on right side.
{
WORDP D = FindWord(at);
if (D && D->internalBits & RENAMED) sprintf(at,(char*)"_%d",(unsigned int)D->properties);
}
if (*at == '@' && IsAlphaUTF8(word[1]) ) // remap @set arg on right side.
{
char* at1 = GetSetEnd(at);
WORDP D = FindWord(at,at1-at);
if (D && D->internalBits & RENAMED) // remap @set on right side
{
strcpy(tmp,at1); // protect chunk
sprintf(at+1,(char*)"%d%s",(unsigned int)D->properties,tmp);
}
}
// check for remap on LHS
if (*word == '^')
{
char c = *--at;
*at = 0;
int index = GetFunctionArgument(word);
*at = c;
if (index >= 0)
{
sprintf(tmp,(char*)"^%d%s",index,at);
strcpy(word,tmp);
}
}
// check for rename on LHS
if (*word == '_' && IsAlphaUTF8(word[1]) )
{
char* at = word;
while (IsAlphaUTF8OrDigit(*++at)){;}
WORDP D = FindWord(word,at-word);
if (D && D->internalBits & RENAMED) // remap match var
{
sprintf(tmp,(char*)"%d%s",(unsigned int)D->properties,at);
strcpy(word+1,tmp);
}
}
// check for rename on LHS
if (*word == '@' && IsAlphaUTF8(word[1]) )
{
char* at = GetSetEnd(word);
WORDP D = FindWord(word,at-word);
if (D && D->internalBits & RENAMED) // remap @set in string
{
strcpy(tmp,at); // protect chunk
sprintf(word+1,(char*)"%d%s",(unsigned int)D->properties,tmp);
}
}
}
// when seeing ^, see if it remaps as a function argument
// check for internal ^ also...
char* hat = word-1;
while ( (hat = strchr(hat+1,'^'))) // find a hat within
{
char* at = hat;
while (*++at && (IsAlphaUTF8(*at) || *at == '_' || IsDigit(*at))){;}
char c = *at;
*at = 0; // terminate it so internal ^ is recognized uniquely
strcpy(tmp,hat);
*at = c;
while (*tmp)
{
int index = GetFunctionArgument(tmp);
if (index >= 0)
{
char remainder[MAX_WORD_SIZE];
strcpy(remainder,at); // protect chunk AFTER this
sprintf(hat,(char*)"^%d%s",index,remainder);
break;
}
else tmp[0] = 0; // just abort it for now shrink it smaller, to handle @9subject kinds of behaviors
}
}
// same for quoted function arg
if (*word == '\'' && word[1] == '^' && word[2])
{
int index = GetFunctionArgument(word+1);
if (index >= 0) sprintf(word,(char*)"'^%d",index);
}
// break apart math on variables eg $value+2 as a service to the user
if ((*word == '%' || *word == '$') && word[1]) // cannot use _ here as that will break memorization pattern tokens
{
char* at = word + 1;
if (at[1] == '$' || at[1] == '_') ++at; // skip over 2ndary marker
--at;
while (LegalVarChar(*++at) ); // find end of initial word
if (*word == '$' && (*at == '.' || *at == '[' || *at == ']') && (LegalVarChar(at[1]) || at[1] == '$' || at[1] == '[' || at[1] == ']'))// allow $x.y as a complete name
{
while (LegalVarChar(*++at) || *at == '.' || *at == '$' || (*at == '[' || *at == ']') ); // find end of field name sequence
if (*(at-1) == '.') --at; // tailing period cannot be part of it
}
if (*at && IsPunctuation(*at) & ARITHMETICS && *at != '=')
{
// - is legal in a var or word token
if (*at != '-' || (!IsAlphaUTF8OrDigit(at[1]) && at[1] != '_'))
{
ptr -= strlen(at);
*at = 0;
len = at - start;
}
}
}
char* tilde = (IsAlphaUTF8(*word)) ? strchr(word+1,'~') : 0;
if (tilde) // has specific meaning like African-american~1n or African-american~1
{
if (IsDigit(*++tilde)) // we know the meaning, removing any POS marker since that is redundant
{
if (IsDigit(*++tilde)) ++tilde;
if (*tilde && !tilde[1]) *tilde = 0; // trim off pos marker
// now force meaning to master
MEANING M = ReadMeaning(word,true,false);
if (M)
{
M = GetMaster(M);
sprintf(word,(char*)"%s~%d",Meaning2Word(M)->word,Meaning2Index(M));
}
}
}
// universal cover of simple use - complex tokens require processing elsewhere
if (*word == USERVAR_PREFIX && word[1] == LOCALVAR_PREFIX)
{