forked from meteor/meteor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.js
420 lines (382 loc) · 23.8 KB
/
lexer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
var regexEscape = function (str) {
return str.replace(/[\][^$\\.*+?(){}|]/g, '\\$&');
};
// Adapted from source code of http://xregexp.com/plugins/#unicode
var unicodeCategories = {
Ll: "0061-007A00B500DF-00F600F8-00FF01010103010501070109010B010D010F01110113011501170119011B011D011F01210123012501270129012B012D012F01310133013501370138013A013C013E014001420144014601480149014B014D014F01510153015501570159015B015D015F01610163016501670169016B016D016F0171017301750177017A017C017E-0180018301850188018C018D019201950199-019B019E01A101A301A501A801AA01AB01AD01B001B401B601B901BA01BD-01BF01C601C901CC01CE01D001D201D401D601D801DA01DC01DD01DF01E101E301E501E701E901EB01ED01EF01F001F301F501F901FB01FD01FF02010203020502070209020B020D020F02110213021502170219021B021D021F02210223022502270229022B022D022F02310233-0239023C023F0240024202470249024B024D024F-02930295-02AF037103730377037B-037D039003AC-03CE03D003D103D5-03D703D903DB03DD03DF03E103E303E503E703E903EB03ED03EF-03F303F503F803FB03FC0430-045F04610463046504670469046B046D046F04710473047504770479047B047D047F0481048B048D048F04910493049504970499049B049D049F04A104A304A504A704A904AB04AD04AF04B104B304B504B704B904BB04BD04BF04C204C404C604C804CA04CC04CE04CF04D104D304D504D704D904DB04DD04DF04E104E304E504E704E904EB04ED04EF04F104F304F504F704F904FB04FD04FF05010503050505070509050B050D050F05110513051505170519051B051D051F05210523052505270561-05871D00-1D2B1D6B-1D771D79-1D9A1E011E031E051E071E091E0B1E0D1E0F1E111E131E151E171E191E1B1E1D1E1F1E211E231E251E271E291E2B1E2D1E2F1E311E331E351E371E391E3B1E3D1E3F1E411E431E451E471E491E4B1E4D1E4F1E511E531E551E571E591E5B1E5D1E5F1E611E631E651E671E691E6B1E6D1E6F1E711E731E751E771E791E7B1E7D1E7F1E811E831E851E871E891E8B1E8D1E8F1E911E931E95-1E9D1E9F1EA11EA31EA51EA71EA91EAB1EAD1EAF1EB11EB31EB51EB71EB91EBB1EBD1EBF1EC11EC31EC51EC71EC91ECB1ECD1ECF1ED11ED31ED51ED71ED91EDB1EDD1EDF1EE11EE31EE51EE71EE91EEB1EED1EEF1EF11EF31EF51EF71EF91EFB1EFD1EFF-1F071F10-1F151F20-1F271F30-1F371F40-1F451F50-1F571F60-1F671F70-1F7D1F80-1F871F90-1F971FA0-1FA71FB0-1FB41FB61FB71FBE1FC2-1FC41FC61FC71FD0-1FD31FD61FD71FE0-1FE71FF2-1FF41FF61FF7210A210E210F2113212F21342139213C213D2146-2149214E21842C30-2C5E2C612C652C662C682C6A2C6C2C712C732C742C76-2C7B2C812C832C852C872C892C8B2C8D2C8F2C912C932C952C972C992C9B2C9D2C9F2CA12CA32CA52CA72CA92CAB2CAD2CAF2CB12CB32CB52CB72CB92CBB2CBD2CBF2CC12CC32CC52CC72CC92CCB2CCD2CCF2CD12CD32CD52CD72CD92CDB2CDD2CDF2CE12CE32CE42CEC2CEE2CF32D00-2D252D272D2DA641A643A645A647A649A64BA64DA64FA651A653A655A657A659A65BA65DA65FA661A663A665A667A669A66BA66DA681A683A685A687A689A68BA68DA68FA691A693A695A697A723A725A727A729A72BA72DA72F-A731A733A735A737A739A73BA73DA73FA741A743A745A747A749A74BA74DA74FA751A753A755A757A759A75BA75DA75FA761A763A765A767A769A76BA76DA76FA771-A778A77AA77CA77FA781A783A785A787A78CA78EA791A793A7A1A7A3A7A5A7A7A7A9A7FAFB00-FB06FB13-FB17FF41-FF5A",
Lm: "02B0-02C102C6-02D102E0-02E402EC02EE0374037A0559064006E506E607F407F507FA081A0824082809710E460EC610FC17D718431AA71C78-1C7D1D2C-1D6A1D781D9B-1DBF2071207F2090-209C2C7C2C7D2D6F2E2F30053031-3035303B309D309E30FC-30FEA015A4F8-A4FDA60CA67FA717-A71FA770A788A7F8A7F9A9CFAA70AADDAAF3AAF4FF70FF9EFF9F",
Lo: "00AA00BA01BB01C0-01C3029405D0-05EA05F0-05F20620-063F0641-064A066E066F0671-06D306D506EE06EF06FA-06FC06FF07100712-072F074D-07A507B107CA-07EA0800-08150840-085808A008A2-08AC0904-0939093D09500958-09610972-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A390A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE00AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B830B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E450E810E820E840E870E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC40EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066106E-10701075-1081108E10D0-10FA10FD-1248124A-124D1250-12561258125A-125D1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-13151318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-17311740-17511760-176C176E-17701780-17B317DC1820-18421844-18771880-18A818AA18B0-18F51900-191C1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541B05-1B331B45-1B4B1B83-1BA01BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C771CE9-1CEC1CEE-1CF11CF51CF62135-21382D30-2D672D80-2D962DA0-2DA62DA8-2DAE2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE3006303C3041-3096309F30A1-30FA30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCCA000-A014A016-A48CA4D0-A4F7A500-A60BA610-A61FA62AA62BA66EA6A0-A6E5A7FB-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2AA00-AA28AA40-AA42AA44-AA4BAA60-AA6FAA71-AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADBAADCAAE0-AAEAAAF2AB01-AB06AB09-AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3DFD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF66-FF6FFF71-FF9DFFA0-FFBEFFC2-FFC7FFCA-FFCFFFD2-FFD7FFDA-FFDC",
Lt: "01C501C801CB01F21F88-1F8F1F98-1F9F1FA8-1FAF1FBC1FCC1FFC",
Lu: "0041-005A00C0-00D600D8-00DE01000102010401060108010A010C010E01100112011401160118011A011C011E01200122012401260128012A012C012E01300132013401360139013B013D013F0141014301450147014A014C014E01500152015401560158015A015C015E01600162016401660168016A016C016E017001720174017601780179017B017D018101820184018601870189-018B018E-0191019301940196-0198019C019D019F01A001A201A401A601A701A901AC01AE01AF01B1-01B301B501B701B801BC01C401C701CA01CD01CF01D101D301D501D701D901DB01DE01E001E201E401E601E801EA01EC01EE01F101F401F6-01F801FA01FC01FE02000202020402060208020A020C020E02100212021402160218021A021C021E02200222022402260228022A022C022E02300232023A023B023D023E02410243-02460248024A024C024E03700372037603860388-038A038C038E038F0391-03A103A3-03AB03CF03D2-03D403D803DA03DC03DE03E003E203E403E603E803EA03EC03EE03F403F703F903FA03FD-042F04600462046404660468046A046C046E04700472047404760478047A047C047E0480048A048C048E04900492049404960498049A049C049E04A004A204A404A604A804AA04AC04AE04B004B204B404B604B804BA04BC04BE04C004C104C304C504C704C904CB04CD04D004D204D404D604D804DA04DC04DE04E004E204E404E604E804EA04EC04EE04F004F204F404F604F804FA04FC04FE05000502050405060508050A050C050E05100512051405160518051A051C051E05200522052405260531-055610A0-10C510C710CD1E001E021E041E061E081E0A1E0C1E0E1E101E121E141E161E181E1A1E1C1E1E1E201E221E241E261E281E2A1E2C1E2E1E301E321E341E361E381E3A1E3C1E3E1E401E421E441E461E481E4A1E4C1E4E1E501E521E541E561E581E5A1E5C1E5E1E601E621E641E661E681E6A1E6C1E6E1E701E721E741E761E781E7A1E7C1E7E1E801E821E841E861E881E8A1E8C1E8E1E901E921E941E9E1EA01EA21EA41EA61EA81EAA1EAC1EAE1EB01EB21EB41EB61EB81EBA1EBC1EBE1EC01EC21EC41EC61EC81ECA1ECC1ECE1ED01ED21ED41ED61ED81EDA1EDC1EDE1EE01EE21EE41EE61EE81EEA1EEC1EEE1EF01EF21EF41EF61EF81EFA1EFC1EFE1F08-1F0F1F18-1F1D1F28-1F2F1F38-1F3F1F48-1F4D1F591F5B1F5D1F5F1F68-1F6F1FB8-1FBB1FC8-1FCB1FD8-1FDB1FE8-1FEC1FF8-1FFB21022107210B-210D2110-211221152119-211D212421262128212A-212D2130-2133213E213F214521832C00-2C2E2C602C62-2C642C672C692C6B2C6D-2C702C722C752C7E-2C802C822C842C862C882C8A2C8C2C8E2C902C922C942C962C982C9A2C9C2C9E2CA02CA22CA42CA62CA82CAA2CAC2CAE2CB02CB22CB42CB62CB82CBA2CBC2CBE2CC02CC22CC42CC62CC82CCA2CCC2CCE2CD02CD22CD42CD62CD82CDA2CDC2CDE2CE02CE22CEB2CED2CF2A640A642A644A646A648A64AA64CA64EA650A652A654A656A658A65AA65CA65EA660A662A664A666A668A66AA66CA680A682A684A686A688A68AA68CA68EA690A692A694A696A722A724A726A728A72AA72CA72EA732A734A736A738A73AA73CA73EA740A742A744A746A748A74AA74CA74EA750A752A754A756A758A75AA75CA75EA760A762A764A766A768A76AA76CA76EA779A77BA77DA77EA780A782A784A786A78BA78DA790A792A7A0A7A2A7A4A7A6A7A8A7AAFF21-FF3A",
Mc: "0903093B093E-09400949-094C094E094F0982098309BE-09C009C709C809CB09CC09D70A030A3E-0A400A830ABE-0AC00AC90ACB0ACC0B020B030B3E0B400B470B480B4B0B4C0B570BBE0BBF0BC10BC20BC6-0BC80BCA-0BCC0BD70C01-0C030C41-0C440C820C830CBE0CC0-0CC40CC70CC80CCA0CCB0CD50CD60D020D030D3E-0D400D46-0D480D4A-0D4C0D570D820D830DCF-0DD10DD8-0DDF0DF20DF30F3E0F3F0F7F102B102C10311038103B103C105610571062-10641067-106D108310841087-108C108F109A-109C17B617BE-17C517C717C81923-19261929-192B193019311933-193819B0-19C019C819C91A19-1A1B1A551A571A611A631A641A6D-1A721B041B351B3B1B3D-1B411B431B441B821BA11BA61BA71BAA1BAC1BAD1BE71BEA-1BEC1BEE1BF21BF31C24-1C2B1C341C351CE11CF21CF3302E302FA823A824A827A880A881A8B4-A8C3A952A953A983A9B4A9B5A9BAA9BBA9BD-A9C0AA2FAA30AA33AA34AA4DAA7BAAEBAAEEAAEFAAF5ABE3ABE4ABE6ABE7ABE9ABEAABEC",
Mn: "0300-036F0483-04870591-05BD05BF05C105C205C405C505C70610-061A064B-065F067006D6-06DC06DF-06E406E706E806EA-06ED07110730-074A07A6-07B007EB-07F30816-0819081B-08230825-08270829-082D0859-085B08E4-08FE0900-0902093A093C0941-0948094D0951-095709620963098109BC09C1-09C409CD09E209E30A010A020A3C0A410A420A470A480A4B-0A4D0A510A700A710A750A810A820ABC0AC1-0AC50AC70AC80ACD0AE20AE30B010B3C0B3F0B41-0B440B4D0B560B620B630B820BC00BCD0C3E-0C400C46-0C480C4A-0C4D0C550C560C620C630CBC0CBF0CC60CCC0CCD0CE20CE30D41-0D440D4D0D620D630DCA0DD2-0DD40DD60E310E34-0E3A0E47-0E4E0EB10EB4-0EB90EBB0EBC0EC8-0ECD0F180F190F350F370F390F71-0F7E0F80-0F840F860F870F8D-0F970F99-0FBC0FC6102D-10301032-10371039103A103D103E10581059105E-10601071-1074108210851086108D109D135D-135F1712-17141732-1734175217531772177317B417B517B7-17BD17C617C9-17D317DD180B-180D18A91920-19221927192819321939-193B1A171A181A561A58-1A5E1A601A621A65-1A6C1A73-1A7C1A7F1B00-1B031B341B36-1B3A1B3C1B421B6B-1B731B801B811BA2-1BA51BA81BA91BAB1BE61BE81BE91BED1BEF-1BF11C2C-1C331C361C371CD0-1CD21CD4-1CE01CE2-1CE81CED1CF41DC0-1DE61DFC-1DFF20D0-20DC20E120E5-20F02CEF-2CF12D7F2DE0-2DFF302A-302D3099309AA66FA674-A67DA69FA6F0A6F1A802A806A80BA825A826A8C4A8E0-A8F1A926-A92DA947-A951A980-A982A9B3A9B6-A9B9A9BCAA29-AA2EAA31AA32AA35AA36AA43AA4CAAB0AAB2-AAB4AAB7AAB8AABEAABFAAC1AAECAAEDAAF6ABE5ABE8ABEDFB1EFE00-FE0FFE20-FE26",
Nd: "0030-00390660-066906F0-06F907C0-07C90966-096F09E6-09EF0A66-0A6F0AE6-0AEF0B66-0B6F0BE6-0BEF0C66-0C6F0CE6-0CEF0D66-0D6F0E50-0E590ED0-0ED90F20-0F291040-10491090-109917E0-17E91810-18191946-194F19D0-19D91A80-1A891A90-1A991B50-1B591BB0-1BB91C40-1C491C50-1C59A620-A629A8D0-A8D9A900-A909A9D0-A9D9AA50-AA59ABF0-ABF9FF10-FF19",
Nl: "16EE-16F02160-21822185-218830073021-30293038-303AA6E6-A6EF",
Pc: "005F203F20402054FE33FE34FE4D-FE4FFF3F"
};
var unicodeClass = function (abbrev) {
return '[' +
unicodeCategories[abbrev].replace(/[0-9A-F]{4}/ig, "\\u$&") + ']';
};
// See ECMA-262 spec, 3rd edition, section 7
// Section 7.2
// Match one or more characters of whitespace, excluding line terminators.
// Do this by matching reluctantly, stopping at a non-dot (line terminator
// or end of string) or a non-whitespace.
// We are taking advantage of the fact that we are parsing JS from JS in
// regexes like this by "passing through" the spec's definition of whitespace,
// which is the same in regexes and the lexical grammar.
var rWhiteSpace = /[^\S\u000A\u000D\u2028\u2029]+/g;
// Section 7.3
// Match one line terminator. Same as (?!.)[\s\S] but more explicit.
var rLineTerminator = /[\u000A\u000D\u2028\u2029]/g;
// Section 7.4
// Match one multi-line comment.
// [\s\S] is shorthand for any character, including newlines.
// The *? reluctant qualifier makes this easy.
var rMultiLineComment = /\/\*[\s\S]*?\*\//g;
// Match one single-line comment, not including the line terminator.
var rSingleLineComment = /\/\/.*/g;
// Section 7.6
// Match one or more characters that can start an identifier.
// This is IdentifierStart+.
var rIdentifierPrefix = new RegExp(
"([a-zA-Z$_]+|\\\\u[0-9a-fA-F]{4}|" +
[unicodeClass('Lu'), unicodeClass('Ll'), unicodeClass('Lt'),
unicodeClass('Lm'), unicodeClass('Lo'), unicodeClass('Nl')].join('|') +
")+", 'g');
// Match one or more characters that can continue an identifier.
// This is (IdentifierPart and not IdentifierStart)+.
// To match a full identifier, match rIdentifierPrefix, then
// match rIdentifierMiddle followed by rIdentifierPrefix until they both fail.
var rIdentifierMiddle = new RegExp(
"([0-9]|" + [unicodeClass('Mn'), unicodeClass('Mc'), unicodeClass('Nd'),
unicodeClass('Pc')].join('|') + ")+", 'g');
// Section 7.7
// Match one punctuator (except for division punctuators).
var rPunctuator = new RegExp(
regexEscape("{ } ( ) [ ] . ; , < > <= >= == != === !== + - * % ++ -- << >> "+
">>> & | ^ ! ~ && || ? : = += -= *= %= <<= >>= >>>= &= |= ^=")
// sort from longest to shortest so that we don't match '==' for '===' and
// '*' for '*=', etc.
.split(' ').sort(function (a,b) { return b.length - a.length; })
.join('|'), 'g');
var rDivPunctuator = /\/=?/g;
// Section 7.8.3
var rHexLiteral = /0[xX][0-9a-fA-F]+(?!\w)/g;
var rOctLiteral = /0[0-7]+(?!\w)/g; // deprecated
var rDecLiteral =
/(((0|[1-9][0-9]*)(\.[0-9]*)?)|\.[0-9]+)([Ee][+-]?[0-9]+)?(?!\w)/g;
// Section 7.8.4
var rStringQuote = /["']/g;
// Match one or more characters besides quotes, backslashes, or line ends
var rStringMiddle = /(?=.)[^"'\\]+?((?!.)|(?=["'\\]))/g;
// Match one escape sequence, including the backslash.
var rEscapeSequence =
/\\(['"\\bfnrtv]|0(?![0-9])|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|(?=.)[^ux0-9])/g;
// Match one ES5 line continuation
var rLineContinuation =
/\\(\r\n|[\u000A\u000D\u2028\u2029])/g;
// Section 7.8.5
// Match one regex literal, including slashes, not including flags.
// Support unescaped '/' in character classes, per 5th ed.
// For example: `/[/]/` will match the string `"/"`.
//
// Explanation of regex:
// - Match `/` not followed by `/` or `*`
// - Match one or more of any of these:
// - Backslash followed by one non-newline
// - One non-newline, not `[` or `\` or `/`
// - A character class, beginning with `[` and ending with `]`.
// In the middle is zero or more of any of these:
// - Backslash followed by one non-newline
// - One non-newline, not `]` or `\`
// - Match closing `/`
var rRegexLiteral =
/\/(?![*\/])(\\.|(?=.)[^\[\/\\]|\[(\\.|(?=.)[^\]\\])*\])+\//g;
var rRegexFlags = /[a-zA-Z]*/g;
var rDecider =
/((?=.)\s)|(\/[\/\*]?)|([\][{}();,<>=!+*%&|^~?:-]|\.(?![0-9]))|([\d.])|(["'])|(.)|([\S\s])/g;
var keywordLookup = {
' break': 'KEYWORD',
' case': 'KEYWORD',
' catch': 'KEYWORD',
' continue': 'KEYWORD',
' debugger': 'KEYWORD',
' default': 'KEYWORD',
' delete': 'KEYWORD',
' do': 'KEYWORD',
' else': 'KEYWORD',
' finally': 'KEYWORD',
' for': 'KEYWORD',
' function': 'KEYWORD',
' if': 'KEYWORD',
' in': 'KEYWORD',
' instanceof': 'KEYWORD',
' new': 'KEYWORD',
' return': 'KEYWORD',
' switch': 'KEYWORD',
' this': 'KEYWORD',
' throw': 'KEYWORD',
' try': 'KEYWORD',
' typeof': 'KEYWORD',
' var': 'KEYWORD',
' void': 'KEYWORD',
' while': 'KEYWORD',
' with': 'KEYWORD',
' false': 'BOOLEAN',
' true': 'BOOLEAN',
' null': 'NULL'
};
var makeSet = function (array) {
var s = {};
for (var i = 0, N = array.length; i < N; i++)
s[array[i]] = true;
return s;
};
var nonTokenTypes = makeSet('WHITESPACE COMMENT NEWLINE EOF ERROR'.split(' '));
var punctuationBeforeDivision = makeSet('] ) } ++ --'.split(' '));
var keywordsBeforeDivision = makeSet('this'.split(' '));
var guessIsDivisionPermittedAfterToken = function (tok) {
// Figure out if a '/' character should be interpreted as division
// rather than the start of a regular expression when it follows the
// token, which must be a token lexeme per isToken().
// The beginning of section 7 of the spec briefly
// explains what's going on; basically the lexical grammar can't
// distinguish, for example, `e/f/g` (division) from `e=/f/g`
// (assignment of a regular expression), among many other variations.
//
// THIS IS ONLY A HEURISTIC, though it will rarely fail.
// Here are the two cases I know of where help from the parser is needed:
// - if (foo)
// /ba/.test("banana") && console.log("matches");
// (Close paren of a control structure before a statement starting with
// a regex literal. Starting a statement with a regex literal is
// unusual, of course, because it's hard to have a side effect.)
// - ++ /foo/.abc
// (Prefix `++` or `--` before an expression starting with a regex
// literal. This will run but I can't see any use for it.)
switch (tok.type()) {
case "PUNCTUATION":
// few punctuators can end an expression, but e.g. `)`
return !! punctuationBeforeDivision[tok.text()];
case "KEYWORD":
// few keywords can end an expression, but e.g. `this`
return !! keywordsBeforeDivision[tok.text()];
case "IDENTIFIER":
return true;
default: // literal
return true;
}
};
////////// PUBLIC API
var Lexeme = function (pos, type, text) {
this._pos = pos;
this._type = type;
this._text = text;
};
Lexeme.prototype.startPos = function () {
return this._pos;
};
Lexeme.prototype.endPos = function () {
return this._pos + this._text.length;
};
Lexeme.prototype.type = function () {
return this._type;
};
Lexeme.prototype.text = function () {
return this._text;
};
Lexeme.prototype.isToken = function () {
return ! nonTokenTypes[this._type];
};
Lexeme.prototype.isError = function () {
return this._type === "ERROR";
};
Lexeme.prototype.isEOF = function () {
return this._type === "EOF";
};
Lexeme.prototype.prev = function () {
return this._prev;
};
Lexeme.prototype.next = function () {
return this._next;
};
Lexeme.prototype.toString = function () {
return this.isError() ? "ERROR" :
this.isEOF() ? "EOF" : "`" + this.text() + "`";
};
// Create a Lexer for the given string of JavaScript code.
//
// A lexer keeps a pointer `pos` into the string that is
// advanced when you ask for the next lexeme with `next()`.
//
// XXXXX UPDATE DOCS
// Properties:
// code: Original JavaScript code string.
// pos: Current index into the string. You can assign to it
// to continue lexing from a different position. After
// calling next(), it is the ending index of the most
// recent lexeme.
// lastPos: The starting index of the most recent lexeme.
// Equal to `pos - text.length`.
// text: Text of the last lexeme as a string.
// type: Type of the last lexeme, as returned by `next()`.
// divisionPermitted: Whether a '/' character should be interpreted
// as division rather than the start of a regular expression.
// This flag is set automatically during lexing based on the
// previous token (i.e. the most recent token lexeme), but
// it is technically only a heuristic.
// Thie flag can be read and set manually to affect the
// parsing of the next token.
JSLexer = function (code) {
this.code = code;
this.pos = 0;
this.divisionPermitted = false;
this.lastLexeme = null;
};
JSLexer.Lexeme = Lexeme;
// XXXX UPDATE DOCS
// Return the type of the next of lexeme starting at `pos`, and advance
// `pos` to the end of the lexeme. The text of the lexeme is available
// in `text`. The text is always the substring of `code` between the
// old and new values of `pos`. An "EOF" lexeme terminates
// the stream. "ERROR" lexemes indicate a bad input string. Out of all
// lexemes, only "EOF" has empty text, and it always has empty text.
// All others contain at least one character from the source code.
//
// Lexeme types:
// Literals: BOOLEAN, NULL, REGEX, NUMBER, STRING
// Whitespace-like: WHITESPACE, COMMENT, NEWLINE, EOF
// Other Tokens: IDENTIFIER, KEYWORD, PUNCTUATION
// ... and ERROR
JSLexer.prototype.next = function () {
var self = this;
var code = self.code;
var origPos = self.pos;
var divisionPermitted = self.divisionPermitted;
if (origPos > code.length)
throw new Error("out of range");
// Running regexes inside this function will move this local
// `pos` forward.
// When we commit to emitting a lexeme, we'll set self.pos
// based on it.
var pos = origPos;
// Emit a lexeme. Always called as `return lexeme(type)`.
var lexeme = function (type) {
// If `pos` hasn't moved, we consider this an error.
// This means that grammar cases that only run one regex
// or an alternation ('||') of regexes don't need to
// check for failure.
// This also guarantees that only EOF lexemes are empty.
if (pos === origPos && type !== 'EOF') {
type = 'ERROR';
pos = origPos + 1;
}
self.pos = pos;
var lex = new JSLexer.Lexeme(origPos, type, code.substring(origPos, pos));
if (self.lastLexeme) {
self.lastLexeme._next = lex;
lex._prev = self.lastLexeme;
}
self.lastLexeme = lex;
if (lex.isToken())
self.divisionPermitted = guessIsDivisionPermittedAfterToken(lex);
return lex;
};
if (pos === code.length)
return lexeme('EOF');
// Result of the regex match in the most recent call to `run`.
var match = null;
// Run a regex starting from `pos`, recording the end of the matched
// string in `pos` and the match data in `match`. The regex must have
// the 'g' (global) flag. If it doesn't match at `pos`, set `match`
// to null. The caller should expect the regex to match at `pos`, as
// failure is too expensive to run in a tight loop.
var run = function (regex) {
// Cause regex matching to start at `pos`.
regex.lastIndex = pos;
match = regex.exec(code);
// Simulate "sticky" matching by throwing out the match if it
// didn't match exactly at `pos`. If it didn't, we may have
// just searched the entire string.
if (match && (match.index !== pos))
match = null;
// Record the end position of the match back into `pos`.
// Avoid an IE7 bug where lastIndex is incremented when
// the match has 0 length.
if (match && match[0].length !== 0)
pos = regex.lastIndex;
return match;
};
// Decide which case of the grammar we are in based on one or two
// characters, then roll back `pos`.
run(rDecider);
pos = origPos;
// Grammar cases
if (match[1]) { // \s
run(rWhiteSpace);
return lexeme('WHITESPACE');
}
if (match[2]) { // one of //, /*, /
if (match[2] === '//') {
run(rSingleLineComment);
return lexeme('COMMENT');
}
if (match[2] === '/*') {
run(rMultiLineComment);
return lexeme(match ? 'COMMENT' : 'ERROR');
}
if (match[2] === '/') {
if (divisionPermitted) {
run(rDivPunctuator);
return lexeme('PUNCTUATION');
} else {
run(rRegexLiteral);
if (! match)
return lexeme('ERROR');
run(rRegexFlags);
return lexeme('REGEX');
}
}
}
if (match[3]) { // any other punctuation char
run(rPunctuator);
return lexeme(match ? 'PUNCTUATION' : 'ERROR');
}
if (match[4]) { // 0-9
run(rDecLiteral) || run(rHexLiteral) || run(rOctLiteral);
return lexeme(match ? 'NUMBER' : 'ERROR');
}
if (match[5]) { // " or '
run(rStringQuote);
var quote = match[0];
do {
run(rStringMiddle) || run(rEscapeSequence) ||
run(rLineContinuation) || run(rStringQuote);
} while (match && match[0] !== quote);
if (! (match && match[0] === quote))
return lexeme('ERROR');
return lexeme('STRING');
}
if (match[7]) { // non-dot (line terminator)
run(rLineTerminator);
return lexeme('NEWLINE');
}
// dot (any non-line-terminator)
run(rIdentifierPrefix);
// Use non-short-circuiting bitwise OR, '|', to always try
// both regexes in sequence, returning false only if neither
// matched.
while ((!! run(rIdentifierMiddle)) |
(!! run(rIdentifierPrefix))) { /*continue*/ }
var word = code.substring(origPos, pos);
return lexeme(keywordLookup[' '+word] || 'IDENTIFIER');
};
JSLexer.prettyOffset = function (code, pos) {
var codeUpToPos = code.substring(0, pos);
var startOfLine = codeUpToPos.lastIndexOf('\n') + 1;
var indexInLine = pos - startOfLine; // 0-based
var lineNum = codeUpToPos.replace(/[^\n]+/g, '').length + 1; // 1-based
return "line " + lineNum + ", offset " + indexInLine;
};