-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
kowsik
committed
Nov 11, 2010
1 parent
24e08d0
commit f2ff387
Showing
3 changed files
with
819 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,359 @@ | ||
/* Parse function for JavaScript. Makes use of the tokenizer from | ||
* tokenizejavascript.js. Note that your parsers do not have to be | ||
* this complicated -- if you don't want to recognize local variables, | ||
* in many languages it is enough to just look for braces, semicolons, | ||
* parentheses, etc, and know when you are inside a string or comment. | ||
* | ||
* See manual.html for more info about the parser interface. | ||
*/ | ||
|
||
var JSParser = Editor.Parser = (function() { | ||
// Token types that can be considered to be atoms. | ||
var atomicTypes = {"atom": true, "number": true, "variable": true, "string": true, "regexp": true}; | ||
// Setting that can be used to have JSON data indent properly. | ||
var json = false; | ||
// Constructor for the lexical context objects. | ||
function JSLexical(indented, column, type, align, prev, info) { | ||
// indentation at start of this line | ||
this.indented = indented; | ||
// column at which this scope was opened | ||
this.column = column; | ||
// type of scope ('vardef', 'stat' (statement), 'form' (special form), '[', '{', or '(') | ||
this.type = type; | ||
// '[', '{', or '(' blocks that have any text after their opening | ||
// character are said to be 'aligned' -- any lines below are | ||
// indented all the way to the opening character. | ||
if (align != null) | ||
this.align = align; | ||
// Parent scope, if any. | ||
this.prev = prev; | ||
this.info = info; | ||
} | ||
|
||
// My favourite JavaScript indentation rules. | ||
function indentJS(lexical) { | ||
return function(firstChars) { | ||
var firstChar = firstChars && firstChars.charAt(0), type = lexical.type; | ||
var closing = firstChar == type; | ||
if (type == "vardef") | ||
return lexical.indented + 4; | ||
else if (type == "form" && firstChar == "{") | ||
return lexical.indented; | ||
else if (type == "stat" || type == "form") | ||
return lexical.indented + indentUnit; | ||
else if (lexical.info == "switch" && !closing) | ||
return lexical.indented + (/^(?:case|default)\b/.test(firstChars) ? indentUnit : 2 * indentUnit); | ||
else if (lexical.align) | ||
return lexical.column - (closing ? 1 : 0); | ||
else | ||
return lexical.indented + (closing ? 0 : indentUnit); | ||
}; | ||
} | ||
|
||
// The parser-iterator-producing function itself. | ||
function parseJS(input, basecolumn) { | ||
// Wrap the input in a token stream | ||
var tokens = tokenizeJavaScript(input); | ||
// The parser state. cc is a stack of actions that have to be | ||
// performed to finish the current statement. For example we might | ||
// know that we still need to find a closing parenthesis and a | ||
// semicolon. Actions at the end of the stack go first. It is | ||
// initialized with an infinitely looping action that consumes | ||
// whole statements. | ||
var cc = [json ? expressions : statements]; | ||
// Context contains information about the current local scope, the | ||
// variables defined in that, and the scopes above it. | ||
var context = null; | ||
// The lexical scope, used mostly for indentation. | ||
var lexical = new JSLexical((basecolumn || 0) - indentUnit, 0, "block", false); | ||
// Current column, and the indentation at the start of the current | ||
// line. Used to create lexical scope objects. | ||
var column = 0; | ||
var indented = 0; | ||
// Variables which are used by the mark, cont, and pass functions | ||
// below to communicate with the driver loop in the 'next' | ||
// function. | ||
var consume, marked; | ||
|
||
// The iterator object. | ||
var parser = {next: next, copy: copy}; | ||
|
||
function next(){ | ||
// Start by performing any 'lexical' actions (adjusting the | ||
// lexical variable), or the operations below will be working | ||
// with the wrong lexical state. | ||
while(cc[cc.length - 1].lex) | ||
cc.pop()(); | ||
|
||
// Fetch a token. | ||
var token = tokens.next(); | ||
|
||
// Adjust column and indented. | ||
if (token.type == "whitespace" && column == 0) | ||
indented = token.value.length; | ||
column += token.value.length; | ||
if (token.content == "\n"){ | ||
indented = column = 0; | ||
// If the lexical scope's align property is still undefined at | ||
// the end of the line, it is an un-aligned scope. | ||
if (!("align" in lexical)) | ||
lexical.align = false; | ||
// Newline tokens get an indentation function associated with | ||
// them. | ||
token.indentation = indentJS(lexical); | ||
} | ||
// No more processing for meaningless tokens. | ||
if (token.type == "whitespace" || token.type == "comment") | ||
return token; | ||
// When a meaningful token is found and the lexical scope's | ||
// align is undefined, it is an aligned scope. | ||
if (!("align" in lexical)) | ||
lexical.align = true; | ||
|
||
// Execute actions until one 'consumes' the token and we can | ||
// return it. | ||
while(true) { | ||
consume = marked = false; | ||
// Take and execute the topmost action. | ||
cc.pop()(token.type, token.content); | ||
if (consume){ | ||
// Marked is used to change the style of the current token. | ||
if (marked) | ||
token.style = marked; | ||
// Here we differentiate between local and global variables. | ||
else if (token.type == "variable" && inScope(token.content)) | ||
token.style = "js-localvariable"; | ||
return token; | ||
} | ||
} | ||
} | ||
|
||
// This makes a copy of the parser state. It stores all the | ||
// stateful variables in a closure, and returns a function that | ||
// will restore them when called with a new input stream. Note | ||
// that the cc array has to be copied, because it is contantly | ||
// being modified. Lexical objects are not mutated, and context | ||
// objects are not mutated in a harmful way, so they can be shared | ||
// between runs of the parser. | ||
function copy(){ | ||
var _context = context, _lexical = lexical, _cc = cc.concat([]), _tokenState = tokens.state; | ||
|
||
return function copyParser(input){ | ||
context = _context; | ||
lexical = _lexical; | ||
cc = _cc.concat([]); // copies the array | ||
column = indented = 0; | ||
tokens = tokenizeJavaScript(input, _tokenState); | ||
return parser; | ||
}; | ||
} | ||
|
||
// Helper function for pushing a number of actions onto the cc | ||
// stack in reverse order. | ||
function push(fs){ | ||
for (var i = fs.length - 1; i >= 0; i--) | ||
cc.push(fs[i]); | ||
} | ||
// cont and pass are used by the action functions to add other | ||
// actions to the stack. cont will cause the current token to be | ||
// consumed, pass will leave it for the next action. | ||
function cont(){ | ||
push(arguments); | ||
consume = true; | ||
} | ||
function pass(){ | ||
push(arguments); | ||
consume = false; | ||
} | ||
// Used to change the style of the current token. | ||
function mark(style){ | ||
marked = style; | ||
} | ||
|
||
// Push a new scope. Will automatically link the current scope. | ||
function pushcontext(){ | ||
context = {prev: context, vars: {"this": true, "arguments": true}}; | ||
} | ||
// Pop off the current scope. | ||
function popcontext(){ | ||
context = context.prev; | ||
} | ||
// Register a variable in the current scope. | ||
function register(varname){ | ||
if (context){ | ||
mark("js-variabledef"); | ||
context.vars[varname] = true; | ||
} | ||
} | ||
// Check whether a variable is defined in the current scope. | ||
function inScope(varname){ | ||
var cursor = context; | ||
while (cursor) { | ||
if (cursor.vars[varname]) | ||
return true; | ||
cursor = cursor.prev; | ||
} | ||
return false; | ||
} | ||
|
||
// Push a new lexical context of the given type. | ||
function pushlex(type, info) { | ||
var result = function(){ | ||
lexical = new JSLexical(indented, column, type, null, lexical, info) | ||
}; | ||
result.lex = true; | ||
return result; | ||
} | ||
// Pop off the current lexical context. | ||
function poplex(){ | ||
if (lexical.type == ")") | ||
indented = lexical.indented; | ||
lexical = lexical.prev; | ||
} | ||
poplex.lex = true; | ||
// The 'lex' flag on these actions is used by the 'next' function | ||
// to know they can (and have to) be ran before moving on to the | ||
// next token. | ||
|
||
// Creates an action that discards tokens until it finds one of | ||
// the given type. | ||
function expect(wanted){ | ||
return function expecting(type){ | ||
if (type == wanted) cont(); | ||
else if (wanted == ";") pass(); | ||
else cont(arguments.callee); | ||
}; | ||
} | ||
|
||
// Looks for a statement, and then calls itself. | ||
function statements(type){ | ||
return pass(statement, statements); | ||
} | ||
function expressions(type){ | ||
return pass(expression, expressions); | ||
} | ||
// Dispatches various types of statements based on the type of the | ||
// current token. | ||
function statement(type){ | ||
if (type == "var") cont(pushlex("vardef"), vardef1, expect(";"), poplex); | ||
else if (type == "keyword a") cont(pushlex("form"), expression, statement, poplex); | ||
else if (type == "keyword b") cont(pushlex("form"), statement, poplex); | ||
else if (type == "{") cont(pushlex("}"), block, poplex); | ||
else if (type == ";") cont(); | ||
else if (type == "function") cont(functiondef); | ||
else if (type == "for") cont(pushlex("form"), expect("("), pushlex(")"), forspec1, expect(")"), poplex, statement, poplex); | ||
else if (type == "variable") cont(pushlex("stat"), maybelabel); | ||
else if (type == "switch") cont(pushlex("form"), expression, pushlex("}", "switch"), expect("{"), block, poplex, poplex); | ||
else if (type == "case") cont(expression, expect(":")); | ||
else if (type == "default") cont(expect(":")); | ||
else if (type == "catch") cont(pushlex("form"), pushcontext, expect("("), funarg, expect(")"), statement, poplex, popcontext); | ||
else pass(pushlex("stat"), expression, expect(";"), poplex); | ||
} | ||
// Dispatch expression types. | ||
function expression(type){ | ||
if (atomicTypes.hasOwnProperty(type)) cont(maybeoperator); | ||
else if (type == "function") cont(functiondef); | ||
else if (type == "keyword c") cont(expression); | ||
else if (type == "(") cont(pushlex(")"), expression, expect(")"), poplex, maybeoperator); | ||
else if (type == "operator") cont(expression); | ||
else if (type == "[") cont(pushlex("]"), commasep(expression, "]"), poplex, maybeoperator); | ||
else if (type == "{") cont(pushlex("}"), commasep(objprop, "}"), poplex, maybeoperator); | ||
else cont(); | ||
} | ||
// Called for places where operators, function calls, or | ||
// subscripts are valid. Will skip on to the next action if none | ||
// is found. | ||
function maybeoperator(type, value){ | ||
if (type == "operator" && /\+\+|--/.test(value)) cont(maybeoperator); | ||
else if (type == "operator") cont(expression); | ||
else if (type == ";") pass(); | ||
else if (type == "(") cont(pushlex(")"), commasep(expression, ")"), poplex, maybeoperator); | ||
else if (type == ".") cont(property, maybeoperator); | ||
else if (type == "[") cont(pushlex("]"), expression, expect("]"), poplex, maybeoperator); | ||
} | ||
// When a statement starts with a variable name, it might be a | ||
// label. If no colon follows, it's a regular statement. | ||
function maybelabel(type){ | ||
if (type == ":") cont(poplex, statement); | ||
else pass(maybeoperator, expect(";"), poplex); | ||
} | ||
// Property names need to have their style adjusted -- the | ||
// tokenizer thinks they are variables. | ||
function property(type){ | ||
if (type == "variable") {mark("js-property"); cont();} | ||
} | ||
// This parses a property and its value in an object literal. | ||
function objprop(type){ | ||
if (type == "variable") mark("js-property"); | ||
if (atomicTypes.hasOwnProperty(type)) cont(expect(":"), expression); | ||
} | ||
// Parses a comma-separated list of the things that are recognized | ||
// by the 'what' argument. | ||
function commasep(what, end){ | ||
function proceed(type) { | ||
if (type == ",") cont(what, proceed); | ||
else if (type == end) cont(); | ||
else cont(expect(end)); | ||
} | ||
return function commaSeparated(type) { | ||
if (type == end) cont(); | ||
else pass(what, proceed); | ||
}; | ||
} | ||
// Look for statements until a closing brace is found. | ||
function block(type){ | ||
if (type == "}") cont(); | ||
else pass(statement, block); | ||
} | ||
// Variable definitions are split into two actions -- 1 looks for | ||
// a name or the end of the definition, 2 looks for an '=' sign or | ||
// a comma. | ||
function vardef1(type, value){ | ||
if (type == "variable"){register(value); cont(vardef2);} | ||
else cont(); | ||
} | ||
function vardef2(type, value){ | ||
if (value == "=") cont(expression, vardef2); | ||
else if (type == ",") cont(vardef1); | ||
} | ||
// For loops. | ||
function forspec1(type){ | ||
if (type == "var") cont(vardef1, forspec2); | ||
else if (type == ";") pass(forspec2); | ||
else if (type == "variable") cont(formaybein); | ||
else pass(forspec2); | ||
} | ||
function formaybein(type, value){ | ||
if (value == "in") cont(expression); | ||
else cont(maybeoperator, forspec2); | ||
} | ||
function forspec2(type, value){ | ||
if (type == ";") cont(forspec3); | ||
else if (value == "in") cont(expression); | ||
else cont(expression, expect(";"), forspec3); | ||
} | ||
function forspec3(type) { | ||
if (type == ")") pass(); | ||
else cont(expression); | ||
} | ||
// A function definition creates a new context, and the variables | ||
// in its argument list have to be added to this context. | ||
function functiondef(type, value){ | ||
if (type == "variable"){register(value); cont(functiondef);} | ||
else if (type == "(") cont(pushcontext, commasep(funarg, ")"), statement, popcontext); | ||
} | ||
function funarg(type, value){ | ||
if (type == "variable"){register(value); cont();} | ||
} | ||
|
||
return parser; | ||
} | ||
|
||
return { | ||
make: parseJS, | ||
electricChars: "{}:", | ||
configure: function(obj) { | ||
if (obj.json != null) json = obj.json; | ||
} | ||
}; | ||
})(); |
Oops, something went wrong.