forked from WebKit/WebKit-http
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a grammar (in antlr4 format) to the WSL spec.
https://bugs.webkit.org/show_bug.cgi?id=186310 Rubberstamped by Filip Pizlo It is just the raw rules, without much comments and no typesetting. Compiles to any of Java/JS/C++/etc.. with antlr4 (requires a JDK) It does not exactly match the parser of the current js implementation of WSL, it: - Accepts negative literals - Reserves keywords fallthrough/auto - Refuses '_' as a valid identifier - Accepts several capitalizations for Null/True/False - Accepts variable declarations at the top-level - Correctly parses ternary expressions - Forbids empty extension list for protocols - Does not allow a space between '[' and ']' in type suffixes - Correctly parses nested generics - Accepts a 'fallthrough;' statement (for switches) - Refuses chained relational operator (e.g. x < y < z) - Generally a completely different structure for parsing effectful expressions, although it probably accepts about the same language - Currently only accepts literals and identifiers as constexpr (while the js parser accepts way too much, including '(x += 42)') There are probably more differences that I missed. The js parser will probably have to be mostly rewritten to follow the spec more closely (and fix all the bugs). I will try to see if it can be automatically derived from the antlr4 grammar. * WebGPUShadingLanguageRI/SpecWork/WSL.g4: Added. git-svn-id: http://svn.webkit.org/repository/webkit/trunk@232514 268f45cc-cd09-0410-ab3c-d52691b4dbfc
- Loading branch information
1 parent
c90330a
commit 303def7
Showing
2 changed files
with
262 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,32 @@ | ||
2018-06-05 Robin Morisset <[email protected]> | ||
|
||
Add a grammar (in antlr4 format) to the WSL spec. | ||
https://bugs.webkit.org/show_bug.cgi?id=186310 | ||
|
||
Rubberstamped by Filip Pizlo | ||
|
||
It is just the raw rules, without many comments and with no typesetting. | ||
Compiles to any of Java/JS/C++/etc. with antlr4 (requires a JDK). | ||
|
||
It does not exactly match the parser of the current js implementation of WSL, it: | ||
- Accepts negative literals | ||
- Reserves keywords fallthrough/auto | ||
- Refuses '_' as a valid identifier | ||
- Accepts several capitalizations for Null/True/False | ||
- Accepts variable declarations at the top-level | ||
- Correctly parses ternary expressions | ||
- Forbids empty extension list for protocols | ||
- Does not allow a space between '[' and ']' in type suffixes | ||
- Correctly parses nested generics | ||
- Accepts a 'fallthrough;' statement (for switches) | ||
- Refuses chained relational operators (e.g. x < y < z) | ||
- Generally a completely different structure for parsing effectful expressions, although it probably accepts about the same language | ||
- Currently only accepts literals and identifiers as constexpr (while the js parser accepts way too much, including '(x += 42)') | ||
There are probably more differences that I missed. The js parser will probably have to be mostly rewritten to follow the spec more closely (and fix all the bugs). | ||
I will try to see if it can be automatically derived from the antlr4 grammar. | ||
|
||
* WebGPUShadingLanguageRI/SpecWork/WSL.g4: Added. | ||
|
||
2018-06-04 Frederic Wang <[email protected]> | ||
|
||
import-w3c-tests should rely on <meta name="flags"> to detect CSS manual tests | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,233 @@ | ||
grammar WSL; | ||
|
||
/* | ||
* Lexer | ||
*/ | ||
// Runs of spaces, tabs, carriage returns and newlines are matched and then dropped by '-> skip', so the parser never sees them. | ||
Whitespace: [ \t\r\n]+ -> skip ; | ||
|
||
// Note: we forbid leading 0s in decimal integers, but a lone '0' is accepted (without it the value zero could not be written in decimal at all). to bikeshed. | ||
// "0x..." is still lexed as a hexadecimal literal and "0.5" as a float, because the lexer always prefers the longest match. | ||
fragment CoreDecimalIntLiteral: '0' | [1-9] [0-9]* ; | ||
// Note: we allow a leading '-' but not a leading '+' in all kinds of numeric literals. to bikeshed. | ||
// Signed forms take an optional '-'; unsigned forms take a mandatory 'u' suffix and no sign. | ||
fragment DecimalIntLiteral: '-'? CoreDecimalIntLiteral ; | ||
fragment DecimalUIntLiteral: CoreDecimalIntLiteral 'u' ; | ||
// Hexadecimal literals require a lowercase '0x' prefix followed by at least one hex digit (either case). | ||
fragment CoreHexadecimalIntLiteral: '0x' [0-9a-fA-F]+ ; | ||
fragment HexadecimalIntLiteral: '-'? CoreHexadecimalIntLiteral; | ||
fragment HexadecimalUIntLiteral: CoreHexadecimalIntLiteral 'u'; | ||
// Union of the four integer literal shapes; being a fragment, it is only reachable through the Literal token. | ||
fragment IntLiteral: DecimalIntLiteral | DecimalUIntLiteral | HexadecimalIntLiteral | HexadecimalUIntLiteral ; | ||
// Do we want to allow underscores in the middle of numbers for readability? | ||
|
||
// A float needs a '.' with at least one digit on one side of it: "1.", ".5" and "1.5" match, a lone "." does not. | ||
fragment CoreFloatLiteral: [0-9]+'.'[0-9]* | [0-9]*'.'[0-9]+ ; | ||
// Optional leading '-' and optional single-letter suffix 'f' or 'd'. | ||
fragment FloatLiteral: '-'? CoreFloatLiteral [fd]? ; | ||
// TODO: what to do about floats that are too big or too small to represent? | ||
// TODO: what is the default precision? double? | ||
// IDEA: add Nan, +infinity, -infinity | ||
// IDEA: add half-precision literals | ||
|
||
// One rule per keyword, to prevent them from being recognized as identifiers | ||
// (these rules must stay above the Identifier rule: on a tie in match length the lexer picks the rule declared first). | ||
// Declaration keywords. | ||
STRUCT: 'struct'; | ||
PROTOCOL: 'protocol'; | ||
TYPEDEF: 'typedef'; | ||
ENUM: 'enum'; | ||
OPERATOR: 'operator'; | ||
|
||
// Statement keywords. | ||
IF: 'if'; | ||
ELSE: 'else'; | ||
CONTINUE: 'continue'; | ||
BREAK: 'break'; | ||
SWITCH: 'switch'; | ||
CASE: 'case'; | ||
DEFAULT: 'default'; | ||
FALLTHROUGH: 'fallthrough'; | ||
FOR: 'for'; | ||
WHILE: 'while'; | ||
DO: 'do'; | ||
RETURN: 'return'; | ||
TRAP: 'trap'; | ||
|
||
// Spellings of the null and boolean literals. These are fragments: they produce no token of their own and are only used by the Literal rule. | ||
// Exactly two spellings of each are accepted: null/NULL, true/True, false/False. | ||
fragment NULL: 'null' | 'NULL' ; | ||
fragment TRUE: 'true' | 'True' ; | ||
fragment FALSE: 'false' | 'False' ; | ||
// Note: We could make these three fully case sensitive or insensitive. to bikeshed. | ||
|
||
// Address-space keywords (grouped by the addressSpace parser rule). | ||
CONSTANT: 'constant'; | ||
DEVICE: 'device'; | ||
THREADGROUP: 'threadgroup'; | ||
THREAD: 'thread'; | ||
|
||
// Keywords that may prefix a function declaration (see funcDecl). | ||
VERTEX: 'vertex'; | ||
FRAGMENT: 'fragment'; | ||
|
||
NATIVE: 'native'; | ||
RESTRICTED: 'restricted'; | ||
// Note: these could be keywords only in the native mode, but I decided to make them always reserved. to bikeshed. | ||
|
||
UNDERSCORE: '_'; | ||
AUTO: 'auto'; | ||
// Note: these are currently not used by the grammar, but I would like to make them reserved keywords for future expansion of the language. to bikeshed | ||
|
||
// Literal must be declared before Identifier: when two lexer rules match the same text, antlr4 picks the rule declared first. | ||
// With the opposite order, 'null', 'true' and 'false' (and their accepted capitalizations) would be lexed as Identifier and never as Literal. | ||
// Longer names such as 'nullable' are unaffected, since the longest match always wins and only Identifier matches them in full. | ||
Literal: IntLiteral | FloatLiteral | NULL | TRUE | FALSE; | ||
|
||
// ValidIdentifier is a fragment so that OperatorName can reuse the same character set without producing an Identifier token. | ||
fragment ValidIdentifier: [a-zA-Z_] [a-zA-Z0-9_]* ; | ||
Identifier: ValidIdentifier ; | ||
// Note: this currently excludes unicode, but allows digits in the middle of identifiers. We could easily restrict or extend this definition. to bikeshed | ||
|
||
// The whole operator name (keyword plus symbol, with no whitespace in between) is a single token; | ||
// it is longer than the bare 'operator' keyword, so the longest-match rule selects it over OPERATOR. | ||
OperatorName | ||
: 'operator' ('>>' | '<<' | '+' | '-' | '*' | '/' | '%' | '&&' | '||' | '&' | '^' | '|' | '>=' | '<=' | '==' | '<' | '>' | '++' | '--' | '!' | '~' | '[]' | '[]=' | '&[]') | ||
| 'operator&.' ValidIdentifier | ||
| 'operator.' ValidIdentifier '=' | ||
| 'operator.' ValidIdentifier ; | ||
// Note: operator!= is not user-definable, it is automatically derived from operator== | ||
|
||
/* | ||
* Parser: Top-level | ||
*/ | ||
// Entry rule: a source file is any number of top-level declarations; the explicit EOF forces the whole input to be consumed. | ||
file: topLevelDecl* EOF ; | ||
// One top-level item. A stray ';' is accepted on its own, and variable declarations are allowed at file scope. | ||
topLevelDecl | ||
: ';' | ||
| variableDecls ';' | ||
| typeDef | ||
| structDef | ||
| enumDef | ||
| funcDef | ||
| nativeFuncDecl | ||
| nativeTypeDecl | ||
| protocolDecl ; | ||
|
||
// typedef Name<params> = type; (typeParameters may match nothing, so the '<...>' part is optional). | ||
typeDef: TYPEDEF Identifier typeParameters '=' type ';' ; | ||
|
||
// struct Name<params> { fields }; the field list may be empty. | ||
structDef: STRUCT Identifier typeParameters '{' structElement* '}' ; | ||
// A field is a type followed by a name; no initializer is allowed. | ||
structElement: type Identifier ';' ; | ||
|
||
// enum Name [: baseType] { members }; at least one member is required and enums take no type parameters. | ||
enumDef: ENUM Identifier (':' type)? '{' enumMember (',' enumMember)* '}' ; | ||
// Note: we could allow an extra ',' at the end of the list of enumMembers, ala Rust, to make it easier to reorder the members. to bikeshed | ||
// A member may be given an explicit value, restricted to what constexpr accepts. | ||
enumMember: Identifier ('=' constexpr)? ; | ||
|
||
// A function definition is a declaration (optionally marked 'restricted') followed by a body. | ||
funcDef: RESTRICTED? funcDecl block; | ||
// Three shapes of declaration: | ||
//   1. 'vertex'/'fragment' functions: no type parameters and no operator names; | ||
//   2. ordinary functions and named operators: return type first, then the name, optional type parameters, and parameters; | ||
//   3. the form starting with the bare 'operator' keyword, where the type comes after the type parameters (see the note below). | ||
funcDecl | ||
: (VERTEX | FRAGMENT) type Identifier parameters | ||
| type (Identifier | OperatorName) typeParameters parameters | ||
| OPERATOR typeParameters type parameters ; | ||
// Note: the return type is moved in a different place for operator casts, as a hint that it plays a role in overload resolution. to bikeshed | ||
// Parenthesized, comma-separated parameter list; the empty list is a separate alternative. | ||
parameters | ||
: '(' ')' | ||
| '(' parameter (',' parameter)* ')' ; | ||
// The parameter name is optional. | ||
parameter: type Identifier? ; | ||
|
||
// Native function: a funcDecl prefixed by 'native' (optionally 'restricted' before it) and terminated by ';' instead of a body. | ||
nativeFuncDecl: RESTRICTED? NATIVE funcDecl ';' ; | ||
// Native type: 'native typedef Name<params>;' with no '= type' part. | ||
nativeTypeDecl: NATIVE TYPEDEF Identifier typeParameters ';' ; | ||
|
||
// protocol Name [: Base1, Base2, ...] { signatures }; each signature is a funcDecl followed by ';'. | ||
// If the ':' is present, at least one protocolRef must follow it. | ||
protocolDecl: PROTOCOL Identifier (':' protocolRef (',' protocolRef)*)? '{' (funcDecl ';')* '}' ; | ||
// Note: I forbid empty extensions lists in protocol declarations, while the original js parser allowed them. to bikeshed | ||
// A protocol is referenced by its bare name. | ||
protocolRef: Identifier ; | ||
|
||
/* | ||
* Parser: Types | ||
*/ | ||
// Formal type parameters of a declaration. The second alternative matches either '<' '>' or nothing at all, | ||
// which is why rules that use typeParameters do not need to mark it as optional. | ||
typeParameters | ||
: '<' typeParameter (',' typeParameter)* '>' | ||
| ('<' '>')?; | ||
// Note: contrary to C++ for example, we allow '<>' and consider it equivalent to having no type parameters at all. to bikeshed | ||
// Either a value parameter ("type Name") or a type variable, optionally constrained by protocols joined with '+'. | ||
typeParameter | ||
: type Identifier | ||
| Identifier (':' protocolRef ('+' protocolRef)*)? ; | ||
|
||
// Two syntaxes for types: | ||
//   - abbreviated: one address space written up front, then suffixes that carry no address space of their own; | ||
//   - non-abbreviated: no leading address space, and every '*' or '[]' suffix names its own address space. | ||
type | ||
: addressSpace Identifier typeArguments typeSuffixAbbreviated* | ||
| Identifier typeArguments typeSuffixNonAbbreviated* ; | ||
addressSpace: CONSTANT | DEVICE | THREADGROUP | THREAD ; | ||
// In both suffix rules, the '[' constexpr ']' form takes no address space. | ||
typeSuffixAbbreviated: '*' | '[]' | '[' constexpr ']'; | ||
typeSuffixNonAbbreviated: '*' addressSpace | '[]' addressSpace | '[' constexpr ']' ; | ||
// Note: in this formulation of typeSuffix*, we don't allow whitespace between the '[' and the ']' in '[]'. We easily could at the cost of a tiny more bit of lookahead. to bikeshed | ||
|
||
// Actual type arguments at a use site. As with typeParameters, the last alternative matches '<' '>' or nothing. | ||
// The first alternative exists because the lexer produces '>>' as one token, so a nested generic that is the last argument | ||
// (e.g. "A<B<C>>") cannot be closed by two separate '>' tokens; it inlines one level of nesting and ends on '>>'. | ||
typeArguments | ||
: '<' (typeArgument ',')* addressSpace? Identifier '<' (typeArgument (',' typeArgument)*)? '>>' | ||
//Note: this first alternative is a horrible hack to deal with nested generics that end with '>>'. As far as I can tell it works fine, but requires arbitrary lookahead. | ||
| '<' typeArgument (',' typeArgument)* '>' | ||
| ('<' '>')? ; | ||
// An argument is either a constant expression or a type; a bare Identifier matches both alternatives syntactically. | ||
typeArgument: constexpr | type ; | ||
|
||
/* | ||
* Parser: Statements | ||
*/ | ||
// A braced statement list. blockBody is a separate rule because switchCase reuses it without the braces. | ||
block: '{' blockBody '}' ; | ||
blockBody: stmt* ; | ||
|
||
// A single statement. Compound statements carry no terminator; doStmt and the simple statements end in ';'. | ||
// Because effectfulExpr may match nothing, the last alternative also accepts the empty statement ';'. | ||
stmt | ||
: block | ||
| ifStmt | ||
| switchStmt | ||
| forStmt | ||
| whileStmt | ||
| doStmt ';' | ||
| BREAK ';' | ||
| CONTINUE ';' | ||
| FALLTHROUGH ';' | ||
| TRAP ';' | ||
| RETURN expr? ';' | ||
| variableDecls ';' | ||
| effectfulExpr ';' ; | ||
|
||
// if (cond) stmt [else stmt] | ||
ifStmt: IF '(' expr ')' stmt (ELSE stmt)? ; | ||
|
||
// switch (value) { cases }; the case list may be empty. | ||
switchStmt: SWITCH '(' expr ')' '{' switchCase* '}' ; | ||
// Each case label is a constexpr (or 'default') followed by an unbraced statement list. | ||
switchCase: (CASE constexpr | DEFAULT) ':' blockBody ; | ||
|
||
// for (init; cond; step) stmt. The init slot is a variable declaration or an effectfulExpr (which may be empty); | ||
// cond and step are ordinary, optional expressions. | ||
forStmt: FOR '(' (variableDecls | effectfulExpr) ';' expr? ';' expr? ')' stmt ; | ||
whileStmt: WHILE '(' expr ')' stmt ; | ||
// The trailing ';' of a do-while loop is not part of this rule; it is supplied by the stmt rule. | ||
doStmt: DO stmt WHILE '(' expr ')' ; | ||
|
||
// One type followed by one or more comma-separated declarators, each with an optional initializer. | ||
// The terminating ';' is supplied by the rule that uses variableDecls. | ||
variableDecls: type variableDecl (',' variableDecl)* ; | ||
variableDecl: Identifier ('=' expr)? ; | ||
|
||
/* | ||
* Parser: Expressions | ||
*/ | ||
// Constant expressions are currently limited to a single literal or a single identifier. | ||
// They are used for enum member values, array sizes in type suffixes, type arguments and case labels. | ||
constexpr: Literal | Identifier; | ||
|
||
// Note: we separate effectful expressions from normal expressions, and only allow the former in statement positions, to disambiguate the following: | ||
// "x * y;". Without this trick, it would look like both an expression and a variable declaration, and could not be disambiguated until name resolution. | ||
// An effectfulExpr is a possibly empty, comma-separated list of effAssignment. | ||
effectfulExpr: ((effAssignment ',')* effAssignment)? ; | ||
// Either an assignment (the left-hand side is a possiblePrefix, the right-hand side is any expr) or one of the forms below. | ||
effAssignment | ||
: possiblePrefix assignOperator expr | ||
| effPrefix ; | ||
assignOperator: '=' | '+=' | '-=' | '*=' | '/=' | '%=' | '^=' | '&=' | '|=' | '>>=' | '<<=' ; | ||
// Pre-increment / pre-decrement, or one of the suffix forms. | ||
effPrefix | ||
: ('++' | '--') possiblePrefix | ||
| effSuffix ; | ||
// Post-increment / post-decrement, a call, or a parenthesized expression. | ||
effSuffix | ||
: possibleSuffix ('++' | '--') | ||
| callExpression | ||
| '(' expr ')' ; | ||
// Note: this last case is to allow craziness like "(x < y ? z += 42 : w += 13);" | ||
// TODO: Not sure at all how useful it is, I also still have to double check that it introduces no ambiguity. | ||
// Suffix operators other than '++' and '--': member access, '->' access, and indexing. | ||
limitedSuffixOperator | ||
: '.' Identifier | ||
| '->' Identifier | ||
| '[' expr ']' ; | ||
|
||
// A full expression: one or more possibleTernaryConditional separated by the comma operator. | ||
expr: (possibleTernaryConditional ',')* possibleTernaryConditional; | ||
// TODO: I tried to mimic https://en.cppreference.com/w/cpp/language/operator_precedence with regards to assignment and ternary conditionals, but it still needs some testing | ||
// Ternary conditional and assignment sit at the same level and both recurse on their right-hand side. | ||
// The middle operand of '?:' is a full expr (so it may contain commas); the last operand is not. | ||
possibleTernaryConditional | ||
: possibleLogicalBinop '?' expr ':' possibleTernaryConditional | ||
| possiblePrefix assignOperator possibleTernaryConditional | ||
| possibleLogicalBinop ; | ||
// A chain of relational-level operands joined by logical/bitwise binary operators. | ||
// The operand inside the loop is deliberately possibleRelationalBinop and not possibleLogicalBinop: recursing there would give | ||
// a chain such as "a || b || c" several distinct derivations (the loop and the recursion could both absorb the tail). | ||
possibleLogicalBinop: possibleRelationalBinop (logicalBinop possibleRelationalBinop)*; | ||
logicalBinop: '||' | '&&' | '|' | '^' | '&' ; | ||
// Note: all five operators share one precedence level here, and the operands of a chain form a flat list; folding that list from left to right yields left-to-right associativity. | ||
// At most one relational operator per level ('?' rather than '*'), so "x < y < z" is rejected. | ||
possibleRelationalBinop: possibleShift (relationalBinop possibleShift)?; | ||
relationalBinop: '<' | '>' | '<=' | '>=' | '==' | '!=' ; | ||
// Note: we made relational binops non-associative to better disambiguate "x<y>(z)" into a call expression and not a comparison of comparison | ||
// Idea: https://en.cppreference.com/w/cpp/language/operator_comparison#Three-way_comparison | ||
// Arithmetic levels, from loosest to tightest binding: shifts, then additive, then multiplicative. Each is a flat operand chain. | ||
possibleShift: possibleAdd (('>>' | '<<') possibleAdd)* ; | ||
possibleAdd: possibleMult (('+' | '-') possibleMult)* ; | ||
possibleMult: possiblePrefix (('*' | '/' | '%') possiblePrefix)* ; | ||
// Any number of prefix operators applied to a suffix-level expression. | ||
possiblePrefix: prefixOp* possibleSuffix ; | ||
prefixOp: '++' | '--' | '+' | '-' | '~' | '!' | '&' | '@' | '*' ; | ||
// A call result may be followed by member access / indexing only; a plain term may additionally be followed by '++' / '--'. | ||
possibleSuffix | ||
: callExpression limitedSuffixOperator* | ||
| term (limitedSuffixOperator | '++' | '--')* ; | ||
// Callee is a bare identifier with optional type arguments. Arguments are possibleTernaryConditional rather than expr, | ||
// so a ',' inside the parentheses always separates arguments. | ||
callExpression: Identifier typeArguments '(' (possibleTernaryConditional (',' possibleTernaryConditional)*)? ')'; | ||
// Atoms: a literal, a name, or a parenthesized full expression. | ||
term | ||
: Literal | ||
| Identifier | ||
| '(' expr ')' ; | ||