Skip to content

Commit

Permalink
ext tokenizer port + cleanup unused lexer states
Browse files Browse the repository at this point in the history
We added a mechanism to store the token stream during parsing and
exposed the entire parser stack to the tokenizer extension through
an opt-in flag: token_get_all($src, TOKEN_PARSE).

This change allows easy future language enhancements regarding context-aware
parsing & scanning without further maintenance on the tokenizer
extension, while also solving known inconsistencies that the "parseless"
tokenizer extension has when it handles the presence of `__halt_compiler()`.
  • Loading branch information
marcioAlmada committed Apr 30, 2015
1 parent 02a9eb4 commit 1107593
Show file tree
Hide file tree
Showing 10 changed files with 510 additions and 220 deletions.
6 changes: 4 additions & 2 deletions Zend/zend_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
#include "zend_interfaces.h"
#include "zend_virtual_cwd.h"
#include "zend_multibyte.h"
#include "zend_language_scanner.h"
#include "zend_inheritance.h"

#define SET_NODE(target, src) do { \
Expand Down Expand Up @@ -568,7 +567,10 @@ static int zend_add_const_name_literal(zend_op_array *op_array, zend_string *nam
op.constant = zend_add_literal(CG(active_op_array), &_c); \
} while (0)

void zend_stop_lexing(void) {
/* Stop lexing: report ON_STOP (with token END, line 0) through the scanner's
 * on_event hook when one is installed, then move yy_cursor to yy_limit. */
void zend_stop_lexing(void)
{
	/* Notify the hook first, while the cursor is still at its current position. */
	if (LANG_SCNG(on_event)) {
		LANG_SCNG(on_event)(ON_STOP, END, 0);
	}

	LANG_SCNG(yy_cursor) = LANG_SCNG(yy_limit);
}

Expand Down
9 changes: 9 additions & 0 deletions Zend/zend_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,12 @@ struct _zend_ini_scanner_globals {
int scanner_mode;
};

/* Event kinds passed as the first argument of the PHP scanner's on_event hook. */
typedef enum {
	ON_TOKEN    = 0,
	ON_FEEDBACK = 1,
	ON_STOP     = 2  /* reported by zend_stop_lexing() */
} zend_php_scanner_event;

struct _zend_php_scanner_globals {
zend_file_handle *yy_in;
zend_file_handle *yy_out;
Expand Down Expand Up @@ -278,6 +284,9 @@ struct _zend_php_scanner_globals {

/* initial string length after scanning to first variable */
int scanned_string_len;

/* hooks */
void (* on_event)(zend_php_scanner_event event, int token, int line);
};

#endif /* ZEND_GLOBALS_H */
Expand Down
14 changes: 6 additions & 8 deletions Zend/zend_language_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#include "zend_globals.h"
#include "zend_API.h"
#include "zend_constants.h"
#include "zend_language_scanner_defs.h"
#include "zend_language_scanner.h"

#define YYSIZE_T size_t
#define yytnamerr zend_yytnamerr
Expand All @@ -49,12 +49,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
#define YYFREE free
#endif

/* REWIND: push the current yy_state onto state_stack, switch yy_state to
 * yycST_LOOKING_FOR_SEMI_RESERVED_NAME, then reset yy_cursor to yy_text and
 * yy_leng to 0.
 * NOTE(review): presumably this makes the scanner re-read the token it just
 * matched under the new state -- confirm against the scanner rules.
 * The expansion is a bare { ... } block, so it is used without a trailing
 * semicolon inside a grammar action. */
#define REWIND { \
zend_stack_push(&LANG_SCNG(state_stack), (void *) &LANG_SCNG(yy_state)); \
LANG_SCNG(yy_state) = yycST_LOOKING_FOR_SEMI_RESERVED_NAME; \
LANG_SCNG(yy_cursor) = (unsigned char*)LANG_SCNG(yy_text); \
LANG_SCNG(yy_leng) = 0; }

%}

%pure_parser
Expand Down Expand Up @@ -290,7 +284,11 @@ semi_reserved:

/* identifier: either a plain T_STRING or a semi_reserved word. */
identifier:
T_STRING { $$ = $1; }
| /* if */ semi_reserved { REWIND } /* and rematch as */ T_STRING { $$ = $3; }
| semi_reserved {
/* Build the AST node from the zval filled in by zend_lex_tstring().
 * NOTE(review): presumably zend_lex_tstring() stores the text of the
 * token just scanned as a string in zv -- confirm in the scanner. */
zval zv;
zend_lex_tstring(&zv);
$$ = zend_ast_create_zval(&zv);
}
;

top_statement_list:
Expand Down
4 changes: 4 additions & 0 deletions Zend/zend_language_scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ typedef struct _zend_lex_state {
zend_encoding_filter output_filter;
const zend_encoding *script_encoding;

/* hooks */
void (* on_event)(zend_php_scanner_event event, int token, int line);

zend_ast *ast;
zend_arena *ast_arena;
} zend_lex_state;
Expand All @@ -66,6 +69,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename);
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding);
ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding);
ZEND_API void zend_lex_tstring(zval *zv);

END_EXTERN_C()

Expand Down
Loading

0 comments on commit 1107593

Please sign in to comment.