diff --git a/ChangeLog b/ChangeLog index 6f6245f3572893..3ca39b3fca89bf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Mon Jul 23 18:37:14 2007 Yukihiro Matsumoto + + * include/ruby/oniguruma.h: upgrade to Oniguruma 5.9.0. fixes + some memory violation. [ruby-dev:31070] + Sun Jul 22 20:09:49 2007 Tadayoshi Funaba * lib/date/format.rb (Date._parse): now accepts some new diff --git a/euc_jp.c b/euc_jp.c index bbe888d417cf7d..65729feda8fd2b 100644 --- a/euc_jp.c +++ b/euc_jp.c @@ -113,7 +113,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf) #if 1 if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) - return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; + return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; #endif return p - buf; } @@ -234,7 +234,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype) ctype -= (ONIGENC_MAX_STD_CTYPE + 1); if (ctype >= (unsigned int )PropertyListNum) - return ONIGENCERR_TYPE_BUG; + return ONIGENC_ERR_TYPE_BUG; return onig_is_in_code_range((UChar* )PropertyList[ctype], code); } @@ -256,7 +256,7 @@ get_ctype_code_range(int ctype, OnigCodePoint* sb_out, ctype -= (ONIGENC_MAX_STD_CTYPE + 1); if (ctype >= PropertyListNum) - return ONIGENCERR_TYPE_BUG; + return ONIGENC_ERR_TYPE_BUG; *ranges = PropertyList[ctype]; return 0; diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index 1c10939df41b08..d0d04782e42d40 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -38,7 +38,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 5 -#define ONIGURUMA_VERSION_MINOR 7 +#define ONIGURUMA_VERSION_MINOR 9 #define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus diff --git a/regcomp.c b/regcomp.c index 332129a7da0a6f..0b7706a9c947c0 100644 --- a/regcomp.c +++ b/regcomp.c @@ -70,8 +70,8 @@ swap_node(Node* a, Node* b) Node c; c = *a; *a = *b; *b = c; - if (NTYPE(a) == N_STRING) { - StrNode* sn = &(NSTRING(a)); + if (NTYPE(a) == NT_STR) { + StrNode* sn = NSTR(a); if (sn->capa == 0) { int len = sn->end - sn->s; sn->s = sn->buf; @@ -79,8 +79,8 @@ swap_node(Node* a, Node* b) } } - if (NTYPE(b) == N_STRING) { - StrNode* sn = &(NSTRING(b)); + if (NTYPE(b) == NT_STR) { + StrNode* sn = NSTR(b); if (sn->capa == 0) { int len = sn->end - sn->s; sn->s = sn->buf; @@ -138,8 +138,14 @@ bitset_on_num(BitSetRef bs) extern int onig_bbuf_init(BBuf* buf, int size) { - buf->p = (UChar* )xmalloc(size); - if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + if (size <= 0) { + size = 0; + buf->p = NULL; + } + else { + buf->p = (UChar* )xmalloc(size); + if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + } buf->alloc = size; buf->used = 0; @@ -155,7 +161,7 @@ unset_addr_list_init(UnsetAddrList* uslist, int size) UnsetAddr* p; p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); uslist->num = 0; uslist->alloc = size; uslist->us = p; @@ -178,7 +184,7 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) if (uslist->num >= uslist->alloc) { size = uslist->alloc * 2; p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); uslist->alloc = size; uslist->us = p; } @@ -456,7 +462,7 @@ compile_length_string_node(Node* node, regex_t* reg) UChar *p, *prev; StrNode* sn; - sn = &(NSTRING(node)); + sn = NSTR(node); if (sn->end <= sn->s) return 0; @@ -504,7 +510,7 @@ compile_string_node(Node* node, regex_t* reg) UChar *p, *prev, *end; StrNode* sn; - sn = &(NSTRING(node)); + sn = NSTR(node); if (sn->end <= sn->s) return 0; @@ -572,7 +578,7 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg) { int len; - if (IS_CCLASS_SHARE(cc)) { + if (IS_NCCLASS_SHARE(cc)) { len = SIZE_OPCODE + SIZE_POINTER; return len; } @@ -602,14 +608,14 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) { int r; - if (IS_CCLASS_SHARE(cc)) { + if (IS_NCCLASS_SHARE(cc)) { add_opcode(reg, OP_CCLASS_NODE); r = add_pointer(reg, cc); return r; } if (IS_NULL(cc->mbuf)) { - if (IS_CCLASS_NOT(cc)) + if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_NOT); else add_opcode(reg, OP_CCLASS); @@ -618,7 +624,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) } else { if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - if (IS_CCLASS_NOT(cc)) + if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_MB_NOT); else add_opcode(reg, OP_CCLASS_MB); @@ -626,7 +632,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) r = add_multi_byte_cclass(cc->mbuf, reg); } else { - if (IS_CCLASS_NOT(cc)) + if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_MIX_NOT); else add_opcode(reg, OP_CCLASS_MIX); @@ -649,7 +655,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) if (reg->repeat_range_alloc == 0) { p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); reg->repeat_range = p; reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; } @@ -658,7 +664,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; p = (OnigRepeatRange* )xrealloc(reg->repeat_range, sizeof(OnigRepeatRange) * n); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); reg->repeat_range = p; reg->repeat_range_alloc = n; } @@ -672,7 +678,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) } static int -compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info, +compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, regex_t* reg) { int r; @@ -708,10 +714,10 @@ compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info, } static int -is_anychar_star_quantifier(QuantifierNode* qn) +is_anychar_star_quantifier(QtfrNode* qn) { if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && - NTYPE(qn->target) == N_ANYCHAR) + NTYPE(qn->target) == NT_CANY) return 1; else return 0; @@ -723,7 +729,7 @@ is_anychar_star_quantifier(QuantifierNode* qn) #ifdef USE_COMBINATION_EXPLOSION_CHECK static int -compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) { int len, mod_tlen, cklen; int ckn; @@ -738,7 +744,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); /* anychar repeat */ - if (NTYPE(qn->target) == N_ANYCHAR) { + if (NTYPE(qn->target) == NT_CANY) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; @@ -803,7 +809,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) } static int -compile_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_quantifier_node(QtfrNode* qn, regex_t* reg) { int r, mod_tlen; int ckn; @@ -829,7 +835,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) if (r) return r; } - return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) { @@ -957,7 +963,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) #else /* USE_COMBINATION_EXPLOSION_CHECK */ static int -compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) { int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); @@ -967,7 +973,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) if (tlen < 0) return tlen; /* anychar repeat */ - if (NTYPE(qn->target) == N_ANYCHAR) { + if (NTYPE(qn->target) == NT_CANY) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact)) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; @@ -1022,7 +1028,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) } static int -compile_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_quantifier_node(QtfrNode* qn, regex_t* reg) { int i, r, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); @@ -1040,7 +1046,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); if (r) return r; - return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) @@ -1081,7 +1087,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, mod_tlen + SIZE_OP_JUMP); if (r) return r; - add_bytes(reg, NSTRING(qn->head_exact).s, 1); + add_bytes(reg, NSTR(qn->head_exact)->s, 1); r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, @@ -1091,7 +1097,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, mod_tlen + SIZE_OP_JUMP); if (r) return r; - add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, @@ -1150,7 +1156,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) #endif /* USE_COMBINATION_EXPLOSION_CHECK */ static int -compile_length_option_node(EffectNode* node, regex_t* reg) +compile_length_option_node(EncloseNode* node, regex_t* reg) { int tlen; OnigOptionType prev = reg->options; @@ -1170,7 +1176,7 @@ compile_length_option_node(EffectNode* node, regex_t* reg) } static int -compile_option_node(EffectNode* node, regex_t* reg) +compile_option_node(EncloseNode* node, regex_t* reg) { int r; OnigOptionType prev = reg->options; @@ -1196,12 +1202,12 @@ compile_option_node(EffectNode* node, regex_t* reg) } static int -compile_length_effect_node(EffectNode* node, regex_t* reg) +compile_length_enclose_node(EncloseNode* node, regex_t* reg) { int len; int tlen; - if (node->type == EFFECT_OPTION) + if (node->type == ENCLOSE_OPTION) return compile_length_option_node(node, reg); if (node->target) { @@ -1212,16 +1218,16 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) tlen = 0; switch (node->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CALLED(node)) { + if (IS_ENCLOSE_CALLED(node)) { len = SIZE_OP_MEMORY_START_PUSH + tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); } else @@ -1237,9 +1243,9 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) } break; - case EFFECT_STOP_BACKTRACK: - if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QuantifierNode* qn = &NQUANTIFIER(node->target); + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; @@ -1262,17 +1268,17 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) static int get_char_length_tree(Node* node, regex_t* reg, int* len); static int -compile_effect_node(EffectNode* node, regex_t* reg) +compile_enclose_node(EncloseNode* node, regex_t* reg) { int r, len; - if (node->type == EFFECT_OPTION) + if (node->type == ENCLOSE_OPTION) return compile_option_node(node, reg); switch (node->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CALLED(node)) { + if (IS_ENCLOSE_CALLED(node)) { r = add_opcode(reg, OP_CALL); if (r) return r; node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; @@ -1282,10 +1288,10 @@ compile_effect_node(EffectNode* node, regex_t* reg) len = compile_length_tree(node->target, reg); len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); r = add_opcode_rel_addr(reg, OP_JUMP, len); @@ -1302,12 +1308,12 @@ compile_effect_node(EffectNode* node, regex_t* reg) r = compile_tree(node->target, reg); if (r) return r; #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CALLED(node)) { + if (IS_ENCLOSE_CALLED(node)) { if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); else - r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END)); if (r) return r; @@ -1327,9 +1333,9 @@ compile_effect_node(EffectNode* node, regex_t* reg) } break; - case EFFECT_STOP_BACKTRACK: - if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QuantifierNode* qn = &NQUANTIFIER(node->target); + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; @@ -1486,48 +1492,48 @@ compile_length_tree(Node* node, regex_t* reg) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: len = 0; do { - r = compile_length_tree(NCONS(node).left, reg); + r = compile_length_tree(NCAR(node), reg); if (r < 0) return r; len += r; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); r = len; break; - case N_ALT: + case NT_ALT: { int n; n = r = 0; do { - r += compile_length_tree(NCONS(node).left, reg); + r += compile_length_tree(NCAR(node), reg); n++; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); } break; - case N_STRING: + case NT_STR: if (NSTRING_IS_RAW(node)) - r = compile_length_string_raw_node(&(NSTRING(node)), reg); + r = compile_length_string_raw_node(NSTR(node), reg); else r = compile_length_string_node(node, reg); break; - case N_CCLASS: - r = compile_length_cclass_node(&(NCCLASS(node)), reg); + case NT_CCLASS: + r = compile_length_cclass_node(NCCLASS(node), reg); break; - case N_CTYPE: - case N_ANYCHAR: + case NT_CTYPE: + case NT_CANY: r = SIZE_OPCODE; break; - case N_BACKREF: + case NT_BREF: { - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); #ifdef USE_BACKREF_AT_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { @@ -1547,21 +1553,21 @@ compile_length_tree(Node* node, regex_t* reg) break; #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: r = SIZE_OP_CALL; break; #endif - case N_QUANTIFIER: - r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg); + case NT_QTFR: + r = compile_length_quantifier_node(NQTFR(node), reg); break; - case N_EFFECT: - r = compile_length_effect_node(&NEFFECT(node), reg); + case NT_ENCLOSE: + r = compile_length_enclose_node(NENCLOSE(node), reg); break; - case N_ANCHOR: - r = compile_length_anchor_node(&(NANCHOR(node)), reg); + case NT_ANCHOR: + r = compile_length_anchor_node(NANCHOR(node), reg); break; default: @@ -1579,60 +1585,60 @@ compile_tree(Node* node, regex_t* reg) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: do { - r = compile_tree(NCONS(node).left, reg); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = compile_tree(NCAR(node), reg); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: { Node* x = node; len = 0; do { - len += compile_length_tree(NCONS(x).left, reg); - if (NCONS(x).right != NULL) { + len += compile_length_tree(NCAR(x), reg); + if (NCDR(x) != NULL) { len += SIZE_OP_PUSH + SIZE_OP_JUMP; } - } while (IS_NOT_NULL(x = NCONS(x).right)); + } while (IS_NOT_NULL(x = NCDR(x))); pos = reg->used + len; /* goal position */ do { - len = compile_length_tree(NCONS(node).left, reg); - if (IS_NOT_NULL(NCONS(node).right)) { + len = compile_length_tree(NCAR(node), reg); + if (IS_NOT_NULL(NCDR(node))) { r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); if (r) break; } - r = compile_tree(NCONS(node).left, reg); + r = compile_tree(NCAR(node), reg); if (r) break; - if (IS_NOT_NULL(NCONS(node).right)) { + if (IS_NOT_NULL(NCDR(node))) { len = pos - (reg->used + SIZE_OP_JUMP); r = add_opcode_rel_addr(reg, OP_JUMP, len); if (r) break; } - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); } break; - case N_STRING: + case NT_STR: if (NSTRING_IS_RAW(node)) - r = compile_string_raw_node(&(NSTRING(node)), reg); + r = compile_string_raw_node(NSTR(node), reg); else r = compile_string_node(node, reg); break; - case N_CCLASS: - r = compile_cclass_node(&(NCCLASS(node)), reg); + case NT_CCLASS: + r = compile_cclass_node(NCCLASS(node), reg); break; - case N_CTYPE: + case NT_CTYPE: { int op; - switch (NCTYPE(node).ctype) { + switch (NCTYPE(node)->ctype) { case ONIGENC_CTYPE_WORD: - if (NCTYPE(node).not != 0) op = OP_NOT_WORD; - else op = OP_WORD; + if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; + else op = OP_WORD; break; default: return ONIGERR_TYPE_BUG; @@ -1642,16 +1648,16 @@ compile_tree(Node* node, regex_t* reg) } break; - case N_ANYCHAR: + case NT_CANY: if (IS_MULTILINE(reg->options)) r = add_opcode(reg, OP_ANYCHAR_ML); else r = add_opcode(reg, OP_ANYCHAR); break; - case N_BACKREF: + case NT_BREF: { - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); #ifdef USE_BACKREF_AT_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { @@ -1712,21 +1718,21 @@ compile_tree(Node* node, regex_t* reg) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - r = compile_call(&(NCALL(node)), reg); + case NT_CALL: + r = compile_call(NCALL(node), reg); break; #endif - case N_QUANTIFIER: - r = compile_quantifier_node(&(NQUANTIFIER(node)), reg); + case NT_QTFR: + r = compile_quantifier_node(NQTFR(node), reg); break; - case N_EFFECT: - r = compile_effect_node(&NEFFECT(node), reg); + case NT_ENCLOSE: + r = compile_enclose_node(NENCLOSE(node), reg); break; - case N_ANCHOR: - r = compile_anchor_node(&(NANCHOR(node)), reg); + case NT_ANCHOR: + r = compile_anchor_node(NANCHOR(node), reg); break; default: @@ -1748,29 +1754,29 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) Node* node = *plink; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = noname_disable_map(&(NCONS(node).left), map, counter); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = noname_disable_map(&(NCAR(node)), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: + case NT_QTFR: { - Node** ptarget = &(NQUANTIFIER(node).target); + Node** ptarget = &(NQTFR(node)->target); Node* old = *ptarget; r = noname_disable_map(ptarget, map, counter); - if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) { + if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { onig_reduce_nested_quantifier(node, *ptarget); } } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); - if (en->type == EFFECT_MEMORY) { - if (IS_EFFECT_NAMED_GROUP(en)) { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { + if (IS_ENCLOSE_NAMED_GROUP(en)) { (*counter)++; map[en->regnum].new_val = *counter; en->regnum = *counter; @@ -1800,7 +1806,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map) { int i, pos, n, old_num; int *backs; - BackrefNode* bn = &(NBACKREF(node)); + BRefNode* bn = NBREF(node); if (! IS_BACKREF_NAME_REF(bn)) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; @@ -1829,20 +1835,20 @@ renumber_by_map(Node* node, GroupNumRemap* map) int r = 0; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = renumber_by_map(NCONS(node).left, map); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = renumber_by_map(NCAR(node), map); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = renumber_by_map(NQUANTIFIER(node).target, map); + case NT_QTFR: + r = renumber_by_map(NQTFR(node)->target, map); break; - case N_EFFECT: - r = renumber_by_map(NEFFECT(node).target, map); + case NT_ENCLOSE: + r = renumber_by_map(NENCLOSE(node)->target, map); break; - case N_BACKREF: + case NT_BREF: r = renumber_node_backref(node, map); break; @@ -1859,21 +1865,21 @@ numbered_ref_check(Node* node) int r = 0; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = numbered_ref_check(NCONS(node).left); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = numbered_ref_check(NCAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = numbered_ref_check(NQUANTIFIER(node).target); + case NT_QTFR: + r = numbered_ref_check(NQTFR(node)->target); break; - case N_EFFECT: - r = numbered_ref_check(NEFFECT(node).target); + case NT_ENCLOSE: + r = numbered_ref_check(NENCLOSE(node)->target); break; - case N_BACKREF: - if (! IS_BACKREF_NAME_REF(&(NBACKREF(node)))) + case NT_BREF: + if (! IS_BACKREF_NAME_REF(NBREF(node))) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; break; @@ -1892,7 +1898,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) GroupNumRemap* map; map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); - CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(map); for (i = 1; i <= env->num_mem; i++) { map[i].new_val = 0; } @@ -1930,12 +1936,12 @@ static int unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) { int i, offset; - EffectNode* en; + EncloseNode* en; AbsAddrType addr; for (i = 0; i < uslist->num; i++) { - en = &(NEFFECT(uslist->us[i].target)); - if (! IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; + en = NENCLOSE(uslist->us[i].target); + if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; addr = en->call_addr; offset = uslist->us[i].offset; @@ -1952,46 +1958,46 @@ quantifiers_memory_node_info(Node* node) int r = 0; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: { int v; do { - v = quantifiers_memory_node_info(NCONS(node).left); + v = quantifiers_memory_node_info(NCAR(node)); if (v > r) r = v; - } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&NCALL(node))) { + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ } else - r = quantifiers_memory_node_info(NCALL(node).target); + r = quantifiers_memory_node_info(NCALL(node)->target); break; #endif - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->upper != 0) { r = quantifiers_memory_node_info(qn->target); } } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: return NQ_TARGET_IS_EMPTY_MEM; break; - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = quantifiers_memory_node_info(en->target); break; default: @@ -2000,12 +2006,12 @@ quantifiers_memory_node_info(Node* node) } break; - case N_BACKREF: - case N_STRING: - case N_CTYPE: - case N_CCLASS: - case N_ANYCHAR: - case N_ANCHOR: + case NT_BREF: + case NT_STR: + case NT_CTYPE: + case NT_CCLASS: + case NT_CANY: + case NT_ANCHOR: default: break; } @@ -2022,12 +2028,12 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) *min = 0; switch (NTYPE(node)) { - case N_BACKREF: + case NT_BREF: { int i; int* backs; Node** nodes = SCANENV_MEM_NODES(env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) break; backs = BACKREFS_P(br); @@ -2044,57 +2050,57 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&NCALL(node))) { - EffectNode* en = &(NEFFECT(NCALL(node).target)); - if (IS_EFFECT_MIN_FIXED(en)) + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { + EncloseNode* en = NENCLOSE(NCALL(node)->target); + if (IS_ENCLOSE_MIN_FIXED(en)) *min = en->min_len; } else - r = get_min_match_length(NCALL(node).target, min, env); + r = get_min_match_length(NCALL(node)->target, min, env); break; #endif - case N_LIST: + case NT_LIST: do { - r = get_min_match_length(NCONS(node).left, &tmin, env); + r = get_min_match_length(NCAR(node), &tmin, env); if (r == 0) *min += tmin; - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: { Node *x, *y; y = node; do { - x = NCONS(y).left; + x = NCAR(y); r = get_min_match_length(x, &tmin, env); if (r != 0) break; if (y == node) *min = tmin; else if (*min > tmin) *min = tmin; - } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right)); + } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); } break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); *min = sn->end - sn->s; } break; - case N_CTYPE: + case NT_CTYPE: *min = 1; break; - case N_CCLASS: - case N_ANYCHAR: + case NT_CCLASS: + case NT_CANY: *min = 1; break; - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->lower > 0) { r = get_min_match_length(qn->target, min, env); @@ -2104,32 +2110,32 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_MIN_FIXED(en)) + if (IS_ENCLOSE_MIN_FIXED(en)) *min = en->min_len; else { r = get_min_match_length(en->target, min, env); if (r == 0) { en->min_len = *min; - SET_EFFECT_STATUS(node, NST_MIN_FIXED); + SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); } } break; #endif - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = get_min_match_length(en->target, min, env); break; } } break; - case N_ANCHOR: + case NT_ANCHOR: default: break; } @@ -2145,43 +2151,43 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) *max = 0; switch (NTYPE(node)) { - case N_LIST: + case NT_LIST: do { - r = get_max_match_length(NCONS(node).left, &tmax, env); + r = get_max_match_length(NCAR(node), &tmax, env); if (r == 0) *max = distance_add(*max, tmax); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: do { - r = get_max_match_length(NCONS(node).left, &tmax, env); + r = get_max_match_length(NCAR(node), &tmax, env); if (r == 0 && *max < tmax) *max = tmax; - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); *max = sn->end - sn->s; } break; - case N_CTYPE: + case NT_CTYPE: *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; - case N_CCLASS: - case N_ANYCHAR: + case NT_CCLASS: + case NT_CANY: *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; - case N_BACKREF: + case NT_BREF: { int i; int* backs; Node** nodes = SCANENV_MEM_NODES(env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) { *max = ONIG_INFINITE_DISTANCE; break; @@ -2197,17 +2203,17 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (! IS_CALL_RECURSION(&(NCALL(node)))) - r = get_max_match_length(NCALL(node).target, max, env); + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_max_match_length(NCALL(node)->target, max, env); else *max = ONIG_INFINITE_DISTANCE; break; #endif - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->upper != 0) { r = get_max_match_length(qn->target, max, env); @@ -2221,32 +2227,32 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_MAX_FIXED(en)) + if (IS_ENCLOSE_MAX_FIXED(en)) *max = en->max_len; else { r = get_max_match_length(en->target, max, env); if (r == 0) { en->max_len = *max; - SET_EFFECT_STATUS(node, NST_MAX_FIXED); + SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); } } break; #endif - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = get_max_match_length(en->target, max, env); break; } } break; - case N_ANCHOR: + case NT_ANCHOR: default: break; } @@ -2267,22 +2273,22 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) level++; *len = 0; switch (NTYPE(node)) { - case N_LIST: + case NT_LIST: do { - r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); if (r == 0) *len = distance_add(*len, tlen); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: { int tlen2; int varlen = 0; - r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); - while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) { - r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level); + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { + r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); if (r == 0) { if (tlen != tlen2) varlen = 1; @@ -2301,9 +2307,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); UChar *s = sn->s; while (s < sn->end) { s += enc_len(reg->enc, s); @@ -2312,9 +2318,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->lower == qn->upper) { r = get_char_length_tree1(qn->target, reg, &tlen, level); if (r == 0) @@ -2326,42 +2332,42 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (! IS_CALL_RECURSION(&(NCALL(node)))) - r = get_char_length_tree1(NCALL(node).target, reg, len, level); + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_char_length_tree1(NCALL(node)->target, reg, len, level); else r = GET_CHAR_LEN_VARLEN; break; #endif - case N_CTYPE: + case NT_CTYPE: *len = 1; break; - case N_CCLASS: - case N_ANYCHAR: + case NT_CCLASS: + case NT_CANY: *len = 1; break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CLEN_FIXED(en)) + if (IS_ENCLOSE_CLEN_FIXED(en)) *len = en->char_len; else { r = get_char_length_tree1(en->target, reg, len, level); if (r == 0) { en->char_len = *len; - SET_EFFECT_STATUS(node, NST_CLEN_FIXED); + SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); } } break; #endif - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = get_char_length_tree1(en->target, reg, len, level); break; default: @@ -2370,7 +2376,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case N_ANCHOR: + case NT_ANCHOR: break; default: @@ -2399,18 +2405,18 @@ is_not_included(Node* x, Node* y, regex_t* reg) retry: ytype = NTYPE(y); switch (NTYPE(x)) { - case N_CTYPE: + case NT_CTYPE: { switch (ytype) { - case N_CTYPE: - if (NCTYPE(y).ctype == NCTYPE(x).ctype && - NCTYPE(y).not != NCTYPE(x).not) + case NT_CTYPE: + if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && + NCTYPE(y)->not != NCTYPE(x)->not) return 1; else return 0; break; - case N_CCLASS: + case NT_CCLASS: swap: { Node* tmp; @@ -2419,7 +2425,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_STRING: + case NT_STR: goto swap; break; @@ -2429,15 +2435,15 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_CCLASS: + case NT_CCLASS: { - CClassNode* xc = &(NCCLASS(x)); + CClassNode* xc = NCCLASS(x); switch (ytype) { - case N_CTYPE: - switch (NCTYPE(y).ctype) { + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { case ONIGENC_CTYPE_WORD: - if (NCTYPE(y).not == 0) { - if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) { + if (NCTYPE(y)->not == 0) { + if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (BITSET_AT(xc->bs, i)) { if (IS_CODE_SB_WORD(reg->enc, i)) return 0; @@ -2450,7 +2456,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (! IS_CODE_SB_WORD(reg->enc, i)) { - if (!IS_CCLASS_NOT(xc)) { + if (!IS_NCCLASS_NOT(xc)) { if (BITSET_AT(xc->bs, i)) return 0; } @@ -2469,29 +2475,29 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_CCLASS: + case NT_CCLASS: { int v; - CClassNode* yc = &(NCCLASS(y)); + CClassNode* yc = NCCLASS(y); for (i = 0; i < SINGLE_BYTE_SIZE; i++) { v = BITSET_AT(xc->bs, i); - if ((v != 0 && !IS_CCLASS_NOT(xc)) || - (v == 0 && IS_CCLASS_NOT(xc))) { + if ((v != 0 && !IS_NCCLASS_NOT(xc)) || + (v == 0 && IS_NCCLASS_NOT(xc))) { v = BITSET_AT(yc->bs, i); - if ((v != 0 && !IS_CCLASS_NOT(yc)) || - (v == 0 && IS_CCLASS_NOT(yc))) + if ((v != 0 && !IS_NCCLASS_NOT(yc)) || + (v == 0 && IS_NCCLASS_NOT(yc))) return 0; } } - if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) || - (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc))) + if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) return 1; return 0; } break; - case N_STRING: + case NT_STR: goto swap; break; @@ -2501,30 +2507,30 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_STRING: + case NT_STR: { - StrNode* xs = &(NSTRING(x)); + StrNode* xs = NSTR(x); if (NSTRING_LEN(x) == 0) break; c = *(xs->s); switch (ytype) { - case N_CTYPE: - switch (NCTYPE(y).ctype) { + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { case ONIGENC_CTYPE_WORD: if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) - return NCTYPE(y).not; + return NCTYPE(y)->not; else - return !(NCTYPE(y).not); + return !(NCTYPE(y)->not); break; default: break; } break; - case N_CCLASS: + case NT_CCLASS: { - CClassNode* cc = &(NCCLASS(y)); + CClassNode* cc = NCCLASS(y); code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); @@ -2532,10 +2538,10 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_STRING: + case NT_STR: { UChar *q; - StrNode* ys = &(NSTRING(y)); + StrNode* ys = NSTR(y); len = NSTRING_LEN(x); if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { @@ -2569,28 +2575,28 @@ get_head_value_node(Node* node, int exact, regex_t* reg) Node* n = NULL_NODE; switch (NTYPE(node)) { - case N_BACKREF: - case N_ALT: - case N_ANYCHAR: + case NT_BREF: + case NT_ALT: + case NT_CANY: #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: #endif break; - case N_CTYPE: - case N_CCLASS: + case NT_CTYPE: + case NT_CCLASS: if (exact == 0) { n = node; } break; - case N_LIST: - n = get_head_value_node(NCONS(node).left, exact, reg); + case NT_LIST: + n = get_head_value_node(NCAR(node), exact, reg); break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); if (sn->end <= sn->s) break; @@ -2604,9 +2610,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->lower > 0) { if (IS_NOT_NULL(qn->head_exact)) n = qn->head_exact; @@ -2616,31 +2622,31 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_OPTION: + case ENCLOSE_OPTION: { OnigOptionType options = reg->options; - reg->options = NEFFECT(node).option; - n = get_head_value_node(NEFFECT(node).target, exact, reg); + reg->options = NENCLOSE(node)->option; + n = get_head_value_node(NENCLOSE(node)->target, exact, reg); reg->options = options; } break; - case EFFECT_MEMORY: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_MEMORY: + case ENCLOSE_STOP_BACKTRACK: n = get_head_value_node(en->target, exact, reg); break; } } break; - case N_ANCHOR: - if (NANCHOR(node).type == ANCHOR_PREC_READ) - n = get_head_value_node(NANCHOR(node).target, exact, reg); + case NT_ANCHOR: + if (NANCHOR(node)->type == ANCHOR_PREC_READ) + n = get_head_value_node(NANCHOR(node)->target, exact, reg); break; default: @@ -2651,45 +2657,46 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } static int -check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask) +check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) { int type, r = 0; type = NTYPE(node); - if ((type & type_mask) == 0) + if ((NTYPE2BIT(type) & type_mask) == 0) return 1; switch (type) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = check_type_tree(NCAR(node), type_mask, enclose_mask, + anchor_mask); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask, + case NT_QTFR: + r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, anchor_mask); break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); - if ((en->type & effect_mask) == 0) + EncloseNode* en = NENCLOSE(node); + if ((en->type & enclose_mask) == 0) return 1; - r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask); + r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); } break; - case N_ANCHOR: - type = NANCHOR(node).type; + case NT_ANCHOR: + type = NANCHOR(node)->type; if ((type & anchor_mask) == 0) return 1; - if (NANCHOR(node).target) - r = check_type_tree(NANCHOR(node).target, - type_mask, effect_mask, anchor_mask); + if (NANCHOR(node)->target) + r = check_type_tree(NANCHOR(node)->target, + type_mask, enclose_mask, anchor_mask); break; default: @@ -2711,7 +2718,7 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { Node *x; OnigDistance min; @@ -2719,40 +2726,40 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) x = node; do { - ret = subexp_inf_recursive_check(NCONS(x).left, env, head); + ret = subexp_inf_recursive_check(NCAR(x), env, head); if (ret < 0 || ret == RECURSION_INFINITE) return ret; r |= ret; if (head) { - ret = get_min_match_length(NCONS(x).left, &min, env); + ret = get_min_match_length(NCAR(x), &min, env); if (ret != 0) return ret; if (min != 0) head = 0; } - } while (IS_NOT_NULL(x = NCONS(x).right)); + } while (IS_NOT_NULL(x = NCDR(x))); } break; - case N_ALT: + case NT_ALT: { int ret; r = RECURSION_EXIST; do { - ret = subexp_inf_recursive_check(NCONS(node).left, env, head); + ret = subexp_inf_recursive_check(NCAR(node), env, head); if (ret < 0 || ret == RECURSION_INFINITE) return ret; r &= ret; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); } break; - case N_QUANTIFIER: - r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head); + case NT_QTFR: + r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); if (r == RECURSION_EXIST) { - if (NQUANTIFIER(node).lower == 0) r = 0; + if (NQTFR(node)->lower == 0) r = 0; } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2764,19 +2771,19 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) } break; - case N_CALL: - r = subexp_inf_recursive_check(NCALL(node).target, env, head); + case NT_CALL: + r = subexp_inf_recursive_check(NCALL(node)->target, env, head); break; - case N_EFFECT: - if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) return 0; - else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); else { - SET_EFFECT_STATUS(node, NST_MARK2); - r = subexp_inf_recursive_check(NEFFECT(node).target, env, head); - CLEAR_EFFECT_STATUS(node, NST_MARK2); + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); } break; @@ -2795,20 +2802,20 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = subexp_inf_recursive_check_trav(NCONS(node).left, env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = subexp_inf_recursive_check_trav(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env); + case NT_QTFR: + r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2820,15 +2827,15 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); - if (IS_EFFECT_RECURSION(en)) { - SET_EFFECT_STATUS(node, NST_MARK1); + if (IS_ENCLOSE_RECURSION(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); r = subexp_inf_recursive_check(en->target, env, 1); if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; - CLEAR_EFFECT_STATUS(node, NST_MARK1); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); } r = subexp_inf_recursive_check_trav(en->target, env); } @@ -2845,25 +2852,23 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) static int subexp_recursive_check(Node* node) { - int type; int r = 0; - type = NTYPE(node); - switch (type) { - case N_LIST: - case N_ALT: + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: do { - r |= subexp_recursive_check(NCONS(node).left); - } while (IS_NOT_NULL(node = NCONS(node).right)); + r |= subexp_recursive_check(NCAR(node)); + } while (IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = subexp_recursive_check(NQUANTIFIER(node).target); + case NT_QTFR: + r = subexp_recursive_check(NQTFR(node)->target); break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2875,20 +2880,20 @@ subexp_recursive_check(Node* node) } break; - case N_CALL: - r = subexp_recursive_check(NCALL(node).target); + case NT_CALL: + r = subexp_recursive_check(NCALL(node)->target); if (r != 0) SET_CALL_RECURSION(node); break; - case N_EFFECT: - if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) return 0; - else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) return 1; /* recursion */ else { - SET_EFFECT_STATUS(node, NST_MARK2); - r = subexp_recursive_check(NEFFECT(node).target); - CLEAR_EFFECT_STATUS(node, NST_MARK2); + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_recursive_check(NENCLOSE(node)->target); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); } break; @@ -2910,29 +2915,29 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: { int ret; do { - ret = subexp_recursive_check_trav(NCONS(node).left, env); + ret = subexp_recursive_check_trav(NCAR(node), env); if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; else if (ret < 0) return ret; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); } break; - case N_QUANTIFIER: - r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env); - if (NQUANTIFIER(node).upper == 0) { + case NT_QTFR: + r = subexp_recursive_check_trav(NQTFR(node)->target, env); + if (NQTFR(node)->upper == 0) { if (r == FOUND_CALLED_NODE) - NQUANTIFIER(node).is_refered = 1; + NQTFR(node)->is_refered = 1; } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2944,20 +2949,20 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); - if (! IS_EFFECT_RECURSION(en)) { - if (IS_EFFECT_CALLED(en)) { - SET_EFFECT_STATUS(node, NST_MARK1); + if (! IS_ENCLOSE_RECURSION(en)) { + if (IS_ENCLOSE_CALLED(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); r = subexp_recursive_check(en->target); - if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION); - CLEAR_EFFECT_STATUS(node, NST_MARK1); + if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); } } r = subexp_recursive_check_trav(en->target, env); - if (IS_EFFECT_CALLED(en)) + if (IS_ENCLOSE_CALLED(en)) r |= FOUND_CALLED_NODE; } break; @@ -2977,30 +2982,30 @@ setup_subexp_call(Node* node, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: do { - r = setup_subexp_call(NCONS(node).left, env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: do { - r = setup_subexp_call(NCONS(node).left, env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = setup_subexp_call(NQUANTIFIER(node).target, env); + case NT_QTFR: + r = setup_subexp_call(NQTFR(node)->target, env); break; - case N_EFFECT: - r = setup_subexp_call(NEFFECT(node).target, env); + case NT_ENCLOSE: + r = setup_subexp_call(NENCLOSE(node)->target, env); break; - case N_CALL: + case NT_CALL: { int n, num, *refs; UChar *p; - CallNode* cn = &(NCALL(node)); + CallNode* cn = NCALL(node); Node** nodes = SCANENV_MEM_NODES(env); #ifdef USE_NAMED_GROUP @@ -3046,16 +3051,16 @@ setup_subexp_call(Node* node, ScanEnv* env) ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); return ONIGERR_UNDEFINED_NAME_REFERENCE; } - SET_EFFECT_STATUS(cn->target, NST_CALLED); + SET_ENCLOSE_STATUS(cn->target, NST_CALLED); BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num); cn->unset_addr_list = env->unset_addr_list; } } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: @@ -3083,30 +3088,29 @@ setup_subexp_call(Node* node, ScanEnv* env) static int divide_look_behind_alternatives(Node* node) { - Node tmp_node; Node *head, *np, *insert_node; - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); int anc_type = an->type; head = an->target; - np = NCONS(head).left; - tmp_node = *node; *node = *head; *head = tmp_node; - NCONS(node).left = head; - NANCHOR(head).target = np; + np = NCAR(head); + swap_node(node, head); + NCAR(node) = head; + NANCHOR(head)->target = np; np = node; - while ((np = NCONS(np).right) != NULL_NODE) { + while ((np = NCDR(np)) != NULL_NODE) { insert_node = onig_node_new_anchor(anc_type); - CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY); - NANCHOR(insert_node).target = NCONS(np).left; - NCONS(np).left = insert_node; + CHECK_NULL_RETURN_MEMERR(insert_node); + NANCHOR(insert_node)->target = NCAR(np); + NCAR(np) = insert_node; } if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { np = node; do { - np->type = N_LIST; /* alt -> list */ - } while ((np = NCONS(np).right) != NULL_NODE); + SET_NTYPE(np, NT_LIST); /* alt -> list */ + } while ((np = NCDR(np)) != NULL_NODE); } return 0; } @@ -3115,7 +3119,7 @@ static int setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) { int r, len; - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); r = get_char_length_tree(an->target, reg, &len); if (r == 0) @@ -3139,11 +3143,15 @@ next_setup(Node* node, Node* next_node, regex_t* reg) retry: type = NTYPE(node); - if (type == N_QUANTIFIER) { - QuantifierNode* qn = &(NQUANTIFIER(node)); + if (type == NT_QTFR) { + QtfrNode* qn = NQTFR(node); if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { -#ifdef USE_QUANTIFIER_PEEK_NEXT - qn->next_head_exact = get_head_value_node(next_node, 1, reg); +#ifdef USE_QTFR_PEEK_NEXT + Node* n = get_head_value_node(next_node, 1, reg); + /* '\0': for UTF-16BE etc... */ + if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { + qn->next_head_exact = n; + } #endif /* automatic posseivation a*b ==> (?>a*)b */ if (qn->lower <= 1) { @@ -3154,20 +3162,20 @@ next_setup(Node* node, Node* next_node, regex_t* reg) if (IS_NOT_NULL(x)) { y = get_head_value_node(next_node, 0, reg); if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { - Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK); - CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); - SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); + CHECK_NULL_RETURN_MEMERR(en); + SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); swap_node(node, en); - NEFFECT(node).target = en; + NENCLOSE(node)->target = en; } } } } } } - else if (type == N_EFFECT) { - EffectNode* en = &(NEFFECT(node)); - if (en->type == EFFECT_MEMORY) { + else if (type == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { node = en->target; goto retry; } @@ -3182,12 +3190,12 @@ update_string_node_case_fold(regex_t* reg, Node *node) UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; UChar *sbuf, *ebuf, *sp; int r, i, len, sbuf_size; - StrNode* sn = &NSTRING(node); + StrNode* sn = NSTR(node); end = sn->end; sbuf_size = (end - sn->s) * 2; sbuf = (UChar* )xmalloc(sbuf_size); - CHECK_NULL_RETURN_VAL(sbuf, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(sbuf); ebuf = sbuf + sbuf_size; sp = sbuf; @@ -3198,7 +3206,7 @@ update_string_node_case_fold(regex_t* reg, Node *node) for (i = 0; i < len; i++) { if (sp >= ebuf) { sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); - CHECK_NULL_RETURN_VAL(sbuf, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(sbuf); sp = sbuf + sbuf_size; sbuf_size *= 2; ebuf = sbuf + sbuf_size; @@ -3265,11 +3273,11 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], xnode = onig_node_new_list(NULL, NULL); if (IS_NULL(xnode)) goto mem_err; - NCONS(var_anode).left = xnode; + NCAR(var_anode) = xnode; anode = onig_node_new_alt(NULL_NODE, NULL_NODE); if (IS_NULL(anode)) goto mem_err; - NCONS(xnode).left = anode; + NCAR(xnode) = anode; } else { *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); @@ -3279,7 +3287,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], snode = onig_node_new_str(p, p + slen); if (IS_NULL(snode)) goto mem_err; - NCONS(anode).left = snode; + NCAR(anode) = snode; for (i = 0; i < item_num; i++) { snode = onig_node_new_str(NULL, NULL); @@ -3325,18 +3333,18 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], goto mem_err; } - NCONS(an).left = xnode; + NCAR(an) = xnode; } else { - NCONS(an).left = snode; + NCAR(an) = snode; } - NCONS(var_anode).right = an; + NCDR(var_anode) = an; var_anode = an; } else { - NCONS(an).left = snode; - NCONS(anode).right = an; + NCAR(an) = snode; + NCDR(anode) = an; anode = an; } } @@ -3347,8 +3355,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], onig_node_free(snode); mem_err: - if (IS_NOT_NULL(*rnode)) - onig_node_free(*rnode); + onig_node_free(*rnode); return ONIGERR_MEMORY; } @@ -3362,7 +3369,7 @@ expand_case_fold_string(Node* node, regex_t* reg) UChar *start, *end, *p; Node *top_root, *root, *snode, *prev_node; OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - StrNode* sn = &NSTRING(node); + StrNode* sn = NSTR(node); if (NSTRING_IS_AMBIG(node)) return 0; @@ -3377,7 +3384,10 @@ expand_case_fold_string(Node* node, regex_t* reg) while (p < end) { n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end, items); - if (n < 0) goto err; + if (n < 0) { + r = n; + goto err; + } len = enc_len(reg->enc, p); @@ -3429,7 +3439,7 @@ expand_case_fold_string(Node* node, regex_t* reg) } } - root = NCONS(prev_node).left; + root = NCAR(prev_node); } else { /* r == 0 */ if (IS_NOT_NULL(root)) { @@ -3482,8 +3492,7 @@ expand_case_fold_string(Node* node, regex_t* reg) r = ONIGERR_MEMORY; err: - if (IS_NOT_NULL(top_root)) - onig_node_free(top_root); + onig_node_free(top_root); return r; } @@ -3505,31 +3514,31 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { Node* prev = NULL_NODE; do { - r = setup_comb_exp_check(NCONS(node).left, r, env); - prev = NCONS(node).left; - } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_comb_exp_check(NCAR(node), r, env); + prev = NCAR(node); + } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; - case N_ALT: + case NT_ALT: { int ret; do { - ret = setup_comb_exp_check(NCONS(node).left, state, env); + ret = setup_comb_exp_check(NCAR(node), state, env); r |= ret; - } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; - case N_QUANTIFIER: + case NT_QTFR: { int child_state = state; int add_state = 0; - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); Node* target = qn->target; int var_num; @@ -3540,11 +3549,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ if (env->backrefed_mem == 0) { - if (NTYPE(qn->target) == N_EFFECT) { - EffectNode* en = &(NEFFECT(qn->target)); - if (en->type == EFFECT_MEMORY) { - if (NTYPE(en->target) == N_QUANTIFIER) { - QuantifierNode* q = &(NQUANTIFIER(en->target)); + if (NTYPE(qn->target) == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(qn->target); + if (en->type == ENCLOSE_MEMORY) { + if (NTYPE(en->target) == NT_QTFR) { + QtfrNode* q = NQTFR(en->target); if (IS_REPEAT_INFINITE(q->upper) && q->greedy == qn->greedy) { qn->upper = (qn->lower == 0 ? 1 : qn->lower); @@ -3590,12 +3599,12 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: { if (env->curr_max_regnum < en->regnum) env->curr_max_regnum = en->regnum; @@ -3612,11 +3621,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&(NCALL(node)))) + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) env->has_recursion = 1; else - r = setup_comb_exp_check(NCALL(node).target, state, env); + r = setup_comb_exp_check(NCALL(node)->target, state, env); break; #endif @@ -3649,49 +3658,49 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { Node* prev = NULL_NODE; do { - r = setup_tree(NCONS(node).left, reg, state, env); + r = setup_tree(NCAR(node), reg, state, env); if (IS_NOT_NULL(prev) && r == 0) { - r = next_setup(prev, NCONS(node).left, reg); + r = next_setup(prev, NCAR(node), reg); } - prev = NCONS(node).left; - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + prev = NCAR(node); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); } break; - case N_ALT: + case NT_ALT: do { - r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_CCLASS: + case NT_CCLASS: break; - case N_STRING: + case NT_STR: if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { r = expand_case_fold_string(node, reg); } break; - case N_CTYPE: - case N_ANYCHAR: + case NT_CTYPE: + case NT_CANY: break; #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: break; #endif - case N_BACKREF: + case NT_BREF: { int i; int* p; Node** nodes = SCANENV_MEM_NODES(env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); p = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; @@ -3702,15 +3711,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); } #endif - SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); } } break; - case N_QUANTIFIER: + case NT_QTFR: { OnigDistance d; - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); Node* target = qn->target; if ((state & IN_REPEAT) != 0) { @@ -3735,7 +3744,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* ()* ==> ()?, ()+ ==> () */ qn->upper = 1; if (qn->lower > 1) qn->lower = 1; - if (NTYPE(target) == N_STRING) { + if (NTYPE(target) == NT_STR) { qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ } } @@ -3751,29 +3760,29 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* expand string */ #define EXPAND_STRING_MAX_LENGTH 100 - if (NTYPE(target) == N_STRING) { + if (NTYPE(target) == NT_STR) { if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { int len = NSTRING_LEN(target); - StrNode* sn = &(NSTRING(target)); + StrNode* sn = NSTR(target); if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { int i, n = qn->lower; - onig_node_conv_to_str_node(node, NSTRING(target).flag); + onig_node_conv_to_str_node(node, NSTR(target)->flag); for (i = 0; i < n; i++) { r = onig_node_str_cat(node, sn->s, sn->end); if (r) break; } onig_node_free(target); - break; /* break case N_QUANTIFIER: */ + break; /* break case NT_QTFR: */ } } } #ifdef USE_OP_PUSH_OR_JUMP_EXACT if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == N_QUANTIFIER) { - QuantifierNode* tqn = &(NQUANTIFIER(target)); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); if (IS_NOT_NULL(tqn->head_exact)) { qn->head_exact = tqn->head_exact; tqn->head_exact = NULL; @@ -3787,39 +3796,39 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_OPTION: + case ENCLOSE_OPTION: { OnigOptionType options = reg->options; - reg->options = NEFFECT(node).option; - r = setup_tree(NEFFECT(node).target, reg, state, env); + reg->options = NENCLOSE(node)->option; + r = setup_tree(NENCLOSE(node)->target, reg, state, env); reg->options = options; } break; - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); - /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */ + /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ } r = setup_tree(en->target, reg, state, env); break; - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_STOP_BACKTRACK: { Node* target = en->target; r = setup_tree(target, reg, state, env); - if (NTYPE(target) == N_QUANTIFIER) { - QuantifierNode* tqn = &(NQUANTIFIER(target)); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && tqn->greedy != 0) { /* (?>a*), a*+ etc... */ int qtype = NTYPE(tqn->target); if (IS_NODE_TYPE_SIMPLE(qtype)) - SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); } } } @@ -3828,9 +3837,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: @@ -3842,11 +3851,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* allowed node types in look-behind */ #define ALLOWED_TYPE_IN_LB \ - ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \ - N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL ) + ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ + BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) -#define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY ) -#define ALLOWED_EFFECT_IN_LB_NOT 0 +#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) +#define ALLOWED_ENCLOSE_IN_LB_NOT 0 #define ALLOWED_ANCHOR_IN_LB \ ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) @@ -3856,7 +3865,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case ANCHOR_LOOK_BEHIND: { r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB); + ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); @@ -3868,7 +3877,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case ANCHOR_LOOK_BEHIND_NOT: { r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); @@ -4535,7 +4544,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { OptEnv nenv; NodeOptInfo nopt; @@ -4543,33 +4552,33 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) copy_opt_env(&nenv, env); do { - r = optimize_node_left(NCONS(nd).left, &nopt, &nenv); + r = optimize_node_left(NCAR(nd), &nopt, &nenv); if (r == 0) { add_mml(&nenv.mmd, &nopt.len); concat_left_node_opt_info(env->enc, opt, &nopt); } - } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right)); + } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); } break; - case N_ALT: + case NT_ALT: { NodeOptInfo nopt; Node* nd = node; do { - r = optimize_node_left(NCONS(nd).left, &nopt, env); + r = optimize_node_left(NCAR(nd), &nopt, env); if (r == 0) { if (nd == node) copy_node_opt_info(opt, &nopt); else alt_merge_node_opt_info(opt, &nopt, env); } - } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right)); + } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); } break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); int slen = sn->end - sn->s; int is_raw = NSTRING_IS_RAW(node); @@ -4610,14 +4619,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_CCLASS: + case NT_CCLASS: { int i, z; - CClassNode* cc = &(NCCLASS(node)); + CClassNode* cc = NCCLASS(node); /* no need to check ignore case. (setted in setup_tree()) */ - if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) { + if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); @@ -4626,7 +4635,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { z = BITSET_AT(cc->bs, i); - if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) { + if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { add_char_opt_map_info(&opt->map, (UChar )i, env->enc); } } @@ -4635,7 +4644,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_CTYPE: + case NT_CTYPE: { int i, min, max; @@ -4644,9 +4653,9 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (max == 1) { min = 1; - switch (NCTYPE(node).ctype) { + switch (NCTYPE(node)->ctype) { case ONIGENC_CTYPE_WORD: - if (NCTYPE(node).not != 0) { + if (NCTYPE(node)->not != 0) { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { add_char_opt_map_info(&opt->map, (UChar )i, env->enc); @@ -4670,7 +4679,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_ANYCHAR: + case NT_CANY: { OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); @@ -4678,22 +4687,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_ANCHOR: - switch (NANCHOR(node).type) { + case NT_ANCHOR: + switch (NANCHOR(node)->type) { case ANCHOR_BEGIN_BUF: case ANCHOR_BEGIN_POSITION: case ANCHOR_BEGIN_LINE: case ANCHOR_END_BUF: case ANCHOR_SEMI_END_BUF: case ANCHOR_END_LINE: - add_opt_anc_info(&opt->anc, NANCHOR(node).type); + add_opt_anc_info(&opt->anc, NANCHOR(node)->type); break; case ANCHOR_PREC_READ: { NodeOptInfo nopt; - r = optimize_node_left(NANCHOR(node).target, &nopt, env); + r = optimize_node_left(NANCHOR(node)->target, &nopt, env); if (r == 0) { if (nopt.exb.len > 0) copy_opt_exact_info(&opt->expr, &nopt.exb); @@ -4715,13 +4724,13 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_BACKREF: + case NT_BREF: { int i; int* backs; OnigDistance min, max, tmin, tmax; Node** nodes = SCANENV_MEM_NODES(env->scan_env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) { set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); @@ -4745,31 +4754,31 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&(NCALL(node)))) + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); else { OnigOptionType save = env->options; - env->options = NEFFECT(NCALL(node).target).option; - r = optimize_node_left(NCALL(node).target, opt, env); + env->options = NENCLOSE(NCALL(node)->target)->option; + r = optimize_node_left(NCALL(node)->target, opt, env); env->options = save; } break; #endif - case N_QUANTIFIER: + case NT_QTFR: { int i; OnigDistance min, max; NodeOptInfo nopt; - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); r = optimize_node_left(qn->target, &nopt, env); if (r) break; if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && - NTYPE(qn->target) == N_ANYCHAR && qn->greedy) { + NTYPE(qn->target) == NT_CANY && qn->greedy) { if (IS_MULTILINE(env->options)) add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); else @@ -4810,12 +4819,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_OPTION: + case ENCLOSE_OPTION: { OnigOptionType save = env->options; @@ -4825,7 +4834,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL en->opt_count++; if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { @@ -4833,8 +4842,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) min = 0; max = ONIG_INFINITE_DISTANCE; - if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len; - if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len; + if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; + if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; set_mml(&opt->len, min, max); } else @@ -4849,7 +4858,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_STOP_BACKTRACK: r = optimize_node_left(en->target, opt, env); break; } @@ -4877,7 +4886,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->ignore_case) { reg->exact = (UChar* )xmalloc(e->len); - CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(reg->exact); xmemcpy(reg->exact, e->s, e->len); reg->exact_end = reg->exact + e->len; reg->optimize = ONIG_OPTIMIZE_EXACT_IC; @@ -4886,7 +4895,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) int allow_reverse; reg->exact = str_dup(e->s, e->s + e->len); - CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(reg->exact); reg->exact_end = reg->exact + e->len; allow_reverse = @@ -5504,7 +5513,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } } - if (IS_NOT_NULL(root)) onig_node_free(root); + onig_node_free(root); if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) xfree(scan_env.mem_nodes_dynamic); return r; @@ -5605,13 +5614,13 @@ onig_init(void) if (onig_inited != 0) return 0; - onig_inited = 1; - THREAD_SYSTEM_INIT; THREAD_ATOMIC_START; + onig_inited = 1; + onigenc_init(); - onigenc_set_default_caseconv_table((UChar* )0); + /* onigenc_set_default_caseconv_table((UChar* )0); */ #ifdef ONIG_DEBUG_STATISTICS onig_statistics_init(); @@ -5637,7 +5646,7 @@ onig_end(void) onig_free_shared_cclass_table(); #endif -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE onig_free_node_list(); #endif @@ -5648,6 +5657,64 @@ onig_end(void) return 0; } +extern int +onig_is_in_code_range(const UChar* p, OnigCodePoint code) +{ + OnigCodePoint n, *data; + OnigCodePoint low, high, x; + + GET_CODE_POINT(n, p); + data = (OnigCodePoint* )p; + data++; + + for (low = 0, high = n; low < high; ) { + x = (low + high) >> 1; + if (code > data[x * 2 + 1]) + low = x + 1; + else + high = x; + } + + return ((low < n && code >= data[low * 2]) ? 1 : 0); +} + +extern int +onig_is_code_in_cc_len(int enclen, OnigCodePoint code, CClassNode* cc) +{ + int found; + + if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } + } + else { + found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); + } + + if (IS_NCCLASS_NOT(cc)) + return !found; + else + return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int len; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + len = 2; + } + else { + len = ONIGENC_CODE_TO_MBCLEN(enc, code); + } + return onig_is_code_in_cc_len(len, code, cc); +} + #ifdef ONIG_DEBUG @@ -6084,27 +6151,27 @@ print_indent_tree(FILE* f, Node* node, int indent) type = NTYPE(node); switch (type) { - case N_LIST: - case N_ALT: - if (NTYPE(node) == N_LIST) + case NT_LIST: + case NT_ALT: + if (NTYPE(node) == NT_LIST) fprintf(f, "\n", (int )node); else fprintf(f, "\n", (int )node); - print_indent_tree(f, NCONS(node).left, indent + add); - while (IS_NOT_NULL(node = NCONS(node).right)) { + print_indent_tree(f, NCAR(node), indent + add); + while (IS_NOT_NULL(node = NCDR(node))) { if (NTYPE(node) != type) { fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); exit(0); } - print_indent_tree(f, NCONS(node).left, indent + add); + print_indent_tree(f, NCAR(node), indent + add); } break; - case N_STRING: + case NT_STR: fprintf(f, "", (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node); - for (p = NSTRING(node).s; p < NSTRING(node).end; p++) { + for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { if (*p >= 0x20 && *p < 0x7f) fputc(*p, f); else { @@ -6113,11 +6180,11 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_CCLASS: + case NT_CCLASS: fprintf(f, "", (int )node); - if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f); - if (NCCLASS(node).mbuf) { - BBuf* bbuf = NCCLASS(node).mbuf; + if (IS_CCLASS_NOT(NCCLASS(node)) fputs(" not", f); + if (NCCLASS(node)->mbuf) { + BBuf* bbuf = NCCLASS(node)->mbuf; for (i = 0; i < bbuf->used; i++) { if (i > 0) fprintf(f, ","); fprintf(f, "%0x", bbuf->p[i]); @@ -6125,11 +6192,11 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_CTYPE: + case NT_CTYPE: fprintf(f, " ", (int )node); - switch (NCTYPE(node).ctype) { + switch (NCTYPE(node)->ctype) { case ONIGENC_CTYPE_WORD: - if (NCTYPE(node).not != 0) + if (NCTYPE(node)->not != 0) fputs("not word", f); else fputs("word", f); @@ -6141,13 +6208,13 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_ANYCHAR: + case NT_CANY: fprintf(f, "", (int )node); break; - case N_ANCHOR: + case NT_ANCHOR: fprintf(f, " ", (int )node); - switch (NANCHOR(node).type) { + switch (NANCHOR(node)->type) { case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; case ANCHOR_END_BUF: fputs("end buf", f); break; case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; @@ -6172,10 +6239,10 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_BACKREF: + case NT_BREF: { int* p; - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); p = BACKREFS_P(br); fprintf(f, "", (int )node); for (i = 0; i < br->back_num; i++) { @@ -6186,33 +6253,33 @@ print_indent_tree(FILE* f, Node* node, int indent) break; #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: { - CallNode* cn = &(NCALL(node)); + CallNode* cn = NCALL(node); fprintf(f, "", (int )node); p_string(f, cn->name_end - cn->name, cn->name); } break; #endif - case N_QUANTIFIER: + case NT_QTFR: fprintf(f, "{%d,%d}%s\n", (int )node, - NQUANTIFIER(node).lower, NQUANTIFIER(node).upper, - (NQUANTIFIER(node).greedy ? "" : "?")); - print_indent_tree(f, NQUANTIFIER(node).target, indent + add); + NQTFR(node)->lower, NQTFR(node)->upper, + (NQTFR(node)->greedy ? "" : "?")); + print_indent_tree(f, NQTFR(node)->target, indent + add); break; - case N_EFFECT: - fprintf(f, " ", (int )node); - switch (NEFFECT(node).type) { - case EFFECT_OPTION: - fprintf(f, "option:%d\n", NEFFECT(node).option); - print_indent_tree(f, NEFFECT(node).target, indent + add); + case NT_ENCLOSE: + fprintf(f, " ", (int )node); + switch (NENCLOSE(node)->type) { + case ENCLOSE_OPTION: + fprintf(f, "option:%d\n", NENCLOSE(node)->option); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); break; - case EFFECT_MEMORY: - fprintf(f, "memory:%d", NEFFECT(node).regnum); + case ENCLOSE_MEMORY: + fprintf(f, "memory:%d", NENCLOSE(node)->regnum); break; - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_STOP_BACKTRACK: fprintf(f, "stop-bt"); break; @@ -6220,7 +6287,7 @@ print_indent_tree(FILE* f, Node* node, int indent) break; } fprintf(f, "\n"); - print_indent_tree(f, NEFFECT(node).target, indent + add); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); break; default: @@ -6228,8 +6295,8 @@ print_indent_tree(FILE* f, Node* node, int indent) break; } - if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER && - type != N_EFFECT) + if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && + type != NT_ENCLOSE) fprintf(f, "\n"); fflush(f); } diff --git a/regenc.c b/regenc.c index 9f50cb63db09c0..50fef7c070c963 100644 --- a/regenc.c +++ b/regenc.c @@ -169,10 +169,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) } } -const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0; - -#ifndef USE_EXTERNAL_LOWER_CASE_CONV_TABLE -static const UChar BuiltInAsciiToLowerCaseTable[] = { +const UChar OnigEncAsciiToLowerCaseTable[] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -206,7 +203,6 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = { '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', }; -#endif /* not USE_EXTERNAL_LOWER_CASE_CONV_TABLE */ #ifdef USE_UPPER_CASE_TABLE const UChar OnigEncAsciiToUpperCaseTable[256] = { @@ -355,17 +351,8 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { extern void onigenc_set_default_caseconv_table(const UChar* table) { - if (table == (const UChar* )0) { -#ifndef USE_EXTERNAL_LOWER_CASE_CONV_TABLE - table = BuiltInAsciiToLowerCaseTable; -#else - return ; -#endif - } - - if (table != OnigEncAsciiToLowerCaseTable) { - OnigEncAsciiToLowerCaseTable = table; - } + /* nothing */ + /* obsoleted. */ } extern UChar* @@ -732,7 +719,7 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) #if 1 if (enc_len(enc, buf) != (p - buf)) - return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; + return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; #endif return p - buf; } @@ -755,7 +742,7 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) #if 1 if (enc_len(enc, buf) != (p - buf)) - return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; + return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; #endif return p - buf; } diff --git a/regenc.h b/regenc.h index 7aeb19a668a84c..bd9fab3be19213 100644 --- a/regenc.h +++ b/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -56,10 +56,10 @@ typedef struct { #endif /* error codes */ -#define ONIGENCERR_MEMORY -5 -#define ONIGENCERR_TYPE_BUG -6 -#define ONIGENCERR_INVALID_WIDE_CHAR_VALUE -400 -#define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE -401 +#define ONIGENC_ERR_MEMORY -5 +#define ONIGENC_ERR_TYPE_BUG -6 +#define ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE -400 +#define ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE -401 #define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) #define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) @@ -96,10 +96,9 @@ typedef struct { } PosixBracketEntryType; +/* #define USE_CRNL_AS_LINE_TERMINATOR */ #define USE_UNICODE_PROPERTIES /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ - -/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */ /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ @@ -163,7 +162,7 @@ onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable; +ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; diff --git a/regerror.c b/regerror.c index d23b2b030d77d8..b0cc71d18db440 100644 --- a/regerror.c +++ b/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -317,7 +317,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) va_list args; va_init_list(args, fmt); - n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args); + n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); va_end(args); need = (pat_end - pat) * 4 + 4; diff --git a/regexec.c b/regexec.c index 2a568fb1da4307..ea99dac8a91680 100644 --- a/regexec.c +++ b/regexec.c @@ -111,7 +111,7 @@ history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) (OnigCaptureTreeNode** )xrealloc(parent->childs, sizeof(OnigCaptureTreeNode*) * n); } - CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(parent->childs); for (i = parent->allocated; i < n; i++) { parent->childs[i] = (OnigCaptureTreeNode* )0; } @@ -331,14 +331,14 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) (msa).region = (arg_region);\ (msa).start = (arg_start);\ (msa).best_len = ONIG_MISMATCH;\ -} while (0) +} while(0) #else #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ -} while (0) +} while(0) #endif #ifdef USE_COMBINATION_EXPLOSION_CHECK @@ -367,14 +367,14 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) (msa).state_check_buff = (void* )0;\ (msa).state_check_buff_size = 0;\ }\ -} while (0) + } while(0) #define MATCH_ARG_FREE(msa) do {\ if ((msa).stack_p) xfree((msa).stack_p);\ if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ if ((msa).state_check_buff) xfree((msa).state_check_buff);\ }\ -} while (0); +} while(0) #else #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) @@ -623,7 +623,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, level--;\ }\ }\ -} while (0) +} while(0) #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ int level = 0;\ @@ -641,7 +641,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ k++;\ }\ -} while (0) +} while(0) #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ @@ -939,7 +939,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, else if (k->type == STK_CALL_FRAME) level--;\ else if (k->type == STK_RETURN) level++;\ }\ -} while (0) +} while(0) #define STACK_RETURN(addr) do {\ int level = 0;\ @@ -1045,7 +1045,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && BIT_STATUS_AT(reg->capture_history, n) != 0) { child = history_node_new(); - CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(child); child->group = n; child->beg = (int )(k->u.mem.pstr - str); r = history_tree_add_child(node, child); @@ -1174,17 +1174,17 @@ static int OpCurr = OP_FINISH; static int OpPrevTarget = OP_FAIL; static int MaxStackDepth = 0; -#define STAT_OP_IN(opcode) do {\ +#define MOP_IN(opcode) do {\ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ OpCurr = opcode;\ OpCounter[opcode]++;\ GETTIME(ts);\ -} while (0) +} while(0) -#define STAT_OP_OUT do {\ +#define MOP_OUT do {\ GETTIME(te);\ OpTime[OpCurr] += TIMEDIFF(te, ts);\ -} while (0) +} while(0) extern void onig_statistics_init(void) @@ -1212,73 +1212,15 @@ onig_print_statistics(FILE* f) stk++;\ if (stk - stk_base > MaxStackDepth) \ MaxStackDepth = stk - stk_base;\ -} while (0) +} while(0) #else #define STACK_INC stk++ -#define STAT_OP_IN(opcode) -#define STAT_OP_OUT +#define MOP_IN(opcode) +#define MOP_OUT #endif -extern int -onig_is_in_code_range(const UChar* p, OnigCodePoint code) -{ - OnigCodePoint n, *data; - OnigCodePoint low, high, x; - - GET_CODE_POINT(n, p); - data = (OnigCodePoint* )p; - data++; - - for (low = 0, high = n; low < high; ) { - x = (low + high) >> 1; - if (code > data[x * 2 + 1]) - low = x + 1; - else - high = x; - } - - return ((low < n && code >= data[low * 2]) ? 1 : 0); -} - -static int -is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc) -{ - int found; - - if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) { - if (IS_NULL(cc->mbuf)) { - found = 0; - } - else { - found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); - } - } - else { - found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); - } - - if (IS_CCLASS_NOT(cc)) - return !found; - else - return found; -} - -extern int -onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) -{ - int len; - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - len = 2; - } - else { - len = ONIGENC_CODE_TO_MBCLEN(enc, code); - } - return is_code_in_cc(len, code, cc); -} - /* matching region of POSIX API */ typedef int regoff_t; @@ -1369,7 +1311,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sbegin = s; switch (*p++) { - case OP_END: STAT_OP_IN(OP_END); + case OP_END: MOP_IN(OP_END); n = s - sstart; if (n > best_len) { OnigRegion* region; @@ -1435,7 +1377,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (IS_NULL(region->history_root)) { region->history_root = node = history_node_new(); - CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(node); } else { node = region->history_root; @@ -1464,7 +1406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE end_best_len: #endif - STAT_OP_OUT; + MOP_OUT; if (IS_FIND_CONDITION(option)) { if (IS_FIND_NOT_EMPTY(option) && s == sstart) { @@ -1480,7 +1422,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto finish; break; - case OP_EXACT1: STAT_OP_IN(OP_EXACT1); + case OP_EXACT1: MOP_IN(OP_EXACT1); #if 0 DATA_ENSURE(1); if (*p != *s) goto fail; @@ -1489,18 +1431,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s++) goto fail; DATA_ENSURE(0); p++; - STAT_OP_OUT; + MOP_OUT; break; - case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); + case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); { int len; - UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; DATA_ENSURE(1); - ss = s; - sp = p; - len = ONIGENC_MBC_CASE_FOLD(encode, /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ case_fold_flag, @@ -1514,21 +1453,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; q++; } } - STAT_OP_OUT; + MOP_OUT; break; - case OP_EXACT2: STAT_OP_IN(OP_EXACT2); + case OP_EXACT2: MOP_IN(OP_EXACT2); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACT3: STAT_OP_IN(OP_EXACT3); + case OP_EXACT3: MOP_IN(OP_EXACT3); DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; @@ -1537,11 +1476,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACT4: STAT_OP_IN(OP_EXACT4); + case OP_EXACT4: MOP_IN(OP_EXACT4); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1552,11 +1491,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACT5: STAT_OP_IN(OP_EXACT5); + case OP_EXACT5: MOP_IN(OP_EXACT5); DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; @@ -1569,25 +1508,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTN: STAT_OP_IN(OP_EXACTN); + case OP_EXACTN: MOP_IN(OP_EXACTN); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { if (*p++ != *s++) goto fail; } sprev = s - 1; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); + case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); { int len; - UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; GET_LENGTH_INC(tlen, p); endp = p + tlen; @@ -1595,9 +1534,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (p < endp) { sprev = s; DATA_ENSURE(1); - ss = s; - sp = p; - len = ONIGENC_MBC_CASE_FOLD(encode, /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ case_fold_flag, @@ -1611,20 +1547,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1); + case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; - STAT_OP_OUT; + MOP_OUT; break; - case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2); + case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1635,11 +1571,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3); + case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; @@ -1654,11 +1590,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N); + case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { @@ -1668,11 +1604,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 2; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N); + case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { @@ -1684,11 +1620,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 3; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN); + case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; @@ -1698,19 +1634,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - tlen; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_CCLASS: STAT_OP_IN(OP_CCLASS); + case OP_CCLASS: MOP_IN(OP_CCLASS); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB); + case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: @@ -1736,10 +1672,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } p += tlen; - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX); + case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1754,18 +1690,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT); + case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; s += enc_len(encode, s); - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT); + case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; @@ -1803,10 +1739,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; cc_mb_not_success: - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT); + case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1821,10 +1757,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE); + case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); { OnigCodePoint code; void *node; @@ -1838,29 +1774,29 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += mb_len; DATA_ENSURE(0); code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (is_code_in_cc(mb_len, code, node) == 0) goto fail; + if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); + case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); DATA_ENSURE(1); n = enc_len(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML); + case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); n = enc_len(encode, s); DATA_ENSURE(n); s += n; - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR); + case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enc_len(encode, s); @@ -1869,10 +1805,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; s += n; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR); + case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enc_len(encode, s); @@ -1886,10 +1822,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } } - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -1901,16 +1837,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += n; } p++; - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } n = enc_len(encode, s); - if (n >1) { + if (n > 1) { DATA_ENSURE(n); sprev = s; s += n; @@ -1921,11 +1857,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } p++; - STAT_OP_OUT; + MOP_OUT; break; #ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR); + case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); GET_STATE_CHECK_NUM_INC(mem, p); while (DATA_ENSURE_CHECK1) { STATE_CHECK_VAL(scv, mem); @@ -1938,11 +1874,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; s += n; } - STAT_OP_OUT; + MOP_OUT; break; case OP_STATE_CHECK_ANYCHAR_ML_STAR: - STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); + MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); GET_STATE_CHECK_NUM_INC(mem, p); while (DATA_ENSURE_CHECK1) { @@ -1961,29 +1897,29 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } } - STAT_OP_OUT; + MOP_OUT; break; #endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_WORD: STAT_OP_IN(OP_WORD); + case OP_WORD: MOP_IN(OP_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enc_len(encode, s); - STAT_OP_OUT; + MOP_OUT; break; - case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD); + case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enc_len(encode, s); - STAT_OP_OUT; + MOP_OUT; break; - case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND); + case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); if (ON_STR_BEGIN(s)) { DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) @@ -1998,11 +1934,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, == ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND); + case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); if (ON_STR_BEGIN(s)) { if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; @@ -2016,25 +1952,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, != ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } - STAT_OP_OUT; + MOP_OUT; continue; break; #ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN); + case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } } goto fail; break; - case OP_WORD_END: STAT_OP_IN(OP_WORD_END); + case OP_WORD_END: MOP_IN(OP_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } } @@ -2042,65 +1978,65 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; #endif - case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF); + case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); if (! ON_STR_BEGIN(s)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_END_BUF: STAT_OP_IN(OP_END_BUF); + case OP_END_BUF: MOP_IN(OP_END_BUF); if (! ON_STR_END(s)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE); + case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { if (IS_NOTBOL(msa->options)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - STAT_OP_OUT; + MOP_OUT; continue; } goto fail; break; - case OP_END_LINE: STAT_OP_IN(OP_END_LINE); + case OP_END_LINE: MOP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } #endif goto fail; break; - case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF); + case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */ - STAT_OP_OUT; + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } @@ -2108,14 +2044,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && ON_STR_END(s + enc_len(encode, s))) { - STAT_OP_OUT; + MOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { UChar* ss = s + enc_len(encode, s); - if (ON_STR_END(ss + enc_len(encode, ss))) { - STAT_OP_OUT; + ss += enc_len(encode, ss); + if (ON_STR_END(ss)) { + MOP_OUT; continue; } } @@ -2123,53 +2060,53 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; break; - case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION); + case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); if (s != msa->start) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH); + case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START); + case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); GET_MEMNUM_INC(mem, p); mem_start_stk[mem] = (OnigStackIndex )((void* )s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH); + case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END); + case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (OnigStackIndex )((void* )s); - STAT_OP_OUT; + MOP_OUT; continue; break; #ifdef USE_SUBEXP_CALL - case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC); + case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC); + case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (OnigStackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); @@ -2180,22 +2117,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); - STAT_OP_OUT; + MOP_OUT; continue; break; #endif - case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1); + case OP_BACKREF1: MOP_IN(OP_BACKREF1); mem = 1; goto backref; break; - case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2); + case OP_BACKREF2: MOP_IN(OP_BACKREF2); mem = 2; goto backref; break; - case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); + case OP_BACKREFN: MOP_IN(OP_BACKREFN); GET_MEMNUM_INC(mem, p); backref: { @@ -2223,12 +2160,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (sprev + (len = enc_len(encode, sprev)) < s) sprev += len; - STAT_OP_OUT; + MOP_OUT; continue; } break; - case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC); + case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); GET_MEMNUM_INC(mem, p); { int len; @@ -2255,12 +2192,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (sprev + (len = enc_len(encode, sprev)) < s) sprev += len; - STAT_OP_OUT; + MOP_OUT; continue; } break; - case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI); + case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2294,12 +2231,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } break; - case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC); + case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2333,7 +2270,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } break; @@ -2360,35 +2297,37 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } break; #endif - - case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); + +#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ + case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); STACK_PUSH_ALT(p, s, sprev); p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION); + case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); GET_OPTION_INC(option, p); - STAT_OP_OUT; + MOP_OUT; continue; break; +#endif - case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START); + case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_PUSH_NULL_CHECK_START(mem, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END); + case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); { int isnull; @@ -2418,12 +2357,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - STAT_OP_OUT; + MOP_OUT; continue; break; #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK - case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST); + case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); { int isnull; @@ -2438,14 +2377,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto null_check_found; } } - STAT_OP_OUT; + MOP_OUT; continue; break; #endif #ifdef USE_SUBEXP_CALL case OP_NULL_CHECK_END_MEMST_PUSH: - STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); { int isnull; @@ -2467,39 +2406,39 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_NULL_CHECK_END(mem); } } - STAT_OP_OUT; + MOP_OUT; continue; break; #endif - case OP_JUMP: STAT_OP_IN(OP_JUMP); + case OP_JUMP: MOP_IN(OP_JUMP); GET_RELADDR_INC(addr, p); p += addr; - STAT_OP_OUT; + MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_PUSH: STAT_OP_IN(OP_PUSH); + case OP_PUSH: MOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; break; #ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH); + case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); GET_STATE_CHECK_NUM_INC(mem, p); STATE_CHECK_VAL(scv, mem); if (scv) goto fail; GET_RELADDR_INC(addr, p); STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); + case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); GET_STATE_CHECK_NUM_INC(mem, p); GET_RELADDR_INC(addr, p); STATE_CHECK_VAL(scv, mem); @@ -2509,54 +2448,54 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else { STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK); + case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); GET_STATE_CHECK_NUM_INC(mem, p); STATE_CHECK_VAL(scv, mem); if (scv) goto fail; STACK_PUSH_STATE_CHECK(s, mem); - STAT_OP_OUT; + MOP_OUT; continue; break; #endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_POP: STAT_OP_IN(OP_POP); + case OP_POP: MOP_IN(OP_POP); STACK_POP_ONE; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1); + case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; } p += (addr + 1); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT); + case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); GET_RELADDR_INC(addr, p); if (*p == *s) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; } p++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_REPEAT: STAT_OP_IN(OP_REPEAT); + case OP_REPEAT: MOP_IN(OP_REPEAT); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2569,11 +2508,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT(p + addr, s, sprev); } } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG); + case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2587,11 +2526,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += addr; } } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC); + case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2609,19 +2548,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); - STAT_OP_OUT; + MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG); + case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; break; - case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG); + case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2643,68 +2582,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { STACK_PUSH_REPEAT_INC(si); } - STAT_OP_OUT; + MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG); + case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; break; - case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS); + case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); STACK_PUSH_POS(s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_POP_POS: STAT_OP_IN(OP_POP_POS); + case OP_POP_POS: MOP_IN(OP_POP_POS); { STACK_POS_END(stkp); s = stkp->u.state.pstr; sprev = stkp->u.state.pstr_prev; } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT); + case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); GET_RELADDR_INC(addr, p); STACK_PUSH_POS_NOT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS); + case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); STACK_POP_TIL_POS_NOT; goto fail; break; - case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT); + case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); STACK_PUSH_STOP_BT; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT); + case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); STACK_STOP_BT_END; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND); + case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT); + case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -2719,28 +2658,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s = q; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT); + case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); STACK_POP_TIL_LOOK_BEHIND_NOT; goto fail; break; #ifdef USE_SUBEXP_CALL - case OP_CALL: STAT_OP_IN(OP_CALL); + case OP_CALL: MOP_IN(OP_CALL); GET_ABSADDR_INC(addr, p); STACK_PUSH_CALL_FRAME(p); p = reg->p + addr; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_RETURN: STAT_OP_IN(OP_RETURN); + case OP_RETURN: MOP_IN(OP_RETURN); STACK_RETURN(p); STACK_PUSH_RETURN; - STAT_OP_OUT; + MOP_OUT; continue; break; #endif @@ -2750,9 +2689,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; fail: - STAT_OP_OUT; + MOP_OUT; /* fall */ - case OP_FAIL: STAT_OP_IN(OP_FAIL); + case OP_FAIL: MOP_IN(OP_FAIL); STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; @@ -2765,7 +2704,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } #endif - STAT_OP_OUT; + MOP_OUT; continue; break; @@ -3238,10 +3177,12 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, case ANCHOR_END_LINE: if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = (UChar* )onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; +#endif } else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -3360,12 +3301,14 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ANCHOR_END_LINE: if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(prev)) goto fail; if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { p = prev; goto retry; } +#endif } else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -3570,6 +3513,14 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, max_semi_end = (UChar* )end; if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { min_semi_end = pre_end; + +#ifdef USE_CRNL_AS_LINE_TERMINATOR + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); + if (IS_NOT_NULL(pre_end) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } +#endif if (min_semi_end > str && start <= min_semi_end) { goto end_buf; } diff --git a/regint.h b/regint.h index 11a84ad3620c5e..98b8055bb5c248 100644 --- a/regint.h +++ b/regint.h @@ -51,7 +51,7 @@ (defined(__ppc__) && defined(__APPLE__)) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD86) || \ defined(__mc68020__) -#define PLATFORM_UNALIGNED_WORD_ACCESS +/* #define PLATFORM_UNALIGNED_WORD_ACCESS */ #endif /* config */ @@ -63,16 +63,13 @@ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR /* #define USE_RECOMPILE_API */ -/* treat \r\n as line terminator. - !!! NO SUPPORT !!! - use this configuration on your own responsibility */ -/* #define USE_CRNL_AS_LINE_TERMINATOR */ +/* #define USE_CRNL_AS_LINE_TERMINATOR */ /* moved to regenc.h. */ /* internal config */ -#define USE_RECYCLE_NODE +#define USE_PARSE_TREE_NODE_RECYCLE #define USE_OP_PUSH_OR_JUMP_EXACT -#define USE_QUANTIFIER_PEEK_NEXT -#define USE_ST_HASH_TABLE +#define USE_QTFR_PEEK_NEXT +#define USE_ST_LIBRARY #define USE_SHARED_CCLASS_TABLE #define INIT_MATCH_STACK_SIZE 160 @@ -109,10 +106,6 @@ #define CHECK_INTERRUPT_IN_MATCH_AT -#if defined(_WIN32) && !defined(__GNUC__) && !defined(vsnprintf) -#define vsnprintf _vsnprintf -#endif - #ifdef RUBY #define onig_st_init_table st_init_table @@ -165,12 +158,16 @@ #define xmemset memset #define xmemcpy memcpy #define xmemmove memmove + #if defined(_WIN32) && !defined(__GNUC__) #define xalloca _alloca +#define xvsnprintf _vsnprintf #else #define xalloca alloca +#define xvsnprintf vsnprintf #endif + #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) #define ONIG_STATE_INC(reg) (reg)->state++ #define ONIG_STATE_DEC(reg) (reg)->state-- @@ -235,11 +232,26 @@ #define IS_NULL(p) (((void*)(p)) == (void*)0) #define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) #define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL -#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val) +#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY #define NULL_UCHARP ((UChar* )0) -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -#define WORD_ALIGNMENT_SIZE SIZEOF_INT + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + +#define PLATFORM_GET_INC(val,p,type) do{\ + val = *(type* )p;\ + (p) += sizeof(type);\ +} while(0) + +#else + +#define PLATFORM_GET_INC(val,p,type) do{\ + xmemcpy(&val, (p), sizeof(type));\ + (p) += sizeof(type);\ +} while(0) + +/* sizeof(OnigCodePoint) */ +#define WORD_ALIGNMENT_SIZE SIZEOF_LONG #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ (pad_size) = WORD_ALIGNMENT_SIZE \ @@ -252,86 +264,6 @@ (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ } while (0) - -#define B_SHIFT 8 -#define B_MASK 0xff - -#define SERIALIZE_2BYTE_INT(i,p) do {\ - *(p) = ((i) >> B_SHIFT) & B_MASK;\ - *((p)+1) = (i) & B_MASK;\ -} while (0) - -#define SERIALIZE_4BYTE_INT(i,p) do {\ - *(p) = ((i) >> B_SHIFT*3) & B_MASK;\ - *((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\ - *((p)+2) = ((i) >> B_SHIFT ) & B_MASK;\ - *((p)+3) = (i) & B_MASK;\ -} while (0) - -#define SERIALIZE_8BYTE_INT(i,p) do {\ - *(p) = ((i) >> B_SHIFT*7) & B_MASK;\ - *((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\ - *((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\ - *((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\ - *((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\ - *((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\ - *((p)+6) = ((i) >> B_SHIFT ) & B_MASK;\ - *((p)+7) = (i) & B_MASK;\ -} while (0) - -#define GET_2BYTE_INT_INC(type,i,p) do {\ - (i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\ - (p) += 2;\ -} while (0) - -#define GET_4BYTE_INT_INC(type,i,p) do {\ - (i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \ - ((unsigned int )((p)[1]) << B_SHIFT*2) | \ - ((unsigned int )((p)[2]) << B_SHIFT ) | \ - ((unsigned int )((p)[3]) )); \ - (p) += 4;\ -} while (0) - -#define GET_8BYTE_INT_INC(type,i,p) do {\ - (i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \ - ((unsigned long )((p)[1]) << B_SHIFT*6) | \ - ((unsigned long )((p)[2]) << B_SHIFT*5) | \ - ((unsigned long )((p)[3]) << B_SHIFT*4) | \ - ((unsigned long )((p)[4]) << B_SHIFT*3) | \ - ((unsigned long )((p)[5]) << B_SHIFT*2) | \ - ((unsigned long )((p)[6]) << B_SHIFT ) | \ - ((unsigned long )((p)[7]) )); \ - (p) += 8;\ -} while (0) - -#if SIZEOF_SHORT == 2 -#define GET_SHORT_INC(i,p) GET_2BYTE_INT_INC(short,i,p) -#define SERIALIZE_SHORT(i,p) SERIALIZE_2BYTE_INT(i,p) -#elif SIZEOF_SHORT == 4 -#define GET_SHORT_INC(i,p) GET_4BYTE_INT_INC(short,i,p) -#define SERIALIZE_SHORT(i,p) SERIALIZE_4BYTE_INT(i,p) -#elif SIZEOF_SHORT == 8 -#define GET_SHORT_INC(i,p) GET_8BYTE_INT_INC(short,i,p) -#define SERIALIZE_SHORT(i,p) SERIALIZE_8BYTE_INT(i,p) -#endif - -#if SIZEOF_INT == 2 -#define GET_INT_INC(i,p) GET_2BYTE_INT_INC(int,i,p) -#define GET_UINT_INC(i,p) GET_2BYTE_INT_INC(unsigned,i,p) -#define SERIALIZE_INT(i,p) SERIALIZE_2BYTE_INT(i,p) -#define SERIALIZE_UINT(i,p) SERIALIZE_2BYTE_INT(i,p) -#elif SIZEOF_INT == 4 -#define GET_INT_INC(i,p) GET_4BYTE_INT_INC(int,i,p) -#define GET_UINT_INC(i,p) GET_4BYTE_INT_INC(unsigned,i,p) -#define SERIALIZE_INT(i,p) SERIALIZE_4BYTE_INT(i,p) -#define SERIALIZE_UINT(i,p) SERIALIZE_4BYTE_INT(i,p) -#elif SIZEOF_INT == 8 -#define GET_INT_INC(i,p) GET_8BYTE_INT_INC(int,i,p) -#define GET_UINT_INC(i,p) GET_8BYTE_INT_INC(unsigned,i,p) -#define SERIALIZE_INT(i,p) SERIALIZE_8BYTE_INT(i,p) -#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p) -#endif - #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ /* stack pop level */ @@ -383,7 +315,6 @@ typedef unsigned int BitStatusType; #define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) #define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) #define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) -#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option)) #define IS_FIND_CONDITION(option) ((option) & \ (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) #define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) @@ -597,9 +528,6 @@ enum OpCode { OP_MEMORY_END, OP_MEMORY_END_REC, /* push marker to stack */ - OP_SET_OPTION_PUSH, /* set option and push recover option */ - OP_SET_OPTION, /* set option */ - OP_FAIL, /* pop stack and move */ OP_JUMP, OP_PUSH, @@ -634,7 +562,11 @@ enum OpCode { OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ OP_STATE_CHECK, /* check only */ OP_STATE_CHECK_ANYCHAR_STAR, - OP_STATE_CHECK_ANYCHAR_ML_STAR + OP_STATE_CHECK_ANYCHAR_ML_STAR, + + /* no need: IS_DYNAMIC_OPTION() == 0 */ + OP_SET_OPTION_PUSH, /* set option and push recover option */ + OP_SET_OPTION /* set option */ }; typedef int RelAddrType; @@ -657,22 +589,6 @@ typedef void* PointerType; #define SIZE_POINTER sizeof(PointerType) -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - -#define PLATFORM_GET_INC(val,p,type) do{\ - val = *(type* )p;\ - (p) += sizeof(type);\ -} while(0) - -#else - -#define PLATFORM_GET_INC(val,p,type) do{\ - xmemcpy(&val, (p), sizeof(type));\ - (p) += sizeof(type);\ -} while(0) - -#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ - #define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) #define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) #define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) @@ -767,21 +683,33 @@ typedef void* PointerType; ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + +#define NCCLASS_FLAGS(cc) ((cc)->flags) +#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) +#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag)) +#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) + /* cclass node */ -#define CCLASS_FLAG_NOT 1 -#define CCLASS_FLAG_SHARE (1<<1) +#define FLAG_NCCLASS_NOT (1<<0) +#define FLAG_NCCLASS_SHARE (1<<1) -#define CCLASS_SET_NOT(cc) (cc)->flags |= CCLASS_FLAG_NOT -#define CCLASS_SET_SHARE(cc) (cc)->flags |= CCLASS_FLAG_SHARE -#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~CCLASS_FLAG_NOT +#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) +#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) +#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) -#define IS_CCLASS_NOT(cc) (((cc)->flags & CCLASS_FLAG_NOT) != 0) -#define IS_CCLASS_SHARE(cc) (((cc)->flags & CCLASS_FLAG_SHARE) != 0) +typedef struct { + int type; + /* struct _Node* next; */ + /* unsigned int flags; */ +} NodeBase; typedef struct { - int flags; + NodeBase base; + unsigned int flags; BitSet bs; - BBuf* mbuf; /* multi-byte info or NULL */ + BBuf* mbuf; /* multi-byte info or NULL */ } CClassNode; typedef long OnigStackIndex; @@ -873,6 +801,7 @@ extern void onig_chain_reduce P_((regex_t* reg)); extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); extern void onig_transfer P_((regex_t* to, regex_t* from)); extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc)); /* strend hash */ typedef void hash_table_type; diff --git a/regparse.c b/regparse.c index f924f1dfffb513..df92266ae1c28d 100644 --- a/regparse.c +++ b/regparse.c @@ -109,7 +109,7 @@ bbuf_clone(BBuf** rto, BBuf* from) BBuf *to; *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(to); r = BBUF_INIT(to, from->alloc); if (r != 0) return r; to->used = from->used; @@ -300,7 +300,7 @@ typedef struct { int* back_refs; } NameEntry; -#ifdef USE_ST_HASH_TABLE +#ifdef USE_ST_LIBRARY #include "ruby/st.h" @@ -309,19 +309,51 @@ typedef struct { unsigned char* end; } st_strend_key; -static int strend_cmp(st_strend_key*, st_strend_key*); -static int strend_hash(st_strend_key*); +static int +str_end_cmp(st_strend_key* x, st_strend_key* y) +{ + unsigned char *p, *q; + int c; -static const struct st_hash_type type_strend_hash = { - strend_cmp, - strend_hash, -}; + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +str_end_hash(st_strend_key* x) +{ + UChar *p; + int val = 0; + + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return val + (val >> 5); +} extern hash_table_type* onig_st_init_strend_table_with_size(int size) { - return (hash_table_type* )onig_st_init_table_with_size(&type_strend_hash, - size); + static struct st_hash_type hashType = { + str_end_cmp, + str_end_hash, + }; + + return (hash_table_type* ) + onig_st_init_table_with_size(&hashType, size); } extern int @@ -330,8 +362,8 @@ onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, { st_strend_key key; - key.s = (unsigned char* )str_key; - key.end = (unsigned char* )end_key; + key.s = (UChar* )str_key; + key.end = (UChar* )end_key; return onig_st_lookup(table, (st_data_t )(&key), value); } @@ -344,8 +376,8 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key, int result; key = (st_strend_key* )xmalloc(sizeof(st_strend_key)); - key->s = (unsigned char* )str_key; - key->end = (unsigned char* )end_key; + key->s = (UChar* )str_key; + key->end = (UChar* )end_key; result = onig_st_insert(table, (st_data_t )key, value); if (result) { xfree(key); @@ -353,41 +385,6 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key, return result; } -static int -strend_cmp(st_strend_key* x, st_strend_key* y) -{ - unsigned char *p, *q; - int c; - - if ((x->end - x->s) != (y->end - y->s)) - return 1; - - p = x->s; - q = y->s; - while (p < x->end) { - c = (int )*p - (int )*q; - if (c != 0) return c; - - p++; q++; - } - - return 0; -} - -static int -strend_hash(st_strend_key* x) -{ - int val; - unsigned char *p; - - val = 0; - p = x->s; - while (p < x->end) { - val = val * 997 + (int )*p++; - } - - return val + (val >> 5); -} typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ @@ -561,7 +558,7 @@ onig_number_of_names(regex_t* reg) return 0; } -#else /* USE_ST_HASH_TABLE */ +#else /* USE_ST_LIBRARY */ #define INIT_NAMES_ALLOC_NUM 8 @@ -697,7 +694,7 @@ onig_number_of_names(regex_t* reg) return 0; } -#endif /* else USE_ST_HASH_TABLE */ +#endif /* else USE_ST_LIBRARY */ static int name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) @@ -711,13 +708,13 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) e = name_find(reg, name, name_end); if (IS_NULL(e)) { -#ifdef USE_ST_HASH_TABLE +#ifdef USE_ST_LIBRARY if (IS_NULL(t)) { t = onig_st_init_strend_table_with_size(5); reg->name_table = (void* )t; } e = (NameEntry* )xmalloc(sizeof(NameEntry)); - CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(e); e->name = strdup_with_null(reg->enc, name, name_end); if (IS_NULL(e->name)) return ONIGERR_MEMORY; @@ -734,7 +731,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (IS_NULL(t)) { alloc = INIT_NAMES_ALLOC_NUM; t = (NameTable* )xmalloc(sizeof(NameTable)); - CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(t); t->e = NULL; t->alloc = 0; t->num = 0; @@ -753,7 +750,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) alloc = t->alloc * 2; t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); - CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(t->e); t->alloc = alloc; clear: @@ -787,7 +784,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (e->back_num == 2) { alloc = INIT_NAME_BACKREFS_ALLOC_NUM; e->back_refs = (int* )xmalloc(sizeof(int) * alloc); - CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(e->back_refs); e->back_alloc = alloc; e->back_refs[0] = e->back_ref1; e->back_refs[1] = backref; @@ -796,7 +793,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (e->back_num > e->back_alloc) { alloc = e->back_alloc * 2; e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); - CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(e->back_refs); e->back_alloc = alloc; } e->back_refs[e->back_num - 1] = backref; @@ -950,7 +947,7 @@ scan_env_add_mem_entry(ScanEnv* env) alloc = env->mem_alloc * 2; p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); } - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); for (i = env->num_mem + 1; i < alloc; i++) p[i] = NULL_NODE; @@ -975,7 +972,7 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node) } -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE typedef struct _FreeNode { struct _FreeNode* next; } FreeNode; @@ -990,21 +987,20 @@ onig_node_free(Node* node) if (IS_NULL(node)) return ; switch (NTYPE(node)) { - case N_STRING: - if (NSTRING(node).capa != 0 && - IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) { - xfree(NSTRING(node).s); + case NT_STR: + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + xfree(NSTR(node)->s); } break; - case N_LIST: - case N_ALT: - onig_node_free(NCONS(node).left); - /* onig_node_free(NCONS(node).right); */ + case NT_LIST: + case NT_ALT: + onig_node_free(NCAR(node)); { - Node* next_node = NCONS(node).right; + Node* next_node = NCDR(node); -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE { FreeNode* n = (FreeNode* )node; @@ -1021,38 +1017,38 @@ onig_node_free(Node* node) } break; - case N_CCLASS: + case NT_CCLASS: { - CClassNode* cc = &(NCCLASS(node)); + CClassNode* cc = NCCLASS(node); - if (IS_CCLASS_SHARE(cc)) return ; + if (IS_NCCLASS_SHARE(cc)) return ; if (cc->mbuf) bbuf_free(cc->mbuf); } break; - case N_QUANTIFIER: - if (NQUANTIFIER(node).target) - onig_node_free(NQUANTIFIER(node).target); + case NT_QTFR: + if (NQTFR(node)->target) + onig_node_free(NQTFR(node)->target); break; - case N_EFFECT: - if (NEFFECT(node).target) - onig_node_free(NEFFECT(node).target); + case NT_ENCLOSE: + if (NENCLOSE(node)->target) + onig_node_free(NENCLOSE(node)->target); break; - case N_BACKREF: - if (IS_NOT_NULL(NBACKREF(node).back_dynamic)) - xfree(NBACKREF(node).back_dynamic); + case NT_BREF: + if (IS_NOT_NULL(NBREF(node)->back_dynamic)) + xfree(NBREF(node)->back_dynamic); break; - case N_ANCHOR: - if (NANCHOR(node).target) - onig_node_free(NANCHOR(node).target); + case NT_ANCHOR: + if (NANCHOR(node)->target) + onig_node_free(NANCHOR(node)->target); break; } -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE { FreeNode* n = (FreeNode* )node; @@ -1066,7 +1062,7 @@ onig_node_free(Node* node) #endif } -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE extern int onig_free_node_list(void) { @@ -1088,7 +1084,7 @@ node_new(void) { Node* node; -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE THREAD_ATOMIC_START; if (IS_NOT_NULL(FreeNodeList)) { node = (Node* )FreeNodeList; @@ -1100,6 +1096,7 @@ node_new(void) #endif node = (Node* )xmalloc(sizeof(Node)); + /* xmemset(node, 0, sizeof(Node)); */ return node; } @@ -1108,6 +1105,7 @@ static void initialize_cclass(CClassNode* cc) { BITSET_CLEAR(cc->bs); + /* cc->base.flags = 0; */ cc->flags = 0; cc->mbuf = NULL; } @@ -1117,9 +1115,9 @@ node_new_cclass(void) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_CCLASS; - initialize_cclass(&(NCCLASS(node))); + SET_NTYPE(node, NT_CCLASS); + initialize_cclass(NCCLASS(node)); return node; } @@ -1131,13 +1129,11 @@ node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, CClassNode* cc; OnigCodePoint j; - Node* node = node_new(); + Node* node = node_new_cclass(); CHECK_NULL_RETURN(node); - node->type = N_CCLASS; - cc = &(NCCLASS(node)); - cc->flags = 0; - if (not != 0) CCLASS_SET_NOT(cc); + cc = NCCLASS(node); + if (not != 0) NCCLASS_SET_NOT(cc); BITSET_CLEAR(cc->bs); if (sb_out > 0 && IS_NOT_NULL(ranges)) { @@ -1164,7 +1160,7 @@ node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, if (n == 0) goto is_null; bbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_VAL(bbuf, NULL); + CHECK_NULL_RETURN(bbuf); bbuf->alloc = n + 1; bbuf->used = n + 1; bbuf->p = (UChar* )((void* )ranges); @@ -1180,9 +1176,10 @@ node_new_ctype(int type, int not) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_CTYPE; - NCTYPE(node).ctype = type; - NCTYPE(node).not = not; + + SET_NTYPE(node, NT_CTYPE); + NCTYPE(node)->ctype = type; + NCTYPE(node)->not = not; return node; } @@ -1191,7 +1188,8 @@ node_new_anychar(void) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_ANYCHAR; + + SET_NTYPE(node, NT_CANY); return node; } @@ -1200,9 +1198,10 @@ node_new_list(Node* left, Node* right) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_LIST; - NCONS(node).left = left; - NCONS(node).right = right; + + SET_NTYPE(node, NT_LIST); + NCAR(node) = left; + NCDR(node) = right; return node; } @@ -1221,10 +1220,10 @@ onig_node_list_add(Node* list, Node* x) if (IS_NULL(n)) return NULL_NODE; if (IS_NOT_NULL(list)) { - while (IS_NOT_NULL(NCONS(list).right)) - list = NCONS(list).right; + while (IS_NOT_NULL(NCDR(list))) + list = NCDR(list); - NCONS(list).right = n; + NCDR(list) = n; } return n; @@ -1235,9 +1234,10 @@ onig_node_new_alt(Node* left, Node* right) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_ALT; - NCONS(node).left = left; - NCONS(node).right = right; + + SET_NTYPE(node, NT_ALT); + NCAR(node) = left; + NCDR(node) = right; return node; } @@ -1246,10 +1246,11 @@ onig_node_new_anchor(int type) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_ANCHOR; - NANCHOR(node).type = type; - NANCHOR(node).target = NULL; - NANCHOR(node).char_len = -1; + + SET_NTYPE(node, NT_ANCHOR); + NANCHOR(node)->type = type; + NANCHOR(node)->target = NULL; + NANCHOR(node)->char_len = -1; return node; } @@ -1264,31 +1265,32 @@ node_new_backref(int back_num, int* backrefs, int by_name, Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_BACKREF; - NBACKREF(node).state = 0; - NBACKREF(node).back_num = back_num; - NBACKREF(node).back_dynamic = (int* )NULL; + + SET_NTYPE(node, NT_BREF); + NBREF(node)->state = 0; + NBREF(node)->back_num = back_num; + NBREF(node)->back_dynamic = (int* )NULL; if (by_name != 0) - NBACKREF(node).state |= NST_NAME_REF; + NBREF(node)->state |= NST_NAME_REF; #ifdef USE_BACKREF_AT_LEVEL if (exist_level != 0) { - NBACKREF(node).state |= NST_NEST_LEVEL; - NBACKREF(node).nest_level = nest_level; + NBREF(node)->state |= NST_NEST_LEVEL; + NBREF(node)->nest_level = nest_level; } #endif for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { - NBACKREF(node).state |= NST_RECURSION; /* /...(\1).../ */ + NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ break; } } if (back_num <= NODE_BACKREFS_SIZE) { for (i = 0; i < back_num; i++) - NBACKREF(node).back_static[i] = backrefs[i]; + NBREF(node)->back_static[i] = backrefs[i]; } else { int* p = (int* )xmalloc(sizeof(int) * back_num); @@ -1296,7 +1298,7 @@ node_new_backref(int back_num, int* backrefs, int by_name, onig_node_free(node); return NULL; } - NBACKREF(node).back_dynamic = p; + NBREF(node)->back_dynamic = p; for (i = 0; i < back_num; i++) p[i] = backrefs[i]; } @@ -1310,12 +1312,12 @@ node_new_call(UChar* name, UChar* name_end) Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_CALL; - NCALL(node).state = 0; - NCALL(node).ref_num = CALLNODE_REFNUM_UNDEF; - NCALL(node).target = NULL_NODE; - NCALL(node).name = name; - NCALL(node).name_end = name_end; + SET_NTYPE(node, NT_CALL); + NCALL(node)->state = 0; + NCALL(node)->ref_num = CALLNODE_REFNUM_UNDEF; + NCALL(node)->target = NULL_NODE; + NCALL(node)->name = name; + NCALL(node)->name_end = name_end; return node; } #endif @@ -1325,58 +1327,60 @@ node_new_quantifier(int lower, int upper, int by_number) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_QUANTIFIER; - NQUANTIFIER(node).state = 0; - NQUANTIFIER(node).target = NULL; - NQUANTIFIER(node).lower = lower; - NQUANTIFIER(node).upper = upper; - NQUANTIFIER(node).greedy = 1; - NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY; - NQUANTIFIER(node).head_exact = NULL_NODE; - NQUANTIFIER(node).next_head_exact = NULL_NODE; - NQUANTIFIER(node).is_refered = 0; + + SET_NTYPE(node, NT_QTFR); + NQTFR(node)->state = 0; + NQTFR(node)->target = NULL; + NQTFR(node)->lower = lower; + NQTFR(node)->upper = upper; + NQTFR(node)->greedy = 1; + NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; + NQTFR(node)->head_exact = NULL_NODE; + NQTFR(node)->next_head_exact = NULL_NODE; + NQTFR(node)->is_refered = 0; if (by_number != 0) - NQUANTIFIER(node).state |= NST_BY_NUMBER; + NQTFR(node)->state |= NST_BY_NUMBER; #ifdef USE_COMBINATION_EXPLOSION_CHECK - NQUANTIFIER(node).comb_exp_check_num = 0; + NQTFR(node)->comb_exp_check_num = 0; #endif return node; } static Node* -node_new_effect(int type) +node_new_enclose(int type) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_EFFECT; - NEFFECT(node).type = type; - NEFFECT(node).state = 0; - NEFFECT(node).regnum = 0; - NEFFECT(node).option = 0; - NEFFECT(node).target = NULL; - NEFFECT(node).call_addr = -1; - NEFFECT(node).opt_count = 0; + + SET_NTYPE(node, NT_ENCLOSE); + NENCLOSE(node)->type = type; + NENCLOSE(node)->state = 0; + NENCLOSE(node)->regnum = 0; + NENCLOSE(node)->option = 0; + NENCLOSE(node)->target = NULL; + NENCLOSE(node)->call_addr = -1; + NENCLOSE(node)->opt_count = 0; return node; } extern Node* -onig_node_new_effect(int type) +onig_node_new_enclose(int type) { - return node_new_effect(type); + return node_new_enclose(type); } static Node* -node_new_effect_memory(OnigOptionType option, int is_named) +node_new_enclose_memory(OnigOptionType option, int is_named) { - Node* node = node_new_effect(EFFECT_MEMORY); + Node* node = node_new_enclose(ENCLOSE_MEMORY); CHECK_NULL_RETURN(node); if (is_named != 0) - SET_EFFECT_STATUS(node, NST_NAMED_GROUP); + SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); #ifdef USE_SUBEXP_CALL - NEFFECT(node).option = option; + NENCLOSE(node)->option = option; #endif return node; } @@ -1384,9 +1388,9 @@ node_new_effect_memory(OnigOptionType option, int is_named) static Node* node_new_option(OnigOptionType option) { - Node* node = node_new_effect(EFFECT_OPTION); + Node* node = node_new_enclose(ENCLOSE_OPTION); CHECK_NULL_RETURN(node); - NEFFECT(node).option = option; + NENCLOSE(node)->option = option; return node; } @@ -1396,31 +1400,31 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) int addlen = end - s; if (addlen > 0) { - int len = NSTRING(node).end - NSTRING(node).s; + int len = NSTR(node)->end - NSTR(node)->s; - if (NSTRING(node).capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { + if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { UChar* p; int capa = len + addlen + NODE_STR_MARGIN; - if (capa <= NSTRING(node).capa) { - onig_strcpy(NSTRING(node).s + len, s, end); + if (capa <= NSTR(node)->capa) { + onig_strcpy(NSTR(node)->s + len, s, end); } else { - if (NSTRING(node).s == NSTRING(node).buf) - p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end, + if (NSTR(node)->s == NSTR(node)->buf) + p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, s, end, capa); else - p = strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa); + p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); - NSTRING(node).s = p; - NSTRING(node).capa = capa; + CHECK_NULL_RETURN_MEMERR(p); + NSTR(node)->s = p; + NSTR(node)->capa = capa; } } else { - onig_strcpy(NSTRING(node).s + len, s, end); + onig_strcpy(NSTR(node)->s + len, s, end); } - NSTRING(node).end = NSTRING(node).s + len + addlen; + NSTR(node)->end = NSTR(node)->s + len + addlen; } return 0; @@ -1445,26 +1449,25 @@ node_str_cat_char(Node* node, UChar c) extern void onig_node_conv_to_str_node(Node* node, int flag) { - node->type = N_STRING; - - NSTRING(node).flag = flag; - NSTRING(node).capa = 0; - NSTRING(node).s = NSTRING(node).buf; - NSTRING(node).end = NSTRING(node).buf; + SET_NTYPE(node, NT_STR); + NSTR(node)->flag = flag; + NSTR(node)->capa = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; } extern void onig_node_str_clear(Node* node) { - if (NSTRING(node).capa != 0 && - IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) { - xfree(NSTRING(node).s); + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + xfree(NSTR(node)->s); } - NSTRING(node).capa = 0; - NSTRING(node).flag = 0; - NSTRING(node).s = NSTRING(node).buf; - NSTRING(node).end = NSTRING(node).buf; + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; } static Node* @@ -1473,11 +1476,11 @@ node_new_str(const UChar* s, const UChar* end) Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_STRING; - NSTRING(node).capa = 0; - NSTRING(node).flag = 0; - NSTRING(node).s = NSTRING(node).buf; - NSTRING(node).end = NSTRING(node).buf; + SET_NTYPE(node, NT_STR); + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; if (onig_node_str_cat(node, s, end)) { onig_node_free(node); return NULL; @@ -1659,7 +1662,7 @@ new_code_range(BBuf** pbuf) BBuf* bbuf; bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*pbuf); r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); if (r) return r; @@ -1958,7 +1961,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) BBuf *tbuf; int r; - if (IS_CCLASS_NOT(cc)) { + if (IS_NCCLASS_NOT(cc)) { bitset_invert(cc->bs); if (! ONIGENC_IS_SINGLEBYTE(enc)) { @@ -1969,7 +1972,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) cc->mbuf = tbuf; } - CCLASS_CLEAR_NOT(cc); + NCCLASS_CLEAR_NOT(cc); } return 0; @@ -1983,10 +1986,10 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) BitSetRef bsr1, bsr2; BitSet bs1, bs2; - not1 = IS_CCLASS_NOT(dest); + not1 = IS_NCCLASS_NOT(dest); bsr1 = dest->bs; buf1 = dest->mbuf; - not2 = IS_CCLASS_NOT(cc); + not2 = IS_NCCLASS_NOT(cc); bsr2 = cc->bs; buf2 = cc->mbuf; @@ -2041,10 +2044,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) BitSetRef bsr1, bsr2; BitSet bs1, bs2; - not1 = IS_CCLASS_NOT(dest); + not1 = IS_NCCLASS_NOT(dest); bsr1 = dest->bs; buf1 = dest->mbuf; - not2 = IS_CCLASS_NOT(cc); + not2 = IS_NCCLASS_NOT(cc); bsr2 = cc->bs; buf2 = cc->mbuf; @@ -2120,26 +2123,26 @@ static int is_invalid_quantifier_target(Node* node) { switch (NTYPE(node)) { - case N_ANCHOR: + case NT_ANCHOR: return 1; break; - case N_EFFECT: - if (NEFFECT(node).type == EFFECT_OPTION) - return is_invalid_quantifier_target(NEFFECT(node).target); + case NT_ENCLOSE: + /* allow enclosed elements */ + /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */ break; - case N_LIST: /* ex. (?:\G\A)* */ + case NT_LIST: do { - if (! is_invalid_quantifier_target(NCONS(node).left)) return 0; - } while (IS_NOT_NULL(node = NCONS(node).right)); + if (! is_invalid_quantifier_target(NCAR(node))) return 0; + } while (IS_NOT_NULL(node = NCDR(node))); return 0; break; - case N_ALT: /* ex. (?:abc|\A)* */ + case NT_ALT: do { - if (is_invalid_quantifier_target(NCONS(node).left)) return 1; - } while (IS_NOT_NULL(node = NCONS(node).right)); + if (is_invalid_quantifier_target(NCAR(node))) return 1; + } while (IS_NOT_NULL(node = NCDR(node))); break; default: @@ -2150,7 +2153,7 @@ is_invalid_quantifier_target(Node* node) /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ static int -popular_quantifier_num(QuantifierNode* q) +popular_quantifier_num(QtfrNode* q) { if (q->greedy) { if (q->lower == 0) { @@ -2197,16 +2200,17 @@ extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode) { int pnum, cnum; - QuantifierNode *p, *c; + QtfrNode *p, *c; - p = &(NQUANTIFIER(pnode)); - c = &(NQUANTIFIER(cnode)); + p = NQTFR(pnode); + c = NQTFR(cnode); pnum = popular_quantifier_num(p); cnum = popular_quantifier_num(c); + if (pnum < 0 || cnum < 0) return ; switch(ReduceTypeTable[cnum][pnum]) { case RQ_DEL: - *p = *c; + *pnode = *cnode; break; case RQ_A: p->target = c->target; @@ -2705,7 +2709,7 @@ CC_ESC_WARN(ScanEnv* env, UChar *c) } static void -CCEND_ESC_WARN(ScanEnv* env, UChar* c) +CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) { if (onig_warn == onig_null_warn) return ; @@ -3568,7 +3572,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_END_BUF : ANCHOR_END_LINE); + ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); break; case '[': @@ -3578,7 +3582,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case ']': if (*src > env->pattern) /* /].../ is allowed. */ - CCEND_ESC_WARN(env, (UChar* )"]"); + CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); break; case '#': @@ -3887,11 +3891,11 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, if (ctype < 0) return ctype; *np = node_new_cclass(); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - cc = &(NCCLASS(*np)); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); r = add_ctype_to_cc(cc, ctype, 0, env); if (r != 0) return r; - if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc); + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); return 0; } @@ -4069,8 +4073,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } *np = node = node_new_cclass(); - CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); - cc = &(NCCLASS(node)); + CHECK_NULL_RETURN_MEMERR(node); + cc = NCCLASS(node); and_start = 0; state = CCS_START; @@ -4258,7 +4262,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = parse_char_class(&anode, tok, &p, end, env); if (r != 0) goto cc_open_err; - acc = &(NCCLASS(anode)); + acc = NCCLASS(anode); r = or_cclass(cc, acc, env->enc); onig_node_free(anode); @@ -4323,10 +4327,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } if (neg != 0) - CCLASS_SET_NOT(cc); + NCCLASS_SET_NOT(cc); else - CCLASS_CLEAR_NOT(cc); - if (IS_CCLASS_NOT(cc) && + NCCLASS_CLEAR_NOT(cc); + if (IS_NCCLASS_NOT(cc) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { int is_empty; @@ -4349,7 +4353,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, return 0; err: - if (cc != &(NCCLASS(*np))) + if (cc != NCCLASS(*np)) bbuf_free(cc->mbuf); onig_node_free(*np); return r; @@ -4359,8 +4363,8 @@ static int parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env); static int -parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) +parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) { int r, num; int list_capture; @@ -4399,7 +4403,7 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); break; case '>': /* (?>...) stop backtrack */ - *np = node_new_effect(EFFECT_STOP_BACKTRACK); + *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); break; case '\'': @@ -4440,9 +4444,9 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, r = name_add(env->reg, name, name_end, num, env); if (r != 0) return r; - *np = node_new_effect_memory(env->option, 1); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - NEFFECT(*np).regnum = num; + *np = node_new_enclose_memory(env->option, 1); + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->regnum = num; if (list_capture != 0) BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); env->num_named++; @@ -4470,8 +4474,8 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, PUNFETCH; } #endif - *np = node_new_effect_memory(env->option, 0); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *np = node_new_enclose_memory(env->option, 0); + CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) { onig_node_free(*np); @@ -4481,7 +4485,7 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, onig_node_free(*np); return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } - NEFFECT(*np).regnum = num; + NENCLOSE(*np)->regnum = num; BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); } else { @@ -4534,7 +4538,7 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (c == ')') { *np = node_new_option(option); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); *src = p; return 2; /* option only */ } @@ -4548,8 +4552,8 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, env->option = prev; if (r < 0) return r; *np = node_new_option(option); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - NEFFECT(*np).target = target; + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->target = target; *src = p; return 0; } @@ -4568,26 +4572,26 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; - *np = node_new_effect_memory(env->option, 0); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *np = node_new_enclose_memory(env->option, 0); + CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) return num; - NEFFECT(*np).regnum = num; + NENCLOSE(*np)->regnum = num; } - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); r = fetch_token(tok, &p, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, &p, end, env); if (r < 0) return r; - if (NTYPE(*np) == N_ANCHOR) - NANCHOR(*np).target = target; + if (NTYPE(*np) == NT_ANCHOR) + NANCHOR(*np)->target = target; else { - NEFFECT(*np).target = target; - if (NEFFECT(*np).type == EFFECT_MEMORY) { + NENCLOSE(*np)->target = target; + if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { /* Don't move this to previous of parse_subexp() */ - r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np); + r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np); if (r != 0) return r; } } @@ -4607,17 +4611,17 @@ static const char* ReduceQStr[] = { static int set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) { - QuantifierNode* qn; + QtfrNode* qn; - qn = &(NQUANTIFIER(qnode)); + qn = NQTFR(qnode); if (qn->lower == 1 && qn->upper == 1) { return 1; } switch (NTYPE(target)) { - case N_STRING: + case NT_STR: if (! group) { - StrNode* sn = &(NSTRING(target)); + StrNode* sn = NSTR(target); if (str_node_can_be_split(sn, env->enc)) { Node* n = str_node_split_last_char(sn, env->enc); if (IS_NOT_NULL(n)) { @@ -4628,10 +4632,10 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) } break; - case N_QUANTIFIER: + case NT_QTFR: { /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ - QuantifierNode* qnt = &(NQUANTIFIER(target)); + QtfrNode* qnt = NQTFR(target); int nestq_num = popular_quantifier_num(qn); int targetq_num = popular_quantifier_num(qnt); @@ -4718,16 +4722,16 @@ static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) static int type_cclass_hash(type_cclass_key* key) { int i, val; - unsigned char *p; + UChar *p; val = 0; - p = (unsigned char* )&(key->enc); + p = (UChar* )&(key->enc); for (i = 0; i < sizeof(key->enc); i++) { val = val * 997 + (int )*p++; } - p = (unsigned char* )(&key->type); + p = (UChar* )(&key->type); for (i = 0; i < sizeof(key->type); i++) { val = val * 997 + (int )*p++; } @@ -4748,7 +4752,7 @@ static int i_free_shared_class(type_cclass_key* key, Node* node, void* arg) { if (IS_NOT_NULL(node)) { - CClassNode* cc = &(NCCLASS(node)); + CClassNode* cc = NCCLASS(node); if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); xfree(node); } @@ -4777,17 +4781,18 @@ typedef struct { CClassNode* cc; Node* alt_root; Node** ptail; -} ICaseFoldArgType; +} IApplyCaseFoldArg; static int -i_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) +i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], + int to_len, void* arg) { - ICaseFoldArgType* iarg; + IApplyCaseFoldArg* iarg; ScanEnv* env; CClassNode* cc; BitSetRef bs; - iarg = (ICaseFoldArgType* )arg; + iarg = (IApplyCaseFoldArg* )arg; env = iarg->env; cc = iarg->cc; bs = cc->bs; @@ -4795,16 +4800,18 @@ i_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) if (to_len == 1) { int in_cc; in_cc = onig_is_code_in_cc(env->enc, from, cc); - if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) || - (in_cc == 0 && IS_CCLASS_NOT(cc))) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || from >= SINGLE_BYTE_SIZE) { + if ((in_cc != 0 && !IS_NCCLASS_NOT(cc)) || + (in_cc == 0 && IS_NCCLASS_NOT(cc))) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); add_code_range(&(cc->mbuf), env, *to, *to); } else { - if (BITSET_AT(bs, from)) { - /* /(?i:[^A-C])/.match("a") ==> fail. */ + /* /(?i:[^A-C])/.match("a") ==> fail. */ + if (IS_NCCLASS_NOT(cc)) + BITSET_CLEAR_BIT(bs, *to); + else BITSET_SET_BIT(bs, *to); - } } } } @@ -4814,13 +4821,13 @@ i_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) Node *snode = NULL_NODE; if (onig_is_code_in_cc(env->enc, from, cc)) { - if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); + if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); for (i = 0; i < to_len; i++) { len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); if (i == 0) { snode = onig_node_new_str(buf, buf + len); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(snode); /* char-class expanded multi-char only compare with string folded at match time. */ @@ -4836,8 +4843,8 @@ i_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) } *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); - CHECK_NULL_RETURN_VAL(*(iarg->ptail), ONIGERR_MEMORY); - iarg->ptail = &(NCONS((*(iarg->ptail))).right); + CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); + iarg->ptail = &(NCDR((*(iarg->ptail)))); } } @@ -4865,20 +4872,20 @@ parse_exp(Node** np, OnigToken* tok, int term, break; case TK_SUBEXP_OPEN: - r = parse_effect(np, tok, TK_SUBEXP_CLOSE, src, end, env); + r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env); if (r < 0) return r; if (r == 1) group = 1; else if (r == 2) { /* option only */ Node* target; OnigOptionType prev = env->option; - env->option = NEFFECT(*np).option; + env->option = NENCLOSE(*np)->option; r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, src, end, env); env->option = prev; if (r < 0) return r; - NEFFECT(*np).target = target; + NENCLOSE(*np)->target = target; return tok->type; } break; @@ -4895,7 +4902,7 @@ parse_exp(Node** np, OnigToken* tok, int term, tk_byte: { *np = node_new_str(tok->backp, *src); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); while (1) { r = fetch_token(tok, src, end, env); @@ -4916,11 +4923,11 @@ parse_exp(Node** np, OnigToken* tok, int term, tk_raw_byte: { *np = node_new_str_raw_char((UChar )tok->u.c); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); len = 1; while (1) { if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enc_len(env->enc, NSTRING(*np).s)) { + if (len == enc_len(env->enc, NSTR(*np)->s)) { r = fetch_token(tok, src, end, env); NSTRING_CLEAR_RAW(*np); goto string_end; @@ -4935,8 +4942,8 @@ parse_exp(Node** np, OnigToken* tok, int term, int rem; if (len < ONIGENC_MBC_MINLEN(env->enc)) { rem = ONIGENC_MBC_MINLEN(env->enc) - len; - (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0); - if (len + rem == enc_len(env->enc, NSTRING(*np).s)) { + (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); + if (len + rem == enc_len(env->enc, NSTR(*np)->s)) { NSTRING_CLEAR_RAW(*np); goto string_end; } @@ -4963,7 +4970,7 @@ parse_exp(Node** np, OnigToken* tok, int term, #else *np = node_new_str(buf, buf + num); #endif - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); } break; @@ -4980,7 +4987,7 @@ parse_exp(Node** np, OnigToken* tok, int term, nextp = qend = end; } *np = node_new_str(qstart, qend); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); *src = nextp; } break; @@ -4990,7 +4997,7 @@ parse_exp(Node** np, OnigToken* tok, int term, switch (tok->u.prop.ctype) { case ONIGENC_CTYPE_WORD: *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); break; case ONIGENC_CTYPE_SPACE: @@ -5040,9 +5047,10 @@ parse_exp(Node** np, OnigToken* tok, int term, return ONIGERR_MEMORY; } - cc = &(NCCLASS(*np)); - CCLASS_SET_SHARE(cc); + cc = NCCLASS(*np); + NCCLASS_SET_SHARE(cc); new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); + xmemcpy(new_key, &key, sizeof(type_cclass_key)); onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, (st_data_t )*np); @@ -5051,10 +5059,10 @@ parse_exp(Node** np, OnigToken* tok, int term, else { #endif *np = node_new_cclass(); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - cc = &(NCCLASS(*np)); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); - if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc); + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); #ifdef USE_SHARED_CCLASS_TABLE } #endif @@ -5080,21 +5088,19 @@ parse_exp(Node** np, OnigToken* tok, int term, r = parse_char_class(np, tok, src, end, env); if (r != 0) return r; - cc = &(NCCLASS(*np)); - + cc = NCCLASS(*np); if (IS_IGNORECASE(env->option)) { - ICaseFoldArgType iarg; + IApplyCaseFoldArg iarg; - iarg.env = env; - iarg.cc = cc; + iarg.env = env; + iarg.cc = cc; iarg.alt_root = NULL_NODE; iarg.ptail = &(iarg.alt_root); r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, - i_case_fold, &iarg); + i_apply_case_fold, &iarg); if (r != 0) { - if (IS_NOT_NULL(iarg.alt_root)) - onig_node_free(iarg.alt_root); + onig_node_free(iarg.alt_root); return r; } if (IS_NOT_NULL(iarg.alt_root)) { @@ -5111,15 +5117,15 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_ANYCHAR: *np = node_new_anychar(); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); break; case TK_ANYCHAR_ANYTIME: *np = node_new_anychar(); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); qn = node_new_quantifier(0, REPEAT_INFINITE, 0); - CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); - NQUANTIFIER(qn).target = *np; + CHECK_NULL_RETURN_MEMERR(qn); + NQTFR(qn)->target = *np; *np = qn; break; @@ -5133,13 +5139,13 @@ parse_exp(Node** np, OnigToken* tok, int term, tok->u.backref.level, #endif env); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); break; #ifdef USE_SUBEXP_CALL case TK_CALL: *np = node_new_call(tok->u.call.name, tok->u.call.name_end); - CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(*np); env->num_call++; break; #endif @@ -5179,17 +5185,17 @@ parse_exp(Node** np, OnigToken* tok, int term, return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, - (r == TK_INTERVAL ? 1 : 0)); - CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); - NQUANTIFIER(qn).greedy = tok->u.repeat.greedy; + (r == TK_INTERVAL ? 1 : 0)); + CHECK_NULL_RETURN_MEMERR(qn); + NQTFR(qn)->greedy = tok->u.repeat.greedy; r = set_quantifier(qn, *targetp, group, env); if (r < 0) return r; if (tok->u.repeat.possessive != 0) { Node* en; - en = node_new_effect(EFFECT_STOP_BACKTRACK); - CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); - NEFFECT(en).target = qn; + en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + CHECK_NULL_RETURN_MEMERR(en); + NENCLOSE(en)->target = qn; qn = en; } @@ -5200,10 +5206,10 @@ parse_exp(Node** np, OnigToken* tok, int term, Node *tmp; *targetp = node_new_list(*targetp, NULL); - CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY); - tmp = NCONS(*targetp).right = node_new_list(qn, NULL); - CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY); - targetp = &(NCONS(tmp).left); + CHECK_NULL_RETURN_MEMERR(*targetp); + tmp = NCDR(*targetp) = node_new_list(qn, NULL); + CHECK_NULL_RETURN_MEMERR(tmp); + targetp = &(NCAR(tmp)); } goto re_entry; } @@ -5228,19 +5234,19 @@ parse_branch(Node** top, OnigToken* tok, int term, } else { *top = node_new_list(node, NULL); - headp = &(NCONS(*top).right); + headp = &(NCDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); if (r < 0) return r; - if (NTYPE(node) == N_LIST) { + if (NTYPE(node) == NT_LIST) { *headp = node; - while (IS_NOT_NULL(NCONS(node).right)) node = NCONS(node).right; - headp = &(NCONS(node).right); + while (IS_NOT_NULL(NCDR(node))) node = NCDR(node); + headp = &(NCDR(node)); } else { *headp = node_new_list(node, NULL); - headp = &(NCONS(*headp).right); + headp = &(NCDR(*headp)); } } } @@ -5268,7 +5274,7 @@ parse_subexp(Node** top, OnigToken* tok, int term, } else if (r == TK_ALT) { *top = onig_node_new_alt(node, NULL); - headp = &(NCONS(*top).right); + headp = &(NCDR(*top)); while (r == TK_ALT) { r = fetch_token(tok, src, end, env); if (r < 0) return r; @@ -5276,7 +5282,7 @@ parse_subexp(Node** top, OnigToken* tok, int term, if (r < 0) return r; *headp = onig_node_new_alt(node, NULL); - headp = &(NCONS(*headp).right); + headp = &(NCDR(*headp)); } if (tok->type != term) @@ -5307,8 +5313,8 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) } extern int -onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, - ScanEnv* env) +onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, + regex_t* reg, ScanEnv* env) { int r; UChar* p; diff --git a/regparse.h b/regparse.h index de990325745414..7a6314098a2b6b 100644 --- a/regparse.h +++ b/regparse.h @@ -32,38 +32,61 @@ #include "regint.h" /* node type */ -#define N_STRING (1<< 0) -#define N_CCLASS (1<< 1) -#define N_CTYPE (1<< 2) -#define N_ANYCHAR (1<< 3) -#define N_BACKREF (1<< 4) -#define N_QUANTIFIER (1<< 5) -#define N_EFFECT (1<< 6) -#define N_ANCHOR (1<< 7) -#define N_LIST (1<< 8) -#define N_ALT (1<< 9) -#define N_CALL (1<<10) +#define NT_STR 0 +#define NT_CCLASS 1 +#define NT_CTYPE 2 +#define NT_CANY 3 +#define NT_BREF 4 +#define NT_QTFR 5 +#define NT_ENCLOSE 6 +#define NT_ANCHOR 7 +#define NT_LIST 8 +#define NT_ALT 9 +#define NT_CALL 10 + +/* node type bit */ +#define NTYPE2BIT(type) (1<<(type)) + +#define BIT_NT_STR NTYPE2BIT(NT_STR) +#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS) +#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE) +#define BIT_NT_CANY NTYPE2BIT(NT_CANY) +#define BIT_NT_BREF NTYPE2BIT(NT_BREF) +#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR) +#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE) +#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR) +#define BIT_NT_LIST NTYPE2BIT(NT_LIST) +#define BIT_NT_ALT NTYPE2BIT(NT_ALT) +#define BIT_NT_CALL NTYPE2BIT(NT_CALL) #define IS_NODE_TYPE_SIMPLE(type) \ - (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0) - -#define NTYPE(node) ((node)->type) -#define NCONS(node) ((node)->u.cons) -#define NSTRING(node) ((node)->u.str) -#define NCCLASS(node) ((node)->u.cclass) -#define NCTYPE(node) ((node)->u.ctype) -#define NQUANTIFIER(node) ((node)->u.quant) -#define NANCHOR(node) ((node)->u.anchor) -#define NBACKREF(node) ((node)->u.backref) -#define NEFFECT(node) ((node)->u.effect) -#define NCALL(node) ((node)->u.call) + ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\ + BIT_NT_CANY | BIT_NT_BREF)) != 0) + +#define NTYPE(node) ((node)->u.base.type) +#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype) + +#define NSTR(node) (&((node)->u.str)) +#define NCCLASS(node) (&((node)->u.cclass)) +#define NCTYPE(node) (&((node)->u.ctype)) +#define NBREF(node) (&((node)->u.bref)) +#define NQTFR(node) (&((node)->u.qtfr)) +#define NENCLOSE(node) (&((node)->u.enclose)) +#define NANCHOR(node) (&((node)->u.anchor)) +#define NCONS(node) (&((node)->u.cons)) +#define NCALL(node) (&((node)->u.call)) + +#define NCAR(node) (NCONS(node)->car) +#define NCDR(node) (NCONS(node)->cdr) + + #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) -#define EFFECT_MEMORY (1<<0) -#define EFFECT_OPTION (1<<1) -#define EFFECT_STOP_BACKTRACK (1<<2) +#define ENCLOSE_MEMORY (1<<0) +#define ENCLOSE_OPTION (1<<1) +#define ENCLOSE_STOP_BACKTRACK (1<<2) #define NODE_STR_MARGIN 16 #define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ @@ -92,30 +115,6 @@ #define NQ_TARGET_IS_EMPTY_MEM 2 #define NQ_TARGET_IS_EMPTY_REC 3 - -typedef struct { - UChar* s; - UChar* end; - unsigned int flag; - int capa; /* (allocated size - 1) or 0: use buf[] */ - UChar buf[NODE_STR_BUF_SIZE]; -} StrNode; - -typedef struct { - int state; - struct _Node* target; - int lower; - int upper; - int greedy; - int target_empty_info; - struct _Node* head_exact; - struct _Node* next_head_exact; - int is_refered; /* include called node. don't eliminate even if {0} */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ -#endif -} QuantifierNode; - /* status bits */ #define NST_MIN_FIXED (1<<0) #define NST_MAX_FIXED (1<<1) @@ -133,20 +132,20 @@ typedef struct { #define NST_NEST_LEVEL (1<<13) #define NST_BY_NUMBER (1<<14) /* {n,m} */ -#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f) -#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f) - -#define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0) -#define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) -#define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0) -#define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0) -#define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0) -#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) -#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) -#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) -#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \ +#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f) +#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f) + +#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0) +#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) +#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0) +#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0) +#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0) +#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) +#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) +#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) +#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \ (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) -#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) +#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) #define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) @@ -156,7 +155,35 @@ typedef struct { #define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) #define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) +#define CALLNODE_REFNUM_UNDEF -1 + typedef struct { + NodeBase base; + UChar* s; + UChar* end; + unsigned int flag; + int capa; /* (allocated size - 1) or 0: use buf[] */ + UChar buf[NODE_STR_BUF_SIZE]; +} StrNode; + +typedef struct { + NodeBase base; + int state; + struct _Node* target; + int lower; + int upper; + int greedy; + int target_empty_info; + struct _Node* head_exact; + struct _Node* next_head_exact; + int is_refered; /* include called node. don't eliminate even if {0} */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ +#endif +} QtfrNode; + +typedef struct { + NodeBase base; int state; int type; int regnum; @@ -168,9 +195,7 @@ typedef struct { OnigDistance max_len; /* max length (byte) */ int char_len; /* character length */ int opt_count; /* referenced count in optimize_node_left() */ -} EffectNode; - -#define CALLNODE_REFNUM_UNDEF -1 +} EncloseNode; #ifdef USE_SUBEXP_CALL @@ -186,53 +211,63 @@ typedef struct { } UnsetAddrList; typedef struct { + NodeBase base; int state; int ref_num; UChar* name; UChar* name_end; - struct _Node* target; /* EffectNode : EFFECT_MEMORY */ + struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */ UnsetAddrList* unset_addr_list; } CallNode; #endif typedef struct { + NodeBase base; int state; int back_num; int back_static[NODE_BACKREFS_SIZE]; int* back_dynamic; int nest_level; -} BackrefNode; +} BRefNode; typedef struct { + NodeBase base; int type; struct _Node* target; int char_len; } AnchorNode; +typedef struct { + NodeBase base; + struct _Node* car; + struct _Node* cdr; +} ConsAltNode; + +typedef struct { + NodeBase base; + int ctype; + int not; +} CtypeNode; + typedef struct _Node { - int type; union { - StrNode str; - CClassNode cclass; - QuantifierNode quant; - EffectNode effect; + NodeBase base; + StrNode str; + CClassNode cclass; + QtfrNode qtfr; + EncloseNode enclose; + BRefNode bref; + AnchorNode anchor; + ConsAltNode cons; + CtypeNode ctype; #ifdef USE_SUBEXP_CALL - CallNode call; + CallNode call; #endif - BackrefNode backref; - AnchorNode anchor; - struct { - struct _Node* left; - struct _Node* right; - } cons; - struct { - int ctype; - int not; - } ctype; } u; } Node; + #define NULL_NODE ((Node* )0) #define SCANENV_MEMNODES_SIZE 8 @@ -295,7 +330,7 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw)); extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); extern void onig_node_free P_((Node* node)); -extern Node* onig_node_new_effect P_((int type)); +extern Node* onig_node_new_enclose P_((int type)); extern Node* onig_node_new_anchor P_((int type)); extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); extern Node* onig_node_new_list P_((Node* left, Node* right)); diff --git a/sjis.c b/sjis.c index f6b78eba138c9a..9bfae901726d49 100644 --- a/sjis.c +++ b/sjis.c @@ -269,7 +269,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype) ctype -= (ONIGENC_MAX_STD_CTYPE + 1); if (ctype >= (unsigned int )PropertyListNum) - return ONIGENCERR_TYPE_BUG; + return ONIGENC_ERR_TYPE_BUG; return onig_is_in_code_range((UChar* )PropertyList[ctype], code); } @@ -291,7 +291,7 @@ get_ctype_code_range(int ctype, OnigCodePoint* sb_out, ctype -= (ONIGENC_MAX_STD_CTYPE + 1); if (ctype >= PropertyListNum) - return ONIGENCERR_TYPE_BUG; + return ONIGENC_ERR_TYPE_BUG; *ranges = PropertyList[ctype]; return 0; diff --git a/unicode.c b/unicode.c index 12df76f4490b0a..657954a5fea7d4 100644 --- a/unicode.c +++ b/unicode.c @@ -10756,7 +10756,7 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) } if (ctype >= CODE_RANGES_NUM) { - return ONIGENCERR_TYPE_BUG; + return ONIGENC_ERR_TYPE_BUG; } if (CodeRangeTableInited == 0) init_code_range_array(); @@ -10769,7 +10769,7 @@ extern int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) { if (ctype >= CODE_RANGES_NUM) { - return ONIGENCERR_TYPE_BUG; + return ONIGENC_ERR_TYPE_BUG; } if (CodeRangeTableInited == 0) init_code_range_array(); diff --git a/utf8.c b/utf8.c index daeeea3bd8ab5b..89167cf17fd62d 100644 --- a/utf8.c +++ b/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -72,7 +72,9 @@ utf8_is_mbc_newline(const UChar* p, const UChar* end) if (*p == 0x0a) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS +#ifndef USE_CRNL_AS_LINE_TERMINATOR if (*p == 0x0d) return 1; +#endif if (p + 1 < end) { if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ return 1; @@ -133,7 +135,7 @@ utf8_code_to_mbclen(OnigCodePoint code) else if (code == INVALID_CODE_FF) return 1; #endif else - return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; + return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE; } #if 0 @@ -154,7 +156,7 @@ utf8_code_to_mbc_first(OnigCodePoint code) else if ((code & 0x80000000) == 0) return ((code>>30) & 0x01) | 0xfc; else { - return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; + return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE; } } } @@ -209,7 +211,7 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf) } #endif else { - return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; + return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE; } *p++ = UTF8_TRAIL0(code);