forked from ruby/ruby
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
- Loading branch information
ksaito
committed
Mar 5, 2004
1 parent
33a06e4
commit 5770336
Showing
15 changed files
with
17,522 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/********************************************************************** | ||
ascii.c - Oniguruma (regular expression library) | ||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp) | ||
**********************************************************************/ | ||
#include "regenc.h" | ||
|
||
static int | ||
ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype) | ||
{ | ||
if (code < 128) | ||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); | ||
else | ||
return FALSE; | ||
} | ||
|
||
/*
 * Encoding descriptor for US-ASCII.
 *
 * The leading 256-entry table holds 1 for every byte value
 * (presumably the per-byte character length looked up by enc_len();
 * confirm against regenc.h).  The remaining members are the encoding
 * properties and the handler functions, in the order declared by
 * OnigEncodingType.
 */
OnigEncodingType OnigEncodingASCII = {
  {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
  },
  "US-ASCII",                      /* name */
  1,                               /* max byte length */
  FALSE,                           /* is_fold_match */
  ONIGENC_CTYPE_SUPPORT_LEVEL_SB,  /* ctype_support_level */
  TRUE,                            /* is continuous sb mb codepoint */
  onigenc_single_byte_mbc_to_code,
  onigenc_single_byte_code_to_mbclen,
  onigenc_single_byte_code_to_mbc,
  onigenc_ascii_mbc_to_lower,
  onigenc_ascii_mbc_is_case_ambig,
  ascii_code_is_ctype,
  onigenc_nothing_get_ctype_code_range,
  onigenc_single_byte_left_adjust_char_head,
  onigenc_single_byte_is_allowed_reverse_match,
  onigenc_nothing_get_all_fold_match_code,
  onigenc_nothing_get_fold_match_info
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
/********************************************************************** | ||
euc_jp.c - Oniguruma (regular expression library) | ||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp) | ||
**********************************************************************/ | ||
#include "regenc.h" | ||
|
||
/*
 * Non-zero when byte `c` lies OUTSIDE the range 0xa1..0xfe (assuming
 * UChar is an unsigned 8-bit type, so that the subtraction wraps for
 * values below 0xa1).  Note that, despite the name, the macro is false
 * for bytes in 0xa1..0xfe; eucjp_left_adjust_char_head() uses it to
 * step backwards over such bytes until it reaches one outside the range.
 */
#define eucjp_islead(c)    (((UChar )((c) - 0xa1)) > (0xfe - 0xa1))
|
||
static OnigCodePoint | ||
eucjp_mbc_to_code(UChar* p, UChar* end) | ||
{ | ||
int c, i, len; | ||
OnigCodePoint n; | ||
|
||
c = *p++; | ||
len = enc_len(ONIG_ENCODING_EUC_JP, c); | ||
n = c; | ||
if (len == 1) return n; | ||
|
||
for (i = 1; i < len; i++) { | ||
if (p >= end) break; | ||
c = *p++; | ||
n <<= 8; n += c; | ||
} | ||
return n; | ||
} | ||
|
||
static int | ||
eucjp_code_to_mbclen(OnigCodePoint code) | ||
{ | ||
if ((code & 0xff0000) != 0) return 3; | ||
else if ((code & 0xff00) != 0) return 2; | ||
else return 1; | ||
} | ||
|
||
static int | ||
eucjp_code_to_mbc_first(OnigCodePoint code) | ||
{ | ||
int first; | ||
|
||
if ((code & 0xff0000) != 0) { | ||
first = (code >> 16) & 0xff; | ||
/* | ||
if (enc_len(ONIG_ENCODING_EUC_JP, first) != 3) | ||
return ONIGERR_INVALID_WIDE_CHAR_VALUE; | ||
*/ | ||
} | ||
else if ((code & 0xff00) != 0) { | ||
first = (code >> 8) & 0xff; | ||
/* | ||
if (enc_len(ONIG_ENCODING_EUC_JP, first) != 2) | ||
return ONIGERR_INVALID_WIDE_CHAR_VALUE; | ||
*/ | ||
} | ||
else { | ||
/* | ||
if (enc_len(ONIG_ENCODING_EUC_JP, code) != 1) | ||
return ONIGERR_INVALID_WIDE_CHAR_VALUE; | ||
*/ | ||
return (int )code; | ||
} | ||
return first; | ||
} | ||
|
||
static int | ||
eucjp_code_to_mbc(OnigCodePoint code, UChar *buf) | ||
{ | ||
UChar *p = buf; | ||
|
||
if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); | ||
if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); | ||
*p++ = (UChar )(code & 0xff); | ||
|
||
#if 1 | ||
if (enc_len(ONIG_ENCODING_EUC_JP, buf[0]) != (p - buf)) | ||
return ONIGERR_INVALID_WIDE_CHAR_VALUE; | ||
#endif | ||
return p - buf; | ||
} | ||
|
||
static int | ||
eucjp_mbc_to_lower(UChar* p, UChar* lower) | ||
{ | ||
int len; | ||
|
||
if (ONIGENC_IS_MBC_ASCII(p)) { | ||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); | ||
return 1; | ||
} | ||
else { | ||
len = enc_len(ONIG_ENCODING_EUC_JP, *p); | ||
if (lower != p) { | ||
/* memcpy(lower, p, len); */ | ||
int i; | ||
for (i = 0; i < len; i++) { | ||
*lower++ = *p++; | ||
} | ||
} | ||
return len; /* return byte length of converted char to lower */ | ||
} | ||
} | ||
|
||
static int | ||
eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype) | ||
{ | ||
if ((ctype & ONIGENC_CTYPE_WORD) != 0) { | ||
if (code < 128) | ||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); | ||
else { | ||
int first = eucjp_code_to_mbc_first(code); | ||
return (enc_len(ONIG_ENCODING_EUC_JP, first) > 1 ? TRUE : FALSE); | ||
} | ||
|
||
ctype &= ~ONIGENC_CTYPE_WORD; | ||
if (ctype == 0) return FALSE; | ||
} | ||
|
||
if (code < 128) | ||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); | ||
else | ||
return FALSE; | ||
} | ||
|
||
/*
 * Given a position `s` inside the buffer beginning at `start`, return
 * the position of the first byte of the character that contains `s`.
 *
 * Assumed in this encoding, mb-trail bytes don't mix with single bytes.
 *
 * The scan moves backwards from `s` while the current byte is in
 * 0xa1..0xfe (i.e. eucjp_islead() is false) and `start` has not been
 * reached.  If the character beginning at the resulting position
 * covers `s`, that position is returned.  Otherwise that character is
 * skipped and the remaining distance to `s` is rounded down to an even
 * number of bytes (presumably because the bytes in between form
 * two-byte characters; confirm against the encoding definition).
 */
static UChar*
eucjp_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar *head = s;
  int len;

  if (s <= start) return s;

  for (; !eucjp_islead(*head) && head > start; head--)
    ;

  len = enc_len(ONIG_ENCODING_EUC_JP, *head);
  if (head + len > s) return head;

  head += len;
  return head + ((s - head) & ~1);
}
|
||
static int | ||
eucjp_is_allowed_reverse_match(UChar* s, UChar* end) | ||
{ | ||
UChar c = *s; | ||
if (c <= 0x7e || c == 0x8e || c == 0x8f) | ||
return TRUE; | ||
else | ||
return FALSE; | ||
} | ||
|
||
/*
 * Encoding descriptor for EUC-JP.
 *
 * The leading 256-entry table is indexed by byte value (presumably the
 * table consulted by enc_len(); confirm against regenc.h):
 *   0x8e        -> 2
 *   0x8f        -> 3
 *   0xa1..0xfe  -> 2
 *   all others  -> 1
 * The remaining members are the encoding properties and the handler
 * functions, in the order declared by OnigEncodingType.
 */
OnigEncodingType OnigEncodingEUC_JP = {
  {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
  },
  "EUC-JP",                        /* name */
  3,                               /* max byte length */
  FALSE,                           /* is_fold_match */
  ONIGENC_CTYPE_SUPPORT_LEVEL_SB,  /* ctype_support_level */
  FALSE,                           /* is continuous sb mb codepoint */
  eucjp_mbc_to_code,
  eucjp_code_to_mbclen,
  eucjp_code_to_mbc,
  eucjp_mbc_to_lower,
  onigenc_mbn_mbc_is_case_ambig,
  eucjp_code_is_ctype,
  onigenc_nothing_get_ctype_code_range,
  eucjp_left_adjust_char_head,
  eucjp_is_allowed_reverse_match,
  onigenc_nothing_get_all_fold_match_code,
  onigenc_nothing_get_fold_match_info
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
/********************************************************************** | ||
oniggnu.h - Oniguruma (regular expression library) | ||
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp) | ||
**********************************************************************/ | ||
#ifndef ONIGGNU_H | ||
#define ONIGGNU_H | ||
|
||
#include "oniguruma.h" | ||
|
||
/* Multibyte character-set selectors (presumably the values accepted by
   the int form of re_mbcinit(); confirm against its implementation). */
#define MBCTYPE_ASCII         0
#define MBCTYPE_EUC           1
#define MBCTYPE_SJIS          2
#define MBCTYPE_UTF8          3
|
||
/* GNU regex options */ | ||
#ifndef RE_NREGS | ||
#define RE_NREGS ONIG_NREGION | ||
#endif | ||
#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE | ||
#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND | ||
#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE | ||
#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE | ||
#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST | ||
#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) | ||
|
||
/*
 * Symbol aliasing.
 *
 * With RUBY_PLATFORM defined, the GNU-style re_* names are mapped to
 * ruby_-prefixed names; two of those (ruby_error_code_to_str and
 * ruby_re_copy_registers) are in turn mapped to Oniguruma functions.
 * Without RUBY_PLATFORM, only re_error_code_to_str and
 * re_copy_registers are mapped, directly to the Oniguruma functions.
 *
 * NOTE(review): re_alloc_pattern, declared later in this header, has no
 * ruby_ alias here; confirm that is intentional.
 */
#ifdef RUBY_PLATFORM

#define re_mbcinit              ruby_re_mbcinit
#define re_compile_pattern      ruby_re_compile_pattern
#define re_recompile_pattern    ruby_re_recompile_pattern
#define re_free_pattern         ruby_re_free_pattern
#define re_adjust_startpos      ruby_re_adjust_startpos
#define re_search               ruby_re_search
#define re_match                ruby_re_match
#define re_set_casetable        ruby_re_set_casetable
#define re_copy_registers       ruby_re_copy_registers
#define re_free_registers       ruby_re_free_registers
#define register_info_type      ruby_register_info_type
#define re_error_code_to_str    ruby_error_code_to_str

/* second-level mapping onto the Oniguruma API */
#define ruby_error_code_to_str  onig_error_code_to_str
#define ruby_re_copy_registers  onig_region_copy

#else

#define re_error_code_to_str    onig_error_code_to_str
#define re_copy_registers       onig_region_copy

#endif
|
||
#ifdef ONIG_RUBY_M17N | ||
ONIG_EXTERN | ||
void re_mbcinit P_((OnigEncoding)); | ||
#else | ||
ONIG_EXTERN | ||
void re_mbcinit P_((int)); | ||
#endif | ||
|
||
ONIG_EXTERN | ||
int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); | ||
ONIG_EXTERN | ||
int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); | ||
ONIG_EXTERN | ||
void re_free_pattern P_((struct re_pattern_buffer*)); | ||
ONIG_EXTERN | ||
int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); | ||
ONIG_EXTERN | ||
int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); | ||
ONIG_EXTERN | ||
int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*)); | ||
ONIG_EXTERN | ||
void re_set_casetable P_((const char*)); | ||
ONIG_EXTERN | ||
void re_free_registers P_((struct re_registers*)); | ||
ONIG_EXTERN | ||
int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ | ||
|
||
#endif /* ONIGGNU_H */ |
Oops, something went wrong.