Initial revision

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
technorama · Mar 5, 2004 · 5770336 · 5770336
1 parent 33a06e4
commit 5770336
Show file tree

Hide file tree

Showing 15 changed files with 17,522 additions and 0 deletions.
diff --git a/ascii.c b/ascii.c
@@ -0,0 +1,54 @@
+/**********************************************************************
+
+  ascii.c -  Oniguruma (regular expression library)
+
+  Copyright (C) 2003-2004  K.Kosako ([email protected])
+
+**********************************************************************/
+#include "regenc.h"
+
+/*
+ * Character-type test for the US-ASCII encoding.
+ *
+ * Code points below 128 are classified by ONIGENC_IS_ASCII_CODE_CTYPE;
+ * any other code point is reported as belonging to no ctype (FALSE).
+ */
+static int
+ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 128) return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+}
+
+/*
+ * Encoding descriptor for US-ASCII.
+ *
+ * The initializer is positional, so the order of the entries must match the
+ * member order of OnigEncodingType (declared outside this file).
+ */
+OnigEncodingType OnigEncodingASCII = {
+  /* 256-entry table, every entry is 1.
+     NOTE(review): presumably the byte length per lead byte, consistent with
+     the max byte length of 1 below -- confirm against OnigEncodingType. */
+  {
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+  },
+  "US-ASCII",  /* name */
+  1,           /* max byte length */
+  FALSE,       /* is_fold_match */
+  ONIGENC_CTYPE_SUPPORT_LEVEL_SB,   /* ctype_support_level */
+  TRUE,                             /* is continuous sb mb codepoint */
+  /* Callbacks: shared single-byte/ASCII helpers declared elsewhere, plus the
+     file-local ascii_code_is_ctype for character-type tests. */
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  onigenc_ascii_mbc_to_lower,
+  onigenc_ascii_mbc_is_case_ambig,
+  ascii_code_is_ctype,
+  onigenc_nothing_get_ctype_code_range,
+  onigenc_single_byte_left_adjust_char_head,
+  onigenc_single_byte_is_allowed_reverse_match,
+  onigenc_nothing_get_all_fold_match_code,
+  onigenc_nothing_get_fold_match_info
+};
diff --git a/euc_jp.c b/euc_jp.c
@@ -0,0 +1,191 @@
+/**********************************************************************
+
+  euc_jp.c -  Oniguruma (regular expression library)
+
+  Copyright (C) 2003-2004  K.Kosako ([email protected])
+
+**********************************************************************/
+#include "regenc.h"
+
+/* Nonzero when byte c lies outside 0xa1..0xfe.  Subtracting 0xa1 and casting
+   to UChar makes values below 0xa1 wrap to large numbers, so one comparison
+   checks both bounds (assumes UChar is an unsigned 8-bit type -- confirm in
+   the header that defines it). */
+#define eucjp_islead(c)    ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
+
+/*
+ * Decode the character starting at p into a code point.
+ *
+ * The bytes of the character are concatenated big-endian: the first byte
+ * ends up in the most significant position.  The character length is taken
+ * from enc_len() on the first byte.  If end is reached before all bytes of
+ * the character have been read, the value accumulated so far is returned.
+ */
+static OnigCodePoint
+eucjp_mbc_to_code(UChar* p, UChar* end)
+{
+  int lead, extra;
+  OnigCodePoint code;
+
+  lead  = *p++;
+  code  = lead;
+  extra = enc_len(ONIG_ENCODING_EUC_JP, lead) - 1;
+
+  /* Fold in the remaining bytes; stop early when the input is truncated. */
+  while (extra > 0 && p < end) {
+    code = (code << 8) + *p++;
+    extra--;
+  }
+  return code;
+}
+
+/*
+ * Number of bytes needed to encode a code point:
+ * 3 when any of bits 16..23 is set, otherwise 2 when any of bits 8..15 is
+ * set, otherwise 1.
+ */
+static int
+eucjp_code_to_mbclen(OnigCodePoint code)
+{
+  return ((code & 0xff0000) != 0) ? 3
+       : ((code &   0xff00) != 0) ? 2
+       : 1;
+}
+
+/*
+ * Return the first (most significant) encoded byte of a code point.
+ *
+ * For a value with bits 16..23 set that is bits 16..23; otherwise, for a
+ * value with bits 8..15 set, it is bits 8..15; otherwise the code itself is
+ * returned.  No check is made that the byte is a valid lead byte of the
+ * corresponding length.
+ */
+static int
+eucjp_code_to_mbc_first(OnigCodePoint code)
+{
+  if ((code & 0xff0000) != 0)
+    return (int )((code >> 16) & 0xff);
+
+  if ((code & 0xff00) != 0)
+    return (int )((code >> 8) & 0xff);
+
+  return (int )code;
+}
+
+/*
+ * Encode a code point into buf and return the number of bytes written.
+ *
+ * Bytes are emitted most-significant first; the bits 16..23 and bits 8..15
+ * bytes are each written only when non-zero, and the low byte is always
+ * written.
+ * Returns ONIGERR_INVALID_WIDE_CHAR_VALUE when the number of bytes produced
+ * does not match the length enc_len() reports for the first byte written
+ * (buf has already been modified in that case).
+ */
+static int
+eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  int n = 0;
+
+  if ((code & 0xff0000) != 0) buf[n++] = (UChar )((code >> 16) & 0xff);
+  if ((code &   0xff00) != 0) buf[n++] = (UChar )((code >>  8) & 0xff);
+  buf[n++] = (UChar )(code & 0xff);
+
+  /* The lead byte must announce exactly as many bytes as were produced. */
+  if (enc_len(ONIG_ENCODING_EUC_JP, buf[0]) != n)
+    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
+  return n;
+}
+
+/*
+ * Write the lower-case form of the character at p into lower and return its
+ * byte length.
+ *
+ * Characters for which ONIGENC_IS_MBC_ASCII holds are converted with
+ * ONIGENC_ASCII_CODE_TO_LOWER_CASE.  Any other character is copied through
+ * unchanged; the copy is skipped when lower and p are the same buffer.
+ */
+static int
+eucjp_mbc_to_lower(UChar* p, UChar* lower)
+{
+  int len, i;
+
+  if (ONIGENC_IS_MBC_ASCII(p)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+    return 1;
+  }
+
+  len = enc_len(ONIG_ENCODING_EUC_JP, *p);
+  if (lower == p) return len;   /* in-place: nothing to copy */
+
+  for (i = 0; i < len; i++)
+    lower[i] = p[i];
+
+  return len; /* byte length of the (unchanged) character */
+}
+
+/*
+ * Character-type test for EUC-JP code points.
+ *
+ * code   code point to classify
+ * ctype  bit set of ONIGENC_CTYPE_* values to test for
+ *
+ * Code points below 128 are classified by ONIGENC_IS_ASCII_CODE_CTYPE.
+ * For any other code point the only type that can match is
+ * ONIGENC_CTYPE_WORD: it matches when enc_len() reports a length greater
+ * than 1 for the code point's first byte.  Everything else yields FALSE.
+ *
+ * The results are the same as in the earlier form of this function, which
+ * carried two unreachable statements after an if/else whose arms both
+ * returned; those statements have been removed.
+ */
+static int
+eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+    int first = eucjp_code_to_mbc_first(code);
+    return (enc_len(ONIG_ENCODING_EUC_JP, first) > 1 ? TRUE : FALSE);
+  }
+
+  return FALSE;
+}
+
+/*
+ * Compute a pointer at or before s that this code treats as the head of the
+ * character covering s.  The scan never moves before start, and s itself is
+ * returned when s <= start.
+ */
+static UChar*
+eucjp_left_adjust_char_head(UChar* start, UChar* s)
+{
+  /* Assumption for this encoding:
+     multibyte trail bytes are never mixed with single bytes.
+  */
+  UChar *p;
+  int len;
+
+  if (s <= start) return s;
+  p = s;
+
+  /* Walk backwards over bytes in 0xa1..0xfe (eucjp_islead is false for
+     them), stopping at the first other byte or at start. */
+  while (!eucjp_islead(*p) && p > start) p--;
+  len = enc_len(ONIG_ENCODING_EUC_JP, *p);
+  /* The character beginning at p extends past s, so p is its head. */
+  if (p + len > s) return p;
+  p += len;
+  /* Otherwise skip that character and advance by the distance to s rounded
+     down to an even number: the bytes in between are treated as a run of
+     2-byte characters. */
+  return p + ((s - p) & ~1);
+}
+
+/*
+ * Report whether a reverse match may begin at s.
+ *
+ * TRUE when the byte at s is 0x7e or below, or is 0x8e or 0x8f; FALSE for
+ * every other byte.  The end argument is not consulted.
+ */
+static int
+eucjp_is_allowed_reverse_match(UChar* s, UChar* end)
+{
+  UChar head = *s;
+
+  if (head == 0x8e || head == 0x8f) return TRUE;
+
+  return (head <= 0x7e) ? TRUE : FALSE;
+}
+
+/*
+ * Encoding descriptor for EUC-JP.
+ *
+ * The initializer is positional, so the order of the entries must match the
+ * member order of OnigEncodingType (declared outside this file).
+ */
+OnigEncodingType OnigEncodingEUC_JP = {
+  /* 256-entry table, 16 entries per row.
+     NOTE(review): presumably the character byte length indexed by lead byte,
+     as read through enc_len() in this file -- confirm against
+     OnigEncodingType. */
+  {
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,  /* entries 0x8e -> 2, 0x8f -> 3 */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* entry 0xa0 -> 1, 0xa1 onwards -> 2 */
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1   /* entry 0xff -> 1 */
+  },
+  "EUC-JP",   /* name */
+  3,          /* max byte length */
+  FALSE,      /* is_fold_match */
+  ONIGENC_CTYPE_SUPPORT_LEVEL_SB,    /* ctype_support_level */
+  FALSE,      /* is continuous sb mb codepoint */
+  /* Callbacks: the eucjp_* functions are defined in this file, the
+     onigenc_* ones are shared helpers declared elsewhere. */
+  eucjp_mbc_to_code,
+  eucjp_code_to_mbclen,
+  eucjp_code_to_mbc,
+  eucjp_mbc_to_lower,
+  onigenc_mbn_mbc_is_case_ambig,
+  eucjp_code_is_ctype,
+  onigenc_nothing_get_ctype_code_range,
+  eucjp_left_adjust_char_head,
+  eucjp_is_allowed_reverse_match,
+  onigenc_nothing_get_all_fold_match_code,
+  onigenc_nothing_get_fold_match_info
+};
diff --git a/oniggnu.h b/oniggnu.h
@@ -0,0 +1,77 @@
+/**********************************************************************
+
+  oniggnu.h - Oniguruma (regular expression library)
+
+  Copyright (C) 2004  K.Kosako ([email protected])
+
+**********************************************************************/
+#ifndef ONIGGNU_H
+#define ONIGGNU_H
+
+#include "oniguruma.h"
+
+/* Multibyte character set identifiers.
+   NOTE(review): presumably the values accepted by the int form of
+   re_mbcinit() -- confirm against its implementation. */
+#define MBCTYPE_ASCII         0
+#define MBCTYPE_EUC           1
+#define MBCTYPE_SJIS          2
+#define MBCTYPE_UTF8          3
+
+/* GNU regex options, expressed in terms of the corresponding ONIG_* values.
+   RE_NREGS is only defined here when it has not been defined already. */
+#ifndef RE_NREGS
+#define RE_NREGS                ONIG_NREGION
+#endif
+#define RE_OPTION_IGNORECASE    ONIG_OPTION_IGNORECASE
+#define RE_OPTION_EXTENDED      ONIG_OPTION_EXTEND
+#define RE_OPTION_MULTILINE     ONIG_OPTION_MULTILINE
+#define RE_OPTION_SINGLELINE    ONIG_OPTION_SINGLELINE
+#define RE_OPTION_LONGEST       ONIG_OPTION_FIND_LONGEST
+/* POSIXLINE is the combination of the MULTILINE and SINGLELINE options. */
+#define RE_OPTION_POSIXLINE    (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
+
+/* Symbol renaming.
+   When RUBY_PLATFORM is defined, the re_* names (and register_info_type)
+   are mapped to ruby_-prefixed names.  Two of those ruby_ names are in turn
+   mapped to onig_* functions, so re_error_code_to_str and re_copy_registers
+   end up as onig_error_code_to_str and onig_region_copy in both branches.
+   NOTE(review): re_alloc_pattern, declared further down in this header, has
+   no ruby_ mapping here -- confirm whether that is intentional. */
+#ifdef RUBY_PLATFORM
+#define re_mbcinit              ruby_re_mbcinit
+#define re_compile_pattern      ruby_re_compile_pattern
+#define re_recompile_pattern    ruby_re_recompile_pattern
+#define re_free_pattern         ruby_re_free_pattern
+#define re_adjust_startpos      ruby_re_adjust_startpos
+#define re_search               ruby_re_search
+#define re_match                ruby_re_match
+#define re_set_casetable        ruby_re_set_casetable
+#define re_copy_registers       ruby_re_copy_registers
+#define re_free_registers       ruby_re_free_registers
+#define register_info_type      ruby_register_info_type
+#define re_error_code_to_str    ruby_error_code_to_str
+
+/* Second-level mapping: these two ruby_ names forward to Oniguruma. */
+#define ruby_error_code_to_str  onig_error_code_to_str
+#define ruby_re_copy_registers  onig_region_copy
+#else
+/* Without RUBY_PLATFORM the same two names map directly to Oniguruma. */
+#define re_error_code_to_str    onig_error_code_to_str
+#define re_copy_registers       onig_region_copy
+#endif
+
+/* re_mbcinit takes an OnigEncoding when ONIG_RUBY_M17N is defined and an
+   int otherwise.
+   NOTE(review): the int is presumably one of the MBCTYPE_* values -- confirm
+   against the implementation. */
+#ifdef ONIG_RUBY_M17N
+ONIG_EXTERN
+void re_mbcinit P_((OnigEncoding));
+#else
+ONIG_EXTERN
+void re_mbcinit P_((int));
+#endif
+
+/* Prototypes for the re_* entry points operating on struct re_pattern_buffer
+   and struct re_registers.  ONIG_EXTERN and P_() are defined outside this
+   header; P_() wraps each parameter list.
+   re_compile_pattern and re_recompile_pattern take a pattern pointer, an
+   int, the pattern buffer and an err_buf pointer.
+   NOTE(review): the int is presumably the pattern length and err_buf
+   presumably receives an error message -- confirm against the
+   implementation. */
+ONIG_EXTERN
+int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
+ONIG_EXTERN
+int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
+ONIG_EXTERN
+void re_free_pattern P_((struct re_pattern_buffer*));
+ONIG_EXTERN
+int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
+ONIG_EXTERN
+int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
+ONIG_EXTERN
+int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
+ONIG_EXTERN
+void re_set_casetable P_((const char*));
+ONIG_EXTERN
+void re_free_registers P_((struct re_registers*));
+ONIG_EXTERN
+int re_alloc_pattern P_((struct re_pattern_buffer**));  /* added */
+
+#endif /* ONIGGNU_H */