Skip to content

Commit

Permalink
* include/ruby/ruby.h (struct RRegexp): new field usecnt. replace
  str and len by src.
Browse files Browse the repository at this point in the history

* gc.c (gc_mark_children): mark src field of regexp.
  (obj_free): don't free str field.

* re.c (REG_BUSY): removed.
  (rb_reg_initialize): prohibit re-initializing a regexp.
  (rb_reg_search): use usecnt to prevent freeing a regexp that is
  currently in use.  this prevents a SEGV caused by:
    r = /\A((a.)*(a.)*)*b/
    r =~ "ab" + "\xc2\xa1".force_encoding("euc-jp")
    t = Thread.new { r =~ "ab"*8 + "\xc2\xa1".force_encoding("utf-8")}
    sleep 0.2
    r =~ "ab"*8 + "\xc2\xa1".force_encoding("euc-jp")



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@17635 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
akr committed Jun 28, 2008
1 parent 4c766e1 commit 340cd50
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 49 deletions.
18 changes: 18 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
Sat Jun 28 21:25:08 2008 Tanaka Akira <[email protected]>

* include/ruby/ruby.h (struct RRegexp): new field usecnt. replace
str and len by src.

* gc.c (gc_mark_children): mark src field of regexp.
(obj_free): don't free str field.

* re.c (REG_BUSY): removed.
(rb_reg_initialize): prohibit re-initializing a regexp.
(rb_reg_search): use usecnt to prevent freeing a regexp that is
currently in use. this prevents a SEGV caused by:
r = /\A((a.)*(a.)*)*b/
r =~ "ab" + "\xc2\xa1".force_encoding("euc-jp")
t = Thread.new { r =~ "ab"*8 + "\xc2\xa1".force_encoding("utf-8")}
sleep 0.2
r =~ "ab"*8 + "\xc2\xa1".force_encoding("euc-jp")

Sat Jun 28 21:15:43 2008 Nobuyoshi Nakada <[email protected]>

* include/ruby/intern.h (rb_str_new2, rb_tainted_str_new2,
Expand Down
6 changes: 3 additions & 3 deletions gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1267,6 +1267,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
break;

case T_REGEXP:
gc_mark(objspace, obj->as.regexp.src, lev);
break;

case T_FLOAT:
case T_BIGNUM:
break;
Expand Down Expand Up @@ -1505,9 +1508,6 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
if (RANY(obj)->as.regexp.ptr) {
onig_free(RANY(obj)->as.regexp.ptr);
}
if (RANY(obj)->as.regexp.str) {
xfree(RANY(obj)->as.regexp.str);
}
break;
case T_DATA:
if (DATA_PTR(obj)) {
Expand Down
7 changes: 5 additions & 2 deletions include/ruby/ruby.h
Original file line number Diff line number Diff line change
Expand Up @@ -522,9 +522,12 @@ struct RArray {
struct RRegexp {
struct RBasic basic;
struct re_pattern_buffer *ptr;
long len;
char *str;
VALUE src;
unsigned long usecnt;
};
#define RREGEXP_SRC(r) RREGEXP(r)->src
#define RREGEXP_SRC_PTR(r) RSTRING_PTR(RREGEXP(r)->src)
#define RREGEXP_SRC_LEN(r) RSTRING_LEN(RREGEXP(r)->src)

struct RHash {
struct RBasic basic;
Expand Down
11 changes: 7 additions & 4 deletions marshal.c
Original file line number Diff line number Diff line change
Expand Up @@ -693,10 +693,13 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
break;

case T_REGEXP:
w_uclass(obj, rb_cRegexp, arg);
w_byte(TYPE_REGEXP, arg);
w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
w_byte((char)rb_reg_options(obj), arg);
w_uclass(obj, rb_cRegexp, arg);
w_byte(TYPE_REGEXP, arg);
{
int opts = rb_reg_options(obj);
w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
w_byte((char)opts, arg);
}
break;

case T_ARRAY:
Expand Down
74 changes: 35 additions & 39 deletions re.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)

#define REG_LITERAL FL_USER5
#define REG_ENCODING_NONE FL_USER6
#define REG_BUSY FL_USER7

#define KCODE_FIXED FL_USER4

Expand Down Expand Up @@ -309,7 +308,7 @@ rb_char_to_option_kcode(int c, int *option, int *kcode)
static void
rb_reg_check(VALUE re)
{
if (!RREGEXP(re)->ptr || !RREGEXP(re)->str) {
if (!RREGEXP(re)->ptr || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
rb_raise(rb_eTypeError, "uninitialized Regexp");
}
}
Expand Down Expand Up @@ -416,7 +415,7 @@ rb_reg_source(VALUE re)
VALUE str;

rb_reg_check(re);
str = rb_enc_str_new(RREGEXP(re)->str,RREGEXP(re)->len, rb_enc_get(re));
str = rb_enc_str_new(RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re), rb_enc_get(re));
if (OBJ_TAINTED(re)) OBJ_TAINT(str);
return str;
}
Expand All @@ -437,7 +436,7 @@ static VALUE
rb_reg_inspect(VALUE re)
{
rb_reg_check(re);
return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re);
return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
}


Expand Down Expand Up @@ -475,8 +474,8 @@ rb_reg_to_s(VALUE re)

rb_enc_copy(str, re);
options = RREGEXP(re)->ptr->options;
ptr = (UChar*)RREGEXP(re)->str;
len = RREGEXP(re)->len;
ptr = (UChar*)RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
again:
if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
int err = 1;
Expand Down Expand Up @@ -528,8 +527,8 @@ rb_reg_to_s(VALUE re)
}
if (err) {
options = RREGEXP(re)->ptr->options;
ptr = (UChar*)RREGEXP(re)->str;
len = RREGEXP(re)->len;
ptr = (UChar*)RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
}
}

Expand Down Expand Up @@ -1220,10 +1219,10 @@ rb_reg_prepare_re(VALUE re, VALUE str)

rb_reg_check(re);
reg = RREGEXP(re)->ptr;
pattern = RREGEXP(re)->str;
pattern = RREGEXP_SRC_PTR(re);

unescaped = rb_reg_preprocess(
pattern, pattern + RREGEXP(re)->len, enc,
pattern, pattern + RREGEXP_SRC_LEN(re), enc,
&fixed_enc, err);

if (unescaped == Qnil) {
Expand All @@ -1236,7 +1235,7 @@ rb_reg_prepare_re(VALUE re, VALUE str)
OnigDefaultSyntax, &einfo);
if (r) {
onig_error_code_to_str((UChar*)err, r, &einfo);
rb_reg_raise(pattern, RREGEXP(re)->len, err, re);
rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
}

RB_GC_GUARD(unescaped);
Expand Down Expand Up @@ -1281,15 +1280,17 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
VALUE match;
struct re_registers regi, *regs = &regi;
char *range = RSTRING_PTR(str);
regex_t *reg0 = RREGEXP(re)->ptr, *reg;
int busy = FL_TEST(re, REG_BUSY);
regex_t *reg;
int tmpreg;

if (pos > RSTRING_LEN(str) || pos < 0) {
rb_backref_set(Qnil);
return -1;
}

reg = rb_reg_prepare_re(re, str);
tmpreg = reg != RREGEXP(re)->ptr;
if (!tmpreg) RREGEXP(re)->usecnt++;

match = rb_backref_get();
if (!NIL_P(match)) {
Expand All @@ -1303,7 +1304,6 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
if (NIL_P(match)) {
MEMZERO(regs, struct re_registers, 1);
}
FL_SET(re, REG_BUSY);
if (!reverse) {
range += RSTRING_LEN(str);
}
Expand All @@ -1313,17 +1313,16 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
((UChar*)(RSTRING_PTR(str)) + pos),
((UChar*)range),
regs, ONIG_OPTION_NONE);

if (RREGEXP(re)->ptr != reg) {
if (busy) {
if (!tmpreg) RREGEXP(re)->usecnt--;
if (tmpreg) {
if (RREGEXP(re)->usecnt) {
onig_free(reg);
}
else {
onig_free(reg0);
onig_free(RREGEXP(re)->ptr);
RREGEXP(re)->ptr = reg;
}
}
if (!busy) FL_UNSET(re, REG_BUSY);
if (result < 0) {
if (regs == &regi)
onig_region_free(regs, 0);
Expand All @@ -1334,7 +1333,7 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
else {
onig_errmsg_buffer err = "";
onig_error_code_to_str((UChar*)err, result);
rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, 0);
rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, 0);
}
}

Expand Down Expand Up @@ -2295,10 +2294,9 @@ rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc,
rb_check_frozen(obj);
if (FL_TEST(obj, REG_LITERAL))
rb_raise(rb_eSecurityError, "can't modify literal regexp");
if (re->ptr) onig_free(re->ptr);
if (re->str) xfree(re->str);
if (re->ptr)
rb_raise(rb_eTypeError, "already initialized regexp");
re->ptr = 0;
re->str = 0;

unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err);
if (unescaped == Qnil)
Expand Down Expand Up @@ -2330,10 +2328,8 @@ rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc,
re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
options & ARG_REG_OPTION_MASK, err);
if (!re->ptr) return -1;
re->str = ALLOC_N(char, len+1);
memcpy(re->str, s, len);
re->str[len] = '\0';
re->len = len;
re->src = rb_enc_str_new(s, len, enc);
OBJ_FREEZE(re->src);
RB_GC_GUARD(unescaped);
return 0;
}
Expand Down Expand Up @@ -2366,8 +2362,8 @@ rb_reg_s_alloc(VALUE klass)
OBJSETUP(re, klass, T_REGEXP);

re->ptr = 0;
re->len = 0;
re->str = 0;
re->src = 0;
re->usecnt = 0;

return (VALUE)re;
}
Expand Down Expand Up @@ -2431,9 +2427,9 @@ VALUE
rb_reg_regcomp(VALUE str)
{
volatile VALUE save_str = str;
if (reg_cache && RREGEXP(reg_cache)->len == RSTRING_LEN(str)
if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
&& memcmp(RREGEXP(reg_cache)->str, RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
&& memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
return reg_cache;

return reg_cache = rb_reg_new_str(save_str, 0);
Expand All @@ -2454,8 +2450,8 @@ rb_reg_hash(VALUE re)

rb_reg_check(re);
hashval = RREGEXP(re)->ptr->options;
len = RREGEXP(re)->len;
p = RREGEXP(re)->str;
len = RREGEXP_SRC_LEN(re);
p = RREGEXP_SRC_PTR(re);
while (len--) {
hashval = hashval * 33 + *p++;
}
Expand Down Expand Up @@ -2488,9 +2484,9 @@ rb_reg_equal(VALUE re1, VALUE re2)
rb_reg_check(re1); rb_reg_check(re2);
if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse;
if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse;
if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return Qfalse;
if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) == 0) {
if (memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0) {
return Qtrue;
}
return Qfalse;
Expand Down Expand Up @@ -2756,8 +2752,8 @@ rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
}
rb_reg_check(re);
flags = rb_reg_options(re);
ptr = RREGEXP(re)->str;
len = RREGEXP(re)->len;
ptr = RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
enc = rb_enc_get(re);
if (rb_reg_initialize(self, ptr, len, enc, flags, err)) {
str = rb_enc_str_new(ptr, len, enc);
Expand Down Expand Up @@ -3107,8 +3103,8 @@ rb_reg_init_copy(VALUE copy, VALUE re)
rb_raise(rb_eTypeError, "wrong argument type");
}
rb_reg_check(re);
s = RREGEXP(re)->str;
len = RREGEXP(re)->len;
s = RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
if (rb_reg_initialize(copy, s, len, rb_enc_get(re), rb_reg_options(re), err) != 0) {
rb_reg_raise(s, len, err, re);
}
Expand Down
2 changes: 1 addition & 1 deletion string.c
Original file line number Diff line number Diff line change
Expand Up @@ -2247,7 +2247,7 @@ rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
switch (TYPE(sub)) {
case T_REGEXP:
/* enc = rb_get_check(str, sub); */
if (RREGEXP(sub)->len) {
if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
pos = rb_reg_adjust_startpos(sub, str, pos, 1);
pos = rb_reg_search(sub, str, pos, 1);
pos = rb_str_sublen(str, pos);
Expand Down

0 comments on commit 340cd50

Please sign in to comment.