Skip to content

Commit

Permalink
* string.c (rb_str_chomp_bang): now works on UTF-16.
Browse files Browse the repository at this point in the history
* string.c (tr_setup_table): negation should work on
  non-ASCII-compatible strings as well.

* string.c (rb_str_split_m): awk split should work on
  non-ASCII-compatible strings as well.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15641 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
matz committed Feb 29, 2008
1 parent daa622a commit 2d97d3d
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 31 deletions.
10 changes: 10 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
Fri Feb 29 20:25:07 2008 Yukihiro Matsumoto <[email protected]>

* string.c (rb_str_chomp_bang): now works on UTF-16.

* string.c (tr_setup_table): negation should work on
non-ASCII-compatible strings as well.

* string.c (rb_str_split_m): awk split should work on
non-ASCII-compatible strings as well.

Fri Feb 29 18:08:43 2008 Yukihiro Matsumoto <[email protected]>

* time.c (time_strftime): format should be ASCII-compatible.
Expand Down
87 changes: 63 additions & 24 deletions string.c
Original file line number Diff line number Diff line change
Expand Up @@ -4453,9 +4453,20 @@ tr_setup_table(VALUE str, char stable[256], int first,
tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
tr.gen = tr.now = tr.max = 0;

if (RSTRING_LEN(str) > 1 && RSTRING_PTR(str)[0] == '^') {
cflag = 1;
tr.p++;
if (RSTRING_LEN(str) > 1) {
if (rb_enc_asciicompat(enc)) {
if (RSTRING_PTR(str)[0] == '^') {
cflag = 1;
tr.p++;
}
}
else {
c = rb_enc_codepoint(RSTRING_PTR(str), RSTRING_END(str), enc);
if (c == '^') {
cflag = 1;
tr.p+=rb_enc_codelen(c, enc);
}
}
}
if (first) {
for (i=0; i<256; i++) {
Expand Down Expand Up @@ -4838,11 +4849,21 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
}
else {
fs_set:
if (TYPE(spat) == T_STRING && RSTRING_LEN(spat) == 1) {
if (RSTRING_PTR(spat)[0] == ' ') {
awk_split = Qtrue;
if (TYPE(spat) == T_STRING) {
rb_encoding *enc2 = STR_ENC_GET(spat);

if (rb_enc_mbminlen(enc2) == 1) {
if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
awk_split = Qtrue;
}
}
else {
if (str_strlen(spat, enc2) == 1 &&
rb_enc_codepoint(RSTRING_PTR(spat), RSTRING_END(spat), enc2) == ' ') {
awk_split = Qtrue;
}
}
if (!awk_split) {
spat = rb_reg_regcomp(rb_reg_quote(spat));
}
}
Expand Down Expand Up @@ -5266,38 +5287,56 @@ rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
char *p, *pp, *e;
long len, rslen;

len = RSTRING_LEN(str);
if (len == 0) return Qnil;
p = RSTRING_PTR(str);
e = p + len;
if (rb_scan_args(argc, argv, "01", &rs) == 0) {
len = RSTRING_LEN(str);
if (len == 0) return Qnil;
p = RSTRING_PTR(str);
rs = rb_rs;
if (rs == rb_default_rs) {
smart_chomp:
rb_enc_check(str, rs);
rb_str_modify(str);
if (RSTRING_PTR(str)[len-1] == '\n') {
STR_DEC_LEN(str);
if (RSTRING_LEN(str) > 0 &&
RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
STR_DEC_LEN(str);
enc = rb_enc_get(str);
if (rb_enc_mbminlen(enc) > 1) {
len = str_strlen(str, enc);
pp = rb_enc_nth(p, e, len-1, enc);
if (rb_enc_is_newline(pp, e, enc)) {
e = pp;
len--;
}
}
else if (RSTRING_PTR(str)[len-1] == '\r') {
STR_DEC_LEN(str);
if (len > 0) {
p = rb_enc_nth(p, e, len-1, enc);
if (rb_enc_codepoint(p, e, enc) == '\r') {
pp = e = p;
}
}
if (e == RSTRING_END(str)) {
return Qnil;
}
len = pp - RSTRING_PTR(str);
STR_SET_LEN(str, len);
}
else {
return Qnil;
if (RSTRING_PTR(str)[len-1] == '\n') {
STR_DEC_LEN(str);
if (RSTRING_LEN(str) > 0 &&
RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
STR_DEC_LEN(str);
}
}
else if (RSTRING_PTR(str)[len-1] == '\r') {
STR_DEC_LEN(str);
}
else {
return Qnil;
}
}
RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
return str;
}
}
if (NIL_P(rs)) return Qnil;
StringValue(rs);
enc = rb_enc_check(str, rs);
len = RSTRING_LEN(str);
if (len == 0) return Qnil;
p = RSTRING_PTR(str);
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
while (len>0 && p[len-1] == '\n') {
Expand All @@ -5321,8 +5360,8 @@ rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
if (is_broken_string(rs)) {
return Qnil;
}
e = p + len;
pp = e - rslen;
enc = rb_enc_check(str, rs);
if (p[len-1] == newline &&
(rslen <= 1 ||
memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
Expand Down
12 changes: 8 additions & 4 deletions test/ruby/test_string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ def test_center
def test_chomp
assert_equal(S("hello"), S("hello").chomp("\n"))
assert_equal(S("hello"), S("hello\n").chomp("\n"))
save = $/

$/ = "\n"

Expand All @@ -289,7 +290,7 @@ def test_chomp
$/ = "!"
assert_equal(S("hello"), S("hello").chomp)
assert_equal(S("hello"), S("hello!").chomp)
$/ = "\n"
$/ = save
end

def test_chomp!
Expand All @@ -302,6 +303,7 @@ def test_chomp!
a = S("hello\n")
a.chomp!(S("\n"))
assert_equal(S("hello"), a)
save = $/

$/ = "\n"
a = S("hello")
Expand All @@ -321,7 +323,7 @@ def test_chomp!
a.chomp!
assert_equal(S("hello"), a)

$/ = "\n"
$/ = save

a = S("hello\n")
b = a.dup
Expand Down Expand Up @@ -474,6 +476,7 @@ def test_dup
end

def test_each
save = $/
$/ = "\n"
res=[]
S("hello\nworld").lines.each {|x| res << x}
Expand All @@ -490,7 +493,7 @@ def test_each
S("hello!world").lines.each {|x| res << x}
assert_equal(S("hello!"), res[0])
assert_equal(S("world"), res[1])
$/ = "\n"
$/ = save
end

def test_each_byte
Expand All @@ -502,6 +505,7 @@ def test_each_byte
end

def test_each_line
save = $/
$/ = "\n"
res=[]
S("hello\nworld").lines.each {|x| res << x}
Expand All @@ -520,7 +524,7 @@ def test_each_line
assert_equal(S("hello!"), res[0])
assert_equal(S("world"), res[1])

$/ = "\n"
$/ = save
end

def test_empty?
Expand Down
8 changes: 5 additions & 3 deletions test/ruby/test_utf16.rb
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,11 @@ def test_concat_nonempty

def test_chomp
s = "\1\n".force_encoding("utf-16be")
assert_raise(ArgumentError, "#{encdump s}.chomp") {
s.chomp
}
assert_equal(s, s.chomp, "#{encdump s}.chomp")
s = "\0\n".force_encoding("utf-16be")
assert_equal("", s.chomp, "#{encdump s}.chomp")
s = "\0\r\0\n".force_encoding("utf-16be")
assert_equal("", s.chomp, "#{encdump s}.chomp")
end

def test_succ
Expand Down

0 comments on commit 2d97d3d

Please sign in to comment.