Skip to content

Commit

Permalink
* encoding.c: provide basic features for M17N.
Browse files Browse the repository at this point in the history
* parse.y: encoding aware parsing.

* parse.y (pragma_encoding): encoding specification pragma.

* parse.y (rb_intern3): encoding specified symbols.

* string.c (rb_str_length): length based on characters.  
  for older behavior, bytesize method added.

* string.c (rb_str_index_m): index based on characters.  rindex as
  well.

* string.c (succ_char): encoding aware succeeding string.

* string.c (rb_str_reverse): reverse based on characters.

* string.c (rb_str_inspect): encoding aware string description.

* string.c (rb_str_upcase_bang): encoding aware case conversion.
  downcase, capitalize, swapcase as well.

* string.c (rb_str_tr_bang): tr based on characters.  delete,
  squeeze, tr_s, count as well.

* string.c (rb_str_split_m): split based on characters.

* string.c (rb_str_each_line): encoding aware each_line.

* string.c (rb_str_each_char): added.  iteration based on
  characters.

* string.c (rb_str_strip_bang): encoding aware whitespace
  stripping.  lstrip, rstrip as well.

* string.c (rb_str_justify): encoding aware justifying (ljust,
  rjust, center).

* string.c (str_encoding): get encoding attribute from a string. 

* re.c (rb_reg_initialize): encoding aware regular expression.

* sprintf.c (rb_str_format): formatting (i.e. length count) based
  on characters.

* io.c (rb_io_getc): getc to return one-character string.
  for older behavior, getbyte method added.

* ext/stringio/stringio.c (strio_getc): ditto.

* io.c (rb_io_ungetc): allow pushing arbitrary string at the
  current reading point.

* ext/stringio/stringio.c (strio_ungetc): ditto.

* ext/strscan/strscan.c: encoding support.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
matz committed Aug 25, 2007
1 parent 0ada813 commit a25fbe3
Show file tree
Hide file tree
Showing 113 changed files with 1,423 additions and 752 deletions.
60 changes: 60 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,63 @@
Sat Aug 25 11:45:37 2007 Yukihiro Matsumoto <[email protected]>

* encoding.c: provide basic features for M17N.

* parse.y: encoding aware parsing.

* parse.y (pragma_encoding): encoding specification pragma.

* parse.y (rb_intern3): encoding specified symbols.

* string.c (rb_str_length): length based on characters.
for older behavior, bytesize method added.

* string.c (rb_str_index_m): index based on characters. rindex as
well.

* string.c (succ_char): encoding aware succeeding string.

* string.c (rb_str_reverse): reverse based on characters.

* string.c (rb_str_inspect): encoding aware string description.

* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.

* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.

* string.c (rb_str_split_m): split based on characters.

* string.c (rb_str_each_line): encoding aware each_line.

* string.c (rb_str_each_char): added. iteration based on
characters.

* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.

* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).

* string.c (str_encoding): get encoding attribute from a string.

* re.c (rb_reg_initialize): encoding aware regular expression.

* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.

* io.c (rb_io_getc): getc to return one-character string.
for older behavior, getbyte method added.

* ext/stringio/stringio.c (strio_getc): ditto.

* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.

* ext/stringio/stringio.c (strio_ungetc): ditto.

* ext/strscan/strscan.c: encoding support.

Sat Aug 25 10:59:19 2007 Koichi Sasada <[email protected]>

* cont.c: separate Continuation and Fiber from core.
Expand Down
2 changes: 1 addition & 1 deletion array.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Fri Aug 6 09:46:12 JST 1993
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
Expand Down
2 changes: 1 addition & 1 deletion bignum.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Fri Jun 10 00:48:55 JST 1994
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/

Expand Down
2 changes: 1 addition & 1 deletion class.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Tue Aug 10 15:05:44 JST 1993
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/

Expand Down
4 changes: 3 additions & 1 deletion common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ OBJS = array.$(OBJEXT) \
compar.$(OBJEXT) \
dir.$(OBJEXT) \
dln.$(OBJEXT) \
encoding.$(OBJEXT) \
enum.$(OBJEXT) \
enumerator.$(OBJEXT) \
error.$(OBJEXT) \
Expand Down Expand Up @@ -401,6 +402,7 @@ dmydln.$(OBJEXT): {$(VPATH)}dmydln.c {$(VPATH)}dln.c {$(VPATH)}ruby.h \
{$(VPATH)}config.h {$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
{$(VPATH)}dln.h
dmyext.$(OBJEXT): {$(VPATH)}dmyext.c
encoding.$(OBJEXT): {$(VPATH)}encoding.c {$(VPATH)}encoding.h
enum.$(OBJEXT): {$(VPATH)}enum.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
{$(VPATH)}node.h {$(VPATH)}util.h
Expand Down Expand Up @@ -523,7 +525,7 @@ sprintf.$(OBJEXT): {$(VPATH)}sprintf.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
st.$(OBJEXT): {$(VPATH)}st.c {$(VPATH)}config.h {$(VPATH)}st.h {$(VPATH)}defines.h
string.$(OBJEXT): {$(VPATH)}string.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
{$(VPATH)}re.h {$(VPATH)}regex.h
{$(VPATH)}re.h {$(VPATH)}regex.h {$(VPATH)}encoding.h
struct.$(OBJEXT): {$(VPATH)}struct.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h
thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \
Expand Down
2 changes: 1 addition & 1 deletion compar.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Thu Aug 26 14:39:48 JST 1993
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/

Expand Down
2 changes: 1 addition & 1 deletion dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Wed Jan 5 09:51:01 JST 1994
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
Expand Down
2 changes: 1 addition & 1 deletion dln.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Tue Jan 18 17:05:06 JST 1994
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/

Expand Down
2 changes: 1 addition & 1 deletion dln.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Wed Jan 19 16:53:09 JST 1994
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/

Expand Down
2 changes: 1 addition & 1 deletion enum.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Fri Oct 1 15:15:19 JST 1993
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/

Expand Down
2 changes: 1 addition & 1 deletion error.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Mon Aug 9 16:11:34 JST 1993
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/

Expand Down
5 changes: 3 additions & 2 deletions euc_jp.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,9 @@ static int
code_to_mbclen(OnigCodePoint code)
{
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
else if ((code & 0xff0000) != 0) return 3;
else if ((code & 0xff00) != 0) return 2;
else if (code > 0xffffff) return 0;
else if ((code & 0xff0000) >= 0x800000) return 3;
else if ((code & 0xff00) >= 0x8000) return 2;
else return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion eval.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Thu Jun 10 14:22:17 JST 1993
Copyright (C) 1993-2003 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
Expand Down
2 changes: 1 addition & 1 deletion ext/socket/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$Date$
created at: Thu Mar 31 12:21:29 JST 1994
Copyright (C) 1993-2001 Yukihiro Matsumoto
Copyright (C) 1993-2007 Yukihiro Matsumoto
************************************************/

Expand Down
79 changes: 49 additions & 30 deletions ext/stringio/stringio.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
**********************************************************************/

#include "ruby.h"
#include "rubyio.h"
#include "ruby/io.h"
#if defined(HAVE_FCNTL_H) || defined(_WIN32)
#include <fcntl.h>
#elif defined(HAVE_SYS_FCNTL_H)
Expand Down Expand Up @@ -84,6 +84,18 @@ get_strio(VALUE self)
return ptr;
}

/*
 * Return a new string holding at most +len+ bytes of the StringIO buffer
 * (ptr->string), starting at byte offset +pos+.  The result is created
 * with rb_enc_str_new() using the encoding obtained from the buffer via
 * rb_enc_get(), so it carries the same encoding as the buffer.
 *
 * +pos+ and +len+ are byte counts.  +len+ is clamped to the number of
 * bytes remaining after +pos+; if that leaves a negative count (for
 * example when +pos+ lies beyond the end of the buffer, or +len+ itself
 * is negative) the length becomes 0.
 *
 * Offsets are carried as long rather than int: call sites pass ptr->pos
 * and pointer differences, and RSTRING_LEN() results are stored in long
 * elsewhere in this file, so int arithmetic here could truncate on
 * platforms where long is wider than int.
 */
static VALUE
strio_substr(struct StringIO *ptr, long pos, long len)
{
    VALUE str = ptr->string;
    rb_encoding *enc = rb_enc_get(str);
    long rlen = RSTRING_LEN(str) - pos;	/* bytes available from pos */

    if (len > rlen) len = rlen;		/* never read past the buffer end */
    if (len < 0) len = 0;		/* also covers rlen < 0 */
    return rb_enc_str_new(RSTRING_PTR(str)+pos, len, enc);
}

#define StringIO(obj) get_strio(obj)

#define CLOSED(ptr) (!((ptr)->flags & FMODE_READWRITE))
Expand Down Expand Up @@ -603,23 +615,25 @@ strio_each_byte(VALUE self)

/*
* call-seq:
* strio.getc -> fixnum or nil
* strio.getc -> string or nil
*
* See IO#getc.
*/
static VALUE
strio_getc(VALUE self)
{
struct StringIO *ptr = readable(StringIO(self));
int c;
char ch;
rb_encoding *enc = rb_enc_get(ptr->string);
int len;
char *p;

if (ptr->pos >= RSTRING_LEN(ptr->string)) {
return Qnil;
}
c = RSTRING_PTR(ptr->string)[ptr->pos++];
ch = c & 0xff;
return rb_str_new(&ch, 1);
p = RSTRING_PTR(ptr->string)+ptr->pos;
len = rb_enc_mbclen(p, enc);
ptr->pos += len;
return rb_enc_str_new(p, len, rb_enc_get(ptr->string));
}

/*
Expand Down Expand Up @@ -671,30 +685,34 @@ static VALUE
strio_ungetc(VALUE self, VALUE c)
{
struct StringIO *ptr = readable(StringIO(self));
int cc;
long len, pos = ptr->pos;
long lpos, clen;
char *p, *pend;
rb_encoding *enc;

if (NIL_P(c)) return Qnil;
if (FIXNUM_P(c)) {
cc = FIX2INT(c);
int cc = FIX2INT(c);
char buf[16];

enc = rb_enc_get(ptr->string);
rb_enc_mbcput(cc, buf, enc);
c = rb_enc_str_new(buf, rb_enc_codelen(cc, enc), enc);
}
else {
SafeStringValue(c);
if (RSTRING_LEN(c) > 1) {
rb_warn("IO#ungetc pushes back only one byte");
}
cc = (unsigned char)RSTRING_PTR(c)[0];
enc = rb_enc_check(ptr->string, c);
}
if (cc != EOF && pos > 0) {
if ((len = RSTRING_LEN(ptr->string)) < pos-- ||
(unsigned char)RSTRING_PTR(ptr->string)[pos] !=
(unsigned char)cc) {
strio_extend(ptr, pos, 1);
RSTRING_PTR(ptr->string)[pos] = cc;
OBJ_INFECT(ptr->string, self);
}
--ptr->pos;
/* get logical position */
lpos = 0; p = RSTRING_PTR(ptr->string); pend = p + ptr->pos - 1;
for (;;) {
clen = rb_enc_mbclen(p, enc);
if (p+clen >= pend) break;
p += clen;
lpos++;
}
rb_str_update(ptr->string, lpos, ptr->pos ? 1 : 0, c);
ptr->pos = p - RSTRING_PTR(ptr->string);

return Qnil;
}

Expand Down Expand Up @@ -800,7 +818,7 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr)
e = s + limit;
}
if (NIL_P(str)) {
str = rb_str_substr(ptr->string, ptr->pos, e - s);
str = strio_substr(ptr, ptr->pos, e - s);
}
else if ((n = RSTRING_LEN(str)) == 0) {
p = s;
Expand All @@ -816,13 +834,13 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr)
break;
}
}
str = rb_str_substr(ptr->string, s - RSTRING_PTR(ptr->string), e - s);
str = strio_substr(ptr, s - RSTRING_PTR(ptr->string), e - s);
}
else if (n == 1) {
if ((p = memchr(s, RSTRING_PTR(str)[0], e - s)) != 0) {
e = p + 1;
}
str = rb_str_substr(ptr->string, ptr->pos, e - s);
str = strio_substr(ptr, ptr->pos, e - s);
}
else {
if (n < e - s) {
Expand All @@ -843,7 +861,7 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr)
}
}
}
str = rb_str_substr(ptr->string, ptr->pos, e - s);
str = strio_substr(ptr, ptr->pos, e - s);
}
ptr->pos = e - RSTRING_PTR(ptr->string);
ptr->lineno++;
Expand Down Expand Up @@ -944,7 +962,7 @@ strio_write(VALUE self, VALUE str)
if (TYPE(str) != T_STRING)
str = rb_obj_as_string(str);
len = RSTRING_LEN(str);
if (!len) return INT2FIX(0);
if (len == 0) return INT2FIX(0);
check_modifiable(ptr);
olen = RSTRING_LEN(ptr->string);
if (ptr->flags & FMODE_APPEND) {
Expand All @@ -955,7 +973,8 @@ strio_write(VALUE self, VALUE str)
}
else {
strio_extend(ptr, ptr->pos, len);
rb_str_update(ptr->string, ptr->pos, len, str);
memmove(RSTRING_PTR(ptr->string)+ptr->pos, RSTRING_PTR(str), len);
OBJ_INFECT(ptr->string, str);
}
OBJ_INFECT(ptr->string, self);
ptr->pos += len;
Expand Down Expand Up @@ -1070,7 +1089,7 @@ strio_read(int argc, VALUE *argv, VALUE self)
rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc);
}
if (NIL_P(str)) {
str = rb_str_substr(ptr->string, ptr->pos, len);
str = strio_substr(ptr, ptr->pos, len);
}
else {
long rest = RSTRING_LEN(ptr->string) - ptr->pos;
Expand Down
Loading

0 comments on commit a25fbe3

Please sign in to comment.