Skip to content

Commit

Permalink
* transcode.c: new file to provide encoding conversion features.
Browse files Browse the repository at this point in the history
  code contributed by Martin Duerst.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14172 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
matz committed Dec 10, 2007
1 parent 38a24d7 commit 7ded13f
Show file tree
Hide file tree
Showing 8 changed files with 3,797 additions and 3 deletions.
5 changes: 5 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Mon Dec 10 14:00:43 2007 Yukihiro Matsumoto <[email protected]>

* transcode.c: new file to provide encoding conversion features.
code contributed by Martin Duerst.

Mon Dec 10 13:50:33 2007 Nobuyoshi Nakada <[email protected]>

* re.c (rb_reg_search): return byte offset. [ruby-dev:32452]
Expand Down
7 changes: 6 additions & 1 deletion common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ COMMONOBJS = array.$(OBJEXT) \
string.$(OBJEXT) \
struct.$(OBJEXT) \
time.$(OBJEXT) \
transcode.$(OBJEXT) \
transcode_data_iso_8859.$(OBJEXT) \
util.$(OBJEXT) \
variable.$(OBJEXT) \
version.$(OBJEXT) \
Expand Down Expand Up @@ -530,7 +532,7 @@ sprintf.$(OBJEXT): {$(VPATH)}sprintf.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
st.$(OBJEXT): {$(VPATH)}st.c {$(VPATH)}config.h {$(VPATH)}st.h {$(VPATH)}defines.h
string.$(OBJEXT): {$(VPATH)}string.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
{$(VPATH)}re.h {$(VPATH)}regex.h {$(VPATH)}encoding.h
{$(VPATH)}re.h {$(VPATH)}regex.h {$(VPATH)}encoding.h
struct.$(OBJEXT): {$(VPATH)}struct.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h
thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \
Expand All @@ -540,6 +542,9 @@ thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
{$(VPATH)}node.h {$(VPATH)}util.h \
{$(VPATH)}signal.h {$(VPATH)}st.h {$(VPATH)}dln.h
transcode.$(OBJEXT): {$(VPATH)}transcode.c {$(VPATH)}transcode_data.h {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h {$(VPATH)}encoding.h
transcode_data_iso_8859.$(OBJEXT): {$(VPATH)}transcode_data_iso_8859.c {$(VPATH)}transcode_data.h
cont.$(OBJEXT): {$(VPATH)}cont.c {$(VPATH)}eval_intern.h \
{$(VPATH)}ruby.h {$(VPATH)}vm_core.h {$(VPATH)}id.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
Expand Down
2 changes: 2 additions & 0 deletions inits.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ void Init_Array(void);
void Init_Bignum(void);
void Init_Binding(void);
void Init_Comparable(void);
void Init_transcode(void);
void Init_Dir(void);
void Init_Enumerable(void);
void Init_Enumerator(void);
Expand Down Expand Up @@ -77,6 +78,7 @@ rb_call_inits()
Init_Struct();
Init_Regexp();
Init_pack();
Init_transcode();
Init_marshal();
Init_Range();
Init_IO();
Expand Down
4 changes: 2 additions & 2 deletions string.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ str_alloc(VALUE klass)
return (VALUE)str;
}

static VALUE
VALUE
str_new(VALUE klass, const char *ptr, long len)
{
VALUE str;
Expand Down Expand Up @@ -625,7 +625,7 @@ str_modifiable(VALUE str)
rb_raise(rb_eSecurityError, "Insecure: can't modify string");
}

static int
int
str_independent(VALUE str)
{
str_modifiable(str);
Expand Down
44 changes: 44 additions & 0 deletions test/ruby/test_transcode.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# -*- encoding: US-ASCII -*- # make sure this runs in binary mode

class String
  # Temporary test helper that re-labels the receiver with +encoding+ by
  # delegating to String#force_encoding, and returns whatever that call
  # returns.
  #
  # It deliberately has its own name (instead of calling force_encoding
  # directly in the tests) so that it can be removed later in one place.
  def fix_encoding(encoding)
    self.force_encoding(encoding)
  end
end

require 'test/unit'
# Smoke tests for String#encode and String#encode!: argument validation,
# plus conversion of a single non-ASCII character between UTF-8 and a set
# of ISO-8859 encodings.
class TestConvert < Test::Unit::TestCase
  def test_can_call
    # Encodings exercised in both directions below. The assertions expect
    # byte 0xFC to correspond to U+00FC in every one of them.
    iso_8859_names = %w[
      iso-8859-1 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-9
      iso-8859-10 iso-8859-13 iso-8859-14 iso-8859-15
    ]

    # we don't have semantics for conversion without attribute yet
    # maybe 'convert to UTF-8' would be nice :-)
    assert_raise(ArgumentError) { 'abc'.encode }
    assert_raise(ArgumentError) { 'abc'.encode! }
    assert_raise(ArgumentError) { 'abc'.force_encoding('Shift_JIS').encode('UTF-8') } # temporary
    assert_raise(ArgumentError) { 'abc'.force_encoding('Shift_JIS').encode!('UTF-8') } # temporary
    assert_raise(ArgumentError) { 'abc'.encode('foo', 'bar') }
    assert_raise(ArgumentError) { 'abc'.encode!('foo', 'bar') }
    assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode('foo') }
    assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode!('foo') }

    # assert_equal takes (expected, actual); keeping that order makes the
    # failure output label the two values correctly.
    assert_equal('abc', 'abc'.force_encoding('utf-8').encode('iso-8859-1')) # temporary, fix encoding
    assert_equal("D\u00FCrst", "D\xFCrst".force_encoding('iso-8859-1').encode('utf-8').fix_encoding('utf-8'))

    # ISO-8859-x -> UTF-8, source encoding given explicitly as 2nd argument.
    iso_8859_names.each do |name|
      assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', name).fix_encoding('utf-8'), name)
    end

    # UTF-8 -> ISO-8859-x; the encoding name is passed as the failure
    # message so a failing iteration can be identified.
    iso_8859_names.each do |name|
      assert_equal("D\xFCrst", "D\u00FCrst".encode(name), name)
    end
  end
end
Loading

0 comments on commit 7ded13f

Please sign in to comment.