* transcode.c: new file to provide encoding conversion features.

code contributed by Martin Duerst. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14172 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Aupajo · Dec 10, 2007 · 7ded13f · 7ded13f
1 parent 38a24d7
commit 7ded13f
Show file tree

Hide file tree

Showing 8 changed files with 3,797 additions and 3 deletions.
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,8 @@
+Mon Dec 10 14:00:43 2007  Yukihiro Matsumoto  <matz@ruby-lang.org>
+
+	* transcode.c: new file to provide encoding conversion features.
+	  code contributed by Martin Duerst.
+
 Mon Dec 10 13:50:33 2007  Nobuyoshi Nakada  <nobu@ruby-lang.org>
 
 	* re.c (rb_reg_search): return byte offset.  [ruby-dev:32452]

diff --git a/common.mk b/common.mk
@@ -65,6 +65,8 @@ COMMONOBJS    = array.$(OBJEXT) \
 		string.$(OBJEXT) \
 		struct.$(OBJEXT) \
 		time.$(OBJEXT) \
+		transcode.$(OBJEXT) \
+		transcode_data_iso_8859.$(OBJEXT) \
 		util.$(OBJEXT) \
 		variable.$(OBJEXT) \
 		version.$(OBJEXT) \
@@ -530,7 +532,7 @@ sprintf.$(OBJEXT): {$(VPATH)}sprintf.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
 st.$(OBJEXT): {$(VPATH)}st.c {$(VPATH)}config.h {$(VPATH)}st.h {$(VPATH)}defines.h
 string.$(OBJEXT): {$(VPATH)}string.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
   {$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
-  {$(VPATH)}re.h {$(VPATH)}regex.h {$(VPATH)}encoding.h 
+  {$(VPATH)}re.h {$(VPATH)}regex.h {$(VPATH)}encoding.h
 struct.$(OBJEXT): {$(VPATH)}struct.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
   {$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h
 thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \
@@ -540,6 +542,9 @@ thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \
   {$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
   {$(VPATH)}node.h {$(VPATH)}util.h \
   {$(VPATH)}signal.h {$(VPATH)}st.h {$(VPATH)}dln.h
+transcode.$(OBJEXT): {$(VPATH)}transcode.c {$(VPATH)}transcode_data.h {$(VPATH)}ruby.h {$(VPATH)}config.h \
+  {$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h  {$(VPATH)}encoding.h
+transcode_data_iso_8859.$(OBJEXT): {$(VPATH)}transcode_data_iso_8859.c {$(VPATH)}transcode_data.h
 cont.$(OBJEXT):  {$(VPATH)}cont.c {$(VPATH)}eval_intern.h \
   {$(VPATH)}ruby.h {$(VPATH)}vm_core.h {$(VPATH)}id.h {$(VPATH)}config.h \
   {$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \

diff --git a/inits.c b/inits.c
@@ -16,6 +16,7 @@ void Init_Array(void);
 void Init_Bignum(void);
 void Init_Binding(void);
 void Init_Comparable(void);
+void Init_transcode(void);
 void Init_Dir(void);
 void Init_Enumerable(void);
 void Init_Enumerator(void);
@@ -77,6 +78,7 @@ rb_call_inits()
     Init_Struct();
     Init_Regexp();
     Init_pack();
+    Init_transcode();
     Init_marshal();
     Init_Range();
     Init_IO();

diff --git a/string.c b/string.c
@@ -179,7 +179,7 @@ str_alloc(VALUE klass)
     return (VALUE)str;
 }
 
-static VALUE
+VALUE
 str_new(VALUE klass, const char *ptr, long len)
 {
     VALUE str;
@@ -625,7 +625,7 @@ str_modifiable(VALUE str)
 	rb_raise(rb_eSecurityError, "Insecure: can't modify string");
 }
 
-static int
+int
 str_independent(VALUE str)
 {
     str_modifiable(str);

diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
@@ -0,0 +1,44 @@
+# -*- encoding: US-ASCII -*-   # make sure this runs in binary mode
+
+class String
+  # Thin wrapper that just calls force_encoding; kept under a different name so it is easy to find and remove later.
+  def fix_encoding (encoding)
+    force_encoding(encoding)
+  end
+end
+
+require 'test/unit'
+class TestConvert < Test::Unit::TestCase  # exercises String#encode and String#encode! between UTF-8 and ISO-8859-x
+  def test_can_call  # NOTE(review): the assert_equal calls below pass (actual, expected); Test::Unit documents (expected, actual) - confirm and swap
+    # encode/encode! called with no argument has no defined semantics yet, so it is expected to raise;
+    # defaulting to 'convert to UTF-8' might be a nice behavior to add later :-)
+    assert_raise(ArgumentError) { 'abc'.encode }
+    assert_raise(ArgumentError) { 'abc'.encode! }
+    assert_raise(ArgumentError) { 'abc'.force_encoding('Shift_JIS').encode('UTF-8') } # temporary (presumably until Shift_JIS conversion is supported - confirm)
+    assert_raise(ArgumentError) { 'abc'.force_encoding('Shift_JIS').encode!('UTF-8') } # temporary (presumably until Shift_JIS conversion is supported - confirm)
+    assert_raise(ArgumentError) { 'abc'.encode('foo', 'bar') }
+    assert_raise(ArgumentError) { 'abc'.encode!('foo', 'bar') }
+    assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode('foo') }
+    assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode!('foo') }
+    assert_equal('abc'.force_encoding('utf-8').encode('iso-8859-1'), 'abc') # temporary, fix encoding
+    assert_equal("D\xFCrst".force_encoding('iso-8859-1').encode('utf-8').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\xFCrst".encode('utf-8', 'iso-8859-1').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\xFCrst".encode('utf-8', 'iso-8859-2').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\xFCrst".encode('utf-8', 'iso-8859-3').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\xFCrst".encode('utf-8', 'iso-8859-4').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\xFCrst".encode('utf-8', 'iso-8859-9').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\xFCrst".encode('utf-8', 'iso-8859-10').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\xFCrst".encode('utf-8', 'iso-8859-13').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\xFCrst".encode('utf-8', 'iso-8859-14').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\xFCrst".encode('utf-8', 'iso-8859-15').fix_encoding('utf-8'), "D\u00FCrst")
+    assert_equal("D\u00FCrst".encode('iso-8859-1'), "D\xFCrst")
+    assert_equal("D\u00FCrst".encode('iso-8859-2'), "D\xFCrst")
+    assert_equal("D\u00FCrst".encode('iso-8859-3'), "D\xFCrst")
+    assert_equal("D\u00FCrst".encode('iso-8859-4'), "D\xFCrst")
+    assert_equal("D\u00FCrst".encode('iso-8859-9'), "D\xFCrst")
+    assert_equal("D\u00FCrst".encode('iso-8859-10'), "D\xFCrst")
+    assert_equal("D\u00FCrst".encode('iso-8859-13'), "D\xFCrst")
+    assert_equal("D\u00FCrst".encode('iso-8859-14'), "D\xFCrst")
+    assert_equal("D\u00FCrst".encode('iso-8859-15'), "D\xFCrst")
+  end
+end