Skip to content

Commit

Permalink
Thu Feb 21 17:15:15 2008 Martin Duerst <[email protected]>
Browse files Browse the repository at this point in the history
	* transcode.c: Added basic support for passing options to String#encode
	  via a hash. Currently only one option, with one value, is supported:
	  invalid: :ignore (dropping invalid byte sequences instead of
	  producing an error). Option naming is not yet stable!

	* test/ruby/test_transcode.rb: Added a single test for the
	  invalid: :ignore option. No further tests yet, because most
	  conversion data does not yet distinguish between INVALID and UNKNOWN.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15565 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
duerst committed Feb 21, 2008
1 parent ca22f3e commit 6d5ef97
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 6 deletions.
11 changes: 11 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
Thu Feb 21 17:15:15 2008 Martin Duerst <[email protected]>

* transcode.c: Added basic support for passing options to String#encode
via a hash. Currently only one option, with one value, is supported:
invalid: :ignore (dropping invalid byte sequences instead of
producing an error). Option naming is not yet stable!

* test/ruby/test_transcode.rb: Added a single test for the
invalid: :ignore option. No further tests yet, because most
conversion data does not yet distinguish between INVALID and UNKNOWN.

Thu Feb 21 16:35:26 2008 Nobuyoshi Nakada <[email protected]>

* array.c (rb_ary_unshift_m): expands enough for argc. [ruby-dev:33880]
Expand Down
7 changes: 5 additions & 2 deletions test/ruby/test_transcode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ def setup # trick to create all the necessary encodings
end

def test_errors
# we don't have semantics for conversion without attribute yet
# maybe 'convert to UTF-8' would be nice :-)
assert_raise(ArgumentError) { 'abc'.encode }
assert_raise(ArgumentError) { 'abc'.encode! }
assert_raise(ArgumentError) { 'abc'.encode('foo', 'bar') }
Expand Down Expand Up @@ -241,4 +239,9 @@ def test_utf_32
check_utf_32_both_ways("\u{8FF00}", "\x00\x08\xFF\x00")
check_utf_32_both_ways("\u{F00FF}", "\x00\x0F\x00\xFF")
end

def test_invalid_ignore
# Smoke test of argument handling only: calls encode with a trailing
# options hash (invalid: :ignore) and makes no assertion on the result,
# so it fails only if the call itself raises.
'abc'.encode('utf-8', invalid: :ignore)
end
end
33 changes: 29 additions & 4 deletions transcode.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#include "transcode_data.h"
#include <ctype.h>

/* Symbols :invalid and :ignore. They are created in Init_transcode and
 * used by str_transcode to look up and compare entries of the options
 * hash. */
static VALUE sym_invalid, sym_ignore;
/* Option flag bit: set by str_transcode when invalid: :ignore is given;
 * when set, transcode_loop skips an invalid byte sequence instead of
 * raising. */
#define INVALID_IGNORE 0x1

/*
* Dispatch data and logic
*/
Expand Down Expand Up @@ -132,7 +135,8 @@ static void
transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
unsigned char *in_stop, unsigned char *out_stop,
const rb_transcoder *my_transcoder,
rb_transcoding *my_transcoding)
rb_transcoding *my_transcoding,
const int opt)
{
unsigned char *in_p = *in_pos, *out_p = *out_pos;
const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
Expand Down Expand Up @@ -211,14 +215,17 @@ transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
case INVALID:
goto invalid;
case UNDEF:
/* todo: add code for alternative behaviors */
/* todo: add code for alternate behaviors */
rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
continue;
}
continue;
invalid:
/* deal with invalid byte sequence */
/* todo: add code for alternative behaviors */
/* todo: add more alternative behaviors */
if (opt&INVALID_IGNORE) {
continue;
}
rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
continue;
}
Expand Down Expand Up @@ -254,7 +261,22 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
const rb_transcoder *my_transcoder;
rb_transcoding my_transcoding;
int final_encoding = 0;
VALUE opt;
int options = 0;

opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
if (!NIL_P(opt)) {
VALUE v;

argc--;
v = rb_hash_aref(opt, sym_invalid);
if (NIL_P(v)) {
rb_raise(rb_eArgError, "unknown value for invalid: setting");
}
else if (v==sym_ignore) {
options |= INVALID_IGNORE;
}
}
if (argc < 1 || argc > 2) {
rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
}
Expand Down Expand Up @@ -325,7 +347,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
my_transcoding.ruby_string_dest = dest;
my_transcoding.flush_func = str_transcoding_resize;

transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding);
transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding, options);
/* fromp is expected to have reached the end of the input (sp+slen);
 * if it has not, report how many bytes were left unconverted. */
if (fromp != sp+slen) {
    /* A pointer difference has type ptrdiff_t, not int; convert it
     * explicitly so the argument matches the format specifier. */
    rb_raise(rb_eArgError, "not fully converted, %ld bytes left", (long)(sp+slen-fromp));
}
Expand Down Expand Up @@ -426,6 +448,9 @@ Init_transcode(void)
transcoder_lib_table = st_init_strcasetable();
init_transcoder_table();

sym_invalid = ID2SYM(rb_intern("invalid"));
sym_ignore = ID2SYM(rb_intern("ignore"));

rb_define_method(rb_cString, "encode", rb_str_transcode, -1);
rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);
}

0 comments on commit 6d5ef97

Please sign in to comment.