Skip to content

Commit

Permalink
* pack.c (utf8_to_uv): added checks for malformed or redundant
Browse files Browse the repository at this point in the history
  UTF-8 sequences.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3105 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
matz committed Dec 2, 2002
1 parent 96986a7 commit e193fd8
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 15 deletions.
5 changes: 5 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ Sun Dec 1 22:43:29 2002 Nobuyoshi Nakada <[email protected]>
* win32/win32.c (rb_w32_stat): empty path is invalid, and return
ENOENT rather than EBADF in such case. [ruby-talk:57177]

Fri Nov 29 18:01:48 2002 Yukihiro Matsumoto <[email protected]>

* pack.c (utf8_to_uv): added checks for malformed or redundant
UTF-8 sequences.

Thu Nov 28 12:08:30 2002 Akinori MUSHA <[email protected]>

* lib/mkmf.rb: Avoid the use of "clean::" in favor of "clean:" in
Expand Down
15 changes: 15 additions & 0 deletions ext/socket/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -2922,4 +2922,19 @@ Init_socket()
#ifdef NI_DGRAM
sock_define_const("NI_DGRAM", NI_DGRAM);
#endif
#ifdef SHUT_RD
sock_define_const("SHUT_RD", SHUT_RD);
#else
sock_define_const("SHUT_RD", 0);
#endif
#ifdef SHUT_WR
sock_define_const("SHUT_WR", SHUT_WR);
#else
sock_define_const("SHUT_WR", 1);
#endif
#ifdef SHUT_RDWR
sock_define_const("SHUT_RDWR", SHUT_RDWR);
#else
sock_define_const("SHUT_RDWR", 2);
#endif
}
61 changes: 46 additions & 15 deletions pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -1855,25 +1855,56 @@ utf8_to_uv(p, lenp)
char *p;
long *lenp;
{
int c = (*p++)&0xff;
unsigned long uv;
long n = 1;

if (c < 0xc0) n = 1;
else if (c < 0xe0) n = 2;
else if (c < 0xf0) n = 3;
else if (c < 0xf8) n = 4;
else if (c < 0xfc) n = 5;
else if (c < 0xfe) n = 6;
else if (c == 0xfe) n = 7;
if (n > *lenp) return 0;
int c = *p++ & 0xff;
unsigned long uv = c;
long n;

if (!(uv & 0x80)) {
*lenp = 1;
return uv;
}
if (!(uv & 0x40)) {
rb_warning("malformed UTF-8 character");
*lenp = 1;
return uv;
}

if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
else if (!(uv & 0x01)) { n = 7; uv = 0; }
else { n = 13; uv = 0; }
if (n > *lenp) {
rb_warning("malformed UTF-8 character (expected %d bytes, given %d bytes)",
n, *lenp);
return 0xfffd;
}
*lenp = n--;

uv = c;
if (n != 0) {
uv &= (1<<(BYTEWIDTH-2-n)) - 1;
while (n--) {
uv = uv << 6 | (*p++ & ((1<<6)-1));
c = *p++ & 0xff;
if ((c & 0xc0) != 0x80) {
rb_warning("malformed UTF-8 character");
*lenp -= n + 1;
return 0xfffd;
}
else {
c &= 0x3f;
if (uv == 0 && c == 0) {
int i;

for (i=0; n-i>0 && (p[i] & 0x3f) == 0; i++)
;
rb_warning("redundant UTF-8 sequence (skip %d bytes)", i+1);
n -= i;
p += i;
continue;
}
uv = uv << 6 | c;
}
}
}
return uv;
Expand Down

0 comments on commit e193fd8

Please sign in to comment.