Skip to content

Commit

Permalink
udf: Use UTF-32 <-> UTF-8 conversion functions from NLS
Browse files: browse the repository at this point in the history
Instead of implementing our own functions converting to and from UTF-8,
use the ones provided by NLS.

Signed-off-by: Jan Kara <[email protected]>
  • Loading branch information
jankara committed Apr 19, 2018
1 parent b8333ea commit b8a41c4
Showing 1 changed file with 17 additions and 63 deletions.
80 changes: 17 additions & 63 deletions fs/udf/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#include "udf_sb.h"

#define UNICODE_MAX 0x10ffff
#define SURROGATE_MASK 0xfffff800
#define SURROGATE_PAIR 0x0000d800

Expand All @@ -40,22 +41,12 @@ static int udf_uni2char_utf8(wchar_t uni,
if (boundlen <= 0)
return -ENAMETOOLONG;

if ((uni & SURROGATE_MASK) == SURROGATE_PAIR)
return -EINVAL;

if (uni < 0x80) {
out[u_len++] = (unsigned char)uni;
} else if (uni < 0x800) {
if (boundlen < 2)
return -ENAMETOOLONG;
out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
} else {
if (boundlen < 3)
return -ENAMETOOLONG;
out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
u_len = utf32_to_utf8(uni, out, boundlen);
if (u_len < 0) {
if (uni > UNICODE_MAX ||
(uni & SURROGATE_MASK) == SURROGATE_PAIR)
return -EINVAL;
return -ENAMETOOLONG;
}
return u_len;
}
Expand All @@ -64,56 +55,19 @@ static int udf_char2uni_utf8(const unsigned char *in,
int boundlen,
wchar_t *uni)
{
unsigned int utf_char;
unsigned char c;
int utf_cnt, u_len;

utf_char = 0;
utf_cnt = 0;
for (u_len = 0; u_len < boundlen;) {
c = in[u_len++];

/* Complete a multi-byte UTF-8 character */
if (utf_cnt) {
utf_char = (utf_char << 6) | (c & 0x3f);
if (--utf_cnt)
continue;
} else {
/* Check for a multi-byte UTF-8 character */
if (c & 0x80) {
/* Start a multi-byte UTF-8 character */
if ((c & 0xe0) == 0xc0) {
utf_char = c & 0x1f;
utf_cnt = 1;
} else if ((c & 0xf0) == 0xe0) {
utf_char = c & 0x0f;
utf_cnt = 2;
} else if ((c & 0xf8) == 0xf0) {
utf_char = c & 0x07;
utf_cnt = 3;
} else if ((c & 0xfc) == 0xf8) {
utf_char = c & 0x03;
utf_cnt = 4;
} else if ((c & 0xfe) == 0xfc) {
utf_char = c & 0x01;
utf_cnt = 5;
} else {
utf_cnt = -1;
break;
}
continue;
} else {
/* Single byte UTF-8 character (most common) */
utf_char = c;
}
}
*uni = utf_char;
break;
}
if (utf_cnt) {
int u_len;
unicode_t c;

u_len = utf8_to_utf32(in, boundlen, &c);
if (u_len < 0) {
*uni = '?';
return -EINVAL;
}

if (c > MAX_WCHAR_T)
*uni = '?';
else
*uni = c;
return u_len;
}

Expand Down

0 comments on commit b8a41c4

Please sign in to comment.