Skip to content

Commit

Permalink
NLS: update handling of Unicode
Browse files Browse the repository at this point in the history
This patch (as1239) updates the kernel's treatment of Unicode.  The
character-set conversion routines are well behind the current state of
the Unicode specification: They don't recognize the existence of code
points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.

The old wchar_t 16-bit type is retained because it's still used in
lots of places.  This shouldn't cause any new problems; if a
conversion now results in an invalid 16-bit code, then previously it
must have yielded an undefined code.

Difficult-to-read names like "utf_mbstowcs" are replaced with more
transparent names like "utf8s_to_utf16s" and the ordering of the
parameters is rationalized (buffer lengths come immediately after the
pointers they refer to, and the inputs precede the outputs).
Fortunately the low-level conversion routines are used in only a few
places; the interfaces to the higher-level uni2char and char2uni
methods have been left unchanged.

Signed-off-by: Alan Stern <[email protected]>
Acked-by: Clemens Ladisch <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
  • Loading branch information
AlanStern authored and gregkh committed Jun 16, 2009
1 parent a853a3d commit 74675a5
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 137 deletions.
10 changes: 3 additions & 7 deletions drivers/usb/core/message.c
Original file line number Diff line number Diff line change
Expand Up @@ -780,14 +780,13 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
{
unsigned char *tbuf;
int err;
unsigned int u;

if (dev->state == USB_STATE_SUSPENDED)
return -EHOSTUNREACH;
if (size <= 0 || !buf || !index)
return -EINVAL;
buf[0] = 0;
tbuf = kmalloc(256 + 2, GFP_NOIO);
tbuf = kmalloc(256, GFP_NOIO);
if (!tbuf)
return -ENOMEM;

Expand All @@ -814,12 +813,9 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
if (err < 0)
goto errout;

for (u = 2; u < err; u += 2)
le16_to_cpus((u16 *)&tbuf[u]);
tbuf[u] = 0;
tbuf[u + 1] = 0;
size--; /* leave room for trailing NULL char in output buffer */
err = utf8_wcstombs(buf, (u16 *)&tbuf[2], size);
err = utf16s_to_utf8s((wchar_t *) &tbuf[2], (err - 2) / 2,
UTF16_LITTLE_ENDIAN, buf, size);
buf[err] = 0;

if (tbuf[1] != USB_DT_STRING)
Expand Down
20 changes: 9 additions & 11 deletions fs/befs/linuxvfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ befs_utf2nls(struct super_block *sb, const char *in,
{
struct nls_table *nls = BEFS_SB(sb)->nls;
int i, o;
wchar_t uni;
unicode_t uni;
int unilen, utflen;
char *result;
/* The utf8->nls conversion won't make the final nls string bigger
Expand All @@ -539,16 +539,16 @@ befs_utf2nls(struct super_block *sb, const char *in,
for (i = o = 0; i < in_len; i += utflen, o += unilen) {

/* convert from UTF-8 to Unicode */
utflen = utf8_mbtowc(&uni, &in[i], in_len - i);
if (utflen < 0) {
utflen = utf8_to_utf32(&in[i], in_len - i, &uni);
if (utflen < 0)
goto conv_err;
}

/* convert from Unicode to nls */
if (uni > MAX_WCHAR_T)
goto conv_err;
unilen = nls->uni2char(uni, &result[o], in_len - o);
if (unilen < 0) {
if (unilen < 0)
goto conv_err;
}
}
result[o] = '\0';
*out_len = o;
Expand Down Expand Up @@ -619,15 +619,13 @@ befs_nls2utf(struct super_block *sb, const char *in,

/* convert from nls to unicode */
unilen = nls->char2uni(&in[i], in_len - i, &uni);
if (unilen < 0) {
if (unilen < 0)
goto conv_err;
}

/* convert from unicode to UTF-8 */
utflen = utf8_wctomb(&result[o], uni, 3);
if (utflen <= 0) {
utflen = utf32_to_utf8(uni, &result[o], 3);
if (utflen <= 0)
goto conv_err;
}
}

result[o] = '\0';
Expand Down
29 changes: 15 additions & 14 deletions fs/fat/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@
#include <asm/uaccess.h>
#include "fat.h"

/*
 * Maximum buffer size, in bytes, of a short name:
 * [(MSDOS_NAME + '.') * max size of one char + nul]
 * i.e. every character of the name plus the '.' is budgeted at
 * NLS_MAX_CHARSET_SIZE, with one extra byte for the terminating nul.
 * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
 */
#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
/*
 * Maximum length of the unicode name collected from the long-name slots.
 * FAT_MAX_UNI_CHARS is counted in wchar_t elements:
 * [max longname slots * 13 (chars in a slot) + nul]
 * FAT_MAX_UNI_SIZE is that same buffer measured in bytes
 * [FAT_MAX_UNI_CHARS * sizeof(wchar_t)].
 */
#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))

static inline loff_t fat_make_i_pos(struct super_block *sb,
struct buffer_head *bh,
struct msdos_dir_entry *de)
Expand Down Expand Up @@ -171,7 +184,8 @@ static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
unsigned char *buf, int size)
{
if (sbi->options.utf8)
return utf8_wcstombs(buf, uni, size);
return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
UTF16_HOST_ENDIAN, buf, size);
else
return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
sbi->nls_io);
Expand Down Expand Up @@ -324,19 +338,6 @@ static int fat_parse_long(struct inode *dir, loff_t *pos,
return 0;
}

/*
* Maximum buffer size of short name.
* [(MSDOS_NAME + '.') * max one char + nul]
* For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
*/
#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
/*
* Maximum buffer size of unicode chars from slots.
* [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
*/
#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))

/*
* Return values: negative -> error, 0 -> not found, positive -> found,
* value is the total amount of slots, including the shortname entry.
Expand Down
4 changes: 2 additions & 2 deletions fs/fat/namei_vfat.c
Original file line number Diff line number Diff line change
Expand Up @@ -502,11 +502,11 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
if (utf8) {
int name_len = strlen(name);

*outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX);
*outlen = utf8s_to_utf16s(name, PATH_MAX, (wchar_t *) outname);

/*
* We stripped '.'s before and set len appropriately,
* but utf8_mbstowcs doesn't care about len
* but utf8s_to_utf16s doesn't care about len
*/
*outlen -= (name_len - len);

Expand Down
36 changes: 3 additions & 33 deletions fs/isofs/joliet.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
return (op - ascii);
}

/*
 * Convert a big-endian wide character string to utf8.
 *
 * @s:      output buffer for the utf8 bytes
 * @pwcs:   input string, stored as raw bytes, two bytes (big endian)
 *          per 16-bit character
 * @inlen:  maximum number of 16-bit characters to read from @pwcs
 * @maxlen: maximum number of bytes that may be written to @s
 *
 * Conversion stops at the first all-zero 16-bit character, or as soon as
 * either @inlen or @maxlen is used up.  Returns the number of bytes stored
 * in @s.  No terminating nul is written.
 */
static int
wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
{
	const __u8 *ip = pwcs;
	__u8 *op = s;
	int size;
	__u16 c;

	/*
	 * Test the length limits before looking at the input bytes so that
	 * nothing beyond the inlen characters supplied by the caller is read.
	 */
	while ((inlen > 0) && (maxlen > 0) && (*ip || ip[1])) {
		c = (*ip << 8) | ip[1];
		if (c > 0x7f) {
			size = utf8_wctomb(op, c, maxlen);
			if (size == -1) {
				/*
				 * Ignore character and move on; charging one
				 * byte guarantees the loop still terminates.
				 */
				maxlen--;
			} else {
				op += size;
				maxlen -= size;
			}
		} else {
			/* Plain ASCII: one output byte, charged against maxlen */
			*op++ = (__u8) c;
			maxlen--;
		}
		ip += 2;
		inlen--;
	}
	return (op - s);
}

int
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
{
Expand All @@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;

if (utf8) {
len = wcsntombs_be(outname, de->name,
de->name_len[0] >> 1, PAGE_SIZE);
len = utf16s_to_utf8s((const wchar_t *) de->name,
de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
outname, PAGE_SIZE);
} else {
len = uni16_to_x8(outname, (__be16 *) de->name,
de->name_len[0] >> 1, nls);
Expand Down
8 changes: 5 additions & 3 deletions fs/ncpfs/ncplib_kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -1113,11 +1113,13 @@ ncp__io2vol(struct ncp_server *server, unsigned char *vname, unsigned int *vlen,

if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
int k;
unicode_t u;

k = utf8_mbtowc(&ec, iname, iname_end - iname);
if (k < 0)
k = utf8_to_utf32(iname, iname_end - iname, &u);
if (k < 0 || u > MAX_WCHAR_T)
return -EINVAL;
iname += k;
ec = u;
} else {
if (*iname == NCP_ESC) {
int k;
Expand Down Expand Up @@ -1214,7 +1216,7 @@ ncp__vol2io(struct ncp_server *server, unsigned char *iname, unsigned int *ilen,
if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
int k;

k = utf8_wctomb(iname, ec, iname_end - iname);
k = utf32_to_utf8(ec, iname, iname_end - iname);
if (k < 0) {
err = -ENAMETOOLONG;
goto quit;
Expand Down
Loading

0 comments on commit 74675a5

Please sign in to comment.