forked from videolan/vlc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support URL-escape non-ASCII characters properly as far as possible
(ie. all characters from the Basic Multilingual Plane from Unicode)
- Loading branch information
Rémi Denis-Courmont
committed
Mar 27, 2006
1 parent
798b0d9
commit 168549f
Showing
1 changed file
with
45 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
/***************************************************************************** | ||
* vlc_url.h: URL related macros | ||
***************************************************************************** | ||
* Copyright (C) 2002-2005 the VideoLAN team | ||
* Copyright (C) 2002-2006 the VideoLAN team | ||
* $Id$ | ||
* | ||
* Authors: Christophe Massiot <[email protected]> | ||
|
@@ -174,13 +174,19 @@ static inline int isurlsafe( int c ) | |
return ( (unsigned char)( c - 'a' ) < 26 ) | ||
|| ( (unsigned char)( c - 'A' ) < 26 ) | ||
|| ( (unsigned char)( c - '0' ) < 10 ) | ||
|| ( (unsigned char)( c ) > 127 ) | ||
/* Hmm, we should not encode character that are allowed in URLs | ||
* (even if they are not URL-safe), nor URL-safe characters. | ||
* We still encode some of them because of Microsoft's crap browser. | ||
*/ | ||
|| ( strchr( "-_.", c ) != NULL ); | ||
} | ||
|
||
static inline char url_hexchar( int c ) | ||
{ | ||
return ( c < 10 ) ? c + '0' : c + 'A' - 10; | ||
} | ||
|
||
/***************************************************************************** | ||
* vlc_UrlEncode: | ||
***************************************************************************** | ||
|
@@ -189,14 +195,9 @@ static inline int isurlsafe( int c ) | |
*****************************************************************************/ | ||
static inline char *vlc_UrlEncode( const char *psz_url ) | ||
{ | ||
char *psz_enc, *out; | ||
char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc; | ||
const unsigned char *in; | ||
|
||
psz_enc = (char *)malloc( 3 * strlen( psz_url ) + 1 ); | ||
if( psz_enc == NULL ) | ||
return NULL; | ||
|
||
out = psz_enc; | ||
for( in = (const unsigned char *)psz_url; *in; in++ ) | ||
{ | ||
unsigned char c = *in; | ||
|
@@ -205,16 +206,48 @@ static inline char *vlc_UrlEncode( const char *psz_url ) | |
*out++ = (char)c; | ||
else | ||
{ | ||
uint16_t cp; | ||
|
||
*out++ = '%'; | ||
*out++ = ( ( c >> 4 ) >= 0xA ) ? 'A' + ( c >> 4 ) - 0xA | ||
: '0' + ( c >> 4 ); | ||
*out++ = ( ( c & 0xf ) >= 0xA ) ? 'A' + ( c & 0xf ) - 0xA | ||
: '0' + ( c & 0xf ); | ||
/* UTF-8 to UCS-2 conversion */ | ||
if( ( c & 0x7f ) == 0 ) | ||
cp = c; | ||
else | ||
if( ( c & 0xe0 ) == 0xc0 ) | ||
{ | ||
cp = ((c & 0x1f) << 6) | (in[1] & 0x3f); | ||
in++; | ||
} | ||
else | ||
if( ( c & 0xf0 ) == 0xe0 ) | ||
{ | ||
cp = ((c & 0xf) << 12) | ((in[1] & 0x3f) << 6) | (in[2] & 0x3f); | ||
in += 2; | ||
} | ||
else | ||
/* cannot URL-encode code points outside the BMP */ | ||
return NULL; | ||
|
||
if( cp < 0xff ) | ||
{ | ||
/* Encode ISO-8859-1 characters */ | ||
*out++ = url_hexchar( cp >> 4 ); | ||
*out++ = url_hexchar( cp & 0xf ); | ||
} | ||
else | ||
{ | ||
/* Encode non-Latin-1 characters */ | ||
*out++ = 'u'; | ||
*out++ = url_hexchar( cp >> 12 ); | ||
*out++ = url_hexchar((cp >> 8) & 0xf ); | ||
*out++ = url_hexchar((cp >> 4) & 0xf ); | ||
*out++ = url_hexchar( cp & 0xf ); | ||
} | ||
} | ||
} | ||
*out++ = '\0'; | ||
|
||
return (char *)realloc( psz_enc, out - psz_enc ); | ||
return strdup( psz_enc ); | ||
} | ||
|
||
/***************************************************************************** | ||
|