Skip to content

Commit

Permalink
Support URL-escape non-ASCII characters properly as far as possible
Browse files Browse the repository at this point in the history
(ie. all characters from the Basic Multilingual Plane from Unicode)
  • Loading branch information
Rémi Denis-Courmont committed Mar 27, 2006
1 parent 798b0d9 commit 168549f
Showing 1 changed file with 45 additions and 12 deletions.
57 changes: 45 additions & 12 deletions include/vlc_url.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*****************************************************************************
* vlc_url.h: URL related macros
*****************************************************************************
* Copyright (C) 2002-2005 the VideoLAN team
* Copyright (C) 2002-2006 the VideoLAN team
* $Id$
*
* Authors: Christophe Massiot <[email protected]>
Expand Down Expand Up @@ -174,13 +174,19 @@ static inline int isurlsafe( int c )
return ( (unsigned char)( c - 'a' ) < 26 )
|| ( (unsigned char)( c - 'A' ) < 26 )
|| ( (unsigned char)( c - '0' ) < 10 )
|| ( (unsigned char)( c ) > 127 )
/* Hmm, we should not encode character that are allowed in URLs
* (even if they are not URL-safe), nor URL-safe characters.
* We still encode some of them because of Microsoft's crap browser.
*/
|| ( strchr( "-_.", c ) != NULL );
}

static inline char url_hexchar( int c )
{
return ( c < 10 ) ? c + '0' : c + 'A' - 10;
}

/*****************************************************************************
* vlc_UrlEncode:
*****************************************************************************
Expand All @@ -189,14 +195,9 @@ static inline int isurlsafe( int c )
*****************************************************************************/
static inline char *vlc_UrlEncode( const char *psz_url )
{
char *psz_enc, *out;
char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc;
const unsigned char *in;

psz_enc = (char *)malloc( 3 * strlen( psz_url ) + 1 );
if( psz_enc == NULL )
return NULL;

out = psz_enc;
for( in = (const unsigned char *)psz_url; *in; in++ )
{
unsigned char c = *in;
Expand All @@ -205,16 +206,48 @@ static inline char *vlc_UrlEncode( const char *psz_url )
*out++ = (char)c;
else
{
uint16_t cp;

*out++ = '%';
*out++ = ( ( c >> 4 ) >= 0xA ) ? 'A' + ( c >> 4 ) - 0xA
: '0' + ( c >> 4 );
*out++ = ( ( c & 0xf ) >= 0xA ) ? 'A' + ( c & 0xf ) - 0xA
: '0' + ( c & 0xf );
/* UTF-8 to UCS-2 conversion */
if( ( c & 0x7f ) == 0 )
cp = c;
else
if( ( c & 0xe0 ) == 0xc0 )
{
cp = ((c & 0x1f) << 6) | (in[1] & 0x3f);
in++;
}
else
if( ( c & 0xf0 ) == 0xe0 )
{
cp = ((c & 0xf) << 12) | ((in[1] & 0x3f) << 6) | (in[2] & 0x3f);
in += 2;
}
else
/* cannot URL-encode code points outside the BMP */
return NULL;

if( cp < 0xff )
{
/* Encode ISO-8859-1 characters */
*out++ = url_hexchar( cp >> 4 );
*out++ = url_hexchar( cp & 0xf );
}
else
{
/* Encode non-Latin-1 characters */
*out++ = 'u';
*out++ = url_hexchar( cp >> 12 );
*out++ = url_hexchar((cp >> 8) & 0xf );
*out++ = url_hexchar((cp >> 4) & 0xf );
*out++ = url_hexchar( cp & 0xf );
}
}
}
*out++ = '\0';

return (char *)realloc( psz_enc, out - psz_enc );
return strdup( psz_enc );
}

/*****************************************************************************
Expand Down

0 comments on commit 168549f

Please sign in to comment.