-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
172 additions
and
148 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
'use strict'; | ||
|
||
var iconvLite = require('iconv-lite'); | ||
var Iconv; | ||
|
||
try { | ||
Iconv = require('iconv').Iconv; | ||
} catch (E) { | ||
// node-iconv not present | ||
} | ||
|
||
// Expose to the world | ||
module.exports.convert = convert; | ||
|
||
/**
 * Convert the encoding of a UTF-8 string or a buffer.
 *
 * Conversion is attempted with node-iconv when it is installed (and
 * `useLite` is not set), falling back to iconv-lite when node-iconv is
 * missing or throws. If every converter fails, the error is logged and the
 * unconverted input is returned instead of throwing.
 *
 * @param {String|Buffer} str String to be converted
 * @param {String} to Encoding to be converted to
 * @param {String} [from='UTF-8'] Encoding to be converted from
 * @param {Boolean} [useLite] If set to true, force the use of iconv-lite
 * @return {Buffer} Encoded string
 */
function convert(str, to, from, useLite) {
    from = checkEncoding(from || 'UTF-8');
    to = checkEncoding(to || 'UTF-8');
    str = str || '';

    var result;
    var converted = false;

    // A string whose source encoding is not UTF-8 is treated as a "binary"
    // string, i.e. one character per byte.
    if (from !== 'UTF-8' && typeof str === 'string') {
        str = Buffer.from(str, 'binary');
    }

    if (from === to) {
        // Nothing to convert; a string input is turned into a buffer below.
        result = str;
    } else {
        if (Iconv && !useLite) {
            try {
                result = convertIconv(str, to, from);
                converted = true;
            } catch (iconvError) {
                console.log(iconvError);
            }
        }

        // Either node-iconv is unavailable/disabled or it just failed.
        if (!converted) {
            try {
                result = convertIconvLite(str, to, from);
            } catch (liteError) {
                console.log(liteError);
                // Best effort: hand back the input unconverted.
                result = str;
            }
        }
    }

    // Converters (and the pass-through paths) may yield a string.
    if (typeof result === 'string') {
        result = Buffer.from(result, 'utf-8');
    }

    return result;
}
|
||
/**
 * Convert the encoding of a string or buffer using node-iconv.
 *
 * Must only be called when the optional `Iconv` constructor was loaded.
 * The target encoding is suffixed with `//TRANSLIT//IGNORE` before being
 * handed to Iconv.
 *
 * @param {String|Buffer} str String to be converted
 * @param {String} to Encoding to be converted to
 * @param {String} from Encoding to be converted from
 * @return {Buffer} Encoded string
 */
function convertIconv(str, to, from) {
    var converter = new Iconv(from, to + '//TRANSLIT//IGNORE');
    var converted = converter.convert(str);

    return converted.slice(0, converted.length);
}
|
||
/**
 * Convert the encoding of a string or buffer with iconv-lite.
 *
 * Picks the cheapest route: decode only when the target is UTF-8, encode
 * only when the source is UTF-8, otherwise decode and then re-encode.
 *
 * @param {String|Buffer} str String to be converted
 * @param {String} to Encoding to be converted to
 * @param {String} from Encoding to be converted from
 * @return {Buffer|String} Whatever iconv-lite returns: the result of
 *         `decode` when `to` is UTF-8 (a string per iconv-lite's API),
 *         otherwise the result of `encode`
 */
function convertIconvLite(str, to, from) {
    if (to === 'UTF-8') {
        return iconvLite.decode(str, from);
    }

    if (from === 'UTF-8') {
        return iconvLite.encode(str, to);
    }

    return iconvLite.encode(iconvLite.decode(str, from), to);
}
|
||
/**
 * Normalizes a character set name.
 *
 * The name is stringified and trimmed, a handful of common aliases are
 * rewritten to a canonical spelling (e.g. `latin1` -> `ISO-8859-1`,
 * `win1252` -> `WINDOWS-1252`, `utf8` -> `UTF-8`), and the result is
 * upper-cased. Unknown names are only trimmed and upper-cased.
 *
 * @param {String} name Character set
 * @return {String} Character set name
 */
function checkEncoding(name) {
    // Applied in order; each entry is [alias pattern, canonical replacement].
    var aliasRules = [
        [/^latin[\-_]?(\d+)$/i, 'ISO-8859-$1'],
        [/^win(?:dows)?[\-_]?(\d+)$/i, 'WINDOWS-$1'],
        [/^utf[\-_]?(\d+)$/i, 'UTF-$1'],
        [/^ks_c_5601\-1987$/i, 'CP949'],
        [/^us[\-_]?ascii$/i, 'ASCII']
    ];
    var normalized = (name || '').toString().trim();
    var i;

    for (i = 0; i < aliasRules.length; i++) {
        normalized = normalized.replace(aliasRules[i][0], aliasRules[i][1]);
    }

    return normalized.toUpperCase();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,18 @@ | ||
{ | ||
"name": "encoding", | ||
"version": "0.1.7", | ||
"version": "0.1.8", | ||
"description": "Convert encodings, uses iconv by default and fallbacks to iconv-lite if needed", | ||
"main": "index.js", | ||
"main": "lib/encoding.js", | ||
"scripts": { | ||
"test": "nodeunit test.js" | ||
"test": "nodeunit test" | ||
}, | ||
"repository": "https://github.com/andris9/encoding.git", | ||
"author": "Andris Reinman", | ||
"license": "MIT", | ||
"dependencies":{ | ||
"iconv-lite": "~0.2.11" | ||
"dependencies": { | ||
"iconv-lite": "~0.4.3" | ||
}, | ||
"devDependencies":{ | ||
"devDependencies": { | ||
"nodeunit": "~0.8.1" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
'use strict'; | ||
|
||
var encoding = require('../lib/encoding'); | ||
|
||
// nodeunit suite covering encoding.convert() for the supported conversion
// directions. Buffers are built with Buffer.from() because the
// `new Buffer()` constructor is deprecated.
exports['General tests'] = {

    // UTF-8 string input, single-byte output.
    'From UTF-8 to Latin_1': function (test) {
        var input = 'ÕÄÖÜ',
            expected = Buffer.from([0xd5, 0xc4, 0xd6, 0xdc]);
        test.deepEqual(encoding.convert(input, 'latin1'), expected);
        test.done();
    },

    // Single-byte buffer input, compared as a decoded UTF-8 string.
    'From Latin_1 to UTF-8': function (test) {
        var input = Buffer.from([0xd5, 0xc4, 0xd6, 0xdc]),
            expected = 'ÕÄÖÜ';
        test.deepEqual(encoding.convert(input, 'utf-8', 'latin1').toString(), expected);
        test.done();
    },

    // Identical source and target encodings.
    'From UTF-8 to UTF-8': function (test) {
        var input = 'ÕÄÖÜ',
            expected = Buffer.from('ÕÄÖÜ');
        test.deepEqual(encoding.convert(input, 'utf-8', 'utf-8'), expected);
        test.done();
    },

    // Neither side is UTF-8; default converter selection.
    'From Latin_13 to Latin_15': function (test) {
        var input = Buffer.from([0xd5, 0xc4, 0xd6, 0xdc, 0xd0]),
            expected = Buffer.from([0xd5, 0xc4, 0xd6, 0xdc, 0xA6]);
        test.deepEqual(encoding.convert(input, 'latin_15', 'latin13'), expected);
        test.done();
    },

    // Same conversion with the useLite flag set.
    'From Latin_13 to Latin_15 lite': function (test) {
        var input = Buffer.from([0xd5, 0xc4, 0xd6, 0xdc, 0xd0]),
            expected = Buffer.from([0xd5, 0xc4, 0xd6, 0xdc, 0xA6]);
        test.deepEqual(encoding.convert(input, 'latin_15', 'latin13', true), expected);
        test.done();
    },

    // Input and expected output are supplied as base64-encoded bytes.
    'From ISO-2022-JP to UTF-8': function (test) {
        var input = Buffer.from('GyRCM1g5OzU7PVEwdzgmPSQ4IUYkMnFKczlwGyhC', 'base64'),
            expected = Buffer.from('5a2m5qCh5oqA6KGT5ZOh56CU5L+u5qSc6KiO5Lya5aCx5ZGK', 'base64');
        test.deepEqual(encoding.convert(input, 'utf-8', 'ISO-2022-JP'), expected);
        test.done();
    }
};