Skip to content

Commit

Permalink
string_decoder: Fix failures from new test cases
Browse files Browse the repository at this point in the history
This patch simplifies the implementation of StringDecoder, fixes the
failures from the new test cases, and also no longer relies on V8's
WriteUtf8 function to encode individual surrogates.
  • Loading branch information
felixge authored and tjfontaine committed Jun 6, 2014
1 parent 22b8398 commit 9fbd0f0
Showing 1 changed file with 21 additions and 25 deletions.
46 changes: 21 additions & 25 deletions lib/string_decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,29 +57,29 @@ var StringDecoder = exports.StringDecoder = function(encoding) {

StringDecoder.prototype.write = function(buffer) {
var charStr = '';
var offset = 0;

// if our last write ended with an incomplete multibyte character
while (this.charLength) {
// determine how many remaining bytes this buffer has to offer for this char
var i = (buffer.length >= this.charLength - this.charReceived) ?
var available = (buffer.length >= this.charLength - this.charReceived) ?
this.charLength - this.charReceived :
buffer.length;

// add the new bytes to the char buffer
buffer.copy(this.charBuffer, this.charReceived, offset, i);
this.charReceived += (i - offset);
offset = i;
buffer.copy(this.charBuffer, this.charReceived, 0, available);
this.charReceived += available;

if (this.charReceived < this.charLength) {
// still not enough bytes in this buffer? wait for more ...
return '';
}

// remove bytes belonging to the current character from the buffer
buffer = buffer.slice(available, buffer.length);

// get the character that was split
charStr = this.charBuffer.slice(0, this.charLength).toString(this.encoding);

// lead surrogate (D800-DBFF) is also the incomplete character
// CESU-8: lead surrogate (D800-DBFF) is also the incomplete character
var charCode = charStr.charCodeAt(charStr.length - 1);
if (charCode >= 0xD800 && charCode <= 0xDBFF) {
this.charLength += this.surrogateSize;
Expand All @@ -89,34 +89,33 @@ StringDecoder.prototype.write = function(buffer) {
this.charReceived = this.charLength = 0;

// if there are no more bytes in this buffer, just emit our char
if (i == buffer.length) return charStr;

// otherwise cut off the characters end from the beginning of this buffer
buffer = buffer.slice(i, buffer.length);
if (buffer.length === 0) {
return charStr;
}
break;
}

var lenIncomplete = this.detectIncompleteChar(buffer);
// determine and set charLength / charReceived
this.detectIncompleteChar(buffer);

var end = buffer.length;
if (this.charLength) {
// buffer the incomplete character bytes we got
buffer.copy(this.charBuffer, 0, buffer.length - lenIncomplete, end);
this.charReceived = lenIncomplete;
end -= lenIncomplete;
buffer.copy(this.charBuffer, 0, buffer.length - this.charReceived, end);
end -= this.charReceived;
}

charStr += buffer.toString(this.encoding, 0, end);

var end = charStr.length - 1;
var charCode = charStr.charCodeAt(end);
// lead surrogate (D800-DBFF) is also the incomplete character
// CESU-8: lead surrogate (D800-DBFF) is also the incomplete character
if (charCode >= 0xD800 && charCode <= 0xDBFF) {
var size = this.surrogateSize;
this.charLength += size;
this.charReceived += size;
this.charBuffer.copy(this.charBuffer, size, 0, size);
this.charBuffer.write(charStr.charAt(charStr.length - 1), this.encoding);
buffer.copy(this.charBuffer, 0, 0, size);
return charStr.substring(0, end);
}

Expand Down Expand Up @@ -153,8 +152,7 @@ StringDecoder.prototype.detectIncompleteChar = function(buffer) {
break;
}
}

return i;
this.charReceived = i;
};

StringDecoder.prototype.end = function(buffer) {
Expand All @@ -177,13 +175,11 @@ function passThroughWrite(buffer) {
}

/**
 * Determines whether `buffer` ends part-way through a 2-byte unit and
 * records the result on the decoder this function is invoked on.
 *
 * Side effects on `this`:
 *   - `charReceived` is set to `buffer.length % 2` (0 or 1 leftover bytes).
 *   - `charLength` is set to 2 when a unit is pending, otherwise 0.
 *
 * The leftover count is also returned so that callers which read the return
 * value and callers which read `this.charReceived` both keep working.
 *
 * @param {{length: number}} buffer - chunk being decoded; only `length` is read.
 * @returns {number} number of trailing bytes that do not complete a unit.
 */
function utf16DetectIncompleteChar(buffer) {
  // An odd byte count means the final 2-byte unit is only half present.
  var incomplete = this.charReceived = buffer.length % 2;
  this.charLength = incomplete ? 2 : 0;
  return incomplete;
}

/**
 * Determines how many trailing bytes of `buffer` fall short of a full
 * 3-byte group and records the result on the decoder this function is
 * invoked on.
 *
 * Side effects on `this`:
 *   - `charReceived` is set to `buffer.length % 3` (0, 1 or 2 leftover bytes).
 *   - `charLength` is set to 3 when a group is pending, otherwise 0.
 *
 * The leftover count is also returned so that callers which read the return
 * value and callers which read `this.charReceived` both keep working.
 *
 * @param {{length: number}} buffer - chunk being decoded; only `length` is read.
 * @returns {number} number of trailing bytes that do not complete a group.
 */
function base64DetectIncompleteChar(buffer) {
  // Bytes are consumed in groups of three; any remainder must wait for more input.
  var incomplete = this.charReceived = buffer.length % 3;
  this.charLength = incomplete ? 3 : 0;
  return incomplete;
}

0 comments on commit 9fbd0f0

Please sign in to comment.