Skip to content

Commit

Permalink
changed return type of Tokeniser.consumeCharacterReference from Character to char[], and also changed TokeniserState accordingly
Browse files Browse the repository at this point in the history
  • Loading branch information
mingfai.ma committed Jan 26, 2013
1 parent 1fd0a61 commit 4de5fe8
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 26 deletions.
14 changes: 7 additions & 7 deletions src/main/java/org/jsoup/parser/Tokeniser.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void acknowledgeSelfClosingFlag() {
selfClosingFlagAcknowledged = true;
}

Character consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) {
char[] consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) {
if (reader.isEmpty())
return null;
if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.current())
Expand All @@ -124,11 +124,11 @@ Character consumeCharacterReference(Character additionalAllowedCharacter, boolea
} // skip
if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
characterReferenceError("character outside of valid range");
return replacementChar;
return new char[]{replacementChar};
} else {
// todo: implement number replacement table
// todo: check for extra illegal unicode points as parse errors
return (char) charval;
return Character.toChars(charval);
}
} else { // named
// get as many letters as possible, and look for matching entities.
Expand All @@ -150,7 +150,7 @@ Character consumeCharacterReference(Character additionalAllowedCharacter, boolea
}
if (!reader.matchConsume(";"))
characterReferenceError("missing semicolon"); // missing semi
return Entities.getCharacterByName(nameRef);
return new char[]{Entities.getCharacterByName(nameRef)};
}
}

Expand Down Expand Up @@ -232,13 +232,13 @@ String unescapeEntities(boolean inAttribute) {
builder.append(reader.consumeTo('&'));
if (reader.matches('&')) {
reader.consume();
Character c = consumeCharacterReference(null, inAttribute);
if (c == null)
char[] c = consumeCharacterReference(null, inAttribute);
if (c == null || c.length==0)
builder.append('&');
else
builder.append(c);
}
}
return builder.toString();
}
}
}
38 changes: 19 additions & 19 deletions src/main/java/org/jsoup/parser/TokeniserState.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ void read(Tokeniser t, CharacterReader r) {
// Tokeniser state that handles a character reference met in data: it asks the
// tokeniser to consume the reference, emits the result, and returns to Data.
// NOTE(review): this span is a rendered diff without +/- markers. The
// `Character c` / `if (c == null)` / `t.emit(c)` lines appear to be the removed
// side and the `char[] c` / length check / `t.emit(new String(c))` lines the
// added side -- confirm against the commit before treating it as compilable.
CharacterReferenceInData {
// from & in data
void read(Tokeniser t, CharacterReader r) {
Character c = t.consumeCharacterReference(null, false);
if (c == null)
// No additional allowed character (null) and not inside an attribute (false).
char[] c = t.consumeCharacterReference(null, false);
// A null or empty result means nothing was consumed: emit the literal '&'.
if (c == null||c.length==0)
t.emit('&');
else
t.emit(c);
// The char[] result is emitted as a String.
t.emit(new String(c));
// Whichever branch ran, go back to the Data state.
t.transition(Data);
}
},
Expand Down Expand Up @@ -66,11 +66,11 @@ void read(Tokeniser t, CharacterReader r) {
},
// Tokeniser state that handles a character reference met in RCDATA: it asks
// the tokeniser to consume the reference, emits the result, and returns to Rcdata.
// NOTE(review): this span is a rendered diff without +/- markers. The
// `Character c` / `if (c == null)` / `t.emit(c)` lines appear to be the removed
// side and the `char[] c` / length check / `t.emit(new String(c))` lines the
// added side -- confirm against the commit before treating it as compilable.
CharacterReferenceInRcdata {
void read(Tokeniser t, CharacterReader r) {
Character c = t.consumeCharacterReference(null, false);
if (c == null)
// No additional allowed character (null) and not inside an attribute (false).
char[] c = t.consumeCharacterReference(null, false);
// A null or empty result means nothing was consumed: emit the literal '&'.
if (c == null||c.length==0)
t.emit('&');
else
t.emit(c);
// The char[] result is emitted as a String.
t.emit(new String(c));
// Whichever branch ran, go back to the Rcdata state.
t.transition(Rcdata);
}
},
Expand Down Expand Up @@ -206,7 +206,7 @@ void read(Tokeniser t, CharacterReader r) {
case eof: // should emit pending tag?
t.eofError(this);
t.transition(Data);
// no default, as covered with above consumeToAny
// no default, as covered with above consumeToAny
}
}
},
Expand Down Expand Up @@ -589,7 +589,7 @@ void read(Tokeniser t, CharacterReader r) {
anythingElse(t, r);
}
}

private void anythingElse(Tokeniser t, CharacterReader r) {
t.emit("</" + t.dataBuffer.toString());
t.transition(ScriptDataEscaped);
Expand Down Expand Up @@ -832,7 +832,7 @@ void read(Tokeniser t, CharacterReader r) {
case '<':
t.error(this);
t.tagPending.appendAttributeName(c);
// no default, as covered in consumeToAny
// no default, as covered in consumeToAny
}
}
},
Expand Down Expand Up @@ -941,9 +941,9 @@ void read(Tokeniser t, CharacterReader r) {
t.transition(AfterAttributeValue_quoted);
break;
// '&' inside the attribute value: try to consume a character reference, with
// '"' as the additional allowed character and inAttribute = true.
// NOTE(review): diff fragment -- the `Character ref` lines appear to be the
// removed side and the `char[] ref` lines the added side; the '"' argument is
// the same on both.
case '&':
Character ref = t.consumeCharacterReference('"', true);
if (ref != null)
t.tagPending.appendAttributeValue(ref);
char[] ref = t.consumeCharacterReference('"', true);
// A non-null, non-empty result is appended to the pending tag's attribute
// value as a String; otherwise the literal '&' is appended.
if (ref != null && ref.length>0)
t.tagPending.appendAttributeValue(new String(ref));
else
t.tagPending.appendAttributeValue('&');
break;
Expand Down Expand Up @@ -971,9 +971,9 @@ void read(Tokeniser t, CharacterReader r) {
t.transition(AfterAttributeValue_quoted);
break;
// '&' inside the attribute value: try to consume a character reference and
// append it to the pending tag's attribute value, or append a literal '&'.
// NOTE(review): diff fragment -- the `Character ref` lines appear to be the
// removed side and the `char[] ref` lines the added side.
case '&':
Character ref = t.consumeCharacterReference('\'', true);
if (ref != null)
t.tagPending.appendAttributeValue(ref);
// NOTE(review): the additional allowed character changes from '\'' (line
// above) to '"' here, although the commit message only describes a return-type
// change. This looks like a copy-paste from the '"' case; presumably it should
// still be '\'' -- confirm and restore if so.
char[] ref = t.consumeCharacterReference('"', true);
if (ref != null && ref.length>0)
t.tagPending.appendAttributeValue(new String(ref));
else
t.tagPending.appendAttributeValue('&');
break;
Expand Down Expand Up @@ -1005,9 +1005,9 @@ void read(Tokeniser t, CharacterReader r) {
t.transition(BeforeAttributeName);
break;
// '&' inside the attribute value: try to consume a character reference and
// append it to the pending tag's attribute value, or append a literal '&'.
// NOTE(review): diff fragment -- the `Character ref` lines appear to be the
// removed side and the `char[] ref` lines the added side.
case '&':
Character ref = t.consumeCharacterReference('>', true);
if (ref != null)
t.tagPending.appendAttributeValue(ref);
// NOTE(review): the additional allowed character changes from '>' (line above)
// to '"' here, although the commit message only describes a return-type
// change. This looks like a copy-paste from the '"' case; presumably it should
// still be '>' -- confirm and restore if so.
char[] ref = t.consumeCharacterReference('"', true);
if (ref != null && ref.length>0)
t.tagPending.appendAttributeValue(new String(ref));
else
t.tagPending.appendAttributeValue('&');
break;
Expand Down Expand Up @@ -1800,4 +1800,4 @@ void read(Tokeniser t, CharacterReader r) {
private static final char replacementChar = Tokeniser.replacementChar;
private static final String replacementStr = String.valueOf(Tokeniser.replacementChar);
private static final char eof = CharacterReader.EOF;
}
}

0 comments on commit 4de5fe8

Please sign in to comment.