Skip to content

Commit

Permalink
KNL-1253 Improved support to add an option to better handle interpret…
Browse files Browse the repository at this point in the history
…ing html into the intended plain text

git-svn-id: https://source.sakaiproject.org/svn/kernel/trunk@309575 66ffb92e-73f9-0310-93c1-f5514f145a0a
  • Loading branch information
Aaron Zeckoski committed May 16, 2014
1 parent b0e59e7 commit de33331
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,12 @@ public String processFormattedText(final String strFromBrowser, StringBuilder er
* Strips html/xml tags from a string and returns the cleaned version
*
* @param text any text (if this is null or empty then the input text is returned unchanged)
* @param smartSpacing if true, tries to make the text reflect the intent of the html:
* runs of whitespace are collapsed to a single space, the result is trimmed, and
* block-level html (such as paragraphs and table rows) is converted into a space;
* if false, the html tags are just removed and the rest of the string is left intact
* (NOTE: false is also slightly faster)
* @return the cleaned string
*/
public String stripHtmlFromText(String text);
public String stripHtmlFromText(String text, boolean smartSpacing);

}
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ public String sanitizeHrefURL(String urlToSanitize) {
}

/**
 * No-op variant: performs no html stripping at all.
 * Logs the WARNING message and hands the input text back untouched,
 * so the smartSpacing flag has no effect here.
 */
@Override
public String stripHtmlFromText(String text) {
public String stripHtmlFromText(String text, boolean smartSpacing) {
log.warn(WARNING);
return text;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1161,10 +1161,17 @@ public String sanitizeHrefURL(String urlToSanitize) {
}

@Override
public String stripHtmlFromText(String text) {
public String stripHtmlFromText(String text, boolean smartSpacing) {
// KNL-1253 use Jsoup
if (text != null && !"".equals(text)) {
text = org.jsoup.Jsoup.clean(text, org.jsoup.safety.Whitelist.none());
if (smartSpacing) {
// replace block level html with an extra space (to try to preserve the intent)
text = text.replaceAll("/br>", "/br> ").replaceAll("/p>", "/p> ").replaceAll("/tr>", "/tr> ");
}
text = org.jsoup.Jsoup.clean(text, "", org.jsoup.safety.Whitelist.none(), new org.jsoup.nodes.Document.OutputSettings().prettyPrint(false).outline(false));
if (smartSpacing) {
text = text.replaceAll("\\s+", " ").trim(); // eliminate extra whitespaces
}
}
return text;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -842,28 +842,32 @@ public void testKNL_1253() {

// no change
text = null;
result = formattedText.stripHtmlFromText(text);
result = formattedText.stripHtmlFromText(text,true);
assertEquals(text, result);

text = "";
result = formattedText.stripHtmlFromText(text);
result = formattedText.stripHtmlFromText(text,true);
assertEquals(text, result);

text = "azeckoski";
result = formattedText.stripHtmlFromText(text);
result = formattedText.stripHtmlFromText(text,true);
assertEquals(text, result);

// changed
text = "<b>azeckoski</b>";
result = formattedText.stripHtmlFromText(text);
result = formattedText.stripHtmlFromText(text,true);
assertEquals("azeckoski", result);

text = "<a href='www.vt.edu'><b>azeckoski</b> is AZ</a>";
result = formattedText.stripHtmlFromText(text);
result = formattedText.stripHtmlFromText(text,true);
assertEquals("azeckoski is AZ", result);

text = "<table><tr><th>Column1</th></tr><tr><td>Row1</td></tr></table>";
result = formattedText.stripHtmlFromText(text);
result = formattedText.stripHtmlFromText(text,true);
assertEquals("Column1 Row1", result);

text = "<table><tr><th>Column1</th></tr><tr><td>Row1</td></tr></table>";
result = formattedText.stripHtmlFromText(text,false);
assertEquals("Column1Row1", result);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,10 @@ public static String sanitizeHrefURL(String urlToSanitize) {
}

/**
 * Strips html/xml tags from the given text by delegating to the
 * FormattedText instance returned by getFormattedText().
 *
 * @param text the text to strip, passed through to the delegate as-is
 * @param smartSpacing passed through to the delegate unchanged
 * @return whatever the delegate returns
 * @see org.sakaiproject.util.api.FormattedText#stripHtmlFromText(String)
 * @see org.sakaiproject.util.api.FormattedText#stripHtmlFromText(String,boolean)
 */
public String stripHtmlFromText(String text) {
return getFormattedText().stripHtmlFromText(text);
public String stripHtmlFromText(String text, boolean smartSpacing) {
return getFormattedText().stripHtmlFromText(text, smartSpacing);
}

}

0 comments on commit de33331

Please sign in to comment.