Skip to content

Commit

Permalink
Improve error handling during xml validation
Browse files Browse the repository at this point in the history
The form editor does not provide useful info when the type definition declares an invalid regex or if the regex matching results in error. This commit catches a PatternSyntaxException or a StackOverflowError thrown during validation, logs it in the server logs, and informs the user about this issue.
  • Loading branch information
gallardo committed Nov 25, 2022
1 parent 811bd0d commit 16ba880
Show file tree
Hide file tree
Showing 12 changed files with 320 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
GUI_EDITOR_XMLCONTENT_CANNOT_VALIDATE_ERROR_3 =Fehler bei der Validierung des Inhaltes. Ticket {0} - Regel "{1}" kann nicht verwendet werden, um folgenden Eingabewert zu validieren "{2}"
GUI_EDITOR_XMLCONTENT_INVALID_RULE_3 =Fehler: Ungültige Regel. Ticket {0} - Regel "{1}". {2}
GUI_EDITOR_XMLCONTENT_VALIDATION_WARNING_2 =Unpassender Eingabewert "{0}" nach der Regel {1}
GUI_EDITOR_XMLCONTENT_VALIDATION_ERROR_TITLE_0 =Die folgenden Fehler sind bei der Validierung des Formulars aufgetreten:
GUI_EDITOR_XMLCONTENT_VALIDATION_ERROR_2 =Fehlerhafter Eingabewert "{0}" nach der Regel {1}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ LOG_XMLCONTENT_VALIDATION_ERR_2 =Error de validaci
LOG_XMLCONTENT_VALIDATION_WARN_2 =Aviso de validación {0} : {1}
LOG_XMLCONTENT_VISIT_1 =Visitando {0}

GUI_EDITOR_XMLCONTENT_CANNOT_VALIDATE_ERROR_3 =No se puede validar el contenido porque se ha producido un error interno. Ticket {0} - La regla "{1}" no puede usarse para validar el contenido "{2}"
GUI_EDITOR_XMLCONTENT_INVALID_RULE_3 =Error: regla no válida. Ticket {0} - Regla "{1}". {2}
GUI_EDITOR_XMLCONTENT_VALIDATION_ERROR_TITLE_0=Los siguientes errores sucedieron en la validación del formulario:
GUI_EDITOR_XMLCONTENT_VALIDATION_ERROR_2 =Valor "{0}" no válido según la regla {1}
GUI_EDITOR_XMLCONTENT_VALIDATION_WARNING_2 =Valor "{0}" inapropiado según la regla {1}
Expand Down
2 changes: 1 addition & 1 deletion src/org/opencms/ade/contenteditor/CmsContentService.java
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,7 @@ public CmsValidationResult validateEntity(CmsEntity changedEntity) throws CmsRpc
}
}
return result;
} catch (Exception e) {
} catch (Throwable e) {
error(e);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/org/opencms/jsp/util/CmsJspStandardContextBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,7 @@ public String getBinaryUploadFolder(CmsJspContentAccessBean content) {
/**
* Returns the container the currently rendered element is part of.<p>
*
* @return the currently the currently rendered element is part of
* @return the container the currently rendered element is part of
*/
public CmsContainerBean getContainer() {

Expand Down
52 changes: 45 additions & 7 deletions src/org/opencms/xml/content/CmsDefaultXmlContentHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import javax.servlet.ServletRequest;

Expand Down Expand Up @@ -4255,23 +4256,60 @@ protected CmsXmlContentErrorHandler validateValue(
return validateValue(cms, value, valueStr, errorHandler, isWarning);
}

boolean matchResult = true;
boolean matchSign = true;
if (regex.charAt(0) == '!') {
// negate the pattern
matchResult = false;
matchSign = false;
regex = regex.substring(1);
}

String matchValue = valueStr;
if (matchValue == null) {
String stringToBeMatched = valueStr;
if (stringToBeMatched == null) {
// set match value to empty String to avoid exceptions in pattern matcher
matchValue = "";
stringToBeMatched = "";
}

// use the custom validation pattern
if (matchResult != Pattern.matches(regex, matchValue)) {
final boolean matches;
try {
matches = Pattern.matches(regex, stringToBeMatched);
} catch (PatternSyntaxException | StackOverflowError e) {
final String localizedMessage = (e.getLocalizedMessage() != null ? e.getLocalizedMessage() : "");
final String ticket = String.valueOf(System.currentTimeMillis());

Throwable trace = e;
if (e instanceof StackOverflowError) {
final String stackOverflowInfoMessage = "StackOverflowError thrown on pattern matching during xml" +
" content validation. (Cause will be also logged in DEBUG level.)\n" +
"Note 1.- Possible cause: The Java regex engine uses recursive method calls to implement" +
" backtracking. When a repetition inside a regular expression contains multiple paths" +
" (i.e. the body of the repetition contains an alternation (|), an optional element or another" +
" repetition), trying to match the regular expression can cause a stack overflow on large inputs." +
" This does not happen when using a possessive quantifier (such as *+ instead of *) or when using" +
" a character class inside a repetition (e.g. [ab]* instead of (a|b)*).\n" +
"Note 2.- On StackOverflowError, the size of the stacktraces could be limited by the JVM " +
" and we could be missing information to identify the origin of the problem. To help in this" +
" case, we create a new exception close to this origin. Alternatively, you can increase" +
" the depth of the stack trace (for instance, '-XX:MaxJavaStackTraceDepth=1000000') to" +
" identify it";
trace = LOG.isDebugEnabled() ? new Exception(stackOverflowInfoMessage, e) : new Exception(stackOverflowInfoMessage);
errorHandler.addError(value, Messages.get().getBundle(
value.getLocale()).key(Messages.GUI_EDITOR_XMLCONTENT_CANNOT_VALIDATE_ERROR_3, ticket, regex, stringToBeMatched));
} else {
errorHandler.addError(value, Messages.get().getBundle(
value.getLocale()).key(Messages.GUI_EDITOR_XMLCONTENT_INVALID_RULE_3, ticket, regex, localizedMessage));
}

LOG.warn("Ticket " + ticket + " - " + localizedMessage + "\n"
+ " Regex='" + (matchSign ? "" : "!") + regex + "'\n"
+ " Path='" + value.getPath() + "'\n"
+ " Input='" + stringToBeMatched + "'", trace);

return errorHandler;
}
if (matchSign != matches) {
// generate the message
String message = getValidationMessage(cms, value, regex, valueStr, matchResult, isWarning);
String message = getValidationMessage(cms, value, regex, valueStr, matchSign, isWarning);
if (isWarning) {
errorHandler.addWarning(value, message);
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/org/opencms/xml/content/CmsXmlContent.java
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,7 @@ public CmsXmlContentErrorHandler validate(CmsObject cms) {
* Visits all values of this XML content with the given value visitor.<p>
*
* Please note that the order in which the values are visited may NOT be the
* order they appear in the XML document. It is ensured that the the parent
* order they appear in the XML document. It is ensured that the parent
* of a nested value is visited before the element it contains.<p>
*
* @param visitor the value visitor implementation to visit the values with
Expand Down
4 changes: 4 additions & 0 deletions src/org/opencms/xml/content/Messages.java
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ public final class Messages extends A_CmsMessageBundle {
/** Message constant for key in the resource bundle. */
public static final String GUI_CATEGORY_CHECK_NOLEAF_ERROR_0 = "GUI_CATEGORY_CHECK_NOLEAF_ERROR_0";

public static final String GUI_EDITOR_XMLCONTENT_CANNOT_VALIDATE_ERROR_3 = "GUI_EDITOR_XMLCONTENT_CANNOT_VALIDATE_ERROR_3";

public static final String GUI_EDITOR_XMLCONTENT_INVALID_RULE_3 = "GUI_EDITOR_XMLCONTENT_INVALID_RULE_3";

/** Message constant for key in the resource bundle. */
public static final String GUI_EDITOR_XMLCONTENT_VALIDATION_ERROR_2 = "GUI_EDITOR_XMLCONTENT_VALIDATION_ERROR_2";

Expand Down
2 changes: 2 additions & 0 deletions src/org/opencms/xml/content/messages.properties
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ LOG_XMLCONTENT_VISIT_1 =Visiting {0}
GUI_XMLCONTENT_CHECK_WARNING_NOT_RELEASED_0 =Resource has not yet been released.
GUI_XMLCONTENT_CHECK_WARNING_EXPIRED_0 =Resource has already expired.
GUI_XMLCONTENT_CHECK_ERROR_0 =Invalid reference, the resource can not be found.
GUI_EDITOR_XMLCONTENT_CANNOT_VALIDATE_ERROR_3 =The content cannot be validated due to an internal error. Ticket {0} - Rule "{1}" cannot be used to validate content "{2}"
GUI_EDITOR_XMLCONTENT_INVALID_RULE_3 =Error: Invalid rule. Ticket {0} - Rule "{1}". {2}
GUI_EDITOR_XMLCONTENT_VALIDATION_ERROR_TITLE_0=The following errors occurred when validating the form:
GUI_EDITOR_XMLCONTENT_VALIDATION_ERROR_2 =Invalid value "{0}" according to rule {1}
GUI_EDITOR_XMLCONTENT_VALIDATION_WARNING_2 =Bad value "{0}" according to rule {1}
Expand Down
153 changes: 153 additions & 0 deletions test/org/opencms/xml/content/TestCmsXmlContentValidation.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* This library is part of OpenCms -
* the Open Source Content Management System
*
* Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software GmbH & Co. KG, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

package org.opencms.xml.content;

import junit.extensions.TestSetup;
import junit.framework.Test;
import junit.framework.TestSuite;

import org.opencms.file.CmsObject;
import org.opencms.i18n.CmsEncoder;
import org.opencms.main.CmsEvent;
import org.opencms.main.I_CmsEventListener;
import org.opencms.main.OpenCms;
import org.opencms.test.OpenCmsTestCase;
import org.opencms.test.OpenCmsTestProperties;
import org.opencms.util.CmsFileUtil;
import org.opencms.xml.CmsXmlContentDefinition;
import org.opencms.xml.CmsXmlEntityResolver;

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;

/**
* Tests the OpenCms XML content validation with regex rules<p>
*/
public class TestCmsXmlContentValidation extends OpenCmsTestCase {
final String SCHEMA_SYSTEM_ID = "dummy://xmlcontent-definition-testregex.xsd";

/**
* Default JUnit constructor.<p>
*
* @param arg0 JUnit parameters
*/
public TestCmsXmlContentValidation(String arg0) {

super(arg0);
}

/**
* Test suite for this test class.<p>
*
* @return the test suite
*/
public static Test suite() {

OpenCmsTestProperties.initialize(org.opencms.test.AllTests.TEST_PROPERTIES_PATH);

TestSuite suite = new TestSuite();
suite.setName(TestCmsXmlContentValidation.class.getName());

suite.addTest(new TestCmsXmlContentValidation("testHandlingOfPatternSyntaxExceptionDuringValidation"));
suite.addTest(new TestCmsXmlContentValidation("testHandlingOfStackOverflowErrorDuringValidation"));

TestSetup wrapper = new TestSetup(suite) {

@Override
protected void setUp() {

setupOpenCms("simpletest", "/");
}

@Override
protected void tearDown() {

removeOpenCms();
}
};

return wrapper;
}

public void testHandlingOfStackOverflowErrorDuringValidation() throws Exception {
final CmsXmlContentErrorHandler validationResult = validateXmlFile("org/opencms/xml/content/xmlcontent-definition-evilregex.xsd");

assertEquals("Number of registered errors", 1, validationResult.getErrors().size());
final String recordedError = validationResult.getErrors(Locale.ENGLISH).get("String[1]");
final String expectedMessage = Messages.get().getBundle(Locale.ENGLISH).key(Messages.GUI_EDITOR_XMLCONTENT_CANNOT_VALIDATE_ERROR_3).split("\\{")[0];
assertTrue("Expected error during validation not registered in the error handler. Recorded error: '" + recordedError + "'. Expected message: '" + expectedMessage + "'",
recordedError.contains(expectedMessage));
}

public void testHandlingOfPatternSyntaxExceptionDuringValidation() throws Exception {
final CmsXmlContentErrorHandler validationResult = validateXmlFile("org/opencms/xml/content/xmlcontent-definition-malformedregex.xsd");

assertEquals("Number of registered errors", 1, validationResult.getErrors().size());
final String recordedError = validationResult.getErrors(Locale.ENGLISH).get("String[1]");
final String expectedMessage = Messages.get().getBundle(Locale.ENGLISH).key(Messages.GUI_EDITOR_XMLCONTENT_INVALID_RULE_3).split("\\{")[0];
assertTrue("Expected error during validation not registered in the error handler. Recorded error: '" + recordedError + "'. Expected message: '" + expectedMessage + "'",
recordedError.contains(expectedMessage));
}

private CmsXmlContentErrorHandler validateXmlFile(String SCHEMA_FILENAME) throws Exception {
echo("Testing the handling of errors during validation using schema " + SCHEMA_FILENAME);
CmsObject cms = getCmsObject();
CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(cms);

cacheSchema(resolver, SCHEMA_SYSTEM_ID, SCHEMA_FILENAME);

// now read the XML content
String content = CmsFileUtil.readFile("org/opencms/xml/content/xmlcontent-1-mod7.xml", CmsEncoder.ENCODING_UTF_8);
CmsXmlContent xmlcontent = CmsXmlContentFactory.unmarshal(content, CmsEncoder.ENCODING_UTF_8, resolver);

// validate the XML structure
final CmsXmlContentErrorHandler validationResult = xmlcontent.validate(getCmsObject());
assertFalse("Warning were recorded but not expected. " + validationResult.getWarnings(Locale.ENGLISH), validationResult.hasWarnings());
return validationResult;
}

/**
* Updates the OpenCms XML entity resolver cache with a changed XML schema id.<p>
*
* @param resolver the OpenCms XML entity resolver to use
* @param id the XML schema id to update in the resolver
* @param filename the name of the file in the RFS where to read the new schema content from
* @throws Exception if something goes wrong
*/
private void cacheSchema(CmsXmlEntityResolver resolver, String id, String filename) throws Exception {

// fire "clear cache" event to clear up previously cached schemas
OpenCms.fireCmsEvent(new CmsEvent(I_CmsEventListener.EVENT_CLEAR_CACHES, new HashMap<String, Object>()));
// read the XML from the given file and store it in the resolver
String content = CmsFileUtil.readFile(filename, CmsEncoder.ENCODING_UTF_8);
CmsXmlContentDefinition definition = CmsXmlContentDefinition.unmarshal(content, id, resolver);
System.out.println(definition.getSchema().asXML());
CmsXmlEntityResolver.cacheSystemId(id, definition.getSchema().asXML().getBytes(StandardCharsets.UTF_8));
}
}
33 changes: 33 additions & 0 deletions test/org/opencms/xml/content/xmlcontent-1-mod7.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8"?>

<Multitests
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="dummy://xmlcontent-definition-testregex.xsd">

<Multitest language="en">
<String>1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
4567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890</String>
</Multitest>
</Multitests>
39 changes: 39 additions & 0 deletions test/org/opencms/xml/content/xmlcontent-definition-evilregex.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>

<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified">

<xsd:include schemaLocation="opencms://opencms-xmlcontent.xsd"/>
<xsd:element name="Multitests" type="OpenCmsMultitests"/>

<xsd:complexType name="OpenCmsMultitests">
<xsd:sequence>
<xsd:element name="Multitest" type="OpenCmsMultitest" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence>
</xsd:complexType>

<xsd:complexType name="OpenCmsMultitest">
<xsd:sequence>
<xsd:element name="String" type="OpenCmsString" minOccurs="0" maxOccurs="unbounded" />
</xsd:sequence>
<xsd:attribute name="language" type="OpenCmsLocale" use="required"/>
</xsd:complexType>

<xsd:annotation>
<xsd:appinfo>
<FieldSettings>
<Setting>
<PropertyName>String</PropertyName>
<!--
This kind of regex is compiled into a recursive call, which results in a StackOverflow error
when used on a very large string. Source: https://stackoverflow.com/a/7510006
Reported ticket in the Java Bug Database Report: https://bugs.java.com/bugdatabase/view_bug.do?bug_id=6337993
-->
<RuleRegex>(.|\R)*</RuleRegex>
<RuleType>error</RuleType>
<Error>error</Error>
<Search>listtitle</Search>
</Setting>
</FieldSettings>
</xsd:appinfo>
</xsd:annotation>
</xsd:schema>
Loading

0 comments on commit 16ba880

Please sign in to comment.