Skip to content

Commit

Permalink
Add and use CSVParser.Builder and builder()
Browse files Browse the repository at this point in the history
  • Loading branch information
garydgregory committed Nov 1, 2024
1 parent 9dcc633 commit e991e6d
Show file tree
Hide file tree
Showing 12 changed files with 195 additions and 47 deletions.
1 change: 1 addition & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
<!-- FIX -->
<!-- ADD -->
<action type="add" issue="CSV-313" dev="ggregory" due-to="Gary Gregory">Add CSVPrinter.getRecordCount().</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add and use CSVParser.Builder and builder().</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Gary Gregory, Dependabot">Bump org.apache.commons:commons-parent from 76 to 78 #486, #495.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory, Dependabot">Bump org.codehaus.mojo:taglist-maven-plugin from 3.1.0 to 3.2.1 #493.</action>
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -1370,6 +1370,15 @@ private static boolean containsLineBreak(final String source) {
return contains(source, Constants.CR) || contains(source, Constants.LF);
}

/**
 * Copies the given format, tolerating a {@code null} argument.
 *
 * @param format the format to copy, may be {@code null}.
 * @return a copy of {@code format}, or {@code null} if {@code format} is {@code null}.
 */
static CSVFormat copy(final CSVFormat format) {
    if (format == null) {
        // Nothing to copy: propagate the null so callers can apply their own default.
        return null;
    }
    return format.copy();
}

/**
 * Tests whether the given string is {@code null} or is empty once trimmed.
 *
 * @param value the string to test, may be {@code null}.
 * @return {@code true} if {@code value} is {@code null} or {@code value.trim()} is empty, {@code false} otherwise.
 */
static boolean isBlank(final String value) {
    if (value == null) {
        return true;
    }
    final String trimmed = value.trim();
    return trimmed.isEmpty();
}
Expand Down
73 changes: 71 additions & 2 deletions src/main/java/org/apache/commons/csv/CSVParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.function.Uncheck;

/**
Expand Down Expand Up @@ -142,6 +143,65 @@
*/
public final class CSVParser implements Iterable<CSVRecord>, Closeable {

/**
 * Builds a new {@link CSVParser}.
 * <p>
 * The input is configured through the origin setters inherited from {@link AbstractStreamBuilder}, for example {@code setReader(...)},
 * {@code setCharSequence(...)}, {@code setFile(...)}, {@code setPath(...)}, {@code setInputStream(...)} or {@code setURI(...)}, optionally combined with
 * {@code setCharset(...)}.
 * </p>
 * <p>
 * Defaults: the format is {@link CSVFormat#DEFAULT}, the character offset is {@code 0} and the record number is {@code 1}.
 * </p>
 *
 * @since 1.13.0
 */
public static class Builder extends AbstractStreamBuilder<CSVParser, Builder> {

    /** The CSV format, {@code null} means {@link CSVFormat#DEFAULT}; resolved in {@link #get()}. */
    private CSVFormat format;

    /** Lexer offset, {@code 0} when parsing starts at the beginning of the source. */
    private long characterOffset;

    /**
     * The next record number to assign. This value is handed to the parser as-is, so leaving it at the Java default of {@code 0} would number the first
     * record {@code 0}. Default to {@code 1} so that a parser built without an explicit record number counts its records starting at 1.
     */
    private long recordNumber = 1;

    /**
     * Constructs a new instance.
     */
    protected Builder() {
        // empty
    }

    /**
     * Builds a new {@link CSVParser} that reads from the configured origin.
     *
     * @return a new parser using the configured format (or {@link CSVFormat#DEFAULT} if none was set), character offset and record number.
     * @throws IOException if the reader cannot be obtained from the origin or the parser cannot be created.
     */
    @SuppressWarnings("resource") // The new parser takes the reader; closing the parser is the caller's responsibility.
    @Override
    public CSVParser get() throws IOException {
        return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber);
    }

    /**
     * Sets the lexer offset when the parser does not start parsing at the beginning of the source.
     *
     * @param characterOffset the lexer offset.
     * @return this instance.
     */
    public Builder setCharacterOffset(final long characterOffset) {
        this.characterOffset = characterOffset;
        return asThis();
    }

    /**
     * Sets the CSV format. A copy of the given format is kept.
     *
     * @param format the CSV format, null is equivalent to {@link CSVFormat#DEFAULT}.
     * @return this instance.
     */
    public Builder setFormat(final CSVFormat format) {
        this.format = CSVFormat.copy(format);
        return asThis();
    }

    /**
     * Sets the next record number to assign, defaults to {@code 1}.
     *
     * @param recordNumber the next record number to assign.
     * @return this instance.
     */
    public Builder setRecordNumber(final long recordNumber) {
        this.recordNumber = recordNumber;
        return asThis();
    }

}

final class CSVRecordIterator implements Iterator<CSVRecord> {
private CSVRecord current;

Expand Down Expand Up @@ -190,7 +250,6 @@ public void remove() {
throw new UnsupportedOperationException();
}
}

/**
* Header information based on name and position.
*/
Expand All @@ -212,6 +271,16 @@ private static final class Headers {
}
}

/**
 * Creates a new {@link Builder} for configuring and building a {@link CSVParser}.
 *
 * @return a new builder, never {@code null}.
 * @since 1.13.0
 */
public static Builder builder() {
    final Builder newBuilder = new Builder();
    return newBuilder;
}

/**
* Creates a parser for the given {@link File}.
*
Expand Down Expand Up @@ -427,7 +496,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
* @param characterOffset
* Lexer offset when the parser does not start parsing at the beginning of the source.
* @param recordNumber
* The next record number to assign
* The next record number to assign.
* @throws IllegalArgumentException
* If the parameters of the format are inconsistent or if either the reader or format is null.
* @throws IOException
Expand Down
8 changes: 4 additions & 4 deletions src/test/java/org/apache/commons/csv/CSVFileParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ private String readTestData(final BufferedReader reader) throws IOException {
@ParameterizedTest
@MethodSource("generateData")
public void testCSVFile(final File testFile) throws Exception {
try (FileReader fr = new FileReader(testFile); BufferedReader testData = new BufferedReader(fr)) {
String line = readTestData(testData);
try (FileReader fr = new FileReader(testFile); BufferedReader testDataReader = new BufferedReader(fr)) {
String line = readTestData(testDataReader);
assertNotNull("file must contain config line", line);
final String[] split = line.split(" ");
assertTrue(split.length >= 1, testFile.getName() + " require 1 param");
Expand All @@ -81,7 +81,7 @@ public void testCSVFile(final File testFile) throws Exception {
fail(testFile.getName() + " unexpected option: " + option);
}
}
line = readTestData(testData); // get string version of format
line = readTestData(testDataReader); // get string version of format
assertEquals(line, format.toString(), testFile.getName() + " Expected format ");

// Now parse the file and compare against the expected results
Expand All @@ -94,7 +94,7 @@ public void testCSVFile(final File testFile) throws Exception {
parsed += "#" + comment.replace("\n", "\\n");
}
final int count = record.size();
assertEquals(readTestData(testData), count + ":" + parsed, testFile.getName());
assertEquals(readTestData(testDataReader), count + ":" + parsed, testFile.getName());
}
}
}
Expand Down
111 changes: 89 additions & 22 deletions src/test/java/org/apache/commons/csv/CSVParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,15 @@
*/
public class CSVParserTest {

private static final CSVFormat EXCEL_WITH_HEADER = CSVFormat.EXCEL.withHeader();

private static final Charset UTF_8 = StandardCharsets.UTF_8;

private static final String UTF_8_NAME = UTF_8.name();

private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n" +
// + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
" \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
// + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
" \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping

private static final String CSV_INPUT_1 = "a,b,c,d";

Expand Down Expand Up @@ -220,48 +222,54 @@ public void testBackslashEscapingOld() throws IOException {
@Disabled("CSV-107")
public void testBOM() throws IOException {
final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv");
try (final CSVParser parser = CSVParser.parse(url, StandardCharsets.UTF_8, CSVFormat.EXCEL.withHeader())) {
try (final CSVParser parser = CSVParser.parse(url, StandardCharsets.UTF_8, EXCEL_WITH_HEADER)) {
parser.forEach(record -> assertNotNull(record.get("Date")));
}
}

@Test
public void testBOMInputStreamParserWithInputStream() throws IOException {
try (final BOMInputStream inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv");
final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
final CSVParser parser = CSVParser.parse(inputStream, UTF_8, EXCEL_WITH_HEADER)) {
parser.forEach(record -> assertNotNull(record.get("Date")));
}
}

@Test
public void testBOMInputStreamParserWithReader() throws IOException {
try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
final CSVParser parser = CSVParser.builder()
.setReader(reader)
.setFormat(EXCEL_WITH_HEADER)
.get()) {
parser.forEach(record -> assertNotNull(record.get("Date")));
}
}

@Test
public void testBOMInputStreamParseWithReader() throws IOException {
try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader())) {
final CSVParser parser = CSVParser.builder()
.setReader(reader)
.setFormat(EXCEL_WITH_HEADER)
.get()) {
parser.forEach(record -> assertNotNull(record.get("Date")));
}
}

@Test
public void testCarriageReturnEndings() throws IOException {
final String code = "foo\rbaar,\rhello,world\r,kanu";
try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
final String string = "foo\rbaar,\rhello,world\r,kanu";
try (final CSVParser parser = CSVParser.builder().setCharSequence(string).get()) {
final List<CSVRecord> records = parser.getRecords();
assertEquals(4, records.size());
}
}

@Test
public void testCarriageReturnLineFeedEndings() throws IOException {
final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
final String string = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
try (final CSVParser parser = CSVParser.builder().setCharSequence(string).get()) {
final List<CSVRecord> records = parser.getRecords();
assertEquals(4, records.size());
}
Expand Down Expand Up @@ -569,7 +577,7 @@ public void testExcelFormat2() throws Exception {
@Test
public void testExcelHeaderCountLessThanData() throws Exception {
final String code = "A,B,C,,\r\na,b,c,d,e\r\n";
try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader())) {
try (final CSVParser parser = CSVParser.parse(code, EXCEL_WITH_HEADER)) {
parser.getRecords().forEach(record -> {
assertEquals("a", record.get("A"));
assertEquals("b", record.get("B"));
Expand Down Expand Up @@ -783,7 +791,10 @@ public void testGetOneLine() throws IOException {
public void testGetOneLineOneParser() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT;
try (final PipedWriter writer = new PipedWriter();
final CSVParser parser = new CSVParser(new PipedReader(writer), format)) {
final CSVParser parser = CSVParser.builder()
.setReader(new PipedReader(writer))
.setFormat(format)
.get()) {
writer.append(CSV_INPUT_1);
writer.append(format.getRecordSeparator());
final CSVRecord record1 = parser.nextRecord();
Expand Down Expand Up @@ -1232,35 +1243,68 @@ public void testNotValueCSV() throws IOException {
public void testParse() throws Exception {
final ClassLoader loader = ClassLoader.getSystemClassLoader();
final URL url = loader.getResource("org/apache/commons/csv/CSVFileParser/test.csv");
final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D");
final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader("A", "B", "C", "D").build();
final Charset charset = StandardCharsets.UTF_8;

try (@SuppressWarnings("resource") // CSVParser closes the input resource
final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) {
// Reader
try (final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) {
parseFully(parser);
}
try (final CSVParser parser = CSVParser.parse(new String(Files.readAllBytes(Paths.get(url.toURI())), charset), format)) {
try (final CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(url.openStream(), charset)).setFormat(format).get()) {
parseFully(parser);
}
try (final CSVParser parser = CSVParser.parse(new File(url.toURI()), charset, format)) {
// String
final Path path = Paths.get(url.toURI());
final String string = new String(Files.readAllBytes(path), charset);
try (final CSVParser parser = CSVParser.parse(string, format)) {
parseFully(parser);
}
try (@SuppressWarnings("resource") // CSVParser closes the input resource
final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) {
try (final CSVParser parser = CSVParser.builder().setCharSequence(string).setFormat(format).get()) {
parseFully(parser);
}
try (final CSVParser parser = CSVParser.parse(Paths.get(url.toURI()), charset, format)) {
// File
final File file = new File(url.toURI());
try (final CSVParser parser = CSVParser.parse(file, charset, format)) {
parseFully(parser);
}
try (final CSVParser parser = CSVParser.builder().setFile(file).setCharset(charset).setFormat(format).get()) {
parseFully(parser);
}
// InputStream
try (final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) {
parseFully(parser);
}
try (final CSVParser parser = CSVParser.builder().setInputStream(url.openStream()).setCharset(charset).setFormat(format).get()) {
parseFully(parser);
}
// Path
try (final CSVParser parser = CSVParser.parse(path, charset, format)) {
parseFully(parser);
}
try (final CSVParser parser = CSVParser.builder().setPath(path).setCharset(charset).setFormat(format).get()) {
parseFully(parser);
}
// URL
try (final CSVParser parser = CSVParser.parse(url, charset, format)) {
parseFully(parser);
}
try (final CSVParser parser = CSVParser.builder().setURI(url.toURI()).setCharset(charset).setFormat(format).get()) {
parseFully(parser);
}
// InputStreamReader
try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) {
parseFully(parser);
}
try (final CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(url.openStream(), charset)).setFormat(format).get()) {
parseFully(parser);
}
// InputStreamReader with longs
try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format, /* characterOffset= */0, /* recordNumber= */1)) {
parseFully(parser);
}
try (final CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(url.openStream(), charset)).setFormat(format).setCharacterOffset(0)
.setRecordNumber(0).get()) {
parseFully(parser);
}
}

@Test
Expand Down Expand Up @@ -1380,7 +1424,10 @@ public void testParsingPrintedEmptyFirstColumn(final CSVFormat.Predefined format
try (CSVPrinter printer = new CSVPrinter(buf, format.getFormat())) {
printer.printRecords(Stream.of(lines));
}
try (CSVParser csvRecords = new CSVParser(new StringReader(buf.toString()), format.getFormat())) {
try (CSVParser csvRecords = CSVParser.builder()
.setReader(new StringReader(buf.toString()))
.setFormat(format.getFormat())
.get()) {
for (final String[] line : lines) {
assertArrayEquals(line, csvRecords.nextRecord().values());
}
Expand Down Expand Up @@ -1654,6 +1701,26 @@ private void validateRecordPosition(final String lineSeparator) throws IOExcepti
assertEquals(code.indexOf("EOF"), record.getCharacterPosition());
}
// now try to read starting at record 3
try (CSVParser parser = CSVParser.builder()
.setReader(new StringReader(code.substring((int) positionRecord3)))
.setFormat(format)
.setCharacterOffset(positionRecord3)
.setRecordNumber(3)
.get()) {
CSVRecord record;
// nextRecord
assertNotNull(record = parser.nextRecord());
assertEquals(3, record.getRecordNumber());
assertEquals(code.indexOf("'A"), record.getCharacterPosition());
assertEquals("A" + lineSeparator + "A", record.get(0));
assertEquals("B" + lineSeparator + "B", record.get(1));
assertEquals("CC", record.get(2));
// nextRecord
assertNotNull(record = parser.nextRecord());
assertEquals(4, record.getRecordNumber());
assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
assertEquals("\u00c4", record.get(0));
} // again with ctor
try (CSVParser parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format, positionRecord3, 3)) {
CSVRecord record;
// nextRecord
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/org/apache/commons/csv/PerformanceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ private static void testExtendedBuffer(final boolean makeString) throws Exceptio
}

private static void testParseCommonsCSV() throws Exception {
testParser("CSV", () -> new CSVParser(createReader(), format));
testParser("CSV", () -> CSVParser.builder().setReader(createReader()).setFormat(format).get());
}

private static void testParsePath() throws Exception {
Expand Down
Loading

0 comments on commit e991e6d

Please sign in to comment.