Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -2097,6 +2097,33 @@ private boolean isDelimiter(final char ch0, final CharSequence charSeq, final in
return true;
}

/**
 * Tests whether a value followed directly by the delimiter would contain a full delimiter match that begins inside the value.
 * <p>
 * Only multi-character delimiters can straddle the boundary this way. For the delimiter {@code ||}, the value {@code a|} followed by the delimiter reads
 * {@code a|||}, where a delimiter match already starts at the value's last character. A value for which this method returns {@code true} must be
 * encapsulated to keep the field boundary unambiguous.
 * </p>
 * <p>
 * Every start offset in the value from which a delimiter-length window would extend past the end of the value is examined. Positions beyond the value are
 * read from the start of the delimiter, as if the delimiter had been appended.
 * </p>
 *
 * @param charSeq         the value about to be written.
 * @param delimiter       the delimiter characters.
 * @param delimiterLength the number of delimiter characters to compare; values below 2 always yield {@code false}.
 * @return {@code true} if some suffix of {@code charSeq}, continued by the delimiter, spells the complete delimiter; {@code false} otherwise.
 */
private boolean endsWithDelimiterPrefix(final CharSequence charSeq, final char[] delimiter, final int delimiterLength) {
    // A single-character delimiter cannot overlap the end of a value.
    if (delimiterLength < 2) {
        return false;
    }
    final int valueLength = charSeq.length();
    // Earliest offset whose delimiter-length window still reaches past the end of the value.
    final int firstCandidate = Math.max(0, valueLength - delimiterLength + 1);
    for (int offset = firstCandidate; offset < valueLength; offset++) {
        int matched = 0;
        while (matched < delimiterLength) {
            final int pos = offset + matched;
            // Inside the value read the value; past its end read the delimiter that would follow it.
            final char actual = pos < valueLength ? charSeq.charAt(pos) : delimiter[pos - valueLength];
            if (actual != delimiter[matched]) {
                break;
            }
            matched++;
        }
        if (matched == delimiterLength) {
            return true;
        }
    }
    return false;
}

/**
* Tests whether escapes are being processed.
*
Expand Down Expand Up @@ -2510,6 +2537,9 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi
// encapsulate if we end in anything less than ' '
if (isTrimChar(c)) {
quote = true;
} else if (endsWithDelimiterPrefix(charSeq, delim, delimLength)) {
// A trailing partial multi-character delimiter would merge with the following delimiter on read.
quote = true;
}
}
}
Expand Down
22 changes: 22 additions & 0 deletions src/test/java/org/apache/commons/csv/CSVPrinterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1908,6 +1908,28 @@ void testQuoteCommentMarkerFirstChar() throws IOException {
}
}

@Test
void testQuoteValueEndingWithMultiCharacterDelimiterPrefix() throws IOException {
    final CSVFormat pipePairFormat = CSVFormat.DEFAULT.builder().setDelimiter("||").get();
    final StringWriter out = new StringWriter();
    try (CSVPrinter csvPrinter = new CSVPrinter(out, pipePairFormat)) {
        // The value "a|" ends with the first delimiter character: written unquoted it would give "a|||b", which reads back split one character early.
        csvPrinter.printRecord("a|", "b");
        // The value "a|b" has no trailing delimiter prefix and is expected to be written without quotes.
        csvPrinter.printRecord("a|b", "c");
    }
    final String printed = out.toString();
    final String expected = "\"a|\"||b" + RECORD_SEPARATOR + "a|b||c" + RECORD_SEPARATOR;
    assertEquals(expected, printed);
    // Round trip: parsing the printed text with the same format must give back the original values.
    try (CSVParser csvParser = CSVParser.parse(printed, pipePairFormat)) {
        final List<CSVRecord> parsed = csvParser.getRecords();
        assertEquals(2, parsed.size());
        final CSVRecord quotedRecord = parsed.get(0);
        assertEquals("a|", quotedRecord.get(0));
        assertEquals("b", quotedRecord.get(1));
        final CSVRecord plainRecord = parsed.get(1);
        assertEquals("a|b", plainRecord.get(0));
        assertEquals("c", plainRecord.get(1));
    }
}

@Test
void testQuoteNonNumeric() throws IOException {
final StringWriter sw = new StringWriter();
Expand Down
Loading