apache · ranjithrp · Jun 21, 2019 · Jun 22, 2019
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -266,7 +266,7 @@ public CSVFormat getFormat() {
      * @see Predefined#Default
      */
     public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
-            null, null, null, false, false, false, false, false, false, true);
+            null, null, null, false, false, false, false, false, false, true, false);
 
     /**
      * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
@@ -673,7 +673,7 @@ private static boolean isLineBreak(final Character c) {
      */
     public static CSVFormat newFormat(final char delimiter) {
         return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
-                false, false, false, false, true);
+                false, false, false, false, true, false);
     }
 
     /**
@@ -709,6 +709,8 @@ public static CSVFormat valueOf(final String format) {
     private final boolean ignoreHeaderCase; // should ignore header names case
 
     private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
+
+    private final boolean ignoreQuotesInToken; // Should text after a closing quote be kept in the token?
 
     private final String nullString; // the string to be used for null values
 
@@ -762,6 +764,8 @@ public static CSVFormat valueOf(final String format) {
      * @param trailingDelimiter
      *            TODO
      * @param autoFlush
+     * @param ignoreQuotesInToken
+     *            whether text that follows a closing quote is kept as part of the token
      * @throws IllegalArgumentException
      *             if the delimiter is a line break character
      */
@@ -770,7 +774,8 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
             final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
             final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
             final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
-            final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) {
+            final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames,
+            final boolean ignoreQuotesInToken) {
         this.delimiter = delimiter;
         this.quoteCharacter = quoteChar;
         this.quoteMode = quoteMode;
@@ -790,6 +795,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
         this.autoFlush = autoFlush;
         this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
         this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
+        this.ignoreQuotesInToken = ignoreQuotesInToken;
         validate();
     }
 
@@ -864,6 +870,9 @@ public boolean equals(final Object obj) {
         if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) {
             return false;
         }
+        if (ignoreQuotesInToken != other.ignoreQuotesInToken) {
+            return false;
+        }
         if (ignoreEmptyLines != other.ignoreEmptyLines) {
             return false;
         }
@@ -1004,6 +1013,16 @@ public boolean getIgnoreHeaderCase() {
     public boolean getIgnoreSurroundingSpaces() {
         return ignoreSurroundingSpaces;
     }
+
+    /**
+     * Specifies whether text that follows a closing quote is kept as part of the token when parsing input.
+     *
+     * @return {@code true} if characters after a closing quote are appended to the token,
+     *         {@code false} if non-whitespace characters there cause a parse error.
+     */
+    public boolean getIgnoreQuotesInToken() {
+        return ignoreQuotesInToken;
+    }
 
     /**
      * Gets the String to convert to and from {@code null}.
@@ -1088,6 +1107,7 @@ public int hashCode() {
         result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode());
         result = prime * result + ((nullString == null) ? 0 : nullString.hashCode());
         result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237);
+        result = prime * result + (ignoreQuotesInToken ? 1231 : 1237);
         result = prime * result + (ignoreHeaderCase ? 1231 : 1237);
         result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
         result = prime * result + (skipHeaderRecord ? 1231 : 1237);
@@ -1618,6 +1638,9 @@ public String toString() {
         if (getIgnoreSurroundingSpaces()) {
             sb.append(" SurroundingSpaces:ignored");
         }
+        if (getIgnoreQuotesInToken()) {
+            sb.append(" QuotesInToken:ignored");
+        }
         if (getIgnoreHeaderCase()) {
             sb.append(" IgnoreHeaderCase:ignored");
         }
@@ -1734,7 +1757,7 @@ public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeade
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -1760,7 +1783,7 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -1776,7 +1799,7 @@ public CSVFormat withAutoFlush(final boolean autoFlush) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -1812,7 +1835,7 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -1831,7 +1854,7 @@ public CSVFormat withDelimiter(final char delimiter) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -1863,7 +1886,7 @@ public CSVFormat withEscape(final Character escape) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
                 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
                 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2020,7 +2043,7 @@ public CSVFormat withHeader(final String... header) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2042,7 +2065,7 @@ public CSVFormat withHeaderComments(final Object... headerComments) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2068,7 +2091,7 @@ public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2095,7 +2118,7 @@ public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2121,9 +2144,35 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
+    }
+
+    /**
+     * Returns a new {@code CSVFormat} with the ignore-quotes-in-token behavior set to {@code true}.
+     *
+     * @return A new CSVFormat that is equal to this but keeps text that follows a closing quote in the token.
+     * @see #withIgnoreQuotesInToken(boolean)
+     * @since 1.9
+     */
+    public CSVFormat withIgnoreQuotesInToken() {
+        return this.withIgnoreQuotesInToken(true);
     }
 
+    /**
+     * Returns a new {@code CSVFormat} with the ignore-quotes-in-token behavior set to the given value.
+     *
+     * @param ignoreQuotesInToken
+     *            {@code true} to keep characters that follow a closing quote as part of the token,
+     *            {@code false} to reject non-whitespace characters between a closing quote and the delimiter
+     * @return A new CSVFormat that is equal to this but with the specified ignore-quotes-in-token behavior.
+     */
+    public CSVFormat withIgnoreQuotesInToken(final boolean ignoreQuotesInToken) {
+        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+                skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
+    }
+
     /**
      * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output.
      * <ul>
@@ -2141,7 +2190,7 @@ public CSVFormat withNullString(final String nullString) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2173,7 +2222,7 @@ public CSVFormat withQuote(final Character quoteChar) {
         return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
                 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
                 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2188,7 +2237,7 @@ public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
         return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2227,7 +2276,7 @@ public CSVFormat withRecordSeparator(final String recordSeparator) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2255,7 +2304,7 @@ public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2297,7 +2346,7 @@ public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 
     /**
@@ -2325,6 +2374,6 @@ public CSVFormat withTrim(final boolean trim) {
         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
-                allowDuplicateHeaderNames);
+                allowDuplicateHeaderNames, ignoreQuotesInToken);
     }
 }
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -55,6 +55,7 @@ final class Lexer implements Closeable {
 
     private final boolean ignoreSurroundingSpaces;
     private final boolean ignoreEmptyLines;
+    private final boolean ignoreQuotesInToken;
 
     /** The input stream */
     private final ExtendedBufferedReader reader;
@@ -72,6 +73,7 @@ String getFirstEol(){
         this.commentStart = mapNullToDisabled(format.getCommentMarker());
         this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
         this.ignoreEmptyLines = format.getIgnoreEmptyLines();
+        this.ignoreQuotesInToken = format.getIgnoreQuotesInToken();
     }
 
     /**
@@ -276,6 +278,8 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
                         } else if (readEndOfLine(c)) {
                             token.type = EORECORD;
                             return token;
+                        } else if (ignoreQuotesInToken) {
+                            token.content.append((char) c); // keep text that follows the closing quote
                         } else if (!isWhitespace(c)) {
                             // error invalid char between token and next delimiter
                             throw new IOException("(line " + getCurrentLineNumber() +

diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
@@ -1051,6 +1051,12 @@ public void testWithIgnoreEmptyLines() throws Exception {
         assertFalse(CSVFormat.DEFAULT.withIgnoreEmptyLines(false).getIgnoreEmptyLines());
         assertTrue(CSVFormat.DEFAULT.withIgnoreEmptyLines().getIgnoreEmptyLines());
     }
+
+    @Test
+    public void testWithIgnoreQuotesInToken() throws Exception {
+        assertFalse(CSVFormat.DEFAULT.withIgnoreQuotesInToken(false).getIgnoreQuotesInToken());
+        assertTrue(CSVFormat.DEFAULT.withIgnoreQuotesInToken().getIgnoreQuotesInToken());
+    }
 
 
     @Test

diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -31,12 +31,15 @@
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.io.StringReader;
 
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.junit.internal.runners.statements.Fail;
 
 /**
  *
@@ -68,6 +71,30 @@ public void testSurroundingSpacesAreDeleted() throws IOException {
             assertThat(parser.nextToken(new Token()), matches(EOF, ""));
         }
     }
+
+    @Test
+    public void testIgnoreQuotesInTokenTrue() throws IOException {
+        final String code = "abc,\"xyz\" 123 bar,3,11961034,\"First author,  Second Author\"";
+        try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreQuotesInToken())) {
+            assertThat(parser.nextToken(new Token()), matches(TOKEN, "abc"));
+            assertThat(parser.nextToken(new Token()), matches(TOKEN, "xyz 123 bar"));
+            assertThat(parser.nextToken(new Token()), matches(TOKEN, "3"));
+            assertThat(parser.nextToken(new Token()), matches(TOKEN, "11961034"));
+            assertThat(parser.nextToken(new Token()), matches(EOF, "First author,  Second Author"));
+        }
+    }
+
+    @Test
+    public void testIgnoreQuotesInTokenFalse() throws IOException {
+        final String code = "abc,\"xyz\" 123 bar,3,11961034,\"First author,  Second Author\"";
+        try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT)) {
+            assertThat(parser.nextToken(new Token()), matches(TOKEN, "abc"));
+            parser.nextToken(new Token()); // text after the closing quote must be rejected by default
+            fail("Expected an IOException for text following the closing quote");
+        } catch (final IOException e) {
+            Assert.assertEquals("(line 1) invalid char between encapsulated token and delimiter", e.getMessage());
+        }
+    }
 
     @Test
     public void testSurroundingTabsAreDeleted() throws IOException {