From 9b004b1e6a2155ba6e8da84a997bffa93ce2a86b Mon Sep 17 00:00:00 2001 From: Mark Baker Date: Mon, 25 Feb 2019 23:20:50 +0100 Subject: [PATCH] Ignore escaped enclosures within an enclosure when inferring csv separator (#906) --- CHANGELOG.md | 1 + src/PhpSpreadsheet/Reader/Csv.php | 4 ++-- tests/PhpSpreadsheetTests/Reader/CsvTest.php | 6 ++++++ ...break_in_enclosure_with_escaped_quotes.csv | 21 +++++++++++++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tests/data/Reader/CSV/line_break_in_enclosure_with_escaped_quotes.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f20ddc7..f80307d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). - Added support for inline styles in Html reader (borders, alignment, width, height) - QuotedText cells no longer treated as formulae if the content begins with a `=` - Clean handling for DDE in formulae +- Fix handling for escaped enclosures and new lines in CSV Separator Inference ## [1.6.0] - 2019-01-02 diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php index dd2e3655..9e6bccf7 100644 --- a/src/PhpSpreadsheet/Reader/Csv.php +++ b/src/PhpSpreadsheet/Reader/Csv.php @@ -254,8 +254,8 @@ class Csv extends BaseReader $line = $line . $newLine; // Drop everything that is enclosed to avoid counting false positives in enclosures - $enclosure = preg_quote($this->enclosure, '/'); - // Add 's' to the replace rule in order for '.' to also match newline. + $enclosure = '(?<!' . preg_quote($this->escapeCharacter, '/') . ')' + . preg_quote($this->enclosure, '/'); $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . 
')/Us', '', $line); // See if we have any enclosures left in the line diff --git a/tests/PhpSpreadsheetTests/Reader/CsvTest.php b/tests/PhpSpreadsheetTests/Reader/CsvTest.php index 80186a41..be08f6a6 100644 --- a/tests/PhpSpreadsheetTests/Reader/CsvTest.php +++ b/tests/PhpSpreadsheetTests/Reader/CsvTest.php @@ -49,6 +49,12 @@ class CsvTest extends TestCase 'A3', 'Test', ], + [ + __DIR__ . '/../../data/Reader/CSV/line_break_in_enclosure_with_escaped_quotes.csv', + ',', + 'A3', + 'Test', + ], [ __DIR__ . '/../../data/Reader/HTML/csv_with_angle_bracket.csv', ',', diff --git a/tests/data/Reader/CSV/line_break_in_enclosure_with_escaped_quotes.csv b/tests/data/Reader/CSV/line_break_in_enclosure_with_escaped_quotes.csv new file mode 100644 index 00000000..e84db1b5 --- /dev/null +++ b/tests/data/Reader/CSV/line_break_in_enclosure_with_escaped_quotes.csv @@ -0,0 +1,21 @@ +Name,Copy,URL +Test,"This is a \"test csv file\" +with both \"line breaks\" +and \"escaped +quotes\" that breaks +the delimiters",http://google.com +Test,"This is a \"test csv file\" +with both \"line breaks\" +and \"escaped +quotes\" that breaks +the delimiters",http://google.com +Test,"This is a \"test csv file\" +with both \"line breaks\" +and \"escaped +quotes\" that breaks +the delimiters",http://google.com +Test,"This is a \"test csv file\" +with both \"line breaks\" +and \"escaped +quotes\" that breaks +the delimiters",http://google.com