From 813855b2b2a4f39c73abc72a757730d33a7d9381 Mon Sep 17 00:00:00 2001 From: Paul Barton Date: Wed, 10 Oct 2018 15:27:14 +0100 Subject: [PATCH] Fix CSV delimiter detection on line breaks The CSV Reader can now correctly ignore line breaks inside enclosures which allows it to determine the delimiter correctly. Fixes #716 Fixes #717 --- CHANGELOG.md | 1 + src/PhpSpreadsheet/Reader/Csv.php | 42 ++++++++++++++++--- tests/PhpSpreadsheetTests/Reader/CsvTest.php | 6 +++ .../Reader/CSV/line_break_in_enclosure.csv | 18 ++++++++ 4 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 tests/data/Reader/CSV/line_break_in_enclosure.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index cac43c9d..31048b70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Xls file cause the exception during open by Xls reader - [#402](https://github.com/PHPOffice/PhpSpreadsheet/issues/402) - Skip non numeric value in SUMIF - [#618](https://github.com/PHPOffice/PhpSpreadsheet/pull/618) - OFFSET should allow omitted height and width - [#561](https://github.com/PHPOffice/PhpSpreadsheet/issues/561) +- Correctly determine delimiter when CSV contains line breaks inside enclosures - [#716](https://github.com/PHPOffice/PhpSpreadsheet/issues/716) ## [1.4.1] - 2018-09-30 diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php index 6899773a..0e18bcc3 100644 --- a/src/PhpSpreadsheet/Reader/Csv.php +++ b/src/PhpSpreadsheet/Reader/Csv.php @@ -163,11 +163,7 @@ class Csv extends BaseReader // Count how many times each of the potential delimiters appears in each line $numberLines = 0; - while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) { - // Drop everything that is enclosed to avoid counting false positives in enclosures - $enclosure = preg_quote($this->enclosure, '/'); - $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . 
')/U', '', $line); - + while (($line = $this->getNextLine()) !== false && (++$numberLines < 1000)) { $countLine = []; for ($i = strlen($line) - 1; $i >= 0; --$i) { $char = $line[$i]; @@ -230,6 +226,42 @@ class Csv extends BaseReader return $this->skipBOM(); } + /** + * Get the next full line from the file. + * + * @param string $line + * + * @return bool|string + */ + private function getNextLine($line = '') + { + // Get the next line in the file + $newLine = fgets($this->fileHandle); + + // Return false if there is no next line + if ($newLine === false) { + return false; + } + + // Add the new line to the line passed in + $line = $line . $newLine; + + // Drop everything that is enclosed to avoid counting false positives in enclosures + $enclosure = preg_quote($this->enclosure, '/'); + $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line); + + // See if we have any enclosures left in the line + $matches = []; + preg_match('/(' . $enclosure . ')/', $line, $matches); + + // if we still have an enclosure then we need to read the next line as well + if (count($matches) > 0) { + $line = $this->getNextLine($line); + } + + return $line; + } + /** * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). * diff --git a/tests/PhpSpreadsheetTests/Reader/CsvTest.php b/tests/PhpSpreadsheetTests/Reader/CsvTest.php index eeddbb08..e748cd1b 100644 --- a/tests/PhpSpreadsheetTests/Reader/CsvTest.php +++ b/tests/PhpSpreadsheetTests/Reader/CsvTest.php @@ -43,6 +43,12 @@ class CsvTest extends TestCase 'C2', '25,5', ], + [ + __DIR__ . '/../../data/Reader/CSV/line_break_in_enclosure.csv', + ',', + 'A3', + 'Test', + ], [ __DIR__ . 
'/../../data/Reader/HTML/csv_with_angle_bracket.csv', ',', diff --git a/tests/data/Reader/CSV/line_break_in_enclosure.csv b/tests/data/Reader/CSV/line_break_in_enclosure.csv new file mode 100644 index 00000000..70e84bcd --- /dev/null +++ b/tests/data/Reader/CSV/line_break_in_enclosure.csv @@ -0,0 +1,18 @@ +Name,Copy,URL +Test,"This is a test +with line breaks +that breaks the +delimiters",http://google.com +Test,"This is a test +with line breaks +that breaks the +delimiters",http://google.com +Test,"This is a test +with line breaks +that breaks the +delimiters",http://google.com +Test,"This is a test +with line breaks +that breaks the +delimiters",http://google.com +Test,"This is a test",http://google.com