Better auto-detection of CSV separators

Closes #305
This commit is contained in:
Adrien Crivelli 2017-12-28 12:22:01 +09:00
parent ac1c7a2c7d
commit 139d85d874
No known key found for this signature in database
GPG Key ID: B182FD79DC6DE92E
4 changed files with 68 additions and 7 deletions

View File

@ -5,12 +5,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased]
### Added
### Fixed
- Better auto-detection of CSV separators - [#305](https://github.com/PHPOffice/PhpSpreadsheet/issues/305)
## [1.0.0] - 2017-12-25
### Added
- Support to write merged cells in ODS format [#287](https://github.com/PHPOffice/PhpSpreadsheet/issues/287)
- Able to set the `topLeftCell` in freeze panes [#261](https://github.com/PHPOffice/PhpSpreadsheet/pull/261)
- Support to write merged cells in ODS format - [#287](https://github.com/PHPOffice/PhpSpreadsheet/issues/287)
- Able to set the `topLeftCell` in freeze panes - [#261](https://github.com/PHPOffice/PhpSpreadsheet/pull/261)
- Support `DateTimeImmutable` as cell value
- Support migration of prefixed classes

View File

@ -155,6 +155,10 @@ class Csv extends BaseReader
// Count how many times each of the potential delimiters appears in each line
$numberLines = 0;
while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) {
// Drop everything that is enclosed to avoid counting false positives in enclosures
$enclosure = preg_quote($this->enclosure, '/');
$line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line);
$countLine = [];
for ($i = strlen($line) - 1; $i >= 0; --$i) {
$char = $line[$i];
@ -223,6 +227,8 @@ class Csv extends BaseReader
* @param string $pFilename
*
* @throws Exception
*
* @return array
*/
public function listWorksheetInfo($pFilename)
{

View File

@ -7,17 +7,60 @@ use PHPUnit\Framework\TestCase;
class CsvTest extends TestCase
{
public function testDelimiterDetection()
/**
* @dataProvider providerDelimiterDetection
*
* @param string $filename
* @param string $expectedDelimiter
* @param string $cell
* @param float|int|string $expectedValue
*/
public function testDelimiterDetection($filename, $expectedDelimiter, $cell, $expectedValue)
{
$reader = new Csv();
self::assertNull($reader->getDelimiter());
$filename = __DIR__ . '/../../data/Reader/CSV/semicolon_separated.csv';
$spreadsheet = $reader->load($filename);
self::assertSame(';', $reader->getDelimiter(), 'should be able to infer the delimiter');
self::assertSame($expectedDelimiter, $reader->getDelimiter(), 'should be able to infer the delimiter');
$actual = $spreadsheet->getActiveSheet()->getCell('C2')->getValue();
self::assertSame('25,5', $actual, 'should be able to retrieve values with commas');
$actual = $spreadsheet->getActiveSheet()->getCell($cell)->getValue();
self::assertSame($expectedValue, $actual, 'should be able to retrieve correct value');
}
/**
 * Data provider for testDelimiterDetection().
 *
 * Every data set is laid out in the order of that test's parameters:
 * file to load, delimiter expected to be inferred, coordinate of the cell
 * to inspect, and the value expected in that cell.
 *
 * @return array
 */
public function providerDelimiterDetection()
{
    $dataSets = [];

    // Fixtures kept with the test data
    $dataSets[] = [__DIR__ . '/../../data/Reader/CSV/enclosure.csv', ',', 'C4', 'username2'];
    $dataSets[] = [__DIR__ . '/../../data/Reader/CSV/semicolon_separated.csv', ';', 'C2', '25,5'];
    $dataSets[] = [
        __DIR__ . '/../../data/Reader/HTML/csv_with_angle_bracket.csv',
        ',',
        'B1',
        'Number of items with weight <= 50kg',
    ];

    // Files borrowed from the samples directory
    $dataSets[] = [__DIR__ . '/../../../samples/Reader/sampleData/example1.csv', ',', 'I4', '100%'];
    $dataSets[] = [__DIR__ . '/../../../samples/Reader/sampleData/example2.csv', ',', 'D8', -58.373161];

    return $dataSets;
}
}

View File

@ -0,0 +1,4 @@
"Date/Time","task","username"
"12/17/2017 14:35","viewpage","username1",
"12/17/2017 04:00","editpage","username5",
"11/29/2017 08:20","deletepage","username2",,
Can't render this file because it has a wrong number of fields in line 2.