2017-04-03 02:52:35 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace PhpOffice\PhpSpreadsheetTests\Reader;
|
|
|
|
|
2017-12-17 07:34:40 +00:00
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Csv;
|
Improve Coverage for CSV (#1475)
I believe that both CSV Reader and Writer are 100% covered now.
There were some errors uncovered during development.
The reader specifically permits encodings other than UTF-8 to be used.
However, fgetcsv will not properly handle other encodings.
I tried replacing it with fgets/iconv/strgetcsv, but that could not
handle line breaks within a cell, even for UTF-8.
This is, I'm sure, a very rare use case.
I eventually handled it by using php://memory to hold the translated
file contents for non-UTF8. There were no tests for this situation,
and now there are (probably too many).
"Contiguous" read was not handled correctly. There is a file
in samples which uses it. It was designed to read a large sheet,
and split it into three. The first sheet was correct, but the
second and third were almost entirely empty. This has been corrected,
and the sample code was adapted into a formal test with assertions
to confirm that it works as designed.
I made a minor documentation change. Unlike HTML, where you never
need a BOM because you can declare the encoding in the file,
a CSV with non-ASCII characters must explicitly include a BOM
for Excel to handle it correctly. This was explained in the Reading CSV
section, but was glossed over in the Writing CSV section, which I
have updated.
2020-05-17 09:15:18 +00:00
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
|
2017-11-08 15:48:01 +00:00
|
|
|
use PHPUnit\Framework\TestCase;
|
2017-04-03 02:52:35 +00:00
|
|
|
|
2017-11-08 15:48:01 +00:00
|
|
|
class CsvTest extends TestCase
|
2017-04-03 02:52:35 +00:00
|
|
|
{
|
2017-12-28 03:22:01 +00:00
|
|
|
/**
 * Loads a CSV file with no delimiter configured, then verifies the delimiter
 * the reader reports after loading and the value of one sample cell.
 *
 * @dataProvider providerDelimiterDetection
 *
 * @param string $filename path of the CSV file to load
 * @param string $expectedDelimiter delimiter the reader must report after loading
 * @param string $cell coordinate of the cell to inspect
 * @param null|float|int|string $expectedValue value expected in that cell
 */
public function testDelimiterDetection($filename, $expectedDelimiter, $cell, $expectedValue): void
{
    $csvReader = new Csv();
    // Before any file is loaded the reader must not report a delimiter.
    self::assertNull($csvReader->getDelimiter());

    $workbook = $csvReader->load($filename);
    $detectedDelimiter = $csvReader->getDelimiter();
    self::assertSame($expectedDelimiter, $detectedDelimiter, 'should be able to infer the delimiter');

    $targetCell = $workbook->getActiveSheet()->getCell($cell);
    self::assertSame($expectedValue, $targetCell->getValue(), 'should be able to retrieve correct value');
}
|
|
|
|
|
|
|
|
/**
 * Data sets for testDelimiterDetection().
 *
 * Each row is: [file path, expected delimiter, cell coordinate, expected cell value].
 *
 * Declared static so PHPUnit can call it without instantiating the test case;
 * newer PHPUnit releases deprecate (and later reject) non-static providers.
 *
 * @return array<int, array{0: string, 1: string, 2: string, 3: null|float|int|string}>
 */
public static function providerDelimiterDetection(): array
{
    return [
        ['tests/data/Reader/CSV/enclosure.csv', ',', 'C4', 'username2'],
        ['tests/data/Reader/CSV/semicolon_separated.csv', ';', 'C2', '25,5'],
        ['tests/data/Reader/CSV/line_break_in_enclosure.csv', ',', 'A3', 'Test'],
        ['tests/data/Reader/CSV/line_break_in_enclosure_with_escaped_quotes.csv', ',', 'A3', 'Test'],
        ['tests/data/Reader/HTML/csv_with_angle_bracket.csv', ',', 'B1', 'Number of items with weight <= 50kg'],
        ['samples/Reader/sampleData/example1.csv', ',', 'I4', '100%'],
        ['samples/Reader/sampleData/example2.csv', ',', 'D8', -58.373161],
        // Empty file: the expected delimiter is still ',' and A1 holds no value.
        ['tests/data/Reader/CSV/empty.csv', ',', 'A1', null],
        ['tests/data/Reader/CSV/no_delimiter.csv', ',', 'A1', 'SingleLine'],
    ];
}
|
2018-02-05 12:33:23 +00:00
|
|
|
|
|
|
|
/**
 * Verifies that Csv::canRead() returns the expected verdict for a file.
 *
 * @dataProvider providerCanLoad
 *
 * @param bool $expected verdict canRead() must return
 * @param string $filename path of the file to probe
 */
public function testCanLoad($expected, $filename): void
{
    $isReadable = (new Csv())->canRead($filename);

    self::assertSame($expected, $isReadable);
}
|
|
|
|
|
|
|
|
/**
 * Data sets for testCanLoad().
 *
 * Each row is: [expected canRead() result, file path].
 *
 * Declared static so PHPUnit can call it without instantiating the test case;
 * newer PHPUnit releases deprecate (and later reject) non-static providers.
 *
 * @return array<int, array{0: bool, 1: string}>
 */
public static function providerCanLoad(): array
{
    return [
        // Files expected to be rejected by the CSV reader.
        [false, 'tests/data/Reader/Ods/data.ods'],
        [false, 'samples/templates/excel2003.xml'],
        // Files expected to be accepted by the CSV reader.
        [true, 'tests/data/Reader/CSV/enclosure.csv'],
        [true, 'tests/data/Reader/CSV/semicolon_separated.csv'],
        [true, 'tests/data/Reader/CSV/contains_html.csv'],
        [true, 'tests/data/Reader/CSV/csv_without_extension'],
        [true, 'tests/data/Reader/HTML/csv_with_angle_bracket.csv'],
        [true, 'tests/data/Reader/CSV/empty.csv'],
        [true, 'samples/Reader/sampleData/example1.csv'],
        [true, 'samples/Reader/sampleData/example2.csv'],
    ];
}
|
2018-05-23 01:31:41 +00:00
|
|
|
|
2020-05-18 04:49:57 +00:00
|
|
|
/**
 * Reads backslash.csv with the escape character set to a double quote and
 * expects the fields to come back with their trailing backslashes intact.
 */
public function testEscapeCharacters(): void
{
    $csvReader = (new Csv())->setEscapeCharacter('"');
    $spreadsheet = $csvReader->load('tests/data/Reader/CSV/backslash.csv');
    $sheet = $spreadsheet->getActiveSheet();

    // The configured escape character must still be reported after loading.
    self::assertSame('"', $csvReader->getEscapeCharacter());
    self::assertSame(
        [
            ['field 1', 'field 2\\'],
            ['field 3\\', 'field 4'],
        ],
        $sheet->toArray()
    );
}
|
Improve Coverage for CSV (#1475)
I believe that both CSV Reader and Writer are 100% covered now.
There were some errors uncovered during development.
The reader specifically permits encodings other than UTF-8 to be used.
However, fgetcsv will not properly handle other encodings.
I tried replacing it with fgets/iconv/strgetcsv, but that could not
handle line breaks within a cell, even for UTF-8.
This is, I'm sure, a very rare use case.
I eventually handled it by using php://memory to hold the translated
file contents for non-UTF8. There were no tests for this situation,
and now there are (probably too many).
"Contiguous" read was not handled correctly. There is a file
in samples which uses it. It was designed to read a large sheet,
and split it into three. The first sheet was correct, but the
second and third were almost entirely empty. This has been corrected,
and the sample code was adapted into a formal test with assertions
to confirm that it works as designed.
I made a minor documentation change. Unlike HTML, where you never
need a BOM because you can declare the encoding in the file,
a CSV with non-ASCII characters must explicitly include a BOM
for Excel to handle it correctly. This was explained in the Reading CSV
section, but was glossed over in the Writing CSV section, which I
have updated.
2020-05-17 09:15:18 +00:00
|
|
|
|
|
|
|
/**
 * Loads a file using the given input encoding and checks that cell A1
 * is read as the character 'Å'.
 *
 * @dataProvider providerEncodings
 *
 * @param string $filename path of the encoded CSV file
 * @param string $encoding input encoding to configure on the reader
 */
public function testEncodings($filename, $encoding): void
{
    $csvReader = new Csv();
    $csvReader->setInputEncoding($encoding);

    $firstCell = $csvReader->load($filename)
        ->getActiveSheet()
        ->getCell('A1');

    self::assertEquals('Å', $firstCell->getValue());
}
|
|
|
|
|
2020-05-18 04:49:57 +00:00
|
|
|
/**
 * Requesting worksheet info for an empty file name must raise a reader exception.
 */
public function testInvalidWorkSheetInfo(): void
{
    // The expectation has to be registered before the failing call is made.
    $this->expectException(ReaderException::class);

    (new Csv())->listWorksheetInfo('');
}
|
|
|
|
|
|
|
|
/**
 * Checks the worksheet summary returned by listWorksheetInfo() for each
 * encoded sample file: one sheet named 'Worksheet', 2 rows by 2 columns.
 *
 * @dataProvider providerEncodings
 *
 * @param string $filename path of the encoded CSV file
 * @param string $encoding input encoding to configure on the reader
 */
public function testWorkSheetInfo($filename, $encoding): void
{
    $csvReader = new Csv();
    $csvReader->setInputEncoding($encoding);
    $worksheets = $csvReader->listWorksheetInfo($filename);

    // Expected entries of the first (index 0) worksheet description.
    $expectedInfo = [
        'worksheetName' => 'Worksheet',
        'lastColumnLetter' => 'B',
        'lastColumnIndex' => 1,
        'totalRows' => 2,
        'totalColumns' => 2,
    ];
    foreach ($expectedInfo as $key => $expectedValue) {
        self::assertEquals($expectedValue, $worksheets[0][$key]);
    }
}
|
|
|
|
|
|
|
|
/**
 * Data sets shared by testEncodings() and testWorkSheetInfo().
 *
 * Each row is: [file path, input encoding to set on the reader].
 *
 * Declared static so PHPUnit can call it without instantiating the test case;
 * newer PHPUnit releases deprecate (and later reject) non-static providers.
 *
 * @return array<int, array{0: string, 1: string}>
 */
public static function providerEncodings(): array
{
    return [
        ['tests/data/Reader/CSV/encoding.iso88591.csv', 'ISO-8859-1'],
        ['tests/data/Reader/CSV/encoding.utf8.csv', 'UTF-8'],
        ['tests/data/Reader/CSV/encoding.utf8bom.csv', 'UTF-8'],
        ['tests/data/Reader/CSV/encoding.utf16be.csv', 'UTF-16BE'],
        ['tests/data/Reader/CSV/encoding.utf16le.csv', 'UTF-16LE'],
        ['tests/data/Reader/CSV/encoding.utf32be.csv', 'UTF-32BE'],
        ['tests/data/Reader/CSV/encoding.utf32le.csv', 'UTF-32LE'],
    ];
}
|
|
|
|
|
2020-05-18 04:49:57 +00:00
|
|
|
/**
 * Reads a UTF-16BE file and expects cell B3 to contain a multi-line value,
 * line breaks included.
 */
public function testUtf16LineBreak(): void
{
    $csvReader = new Csv();
    $csvReader->setInputEncoding('UTF-16BE');
    $workbook = $csvReader->load('tests/data/Reader/CSV/utf16be.line_break_in_enclosure.csv');
    $multiLineCell = $workbook->getActiveSheet()->getCell('B3');

    // Heredoc body and terminator stay at column 0 so no leading whitespace enters the string.
    $expectedText = <<<EOF
This is a test
with line breaks
that breaks the
delimiters
EOF;

    self::assertEquals($expectedText, $multiLineCell->getValue());
}
|
|
|
|
|
2020-05-24 10:57:39 +00:00
|
|
|
/**
 * Reads a file whose cell B3 is expected to contain both line breaks and
 * double quotes, and checks the value read back.
 */
public function testLineBreakEscape(): void
{
    $workbook = (new Csv())->load('tests/data/Reader/CSV/line_break_in_enclosure_with_escaped_quotes.csv');
    $multiLineCell = $workbook->getActiveSheet()->getCell('B3');

    // Heredoc body and terminator stay at column 0 so no leading whitespace enters the string.
    $expectedText = <<<EOF
This is a "test csv file"
with both "line breaks"
and "escaped
quotes" that breaks
the delimiters
EOF;

    self::assertEquals($expectedText, $multiLineCell->getValue());
}
|
|
|
|
|
|
|
|
/**
 * Reads a UTF-32LE file and expects cell B3 to contain a value with both
 * line breaks and double quotes.
 */
public function testUtf32LineBreakEscape(): void
{
    $csvReader = new Csv();
    $csvReader->setInputEncoding('UTF-32LE');
    $workbook = $csvReader->load('tests/data/Reader/CSV/line_break_escaped_32le.csv');
    $multiLineCell = $workbook->getActiveSheet()->getCell('B3');

    // Heredoc body and terminator stay at column 0 so no leading whitespace enters the string.
    $expectedText = <<<EOF
This is a "test csv file"
with both "line breaks"
and "escaped
quotes" that breaks
the delimiters
EOF;

    self::assertEquals($expectedText, $multiLineCell->getValue());
}
|
|
|
|
|
2020-05-18 04:49:57 +00:00
|
|
|
/**
 * Loads sep.csv into sheet index 3 and checks that the reader reports ';'
 * as delimiter, that the sheet index is honoured, and that the four data
 * cells hold the expected values.
 */
public function testSeparatorLine(): void
{
    $csvReader = new Csv();
    $csvReader->setSheetIndex(3);
    $workbook = $csvReader->load('tests/data/Reader/CSV/sep.csv');

    self::assertEquals(';', $csvReader->getDelimiter());

    $activeSheet = $workbook->getActiveSheet();
    self::assertEquals(3, $csvReader->getSheetIndex());
    self::assertEquals(3, $workbook->getActiveSheetIndex());

    // Coordinate => expected value, checked in this order.
    $expectedCells = [
        'A1' => 'A',
        'B1' => 1,
        'A2' => 2,
        'B2' => 3,
    ];
    foreach ($expectedCells as $coordinate => $expectedValue) {
        self::assertEquals($expectedValue, $activeSheet->getCell($coordinate)->getValue());
    }
}
|
|
|
|
|
2020-05-18 04:49:57 +00:00
|
|
|
/**
 * Checks the defaults of a freshly constructed reader and the behaviour of
 * setEnclosure(): a custom enclosure is kept, an empty one yields '"' again.
 */
public function testDefaultSettings(): void
{
    $csvReader = new Csv();
    $doubleQuote = '"';
    $singleQuote = "'";

    self::assertEquals('UTF-8', $csvReader->getInputEncoding());
    self::assertEquals($doubleQuote, $csvReader->getEnclosure());

    $csvReader->setEnclosure($singleQuote);
    self::assertEquals($singleQuote, $csvReader->getEnclosure());

    // After setting an empty enclosure the getter reports the double quote.
    $csvReader->setEnclosure('');
    self::assertEquals($doubleQuote, $csvReader->getEnclosure());

    // following tests from BaseReader
    self::assertTrue($csvReader->getReadEmptyCells());
    self::assertFalse($csvReader->getIncludeCharts());
    self::assertNull($csvReader->getLoadSheetsOnly());
}
|
|
|
|
|
2020-05-18 04:49:57 +00:00
|
|
|
/**
 * Loading an empty file name must raise a reader exception.
 */
public function testReadEmptyFileName(): void
{
    // The expectation has to be registered before the failing call is made.
    $this->expectException(ReaderException::class);

    $csvReader = new Csv();
    $csvReader->load('');
}
|
|
|
|
|
2020-05-18 04:49:57 +00:00
|
|
|
/**
 * Loading a path with the suffix 'csvxxx' — presumably a file that does not
 * exist, going by the test name — must raise a reader exception.
 */
public function testReadNonexistentFileName(): void
{
    // The expectation has to be registered before the failing call is made.
    $this->expectException(ReaderException::class);

    $missingFile = 'tests/data/Reader/CSV/encoding.utf8.csvxxx';
    $csvReader = new Csv();
    $csvReader->load($missingFile);
}
|
2017-04-03 02:52:35 +00:00
|
|
|
}
|