diff --git a/CHANGELOG.md b/CHANGELOG.md index 947d5f6..318bfcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ This project adheres to [Semantic Versioning](http://semver.org/). ## Unreleased +## 1.3.3 / 2026-03-10 + +* Fix handling and reporting of parse errors when parsing an HTML table. + ## 1.3.2 / 2025-05-06 * Support PHP 8.4 (thanks @mharmuth) diff --git a/src/TableParser/HTML/HTMLStringTableParser.php b/src/TableParser/HTML/HTMLStringTableParser.php index d309d97..610e9b4 100644 --- a/src/TableParser/HTML/HTMLStringTableParser.php +++ b/src/TableParser/HTML/HTMLStringTableParser.php @@ -7,9 +7,12 @@ namespace Ingenerator\BehatTableAssert\TableParser\HTML; +use Behat\Gherkin\Node\TableNode; +use Dom\HTMLDocument; use Ingenerator\BehatTableAssert\TableNode\PaddedTableNode; use LibXMLError; use Masterminds\HTML5; +use function Dom\import_simplexml; /** * Parses an HTML string for a <table> element into a TableNode. The table must have a single row @@ -90,33 +93,37 @@ public function parse($html) */ protected function parseHTMLString($html) { - $old_use_internal_errors = \libxml_use_internal_errors(TRUE); - try { - $html5 = new HTML5(); - $dom = $html5->loadHTML( - '' - .'' - .''.\trim($html).'' - .'' - ); + $html5 = new HTML5(); + $dom = $html5->loadHTML( + '' + .'' + .''.\trim($html).'' + .'', + ); - $table_elem = $dom->getElementsByTagName('body')->item(0)->firstChild; - $table = \simplexml_import_dom($table_elem); - if ($errors = \libxml_get_errors()) { - $this->throwInvalidHTMLException($html, $errors); - } - } finally { - \libxml_clear_errors(); - \libxml_use_internal_errors($old_use_internal_errors); + $table_elem = $dom->getElementsByTagName('body')->item(0)->firstChild; + if (!$table_elem instanceof \DOMElement) { + throw new \InvalidArgumentException( + sprintf("Expected html root element but got %s\n\n===HTML===\n%s", get_debug_type($table_elem), $html), + ); + } + $table = \simplexml_import_dom($table_elem); + if ($errors = $html5->getErrors()) { 
+ throw new \InvalidArgumentException( + sprintf( + "Invalid HTML:\n%s\n\n===HTML===\n%s", + implode("\n", array_map(fn($err) => ' - '.$err, $errors)), + $html, + ), + ); } return $table; } /** - * @param string $html - * @param LibXMLError[] $errors + * @deprecated no longer called by the library */ protected function throwInvalidHTMLException($html, $errors) { diff --git a/test/TableParser/HTML/HTMLStringTableParserTest.php b/test/TableParser/HTML/HTMLStringTableParserTest.php index 2303175..22fde44 100644 --- a/test/TableParser/HTML/HTMLStringTableParserTest.php +++ b/test/TableParser/HTML/HTMLStringTableParserTest.php @@ -35,37 +35,16 @@ public function test_it_throws_when_parsing_non_or_empty_string($value) } /** - * @testWith ["random", false] - * ["random", true] - * ["
", false] - * ["
1
", false] + * @testWith ["random text", "Expected html root element but got DOMText"] + * ["
<17sd>illegal tag name
", "Invalid HTML:\n - Line 1, Col 113: Illegal tag opening"] + * ["
", "Expected a but got div"] */ - public function test_it_always_restores_state_of_libxml_error_handling( - $html, - $use_errors_before - ) { - $old_setting = \libxml_use_internal_errors($use_errors_before); - try { - $this->newSubject()->parse($html); - } catch (\Exception $e) { /* ignore */ - } - $errors_after = \libxml_get_errors(); - $use_errors_after = \libxml_use_internal_errors($old_setting); - - $this->assertSame([], $errors_after, 'Should clear libxml errors'); - $this->assertEquals( - $use_errors_before, - $use_errors_after, - 'Should restore libxml_use_internal_errors' - ); - } - - public function test_it_throws_when_parsing_html_that_is_not_a_table() + public function test_it_throws_when_parsing_html_that_is_not_a_table_or_not_valid(string $input, string $expect_msg) { $this->expectException(\InvalidArgumentException::class); - $this->expectExceptionMessage('Expected a
'); + $this->expectExceptionMessage($expect_msg); - $this->newSubject()->parse('
'); + $this->newSubject()->parse($input); } public function test_it_throws_when_parsing_table_without_thead() @@ -224,7 +203,7 @@ public function provider_valid_html_tables() '
'. ''. ''. - ''. + ''. '
HeaderDate
Cell1
Cell1
', [ ['Header', 'Date'],