diff --git a/CHANGELOG.md b/CHANGELOG.md
index 947d5f6..318bfcc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,10 @@ This project adheres to [Semantic Versioning](http://semver.org/).
## Unreleased
+## 1.3.3 / 2026-03-10
+
+* Fix handling and reporting of parse errors when parsing an HTML table.
+
## 1.3.2 / 2025-05-06
* Support PHP 8.4 (thanks @mharmuth)
diff --git a/src/TableParser/HTML/HTMLStringTableParser.php b/src/TableParser/HTML/HTMLStringTableParser.php
index d309d97..610e9b4 100644
--- a/src/TableParser/HTML/HTMLStringTableParser.php
+++ b/src/TableParser/HTML/HTMLStringTableParser.php
@@ -7,9 +7,12 @@
namespace Ingenerator\BehatTableAssert\TableParser\HTML;
+use Behat\Gherkin\Node\TableNode;
+use Dom\HTMLDocument;
use Ingenerator\BehatTableAssert\TableNode\PaddedTableNode;
use LibXMLError;
use Masterminds\HTML5;
+use function Dom\import_simplexml;
/**
* Parses an HTML string for a <table> element into a TableNode. The table must have a single row
@@ -90,33 +93,37 @@ public function parse($html)
*/
protected function parseHTMLString($html)
{
- $old_use_internal_errors = \libxml_use_internal_errors(TRUE);
- try {
- $html5 = new HTML5();
- $dom = $html5->loadHTML(
- ''
- .' '
- .''.\trim($html).''
- .''
- );
+ $html5 = new HTML5();
+ $dom = $html5->loadHTML(
+ ''
+ .' '
+ .''.\trim($html).''
+ .'',
+ );
- $table_elem = $dom->getElementsByTagName('body')->item(0)->firstChild;
- $table = \simplexml_import_dom($table_elem);
- if ($errors = \libxml_get_errors()) {
- $this->throwInvalidHTMLException($html, $errors);
- }
- } finally {
- \libxml_clear_errors();
- \libxml_use_internal_errors($old_use_internal_errors);
+ $table_elem = $dom->getElementsByTagName('body')->item(0)->firstChild;
+ if (!$table_elem instanceof \DOMElement) {
+ throw new \InvalidArgumentException(
+ sprintf("Expected html root element but got %s\n\n===HTML===\n%s", get_debug_type($table_elem), $html),
+ );
+ }
+ $table = \simplexml_import_dom($table_elem);
+ if ($errors = $html5->getErrors()) {
+ throw new \InvalidArgumentException(
+ sprintf(
+ "Invalid HTML:\n%s\n\n===HTML===\n%s",
+ implode("\n", array_map(fn($err) => ' - '.$err, $errors)),
+ $html,
+ ),
+ );
}
return $table;
}
/**
- * @param string $html
- * @param LibXMLError[] $errors
+ * @deprecated No longer called by the library: parseHTMLString() now throws its own exceptions for parse errors.
*/
protected function throwInvalidHTMLException($html, $errors)
{
diff --git a/test/TableParser/HTML/HTMLStringTableParserTest.php b/test/TableParser/HTML/HTMLStringTableParserTest.php
index 2303175..22fde44 100644
--- a/test/TableParser/HTML/HTMLStringTableParserTest.php
+++ b/test/TableParser/HTML/HTMLStringTableParserTest.php
@@ -35,37 +35,16 @@ public function test_it_throws_when_parsing_non_or_empty_string($value)
}
/**
- * @testWith ["random", false]
- * ["random", true]
- * ["
", false]
- * ["", false]
+ * @testWith ["random text", "Expected html root element but got DOMText"]
+ * ["<17sd>illegal tag name17sd>
", "Invalid HTML:\n - Line 1, Col 113: Illegal tag opening"]
+ * ["
", "Expected a but got div"]
*/
- public function test_it_always_restores_state_of_libxml_error_handling(
- $html,
- $use_errors_before
- ) {
- $old_setting = \libxml_use_internal_errors($use_errors_before);
- try {
- $this->newSubject()->parse($html);
- } catch (\Exception $e) { /* ignore */
- }
- $errors_after = \libxml_get_errors();
- $use_errors_after = \libxml_use_internal_errors($old_setting);
-
- $this->assertSame([], $errors_after, 'Should clear libxml errors');
- $this->assertEquals(
- $use_errors_before,
- $use_errors_after,
- 'Should restore libxml_use_internal_errors'
- );
- }
-
- public function test_it_throws_when_parsing_html_that_is_not_a_table()
+ public function test_it_throws_when_parsing_html_that_is_not_a_table_or_not_valid(string $input, string $expect_msg)
{
$this->expectException(\InvalidArgumentException::class);
- $this->expectExceptionMessage('Expected a ');
+ $this->expectExceptionMessage($expect_msg);
- $this->newSubject()->parse('
');
+ $this->newSubject()->parse($input);
}
public function test_it_throws_when_parsing_table_without_thead()
@@ -224,7 +203,7 @@ public function provider_valid_html_tables()
''.
'Header Date '.
''.
- 'Cell1 30 Aug 2016 '.
+ 'Cell1 30 Aug 2016 '.
'
',
[
['Header', 'Date'],