diff --git a/README.md b/README.md old mode 100755 new mode 100644 index 82524ed..42eaf18 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# eXeLearning .elp Parser for PHP +# eXeLearning `.elp` / `.elpx` Parser for PHP -Simple, fast, and extension-free parser for eXeLearning project files +Simple parser for eXeLearning project files.

Features | @@ -16,16 +16,16 @@ Simple, fast, and extension-free parser for eXeLearning project files ## Features -**ELP Parser** provides a simple and intuitive API to parse eXeLearning project files (.elp): +`ELPParser` supports the two eXeLearning project families described in the upstream format docs: -- Parse both version 2 and version 3 .elp files -- Extract text content from XML -- Detect file version -- Extract entire .elp file contents -- Retrieve full metadata tree -- No external extensions required -- Lightweight and easy to use (less than 4 KB footprint library) -- Compatible with PHP 8.0 to PHP 8.5 +- Legacy `.elp` projects from eXeLearning 2.x based on `contentv3.xml` +- Modern `.elpx` projects from eXeLearning 3+ based on `content.xml` and ODE 2.0 +- Modern `.elp` exports that also use `content.xml` +- Detection of eXeLearning major version when the package exposes it +- Heuristic detection of likely v4-style `.elpx` packages using root `content.dtd` +- Extraction of normalized metadata, strings, pages, idevices and asset references +- Safe archive extraction with ZIP path traversal checks +- JSON serialization support For more information, visit the [documentation](https://exelearning.github.io/elp-parser/). @@ -33,12 +33,11 @@ For more information, visit the [documentation](https://exelearning.github.io/el - PHP 8.0+ - Composer -- zip extension +- `zip` extension +- `simplexml` extension ## Installation -Install the package via Composer: - ```bash composer require exelearning/elp-parser ``` @@ -51,78 +50,108 @@ composer require exelearning/elp-parser use Exelearning\ELPParser; try { - // Parse an .elp file - $parser = ELPParser::fromFile('/path/to/your/project.elp'); - - // Get the file version - $version = $parser->getVersion(); // Returns 2 or 3 - - // Get metadata fields + $parser = ELPParser::fromFile('/path/to/project.elpx'); + + $version = $parser->getVersion(); $title = $parser->getTitle(); $description = $parser->getDescription(); $author = $parser->getAuthor(); $license = $parser->getLicense(); $language = $parser->getLanguage(); - // Get all extracted strings - $strings = $parser->getStrings(); - - // Print extracted strings - foreach ($strings as $string) { + foreach ($parser->getStrings() as $string) { echo $string . "\n"; } } catch (Exception $e) { - echo "Error parsing ELP file: " . $e->getMessage(); + echo "Error parsing project: " . $e->getMessage(); } ``` -### File Extraction +### Format Inspection ```php use Exelearning\ELPParser; -try { - $parser = ELPParser::fromFile('/path/to/your/project.elp'); - - // Extract entire .elp contents to a directory - $parser->extract('/path/to/destination/folder'); -} catch (Exception $e) { - echo "Error extracting ELP file: " . $e->getMessage(); -} +$parser = ELPParser::fromFile('/path/to/project.elpx'); + +echo $parser->getSourceExtension(); // elp | elpx +echo $parser->getContentFormat(); // legacy-contentv3 | ode-content +echo $parser->getContentFile(); // contentv3.xml | content.xml +echo $parser->getContentSchemaVersion(); // 2.0 for modern ODE packages +echo $parser->getExeVersion(); // raw upstream version string when present +echo $parser->getResourceLayout(); // none | content-resources | legacy-temp-paths | mixed +var_dump($parser->hasRootDtd()); // true when content.dtd exists at archive root +var_dump($parser->isLikelyVersion4Package()); ``` -### Advanced Usage +### Pages and Assets ```php -// Convert parsed data to array -$data = $parser->toArray(); +$pages = $parser->getPages(); +$visiblePages = $parser->getVisiblePages(); +$blocks = $parser->getBlocks(); +$idevices = $parser->getIdevices(); +$pageTexts = $parser->getPageTexts(); +$visiblePageTexts = $parser->getVisiblePageTexts(); +$firstPageText = $parser->getPageTextById($pages[0]['id']); +$teacherOnlyIdevices = $parser->getTeacherOnlyIdevices(); +$hiddenIdevices = $parser->getHiddenIdevices(); +$assets = $parser->getAssets(); +$images = $parser->getImages(); +$audioFiles = $parser->getAudioFiles(); +$videoFiles = $parser->getVideoFiles(); +$documents = $parser->getDocuments(); +$assetsDetailed = $parser->getAssetsDetailed(); +$orphanAssets = $parser->getOrphanAssets(); +$metadata = $parser->getMetadata(); +``` + +In modern `content.xml` packages, assets usually live under paths such as `content/resources/...`. +Older projects and some transitional exports may still reference legacy layouts such as `files/tmp/...`. +The parser exposes this through `getResourceLayout()`. -// JSON serialization -$jsonData = json_encode($parser); +### Export JSON -// Export directly to a JSON file -$parser->exportJson('path/to/output.json'); +```php +$json = $parser->exportJson(); +$parser->exportJson('/path/to/output.json'); +``` -// Retrieve full metadata as array -$meta = $parser->getMetadata(); +### Extract Project Files + +```php +$parser->extract('/path/to/destination'); ``` -## Error Handling +## Version Compatibility + +The parser distinguishes between project format and eXeLearning version: -The parser includes robust error handling: -- Detects invalid .elp files -- Throws exceptions for parsing errors -- Supports both version 2 and 3 file formats +- `getContentFormat()` tells you whether the package uses legacy `contentv3.xml` or modern `content.xml` +- `getVersion()` reports the detected eXeLearning major version +- In practice this means: + - eXeLearning 2.x legacy `.elp` => version `2` + - modern ODE-based `.elp` => usually version `3` + - `.elpx` packages with root `content.dtd` are treated as likely v4-style packages and currently report version `4` + - otherwise modern ODE-based packages default to version `3` -## Performance +This distinction matters because some projects created with newer eXeLearning builds still identify themselves internally with `exe_version=3.0`, so strict `v4` detection is not always possible from the package alone. +For that reason, the library combines explicit metadata with format heuristics: -- Lightweight implementation -- Minimal memory footprint -- Fast XML parsing using native PHP extensions +- `.elpx` +- `content.xml` +- root `content.dtd` +- optionally `content/resources/...` as the modern resource layout + +## Error Handling -## Contributing +The parser throws exceptions for: -Contributions are welcome! Please submit pull requests or open issues on our GitHub repository. +- Missing files +- Invalid ZIP archives +- Unsupported project layouts +- XML parsing failures +- Unsafe archive entries during extraction ## License diff --git a/docs/api.md b/docs/api.md index 003de18..ba641b0 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,140 +1,212 @@ # API Reference -## ELPParser Class +## `Exelearning\ELPParser` -ELPParser class for parsing .elp (eXeLearning) project files. This class provides functionality to parse .elp files, which are ZIP archives containing XML content for eXeLearning projects. It supports both version 2 and 3 formats. +Parser for eXeLearning `.elp` and `.elpx` project files. -**Namespace:** `Exelearning` +Supported project families: -**Implements:** `JsonSerializable` +- Legacy `.elp` packages using `contentv3.xml` +- Modern `.elp` / `.elpx` packages using `content.xml` ### Constructor #### `__construct(string $filePath)` -Create a new ELPParser instance. - -- **Parameters:** - - `$filePath` (string): Path to the .elp file -- **Throws:** `Exception` if file cannot be opened or is invalid -- **Return:** void +Create a new parser instance from a project file path. ### Static Methods #### `fromFile(string $filePath): ELPParser` -Static method to create an ELPParser from a file path. - -- **Parameters:** - - `$filePath` (string): Path to the .elp file -- **Throws:** `Exception` if file cannot be opened or is invalid -- **Returns:** `ELPParser` +Create a parser instance from a file path. -### Public Methods +### Core Metadata #### `getVersion(): int` -Get the detected ELP file version. +Return the detected eXeLearning major version. + +#### `getTitle(): string` + +Return the project title. + +#### `getDescription(): string` + +Return the project description. + +#### `getAuthor(): string` + +Return the project author. + +#### `getLicense(): string` + +Return the project license. + +#### `getLanguage(): string` + +Return the project language. + +#### `getLearningResourceType(): string` + +Return the learning resource type when present. + +### Format Introspection + +#### `getSourceExtension(): string` + +Return the source file extension, usually `elp` or `elpx`. + +#### `getContentFormat(): string` + +Return the detected internal project format: + +- `legacy-contentv3` +- `ode-content` + +#### `getContentFile(): string` + +Return the XML entry used by the package: + +- `contentv3.xml` +- `content.xml` + +#### `getContentSchemaVersion(): ?string` + +Return the modern ODE schema version when available. + +#### `getExeVersion(): ?string` + +Return the raw upstream eXeLearning version string when available. + +#### `getResourceLayout(): string` + +Return the detected resource layout family: -- **Returns:** int - ELP file version (2 or 3) +- `content-resources` +- `legacy-temp-paths` +- `mixed` +- `none` + +#### `hasRootDtd(): bool` + +Return `true` when the archive contains `content.dtd` at the root. + +#### `isLikelyVersion4Package(): bool` + +Return `true` when the package matches the current v4 heuristic: + +- `.elpx` +- modern `content.xml` / ODE package +- root `content.dtd` + +#### `isLegacyFormat(): bool` + +Return `true` for legacy `contentv3.xml` projects. + +### Parsed Content #### `getStrings(): array` -Get all extracted strings. +Return extracted strings from the project. -- **Returns:** array - List of extracted strings +#### `getPages(): array` -#### `getTitle(): string` +Return parsed page information, including block and idevice data when available. -Get the title of the ELP content. +#### `getVisiblePages(): array` -- **Returns:** string +Return only visible pages. -#### `getDescription(): string` +#### `getBlocks(): array` -Get the description of the ELP content. +Return all parsed blocks across all pages. -- **Returns:** string +#### `getIdevices(): array` -#### `getAuthor(): string` +Return all parsed idevices across all pages. -Get the author of the ELP content. +#### `getPageTexts(): array` -- **Returns:** string +Return grouped text content for each page, including the per-idevice text list and a concatenated page text. -#### `getLicense(): string` +#### `getVisiblePageTexts(): array` -Get the license of the ELP content. +Return grouped text content for visible pages only. -- **Returns:** string +#### `getPageTextById(string $pageId): ?array` -#### `getLanguage(): string` +Return grouped text content for a single page, or `null` if the page ID does not exist. -Get the language of the ELP content. +#### `getTeacherOnlyIdevices(): array` -- **Returns:** string +Return idevices marked as teacher-only. -#### `getLearningResourceType(): string` +#### `getHiddenIdevices(): array` -Get the learning resource type. +Return idevices whose visibility flag is false. -- **Returns:** string +#### `getAssets(): array` -#### `toArray(): array` +Return referenced asset paths detected in the parsed content. -Convert parser data to an array. +#### `getAssetsDetailed(): array` -- **Returns:** array - Array containing: - - version: int - - title: string - - description: string - - author: string - - license: string - - language: string - - learningResourceType: string - - strings: array +Return detailed asset records including path, type, extension, page origins, idevice origins and occurrence count. -#### `jsonSerialize(): mixed` +#### `getImages(): array` -JSON serialization method implementing JsonSerializable interface. +Return image asset paths. -- **Returns:** mixed - Data to be JSON serialized +#### `getAudioFiles(): array` -#### `exportJson(?string $destinationPath = null): string` +Return audio asset paths. -Export parsed data to JSON. If a destination path is provided, the JSON will be written to that file. +#### `getVideoFiles(): array` -- **Parameters:** - - `$destinationPath` (string|null): Optional file path for the JSON output -- **Throws:** `Exception` if the JSON cannot be written -- **Returns:** string - JSON representation of the parsed data +Return video asset paths. + +#### `getDocuments(): array` + +Return document asset paths. + +#### `getOrphanAssets(): array` + +Return asset files present in the ZIP archive but not referenced by the parsed content. + +#### `getArchiveEntries(): array` + +Return the archive entry names inside the package. #### `getMetadata(): array` -Return a detailed metadata array containing Package, Dublin Core, LOM and LOM-ES -information together with the page tree. +Return normalized metadata for the project. + +### Serialization + +#### `toArray(): array` + +Return a compact array summary with: + +- `version` +- `title` +- `description` +- `author` +- `license` +- `language` +- `learningResourceType` +- `strings` + +#### `jsonSerialize(): mixed` + +Return the value used for JSON serialization. + +#### `exportJson(?string $destinationPath = null): string` + +Return the JSON representation of the compact summary and optionally write it to disk. -- **Throws:** `Exception` if the XML cannot be parsed -- **Returns:** array - Metadata structure +### Extraction #### `extract(string $destinationPath): void` -Extract contents of an ELP file to a specified directory. - -- **Parameters:** - - `$destinationPath` (string): Directory to extract contents to -- **Throws:** `Exception` if extraction fails -- **Returns:** void - -### Protected Properties - -- `$filePath` (string): Path to the .elp file -- `$version` (int): ELP file version (2 or 3) -- `$content` (array): Extracted content and metadata -- `$strings` (array): Raw extracted strings -- `$title` (string): Title of the ELP content -- `$description` (string): Description of the ELP content -- `$author` (string): Author of the ELP content -- `$license` (string): License of the ELP content -- `$language` (string): Language of the ELP content -- `$learningResourceType` (string): Learning resource type +Extract the package contents to a directory. Extraction is validated entry by entry to block unsafe ZIP paths. diff --git a/docs/index.md b/docs/index.md index 8355f28..aecfdc6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,177 +1,116 @@ # ELP Parser Documentation -ELP Parser is a PHP library designed to parse and extract content from ELP (eXe Learning Package) files. It provides a simple and intuitive interface to access metadata and content from ELP packages. +ELP Parser is a PHP library for reading eXeLearning project packages in both legacy and modern formats. + +## Supported Formats + +The library follows the format split documented in the upstream eXeLearning project: + +- Legacy `.elp` packages from eXeLearning 2.x use `contentv3.xml` +- Modern `.elpx` packages from eXeLearning 3+ use `content.xml` with the ODE 2.0 model +- Some modern `.elp` packages also use `content.xml`, so the parser detects the internal format instead of relying on the extension alone ## Features -- Parse ELP files (both version 2 and 3 supported) -- Extract metadata like title, description, author, etc. -- Retrieve a complete metadata tree -- Access learning resource information -- Extract package contents to a specified directory -- JSON serialization support +- Parse `.elp` and `.elpx` +- Detect legacy `contentv3.xml` vs modern `content.xml` +- Detect eXeLearning major version when exposed by the package metadata +- Detect likely v4-style `.elpx` packages using root `content.dtd` +- Read title, description, author, license, language and learning resource type +- Retrieve normalized metadata +- Enumerate pages, idevices and asset references +- Extract package contents safely +- Export parsed summaries as JSON ## Quick Example ```php getTitle(); -$author = $parser->getAuthor(); -$description = $parser->getDescription(); +require 'vendor/autoload.php'; -// Extract contents -$parser->extract('destination/path'); -``` +use Exelearning\ELPParser; ---- +$parser = ELPParser::fromFile('path/to/project.elpx'); -## Getting Started +echo $parser->getTitle() . PHP_EOL; +echo $parser->getVersion() . PHP_EOL; +echo $parser->getContentFormat() . PHP_EOL; +echo $parser->getResourceLayout() . PHP_EOL; -### Installation +foreach ($parser->getPages() as $page) { + echo $page['title'] . PHP_EOL; +} +``` -Install the ELP Parser via Composer by running the following command in your project directory: +## Installation ```bash composer require exelearning/elp-parser ``` -### Basic Usage - -Here's a simple example to get you started with ELP Parser: +## Basic Usage ```php -getTitle() . "\n"; echo "Author: " . $parser->getAuthor() . "\n"; echo "Description: " . $parser->getDescription() . "\n"; - -// Get all strings from the package -$strings = $parser->getStrings(); - -// Extract the package contents -$parser->extract('path/to/destination'); ``` -### Configuration - -No additional configuration is required. The library works out of the box once installed. - -### Next Steps - -- Explore advanced usage examples below -- Refer to the API Reference for a complete list of available methods - ---- - -## Usage Guide - -### Working with ELP Files - -#### Parsing an ELP File - -To parse an ELP file, use the following code: +## Format Inspection ```php -use Exelearning\ElpParser\ElpParser; - -$parser = ElpParser::fromFile('path/to/package.elp'); +echo $parser->getSourceExtension() . "\n"; +echo $parser->getContentFile() . "\n"; +echo $parser->getContentFormat() . "\n"; +echo $parser->getContentSchemaVersion() . "\n"; +echo $parser->getExeVersion() . "\n"; +echo $parser->getResourceLayout() . "\n"; +var_dump($parser->hasRootDtd()); +var_dump($parser->isLikelyVersion4Package()); ``` -#### Accessing Metadata - -The parser provides several methods to access package metadata: +## Pages, Strings and Assets ```php -getTitle(); -$description = $parser->getDescription(); -$author = $parser->getAuthor(); -$license = $parser->getLicense(); -$language = $parser->getLanguage(); -$resourceType = $parser->getLearningResourceType(); - -// Get the ELP version -$version = $parser->getVersion(); - -// Get all strings from the package $strings = $parser->getStrings(); +$pages = $parser->getPages(); +$visiblePages = $parser->getVisiblePages(); +$blocks = $parser->getBlocks(); +$idevices = $parser->getIdevices(); +$pageTexts = $parser->getPageTexts(); +$visiblePageTexts = $parser->getVisiblePageTexts(); +$pageText = $parser->getPageTextById($pages[0]['id']); +$teacherOnlyIdevices = $parser->getTeacherOnlyIdevices(); +$hiddenIdevices = $parser->getHiddenIdevices(); +$assets = $parser->getAssets(); +$images = $parser->getImages(); +$audioFiles = $parser->getAudioFiles(); +$videoFiles = $parser->getVideoFiles(); +$documents = $parser->getDocuments(); +$assetsDetailed = $parser->getAssetsDetailed(); +$orphanAssets = $parser->getOrphanAssets(); +$metadata = $parser->getMetadata(); ``` -#### Extracting Contents +In modern ODE-based projects, referenced media commonly appears under `content/resources/...`. +Legacy packages and some older exports may still reference resource paths closer to `files/tmp/...`. +The parser classifies this through `getResourceLayout()`. -To extract the contents of an ELP package: +## Extraction ```php $parser->extract('path/to/destination'); ``` -#### Converting to Array or JSON - -The parser implements `JsonSerializable` and provides methods for data conversion: - -```php -toArray(); - -// Get JSON representation -$json = json_encode($parser); -``` - -#### Exporting to a JSON file - -You can directly export the parsed data to a JSON file using `exportJson()`: - -```php -$parser->exportJson('path/to/output.json'); - -// Obtain a metadata tree -$meta = $parser->getMetadata(); -``` - -### Version Compatibility - -The library supports both version 2 and version 3 of ELP files. The parsing process automatically detects the version and handles the content appropriately. - -### Error Handling - -It's recommended to wrap operations in try-catch blocks to handle potential exceptions: - -```php -getMessage(); -} -``` - ---- - -## Requirements - -- PHP 8.0 or higher -- SimpleXML extension -- ZipArchive extension - ---- - -## Summary +## Version Notes -With ELP Parser, you can efficiently parse, extract, and manipulate the contents of ELP files. The library's flexibility and ease of use make it an excellent choice for working with eXe Learning Packages. Explore more advanced usage and detailed API documentation to harness its full potential. +- `getVersion()` reports the detected eXeLearning major version +- `getContentFormat()` reports the internal package model +- This lets the library handle eXeLearning 2.x and modern ODE-based packages through a single API +- Newer builds may still expose `exe_version=3.0`, so exact major detection is not always possible from metadata alone +- The library therefore uses a heuristic for likely v4-style packages: `.elpx` + modern ODE layout + root `content.dtd` diff --git a/src/ElpParser.php b/src/ElpParser.php index 4280279..09149b2 100644 --- a/src/ElpParser.php +++ b/src/ElpParser.php @@ -14,15 +14,17 @@ namespace Exelearning; -use ZipArchive; -use SimpleXMLElement; use Exception; +use JsonSerializable; +use SimpleXMLElement; +use ZipArchive; /** - * ELPParser class for parsing .elp (eXeLearning) project files + * Parser for eXeLearning project files. * - * This class provides functionality to parse .elp files, which are ZIP archives - * containing XML content for eXeLearning projects. It supports both version 2 and 3 formats. + * Supported project formats: + * - Legacy .elp packages based on contentv3.xml from eXeLearning 2.x + * - Modern .elp/.elpx packages based on content.xml (ODE 2.0) from eXeLearning 3+ * * @category Parser * @package Exelearning @@ -30,292 +32,1290 @@ * @license MIT https://opensource.org/licenses/MIT * @link https://github.com/exelearning/elp-parser */ -class ELPParser implements \JsonSerializable +class ELPParser implements JsonSerializable { /** - * Path to the .elp file + * Path to the project file. * * @var string */ protected string $filePath; /** - * ELP file version (2 or 3) + * Detected eXeLearning major version. + * + * Legacy projects are reported as version 2. Modern ODE-based projects + * are treated as version 3+ and default to 3 when the package does not + * expose a higher major version explicitly. * * @var int */ - protected int $version; + protected int $version = 2; + + /** + * Source file extension, usually elp or elpx. + * + * @var string + */ + protected string $sourceExtension = ''; + + /** + * Project content format. + * + * Possible values: + * - legacy-contentv3 + * - ode-content + * + * @var string + */ + protected string $contentFormat = ''; + + /** + * XML entry name inside the archive. + * + * @var string + */ + protected string $contentFile = ''; + + /** + * ODE schema version when available. + * + * @var string|null + */ + protected ?string $contentSchemaVersion = null; + + /** + * Raw eXeLearning version string when available. + * + * @var string|null + */ + protected ?string $exeVersion = null; + + /** + * Archive file list. + * + * @var array + */ + protected array $archiveEntries = []; + + /** + * Whether the package includes a root content.dtd file. + * + * @var bool + */ + protected bool $hasRootDtd = false; + + /** + * Detected resource layout family. + * + * Possible values: + * - content-resources + * - legacy-temp-paths + * - mixed + * - none + * + * @var string + */ + protected string $resourceLayout = 'none'; + + /** + * Parsed legacy dictionary data. + * + * @var array + */ + protected array $legacyData = []; /** - * Extracted content and metadata + * Parsed modern ODE properties. * * @var array */ - protected array $content = []; + protected array $odeProperties = []; /** - * Raw extracted strings + * Parsed modern ODE resources. + * + * @var array + */ + protected array $odeResources = []; + + /** + * Raw extracted strings. * * @var array */ protected array $strings = []; /** - * Title of the ELP content + * Parsed page information. + * + * @var array + */ + protected array $pages = []; + + /** + * Referenced assets found in content. + * + * @var array + */ + protected array $assets = []; + + /** + * Detailed referenced asset information. + * + * @var array + */ + protected array $assetsDetailed = []; + + /** + * Title of the project. * * @var string */ protected string $title = ''; /** - * Description of the ELP content + * Description of the project. * * @var string */ protected string $description = ''; /** - * Author of the ELP content + * Author of the project. * * @var string */ protected string $author = ''; /** - * License of the ELP content + * License of the project. * * @var string */ protected string $license = ''; /** - * Language of the ELP content + * Language of the project. * * @var string */ protected string $language = ''; /** - * Learning resource type + * Learning resource type. * * @var string */ protected string $learningResourceType = ''; /** - * Create a new ELPParser instance + * Create a new parser instance. + * + * @param string $filePath Path to the project file + * + * @throws Exception If file cannot be opened or is invalid + * @return void + */ + public function __construct(string $filePath) + { + $this->filePath = $filePath; + $this->sourceExtension = strtolower(pathinfo($filePath, PATHINFO_EXTENSION)); + $this->parse(); + } + + /** + * Create a parser from a file path. * - * @param string $filePath Path to the .elp file + * @param string $filePath Path to the project file * * @throws Exception If file cannot be opened or is invalid + * @return self + */ + public static function fromFile(string $filePath): self + { + return new self($filePath); + } + + /** + * Detect the project format and parse its contents. + * + * @throws Exception If file parsing fails + * @return void + */ + protected function parse(): void + { + if (!file_exists($this->filePath)) { + throw new Exception('File does not exist.'); + } + + $zip = new ZipArchive(); + if ($zip->open($this->filePath) !== true) { + throw new Exception('The file is not a valid ZIP file.'); + } + + $this->archiveEntries = $this->readArchiveEntries($zip); + $this->hasRootDtd = in_array('content.dtd', $this->archiveEntries, true); + $this->resourceLayout = $this->detectResourceLayoutFromArchiveEntries($this->archiveEntries); + + if ($zip->locateName('contentv3.xml') !== false) { + $this->contentFormat = 'legacy-contentv3'; + $this->contentFile = 'contentv3.xml'; + $this->version = 2; + } elseif ($zip->locateName('content.xml') !== false) { + $this->contentFormat = 'ode-content'; + $this->contentFile = 'content.xml'; + } else { + $zip->close(); + throw new Exception('Invalid ELP file: No content XML found.'); + } + + $xmlContent = $zip->getFromName($this->contentFile); + $zip->close(); + + if ($xmlContent === false) { + throw new Exception('Failed to read XML content.'); + } + + $xml = $this->loadXml($xmlContent); + + if ($this->contentFormat === 'legacy-contentv3') { + $this->parseLegacyXml($xml); + return; + } + + $this->parseModernXml($xml); + } + + /** + * Parse a legacy contentv3.xml project. + * + * @param SimpleXMLElement $xml Parsed XML document + * + * @return void + */ + protected function parseLegacyXml(SimpleXMLElement $xml): void + { + $data = $this->parseElement($xml); + $this->legacyData = is_array($data) ? $data : []; + + $this->title = $this->legacyData['_title'] ?? ''; + $this->description = $this->legacyData['_description'] ?? ''; + $this->author = $this->legacyData['_author'] ?? ''; + $this->license = $this->legacyData['license'] ?? ''; + $this->language = $this->legacyData['_lang'] ?? ''; + $this->learningResourceType = $this->legacyData['_learningResourceType'] ?? ''; + + $this->strings = $this->recursiveStringExtraction($xml); + + if (isset($this->legacyData['_root']) && is_array($this->legacyData['_root'])) { + $pages = []; + $this->collectLegacyPages($this->legacyData['_root'], 0, $pages); + $this->pages = $pages; + $this->assetsDetailed = $this->extractDetailedAssetsFromPages($pages); + $this->assets = $this->flattenAssetPaths($this->assetsDetailed); + } + } + + /** + * Parse a modern ODE project based on content.xml. + * + * @param SimpleXMLElement $xml Parsed XML document + * * @return void */ - public function __construct(string $filePath) + protected function parseModernXml(SimpleXMLElement $xml): void + { + $this->contentSchemaVersion = isset($xml['version']) ? (string) $xml['version'] : null; + $this->odeResources = $this->readModernKeyValueNodes($this->xpath($xml, './x:odeResources/x:odeResource')); + $this->odeProperties = $this->readModernKeyValueNodes($this->xpath($xml, './x:odeProperties/x:odeProperty')); + + $this->title = $this->odeProperties['pp_title'] ?? ''; + $this->description = $this->odeProperties['pp_description'] ?? ''; + $this->author = $this->odeProperties['pp_author'] ?? ''; + $this->license = $this->odeProperties['pp_license'] ?? ($this->odeProperties['license'] ?? ''); + $this->language = $this->odeProperties['pp_lang'] ?? ($this->odeProperties['lom_general_language'] ?? ''); + $this->learningResourceType = $this->odeProperties['pp_learningResourceType'] ?? ''; + + $this->exeVersion = $this->odeResources['exe_version'] + ?? ($this->odeProperties['pp_exelearning_version'] ?? null); + $this->version = $this->detectModernVersion($this->exeVersion); + + $this->pages = $this->collectModernPages($xml); + $this->strings = $this->collectModernStrings($this->pages); + $this->assetsDetailed = $this->extractDetailedAssetsFromPages($this->pages); + $this->assets = $this->flattenAssetPaths($this->assetsDetailed); + } + + /** + * Load XML content with hardened libxml settings. + * + * @param string $xmlContent XML content + * + * @throws Exception If XML parsing fails + * @return SimpleXMLElement + */ + protected function loadXml(string $xmlContent): SimpleXMLElement + { + $previous = libxml_use_internal_errors(true); + $xml = simplexml_load_string($xmlContent, SimpleXMLElement::class, LIBXML_NONET); + + if ($xml === false) { + $errors = libxml_get_errors(); + libxml_clear_errors(); + libxml_use_internal_errors($previous); + $message = isset($errors[0]) ? trim($errors[0]->message) : 'Unknown XML parsing error.'; + throw new Exception('XML Parsing error: ' . $message); + } + + libxml_clear_errors(); + libxml_use_internal_errors($previous); + + return $xml; + } + + /** + * Execute an XPath query with the default namespace mapped to x. + * + * @param SimpleXMLElement $node XML node + * @param string $path XPath expression + * + * @return array + */ + protected function xpath(SimpleXMLElement $node, string $path): array + { + $namespaces = $node->getDocNamespaces(true); + if (isset($namespaces[''])) { + $node->registerXPathNamespace('x', $namespaces['']); + } else { + $path = str_replace('x:', '', $path); + } + + $result = $node->xpath($path); + + return is_array($result) ? $result : []; + } + + /** + * Read ZIP entry names. + * + * @param ZipArchive $zip Open ZIP archive + * + * @return array + */ + protected function readArchiveEntries(ZipArchive $zip): array + { + $entries = []; + + for ($index = 0; $index < $zip->numFiles; $index++) { + $name = $zip->getNameIndex($index); + if ($name !== false) { + $entries[] = $name; + } + } + + return $entries; + } + + /** + * Convert modern ODE key/value collections into an associative array. + * + * @param array $nodes Nodes with key/value children + * + * @return array + */ + protected function readModernKeyValueNodes(array $nodes): array + { + $values = []; + + foreach ($nodes as $node) { + $key = isset($node->key) ? trim((string) $node->key) : ''; + if ($key === '') { + continue; + } + + $values[$key] = isset($node->value) ? trim((string) $node->value) : ''; + } + + return $values; + } + + /** + * Detect the eXeLearning major version for modern projects. + * + * @param string|null $exeVersion Raw version string + * + * @return int + */ + protected function detectModernVersion(?string $exeVersion): int + { + if ($exeVersion === null || $exeVersion === '') { + return $this->isLikelyVersion4Package() ? 4 : 3; + } + + if (preg_match('/(?:^|[^0-9])([3-9])(?:\.[0-9]+)?/', $exeVersion, $matches) === 1) { + $detected = (int) $matches[1]; + + if ($detected <= 3 && $this->isLikelyVersion4Package()) { + return 4; + } + + return $detected; + } + + return $this->isLikelyVersion4Package() ? 4 : 3; + } + + /** + * Detect the resource layout family from archive entries. + * + * @param array $entries ZIP entry names + * + * @return string + */ + protected function detectResourceLayoutFromArchiveEntries(array $entries): string + { + $hasContentResources = false; + $hasLegacyTempPaths = false; + + foreach ($entries as $entry) { + if (str_starts_with($entry, 'content/resources/')) { + $hasContentResources = true; + } + + if (str_starts_with($entry, 'files/tmp/')) { + $hasLegacyTempPaths = true; + } + } + + if ($hasContentResources && $hasLegacyTempPaths) { + return 'mixed'; + } + + if ($hasContentResources) { + return 'content-resources'; + } + + if ($hasLegacyTempPaths) { + return 'legacy-temp-paths'; + } + + return 'none'; + } + + /** + * Recursively extract all text strings from XML. + * + * @param SimpleXMLElement $element XML element to extract from + * + * @return array + */ + protected function recursiveStringExtraction(SimpleXMLElement $element): array + { + $strings = []; + $elementArray = (array) $element; + + foreach ($elementArray as $value) { + if (is_string($value) && trim($value) !== '') { + $strings[] = trim($value); + continue; + } + + if ($value instanceof SimpleXMLElement) { + $strings = array_merge($strings, $this->recursiveStringExtraction($value)); + continue; + } + + if (!is_array($value)) { + continue; + } + + foreach ($value as $subValue) { + if ($subValue instanceof SimpleXMLElement) { + $strings = array_merge($strings, $this->recursiveStringExtraction($subValue)); + } + } + } + + return array_values(array_unique($strings)); + } + + /** + * Recursively parse a contentv3 structure. + * + * @param SimpleXMLElement $element XML element + * + * @return mixed + */ + protected function parseElement(SimpleXMLElement $element): mixed + { + $name = $element->getName(); + + switch ($name) { + case 'unicode': + case 'string': + return (string) $element['value']; + case 'int': + return (int) $element['value']; + case 'bool': + return ((string) $element['value']) === '1'; + case 'list': + $list = []; + foreach ($element->children() as $child) { + $list[] = $this->parseElement($child); + } + return $list; + case 'dictionary': + $dict = []; + $key = null; + foreach ($element->children() as $child) { + $childName = $child->getName(); + if (($childName === 'string' || $childName === 'unicode') && (string) $child['role'] === 'key') { + $key = (string) $child['value']; + } elseif ($key !== null) { + $dict[$key] = $this->parseElement($child); + $key = null; + } + } + return $dict; + case 'instance': + return isset($element->dictionary) ? $this->parseElement($element->dictionary) : []; + case 'none': + return null; + case 'reference': + return ['ref' => (string) $element['key']]; + default: + return []; + } + } + + /** + * Build page information for legacy projects. + * + * @param array $node Node information + * @param int $level Current depth level + * @param array $pages Accumulated pages + * + * @return void + */ + protected function collectLegacyPages(array $node, int $level, array &$pages): void + { + $title = $node['_title'] ?? ''; + $filename = $level === 0 ? 'index.html' : $this->slug($title) . '.html'; + + $idevices = []; + if (isset($node['idevices']) && is_array($node['idevices'])) { + foreach ($node['idevices'] as $idevice) { + $html = ''; + if (isset($idevice['fields']) && is_array($idevice['fields'])) { + foreach ($idevice['fields'] as $field) { + if (isset($field['content_w_resourcePaths'])) { + $html = (string) $field['content_w_resourcePaths']; + break; + } + } + } + + $idevices[] = [ + 'id' => $idevice['_id'] ?? '', + 'type' => $idevice['_iDeviceDir'] ?? ($idevice['class_'] ?? ''), + 'title' => $idevice['_title'] ?? '', + 'text' => $this->htmlToText($html), + 'html' => $html, + 'visible' => true, + 'teacherOnly' => false, + ]; + } + } + + $pages[] = [ + 'id' => $node['_id'] ?? '', + 'parentId' => is_array($node['parent'] ?? null) ? '' : ($node['parent'] ?? ''), + 'filename' => $filename, + 'title' => $title, + 'pageName' => $title, + 'level' => $level, + 'visible' => true, + 'highlight' => false, + 'hidePageTitle' => false, + 'editableInPage' => false, + 'blocks' => [], + 'idevices' => $idevices, + ]; + + if (!isset($node['children']) || !is_array($node['children'])) { + return; + } + + foreach ($node['children'] as $child) { + if (is_array($child)) { + $this->collectLegacyPages($child, $level + 1, $pages); + } + } + } + + /** + * Build page information for ODE projects. + * + * @param SimpleXMLElement $xml Parsed XML document + * + * @return array + */ + protected function collectModernPages(SimpleXMLElement $xml): array + { + $pages = []; + $nodes = $this->xpath($xml, './x:odeNavStructures/x:odeNavStructure'); + + foreach ($nodes as $node) { + $pageProperties = $this->readModernKeyValueNodes( + $this->xpath($node, './x:odeNavStructureProperties/x:odeNavStructureProperty') + ); + + $blocks = []; + $idevices = []; + + foreach ($this->xpath($node, './x:odePagStructures/x:odePagStructure') as $block) { + $blockProperties = $this->readModernKeyValueNodes( + $this->xpath($block, './x:odePagStructureProperties/x:odePagStructureProperty') + ); + + $components = []; + + foreach ($this->xpath($block, './x:odeComponents/x:odeComponent') as $component) { + $componentProperties = $this->readModernKeyValueNodes( + $this->xpath($component, './x:odeComponentsProperties/x:odeComponentsProperty') + ); + + $html = isset($component->htmlView) ? trim((string) $component->htmlView) : ''; + $componentData = [ + 'id' => isset($component->odeIdeviceId) ? (string) $component->odeIdeviceId : '', + 'type' => isset($component->odeIdeviceTypeName) ? (string) $component->odeIdeviceTypeName : '', + 'order' => isset($component->odeComponentsOrder) ? (int) $component->odeComponentsOrder : 0, + 'text' => $this->htmlToText($html), + 'html' => $html, + 'jsonProperties' => $this->decodeJsonProperties( + isset($component->jsonProperties) ? (string) $component->jsonProperties : '' + ), + 'visible' => ($componentProperties['visibility'] ?? 'true') !== 'false', + 'teacherOnly' => ($componentProperties['teacherOnly'] ?? 'false') === 'true', + 'identifier' => $componentProperties['identifier'] ?? '', + 'cssClass' => $componentProperties['cssClass'] ?? '', + ]; + + $components[] = $componentData; + $idevices[] = $componentData; + } + + $blocks[] = [ + 'id' => isset($block->odeBlockId) ? (string) $block->odeBlockId : '', + 'pageId' => isset($block->odePageId) ? (string) $block->odePageId : '', + 'name' => isset($block->blockName) ? (string) $block->blockName : '', + 'iconName' => isset($block->iconName) ? (string) $block->iconName : '', + 'order' => isset($block->odePagStructureOrder) ? (int) $block->odePagStructureOrder : 0, + 'visible' => ($blockProperties['visibility'] ?? 'true') !== 'false', + 'teacherOnly' => ($blockProperties['teacherOnly'] ?? 'false') === 'true', + 'allowToggle' => ($blockProperties['allowToggle'] ?? 'true') !== 'false', + 'minimized' => ($blockProperties['minimized'] ?? 'false') === 'true', + 'identifier' => $blockProperties['identifier'] ?? '', + 'cssClass' => $blockProperties['cssClass'] ?? '', + 'components' => $components, + ]; + } + + $pages[] = [ + 'id' => isset($node->odePageId) ? (string) $node->odePageId : '', + 'parentId' => isset($node->odeParentPageId) ? (string) $node->odeParentPageId : '', + 'title' => $pageProperties['titlePage'] ?? ((string) ($node->pageName ?? '')), + 'pageName' => isset($node->pageName) ? (string) $node->pageName : '', + 'nodeTitle' => $pageProperties['titleNode'] ?? '', + 'description' => $pageProperties['description'] ?? '', + 'order' => isset($node->odeNavStructureOrder) ? (int) $node->odeNavStructureOrder : 0, + 'visible' => ($pageProperties['visibility'] ?? 'true') !== 'false', + 'highlight' => ($pageProperties['highlight'] ?? 'false') === 'true', + 'hidePageTitle' => ($pageProperties['hidePageTitle'] ?? 'false') === 'true', + 'editableInPage' => ($pageProperties['editableInPage'] ?? 'false') === 'true', + 'titleHtml' => $pageProperties['titleHtml'] ?? '', + 'blocks' => $blocks, + 'idevices' => $idevices, + ]; + } + + usort( + $pages, + static fn(array $left, array $right): int => ($left['order'] ?? 0) <=> ($right['order'] ?? 0) + ); + + return $pages; + } + + /** + * Decode JSON component properties. + * + * @param string $json Raw JSON text + * + * @return array + */ + protected function decodeJsonProperties(string $json): array + { + if ($json === '') { + return []; + } + + $decoded = json_decode($json, true); + + return is_array($decoded) ? $decoded : []; + } + + /** + * Convert HTML to plain text. + * + * @param string $html HTML fragment + * + * @return string + */ + protected function htmlToText(string $html): string + { + if ($html === '') { + return ''; + } + + $text = html_entity_decode(strip_tags($html), ENT_QUOTES | ENT_HTML5, 'UTF-8'); + $text = preg_replace('/\s+/u', ' ', $text); + + return trim((string) $text); + } + + /** + * Extract unique strings from parsed modern pages. + * + * @param array $pages Page information + * + * @return array + */ + protected function collectModernStrings(array $pages): array + { + $strings = []; + + foreach ($pages as $page) { + foreach (['title', 'pageName', 'nodeTitle', 'description'] as $field) { + if (!empty($page[$field])) { + $strings[] = trim((string) $page[$field]); + } + } + + foreach ($page['blocks'] as $block) { + if (!empty($block['name'])) { + $strings[] = trim((string) $block['name']); + } + + foreach ($block['components'] as $component) { + if (!empty($component['text'])) { + $strings[] = trim((string) $component['text']); + } + } + } + } + + return array_values(array_unique(array_filter($strings, static fn($value): bool => $value !== ''))); + } + + /** + * Extract referenced asset paths from page HTML. + * + * @param array $pages Page information + * + * @return array + */ + protected function extractDetailedAssetsFromPages(array $pages): array + { + $assets = []; + + foreach ($pages as $page) { + foreach ($page['idevices'] as $idevice) { + if (empty($idevice['html'])) { + continue; + } + + preg_match_all( + '/(?:\{\{context_path\}\}\/)?([A-Za-z0-9_\/.\-]+\.(?:png|jpe?g|gif|svg|webp|bmp|mp3|wav|ogg|m4a|mp4|webm|ogv|pdf|docx?|xlsx?|pptx?|odt|ods|odp|zip))/i', + $idevice['html'], + $matches + ); + + foreach ($matches[1] as $match) { + $path = ltrim($match, '/'); + $assets[$path] ??= [ + 'path' => $path, + 'type' => $this->detectAssetType($path), + 'extension' => strtolower((string) pathinfo($path, PATHINFO_EXTENSION)), + 'pages' => [], + 'idevices' => [], + 'occurrences' => 0, + ]; + + $assets[$path]['pages'][$page['id'] ?: $page['title']] = [ + 'id' => $page['id'] ?? '', + 'title' => $page['title'] ?? '', + ]; + $assets[$path]['idevices'][$idevice['id'] ?: ($page['id'] . ':' . $idevice['type'])] = [ + 'id' => $idevice['id'] ?? '', + 'type' => $idevice['type'] ?? '', + 'pageId' => $page['id'] ?? '', + 'pageTitle' => $page['title'] ?? '', + ]; + $assets[$path]['occurrences']++; + } + } + } + + foreach ($assets as &$asset) { + $asset['pages'] = array_values($asset['pages']); + $asset['idevices'] = array_values($asset['idevices']); + } + unset($asset); + + ksort($assets); + + return array_values($assets); + } + + /** + * Flatten detailed assets to a sorted path list. + * + * @param array $assetsDetailed Detailed assets + * + * @return array + */ + protected function flattenAssetPaths(array $assetsDetailed): array + { + $paths = array_map(static fn(array $asset): string => $asset['path'], $assetsDetailed); + sort($paths); + + return $paths; + } + + /** + * Detect the logical asset type from a file path. + * + * @param string $path Asset path + * + * @return string + */ + protected function detectAssetType(string $path): string + { + $extension = strtolower((string) pathinfo($path, PATHINFO_EXTENSION)); + + return match ($extension) { + 'png', 'jpg', 'jpeg', 'gif', 'svg', 'webp', 'bmp' => 'image', + 'mp3', 'wav', 'ogg', 'm4a' => 'audio', + 'mp4', 'webm', 'ogv' => 'video', + 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'odt', 'ods', 'odp' => 'document', + 'zip' => 'archive', + default => 'other', + }; + } + + /** + * Get the detected eXeLearning major version. + * + * @return int + */ + public function getVersion(): int + { + return $this->version; + } + + /** + * Get the source project extension. + * + * @return string + */ + public function getSourceExtension(): string + { + return $this->sourceExtension; + } + + /** + * Get the detected content format identifier. + * + * @return string + */ + public function getContentFormat(): string + { + return $this->contentFormat; + } + + /** + * Get the XML entry name used by the package. + * + * @return string + */ + public function getContentFile(): string + { + return $this->contentFile; + } + + /** + * Get the ODE schema version when available. + * + * @return string|null + */ + public function getContentSchemaVersion(): ?string + { + return $this->contentSchemaVersion; + } + + /** + * Get the raw eXeLearning version string when available. + * + * @return string|null + */ + public function getExeVersion(): ?string + { + return $this->exeVersion; + } + + /** + * Determine whether the project uses the legacy contentv3 format. + * + * @return bool + */ + public function isLegacyFormat(): bool + { + return $this->contentFormat === 'legacy-contentv3'; + } + + /** + * Get all extracted strings. + * + * @return array + */ + public function getStrings(): array + { + return $this->strings; + } + + /** + * Get parsed page information. + * + * @return array + */ + public function getPages(): array + { + return $this->pages; + } + + /** + * Get only visible pages. + * + * @return array + */ + public function getVisiblePages(): array + { + return array_values( + array_filter( + $this->pages, + static fn(array $page): bool => ($page['visible'] ?? true) === true + ) + ); + } + + /** + * Get all blocks across all pages. + * + * @return array + */ + public function getBlocks(): array + { + $blocks = []; + + foreach ($this->pages as $page) { + foreach ($page['blocks'] ?? [] as $block) { + $blocks[] = $block + [ + 'pageTitle' => $page['title'] ?? '', + ]; + } + } + + return $blocks; + } + + /** + * Get all idevices across all pages. + * + * @return array + */ + public function getIdevices(): array + { + $idevices = []; + + foreach ($this->pages as $page) { + foreach ($page['idevices'] ?? [] as $idevice) { + $idevices[] = $idevice + [ + 'pageId' => $page['id'] ?? '', + 'pageTitle' => $page['title'] ?? '', + ]; + } + } + + return $idevices; + } + + /** + * Get grouped text content for each page. + * + * @return array + */ + public function getPageTexts(): array + { + $pageTexts = []; + + foreach ($this->pages as $page) { + $texts = []; + + foreach ($page['idevices'] ?? [] as $idevice) { + $text = trim((string) ($idevice['text'] ?? '')); + if ($text !== '') { + $texts[] = $text; + } + } + + $pageTexts[] = [ + 'id' => $page['id'] ?? '', + 'title' => $page['title'] ?? '', + 'pageName' => $page['pageName'] ?? '', + 'visible' => $page['visible'] ?? true, + 'texts' => $texts, + 'text' => trim(implode("\n\n", $texts)), + ]; + } + + return $pageTexts; + } + + /** + * Get grouped text content for visible pages only. + * + * @return array + */ + public function getVisiblePageTexts(): array + { + return array_values( + array_filter( + $this->getPageTexts(), + static fn(array $pageText): bool => ($pageText['visible'] ?? true) === true + ) + ); + } + + /** + * Get grouped text content for a single page by its ID. + * + * @param string $pageId Page identifier + * + * @return array|null + */ + public function getPageTextById(string $pageId): ?array + { + foreach ($this->getPageTexts() as $pageText) { + if (($pageText['id'] ?? '') === $pageId) { + return $pageText; + } + } + + return null; + } + + /** + * Get idevices marked as teacher-only. + * + * @return array + */ + public function getTeacherOnlyIdevices(): array + { + return array_values( + array_filter( + $this->getIdevices(), + static fn(array $idevice): bool => ($idevice['teacherOnly'] ?? false) === true + ) + ); + } + + /** + * Get hidden idevices. + * + * @return array + */ + public function getHiddenIdevices(): array + { + return array_values( + array_filter( + $this->getIdevices(), + static fn(array $idevice): bool => ($idevice['visible'] ?? true) === false + ) + ); + } + + /** + * Get asset paths referenced by the parsed content. + * + * @return array + */ + public function getAssets(): array + { + return $this->assets; + } + + /** + * Get detailed asset information. + * + * @return array + */ + public function getAssetsDetailed(): array + { + return $this->assetsDetailed; + } + + /** + * Get image asset paths. + * + * @return array + */ + public function getImages(): array + { + return $this->filterAssetPathsByType('image'); + } + + /** + * Get audio asset paths. + * + * @return array + */ + public function getAudioFiles(): array { - $this->filePath = $filePath; - $this->parse(); + return $this->filterAssetPathsByType('audio'); } /** - * Static method to create an ELPParser from a file path - * - * @param string $filePath Path to the .elp file + * Get video asset paths. * - * @throws Exception If file cannot be opened or is invalid - * @return self + * @return array */ - public static function fromFile(string $filePath): self + public function getVideoFiles(): array { - return new self($filePath); + return $this->filterAssetPathsByType('video'); } /** - * Detect the ELP file version and parse its contents + * Get document asset paths. * - * @throws Exception If file parsing fails - * @return void + * @return array */ - protected function parse(): void + public function getDocuments(): array { - $zip = new ZipArchive(); - - if (!file_exists($this->filePath)) { - throw new Exception('File does not exist.'); - } - - // Check MIME type - $mimeType = mime_content_type($this->filePath); - if ($mimeType !== 'application/zip') { - throw new Exception('The file is not a valid ZIP file.'); - } - - $zip = new ZipArchive(); - if ($zip->open($this->filePath) !== true) { - throw new Exception('Unable to open the ZIP file.'); - } - - // Detect version - if ($zip->locateName('content.xml') !== false && $zip->locateName('index.html') !== false) { - $this->version = 3; - $contentFile = 'content.xml'; - } elseif ($zip->locateName('contentv3.xml') !== false) { - $this->version = 2; - $contentFile = 'contentv3.xml'; - } else { - $zip->close(); - throw new Exception("Invalid ELP file: No content XML found."); - } - - // Extract content - $xmlContent = $zip->getFromName($contentFile); - $zip->close(); - - if ($xmlContent === false) { - throw new Exception("Failed to read XML content"); - } - - $this->parseXML($xmlContent); + return $this->filterAssetPathsByType('document'); } - /** - * Parse the XML content and extract relevant information - * - * @param string $xmlContent XML content as a string + * Get archive assets that are present in the ZIP but not referenced in parsed content. * - * @throws Exception If XML parsing fails - * @return void + * @return array */ - protected function parseXML(string $xmlContent): void + public function getOrphanAssets(): array { - libxml_use_internal_errors(true); - $xml = simplexml_load_string($xmlContent); + $referenced = array_fill_keys($this->assets, true); + $orphans = []; - if ($xml === false) { - $errors = libxml_get_errors(); - libxml_clear_errors(); - throw new Exception("XML Parsing error: " . $errors[0]->message); - } + foreach ($this->archiveEntries as $entry) { + if (str_ends_with($entry, '/')) { + continue; + } + + $type = $this->detectAssetType($entry); + if (!in_array($type, ['image', 'audio', 'video', 'document', 'archive'], true)) { + continue; + } - if ($this->version === 2) { - $this->extractVersion2Metadata($xml); - } else if ($this->version === 3) { - $this->extractVersion3Metadata($xml); + if (!isset($referenced[$entry])) { + $orphans[] = $entry; + } } - // Extract all strings - $this->extractStrings($xml); - } + sort($orphans); - /** - * Extract strings from the XML document - * - * @param SimpleXMLElement $xml XML document - * - * @return void - */ - protected function extractStrings(SimpleXMLElement $xml): void - { - // Customize this method to extract specific strings based on your needs - $this->strings = $this->recursiveStringExtraction($xml); + return $orphans; } /** - * Recursively extract all text strings from XML + * Filter asset paths by logical type. * - * @param SimpleXMLElement $element XML element to extract from + * @param string $type Asset type * - * @return array Extracted strings + * @return array */ - protected function recursiveStringExtraction(SimpleXMLElement $element): array + protected function filterAssetPathsByType(string $type): array { - $strings = []; - - // Convert SimpleXMLElement to array to handle complex structures - $elementArray = (array)$element; + $paths = []; - foreach ($elementArray as $key => $value) { - if (is_string($value) && !empty(trim($value))) { - $strings[] = trim($value); - } elseif ($value instanceof SimpleXMLElement) { - $strings = array_merge($strings, $this->recursiveStringExtraction($value)); - } elseif (is_array($value)) { - foreach ($value as $subValue) { - if ($subValue instanceof SimpleXMLElement) { - $strings = array_merge($strings, $this->recursiveStringExtraction($subValue)); - } - } + foreach ($this->assetsDetailed as $asset) { + if (($asset['type'] ?? null) === $type) { + $paths[] = $asset['path']; } } - return $strings; + sort($paths); + + return $paths; } /** - * Get the detected ELP file version + * Get archive entry names. * - * @return int ELP file version (2 or 3) + * @return array */ - public function getVersion(): int + public function getArchiveEntries(): array { - return $this->version; + return $this->archiveEntries; } /** - * Get all extracted strings + * Return whether the package contains a root content.dtd entry. * - * @return array List of extracted strings + * @return bool */ - public function getStrings(): array + public function hasRootDtd(): bool { - return $this->strings; + return $this->hasRootDtd; } /** - * Convert parser data to an array + * Return the detected resource layout family. * - * @return array Parsed ELP file data + * @return string */ + public function getResourceLayout(): string + { + return $this->resourceLayout; + } + /** - * Extract metadata from version 3 XML format + * Heuristic detection for likely eXeLearning 4-style packages. * - * @param SimpleXMLElement $xml XML document + * The package format alone does not always expose the exact major version. + * In practice, `.elpx` plus a root `content.dtd` is a useful signal for + * newer packages even when embedded metadata still reports `3.0`. * - * @return void + * @return bool */ - protected function extractVersion3Metadata(SimpleXMLElement $xml): void - { - if (isset($xml->odeProperties)) { - foreach ($xml->odeProperties->odeProperty as $property) { - $key = (string)$property->key; - $value = (string)$property->value; - - switch ($key) { - case 'pp_title': - $this->title = $value; - break; - case 'pp_description': - $this->description = $value; - break; - case 'pp_author': - $this->author = $value; - break; - case 'license': - $this->license = $value; - break; - case 'lom_general_language': - $this->language = $value; - break; - case 'pp_learningResourceType': - $this->learningResourceType = $value; - break; - } - } - } + public function isLikelyVersion4Package(): bool + { + return $this->contentFormat === 'ode-content' + && $this->sourceExtension === 'elpx' + && $this->hasRootDtd; } /** - * Get the title + * Get the title. * * @return string */ @@ -325,7 +1325,7 @@ public function getTitle(): string } /** - * Get the description + * Get the description. * * @return string */ @@ -335,7 +1335,7 @@ public function getDescription(): string } /** - * Get the author + * Get the author. * * @return string */ @@ -345,7 +1345,7 @@ public function getAuthor(): string } /** - * Get the license + * Get the license. * * @return string */ @@ -355,7 +1355,7 @@ public function getLicense(): string } /** - * Get the language + * Get the language. * * @return string */ @@ -365,7 +1365,7 @@ public function getLanguage(): string } /** - * Get the learning resource type + * Get the learning resource type. * * @return string */ @@ -375,84 +1375,9 @@ public function getLearningResourceType(): string } /** - * Extract metadata from version 2 XML format - * - * @param SimpleXMLElement $xml XML document - * - * @return void - */ - protected function extractVersion2Metadata(SimpleXMLElement $xml): void - { - if (!isset($xml->dictionary)) { - return; - } - - $metadata = []; - $currentKey = null; - - foreach ($xml->dictionary->children() as $element) { - $elementName = $element->getName(); - - if ($elementName === 'string') { - $role = (string)$element['role']; - $value = (string)$element['value']; - - if ($role === 'key') { - $currentKey = $value; - } - } elseif ($currentKey !== null) { - // Extract the value based on the type of element - switch ($elementName) { - case 'unicode': - $metadata[$currentKey] = (string)$element['value']; - break; - case 'bool': - $metadata[$currentKey] = ((string)$element['value']) === '1'; - break; - case 'int': - $metadata[$currentKey] = (int)$element['value']; - break; - case 'list': - // Handle lists if necessary - $listValues = []; - foreach ($element->children() as $listItem) { - if ($listItem->getName() === 'unicode') { - $listValues[] = (string)$listItem['value']; - } - // Add handling for other types of elements within the list if necessary - } - $metadata[$currentKey] = $listValues; - break; - case 'dictionary': - // Handle nested dictionaries if necessary - // This may require a recursive function - // For simplicity, it can be omitted or implemented as needed - break; - // Add other cases as needed - default: - // Handle unknown types or ignore them - break; - } - - // Reset the current key after assigning the value - $currentKey = null; - } - } - - // Map the metadata to the corresponding properties - $this->title = $metadata['_title'] ?? ''; - $this->description = $metadata['_description'] ?? ''; - $this->author = $metadata['_author'] ?? ''; - $this->license = $metadata['license'] ?? ''; - $this->language = $metadata['_lang'] ?? ''; - $this->learningResourceType = $metadata['_learningResourceType'] ?? ''; - } - - - /** - * Serialization method + * Convert parser data to an array. * - * @return array Data + * @return array */ public function toArray(): array { @@ -469,9 +1394,9 @@ public function toArray(): array } /** - * JSON serialization method + * JSON serialization method. * - * @return array Data to be JSON serialized + * @return array */ public function jsonSerialize(): mixed { @@ -479,15 +1404,12 @@ public function jsonSerialize(): mixed } /** - * Export parsed data as JSON string or file - * - * If a destination path is provided, the JSON string will be written to the - * given file. The method returns the JSON representation in any case. + * Export parsed data as JSON string or file. * * @param string|null $destinationPath Optional path to save the JSON file * * @throws Exception If the file cannot be written - * @return string JSON representation of the parsed ELP data + * @return string */ public function exportJson(?string $destinationPath = null): string { @@ -497,47 +1419,39 @@ public function exportJson(?string $destinationPath = null): string throw new Exception('Failed to encode JSON: ' . json_last_error_msg()); } - if ($destinationPath !== null) { - if (file_put_contents($destinationPath, $json) === false) { - throw new Exception('Unable to write JSON file.'); - } + if ($destinationPath !== null && file_put_contents($destinationPath, $json) === false) { + throw new Exception('Unable to write JSON file.'); } return $json; } /** - * Get detailed metadata and content structure as an array - * - * This method parses the underlying XML to build a rich metadata - * representation including package information, Dublin Core data, - * LOM and LOM-ES schemas as well as a simplified page tree. + * Get detailed metadata information. * - * @throws Exception If the XML content cannot be parsed - * @return array Metadata and content information + * @return array */ public function getMetadata(): array { - $zip = new ZipArchive(); - if ($zip->open($this->filePath) !== true) { - throw new Exception('Unable to open the ZIP file.'); - } - - $contentFile = $this->version === 2 ? 'contentv3.xml' : 'content.xml'; - $xmlContent = $zip->getFromName($contentFile); - $zip->close(); - - if ($xmlContent === false) { - throw new Exception('Failed to read XML content.'); + if ($this->isLegacyFormat()) { + return [ + 'metadata' => $this->buildLegacyMetadata(), + ]; } - libxml_use_internal_errors(true); - $xml = simplexml_load_string($xmlContent); - if ($xml === false) { - throw new Exception('XML Parsing error'); - } + return [ + 'metadata' => $this->buildModernMetadata(), + ]; + } - $data = $this->parseElement($xml->dictionary); + /** + * Build normalized legacy metadata output. + * + * @return array + */ + protected function buildLegacyMetadata(): array + { + $data = $this->legacyData; $meta = [ [ @@ -548,7 +1462,7 @@ public function getMetadata(): array 'description' => [ 'general_description' => $data['_description'] ?? '', 'objectives' => $data['_objectives'] ?? '', - 'preknowledge' => $data['_preknowledge'] ?? '' + 'preknowledge' => $data['_preknowledge'] ?? '', ], 'author' => $data['_author'] ?? '', 'license' => $data['license'] ?? '', @@ -574,156 +1488,81 @@ public function getMetadata(): array 'level_3' => $data['_levelNames'][2] ?? '', ], 'advanced_options' => [ - 'custom_head' => $data['_extraHeadContent'] ?? '' - ] + 'custom_head' => $data['_extraHeadContent'] ?? '', + ], ], ], ]; - if (isset($data['dublinCore'])) { - $meta[] = [ - 'schema' => 'Dublin core', - 'content' => $data['dublinCore'] ?? [], - ]; - } - - if (isset($data['lom'])) { - $meta[] = [ - 'schema' => 'LOM v1.0', - 'content' => $data['lom'] ?? [], - ]; - } - - if (isset($data['lomEs'])) { - $meta[] = [ - 'schema' => 'LOM-ES v1.0', - 'content' => $data['lomEs'] ?? [], - ]; + foreach (['dublinCore' => 'Dublin core', 'lom' => 'LOM v1.0', 'lomEs' => 'LOM-ES v1.0'] as $key => $schema) { + if (isset($data[$key])) { + $meta[] = [ + 'schema' => $schema, + 'content' => $data[$key] ?? [], + ]; + } } - return [ - 'metadata' => $meta, - ]; - } - - /** - * Recursively parse a dictionary structure - * - * @param SimpleXMLElement $element XML element - * - * @return mixed Parsed data - */ - protected function parseElement(SimpleXMLElement $element): mixed - { - $name = $element->getName(); - - switch ($name) { - case 'unicode': - case 'string': - return (string) $element['value']; - case 'int': - return (int) $element['value']; - case 'bool': - return ((string) $element['value']) === '1'; - case 'list': - $list = []; - foreach ($element->children() as $child) { - $list[] = $this->parseElement($child); - } - return $list; - case 'dictionary': - $dict = []; - $key = null; - foreach ($element->children() as $child) { - $cname = $child->getName(); - if (($cname === 'string' || $cname === 'unicode') && (string) $child['role'] === 'key') { - $key = (string) $child['value']; - } elseif ($key !== null) { - $dict[$key] = $this->parseElement($child); - $key = null; - } - } - return $dict; - case 'instance': - return $this->parseElement($element->dictionary); - case 'none': - return null; - case 'reference': - return ['ref' => (string) $element['key']]; - default: - return null; - } + return $meta; } /** - * Collect page data recursively + * Build normalized modern metadata output. * - * @param array $node Node information - * @param int $level Current depth level - * @param array $pages Accumulated pages - * - * @return void + * @return array */ - protected function collectPages(array $node, int $level, array &$pages): void + protected function buildModernMetadata(): array { - $title = $node['_title'] ?? ''; - $filename = $level === 0 ? 'index.html' : $this->slug($title) . '.html'; - - $idevices = []; - if (isset($node['idevices']) && is_array($node['idevices'])) { - foreach ($node['idevices'] as $idevice) { - $html = ''; - if (isset($idevice['fields']) && is_array($idevice['fields'])) { - foreach ($idevice['fields'] as $field) { - if (isset($field['content_w_resourcePaths'])) { - $html = $field['content_w_resourcePaths']; - break; - } - } - } - $idevices[] = [ - 'id' => $idevice['_id'] ?? '', - 'type' => $idevice['_iDeviceDir'] ?? ($idevice['class_'] ?? ''), - 'title' => $idevice['_title'] ?? '', - 'text' => trim(strip_tags($html)), - 'html_code' => $html, - ]; - } - } - - $pages[] = [ - 'filename' => $filename, - 'pagename' => $title, - 'level' => $level, - 'idevices' => $idevices, + return [ + [ + 'schema' => 'Package', + 'content' => [ + 'title' => $this->title, + 'lang' => $this->language, + 'description' => [ + 'general_description' => $this->description, + 'objectives' => '', + 'preknowledge' => '', + ], + 'author' => $this->author, + 'license' => $this->license, + 'learningResourceType' => $this->learningResourceType, + 'format' => [ + 'container' => $this->sourceExtension, + 'content_file' => $this->contentFile, + 'content_format' => $this->contentFormat, + 'schema_version' => $this->contentSchemaVersion ?? '', + 'resource_layout' => $this->resourceLayout, + 'has_root_dtd' => $this->hasRootDtd, + 'likely_version_4' => $this->isLikelyVersion4Package(), + ], + 'project_properties' => $this->odeProperties, + 'project_resources' => $this->odeResources, + ], + ], ]; - - if (isset($node['children']) && is_array($node['children'])) { - foreach ($node['children'] as $child) { - if (is_array($child)) { - $this->collectPages($child, $level + 1, $pages); - } - } - } } /** - * Create a filename-friendly slug from a string + * Create a filename-friendly slug from a string. * * @param string $text Input text * - * @return string Slug + * @return string */ protected function slug(string $text): string { $slug = removeAccents($text); $slug = strtolower($slug); $slug = preg_replace('/[^a-z0-9]+/', '_', $slug); - return trim($slug, '_'); + + return trim((string) $slug, '_'); } /** - * Extract contents of an ELP file to a specified directory + * Extract the project contents to a directory. + * + * Extraction is performed entry by entry to block path traversal attempts. * * @param string $destinationPath Directory to extract contents to * @@ -735,16 +1574,86 @@ public function extract(string $destinationPath): void $zip = new ZipArchive(); if ($zip->open($this->filePath) !== true) { - throw new Exception("Unable to open ELP file for extraction"); + throw new Exception('Unable to open ELP/ELPX file for extraction.'); } - if (!file_exists($destinationPath)) { - mkdir($destinationPath, 0755, true); + if (!file_exists($destinationPath) && !mkdir($destinationPath, 0755, true) && !is_dir($destinationPath)) { + $zip->close(); + throw new Exception('Unable to create destination directory.'); + } + + $destinationRoot = realpath($destinationPath); + if ($destinationRoot === false) { + $zip->close(); + throw new Exception('Unable to resolve destination directory.'); + } + + for ($index = 0; $index < $zip->numFiles; $index++) { + $entryName = $zip->getNameIndex($index); + if ($entryName === false) { + continue; + } + + if ($this->isUnsafeArchivePath($entryName)) { + $zip->close(); + throw new Exception('Unsafe ZIP entry detected: ' . $entryName); + } + + $targetPath = $destinationRoot . DIRECTORY_SEPARATOR . str_replace('/', DIRECTORY_SEPARATOR, $entryName); + + if (str_ends_with($entryName, '/')) { + if (!is_dir($targetPath) && !mkdir($targetPath, 0755, true) && !is_dir($targetPath)) { + $zip->close(); + throw new Exception('Unable to create directory during extraction.'); + } + continue; + } + + $targetDir = dirname($targetPath); + if (!is_dir($targetDir) && !mkdir($targetDir, 0755, true) && !is_dir($targetDir)) { + $zip->close(); + throw new Exception('Unable to create directory during extraction.'); + } + + $stream = $zip->getStream($entryName); + if ($stream === false) { + $zip->close(); + throw new Exception('Unable to read ZIP entry: ' . $entryName); + } + + $contents = stream_get_contents($stream); + fclose($stream); + + if ($contents === false || file_put_contents($targetPath, $contents) === false) { + $zip->close(); + throw new Exception('Unable to extract ZIP entry: ' . $entryName); + } } - $zip->extractTo($destinationPath); $zip->close(); } + + /** + * Check if a ZIP entry path is unsafe. + * + * @param string $entryName ZIP entry name + * + * @return bool + */ + protected function isUnsafeArchivePath(string $entryName): bool + { + if ($entryName === '' || str_starts_with($entryName, '/') || preg_match('/^[A-Za-z]:[\/\\\\]/', $entryName) === 1) { + return true; + } + + foreach (preg_split('#[\/\\\\]+#', $entryName) as $segment) { + if ($segment === '..') { + return true; + } + } + + return false; + } } /** @@ -881,7 +1790,7 @@ function removeAccents(string $text, string $locale = ''): string * * @param string $str Input string. * - * @return bool True if the string is valid UTF-8. + * @return bool */ function seemsUtf8(string $str): bool { diff --git a/tests/Fixtures/propiedades.elpx b/tests/Fixtures/propiedades.elpx new file mode 100644 index 0000000..02d2525 Binary files /dev/null and b/tests/Fixtures/propiedades.elpx differ diff --git a/tests/Fixtures/un-contenido-de-ejemplo-para-probar-estilos-y-catalogacion.elpx b/tests/Fixtures/un-contenido-de-ejemplo-para-probar-estilos-y-catalogacion.elpx new file mode 100644 index 0000000..fccc893 Binary files /dev/null and b/tests/Fixtures/un-contenido-de-ejemplo-para-probar-estilos-y-catalogacion.elpx differ diff --git a/tests/Unit/ElpParserTest.php b/tests/Unit/ElpParserTest.php index 5247a83..38fb9dd 100644 --- a/tests/Unit/ElpParserTest.php +++ b/tests/Unit/ElpParserTest.php @@ -275,3 +275,235 @@ function () { ->toThrow(Exception::class, 'Invalid ELP file: No content XML found.'); } ); + +it( + 'can parse a modern ELPX file and expose extended format metadata', + function () { + $elpxFile = __DIR__ . '/../Fixtures/un-contenido-de-ejemplo-para-probar-estilos-y-catalogacion.elpx'; + + expect(file_exists($elpxFile))->toBeTrue('Test ELPX file not found'); + + $parser = ELPParser::fromFile($elpxFile); + + expect($parser->getVersion())->toBe(4); + expect($parser->getSourceExtension())->toBe('elpx'); + expect($parser->getContentFormat())->toBe('ode-content'); + expect($parser->getContentFile())->toBe('content.xml'); + expect($parser->getContentSchemaVersion())->toBe('2.0'); + expect($parser->getExeVersion())->toBe('3.0'); + expect($parser->isLegacyFormat())->toBeFalse(); + expect($parser->hasRootDtd())->toBeTrue(); + expect($parser->getResourceLayout())->toBe('content-resources'); + expect($parser->isLikelyVersion4Package())->toBeTrue(); + + expect($parser->getTitle())->toBe('Un contenido de ejemplo para probar estilos y catalogación'); + expect($parser->getAuthor())->toBe('Ignacio Gros'); + expect($parser->getDescription())->toContain('Descripción general'); + expect($parser->getLanguage())->toBe('es'); + + $pages = $parser->getPages(); + expect($pages)->toBeArray(); + expect(count($pages))->toBe(14); + expect($pages[0]['title'])->toBe('Inicio'); + expect($pages[0]['idevices'])->toBeArray(); + expect($pages[0]['idevices'][0]['type'])->toBe('text'); + + $assets = $parser->getAssets(); + expect($assets)->toContain('content/resources/00.jpg'); + expect($assets)->toContain('content/resources/colegio.mp3'); + + $metadata = $parser->getMetadata(); + expect($metadata['metadata'][0]['content']['format']['container'])->toBe('elpx'); + expect($metadata['metadata'][0]['content']['project_resources']['exe_version'])->toBe('3.0'); + expect($metadata['metadata'][0]['content']['format']['likely_version_4'])->toBeTrue(); + } +); + +it( + 'can parse elpx page and component visibility properties', + function () { + $elpxFile = __DIR__ . '/../Fixtures/propiedades.elpx'; + + expect(file_exists($elpxFile))->toBeTrue('Properties ELPX fixture not found'); + + $parser = ELPParser::fromFile($elpxFile); + $pages = $parser->getPages(); + + expect($parser->getVersion())->toBe(4); + expect($parser->getTitle())->toBe('propiedades'); + expect($parser->getLanguage())->toBe('eu'); + expect($parser->getLicense())->toBe('creative commons: attribution - share alike 4.0'); + expect($parser->hasRootDtd())->toBeTrue(); + expect($parser->getResourceLayout())->toBe('none'); + expect($parser->isLikelyVersion4Package())->toBeTrue(); + expect(count($pages))->toBe(6); + expect($pages[0]['title'])->toBe('Propiedades idevices'); + expect($pages[0]['idevices'][0]['visible'])->toBeTrue(); + expect($pages[0]['idevices'][1]['visible'])->toBeFalse(); + expect($pages[0]['idevices'][2]['teacherOnly'])->toBeTrue(); + expect($pages[3]['pageName'])->toBe('Propiedades páginas - otro título'); + expect($pages[3]['title'])->toBe('otro título!!!!!!!!!!!!!!'); + expect($pages[3]['editableInPage'])->toBeTrue(); + expect($pages[4]['visible'])->toBeFalse(); + expect($parser->getStrings())->toContain('no visible en exportación'); + } +); + +it( + 'distinguishes modern elp packages from likely version 4 elpx packages', + function () { + $modernElp = ELPParser::fromFile(__DIR__ . '/../Fixtures/exe3-accessibility-revision.elp'); + $modernElpx = ELPParser::fromFile(__DIR__ . '/../Fixtures/un-contenido-de-ejemplo-para-probar-estilos-y-catalogacion.elpx'); + + expect($modernElp->getContentFormat())->toBe('ode-content'); + expect($modernElp->getSourceExtension())->toBe('elp'); + expect($modernElp->hasRootDtd())->toBeFalse(); + expect($modernElp->isLikelyVersion4Package())->toBeFalse(); + expect($modernElp->getResourceLayout())->toBe('content-resources'); + expect($modernElp->getVersion())->toBe(3); + + expect($modernElpx->getContentFormat())->toBe('ode-content'); + expect($modernElpx->getSourceExtension())->toBe('elpx'); + expect($modernElpx->hasRootDtd())->toBeTrue(); + expect($modernElpx->isLikelyVersion4Package())->toBeTrue(); + expect($modernElpx->getResourceLayout())->toBe('content-resources'); + expect($modernElpx->getVersion())->toBe(4); + } +); + +it( + 'lists assets by type and exposes detailed asset origins', + function () { + $parser = ELPParser::fromFile( + __DIR__ . '/../Fixtures/un-contenido-de-ejemplo-para-probar-estilos-y-catalogacion.elpx' + ); + + $allAssets = $parser->getAssets(); + $images = $parser->getImages(); + $audio = $parser->getAudioFiles(); + $video = $parser->getVideoFiles(); + $documents = $parser->getDocuments(); + $detailed = $parser->getAssetsDetailed(); + + expect($allAssets)->toContain('content/resources/00.jpg'); + expect($allAssets)->toContain('content/resources/colegio.mp3'); + + expect($images)->toContain('content/resources/00.jpg'); + expect($images)->toContain('content/resources/01.jpg'); + expect($audio)->toBe(['content/resources/colegio.mp3']); + expect($video)->toBeArray()->toHaveCount(0); + expect($documents)->toBeArray()->toHaveCount(0); + + expect($detailed)->toBeArray(); + expect(count($detailed))->toBeGreaterThan(0); + + $imageAsset = null; + $audioAsset = null; + foreach ($detailed as $asset) { + if ($asset['path'] === 'content/resources/00.jpg') { + $imageAsset = $asset; + } + if ($asset['path'] === 'content/resources/colegio.mp3') { + $audioAsset = $asset; + } + } + + expect($imageAsset)->toBeArray(); + expect($imageAsset['type'])->toBe('image'); + expect($imageAsset['extension'])->toBe('jpg'); + expect($imageAsset['occurrences'])->toBeGreaterThan(0); + expect($imageAsset['pages'])->toBeArray(); + expect($imageAsset['idevices'])->toBeArray(); + expect($imageAsset['pages'][0]['title'])->toBe('Inicio'); + + expect($audioAsset)->toBeArray(); + expect($audioAsset['type'])->toBe('audio'); + expect($audioAsset['extension'])->toBe('mp3'); + } +); + +it( + 'lists visible pages blocks idevices and grouped page texts', + function () { + $parser = ELPParser::fromFile(__DIR__ . '/../Fixtures/propiedades.elpx'); + + $pages = $parser->getPages(); + $visiblePages = $parser->getVisiblePages(); + $blocks = $parser->getBlocks(); + $idevices = $parser->getIdevices(); + $pageTexts = $parser->getPageTexts(); + + expect($pages)->toHaveCount(6); + expect($visiblePages)->toHaveCount(5); + expect($blocks)->toBeArray(); + expect(count($blocks))->toBeGreaterThan(0); + expect($idevices)->toBeArray(); + expect(count($idevices))->toBeGreaterThan(0); + expect($pageTexts)->toHaveCount(6); + + expect($blocks[0]['pageTitle'])->toBe('Propiedades idevices'); + expect($idevices[0]['pageTitle'])->toBe('Propiedades idevices'); + + $hiddenPage = null; + $firstPageText = null; + + foreach ($pageTexts as $pageText) { + if ($pageText['title'] === 'Propiedades idevices') { + $firstPageText = $pageText; + } + if ($pageText['pageName'] === 'Propiedades páginas - no visible') { + $hiddenPage = $pageText; + } + } + + expect($firstPageText)->toBeArray(); + expect($firstPageText['texts'])->toContain('normal'); + expect($firstPageText['texts'])->toContain('no visible en exportación'); + expect($firstPageText['text'])->toContain('visible solo en modo docente'); + + expect($hiddenPage)->toBeArray(); + expect($hiddenPage['visible'])->toBeFalse(); + + $pageById = $parser->getPageTextById($pages[0]['id']); + expect($pageById)->toBeArray(); + expect($pageById['title'])->toBe('Propiedades idevices'); + expect($parser->getPageTextById('missing-page-id'))->toBeNull(); + } +); + +it( + 'lists visible page texts teacher only idevices hidden idevices and orphan assets', + function () { + $propertiesParser = ELPParser::fromFile(__DIR__ . '/../Fixtures/propiedades.elpx'); + + $visiblePageTexts = $propertiesParser->getVisiblePageTexts(); + $teacherOnlyIdevices = $propertiesParser->getTeacherOnlyIdevices(); + $hiddenIdevices = $propertiesParser->getHiddenIdevices(); + $orphans = $propertiesParser->getOrphanAssets(); + + expect($visiblePageTexts)->toHaveCount(5); + foreach ($visiblePageTexts as $pageText) { + expect($pageText['visible'])->toBeTrue(); + } + + expect($teacherOnlyIdevices)->toHaveCount(1); + expect($teacherOnlyIdevices[0]['teacherOnly'])->toBeTrue(); + expect($teacherOnlyIdevices[0]['pageTitle'])->toBe('Propiedades idevices'); + + expect($hiddenIdevices)->toHaveCount(1); + expect($hiddenIdevices[0]['visible'])->toBeFalse(); + expect($hiddenIdevices[0]['pageTitle'])->toBe('Propiedades idevices'); + + expect($orphans)->toContain('content/img/exe_powered_logo.png'); + expect($orphans)->toContain('theme/screenshot.png'); + + $contentParser = ELPParser::fromFile( + __DIR__ . '/../Fixtures/un-contenido-de-ejemplo-para-probar-estilos-y-catalogacion.elpx' + ); + $contentOrphans = $contentParser->getOrphanAssets(); + + expect($contentOrphans)->toContain('theme/screenshot.png'); + expect($contentOrphans)->not->toContain('content/resources/00.jpg'); + expect($contentOrphans)->not->toContain('content/resources/colegio.mp3'); + } +);