forked from DSpace/DSpace
-
Notifications
You must be signed in to change notification settings - Fork 2
UFAL/Add CCMM 1.1.0 OAI-PMH crosswalk (ccmm-xml metadataPrefix) #1262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
milanmajchrak
wants to merge
2
commits into
dtq-dev
Choose a base branch
from
ufal/oai-pmh-new-crosswalk
base: dtq-dev
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
191 changes: 191 additions & 0 deletions
191
dspace-oai/src/test/java/org/dspace/xoai/tests/stylesheets/CcmmXslTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,191 @@ | ||
| /** | ||
| * The contents of this file are subject to the license and copyright | ||
| * detailed in the LICENSE and NOTICE files at the root of the source | ||
| * tree and available online at | ||
| * | ||
| * http://www.dspace.org/license/ | ||
| */ | ||
| package org.dspace.xoai.tests.stylesheets; | ||
|
|
||
| import static org.dspace.xoai.tests.support.XmlMatcherBuilder.xml; | ||
| import static org.hamcrest.MatcherAssert.assertThat; | ||
| import static org.hamcrest.core.Is.is; | ||
| import static org.hamcrest.core.IsEqual.equalTo; | ||
|
|
||
| import org.dspace.xoai.tests.support.XmlMatcherBuilder; | ||
| import org.junit.Test; | ||
|
|
||
| /** | ||
| * Tests for the CCMM (Czech Core Metadata Model) 1.1.0 OAI-PMH crosswalk. | ||
| * | ||
| * @see <a href="https://github.com/techlib/CCMM">CCMM Schema</a> | ||
| * @see <a href="https://github.com/ufal/clarin-dspace/issues/1145">Issue #1145</a> | ||
| */ | ||
| public class CcmmXslTest extends AbstractXSLTest { | ||
|
|
||
| private static final String CCMM_NS = "https://schema.ccmm.cz/research-data/1.1"; | ||
|
|
||
| @Test | ||
| public void ccmmCanTransformInput() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath("//ccmm:title", equalTo("Czech NLP Dataset v2.0")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsPublicationYear() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath("//ccmm:publication_year", equalTo("2025")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsIdentifier() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:identifier/ccmm:value", | ||
| equalTo("http://hdl.handle.net/11234/1-5678")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsCreator() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:qualified_relation[1]/ccmm:relation/ccmm:person/ccmm:name", | ||
| equalTo("Novak, Jan")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsSubjects() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:subject[1]/ccmm:title", | ||
| equalTo("linguistics")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsResourceType() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:resource_type/ccmm:label", | ||
| equalTo("corpus")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsDescription() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:description/ccmm:description_text", | ||
| equalTo("A sample dataset for testing CCMM crosswalk output in the OAI-PMH protocol.")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsLicense() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:terms_of_use/ccmm:license/ccmm:iri", | ||
| equalTo("https://creativecommons.org/licenses/by/4.0/")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsAccessRights() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:terms_of_use/ccmm:access_rights/ccmm:iri", | ||
| equalTo("http://purl.org/coar/access_right/c_abf2")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsPrimaryLanguage() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:primary_language/ccmm:label", | ||
| equalTo("ces")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsAlternateTitle() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:alternate_title/ccmm:title", | ||
| equalTo("CND 2.0")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsPublisher() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:qualified_relation[ccmm:role/ccmm:label='Distributor']/ccmm:relation/ccmm:organization/ccmm:name", | ||
| equalTo("Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmContainsMetadataIdentification() throws Exception { | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:metadata_identification/ccmm:conforms_to_standard/ccmm:iri", | ||
| equalTo("https://schema.ccmm.cz/research-data/1.1")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmCanTransformBasicXoaiInput() throws Exception { | ||
| // Test with the default xoai-test1.xml (simpler data) to ensure crosswalk | ||
| // handles missing fields gracefully | ||
| String result = apply("ccmm.xsl").to(resource("xoai-test1.xml")); | ||
| assertThat(result, is(ccmm().withXPath("//ccmm:title", equalTo("Test Webpage")))); | ||
| } | ||
|
|
||
| // ---- Fallback scenario tests ---- | ||
|
|
||
| @Test | ||
| public void ccmmFallbackTitleIsUntitled() throws Exception { | ||
| // When dc.title is missing, fall back to "Untitled" | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath("//ccmm:dataset/ccmm:title", equalTo("Untitled")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmFallbackPublicationYearIs9999() throws Exception { | ||
| // When dc.date.issued and dc.date.accessioned are missing, fall back to "9999" | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath("//ccmm:dataset/ccmm:publication_year", equalTo("9999")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmFallbackSubjectIsUnspecified() throws Exception { | ||
| // When dc.subject is missing, fall back to "unspecified" | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:subject/ccmm:title", equalTo("unspecified")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmFallbackIdentifierUsesOthersHandle() throws Exception { | ||
| // When dc.identifier.uri and dc.identifier.doi are missing, use others/handle | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:identifier/ccmm:value", | ||
| equalTo("http://hdl.handle.net/99999/test-1")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmFallbackRepositoryNameIsUnknown() throws Exception { | ||
| // When repository/name is missing, fall back to "Unknown Repository" | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:metadata_identification/ccmm:qualified_relation/ccmm:relation/ccmm:organization/ccmm:name", | ||
| equalTo("Unknown Repository")))); | ||
| } | ||
|
|
||
| @Test | ||
| public void ccmmFallbackLicenseIsUnspecified() throws Exception { | ||
| // When dc.rights.uri is missing but dc.rights text exists, license IRI is unspecified | ||
| String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); | ||
| assertThat(result, is(ccmm().withXPath( | ||
| "//ccmm:dataset/ccmm:terms_of_use/ccmm:license/ccmm:iri", | ||
| equalTo("https://model.ccmm.cz/vocabulary/ccmm/license/unspecified")))); | ||
| } | ||
|
|
||
| private XmlMatcherBuilder ccmm() { | ||
| return xml() | ||
| .withNamespace("ccmm", CCMM_NS); | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <!-- | ||
| Minimal XOAI metadata XML for CCMM crosswalk fallback testing. | ||
| Missing most DC fields to trigger fallback behavior for required CCMM elements: | ||
| - title (missing -> "Untitled") | ||
| - publication_year (missing issued/accessioned -> "9999") | ||
| - subject (missing -> "unspecified") | ||
| - identifier (only others/handle available) | ||
| - repository name (missing -> "Unknown Repository") | ||
| - license (dc.rights text present but no dc.rights.uri -> "unspecified" license IRI) | ||
| --> | ||
| <metadata xmlns="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
| xsi:schemaLocation="http://www.lyncode.com/xoai http://www.lyncode.com/xsd/xoai.xsd"> | ||
| <element name="dc"> | ||
| <element name="rights"> | ||
| <element name="none"> | ||
| <field name="value">All rights reserved</field> | ||
| </element> | ||
| </element> | ||
| </element> | ||
| <element name="others"> | ||
| <field name="handle">99999/test-1</field> | ||
| <field name="identifier">oai:test.repository:99999/test-1</field> | ||
| <field name="lastModifyDate">2025-01-01 00:00:00.000</field> | ||
| </element> | ||
| </metadata> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <!-- | ||
| Test XOAI metadata XML for CCMM crosswalk testing. | ||
| Includes fields commonly found in NMD/NRP dataset submissions. | ||
| --> | ||
| <metadata xmlns="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
| xsi:schemaLocation="http://www.lyncode.com/xoai http://www.lyncode.com/xsd/xoai.xsd"> | ||
| <element name="dc"> | ||
| <element name="contributor"> | ||
| <element name="author"> | ||
| <element name="none"> | ||
| <field name="value">Novak, Jan</field> | ||
| <field name="value">Svobodova, Marie</field> | ||
| </element> | ||
| </element> | ||
| </element> | ||
| <element name="date"> | ||
| <element name="accessioned"> | ||
| <element name="none"> | ||
| <field name="value">2025-06-15T10:30:00Z</field> | ||
| </element> | ||
| </element> | ||
| <element name="available"> | ||
| <element name="none"> | ||
| <field name="value">2025-06-15T10:30:00Z</field> | ||
| </element> | ||
| </element> | ||
| <element name="issued"> | ||
| <element name="none"> | ||
| <field name="value">2025-03-20</field> | ||
| </element> | ||
| </element> | ||
| </element> | ||
| <element name="identifier"> | ||
| <element name="uri"> | ||
| <element name="none"> | ||
| <field name="value">http://hdl.handle.net/11234/1-5678</field> | ||
| </element> | ||
| </element> | ||
| </element> | ||
| <element name="description"> | ||
| <element name="abstract"> | ||
| <element name="en"> | ||
| <field name="value">A sample dataset for testing CCMM crosswalk output in the OAI-PMH protocol.</field> | ||
| </element> | ||
| </element> | ||
| </element> | ||
| <element name="language"> | ||
| <element name="iso"> | ||
| <element name="none"> | ||
| <field name="value">ces</field> | ||
| </element> | ||
| </element> | ||
| </element> | ||
| <element name="publisher"> | ||
| <element name="none"> | ||
| <field name="value">Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics</field> | ||
| </element> | ||
| </element> | ||
| <element name="rights"> | ||
| <element name="none"> | ||
| <field name="value">Creative Commons - Attribution 4.0 International</field> | ||
| </element> | ||
| <element name="uri"> | ||
| <element name="none"> | ||
| <field name="value">https://creativecommons.org/licenses/by/4.0/</field> | ||
| </element> | ||
| </element> | ||
| <element name="label"> | ||
| <element name="none"> | ||
| <field name="value">CC BY 4.0</field> | ||
| </element> | ||
| </element> | ||
| </element> | ||
| <element name="subject"> | ||
| <element name="none"> | ||
| <field name="value">linguistics</field> | ||
| <field name="value">Czech language</field> | ||
| <field name="value">NLP</field> | ||
| </element> | ||
| </element> | ||
| <element name="title"> | ||
| <element name="none"> | ||
| <field name="value">Czech NLP Dataset v2.0</field> | ||
| </element> | ||
| <element name="alternative"> | ||
| <element name="none"> | ||
| <field name="value">CND 2.0</field> | ||
| </element> | ||
| </element> | ||
| </element> | ||
| <element name="type"> | ||
| <element name="none"> | ||
| <field name="value">corpus</field> | ||
| </element> | ||
| </element> | ||
| </element> | ||
| <element name="others"> | ||
| <field name="handle">11234/1-5678</field> | ||
| <field name="identifier">oai:lindat.mff.cuni.cz:11234/1-5678</field> | ||
| <field name="lastModifyDate">2025-06-15 10:30:00.000</field> | ||
| </element> | ||
| <element name="repository"> | ||
| <field name="name">LINDAT/CLARIAH-CZ</field> | ||
| <field name="mail">lindat-help@ufal.mff.cuni.cz</field> | ||
| </element> | ||
| </metadata> |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test suite lacks coverage for the fallback scenarios where required fields are missing. Consider adding tests that verify the behavior when title, identifiers, publication year, time references, and subjects are missing to ensure fallback values (like "Untitled", "9999", etc.) are correctly produced. This is particularly important for required CCMM fields.