diff --git a/dspace-oai/src/test/java/org/dspace/xoai/tests/stylesheets/CcmmXslTest.java b/dspace-oai/src/test/java/org/dspace/xoai/tests/stylesheets/CcmmXslTest.java new file mode 100644 index 000000000000..fb7834cef980 --- /dev/null +++ b/dspace-oai/src/test/java/org/dspace/xoai/tests/stylesheets/CcmmXslTest.java @@ -0,0 +1,191 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.xoai.tests.stylesheets; + +import static org.dspace.xoai.tests.support.XmlMatcherBuilder.xml; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.Is.is; +import static org.hamcrest.core.IsEqual.equalTo; + +import org.dspace.xoai.tests.support.XmlMatcherBuilder; +import org.junit.Test; + +/** + * Tests for the CCMM (Czech Common Metadata Model) 1.1.0 OAI-PMH crosswalk. + * + * @see CCMM Schema + * @see Issue #1145 + */ +public class CcmmXslTest extends AbstractXSLTest { + + private static final String CCMM_NS = "https://schema.ccmm.cz/research-data/1.1"; + + @Test + public void ccmmCanTransformInput() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath("//ccmm:title", equalTo("Czech NLP Dataset v2.0")))); + } + + @Test + public void ccmmContainsPublicationYear() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath("//ccmm:publication_year", equalTo("2025")))); + } + + @Test + public void ccmmContainsIdentifier() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:identifier/ccmm:value", + equalTo("http://hdl.handle.net/11234/1-5678")))); + } + + @Test + public void ccmmContainsCreator() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:qualified_relation[1]/ccmm:relation/ccmm:person/ccmm:name", + equalTo("Novak, Jan")))); + } + + @Test + public void ccmmContainsSubjects() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:subject[1]/ccmm:title", + equalTo("linguistics")))); + } + + @Test + public void ccmmContainsResourceType() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:resource_type/ccmm:label", + equalTo("corpus")))); + } + + @Test + public void ccmmContainsDescription() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:description/ccmm:description_text", + equalTo("A sample dataset for testing CCMM crosswalk output in the OAI-PMH protocol.")))); + } + + @Test + public void ccmmContainsLicense() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:terms_of_use/ccmm:license/ccmm:iri", + equalTo("https://creativecommons.org/licenses/by/4.0/")))); + } + + @Test + public void ccmmContainsAccessRights() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:terms_of_use/ccmm:access_rights/ccmm:iri", + equalTo("http://purl.org/coar/access_right/c_abf2")))); + } + + @Test + public void ccmmContainsPrimaryLanguage() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:primary_language/ccmm:label", + equalTo("ces")))); + } + + @Test + public void ccmmContainsAlternateTitle() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:alternate_title/ccmm:title", + equalTo("CND 2.0")))); + } + + @Test + public void ccmmContainsPublisher() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:qualified_relation[ccmm:role/ccmm:label='Distributor']/ccmm:relation/ccmm:organization/ccmm:name", + equalTo("Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics")))); + } + + @Test + public void ccmmContainsMetadataIdentification() throws Exception { + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:metadata_identification/ccmm:conforms_to_standard/ccmm:iri", + equalTo("https://schema.ccmm.cz/research-data/1.1")))); + } + + @Test + public void ccmmCanTransformBasicXoaiInput() throws Exception { + // Test with the default xoai-test1.xml (simpler data) to ensure crosswalk + // handles missing fields gracefully + String result = apply("ccmm.xsl").to(resource("xoai-test1.xml")); + assertThat(result, is(ccmm().withXPath("//ccmm:title", equalTo("Test Webpage")))); + } + + // ---- Fallback scenario tests ---- + + @Test + public void ccmmFallbackTitleIsUntitled() throws Exception { + // When dc.title is missing, fallback to "Untitled" + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); + assertThat(result, is(ccmm().withXPath("//ccmm:dataset/ccmm:title", equalTo("Untitled")))); + } + + @Test + public void ccmmFallbackPublicationYearIs9999() throws Exception { + // When dc.date.issued and dc.date.accessioned are missing, fallback to "9999" + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); + assertThat(result, is(ccmm().withXPath("//ccmm:dataset/ccmm:publication_year", equalTo("9999")))); + } + + @Test + public void ccmmFallbackSubjectIsUnspecified() throws Exception { + // When dc.subject is missing, fallback to "unspecified" + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:subject/ccmm:title", equalTo("unspecified")))); + } + + @Test + public void ccmmFallbackIdentifierUsesOthersHandle() throws Exception { + // When dc.identifier.uri and dc.identifier.doi are missing, use others/handle + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:identifier/ccmm:value", + equalTo("http://hdl.handle.net/99999/test-1")))); + } + + @Test + public void ccmmFallbackRepositoryNameIsUnknown() throws Exception { + // When repository/name is missing, fallback to "Unknown Repository" + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:metadata_identification/ccmm:qualified_relation/ccmm:relation/ccmm:organization/ccmm:name", + equalTo("Unknown Repository")))); + } + + @Test + public void ccmmFallbackLicenseIsUnspecified() throws Exception { + // When dc.rights.uri is missing but dc.rights text exists, license IRI is unspecified + String result = apply("ccmm.xsl").to(resource("xoai-ccmm-minimal-test.xml")); + assertThat(result, is(ccmm().withXPath( + "//ccmm:dataset/ccmm:terms_of_use/ccmm:license/ccmm:iri", + equalTo("https://model.ccmm.cz/vocabulary/ccmm/license/unspecified")))); + } + + private XmlMatcherBuilder ccmm() { + return xml() + .withNamespace("ccmm", CCMM_NS); + } +} diff --git a/dspace-oai/src/test/resources/xoai-ccmm-minimal-test.xml b/dspace-oai/src/test/resources/xoai-ccmm-minimal-test.xml new file mode 100644 index 000000000000..90f7a5496d4d --- /dev/null +++ b/dspace-oai/src/test/resources/xoai-ccmm-minimal-test.xml @@ -0,0 +1,25 @@ + + + + + + + All rights reserved + + + + + 99999/test-1 + oai:test.repository:99999/test-1 + 2025-01-01 00:00:00.000 + + diff --git a/dspace-oai/src/test/resources/xoai-ccmm-test.xml b/dspace-oai/src/test/resources/xoai-ccmm-test.xml new file mode 100644 index 000000000000..96697ac43165 --- /dev/null +++ b/dspace-oai/src/test/resources/xoai-ccmm-test.xml @@ -0,0 +1,107 @@ + + + + + + + + Novak, Jan + Svobodova, Marie + + + + + + + 2025-06-15T10:30:00Z + + + + + 2025-06-15T10:30:00Z + + + + + 2025-03-20 + + + + + + + http://hdl.handle.net/11234/1-5678 + + + + + + + A sample dataset for testing CCMM crosswalk output in the OAI-PMH protocol. + + + + + + + ces + + + + + + Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics + + + + + Creative Commons - Attribution 4.0 International + + + + https://creativecommons.org/licenses/by/4.0/ + + + + + CC BY 4.0 + + + + + + linguistics + Czech language + NLP + + + + + Czech NLP Dataset v2.0 + + + + CND 2.0 + + + + + + corpus + + + + + 11234/1-5678 + oai:lindat.mff.cuni.cz:11234/1-5678 + 2025-06-15 10:30:00.000 + + + LINDAT/CLARIAH-CZ + lindat-help@ufal.mff.cuni.cz + + diff --git a/dspace/config/crosswalks/oai/metadataFormats/ccmm.xsl b/dspace/config/crosswalks/oai/metadataFormats/ccmm.xsl new file mode 100644 index 000000000000..0e1b8e8ec44c --- /dev/null +++ b/dspace/config/crosswalks/oai/metadataFormats/ccmm.xsl @@ -0,0 +1,667 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/contributorType/DataCurator + DataCurator + + + + + https://schema.ccmm.cz/research-data/1.1 + CCMM 1.1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + https://hdl.handle.net/ + Handle + + + + + + + + + + https://doi.org/ + DOI + + + + + + + + + + https://www.w3.org/ns/iana/uri-schemes + URI + + + + + + + + + + + + + https://hdl.handle.net/ + Handle + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/contributorType/Creator + Creator + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/contributorType/Creator + Creator + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/contributorType/Editor + Editor + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/contributorType/Other + Other + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/contributorType/Distributor + Distributor + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/dateType/Issued + Issued + + + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/dateType/Accepted + Accepted + + + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/dateType/Available + Available + + + + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/dateType/Issued + Issued + + + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/resourceTypeGeneral/Dataset + https://model.ccmm.cz/vocabulary/datacite/resourceTypeGeneral/Software + https://model.ccmm.cz/vocabulary/datacite/resourceTypeGeneral/Text + https://model.ccmm.cz/vocabulary/datacite/resourceTypeGeneral/Image + https://model.ccmm.cz/vocabulary/datacite/resourceTypeGeneral/Collection + https://model.ccmm.cz/vocabulary/datacite/resourceTypeGeneral/Other + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + http://purl.org/coar/access_right/c_f1cf + embargoed access + + + http://purl.org/coar/access_right/c_16ec + restricted access + + + http://purl.org/coar/access_right/c_abf2 + open access + + + + + + + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/ccmm/license/unspecified + + + + + + https://model.ccmm.cz/vocabulary/ccmm/license/unspecified + unspecified + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/descriptionType/Abstract + Abstract + + + + + + + + + https://model.ccmm.cz/vocabulary/datacite/descriptionType/Abstract + Abstract + + + + + + + + + + + + + + + + + + + + + + European Commission + + + + + + + + + + + + + + + + + + + + + https://www.w3.org/ns/iana/uri-schemes + URI + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dspace/config/crosswalks/oai/xoai.xml b/dspace/config/crosswalks/oai/xoai.xml index 723aa02d7311..cd502b454e7e 100644 --- a/dspace/config/crosswalks/oai/xoai.xml +++ b/dspace/config/crosswalks/oai/xoai.xml @@ -24,6 +24,7 @@ + This is the default context of the DSpace OAI-PMH data provider. @@ -102,6 +103,7 @@ + This context exports items following the openaire data archive rules. @@ -238,6 +240,16 @@ http://w3id.org/meta-share/meta-share/ ../Schema/ELG-SHARE.xsd + + + ccmm-xml + metadataFormats/ccmm.xsl + https://schema.ccmm.cz/research-data/1.1 + https://techlib.github.io/CCMM/dataset/schema.xsd +