diff --git a/dspace-api/src/main/java/org/dspace/ctask/general/ItemMetadataQAChecker.java b/dspace-api/src/main/java/org/dspace/ctask/general/ItemMetadataQAChecker.java new file mode 100644 index 000000000000..2e9e7af67168 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/ctask/general/ItemMetadataQAChecker.java @@ -0,0 +1,575 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +/* Created for LINDAT/CLARIN */ +package org.dspace.ctask.general; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.app.util.DCInput; +import org.dspace.app.util.DCInputSet; +import org.dspace.app.util.DCInputsReader; +import org.dspace.app.util.DCInputsReaderException; +import org.dspace.content.DSpaceObject; +import org.dspace.content.Item; +import org.dspace.content.MetadataValue; +import org.dspace.curate.AbstractCurationTask; +import org.dspace.curate.Curator; +import org.dspace.discovery.IsoLangCodes; +import org.dspace.versioning.VersionHistory; +import org.dspace.versioning.factory.VersionServiceFactory; +import org.dspace.versioning.service.VersionHistoryService; + +/** + * Check basic properties of item metadata for quality assurance. + * Ported from DSpace v5 CLARIN implementation. + * + * @author LINDAT/CLARIN + */ +public class ItemMetadataQAChecker extends AbstractCurationTask { + + public static final int CURATE_WARNING = -1000; + private static final Logger log = LogManager.getLogger(ItemMetadataQAChecker.class); + + /** Expected types. */ + private Set dcTypeValuesSet; + + private static final String[] rightsMdStrings = {"dc.rights.uri", "dc.rights.label", "dc.rights"}; + + private Map itemTitles; + private String handlePrefix; + private Map complexInputs; + + private String[] nonRepeatableMetadata; + private String[] strangeMetadata; + private String[] highlyRecommended; + + private VersionHistoryService versionHistoryService; + + @Override + public void init(Curator curator, String taskId) throws IOException { + super.init(curator, taskId); + itemTitles = new HashMap<>(); + handlePrefix = configurationService.getProperty("handle.canonical.prefix"); + + // Initialize expected types from configuration + String[] configuredTypes = configurationService.getArrayProperty( + "lr.curation.metadata.expected.types"); + if (configuredTypes != null && configuredTypes.length > 0) { + dcTypeValuesSet = new HashSet<>(Arrays.asList(configuredTypes)); + } else { + // Use defaults if not configured + dcTypeValuesSet = new HashSet<>(Arrays.asList( + "corpus", "lexicalConceptualResource", "languageDescription", "toolService")); + } + + nonRepeatableMetadata = configurationService.getArrayProperty("lr.curation.metadata.nonrepeatable", + new String[]{ + "local.branding", + "dc.type", + "dc.date.accessioned", + "dc.rights.label", + "dc.date.available", + "dc.source.uri", + "metashare.ResourceInfo#DistributionInfo#LicenseInfo.license" + }); + strangeMetadata = configurationService.getArrayProperty("lr.curation.metadata.strange", new String[]{ + "dc.description.uri", + }); + highlyRecommended = configurationService.getArrayProperty("lr.curation.metadata.recommended", new String[]{ + "dc.subject", + }); + + complexInputs = new HashMap<>(); + loadComplexInputs(); + + versionHistoryService = VersionServiceFactory.getInstance().getVersionHistoryService(); + } + + private void loadComplexInputs() { + try { + DCInputsReader reader = new DCInputsReader(); + // Get all input sets to check complex inputs across all forms + List inputSets = reader.getAllInputs(Integer.MAX_VALUE, 0); + + for (DCInputSet inputSet : inputSets) { + DCInput[][] fields = inputSet.getFields(); + for (DCInput[] row : fields) { + for (DCInput input : row) { + if ("complex".equals(input.getInputType())) { + String name = StringUtils.isBlank(input.getQualifier()) + ? String.format("%s.%s", input.getSchema(), input.getElement()) + : String.format("%s.%s.%s", input.getSchema(), input.getElement(), + input.getQualifier()); + complexInputs.put(name, input.getComplexDefinition().getInputs().size()); + } + } + } + } + } catch (DCInputsReaderException e) { + log.error("Problems fetching input-forms.xml", e); + } + } + + private String getHandle(Item item) { + if (null != item.getHandle()) { + return handlePrefix + item.getHandle(); + } else { + return "item id: " + item.getID(); + } + } + + @Override + public int perform(DSpaceObject dso) throws IOException { + int status = Curator.CURATE_UNSET; + StringBuilder results = new StringBuilder(); + String errStr = "Unknown error"; + + // do on Items only + if (dso instanceof Item) { + Item item = (Item) dso; + if (item.getHandle() != null) { + List metadataValues = itemService.getMetadata( + item, Item.ANY, Item.ANY, Item.ANY, Item.ANY); + + // no metadata? + if (metadataValues == null || metadataValues.isEmpty()) { + errStr = "Does not have any metadata"; + status = Curator.CURATE_FAIL; + } else { + // perform the validation + try { + validateDcType(item, results); + validateTitle(item, results); + validateDcLanguageIso(item, results); + validateRelations(item, results); + validateEmptyMetadata(item, metadataValues, results); + validatePredefinedNonRepeatableMetadata(item, results); + validateStrangeMetadata(item, results); + validateRightsLabels(item, results); + itemWithFilesHasLicense(item); + validateHighlyRecommendedMetadata(item, results); + validateComplexInputs(item, results); + + status = Curator.CURATE_SUCCESS; + } catch (CurateException exc) { + errStr = exc.getMessage(); + status = exc.errCode; + } + } + } else { + // no handle! + errStr = "Does not have a handle"; + status = Curator.CURATE_FAIL; + } + + // format the error if any + switch (status) { + case Curator.CURATE_SUCCESS: + break; + case CURATE_WARNING: + results.append(String.format("Warning: %s %s", errStr, addMagicString(getHandle(item)))); + break; + default: + results.append(String.format("ERROR! %s %s", errStr, addMagicString(getHandle(item)))); + break; + } + } + + report(results.toString()); + setResult(results.toString()); + return status; + } + + /** + * Add magic string for identification in reports. + * @param handle the handle to mark + * @return marked string + */ + private static String addMagicString(String handle) { + return "[[" + handle + "]]"; + } + + // + // dc type checker + // + + private void validateDcType(Item item, StringBuilder results) throws CurateException { + List dcsType = itemService.getMetadataByMetadataString(item, "dc.type"); + // no metadata? + if (dcsType == null || dcsType.isEmpty()) { + throw new CurateException("Does not have dc.type metadata", Curator.CURATE_FAIL); + } + + // check array is not null or length > 0 + for (MetadataValue dcsEntry : dcsType) { + String value = dcsEntry.getValue(); + if (value == null) { + throw new CurateException("dc.type has null value", Curator.CURATE_FAIL); + } + + String typeVal = value.trim(); + + // check if original and trimmed versions match + if (!typeVal.equals(value)) { + throw new CurateException("leading or trailing spaces", Curator.CURATE_FAIL); + } + + // check if the dc.type field is empty + if (Pattern.matches("^\\s*$", typeVal)) { + throw new CurateException("empty value", Curator.CURATE_FAIL); + } + + // check if the value is valid + if (!dcTypeValuesSet.contains(typeVal)) { + throw new CurateException("invalid type (" + typeVal + ")", Curator.CURATE_FAIL); + } + } + } + + /** + * Checks the language code (dc.language.iso) against the possible language codes + * and validates that local.language.name matches the human-readable language names. + * + * @param item the item + * @param results the results + * @throws CurateException if validation fails + */ + private void validateDcLanguageIso(Item item, StringBuilder results) throws CurateException { + List dcsLanguageIso = itemService.getMetadataByMetadataString(item, "dc.language.iso"); + if (dcsLanguageIso != null && !dcsLanguageIso.isEmpty()) { + // Validate dc.language.iso codes + for (MetadataValue langCodeDC : dcsLanguageIso) { + String langCode = langCodeDC.getValue(); + if (langCode == null) { + throw new CurateException("dc.language.iso has null value", Curator.CURATE_FAIL); + } + if (IsoLangCodes.getLangForCode(langCode) == null) { + throw new CurateException( + String.format("Invalid language code - %s", langCode), + Curator.CURATE_FAIL); + } + } + + // Validate local.language.name matches dc.language.iso + List languageNames = itemService.getMetadataByMetadataString(item, "local.language.name"); + if (languageNames == null || languageNames.size() != dcsLanguageIso.size()) { + throw new CurateException( + String.format("local.language.name count [%d] does not match dc.language.iso count [%d]", + languageNames == null ? 0 : languageNames.size(), dcsLanguageIso.size()), + Curator.CURATE_FAIL); + } + + // Validate that each language name corresponds to its ISO code + for (int i = 0; i < dcsLanguageIso.size(); i++) { + String expectedLangName = IsoLangCodes.getLangForCode(dcsLanguageIso.get(i).getValue()); + String actualLangName = languageNames.get(i).getValue(); + if (!expectedLangName.equals(actualLangName)) { + throw new CurateException( + String.format("local.language.name [%s] does not match expected name [%s] for ISO code [%s]", + actualLangName, expectedLangName, dcsLanguageIso.get(i).getValue()), + Curator.CURATE_FAIL); + } + } + } + } + + // + // title checker + // + + private void validateTitle(Item item, StringBuilder results) throws CurateException { + String title = itemService.getMetadataFirstValue(item, "dc", "title", null, Item.ANY); + if (title == null) { + throw new CurateException("Item has no dc.title metadata", Curator.CURATE_FAIL); + } + if (itemTitles.containsKey(title)) { + String msg = String.format("Title [%s] duplicate in [%s]", title, itemTitles.get(title)); + throw new CurateException(msg, Curator.CURATE_FAIL); + } + itemTitles.put(title, getHandle(item)); + } + + // + // relation checker (based on assumption items are not part of multiple version histories) + // + + private void validateRelations(Item item, StringBuilder results) throws CurateException { + String handlePrefixLocal = configurationService.getProperty("handle.canonical.prefix"); + try { + String mdIsReplacedBy = "dc.relation.isreplacedby"; + String mdReplaces = "dc.relation.replaces"; + + List dcsIsReplacedBy = getNonBlankMetadata(item, mdIsReplacedBy); + List dcsReplaces = getNonBlankMetadata(item, mdReplaces); + + if (dcsIsReplacedBy.isEmpty() && dcsReplaces.isEmpty()) { + // item contains no relation metadata, nothing to check + return; + } + + // check if objects referenced by "dc.relation.isreplacedby" exist, + // and reference back to this item with "dc.relation.replaces" metadata + if (!dcsIsReplacedBy.isEmpty()) { + boolean relationsOK = + checkRelations(item, dcsIsReplacedBy, mdIsReplacedBy, mdReplaces, handlePrefixLocal); + if (!relationsOK) { + throw relationMetadataException(mdIsReplacedBy, mdReplaces); + } + } + // check if objects referenced by "dc.relation.replaces" exist, + // and reference forward to this item with "dc.relation.isreplacedby" metadata + if (!dcsReplaces.isEmpty()) { + boolean relationsOK = checkRelations(item, dcsReplaces, mdReplaces, mdIsReplacedBy, handlePrefixLocal); + if (!relationsOK) { + throw relationMetadataException(mdReplaces, mdIsReplacedBy); + } + } + + // everything is OK + results.append(String.format("Item [%s] meets relation requirements. ", getHandle(item))); + + } catch (SQLException | IOException e) { + throw new CurateException(e.getMessage(), Curator.CURATE_FAIL); + } + } + + private List getNonBlankMetadata(Item item, String metadataString) { + return itemService.getMetadataByMetadataString(item, metadataString) + .stream() + .filter(metadataValue -> !StringUtils.isBlank(metadataValue.getValue())) + .collect(Collectors.toList()); + } + + private boolean checkRelations(Item item, + List references, + String referencesFieldName, + String fieldNameInOtherDirection, + String handlePrefixLocal) throws SQLException, IOException, CurateException { + for (MetadataValue ref : references) { + Item referencedItem = getReferencedItem(ref, handlePrefixLocal); + boolean checksPass = hasReferenceBack(referencedItem, item.getHandle(), + fieldNameInOtherDirection, handlePrefixLocal) && + checkVersionHistory(item, referencedItem, referencesFieldName); + if (!checksPass) { + return false; + } + } + return true; + } + + private Item getReferencedItem(MetadataValue relatedReference, String handlePrefixLocal) + throws SQLException, IOException, CurateException { + String referencedItemHandle = getHandle(relatedReference, handlePrefixLocal); + DSpaceObject referencedObject = dereference(Curator.curationContext(), referencedItemHandle); + if (referencedObject instanceof Item) { + return (Item) referencedObject; + } else { + throw new CurateException( + String.format("contains '%s' but the referenced object [[%s]] is not an item or doesn't exist", + relatedReference.getMetadataField().toString('.'), referencedItemHandle), + Curator.CURATE_FAIL); + } + } + + private boolean hasReferenceBack(Item referencedItem, String handleBack, String fieldNameInOtherDirection, + String handlePrefixLocal) throws CurateException { + boolean ok = itemService.getMetadataByMetadataString(referencedItem, fieldNameInOtherDirection).stream() + .map(mdv -> getHandle(mdv, handlePrefixLocal)) + .anyMatch(handle -> handle != null && handle.equals(handleBack)); + if (!ok) { + throw new CurateException(String.format("the referenced item %s does not refer back via %s", + addMagicString(getHandle(referencedItem)), fieldNameInOtherDirection), Curator.CURATE_FAIL); + } + return true; + } + + private String getHandle(MetadataValue relationReference, String handlePrefixLocal) { + String handle = relationReference.getValue(); + if (StringUtils.isNotBlank(handlePrefixLocal) && handle != null && handle.startsWith(handlePrefixLocal)) { + handle = handle.substring(handlePrefixLocal.length()); + } + return handle; + } + + private boolean checkVersionHistory(Item item1, Item item2, String relation) throws SQLException, CurateException { + VersionHistory item1History = versionHistoryService.findByItem(Curator.curationContext(), item1); + if (item1History == null) { + throw new CurateException( + String.format("contains '%s' but it's not part of any version history", relation), + Curator.CURATE_FAIL + ); + } + VersionHistory item2History = versionHistoryService.findByItem(Curator.curationContext(), item2); + if (item2History == null) { + throw new CurateException( + String.format("contains '%s' but the referenced item %s is not part of any version history", + relation, addMagicString(getHandle(item2))), + Curator.CURATE_FAIL + ); + } + + if (!item1History.equals(item2History)) { + throw new CurateException( + String.format("contains '%s' but the referenced item %s is not in the same version history", + relation, addMagicString(getHandle(item2))), + Curator.CURATE_FAIL + ); + } + return true; + } + + private static CurateException relationMetadataException(String leftRel, String rightRel) { + return new CurateException( + String.format("contains '%s' but the referenced object doesn't exist or " + + "doesn't contain '%s' or doesn't point to this item", + leftRel, rightRel), + Curator.CURATE_FAIL + ); + } + + private void validateEmptyMetadata(Item item, List metadataValues, StringBuilder results) + throws CurateException { + for (MetadataValue dc : metadataValues) { + if (dc.getValue() == null) { + throw new CurateException( + String.format("value [%s.%s.%s] is null", dc.getMetadataField().getMetadataSchema().getName(), + dc.getMetadataField().getElement(), dc.getMetadataField().getQualifier()), + Curator.CURATE_FAIL); + } + if (dc.getValue().trim().length() == 0) { + throw new CurateException( + String.format("value [%s.%s.%s] is empty", dc.getMetadataField().getMetadataSchema().getName(), + dc.getMetadataField().getElement(), dc.getMetadataField().getQualifier()), + Curator.CURATE_FAIL); + } + } + } + + private void validatePredefinedNonRepeatableMetadata(Item item, StringBuilder results) throws CurateException { + for (String noDuplicate : nonRepeatableMetadata) { + List vals = itemService.getMetadataByMetadataString(item, noDuplicate); + if (null != vals && vals.size() > 1) { + throw new CurateException( + String.format("value [%s] is present multiple times", noDuplicate), + Curator.CURATE_FAIL); + } + } + } + + private void validateRightsLabels(Item item, StringBuilder results) throws CurateException { + List dcvs = itemService.getMetadata(item, "dc", "rights", "label", Item.ANY); + try { + // Only check if item has files when we have an active session + // Skip this check if we can't access bundles (lazy loading issue) + if (null != item.getHandle() && dcvs != null && !dcvs.isEmpty()) { + if (!itemService.hasUploadedFiles(item, "ORIGINAL")) { + StringBuilder labels = new StringBuilder(); + for (MetadataValue label : dcvs) { + labels.append(label.getValue()).append(" "); + } + throw new CurateException( + String.format("has labels [%s] but no files", labels.toString()), + Curator.CURATE_FAIL); + } + } + } catch (SQLException e) { + throw new CurateException( + String.format("has internal problems [%s]", e.getMessage()), + Curator.CURATE_FAIL); + } + } + + private void validateHighlyRecommendedMetadata(Item item, StringBuilder results) throws CurateException { + for (String md : highlyRecommended) { + List vals = itemService.getMetadataByMetadataString(item, md); + if (null == vals || vals.isEmpty()) { + throw new CurateException( + String.format("does not contain any [%s] values", md), + CURATE_WARNING); + } + } + } + + private void validateStrangeMetadata(Item item, StringBuilder results) throws CurateException { + for (String md : strangeMetadata) { + List vals = itemService.getMetadataByMetadataString(item, md); + if (null != vals && !vals.isEmpty()) { + throw new CurateException( + String.format("contains suspicious [%s] metadata", md), + Curator.CURATE_FAIL); + } + } + } + + private void validateComplexInputs(Item item, StringBuilder results) throws CurateException { + for (Map.Entry entry : complexInputs.entrySet()) { + for (MetadataValue dval : itemService.getMetadataByMetadataString(item, entry.getKey())) { + String val = dval.getValue(); + if (val.split(DCInput.ComplexDefinitions.getSeparator(), -1).length != entry.getValue()) { + throw new CurateException( + String.format( + "%s is a component with %s values but is not stored as such. [%s]", + entry.getKey(), entry.getValue(), val), + Curator.CURATE_FAIL); + } + } + } + } + + private void itemWithFilesHasLicense(Item item) throws CurateException { + try { + boolean fail = false; + StringBuilder sb = new StringBuilder(); + try { + if (itemService.hasUploadedFiles(item, "ORIGINAL")) { + for (String mdString : rightsMdStrings) { + final List vals = itemService.getMetadataByMetadataString(item, mdString); + if (vals == null || vals.isEmpty()) { + fail = true; + sb.append(mdString).append(", "); + } + } + } + } catch (org.hibernate.LazyInitializationException e) { + // Item is detached from session, skip file check + // This can happen when processing large batches + log.debug("Skipping file check for item {} due to detached session", item.getHandle()); + } + if (fail) { + throw new CurateException("There are bitstreams but incomplete rights metadata. Missing: " + + sb.toString(), Curator.CURATE_FAIL); + } + } catch (SQLException throwables) { + throw new CurateException(throwables.getMessage(), Curator.CURATE_ERROR); + } + } + + /** + * Curate exception. + */ + static class CurateException extends Exception { + int errCode; + + public CurateException(String message, int errCode) { + super(message); + this.errCode = errCode; + } + } +} diff --git a/dspace-api/src/test/data/dspaceFolder/config/modules/curate.cfg b/dspace-api/src/test/data/dspaceFolder/config/modules/curate.cfg index 22b44f319a26..b7d0aec48f70 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/modules/curate.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/modules/curate.cfg @@ -15,6 +15,7 @@ plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.RequiredM #plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.MicrosoftTranslator = translate plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.MetadataValueLinkChecker = checklinks plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.ItemHandleChecker = checkhandles +plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.ItemMetadataQAChecker = metadataqa # add new tasks here (or in additional config files) # Testing tasks diff --git a/dspace-api/src/test/java/org/dspace/curate/ItemMetadataQACheckerIT.java b/dspace-api/src/test/java/org/dspace/curate/ItemMetadataQACheckerIT.java new file mode 100644 index 000000000000..7bb5f44e8e13 --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/curate/ItemMetadataQACheckerIT.java @@ -0,0 +1,457 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.curate; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Date; + +import org.dspace.AbstractIntegrationTestWithDatabase; +import org.dspace.authorize.AuthorizeException; +import org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.ItemBuilder; +import org.dspace.content.Collection; +import org.dspace.content.Community; +import org.dspace.content.Item; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.CollectionService; +import org.dspace.content.service.CommunityService; +import org.dspace.content.service.ItemService; +import org.dspace.services.ConfigurationService; +import org.dspace.services.factory.DSpaceServicesFactory; +import org.dspace.versioning.VersionHistory; +import org.dspace.versioning.factory.VersionServiceFactory; +import org.dspace.versioning.service.VersionHistoryService; +import org.dspace.versioning.service.VersioningService; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test for ItemMetadataQAChecker curation task. + * + * @author LINDAT/CLARIN + */ +public class ItemMetadataQACheckerIT extends AbstractIntegrationTestWithDatabase { + private static final String TASK_NAME = "metadataqa"; + + protected CommunityService communityService = ContentServiceFactory.getInstance().getCommunityService(); + protected CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService(); + protected ItemService itemService = ContentServiceFactory.getInstance().getItemService(); + protected VersionHistoryService versionHistoryService = + VersionServiceFactory.getInstance().getVersionHistoryService(); + protected VersioningService versioningService = VersionServiceFactory.getInstance().getVersionService(); + protected ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); + + Community parentCommunity; + Collection collection; + Item validItem; + Item itemWithoutDcType; + Item itemWithInvalidDcType; + Item itemWithInvalidLanguage; + Item itemWithIncorrectLanguageName; + Item itemWithTwoAvailableDates; + Item itemWithTwoAvailableDatesAndLang; + Item itemVersion1; + Item itemVersion2; + Item itemVersion3; + Item itemVersion4; + Item itemVersion5; + private String handlePrefix; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + try { + context.turnOffAuthorisationSystem(); + + // Create a parent community + this.parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Test Community") + .build(); + + // Create a collection + this.collection = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Test Collection") + .build(); + + // Create a valid item with all required metadata + validItem = ItemBuilder.createItem(context, collection) + .withTitle("Valid Test Item") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "language", "iso", "eng") + .withMetadata("local", "language", "name", "English") + .withMetadata("dc", "subject", null, "test subject") + .withMetadata("local", "branding", null, "Test Community") + .build(); + + // Create an item without dc.type + itemWithoutDcType = ItemBuilder.createItem(context, collection) + .withTitle("Item Without Type") + .build(); + + // Create an item with invalid dc.type + itemWithInvalidDcType = ItemBuilder.createItem(context, collection) + .withTitle("Item With Invalid Type") + .withMetadata("dc", "type", null, "invalidType") + .build(); + + // Create an item with invalid language code + itemWithInvalidLanguage = ItemBuilder.createItem(context, collection) + .withTitle("Item With Invalid Language") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "language", "iso", "xyz") + .build(); + + // Create an item with incorrect local.language.name - deliberately set wrong name + // Note: We need to create it without triggering automatic language name addition + itemWithIncorrectLanguageName = ItemBuilder.createItem(context, collection) + .withTitle("Item With Incorrect Language Name") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "subject", null, "test subject") + .withMetadata("local", "branding", null, "Test Community") + .build(); + // Manually add dc.language.iso and wrong local.language.name after creation + itemService.addMetadata(context, itemWithIncorrectLanguageName, "dc", "language", "iso", null, "eng"); + itemService.addMetadata(context, itemWithIncorrectLanguageName, "local", "language", "name", null, + "WrongLanguageName"); + itemService.update(context, itemWithIncorrectLanguageName); + + itemWithTwoAvailableDates = ItemBuilder.createItem(context, collection) + .withTitle("Item With Two Available Dates") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "date", "available", "2020-01-01") + .withMetadata("dc", "date", "available", "2021-01-01") + .build(); + + itemWithTwoAvailableDatesAndLang = ItemBuilder.createItem(context, collection) + .withTitle("Item With Two Available Dates") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "date", "available", "2020-01-01") + .build(); + + itemService.addMetadata(context, itemWithTwoAvailableDatesAndLang,"dc", "date", + "available", "en_US", "2021-01-01"); + + itemVersion1 = ItemBuilder.createItem(context, collection) + .withTitle("Item Version 1") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "subject", null, "test subject") + .withMetadata("local", "branding", null, "Test Community") + .build(); + + itemVersion2 = ItemBuilder.createItem(context, collection) + .withTitle("Item Version 2") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "subject", null, "test subject") + .withMetadata("local", "branding", null, "Test Community") + .build(); + + itemVersion3 = ItemBuilder.createItem(context, collection) + .withTitle("Item Version 3") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "subject", null, "test subject") + .withMetadata("local", "branding", null, "Test Community") + .build(); + + itemVersion4 = ItemBuilder.createItem(context, collection) + .withTitle("Item Version 4") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "subject", null, "test subject") + .withMetadata("local", "branding", null, "Test Community") + .build(); + + itemVersion5 = ItemBuilder.createItem(context, collection) + .withTitle("Item Version 5") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "subject", null, "test subject") + .withMetadata("local", "branding", null, "Test Community") + .build(); + + String ref1 = itemService.getMetadataFirstValue(itemVersion1, "dc", "identifier", "uri", Item.ANY); + String ref2 = itemService.getMetadataFirstValue(itemVersion2, "dc", "identifier", "uri", Item.ANY); + + itemService.addMetadata(context, itemVersion1, "dc", "relation", "isreplacedby", null, ref2); + itemService.addMetadata(context, itemVersion2, "dc", "relation", "replaces", null, ref1); + itemService.addMetadata(context, itemVersion3, "dc", "relation", "replaces", null, ref2); + itemService.update(context, itemVersion1); + itemService.update(context, itemVersion2); + itemService.update(context, itemVersion3); + + VersionHistory versionHistory = versionHistoryService.create(context); + versioningService.createNewVersion(context, versionHistory, itemVersion1, "Version 1", new Date(), 1); + versioningService.createNewVersion(context, versionHistory, itemVersion2, "Version 2", new Date(), 2); + versioningService.createNewVersion(context, versionHistory, itemVersion3, "Version 3", new Date(), 3); + + context.restoreAuthSystemState(); + handlePrefix = configurationService.getProperty("handle.canonical.prefix"); + + } catch (Exception ex) { + fail("Error in init: " + ex.getMessage()); + } + } + + @Test + public void testItemWithTwoAvailableDates() throws IOException { + Curator curator = new Curator(); + curator.addTask(TASK_NAME); + context.setCurrentUser(admin); + + // Run curator task for item with two dc.date.available - should fail + curator.curate(context, itemWithTwoAvailableDates.getHandle()); + int status = curator.getStatus(TASK_NAME); + assertEquals("Curation should fail for item with two dc.date.available", Curator.CURATE_FAIL, status); + String result = curator.getResult(TASK_NAME); + assertTrue("Result should mention multiple dc.date.available", result.contains("dc.date.available")); + } + + @Test + public void testItemWithTwoAvailableDatesAndLang() throws IOException { + Curator curator = new Curator(); + curator.addTask(TASK_NAME); + context.setCurrentUser(admin); + + // Run curator task for item with two dc.date.available with language - should fail + curator.curate(context, itemWithTwoAvailableDatesAndLang.getHandle()); + int status = curator.getStatus(TASK_NAME); + assertEquals("Curation should fail for item with two dc.date.available with language", + Curator.CURATE_FAIL, status); + String result = curator.getResult(TASK_NAME); + assertTrue("Result should mention multiple dc.date.available", result.contains("dc.date.available")); + } + + @Test + public void testValidItem() throws IOException { + Curator curator = new Curator(); + curator.addTask(TASK_NAME); + context.setCurrentUser(admin); + + // Run curator task for valid item - should succeed + curator.curate(context, validItem.getHandle()); + int status = curator.getStatus(TASK_NAME); + assertEquals("Curation should succeed for valid item", Curator.CURATE_SUCCESS, status); + } + + @Test + public void testItemWithoutDcType() throws IOException { + Curator curator = new Curator(); + curator.addTask(TASK_NAME); + context.setCurrentUser(admin); + + // Run curator task for item without dc.type - should fail + curator.curate(context, itemWithoutDcType.getHandle()); + int status = curator.getStatus(TASK_NAME); + assertEquals("Curation should fail for item without dc.type", Curator.CURATE_FAIL, status); + String result = curator.getResult(TASK_NAME); + assertTrue("Result should mention dc.type metadata", result.contains("dc.type")); + } + + @Test + public void testItemWithInvalidDcType() throws IOException { + Curator curator = new Curator(); + curator.addTask(TASK_NAME); + context.setCurrentUser(admin); + + // Run curator task for item with invalid dc.type - should fail + curator.curate(context, itemWithInvalidDcType.getHandle()); + int status = curator.getStatus(TASK_NAME); + assertEquals("Curation should fail for item with invalid dc.type", Curator.CURATE_FAIL, status); + String result = curator.getResult(TASK_NAME); + assertTrue("Result should mention invalid type", result.contains("invalid type")); + } + + @Test + public void testItemWithInvalidLanguageCode() throws IOException { + Curator curator = new Curator(); + curator.addTask(TASK_NAME); + context.setCurrentUser(admin); + + // Run curator task for item with invalid language code - should fail + curator.curate(context, itemWithInvalidLanguage.getHandle()); + int status = curator.getStatus(TASK_NAME); + assertEquals("Curation should fail for item with invalid language code", Curator.CURATE_FAIL, status); + String result = curator.getResult(TASK_NAME); + assertTrue("Result should mention invalid language code", result.contains("Invalid language code")); + } + + @Test + public void testItemWithIncorrectLanguageName() throws IOException { + Curator curator = new Curator(); + curator.addTask(TASK_NAME); + context.setCurrentUser(admin); + + // Run curator task for item with incorrect local.language.name - should fail + curator.curate(context, itemWithIncorrectLanguageName.getHandle()); + int status = curator.getStatus(TASK_NAME); + String result = curator.getResult(TASK_NAME); + System.out.println("Test result: " + result); + assertEquals("Curation should fail for item with incorrect local.language.name", Curator.CURATE_FAIL, status); + assertTrue("Result should mention local.language.name mismatch, but was: " + result, + result.contains("local.language.name") && result.contains("does not match")); + } + + @Test + public void testItemVersion1() throws IOException { + testItemWithCorrectRelationship(itemVersion1, "meets relation requirements"); + } + + @Test + public void testItemVersion2() throws IOException { + testItemWithCorrectRelationship(itemVersion2, "meets relation requirements"); + } + + @Test + public void testItemWithBadRelationship1() throws IOException, SQLException, AuthorizeException { + // itemVersion2 has 'dc.relation.isreplacedby that points to itemVersion4 + // but itemVersion4 doesn't contain 'dc.relation.replaces' metadata + String ref4 = itemService.getMetadataFirstValue(itemVersion4, "dc", "identifier", "uri", Item.ANY); + + context.turnOffAuthorisationSystem(); + itemService.addMetadata(context, itemVersion2, "dc", "relation", "isreplacedby", null, ref4); + itemService.update(context, itemVersion2); + context.restoreAuthSystemState(); + + testItemWithRelationError( + itemVersion2, + "the referenced item [[%s]] does not refer back via %s", + ref4, + "dc.relation.replaces"); + } + + @Test + public void testItemWithBadRelationship2() throws IOException { + String ref2 = itemService.getMetadataFirstValue(itemVersion2, "dc", "identifier", "uri", Item.ANY); + // itemVersion3 has 'dc.relation.replaces' that points back to itemVersion2 + // but itemVersion2 doesn't have 'dc.relation.isreplacedby' that points forward to itemVersion3 + testItemWithRelationError( + itemVersion3, + "the referenced item [[%s]] does not refer back via %s", + ref2, + "dc.relation.isreplacedby"); + } + @Test + public void testItemWithBadRelationship3() throws IOException, SQLException, AuthorizeException { + + context.turnOffAuthorisationSystem(); + String ref = "https://example.org/this-doesnt-resolve"; + itemService.addMetadata(context, itemVersion5, "dc", "relation", "replaces", null, ref); + itemService.update(context, itemVersion5); + context.restoreAuthSystemState(); + + testItemWithRelationError( + itemVersion5, + "contains '%s' but the referenced object [[%s]] is not an item or doesn't exist", + "dc.relation.replaces", + ref); + } + + @Test + public void testItemWithMissingVersionHistory() throws SQLException, IOException, AuthorizeException { + String ref2 = itemService.getMetadataFirstValue(itemVersion2, "dc", "identifier", "uri", Item.ANY); + String ref4 = itemService.getMetadataFirstValue(itemVersion4, "dc", "identifier", "uri", Item.ANY); + + context.turnOffAuthorisationSystem(); + itemService.addMetadata(context, itemVersion2, "dc", "relation", "isreplacedby", null, ref4); + itemService.addMetadata(context, itemVersion4, "dc", "relation", "replaces", null, ref2); + itemService.update(context, itemVersion2); + itemService.update(context, itemVersion4); + context.restoreAuthSystemState(); + + testItemWithRelationError(itemVersion4, + "contains '%s' but it's not part of any version history", "dc.relation.replaces"); + } + + @Test + public void testItemWithMissingVersionHistoryForReferencedItem() + throws SQLException, IOException, AuthorizeException { + String ref2 = itemService.getMetadataFirstValue(itemVersion2, "dc", "identifier", "uri", Item.ANY); + String ref4 = itemService.getMetadataFirstValue(itemVersion4, "dc", "identifier", "uri", Item.ANY); + + context.turnOffAuthorisationSystem(); + itemService.addMetadata(context, itemVersion2, "dc", "relation", "isreplacedby", null, ref4); + itemService.addMetadata(context, itemVersion4, "dc", "relation", "replaces", null, ref2); + itemService.update(context, itemVersion2); + itemService.update(context, itemVersion4); + context.restoreAuthSystemState(); + + testItemWithRelationError(itemVersion2, + "contains '%s' but the referenced item [[%s]] is not part of any version history", + "dc.relation.isreplacedby", ref4); + } + + @Test + public void testItemWithNotMatchingVersionHistory() throws SQLException, IOException, AuthorizeException { + String ref2 = itemService.getMetadataFirstValue(itemVersion2, "dc", "identifier", "uri", Item.ANY); + String ref4 = itemService.getMetadataFirstValue(itemVersion4, "dc", "identifier", "uri", Item.ANY); + + context.turnOffAuthorisationSystem(); + itemService.addMetadata(context, itemVersion2,"dc", "relation", "isreplacedby", null, ref4); + itemService.addMetadata(context, itemVersion4,"dc", "relation", "replaces", null, ref2); + itemService.update(context, itemVersion2); + itemService.update(context, itemVersion4); + context.restoreAuthSystemState(); + + VersionHistory versionHistory = versionHistoryService.create(context); + versioningService.createNewVersion(context, versionHistory, itemVersion4, + "Another Version History - Version 1", new Date(), 1); + + testItemWithRelationError(itemVersion4, + "contains '%s' but the referenced item [[%s]] is not in the same version history", + "dc.relation.replaces", ref2); + } + + @Test + public void testItemWithNoRelationMetadata() throws SQLException, IOException { + testItemWithCorrectRelationship(itemVersion4, null); + } + + private void testItemWithCorrectRelationship(Item item, String successMessage) throws IOException { + Curator curator = runCuratorForItem(item); + + int status = curator.getStatus(TASK_NAME); + String result = curator.getResult(TASK_NAME); + assertEquals("Curation should succeed for valid item with relation", Curator.CURATE_SUCCESS, status); + if (successMessage == null) { + assertTrue("Result must be empty, but was " + result, result.isEmpty()); + } else { + assertTrue("Result must contain success message, but was " + result, + result.contains(successMessage) && result.contains(item.getHandle())); + } + } + + private void testItemWithRelationError(Item item, String errorMessage, Object... args) throws IOException { + Curator curator = runCuratorForItem(item); + + int status = curator.getStatus(TASK_NAME); + String result = curator.getResult(TASK_NAME); + assertEquals("Curation should fail for incorrect relationship", Curator.CURATE_FAIL, status); + String failMessage = String.format(errorMessage, args); + assertTrue(String.format("Result: %s\n must contain fail message \n %s ", result, failMessage), + result.contains(failMessage) + ); + } + + private Curator runCuratorForItem(Item item) throws IOException { + Curator curator = new Curator(); + curator.addTask(TASK_NAME); + context.setCurrentUser(admin); + curator.curate(context, item.getHandle()); + return curator; + } + + @After + public void destroy() throws Exception { + super.destroy(); + } +} diff --git a/dspace/config/modules/curate.cfg b/dspace/config/modules/curate.cfg index d36578fa943d..62e6e3644d8f 100644 --- a/dspace/config/modules/curate.cfg +++ b/dspace/config/modules/curate.cfg @@ -16,6 +16,7 @@ plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.RequiredM plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.MetadataValueLinkChecker = checklinks plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.ItemHandleChecker = checkhandles plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.RegisterDOI = registerdoi +plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.ItemMetadataQAChecker = metadataqa #plugin.named.org.dspace.curate.CurationTask = org.dspace.ctask.general.CitationPage = citationpage # add new tasks here (or in additional config files)