Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ mvn package site
# Build without running tests (useful when tests fail with extra logging)
mvn clean package -DskipTests

# Run tests only
# Run full test suite (always use this to verify changes)
mvn test

# Run tests excluding ReferenceIntegrityTest with specific Cucumber tags
# Run a targeted subset during development (NOT for verifying changes — excludes ReferenceIntegrityTest)
mvn test -Dtest=\!ReferenceIntegrityTest* -Dcucumber.filter.tags='@v3.7.x'

# Run a single Cucumber scenario by name (issue number and subtest, e.g. #1458-1)
Expand Down Expand Up @@ -172,7 +172,7 @@ Alternatively, use `--verbose 1` flag for INFO level output in reports (default
- Source/Target: Java 17 (pom.xml:172-173)

### Main Dependencies
- **pds4-jparser** (version 2.11.0) - PDS4 label parsing and validation
- **pds4-jparser** (version 3.2.0-SNAPSHOT) - PDS4 label parsing and validation; **source is at `../pds4-jparser`** — check there first when tracing errors from pds4-jparser classes (e.g. `FieldValueValidator`, `ProblemType`, parser internals)
- **pds3-product-tools** (version 4.4.2) - PDS3 validation
- **Saxon-HE** (version 12.9) - XSLT and XPath processing
- **registry-common** (version 2.1.0-SNAPSHOT) - Registry API integration
Expand Down
32 changes: 32 additions & 0 deletions src/main/java/gov/nasa/pds/tools/util/LabelCacheEntry.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package gov.nasa.pds.tools.util;

import java.util.ArrayList;

/**
 * Immutable cache of identifier data extracted from a parsed label, used to avoid re-parsing
 * during the referential integrity phase.
 *
 * <p>Every list handed to the constructor is copied, and every getter returns a fresh copy, so
 * neither the code that built the entry nor the code that reads it can alter the cached values.
 * A {@code null} list is stored as an empty list, so the getters never return {@code null}.
 */
public class LabelCacheEntry {
  private final ArrayList<String> logicalIdentifiers;
  private final ArrayList<String> lidOrLidVidReferences;
  private final ArrayList<String> contextAreaRefs;

  /**
   * Creates an entry holding snapshots of the given lists.
   *
   * @param logicalIdentifiers the logical identifiers found in the label; may be {@code null}
   * @param lidOrLidVidReferences the LID or LIDVID references found in the label; may be
   *        {@code null}
   * @param contextAreaRefs the references found in the label's context area; may be {@code null}
   */
  public LabelCacheEntry(ArrayList<String> logicalIdentifiers,
      ArrayList<String> lidOrLidVidReferences, ArrayList<String> contextAreaRefs) {
    this.logicalIdentifiers = snapshot(logicalIdentifiers);
    this.lidOrLidVidReferences = snapshot(lidOrLidVidReferences);
    this.contextAreaRefs = snapshot(contextAreaRefs);
  }

  /**
   * @return a copy of the cached logical identifiers; never {@code null}
   */
  public ArrayList<String> getLogicalIdentifiers() {
    return snapshot(logicalIdentifiers);
  }

  /**
   * @return a copy of the cached LID or LIDVID references; never {@code null}
   */
  public ArrayList<String> getLidOrLidVidReferences() {
    return snapshot(lidOrLidVidReferences);
  }

  /**
   * @return a copy of the cached context area references; never {@code null}
   */
  public ArrayList<String> getContextAreaRefs() {
    return snapshot(contextAreaRefs);
  }

  /**
   * Copies a list so the cached state never shares a mutable instance with outside code.
   *
   * @param values the list to copy; may be {@code null}
   * @return a new list with the same elements in the same order, or an empty list when
   *         {@code values} is {@code null}
   */
  private static ArrayList<String> snapshot(ArrayList<String> values) {
    if (values == null) {
      return new ArrayList<String>();
    }
    return new ArrayList<String>(values);
  }
}
16 changes: 14 additions & 2 deletions src/main/java/gov/nasa/pds/tools/util/LabelUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -365,14 +365,20 @@ public static synchronized ArrayList<String> getIdentifiersCommon(DOMSource sour
* @return lidOrLidVidReference The LID or LIDVID referenced in this label.
*/
public static ArrayList<String> getLidVidReferences(DOMSource source, URL context) {
  // Two-argument form: delegate to the three-argument overload with carriage-return
  // reporting switched on.
  final boolean reportCarriageReturns = true;
  return LabelUtil.getLidVidReferences(source, context, reportCarriageReturns);
}

public static ArrayList<String> getLidVidReferences(DOMSource source, URL context,
boolean reportCarriageReturns) {
LOG.debug("getLidVidReferences:MY_SOURCE[{}]", source);

String[] tagsList = new String[2];
tagsList[0] = LIDVID_REFERENCE;
tagsList[1] = LID_REFERENCE;

ArrayList<String> lidOrLidVidReferences =
LabelUtil.getIdentifiersCommon(source, context, tagsList, INTERNAL_REFERENCE_AREA);
LabelUtil.getIdentifiersCommon(source, context, tagsList, INTERNAL_REFERENCE_AREA,
reportCarriageReturns);

LOG.debug("getLidVidReferences:context,lidOrLidVidReferences {},{}", context,
lidOrLidVidReferences);
Expand All @@ -387,13 +393,19 @@ public static ArrayList<String> getLidVidReferences(DOMSource source, URL contex
* @return logicalIdentifiers A list of logical identifiers in this label.
*/
public static ArrayList<String> getLogicalIdentifiers(DOMSource source, URL context) {
  // Two-argument form: delegate to the three-argument overload with carriage-return
  // reporting switched on.
  final boolean reportCarriageReturns = true;
  return LabelUtil.getLogicalIdentifiers(source, context, reportCarriageReturns);
}

public static ArrayList<String> getLogicalIdentifiers(DOMSource source, URL context,
boolean reportCarriageReturns) {
LOG.debug("getLogicalIdentifiers:MY_SOURCE[{}]", source);

String[] tagsList = new String[1];
tagsList[0] = LOGICAL_IDENTIFIER_TAG;

ArrayList<String> logicalIdentifiers =
LabelUtil.getIdentifiersCommon(source, context, tagsList, IDENTIFICATION_AREA);
LabelUtil.getIdentifiersCommon(source, context, tagsList, IDENTIFICATION_AREA,
reportCarriageReturns);

LOG.debug("getLogicalIdentifiers:context,logicalIdentifiers {},{}", context,
logicalIdentifiers);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ public class ReferentialIntegrityUtil {
private static String[] tagsList = new String[2];
private static HashSet<String> reportedErrorsReferenceSet = new HashSet<>();
private static URL parentBundleURL = null;
private static HashMap<URL, LabelCacheEntry> labelIdentifierCache = new HashMap<>();

/**
* Initialize this class to ready for doing referential checks.
Expand All @@ -163,6 +164,14 @@ public static void initialize(String referenceType, URL target, ProblemListener
ReferentialIntegrityUtil.tagsList[1] = LabelUtil.LID_REFERENCE;
}

/**
 * Stores the identifier data extracted from a label so it can be looked up again by URL.
 *
 * <p>NOTE(review): the cache is keyed by {@link URL}, whose {@code equals}/{@code hashCode} are
 * documented as potentially resolving host names; confirm that only local (file) URLs reach
 * this cache.
 *
 * @param url the label location used as the cache key
 * @param entry the identifier data to associate with {@code url}
 */
public static void cacheLabelIdentifiers(URL url, LabelCacheEntry entry) {
  ReferentialIntegrityUtil.labelIdentifierCache.put(url, entry);
}

/**
 * Looks up the identifier data stored for a label.
 *
 * @param url the label location used as the cache key
 * @return the entry stored under {@code url}, or {@code null} when the cache holds no mapping
 *         for it
 */
public static LabelCacheEntry getCachedLabelIdentifiers(URL url) {
  final LabelCacheEntry cachedEntry = ReferentialIntegrityUtil.labelIdentifierCache.get(url);
  return cachedEntry;
}

/**
* Reset this class to its initial state in case running from regression tests.
*
Expand All @@ -188,6 +197,7 @@ public static void reset() {
ReferentialIntegrityUtil.urlsParsedCumulative.clear();
ReferentialIntegrityUtil.reportedErrorsReferenceSet.clear();
ReferentialIntegrityUtil.parentBundleURL = null;
ReferentialIntegrityUtil.labelIdentifierCache.clear();
}

/**
Expand Down Expand Up @@ -577,6 +587,30 @@ private static void addUniqueReferencesToMap(HashMap<String, HashSetReferenceInf
ReferentialIntegrityUtil.getReferenceType(), parentId, url, numReferencesAdded);
}

/**
 * Records a label's context-area references in the reference map that matches the label's
 * kind, working from already-extracted reference values rather than a parsed document.
 *
 * @param allContextAreaRefs the context-area references to record
 * @param logicalIdentifiers the label's logical identifiers; only the first element is used, as
 *        the parent id passed along with the references. Nothing is recorded when this is
 *        {@code null} or empty.
 * @param lidOrLidVidReferences not read by this method
 * @param labelIsBundleFlag when true the references go to the bundle reference map; takes
 *        precedence over {@code labelIsCollectionFlag}
 * @param labelIsCollectionFlag when true (and the bundle flag is false) the references go to
 *        the collection reference map
 * @param url the location of the label the references were taken from
 */
private static void collectAllContextReferences(ArrayList<String> allContextAreaRefs,
    ArrayList<String> logicalIdentifiers, ArrayList<String> lidOrLidVidReferences,
    boolean labelIsBundleFlag, boolean labelIsCollectionFlag, URL url) {
  // Without a logical identifier there is no parent id to record the references under.
  if (logicalIdentifiers == null || logicalIdentifiers.isEmpty()) {
    return;
  }

  final String parentId = logicalIdentifiers.get(0);
  if (labelIsBundleFlag) {
    addUniqueReferencesToMap(bundleReferenceMap, allContextAreaRefs, url, parentId);
  } else if (labelIsCollectionFlag) {
    addUniqueReferencesToMap(collectionReferenceMap, allContextAreaRefs, url, parentId);
  } else {
    // Neither a bundle nor a collection: record in the cumulative context references.
    addUniqueReferencesToMap(contextReferencesCumulative, allContextAreaRefs, url, parentId);
  }

  LOG.debug("collectAllContextReferences:url,contextReferencesCumulative {},{},{}", url,
      contextReferencesCumulative, contextReferencesCumulative.size());
}

private static void collectAllContextReferences(DOMSource domSource,
ArrayList<String> logicalIdentifiers, ArrayList<String> lidOrLidVidReferences,
boolean labelIsBundleFlag, boolean labelIsCollectionFlag, URL url) {
Expand Down Expand Up @@ -786,16 +820,23 @@ public static void additionalReferentialIntegrityChecks(URL crawlTarget, URL bun
if (TargetExaminer.isTargetCollectionType (child.getUrl())) {
labelIsCollectionFlag = true;
}
xml = db.parse(url.openStream());
domSource = new DOMSource(xml);
// Note that the function getLidVidReferences() collects all Internal_Reference
// elements in the PDS4 core namespace (including those in Reference_List,
// Context_Area, discipline LDD areas, and any other location in the label).
// so the lidOrLidVidReferencesCumulative will be a cumulative collection of all
// references collected in lidOrLidVidReferences for each label.

ArrayList<String> lidOrLidVidReferences = LabelUtil.getLidVidReferences(domSource, url);
ArrayList<String> logicalIdentifiers = LabelUtil.getLogicalIdentifiers(domSource, url);
LabelCacheEntry cached = ReferentialIntegrityUtil.getCachedLabelIdentifiers(url);
ArrayList<String> lidOrLidVidReferences;
ArrayList<String> logicalIdentifiers;
if (cached != null) {
logicalIdentifiers = cached.getLogicalIdentifiers();
lidOrLidVidReferences = cached.getLidOrLidVidReferences();
} else {
xml = db.parse(url.openStream());
domSource = new DOMSource(xml);
// Note that the function getLidVidReferences() collects all Internal_Reference
// elements in the PDS4 core namespace (including those in Reference_List,
// Context_Area, discipline LDD areas, and any other location in the label).
// so the lidOrLidVidReferencesCumulative will be a cumulative collection of all
// references collected in lidOrLidVidReferences for each label.
lidOrLidVidReferences = LabelUtil.getLidVidReferences(domSource, url);
logicalIdentifiers = LabelUtil.getLogicalIdentifiers(domSource, url);
}
LOG.debug("additionalReferentialIntegrityChecks:url,lidOrLidVidReferences {},{}", url,
lidOrLidVidReferences.size());
LOG.debug("additionalReferentialIntegrityChecks:url,logicalIdentifiers {},{}", url,
Expand Down Expand Up @@ -871,8 +912,14 @@ public static void additionalReferentialIntegrityChecks(URL crawlTarget, URL bun
// Collect all the context references defined for each label under the
// "Context_Area" tag.
if (ReferentialIntegrityUtil.contextReferenceCheck) {
ReferentialIntegrityUtil.collectAllContextReferences(domSource, logicalIdentifiers,
lidOrLidVidReferences, labelIsBundleFlag, labelIsCollectionFlag, url);
if (cached != null) {
ReferentialIntegrityUtil.collectAllContextReferences(cached.getContextAreaRefs(),
logicalIdentifiers, lidOrLidVidReferences, labelIsBundleFlag,
labelIsCollectionFlag, url);
} else {
ReferentialIntegrityUtil.collectAllContextReferences(domSource, logicalIdentifiers,
lidOrLidVidReferences, labelIsBundleFlag, labelIsCollectionFlag, url);
}
}

} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
Expand All @@ -11,7 +12,9 @@
import javax.xml.transform.dom.DOMSource;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import gov.nasa.pds.tools.util.LabelCacheEntry;
import gov.nasa.pds.tools.util.LabelUtil;
import gov.nasa.pds.tools.util.ReferentialIntegrityUtil;

public class CrossLabelFileAreaReferenceChecker {
final private static HashMap<String,Boolean> isObservational = new HashMap<String,Boolean>();
Expand All @@ -26,24 +29,25 @@ private static String resolve (String name, ValidationTarget target) throws URIS
* @param name - the file being referenced by the file area
* @param target - the label being validated
* @return true if name and target are unique and only known references and false otherwise
* None of these exceptions should happen because they arise from getting the LID from the target.
* Processing would not have made it this far if that had already failed. So, just pass them
* back to the caller and let it handle them with its own generic exception handler/message.
* @throws IOException
* @throws ParserConfigurationException
* @throws SAXException
* @throws URISyntaxException
*/
public static boolean add (String name, ValidationTarget target, boolean isObs)
public static boolean add(String name, ValidationTarget target, boolean isObs)
throws IOException, ParserConfigurationException, SAXException, URISyntaxException {
boolean success = false;
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document xml = dbf.newDocumentBuilder().parse(target.getUrl().openStream());
DOMSource domSource = new DOMSource(xml);
String full_name = resolve(name, target);
isObservational.put(full_name,
isObs || (isObservational.containsKey(full_name) ? isObservational.get(full_name) : false));
for (String lid : LabelUtil.getLogicalIdentifiers (domSource, target.getUrl())) {
ArrayList<String> logicalIdentifiers;
LabelCacheEntry cached = ReferentialIntegrityUtil.getCachedLabelIdentifiers(target.getUrl());
if (cached != null) {
logicalIdentifiers = cached.getLogicalIdentifiers();
} else {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document xml = dbf.newDocumentBuilder().parse(target.getUrl().openStream());
DOMSource domSource = new DOMSource(xml);
logicalIdentifiers = LabelUtil.getLogicalIdentifiers(domSource, target.getUrl());
}
for (String lid : logicalIdentifiers) {
if (lid.contains("::")) {
lid = lid.substring (0, lid.indexOf("::"));
}
Expand All @@ -64,5 +68,6 @@ public static String getOtherFilename (String name, ValidationTarget target) thr
}
/**
 * Clears the state this checker has accumulated: the observational flags and the known
 * references.
 */
public static void reset() {
  isObservational.clear();
  knownRefs.clear();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ private boolean validate(NodeInfo xml) {
ancestor = ancestor.getParent();
isObservational |= OBS_DATA_TAGS.contains(ancestor.getLocalPart());
}
if (!CrossLabelFileAreaReferenceChecker.add (fullName, target, isObservational)) {
if (!CrossLabelFileAreaReferenceChecker.add(fullName, target, isObservational)) {
this.getListener().addProblem(
new ValidationProblem(
new ProblemDefinition(ExceptionType.ERROR, ProblemType.DUPLICATED_FILE_AREA_REFERENCE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@
import gov.nasa.pds.validate.constants.Constants;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.tiny.TinyNodeImpl;
import javax.xml.transform.dom.DOMSource;
import gov.nasa.pds.tools.util.LabelCacheEntry;
import gov.nasa.pds.tools.util.LabelUtil;
import gov.nasa.pds.tools.util.ReferentialIntegrityUtil;

/**
* Implements a validation chain that validates PDS4 bundles. It is applicable if there is a bundle
Expand Down Expand Up @@ -318,6 +322,7 @@ public void validateLabel() {
if (document != null) {
getContext().put(PDS4Context.LABEL_DOCUMENT, document);
labelIsValidFlag = true; // A non-null document signified that the label is valid.
cacheIdentifiers(document, getTarget());
}
} catch (SAXException | IOException | ParserConfigurationException | TransformerException
| MissingLabelSchemaException e) {
Expand All @@ -337,6 +342,24 @@ public void validateLabel() {
LOG.debug("validateLabel:target,labelIsValidFlag {},{}", target, labelIsValidFlag);
}

/**
 * Extracts the identifiers and references from an already-parsed label and stores them in the
 * {@code ReferentialIntegrityUtil} cache under the label's URL.
 *
 * @param document the parsed label
 * @param targetUrl the label's location; used as the extraction context and as the cache key
 */
private static void cacheIdentifiers(Document document, URL targetUrl) {
  final DOMSource labelSource = new DOMSource(document);
  final String[] referenceTags = {LabelUtil.LIDVID_REFERENCE, LabelUtil.LID_REFERENCE};

  // Carriage-return reporting is on (true) for these two extractions, so the referential
  // integrity phase can rely on the cached values without reporting them again.
  final ArrayList<String> cachedLogicalIds =
      LabelUtil.getLogicalIdentifiers(labelSource, targetUrl, true);
  final ArrayList<String> cachedReferences =
      LabelUtil.getLidVidReferences(labelSource, targetUrl, true);

  // The three context-area extractions pass false: per the original author's note, the
  // getLidVidReferences call with reporting on already covers those nodes.
  final ArrayList<String> cachedContextRefs = new ArrayList<>();
  final ArrayList<String> investigationRefs = LabelUtil.getIdentifiersCommon(labelSource,
      targetUrl, referenceTags, LabelUtil.CONTEXT_AREA_INVESTIGATION_AREA_REFERENCE, false);
  cachedContextRefs.addAll(investigationRefs);
  final ArrayList<String> observingSystemRefs =
      LabelUtil.getIdentifiersCommon(labelSource, targetUrl, referenceTags,
          LabelUtil.CONTEXT_AREA_OBSERVATION_SYSTEM_COMPONENT_REFERENCE, false);
  cachedContextRefs.addAll(observingSystemRefs);
  final ArrayList<String> targetIdentificationRefs = LabelUtil.getIdentifiersCommon(labelSource,
      targetUrl, referenceTags, LabelUtil.CONTEXT_AREA_TARGET_IDENTIFICATION_REFERENCE, false);
  cachedContextRefs.addAll(targetIdentificationRefs);

  final LabelCacheEntry entry =
      new LabelCacheEntry(cachedLogicalIds, cachedReferences, cachedContextRefs);
  ReferentialIntegrityUtil.cacheLabelIdentifiers(targetUrl, entry);
}

private boolean resolveSingleSchema(URL label, Map.Entry<String, URL> schemaLocation,
URL schemaUrl, ProblemContainer container, ProblemContainer labelProblems,
XMLCatalogResolver resolver) {
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/gov/nasa/pds/validate/ValidateLauncher.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
import gov.nasa.pds.tools.util.ReferentialIntegrityUtil;
import gov.nasa.pds.tools.util.XMLExtractor;
import gov.nasa.pds.tools.validate.ContentProblem;
import gov.nasa.pds.tools.validate.CrossLabelFileAreaReferenceChecker;
import gov.nasa.pds.tools.validate.InMemoryRegistrar;
import gov.nasa.pds.tools.validate.ProblemContainer;
import gov.nasa.pds.tools.validate.ProblemDefinition;
Expand Down Expand Up @@ -1627,6 +1628,7 @@ public boolean doValidation(Map<URL, String> checksumManifest) throws Exception
// Because the util class ReferentialIntegrityUtil is static, it needs to be
// reset() when running a regression test.
ReferentialIntegrityUtil.reset();
CrossLabelFileAreaReferenceChecker.reset();

return success;
}
Expand Down
Loading