From 43105ad4f4492e19f66ea84a52b1d2787b8308a2 Mon Sep 17 00:00:00 2001 From: jweiser Date: Wed, 28 Jan 2026 18:05:48 -0500 Subject: [PATCH 1/3] reworks st id mapping to use custom mapping object --- .../release/downloaddirectory/Main.java | 1 + .../{ => StableIdMapper}/MapOldStableIds.java | 113 ++++++++++-------- .../StableIdMapper/StableIdMapping.java | 48 ++++++++ 3 files changed, 111 insertions(+), 51 deletions(-) rename src/main/java/org/reactome/release/downloaddirectory/{ => StableIdMapper}/MapOldStableIds.java (83%) create mode 100644 src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java diff --git a/src/main/java/org/reactome/release/downloaddirectory/Main.java b/src/main/java/org/reactome/release/downloaddirectory/Main.java index bfe645cc..3ef24ef2 100644 --- a/src/main/java/org/reactome/release/downloaddirectory/Main.java +++ b/src/main/java/org/reactome/release/downloaddirectory/Main.java @@ -25,6 +25,7 @@ import org.gk.persistence.MySQLAdaptor; import org.reactome.release.downloaddirectory.BioPax.BioPax; import org.reactome.release.downloaddirectory.GenerateGOAnnotationFile.CreateGOAFile; +import org.reactome.release.downloaddirectory.StableIdMapper.MapOldStableIds; public class Main { private static final Logger logger = LogManager.getLogger(); diff --git a/src/main/java/org/reactome/release/downloaddirectory/MapOldStableIds.java b/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/MapOldStableIds.java similarity index 83% rename from src/main/java/org/reactome/release/downloaddirectory/MapOldStableIds.java rename to src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/MapOldStableIds.java index bd91609f..9c25ac88 100644 --- a/src/main/java/org/reactome/release/downloaddirectory/MapOldStableIds.java +++ b/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/MapOldStableIds.java @@ -1,4 +1,4 @@ -package org.reactome.release.downloaddirectory; +package org.reactome.release.downloaddirectory.StableIdMapper; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -36,58 +36,53 @@ public static void execute(MySQLAdaptor dba, String releaseNumber) throws Except List dbIds = new ArrayList<>(dbIdToStableIds.keySet()); Collections.sort(dbIds); + logger.info("Retrieving current stable identifiers from " + dba.getDBName()); + Set currentStableIdentifiers = getCurrentStableIdentifiers(dba); + // Iterate through array of stable IDs associated with DB ID, splitting into human and non-human groups. - List> hsaIds = new ArrayList<>(); - List> nonHsaIds = new ArrayList<>(); + List hsaIds = new ArrayList<>(); + List nonHsaIds = new ArrayList<>(); for (String dbId : dbIds) { List stableIds = dbIdToStableIds.get(dbId); Collections.sort(stableIds); + stableIds = sortWithCurrentStableIdAsPrimary(stableIds, currentStableIdentifiers); + StableIdMapping stableIdMapping = new StableIdMapping(stableIds); // After sorting the first stable ID in the array is considered the primary ID. // An Array of Arrays is used here, with each interior array's first element being // the primaryId string and the second element being an array of the remaining stable IDs. // Example: [R-HSA-1006169, [REACT_118604]], [R-HSA-1006173, [REACT_119254]]] - if (!(stableIds.size() < 2) || (stableIds.get(0).matches("^R-.*"))) + if (stableIdMapping.hasSecondaryIds() || stableIdMapping.hasNewFormatPrimaryId()) { - String primaryId = stableIds.get(0); - stableIds.remove(0); - ArrayList organizedIds = new ArrayList<>(); - if (primaryId.matches("R-HSA.*")) + if (stableIdMapping.isHuman()) { - organizedIds.add(primaryId); - organizedIds.add(stableIds); - hsaIds.add(organizedIds); + hsaIds.add(stableIdMapping); } else { - organizedIds.add(primaryId); - organizedIds.add(stableIds); - nonHsaIds.add(organizedIds); + nonHsaIds.add(stableIdMapping); } } } // Reorder the data so that the interior arrays that have only 1 element are going to be output first. - List> combinedIds = new ArrayList<>(); + List combinedIds = new ArrayList<>(); combinedIds.addAll(hsaIds); combinedIds.addAll(nonHsaIds); - List> stableIdsToOldIdsMappings = new ArrayList<>(); - List> deferredIds = new ArrayList<>(); - for (List stableIdsArray : combinedIds) + List stableIdsToOldIdsMappings = new ArrayList<>(); + List deferredIds = new ArrayList<>(); + for (StableIdMapping stableIdMapping : combinedIds) { @SuppressWarnings("unchecked") - List secondaryIds = (List) stableIdsArray.get(1); + List secondaryIds = stableIdMapping.getSecondaryIds(); if (secondaryIds.size() > 1) { - deferredIds.add(stableIdsArray); + deferredIds.add(stableIdMapping); } else { - stableIdsToOldIdsMappings.add(stableIdsArray); + stableIdsToOldIdsMappings.add(stableIdMapping); } } stableIdsToOldIdsMappings.addAll(deferredIds); - logger.info("Retrieving current stable identifiers from " + dba.getDBName()); - Set currentStableIdentifiers = getCurrentStableIdentifiers(dba); - writeMappingsToFile(releaseNumber, stableIdsToOldIdsMappings, currentStableIdentifiers); logger.info("MapOldStableIds finished"); @@ -110,14 +105,23 @@ private static ResultSet retrieveAllStableIdentifiers(MySQLAdaptor dba) throws C } /** - * Checks that the primary identifier taken from the stable_identifiers database is currently used, and it has secondary mappings. - * @param currentStableIdentifiers Set - Set of all StableIdentifiers currently in database. - * @param primaryId String - Primary StableIdentifier that maps to secondaryIds. - * @param secondaryIds List - All StableIdentifiers (old and new formats) that map to the primary stable identifier. - * @return boolean, indicating it is a currently used StableIdentifier with secondary mappings. + * Uses the resultSet from the stable_identifiers database query (which retrieved *all* StableIdentifiers and + * their associated instance ids that have ever existed in Reactome) to build a map of instance IDs to StableIdentifiers. + * @param stableIdResults ResultSet - Data result of query to stable_identifiers database for stable identifiers and associated instance IDs. + * @return Map> - Mapping of db IDs to Stable Identifiers. + * @throws SQLException - Thrown if there are issues accessing the ResultSet object. */ - private static boolean currentStableIdentifierWithMapping(Set currentStableIdentifiers, String primaryId, List secondaryIds) { - return currentStableIdentifiers.contains(primaryId) && !secondaryIds.isEmpty(); + private static Map> getDbIdToStableIds(ResultSet stableIdResults) throws SQLException { + Map> dbIdToStableIds = new HashMap<>(); + + // Iterate through returned results of DB IDs and stable IDs + while (stableIdResults.next()) { + String stableId = stableIdResults.getString(1); + String dbId = stableIdResults.getString(2); + + dbIdToStableIds.computeIfAbsent(dbId, k -> new ArrayList<>()).add(stableId); + } + return dbIdToStableIds; } /** @@ -135,24 +139,20 @@ private static Set getCurrentStableIdentifiers(MySQLAdaptor dba) throws return currentStableIdentifiersSet; } - /** - * Uses the resultSet from the stable_identifiers database query (which retrieved *all* StableIdentifiers and - * their associated instance ids that have ever existed in Reactome) to build a map of instance IDs to StableIdentifiers. - * @param stableIdResults ResultSet - Data result of query to stable_identifiers database for stable identifiers and associated instance IDs. - * @return Map> - Mapping of db IDs to Stable Identifiers. - * @throws SQLException - Thrown if there are issues accessing the ResultSet object. - */ - private static Map> getDbIdToStableIds(ResultSet stableIdResults) throws SQLException { - Map> dbIdToStableIds = new HashMap<>(); + private static List sortWithCurrentStableIdAsPrimary(List stableIds, Set currentStableIdentifiers) { + List sortedStableIds = new ArrayList<>(); - // Iterate through returned results of DB IDs and stable IDs - while (stableIdResults.next()) { - String stableId = stableIdResults.getString(1); - String dbId = stableIdResults.getString(2); - - dbIdToStableIds.computeIfAbsent(dbId, k -> new ArrayList<>()).add(stableId); + List deferredStableIds = new ArrayList<>(); + for (String stableId : stableIds) { + if (currentStableIdentifiers.contains(stableId)) { + sortedStableIds.add(stableId); + } else { + deferredStableIds.add(stableId); + } } - return dbIdToStableIds; + sortedStableIds.addAll(deferredStableIds); + + return sortedStableIds; } /** @@ -163,19 +163,30 @@ private static Map> getDbIdToStableIds(ResultSet stableIdRe * @param currentStableIdentifiers Set, all StableIdentifiers in current release database. * @throws IOException - Thrown if there are issues with creating mapping file. */ - private static void writeMappingsToFile(String releaseNumber, List> stableIdsToOldIdsMappings, Set currentStableIdentifiers) throws IOException { + private static void writeMappingsToFile(String releaseNumber, List stableIdsToOldIdsMappings, Set currentStableIdentifiers) throws IOException { Path oldStableIdsMappingFilePath = Paths.get(releaseNumber, "reactome_stable_ids.txt"); String header = "# Reactome stable IDs for release " + releaseNumber + "\n" + "Stable_ID\told_identifier(s)\n"; Files.write(oldStableIdsMappingFilePath, header.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); - for (List stableIdsArray : stableIdsToOldIdsMappings) + for (StableIdMapping stableIdMapping : stableIdsToOldIdsMappings) { - String primaryId = (String) stableIdsArray.get(0); + String primaryId = stableIdMapping.getPrimaryId(); @SuppressWarnings("unchecked") - List secondaryIds = (ArrayList) stableIdsArray.get(1); + List secondaryIds = stableIdMapping.getSecondaryIds(); if (currentStableIdentifierWithMapping(currentStableIdentifiers, primaryId, secondaryIds)) { String line = primaryId + "\t" + String.join(",", secondaryIds) + "\n"; Files.write(oldStableIdsMappingFilePath, line.getBytes(), StandardOpenOption.APPEND); } } } + + /** + * Checks that the primary identifier taken from the stable_identifiers database is currently used, and it has secondary mappings. + * @param currentStableIdentifiers Set - Set of all StableIdentifiers currently in database. + * @param primaryId String - Primary StableIdentifier that maps to secondaryIds. + * @param secondaryIds List - All StableIdentifiers (old and new formats) that map to the primary stable identifier. + * @return boolean, indicating it is a currently used StableIdentifier with secondary mappings. + */ + private static boolean currentStableIdentifierWithMapping(Set currentStableIdentifiers, String primaryId, List secondaryIds) { + return currentStableIdentifiers.contains(primaryId) && !secondaryIds.isEmpty(); + } } diff --git a/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java b/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java new file mode 100644 index 00000000..4204d34a --- /dev/null +++ b/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java @@ -0,0 +1,48 @@ +package org.reactome.release.downloaddirectory.StableIdMapper; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * @author Joel Weiser (joel.weiser@oicr.on.ca) + * Created 1/23/2026 + */ +public class StableIdMapping { + private List stableIds; + + public StableIdMapping(List stableIds) { + this.stableIds = stableIds; + } + + public String getPrimaryId() { + return stableIds.get(0); + } + + public List getSecondaryIds() { + List stableIdsCopy = new ArrayList<>(stableIds); + stableIdsCopy.remove(getPrimaryId()); + return stableIdsCopy; + } + + public boolean hasNewFormatPrimaryId() { + return hasNewFormat(getPrimaryId()); + } + + public boolean hasSecondaryIds() { + return !getSecondaryIds().isEmpty(); + } + + public boolean isHuman() { + return getPrimaryId().matches("R-HSA.*"); + } + + @Override + public String toString() { + return getPrimaryId() + "\t" + String.join(",", getSecondaryIds()) + "\n"; + } + + private boolean hasNewFormat(String stableId) { + return stableId.matches("^R-.*"); + } +} From a60767a76df6f0f664d910b33f26ddcd9d979eff Mon Sep 17 00:00:00 2001 From: jweiser Date: Thu, 29 Jan 2026 00:09:43 -0500 Subject: [PATCH 2/3] updates StableIdMapping to filter out mis-matched identifiers --- .../StableIdMapper/StableIdMapping.java | 62 +++++++++++++++++-- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java b/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java index 4204d34a..9b130c07 100644 --- a/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java +++ b/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java @@ -1,15 +1,20 @@ package org.reactome.release.downloaddirectory.StableIdMapper; import java.util.ArrayList; -import java.util.Collections; +import java.util.Arrays; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * @author Joel Weiser (joel.weiser@oicr.on.ca) * Created 1/23/2026 */ public class StableIdMapping { + private final static Pattern STABLE_ID_PATTERN = Pattern.compile("^R-(.{3})-.*"); + private List stableIds; + private List secondaryIds; public StableIdMapping(List stableIds) { this.stableIds = stableIds; @@ -20,9 +25,12 @@ public String getPrimaryId() { } public List getSecondaryIds() { - List stableIdsCopy = new ArrayList<>(stableIds); - stableIdsCopy.remove(getPrimaryId()); - return stableIdsCopy; + if (this.secondaryIds == null) { + List stableIdsCopy = new ArrayList<>(stableIds); + stableIdsCopy.remove(getPrimaryId()); + this.secondaryIds = filterOutMismatchedIdentifiers(stableIdsCopy); + } + return this.secondaryIds; } public boolean hasNewFormatPrimaryId() { @@ -42,7 +50,53 @@ public String toString() { return getPrimaryId() + "\t" + String.join(",", getSecondaryIds()) + "\n"; } + private List filterOutMismatchedIdentifiers(List stableIds) { + List filteredStableIds = new ArrayList<>(); + boolean debug = false; + for (String stableId : stableIds) { + if (stableId.startsWith("REACT_") || + getAbbreviation(getPrimaryId()).equals(getAbbreviation(stableId)) || + getAbbreviationExceptions().contains(getAbbreviation(stableId)) || + (getAbbreviationExceptions().contains(getPrimaryId()) && noMismatchWithAlreadyFilteredStableIds(stableId, filteredStableIds)) + ) { + filteredStableIds.add(stableId); + } else { + debug = true; + } + } + if (debug) { + System.out.println(this.stableIds); + } + return filteredStableIds; + } + + private boolean noMismatchWithAlreadyFilteredStableIds(String stableId, List filteredStableIds) { + if (getAbbreviationExceptions().contains(getAbbreviation(stableId))) { + return true; + } + for (String filteredStableId : filteredStableIds) { + if (!getAbbreviation(stableId).equals(getAbbreviation(filteredStableId))) { + return false; + } + } + return true; + } + private boolean hasNewFormat(String stableId) { return stableId.matches("^R-.*"); } + + private String getAbbreviation(String stableId) { + Matcher stableIdMatcher = STABLE_ID_PATTERN.matcher(stableId); + + if (stableIdMatcher.find()) { + return stableIdMatcher.group(1); + } else { + return ""; + } + } + + private List getAbbreviationExceptions() { + return Arrays.asList("ALL", "NUL", "HC ", "HCV", "HPC", "HPB", "HBV"); + } } From edd312404cc535b74412dadfabe5fdacf470cfbe Mon Sep 17 00:00:00 2001 From: jweiser Date: Sun, 1 Feb 2026 23:11:14 -0500 Subject: [PATCH 3/3] adds validation, makes copy of stableId list, removes debugging statements --- .../StableIdMapper/StableIdMapping.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java b/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java index 9b130c07..88c29640 100644 --- a/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java +++ b/src/main/java/org/reactome/release/downloaddirectory/StableIdMapper/StableIdMapping.java @@ -13,11 +13,15 @@ public class StableIdMapping { private final static Pattern STABLE_ID_PATTERN = Pattern.compile("^R-(.{3})-.*"); - private List stableIds; + private final List stableIds; private List secondaryIds; public StableIdMapping(List stableIds) { - this.stableIds = stableIds; + if (stableIds == null || stableIds.isEmpty()) { + throw new IllegalArgumentException("stableIds can not be null or empty"); + } + + this.stableIds = new ArrayList<>(stableIds); } public String getPrimaryId() { @@ -52,7 +56,6 @@ public String toString() { private List filterOutMismatchedIdentifiers(List stableIds) { List filteredStableIds = new ArrayList<>(); - boolean debug = false; for (String stableId : stableIds) { if (stableId.startsWith("REACT_") || getAbbreviation(getPrimaryId()).equals(getAbbreviation(stableId)) || @@ -60,13 +63,8 @@ private List filterOutMismatchedIdentifiers(List stableIds) { (getAbbreviationExceptions().contains(getPrimaryId()) && noMismatchWithAlreadyFilteredStableIds(stableId, filteredStableIds)) ) { filteredStableIds.add(stableId); - } else { - debug = true; } } - if (debug) { - System.out.println(this.stableIds); - } return filteredStableIds; }