Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.gk.persistence.MySQLAdaptor;
import org.reactome.release.downloaddirectory.BioPax.BioPax;
import org.reactome.release.downloaddirectory.GenerateGOAnnotationFile.CreateGOAFile;
import org.reactome.release.downloaddirectory.StableIdMapper.MapOldStableIds;

public class Main {
private static final Logger logger = LogManager.getLogger();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.reactome.release.downloaddirectory;
package org.reactome.release.downloaddirectory.StableIdMapper;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -36,58 +36,53 @@ public static void execute(MySQLAdaptor dba, String releaseNumber) throws Except
List<String> dbIds = new ArrayList<>(dbIdToStableIds.keySet());
Collections.sort(dbIds);

logger.info("Retrieving current stable identifiers from " + dba.getDBName());
Set<String> currentStableIdentifiers = getCurrentStableIdentifiers(dba);

// Iterate through array of stable IDs associated with DB ID, splitting into human and non-human groups.
List<List<Object>> hsaIds = new ArrayList<>();
List<List<Object>> nonHsaIds = new ArrayList<>();
List<StableIdMapping> hsaIds = new ArrayList<>();
List<StableIdMapping> nonHsaIds = new ArrayList<>();
for (String dbId : dbIds)
{
List<String> stableIds = dbIdToStableIds.get(dbId);
Collections.sort(stableIds);
stableIds = sortWithCurrentStableIdAsPrimary(stableIds, currentStableIdentifiers);

StableIdMapping stableIdMapping = new StableIdMapping(stableIds);
// After sorting the first stable ID in the array is considered the primary ID.
// Each mapping pairs that primary ID with the list of the remaining (secondary) stable IDs.
// Example: R-HSA-1006169 -> [REACT_118604], R-HSA-1006173 -> [REACT_119254]
if (!(stableIds.size() < 2) || (stableIds.get(0).matches("^R-.*")))
if (stableIdMapping.hasSecondaryIds() || stableIdMapping.hasNewFormatPrimaryId())
{
String primaryId = stableIds.get(0);
stableIds.remove(0);
ArrayList<Object> organizedIds = new ArrayList<>();
if (primaryId.matches("R-HSA.*"))
if (stableIdMapping.isHuman())
{
organizedIds.add(primaryId);
organizedIds.add(stableIds);
hsaIds.add(organizedIds);
hsaIds.add(stableIdMapping);
} else {
organizedIds.add(primaryId);
organizedIds.add(stableIds);
nonHsaIds.add(organizedIds);
nonHsaIds.add(stableIdMapping);
}
}
}

// Reorder the data so that mappings with at most one secondary ID are output first; mappings with several secondary IDs are deferred to the end.
List<List<Object>> combinedIds = new ArrayList<>();
List<StableIdMapping> combinedIds = new ArrayList<>();
combinedIds.addAll(hsaIds);
combinedIds.addAll(nonHsaIds);
List<List<Object>> stableIdsToOldIdsMappings = new ArrayList<>();
List<List<Object>> deferredIds = new ArrayList<>();
for (List<Object> stableIdsArray : combinedIds)
List<StableIdMapping> stableIdsToOldIdsMappings = new ArrayList<>();
List<StableIdMapping> deferredIds = new ArrayList<>();
for (StableIdMapping stableIdMapping : combinedIds)
{
@SuppressWarnings("unchecked")
List<String> secondaryIds = (List<String>) stableIdsArray.get(1);
List<String> secondaryIds = stableIdMapping.getSecondaryIds();
if (secondaryIds.size() > 1)
{
deferredIds.add(stableIdsArray);
deferredIds.add(stableIdMapping);
} else {
stableIdsToOldIdsMappings.add(stableIdsArray);
stableIdsToOldIdsMappings.add(stableIdMapping);
}
}
stableIdsToOldIdsMappings.addAll(deferredIds);

logger.info("Retrieving current stable identifiers from " + dba.getDBName());
Set<String> currentStableIdentifiers = getCurrentStableIdentifiers(dba);

writeMappingsToFile(releaseNumber, stableIdsToOldIdsMappings, currentStableIdentifiers);

logger.info("MapOldStableIds finished");
Expand All @@ -110,14 +105,23 @@ private static ResultSet retrieveAllStableIdentifiers(MySQLAdaptor dba) throws C
}

/**
* Checks that the primary identifier taken from the stable_identifiers database is currently used, and it has secondary mappings.
* @param currentStableIdentifiers Set<String> - Set of all StableIdentifiers currently in database.
* @param primaryId String - Primary StableIdentifier that maps to secondaryIds.
* @param secondaryIds List<String> - All StableIdentifiers (old and new formats) that map to the primary stable identifier.
* @return boolean, indicating it is a currently used StableIdentifier with secondary mappings.
* Uses the resultSet from the stable_identifiers database query (which retrieved *all* StableIdentifiers and
* their associated instance ids that have ever existed in Reactome) to build a map of instance IDs to StableIdentifiers.
* @param stableIdResults ResultSet - Data result of query to stable_identifiers database for stable identifiers and associated instance IDs.
* @return Map<String, List<String>> - Mapping of db IDs to Stable Identifiers.
* @throws SQLException - Thrown if there are issues accessing the ResultSet object.
*/
private static boolean currentStableIdentifierWithMapping(Set<String> currentStableIdentifiers, String primaryId, List<String> secondaryIds) {
return currentStableIdentifiers.contains(primaryId) && !secondaryIds.isEmpty();
private static Map<String, List<String>> getDbIdToStableIds(ResultSet stableIdResults) throws SQLException {
    Map<String, List<String>> stableIdsByDbId = new HashMap<>();

    // Each row is read as: column 1 = stable identifier, column 2 = DB ID it is associated with.
    while (stableIdResults.next()) {
        String stableIdentifier = stableIdResults.getString(1);
        String instanceDbId = stableIdResults.getString(2);

        // Start a new list the first time a DB ID is seen, then append to it.
        List<String> idsForInstance = stableIdsByDbId.get(instanceDbId);
        if (idsForInstance == null) {
            idsForInstance = new ArrayList<>();
            stableIdsByDbId.put(instanceDbId, idsForInstance);
        }
        idsForInstance.add(stableIdentifier);
    }
    return stableIdsByDbId;
}

/**
Expand All @@ -135,24 +139,20 @@ private static Set<String> getCurrentStableIdentifiers(MySQLAdaptor dba) throws
return currentStableIdentifiersSet;
}

/**
* Uses the resultSet from the stable_identifiers database query (which retrieved *all* StableIdentifiers and
* their associated instance ids that have ever existed in Reactome) to build a map of instance IDs to StableIdentifiers.
* @param stableIdResults ResultSet - Data result of query to stable_identifiers database for stable identifiers and associated instance IDs.
* @return Map<String, List<String>> - Mapping of db IDs to Stable Identifiers.
* @throws SQLException - Thrown if there are issues accessing the ResultSet object.
*/
private static Map<String, List<String>> getDbIdToStableIds(ResultSet stableIdResults) throws SQLException {
Map<String, List<String>> dbIdToStableIds = new HashMap<>();
private static List<String> sortWithCurrentStableIdAsPrimary(List<String> stableIds, Set<String> currentStableIdentifiers) {
List<String> sortedStableIds = new ArrayList<>();

// Iterate through returned results of DB IDs and stable IDs
while (stableIdResults.next()) {
String stableId = stableIdResults.getString(1);
String dbId = stableIdResults.getString(2);

dbIdToStableIds.computeIfAbsent(dbId, k -> new ArrayList<>()).add(stableId);
List<String> deferredStableIds = new ArrayList<>();
for (String stableId : stableIds) {
if (currentStableIdentifiers.contains(stableId)) {
sortedStableIds.add(stableId);
} else {
deferredStableIds.add(stableId);
}
}
return dbIdToStableIds;
sortedStableIds.addAll(deferredStableIds);

return sortedStableIds;
}

/**
Expand All @@ -163,19 +163,30 @@ private static Map<String, List<String>> getDbIdToStableIds(ResultSet stableIdRe
* @param currentStableIdentifiers Set<String>, all StableIdentifiers in current release database.
* @throws IOException - Thrown if there are issues with creating mapping file.
*/
private static void writeMappingsToFile(String releaseNumber, List<List<Object>> stableIdsToOldIdsMappings, Set<String> currentStableIdentifiers) throws IOException {
private static void writeMappingsToFile(String releaseNumber, List<StableIdMapping> stableIdsToOldIdsMappings, Set<String> currentStableIdentifiers) throws IOException {
Path oldStableIdsMappingFilePath = Paths.get(releaseNumber, "reactome_stable_ids.txt");
String header = "# Reactome stable IDs for release " + releaseNumber + "\n" + "Stable_ID\told_identifier(s)\n";
Files.write(oldStableIdsMappingFilePath, header.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
for (List<Object> stableIdsArray : stableIdsToOldIdsMappings)
for (StableIdMapping stableIdMapping : stableIdsToOldIdsMappings)
{
String primaryId = (String) stableIdsArray.get(0);
String primaryId = stableIdMapping.getPrimaryId();
@SuppressWarnings("unchecked")
List<String> secondaryIds = (ArrayList<String>) stableIdsArray.get(1);
List<String> secondaryIds = stableIdMapping.getSecondaryIds();
if (currentStableIdentifierWithMapping(currentStableIdentifiers, primaryId, secondaryIds)) {
String line = primaryId + "\t" + String.join(",", secondaryIds) + "\n";
Files.write(oldStableIdsMappingFilePath, line.getBytes(), StandardOpenOption.APPEND);
}
}
}

/**
 * Decides whether a mapping is worth reporting: the primary identifier must be present in the set of
 * current stable identifiers, and there must be at least one secondary identifier mapped to it.
 * @param currentStableIdentifiers Set<String> - Set of all StableIdentifiers currently in database.
 * @param primaryId String - Primary StableIdentifier that the secondaryIds map to.
 * @param secondaryIds List<String> - StableIdentifiers (old and new formats) that map to the primary stable identifier.
 * @return boolean - true only when primaryId is in currentStableIdentifiers and secondaryIds is not empty.
 */
private static boolean currentStableIdentifierWithMapping(Set<String> currentStableIdentifiers, String primaryId, List<String> secondaryIds) {
    // Check membership first; the secondary IDs are only inspected for a current primary ID.
    if (!currentStableIdentifiers.contains(primaryId)) {
        return false;
    }
    return !secondaryIds.isEmpty();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package org.reactome.release.downloaddirectory.StableIdMapper;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Holds the stable identifiers associated with one instance and splits them into a primary identifier
 * (the first element of the list supplied to the constructor) and the remaining secondary identifiers.
 * Secondary identifiers whose abbreviation (the three characters between "R-" and the next "-") does not
 * fit with the primary identifier are filtered out; see {@link #getSecondaryIds()}.
 *
 * @author Joel Weiser (joel.weiser@oicr.on.ca)
 * Created 1/23/2026
 */
public class StableIdMapping {
    private final static Pattern STABLE_ID_PATTERN = Pattern.compile("^R-(.{3})-.*");
    private final static Pattern NEW_FORMAT_PATTERN = Pattern.compile("^R-.*");
    private final static Pattern HUMAN_PATTERN = Pattern.compile("R-HSA.*");

    // Abbreviations that are exempt from the "must match the primary ID's abbreviation" rule.
    // NOTE(review): "HC " contains a trailing space - confirm whether this is intentional.
    private final static List<String> ABBREVIATION_EXCEPTIONS =
        Arrays.asList("ALL", "NUL", "HC ", "HCV", "HPC", "HPB", "HBV");

    private final List<String> stableIds;
    // Computed lazily by getSecondaryIds() and cached afterwards.
    private List<String> secondaryIds;

    /**
     * @param stableIds List<String> - Stable identifiers for one instance; the first element is treated as
     *                  the primary identifier. The list is copied, so later changes by the caller have no effect.
     * @throws IllegalArgumentException if stableIds is null or empty.
     */
    public StableIdMapping(List<String> stableIds) {
        if (stableIds == null || stableIds.isEmpty()) {
            throw new IllegalArgumentException("stableIds can not be null or empty");
        }

        this.stableIds = new ArrayList<>(stableIds);
    }

    /**
     * @return String - The first stable identifier passed to the constructor.
     */
    public String getPrimaryId() {
        return stableIds.get(0);
    }

    /**
     * Returns the stable identifiers other than the primary one, after mismatched identifiers have been
     * filtered out. The result is computed on the first call and the same cached list is returned afterwards.
     * @return List<String> - Filtered secondary identifiers, in their original relative order.
     */
    public List<String> getSecondaryIds() {
        if (this.secondaryIds == null) {
            List<String> stableIdsCopy = new ArrayList<>(stableIds);
            // Removes the first occurrence of the primary ID, i.e. the element at index 0.
            stableIdsCopy.remove(getPrimaryId());
            this.secondaryIds = filterOutMismatchedIdentifiers(stableIdsCopy);
        }
        return this.secondaryIds;
    }

    /**
     * @return boolean - true if the primary identifier starts with "R-".
     */
    public boolean hasNewFormatPrimaryId() {
        return hasNewFormat(getPrimaryId());
    }

    /**
     * @return boolean - true if at least one secondary identifier remains after filtering.
     */
    public boolean hasSecondaryIds() {
        return !getSecondaryIds().isEmpty();
    }

    /**
     * @return boolean - true if the primary identifier starts with "R-HSA".
     */
    public boolean isHuman() {
        return HUMAN_PATTERN.matcher(getPrimaryId()).matches();
    }

    /**
     * @return String - Primary identifier, a tab, the comma-joined secondary identifiers, and a newline.
     */
    @Override
    public String toString() {
        return getPrimaryId() + "\t" + String.join(",", getSecondaryIds()) + "\n";
    }

    /**
     * Keeps a candidate identifier when any of the following holds:
     * it starts with "REACT_"; its abbreviation equals the primary identifier's abbreviation;
     * its abbreviation is one of the exception abbreviations; or the primary identifier's abbreviation is
     * an exception abbreviation and the candidate does not conflict with the identifiers kept so far.
     * @param stableIds List<String> - Candidate secondary identifiers.
     * @return List<String> - Candidates that passed the filter, in their original order.
     */
    private List<String> filterOutMismatchedIdentifiers(List<String> stableIds) {
        String primaryAbbreviation = getAbbreviation(getPrimaryId());
        // Compare the primary ID's abbreviation (not the full ID) against the exception abbreviations;
        // a full stable ID can never equal a three-letter abbreviation.
        boolean primaryIsException = getAbbreviationExceptions().contains(primaryAbbreviation);

        List<String> filteredStableIds = new ArrayList<>();
        for (String stableId : stableIds) {
            String abbreviation = getAbbreviation(stableId);
            if (stableId.startsWith("REACT_") ||
                primaryAbbreviation.equals(abbreviation) ||
                getAbbreviationExceptions().contains(abbreviation) ||
                (primaryIsException && noMismatchWithAlreadyFilteredStableIds(stableId, filteredStableIds))
            ) {
                filteredStableIds.add(stableId);
            }
        }
        return filteredStableIds;
    }

    /**
     * @param stableId String - Candidate identifier.
     * @param filteredStableIds List<String> - Identifiers already accepted by the filter.
     * @return boolean - true if the candidate's abbreviation is an exception abbreviation, or if it equals
     *         the abbreviation of every already-accepted identifier (trivially true when none were accepted).
     */
    private boolean noMismatchWithAlreadyFilteredStableIds(String stableId, List<String> filteredStableIds) {
        String abbreviation = getAbbreviation(stableId);
        if (getAbbreviationExceptions().contains(abbreviation)) {
            return true;
        }
        for (String filteredStableId : filteredStableIds) {
            if (!abbreviation.equals(getAbbreviation(filteredStableId))) {
                return false;
            }
        }
        return true;
    }

    private boolean hasNewFormat(String stableId) {
        return NEW_FORMAT_PATTERN.matcher(stableId).matches();
    }

    /**
     * @param stableId String - Stable identifier to inspect.
     * @return String - The three characters between "R-" and the following "-", or an empty string when
     *         the identifier does not have that shape (for example identifiers starting with "REACT_").
     */
    private String getAbbreviation(String stableId) {
        Matcher stableIdMatcher = STABLE_ID_PATTERN.matcher(stableId);

        if (stableIdMatcher.find()) {
            return stableIdMatcher.group(1);
        } else {
            return "";
        }
    }

    private List<String> getAbbreviationExceptions() {
        return ABBREVIATION_EXCEPTIONS;
    }
}