Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 43 additions & 26 deletions src/jebl/evolution/io/NexusImporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ public enum NexusBlock {
TAXA,
CHARACTERS,
DATA,
ASSUMPTIONS, // TODO
CALIBRATION, // TODO
UNALIGNED,
DISTANCES,
TREES
Expand Down Expand Up @@ -479,7 +481,7 @@ public List<DistanceMatrix> importDistanceMatrices() throws IOException, ImportE
/**
* Finds the end of the current block.
*/
private void findToken(String query, boolean ignoreCase) throws IOException
protected void findToken(String query, boolean ignoreCase) throws IOException
{
String token;
boolean found = false;
Expand Down Expand Up @@ -624,21 +626,9 @@ private void readDataBlockHeader(String tokenToLookFor, NexusBlock block) throws
}

String token3 = helper.readToken(";");
if (token3.equalsIgnoreCase("NUCLEOTIDE") ||
token3.equalsIgnoreCase("DNA") ||
token3.equalsIgnoreCase("RNA")) {
// replace getSequenceType if there is new data type
sequenceType = getSequenceType(token3);

sequenceType = SequenceType.NUCLEOTIDE;

} else if (token3.equalsIgnoreCase("PROTEIN")) {

sequenceType = SequenceType.AMINO_ACID;

} else if (token3.equalsIgnoreCase("CONTINUOUS")) {

throw new ImportException.UnparsableDataException("Continuous data cannot be parsed at present");

}
} else if (token2.equalsIgnoreCase("INTERLEAVE")) {
isInterleaved = true;
}
Expand All @@ -657,6 +647,33 @@ private void readDataBlockHeader(String tokenToLookFor, NexusBlock block) throws
}
}

/**
 * Maps the token that follows the "DATATYPE" keyword onto a {@link SequenceType}.
 * Subclasses may override this method to recognise additional data types.
 *
 * @param token the token returned from {@link ImportHelper#readToken(String)}.
 * @return {@link SequenceType#NUCLEOTIDE} for "NUCLEOTIDE", "DNA" or "RNA",
 *         {@link SequenceType#AMINO_ACID} for "PROTEIN" (all compared ignoring case),
 *         or {@code null} when the token is none of the keywords handled here.
 * @throws ImportException.UnparsableDataException if the token is "CONTINUOUS" (ignoring case)
 */
protected SequenceType getSequenceType(String token) throws ImportException.UnparsableDataException {
    final boolean isNucleotide = token.equalsIgnoreCase("NUCLEOTIDE")
            || token.equalsIgnoreCase("DNA")
            || token.equalsIgnoreCase("RNA");
    if (isNucleotide) {
        return SequenceType.NUCLEOTIDE;
    }

    if (token.equalsIgnoreCase("PROTEIN")) {
        return SequenceType.AMINO_ACID;
    }

    if (token.equalsIgnoreCase("CONTINUOUS")) {
        throw new ImportException.UnparsableDataException("Continuous data cannot be parsed at present");
    }

    // Unrecognised keyword: callers receive null.
    return null;
}

/**
* Reads sequences in a 'DATA' or 'CHARACTERS' block.
*/
Expand Down Expand Up @@ -852,7 +869,7 @@ private List<Taxon> readTaxaBlock() throws ImportException, IOException {
* @param importHelper ImportHelper which may have read meta comments.
* @throws ImportException.BadFormatException
*/
static void parseAndClearMetaComments(Attributable item, ImportHelper importHelper) throws ImportException.BadFormatException {
public static void parseAndClearMetaComments(Attributable item, ImportHelper importHelper) throws ImportException.BadFormatException {
for (String meta : importHelper.getMetaComments()) {
// A meta-comment which should be in the form:
// \[&label[=value][,label[=value]>[,/..]]\]
Expand Down Expand Up @@ -1354,7 +1371,7 @@ private Node readExternalNode(SimpleRootedTree tree) throws ImportException, IOE
}
}

static void parseMetaCommentPairs(String meta, Attributable item) throws ImportException.BadFormatException {
public static void parseMetaCommentPairs(String meta, Attributable item) throws ImportException.BadFormatException {
// This regex should match key=value pairs, separated by commas
// This can match the following types of meta comment pairs:
// value=number, value="string", value={item1, item2, item3}
Expand Down Expand Up @@ -1393,7 +1410,7 @@ static void parseMetaCommentPairs(String meta, Attributable item) throws ImportE
* @param value the string
* @return the object
*/
static Object parseValue(String value) {
public static Object parseValue(String value) {

value = value.trim();

Expand Down Expand Up @@ -1467,14 +1484,14 @@ static Object parseValue(String value) {

// private stuff
private NexusBlock nextBlock = null;
private String nextBlockName = null;

private int taxonCount = 0, siteCount = 0;
private SequenceType sequenceType = null;
private String gapCharacters = "-";
private String matchCharacters = ".";
private String missingCharacters = "?";
private boolean isInterleaved = false;
protected String nextBlockName = null;

protected int taxonCount = 0, siteCount = 0;
protected SequenceType sequenceType = null;
protected String gapCharacters = "-";
protected String matchCharacters = ".";
protected String missingCharacters = "?";
protected boolean isInterleaved = false;

protected final ImportHelper helper;
}
106 changes: 106 additions & 0 deletions src/jebl/evolution/sequences/Binary.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@

package jebl.evolution.sequences;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Static definitions and lookup helpers for binary data: the two canonical
 * states "0" and "1", plus an unknown state ("?") and a gap state ("-").
 * <p>
 * This class only holds constants and static methods and cannot be instantiated.
 *
 * @author Walter Xie
 */
public final class Binary {

    private Binary() { } // make class uninstantiable

    public static final String NAME = "binary";

    public static final BinaryState ZERO_STATE = new BinaryState("0", "0", 0);
    public static final BinaryState ONE_STATE = new BinaryState("1", "1", 1);

    public static final BinaryState[] CANONICAL_STATES = new BinaryState[] {
        ZERO_STATE, ONE_STATE
    };

    // The unknown and gap states are constructed with both canonical states as their ambiguities.
    public static final BinaryState UNKNOWN_STATE = new BinaryState("Unknown base", "?", 2, CANONICAL_STATES);
    public static final BinaryState GAP_STATE = new BinaryState("Gap", "-", 3, CANONICAL_STATES);

    public static final BinaryState[] STATES = new BinaryState[] {
        ZERO_STATE, ONE_STATE, UNKNOWN_STATE, GAP_STATE
    };

    private static final int STATES_BY_CODE_SIZE = 128;

    /**
     * Lookup table from character code to state. Entries for characters that
     * are not the code of any state stay {@code null} (the default value of a
     * freshly allocated object array).
     */
    private static final BinaryState[] statesByCode = new BinaryState[STATES_BY_CODE_SIZE];
    static {
        for (BinaryState state : STATES) {
            final char code = state.getCode().charAt(0);
            statesByCode[code] = state;
            statesByCode[Character.toLowerCase(code)] = state;
        }
    }

    /**
     * 4, including gap and ambiguity states
     */
    public static int getStateCount() { return STATES.length; } // 4

    /**
     * @return A list of all possible states, including the gap and ambiguity states.
     */
    public static List<State> getStates() { return Collections.unmodifiableList(Arrays.asList((State[])STATES)); }

    /**
     * 2, excluding gap and ambiguity states
     */
    public static int getCanonicalStateCount() { return CANONICAL_STATES.length; }

    /**
     * @return An unmodifiable list of the canonical states, excluding the gap and ambiguity states.
     */
    public static List<BinaryState> getCanonicalStates() { return Collections.unmodifiableList(Arrays.asList(CANONICAL_STATES)); }

    /**
     * Looks up a state by its single-character code.
     *
     * @param code the character code
     * @return the matching state, or {@code null} if the character is outside the
     *         lookup table or is not the code of any state
     */
    public static BinaryState getState(char code) {
        // A char is never negative, so only the upper bound needs checking.
        if (code >= STATES_BY_CODE_SIZE) {
            return null;
        }
        return statesByCode[code];
    }

    /**
     * Looks up a state by the first character of the given string.
     *
     * @param code the code string; only its first character is used
     * @return the matching state, or {@code null} if the string is empty or its
     *         first character is not the code of any state
     * @throws NullPointerException if {@code code} is {@code null}
     */
    public static BinaryState getState(String code) {
        // An empty string has no first character; treat it like any other unrecognised code.
        if (code.isEmpty()) {
            return null;
        }
        return getState(code.charAt(0));
    }

    /**
     * @param index position in {@link #STATES}
     * @return the state stored at that position
     * @throws ArrayIndexOutOfBoundsException if {@code index} is not a valid position in {@link #STATES}
     */
    public static BinaryState getState(int index) {
        return STATES[index];
    }

    public static BinaryState getUnknownState() { return UNKNOWN_STATE; }

    public static BinaryState getGapState() { return GAP_STATE; }

    public static boolean isUnknown(State state) { return state == UNKNOWN_STATE; }

    public static boolean isGap(State state) { return state == GAP_STATE; }

    /**
     * Converts a string of state codes into states, one per character.
     *
     * @param sequenceString the sequence as a string of single-character codes
     * @return an array of the same length; an element is {@code null} where the
     *         character is not the code of any state
     */
    public static BinaryState[] toStateArray(String sequenceString) {
        BinaryState[] seq = new BinaryState[sequenceString.length()];
        for (int i = 0; i < seq.length; i++) {
            seq[i] = getState(sequenceString.charAt(i));
        }
        return seq;
    }

    /**
     * Converts an array of state indices into states.
     *
     * @param indexArray positions in {@link #STATES}, one per site
     * @return an array of the same length holding the state at each index
     * @throws ArrayIndexOutOfBoundsException if any element is not a valid position in {@link #STATES}
     */
    public static BinaryState[] toStateArray(byte[] indexArray) {
        BinaryState[] seq = new BinaryState[indexArray.length];
        for (int i = 0; i < seq.length; i++) {
            // Explicit cast: each byte is an index into STATES, not a character code.
            seq[i] = getState((int) indexArray[i]);
        }
        return seq;
    }

}
30 changes: 30 additions & 0 deletions src/jebl/evolution/sequences/BinaryState.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@

package jebl.evolution.sequences;

/**
 * A single state of the binary alphabet whose constants are defined in {@link Binary}.
 *
 * @author Walter Xie
 */
public final class BinaryState extends State {

    /** Creates a state from a name, a state code and an index. */
    BinaryState(String name, String stateCode, int index) {
        super(name, stateCode, index);
    }

    /** Creates a state that additionally carries the given ambiguities. */
    BinaryState(String name, String stateCode, int index, BinaryState[] ambiguities) {
        super(name, stateCode, index, ambiguities);
    }

    @Override
    public int compareTo(Object o) {
        // The cast rejects comparison with a non-BinaryState object by
        // throwing ClassCastException before delegating to the superclass.
        return super.compareTo((BinaryState) o);
    }

    /** @return {@code true} only for {@link Binary#GAP_STATE}. */
    public boolean isGap() {
        return Binary.GAP_STATE == this;
    }

    /** @return {@link SequenceType#BINARY}. */
    public SequenceType getType() {
        return SequenceType.BINARY;
    }

}
108 changes: 108 additions & 0 deletions src/jebl/evolution/sequences/SequenceType.java
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,90 @@ public String toString() {
}
};

/**
 * Sequence type for binary data. Every operation delegates to the static
 * members of {@link Binary}.
 */
public static final SequenceType BINARY = new SequenceType() {

    // ---- naming ----

    @Override
    public String getName() {
        return Binary.NAME;
    }

    @Override
    public String getNexusDataType() {
        return Binary.NAME;
    }

    @Override
    public int getCodeLength() {
        return 1;
    }

    // ---- state sets ----

    @Override
    public int getStateCount() {
        return Binary.getStateCount();
    }

    @Override
    public List<? extends State> getStates() {
        return Binary.getStates();
    }

    @Override
    public int getCanonicalStateCount() {
        return Binary.getCanonicalStateCount();
    }

    @Override
    public List<? extends State> getCanonicalStates() {
        return Binary.getCanonicalStates();
    }

    // ---- single-state lookup ----

    @Override
    public State getState(String code) {
        return Binary.getState(code);
    }

    @Override
    public State getState(char code) {
        return Binary.getState(code);
    }

    @Override
    public State getState(int index) {
        return Binary.getState(index);
    }

    // ---- unknown and gap states ----

    @Override
    public State getUnknownState() {
        return Binary.getUnknownState();
    }

    @Override
    public boolean isUnknown(State state) {
        return Binary.isUnknown(state);
    }

    @Override
    public State getGapState() {
        return Binary.getGapState();
    }

    @Override
    public boolean isGap(State state) {
        return Binary.isGap(state);
    }

    // ---- whole-sequence conversion ----

    @Override
    public State[] toStateArray(String sequenceString) {
        return Binary.toStateArray(sequenceString);
    }

    @Override
    public State[] toStateArray(byte[] indexArray) {
        return Binary.toStateArray(indexArray);
    }
};


public class Utils {
private Utils() { } // make class uninstantiable

Expand All @@ -352,5 +436,29 @@ public static String getAlphabet(SequenceType sequenceType) {
}
return buffer.toString();
}

/**
 * Resolves a data type name to the matching {@link SequenceType}.
 * <p>
 * Leading and trailing whitespace is ignored and matching is case-insensitive.
 * Lower-casing uses {@link java.util.Locale#ROOT} so the result does not depend
 * on the JVM's default locale (under a Turkish default locale, for example,
 * an upper-case "I" would otherwise lower-case to a dotless i and names such
 * as "BINARY" or "PROTEIN" would not be recognised).
 *
 * @param dataTypeName keywords in Nexus or data type descriptions
 * @return {@code NUCLEOTIDE} for "rna", "dna" or "nucleotide";
 *         {@code AMINO_ACID} for "aminoacid" or "protein";
 *         {@code BINARY} for "binary"
 * @throws UnsupportedOperationException if the name is not one of the keywords above;
 *         the exception message is the name exactly as passed in
 * @throws NullPointerException if {@code dataTypeName} is {@code null}
 */
public static SequenceType getDataType(String dataTypeName) {
    // trim surrounding whitespace, locale-independent lower case
    switch (dataTypeName.trim().toLowerCase(java.util.Locale.ROOT)) {
        // keywords in Nexus DATATYPE
        case "rna":
        case "dna":
        case "nucleotide":
            return NUCLEOTIDE;
        case "aminoacid":
        case "protein":
            return AMINO_ACID;
        case "binary":
            return BINARY;
//        case "standard":
//        case "continuous":
//            return STANDARD; // TODO parse continuous DATATYPE in NexusImporter
        default:
            throw new UnsupportedOperationException(dataTypeName);
    }
}
}
}
Loading