-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathuniqueFasta.py
More file actions
executable file
·50 lines (35 loc) · 1.25 KB
/
uniqueFasta.py
File metadata and controls
executable file
·50 lines (35 loc) · 1.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/home/mplace/anaconda3/bin/python
"""
uniqueFasta.py
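Print the unique sequences in a FASTA file to standard output.
Requires Biopython.
Each sequence string is used as a dictionary key; the IDs of records that
share a sequence are joined with "|" to form a single header.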
"""
from Bio import SeqIO
import argparse
import textwrap
def main():
    """
    Print the unique sequences of a FASTA file to standard output.

    Reads the FASTA file named by the required -f1/--file1 command line
    option with SeqIO.parse, groups the record IDs by their sequence
    string, and prints one FASTA record per distinct sequence.  The header
    of each printed record is the IDs of all input records sharing that
    sequence, joined with "|" in the order they were read; the sequence is
    written in lines of at most 80 characters.
    """
    line_width = 80

    # handle command line arguments
    cmdparser = argparse.ArgumentParser(
        description="Produce a unique fasta sequence file",
        prog='uniqueFasta.py')
    # `required` takes a boolean; the string 'true' only worked because any
    # non-empty string is truthy.
    cmdparser.add_argument('-f1', '--file1', action='store', required=True,
                           dest='FILE1',
                           help='REQUIRED, fasta file (.fasta, .fa , .fsa)')
    cmdResults = vars(cmdparser.parse_args())

    # argparse exits on its own when the required option is missing, so
    # FILE1 is always set by the time we get here.
    ids_by_seq = {}
    for record in SeqIO.parse(cmdResults['FILE1'], "fasta"):
        # Collect IDs in a list and join once at output time rather than
        # growing a string on every duplicate.
        ids_by_seq.setdefault(str(record.seq), []).append(record.id)

    for seq, ids in ids_by_seq.items():
        print(">%s" % "|".join(ids))
        # Slice at a fixed width instead of using textwrap.wrap: textwrap is
        # a prose wrapper that treats "-" as a break opportunity, so gapped
        # sequences could be emitted with ragged line lengths.
        for start in range(0, len(seq), line_width):
            print(seq[start:start + line_width])
# Run main() only when this file is executed as a script, so that importing
# the module does not parse command line arguments or print anything.
if __name__ == "__main__":
    main()