forked from joaks1/python-translation-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranslate.py
More file actions
124 lines (105 loc) · 4.38 KB
/
Copy pathtranslate.py
File metadata and controls
124 lines (105 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#! /usr/bin/env python3
import sys
def translate_sequence(rna_sequence, genetic_code):
    """Translate an RNA sequence into a string of amino acids.

    Translation starts at the first base of `rna_sequence` and proceeds
    codon by codon until a stop codon ('*') is met or no complete codon
    remains. Trailing bases that do not fill a codon are ignored.

    Parameters
    ----------
    rna_sequence : str
        A string representing an RNA sequence (upper or lower-case).
    genetic_code : dict
        A dictionary mapping codons (strings of three upper-case RNA bases)
        to single-letter amino acids. Stop codons map to '*'.

    Returns
    -------
    str
        The translated amino acids. Empty when `rna_sequence` has fewer
        than three bases or begins with a stop codon.

    Raises
    ------
    KeyError
        If a codon read before the first stop codon is missing from
        `genetic_code`.
    """
    rna_sequence = rna_sequence.upper()
    residues = []
    # The `- 2` bound keeps only starts that leave room for a full codon, so
    # short inputs and trailing partial codons need no special casing.
    for start in range(0, len(rna_sequence) - 2, 3):
        amino_acid = genetic_code[rna_sequence[start:start + 3]]
        if amino_acid == "*":
            break
        residues.append(amino_acid)
    return "".join(residues)
pass
def get_all_translations(rna_sequence, genetic_code):
    """List every peptide that starts at an 'AUG' codon in `rna_sequence`.

    The three reading frames are scanned left to right, one frame after the
    other. Each 'AUG' found opens a translation that runs, in that frame,
    until a stop codon ('*'), a codon missing from `genetic_code`, or the
    end of the sequence. Only the given orientation is examined; the reverse
    and complement are not explored.

    Parameters
    ----------
    rna_sequence : str
        A string representing an RNA sequence (upper or lower-case).
    genetic_code : dict
        A dictionary mapping codons (strings of three upper-case RNA bases)
        to single-letter amino acids. Stop codons map to '*'.

    Returns
    -------
    list of str
        The non-empty peptides, ordered by reading frame and then by start
        position. Empty when nothing can be translated.
    """
    rna_sequence = rna_sequence.upper()
    # Exclusive upper bound for codon starts: a start must leave three bases.
    codon_start_limit = len(rna_sequence) - 2
    translations = []
    for frame in range(3):
        for start in range(frame, codon_start_limit, 3):
            if rna_sequence[start:start + 3] != "AUG":
                continue
            residues = []
            for position in range(start, codon_start_limit, 3):
                amino_acid = genetic_code.get(
                    rna_sequence[position:position + 3])
                # An unknown codon ends the peptide the same way a stop does.
                if amino_acid is None or amino_acid == "*":
                    break
                residues.append(amino_acid)
            peptide = "".join(residues)
            if peptide:
                translations.append(peptide)
    return translations
pass
def get_reverse(sequence):
    """Return `sequence` upper-cased and reversed.

    Parameters
    ----------
    sequence : str
        A string representing a sequence (upper or lower-case).

    Returns
    -------
    str
        The upper-case characters of `sequence` in reverse order. An empty
        input yields an empty string.
    """
    return sequence.upper()[::-1]
pass
def get_complement(sequence):
    """Return the upper-case RNA complement of `sequence`.

    Bases are swapped A<->U and C<->G without changing their order. Any
    other character is passed through unchanged.

    Parameters
    ----------
    sequence : str
        A string representing an RNA sequence (upper or lower-case).

    Returns
    -------
    str
        The complemented sequence in upper case. An empty input yields an
        empty string.
    """
    base_pairs = str.maketrans('AUCG', 'UAGC')
    return sequence.upper().translate(base_pairs)
pass
def reverse_and_complement(sequence):
    """Return the upper-case reverse complement of an RNA `sequence`.

    Bases are swapped A<->U and C<->G and the result is reversed. Any other
    character is kept as-is (but still reversed along with the rest).

    Parameters
    ----------
    sequence : str
        A string representing an RNA sequence (upper or lower-case).

    Returns
    -------
    str
        The reversed, complemented sequence in upper case. An empty input
        yields an empty string.
    """
    base_pairs = str.maketrans('AUCG', 'UAGC')
    complemented = sequence.upper().translate(base_pairs)
    return complemented[::-1]
pass
def _longest_peptide_on_strand(sequence, genetic_code):
    """Return the longest 'AUG'-initiated peptide in the three frames of `sequence`.

    `sequence` must already be upper-case. A peptide ends at a stop codon
    ('*'), at a codon missing from `genetic_code`, or at the end of the
    frame. Ties keep the peptide found first (lower frame, then leftmost).
    """
    best = ""
    for frame in range(3):
        # `residues` is None while scanning for a start codon and a list
        # while a peptide is being extended. An 'AUG' inside an open peptide
        # would only yield a shorter suffix, so it simply extends it.
        residues = None
        for start in range(frame, len(sequence) - 2, 3):
            codon = sequence[start:start + 3]
            if residues is None:
                if codon != "AUG":
                    continue
                residues = []
            amino_acid = genetic_code.get(codon)
            if amino_acid is None or amino_acid == "*":
                if len(residues) > len(best):
                    best = "".join(residues)
                residues = None
                continue
            residues.append(amino_acid)
        # A peptide may run off the end of the frame without a stop codon.
        if residues is not None and len(residues) > len(best):
            best = "".join(residues)
    return best


def get_longest_peptide(rna_sequence, genetic_code):
    """Get the longest peptide encoded by an RNA sequence.

    Explore six reading frames of `rna_sequence` (the three reading frames of
    `rna_sequence`, and the three reading frames of the reverse and complement
    of `rna_sequence`) and return (as a string) the longest sequence of amino
    acids that it encodes, according to the `genetic_code`.

    Translation of a peptide begins at a start codon ('AUG') and ends at a
    stop codon, at a codon absent from `genetic_code`, or at the end of the
    sequence. When peptides tie for longest, one from `rna_sequence` itself
    is preferred over one from its reverse and complement.

    If no amino acids can be translated from `rna_sequence` nor its reverse and
    complement, an empty string is returned.

    Parameters
    ----------
    rna_sequence : str
        A string representing an RNA sequence (upper or lower-case).
    genetic_code : dict
        A dictionary mapping all 64 codons (strings of three RNA bases) to
        amino acids (string of single-letter amino acid abbreviation). Stop
        codons should be represented with asterisks ('*').

    Returns
    -------
    str
        A string of the longest sequence of amino acids encoded by
        `rna_sequence`.
    """
    rna_sequence = rna_sequence.upper()
    complementary = str.maketrans('AUCG', 'UAGC')
    reverse_complement = rna_sequence.translate(complementary)[::-1]

    forward_best = _longest_peptide_on_strand(rna_sequence, genetic_code)
    reverse_best = _longest_peptide_on_strand(reverse_complement, genetic_code)
    if len(reverse_best) > len(forward_best):
        return reverse_best
    return forward_best
if __name__ == '__main__':
    # Codon -> single-letter amino acid table; '*' marks the stop codons
    # (UAA, UAG, UGA).
    genetic_code = {'GUC': 'V', 'ACC': 'T', 'GUA': 'V', 'GUG': 'V', 'ACU': 'T', 'AAC': 'N', 'CCU': 'P', 'UGG': 'W', 'AGC': 'S', 'AUC': 'I', 'CAU': 'H', 'AAU': 'N', 'AGU': 'S', 'GUU': 'V', 'CAC': 'H', 'ACG': 'T', 'CCG': 'P', 'CCA': 'P', 'ACA': 'T', 'CCC': 'P', 'UGU': 'C', 'GGU': 'G', 'UCU': 'S', 'GCG': 'A', 'UGC': 'C', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 'UCG': 'S', 'AGG': 'R', 'GGG': 'G', 'UCC': 'S', 'UCA': 'S', 'UAA': '*', 'GGA': 'G', 'UAC': 'Y', 'GAC': 'D', 'UAG': '*', 'AUA': 'I', 'GCA': 'A', 'CUU': 'L', 'GGC': 'G', 'AUG': 'M', 'CUG': 'L', 'GAG': 'E', 'CUC': 'L', 'AGA': 'R', 'CUA': 'L', 'GCC': 'A', 'AAA': 'K', 'AAG': 'K', 'CAA': 'Q', 'UUU': 'F', 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'GCU': 'A', 'GAA': 'E', 'AUU': 'I', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F'}
    # Demo sequence, written one codon per line.
    rna_seq = ("AUG"
               "UAC"
               "UGG"
               "CAC"
               "GCU"
               "ACU"
               "GCU"
               "CCA"
               "UAU"
               "ACU"
               "CAC"
               "CAG"
               "AAU"
               "AUC"
               "AGU"
               "ACA"
               "GCG")
    longest_peptide = get_longest_peptide(rna_sequence=rna_seq,
                                          genetic_code=genetic_code)
    # Self-check of the demo: the function must hand back a string.
    assert isinstance(longest_peptide, str), "Oops: the longest peptide is {0}, not a string".format(longest_peptide)
    message = "The longest peptide encoded by\n\t'{0}'\nis\n\t'{1}'\n".format(
        rna_seq,
        longest_peptide)
    sys.stdout.write(message)
    if longest_peptide == "MYWHATAPYTHQNISTA":
        sys.stdout.write("Indeed.\n")