From 6b49d93f33179195d7e8d98385170feddd4bdc69 Mon Sep 17 00:00:00 2001 From: Katherine Date: Fri, 13 Oct 2017 11:00:41 -0400 Subject: [PATCH 01/14] Created blank Python script --- exercise8.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 exercise8.py diff --git a/exercise8.py b/exercise8.py new file mode 100644 index 0000000..d52bc44 --- /dev/null +++ b/exercise8.py @@ -0,0 +1,5 @@ +#####Question 1 + + + +#####Question 2 From 0b1c8e5a82c86087e71193e59029ef85abb6e6b2 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Fri, 13 Oct 2017 11:12:40 -0400 Subject: [PATCH 02/14] opend vcf file-dan --- .idea/vcs.xml | 6 ++++++ exercise8.py | 13 +++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 .idea/vcs.xml diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/exercise8.py b/exercise8.py index d52bc44..743eb9c 100644 --- a/exercise8.py +++ b/exercise8.py @@ -1,5 +1,18 @@ #####Question 1 +import pandas as pd + +vcffile = open("Cflorida.vcf","r") + + + + + + + + + + #####Question 2 From dd336908c492642f1dba139fdc2136912f73a242 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Fri, 13 Oct 2017 11:13:13 -0400 Subject: [PATCH 03/14] opend vcf file-dan2 --- exercise8.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exercise8.py b/exercise8.py index 743eb9c..6557c77 100644 --- a/exercise8.py +++ b/exercise8.py @@ -1,3 +1,6 @@ +#########exercise 8############################# + + #####Question 1 import pandas as pd From 2c148b1db999bda382f65f481cf5b79e8d04e677 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Tue, 24 Oct 2017 17:49:59 -0400 Subject: [PATCH 04/14] basic Q1 works... kinda ish --- exercise8.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/exercise8.py b/exercise8.py index 6557c77..d35f178 100644 --- a/exercise8.py +++ b/exercise8.py @@ -3,10 +3,20 @@ #####Question 1 -import pandas as pd +import re +vcf = open("Cflorida.vcf","r") -vcffile = open("Cflorida.vcf","r") +SearchStr =r'>[cf]' +ReplaceStr =r'fuuuck' +for line in vcf: + line = line.strip() + if "#" in line: + newLine = re.sub(SearchStr, ReplaceStr, line) + print newLine + else: print line + +vcf.close() From e2d9ab4d16b9ed79719f181885f9b1e31bca4867 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Wed, 25 Oct 2017 01:08:12 -0400 Subject: [PATCH 05/14] part of q1 works --- exercise8.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/exercise8.py b/exercise8.py index d35f178..a088d7b 100644 --- a/exercise8.py +++ b/exercise8.py @@ -6,15 +6,14 @@ import re vcf = open("Cflorida.vcf","r") -SearchStr =r'>[cf]' -ReplaceStr =r'fuuuck' +search_tex =r'(CF.A.|CF.A2.|CF07.A.|cf.a.)([0-9]{3})' +replace_tex =r'Cf.Sfa.\2' for line in vcf: line = line.strip() if "#" in line: - newLine = re.sub(SearchStr, ReplaceStr, line) - print newLine - else: print line + texas = re.sub(search_tex, replace_tex, line) + print texas vcf.close() From 2e27b68a5b146188182f04aa85bbce5fdcd1c6da Mon Sep 17 00:00:00 2001 From: Katherine Date: Wed, 25 Oct 2017 20:50:34 -0400 Subject: [PATCH 06/14] Added if statement to forloop --- exercise8.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/exercise8.py b/exercise8.py index a088d7b..e875a94 100644 --- a/exercise8.py +++ b/exercise8.py @@ -5,16 +5,21 @@ import re vcf = open("Cflorida.vcf","r") - +outfile = open("CfloridaCounts.txt","w") search_tex =r'(CF.A.|CF.A2.|CF07.A.|cf.a.)([0-9]{3})' replace_tex =r'Cf.Sfa.\2' for line in vcf: line = line.strip() - if "#" in line: + if "##" in line: + outfile.write(line + "\n") + elif "#" in line: texas = re.sub(search_tex, replace_tex, line) + outfile.append(line + "\n") print texas - + else: + #select lines with "Contig", find 0/0:, keep 1 digit after that, get rid of everything else + outfile.append(line + "\n") vcf.close() From 136e7a727fab5da8c403add34a9d4d6adc3a712c Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Thu, 26 Oct 2017 17:25:28 -0400 Subject: [PATCH 07/14] did flurda --- exercise8.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/exercise8.py b/exercise8.py index e875a94..fd5fb82 100644 --- a/exercise8.py +++ b/exercise8.py @@ -6,8 +6,10 @@ import re vcf = open("Cflorida.vcf","r") outfile = open("CfloridaCounts.txt","w") -search_tex =r'(CF.A.|CF.A2.|CF07.A.|cf.a.)([0-9]{3})' -replace_tex =r'Cf.Sfa.\2' +search_tex = r'(CF.A.|CF.A2.|CF07.A.|cf.a.)([0-9]{3})' +replace_tex = r'Cf.Sfa.\2' +search_fl = r'(CF.G2.|CF.GAI.|cf.gai.)([0-9]{3})' +replace_fl = r'Cf.Gai.\4' for line in vcf: line = line.strip() @@ -16,6 +18,7 @@ elif "#" in line: texas = re.sub(search_tex, replace_tex, line) outfile.append(line + "\n") + florida= re.sub(search_tex, replace_tex, line) print texas else: #select lines with "Contig", find 0/0:, keep 1 digit after that, get rid of everything else From d84bbb4d83ebc7632bd2cfc69763897aa4a4a9d0 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Thu, 26 Oct 2017 17:32:01 -0400 Subject: [PATCH 08/14] fixed dan's errors --- exercise8.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/exercise8.py b/exercise8.py index fd5fb82..e97c026 100644 --- a/exercise8.py +++ b/exercise8.py @@ -14,12 +14,11 @@ for line in vcf: line = line.strip() if "##" in line: - outfile.write(line + "\n") - elif "#" in line: + outfile.write(line + "\n") + elif "#" in line: texas = re.sub(search_tex, replace_tex, line) + florida= re.sub(search_fl, replace_fl, line) outfile.append(line + "\n") - florida= re.sub(search_tex, replace_tex, line) - print texas else: #select lines with "Contig", find 0/0:, keep 1 digit after that, get rid of everything else outfile.append(line + "\n") From 28700d4fc58561dda4e2ef4511f995a8e5bd66c2 Mon Sep 17 00:00:00 2001 From: Katherine Date: Thu, 26 Oct 2017 17:52:34 -0400 Subject: [PATCH 09/14] K adding to else statement --- exercise8.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/exercise8.py b/exercise8.py index e97c026..89034a1 100644 --- a/exercise8.py +++ b/exercise8.py @@ -19,8 +19,15 @@ texas = re.sub(search_tex, replace_tex, line) florida= re.sub(search_fl, replace_fl, line) outfile.append(line + "\n") - else: + else: #select lines with "Contig", find 0/0:, keep 1 digit after that, get rid of everything else + if: + match = re.search(([0-1]/[0-1]:)(\d{1},\d{1})) + m.group(2) + else: + match = re.search(([0-1]/[0-1]:)(.,{1})) + m.group(2) + #replace with NA outfile.append(line + "\n") vcf.close() From 0e98966cce9e1f5fd2b2d58283981dac75b83a83 Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Thu, 26 Oct 2017 17:56:29 -0400 Subject: [PATCH 10/14] added slash --- exercise8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exercise8.py b/exercise8.py index 89034a1..02309fe 100644 --- a/exercise8.py +++ b/exercise8.py @@ -25,7 +25,7 @@ match = re.search(([0-1]/[0-1]:)(\d{1},\d{1})) m.group(2) else: - match = re.search(([0-1]/[0-1]:)(.,{1})) + match = re.search(([0-1]/[0-1]:)(\.,{1})) m.group(2) #replace with NA outfile.append(line + "\n") From 7efc62793f55a9e2c6f18905a04879f5e4236162 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Thu, 26 Oct 2017 19:19:33 -0400 Subject: [PATCH 11/14] now puuuushin --- exercise8.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/exercise8.py b/exercise8.py index 02309fe..39e3705 100644 --- a/exercise8.py +++ b/exercise8.py @@ -2,33 +2,37 @@ #####Question 1 - +import sys import re vcf = open("Cflorida.vcf","r") outfile = open("CfloridaCounts.txt","w") search_tex = r'(CF.A.|CF.A2.|CF07.A.|cf.a.)([0-9]{3})' replace_tex = r'Cf.Sfa.\2' search_fl = r'(CF.G2.|CF.GAI.|cf.gai.)([0-9]{3})' -replace_fl = r'Cf.Gai.\4' +replace_fl = r'Cf.Gai.\2' + +search_allel = r'[0-1]/[0-1]:(\d{1},\d{1})' +replace_allel = r'\2' + + +match = r'.\' +fix = r'NA' + + for line in vcf: line = line.strip() if "##" in line: - outfile.write(line + "\n") + outfile.write (line + "\n") elif "#" in line: - texas = re.sub(search_tex, replace_tex, line) - florida= re.sub(search_fl, replace_fl, line) - outfile.append(line + "\n") + line = re.sub(search_tex, replace_tex, line) + line = re.sub(search_fl, replace_fl, line) + outfile.write(line + "\n") else: - #select lines with "Contig", find 0/0:, keep 1 digit after that, get rid of everything else - if: - match = re.search(([0-1]/[0-1]:)(\d{1},\d{1})) - m.group(2) - else: - match = re.search(([0-1]/[0-1]:)(\.,{1})) - m.group(2) - #replace with NA - outfile.append(line + "\n") + line = re.sub(search_allel, replace_allel) + outfile.write(line + "\n") + +outfile.close() vcf.close() From bbe516c634d14a93d7941a01ff8c13443b4e1378 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Thu, 26 Oct 2017 19:24:57 -0400 Subject: [PATCH 12/14] Dan is pushin again --- exercise8.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/exercise8.py b/exercise8.py index 39e3705..f09181c 100644 --- a/exercise8.py +++ b/exercise8.py @@ -15,8 +15,8 @@ replace_allel = r'\2' -match = r'.\' -fix = r'NA' +search_match = r'\.' +replace_match = r'NA' @@ -29,7 +29,8 @@ line = re.sub(search_fl, replace_fl, line) outfile.write(line + "\n") else: - line = re.sub(search_allel, replace_allel) + line = re.sub(search_allel, replace_allel, line) + line = re.sub(search_match, replace_match, line) outfile.write(line + "\n") outfile.close() From 53be0e3c0811cd05199eaa8954408fbff1959a78 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Thu, 26 Oct 2017 19:39:12 -0400 Subject: [PATCH 13/14] NOOOOIIICCCEEE WE DONE GUUD --- exercise8.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exercise8.py b/exercise8.py index f09181c..0305658 100644 --- a/exercise8.py +++ b/exercise8.py @@ -11,8 +11,8 @@ search_fl = r'(CF.G2.|CF.GAI.|cf.gai.)([0-9]{3})' replace_fl = r'Cf.Gai.\2' -search_allel = r'[0-1]/[0-1]:(\d{1},\d{1})' -replace_allel = r'\2' +search_allel = r'[01.]/[01.]:([0-9.]+,?[0-9.]*):[0-9:.,]*' +replace_allel = r'\1' search_match = r'\.' From 463451ca92a8e3a9760a6f6f24d20a88d4df87f8 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Thu, 26 Oct 2017 19:47:33 -0400 Subject: [PATCH 14/14] finished question 1 --- exercise8.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/exercise8.py b/exercise8.py index 0305658..5457023 100644 --- a/exercise8.py +++ b/exercise8.py @@ -1,5 +1,8 @@ #########exercise 8############################# +#Mati Nemera +#Katherine Inskeep +#Dan Bruzzese #####Question 1 import sys @@ -19,7 +22,6 @@ replace_match = r'NA' - for line in vcf: line = line.strip() if "##" in line: