From c428e91f28c4f511b4ef083d917bd3878bcf2d84 Mon Sep 17 00:00:00 2001
From: Tim Burton
Date: Thu, 26 Oct 2017 22:02:51 -0400
Subject: [PATCH 1/2] first

---
 Exercise08_1.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 Exercise08_1.py

diff --git a/Exercise08_1.py b/Exercise08_1.py
new file mode 100644
index 0000000..2422e44
--- /dev/null
+++ b/Exercise08_1.py
@@ -0,0 +1,32 @@
+#Exercise 8, Python question 1
+#10/13/17, MMD
+import vcf
+import re
+#Open files to read and write
+vcffile = open("Cflorida.vcf","r")
+outfile = open("CfloridaCounts.txt","w")
+
+#assign regex to variable name, or compile to variable name
+
+lineNumber=0
+#loop over file
+for line in vcffile:#look at old code to see how you looped over a file
+    #strip end of line
+    line=line.strip()
+    if lineNumber==0: #how can you tell if this is the header line?
+        outfile.write(line+"\n")
+        #write unchanged header line to file
+    elif lineNumber==1: #how can you tell if this is the line with the column headings?
+        #standardize (replace) sample names with TX and FL regexes
+        re.sub(([CF|cf]{1}.*\.[Aa]{1}.*\.),(Cf.Sfa.),line)
+        re.sub(([CF|cf]{1}.*\.[Gg]{1}.*\.),(Cf.Gai.),line)
+        #write new version of line to file
+        outfile.write(line+"\n")
+    else: #now you're in the data
+        #replace full SNP info with allele counts only
+        #replace missing data with NA
+        #write new version of line to new file
+
+#Close files
+
+

From 44d7a44a02dc964b31484da871abcb6f9a176c59 Mon Sep 17 00:00:00 2001
From: Patrick Doherty
Date: Thu, 26 Oct 2017 23:49:47 -0400
Subject: [PATCH 2/2] Fix Regex so re.sub works and the script runs

---
Note: this hunk was edited by hand after the commit was exported, so the
post-image blob id on the "index" line below is stale.

 Exercise08_1.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/Exercise08_1.py b/Exercise08_1.py
index 2422e44..4362f62 100644
--- a/Exercise08_1.py
+++ b/Exercise08_1.py
@@ -18,15 +18,21 @@
         #write unchanged header line to file
     elif lineNumber==1: #how can you tell if this is the line with the column headings?
         #standardize (replace) sample names with TX and FL regexes
-        re.sub(([CF|cf]{1}.*\.[Aa]{1}.*\.),(Cf.Sfa.),line)
-        re.sub(([CF|cf]{1}.*\.[Gg]{1}.*\.),(Cf.Gai.),line)
+        #a name is CF/cf, the rest of that field, then a field starting with A (-> Cf.Sfa.) or G (-> Cf.Gai.)
+        #NOTE(review): assumes A/G marks the population - confirm against the sample names in Cflorida.vcf
+        #re.sub returns a new string, so the result is assigned back to line
+        line=re.sub(r"(CF|cf)[^.\s]*\.[Aa][^.\s]*\.","Cf.Sfa.",line)
+        line=re.sub(r"(CF|cf)[^.\s]*\.[Gg][^.\s]*\.","Cf.Gai.",line)
         #write new version of line to file
         outfile.write(line+"\n")
     else: #now you're in the data
         #replace full SNP info with allele counts only
         #replace missing data with NA
         #write new version of line to new file
+        #TODO: the two replacements above are not implemented yet, so the line is written unchanged
+        outfile.write(line+"\n")
+    lineNumber+=1 #advance the counter so later lines reach the elif/else branches
 
 #Close files
-
-
+vcffile.close()
+outfile.close()