Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions Exercise08_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#Exercise 8, Python question 1
#10/13/17, MMD
#Reads Cflorida.vcf line by line and writes lines to CfloridaCounts.txt.
#NOTE(review): vcf is imported but not referenced anywhere in the visible code - confirm it is needed.
import vcf
import re
#Open files to read and write
vcffile = open("Cflorida.vcf","r")
outfile = open("CfloridaCounts.txt","w")

#assign regex to variable name, or compile to variable name

#NOTE(review): lineNumber is set to 0 here but is never incremented in the visible code,
#so the lineNumber==0 test below would be true for every line - confirm.
lineNumber=0
#loop over file
for line in vcffile:#look at old code to see how you looped over a file
#strip end of line
#(str.strip() with no argument removes leading and trailing whitespace, including the newline)
line=line.strip()
if lineNumber==0: #how can you tell if this is the header line?

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or you can use
if line[0:2] == "##":

outfile.write(line+"\n")
#write unchanged header line to file
elif lineNumber==1: #how can you tell if this is the line with the column headings?
#standardize (replace) sample names with TX and FL regexes
#NOTE(review): the two re.sub calls below are not valid Python as written: the pattern and
#the replacement are not quoted strings, and the parentheses are unbalanced.
#NOTE(review): re.sub returns a new string; the result is not assigned here, so the
#outfile.write(line+"\n") that follows would still write the unmodified line.
#NOTE(review): both calls use the same pattern with different replacements - confirm the
#intended patterns for the two sample groups.
re.sub(((CF|cf){1}.?{4}\.,(Cf.Sfa.),line) ###Having trouble getting re.sub to work, syntax not working. Trying to replace CF and then any 4 characters leading to a period
re.sub(((CF|cf){1}.?{4}\.,(Cf.Gai.),line)

@lyy005 lyy005 Nov 2, 2017

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quotation mark is missing.
names = re.sub("[Cc]Ff?.[Aa]2?","Cf.Sfa",line)
names = re.sub("[Cc][Ff].(G2|GAI|gai)","Cf.Gai",names)

#write new version of line to file
outfile.write(line+"\n")
#NOTE(review): the else branch below contains only comments in the visible code; Python
#requires at least one statement in the branch body.
else: #now you're in the data
#replace full SNP info with allele counts only

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sub = re.sub("[01.]/[01.]:([0-9,.]+):[0-9.]+:[0-9.]+:[0-9,.]+",r"\1",line) # extract allele information
sub = re.sub(r"\.","NA",sub) #if allele counts . replace with NA

#replace missing data with NA
#write new version of line to new file

#Close files

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-0.5 points