forked from lyy005/Intro_Biocom_ND_319_Tutorial8
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathQuestion1.py
More file actions
executable file
·43 lines (34 loc) · 1.1 KB
/
Copy pathQuestion1.py
File metadata and controls
executable file
·43 lines (34 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""Exercise 8: clean up a VCF file with regular expressions.

Reads ``Cflorida.vcf`` line by line and writes ``CfloridaCounts.txt``:

* lines starting with ``##`` are copied unchanged;
* on the line starting with a single ``#`` the sample names are
  standardised (names matching the TX pattern become ``Cf.Sfa``, names
  matching the FL pattern become ``Cf.Gai``);
* on every other line each genotype field of the form
  ``a/b:x:y:z:w`` is reduced to its second colon-separated sub-field
  (the allele counts, per the original author's comment) and every
  ``.`` is replaced by ``NA``.
"""
import re

# Default input and output file names used when run as a script.
INPUT_PATH = "Cflorida.vcf"
OUTPUT_PATH = "CfloridaCounts.txt"

# Regex sources, kept under their original names.
TXnames = r"[Cc][Ff](07)?\.[Aa](2)?"
# The trailing "i" is optional and case-insensitive.  The original pattern
# ended in "(Ii)?", which only matches the literal two characters "Ii" and
# therefore left a stray "I"/"i" behind for names such as "CF.GAI".
FLnames = r"[Cc][Ff]\.[Gg][Aa2][Ii]?"
ACR = r"[01.]/[01.]:([0-9,.]+):[0-9.]+:[0-9.]+:[0-9,.]+"

# Compiled once so the per-line loop does not repeat the pattern lookup.
_TX_RE = re.compile(TXnames)
_FL_RE = re.compile(FLnames)
_AC_RE = re.compile(ACR)


def convert_line(line):
    """Return the converted form of one input line, without a newline.

    Leading and trailing whitespace is stripped first.  The kind of line is
    decided by its prefix (``##``, ``#`` or anything else) rather than by
    whether ``#`` occurs somewhere inside it, so a data record that merely
    contains a ``#`` is still processed as data.
    """
    line = line.strip()

    # Meta-information lines pass through untouched.
    if line.startswith("##"):
        return line

    # Column-header line: standardise the sample names, TX pattern first.
    if line.startswith("#"):
        return _FL_RE.sub("Cf.Gai", _TX_RE.sub("Cf.Sfa", line))

    # Data line: keep only the captured allele-count sub-field.
    counts_only = _AC_RE.sub(r"\1", line)
    # NOTE(review): this replaces *every* "." on the line, not only the
    # missing-data markers, so decimal points in other columns are
    # rewritten too.  Kept as in the original -- confirm this is intended.
    return counts_only.replace(".", "NA")


def main(in_path=INPUT_PATH, out_path=OUTPUT_PATH):
    """Convert *in_path* line by line and write the result to *out_path*.

    Both files are managed by a ``with`` block so they are closed even if
    an error occurs part-way through the conversion.
    """
    with open(in_path, "r") as vcffile, open(out_path, "w") as outfile:
        for line in vcffile:
            outfile.write(convert_line(line) + "\n")


if __name__ == "__main__":
    main()