From bc6dfc7ce692c5e345f67bd706d82d3551a0b51c Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Fri, 6 Oct 2017 20:21:06 -0400 Subject: [PATCH 1/4] first commit --- exercise7.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100755 exercise7.py diff --git a/exercise7.py b/exercise7.py new file mode 100755 index 0000000..99ac1ce --- /dev/null +++ b/exercise7.py @@ -0,0 +1,12 @@ +import pandas +InFile=open("Lecture11.fasta","r") +sequenceLength=[] +percentGC = [] +for line in InFile: + line = line.strip() + if ">" in line: + next + else: + sequenceLength.append(len(line)-1) + percentGC.append(1.0*(line.count("G")+line.count("C"))/len(line)) +print(percentGC) \ No newline at end of file From 5b3cb066f11cfd72111e005b37c9e4557d0ada6d Mon Sep 17 00:00:00 2001 From: Soren Holm Date: Wed, 11 Oct 2017 20:49:27 -0400 Subject: [PATCH 2/4] Part 2 and 3 ready for review --- exercise72.py | 8 ++++++++ exercise73.py | 17 +++++++++++++++++ icecream.txt | 18 ++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 exercise72.py create mode 100644 exercise73.py create mode 100644 icecream.txt diff --git a/exercise72.py b/exercise72.py new file mode 100644 index 0000000..1d97d6f --- /dev/null +++ b/exercise72.py @@ -0,0 +1,8 @@ +import numpy +import pandas +from plotnine import * +ice=pandas.read_csv("icecream.txt",sep=",",header=0) + + +scatter=ggplot(ice,aes(x="Temperature C",y="How much I want ice cream")) +scatter+geom_point()+coord_cartesian() + stat_smooth(method="lm") \ No newline at end of file diff --git a/exercise73.py b/exercise73.py new file mode 100644 index 0000000..b52f706 --- /dev/null +++ b/exercise73.py @@ -0,0 +1,17 @@ +import numpy +import pandas +from plotnine import * +data=pandas.read_csv("data.txt",sep=",",header=0) + + +#produces bar plot for means of populations +barplot=ggplot(data)+theme_classic()+xlab("region")+ylab("observations") +barplot+geom_bar(aes(x="factor(region)",y="observations"),stat="summary",fun_y=numpy.mean) + + 
+#produces scatter plot with jittering of observations +scatterplot=ggplot(data,aes(x="region",y="observations")) +scatterplot+geom_point()+coord_cartesian()+geom_jitter() + +#The bar plot clearly shows that the mean is about the same for each population. The scatterplot also shows that but less clearly. +#The new information the scatter plot reveals is the spread and grouping of data which was hidden in the bar plot \ No newline at end of file diff --git a/icecream.txt b/icecream.txt new file mode 100644 index 0000000..1b92598 --- /dev/null +++ b/icecream.txt @@ -0,0 +1,18 @@ +"Temperature C","How much I want ice cream" +2,5 +1,5 +3,4 +5,5 +6,7 +10,7 +9,7 +8,6 +11,7 +15,8 +13,8 +20,9 +25,10 +29,10 +31,11 +22,8 +27,9 From f15103785e2c1233250d73de504612218e9db728 Mon Sep 17 00:00:00 2001 From: Soren Holm Date: Wed, 11 Oct 2017 21:16:14 -0400 Subject: [PATCH 3/4] Problem 1 complete --- exercise7.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/exercise7.py b/exercise7.py index 99ac1ce..a2108ae 100755 --- a/exercise7.py +++ b/exercise7.py @@ -9,4 +9,13 @@ else: sequenceLength.append(len(line)-1) percentGC.append(1.0*(line.count("G")+line.count("C"))/len(line)) -print(percentGC) \ No newline at end of file +#print(percentGC) + +#So we need the data in a datafram to be used by ggplot aparantly. 
So here I put it in a dataframe +data=pandas.DataFrame({"Sequence Length": sequenceLength, "Percent GC": percentGC}) + +length=ggplot(data,aes(x="Sequence Length")) +length+geom_histogram()+theme_classic() + +gc=ggplot(data,aes(x="Percent GC")) +gc+geom_histogram()+theme_classic() \ No newline at end of file From c3b3028561b02bb174148d9d189900a5b7ced6de Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Thu, 12 Oct 2017 00:25:52 -0400 Subject: [PATCH 4/4] exercise7 question 1 --- exercise7.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/exercise7.py b/exercise7.py index a2108ae..20a5d6e 100755 --- a/exercise7.py +++ b/exercise7.py @@ -3,19 +3,19 @@ sequenceLength=[] percentGC = [] for line in InFile: - line = line.strip() + line = line.strip() #remove extra space if ">" in line: next else: sequenceLength.append(len(line)-1) percentGC.append(1.0*(line.count("G")+line.count("C"))/len(line)) -#print(percentGC) +print(percentGC) -#So we need the data in a datafram to be used by ggplot aparantly. So here I put it in a dataframe +#Puts data in dataframe data=pandas.DataFrame({"Sequence Length": sequenceLength, "Percent GC": percentGC}) - +from plotnine import * length=ggplot(data,aes(x="Sequence Length")) length+geom_histogram()+theme_classic() gc=ggplot(data,aes(x="Percent GC")) -gc+geom_histogram()+theme_classic() \ No newline at end of file +gc+geom_histogram()+theme_classic()