Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions exercise7.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pandas
def sequence_stats(lines):
    """Return per-sequence lengths and GC fractions for FASTA-style lines.

    Header lines (those starting with ">") and blank lines are skipped; every
    other line is treated as one complete sequence.

    Args:
        lines: Iterable of text lines, for example an open file.

    Returns:
        A ``(lengths, gc_fractions)`` pair of equally long lists. Each GC
        value is the fraction (0.0-1.0) of characters that are "G" or "C".
    """
    lengths = []
    gc_fractions = []
    for line in lines:
        line = line.strip()  # remove surrounding whitespace and the newline
        # Blank lines carry no sequence and would divide by zero below.
        if not line or line.startswith(">"):
            continue
        # strip() already dropped the newline, so len(line) is the full
        # sequence length and matches the denominator used for the GC value.
        lengths.append(len(line))
        gc_count = line.count("G") + line.count("C")
        gc_fractions.append(float(gc_count) / len(line))
    return lengths, gc_fractions


def main():
    """Read Lecture11.fasta and build histograms of length and GC content.

    Returns:
        A ``(length_hist, gc_hist)`` pair of plotnine plot objects.
    """
    # Imported here rather than at module level so sequence_stats() can be
    # used without plotnine being installed.
    from plotnine import aes, geom_histogram, ggplot, theme_classic

    # The with-block closes the file even if parsing raises.
    with open("Lecture11.fasta", "r") as in_file:
        sequence_length, percent_gc = sequence_stats(in_file)
    print(percent_gc)

    # Puts data in dataframe
    data = pandas.DataFrame(
        {"Sequence Length": sequence_length, "Percent GC": percent_gc}
    )

    length_hist = (
        ggplot(data, aes(x="Sequence Length"))
        + geom_histogram()
        + theme_classic()
    )
    gc_hist = (
        ggplot(data, aes(x="Percent GC"))
        + geom_histogram()
        + theme_classic()
    )
    # NOTE(review): the plots are only constructed here, not rendered or
    # saved; how to display them depends on the plotnine version and the
    # environment the script runs in -- confirm.
    return length_hist, gc_hist


if __name__ == "__main__":
    main()

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

8 changes: 8 additions & 0 deletions exercise72.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import numpy
import pandas
from plotnine import *
# Load the comma-separated table; row 0 of icecream.txt supplies the column names.
ice = pandas.read_csv("icecream.txt", header=0, sep=",")

# Base plot: temperature on the x axis, ice-cream craving on the y axis.
scatter = ggplot(
    ice,
    aes(y="How much I want ice cream", x="Temperature C"),
)

# Layer the raw points and a method="lm" smoothing line on cartesian coordinates.
(
    scatter
    + geom_point()
    + coord_cartesian()
    + stat_smooth(method="lm")
)

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

17 changes: 17 additions & 0 deletions exercise73.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import numpy
import pandas
from plotnine import *
# Load the comma-separated observations table; row 0 of data.txt supplies the column names.
data = pandas.read_csv("data.txt", sep=",", header=0)


# produces bar plot for means of populations
barplot = ggplot(data) + theme_classic() + xlab("region") + ylab("observations")
# stat="summary" with fun_y=numpy.mean reduces each region's observations to their mean.
barplot + geom_bar(aes(x="factor(region)", y="observations"), stat="summary", fun_y=numpy.mean)


# produces scatter plot with jittering of observations
# geom_jitter() draws the points itself, so no geom_point() layer is added:
# using both would plot every observation twice (once unjittered, once jittered).
scatterplot = ggplot(data, aes(x="region", y="observations"))
scatterplot + coord_cartesian() + geom_jitter()

#The bar plot clearly shows that the mean is about the same for each population. The scatter plot also shows this, but less clearly.
#The new information the scatter plot reveals is the spread and grouping of the data, which was hidden in the bar plot.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

18 changes: 18 additions & 0 deletions icecream.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"Temperature C","How much I want ice cream"
2,5
1,5
3,4
5,5
6,7
10,7
9,7
8,6
11,7
15,8
13,8
20,9
25,10
29,10
31,11
22,8
27,9