diff --git a/exercise7.py b/exercise7.py
new file mode 100755
index 0000000..20a5d6e
--- /dev/null
+++ b/exercise7.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+"""Plot sequence-length and GC-content histograms for Lecture11.fasta."""
+
+import pandas
+from plotnine import aes, geom_histogram, ggplot, theme_classic
+
+sequenceLength = []
+percentGC = []
+# Each non-header line is treated as one complete sequence.
+with open("Lecture11.fasta", "r") as InFile:  # closes the file on exit
+    for line in InFile:
+        line = line.strip()  # remove surrounding whitespace and the newline
+        # Skip headers, and blank lines that would otherwise divide by zero.
+        if not line or ">" in line:
+            continue
+        bases = line.upper()  # count lowercase (soft-masked) bases as well
+        # strip() already removed the newline, so len() is the true length.
+        sequenceLength.append(len(bases))
+        # Stored as a fraction between 0 and 1, not multiplied by 100.
+        gc_count = bases.count("G") + bases.count("C")
+        percentGC.append(gc_count / len(bases))
+print(percentGC)
+
+# Puts data in dataframe
+data = pandas.DataFrame(
+    {"Sequence Length": sequenceLength, "Percent GC": percentGC}
+)
+
+# A bare plot expression only renders in an interactive session; show()
+# displays the figure when the file is run as a script.
+length = ggplot(data, aes(x="Sequence Length"))
+(length + geom_histogram() + theme_classic()).show()
+
+gc_plot = ggplot(data, aes(x="Percent GC"))
+(gc_plot + geom_histogram() + theme_classic()).show()
diff --git a/exercise72.py b/exercise72.py
new file mode 100644
index 0000000..1d97d6f
--- /dev/null
+++ b/exercise72.py
@@ -0,0 +1,13 @@
+"""Scatter plot of ice-cream craving against temperature with a linear fit."""
+
+import numpy
+import pandas
+from plotnine import aes, coord_cartesian, geom_point, ggplot, stat_smooth
+
+ice = pandas.read_csv("icecream.txt", sep=",", header=0)
+
+scatter = ggplot(ice, aes(x="Temperature C", y="How much I want ice cream"))
+# stat_smooth(method="lm") overlays a linear-model fit on the points.
+# show() displays the figure when run as a script; a bare plot expression
+# only renders in an interactive session.
+(scatter + geom_point() + coord_cartesian() + stat_smooth(method="lm")).show()
diff --git a/exercise73.py b/exercise73.py
new file mode 100644
index 0000000..b52f706
--- /dev/null
+++ b/exercise73.py
@@ -0,0 +1,38 @@
+"""Compare regional observations with a bar plot of means and a jitter plot."""
+
+import numpy
+import pandas
+from plotnine import (
+    aes,
+    coord_cartesian,
+    geom_bar,
+    geom_jitter,
+    ggplot,
+    theme_classic,
+    xlab,
+    ylab,
+)
+
+data = pandas.read_csv("data.txt", sep=",", header=0)
+
+# produces bar plot for means of populations
+barplot = ggplot(data) + theme_classic() + xlab("region") + ylab("observations")
+# stat="summary" with fun_y=numpy.mean draws one bar per region at its mean.
+means = geom_bar(
+    aes(x="factor(region)", y="observations"),
+    stat="summary",
+    fun_y=numpy.mean,
+)
+# show() displays the figure when the file is run as a script.
+(barplot + means).show()
+
+# produces scatter plot with jittering of observations
+scatterplot = ggplot(data, aes(x="region", y="observations"))
+# Only geom_jitter() is used: adding geom_point() as well would draw every
+# observation twice, once at its true position and once displaced.
+(scatterplot + coord_cartesian() + geom_jitter()).show()
+
+# The bar plot clearly shows that the mean is about the same for each
+# population. The scatter plot also shows that, but less clearly.
+# The new information the scatter plot reveals is the spread and grouping of
+# the data, which was hidden in the bar plot.
diff --git a/icecream.txt b/icecream.txt
new file mode 100644
index 0000000..1b92598
--- /dev/null
+++ b/icecream.txt
@@ -0,0 +1,18 @@
+"Temperature C","How much I want ice cream"
+2,5
+1,5
+3,4
+5,5
+6,7
+10,7
+9,7
+8,6
+11,7
+15,8
+13,8
+20,9
+25,10
+29,10
+31,11
+22,8
+27,9