lyy005 · amidlige · Oct 6, 2017 · Oct 10, 2017 · Oct 10, 2017 · Oct 10, 2017
diff --git a/Bar Plot.png b/Bar Plot.png
diff --git a/Lecture11.fasta b/Lecture11.fasta
diff --git a/Scatter Plot.png b/Scatter Plot.png
diff --git a/Tut7.py b/Tut7.py
@@ -0,0 +1,60 @@
+import os
+from plotnine import *
+from matplotlib import pyplot as plt
+import numpy as np
+import pandas
+
+os.chdir("Intro_Biocom_ND_319_Tutorial7/")
+
+# Per-sequence statistics from Lecture11.fasta. The lists for length, GC
+# content and melting temperature get exactly one entry per sequence line.
+sequenceID=[]
+sequenceLength=[]
+percentGC=[]
+meltingTemp = []
+
+# "with" guarantees the handle is closed; the old trailing InFile.close()
+# referred to a name this script never defined.
+with open("Lecture11.fasta", "r") as fasta:
+    for Line in fasta:
+        Line = Line.strip()
+        if not Line:
+            continue  # an empty line would divide by zero below
+        if '>' in Line:
+            sequenceID.append(Line[1:])  # header line: drop the leading '>'
+            continue
+
+        # Each remaining line is treated as one complete sequence.
+        seqLen=float(len(Line))
+        nG=Line.count("G")  # counts must exist before Tm is computed
+        nC=Line.count("C")
+        if seqLen <= 14:
+            # Equals 4*(G+C) + 2*(A+T) when the line holds only A/C/G/T.
+            Tm=2*(nG+nC)+2*seqLen
+        else:
+            Tm=-9999  # placeholder for sequences longer than 14 bases
+        # Append inside the sequence branch so header lines add nothing.
+        sequenceLength.append(seqLen)
+        percentGC.append((nG+nC)/seqLen*100)
+        meltingTemp.append(Tm)
+
+plt.hist(percentGC, bins = 11)
+plt.title('Percent GC content distribution')
+plt.xlabel('GC Content (%)')
+plt.ylabel('count')
+plt.show()
+
+plt.hist(sequenceLength, bins = 24)
+plt.title('Sequence Length distribution')
+plt.xlabel('Sequence Length')
+plt.ylabel('count')
+plt.show()
+
+## Question 3 Starts Here ##
+# Bar chart of the mean of "observations" for each "region" in data.txt.
+data = pandas.read_csv("data.txt", sep=",")
+
+region_vs_obs = aes(x="region", y="observations")
+plot = ggplot(data, region_vs_obs)
+plot + geom_bar(stat="summary", fun_y=np.mean, fill="green")
+
diff --git a/Tutorial_3_Part3_Scatter_Plot b/Tutorial_3_Part3_Scatter_Plot
@@ -0,0 +1,12 @@
+import os
+from plotnine import *
+from matplotlib import pyplot as plt
+import numpy as np
+import pandas
+
+# Jittered scatter plot of "observations" against "region" from data.txt.
+data = pandas.read_csv("data.txt", sep=",")
+
+region_vs_obs = aes(x="region", y="observations")
+plot = ggplot(data, region_vs_obs)
+plot + geom_jitter(colour="black")
diff --git a/exercise7part1.py b/exercise7part1.py
@@ -0,0 +1,21 @@
+import pandas
+import numpy
+
+# Fill a table with the length and the G and C counts of every sequence line.
+A = numpy.zeros((100,4)) #tells python how big we want the table to be.
+B = pandas.DataFrame(A,columns=['seqNum','seqLength','Gcontent','Ccontent']) #labels each column
+
+# B has 100 rows; a file with more sequence lines raises IndexError below.
+i = 0  # index of the row in B that the next sequence line fills
+# "with" closes the file once the loop is done.
+with open("Lecture11.fasta","r") as InFile:
+    for Line in InFile:
+        Line=Line.strip()
+        if not Line or '>' in Line:
+            continue  # nothing to record for blank lines or '>' header lines
+        # Store into the table (the original assigned in the other direction).
+        B.iloc[i,0]=i+1  # NOTE(review): 1-based sequence number -- confirm intent
+        B.iloc[i,1]=float(len(Line))
+        B.iloc[i,2]=Line.count("G")
+        B.iloc[i,3]=Line.count("C")
+        i += 1
diff --git a/patentsPerYear.py b/patentsPerYear.py
@@ -0,0 +1,15 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas
+
+# Scatter utility patent applications per calendar year with a linear trend.
+df = pandas.read_excel('patentsPerYear.xlsx')
+data= df[['Calendar Year','Utility Patent Application (Inventions)']]
+x=data['Calendar Year']
+y=data['Utility Patent Application (Inventions)']
+plt.scatter(x,y)
+
+z = np.polyfit(x, y, 1)  # degree-1 least-squares fit; needs the numpy import
+p = np.poly1d(z)
+plt.plot(x,p(x),"r--")  # fitted line drawn red and dashed
+plt.show()
diff --git a/patentsPerYear.xlsx b/patentsPerYear.xlsx