# Patch metadata from the original one-line paste, kept verbatim:
# diff --git a/Exercise_7.py b/Exercise_7.py new file mode 100644 index 0000000..e400dbf --- /dev/null +++ b/Exercise_7.py @@ -0,0 +1,43 @@
"""Exercise 7: three plotting exercises.

Question 1 draws histograms of sequence length and %GC for the records of a
FASTA file. Question 2 draws a regression plot of ``GPP_mmolm3d`` against
``TP_mgm3``. Question 3 draws a bar plot of the mean ``observations`` per
``region`` and a scatter plot of ``observations`` by region.

Run as a script. An optional first command-line argument overrides the
directory the three input files are read from.
"""
# I am not a big fan of plotnine, so I used matplotlib and seaborn, which I
# think are a little bit more popular.
import sys
from pathlib import Path

import pandas as pd

# matplotlib, seaborn and scikit-learn are imported inside the functions that
# draw, so the pure data helpers below stay importable without a plotting stack.

# Directory holding Lecture11.fasta, LakeGPP.txt and data.txt.
DATA_DIR = Path("/Users/chenyingying/Documents/Intro_Biocom_ND_319_Tutorial7")
FIGURE_SIZE = (8, 8)
PLOT_HEIGHT = 8


def parse_fasta(lines):
    """Yield ``(header, sequence)`` pairs from the lines of a FASTA file.

    A record starts at a line beginning with ``>``. Every following line up
    to the next ``>`` is joined into that record's sequence, so sequences
    wrapped over several lines are handled. Blank lines and any text before
    the first header are ignored.

    Args:
        lines: Iterable of text lines (an open file works).

    Yields:
        Tuples of the header text (without the leading ``>``) and the
        concatenated sequence string.
    """
    header = None
    chunks = []
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if line.startswith(">"):
            if header is not None:
                yield header, "".join(chunks)
            header = line[1:]
            chunks = []
        elif header is not None:
            chunks.append(line)
    if header is not None:
        yield header, "".join(chunks)


def gc_percent(sequence):
    """Return the percentage of G and C bases in *sequence*.

    Counting is case-insensitive. An empty sequence returns ``0.0`` instead
    of dividing by zero.
    """
    if not sequence:
        return 0.0
    bases = sequence.upper()
    return 100 * (bases.count("G") + bases.count("C")) / len(bases)


def summarize_sequences(sequences):
    """Build the ``ID`` / ``Length`` / ``%GC`` table for *sequences*.

    Args:
        sequences: Iterable of sequence strings.

    Returns:
        A DataFrame with one row per sequence; ``ID`` numbers the sequences
        from 1 in input order.
    """
    rows = [
        {"ID": number, "Length": len(seq), "%GC": gc_percent(seq)}
        for number, seq in enumerate(sequences, start=1)
    ]
    return pd.DataFrame(rows, columns=["ID", "Length", "%GC"])


def plot_sequence_histograms(fasta_path):
    """Question 1: histograms of sequence length and %GC, side by side.

    Args:
        fasta_path: Path of the FASTA file to read.

    Returns:
        The matplotlib Figure holding both histograms.
    """
    import matplotlib.pyplot as plt

    with open(fasta_path, encoding="utf-8") as handle:
        table = summarize_sequences(seq for _, seq in parse_fasta(handle))

    fig = plt.figure(figsize=FIGURE_SIZE)
    fig.suptitle("Histogram")
    # Draw through the Axes objects themselves rather than the implicit
    # "current axes", so each handle keeps referring to its subplot.
    length_ax = fig.add_subplot(121)
    length_ax.hist(table["Length"], edgecolor="black")
    length_ax.set_title("Sequence Length")
    gc_ax = fig.add_subplot(122)
    gc_ax.hist(table["%GC"], edgecolor="black")
    gc_ax.set_title("%GC")
    return fig


def plot_lake_regression(lake_path):
    """Question 2: regression plot of ``GPP_mmolm3d`` against ``TP_mgm3``.

    Args:
        lake_path: Path of the tab-separated lake data file.

    Returns:
        The seaborn grid returned by ``lmplot``.
    """
    import seaborn as sns

    lakes = pd.read_table(lake_path, sep="\t")
    # seaborn renamed the ``size`` argument to ``height`` in 0.9.
    return sns.lmplot(
        x="TP_mgm3", y="GPP_mmolm3d", data=lakes, height=PLOT_HEIGHT
    )


def plot_region_observations(observations_path):
    """Question 3: mean bar plot and scatter plot of observations by region.

    Args:
        observations_path: Path of the comma-separated file with ``region``
            and ``observations`` columns.

    Returns:
        A ``(figure, grid)`` pair: the bar-plot Figure and the seaborn grid
        holding the scatter plot.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.preprocessing import LabelEncoder

    obs = pd.read_table(observations_path, sep=",")
    encoder = LabelEncoder()
    obs["Index"] = encoder.fit_transform(obs["region"])
    # Take the tick labels from the fitted encoder so they always match the
    # integer codes stored in "Index", whatever regions the file contains.
    regions = [str(name) for name in encoder.classes_]

    # Barplot of mean
    fig, ax = plt.subplots(figsize=FIGURE_SIZE)
    obs.groupby("region")["observations"].mean().plot.bar(ax=ax)
    ax.set_title("Barplot of mean")

    # Scatter plot
    grid = sns.pairplot(
        x_vars=["Index"],
        y_vars=["observations"],
        data=obs,
        hue="region",
        height=PLOT_HEIGHT,
    )
    grid.set(xticks=list(range(len(regions))))
    grid.set(xticklabels=regions)
    grid.set(xlabel="region")
    grid.set(title="Scatter plot")
    return fig, grid


def main(data_dir=DATA_DIR):
    """Draw the figures for all three questions, then display them.

    Args:
        data_dir: Directory containing ``Lecture11.fasta``, ``LakeGPP.txt``
            and ``data.txt``.
    """
    import matplotlib.pyplot as plt

    data_dir = Path(data_dir)
    plot_sequence_histograms(data_dir / "Lecture11.fasta")
    plot_lake_regression(data_dir / "LakeGPP.txt")
    plot_region_observations(data_dir / "data.txt")
    # Without an explicit show(), a plain ``python Exercise_7.py`` run exits
    # before any window is displayed.
    plt.show()


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else DATA_DIR)

# Patch metadata for the next file in the original paste, kept verbatim:
# diff --git a/LakeGPP.txt b/LakeGPP.txt new file mode 100644 index 0000000..06ffdb2 --- /dev/null +++ b/LakeGPP.txt @@ -0,0 +1,26
@@ +"TP_mgm3" "GPP_mmolm3d" +39.8 34.2 +9.4 11.8 +41.5 74.9 +55.9 36.9 +13.2 3.8 +17.9 34.4 +4.4 6.1 +7.1 10 +23.2 25.5 +19.9 7.8 +20.2 13.3 +34.3 15 +34.9 77.9 +105.3 144.3 +18.6 35 +78.3 47.9 +15.3 7.1 +21.3 60.8 +28 28.3 +34 13.3 +42.5 30.5 +29.3 49.1 +25.2 7.3 +28.1 32.1 +13.8 29.7 diff --git a/Plot_Question 1.png b/Plot_Question 1.png new file mode 100644 index 0000000..aac2bd8 Binary files /dev/null and b/Plot_Question 1.png differ diff --git a/Plot_Question 2.png b/Plot_Question 2.png new file mode 100644 index 0000000..e97afef Binary files /dev/null and b/Plot_Question 2.png differ diff --git a/Plot_Question 3_1.png b/Plot_Question 3_1.png new file mode 100644 index 0000000..d308fb9 Binary files /dev/null and b/Plot_Question 3_1.png differ diff --git a/Plot_Question 3_2.png b/Plot_Question 3_2.png new file mode 100644 index 0000000..ab780b9 Binary files /dev/null and b/Plot_Question 3_2.png differ