"""Exercise 5: summarise and plot the wages dataset.

Challenge 1 writes the unique gender / yearsExperience combinations to
``stdout.csv``.  Challenge 2 reports the lowest earner, the highest earner
and how many of the ten highest earners are female.  Challenge 3 plots the
minimum wage of the rows whose schooling column equals 12 and 16.

The commit that introduced this script also added its generated outputs,
``stdout.csv`` and ``Figure_1.png``.

Usage::

    python Exercise_5.py [path/to/wages.csv]
"""
import sys

import pandas as pd

# Location of the data on the original author's machine.  Kept as the default
# so existing runs behave the same; pass a path on the command line to
# override it.
DEFAULT_WAGES_CSV = (
    "/Users/chenyingying/Documents/Intro_Biocom_ND_319_Tutorial5/wages.csv"
)
OUTPUT_CSV = "stdout.csv"

PAIR_COLUMNS = ["gender", "yearsExperience"]
EARNER_COLUMNS = ["gender", "yearsExperience", "wage"]

# The schooling column is addressed by position, as in the original script.
# NOTE(review): presumably this is the "yearsSchool" column -- confirm against
# the header of wages.csv.
SCHOOL_COLUMN_POSITION = 2
SCHOOL_YEARS = (12, 16)


def unique_gender_experience(data):
    """Return the distinct gender/yearsExperience pairs, sorted by both."""
    return data[PAIR_COLUMNS].drop_duplicates().sort_values(PAIR_COLUMNS)


def lowest_earner(data):
    """Return gender, yearsExperience and wage for the lowest-paid row."""
    return data.nsmallest(1, "wage")[EARNER_COLUMNS]


def highest_earner(data):
    """Return gender, yearsExperience and wage for the highest-paid row.

    ``nlargest`` is used instead of ``sort_values(...).tail(1)`` because
    sorting places missing wages last, which would report a NaN row as the
    top earner.
    """
    return data.nlargest(1, "wage")[EARNER_COLUMNS]


def females_in_top_earners(data, n=10):
    """Return how many of the ``n`` highest earners have gender 'female'."""
    top_earners = data.nlargest(n, "wage")
    return int((top_earners["gender"] == "female").sum())


def minimum_wage_by_schooling(data, school_years=SCHOOL_YEARS):
    """Return the minimum wage for each schooling value in ``school_years``.

    The result is a list in the same order as ``school_years``; a schooling
    value with no matching rows yields NaN.
    """
    schooling = data.iloc[:, SCHOOL_COLUMN_POSITION]
    return [
        data.loc[schooling == years, "wage"].min() for years in school_years
    ]


def plot_minimum_wage(school_years, minimum_wages):
    """Scatter-plot minimum wage against years of schooling and show it."""
    # Imported here so the tabular helpers above remain usable on machines
    # without a plotting backend installed.
    import matplotlib.pyplot as plt

    plt.scatter(list(school_years), list(minimum_wages))
    plt.title('Effect of graduating college on the min wage')
    plt.xlabel('yearsSchool')
    plt.ylabel('Minimum wage')
    # Without an explicit show() nothing is displayed when run as a script.
    plt.show()


def main(argv=None):
    """Run the three challenges.

    ``argv`` is the list of command-line arguments (defaults to
    ``sys.argv[1:]``); its optional first element is the path to wages.csv.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    csv_path = args[0] if args else DEFAULT_WAGES_CSV
    data = pd.read_csv(csv_path)

    # Challenge 1
    unique_gender_experience(data).to_csv(OUTPUT_CSV, sep=",", index=False)

    # Challenge 2 -- printed explicitly: a bare expression in a script
    # produces no output.
    print("Lowest earner:")
    print(lowest_earner(data).to_string(index=False))
    print("Highest earner:")
    print(highest_earner(data).to_string(index=False))
    print("Females in the top ten earners:", females_in_top_earners(data))

    # Challenge 3
    plot_minimum_wage(SCHOOL_YEARS, minimum_wage_by_schooling(data))


if __name__ == "__main__":
    main()