lyy005 · mcorley1 · Sep 26, 2017 · Sep 27, 2017 · Sep 27, 2017 · Sep 28, 2017
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+*.pptx
+
diff --git a/Exercise_5_Challenge_Complete.py b/Exercise_5_Challenge_Complete.py
@@ -0,0 +1,52 @@
+#Part 1: write the unique gender/yearsExperience combinations, sorted, to part1done.txt
+import pandas #pandas provides the DataFrame used throughout
+
+data = pandas.read_csv("wages.csv", header=0, sep=",") #first row of wages.csv is the header
+
+gender_yrsexp = data.iloc[:, 0:2] #positional slice: the first two columns (sorted below as gender, yearsExperience)
+
+uniquegender = gender_yrsexp.drop_duplicates() #keep the first occurrence of each distinct row
+print(uniquegender.shape) #a bare expression prints nothing when run as a script, so print the (rows, columns) tuple
+
+sortgender = uniquegender.sort_values(["gender", "yearsExperience"]) #order by gender, then by experience within gender
+
+sortgender.to_csv("part1done.txt", sep=" ") #space-delimited; the original row index is written as the first column
+
+#Part 2: highest earner, lowest earner, and number of females among the top ten earners
+import pandas
+df=pandas.read_csv('wages.csv')
+#df holds the contents of wages.csv
+
+#Row with the single largest wage
+highest_earner=df.nlargest(n=1,columns='wage')
+#Recorded result in highest_earner: male,5,11,39.808917197
+
+#Row with the single smallest wage
+lowest_earner=df.nsmallest(n=1,columns='wage')
+#Recorded result in lowest_earner: female,9,11,0.07655561
+
+#Count the females whose wage is at least the tenth-largest wage (ties at the cutoff are included)
+num_of_females=(df.loc[df['wage'].ge(df['wage'].nlargest(10).iloc[-1]),'gender']=='female').sum()
+#Recorded result in num_of_females: 2
+
+#Part 3: difference in minimum wage between people with 16 and with 12 years of school
+import pandas
+
+#Load wages.csv into a DataFrame
+wages=pandas.read_csv('wages.csv')
+
+#Select the people with 12 years of school (the group the author treats as not having finished school)
+education12=wages[wages.yearsSchool==12]
+
+#Minimum wage in the 12-year group; Series.min() skips missing values, unlike the builtin min()
+minimum12=education12['wage'].min()
+
+#Select the people with 16 years of school (the group the author treats as having finished school)
+education16=wages[wages.yearsSchool==16]
+
+#Minimum wage in the 16-year group, computed the same way
+minimum16=education16['wage'].min()
+
+#Print how much higher the minimum wage is in the 16-year group than in the 12-year group
+print(minimum16-minimum12)
+#Recorded result: 4.0816223772
diff --git a/Exercise_5_Part_2.py b/Exercise_5_Part_2.py
@@ -0,0 +1,15 @@
+import pandas
+df=pandas.read_csv('wages.csv')
+#df holds the contents of wages.csv
+
+#Row with the single largest wage
+highest_earner=df.nlargest(n=1,columns='wage')
+#Recorded result in highest_earner: male,5,11,39.808917197
+
+#Row with the single smallest wage
+lowest_earner=df.nsmallest(n=1,columns='wage')
+#Recorded result in lowest_earner: female,9,11,0.07655561
+
+#Count the females whose wage is at least the tenth-largest wage (ties at the cutoff are included)
+num_of_females=(df.loc[df['wage'].ge(df['wage'].nlargest(10).iloc[-1]),'gender']=='female').sum()
+#Recorded result in num_of_females: 2
diff --git a/Exercise_5_Part_3.py b/Exercise_5_Part_3.py
@@ -0,0 +1,19 @@
+import pandas
+#Load wages.csv into a DataFrame
+wages=pandas.read_csv('wages.csv')
+
+#Select the people with 12 years of school (the group the author treats as not having finished school)
+education12=wages[wages.yearsSchool==12]
+
+#Minimum wage in the 12-year group; Series.min() skips missing values, unlike the builtin min()
+minimum12=education12['wage'].min()
+
+#Select the people with 16 years of school (the group the author treats as having finished school)
+education16=wages[wages.yearsSchool==16]
+
+#Minimum wage in the 16-year group, computed the same way
+minimum16=education16['wage'].min()
+
+#Print how much higher the minimum wage is in the 16-year group than in the 12-year group
+print(minimum16-minimum12)
+#Recorded result: 4.0816223772
diff --git a/excercise5updated.py b/excercise5updated.py
@@ -0,0 +1,14 @@
+#Exercise 5: load wages.csv and produce the Part 1 output file
+import pandas #pandas provides the DataFrame used throughout
+
+data = pandas.read_csv("wages.csv", header=0, sep=",") #first row of wages.csv is the header
+
+#Part 1: unique gender/yearsExperience combinations, sorted, written to part1done.txt
+gender_yrsexp = data.iloc[:, 0:2] #positional slice: the first two columns (sorted below as gender, yearsExperience)
+
+uniquegender = gender_yrsexp.drop_duplicates() #keep the first occurrence of each distinct row
+print(uniquegender.shape) #a bare expression prints nothing when run as a script, so print the (rows, columns) tuple
+
+sortgender = uniquegender.sort_values(["gender", "yearsExperience"]) #order by gender, then by experience within gender
+
+sortgender.to_csv("part1done.txt", sep=" ") #space-delimited; the original row index is written as the first column
diff --git a/part1completed.txt b/part1completed.txt
@@ -0,0 +1,34 @@
+ gender yearsExperience
+168 female 1
+215 female 2
+15 female 3
+37 female 4
+23 female 5
+27 female 6
+9 female 7
+4 female 8
+0 female 9
+7 female 10
+2 female 11
+1 female 12
+17 female 13
+350 female 14
+46 female 15
+623 female 16
+1784 male 2
+1658 male 3
+1650 male 4
+1599 male 5
+1594 male 6
+1570 male 7
+1581 male 8
+1579 male 9
+1569 male 10
+1573 male 11
+1571 male 12
+1617 male 13
+1589 male 14
+1605 male 15
+1608 male 16
+1959 male 17
+1942 male 18
diff --git a/part1done.txt b/part1done.txt
@@ -0,0 +1,34 @@
+ gender yearsExperience
+168 female 1
+215 female 2
+15 female 3
+37 female 4
+23 female 5
+27 female 6
+9 female 7
+4 female 8
+0 female 9
+7 female 10
+2 female 11
+1 female 12
+17 female 13
+350 female 14
+46 female 15
+623 female 16
+1784 male 2
+1658 male 3
+1650 male 4
+1599 male 5
+1594 male 6
+1570 male 7
+1581 male 8
+1579 male 9
+1569 male 10
+1573 male 11
+1571 male 12
+1617 male 13
+1589 male 14
+1605 male 15
+1608 male 16
+1959 male 17
+1942 male 18