From 575051fdc9d7fb189b89706a254928ad53933474 Mon Sep 17 00:00:00 2001
From: Michelle Corley <mcorley1@nd.edu>
Date: Tue, 26 Sep 2017 10:45:33 -0400
Subject: [PATCH 1/7] initial commit of part 1

---
 .gitignore         |  3 +++
 exercise5.py       |  0
 part1completed.txt | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 exercise5.py
 create mode 100644 part1completed.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5c5ca7a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+*.pptx
+
diff --git a/exercise5.py b/exercise5.py
new file mode 100644
index 0000000..e69de29
diff --git a/part1completed.txt b/part1completed.txt
new file mode 100644
index 0000000..a4ec58a
--- /dev/null
+++ b/part1completed.txt
@@ -0,0 +1,34 @@
+ gender yearsExperience
+168 female 1
+215 female 2
+15 female 3
+37 female 4
+23 female 5
+27 female 6
+9 female 7
+4 female 8
+0 female 9
+7 female 10
+2 female 11
+1 female 12
+17 female 13
+350 female 14
+46 female 15
+623 female 16
+1784 male 2
+1658 male 3
+1650 male 4
+1599 male 5
+1594 male 6
+1570 male 7
+1581 male 8
+1579 male 9
+1569 male 10
+1573 male 11
+1571 male 12
+1617 male 13
+1589 male 14
+1605 male 15
+1608 male 16
+1959 male 17
+1942 male 18

From b216583ed4271f62b220beafca68f778c701e2c6 Mon Sep 17 00:00:00 2001
From: Michelle Corley <mcorley1@nd.edu>
Date: Wed, 27 Sep 2017 11:46:54 -0400
Subject: [PATCH 2/7] second commit of exercise 5, part 1

---
 exercise5.py  |  0
 part1done.txt | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)
 delete mode 100644 exercise5.py
 create mode 100644 part1done.txt

diff --git a/exercise5.py b/exercise5.py
deleted file mode 100644
index e69de29..0000000
diff --git a/part1done.txt b/part1done.txt
new file mode 100644
index 0000000..a4ec58a
--- /dev/null
+++ b/part1done.txt
@@ -0,0 +1,34 @@
+ gender yearsExperience
+168 female 1
+215 female 2
+15 female 3
+37 female 4
+23 female 5
+27 female 6
+9 female 7
+4 female 8
+0 female 9
+7 female 10
+2 female 11
+1 female 12
+17 female 13
+350 female 14
+46 female 15
+623 female 16
+1784 male 2
+1658 male 3
+1650 male 4
+1599 male 5
+1594 male 6
+1570 male 7
+1581 male 8
+1579 male 9
+1569 male 10
+1573 male 11
+1571 male 12
+1617 male 13
+1589 male 14
+1605 male 15
+1608 male 16
+1959 male 17
+1942 male 18

From 60c00c99661c04cbb7d500ac550d6d3f74104178 Mon Sep 17 00:00:00 2001
From: Michelle Corley <mcorley1@nd.edu>
Date: Wed, 27 Sep 2017 12:13:17 -0400
Subject: [PATCH 3/7] second commit of exercise 5 code

---
 excercise5updated.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 excercise5updated.py

diff --git a/excercise5updated.py b/excercise5updated.py
new file mode 100644
index 0000000..6de004a
--- /dev/null
+++ b/excercise5updated.py
@@ -0,0 +1,14 @@
+"""Exercise 5, part 1: save the unique gender/yearsExperience pairs of wages.csv."""
+import pandas  # data frames do all of the table handling below
+
+data = pandas.read_csv("wages.csv", header=0, sep=",")  # row 0 of the file holds the column names
+
+# Part 1: select by name the two columns that the sort below relies on.
+gender_yrsexp = data[["gender", "yearsExperience"]]
+
+uniquegender = gender_yrsexp.drop_duplicates()  # one row per distinct pair
+print(uniquegender.shape)  # a bare `.shape` expression shows nothing when run as a script
+
+sortgender = uniquegender.sort_values(["gender", "yearsExperience"])
+# The row index is written too, as an unlabeled first column.
+sortgender.to_csv("part1done.txt", sep=" ")

From 69fef0d64cc2cd993e34d8e56fb61b77b48ff533 Mon Sep 17 00:00:00 2001
From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com>
Date: Thu, 28 Sep 2017 08:29:26 -0400
Subject: [PATCH 4/7] Exercise 5 Challenge Part 2 1st commit

---
 Exercise_5_Part_2.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 Exercise_5_Part_2.py

diff --git a/Exercise_5_Part_2.py b/Exercise_5_Part_2.py
new file mode 100644
index 0000000..dab1676
--- /dev/null
+++ b/Exercise_5_Part_2.py
@@ -0,0 +1,15 @@
+"""Exercise 5, part 2: highest earner, lowest earner and females among the top ten wages."""
+import pandas
+df=pandas.read_csv('wages.csv')  # the 'wage' and 'gender' columns are the ones used below
+
+#Highest earner: the one row with the largest wage
+highest_earner=df.nlargest(1,'wage')
+print(highest_earner)  # recorded result: male,5,11,39.808917197
+
+#Lowest earner: the one row with the smallest wage
+lowest_earner=df.nsmallest(1,'wage')
+print(lowest_earner)  # recorded result: female,9,11,0.07655561
+
+#Number of females in the top 10; rows tied with the 10th-largest wage are counted too
+num_of_females=df[df['wage']>=df['wage'].nlargest(10).iloc[-1]]['gender'].eq('female').sum()
+print(num_of_females)  # recorded result: 2

From 5bacc1a3b3d20b24a790a467b842a6b427636690 Mon Sep 17 00:00:00 2001
From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com>
Date: Thu, 28 Sep 2017 10:09:36 -0400
Subject: [PATCH 5/7] 1st Commit for Part 3 of Exercise 5 Challenge

Still needs work
---
 Exercise_5_Part_3.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 Exercise_5_Part_3.py

diff --git a/Exercise_5_Part_3.py b/Exercise_5_Part_3.py
new file mode 100644
index 0000000..1fba7e9
--- /dev/null
+++ b/Exercise_5_Part_3.py
@@ -0,0 +1,17 @@
+import pandas
+pandas.read_csv('wages.csv')
+
+#Select for the people who didn't finish school(i.e. 12yrs of school)
+education12=wages[wages.yearSchool==12]
+
+#Calculate the minimum wage for people with 12yrs of school
+minimum12=min(education12.wage)
+
+#Select for the people who did finish school(i.e. 16yrs of school)
+education16=wages[wages.yearSchool==16]
+
+#Calculate the minimum wage for people with 16yrs of school
+minimum16=min(education16.wage)
+
+#Calculate the difference between the minimum wage for those who finished school vs. those who didn't
+print(minimum16-minimum12)
\ No newline at end of file

From 97410a6b49a99ae4fc5512fe043a88ef698f2589 Mon Sep 17 00:00:00 2001
From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com>
Date: Thu, 28 Sep 2017 10:21:52 -0400
Subject: [PATCH 6/7] Updated Exercise_5_Part_3.py

---
 Exercise_5_Part_3.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Exercise_5_Part_3.py b/Exercise_5_Part_3.py
index 1fba7e9..05dbec0 100644
--- a/Exercise_5_Part_3.py
+++ b/Exercise_5_Part_3.py
@@ -1,17 +1,19 @@
 import pandas
-pandas.read_csv('wages.csv')
+#Load wages.csv into the wages data frame that the selections below index
+wages=pandas.read_csv('wages.csv')
 
 #Select for the people who didn't finish school(i.e. 12yrs of school)
-education12=wages[wages.yearSchool==12]
+education12=wages[wages.yearsSchool==12]
 
 #Calculate the minimum wage for people with 12yrs of school
 minimum12=min(education12.wage)
 
 #Select for the people who did finish school(i.e. 16yrs of school)
-education16=wages[wages.yearSchool==16]
+education16=wages[wages.yearsSchool==16]
 
 #Calculate the minimum wage for people with 16yrs of school
 minimum16=min(education16.wage)
 
 #Calculate the difference between the minimum wage for those who finished school vs. those who didn't
-print(minimum16-minimum12)
\ No newline at end of file
+print(minimum16-minimum12)
+#Recorded output: 4.0816223772 (minimum wage at 16yrs of school minus minimum wage at 12yrs)

From c02fb39967242cce4d4d42fab3c0966d32ab259d Mon Sep 17 00:00:00 2001
From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com>
Date: Thu, 28 Sep 2017 10:32:39 -0400
Subject: [PATCH 7/7] Scripts for Parts 1-3

---
 Exercise_5_Challenge_Complete.py | 52 ++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 Exercise_5_Challenge_Complete.py

diff --git a/Exercise_5_Challenge_Complete.py b/Exercise_5_Challenge_Complete.py
new file mode 100644
index 0000000..5b1a02c
--- /dev/null
+++ b/Exercise_5_Challenge_Complete.py
@@ -0,0 +1,52 @@
+"""Exercise 5 challenge, parts 1-3, all computed from a single read of wages.csv.
+
+Part 1 writes the unique gender/yearsExperience pairs to part1done.txt.
+Part 2 prints the highest earner, the lowest earner and the number of
+females among the top ten wages.
+Part 3 prints the minimum wage at 16 years of school minus the minimum
+wage at 12 years of school.
+"""
+import pandas  # data frames are used by every part
+
+# header=0 and sep="," match the read_csv defaults; the frame is read once
+# and shared by all three parts, none of which modifies it.
+data = pandas.read_csv("wages.csv", header=0, sep=",")
+
+# ---- Part 1 ----
+# Select by name the two columns the sort below relies on.
+gender_yrsexp = data[["gender", "yearsExperience"]]
+
+uniquegender = gender_yrsexp.drop_duplicates()  # one row per distinct pair
+print(uniquegender.shape)  # a bare `.shape` shows nothing when run as a script
+
+sortgender = uniquegender.sort_values(["gender", "yearsExperience"])
+
+# The row index is written too, as an unlabeled first column.
+sortgender.to_csv("part1done.txt", sep=" ")
+
+# ---- Part 2 ----
+df = data  # second name for the same frame, kept so later code can still use `df`
+
+# Highest earner: the one row with the largest wage.
+highest_earner = df.nlargest(1, "wage")
+print(highest_earner)  # recorded result: male,5,11,39.808917197
+
+# Lowest earner: the one row with the smallest wage.
+lowest_earner = df.nsmallest(1, "wage")
+print(lowest_earner)  # recorded result: female,9,11,0.07655561
+
+# Females in the top 10; rows tied with the 10th-largest wage count too.
+top10_cutoff = df["wage"].nlargest(10).iloc[-1]
+num_of_females = df[df["wage"] >= top10_cutoff]["gender"].eq("female").sum()
+print(num_of_females)  # recorded result: 2
+
+# ---- Part 3 ----
+wages = data  # third name for the same frame, kept so later code can still use `wages`
+
+# Series.min() skips missing wages, which the builtin min() does not.
+education12 = wages[wages.yearsSchool == 12]
+minimum12 = education12.wage.min()
+education16 = wages[wages.yearsSchool == 16]
+minimum16 = education16.wage.min()
+
+print(minimum16 - minimum12)  # recorded result: 4.0816223772