From a937686f3b08835eb7ed33be90f0d6c53137bb49 Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Mon, 25 Sep 2017 01:11:50 -0400
Subject: [PATCH 1/6] DB started work on question 1

---
 exercise5 script.py | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 exercise5 script.py

diff --git a/exercise5 script.py b/exercise5 script.py
new file mode 100644
index 0000000..8977214
--- /dev/null
+++ b/exercise5 script.py	
@@ -0,0 +1,10 @@
+#question 1
+
+import pandas
+wages = pandas.read_csv("wages.csv")
+print wages.head (n=5)
+
+#question 2
+
+
+#question 3
\ No newline at end of file

From 83cc209b56f38b7348b0ee2c940a3217a94cd07c Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Mon, 25 Sep 2017 02:35:27 -0400
Subject: [PATCH 2/6] DB finished question 1. Everything Works!

---
 exercise5 script.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/exercise5 script.py b/exercise5 script.py
index 8977214..fc1e391 100644
--- a/exercise5 script.py	
+++ b/exercise5 script.py	
@@ -1,10 +1,26 @@
-#question 1
+###########question 1################
 
+#load the dataset
 import pandas
 wages = pandas.read_csv("wages.csv")
-print wages.head (n=5)
 
-#question 2
+# Pull the two columns question 1 needs into an independent frame (copy, not a view)
+wages2 = wages.loc[:, ['gender', 'yearsExperience']].copy()
 
+# Keep the first row of every distinct (gender, yearsExperience) pair
+dwage = wages2.drop_duplicates(subset=['gender', 'yearsExperience'], keep='first')
 
-#question 3
\ No newline at end of file
+#sort columns
+swage= dwage.sort_values(by=["gender", "yearsExperience"])
+
+#check dataframe
+print swage
+
+#write.csv
+swage.to_csv('unique_gender_yearsExperience.csv', sep='\t')
+
+
+####################question 2#################
+
+
+############question 3##############
\ No newline at end of file

From 866152f8fbbed3b4bf175ed286555d71190b8911 Mon Sep 17 00:00:00 2001
From: Katherine <bookaholic214@gmail.com>
Date: Mon, 25 Sep 2017 18:50:39 -0400
Subject: [PATCH 3/6] Question 2 except Top10 part

---
 exercise5 script.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/exercise5 script.py b/exercise5 script.py
index fc1e391..457d2cc 100644
--- a/exercise5 script.py	
+++ b/exercise5 script.py	
@@ -21,6 +21,23 @@
 
 
 ####################question 2#################
+# Work on an independent copy holding only the columns question 2 uses
+q2 = wages.loc[:, ['gender', 'yearsExperience', 'wage']].copy()
+# Ascending sort on wage: lowest earner comes first, highest earner last
+sortedq2 = q2.sort_values('wage')
+# Last row of the ascending sort, kept as a one-row DataFrame
+Highest = sortedq2.iloc[-1:]
+# Show the row for the highest earner
+print (Highest)
+# First row of the ascending sort, kept as a one-row DataFrame
+Lowest = sortedq2.iloc[:1]
+# Show the row for the lowest earner
+print (Lowest)
+#Find top 10 earners
+Top10 = sortedq2.tail(10)
+#Count number of females
+Top10females = ####Help####
+list.count(Top10females)        
+############question 3##############
 
 
-############question 3##############
\ No newline at end of file

From eec88b7fc129e83c5848a7bc7dd946db30d60a76 Mon Sep 17 00:00:00 2001
From: Katherine <bookaholic214@gmail.com>
Date: Mon, 25 Sep 2017 19:42:45 -0400
Subject: [PATCH 4/6] Draft 2, Question 2

---
 exercise5 script.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/exercise5 script.py b/exercise5 script.py
index 457d2cc..5c4c0f1 100644
--- a/exercise5 script.py	
+++ b/exercise5 script.py	
@@ -36,7 +36,9 @@
 #Find top 10 earners
 Top10 = sortedq2.tail(10)
 #Count number of females
-Top10females = ####Help####
+Topfemales = 0
+for female in Top10:
+    Topfemales = Topfemales + 1 #(this part doesn't work)
 list.count(Top10females)        
 ############question 3##############
 

From 9ec6186aa259e9b4b084b8a65e5b293840e15953 Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Tue, 26 Sep 2017 11:19:03 -0400
Subject: [PATCH 5/6] DB- formatted q1 and got the number of females in the top
 10 to work!

---
 exercise5 script.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/exercise5 script.py b/exercise5 script.py
index 5c4c0f1..1f062ae 100644
--- a/exercise5 script.py	
+++ b/exercise5 script.py	
@@ -1,5 +1,4 @@
 ###########question 1################
-
 #load the dataset
 import pandas
 wages = pandas.read_csv("wages.csv")
@@ -13,9 +12,6 @@
 #sort columns
 swage= dwage.sort_values(by=["gender", "yearsExperience"])
 
-#check dataframe
-print swage
-
 #write.csv
 swage.to_csv('unique_gender_yearsExperience.csv', sep='\t')
 
@@ -35,11 +31,13 @@
 print (Lowest)
 #Find top 10 earners
 Top10 = sortedq2.tail(10)
-#Count number of females
-Topfemales = 0
-for female in Top10:
-    Topfemales = Topfemales + 1 #(this part doesn't work)
-list.count(Top10females)        
-############question 3##############
 
+## How many of the top-10 earners are female?
+# boolean mask keeps only the rows of Top10 whose gender is "female"
+Topfemales = Top10[Top10.gender == "female"]
+# count() tallies the non-missing gender entries in that subset
+ctopfemale = Topfemales['gender'].count()
+print (ctopfemale)
+
+############question 3##############
 

From 1ea71a62913e721058e78b7cfa6bb31d8d84df33 Mon Sep 17 00:00:00 2001
From: Mati Nemera <mnemera@nd.edu>
Date: Tue, 26 Sep 2017 15:11:26 -0400
Subject: [PATCH 6/6] Finished question 3

---
 exercise5 script.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/exercise5 script.py b/exercise5 script.py
index 1f062ae..390e63d 100644
--- a/exercise5 script.py	
+++ b/exercise5 script.py	
@@ -40,4 +40,8 @@
 print (ctopfemale)
 
 ############question 3##############
-
+wages12 = wages[wages.yearsSchool == 12]  # rows for people with 12 years of school
+min12 = wages12.wage.min()  # Series.min() skips NaN; builtin min() is order-dependent when NaN is present
+wages16 = wages[wages.yearsSchool == 16]  # rows for people with 16 years of school
+min16 = wages16.wage.min()  # NaN (not a ValueError) if no row has 16 years of school
+print(min16 - min12)  # minimum wage at 16 years of school minus minimum wage at 12 years