From a937686f3b08835eb7ed33be90f0d6c53137bb49 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Mon, 25 Sep 2017 01:11:50 -0400 Subject: [PATCH 1/6] DB started work on question 1 --- exercise5 script.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 exercise5 script.py diff --git a/exercise5 script.py b/exercise5 script.py new file mode 100644 index 0000000..8977214 --- /dev/null +++ b/exercise5 script.py @@ -0,0 +1,10 @@ +#question 1 + +import pandas +wages = pandas.read_csv("wages.csv") +print wages.head (n=5) + +#question 2 + + +#question 3 \ No newline at end of file From 83cc209b56f38b7348b0ee2c940a3217a94cd07c Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Mon, 25 Sep 2017 02:35:27 -0400 Subject: [PATCH 2/6] DB finished question 1. Everything Works! --- exercise5 script.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/exercise5 script.py b/exercise5 script.py index 8977214..fc1e391 100644 --- a/exercise5 script.py +++ b/exercise5 script.py @@ -1,10 +1,26 @@ -#question 1 +###########question 1################ +#load the dataset import pandas wages = pandas.read_csv("wages.csv") -print wages.head (n=5) -#question 2 +#filter out columns of interest into a new dataframe +wages2 = wages[['gender', 'yearsExperience']].copy() +#drop duplicates from column 1 and 2 +dwage=wages2.drop_duplicates(subset=['gender','yearsExperience']) -#question 3 \ No newline at end of file +#sort columns +swage= dwage.sort_values(by=["gender", "yearsExperience"]) + +#check dataframe +print swage + +#write.csv +swage.to_csv('unique_gender_yearsExperience.csv', sep='\t') + + +####################question 2################# + + +############question 3############## \ No newline at end of file From 866152f8fbbed3b4bf175ed286555d71190b8911 Mon Sep 17 00:00:00 2001 From: Katherine Date: Mon, 25 Sep 2017 18:50:39 -0400 Subject: [PATCH 3/6] Question 2 except Top10 part --- exercise5 script.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/exercise5 script.py b/exercise5 script.py index fc1e391..457d2cc 100644 --- a/exercise5 script.py +++ b/exercise5 script.py @@ -21,6 +21,23 @@ ####################question 2################# +#Limit to columns of interest +q2 = wages[['gender', 'yearsExperience', 'wage']].copy() +#sort by wage +sortedq2 = q2.sort_values(by=['wage']) +#select highest earner +Highest = sortedq2.tail(1) +#Print line for highest earner +print (Highest) +#select lowest earner +Lowest = sortedq2.head(1) +#Print line for lowest earner +print (Lowest) +#Find top 10 earners +Top10 = sortedq2.tail(10) +#Count number of females +Top10females = ####Help#### +list.count(Top10females) +############question 3############## -############question 3############## \ No newline at end of file From eec88b7fc129e83c5848a7bc7dd946db30d60a76 Mon Sep 17 00:00:00 2001 From: Katherine Date: Mon, 25 Sep 2017 19:42:45 -0400 Subject: [PATCH 4/6] Draft 2, Question 2 --- exercise5 script.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/exercise5 script.py b/exercise5 script.py index 457d2cc..5c4c0f1 100644 --- a/exercise5 script.py +++ b/exercise5 script.py @@ -36,7 +36,9 @@ #Find top 10 earners Top10 = sortedq2.tail(10) #Count number of females -Top10females = ####Help#### +Topfemales = 0 +for female in Top10: + Topfemales = Topfemales + 1 #(this part doesn't work) list.count(Top10females) ############question 3############## From 9ec6186aa259e9b4b084b8a65e5b293840e15953 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Tue, 26 Sep 2017 11:19:03 -0400 Subject: [PATCH 5/6] DB- formatted q1 and got the number of females in the top 10 to work! --- exercise5 script.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/exercise5 script.py b/exercise5 script.py index 5c4c0f1..1f062ae 100644 --- a/exercise5 script.py +++ b/exercise5 script.py @@ -1,5 +1,4 @@ ###########question 1################ - #load the dataset import pandas wages = pandas.read_csv("wages.csv") @@ -13,9 +12,6 @@ #sort columns swage= dwage.sort_values(by=["gender", "yearsExperience"]) -#check dataframe -print swage - #write.csv swage.to_csv('unique_gender_yearsExperience.csv', sep='\t') @@ -35,11 +31,13 @@ print (Lowest) #Find top 10 earners Top10 = sortedq2.tail(10) -#Count number of females -Topfemales = 0 -for female in Top10: - Topfemales = Topfemales + 1 #(this part doesn't work) -list.count(Top10females) -############question 3############## +##Count number of females +#pulls out females from top 10 +Topfemales=Top10.loc[Top10['gender'] == "female"] +#counts females in top 10 +ctopfemale= Topfemales.gender.count() +print (ctopfemale) + +############question 3############## From 1ea71a62913e721058e78b7cfa6bb31d8d84df33 Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Tue, 26 Sep 2017 15:11:26 -0400 Subject: [PATCH 6/6] Finished question 3 --- exercise5 script.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/exercise5 script.py b/exercise5 script.py index 1f062ae..390e63d 100644 --- a/exercise5 script.py +++ b/exercise5 script.py @@ -40,4 +40,8 @@ print (ctopfemale) ############question 3############## - +wages12 = wages[wages.yearsSchool==12] #selects people with 12 years of school +min12 = min(wages12.wage) #minimum wage of people with 12 years of school +wages16 = wages[wages.yearsSchool==16] #selects people with 16 years of school +min16 = min(wages16.wage) #minimum wage of people with 16 years of school +print(min16-min12)