From 575051fdc9d7fb189b89706a254928ad53933474 Mon Sep 17 00:00:00 2001 From: Michelle Corley Date: Tue, 26 Sep 2017 10:45:33 -0400 Subject: [PATCH 1/7] initial commit of part 1 --- .gitignore | 3 +++ exercise5.py | 0 part1completed.txt | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 .gitignore create mode 100644 exercise5.py create mode 100644 part1completed.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5c5ca7a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +*.pptx + diff --git a/exercise5.py b/exercise5.py new file mode 100644 index 0000000..e69de29 diff --git a/part1completed.txt b/part1completed.txt new file mode 100644 index 0000000..a4ec58a --- /dev/null +++ b/part1completed.txt @@ -0,0 +1,34 @@ + gender yearsExperience +168 female 1 +215 female 2 +15 female 3 +37 female 4 +23 female 5 +27 female 6 +9 female 7 +4 female 8 +0 female 9 +7 female 10 +2 female 11 +1 female 12 +17 female 13 +350 female 14 +46 female 15 +623 female 16 +1784 male 2 +1658 male 3 +1650 male 4 +1599 male 5 +1594 male 6 +1570 male 7 +1581 male 8 +1579 male 9 +1569 male 10 +1573 male 11 +1571 male 12 +1617 male 13 +1589 male 14 +1605 male 15 +1608 male 16 +1959 male 17 +1942 male 18 From b216583ed4271f62b220beafca68f778c701e2c6 Mon Sep 17 00:00:00 2001 From: Michelle Corley Date: Wed, 27 Sep 2017 11:46:54 -0400 Subject: [PATCH 2/7] second commit of exercise 5, part 1 --- exercise5.py | 0 part1done.txt | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) delete mode 100644 exercise5.py create mode 100644 part1done.txt diff --git a/exercise5.py b/exercise5.py deleted file mode 100644 index e69de29..0000000 diff --git a/part1done.txt b/part1done.txt new file mode 100644 index 0000000..a4ec58a --- /dev/null +++ b/part1done.txt @@ -0,0 +1,34 @@ + gender yearsExperience +168 female 1 +215 female 2 +15 female 3 +37 female 4 +23 female 5 +27 female 6 +9 female 7 +4 female 8 +0 female 9 +7 female 10 +2 female 11 +1 female 12 +17 female 13 +350 female 14 +46 female 15 +623 female 16 +1784 male 2 +1658 male 3 +1650 male 4 +1599 male 5 +1594 male 6 +1570 male 7 +1581 male 8 +1579 male 9 +1569 male 10 +1573 male 11 +1571 male 12 +1617 male 13 +1589 male 14 +1605 male 15 +1608 male 16 +1959 male 17 +1942 male 18 From 60c00c99661c04cbb7d500ac550d6d3f74104178 Mon Sep 17 00:00:00 2001 From: Michelle Corley Date: Wed, 27 Sep 2017 12:13:17 -0400 Subject: [PATCH 3/7] second commit of excercise 5 code --- excercise5updated.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 excercise5updated.py diff --git a/excercise5updated.py b/excercise5updated.py new file mode 100644 index 0000000..6de004a --- /dev/null +++ b/excercise5updated.py @@ -0,0 +1,14 @@ +#completing exercise5 +import pandas #loading the pandas package to use data frames + +data = pandas.read_csv("wages.csv", header=0,sep=",") #loads the file + +#completing part1 +gender_yrsexp = data.iloc[:,0:2] #subsets data by selecting the first two columns + +uniquegender = gender_yrsexp.drop_duplicates() #drops duplicates, like the unique function in unix +uniquegender.shape #displays shape of array + +sortgender = uniquegender.sort_values(["gender","yearsExperience"]) + +sortgender.to_csv("part1done.txt", sep=" ") \ No newline at end of file From 69fef0d64cc2cd993e34d8e56fb61b77b48ff533 Mon Sep 17 00:00:00 2001 From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com> Date: Thu, 28 Sep 2017 08:29:26 -0400 Subject: [PATCH 4/7] Exercise 5 Challenge Part 2 1st commit --- Exercise_5_Part_2.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 Exercise_5_Part_2.py diff --git a/Exercise_5_Part_2.py b/Exercise_5_Part_2.py new file mode 100644 index 0000000..dab1676 --- /dev/null +++ b/Exercise_5_Part_2.py @@ -0,0 +1,15 @@ +import pandas +df=pandas.read_csv('wages.csv') +#df=wages.csv data + +#Highest earner +highest_earner=df.nlargest(1,'wage') +#output is under highest_earner (male,5,11,39.808917197) + +#Lowest earner +lowest_earner=df.nsmallest(1,'wage') +#Output is under lowest_earner (female,9,11,0.07655561) + +#Number of females in the top 10 +num_of_females=df[df['wage']>=df['wage'].nlargest(10).iloc[-1]]['gender'].eq('female').sum() +#Output is under num_of_females (=2) \ No newline at end of file From 5bacc1a3b3d20b24a790a467b842a6b427636690 Mon Sep 17 00:00:00 2001 From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com> Date: Thu, 28 Sep 2017 10:09:36 -0400 Subject: [PATCH 5/7] 1st Commit for Part 3 of Exercise 5 Challenge Still needs work --- Exercise_5_Part_3.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 Exercise_5_Part_3.py diff --git a/Exercise_5_Part_3.py b/Exercise_5_Part_3.py new file mode 100644 index 0000000..1fba7e9 --- /dev/null +++ b/Exercise_5_Part_3.py @@ -0,0 +1,17 @@ +import pandas +pandas.read_csv('wages.csv') + +#Select for the people who didn't finish school(i.e. 12yrs of school) +education12=wages[wages.yearSchool==12] + +#Calculate the minimum wage for people with 12yrs of school +minimum12=min(education12.wage) + +#Select for the people who did finish school(i.e. 16yrs of school) +education16=wages[wages.yearSchool==16] + +#Calculate the minimum wage for people with 16yrs of school +minimum16=min(education16.wage) + +#Calculate the difference between the minimum wage for those who finished school vs. those who didn't +print(minimum16-minimum12) \ No newline at end of file From 97410a6b49a99ae4fc5512fe043a88ef698f2589 Mon Sep 17 00:00:00 2001 From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com> Date: Thu, 28 Sep 2017 10:21:52 -0400 Subject: [PATCH 6/7] Updated Exercise_5_Part_3.py --- Exercise_5_Part_3.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Exercise_5_Part_3.py b/Exercise_5_Part_3.py index 1fba7e9..05dbec0 100644 --- a/Exercise_5_Part_3.py +++ b/Exercise_5_Part_3.py @@ -1,17 +1,19 @@ import pandas -pandas.read_csv('wages.csv') +#Define wages +wages=pandas.read_csv('wages.csv') #Select for the people who didn't finish school(i.e. 12yrs of school) -education12=wages[wages.yearSchool==12] +education12=wages[wages.yearsSchool==12] #Calculate the minimum wage for people with 12yrs of school minimum12=min(education12.wage) #Select for the people who did finish school(i.e. 16yrs of school) -education16=wages[wages.yearSchool==16] +education16=wages[wages.yearsSchool==16] #Calculate the minimum wage for people with 16yrs of school minimum16=min(education16.wage) #Calculate the difference between the minimum wage for those who finished school vs. those who didn't -print(minimum16-minimum12) \ No newline at end of file +print(minimum16-minimum12) +#Difference is 4.0816223772 From c02fb39967242cce4d4d42fab3c0966d32ab259d Mon Sep 17 00:00:00 2001 From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com> Date: Thu, 28 Sep 2017 10:32:39 -0400 Subject: [PATCH 7/7] Scripts for Parts 1-3 --- Exercise_5_Challenge_Complete.py | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 Exercise_5_Challenge_Complete.py diff --git a/Exercise_5_Challenge_Complete.py b/Exercise_5_Challenge_Complete.py new file mode 100644 index 0000000..5b1a02c --- /dev/null +++ b/Exercise_5_Challenge_Complete.py @@ -0,0 +1,52 @@ +#Completing Part 1 +import pandas #loading the pandas package to use data frames + +data = pandas.read_csv("wages.csv", header=0,sep=",") #loads the file + +gender_yrsexp = data.iloc[:,0:2] #subsets data by selecting the first two columns + +uniquegender = gender_yrsexp.drop_duplicates() #drops duplicates, like the unique function in unix +uniquegender.shape #displays shape of array + +sortgender = uniquegender.sort_values(["gender","yearsExperience"]) + +sortgender.to_csv("part1done.txt", sep=" ") + +#Completing Part 2 +import pandas +df=pandas.read_csv('wages.csv') +#df=wages.csv data + +#Highest earner +highest_earner=df.nlargest(1,'wage') +#output is under highest_earner (male,5,11,39.808917197) + +#Lowest earner +lowest_earner=df.nsmallest(1,'wage') +#Output is under lowest_earner (female,9,11,0.07655561) + +#Number of females in the top 10 +num_of_females=df[df['wage']>=df['wage'].nlargest(10).iloc[-1]]['gender'].eq('female').sum() +#Output is under num_of_females (=2) + +#Completing Part 3 +import pandas + +#Define wages +wages=pandas.read_csv('wages.csv') + +#Select for the people who didn't finish school(i.e. 12yrs of school) +education12=wages[wages.yearsSchool==12] + +#Calculate the minimum wage for people with 12yrs of school +minimum12=min(education12.wage) + +#Select for the people who did finish school(i.e. 16yrs of school) +education16=wages[wages.yearsSchool==16] + +#Calculate the minimum wage for people with 16yrs of school +minimum16=min(education16.wage) + +#Calculate the difference between the minimum wage for those who finished school vs. those who didn't +print(minimum16-minimum12) +#Difference is 4.0816223772 \ No newline at end of file