From bfae0aef24d2d57062e046e77b4c5b20656d05c8 Mon Sep 17 00:00:00 2001 From: Joe C Date: Mon, 25 Sep 2017 19:01:52 -0400 Subject: [PATCH 01/11] finished part 1 of question 2_Highest earner --- q2part1.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100755 q2part1.py diff --git a/q2part1.py b/q2part1.py new file mode 100755 index 0000000..b1eaa95 --- /dev/null +++ b/q2part1.py @@ -0,0 +1,8 @@ +print("Highest earner") +import pandas +ex5data=pandas.read_csv("wages.csv") +sorted1=ex5data.drop('yearsSchool', axis=1, inplace=True) +sorted2=ex5data.sort_values("wage") +duplicates=sorted2.drop_duplicates("wage") +duplicates.tail(1) + From e687da7b3d63afe60ef778995928f021e4634992 Mon Sep 17 00:00:00 2001 From: Joe C Date: Mon, 25 Sep 2017 21:33:44 -0400 Subject: [PATCH 02/11] question 2 is finished_JC --- question2.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100755 question2.py diff --git a/question2.py b/question2.py new file mode 100755 index 0000000..264936c --- /dev/null +++ b/question2.py @@ -0,0 +1,32 @@ +print("Highest earner") +import pandas +ex5data=pandas.read_csv("wages.csv") +sorted1=ex5data.drop('yearsSchool', axis=1, inplace=True) +sorted2=ex5data.sort_values("wage") +duplicates=sorted2.drop_duplicates("wage") +part1=duplicates.tail(1) +print(part1) + +print("Lowest earner") +import pandas +ex5data=pandas.read_csv("wages.csv") +sorted1=ex5data.drop('yearsSchool', axis=1, inplace=True) +sorted2=ex5data.sort_values("wage") +duplicates=sorted2.drop_duplicates("wage") +part2=duplicates.head(1) +print(part2) + +print("Number of women in the top 10 earners") +import pandas +ex5data=pandas.read_csv("wages.csv") +sorted1=ex5data.sort_values("wage") +duplicates=sorted2.drop_duplicates("wage") +top10.txt=duplicates.tail(10) +top10.to_csv("top10.csv") +total = 0 +with open('top10.csv') as f: + for line in f: + finded = line.find('female') + if finded != -1 and finded != 0: + total += 1 +print(total) From 
8e8b7d45533c63bc92be25d42c93fd5572b6ad0e Mon Sep 17 00:00:00 2001 From: Joe C Date: Thu, 28 Sep 2017 00:47:50 -0400 Subject: [PATCH 03/11] JC attempt at question 1 completed. probably not the most efficient --- question1.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100755 question1.py diff --git a/question1.py b/question1.py new file mode 100755 index 0000000..428117c --- /dev/null +++ b/question1.py @@ -0,0 +1,10 @@ +#import pandas. read the csv. read the two columns. sort by both columns in order. drop duplicates. print. +import pandas +wages=pandas.read_csv("wages.csv",header=0,sep=",") +reduced=wages[['gender', 'yearsExperience']] +sorted1=reduced.sort_values(['gender', 'yearsExperience'], ascending=[True, False]) +done=sorted1.drop_duplicates() +print(done) + + + From 0850615521810bd6aac395149cbf0bfefff29db1 Mon Sep 17 00:00:00 2001 From: ayamasaki2011 Date: Thu, 28 Sep 2017 18:05:47 -0400 Subject: [PATCH 04/11] AEY: Start Python script to sort gender-yearsExperience pairs in wages.csv --- AEY_Part1.py | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100755 AEY_Part1.py diff --git a/AEY_Part1.py b/AEY_Part1.py new file mode 100755 index 0000000..8940ecd --- /dev/null +++ b/AEY_Part1.py @@ -0,0 +1,6 @@ +import pandas + +wages = pandas.read_csv("wages.csv") + +gender_Experience=wages[["gender","yearsExperience"]] + From 70caa3d354b3089dd07534ecf2cc1b805975ad1e Mon Sep 17 00:00:00 2001 From: ayamasaki2011 Date: Thu, 28 Sep 2017 18:19:34 -0400 Subject: [PATCH 05/11] Modify script to sort gender and years experience first by gender, then by experience --- AEY_Part1.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AEY_Part1.py b/AEY_Part1.py index 8940ecd..502592c 100755 --- a/AEY_Part1.py +++ b/AEY_Part1.py @@ -4,3 +4,6 @@ gender_Experience=wages[["gender","yearsExperience"]] +ordered_gender_Experience=gender_Experience.sort(['gender','yearsExperience'], ascending=[True,True]) + 
+unique_combos=ordered_gender_Experience.drop_duplicates(inplace=True) \ No newline at end of file From 6e06ab7c8472d0689c07ce3770b928a84ac30698 Mon Sep 17 00:00:00 2001 From: ayamasaki2011 Date: Thu, 28 Sep 2017 18:20:50 -0400 Subject: [PATCH 06/11] Modify script to drop duplicate pairs of gender/experience --- AEY_Part1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AEY_Part1.py b/AEY_Part1.py index 502592c..8d3e495 100755 --- a/AEY_Part1.py +++ b/AEY_Part1.py @@ -6,4 +6,4 @@ ordered_gender_Experience=gender_Experience.sort(['gender','yearsExperience'], ascending=[True,True]) -unique_combos=ordered_gender_Experience.drop_duplicates(inplace=True) \ No newline at end of file +unique_combos=ordered_gender_Experience.drop_duplicates() \ No newline at end of file From 0a4e5f0919a76d3abed08084b132ef61514b1933 Mon Sep 17 00:00:00 2001 From: ayamasaki2011 Date: Thu, 28 Sep 2017 18:37:06 -0400 Subject: [PATCH 07/11] Modify script to calculate effect of graduating college on minimum wage --- AEY_Part1.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/AEY_Part1.py b/AEY_Part1.py index 8d3e495..2715f63 100755 --- a/AEY_Part1.py +++ b/AEY_Part1.py @@ -6,4 +6,21 @@ ordered_gender_Experience=gender_Experience.sort(['gender','yearsExperience'], ascending=[True,True]) -unique_combos=ordered_gender_Experience.drop_duplicates() \ No newline at end of file +unique_combos=ordered_gender_Experience.drop_duplicates() + +print("Unique combinations of gender/years experience:", unique_combos) + +wages_highschool = wages[wages.yearsSchool == 12] + +wages_college = wages[wages.yearsSchool == 16] + +ordered_wages_HS = wages_highschool.sort(columns='wage', ascending=True) + +ordered_wages_C = wages_college.sort(columns='wage', ascending=True) + +minWage_HS = ordered_wages_HS.iloc[0,3] + +minWage_C = ordered_wages_C.iloc[0,3] + +print("Effect of graduating college on minimum wage:", minWage_C - minWage_HS) + From 
3e0a57a10cae1a3f67181a01e3cb8e65d5803e52 Mon Sep 17 00:00:00 2001 From: ayamasaki2011 Date: Thu, 28 Sep 2017 18:38:03 -0400 Subject: [PATCH 08/11] AEY: Add file for combined answers --- earnings.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100755 earnings.py diff --git a/earnings.py b/earnings.py new file mode 100755 index 0000000..2715f63 --- /dev/null +++ b/earnings.py @@ -0,0 +1,26 @@ +import pandas + +wages = pandas.read_csv("wages.csv") + +gender_Experience=wages[["gender","yearsExperience"]] + +ordered_gender_Experience=gender_Experience.sort(['gender','yearsExperience'], ascending=[True,True]) + +unique_combos=ordered_gender_Experience.drop_duplicates() + +print("Unique combinations of gender/years experience:", unique_combos) + +wages_highschool = wages[wages.yearsSchool == 12] + +wages_college = wages[wages.yearsSchool == 16] + +ordered_wages_HS = wages_highschool.sort(columns='wage', ascending=True) + +ordered_wages_C = wages_college.sort(columns='wage', ascending=True) + +minWage_HS = ordered_wages_HS.iloc[0,3] + +minWage_C = ordered_wages_C.iloc[0,3] + +print("Effect of graduating college on minimum wage:", minWage_C - minWage_HS) + From 61dc49ccfaf3cdb362318c9153b0a9bdbbe7e2a0 Mon Sep 17 00:00:00 2001 From: ayamasaki2011 Date: Thu, 28 Sep 2017 18:39:59 -0400 Subject: [PATCH 09/11] AEY: Combine separate Python scripts into single file --- earnings.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/earnings.py b/earnings.py index 2715f63..1ec30cc 100755 --- a/earnings.py +++ b/earnings.py @@ -1,26 +1,25 @@ import pandas +#Part 1 +wages=pandas.read_csv("wages.csv",header=0,sep=",") +reduced=wages[['gender', 'yearsExperience']] +sorted1=reduced.sort_values(['gender', 'yearsExperience'], ascending=[True, False]) +done=sorted1.drop_duplicates() +print(done) + +#Part 2 wages = pandas.read_csv("wages.csv") - gender_Experience=wages[["gender","yearsExperience"]] - 
ordered_gender_Experience=gender_Experience.sort(['gender','yearsExperience'], ascending=[True,True]) - unique_combos=ordered_gender_Experience.drop_duplicates() - print("Unique combinations of gender/years experience:", unique_combos) +#Part 3 wages_highschool = wages[wages.yearsSchool == 12] - wages_college = wages[wages.yearsSchool == 16] - ordered_wages_HS = wages_highschool.sort(columns='wage', ascending=True) - ordered_wages_C = wages_college.sort(columns='wage', ascending=True) - minWage_HS = ordered_wages_HS.iloc[0,3] - minWage_C = ordered_wages_C.iloc[0,3] - print("Effect of graduating college on minimum wage:", minWage_C - minWage_HS) From 6c0397b69a985ea49de7cc6e3e034eb6ed055ddd Mon Sep 17 00:00:00 2001 From: Joe C Date: Thu, 28 Sep 2017 20:23:19 -0400 Subject: [PATCH 10/11] exercise 5 completed and combined --- Exercise5_AEY_JC.py | 59 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100755 Exercise5_AEY_JC.py diff --git a/Exercise5_AEY_JC.py b/Exercise5_AEY_JC.py new file mode 100755 index 0000000..099a89e --- /dev/null +++ b/Exercise5_AEY_JC.py @@ -0,0 +1,59 @@ +import pandas +print("Question1") +#Question 1 Part 1 +wages=pandas.read_csv("wages.csv",header=0,sep=",") +reduced=wages[['gender', 'yearsExperience']] +sorted1=reduced.sort_values(['gender', 'yearsExperience'], ascending=[True, False]) +done=sorted1.drop_duplicates() +print(done) + +#Question 1 Part 2 +wages = pandas.read_csv("wages.csv") +gender_Experience=wages[["gender","yearsExperience"]] +ordered_gender_Experience=gender_Experience.sort(['gender','yearsExperience'], ascending=[True,True]) +unique_combos=ordered_gender_Experience.drop_duplicates() +print("Unique combinations of gender/years experience:", unique_combos) + +print("Question2") +# Question 2 part 1 +import pandas +ex5data=pandas.read_csv("wages.csv") +sorted1=ex5data.drop('yearsSchool', axis=1, inplace=True) +sorted2=ex5data.sort_values("wage") 
+duplicates=sorted2.drop_duplicates("wage") +part1=duplicates.tail(1) +print("Highest earner", part1) + +# Question 2 part 2 +import pandas +ex5data=pandas.read_csv("wages.csv") +sorted1=ex5data.drop('yearsSchool', axis=1, inplace=True) +sorted2=ex5data.sort_values("wage") +duplicates=sorted2.drop_duplicates("wage") +part2=duplicates.head(1) +print("Lowest earner", part2) + +# Question 2 part 3 +import pandas +ex5data=pandas.read_csv("wages.csv") +sorted1=ex5data.sort_values("wage") +duplicates=sorted2.drop_duplicates("wage") +top10=duplicates.tail(10) +top10.to_csv("top10.csv") +total = 0 +with open('top10.csv') as f: + for line in f: + finded = line.find('female') + if finded != -1 and finded != 0: + total += 1 +print("Number of women in the top 10 earners", total) + +print("Question3") +#Question 3 +wages_highschool = wages[wages.yearsSchool == 12] +wages_college = wages[wages.yearsSchool == 16] +ordered_wages_HS = wages_highschool.sort(columns='wage', ascending=True) +ordered_wages_C = wages_college.sort(columns='wage', ascending=True) +minWage_HS = ordered_wages_HS.iloc[0,3] +minWage_C= ordered_wages_C.iloc[0,3] +print("Effect of graduating college on minimum wage:", minWage_C - minWage_HS) From a1384e66893ca753f45241541a189eccf9d96875 Mon Sep 17 00:00:00 2001 From: Joe C Date: Thu, 28 Sep 2017 20:27:13 -0400 Subject: [PATCH 11/11] fixed an issue printing with question 2 --- question2.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/question2.py b/question2.py index 264936c..87f6cc8 100755 --- a/question2.py +++ b/question2.py @@ -1,27 +1,27 @@ -print("Highest earner") +# Question 2 part 1 import pandas ex5data=pandas.read_csv("wages.csv") sorted1=ex5data.drop('yearsSchool', axis=1, inplace=True) sorted2=ex5data.sort_values("wage") duplicates=sorted2.drop_duplicates("wage") part1=duplicates.tail(1) -print(part1) +print("Highest earner", part1) -print("Lowest earner") +# Question 2 part 2 import pandas 
ex5data=pandas.read_csv("wages.csv") sorted1=ex5data.drop('yearsSchool', axis=1, inplace=True) sorted2=ex5data.sort_values("wage") duplicates=sorted2.drop_duplicates("wage") part2=duplicates.head(1) -print(part2) +print("Lowest earner", part2) -print("Number of women in the top 10 earners") +# Question 2 part 3 import pandas ex5data=pandas.read_csv("wages.csv") sorted1=ex5data.sort_values("wage") duplicates=sorted2.drop_duplicates("wage") -top10.txt=duplicates.tail(10) +top10=duplicates.tail(10) top10.to_csv("top10.csv") total = 0 with open('top10.csv') as f: @@ -29,4 +29,5 @@ finded = line.find('female') if finded != -1 and finded != 0: total += 1 -print(total) +print("Number of women in the top 10 earners", total) +