From 4fd3615fb0714d65318cbe373a2d951de6c2483a Mon Sep 17 00:00:00 2001 From: Abigail Midlige Date: Thu, 28 Sep 2017 13:37:36 -0400 Subject: [PATCH 1/9] Answer to part 1 --- part1.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 part1.py diff --git a/part1.py b/part1.py new file mode 100644 index 0000000..91ab6f4 --- /dev/null +++ b/part1.py @@ -0,0 +1,16 @@ +import os + +with open('wages.csv','r') as f: + file = f.readlines() + + + + +for line in file: + line = line.split(',') + gender = line[0] + exp = line[1] + str = gender + ' ' + exp + '\n' + with open('new.txt', 'a') as n: + n.write(str) + From df8679030233c3740c0ee643af08570d4b0a9421 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 28 Sep 2017 17:00:59 -0400 Subject: [PATCH 2/9] Parts 5b1 and 5b2 are in here, still need 5bc --- Exercise5b.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 Exercise5b.py diff --git a/Exercise5b.py b/Exercise5b.py new file mode 100755 index 0000000..66e9459 --- /dev/null +++ b/Exercise5b.py @@ -0,0 +1,7 @@ +with open('wages.csv','r') as f: + file=f.readlines() +import pandas +wages=pandas.read_csv("wages.csv") +a=wages.max(axis=0) +b=wages.min(axis=0) +print (a,b) \ No newline at end of file From dc3af2528a7c46da3906e06498454ed9346b4258 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 28 Sep 2017 18:01:42 -0400 Subject: [PATCH 3/9] Fixed issues with the first two parts of the second question --- Exercise5b.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Exercise5b.py b/Exercise5b.py index 66e9459..33fa920 100755 --- a/Exercise5b.py +++ b/Exercise5b.py @@ -1,7 +1,5 @@ -with open('wages.csv','r') as f: - file=f.readlines() -import pandas -wages=pandas.read_csv("wages.csv") -a=wages.max(axis=0) -b=wages.min(axis=0) -print (a,b) \ No newline at end of file +import pandas +wages=pandas.read_csv("wages.csv") +a=(wages.sort_values(by='wage')) +print (a.head(n=1)) +print (a.tail(n=1)) From 
9628dc44c64c5468af88c67c3ea6fa906e567929 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 28 Sep 2017 18:18:48 -0400 Subject: [PATCH 4/9] Figured out how to trim out the yearsSchool column --- Exercise5b.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Exercise5b.py b/Exercise5b.py index 33fa920..ecf3adb 100755 --- a/Exercise5b.py +++ b/Exercise5b.py @@ -1,5 +1,6 @@ import pandas wages=pandas.read_csv("wages.csv") +wages.drop([col for col in wages.columns if 'yearsSchool' in col],axis=1,inplace=True) a=(wages.sort_values(by='wage')) print (a.head(n=1)) print (a.tail(n=1)) From f61d2626d51fc744e9b7ac0675c612bf787c1f65 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 28 Sep 2017 18:28:22 -0400 Subject: [PATCH 5/9] Had to run to a different event; here's what I have so far, back at ~9 --- Exercise5b.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Exercise5b.py b/Exercise5b.py index ecf3adb..19d8dc8 100755 --- a/Exercise5b.py +++ b/Exercise5b.py @@ -2,5 +2,8 @@ wages=pandas.read_csv("wages.csv") wages.drop([col for col in wages.columns if 'yearsSchool' in col],axis=1,inplace=True) a=(wages.sort_values(by='wage')) +print ("Lowest earner") print (a.head(n=1)) +print ("Highest Earner") print (a.tail(n=1)) +b=(wages.sort_values(by='gender','wage',ascending=False)) From decca506228d3e8a58f1c8f72f8a018dfb205820 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 28 Sep 2017 18:31:57 -0400 Subject: [PATCH 6/9] Had to run to a different event; here's what I have so far, back at ~9 --- Exercise5b.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Exercise5b.py b/Exercise5b.py index 19d8dc8..1a93ef4 100755 --- a/Exercise5b.py +++ b/Exercise5b.py @@ -6,4 +6,4 @@ print (a.head(n=1)) print ("Highest Earner") print (a.tail(n=1)) -b=(wages.sort_values(by='gender','wage',ascending=False)) +b=(wages.sort_values(by='wage',ascending=False)) From 13603662e18ce024ac7ee2d4ad1a0d923aa84fba Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 28 Sep 
2017 22:22:22 -0400 Subject: [PATCH 7/9] Exercise 5b is done! Now only to work on 5c! --- Exercise5b.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Exercise5b.py b/Exercise5b.py index 1a93ef4..bfd4033 100755 --- a/Exercise5b.py +++ b/Exercise5b.py @@ -1,9 +1,17 @@ import pandas wages=pandas.read_csv("wages.csv") wages.drop([col for col in wages.columns if 'yearsSchool' in col],axis=1,inplace=True) -a=(wages.sort_values(by='wage')) +cc=(wages.sort_values(by='wage')) print ("Lowest earner") -print (a.head(n=1)) +print (cc.head(n=1)) print ("Highest Earner") -print (a.tail(n=1)) -b=(wages.sort_values(by='wage',ascending=False)) +print (cc.tail(n=1)) +print ("Top 10 Female Earners") +d=(wages.sort_values(by='gender')) +gender=line[0] +exp=line[1] +wage=line[3] +if 'gender'=='female': + a=gender+' '+exp+' '+wage+' '+'\n' +print (a.tail(n=10)) + From 66bad362e0444b09501b6cdc0e0e5d854489448e Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Fri, 29 Sep 2017 08:48:51 -0400 Subject: [PATCH 8/9] The Final Answer for Part 2; I'll have part 3 along shortly --- Exercise5b.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/Exercise5b.py b/Exercise5b.py index bfd4033..143228c 100755 --- a/Exercise5b.py +++ b/Exercise5b.py @@ -2,16 +2,10 @@ wages=pandas.read_csv("wages.csv") wages.drop([col for col in wages.columns if 'yearsSchool' in col],axis=1,inplace=True) cc=(wages.sort_values(by='wage')) -print ("Lowest earner") +print ("Lowest Earner") print (cc.head(n=1)) print ("Highest Earner") print (cc.tail(n=1)) print ("Top 10 Female Earners") -d=(wages.sort_values(by='gender')) -gender=line[0] -exp=line[1] -wage=line[3] -if 'gender'=='female': - a=gender+' '+exp+' '+wage+' '+'\n' -print (a.tail(n=10)) - +numoffemales=wages[wages['wage']>=wages['wage'].nlargest(10).iloc[-1]]['gender'].eq('female').sum() +print (numoffemales) From 8beb584fca105e65f888eaabcac2b4931041539c Mon Sep 17 00:00:00 2001 From: Phil McCown 
Date: Fri, 29 Sep 2017 08:52:59 -0400 Subject: [PATCH 9/9] All of Exercise 5 is done. Pull request back to YY --- Exercise5c.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100755 Exercise5c.py diff --git a/Exercise5c.py b/Exercise5c.py new file mode 100755 index 0000000..8a8a1f6 --- /dev/null +++ b/Exercise5c.py @@ -0,0 +1,8 @@ +import pandas +wages=pandas.read_csv("wages.csv") +ed12=wages[wages.yearsSchool==12] #12 years of education +minimum12=min(ed12.wage) #minimum wage of 12 +ed16=wages[wages.yearsSchool==16] #16 years of education +minimum16=min(ed16.wage) #minimum wage of 16 +print (minimum16-minimum12) +