-
Notifications
You must be signed in to change notification settings - Fork 11
chambers-yamasaki submission #5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
bfae0ae
e687da7
8e8b7d4
0850615
70caa3d
6e06ab7
0a4e5f0
3e0a57a
61dc49c
6c0397b
ffeb8c9
a1384e6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| import pandas | ||
|
|
||
| # Load the wage data; wages.csv is expected in the current working directory. | ||
| wages = pandas.read_csv("wages.csv") | ||
|
|
||
| # Keep only the two columns whose distinct combinations are reported. | ||
| gender_Experience = wages[["gender", "yearsExperience"]] | ||
|
|
||
| # DataFrame.sort() was removed from pandas; sort_values() is the supported call. | ||
| ordered_gender_Experience = gender_Experience.sort_values(["gender", "yearsExperience"], ascending=[True, True]) | ||
|
|
||
| # Rows are already limited to the two columns, so this leaves one row per combination. | ||
| unique_combos = ordered_gender_Experience.drop_duplicates() | ||
|
|
||
| print("Unique combinations of gender/years experience:", unique_combos) | ||
|
|
||
| # Split the workers by schooling: exactly 12 years vs. exactly 16 years. | ||
| wages_highschool = wages[wages.yearsSchool == 12] | ||
|
|
||
| wages_college = wages[wages.yearsSchool == 16] | ||
|
|
||
| # DataFrame.sort(columns=...) no longer exists in pandas; sort_values() replaces it. | ||
| ordered_wages_HS = wages_highschool.sort_values("wage", ascending=True) | ||
|
|
||
| ordered_wages_C = wages_college.sort_values("wage", ascending=True) | ||
|
|
||
| # After the ascending sort the first row holds the smallest wage. Read it by column | ||
| # label rather than by position 3 so the result does not depend on the CSV column order. | ||
| minWage_HS = ordered_wages_HS["wage"].iloc[0] | ||
|
|
||
| minWage_C = ordered_wages_C["wage"].iloc[0] | ||
|
|
||
| print("Effect of graduating college on minimum wage:", minWage_C - minWage_HS) | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| import pandas | ||
| print("Question1") | ||
| # Question 1 Part 1: every distinct (gender, yearsExperience) pair, ordered by | ||
| # gender ascending and, within a gender, by yearsExperience descending. | ||
| wages = pandas.read_csv("wages.csv", header=0, sep=",") | ||
| pair_columns = ['gender', 'yearsExperience'] | ||
| done = wages[pair_columns].sort_values(pair_columns, ascending=[True, False]).drop_duplicates() | ||
| print(done) | ||
|
|
||
| # Question 1 Part 2: distinct (gender, yearsExperience) pairs, both keys ascending. | ||
| wages = pandas.read_csv("wages.csv") | ||
| gender_Experience = wages[["gender", "yearsExperience"]] | ||
| # DataFrame.sort() was removed from pandas; sort_values() is the supported call. | ||
| ordered_gender_Experience = gender_Experience.sort_values(["gender", "yearsExperience"], ascending=[True, True]) | ||
| unique_combos = ordered_gender_Experience.drop_duplicates() | ||
| print("Unique combinations of gender/years experience:", unique_combos) | ||
|
|
||
| print("Question2") | ||
| # Question 2 part 1: load the data and remove the schooling column before ranking by wage. | ||
| import pandas | ||
| ex5data = pandas.read_csv("wages.csv") | ||
| # drop(..., inplace=True) returns None, so the old `sorted1 = ...` only ever held None. | ||
| # Rebinding the returned frame removes the column without leaving a dead variable. | ||
| ex5data = ex5data.drop('yearsSchool', axis=1) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the variable "sorted1" for? |
||
| # Sort ascending by wage and keep one row per distinct wage; the last row is the top wage. | ||
| sorted2 = ex5data.sort_values("wage") | ||
| part1 = sorted2.drop_duplicates("wage").tail(1) | ||
| print("Highest earner", part1) | ||
|
|
||
| # Question 2 part 2: the lowest earner, reported without the schooling column. | ||
| import pandas | ||
| ex5data = pandas.read_csv("wages.csv") | ||
| # drop(..., inplace=True) returns None, so the old `sorted1 = ...` only ever held None. | ||
| # Rebinding the returned frame removes the column without leaving a dead variable. | ||
| ex5data = ex5data.drop('yearsSchool', axis=1) | ||
| sorted2 = ex5data.sort_values("wage") | ||
| # One row per distinct wage, still in ascending order, so the first row is the lowest wage. | ||
| duplicates = sorted2.drop_duplicates("wage") | ||
| part2 = duplicates.head(1) | ||
| print("Lowest earner", part2) | ||
|
|
||
| # Question 2 part 3: rank every worker by wage so the ten highest earners can be taken. | ||
| import pandas | ||
| ex5data = pandas.read_csv("wages.csv") | ||
| sorted1 = ex5data.sort_values("wage") | ||
| # The old line read the stale `sorted2` left over from part 2 and dropped rows that | ||
| # share a wage, which can push real top-10 earners out of the list. Keep every row; | ||
| # the name `duplicates` is retained only because the following lines read it. | ||
| duplicates = sorted1 | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't need to drop_duplicates for this question |
||
| # The frame is sorted ascending by wage, so the last ten rows are the top earners. | ||
| top10 = duplicates.tail(10) | ||
| # Still written out so the script keeps producing top10.csv as it did before. | ||
| top10.to_csv("top10.csv") | ||
| # Count on the gender column directly instead of text-searching the CSV lines, | ||
| # which would also match "female" appearing in any other field. | ||
| total = int((top10["gender"] == "female").sum()) | ||
| print("Number of women in the top 10 earners", total) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Or you can just use: |
||
|
|
||
| print("Question3") | ||
| # Question 3: order the 12-year and 16-year schooling groups by wage. | ||
| wages_highschool = wages[wages.yearsSchool == 12] | ||
| wages_college = wages[wages.yearsSchool == 16] | ||
| # DataFrame.sort(columns=...) was removed from pandas; sort_values() is the replacement. | ||
| ordered_wages_HS = wages_highschool.sort_values("wage", ascending=True) | ||
| ordered_wages_C = wages_college.sort_values("wage", ascending=True) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. wages_college.sort -> wages_college.sort_values -0.25 |
||
| # Both frames are sorted ascending by wage, so row 0 holds each group's lowest wage. | ||
| # Read it by column label rather than position 3 so CSV column order does not matter. | ||
| minWage_HS = ordered_wages_HS["wage"].iloc[0] | ||
| minWage_C = ordered_wages_C["wage"].iloc[0] | ||
| print("Effect of graduating college on minimum wage:", minWage_C - minWage_HS) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| import pandas | ||
|
|
||
| # Part 1: every distinct (gender, yearsExperience) pair, ordered by gender ascending | ||
| # and, within a gender, by yearsExperience descending. | ||
| wages = pandas.read_csv("wages.csv", header=0, sep=",") | ||
| pair_columns = ['gender', 'yearsExperience'] | ||
| done = wages[pair_columns].sort_values(pair_columns, ascending=[True, False]).drop_duplicates() | ||
| print(done) | ||
|
|
||
| # Part 2: distinct (gender, yearsExperience) pairs, both keys ascending. | ||
| wages = pandas.read_csv("wages.csv") | ||
| gender_Experience = wages[["gender", "yearsExperience"]] | ||
| # DataFrame.sort() was removed from pandas; sort_values() is the supported call. | ||
| ordered_gender_Experience = gender_Experience.sort_values(["gender", "yearsExperience"], ascending=[True, True]) | ||
| unique_combos = ordered_gender_Experience.drop_duplicates() | ||
| print("Unique combinations of gender/years experience:", unique_combos) | ||
|
|
||
| # Part 3: difference between the lowest wage at 16 years and at 12 years of schooling. | ||
| wages_highschool = wages[wages.yearsSchool == 12] | ||
| wages_college = wages[wages.yearsSchool == 16] | ||
| # DataFrame.sort(columns=...) was removed from pandas; sort_values() is the replacement. | ||
| ordered_wages_HS = wages_highschool.sort_values("wage", ascending=True) | ||
| ordered_wages_C = wages_college.sort_values("wage", ascending=True) | ||
| # Row 0 of each ascending sort is that group's lowest wage; read it by column label | ||
| # rather than position 3 so CSV column order does not matter. | ||
| minWage_HS = ordered_wages_HS["wage"].iloc[0] | ||
| minWage_C = ordered_wages_C["wage"].iloc[0] | ||
| print("Effect of graduating college on minimum wage:", minWage_C - minWage_HS) | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| # Report the row with the highest wage, with the schooling column removed. | ||
| import pandas | ||
| print("Highest earner") | ||
| ex5data = pandas.read_csv("wages.csv") | ||
| # drop(..., inplace=True) returns None, so the old `sorted1 = ...` only ever held None. | ||
| ex5data = ex5data.drop('yearsSchool', axis=1) | ||
| sorted2 = ex5data.sort_values("wage") | ||
| duplicates = sorted2.drop_duplicates("wage") | ||
| # A bare `duplicates.tail(1)` displays nothing when run as a script; print it explicitly. | ||
| print(duplicates.tail(1)) | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| # List every distinct (gender, yearsExperience) pair found in wages.csv, ordered by | ||
| # gender ascending and, within a gender, by yearsExperience descending. | ||
| import pandas | ||
| wages = pandas.read_csv("wages.csv", header=0, sep=",") | ||
| pair_columns = ['gender', 'yearsExperience'] | ||
| done = wages[pair_columns].sort_values(pair_columns, ascending=[True, False]).drop_duplicates() | ||
| print(done) | ||
|
|
||
|
|
||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| # Question 2 part 1: the highest earner, reported without the schooling column. | ||
| import pandas | ||
| ex5data = pandas.read_csv("wages.csv") | ||
| # drop(..., inplace=True) returns None, so the old `sorted1 = ...` only ever held None. | ||
| # Rebinding the returned frame removes the column without leaving a dead variable. | ||
| ex5data = ex5data.drop('yearsSchool', axis=1) | ||
| sorted2 = ex5data.sort_values("wage") | ||
| # One row per distinct wage, still in ascending order, so the last row is the top wage. | ||
| duplicates = sorted2.drop_duplicates("wage") | ||
| part1 = duplicates.tail(1) | ||
| print("Highest earner", part1) | ||
|
|
||
| # Question 2 part 2: the lowest earner, reported without the schooling column. | ||
| import pandas | ||
| ex5data = pandas.read_csv("wages.csv") | ||
| # drop(..., inplace=True) returns None, so the old `sorted1 = ...` only ever held None. | ||
| # Rebinding the returned frame removes the column without leaving a dead variable. | ||
| ex5data = ex5data.drop('yearsSchool', axis=1) | ||
| sorted2 = ex5data.sort_values("wage") | ||
| # One row per distinct wage, still in ascending order, so the first row is the lowest wage. | ||
| duplicates = sorted2.drop_duplicates("wage") | ||
| part2 = duplicates.head(1) | ||
| print("Lowest earner", part2) | ||
|
|
||
| # Question 2 part 3: how many of the ten highest earners are women. | ||
| import pandas | ||
| ex5data = pandas.read_csv("wages.csv") | ||
| sorted1 = ex5data.sort_values("wage") | ||
| # The old code ranked the stale `sorted2` from part 2 and dropped rows sharing a wage, | ||
| # which can push real top-10 earners out of the list. Rank the freshly sorted frame | ||
| # and keep every row. | ||
| top10 = sorted1.tail(10) | ||
| # Still written out so the script keeps producing top10.csv as it did before. | ||
| top10.to_csv("top10.csv") | ||
| # Count on the gender column directly instead of text-searching the CSV lines, | ||
| # which would also match "female" appearing in any other field. | ||
| total = int((top10["gender"] == "female").sum()) | ||
| print("Number of women in the top 10 earners", total) | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"gender_Experience.sort" should be "gender_Experience.sort_values"