Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions AEY_Part1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Summarise wages.csv.

Prints the unique gender / yearsExperience combinations and the difference
between the lowest wage of the yearsSchool == 16 rows and the lowest wage of
the yearsSchool == 12 rows.
"""
import pandas


def unique_gender_experience(wages):
    """Return the distinct (gender, yearsExperience) rows of *wages*.

    Rows are sorted ascending by gender, then by yearsExperience, before
    duplicates are removed.
    """
    pairs = wages[["gender", "yearsExperience"]]
    # DataFrame.sort() was removed from pandas; sort_values() replaces it.
    ordered = pairs.sort_values(
        ["gender", "yearsExperience"], ascending=[True, True]
    )
    return ordered.drop_duplicates()


def college_min_wage_effect(wages):
    """Return min wage at yearsSchool == 16 minus min wage at yearsSchool == 12.

    The wage column is selected by name rather than by position, so the
    result does not depend on the column order of the CSV.
    """
    min_wage_hs = wages.loc[wages["yearsSchool"] == 12, "wage"].min()
    min_wage_college = wages.loc[wages["yearsSchool"] == 16, "wage"].min()
    return min_wage_college - min_wage_hs


def main():
    """Read wages.csv from the working directory and print both results."""
    wages = pandas.read_csv("wages.csv")
    print("Unique combinations of gender/years experience:",
          unique_gender_experience(wages))
    print("Effect of graduating college on minimum wage:",
          college_min_wage_effect(wages))


if __name__ == "__main__":
    main()

59 changes: 59 additions & 0 deletions Exercise5_AEY_JC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import pandas

print("Question1")

# Question 1 Part 1
# Unique gender / yearsExperience pairs: gender ascending, experience
# descending within each gender.
wages = pandas.read_csv("wages.csv", header=0, sep=",")
reduced = wages[["gender", "yearsExperience"]]
sorted1 = reduced.sort_values(["gender", "yearsExperience"], ascending=[True, False])
done = sorted1.drop_duplicates()
print(done)

# Question 1 Part 2
# Same pairs with both columns ascending. `wages` is reused instead of being
# read a second time: nothing above modifies it.
gender_Experience = wages[["gender", "yearsExperience"]]
# DataFrame.sort() was removed from pandas; sort_values() replaces it.
ordered_gender_Experience = gender_Experience.sort_values(
    ["gender", "yearsExperience"], ascending=[True, True]
)

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"gender_Experience.sort" should be "gender_Experience.sort_values"

unique_combos = ordered_gender_Experience.drop_duplicates()
print("Unique combinations of gender/years experience:", unique_combos)

print("Question2")
# Question 2 part 1
# pandas is already imported at the top of this file.
ex5data = pandas.read_csv("wages.csv")
# drop(..., inplace=True) returns None, so binding its result to a variable
# only stored None. Rebind the frame without the yearsSchool column instead.
ex5data = ex5data.drop(columns="yearsSchool")

@lyy005 lyy005 Oct 5, 2017

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the variable "sorted1" for?

# Sort once by wage and keep one row per distinct wage; the highest and the
# lowest earner are the last and the first row of that frame.
sorted2 = ex5data.sort_values("wage")
duplicates = sorted2.drop_duplicates("wage")
part1 = duplicates.tail(1)
print("Highest earner", part1)

# Question 2 part 2
# The frame built for part 1 is reused; reloading and re-sorting the same
# file would give the same rows.
part2 = duplicates.head(1)
print("Lowest earner", part2)

# Question 2 part 3
# Reload so the yearsSchool column dropped for parts 1 and 2 is present again.
ex5data = pandas.read_csv("wages.csv")
# Keep every row here: dropping duplicate wages would remove people who tie
# on wage, and the previous code also sorted into one variable but then read
# a stale frame from part 2. The result stays bound to `duplicates` because
# the lines that follow read that name.
duplicates = ex5data.sort_values("wage")

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need to drop_duplicates for this question

top10 = duplicates.tail(10)
# Still written out so the top-10 table remains available on disk.
top10.to_csv("top10.csv")
# Count on the gender column directly instead of re-reading the CSV and
# searching every line for the text "female".
total = int((top10["gender"] == "female").sum())
print("Number of women in the top 10 earners", total)

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or you can just use:
femalesInTopTen=sum(top10.gender=="female")


print("Question3")
# Question 3
# Split the rows by years of schooling (12 vs. 16) and order each subset by
# wage, lowest first.
wages_highschool = wages[wages["yearsSchool"] == 12]
wages_college = wages[wages["yearsSchool"] == 16]
# DataFrame.sort(columns=...) was removed from pandas; use sort_values(by=...).
ordered_wages_HS = wages_highschool.sort_values(by="wage", ascending=True)
ordered_wages_C = wages_college.sort_values(by="wage", ascending=True)

@lyy005 lyy005 Oct 5, 2017

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wages_college.sort -> wages_college.sort_values

-0.25

# Select the wage column by name: the positional index 3 only gives the wage
# while wage happens to be the fourth column of the CSV.
minWage_HS = ordered_wages_HS["wage"].min()
minWage_C = ordered_wages_C["wage"].min()
print("Effect of graduating college on minimum wage:", minWage_C - minWage_HS)
25 changes: 25 additions & 0 deletions earnings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pandas


def unique_pairs(wages, experience_ascending):
    """Return the distinct (gender, yearsExperience) rows of *wages*.

    Rows are sorted by gender ascending; within a gender, yearsExperience is
    ascending when *experience_ascending* is true and descending otherwise.
    Sorting happens before duplicates are dropped.
    """
    pairs = wages[["gender", "yearsExperience"]]
    # DataFrame.sort() was removed from pandas; sort_values() replaces it.
    ordered = pairs.sort_values(
        ["gender", "yearsExperience"],
        ascending=[True, experience_ascending],
    )
    return ordered.drop_duplicates()


def min_wage(wages, years_school):
    """Return the lowest wage among rows whose yearsSchool equals *years_school*.

    The wage column is addressed by name, not by position.
    """
    return wages.loc[wages["yearsSchool"] == years_school, "wage"].min()


def main():
    """Read wages.csv from the working directory and print the three parts."""
    wages = pandas.read_csv("wages.csv", header=0, sep=",")

    # Part 1
    print(unique_pairs(wages, experience_ascending=False))

    # Part 2
    print("Unique combinations of gender/years experience:",
          unique_pairs(wages, experience_ascending=True))

    # Part 3
    print("Effect of graduating college on minimum wage:",
          min_wage(wages, 16) - min_wage(wages, 12))


if __name__ == "__main__":
    main()

8 changes: 8 additions & 0 deletions q2part1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Print the row of wages.csv with the highest wage, without the yearsSchool
# column.
import pandas

print("Highest earner")
ex5data = pandas.read_csv("wages.csv")
# drop(..., inplace=True) returns None, so its result is not worth keeping;
# rebind the frame without the column instead.
ex5data = ex5data.drop(columns="yearsSchool")
sorted2 = ex5data.sort_values("wage")
duplicates = sorted2.drop_duplicates("wage")
# A bare expression only shows its value in an interactive session; when the
# file is run as a script the result has to be printed explicitly.
print(duplicates.tail(1))

10 changes: 10 additions & 0 deletions question1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Load wages.csv, keep the gender and yearsExperience columns, order the rows
# by gender (ascending) and then yearsExperience (descending), remove repeated
# rows, and print what is left.
import pandas

columns = ['gender', 'yearsExperience']
wages = pandas.read_csv("wages.csv", header=0, sep=",")
done = (
    wages[columns]
    .sort_values(columns, ascending=[True, False])
    .drop_duplicates()
)
print(done)



33 changes: 33 additions & 0 deletions question2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Question 2: highest earner, lowest earner, and the number of women among
# the ten highest earners in wages.csv.
import pandas


def earners_by_wage(data):
    """Return *data* without yearsSchool, one row per distinct wage, wage ascending.

    The input frame is not modified.
    """
    ranked = data.drop(columns="yearsSchool").sort_values("wage")
    return ranked.drop_duplicates("wage")


def top_earners(data, count=10):
    """Return the *count* highest-wage rows of *data* in ascending wage order.

    Rows that tie on wage are all kept: dropping duplicate wages here would
    remove people from the ranking.
    """
    return data.sort_values("wage").tail(count)


def count_women(earners):
    """Return how many rows of *earners* have gender equal to "female"."""
    return int((earners["gender"] == "female").sum())


def main():
    """Read wages.csv, print the three answers and write top10.csv."""
    ex5data = pandas.read_csv("wages.csv")

    # Question 2 part 1
    ranked = earners_by_wage(ex5data)
    part1 = ranked.tail(1)
    print("Highest earner", part1)

    # Question 2 part 2
    part2 = ranked.head(1)
    print("Lowest earner", part2)

    # Question 2 part 3
    # Uses the full, freshly sorted data. The previous code sorted into one
    # variable but then read the stale frame left over from part 2.
    top10 = top_earners(ex5data)
    # Still written out so the top-10 table remains available on disk.
    top10.to_csv("top10.csv")
    total = count_women(top10)
    print("Number of women in the top 10 earners", total)


if __name__ == "__main__":
    main()