Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.DS_Store
*.pptx

52 changes: 52 additions & 0 deletions Exercise_5_Challenge_Complete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#Completing Part 1
# Writes the unique gender / yearsExperience combinations found in wages.csv
# to part1done.txt, sorted by gender and then by yearsExperience.
import pandas  # pandas supplies the DataFrame used throughout

# Load the wage table; the first row of the file holds the column names.
data = pandas.read_csv("wages.csv", header=0, sep=",")

# Keep only the first two columns (sorted below as gender and yearsExperience).
gender_yrsexp = data.iloc[:, 0:2]

# Collapse repeated rows so each combination appears once.
uniquegender = gender_yrsexp.drop_duplicates()
# A bare `uniquegender.shape` is only echoed in an interactive session;
# print it so the shape is also shown when the file runs as a script.
print(uniquegender.shape)

# Order by gender first, then by years of experience within each gender.
sortgender = uniquegender.sort_values(["gender", "yearsExperience"])

# NOTE(review): to_csv also writes the row index as a leading column;
# pass index=False if only the two data columns are wanted -- confirm.
sortgender.to_csv("part1done.txt", sep=" ")

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

#Completing Part 2
# Finds the highest earner, the lowest earner, and how many of the
# top-ten wage earners are female in wages.csv.
import pandas

# df holds the contents of wages.csv
df = pandas.read_csv('wages.csv')

#Highest earner: the single row with the largest wage
highest_earner = df.nlargest(n=1, columns='wage')
#recorded result in highest_earner (male,5,11,39.808917197)

#Lowest earner: the single row with the smallest wage
lowest_earner = df.nsmallest(n=1, columns='wage')
#recorded result in lowest_earner (female,9,11,0.07655561)

#Number of females in the top 10
# The tenth-largest wage is the cutoff; every row at or above it is kept,
# so rows tied with the cutoff are included as well.
wage_cutoff = df['wage'].nlargest(10).iloc[-1]
top_earners = df[df['wage'] >= wage_cutoff]
num_of_females = top_earners['gender'].eq('female').sum()
#recorded result in num_of_females (=2)

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

#Completing Part 3
# Prints the difference between the minimum wage of people with 16 years
# of schooling and the minimum wage of people with 12 years of schooling.
import pandas

#Load the wage table
wages = pandas.read_csv('wages.csv')

#Select the people with exactly 12 years of school
education12 = wages[wages.yearsSchool == 12]

#Minimum wage for people with 12yrs of school.
#Series.min() skips missing values; the builtin min() compares NaN
#element by element and can silently return a wrong answer.
minimum12 = education12.wage.min()

#Select the people with exactly 16 years of school
education16 = wages[wages.yearsSchool == 16]

#Minimum wage for people with 16yrs of school
minimum16 = education16.wage.min()

#Difference between the two group minimums (16yrs minus 12yrs)
print(minimum16 - minimum12)
#Difference is 4.0816223772

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

15 changes: 15 additions & 0 deletions Exercise_5_Part_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pandas

# Finds the highest earner, the lowest earner, and how many of the
# top-ten wage earners are female in wages.csv.
# df holds the contents of wages.csv
df = pandas.read_csv('wages.csv')

#Highest earner: the single row with the largest wage
highest_earner = df.nlargest(n=1, columns='wage')
#recorded result in highest_earner (male,5,11,39.808917197)

#Lowest earner: the single row with the smallest wage
lowest_earner = df.nsmallest(n=1, columns='wage')
#recorded result in lowest_earner (female,9,11,0.07655561)

#Number of females in the top 10
# The tenth-largest wage is the cutoff; every row at or above it is kept,
# so rows tied with the cutoff are included as well.
wage_cutoff = df['wage'].nlargest(10).iloc[-1]
top_earners = df[df['wage'] >= wage_cutoff]
num_of_females = top_earners['gender'].eq('female').sum()
#recorded result in num_of_females (=2)
19 changes: 19 additions & 0 deletions Exercise_5_Part_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pandas

# Prints the difference between the minimum wage of people with 16 years
# of schooling and the minimum wage of people with 12 years of schooling.
#Load the wage table
wages = pandas.read_csv('wages.csv')

#Select the people with exactly 12 years of school
education12 = wages[wages.yearsSchool == 12]

#Minimum wage for people with 12yrs of school.
#Series.min() skips missing values; the builtin min() compares NaN
#element by element and can silently return a wrong answer.
minimum12 = education12.wage.min()

#Select the people with exactly 16 years of school
education16 = wages[wages.yearsSchool == 16]

#Minimum wage for people with 16yrs of school
minimum16 = education16.wage.min()

#Difference between the two group minimums (16yrs minus 12yrs)
print(minimum16 - minimum12)
#Difference is 4.0816223772
14 changes: 14 additions & 0 deletions excercise5updated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#completing exercise5
# Writes the unique gender / yearsExperience combinations found in wages.csv
# to part1done.txt, sorted by gender and then by yearsExperience.
import pandas  # pandas supplies the DataFrame used throughout

# Load the wage table; the first row of the file holds the column names.
data = pandas.read_csv("wages.csv", header=0, sep=",")

#completing part1
# Keep only the first two columns (sorted below as gender and yearsExperience).
gender_yrsexp = data.iloc[:, 0:2]

# Collapse repeated rows so each combination appears once.
uniquegender = gender_yrsexp.drop_duplicates()
# A bare `uniquegender.shape` is only echoed in an interactive session;
# print it so the shape is also shown when the file runs as a script.
print(uniquegender.shape)

# Order by gender first, then by years of experience within each gender.
sortgender = uniquegender.sort_values(["gender", "yearsExperience"])

# NOTE(review): to_csv also writes the row index as a leading column;
# pass index=False if only the two data columns are wanted -- confirm.
sortgender.to_csv("part1done.txt", sep=" ")
34 changes: 34 additions & 0 deletions part1completed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
gender yearsExperience
168 female 1
215 female 2
15 female 3
37 female 4
23 female 5
27 female 6
9 female 7
4 female 8
0 female 9
7 female 10
2 female 11
1 female 12
17 female 13
350 female 14
46 female 15
623 female 16
1784 male 2
1658 male 3
1650 male 4
1599 male 5
1594 male 6
1570 male 7
1581 male 8
1579 male 9
1569 male 10
1573 male 11
1571 male 12
1617 male 13
1589 male 14
1605 male 15
1608 male 16
1959 male 17
1942 male 18
34 changes: 34 additions & 0 deletions part1done.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
gender yearsExperience
168 female 1
215 female 2
15 female 3
37 female 4
23 female 5
27 female 6
9 female 7
4 female 8
0 female 9
7 female 10
2 female 11
1 female 12
17 female 13
350 female 14
46 female 15
623 female 16
1784 male 2
1658 male 3
1650 male 4
1599 male 5
1594 male 6
1570 male 7
1581 male 8
1579 male 9
1569 male 10
1573 male 11
1571 male 12
1617 male 13
1589 male 14
1605 male 15
1608 male 16
1959 male 17
1942 male 18