diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5c5ca7a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+*.pptx
+
diff --git a/Exercise_5_Challenge_Complete.py b/Exercise_5_Challenge_Complete.py
new file mode 100644
index 0000000..5b1a02c
--- /dev/null
+++ b/Exercise_5_Challenge_Complete.py
@@ -0,0 +1,54 @@
+#Completing Part 1
+import pandas #loading the pandas package to use data frames
+
+data = pandas.read_csv("wages.csv", header=0,sep=",") #loads the file
+
+gender_yrsexp = data.iloc[:,0:2] #subsets data by selecting the first two columns
+
+uniquegender = gender_yrsexp.drop_duplicates() #drops duplicates, like the unique function in unix
+print(uniquegender.shape) #a bare expression shows nothing when run as a script, so print the shape
+
+sortgender = uniquegender.sort_values(["gender","yearsExperience"]) #orders by gender, then by experience
+
+sortgender.to_csv("part1done.txt", sep=" ") #space-delimited output; the row index is written too
+
+#Completing Part 2
+import pandas
+df=pandas.read_csv('wages.csv')
+#df=wages.csv data
+
+#Highest earner
+highest_earner=df.nlargest(1,'wage')
+#output is under highest_earner (male,5,11,39.808917197)
+
+#Lowest earner
+lowest_earner=df.nsmallest(1,'wage')
+#Output is under lowest_earner (female,9,11,0.07655561)
+
+#Number of females in the top 10
+#nlargest(10) keeps exactly ten rows; filtering on wage>=cutoff would pull in extra rows when wages tie at the cutoff
+num_of_females=df.nlargest(10,'wage')['gender'].eq('female').sum()
+#Output is under num_of_females (=2 as recorded by the author; recheck if wages tie at 10th place)
+
+#Completing Part 3
+import pandas
+
+#Define wages
+wages=pandas.read_csv('wages.csv')
+
+#Select for the people who didn't finish school(i.e. 12yrs of school)
+education12=wages[wages.yearsSchool==12]
+
+#Calculate the minimum wage for people with 12yrs of school
+#Series.min() skips missing values, unlike the builtin min()
+minimum12=education12.wage.min()
+
+#Select for the people who did finish school(i.e. 16yrs of school)
+education16=wages[wages.yearsSchool==16]
+
+#Calculate the minimum wage for people with 16yrs of school
+minimum16=education16.wage.min()
+
+#Calculate the difference between the minimum wage for those who finished school vs. those who didn't
+print(minimum16-minimum12)
+#Difference is 4.0816223772
diff --git a/Exercise_5_Part_2.py b/Exercise_5_Part_2.py
new file mode 100644
index 0000000..dab1676
--- /dev/null
+++ b/Exercise_5_Part_2.py
@@ -0,0 +1,16 @@
+import pandas
+df=pandas.read_csv('wages.csv')
+#df=wages.csv data
+
+#Highest earner
+highest_earner=df.nlargest(1,'wage')
+#output is under highest_earner (male,5,11,39.808917197)
+
+#Lowest earner
+lowest_earner=df.nsmallest(1,'wage')
+#Output is under lowest_earner (female,9,11,0.07655561)
+
+#Number of females in the top 10
+#nlargest(10) keeps exactly ten rows; filtering on wage>=cutoff would pull in extra rows when wages tie at the cutoff
+num_of_females=df.nlargest(10,'wage')['gender'].eq('female').sum()
+#Output is under num_of_females (=2 as recorded by the author; recheck if wages tie at 10th place)
diff --git a/Exercise_5_Part_3.py b/Exercise_5_Part_3.py
new file mode 100644
index 0000000..05dbec0
--- /dev/null
+++ b/Exercise_5_Part_3.py
@@ -0,0 +1,20 @@
+import pandas
+#Define wages
+wages=pandas.read_csv('wages.csv')
+
+#Select for the people who didn't finish school(i.e. 12yrs of school)
+education12=wages[wages.yearsSchool==12]
+
+#Calculate the minimum wage for people with 12yrs of school
+#Series.min() skips missing values, unlike the builtin min()
+minimum12=education12.wage.min()
+
+#Select for the people who did finish school(i.e. 16yrs of school)
+education16=wages[wages.yearsSchool==16]
+
+#Calculate the minimum wage for people with 16yrs of school
+minimum16=education16.wage.min()
+
+#Calculate the difference between the minimum wage for those who finished school vs. those who didn't
+print(minimum16-minimum12)
+#Difference is 4.0816223772
diff --git a/excercise5updated.py b/excercise5updated.py
new file mode 100644
index 0000000..6de004a
--- /dev/null
+++ b/excercise5updated.py
@@ -0,0 +1,14 @@
+#completing exercise5
+import pandas #loading the pandas package to use data frames
+
+data = pandas.read_csv("wages.csv", header=0,sep=",") #loads the file
+
+#completing part1
+gender_yrsexp = data.iloc[:,0:2] #subsets data by selecting the first two columns
+
+uniquegender = gender_yrsexp.drop_duplicates() #drops duplicates, like the unique function in unix
+print(uniquegender.shape) #a bare expression shows nothing when run as a script, so print the shape
+
+sortgender = uniquegender.sort_values(["gender","yearsExperience"]) #orders by gender, then by experience
+
+sortgender.to_csv("part1done.txt", sep=" ") #space-delimited output; the row index is written too
diff --git a/part1completed.txt b/part1completed.txt
new file mode 100644
index 0000000..a4ec58a
--- /dev/null
+++ b/part1completed.txt
@@ -0,0 +1,34 @@
+ gender yearsExperience
+168 female 1
+215 female 2
+15 female 3
+37 female 4
+23 female 5
+27 female 6
+9 female 7
+4 female 8
+0 female 9
+7 female 10
+2 female 11
+1 female 12
+17 female 13
+350 female 14
+46 female 15
+623 female 16
+1784 male 2
+1658 male 3
+1650 male 4
+1599 male 5
+1594 male 6
+1570 male 7
+1581 male 8
+1579 male 9
+1569 male 10
+1573 male 11
+1571 male 12
+1617 male 13
+1589 male 14
+1605 male 15
+1608 male 16
+1959 male 17
+1942 male 18
diff --git a/part1done.txt b/part1done.txt
new file mode 100644
index 0000000..a4ec58a
--- /dev/null
+++ b/part1done.txt
@@ -0,0 +1,34 @@
+ gender yearsExperience
+168 female 1
+215 female 2
+15 female 3
+37 female 4
+23 female 5
+27 female 6
+9 female 7
+4 female 8
+0 female 9
+7 female 10
+2 female 11
+1 female 12
+17 female 13
+350 female 14
+46 female 15
+623 female 16
+1784 male 2
+1658 male 3
+1650 male 4
+1599 male 5
+1594 male 6
+1570 male 7
+1581 male 8
+1579 male 9
+1569 male 10
+1573 male 11
+1571 male 12
+1617 male 13
+1589 male 14
+1605 male 15
+1608 male 16
+1959 male 17
+1942 male 18