require(plyr)
## Loading required package: plyr
# Load the professor salaries dataset (comma-separated, with a header row)
# from the Rdatasets repository, keeping text columns as character vectors.
df1 <- read.table(
  "https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/carData/Salaries.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Per-column overview: quartiles for numeric columns, length/class for text.
summary(df1)
## X rank discipline yrs.since.phd
## Min. : 1 Length:397 Length:397 Min. : 1.00
## 1st Qu.:100 Class :character Class :character 1st Qu.:12.00
## Median :199 Mode :character Mode :character Median :21.00
## Mean :199 Mean :22.31
## 3rd Qu.:298 3rd Qu.:32.00
## Max. :397 Max. :56.00
## yrs.service sex salary
## Min. : 0.00 Length:397 Min. : 57800
## 1st Qu.: 7.00 Class :character 1st Qu.: 91000
## Median :16.00 Mode :character Median :107300
## Mean :17.61 Mean :113706
## 3rd Qu.:27.00 3rd Qu.:134185
## Max. :60.00 Max. :231545
# Mean and median salary for every rank/sex combination.
aggregate(salary ~ rank + sex, data = df1, FUN = each(mean, median))
## rank sex salary.mean salary.median
## 1 AssocProf Female 88512.80 90556.50
## 2 AsstProf Female 78049.91 77000.00
## 3 Prof Female 121967.61 120257.50
## 4 AssocProf Male 94869.70 95626.50
## 5 AsstProf Male 81311.46 80182.00
## 6 Prof Male 127120.82 123996.00
# Mean and median years of service for every rank/sex combination.
aggregate(yrs.service ~ rank + sex, data = df1, FUN = each(mean, median))
## rank sex yrs.service.mean yrs.service.median
## 1 AssocProf Female 11.500000 9.500000
## 2 AsstProf Female 2.545455 3.000000
## 3 Prof Female 17.111111 17.000000
## 4 AssocProf Male 12.037037 8.000000
## 5 AsstProf Male 2.339286 3.000000
## 6 Prof Male 23.229839 22.000000
# Mean and median salary per rank, pooling both sexes.
aggregate(salary ~ rank, data = df1, FUN = each(mean, median))
## rank salary.mean salary.median
## 1 AssocProf 93876.44 95626.50
## 2 AsstProf 80775.99 79800.00
## 3 Prof 126772.11 123321.50
# Standard deviation of salary per sex.
aggregate(salary ~ sex, data = df1, FUN = each(sd))
## sex salary
## 1 Female 25952.13
## 2 Male 30436.93
# Highest and lowest salary per sex.
aggregate(salary ~ sex, data = df1, FUN = each(max, min))
## sex salary.max salary.min
## 1 Female 161101 62884
## 2 Male 231545 57800
# Highest and lowest salary per rank.
aggregate(salary ~ rank, data = df1, FUN = each(max, min))
## rank salary.max salary.min
## 1 AssocProf 126431 62884
## 2 AsstProf 97032 63100
## 3 Prof 231545 57800
# Salary quantiles at the default probabilities (0%, 25%, 50%, 75%, 100%).
quantile(df1[["salary"]])
## 0% 25% 50% 75% 100%
## 57800 91000 107300 134185 231545
Analyzed the Salaries for Professors dataset, which consists of 397 observations and 6 variables (plus a row-index column, X).
Female assistant professors on average make 4% less than male assistant professors; female associate professors on average make 7% less than male associate professors; female full professors on average make 4% less than male full professors.
Key Takeaway: On average, female professors make between 4% and 7% less than their male counterparts, with the largest salary disparity between female and male associate professors.
On average, assistant professors earn 14% less than associate professors, associate professors earn 26% less than full professors, and assistant professors earn 36% less than full professors.
Key Takeaway: The largest disparity in salary is between full professors at the top and assistant professors at the bottom.
There is a 30% disparity between the highest salary of a female professor and the highest salary of a male professor: 161,101 compared to 231,545. Key Takeaway: This indicates a large gap between the top male earner and the top female earner.
There is an 8% disparity between the lowest salary of a male professor and the lowest salary of a female professor: 57,800 compared to 62,884.
Key Takeaway: The largest gap in salary exists between the top-level male earners and the top-level female earners.
The bottom 25% of professors earn 91,000 or less, while the bottom 75% earn 134,185 or less (57,800 is the minimum salary, not the 25% quartile).
Key Takeaway: The salary at the 75% quartile (134,185) is about 47% higher than the salary at the 25% quartile (91,000).
The standard deviation is about 26k for female professors and about 30k for male professors.
Key Takeaway: This is a somewhat high standard deviation, indicating a wide spread of values around the mean.
# Round the salary to 6 decimal places. The second argument of round() is the
# number of decimal places, and the salaries print as whole numbers, so the
# values come back unchanged.
round(df1$salary, digits = 6)
## [1] 139750 173200 79750 115000 141500 97000 175000 147765 119250 129000
## [11] 119800 79800 77700 78000 104800 117150 101000 103450 124750 137000
## [21] 89565 102580 93904 113068 74830 106294 134885 82379 77000 118223
## [31] 132261 79916 117256 80225 80225 77000 155750 86373 125196 100938
## [41] 146500 93418 101299 231545 94384 114778 98193 151768 140096 70768
## [51] 126621 108875 74692 106639 103760 83900 117704 90215 100135 75044
## [61] 90304 75243 109785 103613 68404 100522 101000 99418 111512 91412
## [71] 126320 146856 100131 92391 113398 73266 150480 193000 86100 84240
## [81] 150743 135585 144640 88825 122960 132825 152708 88400 172272 107008
## [91] 97032 105128 105631 166024 123683 84000 95611 129676 102235 106689
## [101] 133217 126933 153303 127512 83850 113543 82099 82600 81500 131205
## [111] 112429 82100 72500 104279 105000 120806 148500 117515 72500 73500
## [121] 115313 124309 97262 62884 96614 78162 155500 72500 113278 73000
## [131] 83001 76840 77500 72500 168635 136000 108262 105668 73877 152664
## [141] 100102 81500 106608 89942 112696 119015 92000 156938 144651 95079
## [151] 128148 92000 111168 103994 92000 118971 113341 88000 95408 137167
## [161] 89516 176500 98510 89942 88795 105890 167284 130664 101210 181257
## [171] 91227 151575 93164 134185 105000 111751 95436 100944 147349 92000
## [181] 142467 141136 100000 150000 101000 134000 103750 107500 106300 153750
## [191] 180000 133700 122100 86250 90000 113600 92700 92000 189409 114500
## [201] 92700 119700 160400 152500 165000 96545 162200 120000 91300 163200
## [211] 91000 111350 128400 126200 118700 145350 146000 105350 109650 119500
## [221] 170000 145200 107150 129600 87800 122400 63900 70000 88175 133900
## [231] 91000 73300 148750 117555 69700 81700 114000 63100 77202 96200
## [241] 69200 122875 102600 108200 84273 90450 91100 101100 128800 204000
## [251] 109000 102000 132000 77500 116450 83000 140300 74000 73800 92550
## [261] 88600 107550 121200 126000 99000 134800 143940 104350 89650 103700
## [271] 143250 194800 73000 74000 78500 93000 107200 163200 107100 100600
## [281] 136500 103600 57800 155865 88650 81800 115800 85000 150500 74000
## [291] 174500 168500 183800 104800 107300 97150 126300 148800 72300 70700
## [301] 88600 127100 170500 105260 144050 111350 74500 122500 74000 166800
## [311] 92050 108100 94350 100351 146800 84716 71065 67559 134550 135027
## [321] 104428 95642 126431 161101 162221 84500 124714 151650 99247 134778
## [331] 192253 116518 105450 145098 104542 151445 98053 145000 128464 137317
## [341] 106231 124312 114596 162150 150376 107986 142023 128250 80139 144309
## [351] 186960 93519 142500 138000 83600 145028 88709 107309 109954 78785
## [361] 121946 109646 138771 81285 205500 101036 115435 108413 131950 134690
## [371] 78182 110515 109707 136660 103275 103649 74856 77081 150680 104121
## [381] 75996 172505 86895 105000 125192 114330 139219 109305 119450 186023
## [391] 166605 151292 103106 150564 101738 95329 81035
# Preview the first six rows of the salaries data.
head(df1, n = 6L)
## X rank discipline yrs.since.phd yrs.service sex salary
## 1 1 Prof B 19 18 Male 139750
## 2 2 Prof B 20 16 Male 173200
## 3 3 AsstProf B 4 3 Male 79750
## 4 4 Prof B 45 39 Male 115000
## 5 5 Prof B 40 41 Male 141500
## 6 6 AssocProf B 6 6 Male 97000
# Add a derived column: years since PhD minus years of service.
# A negative value means more recorded years of service than years since
# the PhD.
df1[["DiffPHDAndYrsOfSvc"]] <- with(df1, yrs.since.phd - yrs.service)
head(df1, n = 6L)
## X rank discipline yrs.since.phd yrs.service sex salary
## 1 1 Prof B 19 18 Male 139750
## 2 2 Prof B 20 16 Male 173200
## 3 3 AsstProf B 4 3 Male 79750
## 4 4 Prof B 45 39 Male 115000
## 5 5 Prof B 40 41 Male 141500
## 6 6 AssocProf B 6 6 Male 97000
## DiffPHDAndYrsOfSvc
## 1 1
## 2 4
## 3 1
## 4 6
## 5 -1
## 6 0
# Keep only the rows whose rank is "Prof" (full professors).
# subset() evaluates the condition inside df1, so the column can be named
# directly; original row names are preserved.
df2 <- subset(df1, subset = rank == "Prof")
head(df2, n = 6L)
## X rank discipline yrs.since.phd yrs.service sex salary
## 1 1 Prof B 19 18 Male 139750
## 2 2 Prof B 20 16 Male 173200
## 4 4 Prof B 45 39 Male 115000
## 5 5 Prof B 40 41 Male 141500
## 7 7 Prof B 30 23 Male 175000
## 8 8 Prof B 45 45 Male 147765
## DiffPHDAndYrsOfSvc
## 1 1
## 2 4
## 4 6
## 5 -1
## 7 7
## 8 0
# Rename the sex column to gender.
# The column is matched by name rather than by position, so the rename still
# hits the right column if columns are added, dropped, or reordered upstream
# (a hard-coded index would silently rename whatever sits in that slot).
names(df2)[names(df2) == "sex"] <- "gender"
head(df2)
## X rank discipline yrs.since.phd yrs.service gender salary
## 1 1 Prof B 19 18 Male 139750
## 2 2 Prof B 20 16 Male 173200
## 4 4 Prof B 45 39 Male 115000
## 5 5 Prof B 40 41 Male 141500
## 7 7 Prof B 30 23 Male 175000
## 8 8 Prof B 45 45 Male 147765
## DiffPHDAndYrsOfSvc
## 1 1
## 2 4
## 4 6
## 5 -1
## 7 7
## 8 0
require(ggplot2)
## Loading required package: ggplot2
require(ggthemes)
## Loading required package: ggthemes
# Scatterplot of salary by sex, coloured by sex, using the Economist theme and
# the Excel colour scale. The title is centred; axis titles are bold blue.
ggplot(df1, aes(x = sex, y = salary, colour = sex)) +
  geom_point() +
  theme_economist() +
  scale_colour_excel() +
  labs(title = "Scatterplot - Sex and Salary", x = "Sex", y = "Salary") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(color = "blue", size = 14, face = "bold"),
    axis.title.y = element_text(color = "blue", size = 14, face = "bold")
  )
# Boxplot of salary per rank, coloured by rank, using the Economist theme.
# The title is centred; axis titles are bold blue.
ggplot(df1, aes(x = rank, y = salary, colour = rank)) +
  geom_boxplot() +
  theme_economist() +
  labs(title = "Boxplot Rank and Salary", x = "Rank", y = "Salary") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(color = "blue", size = 14, face = "bold"),
    axis.title.y = element_text(color = "blue", size = 14, face = "bold")
  )
# Violin plot of salary per rank, coloured by rank, using the Economist theme.
# The title is centred; axis titles are bold blue.
ggplot(df1, aes(x = rank, y = salary, colour = rank)) +
  geom_violin() +
  theme_economist() +
  labs(title = "Violinplot Rank and Salary", x = "Rank", y = "Salary") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(color = "blue", size = 14, face = "bold"),
    axis.title.y = element_text(color = "blue", size = 14, face = "bold")
  )
# Line plot of salary against years of service, using the Economist theme.
# The title is centred; axis titles are bold blue.
ggplot(df1, aes(x = yrs.service, y = salary)) +
  geom_line() +
  theme_economist() +
  labs(
    title = "Lineplot Service Duration and Salary",
    x = "Service in Years",
    y = "Salary"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(color = "blue", size = 14, face = "bold"),
    axis.title.y = element_text(color = "blue", size = 14, face = "bold")
  )
# Histogram of years of service, using the Economist theme and the default
# binning (ggplot2 reports `bins = 30` in the message that follows).
# The title is centred; axis titles are bold blue.
ggplot(df1, aes(x = yrs.service)) +
  geom_histogram() +
  theme_economist() +
  labs(
    title = "Histogram Service Duration",
    x = "Service in Years",
    y = "Count"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(color = "blue", size = 14, face = "bold"),
    axis.title.y = element_text(color = "blue", size = 14, face = "bold")
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Load a copy of the salaries data (Salaries4.csv) from the Assignment3
# GitHub repository and preview the first rows.
# NOTE(review): the printed head() shows a single column whose name and values
# each hold an entire CSV row as one string, so the fields were not split on
# ",". Presumably every line of the remote file is wrapped in an extra pair of
# quotes - TODO confirm against the raw file, then fix the file or re-parse
# the column.
dfBonus <- read.table("https://raw.githubusercontent.com/vcolardi2218/Assignment3/master/Salaries4.csv", header=TRUE, sep=",", stringsAsFactors=FALSE)
head(dfBonus)
## X....rank...discipline...yrs.since.phd...yrs.service...sex...salary.
## 1 "1","Prof","B",19,18,"Male",139750
## 2 "2","Prof","B",20,16,"Male",173200
## 3 "3","AsstProf","B",4,3,"Male",79750
## 4 "4","Prof","B",45,39,"Male",115000
## 5 "5","Prof","B",40,41,"Male",141500
## 6 "6","AssocProf","B",6,6,"Male",97000