This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the salary data set and prepare it for modelling.
# `stringsAsFactors = FALSE` is set explicitly so that the text columns are
# read as character vectors on every R version; the recoding below assigns a
# string value and the factor conversion is done explicitly afterwards.
data <- read.csv("salary_data.csv", header = TRUE, stringsAsFactors = FALSE)

# Merge the duplicate spelling "Bachelor's Degree" into "Bachelor's".
# `%in%` never yields NA, so no `which()` wrapper is needed to guard the
# subscripted assignment against missing values.
is_bachelor_degree <- data$Education.Level %in% "Bachelor's Degree"
data$Education.Level[is_bachelor_degree] <- "Bachelor's"

# Treat the categorical columns as factors so that summary() reports level
# counts and the model functions build contrasts for them.
factor_cols <- c("Education.Level", "Gender", "Job.Title")
data[factor_cols] <- lapply(data[factor_cols], factor)

summary(data)
## Age Gender Education.Level
## Min. :21.00 Female:3013 Bachelor's :3021
## 1st Qu.:28.00 Male :3671 High School: 436
## Median :32.00 Master's :1858
## Mean :33.61 PhD :1369
## 3rd Qu.:38.00
## Max. :62.00
##
## Job.Title Years.of.Experience Salary
## Software Engineer : 518 Min. : 0.000 Min. : 350
## Data Scientist : 453 1st Qu.: 3.000 1st Qu.: 70000
## Software Engineer Manager: 376 Median : 7.000 Median :115000
## Data Analyst : 363 Mean : 8.078 Mean :115307
## Senior Project Engineer : 316 3rd Qu.:12.000 3rd Qu.:160000
## Product Manager : 313 Max. :34.000 Max. :250000
## (Other) :4345
# Two-way ANOVA of Salary on Gender, Education.Level, and their interaction.
# Columns are referenced by name through `data =` (as the lm() call in this
# document does) instead of `data$...` vectors, so the term labels are plain
# column names and the fitted object stays tied to the data frame.
# Terms are listed in the original order (Gender first); with unequal group
# sizes the sequential sums of squares reported by summary() depend on it.
# NOTE(review): the result is stored under the name `aov`, which is also the
# name of the fitting function. Calls to aov() still resolve to the function,
# and the name is kept in case later code refers to this object.
aov <- aov(
  Salary ~ Gender + Education.Level + Gender:Education.Level,
  data = data
)
summary(aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 3.019e+11 3.019e+11 193.135 < 2e-16 ***
## Education.Level 3 7.877e+12 2.626e+12 1679.859 < 2e-16 ***
## Gender:Education.Level 3 2.151e+10 7.172e+09 4.588 0.00326 **
## Residuals 6676 1.044e+13 1.563e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Multiple linear regression of Salary on Age, Gender, and Education.Level
# (main effects only). The fitted model is kept in `fit1`, and its
# coefficient table and fit statistics are printed.
fit1 <- lm(
  formula = Salary ~ Age + Gender + Education.Level,
  data = data
)
summary(fit1)
##
## Call:
## lm(formula = Salary ~ Age + Gender + Education.Level, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -99570 -22625 -5184 15979 97852
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -20883.64 2083.93 -10.021 < 2e-16 ***
## Age 3720.19 66.11 56.273 < 2e-16 ***
## GenderMale 5622.70 821.72 6.843 8.47e-12 ***
## Education.LevelHigh School -45645.93 1691.73 -26.982 < 2e-16 ***
## Education.LevelMaster's 17734.01 1030.71 17.206 < 2e-16 ***
## Education.LevelPhD 29845.72 1281.31 23.293 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32590 on 6678 degrees of freedom
## Multiple R-squared: 0.6194, Adjusted R-squared: 0.6191
## F-statistic: 2173 on 5 and 6678 DF, p-value: < 2.2e-16
library(asbio)
## 載入需要的套件:tcltk
# Pairwise Bonferroni-adjusted confidence intervals for the difference in
# mean Salary between every pair of Education.Level groups.
bonfCI(Y = data$Salary, X = data$Education.Level)
##
## 95% Bonferroni confidence intervals
##
## Diff Lower Upper Decision
## muBachelor's-muHigh School 60667.29625 55263.29395 66071.29856 Reject H0
## muBachelor's-muMaster's -34995.47618 -38105.41349 -31885.53887 Reject H0
## muHigh School-muMaster's -95662.77244 -101276.03104 -90049.51384 Reject H0
## muBachelor's-muPhD -70568.54936 -74005.23348 -67131.86524 Reject H0
## muHigh School-muPhD -131235.84561 -137036.51452 -125435.1767 Reject H0
## muMaster's-muPhD -35573.07318 -39330.23021 -31815.91615 Reject H0
## Adj. p-value
## muBachelor's-muHigh School 0
## muBachelor's-muMaster's 0
## muHigh School-muMaster's 0
## muBachelor's-muPhD 0
## muHigh School-muPhD 0
## muMaster's-muPhD 0
# Bonferroni-adjusted confidence interval for the difference in mean Salary
# between the two Gender groups (a single comparison).
bonfCI(Y = data$Salary, X = data$Gender)
##
## 95% Bonferroni confidence intervals
##
## Diff Lower Upper Decision Adj. p-value
## muFemale-muMale -13506.69896 -16030.93082 -10982.4671 Reject H0 0