This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the salary data set; the first row of the CSV holds the column names.
# Strings are read as plain character so the label clean-up below behaves the
# same on every R version (before R 4.0 they would arrive as factors).
data <- read.csv("swe_data.csv", header = TRUE, stringsAsFactors = FALSE)

# Fold the "Bachelor's Degree" spelling into the "Bachelor's" label.
# `%in%` never returns NA, so rows with a missing education level are skipped
# without needing which().
is_bachelor_degree <- data$Education.Level %in% "Bachelor's Degree"
data$Education.Level[is_bachelor_degree] <- "Bachelor's"

# Convert the categorical columns to factors so that summary() tabulates them
# and the models further down treat them as grouping variables.
categorical_cols <- c("Education.Level", "Gender", "Job.Title")
data[categorical_cols] <- lapply(data[categorical_cols], factor)

summary(data)
## Age Gender Education.Level Job.Title
## Min. :22.0 Female:422 Bachelor's :732 Junior Software Developer: 58
## 1st Qu.:27.0 Male :574 High School: 2 Junior Software Engineer : 51
## Median :28.0 Master's :230 Senior Software Architect: 1
## Mean :30.7 PhD : 32 Senior Software Developer: 3
## 3rd Qu.:32.0 Senior Software Engineer :240
## Max. :58.0 Software Developer :125
## Software Engineer :518
## Years.of.Experience Salary
## Min. : 1.000 Min. : 35000
## 1st Qu.: 3.000 1st Qu.: 60000
## Median : 4.000 Median : 90000
## Mean : 6.228 Mean :108589
## 3rd Qu.: 8.000 3rd Qu.:160000
## Max. :32.000 Max. :197000
##
# Two-way ANOVA of salary on gender, education level and their interaction.
# In a formula `a * b` expands to `a + b + a:b`, so this fits the same three
# terms (with the same labels) as writing them out one by one.
# NOTE(review): the result is stored under the name `aov`, the same name as
# the fitting function; the name is kept in case later code refers to it.
aov <- aov(data$Salary ~ data$Gender * data$Education.Level)
summary(aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## data$Gender 1 1.231e+11 1.231e+11 61.130 1.36e-14 ***
## data$Education.Level 3 4.330e+11 1.443e+11 71.665 < 2e-16 ***
## data$Gender:data$Education.Level 2 2.216e+10 1.108e+10 5.502 0.0042 **
## Residuals 989 1.992e+12 2.014e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Additive linear model: salary explained by age, gender and education level.
# Columns are looked up in `data`; for the factor predictors lm() reports each
# level as a contrast against the first level of that factor.
fit1 <- lm(Salary ~ Age + Gender + Education.Level, data = data)
summary(fit1)
##
## Call:
## lm(formula = Salary ~ Age + Gender + Education.Level, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -79142 -29556 -10067 18946 80346
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -70003.1 7273.2 -9.625 < 2e-16 ***
## Age 5655.2 260.1 21.742 < 2e-16 ***
## GenderMale 12522.7 2455.2 5.100 4.06e-07 ***
## Education.LevelHigh School -19775.9 26542.6 -0.745 0.4564
## Education.LevelMaster's -8316.0 3673.0 -2.264 0.0238 *
## Education.LevelPhD -9229.2 7607.3 -1.213 0.2253
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37110 on 990 degrees of freedom
## Multiple R-squared: 0.4696, Adjusted R-squared: 0.467
## F-statistic: 175.3 on 5 and 990 DF, p-value: < 2.2e-16
library(asbio)
## 載入需要的套件:tcltk
# Bonferroni-adjusted pairwise comparisons of mean salary between education
# levels (y = response, x = grouping factor).
bonfCI(y = data$Salary, x = data$Education.Level)
##
## 95% Bonferroni confidence intervals
##
## Diff Lower Upper Decision
## muBachelor's-muHigh School -69112.19672 -156445.80542 18221.41198 FTR H0
## muBachelor's-muMaster's -40521.47498 -49844.82506 -31198.1249 Reject H0
## muHigh School-muMaster's 28590.72174 -59002.19558 116183.63906 FTR H0
## muBachelor's-muPhD -71150.63422 -93425.75482 -48875.51363 Reject H0
## muHigh School-muPhD -2038.4375 -91937.13218 87860.25718 FTR H0
## muMaster's-muPhD -30629.15924 -53900.19175 -7358.12673 Reject H0
## Adj. p-value
## muBachelor's-muHigh School 0.220152
## muBachelor's-muMaster's 0
## muHigh School-muMaster's 1
## muBachelor's-muPhD 0
## muHigh School-muPhD 1
## muMaster's-muPhD 0.003146
# Bonferroni-adjusted comparison of mean salary between the gender groups
# (y = response, x = grouping factor).
bonfCI(y = data$Salary, x = data$Gender)
##
## 95% Bonferroni confidence intervals
##
## Diff Lower Upper Decision Adj. p-value
## muFemale-muMale -22500.62639 -28744.31733 -16256.93544 Reject H0 0