This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the survey data; the first row of the CSV holds the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
data <- read.csv("ds_data.csv", header = TRUE)

# Relabel "Bachelor's Degree" entries as "Bachelor's".
# `%in%` never yields NA, so rows with a missing education level are simply
# not selected and stay untouched.
data$Education.Level[
  data$Education.Level %in% "Bachelor's Degree"
] <- "Bachelor's"

# Convert the categorical columns to factors so summary() tabulates their
# levels instead of reporting them as plain character columns.
data$Education.Level <- factor(data$Education.Level)
data$Gender <- factor(data$Gender)
data$Job.Title <- factor(data$Job.Title)

summary(data)
## Age Gender Education.Level Job.Title
## Min. :23.00 Female:365 Bachelor's:321 Data Scientist :453
## 1st Qu.:28.00 Male :604 Master's :216 Data Analyst :363
## Median :31.00 PhD :432 Senior Data Scientist : 61
## Mean :33.65 Director of Data Science: 57
## 3rd Qu.:41.00 Junior Data Analyst : 25
## Max. :51.00 Senior Data Engineer : 4
## (Other) : 6
## Years.of.Experience Salary
## Min. : 0.000 Min. : 35000
## 1st Qu.: 5.000 1st Qu.:120000
## Median : 8.000 Median :150000
## Mean : 8.996 Mean :148816
## 3rd Qu.:13.000 3rd Qu.:180000
## Max. :24.000 Max. :240000
##
# Two-way ANOVA of salary on gender, education level and their interaction.
# Columns are referenced through `data$` directly in the formula, which is why
# the term labels in the printed table carry the `data$` prefix.
# NOTE(review): the fitted object is stored under the name `aov`, the same name
# as the fitting function; the name is kept so existing references still work.
aov <- aov(
  data$Salary ~
    data$Gender +
    data$Education.Level +
    data$Gender:data$Education.Level
)
summary(aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## data$Gender 1 2.552e+08 2.552e+08 0.227 0.634094
## data$Education.Level 2 4.391e+11 2.195e+11 195.032 < 2e-16 ***
## data$Gender:data$Education.Level 2 2.068e+10 1.034e+10 9.186 0.000112 ***
## Residuals 963 1.084e+12 1.126e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Linear model with salary as the response and age, gender and education
# level as additive predictors; columns are looked up in `data`.
fit1 <- lm(
  Salary ~ Age + Gender + Education.Level,
  data = data
)
summary(fit1)
##
## Call:
## lm(formula = Salary ~ Age + Gender + Education.Level, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -94748 -18676 -1888 14901 66332
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45570.0 5337.1 8.538 < 2e-16 ***
## Age 3211.4 201.2 15.958 < 2e-16 ***
## GenderMale -13966.6 2290.2 -6.098 1.55e-09 ***
## Education.LevelMaster's -2487.9 2925.2 -0.851 0.39525
## Education.LevelPhD 9952.1 3235.8 3.076 0.00216 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30110 on 964 degrees of freedom
## Multiple R-squared: 0.434, Adjusted R-squared: 0.4317
## F-statistic: 184.8 on 4 and 964 DF, p-value: < 2.2e-16
library(asbio)
## 載入需要的套件:tcltk
# Bonferroni confidence intervals for salary, grouped by education level.
bonfCI(data[["Salary"]], data[["Education.Level"]])
##
## 95% Bonferroni confidence intervals
##
## Diff Lower Upper Decision
## muBachelor's-muMaster's -14179.47387 -21320.77768 -7038.17006 Reject H0
## muBachelor's-muPhD -47191.04794 -53170.65791 -41211.43798 Reject H0
## muMaster's-muPhD -33011.57407 -39773.78188 -26249.36627 Reject H0
## Adj. p-value
## muBachelor's-muMaster's 7e-06
## muBachelor's-muPhD 0
## muMaster's-muPhD 0
# Bonferroni confidence intervals for salary, grouped by gender.
bonfCI(data[["Salary"]], data[["Gender"]])
##
## 95% Bonferroni confidence intervals
##
## Diff Lower Upper Decision Adj. p-value
## muFemale-muMale -1059.0674 -6257.46977 4139.33496 FTR H0 0.68939