R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Read the data set; the first row of the CSV supplies the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
data <- read.csv("ds_data.csv", header = TRUE)

# Merge the "Bachelor's Degree" spelling into "Bachelor's" so both labels end
# up in one factor level. `%in%` never yields NA, so rows with a missing
# Education.Level are left untouched without needing a which() wrapper.
data$Education.Level[
  data$Education.Level %in% "Bachelor's Degree"
] <- "Bachelor's"

# Treat the categorical columns as factors so summary() tabulates them and
# the model-fitting functions handle them as grouping variables.
data$Education.Level <- factor(data$Education.Level)
data$Gender <- factor(data$Gender)
data$Job.Title <- factor(data$Job.Title)

# Per-column overview: quantiles for numeric columns, counts for factors.
summary(data)
##       Age           Gender      Education.Level                    Job.Title  
##  Min.   :23.00   Female:365   Bachelor's:321    Data Scientist          :453  
##  1st Qu.:28.00   Male  :604   Master's  :216    Data Analyst            :363  
##  Median :31.00                PhD       :432    Senior Data Scientist   : 61  
##  Mean   :33.65                                  Director of Data Science: 57  
##  3rd Qu.:41.00                                  Junior Data Analyst     : 25  
##  Max.   :51.00                                  Senior Data Engineer    :  4  
##                                                 (Other)                 :  6  
##  Years.of.Experience     Salary      
##  Min.   : 0.000      Min.   : 35000  
##  1st Qu.: 5.000      1st Qu.:120000  
##  Median : 8.000      Median :150000  
##  Mean   : 8.996      Mean   :148816  
##  3rd Qu.:13.000      3rd Qu.:180000  
##  Max.   :24.000      Max.   :240000  
## 
# Two-way ANOVA of Salary on Gender and Education.Level with interaction.
# `a * b` is formula shorthand for `a + b + a:b`, so the terms are the same
# main effects followed by the Gender:Education.Level interaction.
# NOTE(review): the result is stored under the name `aov`, the same name as
# the fitting function; calls to aov() still resolve to the function, but a
# distinct object name would read more clearly.
# NOTE(review): the columns are passed as `data$...` inside the formula,
# which is why the term labels carry the `data$` prefix; switching to
# `aov(Salary ~ Gender * Education.Level, data = data)` would shorten them.
aov <- aov(data$Salary ~ data$Gender * data$Education.Level)

# ANOVA table. The sums of squares are sequential, so with unequal group
# sizes the result for each term depends on the order of terms in the formula.
summary(aov)
##                                   Df    Sum Sq   Mean Sq F value   Pr(>F)    
## data$Gender                        1 2.552e+08 2.552e+08   0.227 0.634094    
## data$Education.Level               2 4.391e+11 2.195e+11 195.032  < 2e-16 ***
## data$Gender:data$Education.Level   2 2.068e+10 1.034e+10   9.186 0.000112 ***
## Residuals                        963 1.084e+12 1.126e+09                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Additive linear model: Salary explained by Age, Gender and Education.Level
# (no interaction terms). The factor predictors enter as indicator columns
# relative to their first level.
fit1 <- lm(
  Salary ~ Age + Gender + Education.Level,
  data = data
)

# Coefficient table, residual summary, R-squared and overall F-test.
summary(fit1)
## 
## Call:
## lm(formula = Salary ~ Age + Gender + Education.Level, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -94748 -18676  -1888  14901  66332 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              45570.0     5337.1   8.538  < 2e-16 ***
## Age                       3211.4      201.2  15.958  < 2e-16 ***
## GenderMale              -13966.6     2290.2  -6.098 1.55e-09 ***
## Education.LevelMaster's  -2487.9     2925.2  -0.851  0.39525    
## Education.LevelPhD        9952.1     3235.8   3.076  0.00216 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30110 on 964 degrees of freedom
## Multiple R-squared:  0.434,  Adjusted R-squared:  0.4317 
## F-statistic: 184.8 on 4 and 964 DF,  p-value: < 2.2e-16
library(asbio)
## 載入需要的套件:tcltk
# Bonferroni-adjusted pairwise comparisons of mean Salary between the
# Education.Level groups (response passed as `y`, grouping factor as `x`).
bonfCI(y = data$Salary, x = data$Education.Level)
## 
## 95% Bonferroni confidence intervals 
## 
##                                 Diff        Lower        Upper  Decision
## muBachelor's-muMaster's -14179.47387 -21320.77768  -7038.17006 Reject H0
## muBachelor's-muPhD      -47191.04794 -53170.65791 -41211.43798 Reject H0
## muMaster's-muPhD        -33011.57407 -39773.78188 -26249.36627 Reject H0
##                         Adj. p-value
## muBachelor's-muMaster's        7e-06
## muBachelor's-muPhD                 0
## muMaster's-muPhD                   0
# Bonferroni-adjusted comparison of mean Salary between the Gender groups
# (response passed as `y`, grouping factor as `x`).
bonfCI(y = data$Salary, x = data$Gender)
## 
## 95% Bonferroni confidence intervals 
## 
##                       Diff       Lower      Upper Decision Adj. p-value
## muFemale-muMale -1059.0674 -6257.46977 4139.33496   FTR H0      0.68939