#read in your data
dta <- read.csv("ncku_prof_V6.csv", h=T, stringsAsFactors = TRUE)
str(dta)
## 'data.frame': 460 obs. of 14 variables:
## $ ID : int 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 ...
## $ Initial : Factor w/ 347 levels "BCT","BHC","BLC",..: 308 60 81 90 145 201 293 176 198 276 ...
## $ Citation: int 305 355 3452 15808 280 2506 672 5735 1118 685 ...
## $ H.id : int 9 11 10 65 10 22 14 40 19 14 ...
## $ Gender : Factor w/ 2 levels "F","M": 2 2 2 2 1 2 2 2 2 2 ...
## $ Degree : Factor w/ 2 levels "D","O": 1 1 1 2 2 1 1 1 2 1 ...
## $ Rank : int 3 2 1 1 2 2 1 1 2 1 ...
## $ College : Factor w/ 5 levels "ENG","LIB","MNG",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Dept : Factor w/ 25 levels "ACC","BAD","CEN",..: 10 10 10 10 10 10 10 10 10 10 ...
## $ Grads : int 3 10 0 92 25 41 36 54 74 195 ...
## $ FPY : int 2013 2008 2011 1997 2011 2002 2008 2001 1994 1991 ...
## $ Articles: int 30 22 14 349 23 90 36 123 26 70 ...
## $ StuApp : int 169 169 169 169 169 169 169 169 169 169 ...
## $ Colprof : int 309 309 309 309 309 309 309 309 309 309 ...
dta <- dta %>%
select(H.id, Gender, Degree, Rank, College, Dept, Grads, FPY, Articles) %>%
mutate(researchy = 2022 - FPY)
# reorder by column name
dta <- dta[c("H.id", "Gender","researchy", "Degree", "Rank", "College", "Dept","Grads", "FPY", "Articles")]
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
ggpairs(dta[,1:3])
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(dta, aes(x = Gender, y = H.id)) +
geom_boxplot() +
xlab("Gender") +
ylab("H.id")
ggplot(aes(y = H.id, x = researchy, color = Gender), data = dta) +
geom_point() +
geom_smooth(method = lm, se = F) +
theme_bw()
## `geom_smooth()` using formula 'y ~ x'
#Step1
#迴歸係數應該顯著,畢竟研究生涯年影響論文係數
mod1 <- lm(H.id ~ researchy, dta)
summary(mod1)
##
## Call:
## lm(formula = H.id ~ researchy, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.229 -5.840 -1.026 4.949 70.797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.07520 1.23724 -0.869 0.385
## researchy 0.67511 0.05505 12.263 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.49 on 456 degrees of freedom
## (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared: 0.248, Adjusted R-squared: 0.2464
## F-statistic: 150.4 on 1 and 456 DF, p-value: < 2.2e-16
#Step2
#研究生涯年影響指導畢業學生數,迴歸係數應該顯著
mod2 <- lm(Grads ~ researchy, dta)
summary(mod2)
##
## Call:
## lm(formula = Grads ~ researchy, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -83.06 -23.92 -8.42 12.22 339.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.0347 5.1995 -2.122 0.0344 *
## researchy 2.6885 0.2314 11.621 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.09 on 456 degrees of freedom
## (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared: 0.2285, Adjusted R-squared: 0.2268
## F-statistic: 135 on 1 and 456 DF, p-value: < 2.2e-16
#Step3
#指導畢業學生數影響論文指數,迴歸係數應該顯著
mod3 <- lm(H.id ~ Grads, dta)
summary(mod3)
##
## Call:
## lm(formula = H.id ~ Grads, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.893 -7.171 -2.473 5.221 73.539
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.95204 0.67151 11.84 <2e-16 ***
## Grads 0.10947 0.01005 10.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.78 on 458 degrees of freedom
## Multiple R-squared: 0.2057, Adjusted R-squared: 0.204
## F-statistic: 118.6 on 1 and 458 DF, p-value: < 2.2e-16
#Step4
#迴歸模型同時考慮研究生涯年和指導畢業學生數。
#研究生涯年的迴歸係數應該消失或相較減少
mod4 <- lm(H.id ~ researchy + Grads, dta)
summary(mod4)
##
## Call:
## lm(formula = H.id ~ researchy + Grads, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.073 -6.152 -1.198 5.122 69.573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.33794 1.19462 -0.283 0.777
## researchy 0.49548 0.06022 8.228 2.02e-15 ***
## Grads 0.06681 0.01071 6.240 1.00e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.08 on 455 degrees of freedom
## (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared: 0.3073, Adjusted R-squared: 0.3043
## F-statistic: 100.9 on 2 and 455 DF, p-value: < 2.2e-16
#GenderF
asmF <- dta %>%
select(Gender, H.id, Grads, researchy)%>%
filter(Gender == "F")
str(asmF)
## 'data.frame': 111 obs. of 4 variables:
## $ Gender : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
## $ H.id : int 10 28 28 22 14 46 10 14 32 11 ...
## $ Grads : int 25 30 16 81 8 62 54 60 119 0 ...
## $ researchy: num 11 17 23 33 10 30 16 27 31 9 ...
#GenderM
asmM <- dta %>%
select(Gender, H.id, Grads, researchy)%>%
filter(Gender == "M")
str(asmM)
## 'data.frame': 349 obs. of 4 variables:
## $ Gender : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ H.id : int 9 11 10 65 22 14 40 19 14 47 ...
## $ Grads : int 3 10 0 92 41 36 54 74 195 263 ...
## $ researchy: num 9 14 11 25 20 14 21 28 31 31 ...
#Step1
#迴歸係數應該顯著,畢竟研究生涯年影響論文係數
modF1 <- lm(H.id ~ researchy, asmF)
summary(modF1)
##
## Call:
## lm(formula = H.id ~ researchy, data = asmF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.084 -5.053 -1.417 4.427 33.291
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.83389 1.85566 0.449 0.654
## researchy 0.39583 0.09545 4.147 6.69e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.199 on 109 degrees of freedom
## Multiple R-squared: 0.1363, Adjusted R-squared: 0.1284
## F-statistic: 17.2 on 1 and 109 DF, p-value: 6.69e-05
#Step2
#研究生涯年影響指導畢業學生數,迴歸係數應該顯著
modF2 <- lm(Grads ~ researchy, asmF)
summary(modF2)
##
## Call:
## lm(formula = Grads ~ researchy, data = asmF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.60 -17.23 -7.87 5.63 131.41
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.4130 6.7711 0.947 0.34567
## researchy 1.1445 0.3483 3.286 0.00137 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.92 on 109 degrees of freedom
## Multiple R-squared: 0.09014, Adjusted R-squared: 0.0818
## F-statistic: 10.8 on 1 and 109 DF, p-value: 0.001367
#Step3
#指導畢業學生數影響論文指數,迴歸係數應該顯著
modF3 <- lm(H.id ~ Grads, asmF)
summary(modF3)
##
## Call:
## lm(formula = H.id ~ Grads, data = asmF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.847 -4.831 -1.951 2.511 33.523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.31712 0.97428 4.431 2.24e-05 ***
## Grads 0.13162 0.02381 5.528 2.24e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.797 on 109 degrees of freedom
## Multiple R-squared: 0.219, Adjusted R-squared: 0.2118
## F-statistic: 30.56 on 1 and 109 DF, p-value: 2.238e-07
#Step4
modF4 <- lm(H.id ~ researchy + Grads, asmF)
summary(modF4)
##
## Call:
## lm(formula = H.id ~ researchy + Grads, data = asmF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.503 -4.863 -1.314 3.085 30.945
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.12593 1.71330 0.074 0.94154
## researchy 0.26948 0.09201 2.929 0.00415 **
## Grads 0.11039 0.02414 4.574 1.28e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.539 on 108 degrees of freedom
## Multiple R-squared: 0.2764, Adjusted R-squared: 0.263
## F-statistic: 20.63 on 2 and 108 DF, p-value: 2.583e-08
#residual plot殘差圖
#x軸為某predictor,y軸為其他predictor解釋後的residuals
termplot(modF4, partial.resid = T, smooth = panel.smooth)
#引進{mediation} package
#估計中介效果的信賴區間(CI)-可以知道中介效果的大小
library(mediation)
med.rslt = mediate(modF2, modF4, treat ='researchy', mediator='Grads', boot=T)
summary(med.rslt)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.1263 0.0442 0.24 <2e-16 ***
## ADE 0.2695 0.0902 0.47 0.008 **
## Total Effect 0.3958 0.2046 0.61 0.002 **
## Prop. Mediated 0.3192 0.1281 0.65 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 111
##
##
## Simulations: 1000
ACME (average causal mediation effects)
In Female group, the indirect effect of the predictor (研究生涯年) on
the outcome (論文指數) that goes through the mediator
(指導畢業學生數)=0.1263
ADE (average direct effects)
In Female group, the direct effect of the predictor on the outcome when
controlling the mediator =0.2695 mod4
Total Effect
In Female group, direct effect + indirect effect of the predictor onto
the outcome =0.1263 + 0.2695 =0.3958
Prop. Mediated
In Female group, the proportion of the effect of the predictor on the
outcomes that goes through the mediator =0.1263/0.3958
根據研究結果,在女性組別中,研究生涯年透過指導畢業學生數影響論文指數,研究生涯年對論文指數的效果部分被指導畢業學生數所中介,其中介效果為.1263,信賴區間為(.0425, .25),不包括0。
#Step1
#迴歸係數應該顯著,畢竟研究生涯年影響論文係數
modM1 <- lm(H.id ~ researchy, asmM)
summary(modM1)
##
## Call:
## lm(formula = H.id ~ researchy, data = asmM)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.878 -6.614 -1.033 4.895 69.525
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.68714 1.53079 -0.449 0.654
## researchy 0.70187 0.06552 10.712 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.89 on 345 degrees of freedom
## (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared: 0.2496, Adjusted R-squared: 0.2474
## F-statistic: 114.7 on 1 and 345 DF, p-value: < 2.2e-16
#Step2
#研究生涯年影響指導畢業學生數,迴歸係數應該顯著
modM2 <- lm(Grads ~ researchy, asmM)
summary(modM2)
##
## Call:
## lm(formula = Grads ~ researchy, data = asmM)
##
## Residuals:
## Min 1Q Median 3Q Max
## -89.65 -24.81 -8.11 12.11 337.38
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.4517 6.5761 -2.046 0.0416 *
## researchy 2.9456 0.2815 10.465 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 46.79 on 345 degrees of freedom
## (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared: 0.2409, Adjusted R-squared: 0.2387
## F-statistic: 109.5 on 1 and 345 DF, p-value: < 2.2e-16
#Step3
#指導畢業學生數影響論文指數,迴歸係數應該顯著
modM3 <- lm(H.id ~ Grads, asmM)
summary(modM3)
##
## Call:
## lm(formula = H.id ~ Grads, data = asmM)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.534 -7.433 -1.814 5.540 73.044
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.43297 0.83486 11.299 <2e-16 ***
## Grads 0.09920 0.01141 8.693 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.4 on 347 degrees of freedom
## Multiple R-squared: 0.1788, Adjusted R-squared: 0.1765
## F-statistic: 75.56 on 1 and 347 DF, p-value: < 2.2e-16
#Step4
modM4 <- lm(H.id ~ researchy + Grads, asmM)
summary(modM4)
##
## Call:
## lm(formula = H.id ~ researchy + Grads, data = asmM)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.443 -6.874 -0.884 5.054 68.865
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.03844 1.50030 0.026 0.98
## researchy 0.54299 0.07327 7.411 9.81e-13 ***
## Grads 0.05394 0.01221 4.418 1.34e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.61 on 344 degrees of freedom
## (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared: 0.2899, Adjusted R-squared: 0.2857
## F-statistic: 70.21 on 2 and 344 DF, p-value: < 2.2e-16
#residual plot殘差圖
#x軸為某predictor,y軸為其他predictor解釋後的residuals
termplot(modM4, partial.resid = T, smooth = panel.smooth)
#引進{mediation} package
#估計中介效果的信賴區間(CI)-可以知道中介效果的大小
library(mediation)
med.rslt = mediate(modM2, modM4, treat ='researchy', mediator='Grads', boot=T)
## Running nonparametric bootstrap
summary(med.rslt)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.1589 0.0697 0.28 <2e-16 ***
## ADE 0.5430 0.3826 0.69 <2e-16 ***
## Total Effect 0.7019 0.5776 0.83 <2e-16 ***
## Prop. Mediated 0.2264 0.0989 0.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 347
##
##
## Simulations: 1000
ACME (average causal mediation effects)
In Male group, the indirect effect of the predictor (研究生涯年) on the
outcome (論文指數) that goes through the mediator
(指導畢業學生數)=0.1589
ADE (average direct effects)
In Male group, the direct effect of the predictor on the outcome when
controlling the mediator =0.5430
Total Effect
In Male group, direct effect + indirect effect of the predictor onto the
outcome =0.1589 + 0.5430 =0.7019
Prop. Mediated
In Male group, the proportion of the effect of the predictor on the
outcomes that goes through the mediator =0.1589/0.7019
根據研究結果,在男性組別中,研究生涯年透過指導畢業學生數影響論文指數,研究生涯年對論文指數的效果部分被指導畢業學生數所中介,其中介效果為.1589,信賴區間為(.0797, .28),不包括0。