dta <- read.csv("ncku_prof_V6.csv", h=T, stringsAsFactors = TRUE)
dta <- dta %>%
  select(H.id, Degree, Grads, FPY) %>%
  mutate(researchy = 2022 - FPY)
boxplot(H.id ~ Degree, data = dta)

boxplot(Grads ~ Degree, data = dta)

Ddta <- filter(dta, Degree == "D")
Odta <- filter(dta, Degree == "O")

Domestic Degree:

mod1D <- lm(H.id ~ researchy, Ddta)
summary(mod1D)
## 
## Call:
## lm(formula = H.id ~ researchy, data = Ddta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.537  -6.381  -0.118   4.674  33.554 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.2789     2.2214   0.576    0.566    
## researchy     0.6183     0.1069   5.784 4.78e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.15 on 136 degrees of freedom
## Multiple R-squared:  0.1974, Adjusted R-squared:  0.1915 
## F-statistic: 33.45 on 1 and 136 DF,  p-value: 4.783e-08
mod2D <- lm(Grads ~ researchy, Ddta)
summary(mod2D)
## 
## Call:
## lm(formula = Grads ~ researchy, data = Ddta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -77.828 -17.720  -8.046   9.737 178.389 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -19.5624     8.1594  -2.398   0.0179 *  
## researchy     2.7826     0.3926   7.087 6.75e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.28 on 136 degrees of freedom
## Multiple R-squared:  0.2697, Adjusted R-squared:  0.2643 
## F-statistic: 50.22 on 1 and 136 DF,  p-value: 6.745e-11
mod3D <- lm(H.id ~ Grads, Ddta)
summary(mod3D)
## 
## Call:
## lm(formula = H.id ~ Grads, data = Ddta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.557  -8.626  -0.296   4.900  33.932 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.47062    1.11108   8.524 2.61e-14 ***
## Grads        0.10814    0.02025   5.341 3.78e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.3 on 136 degrees of freedom
## Multiple R-squared:  0.1734, Adjusted R-squared:  0.1673 
## F-statistic: 28.53 on 1 and 136 DF,  p-value: 3.782e-07
mod4D <- lm(H.id ~ researchy + Grads, Ddta)
summary(mod4D)
## 
## Call:
## lm(formula = H.id ~ researchy + Grads, data = Ddta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.535  -6.498  -0.434   4.725  31.818 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.57030    2.20829   1.164 0.246503    
## researchy    0.43459    0.12180   3.568 0.000499 ***
## Grads        0.06602    0.02273   2.904 0.004305 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.882 on 135 degrees of freedom
## Multiple R-squared:  0.2446, Adjusted R-squared:  0.2334 
## F-statistic: 21.86 on 2 and 135 DF,  p-value: 5.975e-09

Oversea Degree:

mod1O <- lm(H.id ~ researchy, Odta)
summary(mod1O)
## 
## Call:
## lm(formula = H.id ~ researchy, data = Odta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.460  -6.038  -0.985   5.057  70.957 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.32222    1.49981  -1.548    0.123    
## researchy    0.70805    0.06474  10.937   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.61 on 318 degrees of freedom
##   (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared:  0.2733, Adjusted R-squared:  0.2711 
## F-statistic: 119.6 on 1 and 318 DF,  p-value: < 2.2e-16
mod2O <- lm(Grads ~ researchy, Odta)
summary(mod2O)
## 
## Call:
## lm(formula = Grads ~ researchy, data = Odta)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -84.52 -27.27  -8.31  15.09 335.98 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -5.8938     6.5748  -0.896    0.371    
## researchy     2.5832     0.2838   9.102   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 46.51 on 318 degrees of freedom
##   (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared:  0.2067, Adjusted R-squared:  0.2042 
## F-statistic: 82.85 on 1 and 318 DF,  p-value: < 2.2e-16
mod3O <- lm(H.id ~ Grads, Odta)
summary(mod3O)
## 
## Call:
## lm(formula = H.id ~ Grads, data = Odta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.537  -6.356  -2.443   4.865  73.986 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.12882    0.83733   8.514 6.64e-16 ***
## Grads        0.11339    0.01174   9.659  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.95 on 320 degrees of freedom
## Multiple R-squared:  0.2257, Adjusted R-squared:  0.2233 
## F-statistic: 93.29 on 1 and 320 DF,  p-value: < 2.2e-16
mod4O <- lm(H.id ~ researchy + Grads, Odta)
summary(mod4O)
## 
## Call:
## lm(formula = H.id ~ researchy + Grads, data = Odta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.430  -6.071  -1.358   4.648  69.781 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.90619    1.43025  -1.333    0.184    
## researchy    0.52571    0.06923   7.594 3.51e-13 ***
## Grads        0.07059    0.01218   5.794 1.66e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.11 on 317 degrees of freedom
##   (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared:  0.3429, Adjusted R-squared:  0.3388 
## F-statistic: 82.72 on 2 and 317 DF,  p-value: < 2.2e-16
ggpairs(Ddta, mapping= aes(color = Degree))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggpairs(Odta)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

termplot(mod4D, partial.resid = T, smooth = panel.smooth)

termplot(mod4O, partial.resid = T, smooth = panel.smooth)

med.rsltD = mediate(mod2D, mod4D, treat ='researchy', mediator='Grads', boot=T)
## Running nonparametric bootstrap
summary(med.rsltD)
## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME             0.1837       0.0585         0.45   0.010 ** 
## ADE              0.4346       0.0972         0.69   0.018 *  
## Total Effect     0.6183       0.3681         0.85  <2e-16 ***
## Prop. Mediated   0.2971       0.0926         0.79   0.010 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 138 
## 
## 
## Simulations: 1000
library(mediation)
med.rsltO = mediate(mod2O, mod4O, treat ='researchy', mediator='Grads', boot=T)
## Running nonparametric bootstrap
summary(med.rsltO)
## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME             0.1823       0.0964         0.30  <2e-16 ***
## ADE              0.5257       0.3535         0.67  <2e-16 ***
## Total Effect     0.7080       0.5754         0.84  <2e-16 ***
## Prop. Mediated   0.2575       0.1389         0.44  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 320 
## 
## 
## Simulations: 1000

Conclusion:
Domestic:
ACME (average causal mediation effects) The indirect effect of the predictor (研究生涯年) on the outcome (論文指數) that goes through the mediator (指導畢業學生數).
0.1837

ADE (average direct effects) The direct effect of the predictor on the outcome when controlling the mediator.
0.435

Total Effect direct effect + indirect effect of the predictor onto the outcome.
0.618

Prop. Mediated The proportion of the effect of the predictor on the outcomes that goes through the mediator.
0.297

Overseas:
ACME (average causal mediation effects) The indirect effect of the predictor (研究生涯年) on the outcome (論文指數) that goes through the mediator (指導畢業學生數).
0.1823

ADE (average direct effects)
The direct effect of the predictor on the outcome when controlling the mediator.
0.526

Total Effect direct effect + indirect effect of the predictor onto the outcome.
0.708

Prop. Mediated The proportion of the effect of the predictor on the outcomes that goes through the mediator.
0.258

I don’t see much difference on both Domestic Degree or Overseas Degree? It seems like for overseas Degree it effects more on how long have them been doing research. And also how many graduate student they have seems to be slightly less influencial compare to Domestic degree.