dta <- read.csv("ncku_prof_V6.csv", h=T, stringsAsFactors = TRUE)
dta <- dta %>%
select(H.id, Degree, Grads, FPY) %>%
mutate(researchy = 2022 - FPY)
boxplot(H.id ~ Degree, data = dta)
boxplot(Grads ~ Degree, data = dta)
Ddta <- filter(dta, Degree == "D")
Odta <- filter(dta, Degree == "O")
Domestic Degree:
mod1D <- lm(H.id ~ researchy, Ddta)
summary(mod1D)
##
## Call:
## lm(formula = H.id ~ researchy, data = Ddta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.537 -6.381 -0.118 4.674 33.554
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2789 2.2214 0.576 0.566
## researchy 0.6183 0.1069 5.784 4.78e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.15 on 136 degrees of freedom
## Multiple R-squared: 0.1974, Adjusted R-squared: 0.1915
## F-statistic: 33.45 on 1 and 136 DF, p-value: 4.783e-08
mod2D <- lm(Grads ~ researchy, Ddta)
summary(mod2D)
##
## Call:
## lm(formula = Grads ~ researchy, data = Ddta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -77.828 -17.720 -8.046 9.737 178.389
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19.5624 8.1594 -2.398 0.0179 *
## researchy 2.7826 0.3926 7.087 6.75e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.28 on 136 degrees of freedom
## Multiple R-squared: 0.2697, Adjusted R-squared: 0.2643
## F-statistic: 50.22 on 1 and 136 DF, p-value: 6.745e-11
mod3D <- lm(H.id ~ Grads, Ddta)
summary(mod3D)
##
## Call:
## lm(formula = H.id ~ Grads, data = Ddta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.557 -8.626 -0.296 4.900 33.932
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.47062 1.11108 8.524 2.61e-14 ***
## Grads 0.10814 0.02025 5.341 3.78e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.3 on 136 degrees of freedom
## Multiple R-squared: 0.1734, Adjusted R-squared: 0.1673
## F-statistic: 28.53 on 1 and 136 DF, p-value: 3.782e-07
mod4D <- lm(H.id ~ researchy + Grads, Ddta)
summary(mod4D)
##
## Call:
## lm(formula = H.id ~ researchy + Grads, data = Ddta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.535 -6.498 -0.434 4.725 31.818
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.57030 2.20829 1.164 0.246503
## researchy 0.43459 0.12180 3.568 0.000499 ***
## Grads 0.06602 0.02273 2.904 0.004305 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.882 on 135 degrees of freedom
## Multiple R-squared: 0.2446, Adjusted R-squared: 0.2334
## F-statistic: 21.86 on 2 and 135 DF, p-value: 5.975e-09
Oversea Degree:
mod1O <- lm(H.id ~ researchy, Odta)
summary(mod1O)
##
## Call:
## lm(formula = H.id ~ researchy, data = Odta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.460 -6.038 -0.985 5.057 70.957
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.32222 1.49981 -1.548 0.123
## researchy 0.70805 0.06474 10.937 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.61 on 318 degrees of freedom
## (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared: 0.2733, Adjusted R-squared: 0.2711
## F-statistic: 119.6 on 1 and 318 DF, p-value: < 2.2e-16
mod2O <- lm(Grads ~ researchy, Odta)
summary(mod2O)
##
## Call:
## lm(formula = Grads ~ researchy, data = Odta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -84.52 -27.27 -8.31 15.09 335.98
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.8938 6.5748 -0.896 0.371
## researchy 2.5832 0.2838 9.102 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 46.51 on 318 degrees of freedom
## (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared: 0.2067, Adjusted R-squared: 0.2042
## F-statistic: 82.85 on 1 and 318 DF, p-value: < 2.2e-16
mod3O <- lm(H.id ~ Grads, Odta)
summary(mod3O)
##
## Call:
## lm(formula = H.id ~ Grads, data = Odta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.537 -6.356 -2.443 4.865 73.986
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.12882 0.83733 8.514 6.64e-16 ***
## Grads 0.11339 0.01174 9.659 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.95 on 320 degrees of freedom
## Multiple R-squared: 0.2257, Adjusted R-squared: 0.2233
## F-statistic: 93.29 on 1 and 320 DF, p-value: < 2.2e-16
mod4O <- lm(H.id ~ researchy + Grads, Odta)
summary(mod4O)
##
## Call:
## lm(formula = H.id ~ researchy + Grads, data = Odta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.430 -6.071 -1.358 4.648 69.781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.90619 1.43025 -1.333 0.184
## researchy 0.52571 0.06923 7.594 3.51e-13 ***
## Grads 0.07059 0.01218 5.794 1.66e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.11 on 317 degrees of freedom
## (因為不存在,2 個觀察量被刪除了)
## Multiple R-squared: 0.3429, Adjusted R-squared: 0.3388
## F-statistic: 82.72 on 2 and 317 DF, p-value: < 2.2e-16
ggpairs(Ddta, mapping= aes(color = Degree))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggpairs(Odta)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
termplot(mod4D, partial.resid = T, smooth = panel.smooth)
termplot(mod4O, partial.resid = T, smooth = panel.smooth)
med.rsltD = mediate(mod2D, mod4D, treat ='researchy', mediator='Grads', boot=T)
## Running nonparametric bootstrap
summary(med.rsltD)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.1837 0.0585 0.45 0.010 **
## ADE 0.4346 0.0972 0.69 0.018 *
## Total Effect 0.6183 0.3681 0.85 <2e-16 ***
## Prop. Mediated 0.2971 0.0926 0.79 0.010 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 138
##
##
## Simulations: 1000
library(mediation)
med.rsltO = mediate(mod2O, mod4O, treat ='researchy', mediator='Grads', boot=T)
## Running nonparametric bootstrap
summary(med.rsltO)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.1823 0.0964 0.30 <2e-16 ***
## ADE 0.5257 0.3535 0.67 <2e-16 ***
## Total Effect 0.7080 0.5754 0.84 <2e-16 ***
## Prop. Mediated 0.2575 0.1389 0.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 320
##
##
## Simulations: 1000
Conclusion:
Domestic:
ACME (average causal mediation effects) The indirect effect of the
predictor (研究生涯年) on the outcome (論文指數) that goes through the
mediator (指導畢業學生數).
0.1837
ADE (average direct effects) The direct effect of the predictor on
the outcome when controlling the mediator.
0.435
Total Effect direct effect + indirect effect of the predictor onto
the outcome.
0.618
Prop. Mediated The proportion of the effect of the predictor on the
outcomes that goes through the mediator.
0.297
Overseas:
ACME (average causal mediation effects) The indirect effect of the
predictor (研究生涯年) on the outcome (論文指數) that goes through the
mediator (指導畢業學生數).
0.1823
ADE (average direct effects)
The direct effect of the predictor on the outcome when controlling the
mediator.
0.526
Total Effect direct effect + indirect effect of the predictor onto
the outcome.
0.708
Prop. Mediated The proportion of the effect of the predictor on the
outcomes that goes through the mediator.
0.258
I don’t see much difference on both Domestic Degree or Overseas Degree? It seems like for overseas Degree it effects more on how long have them been doing research. And also how many graduate student they have seems to be slightly less influencial compare to Domestic degree.