dta <- read.csv("ncku_prof_V6.csv", h=T, stringsAsFactors = TRUE)
dta <- dta %>%
select(H.id, Gender, Degree, Rank, College, Dept, Grads, FPY, Articles) %>%
mutate(researchy = 2022 - FPY)
#驗證性別對研究生涯年對論文引數的調節效果
fullmod <- lm(H.id ~ researchy + Degree + Degree*researchy, data = dta) #有交乘項
reducemod <- lm(H.id ~ researchy + Degree, data = dta) #無交乘項
summary(summary(fullmod))
## Length Class Mode
## call 3 -none- call
## terms 3 terms call
## residuals 458 -none- numeric
## coefficients 16 -none- numeric
## aliased 4 -none- logical
## sigma 1 -none- numeric
## df 3 -none- numeric
## r.squared 1 -none- numeric
## adj.r.squared 1 -none- numeric
## fstatistic 3 -none- numeric
## cov.unscaled 16 -none- numeric
## na.action 2 omit numeric
anova(fullmod, reducemod) #檢驗兩個模型間的差異是否顯著
## Analysis of Variance Table
##
## Model 1: H.id ~ researchy + Degree + Degree * researchy
## Model 2: H.id ~ researchy + Degree
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 454 49809
## 2 455 49863 -1 -54.377 0.4956 0.4818
Pr>0.05,不顯著,學位國籍不是研究生涯年與論文引數的調節變項
summary(fullmod)$r.sq - summary(reducemod)$r.sq #計算兩個模型解釋量的差異
## [1] 0.0008148254
ggplot(aes(y = H.id, x = researchy, color = Degree), data = dta) +
geom_smooth(method = "lm", se = F, fullrange = T) +
theme_bw()
## `geom_smooth()` using formula 'y ~ x'
#計算與檢驗簡單斜率是否顯著
dta$nDegree <- as.numeric(dta$Degree)
modmod <- lmres(H.id ~ researchy + nDegree + researchy*nDegree, dta)
summary(modmod)
## Formula:
## H.id ~ researchy + nDegree + researchy.XX.nDegree
## <environment: 0x7fcd2c8334a0>
##
## Models
## R R^2 Adj. R^2 F df1 df2 p.value
## Model 0.504 0.254 0.249 51.426 3.000 454 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -23.5372 -6.2005 -0.7767 0.0000 4.9457 70.9566
##
## Coefficients
## Estimate StdErr t.value beta p.value
## (Intercept) 4.87999 4.81851 1.01276 0.31171
## researchy 0.52852 0.22973 2.30068 0.3899 0.02186 *
## nDegree -3.60111 2.72920 -1.31947 -0.1369 0.18768
## researchy.XX.nDegree 0.08976 0.12750 0.70402 0.1430 0.48178
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Collinearity
## VIF Tolerance
## researchy 17.4680 0.0572
## nDegree 6.5461 0.1528
## researchy.XX.nDegree 25.0876 0.0399
ss <- simpleSlope(modmod, pred = "researchy", mod1 = "nDegree", coded = "nDegree")
summary(ss)
##
## ** Estimated points of H.id **
##
## Low researchy (-1 SD) High researchy (+1 SD)
## Low nDegree ( 1 ) 8.526 19.549
## High nDegree ( 2 ) 5.977 18.600
##
##
##
## ** Simple Slopes analysis ( df= 454 ) **
##
## simple slope standard error t-value p.value
## Low nDegree ( 1 ) 0.6183 0.1103 5.6 <2e-16 ***
## High nDegree ( 2 ) 0.7080 0.0639 11.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
##
## ** Bauer & Curran 95% CI **
##
## lower CI upper CI
## nDegree -0.2349 5.9949
CI範圍包含0,簡單斜率不顯著
PlotSlope(ss)