dta <- read.csv("ncku_prof_V6.csv", h=T, stringsAsFactors = TRUE)
dta <- dta %>%
  select(H.id, Gender, Degree, Rank, College, Dept, Grads, FPY, Articles) %>%
  mutate(researchy = 2022 - FPY)
#驗證性別對研究生涯年對論文引數的調節效果
fullmod <- lm(H.id ~ researchy + Degree + Degree*researchy, data = dta) #有交乘項
reducemod <- lm(H.id ~ researchy + Degree, data = dta) #無交乘項
summary(summary(fullmod))
##               Length Class  Mode   
## call            3    -none- call   
## terms           3    terms  call   
## residuals     458    -none- numeric
## coefficients   16    -none- numeric
## aliased         4    -none- logical
## sigma           1    -none- numeric
## df              3    -none- numeric
## r.squared       1    -none- numeric
## adj.r.squared   1    -none- numeric
## fstatistic      3    -none- numeric
## cov.unscaled   16    -none- numeric
## na.action       2    omit   numeric
anova(fullmod, reducemod) #檢驗兩個模型間的差異是否顯著
## Analysis of Variance Table
## 
## Model 1: H.id ~ researchy + Degree + Degree * researchy
## Model 2: H.id ~ researchy + Degree
##   Res.Df   RSS Df Sum of Sq      F Pr(>F)
## 1    454 49809                           
## 2    455 49863 -1   -54.377 0.4956 0.4818

Pr>0.05,不顯著,學位國籍不是研究生涯年與論文引數的調節變項

summary(fullmod)$r.sq - summary(reducemod)$r.sq #計算兩個模型解釋量的差異
## [1] 0.0008148254
ggplot(aes(y = H.id, x = researchy, color = Degree), data = dta) +
 geom_smooth(method = "lm", se = F, fullrange = T) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

#計算與檢驗簡單斜率是否顯著
dta$nDegree <- as.numeric(dta$Degree)
modmod <- lmres(H.id ~ researchy + nDegree + researchy*nDegree, dta)
summary(modmod)
## Formula:
## H.id ~ researchy + nDegree + researchy.XX.nDegree
## <environment: 0x7fcd2c8334a0>
## 
## Models
##          R     R^2   Adj. R^2    F     df1  df2  p.value    
## Model  0.504  0.254     0.249 51.426  3.000  454  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residuals
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -23.5372  -6.2005  -0.7767   0.0000   4.9457  70.9566 
## 
## Coefficients
##                      Estimate   StdErr  t.value    beta p.value  
## (Intercept)           4.87999  4.81851  1.01276         0.31171  
## researchy             0.52852  0.22973  2.30068  0.3899 0.02186 *
## nDegree              -3.60111  2.72920 -1.31947 -0.1369 0.18768  
## researchy.XX.nDegree  0.08976  0.12750  0.70402  0.1430 0.48178  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Collinearity
##                          VIF Tolerance
## researchy            17.4680    0.0572
## nDegree               6.5461    0.1528
## researchy.XX.nDegree 25.0876    0.0399
ss <- simpleSlope(modmod, pred = "researchy", mod1 = "nDegree", coded = "nDegree")
summary(ss)
## 
## ** Estimated points of H.id  **
## 
##                    Low researchy (-1 SD) High researchy (+1 SD)
## Low nDegree ( 1 )                  8.526                 19.549
## High nDegree ( 2 )                 5.977                 18.600
## 
## 
## 
## ** Simple Slopes analysis ( df= 454 ) **
## 
##                    simple slope standard error t-value p.value    
## Low nDegree ( 1 )        0.6183         0.1103     5.6  <2e-16 ***
## High nDegree ( 2 )       0.7080         0.0639    11.1  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## 
## ** Bauer & Curran 95% CI **
## 
##         lower CI upper CI
## nDegree  -0.2349   5.9949

CI範圍包含0,簡單斜率不顯著

PlotSlope(ss)