library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
#讀取資料
dta <- read.csv("c:/Users/tena8/Desktop/1102/ncku_prof_V6.csv", h=T, stringsAsFactors = TRUE)
dta <- dta %>%
  select(H.id, Gender, Degree, Rank, College, Dept, Grads, FPY, Articles) %>%
  mutate(researchy = 2022 - FPY)

Degree

Assessment 1

#驗證學位國籍是否是研究生涯年與論文引數的調節變項
fullmod <- lm(H.id ~ researchy + Degree + Degree*researchy, data = dta) #有交乘項
reducemod <- lm(H.id ~ researchy + Degree, data = dta) #無交乘項
anova(fullmod, reducemod) #檢驗兩個模型間的差異是否顯著
## Analysis of Variance Table
## 
## Model 1: H.id ~ researchy + Degree + Degree * researchy
## Model 2: H.id ~ researchy + Degree
##   Res.Df   RSS Df Sum of Sq      F Pr(>F)
## 1    454 49809                           
## 2    455 49863 -1   -54.377 0.4956 0.4818

根據anova的結果顯示,p-value>0.05,代表交乘項沒有顯著。

Assessment 2

#調節效果圖
ggplot(aes(y = H.id, x = researchy, color = Degree), data = dta) +
 geom_smooth(method = "lm", se = F, fullrange = T) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

從調節效果圖來看,可以發現海外和本土的交叉點不明顯,而且是在大約研究生涯40年的位置。

Assessment 3

library(pequod)
## 載入需要的套件:car
## 載入需要的套件:carData
## 
## 載入套件:'car'
## 下列物件被遮斷自 'package:dplyr':
## 
##     recode
## 下列物件被遮斷自 'package:purrr':
## 
##     some
dta$nDegree <- as.numeric(dta$Degree)
modmod <- lmres(H.id ~ researchy + nDegree + researchy*nDegree, dta)
summary(modmod)
## Formula:
## H.id ~ researchy + nDegree + researchy.XX.nDegree
## <environment: 0x0000020912f5f138>
## 
## Models
##          R     R^2   Adj. R^2    F     df1  df2  p.value    
## Model  0.504  0.254     0.249 51.426  3.000  454  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residuals
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -23.5372  -6.2005  -0.7767   0.0000   4.9457  70.9566 
## 
## Coefficients
##                      Estimate   StdErr  t.value    beta p.value  
## (Intercept)           4.87999  4.81851  1.01276         0.31171  
## researchy             0.52852  0.22973  2.30068  0.3899 0.02186 *
## nDegree              -3.60111  2.72920 -1.31947 -0.1369 0.18768  
## researchy.XX.nDegree  0.08976  0.12750  0.70402  0.1430 0.48178  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Collinearity
##                          VIF Tolerance
## researchy            17.4680    0.0572
## nDegree               6.5461    0.1528
## researchy.XX.nDegree 25.0876    0.0399
ss <- simpleSlope(modmod, pred = "researchy", mod1 = "nDegree", coded = "nDegree")
summary(ss)
## 
## ** Estimated points of H.id  **
## 
##                    Low researchy (-1 SD) High researchy (+1 SD)
## Low nDegree ( 1 )                  8.526                 19.549
## High nDegree ( 2 )                 5.977                 18.600
## 
## 
## 
## ** Simple Slopes analysis ( df= 454 ) **
## 
##                    simple slope standard error t-value p.value    
## Low nDegree ( 1 )        0.6183         0.1103     5.6  <2e-16 ***
## High nDegree ( 2 )       0.7080         0.0639    11.1  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## 
## ** Bauer & Curran 95% CI **
## 
##         lower CI upper CI
## nDegree  -0.2349   5.9949
PlotSlope(ss)

簡單斜率為0.6183、0.7080,P值皆<0.05,代表顯著。

從圖來看,兩條直線的斜率很接近,幾乎平行。

Rank

Assessment 1

dta$Rank <- factor(dta$Rank)
#驗證職等是否是研究生涯年與論文引數的調節變項
fullmodr <- lm(H.id ~ researchy + Rank + Rank*researchy, data = dta) #有交乘項
reducemodr <- lm(H.id ~ researchy + Rank, data = dta) #無交乘項
anova(fullmodr, reducemodr) #檢驗兩個模型間的差異是否顯著
## Analysis of Variance Table
## 
## Model 1: H.id ~ researchy + Rank + Rank * researchy
## Model 2: H.id ~ researchy + Rank
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1    452 45925                                  
## 2    454 48364 -2   -2439.1 12.003 8.334e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

根據anova的結果顯示,p-value<0.05,代表交乘項有顯著。

Assessment 2

#調節效果圖
ggplot(aes(y = H.id, x = researchy, color = Rank), data = dta) +
 geom_smooth(method = "lm", se = F, fullrange = T) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

從圖來看,教授和副教授的交叉點大約在研究生涯14年左右,教授和助理教授的交叉點大約在研究生涯16年左右,副教授和助理教授的交叉點在研究生涯14年左右。

所以三種教授的職等在交叉點的位置滿接近的。

Assessment 3

檢驗簡單斜率是否顯著

#因為有三種職位,所以用linear model分開看

#教授的部分
r1 <- dta %>%
  filter(Rank == "1")
summary(lm(H.id ~ researchy, data = r1))
## 
## Call:
## lm(formula = H.id ~ researchy, data = r1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.589  -7.515  -0.606   6.058  68.092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -3.8226     2.8179  -1.357    0.176    
## researchy     0.8403     0.1066   7.881 1.16e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.17 on 238 degrees of freedom
## Multiple R-squared:  0.2069, Adjusted R-squared:  0.2036 
## F-statistic:  62.1 on 1 and 238 DF,  p-value: 1.164e-13

在教授的部分中,簡單斜率的估計值為0.8403,t-value為7.881,p-value<0.05,所以教授的簡單斜率有顯著。

#副教授的部分
r2 <- dta %>%
  filter(Rank == "2")
summary(lm(H.id ~ researchy, data = r2))
## 
## Call:
## lm(formula = H.id ~ researchy, data = r2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.699  -5.539  -2.165   4.436  32.537 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.84512    1.60242   3.648 0.000367 ***
## researchy    0.13483    0.08358   1.613 0.108851    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.404 on 146 degrees of freedom
##   (因為不存在,1 個觀察量被刪除了)
## Multiple R-squared:  0.01751,    Adjusted R-squared:  0.01078 
## F-statistic: 2.603 on 1 and 146 DF,  p-value: 0.1089

在副教授的部分中,簡單斜率的估計值為0.13483,t-value為1.613,p-value為0.1089>0.05,所以副教授的簡單斜率不顯著。

#助理教授的部分
r3 <- dta %>%
  filter(Rank == "3")
summary(lm(H.id ~ researchy, data = r3))
## 
## Call:
## lm(formula = H.id ~ researchy, data = r3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.720 -4.125 -1.189  3.026 28.431 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   1.0371     1.8099   0.573  0.56851   
## researchy     0.5380     0.1568   3.432  0.00102 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.297 on 68 degrees of freedom
##   (因為不存在,1 個觀察量被刪除了)
## Multiple R-squared:  0.1476, Adjusted R-squared:  0.1351 
## F-statistic: 11.78 on 1 and 68 DF,  p-value: 0.001024

在助理教授的部分中,簡單斜率的估計值為0.5380,t-value為3.432,p-value為0.00102<0.05,所以簡單斜率有顯著。

雖然教授和助理教授的簡單斜率都有達顯著,但是教授的簡單斜率估計值較高,顯示教授的相關性較高。