library(showtext);library(systemfonts);library(dplyr);library(sjPlot);library(ggplot2)
## Loading required package: sysfonts
## Loading required package: showtextdb
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:sjPlot':
## 
##     set_theme
# 1. 找到該字體的精確檔案路徑
font_info <- system_fonts() %>% 
  filter(family == "DFPYuanBold-B5") %>% 
  slice(1) # 只取第一筆

# 2. 用路徑來新增字體
# font_info$path 會直接指向 C:/Windows/Fonts/...
font_add("df_round", regular = font_info$path)

# 3. 啟動 showtext
showtext_auto()

# 3. 設定 ggplot2 全域字體
ggplot2::theme_set(theme_minimal(base_family = "df_round"))

# 4. 設定 sjPlot 全域字體
sjPlot::set_theme(base = theme_minimal(), 
          theme.font = "df_round")

ANCOVA = ANOVA + linear regression

但這個模型的建立仰賴於沒有交互作用的假設!! 甚麼是交互作用所謂的交互作用interaction,就是x因為I會對Y有不一樣的影響。 比如說體力(X)會透過穿的鞋子(I)對跑步速度有不一樣的影響,穿到拖鞋你體力怎麼上升都沒用的話,代表體力不能夠等比例的提升跑步速度了,那就是鞋子和體力就有交互作用。

簡單模型 只有交互作用項和X & Y

  1. 先分手自排畫出x y之間的關係
data(mtcars)
df <- mtcars

df$stamina <- factor(df$am, labels = c("Automatic", "Manual"))
#am Transmission (0 = automatic, 1 = manual)
df$weight <- df$wt
df$speed <- df$mpg
#mpg    Miles/(US) gallon

library(ggplot2)
ggplot(df, aes(weight, speed, color = stamina))+
        geom_point()+
        geom_smooth(method ="lm", se = F )+
        labs(title = "車子重量對速度 是否受手自排的交互作用?",
             x = "車子重量(x)",
             y = "速度(y)")
## `geom_smooth()` using formula = 'y ~ x'

可以看到不用做檢定就可以看到有交互作用了 還是做個檢定

simple_model <- lm(speed ~  stamina *weight, data = df)
summary(simple_model)
## 
## Call:
## lm(formula = speed ~ stamina * weight, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6004 -1.5446 -0.5325  0.9012  6.0909 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           31.4161     3.0201  10.402 4.00e-11 ***
## staminaManual         14.8784     4.2640   3.489  0.00162 ** 
## weight                -3.7859     0.7856  -4.819 4.55e-05 ***
## staminaManual:weight  -5.2984     1.4447  -3.667  0.00102 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.591 on 28 degrees of freedom
## Multiple R-squared:  0.833,  Adjusted R-squared:  0.8151 
## F-statistic: 46.57 on 3 and 28 DF,  p-value: 5.209e-11

首先看到weight, stamina, interaction term三個變項就已經對速度有0.833的Rsquare。 以及每一個變項都有顯著意義。

到這裡要知道,這筆數據的手自排和重量真的有交互作用效果,所以如果你原本要用ANCOVA是不可以的!

如果忽略這個交互作用,強迫手排自排的效果平均,就會高估自排的速度,低估手排的。

  1. 因為有交互作用,要分層報告!
library(emmeans) # least square means
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
emtrends(simple_model, ~ stamina, var = 'weight')|>as_tibble() 
## # A tibble: 2 × 6
##   stamina   weight.trend    SE    df lower.CL upper.CL
##   <fct>            <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 Automatic        -3.79 0.786    28    -5.40    -2.18
## 2 Manual           -9.08 1.21     28   -11.6     -6.60
# spec 放交互作用項
# var是X

可以知道手排對車子重量很敏感,車子重量一變重就變慢很多,但自排較沒有受影響。

他們的上下界沒有overlap所以是顯著。

複雜模型 交互作用項加其他confounders

complex_model <- lm(speed ~ weight * stamina + cyl + hp, data = df)
summary(complex_model)
## 
## Call:
## lm(formula = speed ~ weight * stamina + cyl + hp, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4534 -1.5557 -0.6668  1.2997  5.1000 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          33.22945    3.01690  11.014 2.73e-11 ***
## weight               -2.19536    0.84638  -2.594   0.0154 *  
## staminaManual        11.26038    3.91986   2.873   0.0080 ** 
## cyl                  -0.83651    0.52826  -1.584   0.1254    
## hp                   -0.01246    0.01322  -0.943   0.3545    
## weight:staminaManual -3.72353    1.40714  -2.646   0.0136 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.27 on 26 degrees of freedom
## Multiple R-squared:  0.8811, Adjusted R-squared:  0.8582 
## F-statistic: 38.52 on 5 and 26 DF,  p-value: 3.234e-11

可以看到0.8811的r square。

emtrends(complex_model, ~stamina, var = "weight")|>as_tibble()
## # A tibble: 2 × 6
##   stamina   weight.trend    SE    df lower.CL upper.CL
##   <fct>            <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 Automatic        -2.20 0.846    26    -3.94   -0.456
## 2 Manual           -5.92 1.50     26    -9.01   -2.83

可以看到他們各自的coefficient都變小了,代表剛剛的coef裡面有貢獻by cyl和hp。

  1. 最後來看看VIF
car::vif(simple_model)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
##        stamina         weight stamina:weight 
##      20.901259       2.728248      15.366853
car::vif(complex_model)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
##         weight        stamina            cyl             hp weight:stamina 
##       4.127380      23.024464       5.356564       4.940686      19.002930

會發現有交互作用項VIF會爆錶(>15)的問題,所以可以用去中心化並且 type = 'predictor'的方式去計算 會漂亮很多。 這個vif function裡面的 type是拿來看有interaction時的vif => gvif

df$weight_c <- df$weight - mean(df$weight)
model_centered <- lm(speed ~ (weight_c * stamina) + cyl + hp, data = df)
car::vif(model_centered, type = 'predictor') 
## GVIFs computed for predictors
##              GVIF Df GVIF^(1/(2*Df)) Interacts With       Other Predictors
## weight_c 4.228652  3        1.271648        stamina                cyl, hp
## stamina  4.228652  3        1.271648       weight_c                cyl, hp
## cyl      5.356564  1        2.314425           --    weight_c, stamina, hp
## hp       4.940686  1        2.222765           --   weight_c, stamina, cyl
  1. 增加變項真的有差嗎? => 有!
anova(simple_model, complex_model)
## Analysis of Variance Table
## 
## Model 1: speed ~ stamina * weight
## Model 2: speed ~ weight * stamina + cyl + hp
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     28 188.01                              
## 2     26 133.93  2    54.079 5.2493 0.01216 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. 畫圖
library(ggeffects)
(predictions <- ggpredict(complex_model, terms = c("weight", "stamina")))
## # Predicted values of speed
## 
## stamina: Automatic
## 
## weight | Predicted |       95% CI
## ---------------------------------
##   1.40 |     23.15 | 19.12, 27.18
##   2.00 |     21.84 | 18.79, 24.89
##   2.80 |     20.08 | 18.22, 21.94
##   3.40 |     18.76 | 17.50, 20.03
##   4.00 |     17.44 | 16.06, 18.83
##   5.40 |     14.37 | 11.03, 17.71
## 
## stamina: Manual
## 
## weight | Predicted |       95% CI
## ---------------------------------
##   1.40 |     29.20 | 25.41, 32.99
##   2.00 |     25.65 | 23.42, 27.88
##   2.80 |     20.91 | 19.14, 22.69
##   3.40 |     17.36 | 14.23, 20.49
##   4.00 |     13.81 |  8.98, 18.64
##   5.40 |      5.52 | -3.51, 14.56
## 
## Adjusted for:
## * cyl =   6.19
## *  hp = 146.69
## 
## Not all rows are shown in the output. Use `print(..., n = Inf)` to show
##   all rows.
A <- plot(predictions) +
        labs(title = "add other CF",
             x = "Weight",
             y = "Speed")+ylim(0,40)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
(predictions2 <- ggpredict(simple_model, terms = c("weight", "stamina")))
## # Predicted values of speed
## 
## stamina: Automatic
## 
## weight | Predicted |       95% CI
## ---------------------------------
##   1.40 |     26.12 | 22.11, 30.12
##   2.00 |     23.84 | 20.75, 26.94
##   2.80 |     20.82 | 18.84, 22.79
##   3.40 |     18.54 | 17.19, 19.90
##   4.00 |     16.27 | 15.00, 17.55
##   5.40 |     10.97 |  8.08, 13.87
## 
## stamina: Manual
## 
## weight | Predicted |        95% CI
## ----------------------------------
##   1.40 |     33.58 |  30.67, 36.49
##   2.00 |     28.13 |  26.33, 29.92
##   2.80 |     20.86 |  19.10, 22.62
##   3.40 |     15.41 |  12.54, 18.27
##   4.00 |      9.96 |   5.75, 14.17
##   5.40 |     -2.76 | -10.33,  4.81
## 
## Not all rows are shown in the output. Use `print(..., n = Inf)` to show
##   all rows.
B <- plot(predictions2) +
        labs(title = "only interaction",
             x = "Weight",
             y = "Speed")+ylim(0,40)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
library(patchwork)
(B+A)+ plot_layout(guides = 'collect') + theme(legend.position = 'right')