library(tidyverse)
library(palmerpenguins)

1. Linear Regression without interaction terms

geom_smooth()는 기본적으로 회귀선을 그려준다. 그런데 multiple regression의 경우는 어떻게 해야할까? 즉 covariate가 2개 이상일 경우에는?

step 1. covariates 포함된 multi linear regression

lm_fit <- lm(bill_depth_mm ~ bill_length_mm + sex, data = penguins)
summary(lm_fit)
## 
## Call:
## lm(formula = bill_depth_mm ~ bill_length_mm + sex, data = penguins)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4055 -1.3440 -0.0007  1.1859  4.1253 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    22.56139    0.76355  29.548  < 2e-16 ***
## bill_length_mm -0.14576    0.01787  -8.155 7.35e-15 ***
## sexmale         2.01334    0.19519  10.315  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.672 on 330 degrees of freedom
##   (결측으로 인하여 11개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.2833, Adjusted R-squared:  0.279 
## F-statistic: 65.23 on 2 and 330 DF,  p-value: < 2.2e-16

step 2. 위 결과 참조하여 geom_abline()으로 그리자!

Female : intercept = 22.56, coefficient = - 0.145

Male : intercept = 24.57, coefficient = - 0.145

penguins %>%
filter(!is.na(sex)) %>% 
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, fill = sex, color = sex)) +
    geom_jitter(size = 3, alpha = 0.5) +
    geom_abline(intercept = 22.56, slope = - 0.14576, col = "red") + 
    geom_abline(intercept = 24.57, slope = - 0.14576, col = "blue")

그렇다면 geom_smooth()를 썼을 경우 위와 동일한 그림이 그려지는지 한번 볼까?

penguins %>%
filter(!is.na(sex)) %>% 
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, fill = sex, color = sex)) +
    geom_point(size = 3, alpha = 0.5) +
    geom_smooth(method = "lm") +
    geom_abline(intercept = 22.56, slope = - 0.14576, col = "blue") + 
    geom_abline(intercept = 24.57, slope = - 0.14576, col = "red")
## `geom_smooth()` using formula 'y ~ x'

그려진다!

\(~\)

2. Linear Regression with interaction terms

다중회귀 상호작용 시각화 방법 1: interactions::interact_plot를 통해 시각화하기

먼저 회귀식을 저장하고
lm_fit2 <- lm(bill_depth_mm ~ bill_length_mm*body_mass_g, data = penguins)
summary(lm_fit2)
## 
## Call:
## lm(formula = bill_depth_mm ~ bill_length_mm * body_mass_g, data = penguins)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8186 -1.2083 -0.0342  1.1799  4.2240 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                 1.412e+01  4.370e+00   3.231  0.00135 **
## bill_length_mm              1.822e-01  9.567e-02   1.905  0.05767 . 
## body_mass_g                 6.073e-04  1.125e-03   0.540  0.58966   
## bill_length_mm:body_mass_g -4.020e-05  2.394e-05  -1.679  0.09400 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.738 on 338 degrees of freedom
##   (결측으로 인하여 2개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.2324, Adjusted R-squared:  0.2255 
## F-statistic:  34.1 on 3 and 338 DF,  p-value: < 2.2e-16
library(interactions)
## Warning: 패키지 'interactions'는 R 버전 4.1.2에서 작성되었습니다
interact_plot(lm_fit2, 
              pred = bill_length_mm, modx = body_mass_g,
              plot.points = F , interval = T, colors = "seagreen",
              x.label = "Custom X Label", y.label = "Custom Y Label",
              main.title = "Sample Plot",  legend.main = "Custom Legend Title"
              )

다중회귀 상호작용 시각화 방법 2: ggiraphExtra::ggPredict()를 통해 시각화하기

(https://rpubs.com/cardiomoon/153265)

library(ggiraphExtra)
## Warning: 패키지 'ggiraphExtra'는 R 버전 4.1.2에서 작성되었습니다
ggPredict(lm_fit2, interactive = T, se = T)

다중회귀 상호작용 시각화 방법 3: ggeffect::ggpredict()

ggpredict()는 predicted value / probability 볼 때 쓰임, 즉 marginal effect

(https://rdrr.io/cran/ggeffects/man/ggpredict.html)

library(ggeffects)
## Warning: 패키지 'ggeffects'는 R 버전 4.1.2에서 작성되었습니다
model_1 <- ggpredict(lm_fit2, terms = c("bill_length_mm ", "body_mass_g"))
head(model_1)
## # Predicted values of bill_depth_mm
## 
## # body_mass_g = 3399.8
## 
## bill_length_mm | Predicted |         95% CI
## -------------------------------------------
##             30 |     17.55 | [16.95, 18.15]
##             35 |     17.78 | [17.38, 18.18]
## 
## # body_mass_g = 4201.8
## 
## bill_length_mm | Predicted |         95% CI
## -------------------------------------------
##             30 |     17.07 | [16.38, 17.76]
##             35 |     17.14 | [16.65, 17.62]
## 
## # body_mass_g = 5003.7
## 
## bill_length_mm | Predicted |         95% CI
## -------------------------------------------
##             30 |     16.59 | [15.37, 17.81]
##             35 |     16.50 | [15.60, 17.39]
ggplot(model_1) +
  aes(x = x, y = predicted, colour = group) +
  geom_point(size = 1.5) +
  geom_smooth() +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group), alpha = 0.1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'