# 20250527
# 회귀분석 개요
# 회귀분석은 다음과 같은 목적으로 사용됩니다.
# 예측: 독립변수를 기반으로 종속변수 값을 예측
# 관계분석: 변수 간 관계의 강도와 방향 파악
# 모델링 : 데이터의 패턴을 수학적으로 표현
# 단순 선형회귀
# 하나의 독립변수(X)와 종속변수(Y)간의 선형관계를 모델링
# 다중 선형회귀
# 여러 독립변수와 종속변수간의 선형관계를 모델링함
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Load the built-in mtcars data set into the workspace and preview its first
# rows.
# NOTE: the workspace is intentionally NOT wiped with rm(list = ls()). That
# call deletes every object of whoever sources this script, yet it does not
# produce a clean session (attached packages and options remain). Restart R
# instead when a fresh session is needed.
data(mtcars)
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Simple linear regression: fit mpg as a linear function of hp on the mtcars
# data, then show the coefficient table and fit statistics.
model <- lm(formula = mpg ~ hp, data = mtcars)
summary(object = model)
##
## Call:
## lm(formula = mpg ~ hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7121 -2.1122 -0.8854 1.5819 8.2360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.09886 1.63392 18.421 < 2e-16 ***
## hp -0.06823 0.01012 -6.742 1.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.863 on 30 degrees of freedom
## Multiple R-squared: 0.6024, Adjusted R-squared: 0.5892
## F-statistic: 45.46 on 1 and 30 DF, p-value: 1.788e-07
# Visualise the relationship: scatter plot of mpg against hp with the fitted
# linear regression line drawn on top.
ggplot(data = mtcars, mapping = aes(x = hp, y = mpg)) +
  geom_point() + # raw observations (scatter plot)
  geom_smooth(method = "lm", se = TRUE) + # regression line with confidence band
  labs(
    title = "MPG vs Horsepower",
    x = "Horsepower",
    y = "Miles per Gallon"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Predict mpg with the simple regression model for three new horsepower
# values.
new_data <- data.frame(
  hp = c(100, 150, 200)
)
predict(object = model, newdata = new_data)
## 1 2 3
## 23.27603 19.86462 16.45320
# Multiple linear regression: fit mpg on hp, wt and am together using the
# mtcars data, then show the coefficient table and fit statistics.
multi_model <- lm(
  formula = mpg ~ hp + wt + am,
  data = mtcars
)
summary(object = multi_model)
##
## Call:
## lm(formula = mpg ~ hp + wt + am, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4221 -1.7924 -0.3788 1.2249 5.5317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.002875 2.642659 12.867 2.82e-13 ***
## hp -0.037479 0.009605 -3.902 0.000546 ***
## wt -2.878575 0.904971 -3.181 0.003574 **
## am 2.083710 1.376420 1.514 0.141268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared: 0.8399, Adjusted R-squared: 0.8227
## F-statistic: 48.96 on 3 and 28 DF, p-value: 2.908e-11
# Predict mpg with the multiple regression model for two new observations,
# each supplying a value for every predictor (hp, wt, am).
new_multi_data <- data.frame(
  hp = c(120, 180),
  wt = c(3.0, 3.5),
  am = c(0, 1)
)
predict(object = multi_model, newdata = new_multi_data)
## 1 2
## 20.8697 19.2654