# Load heart.csv into a data frame and preview its first six rows.
# NOTE(review): the path is absolute and specific to one machine; consider a
# project-relative path so the script runs elsewhere.
heart_data <- read.csv(
  file = "/Users/myojungkim/Downloads/heart.csv",
  header = TRUE
)
head(heart_data, n = 6L)
## X biking smoking heart.disease
## 1 1 30.801246 10.896608 11.769423
## 2 2 65.129215 2.219563 2.854081
## 3 3 1.959665 17.588331 17.177803
## 4 4 44.800196 2.802559 6.816647
## 5 5 69.428454 15.974505 4.062224
## 6 6 54.403626 29.333176 9.550046
# Convert an optional `HeartDisease` column to a factor when it is present.
# The column is looked up by exact name: `$` partially matches data-frame
# column names, so `heart_data$HeartDisease` could silently pick up a
# longer-named column and copy it into a new `HeartDisease` column.
# NOTE(review): the printed structure of this dataset lists the outcome as the
# numeric column `heart.disease`, so this branch appears to be a no-op here;
# confirm whether it is still needed.
if ("HeartDisease" %in% names(heart_data)) {
  heart_data[["HeartDisease"]] <- as.factor(heart_data[["HeartDisease"]])
}
# Show the column names and types of the loaded data.
str(heart_data)
## 'data.frame': 498 obs. of 4 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ biking : num 30.8 65.13 1.96 44.8 69.43 ...
## $ smoking : num 10.9 2.22 17.59 2.8 15.97 ...
## $ heart.disease: num 11.77 2.85 17.18 6.82 4.06 ...
# Fit a linear regression of heart.disease on biking and smoking.
lm_model <- lm(
  formula = heart.disease ~ biking + smoking,
  data = heart_data
)
# Print the summary of the fitted regression model.
summary(lm_model)
##
## Call:
## lm(formula = heart.disease ~ biking + smoking, data = heart_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1789 -0.4463 0.0362 0.4422 1.9331
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.984658 0.080137 186.99 <2e-16 ***
## biking -0.200133 0.001366 -146.53 <2e-16 ***
## smoking 0.178334 0.003539 50.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.654 on 495 degrees of freedom
## Multiple R-squared: 0.9796, Adjusted R-squared: 0.9795
## F-statistic: 1.19e+04 on 2 and 495 DF, p-value: < 2.2e-16