knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
# Load the heart-disease data set from the working directory. The file's
# first column has no header, so readr auto-names it `...1` (see the
# "New names" message recorded below).
data <- readr::read_csv(file = "heart.csv")
## New names:
## Rows: 498 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (4): ...1, biking, smoking, heart.disease
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Preview the first six rows to sanity-check the imported columns.
head(data, n = 6L)
## # A tibble: 6 × 4
## ...1 biking smoking heart.disease
## <dbl> <dbl> <dbl> <dbl>
## 1 1 30.8 10.9 11.8
## 2 2 65.1 2.22 2.85
## 3 3 1.96 17.6 17.2
## 4 4 44.8 2.80 6.82
## 5 5 69.4 16.0 4.06
## 6 6 54.4 29.3 9.55
# Scatterplot matrix of the three study variables. The auto-named `...1`
# column appears to be just the CSV's row counter (1, 2, 3, ...), so it is
# left out: plotting it would add a row and column of panels that carry no
# information about the relationships of interest.
pairs(data[, c("biking", "smoking", "heart.disease")])
## 2. 독립변수와 종속변수는 무엇인가? 각 변수의 타입은 무엇인가?
- 독립변수: biking, smoking (둘 다 숫자형, 연속형)
- 종속변수: heart.disease (숫자형, 연속형)
# Fit a multiple linear regression of heart disease on biking and smoking,
# then print the coefficient table and fit statistics.
# The chunk option `echo = TRUE` is already set once at the top of the
# document, so the duplicate knitr call that used to precede this fit has
# been dropped.
model <- lm(heart.disease ~ biking + smoking, data = data)
summary(model)
##
## Call:
## lm(formula = heart.disease ~ biking + smoking, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1789 -0.4463 0.0362 0.4422 1.9331
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.984658 0.080137 186.99 <2e-16 ***
## biking -0.200133 0.001366 -146.53 <2e-16 ***
## smoking 0.178334 0.003539 50.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.654 on 495 degrees of freedom
## Multiple R-squared: 0.9796, Adjusted R-squared: 0.9795
## F-statistic: 1.19e+04 on 2 and 495 DF, p-value: < 2.2e-16
# Report the (first) class of every column as a named character vector.
# vapply() enforces that shape; sapply() would silently return a list as
# soon as any column carried more than one class (e.g. a date-time).
vapply(data, function(column) class(column)[1L], character(1))
## ...1 biking smoking heart.disease
## "numeric" "numeric" "numeric" "numeric"
# Coerce the three model variables to numeric in one vectorised assignment.
# The class report recorded above already lists these columns as "numeric",
# so this step is defensive and leaves the values unchanged.
data[c("smoking", "biking", "heart.disease")] <- lapply(
  data[c("smoking", "biking", "heart.disease")],
  as.numeric
)

# Refit the same two-predictor regression on the coerced columns and print
# its summary.
model <- lm(
  formula = heart.disease ~ biking + smoking,
  data = data
)
summary(model)
##
## Call:
## lm(formula = heart.disease ~ biking + smoking, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1789 -0.4463 0.0362 0.4422 1.9331
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.984658 0.080137 186.99 <2e-16 ***
## biking -0.200133 0.001366 -146.53 <2e-16 ***
## smoking 0.178334 0.003539 50.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.654 on 495 degrees of freedom
## Multiple R-squared: 0.9796, Adjusted R-squared: 0.9795
## F-statistic: 1.19e+04 on 2 and 495 DF, p-value: < 2.2e-16