library(faraway)
data("swiss")
# NOTE(review): the help page and View() below are interactive aids; they may
# not work when the script is run in batch mode -- confirm they are wanted.
help("swiss")
## starting httpd help server ... done
View(swiss)
# Number of rows and columns.
dim(swiss)
## [1] 47 6
# Count the missing values in each column before any analysis.
vapply(swiss, function(column) sum(is.na(column)), numeric(1))
## Fertility Agriculture Examination Education
## 0 0 0 0
## Catholic Infant.Mortality
## 0 0
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Compact column-wise overview: the type and first values of every variable.
swiss %>%
  glimpse()
## Rows: 47
## Columns: 6
## $ Fertility <dbl> 80.2, 83.1, 92.5, 85.8, 76.9, 76.1, 83.8, 92.4, 82.4,~
## $ Agriculture <dbl> 17.0, 45.1, 39.7, 36.5, 43.5, 35.3, 70.2, 67.8, 53.3,~
## $ Examination <int> 15, 6, 5, 12, 17, 9, 16, 14, 12, 16, 14, 21, 14, 19, ~
## $ Education <int> 12, 9, 5, 7, 15, 7, 7, 8, 7, 13, 6, 12, 7, 12, 5, 2, ~
## $ Catholic <dbl> 9.96, 84.84, 93.40, 33.77, 5.16, 90.57, 92.85, 97.16,~
## $ Infant.Mortality <dbl> 22.2, 22.2, 20.2, 20.3, 20.6, 26.6, 23.6, 24.9, 21.0,~
# 1. Numerical (double, integer) characteristics
# Minimum, quartiles, mean and maximum of every column.
print(summary(swiss))
## Fertility Agriculture Examination Education
## Min. :35.00 Min. : 1.20 Min. : 3.00 Min. : 1.00
## 1st Qu.:64.70 1st Qu.:35.90 1st Qu.:12.00 1st Qu.: 6.00
## Median :70.40 Median :54.10 Median :16.00 Median : 8.00
## Mean :70.14 Mean :50.66 Mean :16.49 Mean :10.98
## 3rd Qu.:78.45 3rd Qu.:67.65 3rd Qu.:22.00 3rd Qu.:12.00
## Max. :92.50 Max. :89.70 Max. :37.00 Max. :53.00
## Catholic Infant.Mortality
## Min. : 2.150 Min. :10.80
## 1st Qu.: 5.195 1st Qu.:18.15
## Median : 15.140 Median :20.00
## Mean : 41.144 Mean :19.94
## 3rd Qu.: 93.125 3rd Qu.:21.70
## Max. :100.000 Max. :26.60
# Correlation coefficients (basic statistics): the strength and direction of
# the linear relationship between each pair of variables, via cor().
print(cor(swiss, method = "pearson"))
## Fertility Agriculture Examination Education Catholic
## Fertility 1.0000000 0.35307918 -0.6458827 -0.66378886 0.4636847
## Agriculture 0.3530792 1.00000000 -0.6865422 -0.63952252 0.4010951
## Examination -0.6458827 -0.68654221 1.0000000 0.69841530 -0.5727418
## Education -0.6637889 -0.63952252 0.6984153 1.00000000 -0.1538589
## Catholic 0.4636847 0.40109505 -0.5727418 -0.15385892 1.0000000
## Infant.Mortality 0.4165560 -0.06085861 -0.1140216 -0.09932185 0.1754959
## Infant.Mortality
## Fertility 0.41655603
## Agriculture -0.06085861
## Examination -0.11402160
## Education -0.09932185
## Catholic 0.17549591
## Infant.Mortality 1.00000000
# Graphical characteristics
# Scatterplot matrix of all variables with a smoother drawn in each panel.
pairs(swiss, panel = panel.smooth, main = "swiss data")

# Kernel density estimate of Fertility with a density-scaled histogram
# overlaid on the same axes. The x-axis label is passed as `xlab`; the
# former misspelling `xalb` was ignored and raised one warning per
# low-level plotting call.
plot(density(swiss$Fertility), main = "Fertility", xlab = "Fertility")
hist(swiss$Fertility, freq = FALSE, add = TRUE)

# Normal Q-Q plot of Fertility together with its reference line.
qqnorm(swiss$Fertility, ylab = "fertility")
qqline(swiss$Fertility)

# 2. Variable selection to choose the best model
# Starting point: a linear regression of Fertility on every other column.
# NOTE(review): attach() puts the columns of swiss on the search path; the
# lm() call below already receives the data frame through `data =`, so the
# attach is only needed if later code uses bare column names -- confirm and
# consider removing it.
attach(swiss)
bs.out2 <- lm(Fertility ~ ., data = swiss)
summary(bs.out2)
##
## Call:
## lm(formula = Fertility ~ ., data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2743 -5.2617 0.5032 4.1198 15.3213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.91518 10.70604 6.250 1.91e-07 ***
## Agriculture -0.17211 0.07030 -2.448 0.01873 *
## Examination -0.25801 0.25388 -1.016 0.31546
## Education -0.87094 0.18303 -4.758 2.43e-05 ***
## Catholic 0.10412 0.03526 2.953 0.00519 **
## Infant.Mortality 1.07705 0.38172 2.822 0.00734 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.165 on 41 degrees of freedom
## Multiple R-squared: 0.7067, Adjusted R-squared: 0.671
## F-statistic: 19.76 on 5 and 41 DF, p-value: 5.594e-10
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following objects are masked from 'package:faraway':
##
## logit, vif
# Durbin-Watson statistic computed from the residual vector of the full model.
serg.res1 <- resid(bs.out2)
durbinWatsonTest(serg.res1)
## [1] 1.453536