library(faraway)
data(swiss)
?swiss
## httpd 도움말 서버를 시작합니다 ... 완료
View(swiss)
dim(swiss)
## [1] 47  6
#결측값 파악해야 함
colSums(is.na(swiss))
##        Fertility      Agriculture      Examination        Education 
##                0                0                0                0 
##         Catholic Infant.Mortality 
##                0                0
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
glimpse(swiss)
## Rows: 47
## Columns: 6
## $ Fertility        <dbl> 80.2, 83.1, 92.5, 85.8, 76.9, 76.1, 83.8, 92.4, 82.4,~
## $ Agriculture      <dbl> 17.0, 45.1, 39.7, 36.5, 43.5, 35.3, 70.2, 67.8, 53.3,~
## $ Examination      <int> 15, 6, 5, 12, 17, 9, 16, 14, 12, 16, 14, 21, 14, 19, ~
## $ Education        <int> 12, 9, 5, 7, 15, 7, 7, 8, 7, 13, 6, 12, 7, 12, 5, 2, ~
## $ Catholic         <dbl> 9.96, 84.84, 93.40, 33.77, 5.16, 90.57, 92.85, 97.16,~
## $ Infant.Mortality <dbl> 22.2, 22.2, 20.2, 20.3, 20.6, 26.6, 23.6, 24.9, 21.0,~
#1. Numerical(double, integer) characteristics

summary(swiss)
##    Fertility      Agriculture     Examination      Education    
##  Min.   :35.00   Min.   : 1.20   Min.   : 3.00   Min.   : 1.00  
##  1st Qu.:64.70   1st Qu.:35.90   1st Qu.:12.00   1st Qu.: 6.00  
##  Median :70.40   Median :54.10   Median :16.00   Median : 8.00  
##  Mean   :70.14   Mean   :50.66   Mean   :16.49   Mean   :10.98  
##  3rd Qu.:78.45   3rd Qu.:67.65   3rd Qu.:22.00   3rd Qu.:12.00  
##  Max.   :92.50   Max.   :89.70   Max.   :37.00   Max.   :53.00  
##     Catholic       Infant.Mortality
##  Min.   :  2.150   Min.   :10.80   
##  1st Qu.:  5.195   1st Qu.:18.15   
##  Median : 15.140   Median :20.00   
##  Mean   : 41.144   Mean   :19.94   
##  3rd Qu.: 93.125   3rd Qu.:21.70   
##  Max.   :100.000   Max.   :26.60
#상관계수, 변수들간에 선형의 정도와 방향을 확인 기초 통계학
#correlation, cor()
cor(swiss)
##                   Fertility Agriculture Examination   Education   Catholic
## Fertility         1.0000000  0.35307918  -0.6458827 -0.66378886  0.4636847
## Agriculture       0.3530792  1.00000000  -0.6865422 -0.63952252  0.4010951
## Examination      -0.6458827 -0.68654221   1.0000000  0.69841530 -0.5727418
## Education        -0.6637889 -0.63952252   0.6984153  1.00000000 -0.1538589
## Catholic          0.4636847  0.40109505  -0.5727418 -0.15385892  1.0000000
## Infant.Mortality  0.4165560 -0.06085861  -0.1140216 -0.09932185  0.1754959
##                  Infant.Mortality
## Fertility              0.41655603
## Agriculture           -0.06085861
## Examination           -0.11402160
## Education             -0.09932185
## Catholic               0.17549591
## Infant.Mortality       1.00000000
#Graphical characteristics
pairs(swiss, panel = panel.smooth,main = "swiss data")

plot(density(swiss$Fertility),main = "Fertility", xalb="Fertility")
## Warning in plot.window(...): "xalb"는 그래픽 매개변수가 아닙니다
## Warning in plot.xy(xy, type, ...): "xalb"는 그래픽 매개변수가 아닙니다
## Warning in axis(side = side, at = at, labels = labels, ...): "xalb"는 그래픽
## 매개변수가 아닙니다

## Warning in axis(side = side, at = at, labels = labels, ...): "xalb"는 그래픽
## 매개변수가 아닙니다
## Warning in box(...): "xalb"는 그래픽 매개변수가 아닙니다
## Warning in title(...): "xalb"는 그래픽 매개변수가 아닙니다
hist(swiss$Fertility,freq=F,add=1)

qqnorm(swiss$Fertility, ylab="fertility")
qqline(swiss$Fertility)

#2.Variable selection to choose the best model
#We start by fitting a linear regression model

attach(swiss)
bs.out2<-lm(Fertility ~., swiss)
summary(bs.out2)
## 
## Call:
## lm(formula = Fertility ~ ., data = swiss)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.2743  -5.2617   0.5032   4.1198  15.3213 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      66.91518   10.70604   6.250 1.91e-07 ***
## Agriculture      -0.17211    0.07030  -2.448  0.01873 *  
## Examination      -0.25801    0.25388  -1.016  0.31546    
## Education        -0.87094    0.18303  -4.758 2.43e-05 ***
## Catholic          0.10412    0.03526   2.953  0.00519 ** 
## Infant.Mortality  1.07705    0.38172   2.822  0.00734 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.165 on 41 degrees of freedom
## Multiple R-squared:  0.7067, Adjusted R-squared:  0.671 
## F-statistic: 19.76 on 5 and 41 DF,  p-value: 5.594e-10
library(car)
## 필요한 패키지를 로딩중입니다: carData
## 
## 다음의 패키지를 부착합니다: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following objects are masked from 'package:faraway':
## 
##     logit, vif
serg.res1=residuals(bs.out2)
durbinWatsonTest(serg.res1)
## [1] 1.453536