한정민-바보!.knit

library(car)

## Warning: 패키지 'car'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: carData

## Warning: 패키지 'carData'는 R 버전 4.2.3에서 작성되었습니다

library(caret)

## Warning: 패키지 'caret'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: ggplot2

## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: lattice

library(dplyr)

## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ISLR)

## Warning: 패키지 'ISLR'는 R 버전 4.2.3에서 작성되었습니다

# Show the working directory; "Data1.csv" below is resolved relative to it.
getwd()

## [1] "C:/Users/cic/Desktop"

# The hard-coded setwd() and rm(list = ls()) calls were dropped: getwd()
# above already reports that directory, and forcing a machine-specific path
# or wiping the caller's workspace makes the script non-portable.

# Load Data1.csv and fit the simple regression Happiness ~ BM.
df <- read.csv("Data1.csv")
bs.out2 <- lm(Happiness ~ BM, data = df)

# Keep the residuals for the normality checks that follow.
sreg.res1 <- residuals(bs.out2)
head(sreg.res1, 1)

##         1 
## 0.3413448

# Shapiro-Wilk normality test on the residuals of bs.out2.
shapiro.test(sreg.res1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 0.99439, p-value = 1.148e-06

# Penalize scientific notation so that small p-values print in fixed
# notation. This is a session-wide setting: the output further down in this
# document is printed in fixed notation because of it.
options(scipen=999)
shapiro.test(sreg.res1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 0.99439, p-value = 0.000001148

# Show the same test once more with R's default number formatting.
# The original call spelled the option "scripen", which only created an
# unused option and left the printed output unchanged. The penalty is set
# back to its default (0) for this one call and then restored, so the rest
# of the session keeps the fixed-notation setting chosen above.
old_opts <- options(scipen = 0)
shapiro.test(sreg.res1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 0.99439, p-value = 1.148e-06

options(old_opts)

# Coefficient table and fit statistics for Happiness ~ BM.
summary(bs.out2)

## 
## Call:
## lm(formula = Happiness ~ BM, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1591 -0.4577  0.0418  0.4409  1.9386 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  2.06599    0.05777   35.77 <0.0000000000000002 ***
## BM           0.49771    0.01878   26.50 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6404 on 1923 degrees of freedom
## Multiple R-squared:  0.2675, Adjusted R-squared:  0.2671 
## F-statistic: 702.2 on 1 and 1923 DF,  p-value: < 0.00000000000000022

# Draw the lm diagnostic plots on a single 2 x 2 page. The layout has to be
# set before plot() is called, and the parameter is "mfrow"; the original
# "mflow" is not a graphical parameter and only raised a warning.
old_par <- par(mfrow = c(2, 2))
plot(bs.out2)
par(old_par)  # restore the previous layout for later plots

# Multiple regression: add BF as a second predictor.
bs.out3 <- lm(Happiness ~ BM + BF, data = df)
summary(bs.out3)

## 
## Call:
## lm(formula = Happiness ~ BM + BF, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.23134 -0.40553  0.02014  0.41352  1.86210 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  1.60995    0.06412   25.11 <0.0000000000000002 ***
## BM           0.29054    0.02331   12.47 <0.0000000000000002 ***
## BF           0.33817    0.02435   13.89 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6106 on 1922 degrees of freedom
## Multiple R-squared:  0.3343, Adjusted R-squared:  0.3336 
## F-statistic: 482.6 on 2 and 1922 DF,  p-value: < 0.00000000000000022

# Variance inflation factors for the two predictors of bs.out3.
vif(bs.out3)

##       BM       BF 
## 1.693504 1.693504

# Reproducible 80/20 train/test split based on the outcome column.
set.seed(1)
# list = FALSE asks for the row indices in a form usable for df[idx, ];
# it is spelled FALSE because F is an ordinary variable that can be reassigned.
idx <- createDataPartition(df$Happiness, p = 0.8, list = FALSE)
train <- df[idx, ]
test <- df[-idx, ]
glimpse(train)

## Rows: 1,541
## Columns: 26
## $ Q1        <int> 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 5, 4, …
## $ Q2        <int> 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 2, 2, 4, 2, 2, 3, 2, 5, 4, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 3, 2, 3, 3, 2, 5, 4, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 2, 2, 4, 4, 1, 5, 4, …
## $ Q5        <int> 4, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 3, 1, 2, 2, 1, 5, 3, …
## $ Q6        <int> 2, 3, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 2, 2, 1, 4, 2, 1, 5, 4, …
## $ Q7        <int> 2, 2, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 3, 4, 4, 4, 2, 5, 5, …
## $ Q8        <int> 4, 4, 4, 4, 4, 5, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 5, 3, …
## $ Q9        <int> 4, 4, 4, 2, 4, 5, 3, 4, 4, 4, 2, 2, 2, 4, 2, 4, 4, 4, 5, 3, …
## $ Q10       <int> 4, 4, 4, 4, 4, 5, 2, 4, 2, 4, 4, 4, 4, 3, 2, 3, 3, 3, 5, 2, …
## $ Q11       <int> 4, 4, 4, 4, 4, 5, 4, 4, 4, 3, 4, 4, 5, 4, 3, 3, 4, 3, 5, 3, …
## $ Q12       <int> 4, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 3, 5, 4, 4, 2, 3, 4, 5, 4, …
## $ Q13       <int> 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 2, 4, 5, 4, 4, 4, 3, 2, 5, 4, …
## $ Q14       <int> 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 5, 4, 4, 4, 4, 4, 5, 3, …
## $ Q15       <int> 4, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 5, 4, 4, 4, 3, 3, 4, 2, …
## $ Q16       <int> 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, …
## $ Q17       <int> 4, 3, 4, 4, 4, 2, 4, 4, 4, 4, 3, 2, 4, 4, 3, 4, 2, 4, 4, 5, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 2, …
## $ Q19       <int> 4, 2, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 5, 4, 2, 3, 3, 3, 4, 1, …
## $ Q20       <int> 4, 1, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 5, 4, 2, 4, 3, 3, 5, 4, …
## $ Gender1   <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ EDU1      <int> 1, 1, 1, 2, 1, 1, 4, 3, 2, 1, 1, 3, 1, 1, 1, 4, 3, 2, 1, 2, …
## $ BF        <dbl> 3.4, 4.0, 4.2, 4.0, 4.0, 3.6, 3.6, 3.2, 4.0, 3.2, 4.0, 3.2, …
## $ BM        <dbl> 3.2, 3.4, 4.0, 3.6, 4.0, 4.6, 2.2, 3.2, 3.2, 3.6, 3.8, 3.6, …
## $ Happiness <dbl> 4.0, 4.0, 4.0, 4.0, 4.0, 4.4, 3.8, 4.0, 4.0, 3.4, 2.8, 3.8, …
## $ Peace     <dbl> 4.0, 2.8, 4.0, 4.0, 4.0, 2.4, 4.0, 3.2, 4.0, 3.9, 3.2, 3.2, …

# Fit Happiness on BM, BF and Peace using the training rows only.
fit <- lm(
  formula = Happiness ~ BM + BF + Peace,
  data = train
)
summary(fit)

## 
## Call:
## lm(formula = Happiness ~ BM + BF + Peace, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.81440 -0.32378 -0.00204  0.33429  2.19548 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  0.53856    0.08371   6.434       0.000000000166 ***
## BM           0.21065    0.02347   8.975 < 0.0000000000000002 ***
## BF           0.22207    0.02421   9.173 < 0.0000000000000002 ***
## Peace        0.47033    0.02268  20.740 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5409 on 1537 degrees of freedom
## Multiple R-squared:  0.4691, Adjusted R-squared:  0.4681 
## F-statistic: 452.7 on 3 and 1537 DF,  p-value: < 0.00000000000000022

# Happiness = 0.53856 + 0.21065*BM + 0.22207*BF + 0.47033*Peace

# Predict Happiness for the held-out rows and look at the first value.
lm_p <- predict(fit, newdata = test)
head(lm_p, n = 1)

##        3 
## 3.883613

# All held-out predictions, rounded to one decimal place.
round(predict(fit, newdata = test), digits = 1)

##    3    7   15   16   39   41   44   45   46   47   48   55   56   60   63   65 
##  3.9  4.1  4.1  4.6  3.9  3.9  2.3  4.2  3.9  3.9  2.0  3.8  3.6  4.0  4.1  3.8 
##   68   69   72   73   76   83   84   86   89   90   96  103  111  115  128  132 
##  4.2  3.3  4.2  3.8  3.7  3.8  4.8  3.6  4.0  3.5  3.4  3.9  4.0  3.5  3.8  4.1 
##  137  139  141  142  154  159  160  170  183  187  188  196  197  201  205  212 
##  3.9  3.2  3.7  3.8  3.0  4.2  4.0  3.6  4.1  4.2  3.7  4.4  3.9  4.0  3.7  3.2 
##  223  224  232  233  245  249  251  253  256  258  261  265  275  278  279  285 
##  4.0  4.1  3.4  3.3  3.4  3.4  3.5  2.2  4.1  3.6  3.8  4.1  3.2  2.3  3.4  3.3 
##  300  303  307  314  319  323  330  332  335  341  345  356  373  375  379  382 
##  3.7  3.9  3.9  3.8  3.4  3.6  3.9  4.0  3.7  3.9  3.9  3.3  3.2  3.3  3.6  3.0 
##  383  389  393  397  399  400  408  417  420  421  435  446  447  448  449  459 
##  3.8  3.9  3.9  3.8  4.1  3.7  3.8  4.0  4.3  3.5  3.5  3.1  3.7  3.0  3.8  3.7 
##  484  492  494  500  506  507  509  510  513  514  516  519  523  535  542  561 
##  3.8  4.1  3.4  3.3  3.7  3.8  3.5  3.7  3.6  3.0  4.5  5.0  4.0  3.8  3.5  3.0 
##  563  567  571  574  576  581  583  591  593  595  602  608  620  622  624  626 
##  3.3  3.6  3.1  3.2  3.2  4.5  3.7  3.5  3.8  3.9  4.5  2.9  3.4  3.5  4.3  3.4 
##  629  634  647  657  659  668  670  675  684  685  688  691  696  699  702  706 
##  4.0  3.6  3.5  2.9  2.5  2.6  2.4  3.1  3.5  4.3  3.5  3.4  3.2  3.1  3.5  2.8 
##  710  712  721  727  731  736  741  742  746  748  756  758  764  775  779  793 
##  3.9  4.2  2.5  2.2  2.5  4.4  4.5  2.9  3.8  4.0  3.5  3.5  3.1  3.2  2.2  3.8 
##  799  802  813  819  825  828  831  841  851  852  856  871  875  877  881  886 
##  3.6  4.7  3.7  3.4  4.0  2.0  4.0  3.4  2.8  2.8  2.7  4.0  3.2  3.4  3.4  4.0 
##  887  888  892  904  905  913  914  915  917  919  927  932  935  943  955  962 
##  2.4  4.0  2.7  3.4  3.7  3.4  3.5  3.2  4.0  3.9  3.7  3.5  3.0  2.7  4.1  3.5 
##  963  967  969  971  972  980  983  991  993  995  998 1003 1006 1007 1008 1014 
##  4.0  2.5  3.3  3.4  3.5  3.9  2.7  4.0  3.4  3.7  3.3  4.1  3.0  4.5  3.1  2.5 
## 1037 1042 1047 1053 1067 1070 1081 1090 1091 1093 1101 1102 1105 1115 1118 1119 
##  4.1  3.6  3.9  3.8  3.4  3.0  2.9  3.9  3.3  3.7  3.7  3.2  4.1  2.6  3.2  4.0 
## 1125 1127 1130 1136 1137 1138 1139 1141 1146 1150 1159 1160 1164 1166 1172 1173 
##  4.2  3.1  3.7  3.6  3.8  3.3  3.0  3.1  3.5  3.5  3.0  3.9  3.0  4.1  2.5  3.0 
## 1179 1181 1183 1187 1189 1191 1202 1208 1213 1219 1225 1232 1247 1253 1265 1266 
##  3.3  3.3  4.4  2.8  4.0  3.5  3.2  3.5  3.0  3.1  3.2  4.4  3.3  3.5  3.9  3.5 
## 1270 1276 1279 1287 1295 1298 1300 1302 1303 1304 1305 1313 1314 1316 1317 1320 
##  4.2  4.2  4.2  3.9  3.6  3.9  3.7  3.6  3.3  3.8  4.2  3.4  4.0  4.1  3.5  3.7 
## 1322 1329 1335 1343 1344 1356 1365 1367 1374 1376 1387 1388 1394 1395 1397 1409 
##  2.7  2.9  3.2  2.8  3.9  3.9  3.5  2.9  3.3  3.9  3.1  3.1  3.8  4.2  3.4  3.2 
## 1414 1419 1422 1423 1424 1426 1428 1433 1435 1450 1455 1460 1461 1463 1481 1496 
##  4.0  3.2  3.1  4.2  3.8  3.4  3.3  3.0  3.4  3.8  3.6  3.2  3.2  3.6  3.8  3.3 
## 1499 1500 1502 1505 1507 1508 1516 1519 1529 1535 1536 1541 1550 1553 1562 1568 
##  3.9  3.4  3.4  3.6  3.9  3.5  3.3  3.1  3.9  4.0  4.3  3.4  3.6  3.2  3.7  4.0 
## 1572 1596 1599 1600 1603 1611 1612 1615 1617 1636 1651 1652 1654 1656 1668 1675 
##  3.0  3.9  4.2  4.1  3.8  3.9  3.4  3.4  3.1  3.3  2.8  3.0  2.8  3.6  3.9  3.4 
## 1683 1687 1690 1694 1695 1696 1700 1708 1717 1719 1720 1724 1727 1730 1733 1734 
##  3.5  3.4  2.9  3.5  3.4  3.0  2.9  3.4  3.5  3.2  3.5  2.7  2.7  4.1  3.4  4.1 
## 1740 1743 1753 1755 1764 1774 1775 1776 1781 1787 1793 1795 1800 1807 1816 1824 
##  4.0  3.9  4.7  4.3  4.2  3.5  4.6  3.6  4.4  3.4  3.5  3.8  3.7  3.7  3.1  4.2 
## 1826 1839 1842 1851 1856 1857 1863 1874 1887 1892 1893 1895 1899 1902 1908 1909 
##  3.5  3.6  3.4  3.3  3.2  2.6  2.5  3.5  3.5  3.5  3.1  2.4  2.4  3.3  3.2  3.6

# Store the rounded predictions next to the observed values.
test$Happiness1 <- round(predict(fit, newdata = test), 1)
# View() opens an interactive data viewer, so it is only called when a
# person is at the console; a knitted or batch run skips it.
if (interactive()) {
  View(test)
}

# Refit the same model under a second name and compute the test-set mean
# squared error of the rounded predictions.
fit1 <- lm(Happiness ~ BM + BF + Peace, data = train)
mean((test$Happiness - test$Happiness1)^2)

## [1] 0.3188021

# Happiness2 has to exist before it is used. The original computed this MSE
# before the column was created, so it took the mean of an empty vector and
# printed NaN. fit1 uses the same formula and data as fit, so the result
# equals the MSE above.
test$Happiness2 <- round(predict(fit1, newdata = test), 1)
mean((test$Happiness - test$Happiness2)^2)

## [1] 0.3188021

# First held-out prediction from the original fit.
head(predict(fit, newdata = test), n = 1)

##        3 
## 3.883613

# The same predictions from fit1, printed rounded to one decimal place.
lm_p <- predict(fit1, newdata = test)
round(lm_p, digits = 1)

##    3    7   15   16   39   41   44   45   46   47   48   55   56   60   63   65 
##  3.9  4.1  4.1  4.6  3.9  3.9  2.3  4.2  3.9  3.9  2.0  3.8  3.6  4.0  4.1  3.8 
##   68   69   72   73   76   83   84   86   89   90   96  103  111  115  128  132 
##  4.2  3.3  4.2  3.8  3.7  3.8  4.8  3.6  4.0  3.5  3.4  3.9  4.0  3.5  3.8  4.1 
##  137  139  141  142  154  159  160  170  183  187  188  196  197  201  205  212 
##  3.9  3.2  3.7  3.8  3.0  4.2  4.0  3.6  4.1  4.2  3.7  4.4  3.9  4.0  3.7  3.2 
##  223  224  232  233  245  249  251  253  256  258  261  265  275  278  279  285 
##  4.0  4.1  3.4  3.3  3.4  3.4  3.5  2.2  4.1  3.6  3.8  4.1  3.2  2.3  3.4  3.3 
##  300  303  307  314  319  323  330  332  335  341  345  356  373  375  379  382 
##  3.7  3.9  3.9  3.8  3.4  3.6  3.9  4.0  3.7  3.9  3.9  3.3  3.2  3.3  3.6  3.0 
##  383  389  393  397  399  400  408  417  420  421  435  446  447  448  449  459 
##  3.8  3.9  3.9  3.8  4.1  3.7  3.8  4.0  4.3  3.5  3.5  3.1  3.7  3.0  3.8  3.7 
##  484  492  494  500  506  507  509  510  513  514  516  519  523  535  542  561 
##  3.8  4.1  3.4  3.3  3.7  3.8  3.5  3.7  3.6  3.0  4.5  5.0  4.0  3.8  3.5  3.0 
##  563  567  571  574  576  581  583  591  593  595  602  608  620  622  624  626 
##  3.3  3.6  3.1  3.2  3.2  4.5  3.7  3.5  3.8  3.9  4.5  2.9  3.4  3.5  4.3  3.4 
##  629  634  647  657  659  668  670  675  684  685  688  691  696  699  702  706 
##  4.0  3.6  3.5  2.9  2.5  2.6  2.4  3.1  3.5  4.3  3.5  3.4  3.2  3.1  3.5  2.8 
##  710  712  721  727  731  736  741  742  746  748  756  758  764  775  779  793 
##  3.9  4.2  2.5  2.2  2.5  4.4  4.5  2.9  3.8  4.0  3.5  3.5  3.1  3.2  2.2  3.8 
##  799  802  813  819  825  828  831  841  851  852  856  871  875  877  881  886 
##  3.6  4.7  3.7  3.4  4.0  2.0  4.0  3.4  2.8  2.8  2.7  4.0  3.2  3.4  3.4  4.0 
##  887  888  892  904  905  913  914  915  917  919  927  932  935  943  955  962 
##  2.4  4.0  2.7  3.4  3.7  3.4  3.5  3.2  4.0  3.9  3.7  3.5  3.0  2.7  4.1  3.5 
##  963  967  969  971  972  980  983  991  993  995  998 1003 1006 1007 1008 1014 
##  4.0  2.5  3.3  3.4  3.5  3.9  2.7  4.0  3.4  3.7  3.3  4.1  3.0  4.5  3.1  2.5 
## 1037 1042 1047 1053 1067 1070 1081 1090 1091 1093 1101 1102 1105 1115 1118 1119 
##  4.1  3.6  3.9  3.8  3.4  3.0  2.9  3.9  3.3  3.7  3.7  3.2  4.1  2.6  3.2  4.0 
## 1125 1127 1130 1136 1137 1138 1139 1141 1146 1150 1159 1160 1164 1166 1172 1173 
##  4.2  3.1  3.7  3.6  3.8  3.3  3.0  3.1  3.5  3.5  3.0  3.9  3.0  4.1  2.5  3.0 
## 1179 1181 1183 1187 1189 1191 1202 1208 1213 1219 1225 1232 1247 1253 1265 1266 
##  3.3  3.3  4.4  2.8  4.0  3.5  3.2  3.5  3.0  3.1  3.2  4.4  3.3  3.5  3.9  3.5 
## 1270 1276 1279 1287 1295 1298 1300 1302 1303 1304 1305 1313 1314 1316 1317 1320 
##  4.2  4.2  4.2  3.9  3.6  3.9  3.7  3.6  3.3  3.8  4.2  3.4  4.0  4.1  3.5  3.7 
## 1322 1329 1335 1343 1344 1356 1365 1367 1374 1376 1387 1388 1394 1395 1397 1409 
##  2.7  2.9  3.2  2.8  3.9  3.9  3.5  2.9  3.3  3.9  3.1  3.1  3.8  4.2  3.4  3.2 
## 1414 1419 1422 1423 1424 1426 1428 1433 1435 1450 1455 1460 1461 1463 1481 1496 
##  4.0  3.2  3.1  4.2  3.8  3.4  3.3  3.0  3.4  3.8  3.6  3.2  3.2  3.6  3.8  3.3 
## 1499 1500 1502 1505 1507 1508 1516 1519 1529 1535 1536 1541 1550 1553 1562 1568 
##  3.9  3.4  3.4  3.6  3.9  3.5  3.3  3.1  3.9  4.0  4.3  3.4  3.6  3.2  3.7  4.0 
## 1572 1596 1599 1600 1603 1611 1612 1615 1617 1636 1651 1652 1654 1656 1668 1675 
##  3.0  3.9  4.2  4.1  3.8  3.9  3.4  3.4  3.1  3.3  2.8  3.0  2.8  3.6  3.9  3.4 
## 1683 1687 1690 1694 1695 1696 1700 1708 1717 1719 1720 1724 1727 1730 1733 1734 
##  3.5  3.4  2.9  3.5  3.4  3.0  2.9  3.4  3.5  3.2  3.5  2.7  2.7  4.1  3.4  4.1 
## 1740 1743 1753 1755 1764 1774 1775 1776 1781 1787 1793 1795 1800 1807 1816 1824 
##  4.0  3.9  4.7  4.3  4.2  3.5  4.6  3.6  4.4  3.4  3.5  3.8  3.7  3.7  3.1  4.2 
## 1826 1839 1842 1851 1856 1857 1863 1874 1887 1892 1893 1895 1899 1902 1908 1909 
##  3.5  3.6  3.4  3.3  3.2  2.6  2.5  3.5  3.5  3.5  3.1  2.4  2.4  3.3  3.2  3.6

# Store the rounded fit1 predictions as a new column of the test set.
test$Happiness2 <- round(predict(fit1,newdata=test),1)

# Small worked example: thirteen observations of four predictors (x1..x4)
# and one response (y).
x1 <- c(7, 1, 11, 11, 7, 11, 3, 1, 2, 21, 1, 11, 10)
x2 <- c(26, 29, 56, 31, 52, 55, 71, 31, 54, 47, 40, 66, 68)
x3 <- c(6, 15, 8, 8, 6, 9, 17, 22, 18, 4, 23, 9, 8)
x4 <- c(60, 52, 20, 47, 33, 22, 6, 44, 22, 26, 34, 12, 12)
# NOTE(review): the sixth response was entered as "109." (= 109). The other
# values match the classic Hald cement data, where this entry is 109.2 --
# confirm against the data source before changing it.
y <- c(
  78.5, 74.3, 104.3, 87.6, 95.9, 109, 102.7,
  72.5, 93.1, 115.9, 83.8, 113.3, 109.4
)
df <- data.frame(x1 = x1, x2 = x2, x3 = x3, x4 = x4, y = y)

# Forward selection by AIC: start from the intercept-only model and allow
# any of the four predictors to be added.
step(
  lm(y ~ 1, data = df),
  scope = list(lower = ~1, upper = ~ x1 + x2 + x3 + x4),
  direction = "forward"
)

## Start:  AIC=71.42
## y ~ 1
## 
##        Df Sum of Sq     RSS    AIC
## + x4    1   1829.53  880.75 58.806
## + x2    1   1807.27  903.02 59.130
## + x1    1   1447.43 1262.86 63.490
## + x3    1    774.97 1935.32 69.040
## <none>              2710.29 71.418
## 
## Step:  AIC=58.81
## y ~ x4
## 
##        Df Sum of Sq    RSS    AIC
## + x1    1    807.47  73.29 28.483
## + x3    1    706.84 173.91 39.717
## <none>              880.75 58.806
## + x2    1     15.04 865.72 60.582
## 
## Step:  AIC=28.48
## y ~ x4 + x1
## 
##        Df Sum of Sq    RSS    AIC
## + x2    1    26.841 46.449 24.554
## + x3    1    23.929 49.361 25.345
## <none>              73.289 28.483
## 
## Step:  AIC=24.55
## y ~ x4 + x1 + x2
## 
##        Df Sum of Sq    RSS    AIC
## <none>              46.449 24.554
## + x3    1   0.12025 46.328 26.520

## 
## Call:
## lm(formula = y ~ x4 + x1 + x2, data = df)
## 
## Coefficients:
## (Intercept)           x4           x1           x2  
##     71.6032      -0.2358       1.4505       0.4165

#y~1
#y~x4
#y~x4+x1
#y~x4+x1+x2

# Load the built-in attitude data set and inspect its columns.
data(attitude)
attitude %>% glimpse()

## Rows: 30
## Columns: 7
## $ rating     <dbl> 43, 63, 71, 61, 81, 43, 58, 71, 72, 67, 64, 67, 69, 68, 77,…
## $ complaints <dbl> 51, 64, 70, 63, 78, 55, 67, 75, 82, 61, 53, 60, 62, 83, 77,…
## $ privileges <dbl> 30, 51, 68, 45, 56, 49, 42, 50, 72, 45, 53, 47, 57, 83, 54,…
## $ learning   <dbl> 39, 54, 69, 47, 66, 44, 56, 55, 67, 47, 58, 39, 42, 45, 72,…
## $ raises     <dbl> 61, 63, 76, 54, 71, 54, 66, 70, 71, 62, 58, 59, 55, 59, 79,…
## $ critical   <dbl> 92, 73, 86, 84, 83, 49, 68, 66, 83, 80, 67, 74, 63, 77, 77,…
## $ advance    <dbl> 45, 47, 48, 35, 47, 34, 35, 41, 31, 41, 34, 41, 25, 35, 46,…

# Backward elimination by AIC, starting from the model with every predictor.
step(
  lm(rating ~ ., data = attitude),
  direction = "backward"
)

## Start:  AIC=123.36
## rating ~ complaints + privileges + learning + raises + critical + 
##     advance
## 
##              Df Sum of Sq    RSS    AIC
## - critical    1      3.41 1152.4 121.45
## - raises      1      6.80 1155.8 121.54
## - privileges  1     14.47 1163.5 121.74
## - advance     1     74.11 1223.1 123.24
## <none>                    1149.0 123.36
## - learning    1    180.50 1329.5 125.74
## - complaints  1    724.80 1873.8 136.04
## 
## Step:  AIC=121.45
## rating ~ complaints + privileges + learning + raises + advance
## 
##              Df Sum of Sq    RSS    AIC
## - raises      1     10.61 1163.0 119.73
## - privileges  1     14.16 1166.6 119.82
## - advance     1     71.27 1223.7 121.25
## <none>                    1152.4 121.45
## - learning    1    177.74 1330.1 123.75
## - complaints  1    724.70 1877.1 134.09
## 
## Step:  AIC=119.73
## rating ~ complaints + privileges + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - privileges  1     16.10 1179.1 118.14
## - advance     1     61.60 1224.6 119.28
## <none>                    1163.0 119.73
## - learning    1    197.03 1360.0 122.42
## - complaints  1   1165.94 2328.9 138.56
## 
## Step:  AIC=118.14
## rating ~ complaints + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - advance     1     75.54 1254.7 118.00
## <none>                    1179.1 118.14
## - learning    1    186.12 1365.2 120.54
## - complaints  1   1259.91 2439.0 137.94
## 
## Step:  AIC=118
## rating ~ complaints + learning
## 
##              Df Sum of Sq    RSS    AIC
## <none>                    1254.7 118.00
## - learning    1    114.73 1369.4 118.63
## - complaints  1   1370.91 2625.6 138.16

## 
## Call:
## lm(formula = rating ~ complaints + learning, data = attitude)
## 
## Coefficients:
## (Intercept)   complaints     learning  
##      9.8709       0.6435       0.2112

# NOTE(review): direction = "forward" can only add terms, and this call
# starts from the model that already contains every predictor with no scope
# given, so nothing is added and the full model is returned unchanged (see
# the output below). For an actual forward search, start from
# lm(rating ~ 1, data = attitude) and pass the candidate terms via scope,
# as the y ~ x1 + x2 + x3 + x4 example earlier in this file does.
step(lm(rating~.,data=attitude),direction="forward")

## Start:  AIC=123.36
## rating ~ complaints + privileges + learning + raises + critical + 
##     advance

## 
## Call:
## lm(formula = rating ~ complaints + privileges + learning + raises + 
##     critical + advance, data = attitude)
## 
## Coefficients:
## (Intercept)   complaints   privileges     learning       raises     critical  
##    10.78708      0.61319     -0.07305      0.32033      0.08173      0.03838  
##     advance  
##    -0.21706

# Stepwise search in both directions, again starting from the full model.
step(
  lm(rating ~ ., data = attitude),
  direction = "both"
)

## Start:  AIC=123.36
## rating ~ complaints + privileges + learning + raises + critical + 
##     advance
## 
##              Df Sum of Sq    RSS    AIC
## - critical    1      3.41 1152.4 121.45
## - raises      1      6.80 1155.8 121.54
## - privileges  1     14.47 1163.5 121.74
## - advance     1     74.11 1223.1 123.24
## <none>                    1149.0 123.36
## - learning    1    180.50 1329.5 125.74
## - complaints  1    724.80 1873.8 136.04
## 
## Step:  AIC=121.45
## rating ~ complaints + privileges + learning + raises + advance
## 
##              Df Sum of Sq    RSS    AIC
## - raises      1     10.61 1163.0 119.73
## - privileges  1     14.16 1166.6 119.82
## - advance     1     71.27 1223.7 121.25
## <none>                    1152.4 121.45
## + critical    1      3.41 1149.0 123.36
## - learning    1    177.74 1330.1 123.75
## - complaints  1    724.70 1877.1 134.09
## 
## Step:  AIC=119.73
## rating ~ complaints + privileges + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - privileges  1     16.10 1179.1 118.14
## - advance     1     61.60 1224.6 119.28
## <none>                    1163.0 119.73
## + raises      1     10.61 1152.4 121.45
## + critical    1      7.21 1155.8 121.54
## - learning    1    197.03 1360.0 122.42
## - complaints  1   1165.94 2328.9 138.56
## 
## Step:  AIC=118.14
## rating ~ complaints + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - advance     1     75.54 1254.7 118.00
## <none>                    1179.1 118.14
## + privileges  1     16.10 1163.0 119.73
## + raises      1     12.54 1166.6 119.82
## + critical    1      7.18 1171.9 119.96
## - learning    1    186.12 1365.2 120.54
## - complaints  1   1259.91 2439.0 137.94
## 
## Step:  AIC=118
## rating ~ complaints + learning
## 
##              Df Sum of Sq    RSS    AIC
## <none>                    1254.7 118.00
## + advance     1     75.54 1179.1 118.14
## - learning    1    114.73 1369.4 118.63
## + privileges  1     30.03 1224.6 119.28
## + raises      1      1.19 1253.5 119.97
## + critical    1      0.00 1254.7 120.00
## - complaints  1   1370.91 2625.6 138.16

## 
## Call:
## lm(formula = rating ~ complaints + learning, data = attitude)
## 
## Coefficients:
## (Intercept)   complaints     learning  
##      9.8709       0.6435       0.2112

# Reload Data1.csv (df was overwritten by the small x1..x4 example) and
# count the rows for each value of the 0/1 Gender1 column.
df <- read.csv("Data1.csv")
table(df$Gender1)

## 
##    0    1 
## 1136  789

# Regress Happiness on the numeric Gender1 indicator.
lmfit <- lm(Happiness ~ Gender1, data = df)
summary(lmfit)

## 
## Call:
## lm(formula = Happiness ~ Gender1, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1659 -0.5199  0.0801  0.4801  1.4801 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  3.56593    0.02219 160.711 <0.0000000000000002 ***
## Gender1     -0.04603    0.03466  -1.328               0.184    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7479 on 1923 degrees of freedom
## Multiple R-squared:  0.0009166,  Adjusted R-squared:  0.0003971 
## F-statistic: 1.764 on 1 and 1923 DF,  p-value: 0.1843

# Treat education level and gender as categorical predictors.
df$EDU1 <- factor(df$EDU1)
df$GENDER1 <- factor(df$Gender1)

# Use the factor column created above. The original formula still referred
# to the numeric Gender1, so the factor conversion was never used and the
# fit was an exact repeat of lmfit. With a two-level factor the estimates
# are the same; only the coefficient is now labelled by its level.
lmfit1 <- lm(Happiness ~ GENDER1, data = df)
summary(lmfit1)

## 
## Call:
## lm(formula = Happiness ~ GENDER1, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1659 -0.5199  0.0801  0.4801  1.4801 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  3.56593    0.02219 160.711 <0.0000000000000002 ***
## GENDER11    -0.04603    0.03466  -1.328               0.184    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7479 on 1923 degrees of freedom
## Multiple R-squared:  0.0009166,  Adjusted R-squared:  0.0003971 
## F-statistic: 1.764 on 1 and 1923 DF,  p-value: 0.1843

# Happiness by education level (this reuses and overwrites the name lmfit).
# The coefficient rows EDU12..EDU14 below are contrasts against level 1.
lmfit <- lm(
  formula = Happiness ~ EDU1,
  data = df
)
summary(lmfit)

## 
## Call:
## lm(formula = Happiness ~ EDU1, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1512 -0.4754  0.1246  0.4488  1.5246 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  3.58670    0.04893  73.303 <0.0000000000000002 ***
## EDU12       -0.11127    0.05980  -1.861              0.0629 .  
## EDU13       -0.03552    0.05422  -0.655              0.5125    
## EDU14        0.06330    0.07219   0.877              0.3806    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7469 on 1921 degrees of freedom
## Multiple R-squared:  0.004555,   Adjusted R-squared:  0.003001 
## F-statistic:  2.93 on 3 and 1921 DF,  p-value: 0.03247

# Number of rows in each education level.
table(df$EDU1)

## 
##    1    2    3    4 
##  233  472 1022  198

# Education codes: 1 = middle school graduate | 2 = high school graduate |
# 3 = college graduate | 4 = graduate school

# Refit the simple regression Happiness ~ BM on the reloaded data; the
# summary below matches the one printed for the first fit.
bs.out2 <- lm(Happiness~BM,data=df)
summary(bs.out2)

## 
## Call:
## lm(formula = Happiness ~ BM, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1591 -0.4577  0.0418  0.4409  1.9386 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  2.06599    0.05777   35.77 <0.0000000000000002 ***
## BM           0.49771    0.01878   26.50 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6404 on 1923 degrees of freedom
## Multiple R-squared:  0.2675, Adjusted R-squared:  0.2671 
## F-statistic: 702.2 on 1 and 1923 DF,  p-value: < 0.00000000000000022

# USArrests ships with the datasets package.
library(datasets)
data("USArrests")
head(USArrests)

##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7

# Principal components on standardized variables. The argument is named
# "scale." (with a trailing dot); "scale" only worked through partial
# argument matching, and TRUE is used because T can be reassigned.
fit <- prcomp(USArrests, scale. = TRUE)
summary(fit)

## Importance of components:
##                           PC1    PC2     PC3     PC4
## Standard deviation     1.5749 0.9949 0.59713 0.41645
## Proportion of Variance 0.6201 0.2474 0.08914 0.04336
## Cumulative Proportion  0.6201 0.8675 0.95664 1.00000

# Print the built-in Nile time series (annual values, 1871-1970, shown below).
Nile

## Time Series:
## Start = 1871 
## End = 1970 
## Frequency = 1 
##   [1] 1120 1160  963 1210 1160 1160  813 1230 1370 1140  995  935 1110  994 1020
##  [16]  960 1180  799  958 1140 1100 1210 1150 1250 1260 1220 1030 1100  774  840
##  [31]  874  694  940  833  701  916  692 1020 1050  969  831  726  456  824  702
##  [46] 1120 1100  832  764  821  768  845  864  862  698  845  744  796 1040  759
##  [61]  781  865  845  944  984  897  822 1010  771  676  649  846  812  742  801
##  [76] 1040  860  874  848  890  744  749  838 1050  918  986  797  923  975  815
##  [91] 1020  906  901 1170  912  746  919  718  714  740

# Plot the raw series, then its first- and second-order differences.
plot(Nile)

Nile.diff1 <- diff(Nile, lag = 1, differences = 1)
plot(Nile.diff1)

Nile.diff2 <- diff(Nile, lag = 1, differences = 2)
plot(Nile.diff2)