7월 20일 실습

#2023.07.20
# Print the current working directory.
getwd()

## [1] "C:/data"

# NOTE(review): rm(list = ls()) removes every object that ls() lists in the
# current environment; starting from a fresh R session is the safer way to
# get a clean state.
rm(list = ls())
# NOTE(review): hard-coded working directory; the relative path
# "Data1.csv" passed to read.csv() further down is resolved against it.
setwd('c:/data')
# dplyr supplies glimpse(), used below to inspect the data frames.
library(dplyr)

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(caret)

## 필요한 패키지를 로딩중입니다: ggplot2

## 필요한 패키지를 로딩중입니다: lattice

# 단순선형회귀분석
# BM(건강한 자기 관리)
# Happiness(행복도)
# 건강한 자기 관리를 잘할수록 행복도 증가하는가??

# Read Data1.csv (resolved against the working directory) and list every
# column with its type and first values.
df <- read.csv(file = "Data1.csv")
glimpse(x = df)

## Rows: 1,925
## Columns: 26
## $ Q1        <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ Q2        <int> 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 2, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, …
## $ Q5        <int> 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, …
## $ Q6        <int> 2, 3, 4, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 4, …
## $ Q7        <int> 2, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, …
## $ Q8        <int> 4, 4, 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, …
## $ Q9        <int> 4, 4, 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, …
## $ Q10       <int> 4, 4, 2, 4, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, …
## $ Q11       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 3, …
## $ Q12       <int> 4, 4, 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 2, …
## $ Q13       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, …
## $ Q14       <int> 4, 4, 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, …
## $ Q15       <int> 4, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 4, …
## $ Q16       <int> 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, …
## $ Q17       <int> 4, 3, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, …
## $ Q19       <int> 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, …
## $ Q20       <int> 4, 1, 3, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 4, …
## $ Gender1   <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, …
## $ EDU1      <int> 1, 1, 2, 1, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 4, …
## $ BF        <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, …
## $ BM        <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, …
## $ Happiness <dbl> 4.0, 4.0, 3.8, 4.0, 4.0, 4.0, 4.8, 4.4, 3.8, 4.0, 4.0, 3.4, …
## $ Peace     <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, …

# 회귀분석의 변수는 동간척도 이상이어야 함
# 독립변수가 범주형이라도 더미변수로 변환하면 회귀분석이 가능하다.
# 종속변수가 이진 데이터(0,1)인 경우에는 로지스틱 회귀분석이라 함

# Simple linear regression: Happiness explained by BM alone.
bs.out2 <- lm(formula = Happiness ~ BM, data = df)
# Coefficient table, residual summary and overall fit statistics.
summary(object = bs.out2)

## 
## Call:
## lm(formula = Happiness ~ BM, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1591 -0.4577  0.0418  0.4409  1.9386 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.06599    0.05777   35.77   <2e-16 ***
## BM           0.49771    0.01878   26.50   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6404 on 1923 degrees of freedom
## Multiple R-squared:  0.2675, Adjusted R-squared:  0.2671 
## F-statistic: 702.2 on 1 and 1923 DF,  p-value: < 2.2e-16

# 건강한 자기 관리가 '1'증가할 경우 행복은 0.498증가함

# 더빈왓슨 검정
# 더빈왓슨 통계량은 0~4 사이의 값을 가질 수 있음.
# 0에 가까울수록 -> 오차항에 양의 자기상관
# 4에 가까울수록 -> 오차항에 음의 자기상관
# 2에 가까울수록 -> 오차항의 자기상관이 없음

#오차항 : 모집단을 알 수 없음. 실제 관측값과 모회귀선의 차이
#잔차항 : 표본을 통해 표본 회귀선을 만들고 그때 관측값 차이를
  #잔차라고 하고, 잔차를 통해서 오차항의 가정조건 성립을 확인하게 된다.
  #이를 잔차 분석이라고 한다.

#Happiness = 2.066 + 0.498*BM, 모델링이라 할 수 있음

library(car)

## 필요한 패키지를 로딩중입니다: carData

## 
## 다음의 패키지를 부착합니다: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

# Keep the residuals of the simple regression; the normality test further
# down reuses them.
sreq.res1 <- resid(bs.out2)
# Durbin-Watson statistic computed on the residual vector (car package).
durbinWatsonTest(model = sreq.res1)

## [1] 1.787942

# Draw the diagnostic plots of the simple regression on one 2 x 2 page.
# par() returns the previous settings, so they are saved and restored
# afterwards; otherwise every later plot on this device would stay squeezed
# into the 2 x 2 grid.
old_par <- par(mfrow = c(2, 2))
plot(bs.out2)
par(old_par)

#잔차의 등분산성을 입증하기 위해서는 산점도에서
#예측값(fitted value)의 변화에 관계없이 잔차(residuals)가 
#분포하는 모습이 일정하여야 한다



# Normality check of the residuals: Shapiro-Wilk test.
shapiro.test(x = sreq.res1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sreq.res1
## W = 0.99439, p-value = 1.148e-06

#귀무가설 : 정규분포이다.
#대립가설 : 정규분포가 아니다.
#유의확률(p-value)< 유의수준(0.05) 이므로 귀무가설 기각.
#따라서 정규분포가 아니다.

#happiness 정규성 검정을 한다.

# Penalise scientific notation so the p-value is printed as a plain decimal,
# then repeat the Shapiro-Wilk test on the same residuals.
options(scipen = 999)
shapiro.test(x = sreq.res1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sreq.res1
## W = 0.99439, p-value = 0.000001148

#위의 잔차분석 결과 등분산성과 정규성을 만족하지 못함.
#정규성을 만족하지 못하는 경우 박스콕스 변수 변환을 적용할 수 있다.

# Multiple regression: BF joins BM as a second predictor of Happiness.
bs.out3 <- lm(formula = Happiness ~ BM + BF, data = df)
summary(object = bs.out3)

## 
## Call:
## lm(formula = Happiness ~ BM + BF, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.23134 -0.40553  0.02014  0.41352  1.86210 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  1.60995    0.06412   25.11 <0.0000000000000002 ***
## BM           0.29054    0.02331   12.47 <0.0000000000000002 ***
## BF           0.33817    0.02435   13.89 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6106 on 1922 degrees of freedom
## Multiple R-squared:  0.3343, Adjusted R-squared:  0.3336 
## F-statistic: 482.6 on 2 and 1922 DF,  p-value: < 0.00000000000000022

# Variance inflation factor of each predictor in the two-predictor model
# (vif() comes from car).
library(car)
vif(mod = bs.out3)

##       BM       BF 
## 1.693504 1.693504

#vif 값이 모두 10보다 작으므로 두 독립변수 사이에 다중공선성 문제는 없다고 볼 수 있다.

library(caret)
# createDataPartition() samples at random; fixing the seed makes the 80/20
# split, and every result derived from it, reproducible between runs.
set.seed(20230720)
# Row indices of the training set (about 80% of the rows), returned as a
# matrix rather than a list because list = FALSE.
idx <- createDataPartition(df$Happiness, p = 0.8, list = FALSE)
# Selected rows form the training set; all remaining rows are held out.
train <- df[idx, ]
test <- df[-idx, ]
library(dplyr)
glimpse(train)

## Rows: 1,541
## Columns: 26
## $ Q1        <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 5, …
## $ Q2        <int> 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 3, 2, 5, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, 2, 5, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, 1, 5, …
## $ Q5        <int> 4, 4, 2, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, 1, 5, …
## $ Q6        <int> 2, 3, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 2, 1, 5, …
## $ Q7        <int> 2, 2, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, 2, 5, …
## $ Q8        <int> 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, 4, 5, …
## $ Q9        <int> 4, 4, 4, 2, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, 4, 5, …
## $ Q10       <int> 4, 4, 2, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, 3, 5, …
## $ Q11       <int> 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 4, 3, 5, …
## $ Q12       <int> 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 3, 4, 5, …
## $ Q13       <int> 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 3, 2, 5, …
## $ Q14       <int> 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, 4, 5, …
## $ Q15       <int> 4, 4, 3, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 3, 3, 4, …
## $ Q16       <int> 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, 4, 4, …
## $ Q17       <int> 4, 3, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 2, 4, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, …
## $ Q19       <int> 4, 2, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, 3, 4, …
## $ Q20       <int> 4, 1, 3, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 3, 3, 5, …
## $ Gender1   <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, …
## $ EDU1      <int> 1, 1, 2, 2, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 3, 2, 1, …
## $ BF        <dbl> 3.4, 4.0, 3.6, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, 4.0, 3.2, …
## $ BM        <dbl> 3.2, 3.4, 3.6, 3.6, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, 3.8, 3.6, …
## $ Happiness <dbl> 4.0, 4.0, 3.8, 4.0, 4.8, 4.4, 3.8, 4.0, 4.0, 3.4, 2.8, 3.8, …
## $ Peace     <dbl> 4.0, 2.8, 3.8, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, 3.2, 3.2, …

# Linear regression model 1: Happiness on BM, BF and Peace, fitted on the
# training rows only.
fit <- lm(formula = Happiness ~ BM + BF + Peace, data = train)
summary(object = fit)

## 
## Call:
## lm(formula = Happiness ~ BM + BF + Peace, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89485 -0.33053  0.00184  0.34587  2.18890 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  0.51632    0.08649   5.970        0.00000000295 ***
## BM           0.22029    0.02369   9.298 < 0.0000000000000002 ***
## BF           0.25288    0.02458  10.289 < 0.0000000000000002 ***
## Peace        0.44082    0.02328  18.935 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5493 on 1537 degrees of freedom
## Multiple R-squared:  0.4608, Adjusted R-squared:  0.4597 
## F-statistic: 437.8 on 3 and 1537 DF,  p-value: < 0.00000000000000022

#

# Predicted Happiness of model 1 for every held-out row.
predict(object = fit, newdata = test)

##        4        6       20       26       29       34       38       40 
## 4.222858 4.172282 3.787181 3.738026 4.229376 3.731508 4.121706 3.731458 
##       42       44       48       67       71       72       73       84 
## 3.819623 2.344300 1.972285 3.932440 4.128225 4.172282 3.813204 4.865986 
##       88       89       92      100      101      102      105      109 
## 3.446187 4.040110 3.315435 3.938908 4.084117 3.641972 4.040060 3.793799 
##      110      117      122      123      134      148      151      152 
## 3.705584 4.203353 4.954101 3.441139 4.405656 3.263439 3.819623 2.652803 
##      167      170      173      175      179      180      184      203 
## 3.680982 3.599285 3.951995 4.222858 4.260397 3.692548 3.863755 3.591645 
##      204      208      215      221      226      227      230      231 
## 3.756110 2.551701 3.938958 3.705534 4.020555 4.121706 3.333420 3.661427 
##      233      234      237      249      254      257      258      259 
## 3.276425 2.445452 3.313965 3.377477 3.813179 4.172282 3.566893 3.320433 
##      260      263      268      269      270      271      281      297 
## 3.573362 3.806785 3.599285 3.496862 4.392670 3.837757 3.850843 3.384195 
##      302      304      306      307      309      314      317      322 
## 4.128225 3.674513 3.951995 3.881864 3.402179 3.787231 4.064662 3.384045 
##      325      332      335      336      337      342      353      362 
## 3.434621 3.956993 3.667945 4.216340 2.432415 3.654909 3.996052 3.579830 
##      365      367      375      379      396      401      405      407 
## 2.961455 3.856040 3.264909 3.503381 1.619626 3.277846 2.344300 4.007618 
##      425      426      447      449      453      455      464      474 
## 4.084117 3.553757 3.655058 3.743123 3.289462 3.932490 4.172282 3.604482 
##      476      480      496      497      498      519      522      524 
## 3.945377 3.756185 3.516367 3.207765 3.996052 4.998158 4.172282 3.932540 
##      536      547      549      553      554      562      572      576 
## 2.986007 3.441089 3.327051 2.445402 2.350818 4.090636 3.220702 3.258241 
##      593      598      605      613      616      617      622      625 
## 3.736655 3.454026 4.733663 3.586298 3.780862 4.121706 3.522836 3.340038 
##      627      629      635      638      639      655      658      660 
## 3.661427 4.027073 3.340038 3.101566 4.020555 3.269907 2.778557 2.533716 
##      662      667      670      671      676      682      688      692 
## 2.665839 2.483091 2.438934 2.659321 4.040060 3.573362 3.441239 3.824969 
##      699      701      704      710      711      719      721      723 
## 3.100195 4.399188 4.071280 3.908037 3.813304 3.875446 2.483041 2.621831 
##      735      741      746      753      761      776      777      790 
## 3.630456 4.493721 3.762678 3.654959 4.229376 2.212128 2.205609 2.910879 
##      792      800      804      810      813      816      823      837 
## 4.172382 3.964931 3.062656 4.128225 3.674613 2.350818 3.491765 3.062631 
##      842      844      848      852      857      869      878      888 
## 3.617419 3.628935 2.476473 2.754004 2.672357 3.252022 3.976497 4.040010 
##      897      899      901      903      906      909      910      912 
## 2.287256 2.533567 3.598064 3.402130 3.845745 3.529354 3.150821 4.310973 
##      918      953      958      959      961      964      971      980 
## 3.736804 2.829232 3.610901 4.247560 3.119800 3.674463 3.454026 3.951895 
##      982      986      988      991      995      998     1009     1015 
## 2.709946 3.359493 3.157239 3.983165 3.622567 3.258391 3.390563 3.183163 
##     1029     1049     1063     1067     1080     1091     1092     1110 
## 4.077649 4.172282 3.692597 3.371108 3.617519 3.283043 3.674613 3.718571 
##     1111     1122     1128     1130     1133     1134     1135     1138 
## 3.057459 2.532246 2.773559 3.724989 3.384195 2.659371 3.100096 3.296030 
##     1145     1147     1149     1155     1160     1171     1177     1197 
## 3.390563 3.295980 3.214284 3.377527 3.951945 3.667945 3.712152 3.125969 
##     1206     1210     1213     1214     1215     1220     1227     1234 
## 3.113132 3.535773 2.974491 3.384145 3.434721 3.467162 3.093677 3.586249 
##     1236     1237     1244     1247     1249     1250     1251     1252 
## 3.125969 4.040110 3.258291 3.276425 3.421634 3.371059 2.967923 3.075593 
##     1254     1255     1268     1271     1272     1281     1283     1287 
## 4.216390 3.289462 4.172282 2.873240 4.172282 4.108770 3.119750 3.888432 
##     1301     1302     1305     1312     1321     1322     1333     1342 
## 3.661477 3.529404 4.266916 3.150721 4.172282 2.753954 3.850743 3.284364 
##     1349     1352     1353     1357     1359     1364     1366     1377 
## 3.509849 3.756110 3.225949 3.459273 4.291568 3.824870 3.441189 3.542191 
##     1383     1387     1393     1398     1402     1410     1415     1416 
## 4.027073 3.183113 3.730236 3.919453 3.699066 3.333569 4.077649 3.756160 
##     1422     1427     1431     1432     1434     1435     1436     1437 
## 3.132637 3.516367 3.679661 3.478728 3.592817 3.353024 4.254029 3.535773 
##     1442     1447     1448     1449     1452     1454     1458     1460 
## 3.769097 2.344300 2.344300 4.040110 4.134743 3.176645 3.125969 3.258291 
##     1468     1470     1477     1481     1482     1484     1485     1488 
## 4.121906 3.573362 3.907788 3.749741 3.850843 2.798111 3.327051 3.447558 
##     1492     1493     1496     1499     1506     1511     1513     1516 
## 3.591446 3.837856 3.283043 3.894901 3.062556 3.201347 2.974391 3.296030 
##     1520     1522     1526     1527     1528     1533     1537     1540 
## 3.976497 2.993946 3.176744 2.709897 3.769147 3.238836 3.163658 4.348762 
##     1547     1549     1550     1552     1553     1565     1566     1567 
## 3.139155 3.661576 3.610951 3.692597 3.170176 3.233689 3.547488 4.051825 
##     1568     1569     1574     1587     1593     1594     1600     1602 
## 4.040159 4.247460 3.926021 4.134643 3.315385 2.167970 4.077749 3.555277 
##     1603     1606     1612     1619     1620     1627     1629     1634 
## 3.736705 3.057409 3.390563 3.844325 3.756210 3.844375 3.025017 3.163758 
##     1637     1644     1646     1647     1658     1660     1672     1674 
## 3.163758 3.813204 2.923865 4.342144 3.068975 3.068975 2.956207 3.100195 
##     1675     1679     1680     1685     1695     1697     1699     1707 
## 3.454076 3.723718 4.910043 3.101516 3.384095 3.681081 3.295831 3.824820 
##     1710     1714     1715     1724     1729     1730     1739     1741 
## 3.163658 3.982966 4.059565 2.690442 3.214284 4.077599 4.518473 3.396932 
##     1743     1745     1750     1753     1755     1760     1763     1769 
## 3.868877 3.170176 3.850843 4.683138 4.348512 2.956207 3.245354 3.100096 
##     1776     1779     1781     1782     1792     1796     1802     1806 
## 3.560425 2.949839 4.430259 3.447608 3.031485 3.295980 3.863830 4.178800 
##     1812     1817     1819     1822     1847     1849     1856     1858 
## 3.692747 2.236780 3.738026 2.742538 2.810998 4.632562 3.201297 3.302349 
##     1864     1878     1883     1888     1893     1900     1901     1921 
## 3.498134 3.093677 4.707640 3.661527 3.126119 3.945377 4.386151 2.293724

# Keep model 1's hold-out predictions, then display them rounded to one
# decimal place.
lm_p <- predict(fit, newdata = test)
round(lm_p, digits = 1)

##    4    6   20   26   29   34   38   40   42   44   48   67   71   72   73   84 
##  4.2  4.2  3.8  3.7  4.2  3.7  4.1  3.7  3.8  2.3  2.0  3.9  4.1  4.2  3.8  4.9 
##   88   89   92  100  101  102  105  109  110  117  122  123  134  148  151  152 
##  3.4  4.0  3.3  3.9  4.1  3.6  4.0  3.8  3.7  4.2  5.0  3.4  4.4  3.3  3.8  2.7 
##  167  170  173  175  179  180  184  203  204  208  215  221  226  227  230  231 
##  3.7  3.6  4.0  4.2  4.3  3.7  3.9  3.6  3.8  2.6  3.9  3.7  4.0  4.1  3.3  3.7 
##  233  234  237  249  254  257  258  259  260  263  268  269  270  271  281  297 
##  3.3  2.4  3.3  3.4  3.8  4.2  3.6  3.3  3.6  3.8  3.6  3.5  4.4  3.8  3.9  3.4 
##  302  304  306  307  309  314  317  322  325  332  335  336  337  342  353  362 
##  4.1  3.7  4.0  3.9  3.4  3.8  4.1  3.4  3.4  4.0  3.7  4.2  2.4  3.7  4.0  3.6 
##  365  367  375  379  396  401  405  407  425  426  447  449  453  455  464  474 
##  3.0  3.9  3.3  3.5  1.6  3.3  2.3  4.0  4.1  3.6  3.7  3.7  3.3  3.9  4.2  3.6 
##  476  480  496  497  498  519  522  524  536  547  549  553  554  562  572  576 
##  3.9  3.8  3.5  3.2  4.0  5.0  4.2  3.9  3.0  3.4  3.3  2.4  2.4  4.1  3.2  3.3 
##  593  598  605  613  616  617  622  625  627  629  635  638  639  655  658  660 
##  3.7  3.5  4.7  3.6  3.8  4.1  3.5  3.3  3.7  4.0  3.3  3.1  4.0  3.3  2.8  2.5 
##  662  667  670  671  676  682  688  692  699  701  704  710  711  719  721  723 
##  2.7  2.5  2.4  2.7  4.0  3.6  3.4  3.8  3.1  4.4  4.1  3.9  3.8  3.9  2.5  2.6 
##  735  741  746  753  761  776  777  790  792  800  804  810  813  816  823  837 
##  3.6  4.5  3.8  3.7  4.2  2.2  2.2  2.9  4.2  4.0  3.1  4.1  3.7  2.4  3.5  3.1 
##  842  844  848  852  857  869  878  888  897  899  901  903  906  909  910  912 
##  3.6  3.6  2.5  2.8  2.7  3.3  4.0  4.0  2.3  2.5  3.6  3.4  3.8  3.5  3.2  4.3 
##  918  953  958  959  961  964  971  980  982  986  988  991  995  998 1009 1015 
##  3.7  2.8  3.6  4.2  3.1  3.7  3.5  4.0  2.7  3.4  3.2  4.0  3.6  3.3  3.4  3.2 
## 1029 1049 1063 1067 1080 1091 1092 1110 1111 1122 1128 1130 1133 1134 1135 1138 
##  4.1  4.2  3.7  3.4  3.6  3.3  3.7  3.7  3.1  2.5  2.8  3.7  3.4  2.7  3.1  3.3 
## 1145 1147 1149 1155 1160 1171 1177 1197 1206 1210 1213 1214 1215 1220 1227 1234 
##  3.4  3.3  3.2  3.4  4.0  3.7  3.7  3.1  3.1  3.5  3.0  3.4  3.4  3.5  3.1  3.6 
## 1236 1237 1244 1247 1249 1250 1251 1252 1254 1255 1268 1271 1272 1281 1283 1287 
##  3.1  4.0  3.3  3.3  3.4  3.4  3.0  3.1  4.2  3.3  4.2  2.9  4.2  4.1  3.1  3.9 
## 1301 1302 1305 1312 1321 1322 1333 1342 1349 1352 1353 1357 1359 1364 1366 1377 
##  3.7  3.5  4.3  3.2  4.2  2.8  3.9  3.3  3.5  3.8  3.2  3.5  4.3  3.8  3.4  3.5 
## 1383 1387 1393 1398 1402 1410 1415 1416 1422 1427 1431 1432 1434 1435 1436 1437 
##  4.0  3.2  3.7  3.9  3.7  3.3  4.1  3.8  3.1  3.5  3.7  3.5  3.6  3.4  4.3  3.5 
## 1442 1447 1448 1449 1452 1454 1458 1460 1468 1470 1477 1481 1482 1484 1485 1488 
##  3.8  2.3  2.3  4.0  4.1  3.2  3.1  3.3  4.1  3.6  3.9  3.7  3.9  2.8  3.3  3.4 
## 1492 1493 1496 1499 1506 1511 1513 1516 1520 1522 1526 1527 1528 1533 1537 1540 
##  3.6  3.8  3.3  3.9  3.1  3.2  3.0  3.3  4.0  3.0  3.2  2.7  3.8  3.2  3.2  4.3 
## 1547 1549 1550 1552 1553 1565 1566 1567 1568 1569 1574 1587 1593 1594 1600 1602 
##  3.1  3.7  3.6  3.7  3.2  3.2  3.5  4.1  4.0  4.2  3.9  4.1  3.3  2.2  4.1  3.6 
## 1603 1606 1612 1619 1620 1627 1629 1634 1637 1644 1646 1647 1658 1660 1672 1674 
##  3.7  3.1  3.4  3.8  3.8  3.8  3.0  3.2  3.2  3.8  2.9  4.3  3.1  3.1  3.0  3.1 
## 1675 1679 1680 1685 1695 1697 1699 1707 1710 1714 1715 1724 1729 1730 1739 1741 
##  3.5  3.7  4.9  3.1  3.4  3.7  3.3  3.8  3.2  4.0  4.1  2.7  3.2  4.1  4.5  3.4 
## 1743 1745 1750 1753 1755 1760 1763 1769 1776 1779 1781 1782 1792 1796 1802 1806 
##  3.9  3.2  3.9  4.7  4.3  3.0  3.2  3.1  3.6  2.9  4.4  3.4  3.0  3.3  3.9  4.2 
## 1812 1817 1819 1822 1847 1849 1856 1858 1864 1878 1883 1888 1893 1900 1901 1921 
##  3.7  2.2  3.7  2.7  2.8  4.6  3.2  3.3  3.5  3.1  4.7  3.7  3.1  3.9  4.4  2.3

# Store model 1's hold-out predictions, rounded to one decimal place, next
# to the observed scores and open the table in the data viewer.
test$Happiness1 <- round(predict(fit, newdata = test), digits = 1)
View(test)

# Mean squared error (MSE) of the rounded predictions on the hold-out set.
mean((test[["Happiness"]] - test[["Happiness1"]])^2)

## [1] 0.280625

# The MSE of the two-predictor model cannot be evaluated at this point:
# test$Happiness2 is only created after fit1 has been fitted further down.
# Subtracting the missing (NULL) column yields an empty vector, and mean()
# of an empty vector is NaN, so the premature evaluation is left out here.

# Linear regression model 2: same training rows, BM and BF only (no Peace).
fit1 <- lm(formula = Happiness ~ BM + BF, data = train)
summary(object = fit1)

## 
## Call:
## lm(formula = Happiness ~ BM + BF, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.22802 -0.40371  0.01684  0.42408  1.86981 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  1.60196    0.07189   22.28 <0.0000000000000002 ***
## BM           0.29812    0.02590   11.51 <0.0000000000000002 ***
## BF           0.33285    0.02688   12.38 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6099 on 1538 degrees of freedom
## Multiple R-squared:  0.335,  Adjusted R-squared:  0.3341 
## F-statistic: 387.4 on 2 and 1538 DF,  p-value: < 0.00000000000000022

# Hold-out predictions of the two-predictor model fitted just above (fit1);
# predicting with fit here would only repeat the three-predictor results.
predict(fit1, newdata = test)

##        4        6       20       26       29       34       38       40 
## 4.222858 4.172282 3.787181 3.738026 4.229376 3.731508 4.121706 3.731458 
##       42       44       48       67       71       72       73       84 
## 3.819623 2.344300 1.972285 3.932440 4.128225 4.172282 3.813204 4.865986 
##       88       89       92      100      101      102      105      109 
## 3.446187 4.040110 3.315435 3.938908 4.084117 3.641972 4.040060 3.793799 
##      110      117      122      123      134      148      151      152 
## 3.705584 4.203353 4.954101 3.441139 4.405656 3.263439 3.819623 2.652803 
##      167      170      173      175      179      180      184      203 
## 3.680982 3.599285 3.951995 4.222858 4.260397 3.692548 3.863755 3.591645 
##      204      208      215      221      226      227      230      231 
## 3.756110 2.551701 3.938958 3.705534 4.020555 4.121706 3.333420 3.661427 
##      233      234      237      249      254      257      258      259 
## 3.276425 2.445452 3.313965 3.377477 3.813179 4.172282 3.566893 3.320433 
##      260      263      268      269      270      271      281      297 
## 3.573362 3.806785 3.599285 3.496862 4.392670 3.837757 3.850843 3.384195 
##      302      304      306      307      309      314      317      322 
## 4.128225 3.674513 3.951995 3.881864 3.402179 3.787231 4.064662 3.384045 
##      325      332      335      336      337      342      353      362 
## 3.434621 3.956993 3.667945 4.216340 2.432415 3.654909 3.996052 3.579830 
##      365      367      375      379      396      401      405      407 
## 2.961455 3.856040 3.264909 3.503381 1.619626 3.277846 2.344300 4.007618 
##      425      426      447      449      453      455      464      474 
## 4.084117 3.553757 3.655058 3.743123 3.289462 3.932490 4.172282 3.604482 
##      476      480      496      497      498      519      522      524 
## 3.945377 3.756185 3.516367 3.207765 3.996052 4.998158 4.172282 3.932540 
##      536      547      549      553      554      562      572      576 
## 2.986007 3.441089 3.327051 2.445402 2.350818 4.090636 3.220702 3.258241 
##      593      598      605      613      616      617      622      625 
## 3.736655 3.454026 4.733663 3.586298 3.780862 4.121706 3.522836 3.340038 
##      627      629      635      638      639      655      658      660 
## 3.661427 4.027073 3.340038 3.101566 4.020555 3.269907 2.778557 2.533716 
##      662      667      670      671      676      682      688      692 
## 2.665839 2.483091 2.438934 2.659321 4.040060 3.573362 3.441239 3.824969 
##      699      701      704      710      711      719      721      723 
## 3.100195 4.399188 4.071280 3.908037 3.813304 3.875446 2.483041 2.621831 
##      735      741      746      753      761      776      777      790 
## 3.630456 4.493721 3.762678 3.654959 4.229376 2.212128 2.205609 2.910879 
##      792      800      804      810      813      816      823      837 
## 4.172382 3.964931 3.062656 4.128225 3.674613 2.350818 3.491765 3.062631 
##      842      844      848      852      857      869      878      888 
## 3.617419 3.628935 2.476473 2.754004 2.672357 3.252022 3.976497 4.040010 
##      897      899      901      903      906      909      910      912 
## 2.287256 2.533567 3.598064 3.402130 3.845745 3.529354 3.150821 4.310973 
##      918      953      958      959      961      964      971      980 
## 3.736804 2.829232 3.610901 4.247560 3.119800 3.674463 3.454026 3.951895 
##      982      986      988      991      995      998     1009     1015 
## 2.709946 3.359493 3.157239 3.983165 3.622567 3.258391 3.390563 3.183163 
##     1029     1049     1063     1067     1080     1091     1092     1110 
## 4.077649 4.172282 3.692597 3.371108 3.617519 3.283043 3.674613 3.718571 
##     1111     1122     1128     1130     1133     1134     1135     1138 
## 3.057459 2.532246 2.773559 3.724989 3.384195 2.659371 3.100096 3.296030 
##     1145     1147     1149     1155     1160     1171     1177     1197 
## 3.390563 3.295980 3.214284 3.377527 3.951945 3.667945 3.712152 3.125969 
##     1206     1210     1213     1214     1215     1220     1227     1234 
## 3.113132 3.535773 2.974491 3.384145 3.434721 3.467162 3.093677 3.586249 
##     1236     1237     1244     1247     1249     1250     1251     1252 
## 3.125969 4.040110 3.258291 3.276425 3.421634 3.371059 2.967923 3.075593 
##     1254     1255     1268     1271     1272     1281     1283     1287 
## 4.216390 3.289462 4.172282 2.873240 4.172282 4.108770 3.119750 3.888432 
##     1301     1302     1305     1312     1321     1322     1333     1342 
## 3.661477 3.529404 4.266916 3.150721 4.172282 2.753954 3.850743 3.284364 
##     1349     1352     1353     1357     1359     1364     1366     1377 
## 3.509849 3.756110 3.225949 3.459273 4.291568 3.824870 3.441189 3.542191 
##     1383     1387     1393     1398     1402     1410     1415     1416 
## 4.027073 3.183113 3.730236 3.919453 3.699066 3.333569 4.077649 3.756160 
##     1422     1427     1431     1432     1434     1435     1436     1437 
## 3.132637 3.516367 3.679661 3.478728 3.592817 3.353024 4.254029 3.535773 
##     1442     1447     1448     1449     1452     1454     1458     1460 
## 3.769097 2.344300 2.344300 4.040110 4.134743 3.176645 3.125969 3.258291 
##     1468     1470     1477     1481     1482     1484     1485     1488 
## 4.121906 3.573362 3.907788 3.749741 3.850843 2.798111 3.327051 3.447558 
##     1492     1493     1496     1499     1506     1511     1513     1516 
## 3.591446 3.837856 3.283043 3.894901 3.062556 3.201347 2.974391 3.296030 
##     1520     1522     1526     1527     1528     1533     1537     1540 
## 3.976497 2.993946 3.176744 2.709897 3.769147 3.238836 3.163658 4.348762 
##     1547     1549     1550     1552     1553     1565     1566     1567 
## 3.139155 3.661576 3.610951 3.692597 3.170176 3.233689 3.547488 4.051825 
##     1568     1569     1574     1587     1593     1594     1600     1602 
## 4.040159 4.247460 3.926021 4.134643 3.315385 2.167970 4.077749 3.555277 
##     1603     1606     1612     1619     1620     1627     1629     1634 
## 3.736705 3.057409 3.390563 3.844325 3.756210 3.844375 3.025017 3.163758 
##     1637     1644     1646     1647     1658     1660     1672     1674 
## 3.163758 3.813204 2.923865 4.342144 3.068975 3.068975 2.956207 3.100195 
##     1675     1679     1680     1685     1695     1697     1699     1707 
## 3.454076 3.723718 4.910043 3.101516 3.384095 3.681081 3.295831 3.824820 
##     1710     1714     1715     1724     1729     1730     1739     1741 
## 3.163658 3.982966 4.059565 2.690442 3.214284 4.077599 4.518473 3.396932 
##     1743     1745     1750     1753     1755     1760     1763     1769 
## 3.868877 3.170176 3.850843 4.683138 4.348512 2.956207 3.245354 3.100096 
##     1776     1779     1781     1782     1792     1796     1802     1806 
## 3.560425 2.949839 4.430259 3.447608 3.031485 3.295980 3.863830 4.178800 
##     1812     1817     1819     1822     1847     1849     1856     1858 
## 3.692747 2.236780 3.738026 2.742538 2.810998 4.632562 3.201297 3.302349 
##     1864     1878     1883     1888     1893     1900     1901     1921 
## 3.498134 3.093677 4.707640 3.661527 3.126119 3.945377 4.386151 2.293724

# Hold-out predictions of the two-predictor model (fit1).
lm_p <- predict(fit1, newdata = test)
# Show those same predictions rounded to one decimal place. Rounding lm_p
# keeps the display tied to fit1 instead of re-predicting with the
# three-predictor model fit.
round(lm_p, 1)

##    4    6   20   26   29   34   38   40   42   44   48   67   71   72   73   84 
##  4.2  4.2  3.8  3.7  4.2  3.7  4.1  3.7  3.8  2.3  2.0  3.9  4.1  4.2  3.8  4.9 
##   88   89   92  100  101  102  105  109  110  117  122  123  134  148  151  152 
##  3.4  4.0  3.3  3.9  4.1  3.6  4.0  3.8  3.7  4.2  5.0  3.4  4.4  3.3  3.8  2.7 
##  167  170  173  175  179  180  184  203  204  208  215  221  226  227  230  231 
##  3.7  3.6  4.0  4.2  4.3  3.7  3.9  3.6  3.8  2.6  3.9  3.7  4.0  4.1  3.3  3.7 
##  233  234  237  249  254  257  258  259  260  263  268  269  270  271  281  297 
##  3.3  2.4  3.3  3.4  3.8  4.2  3.6  3.3  3.6  3.8  3.6  3.5  4.4  3.8  3.9  3.4 
##  302  304  306  307  309  314  317  322  325  332  335  336  337  342  353  362 
##  4.1  3.7  4.0  3.9  3.4  3.8  4.1  3.4  3.4  4.0  3.7  4.2  2.4  3.7  4.0  3.6 
##  365  367  375  379  396  401  405  407  425  426  447  449  453  455  464  474 
##  3.0  3.9  3.3  3.5  1.6  3.3  2.3  4.0  4.1  3.6  3.7  3.7  3.3  3.9  4.2  3.6 
##  476  480  496  497  498  519  522  524  536  547  549  553  554  562  572  576 
##  3.9  3.8  3.5  3.2  4.0  5.0  4.2  3.9  3.0  3.4  3.3  2.4  2.4  4.1  3.2  3.3 
##  593  598  605  613  616  617  622  625  627  629  635  638  639  655  658  660 
##  3.7  3.5  4.7  3.6  3.8  4.1  3.5  3.3  3.7  4.0  3.3  3.1  4.0  3.3  2.8  2.5 
##  662  667  670  671  676  682  688  692  699  701  704  710  711  719  721  723 
##  2.7  2.5  2.4  2.7  4.0  3.6  3.4  3.8  3.1  4.4  4.1  3.9  3.8  3.9  2.5  2.6 
##  735  741  746  753  761  776  777  790  792  800  804  810  813  816  823  837 
##  3.6  4.5  3.8  3.7  4.2  2.2  2.2  2.9  4.2  4.0  3.1  4.1  3.7  2.4  3.5  3.1 
##  842  844  848  852  857  869  878  888  897  899  901  903  906  909  910  912 
##  3.6  3.6  2.5  2.8  2.7  3.3  4.0  4.0  2.3  2.5  3.6  3.4  3.8  3.5  3.2  4.3 
##  918  953  958  959  961  964  971  980  982  986  988  991  995  998 1009 1015 
##  3.7  2.8  3.6  4.2  3.1  3.7  3.5  4.0  2.7  3.4  3.2  4.0  3.6  3.3  3.4  3.2 
## 1029 1049 1063 1067 1080 1091 1092 1110 1111 1122 1128 1130 1133 1134 1135 1138 
##  4.1  4.2  3.7  3.4  3.6  3.3  3.7  3.7  3.1  2.5  2.8  3.7  3.4  2.7  3.1  3.3 
## 1145 1147 1149 1155 1160 1171 1177 1197 1206 1210 1213 1214 1215 1220 1227 1234 
##  3.4  3.3  3.2  3.4  4.0  3.7  3.7  3.1  3.1  3.5  3.0  3.4  3.4  3.5  3.1  3.6 
## 1236 1237 1244 1247 1249 1250 1251 1252 1254 1255 1268 1271 1272 1281 1283 1287 
##  3.1  4.0  3.3  3.3  3.4  3.4  3.0  3.1  4.2  3.3  4.2  2.9  4.2  4.1  3.1  3.9 
## 1301 1302 1305 1312 1321 1322 1333 1342 1349 1352 1353 1357 1359 1364 1366 1377 
##  3.7  3.5  4.3  3.2  4.2  2.8  3.9  3.3  3.5  3.8  3.2  3.5  4.3  3.8  3.4  3.5 
## 1383 1387 1393 1398 1402 1410 1415 1416 1422 1427 1431 1432 1434 1435 1436 1437 
##  4.0  3.2  3.7  3.9  3.7  3.3  4.1  3.8  3.1  3.5  3.7  3.5  3.6  3.4  4.3  3.5 
## 1442 1447 1448 1449 1452 1454 1458 1460 1468 1470 1477 1481 1482 1484 1485 1488 
##  3.8  2.3  2.3  4.0  4.1  3.2  3.1  3.3  4.1  3.6  3.9  3.7  3.9  2.8  3.3  3.4 
## 1492 1493 1496 1499 1506 1511 1513 1516 1520 1522 1526 1527 1528 1533 1537 1540 
##  3.6  3.8  3.3  3.9  3.1  3.2  3.0  3.3  4.0  3.0  3.2  2.7  3.8  3.2  3.2  4.3 
## 1547 1549 1550 1552 1553 1565 1566 1567 1568 1569 1574 1587 1593 1594 1600 1602 
##  3.1  3.7  3.6  3.7  3.2  3.2  3.5  4.1  4.0  4.2  3.9  4.1  3.3  2.2  4.1  3.6 
## 1603 1606 1612 1619 1620 1627 1629 1634 1637 1644 1646 1647 1658 1660 1672 1674 
##  3.7  3.1  3.4  3.8  3.8  3.8  3.0  3.2  3.2  3.8  2.9  4.3  3.1  3.1  3.0  3.1 
## 1675 1679 1680 1685 1695 1697 1699 1707 1710 1714 1715 1724 1729 1730 1739 1741 
##  3.5  3.7  4.9  3.1  3.4  3.7  3.3  3.8  3.2  4.0  4.1  2.7  3.2  4.1  4.5  3.4 
## 1743 1745 1750 1753 1755 1760 1763 1769 1776 1779 1781 1782 1792 1796 1802 1806 
##  3.9  3.2  3.9  4.7  4.3  3.0  3.2  3.1  3.6  2.9  4.4  3.4  3.0  3.3  3.9  4.2 
## 1812 1817 1819 1822 1847 1849 1856 1858 1864 1878 1883 1888 1893 1900 1901 1921 
##  3.7  2.2  3.7  2.7  2.8  4.6  3.2  3.3  3.5  3.1  4.7  3.7  3.1  3.9  4.4  2.3

# Rounded hold-out predictions of the two-predictor model (fit1).
test$Happiness2 <- round(predict(fit1, newdata = test), 1)
# MSE of fit1 on the hold-out set, computed the same way as the MSE based on
# test$Happiness1; it can only be evaluated once the Happiness2 column exists.
mean((test$Happiness - test$Happiness2)^2)

#linear regression model

# Preview the column types and first values of the iris data.
dplyr::glimpse(iris)

## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species      <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…

#알아서 더미변수로 바꿔준다

# Stepwise variable selection demo: four predictors (x1..x4) and one
# response (y), 13 observations each.
x1 <- c(7, 1, 11, 11, 7, 11, 3, 1, 2, 21, 1, 11, 10)
x2 <- c(26, 29, 56, 31, 52, 55, 71, 31, 54, 47, 40, 66, 68)
x3 <- c(6, 15, 8, 8, 6, 9, 17, 22, 18, 4, 23, 9, 8)
x4 <- c(60, 52, 30, 47, 33, 22, 6, 44, 22, 26, 34, 12, 12)
y <- c(
  78.5, 74.3, 104.3, 87.6, 95.9, 109.2, 102.7,
  72.5, 93.1, 115.9, 83.8, 113.3, 109.4
)
df <- data.frame(x1, x2, x3, x4, y)

# Forward search: begin with the intercept-only model and let step() consider
# adding each of x1..x4, using AIC as the criterion.
step(
  lm(y ~ 1, data = df),
  scope = list(lower = ~1, upper = ~ x1 + x2 + x3 + x4),
  direction = "forward"
)

## Start:  AIC=71.44
## y ~ 1
## 
##        Df Sum of Sq     RSS    AIC
## + x2    1   1809.43  906.34 59.178
## + x4    1   1759.55  956.21 59.874
## + x1    1   1450.08 1265.69 63.519
## + x3    1    776.36 1939.40 69.067
## <none>              2715.76 71.444
## 
## Step:  AIC=59.18
## y ~ x2
## 
##        Df Sum of Sq    RSS    AIC
## + x1    1    848.43  57.90 25.420
## + x3    1    490.89 415.44 51.037
## <none>              906.34 59.178
## + x4    1     12.99 893.34 60.990
## 
## Step:  AIC=25.42
## y ~ x2 + x1
## 
##        Df Sum of Sq    RSS    AIC
## + x4    1   13.9620 43.942 23.833
## + x3    1    9.7939 48.111 25.011
## <none>              57.904 25.420
## 
## Step:  AIC=23.83
## y ~ x2 + x1 + x4
## 
##        Df Sum of Sq    RSS    AIC
## <none>              43.942 23.833
## + x3    1   0.10561 43.837 25.802

## 
## Call:
## lm(formula = y ~ x2 + x1 + x4, data = df)
## 
## Coefficients:
## (Intercept)           x2           x1           x4  
##     72.0911       0.4130       1.4692      -0.2444

library(ISLR)
library(dplyr)
# Load the attitude data set into the workspace and preview its columns.
data(list = "attitude")
dplyr::glimpse(attitude)

## Rows: 30
## Columns: 7
## $ rating     <dbl> 43, 63, 71, 61, 81, 43, 58, 71, 72, 67, 64, 67, 69, 68, 77,…
## $ complaints <dbl> 51, 64, 70, 63, 78, 55, 67, 75, 82, 61, 53, 60, 62, 83, 77,…
## $ privileges <dbl> 30, 51, 68, 45, 56, 49, 42, 50, 72, 45, 53, 47, 57, 83, 54,…
## $ learning   <dbl> 39, 54, 69, 47, 66, 44, 56, 55, 67, 47, 58, 39, 42, 45, 72,…
## $ raises     <dbl> 61, 63, 76, 54, 71, 54, 66, 70, 71, 62, 58, 59, 55, 59, 79,…
## $ critical   <dbl> 92, 73, 86, 84, 83, 49, 68, 66, 83, 80, 67, 74, 63, 77, 77,…
## $ advance    <dbl> 45, 47, 48, 35, 47, 34, 35, 41, 31, 41, 34, 41, 25, 35, 46,…

# Backward elimination from the full attitude model.
# The argument is spelled `direction`; the previous `directions=` was not a
# formal argument of step(), so the requested mode was never actually passed.
step(lm(rating ~ ., data = attitude), direction = "backward")

## Start:  AIC=123.36
## rating ~ complaints + privileges + learning + raises + critical + 
##     advance
## 
##              Df Sum of Sq    RSS    AIC
## - critical    1      3.41 1152.4 121.45
## - raises      1      6.80 1155.8 121.54
## - privileges  1     14.47 1163.5 121.74
## - advance     1     74.11 1223.1 123.24
## <none>                    1149.0 123.36
## - learning    1    180.50 1329.5 125.74
## - complaints  1    724.80 1873.8 136.04
## 
## Step:  AIC=121.45
## rating ~ complaints + privileges + learning + raises + advance
## 
##              Df Sum of Sq    RSS    AIC
## - raises      1     10.61 1163.0 119.73
## - privileges  1     14.16 1166.6 119.82
## - advance     1     71.27 1223.7 121.25
## <none>                    1152.4 121.45
## - learning    1    177.74 1330.1 123.75
## - complaints  1    724.70 1877.1 134.09
## 
## Step:  AIC=119.73
## rating ~ complaints + privileges + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - privileges  1     16.10 1179.1 118.14
## - advance     1     61.60 1224.6 119.28
## <none>                    1163.0 119.73
## - learning    1    197.03 1360.0 122.42
## - complaints  1   1165.94 2328.9 138.56
## 
## Step:  AIC=118.14
## rating ~ complaints + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - advance     1     75.54 1254.7 118.00
## <none>                    1179.1 118.14
## - learning    1    186.12 1365.2 120.54
## - complaints  1   1259.91 2439.0 137.94
## 
## Step:  AIC=118
## rating ~ complaints + learning
## 
##              Df Sum of Sq    RSS    AIC
## <none>                    1254.7 118.00
## - learning    1    114.73 1369.4 118.63
## - complaints  1   1370.91 2625.6 138.16

## 
## Call:
## lm(formula = rating ~ complaints + learning, data = attitude)
## 
## Coefficients:
## (Intercept)   complaints     learning  
##      9.8709       0.6435       0.2112

# Forward selection must start from the intercept-only model and be given an
# upper scope to grow into. Starting from the full model leaves nothing to
# add, so step() would just hand the full model back.
# NOTE(review): the transcript recorded below comes from the old call that
# started at the full model; regenerate it after this change.
step(
  lm(rating ~ 1, data = attitude),
  scope = list(
    lower = ~1,
    upper = ~ complaints + privileges + learning + raises + critical + advance
  ),
  direction = "forward"
)

## Start:  AIC=123.36
## rating ~ complaints + privileges + learning + raises + critical + 
##     advance

## 
## Call:
## lm(formula = rating ~ complaints + privileges + learning + raises + 
##     critical + advance, data = attitude)
## 
## Coefficients:
## (Intercept)   complaints   privileges     learning       raises     critical  
##    10.78708      0.61319     -0.07305      0.32033      0.08173      0.03838  
##     advance  
##    -0.21706

# Stepwise search in both directions, starting from the full attitude model.
step(
  object = lm(formula = rating ~ ., data = attitude),
  direction = "both"
)

## Start:  AIC=123.36
## rating ~ complaints + privileges + learning + raises + critical + 
##     advance
## 
##              Df Sum of Sq    RSS    AIC
## - critical    1      3.41 1152.4 121.45
## - raises      1      6.80 1155.8 121.54
## - privileges  1     14.47 1163.5 121.74
## - advance     1     74.11 1223.1 123.24
## <none>                    1149.0 123.36
## - learning    1    180.50 1329.5 125.74
## - complaints  1    724.80 1873.8 136.04
## 
## Step:  AIC=121.45
## rating ~ complaints + privileges + learning + raises + advance
## 
##              Df Sum of Sq    RSS    AIC
## - raises      1     10.61 1163.0 119.73
## - privileges  1     14.16 1166.6 119.82
## - advance     1     71.27 1223.7 121.25
## <none>                    1152.4 121.45
## + critical    1      3.41 1149.0 123.36
## - learning    1    177.74 1330.1 123.75
## - complaints  1    724.70 1877.1 134.09
## 
## Step:  AIC=119.73
## rating ~ complaints + privileges + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - privileges  1     16.10 1179.1 118.14
## - advance     1     61.60 1224.6 119.28
## <none>                    1163.0 119.73
## + raises      1     10.61 1152.4 121.45
## + critical    1      7.21 1155.8 121.54
## - learning    1    197.03 1360.0 122.42
## - complaints  1   1165.94 2328.9 138.56
## 
## Step:  AIC=118.14
## rating ~ complaints + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - advance     1     75.54 1254.7 118.00
## <none>                    1179.1 118.14
## + privileges  1     16.10 1163.0 119.73
## + raises      1     12.54 1166.6 119.82
## + critical    1      7.18 1171.9 119.96
## - learning    1    186.12 1365.2 120.54
## - complaints  1   1259.91 2439.0 137.94
## 
## Step:  AIC=118
## rating ~ complaints + learning
## 
##              Df Sum of Sq    RSS    AIC
## <none>                    1254.7 118.00
## + advance     1     75.54 1179.1 118.14
## - learning    1    114.73 1369.4 118.63
## + privileges  1     30.03 1224.6 119.28
## + raises      1      1.19 1253.5 119.97
## + critical    1      0.00 1254.7 120.00
## - complaints  1   1370.91 2625.6 138.16

## 
## Call:
## lm(formula = rating ~ complaints + learning, data = attitude)
## 
## Coefficients:
## (Intercept)   complaints     learning  
##      9.8709       0.6435       0.2112

# Reload the survey data (df was overwritten by the stepwise demo above) and
# count the respondents under each Gender1 code.
df <- read.csv(file = "Data1.csv")
table(df[["Gender1"]])

## 
##    0    1 
## 1136  789

# Gender1 coding: 1 = male, 0 = female.
# Regress Happiness on the Gender1 indicator and print the model summary.
lmfit <- lm(formula = Happiness ~ Gender1, data = df)
summary(object = lmfit)

## 
## Call:
## lm(formula = Happiness ~ Gender1, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1659 -0.5199  0.0801  0.4801  1.4801 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  3.56593    0.02219 160.711 <0.0000000000000002 ***
## Gender1     -0.04603    0.03466  -1.328               0.184    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7479 on 1923 degrees of freedom
## Multiple R-squared:  0.0009166,  Adjusted R-squared:  0.0003971 
## F-statistic: 1.764 on 1 and 1923 DF,  p-value: 0.1843

# Read the survey file again and tabulate the Gender1 codes.
df <- read.csv(file = "Data1.csv")
table(df[["Gender1"]])

## 
##    0    1 
## 1136  789

# Gender1 coding: 1 = male, 0 = female.
# Preview every column of the survey data frame.
dplyr::glimpse(df)

## Rows: 1,925
## Columns: 26
## $ Q1        <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ Q2        <int> 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 2, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, …
## $ Q5        <int> 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, …
## $ Q6        <int> 2, 3, 4, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 4, …
## $ Q7        <int> 2, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, …
## $ Q8        <int> 4, 4, 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, …
## $ Q9        <int> 4, 4, 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, …
## $ Q10       <int> 4, 4, 2, 4, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, …
## $ Q11       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 3, …
## $ Q12       <int> 4, 4, 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 2, …
## $ Q13       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, …
## $ Q14       <int> 4, 4, 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, …
## $ Q15       <int> 4, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 4, …
## $ Q16       <int> 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, …
## $ Q17       <int> 4, 3, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, …
## $ Q19       <int> 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, …
## $ Q20       <int> 4, 1, 3, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 4, …
## $ Gender1   <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, …
## $ EDU1      <int> 1, 1, 2, 1, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 4, …
## $ BF        <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, …
## $ BM        <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, …
## $ Happiness <dbl> 4.0, 4.0, 3.8, 4.0, 4.0, 4.0, 4.8, 4.4, 3.8, 4.0, 4.0, 3.4, …
## $ Peace     <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, …

# Convert the education code to a factor so lm() treats it as categorical
# rather than as a numeric score.
df$EDU1 <- factor(df$EDU1)
lmfit1 <- lm(Happiness ~ EDU1, data = df)
# Summarise the education model just fitted (lmfit1). The earlier call passed
# lmfit, which only re-printed the Gender1 regression.
# NOTE(review): the transcript recorded below still shows that Gender1 output
# and should be regenerated.
summary(lmfit1)

## 
## Call:
## lm(formula = Happiness ~ Gender1, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1659 -0.5199  0.0801  0.4801  1.4801 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  3.56593    0.02219 160.711 <0.0000000000000002 ***
## Gender1     -0.04603    0.03466  -1.328               0.184    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7479 on 1923 degrees of freedom
## Multiple R-squared:  0.0009166,  Adjusted R-squared:  0.0003971 
## F-statistic: 1.764 on 1 and 1923 DF,  p-value: 0.1843

# Count the respondents at each EDU1 level.
table(df[["EDU1"]])

## 
##    1    2    3    4 
##  233  472 1022  198

#1중졸, 2 고졸, 3대졸, 4대학원졸

# Simple linear regression of Happiness on BM (healthy self-management, per
# the notes at the top of this file), followed by the model summary.
bs.out2 <- lm(formula = Happiness ~ BM, data = df)
summary(object = bs.out2)

## 
## Call:
## lm(formula = Happiness ~ BM, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1591 -0.4577  0.0418  0.4409  1.9386 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  2.06599    0.05777   35.77 <0.0000000000000002 ***
## BM           0.49771    0.01878   26.50 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6404 on 1923 degrees of freedom
## Multiple R-squared:  0.2675, Adjusted R-squared:  0.2671 
## F-statistic: 702.2 on 1 and 1923 DF,  p-value: < 0.00000000000000022

#건강한 자기관리가 '1' 증가할 경우 행복은 0.498증가함

#현짱 화장실
# No install.packages("datasets") here: the previous run only produced a
# warning that the package is already in use and would not be installed.
library(datasets)
data("USArrests")
# Show the first rows of the USArrests data.
head(USArrests)

##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7

# Principal component analysis of USArrests with each column scaled to unit
# variance. The argument is written out as `scale.` (its full name) instead of
# relying on partial matching of `scale`.
fit <- prcomp(USArrests, scale. = TRUE)
# Print the standard deviation and variance share of each component.
summary(fit)

## Importance of components:
##                           PC1    PC2     PC3     PC4
## Standard deviation     1.5749 0.9949 0.59713 0.41645
## Proportion of Variance 0.6201 0.2474 0.08914 0.04336
## Cumulative Proportion  0.6201 0.8675 0.95664 1.00000

# Display the Nile time series.
print(Nile)

## Time Series:
## Start = 1871 
## End = 1970 
## Frequency = 1 
##   [1] 1120 1160  963 1210 1160 1160  813 1230 1370 1140  995  935 1110  994 1020
##  [16]  960 1180  799  958 1140 1100 1210 1150 1250 1260 1220 1030 1100  774  840
##  [31]  874  694  940  833  701  916  692 1020 1050  969  831  726  456  824  702
##  [46] 1120 1100  832  764  821  768  845  864  862  698  845  744  796 1040  759
##  [61]  781  865  845  944  984  897  822 1010  771  676  649  846  812  742  801
##  [76] 1040  860  874  848  890  744  749  838 1050  918  986  797  923  975  815
##  [91] 1020  906  901 1170  912  746  919  718  714  740

# First- and second-order differences of the Nile series.
Nile.diff1 <- diff(x = Nile, differences = 1)
Nile.diff2 <- diff(x = Nile, differences = 2)

# Plot the raw series, then each differenced series, in that order.
plot(Nile)
plot(Nile.diff1)
plot(Nile.diff2)

7월 20일 실습

방지원

2023-07-20