rm(list=ls())
getwd()
## [1] "C:/data"
setwd("c:/data")
getwd()
## [1] "c:/data"
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
#Simple linear regression
#BM (healthy self-management)
#Happiness (happiness level)
#The effect of BM on Happiness:
#does better healthy self-management increase happiness?

df<-read.csv("DATA1.csv")
glimpse(df)
## Rows: 1,925
## Columns: 26
## $ Q1        <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ Q2        <int> 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 2, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, …
## $ Q5        <int> 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, …
## $ Q6        <int> 2, 3, 4, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 4, …
## $ Q7        <int> 2, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, …
## $ Q8        <int> 4, 4, 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, …
## $ Q9        <int> 4, 4, 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, …
## $ Q10       <int> 4, 4, 2, 4, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, …
## $ Q11       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 3, …
## $ Q12       <int> 4, 4, 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 2, …
## $ Q13       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, …
## $ Q14       <int> 4, 4, 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, …
## $ Q15       <int> 4, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 4, …
## $ Q16       <int> 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, …
## $ Q17       <int> 4, 3, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, …
## $ Q19       <int> 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, …
## $ Q20       <int> 4, 1, 3, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 4, …
## $ Gender1   <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, …
## $ EDU1      <int> 1, 1, 2, 1, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 4, …
## $ BF        <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, …
## $ BM        <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, …
## $ Happiness <dbl> 4.0, 4.0, 3.8, 4.0, 4.0, 4.0, 4.8, 4.4, 3.8, 4.0, 4.0, 3.4, …
## $ Peace     <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, …
#Regression requires the variables to be at least interval-scaled
#a categorical independent variable can still be used via dummy variables
#if the dependent variable is binary (0/1, success/failure), use logistic regression
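#As a hedged illustration (not part of the original analysis): if we dichotomized
#Happiness into a hypothetical 0/1 outcome, the logistic model would use glm()
happy01<-as.integer(df$Happiness>=4)#hypothetical binary recoding; cutoff 4 chosen arbitrarily
logit.fit<-glm(happy01~BM,data=df,family=binomial)#coefficients are on the log-odds scale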



bs.out2<-lm(Happiness~BM,data=df) #lm() fits a linear regression model
summary(bs.out2)
## 
## Call:
## lm(formula = Happiness ~ BM, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1591 -0.4577  0.0418  0.4409  1.9386 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.06599    0.05777   35.77   <2e-16 ***
## BM           0.49771    0.01878   26.50   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6404 on 1923 degrees of freedom
## Multiple R-squared:  0.2675, Adjusted R-squared:  0.2671 
## F-statistic: 702.2 on 1 and 1923 DF,  p-value: < 2.2e-16
#In the formula, Happiness~BM means dependent ~ independent
#when healthy self-management (BM) increases by 1, Happiness increases by about 0.498
#Durbin-Watson test
#the Durbin-Watson statistic ranges from 0 to 4:
#near 0 => positive autocorrelation
#near 4 => negative autocorrelation
#near 2 => no autocorrelation in the error terms

#BM Estimate: the slope coefficient
#Intercept: the intercept term
#Residuals: observed minus fitted values
#fitted simple regression model: Happiness = 2.066 + 0.498*BM
#error term: unobservable because the population is unknown; the gap between an observed value and the population regression line
#residual: the gap between an observed value and the sample regression line; residuals are used to check the error-term assumptions => residual analysis
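#A quick sketch checking these definitions against the fitted object
#(no new data; only bs.out2 from above):
res.check<-df$Happiness-fitted(bs.out2)#residual = observed - fitted; equals residuals(bs.out2)
yhat.bm4<-2.06599+0.49771*4#the sample regression line by hand at BM=4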

library(car)#needed for the Durbin-Watson test
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
sreg.res1<-residuals(bs.out2)
durbinWatsonTest(sreg.res1)
## [1] 1.787942
#DW = 1.79, close to 2, so we judge the errors to be independent
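#Note: durbinWatsonTest() also accepts the fitted lm object directly, in which
#case it additionally reports a p-value for the autocorrelation test
dw.out<-durbinWatsonTest(bs.out2)#print(dw.out) shows the statistic and p-value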
library(caret)
#homoscedasticity of the residuals
par(mfrow=c(2,2))
plot(bs.out2)

#Q-Q plot: checks normality of the residuals



#To support homoscedasticity of the residuals, their spread in the
#residuals-vs-fitted plot should stay constant as the fitted values change.
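#Beyond eyeballing the plot, car offers a formal score test for non-constant
#error variance (H0: constant variance); a small p-value would indicate heteroscedasticity
ncv.out<-ncvTest(bs.out2)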

#normality test
#Shapiro-Wilk test
shapiro.test(sreg.res1)#judge by the p-value
## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 0.99439, p-value = 1.148e-06
options(scipen=999)#prefer fixed (decimal) notation
shapiro.test(sreg.res1)
## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 0.99439, p-value = 0.000001148
options(scipen=-999)#prefer scientific notation again (note: the R default is scipen=0)
shapiro.test(sreg.res1)
## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 9.9439e-01, p-value = 1.148e-06
#H0 (null): the residuals are normally distributed
#H1 (alternative): they are not
#p-value < significance level 0.05 => reject H0
#so the residuals are not normally distributed
#when normality fails, a Box-Cox variable transformation can be applied

#re-run the normality test on the Happiness-model residuals

options(scipen=999)
shapiro.test(sreg.res1)
## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 0.99439, p-value = 0.000001148
#the residual analysis above fails both homoscedasticity and normality
#when normality fails, a Box-Cox variable transformation can be applied
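#A minimal Box-Cox sketch, assuming the MASS package is installed
#(boxcox() requires a strictly positive response, which Happiness is):
library(MASS)
bc<-boxcox(bs.out2,lambda=seq(-2,2,0.1),plotit=FALSE)#profile log-likelihood over lambda
lambda.hat<-bc$x[which.max(bc$y)]#the lambda that maximizes the likelihood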



#polynomial regression: the independent variable enters at degree 2 or higher
#regression (as a whole) appears as one question per exam

#Multiple regression
#a multiple linear regression model assumes two or more independent variables
#and models the distribution of the dependent variable given them.
#This requires the independent variables to be mutually independent,
#which is checked for multicollinearity with the vif() function.
bs.out3<-lm(Happiness~BM+BF,data=df)
summary(bs.out3)
## 
## Call:
## lm(formula = Happiness ~ BM + BF, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.23134 -0.40553  0.02014  0.41352  1.86210 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  1.60995    0.06412   25.11 <0.0000000000000002 ***
## BM           0.29054    0.02331   12.47 <0.0000000000000002 ***
## BF           0.33817    0.02435   13.89 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6106 on 1922 degrees of freedom
## Multiple R-squared:  0.3343, Adjusted R-squared:  0.3336 
## F-statistic: 482.6 on 2 and 1922 DF,  p-value: < 0.00000000000000022
library(car)
vif(bs.out3)
##       BM       BF 
## 1.693504 1.693504
#both VIF values are below 10, so multicollinearity between the two variables is not a concern
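#Cross-check by hand: with two predictors, VIF = 1/(1 - R^2) from regressing
#one predictor on the other, which is also why both values coincide here
r2.bm<-summary(lm(BM~BF,data=df))$r.squared
vif.bm<-1/(1-r2.bm)#reproduces 1.693504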



#80:20 train/test split
library(caret)
dim(df)
## [1] 1925   26
idx<-createDataPartition(df$Happiness,p=0.8,list=FALSE)
train<-df[idx,]
test<-df[-idx,]
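#Note: this partition is not seeded, so train/test change on every run;
#for a reproducible split, fix the seed first (as done in the 70:30 split below), e.g.
#set.seed(123);idx<-createDataPartition(df$Happiness,p=0.8,list=FALSE)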
library(dplyr)
glimpse(train)
## Rows: 1,541
## Columns: 26
## $ Q1        <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, …
## $ Q2        <int> 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 3, 2, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 3, 3, 2, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 1, …
## $ Q5        <int> 4, 4, 2, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 2, 2, 1, …
## $ Q6        <int> 2, 3, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 4, 2, 1, …
## $ Q7        <int> 2, 2, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, 2, …
## $ Q8        <int> 4, 4, 4, 4, 4, 4, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, 4, …
## $ Q9        <int> 4, 4, 4, 4, 2, 4, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 4, 4, 4, …
## $ Q10       <int> 4, 4, 2, 4, 4, 4, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 3, 3, 3, …
## $ Q11       <int> 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 4, 3, …
## $ Q12       <int> 4, 4, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 2, 3, 4, …
## $ Q13       <int> 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 3, 2, …
## $ Q14       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, 4, …
## $ Q15       <int> 4, 4, 3, 4, 4, 4, 4, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 3, 3, …
## $ Q16       <int> 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, 4, …
## $ Q17       <int> 4, 3, 4, 4, 4, 4, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 4, 2, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, …
## $ Q19       <int> 4, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 3, 3, 3, …
## $ Q20       <int> 4, 1, 3, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 4, 3, 3, …
## $ Gender1   <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ EDU1      <int> 1, 1, 2, 1, 2, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 4, 3, 2, …
## $ BF        <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.2, 4.0, 3.2, 4.0, …
## $ BM        <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 2.2, 3.2, 3.2, 3.6, 3.8, …
## $ Happiness <dbl> 4.0, 4.0, 3.8, 4.0, 4.0, 4.0, 4.8, 3.8, 4.0, 4.0, 3.4, 2.8, …
## $ Peace     <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 4.0, 3.2, 4.0, 3.9, 3.2, …
#linear regression model
fit<-lm(Happiness~BM+BF+Peace,data=train)
summary(fit)
## 
## Call:
## lm(formula = Happiness ~ BM + BF + Peace, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.88286 -0.32719 -0.00337  0.32718  1.55630 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  0.47312    0.08285   5.711   0.0000000134899153 ***
## BM           0.17765    0.02315   7.674   0.0000000000000295 ***
## BF           0.25863    0.02430  10.644 < 0.0000000000000002 ***
## Peace        0.48398    0.02292  21.120 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5375 on 1537 degrees of freedom
## Multiple R-squared:  0.4845, Adjusted R-squared:  0.4835 
## F-statistic: 481.5 on 3 and 1537 DF,  p-value: < 0.00000000000000022
#Happiness = 0.47312 + 0.17765*BM + 0.25863*BF + 0.48398*Peace (from the summary above)
predict(fit,newdata=test)
##        8       19       30       34       38       40       44       49 
## 3.382949 2.955942 3.321167 3.695929 4.102446 3.670195 2.313646 3.285636 
##       54       55       60       79       93      105      110      111 
## 3.682356 3.688636 3.979658 2.355387 4.154172 4.021846 3.708348 3.998994 
##      112      113      119      127      154      162      171      182 
## 3.898422 3.243188 4.047580 4.312232 3.023864 3.401767 4.083111 3.979658 
##      183      185      189      201      211      221      224      226 
## 4.050720 3.398367 3.521155 3.979658 3.569365 3.682615 4.066915 3.998994 
##      227      234      243      247      248      250      268      269 
## 4.102446 2.417099 3.776529 3.614434 3.811801 3.408165 3.563603 3.536715 
##      271      277      287      295      299      300      301      312 
## 3.814941 2.271458 3.944127 3.786067 3.814941 3.717886 3.772753 3.175267 
##      315      327      333      336      337      340      343      346 
## 4.118382 3.908596 3.743879 4.189703 2.384708 3.853989 3.760334 3.689013 
##      349      359      376      377      385      391      402      409 
## 3.537351 2.878483 3.298055 3.259902 4.012049 3.472310 3.649965 3.689013 
##      412      413      414      416      417      427      430      431 
## 3.023864 3.405284 3.524555 3.401767 3.979658 3.973001 3.679545 4.286240 
##      434      436      442      446      448      450      451      460 
## 3.647084 3.995853 3.698551 3.062276 3.016948 3.204141 3.314250 3.805144 
##      469      472      479      483      491      493      496      498 
## 4.748518 4.324652 4.118641 3.503806 3.970120 4.154172 3.559567 4.012049 
##      503      508      523      530      536      543      559      568 
## 4.137977 3.595357 3.960582 2.743016 3.003893 3.378655 4.154172 2.807421 
##      569      573      575      576      580      591      593      602 
## 3.876206 1.997267 2.077608 3.208176 3.821598 3.463031 3.737222 4.557549 
##      612      618      619      621      622      632      633      642 
## 4.134837 4.018447 4.088873 4.515621 3.550029 3.925051 3.576022 3.375515 
##      654      658      674      683      684      689      691      694 
## 3.585560 2.755695 4.118382 2.726821 3.546889 3.685496 3.427500 3.449458 
##      695      698      702      708      710      712      713      716 
## 2.788085 3.317131 3.524036 4.154172 3.992454 4.170368 4.085733 4.214919 
##      721      725      726      730      731      736      737      746 
## 2.462168 2.594494 2.197515 2.417099 2.462168 4.409028 4.118641 3.776270 
##      748      749      755      762      766      772      776      778 
## 3.979658 3.585819 4.338225 2.761833 2.901335 2.216851 2.207054 3.040059 
##      790      791      803      811      821      830      834      841 
## 2.939488 3.298055 2.817218 4.406147 3.181924 3.488765 3.669677 3.385312 
##      842      862      865      878      880      883      884      885 
## 3.611553 4.076194 3.408165 3.963463 3.595098 3.233650 3.934848 3.255867 
##      903      905      909      919      926      930      931      934 
## 3.397991 3.666160 3.566225 3.927932 2.797623 4.005651 3.750536 3.866408 
##      936      939      942      945      954      956      958      964 
## 3.349970 3.023864 2.122937 3.365977 4.102446 4.095530 3.595357 3.653741 
##      969      970      972      975      977      979      986      991 
## 3.236791 4.092649 3.520260 3.911736 3.533575 3.579162 3.363095 4.056859 
##      994      996     1001     1020     1028     1031     1032     1037 
## 4.386175 4.092649 4.208780 3.466807 3.491646 4.154172 3.502338 4.066915 
##     1044     1053     1063     1072     1077     1083     1089     1091 
## 3.776529 3.740362 3.701691 3.668782 3.301831 4.138236 3.059136 3.349522 
##     1092     1095     1101     1110     1118     1120     1122     1127 
## 3.730942 2.695325 3.605414 3.715005 3.233909 4.412804 2.538992 3.052738 
##     1130     1134     1137     1138     1139     1140     1141     1143 
## 3.679734 2.655759 3.818199 3.356179 2.965222 3.488765 3.143771 4.018070 
##     1150     1156     1159     1160     1166     1191     1193     1198 
## 3.572882 3.237685 2.975278 3.950784 4.141494 3.443955 3.527177 3.573400 
##     1202     1208     1211     1214     1215     1222     1225     1229 
## 3.198119 3.479226 3.417962 3.427241 3.478967 3.683250 3.182183 2.846469 
##     1233     1234     1235     1238     1240     1241     1252     1257 
## 3.295433 3.531212 3.278719 3.573400 2.833154 3.491646 3.101324 3.417444 
##     1258     1272     1275     1277     1278     1281     1282     1285 
## 3.443436 4.154172 4.448335 3.621091 3.085647 4.121523 3.056255 3.659503 
##     1286     1293     1298     1313     1318     1323     1325     1329 
## 3.576022 3.731460 3.834017 3.436779 4.047580 3.863786 2.723940 2.868685 
##     1332     1335     1337     1345     1354     1357     1360     1363 
## 3.540491 3.236531 3.437298 3.437038 3.243188 3.491646 3.537351 2.571383 
##     1364     1370     1375     1380     1386     1387     1392     1393 
## 3.859751 3.614434 3.156450 3.698739 3.233909 3.143771 3.510981 3.772494 
##     1394     1396     1406     1410     1412     1415     1434     1436 
## 3.779151 3.282236 3.349781 3.375515 3.152414 4.066915 3.573141 4.286240 
##     1440     1456     1457     1458     1461     1466     1469     1480 
## 3.566484 3.443955 3.050116 3.050116 3.153309 3.592217 4.260506 3.385312 
##     1485     1492     1498     1500     1502     1508     1512     1513 
## 3.359319 3.598239 4.477208 3.417703 3.424360 3.559567 3.766472 2.972138 
##     1520     1530     1535     1545     1554     1558     1560     1566 
## 3.963463 3.036543 3.947527 3.449717 3.866667 4.189444 3.440179 3.614175 
##     1568     1571     1574     1576     1579     1583     1587     1588 
## 4.073313 4.037523 3.963204 3.918394 4.441678 4.047320 4.083370 3.643943 
##     1589     1592     1596     1604     1606     1608     1612     1613 
## 3.040319 2.094192 3.947008 3.724544 3.027640 4.083111 3.391969 3.082247 
##     1627     1637     1640     1641     1644     1648     1652     1654 
## 3.882603 3.198119 4.405887 3.330446 3.802263 3.860010 3.007669 2.752813 
##     1655     1659     1665     1673     1678     1681     1694     1705 
## 3.153309 3.324683 3.695670 2.898454 2.736359 4.073572 3.446577 3.324683 
##     1708     1710     1712     1722     1739     1744     1749     1752 
## 3.417962 3.146652 3.420843 3.595993 4.550892 3.637546 3.398367 4.064034 
##     1760     1761     1763     1770     1771     1775     1776     1778 
## 2.898454 3.830877 3.252986 3.514757 4.044699 4.625730 3.595098 4.021587 
##     1782     1783     1787     1791     1792     1797     1802     1808 
## 3.434157 4.163711 3.427500 3.750277 3.040059 3.966720 3.879722 3.191462 
##     1812     1817     1820     1830     1833     1835     1853     1854 
## 3.778892 2.271199 2.655500 3.463031 3.485624 3.082247 2.933467 3.672817 
##     1859     1862     1864     1866     1867     1870     1882     1883 
## 3.510981 2.810938 3.460150 3.943232 3.814682 3.340502 3.602015 4.673940 
##     1886     1892     1894     1903     1907     1908     1911     1916 
## 3.123281 3.572882 3.676593 4.479830 3.179043 3.123800 3.740998 3.155931
lm_p<-predict(fit,newdata=test)
#round(predict(fit,newdata=test),1)

test$Happiness1<-round(predict(fit,newdata=test),1)
View(test)
#check how large the prediction errors are
#Happiness - Happiness1 = prediction error

#MSE (mean squared error)
#note: the exponent 1 below gives the mean error (bias), not the MSE; the MSE uses ^2
mean((test$Happiness-test$Happiness1)^1)
## [1] -0.001302083
mean((test$Happiness-test$Happiness2)^2)#NaN because Happiness2 is only created further below
## [1] NaN
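#A sketch of the usual hold-out metrics with the square in place
#(uses only objects already defined above):
err<-test$Happiness-test$Happiness1
mse<-mean(err^2)#mean squared error
rmse<-sqrt(mse)#root mean squared error
mae<-mean(abs(err))#mean absolute error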
#linear regression model (refits the same specification as fit)
fit1<-lm(Happiness~BM+BF+Peace,data=train)
summary(fit1)
## 
## Call:
## lm(formula = Happiness ~ BM + BF + Peace, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.88286 -0.32719 -0.00337  0.32718  1.55630 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  0.47312    0.08285   5.711   0.0000000134899153 ***
## BM           0.17765    0.02315   7.674   0.0000000000000295 ***
## BF           0.25863    0.02430  10.644 < 0.0000000000000002 ***
## Peace        0.48398    0.02292  21.120 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5375 on 1537 degrees of freedom
## Multiple R-squared:  0.4845, Adjusted R-squared:  0.4835 
## F-statistic: 481.5 on 3 and 1537 DF,  p-value: < 0.00000000000000022
lm_p<-predict(fit1,newdata=test)

test$Happiness2<-round(predict(fit1,newdata=test),1)


#change the split ratio to 70:30
library(caret)
dim(df)
## [1] 1925   26
set.seed(1)
idx<-createDataPartition(df$Happiness,p=0.7,list=FALSE)
train<-df[idx,]
test<-df[-idx,] 
library(dplyr)
glimpse(train)
## Rows: 1,350
## Columns: 26
## $ Q1        <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 5, …
## $ Q2        <int> 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 2, 2, 3, 2, 5, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 3, 2, 3, 3, 2, 5, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 2, 2, 4, 4, 1, 5, …
## $ Q5        <int> 4, 2, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 3, 1, 2, 2, 1, 5, …
## $ Q6        <int> 2, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 2, 2, 1, 4, 2, 1, 5, …
## $ Q7        <int> 2, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 4, 3, 4, 4, 4, 2, 5, …
## $ Q8        <int> 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 4, 2, 4, 4, 4, 4, 5, …
## $ Q9        <int> 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 2, 4, 2, 4, 4, 4, 5, …
## $ Q10       <int> 4, 2, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 3, 2, 3, 3, 3, 5, …
## $ Q11       <int> 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 5, 4, 3, 3, 4, 3, 5, …
## $ Q12       <int> 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 5, 4, 4, 2, 3, 4, 5, …
## $ Q13       <int> 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 5, 4, 4, 4, 3, 2, 5, …
## $ Q14       <int> 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 5, 4, 4, 4, 4, 4, 5, …
## $ Q15       <int> 4, 3, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 5, 4, 4, 4, 3, 3, 4, …
## $ Q16       <int> 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 4, 4, …
## $ Q17       <int> 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 4, 4, 3, 4, 2, 4, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, …
## $ Q19       <int> 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 5, 4, 2, 3, 3, 3, 4, …
## $ Q20       <int> 4, 3, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 4, 2, 4, 3, 3, 5, …
## $ Gender1   <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, …
## $ EDU1      <int> 1, 2, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 1, 1, 1, 4, 3, 2, 1, …
## $ BF        <dbl> 3.4, 3.6, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, 4.0, 3.2, …
## $ BM        <dbl> 3.2, 3.6, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, 3.8, 3.6, …
## $ Happiness <dbl> 4.0, 3.8, 4.0, 4.0, 4.8, 4.4, 3.8, 4.0, 4.0, 3.4, 2.8, 3.8, …
## $ Peace     <dbl> 4.0, 3.8, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, 3.2, 3.2, …
#theory text p.220, Example 3
x1<-c(7,1,11,11,7,11,3,1,2,21,1,11,10)
x2<-c(26,29,56,31,52,55,71,31,54,47,40,66,68)
x3<-c(6,15,8,8,6,9,17,22,18,4,23,9,8)
x4<-c(60,52,20,47,33,22,6,44,22,26,34,12,12)
y<-c(78.5,74.3,104.3,87.6,95.9,109.2,102.7,72.5,93.1,115.9,83.8,113.3,109.4)
df<-data.frame(x1,x2,x3,x4,y)
step(lm(y~1,df),scope=list(lower=~1,upper=~x1+x2+x3+x4),direction="forward")
## Start:  AIC=71.44
## y ~ 1
## 
##        Df Sum of Sq     RSS    AIC
## + x4    1   1831.90  883.87 58.852
## + x2    1   1809.43  906.34 59.178
## + x1    1   1450.08 1265.69 63.519
## + x3    1    776.36 1939.40 69.067
## <none>              2715.76 71.444
## 
## Step:  AIC=58.85
## y ~ x4
## 
##        Df Sum of Sq    RSS    AIC
## + x1    1    809.10  74.76 28.742
## + x3    1    708.13 175.74 39.853
## <none>              883.87 58.852
## + x2    1     14.99 868.88 60.629
## 
## Step:  AIC=28.74
## y ~ x4 + x1
## 
##        Df Sum of Sq    RSS    AIC
## + x2    1    26.789 47.973 24.974
## + x3    1    23.926 50.836 25.728
## <none>              74.762 28.742
## 
## Step:  AIC=24.97
## y ~ x4 + x1 + x2
## 
##        Df Sum of Sq    RSS    AIC
## <none>              47.973 24.974
## + x3    1   0.10909 47.864 26.944
## 
## Call:
## lm(formula = y ~ x4 + x1 + x2, data = df)
## 
## Coefficients:
## (Intercept)           x4           x1           x2  
##     71.6483      -0.2365       1.4519       0.4161
#Intercept: the intercept term
#scope sets the range of variables the search may consider; 1 denotes the intercept-only model
#direction selects the method: forward = forward selection, backward = backward elimination, both = stepwise selection

library(ISLR)
data("attitude")#attitude actually ships with base R's datasets package, so library(ISLR) is not strictly needed here
glimpse(attitude)
## Rows: 30
## Columns: 7
## $ rating     <dbl> 43, 63, 71, 61, 81, 43, 58, 71, 72, 67, 64, 67, 69, 68, 77,…
## $ complaints <dbl> 51, 64, 70, 63, 78, 55, 67, 75, 82, 61, 53, 60, 62, 83, 77,…
## $ privileges <dbl> 30, 51, 68, 45, 56, 49, 42, 50, 72, 45, 53, 47, 57, 83, 54,…
## $ learning   <dbl> 39, 54, 69, 47, 66, 44, 56, 55, 67, 47, 58, 39, 42, 45, 72,…
## $ raises     <dbl> 61, 63, 76, 54, 71, 54, 66, 70, 71, 62, 58, 59, 55, 59, 79,…
## $ critical   <dbl> 92, 73, 86, 84, 83, 49, 68, 66, 83, 80, 67, 74, 63, 77, 77,…
## $ advance    <dbl> 45, 47, 48, 35, 47, 34, 35, 41, 31, 41, 34, 41, 25, 35, 46,…
step(lm(rating~.,data=attitude),direction="backward")
## Start:  AIC=123.36
## rating ~ complaints + privileges + learning + raises + critical + 
##     advance
## 
##              Df Sum of Sq    RSS    AIC
## - critical    1      3.41 1152.4 121.45
## - raises      1      6.80 1155.8 121.54
## - privileges  1     14.47 1163.5 121.74
## - advance     1     74.11 1223.1 123.24
## <none>                    1149.0 123.36
## - learning    1    180.50 1329.5 125.74
## - complaints  1    724.80 1873.8 136.04
## 
## Step:  AIC=121.45
## rating ~ complaints + privileges + learning + raises + advance
## 
##              Df Sum of Sq    RSS    AIC
## - raises      1     10.61 1163.0 119.73
## - privileges  1     14.16 1166.6 119.82
## - advance     1     71.27 1223.7 121.25
## <none>                    1152.4 121.45
## - learning    1    177.74 1330.1 123.75
## - complaints  1    724.70 1877.1 134.09
## 
## Step:  AIC=119.73
## rating ~ complaints + privileges + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - privileges  1     16.10 1179.1 118.14
## - advance     1     61.60 1224.6 119.28
## <none>                    1163.0 119.73
## - learning    1    197.03 1360.0 122.42
## - complaints  1   1165.94 2328.9 138.56
## 
## Step:  AIC=118.14
## rating ~ complaints + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - advance     1     75.54 1254.7 118.00
## <none>                    1179.1 118.14
## - learning    1    186.12 1365.2 120.54
## - complaints  1   1259.91 2439.0 137.94
## 
## Step:  AIC=118
## rating ~ complaints + learning
## 
##              Df Sum of Sq    RSS    AIC
## <none>                    1254.7 118.00
## - learning    1    114.73 1369.4 118.63
## - complaints  1   1370.91 2625.6 138.16
## 
## Call:
## lm(formula = rating ~ complaints + learning, data = attitude)
## 
## Coefficients:
## (Intercept)   complaints     learning  
##      9.8709       0.6435       0.2112
step(lm(rating~.,data=attitude),direction="forward") #note: stops immediately; see the sketch after this output
## Start:  AIC=123.36
## rating ~ complaints + privileges + learning + raises + critical + 
##     advance
## 
## Call:
## lm(formula = rating ~ complaints + privileges + learning + raises + 
##     critical + advance, data = attitude)
## 
## Coefficients:
## (Intercept)   complaints   privileges     learning       raises     critical  
##    10.78708      0.61319     -0.07305      0.32033      0.08173      0.03838  
##     advance  
##    -0.21706
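#Because forward selection can only add terms, starting from the full model
#(as above) terminates at once; a sketch of the intended usage starts from the
#intercept-only model with the full formula as the upper scope:
fwd.fit<-step(lm(rating~1,data=attitude),
              scope=~complaints+privileges+learning+raises+critical+advance,
              direction="forward",trace=0)#trace=0 suppresses the step-by-step log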
step(lm(rating~.,data=attitude),direction="both")
## Start:  AIC=123.36
## rating ~ complaints + privileges + learning + raises + critical + 
##     advance
## 
##              Df Sum of Sq    RSS    AIC
## - critical    1      3.41 1152.4 121.45
## - raises      1      6.80 1155.8 121.54
## - privileges  1     14.47 1163.5 121.74
## - advance     1     74.11 1223.1 123.24
## <none>                    1149.0 123.36
## - learning    1    180.50 1329.5 125.74
## - complaints  1    724.80 1873.8 136.04
## 
## Step:  AIC=121.45
## rating ~ complaints + privileges + learning + raises + advance
## 
##              Df Sum of Sq    RSS    AIC
## - raises      1     10.61 1163.0 119.73
## - privileges  1     14.16 1166.6 119.82
## - advance     1     71.27 1223.7 121.25
## <none>                    1152.4 121.45
## + critical    1      3.41 1149.0 123.36
## - learning    1    177.74 1330.1 123.75
## - complaints  1    724.70 1877.1 134.09
## 
## Step:  AIC=119.73
## rating ~ complaints + privileges + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - privileges  1     16.10 1179.1 118.14
## - advance     1     61.60 1224.6 119.28
## <none>                    1163.0 119.73
## + raises      1     10.61 1152.4 121.45
## + critical    1      7.21 1155.8 121.54
## - learning    1    197.03 1360.0 122.42
## - complaints  1   1165.94 2328.9 138.56
## 
## Step:  AIC=118.14
## rating ~ complaints + learning + advance
## 
##              Df Sum of Sq    RSS    AIC
## - advance     1     75.54 1254.7 118.00
## <none>                    1179.1 118.14
## + privileges  1     16.10 1163.0 119.73
## + raises      1     12.54 1166.6 119.82
## + critical    1      7.18 1171.9 119.96
## - learning    1    186.12 1365.2 120.54
## - complaints  1   1259.91 2439.0 137.94
## 
## Step:  AIC=118
## rating ~ complaints + learning
## 
##              Df Sum of Sq    RSS    AIC
## <none>                    1254.7 118.00
## + advance     1     75.54 1179.1 118.14
## - learning    1    114.73 1369.4 118.63
## + privileges  1     30.03 1224.6 119.28
## + raises      1      1.19 1253.5 119.97
## + critical    1      0.00 1254.7 120.00
## - complaints  1   1370.91 2625.6 138.16
## 
## Call:
## lm(formula = rating ~ complaints + learning, data = attitude)
## 
## Coefficients:
## (Intercept)   complaints     learning  
##      9.8709       0.6435       0.2112
#recommended order of preference in practice: both > backward > forward



#Health self-management section
#rm(list=ls())
#getwd()
#setwd("c:/data")
#getwd()
#df<-read.csv("Data1.csv")
#table(df$Gender1)
#female: 0, male: 1
#glimpse(df)
#lmfit<-lm(Happiness~EDU1,data=df)
#df$Gender1<-factor(df$Gender1)
#summary(lmfit)

#EDU1: 1 = middle school, 2 = high school, 3 = college, 4 = graduate school
#convert to a factor to create dummy variables: df$EDU1<-factor(df$EDU1)
#the results show education level is unrelated to happiness (no significance stars)
#the same holds for gender, statistically
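#A runnable version of the sketch above, re-reading the file since df was
#overwritten by the toy x1..x4 data earlier:
df2<-read.csv("DATA1.csv")
df2$EDU1<-factor(df2$EDU1)#the factor levels enter the model as dummy variables
edu.fit<-lm(Happiness~EDU1,data=df2)#summary(edu.fit) shows no significant education effect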


#bs.out2<-lm(Happiness~BM,data=df)
#summary(bs.out2)


#Example 12
#library(datasets)
#data("USArrests")#데이터는 미국 50개 주의 인구 10만명당 살인,폭행,강간으로 인한 체포 수와 도시인구비율을 나타낸다.
#head(USArrests)

#fit<-prcomp(USArrests,scale=TRUE)
#summary(fit)
#prcomp() performs principal component analysis
#scale=TRUE standardizes the variables first
#with 4 variables there are 4 principal components
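#A runnable version of the commented-out sketch (USArrests ships with base R):
data("USArrests")
pca.fit<-prcomp(USArrests,scale.=TRUE)#scale.=TRUE standardizes each variable first
#summary(pca.fit) reports the proportion of variance explained by each component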


#note pp. 240 and 242 of the theory text


#p.244, Example 13
Nile
## Time Series:
## Start = 1871 
## End = 1970 
## Frequency = 1 
##   [1] 1120 1160  963 1210 1160 1160  813 1230 1370 1140  995  935 1110  994 1020
##  [16]  960 1180  799  958 1140 1100 1210 1150 1250 1260 1220 1030 1100  774  840
##  [31]  874  694  940  833  701  916  692 1020 1050  969  831  726  456  824  702
##  [46] 1120 1100  832  764  821  768  845  864  862  698  845  744  796 1040  759
##  [61]  781  865  845  944  984  897  822 1010  771  676  649  846  812  742  801
##  [76] 1040  860  874  848  890  744  749  838 1050  918  986  797  923  975  815
##  [91] 1020  906  901 1170  912  746  919  718  714  740
plot(Nile)
#first-order differencing
Nile.diff1<-diff(Nile,differences=1)
plot(Nile.diff1)
#second-order differencing
Nile.diff2<-diff(Nile,differences=2)
plot(Nile.diff2)
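#A quick follow-up check in base R: the sample (partial) autocorrelations of the
#differenced series help judge whether differencing removed the trend
par(mfrow=c(1,2))
acf(Nile.diff1,main="ACF, 1st difference")
pacf(Nile.diff1,main="PACF, 1st difference")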