library(car)
## Warning: 패키지 'car'는 R 버전 4.2.3에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: carData
## Warning: 패키지 'carData'는 R 버전 4.2.3에서 작성되었습니다
library(caret)
## Warning: 패키지 'caret'는 R 버전 4.2.3에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: ggplot2
## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: lattice
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Show the session's working directory (informational only).
getwd()
## [1] "C:/Users/cic/Desktop"
# Load Data1.csv through an explicit path instead of calling setwd(), so the
# script no longer changes the session's working directory. The former
# rm(list = ls()) call was dropped as well: it wiped the caller's workspace
# and nothing below relies on an empty environment.
data_dir <- "C:/Users/cic/Desktop"
df <- read.csv(file.path(data_dir, "Data1.csv"))
# Simple linear regression of Happiness on BM, followed by a Shapiro-Wilk
# normality test on the model residuals.
bs.out2 <- lm(Happiness ~ BM, data = df)
sreg.res1 <- residuals(bs.out2)
sreg.res1 %>% head(1)
## 1
## 0.3413448
shapiro.test(sreg.res1)
##
## Shapiro-Wilk normality test
##
## data: sreg.res1
## W = 0.99439, p-value = 1.148e-06
# A large positive scipen makes R prefer fixed over scientific notation.
options(scipen = 999)
shapiro.test(sreg.res1)
##
## Shapiro-Wilk normality test
##
## data: sreg.res1
## W = 0.99439, p-value = 0.000001148
# A large negative scipen makes R prefer scientific notation. The option name
# used to be misspelled ("scripen"), so the call had no effect on printing and
# the recorded output stayed in fixed notation. The previous setting is
# restored right after the test so later output keeps the fixed notation
# selected above.
old_opts <- options(scipen = -999)
shapiro.test(sreg.res1)
## NOTE(review): output not re-recorded after the fix; the statistic and
## p-value are expected in scientific notation (e.g. p-value = 1.148e-06).
options(old_opts)
# Coefficient table and fit statistics for the simple regression.
summary(bs.out2)
##
## Call:
## lm(formula = Happiness ~ BM, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1591 -0.4577 0.0418 0.4409 1.9386
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.06599 0.05777 35.77 <0.0000000000000002 ***
## BM 0.49771 0.01878 26.50 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6404 on 1923 degrees of freedom
## Multiple R-squared: 0.2675, Adjusted R-squared: 0.2671
## F-statistic: 702.2 on 1 and 1923 DF, p-value: < 0.00000000000000022
# Draw the lm diagnostic plots on a 2 x 2 grid. The layout has to be set
# before plot() is called, and the parameter is "mfrow" (the earlier "mflow"
# spelling only produced a warning). The previous graphical parameters are
# restored afterwards so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
plot(bs.out2)
par(old_par)
# Multiple regression: Happiness modelled on BM and BF together.
bs.out3 <- lm(formula = Happiness ~ BM + BF, data = df)
summary(object = bs.out3)
##
## Call:
## lm(formula = Happiness ~ BM + BF, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.23134 -0.40553 0.02014 0.41352 1.86210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.60995 0.06412 25.11 <0.0000000000000002 ***
## BM 0.29054 0.02331 12.47 <0.0000000000000002 ***
## BF 0.33817 0.02435 13.89 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6106 on 1922 degrees of freedom
## Multiple R-squared: 0.3343, Adjusted R-squared: 0.3336
## F-statistic: 482.6 on 2 and 1922 DF, p-value: < 0.00000000000000022
# Variance inflation factors for the two predictors (multicollinearity check).
car::vif(bs.out3)
## BM BF
## 1.693504 1.693504
# Split df into training (about 80%) and test rows with
# caret::createDataPartition(), partitioning on Happiness.
# The seed is fixed so the random partition, and every result computed from
# it, is the same on each run.
set.seed(123)
idx <- createDataPartition(df$Happiness, p = 0.8, list = FALSE)
train <- df[idx, ]
test <- df[-idx, ]
# NOTE(review): the output recorded below comes from an earlier, unseeded run;
# the individual values will differ from what this seeded split prints.
glimpse(train)
## Rows: 1,541
## Columns: 26
## $ Q1 <int> 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, …
## $ Q2 <int> 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 2, 4, 4, 2, 2, 3, 2, 4, 4, 4, …
## $ Q3 <int> 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 2, 3, 3, 2, 4, 4, 4, …
## $ Q4 <int> 3, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 2, 4, 4, 1, 4, 4, 4, …
## $ Q5 <int> 4, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 1, 2, 2, 1, 4, 5, 2, …
## $ Q6 <int> 2, 4, 4, 4, 4, 4, 2, 2, 2, 4, 4, 3, 5, 1, 4, 2, 1, 4, 2, 4, …
## $ Q7 <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 5, 4, 4, 4, 2, 5, 4, 4, …
## $ Q8 <int> 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 3, 5, 4, 4, 4, 4, 5, 2, 4, …
## $ Q9 <int> 4, 4, 4, 2, 4, 5, 4, 4, 4, 2, 2, 4, 5, 2, 4, 4, 4, 5, 5, 4, …
## $ Q10 <int> 4, 2, 4, 4, 4, 5, 4, 2, 4, 4, 4, 3, 4, 2, 3, 3, 3, 5, 4, 4, …
## $ Q11 <int> 4, 4, 4, 4, 4, 5, 4, 4, 3, 4, 4, 4, 4, 3, 3, 4, 3, 5, 2, 3, …
## $ Q12 <int> 4, 4, 4, 4, 4, 5, 4, 4, 3, 4, 3, 3, 4, 4, 2, 3, 4, 5, 2, 3, …
## $ Q13 <int> 4, 4, 4, 4, 4, 5, 4, 4, 4, 2, 4, 4, 4, 4, 4, 3, 2, 5, 4, 3, …
## $ Q14 <int> 4, 4, 4, 4, 4, 5, 4, 4, 4, 3, 4, 5, 4, 4, 4, 4, 4, 5, 2, 3, …
## $ Q15 <int> 4, 3, 4, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 4, 4, 3, 3, 5, 2, 3, …
## $ Q16 <int> 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 5, 5, …
## $ Q17 <int> 4, 4, 4, 4, 4, 2, 4, 4, 4, 3, 2, 4, 5, 3, 4, 2, 4, 3, 4, 4, …
## $ Q18 <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 3, 5, 5, …
## $ Q19 <int> 4, 4, 4, 4, 4, 4, 2, 4, 4, 1, 4, 4, 4, 2, 3, 3, 3, 2, 1, 1, …
## $ Q20 <int> 4, 3, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 5, 2, 4, 3, 3, 5, 1, 4, …
## $ Gender1 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, …
## $ EDU1 <int> 1, 2, 1, 2, 1, 1, 3, 2, 1, 1, 3, 3, 2, 1, 4, 3, 2, 2, 3, 1, …
## $ BF <dbl> 3.4, 3.6, 4.2, 4.0, 4.0, 3.6, 3.2, 4.0, 3.2, 4.0, 3.2, 4.0, …
## $ BM <dbl> 3.2, 3.6, 4.0, 3.6, 4.0, 4.6, 3.2, 3.2, 3.6, 3.8, 3.6, 3.4, …
## $ Happiness <dbl> 4.0, 3.8, 4.0, 4.0, 4.0, 4.8, 4.0, 4.0, 3.4, 2.8, 3.8, 4.0, …
## $ Peace <dbl> 4.0, 3.8, 4.0, 4.0, 4.0, 3.8, 3.2, 4.0, 3.9, 3.2, 3.2, 4.1, …
# Fit the three-predictor model (BM, BF, Peace) on the training rows only.
fit <- lm(formula = Happiness ~ BM + BF + Peace, data = train)
summary(object = fit)
##
## Call:
## lm(formula = Happiness ~ BM + BF + Peace, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.89019 -0.31596 -0.00557 0.33099 1.56448
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.43357 0.08439 5.138 0.000000314 ***
## BM 0.20307 0.02343 8.667 < 0.0000000000000002 ***
## BF 0.25248 0.02420 10.433 < 0.0000000000000002 ***
## Peace 0.47807 0.02305 20.739 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5394 on 1537 degrees of freedom
## Multiple R-squared: 0.4833, Adjusted R-squared: 0.4823
## F-statistic: 479.3 on 3 and 1537 DF, p-value: < 0.00000000000000022
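# Fitted equation (coefficients taken from the summary recorded above; they
# change with the random train/test split):
# Happiness = 0.43357 + 0.20307*BM + 0.25248*BF + 0.47807*Peace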
# Predict Happiness for the held-out rows once and reuse the result instead
# of calling predict() again for every step.
lm_p <- predict(fit, newdata = test)
lm_p %>% head(1)
## 2
## 3.472499
# Predictions rounded to one decimal place.
lm_p_rounded <- round(lm_p, 1)
lm_p_rounded %>% head(1)
## 2
## 3.5
# Store the rounded predictions next to the observed values.
test$Happiness1 <- lm_p_rounded
# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(test)
}