한정민바보.knit

library(car)

## Warning: 패키지 'car'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: carData

## Warning: 패키지 'carData'는 R 버전 4.2.3에서 작성되었습니다

library(caret)

## Warning: 패키지 'caret'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: ggplot2

## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: lattice

library(dplyr)

## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Report the directory that relative paths resolve against (diagnostic only).
getwd()

## [1] "C:/Users/cic/Desktop"

# Load the analysis data from an explicit path instead of calling setwd(),
# so the session's working directory is left untouched. The former
# rm(list = ls()) was dropped as well: it wiped every object in the caller's
# workspace, and nothing below depends on a cleared environment.
data_dir <- "C:/Users/cic/Desktop"
data_path <- file.path(data_dir, "Data1.csv")
if (!file.exists(data_path)) {
  stop("Data file not found: ", data_path, call. = FALSE)
}
df <- read.csv(data_path)
# Simple linear regression: Happiness explained by BM alone.
bs.out2 <- lm(formula = Happiness ~ BM, data = df)

# Keep the residuals for the normality check below and show the first one.
sreg.res1 <- resid(bs.out2)
head(sreg.res1, n = 1L)

##         1 
## 0.3413448

# Shapiro-Wilk test of residual normality. With the default penalty
# (scipen = 0) the small p-value is printed in scientific notation.
shapiro.test(sreg.res1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 0.99439, p-value = 1.148e-06

# A large positive scipen biases number printing toward fixed notation.
options(scipen = 999)
shapiro.test(sreg.res1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 0.99439, p-value = 0.000001148

# Show the scientific form again. The option name must be spelled `scipen`;
# the earlier `scripen` was silently stored as an unrelated option and had no
# effect on printing. The default penalty of 0 is enough to get scientific
# notation back for this p-value.
fixed_opts <- options(scipen = 0)
shapiro.test(sreg.res1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sreg.res1
## W = 0.99439, p-value = 1.148e-06

# Put scipen = 999 back so the remaining output keeps fixed notation.
options(fixed_opts)

# Coefficient table and fit statistics for the simple regression.
summary(bs.out2)

## 
## Call:
## lm(formula = Happiness ~ BM, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1591 -0.4577  0.0418  0.4409  1.9386 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  2.06599    0.05777   35.77 <0.0000000000000002 ***
## BM           0.49771    0.01878   26.50 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6404 on 1923 degrees of freedom
## Multiple R-squared:  0.2675, Adjusted R-squared:  0.2671 
## F-statistic: 702.2 on 1 and 1923 DF,  p-value: < 0.00000000000000022

# Draw the lm diagnostic plots on a single 2 x 2 page. The layout has to be
# set BEFORE plotting, and the parameter is `mfrow` (the earlier `mflow` was
# rejected with a warning and changed nothing). The previous graphics
# settings are restored afterwards so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
plot(bs.out2)
par(old_par)

# Multiple regression: add BF as a second predictor of Happiness.
bs.out3 <- lm(formula = Happiness ~ BM + BF, data = df)
summary(object = bs.out3)

## 
## Call:
## lm(formula = Happiness ~ BM + BF, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.23134 -0.40553  0.02014  0.41352  1.86210 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  1.60995    0.06412   25.11 <0.0000000000000002 ***
## BM           0.29054    0.02331   12.47 <0.0000000000000002 ***
## BF           0.33817    0.02435   13.89 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6106 on 1922 degrees of freedom
## Multiple R-squared:  0.3343, Adjusted R-squared:  0.3336 
## F-statistic: 482.6 on 2 and 1922 DF,  p-value: < 0.00000000000000022

# Variance inflation factors for the two-predictor model (multicollinearity
# check); the function comes from the car package loaded at the top.
car::vif(bs.out3)

##       BM       BF 
## 1.693504 1.693504

# Fix the RNG seed so the random 80/20 split, and every model result that
# depends on it, is the same on each run. Output recorded further down came
# from an earlier unseeded run, so its exact numbers can differ.
set.seed(123)

# Row indices for the training portion, sampled on the outcome variable.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
idx <- createDataPartition(df$Happiness, p = 0.8, list = FALSE)
train <- df[idx, ]
test <- df[-idx, ]

# Quick structural overview of the training rows.
glimpse(train)

## Rows: 1,541
## Columns: 26
## $ Q1        <int> 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, …
## $ Q2        <int> 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 2, 4, 4, 2, 2, 3, 2, 4, 4, 4, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 2, 3, 3, 2, 4, 4, 4, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 2, 4, 4, 1, 4, 4, 4, …
## $ Q5        <int> 4, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 1, 2, 2, 1, 4, 5, 2, …
## $ Q6        <int> 2, 4, 4, 4, 4, 4, 2, 2, 2, 4, 4, 3, 5, 1, 4, 2, 1, 4, 2, 4, …
## $ Q7        <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 5, 4, 4, 4, 2, 5, 4, 4, …
## $ Q8        <int> 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 3, 5, 4, 4, 4, 4, 5, 2, 4, …
## $ Q9        <int> 4, 4, 4, 2, 4, 5, 4, 4, 4, 2, 2, 4, 5, 2, 4, 4, 4, 5, 5, 4, …
## $ Q10       <int> 4, 2, 4, 4, 4, 5, 4, 2, 4, 4, 4, 3, 4, 2, 3, 3, 3, 5, 4, 4, …
## $ Q11       <int> 4, 4, 4, 4, 4, 5, 4, 4, 3, 4, 4, 4, 4, 3, 3, 4, 3, 5, 2, 3, …
## $ Q12       <int> 4, 4, 4, 4, 4, 5, 4, 4, 3, 4, 3, 3, 4, 4, 2, 3, 4, 5, 2, 3, …
## $ Q13       <int> 4, 4, 4, 4, 4, 5, 4, 4, 4, 2, 4, 4, 4, 4, 4, 3, 2, 5, 4, 3, …
## $ Q14       <int> 4, 4, 4, 4, 4, 5, 4, 4, 4, 3, 4, 5, 4, 4, 4, 4, 4, 5, 2, 3, …
## $ Q15       <int> 4, 3, 4, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 4, 4, 3, 3, 5, 2, 3, …
## $ Q16       <int> 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 5, 5, …
## $ Q17       <int> 4, 4, 4, 4, 4, 2, 4, 4, 4, 3, 2, 4, 5, 3, 4, 2, 4, 3, 4, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 3, 5, 5, …
## $ Q19       <int> 4, 4, 4, 4, 4, 4, 2, 4, 4, 1, 4, 4, 4, 2, 3, 3, 3, 2, 1, 1, …
## $ Q20       <int> 4, 3, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 5, 2, 4, 3, 3, 5, 1, 4, …
## $ Gender1   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, …
## $ EDU1      <int> 1, 2, 1, 2, 1, 1, 3, 2, 1, 1, 3, 3, 2, 1, 4, 3, 2, 2, 3, 1, …
## $ BF        <dbl> 3.4, 3.6, 4.2, 4.0, 4.0, 3.6, 3.2, 4.0, 3.2, 4.0, 3.2, 4.0, …
## $ BM        <dbl> 3.2, 3.6, 4.0, 3.6, 4.0, 4.6, 3.2, 3.2, 3.6, 3.8, 3.6, 3.4, …
## $ Happiness <dbl> 4.0, 3.8, 4.0, 4.0, 4.0, 4.8, 4.0, 4.0, 3.4, 2.8, 3.8, 4.0, …
## $ Peace     <dbl> 4.0, 3.8, 4.0, 4.0, 4.0, 3.8, 3.2, 4.0, 3.9, 3.2, 3.2, 4.1, …

# Three-predictor model of Happiness, estimated on the training rows only.
fit <- lm(formula = Happiness ~ BM + BF + Peace, data = train)
summary(object = fit)

## 
## Call:
## lm(formula = Happiness ~ BM + BF + Peace, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89019 -0.31596 -0.00557  0.33099  1.56448 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  0.43357    0.08439   5.138          0.000000314 ***
## BM           0.20307    0.02343   8.667 < 0.0000000000000002 ***
## BF           0.25248    0.02420  10.433 < 0.0000000000000002 ***
## Peace        0.47807    0.02305  20.739 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5394 on 1537 degrees of freedom
## Multiple R-squared:  0.4833, Adjusted R-squared:  0.4823 
## F-statistic: 479.3 on 3 and 1537 DF,  p-value: < 0.00000000000000022

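# Fitted equation (coefficients from the summary above; they change with the
# random train/test split):
# Happiness = 0.43357 + 0.20307 * BM + 0.25248 * BF + 0.47807 * Peace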

# Predict Happiness for the held-out rows once and reuse the result below
# instead of calling predict() again for every step.
lm_p <- predict(fit, newdata = test)
head(lm_p, 1)

##        2 
## 3.472499

# The same predictions rounded to one decimal place.
lm_p_rounded <- round(lm_p, 1)
head(lm_p_rounded, 1)

##   2 
## 3.5

# Store the rounded predictions next to the observed values for comparison.
test$Happiness1 <- lm_p_rounded

# View() opens a data viewer window, so only call it in an interactive
# session; this keeps non-interactive runs (e.g. knitting) from depending on
# a GUI.
if (interactive()) {
  View(test)
}