回歸模型
# Work from the Downloads folder, where the data file is read from.
setwd("~/Downloads/")
# Load the data set; the file is read with Big5 encoding.
test <- read.csv("hap.csv", fileEncoding = "big5")
# Full model: regress Hap on all six candidate predictors.
lm1 <- lm(
  Hap ~ GDP + Family + Life + Freedom + Generosity + CPI,
  data = test
)
summary(lm1)
##
## Call:
## lm(formula = Hap ~ GDP + Family + Life + Freedom + Generosity +
## CPI, data = test)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.58940 -0.27212 -0.00603 0.29740 1.40201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.736409 0.192672 9.012 1.49e-15 ***
## GDP 0.850232 0.221376 3.841 0.000186 ***
## Family 0.999412 0.210608 4.745 5.14e-06 ***
## Life 1.291357 0.344058 3.753 0.000256 ***
## Freedom 1.576831 0.351209 4.490 1.49e-05 ***
## Generosity 0.514713 0.331871 1.551 0.123205
## CPI 0.002584 0.003378 0.765 0.445614
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5048 on 138 degrees of freedom
## Multiple R-squared: 0.8145, Adjusted R-squared: 0.8064
## F-statistic: 101 on 6 and 138 DF, p-value: < 2.2e-16
# Stepwise selection by AIC (k = 2), starting from the full model lm1.
# `k` and the search direction are arguments of step(), not summary(); passed
# to summary() they were silently ignored, and step() has no `method`
# argument. With no `scope`, step() can only drop terms, so the direction is
# stated as "backward" to match what actually runs.
summary(step(lm1, direction = "backward", k = 2))
## Start: AIC=-191.43
## Hap ~ GDP + Family + Life + Freedom + Generosity + CPI
##
## Df Sum of Sq RSS AIC
## - CPI 1 0.1491 35.310 -192.82
## <none> 35.161 -191.43
## - Generosity 1 0.6129 35.774 -190.93
## - Life 1 3.5893 38.751 -179.34
## - GDP 1 3.7583 38.920 -178.71
## - Freedom 1 5.1360 40.297 -173.66
## - Family 1 5.7375 40.899 -171.52
##
## Step: AIC=-192.82
## Hap ~ GDP + Family + Life + Freedom + Generosity
##
## Df Sum of Sq RSS AIC
## <none> 35.310 -192.82
## - Generosity 1 0.6989 36.009 -191.98
## - Life 1 3.9596 39.270 -179.41
## - GDP 1 4.8134 40.124 -176.29
## - Family 1 5.6705 40.981 -173.23
## - Freedom 1 6.5768 41.887 -170.05
##
## Call:
## lm(formula = Hap ~ GDP + Family + Life + Freedom + Generosity,
## data = test)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.54845 -0.28617 -0.00226 0.28461 1.36783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7291 0.1921 8.999 1.53e-15 ***
## GDP 0.9068 0.2083 4.353 2.59e-05 ***
## Family 0.9927 0.2101 4.725 5.58e-06 ***
## Life 1.3364 0.3385 3.948 0.000125 ***
## Freedom 1.6710 0.3284 5.088 1.15e-06 ***
## Generosity 0.5456 0.3289 1.659 0.099443 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.504 on 139 degrees of freedom
## Multiple R-squared: 0.8137, Adjusted R-squared: 0.807
## F-statistic: 121.4 on 5 and 139 DF, p-value: < 2.2e-16
# Reduced model: Hap regressed on five predictors (no CPI term).
lm2 <- lm(
  Hap ~ GDP + Family + Life + Freedom + Generosity,
  data = test
)
summary(lm2)
##
## Call:
## lm(formula = Hap ~ GDP + Family + Life + Freedom + Generosity,
## data = test)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.54845 -0.28617 -0.00226 0.28461 1.36783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7291 0.1921 8.999 1.53e-15 ***
## GDP 0.9068 0.2083 4.353 2.59e-05 ***
## Family 0.9927 0.2101 4.725 5.58e-06 ***
## Life 1.3364 0.3385 3.948 0.000125 ***
## Freedom 1.6710 0.3284 5.088 1.15e-06 ***
## Generosity 0.5456 0.3289 1.659 0.099443 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.504 on 139 degrees of freedom
## Multiple R-squared: 0.8137, Adjusted R-squared: 0.807
## F-statistic: 121.4 on 5 and 139 DF, p-value: < 2.2e-16
# Draw the lm diagnostic plots on a single 2 x 2 page.
par(mfrow = c(2, 2))
plot(lm2)

# Shapiro-Wilk normality test on the residuals. Use the residuals() accessor:
# `lm2$resid` only worked through `$` partial matching of the `residuals`
# component.
shapiro.test(residuals(lm2))
##
## Shapiro-Wilk normality test
##
## data: lm2$resid
## W = 0.98687, p-value = 0.1858
共線性
library(car)
## Warning: package 'car' was built under R version 3.4.3
# Variance inflation factor of each predictor in lm2 (collinearity check).
car::vif(lm2)
## GDP Family Life Freedom Generosity
## 4.317090 2.158613 3.509039 1.418113 1.159391
殘差變異數具均齊性
# Score test for non-constant error variance in lm2.
car::ncvTest(lm2)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.05926009 Df = 1 p = 0.8076692
殘差之間互相獨立
# Durbin-Watson test for autocorrelation in the residuals of lm2.
car::durbinWatsonTest(lm2)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.111204 2.213206 0.178
## Alternative hypothesis: rho != 0
離群值分析
# Leverage of each observation (diagonal of the hat matrix).
hii <- hatvalues(lm2)
# Standardized residuals: e_i / (sigma_hat * sqrt(1 - h_ii)).
s.r <- lm2[["residuals"]] / (sqrt(1 - hii) * summary(lm2)[["sigma"]])
# Row positions whose standardized residual exceeds 2.5 in absolute value.
which(abs(unname(s.r)) > 2.5)
## [1] 18 117 121
影響點偵測
# Flag influential observations by DFFITS. Compare the absolute value with the
# cut-off: DFFITS is signed, so a one-sided test misses large negative values.
# NOTE(review): the cut-off of 2 is kept from the original; a size-adjusted
# cut-off such as 2 * sqrt(p / n) is commonly used instead -- confirm intent.
which(abs(as.vector(dffits(lm2))) > 2)
## integer(0)