library(faraway)
##
## 載入套件:'faraway'
## 下列物件被遮斷自 'package:car':
##
## logit, vif
# Load the `happy` data set, store `sex` as a factor so that models and
# plots treat it as categorical, then print the resulting structure.
data("happy")
happy[["sex"]] <- as.factor(happy[["sex"]])
str(object = happy)
## 'data.frame': 39 obs. of 5 variables:
## $ happy: num 10 8 8 8 4 9 8 6 5 4 ...
## $ money: num 36 47 53 35 88 175 175 45 35 55 ...
## $ sex : Factor w/ 2 levels "0","1": 1 2 1 2 2 2 2 1 2 2 ...
## $ love : num 3 3 3 3 1 3 3 2 2 1 ...
## $ work : num 4 1 5 3 2 4 4 3 2 4 ...
# Data set "happy" from the "faraway" package.
# The variables mean:
# happy: happiness on a 10-point scale, where 10 is most happy
# money: family income in thousands of dollars
# sex: 1 = satisfactory sexual activity, 0 = not
# Scatterplot of happiness against family income with the smoother disabled.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently change what this call does.
scatterplot(happy ~ money, data = happy, smooth = FALSE)
##happy與money變異不一樣,就上圖分布來說似乎偏向正相關
# Happiness against income, coloured by `sex`, with a fitted linear-model
# line for each colour group and no confidence band.
# `se = FALSE` replaces the reassignable shorthand `F`.
ggplot(data = happy, mapping = aes(x = money, y = happy, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
#在sex:1(satisfactory sexual activity)以及在sex:0時,happy與money就上圖分布來說似乎皆偏向正相關
#進一步檢驗
# Fit an additive linear model of happiness on income and `sex`,
# then show the coefficient table and overall fit statistics.
hpmod <- lm(formula = happy ~ money + sex, data = happy)
summary(object = hpmod)
##
## Call:
## lm(formula = happy ~ money + sex, data = happy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0776 -1.3140 0.5654 1.2268 3.3331
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.077619 0.665952 9.126 6.74e-11 ***
## money 0.016369 0.008827 1.854 0.0719 .
## sex1 -0.507639 0.665037 -0.763 0.4502
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.825 on 36 degrees of freedom
## Multiple R-squared: 0.08817, Adjusted R-squared: 0.03751
## F-statistic: 1.74 on 2 and 36 DF, p-value: 0.1899
#先看到money跟happy的關係,t value =1.854 <2;p value=0.0719 > 0.05。顯示相關未達顯著
#再考量有無sex時,與happy的關係,t value的絕對值= 0.763 <2;p value=0.4502 > 0.05。顯示相關未達顯著
## Adjusted R-squared: 0.03751,表示經調整後 hpmod 僅能解釋 happy 約 3.75% 的變異
# Histogram of the hpmod residuals (about 20 bins requested, no title).
hist(
  residuals(hpmod),
  breaks = 20,
  main = "",
  xlab = "Value of residual"
)
# 上圖的殘差分布未能呈現常態,顯示 residual(觀察值與預測值之間的誤差)未能集中於 0 附近,仍存在未被解釋的 residual。
# Normal Q-Q plot of the hpmod residuals.
plot(x = hpmod, which = 2L)
# 上圖顯示,資料點未能貼著虛線,residual 偏離常態分布(與前面的直方圖結果一致),有些變異無法被該 model 解釋。並且圖中標示出三個 outlier(離群值)。
# Linearity check: residuals plotted against fitted values.
plot(x = hpmod, which = 1L)
#上圖顯示,紅線未能貼著虛線,存在residual,有些變異無法被該model解釋。
# Residual plots for hpmod; the call also prints a table of test
# statistics for the model terms together with a Tukey test.
residualPlots(model = hpmod)
## Test stat Pr(>|Test stat|)
## money -0.6600 0.5135
## sex
## Tukey test -0.2207 0.8253
#上圖呈現個別變項的residual
# Scale-Location plot (third diagnostic panel of plot.lm) for hpmod.
plot(hpmod, which = 3)
# The plot above shows that the residuals are related to the predicted
# values, which means some variation in the residuals is still not
# explained by the hpmod model.