library(faraway)
## 
## 載入套件:'faraway'
## 下列物件被遮斷自 'package:car':
## 
##     logit, vif
# Load the `happy` data set, recode `sex` as a factor so it is treated as a
# categorical variable, then inspect the resulting structure.
data("happy")
happy[["sex"]] <- as.factor(happy[["sex"]])
str(happy)
## 'data.frame':    39 obs. of  5 variables:
##  $ happy: num  10 8 8 8 4 9 8 6 5 4 ...
##  $ money: num  36 47 53 35 88 175 175 45 35 55 ...
##  $ sex  : Factor w/ 2 levels "0","1": 1 2 1 2 2 2 2 1 2 2 ...
##  $ love : num  3 3 3 3 1 3 3 2 2 1 ...
##  $ work : num  4 1 5 3 2 4 4 3 2 4 ...

# Data set "happy" from the "faraway" package

#the variables mean:

#happy:Happiness on a 10 point scale where 10 is most happy

#money:family income in thousands of dollars

#sex:1 = satisfactory sexual activity, 0 = not

# Scatterplot of happiness against family income, with the smoother turned off.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
# NOTE(review): scatterplot() is presumably car::scatterplot -- confirm that
# `car` is attached earlier in the file.
scatterplot(happy ~ money, data = happy, smooth = FALSE)

##happy與money變異不一樣,就上圖分布來說似乎偏向正相關

# Happiness vs. income, coloured by `sex`, with one least-squares line per
# group and no confidence band. `FALSE` replaces the reassignable `F`.
ggplot(data = happy, mapping = aes(x = money, y = happy, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

#在sex:1(satisfactory sexual activity)以及在sex:0時,happy與money就上圖分布來說似乎皆偏向正相關

#進一步檢驗

# Additive linear model: happiness regressed on income and sex.
# The fitted model is kept in `hpmod` for the diagnostics further down.
hpmod <- lm(formula = happy ~ money + sex, data = happy)
summary(hpmod)
## 
## Call:
## lm(formula = happy ~ money + sex, data = happy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0776 -1.3140  0.5654  1.2268  3.3331 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.077619   0.665952   9.126 6.74e-11 ***
## money        0.016369   0.008827   1.854   0.0719 .  
## sex1        -0.507639   0.665037  -0.763   0.4502    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.825 on 36 degrees of freedom
## Multiple R-squared:  0.08817,    Adjusted R-squared:  0.03751 
## F-statistic:  1.74 on 2 and 36 DF,  p-value: 0.1899

#先看到money跟happy的關係,t value =1.854 <2;p value=0.0719 > 0.05。顯示相關未達顯著

#再考量有無sex時,與happy的關係,t value的絕對值= 0.763 <2;p value=0.4502 > 0.05。顯示相關未達顯著

## Adjusted R-squared: 0.03751, i.e. hpmod explains only about 3.8% of the variance in happy

# Histogram of the residuals of `hpmod` (about 20 bins requested, no title).
hist(
  residuals(hpmod),
  breaks = 20,
  main = "",
  xlab = "Value of residual"
)

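# The histogram above does not look normal: the residuals (the errors between observed and predicted values) are not concentrated around 0, so there is noticeable prediction error.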

# Normal Q-Q plot of the residuals (diagnostic plot number 2 of plot.lm)

plot(x = hpmod, which = 2L)

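# The plot above shows that the points do not follow the dashed reference line: the residuals deviate from normality (consistent with the non-normal histogram above), and some variation is not explained by the model. The plot also flags three outliers.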

# Checking the linearity of the relationship
# Residuals vs Fitted (diagnostic plot number 1 of plot.lm)

plot(x = hpmod, which = 1L)

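# The plot above shows that the red line does not stay on the dashed line: the residuals follow a pattern, so some variation is not explained by the model.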

# Residual plots for the terms of `hpmod`; the call also prints a table of
# lack-of-fit tests.
# NOTE(review): residualPlots() is presumably car::residualPlots -- confirm
# that `car` is attached earlier in the file.
residualPlots(model = hpmod)

##            Test stat Pr(>|Test stat|)
## money        -0.6600           0.5135
## sex                                  
## Tukey test   -0.2207           0.8253

#上圖呈現個別變項的residual

# Scale-Location plot (diagnostic plot number 3 of plot.lm)
plot(x = hpmod, which = 3L)

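# The plot above shows that the residuals are related to the predicted (fitted) values, which means some variation in the residuals is still not explained by the model hpmod.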