library(car)
## 載入需要的套件:carData
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()
library(faraway)
## 
## 載入套件:'faraway'
## 下列物件被遮斷自 'package:car':
## 
##     logit, vif
# Load the `happy` data set, naming the package that provides it explicitly.
data("happy", package = "faraway")

#data “happy” 從 package “faraway”取得

#以下說明個別變項的意涵:

#happy: Happiness on a 10 point scale where 10 is most happy

#money: family income in thousands of dollars

#sex:1 = satisfactory sexual activity , 0 = not

# Print a compact structural overview: dimensions, column names and types.
utils::str(object = happy)
## 'data.frame':    39 obs. of  5 variables:
##  $ happy: num  10 8 8 8 4 9 8 6 5 4 ...
##  $ money: num  36 47 53 35 88 175 175 45 35 55 ...
##  $ sex  : num  0 1 0 1 1 1 1 0 1 1 ...
##  $ love : num  3 3 3 3 1 3 3 2 2 1 ...
##  $ work : num  4 1 5 3 2 4 4 3 2 4 ...
# Recode `sex` as a factor so the model and the plots treat it as categorical.
happy[["sex"]] <- as.factor(happy[["sex"]])

#可知data “happy” 中有5個變項的39個觀察值

# Scatterplot of happiness against family income; the nonparametric smoother
# is switched off so only car's default display (points, fitted line and
# marginal boxplots) is drawn.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
scatterplot(happy ~ money, data = happy, smooth = FALSE)

#happy與money的變異(分散程度)不一樣;就上圖分布來說,兩者似乎偏向正相關,也就是越有錢、越開心。不過這只是從散佈圖得到的初步觀察,是否達顯著仍須以迴歸模型檢驗。

# Happiness against income, coloured by `sex`, with one straight
# least-squares line per group and no confidence band.
# `se = FALSE` replaces the reassignable shorthand `F`; the fitting method is
# given as the string "lm", the form documented for geom_smooth().
ggplot(data = happy, mapping = aes(x = money, y = happy, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

#在sex:1以及sex:0時,happy與money就上圖分布來說似乎皆偏向正相關。所以結論是: (1) satisfactory sexual activity時,越有錢,越開心。 (2) not satisfactory sexual activity時,越有錢,越開心。

#進一步檢驗:以迴歸模型檢驗上述關係

# Additive linear model: happiness regressed on income and the `sex` factor.
hpmod <- lm(
  formula = happy ~ money + sex,
  data = happy
)

# Coefficient table, residual summary and overall fit statistics.
summary(object = hpmod)
## 
## Call:
## lm(formula = happy ~ money + sex, data = happy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0776 -1.3140  0.5654  1.2268  3.3331 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.077619   0.665952   9.126 6.74e-11 ***
## money        0.016369   0.008827   1.854   0.0719 .  
## sex1        -0.507639   0.665037  -0.763   0.4502    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.825 on 36 degrees of freedom
## Multiple R-squared:  0.08817,    Adjusted R-squared:  0.03751 
## F-statistic:  1.74 on 2 and 36 DF,  p-value: 0.1899

#先看到money跟happy的關係:在控制sex的情況下,Estimate = 0.016,t value = 1.854 < 2;p value = 0.0719 > 0.05。結果顯示:即使money與happy之間是正向關係,但兩者之間的關係並未達 0.05 的顯著水準。

#sex為類別變項(以sex = 0為參照組)。因此這邊解釋為:在控制money的情況下,satisfactory sexual activity的happy平均數,比not satisfactory的happy平均數低0.508。且看到t value的絕對值 = 0.763 < 2;p value = 0.4502 > 0.05。顯示兩者的平均數差異未達顯著水準。

#R-squared: 0.088 ,表示hpmod中的兩個獨變項money和satisfactory sexual activity,僅能解釋happy約8.8%的變異。

# Histogram of the model residuals, used to eyeball their distribution.
# `breaks` is only a suggestion; hist() may round it to "pretty" cut points.
hist(
  resid(hpmod),
  breaks = 20,
  main = "",
  xlab = "Value of residual"
)

#上圖Residual(殘差,即觀察值與預測值的誤差)的分布未能呈現常態,表示迴歸模型的常態性假設可能不成立;再加上R-squared偏低,顯示happy仍有大部分變異不能被此迴歸模型所解釋,說明除了money和satisfactory sexual activity之外,可能還有其他更適合解釋happy的獨變項(因素)。

#Normal Q-Q plot

# Diagnostic plot number 2 of plot.lm(): the normal Q-Q plot of the residuals.
plot(x = hpmod, which = 2)

#上圖顯示,Residual的分布未能貼著虛線(理論常態分位數),表示殘差偏離常態分布;這也暗示仍有變異不能被此迴歸模型所解釋,除了hpmod中的兩個獨變項外,可能還有其他更適合解釋happy的變項。

#並且圖中標示出三個殘差最極端的觀察值(plot.lm預設會標示三個點),它們是可能的outlier(離群值),需進一步檢查。