library(car)
## 載入需要的套件:carData
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
library(faraway)
##
## 載入套件:'faraway'
## 下列物件被遮斷自 'package:car':
##
## logit, vif
# Load the `happy` data set, naming its source package explicitly so the
# lookup cannot be satisfied by a same-named data set from another attached
# package or from a local `data/` directory.
data("happy", package = "faraway")
#data “happy” 從 package “faraway”取得
#以下說明個別變項的意涵:
#happy: Happiness on a 10 point scale where 10 is most happy
#money: family income in thousands of dollars
#sex:1 = satisfactory sexual activity , 0 = not
# Print a compact summary of the data frame: dimensions, column names,
# column types, and the first few values of each column.
str(object = happy)
## 'data.frame': 39 obs. of 5 variables:
## $ happy: num 10 8 8 8 4 9 8 6 5 4 ...
## $ money: num 36 47 53 35 88 175 175 45 35 55 ...
## $ sex : num 0 1 0 1 1 1 1 0 1 1 ...
## $ love : num 3 3 3 3 1 3 3 2 2 1 ...
## $ work : num 4 1 5 3 2 4 4 3 2 4 ...
# Recode the 0/1 `sex` indicator as a factor so that plotting and modelling
# functions treat it as a categorical variable rather than a number.
happy[["sex"]] <- factor(happy[["sex"]])
#可知data “happy” 中有5個變項的39個觀察值
# Scatterplot of happiness against family income (car::scatterplot).
# `smooth = FALSE` turns off the nonparametric smoother. FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
scatterplot(happy ~ money, data = happy, smooth = FALSE)
#happy與money的尺度與變異程度不同;就上圖的分布來說,兩者似乎偏向正相關,亦即初步看來:越有錢,越開心。不過此關係是否達顯著水準,仍需以迴歸模型加以檢定。
# Happiness against family income, with points and fitted lines coloured by
# `sex`. `method = "lm"` draws a straight least-squares line and
# `se = FALSE` hides the confidence band. FALSE is spelled out because `F`
# is an ordinary variable that can be reassigned.
ggplot(happy, aes(x = money, y = happy, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
#在sex:1以及sex:0時,happy與money就上圖分布來說似乎皆偏向正相關。所以結論是: (1) satisfactory sexual activity時,越有錢,越開心。 (2) not satisfactory sexual activity時,越有錢,越開心。
# Fit a linear model of happiness on family income and `sex`, then print
# the coefficient table and overall fit statistics.
hpmod <- lm(formula = happy ~ money + sex, data = happy)
summary(hpmod)
##
## Call:
## lm(formula = happy ~ money + sex, data = happy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0776 -1.3140 0.5654 1.2268 3.3331
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.077619 0.665952 9.126 6.74e-11 ***
## money 0.016369 0.008827 1.854 0.0719 .
## sex1 -0.507639 0.665037 -0.763 0.4502
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.825 on 36 degrees of freedom
## Multiple R-squared: 0.08817, Adjusted R-squared: 0.03751
## F-statistic: 1.74 on 2 and 36 DF, p-value: 0.1899
#先看到money跟happy的關係,Estimate = 0.016 , t value =1.854 < 2 ; p value = 0.0719 > 0.05。結果顯示:即使money與happy之間是正相關,但兩者之間的關係,並未達顯著水準。
#sex為類別變項(以sex = 0,即not satisfactory為參照組)。因此這邊解釋為:在控制money的情況下,Satisfactory sexual activity的happy平均數,比not satisfactory的happy平均數低約0.508。且看到t value的絕對值 = 0.763 < 2;p value = 0.4502 > 0.05,顯示兩者的平均數差異未達顯著水準。
#R-squared: 0.088 ,表示hpmod中的兩個獨變項money和satisfactory sexual activity,僅能解釋happy約8.8%的變異。
# Histogram of the model residuals, used to eyeball whether their
# distribution looks roughly normal. The title is intentionally left blank.
hist(
  resid(hpmod),
  breaks = 20,
  main = "",
  xlab = "Value of residual"
)
#上圖Residual(殘差,即觀察值與預測值之間的誤差)的分布未能呈現常態,顯示此迴歸模型的常態性假設可能未被滿足,仍有residual不能被此迴歸模型所解釋;這也說明除了money和satisfactory sexual activity之外,可能還有其他更適合解釋happy的獨變項(因素)。
# Draw only the second lm diagnostic plot: the normal Q-Q plot of the
# residuals.
plot(x = hpmod, which = 2)
#上圖顯示,Residual的分布未能貼著虛線(理論常態分位數的參考線),表示殘差偏離常態,仍有residual不能被此迴歸模型所解釋;除了hpmod中的兩個獨變項外,可能還有其他更適合解釋happy的變項。
#並且圖中標示出三個殘差最極端的觀察值,為可能的outlier(離群值)。