5. For the divusa data, fit a model with divorce as the response and the other variables except year as predictors. Check for serial correlation.
library(faraway)
data("divusa")
colnames(divusa)
## [1] "year" "divorce" "unemployed" "femlab" "marriage"
## [6] "birth" "military"
summary(divusa)
## year divorce unemployed femlab
## Min. :1920 Min. : 6.10 Min. : 1.200 Min. :22.70
## 1st Qu.:1939 1st Qu.: 8.70 1st Qu.: 4.200 1st Qu.:27.47
## Median :1958 Median :10.60 Median : 5.600 Median :37.10
## Mean :1958 Mean :13.27 Mean : 7.173 Mean :38.58
## 3rd Qu.:1977 3rd Qu.:20.30 3rd Qu.: 7.500 3rd Qu.:47.80
## Max. :1996 Max. :22.80 Max. :24.900 Max. :59.30
## marriage birth military
## Min. : 49.70 Min. : 65.30 Min. : 1.940
## 1st Qu.: 61.90 1st Qu.: 68.90 1st Qu.: 3.469
## Median : 74.10 Median : 85.90 Median : 9.102
## Mean : 72.97 Mean : 88.89 Mean :12.365
## 3rd Qu.: 80.00 3rd Qu.:107.30 3rd Qu.:14.266
## Max. :118.10 Max. :122.90 Max. :86.641
#new dataframe without year
div<-divusa[ , !(names(divusa) %in% c('year'))]
#check if dataset has NA
!(length(complete.cases(div))==length(complete.cases(div)))
## [1] FALSE
#model
m<-lm(divorce~.,div)
summary(m)
##
## Call:
## lm(formula = divorce ~ ., data = div)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8611 -0.8916 -0.0496 0.8650 3.8300
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.48784 3.39378 0.733 0.4659
## unemployed -0.11125 0.05592 -1.989 0.0505 .
## femlab 0.38365 0.03059 12.543 < 2e-16 ***
## marriage 0.11867 0.02441 4.861 6.77e-06 ***
## birth -0.12996 0.01560 -8.333 4.03e-12 ***
## military -0.02673 0.01425 -1.876 0.0647 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.65 on 71 degrees of freedom
## Multiple R-squared: 0.9208, Adjusted R-squared: 0.9152
## F-statistic: 165.1 on 5 and 71 DF, p-value: < 2.2e-16
#check residuals
plot(residuals(m),ylab = "Residuals")
abline(h=0)
qqnorm(residuals(m),ylab = "Residuals")
qqline(residuals(m))
hist(residuals(m))
#the residual plot shows somesort of cyclical behaviour
#plot succesive residuals
plot(residuals(m)[-77],residuals(m)[-1],xlab=expression(hat(epsilon)[i]),ylab = expression((hat(epsilon)[i+1])))
#this plot also shows a relationship
#we calculate a regression of succesive residuals
summary(lm(residuals(m)[-1]~0-1+residuals(m)[-77]))
##
## Call:
## lm(formula = residuals(m)[-1] ~ 0 - 1 + residuals(m)[-77])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3909 -0.5980 -0.1360 0.4218 2.1718
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## residuals(m)[-77] 0.85147 0.06186 13.76 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8474 on 75 degrees of freedom
## Multiple R-squared: 0.7164, Adjusted R-squared: 0.7126
## F-statistic: 189.4 on 1 and 75 DF, p-value: < 2.2e-16
#no evidence of correlation with the model showing a very small p-value
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
dwtest(divorce~.,data=div)
##
## Durbin-Watson test
##
## data: divorce ~ .
## DW = 0.29988, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
#Durbin-Watson shows no evidence of correlation, very small p-value