CUNY - Data 621 - Week 3 Discussion

LMR Chapter 4

5. For the divusa data, fit a model with divorce as the response and the other variables except year as predictors. Check for serial correlation.

library(faraway)

data("divusa")

colnames(divusa)

## [1] "year"       "divorce"    "unemployed" "femlab"     "marriage"  
## [6] "birth"      "military"

summary(divusa)

##       year         divorce        unemployed         femlab     
##  Min.   :1920   Min.   : 6.10   Min.   : 1.200   Min.   :22.70  
##  1st Qu.:1939   1st Qu.: 8.70   1st Qu.: 4.200   1st Qu.:27.47  
##  Median :1958   Median :10.60   Median : 5.600   Median :37.10  
##  Mean   :1958   Mean   :13.27   Mean   : 7.173   Mean   :38.58  
##  3rd Qu.:1977   3rd Qu.:20.30   3rd Qu.: 7.500   3rd Qu.:47.80  
##  Max.   :1996   Max.   :22.80   Max.   :24.900   Max.   :59.30  
##     marriage          birth           military     
##  Min.   : 49.70   Min.   : 65.30   Min.   : 1.940  
##  1st Qu.: 61.90   1st Qu.: 68.90   1st Qu.: 3.469  
##  Median : 74.10   Median : 85.90   Median : 9.102  
##  Mean   : 72.97   Mean   : 88.89   Mean   :12.365  
##  3rd Qu.: 80.00   3rd Qu.:107.30   3rd Qu.:14.266  
##  Max.   :118.10   Max.   :122.90   Max.   :86.641

#new dataframe without year
div<-divusa[ , !(names(divusa) %in% c('year'))]
#check if dataset has NA
!(length(complete.cases(div))==length(complete.cases(div)))

## [1] FALSE

#model
m<-lm(divorce~.,div)
summary(m)

## 
## Call:
## lm(formula = divorce ~ ., data = div)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8611 -0.8916 -0.0496  0.8650  3.8300 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.48784    3.39378   0.733   0.4659    
## unemployed  -0.11125    0.05592  -1.989   0.0505 .  
## femlab       0.38365    0.03059  12.543  < 2e-16 ***
## marriage     0.11867    0.02441   4.861 6.77e-06 ***
## birth       -0.12996    0.01560  -8.333 4.03e-12 ***
## military    -0.02673    0.01425  -1.876   0.0647 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.65 on 71 degrees of freedom
## Multiple R-squared:  0.9208, Adjusted R-squared:  0.9152 
## F-statistic: 165.1 on 5 and 71 DF,  p-value: < 2.2e-16

#check residuals
plot(residuals(m),ylab = "Residuals")
abline(h=0)

qqnorm(residuals(m),ylab = "Residuals")
qqline(residuals(m))

hist(residuals(m))

#the residual plot shows somesort of cyclical behaviour
#plot succesive residuals
plot(residuals(m)[-77],residuals(m)[-1],xlab=expression(hat(epsilon)[i]),ylab = expression((hat(epsilon)[i+1])))

#this plot also shows a relationship
#we calculate a regression of succesive residuals
summary(lm(residuals(m)[-1]~0-1+residuals(m)[-77]))

## 
## Call:
## lm(formula = residuals(m)[-1] ~ 0 - 1 + residuals(m)[-77])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3909 -0.5980 -0.1360  0.4218  2.1718 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## residuals(m)[-77]  0.85147    0.06186   13.76   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8474 on 75 degrees of freedom
## Multiple R-squared:  0.7164, Adjusted R-squared:  0.7126 
## F-statistic: 189.4 on 1 and 75 DF,  p-value: < 2.2e-16

#no evidence of correlation with the model showing a very small p-value
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

dwtest(divorce~.,data=div)

## 
##  Durbin-Watson test
## 
## data:  divorce ~ .
## DW = 0.29988, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

#Durbin-Watson shows no evidence of correlation, very small p-value

CUNY - Data 621 - Week 3 Discussion

Peter Kowalchuk

September 10, 2019

LMR Chapter 4