options(width = 320)
northlands = read.csv("Canada.csv")[, 4:8]
head(northlands)
## SmoothPop Elevation River Port Temperature
## 1 -1.178 0.59604 2.506 2.155 -2.03699
## 2 -1.097 -0.40021 2.545 2.173 -2.06540
## 3 -1.076 -0.75025 2.572 2.173 -2.03699
## 4 -1.112 1.10713 2.427 2.151 0.02058
## 5 -1.077 0.01878 2.433 2.151 0.02058
## 6 -1.059 1.10713 2.433 2.143 -1.91359
summary(northlands)
## SmoothPop Elevation River Port Temperature
## Min. :-1.533 Min. :-7.475 Min. :-2.0773 Min. :-2.6163 Min. :-3.188
## 1st Qu.:-0.832 1st Qu.:-0.597 1st Qu.:-0.6213 1st Qu.:-0.7327 1st Qu.:-0.748
## Median :-0.426 Median :-0.001 Median :-0.0088 Median : 0.0701 Median : 0.207
## Mean : 0.000 Mean : 0.000 Mean : 0.0000 Mean : 0.0000 Mean : 0.000
## 3rd Qu.: 0.717 3rd Qu.: 0.641 3rd Qu.: 0.5452 3rd Qu.: 0.7386 3rd Qu.: 0.820
## Max. : 3.667 Max. : 3.355 Max. : 2.6272 Max. : 2.2921 Max. : 1.979
par(mfrow = c(5, 2))
for (i in 1:5) {
var = names(northlands)[i]
plot(density(northlands[, i]), main = paste("Density of ", var))
plot(sort(northlands[, i]), main = paste("IndexPlot of ", var), pch = ".")
}
pairs(~., data = northlands, main = "Simple Scatterplot Matrix")
northlandsSmoothPFit <- lm(SmoothPop ~ ., data = northlands)
cbind(summary(northlandsSmoothPFit)$coeff, confint(northlandsSmoothPFit))
## Estimate Std. Error t value Pr(>|t|) 2.5 % 97.5 %
## (Intercept) 4.448e-13 0.002041 2.179e-10 1.000e+00 -0.00400 0.00400
## Elevation -2.780e-02 0.002134 -1.303e+01 9.002e-39 -0.03198 -0.02362
## River 1.323e-01 0.002359 5.609e+01 0.000e+00 0.12769 0.13693
## Port 1.367e-01 0.003460 3.950e+01 0.000e+00 0.12989 0.14345
## Temperature 9.371e-01 0.003783 2.477e+02 0.000e+00 0.92971 0.94454
nlwithNoTemp <- lm(SmoothPop ~ Elevation + River + Port, data = northlands)
nlwithNoPort <- lm(SmoothPop ~ Elevation + River + Temperature, data = northlands)
nlwithNoRiver <- lm(SmoothPop ~ Elevation + Port + Temperature, data = northlands)
nlwithNoElevation <- lm(SmoothPop ~ River + Port + Temperature, data = northlands)
anova(northlandsSmoothPFit, nlwithNoTemp)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + River + Port
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 66487 -1 -25389 61360 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoPort)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + River + Temperature
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 41743 -1 -646 1560 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoRiver)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + Port + Temperature
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 42400 -1 -1302 3147 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoElevation)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ River + Port + Temperature
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 41168 -1 -70.2 170 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
nlwithNoTempElev <- lm(SmoothPop ~ I(Elevation + Temperature) + River + Port, data = northlands)
nlwithNoRiverPort <- lm(SmoothPop ~ Elevation + Temperature + I(River + Port), data = northlands)
nlwithNoTempRiver <- lm(SmoothPop ~ Elevation + I(Temperature + River) + Port, data = northlands)
nlwithNoElevPort <- lm(SmoothPop ~ I(Elevation + Port) + Temperature + River, data = northlands)
nlwithNoElevRiver <- lm(SmoothPop ~ I(Elevation + River) + Temperature + Port, data = northlands)
nlwithNoTempPort <- lm(SmoothPop ~ Elevation + River + I(Temperature + Port), data = northlands)
anova(northlandsSmoothPFit, nlwithNoTempElev)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ I(Elevation + Temperature) + River + Port
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 58296 -1 -17199 41565 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoRiverPort)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + Temperature + I(River + Port)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 41098 -1 -0.473 1.14 0.29
anova(northlandsSmoothPFit, nlwithNoTempRiver)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + I(Temperature + River) + Port
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 60882 -1 -19784 47814 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoElevPort)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ I(Elevation + Port) + Temperature + River
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 41735 -1 -638 1541 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoElevRiver)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ I(Elevation + River) + Temperature + Port
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 42121 -1 -1023 2473 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoTempPort)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + River + I(Temperature + Port)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 41098
## 2 99323 83454 -1 -42357 102366 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow = c(2, 2))
plot(northlandsSmoothPFit$res, ylab = "Residuals", main = "Index plot of residuals")
x <- model.matrix(northlandsSmoothPFit)
lev <- hat(x)
plot(lev, ylab = "Leverages", main = "Index plot of Leverages")
results <- summary(northlandsSmoothPFit)
stud <- northlandsSmoothPFit$res/(results$sig * sqrt(1 - lev))
plot(stud, ylab = "Studentized Residuals", main = "Studentized Residuals")
cook <- cooks.distance(northlandsSmoothPFit)
plot(cook, ylab = "Cooks distances")
northlandsSmoothPFit2 <- lm(SmoothPop ~ ., data = northlands, subset = (cook < max(cook)))
summary(northlandsSmoothPFit2)
##
## Call:
## lm(formula = SmoothPop ~ ., data = northlands, subset = (cook <
## max(cook)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.036 -0.450 -0.016 0.396 2.708
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.61e-05 2.04e-03 -0.01 0.99
## Elevation -2.77e-02 2.13e-03 -13.00 <2e-16 ***
## River 1.32e-01 2.36e-03 56.12 <2e-16 ***
## Port 1.37e-01 3.46e-03 39.53 <2e-16 ***
## Temperature 9.37e-01 3.78e-03 247.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.643 on 99321 degrees of freedom
## Multiple R-squared: 0.586, Adjusted R-squared: 0.586
## F-statistic: 3.52e+04 on 4 and 99321 DF, p-value: <2e-16
summary(northlandsSmoothPFit)
##
## Call:
## lm(formula = SmoothPop ~ ., data = northlands)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.037 -0.450 -0.016 0.396 2.707
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.45e-13 2.04e-03 0.0 1
## Elevation -2.78e-02 2.13e-03 -13.0 <2e-16 ***
## River 1.32e-01 2.36e-03 56.1 <2e-16 ***
## Port 1.37e-01 3.46e-03 39.5 <2e-16 ***
## Temperature 9.37e-01 3.78e-03 247.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.643 on 99322 degrees of freedom
## Multiple R-squared: 0.586, Adjusted R-squared: 0.586
## F-statistic: 3.52e+04 on 4 and 99322 DF, p-value: <2e-16
plot(northlandsSmoothPFit$fit, northlandsSmoothPFit$res, xlab = "Fitted", ylab = "Residuals")
abline(h = 0)
Using test:
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(northlandsSmoothPFit)
##
## studentized Breusch-Pagan test
##
## data: northlandsSmoothPFit
## BP = 11378, df = 4, p-value < 2.2e-16
library(sandwich)
northlandsSmoothPFit$newse <- vcovHC(northlandsSmoothPFit)
coeftest(northlandsSmoothPFit, northlandsSmoothPFit$newse)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.45e-13 2.04e-03 0.0 1
## Elevation -2.78e-02 2.47e-03 -11.3 <2e-16 ***
## River 1.32e-01 2.29e-03 57.8 <2e-16 ***
## Port 1.37e-01 4.10e-03 33.3 <2e-16 ***
## Temperature 9.37e-01 4.04e-03 231.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow = c(1, 2))
qqnorm(northlandsSmoothPFit$res, ylab = "Raw Residuals")
qqline(northlandsSmoothPFit$res)
qqnorm(rstudent(northlandsSmoothPFit), ylab = "Studentized residuals")
abline(0, 1)
library(car)
coltest = vif(northlandsSmoothPFit) # variance inflation factors
sqrt(coltest) > 2 # problem?
## Elevation River Port Temperature
## FALSE FALSE FALSE FALSE