options(width = 320)
northlands = read.csv("Canada_zscore.csv")[, 4:8]
head(northlands)
## SmoothPop Elevation River Port Temperature
## 1 -0.1355 0.2371 5.975 2.831 -2.03699
## 2 -0.1355 -0.5599 6.338 2.863 -2.06540
## 3 -0.1355 -0.7203 6.599 2.864 -2.03699
## 4 -0.1355 0.9672 5.292 2.823 0.02058
## 5 -0.1355 -0.2990 5.339 2.823 0.02058
## 6 -0.1355 0.9672 5.339 2.809 -1.91359
summary(northlands)
## SmoothPop Elevation River Port Temperature
## Min. :-0.14 Min. :-1.218 Min. :-0.525 Min. :-1.558 Min. :-3.188
## 1st Qu.:-0.14 1st Qu.:-0.656 1st Qu.:-0.454 1st Qu.:-0.821 1st Qu.:-0.748
## Median :-0.13 Median :-0.313 Median :-0.343 Median :-0.112 Median : 0.207
## Mean : 0.00 Mean : 0.000 Mean : 0.000 Mean : 0.000 Mean : 0.000
## 3rd Qu.:-0.12 3rd Qu.: 0.290 3rd Qu.:-0.117 3rd Qu.: 0.659 3rd Qu.: 0.820
## Max. :34.72 Max. :11.829 Max. : 7.174 Max. : 3.080 Max. : 1.979
par(mfrow = c(5, 2))
for (i in 1:5) {
var = names(northlands)[i]
plot(density(northlands$SmoothPop), main = paste("Density of ", var))
plot(sort(northlands$SmoothPop), main = paste("IndexPlot of ", var), pch = ".")
}
pairs(~., data = northlands, main = "Simple Scatterplot Matrix")
northlandsSmoothPFit <- lm(SmoothPop ~ ., data = northlands)
cbind(summary(northlandsSmoothPFit)$coeff, confint(northlandsSmoothPFit))
## Estimate Std. Error t value Pr(>|t|) 2.5 % 97.5 %
## (Intercept) 9.782e-14 0.003099 3.157e-11 1.000e+00 -0.006074 0.006074
## Elevation -8.010e-02 0.003188 -2.512e+01 7.487e-139 -0.086351 -0.073853
## River 1.132e-01 0.004010 2.824e+01 9.991e-175 0.105372 0.121091
## Port -4.257e-02 0.005791 -7.351e+00 1.984e-13 -0.053922 -0.031220
## Temperature 2.335e-01 0.005821 4.012e+01 0.000e+00 0.222134 0.244952
nlwithNoTemp <- lm(SmoothPop ~ Elevation + River + Port, data = northlands)
nlwithNoPort <- lm(SmoothPop ~ Elevation + River + Temperature, data = northlands)
nlwithNoRiver <- lm(SmoothPop ~ Elevation + Port + Temperature, data = northlands)
nlwithNoElevation <- lm(SmoothPop ~ River + Port + Temperature, data = northlands)
anova(northlandsSmoothPFit, nlwithNoTemp)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + River + Port
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 96266 -1 -1535 1610 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoPort)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + River + Temperature
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 94782 -1 -51.5 54 2e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoRiver)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + Port + Temperature
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 95491 -1 -760 797 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoElevation)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ River + Port + Temperature
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 95333 -1 -602 631 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
nlwithNoTempElev <- lm(SmoothPop ~ I(Elevation + Temperature) + River + Port, data = northlands)
nlwithNoRiverPort <- lm(SmoothPop ~ Elevation + Temperature + I(River + Port), data = northlands)
nlwithNoTempRiver <- lm(SmoothPop ~ Elevation + I(Temperature + River) + Port, data = northlands)
nlwithNoElevPort <- lm(SmoothPop ~ I(Elevation + Port) + Temperature + River, data = northlands)
nlwithNoElevRiver <- lm(SmoothPop ~ I(Elevation + River) + Temperature + Port, data = northlands)
nlwithNoTempPort <- lm(SmoothPop ~ Elevation + River + I(Temperature + Port), data = northlands)
anova(northlandsSmoothPFit, nlwithNoTempElev)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ I(Elevation + Temperature) + River + Port
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 96675 -1 -1944 2038 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoRiverPort)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + Temperature + I(River + Port)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 95114 -1 -384 402 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoTempRiver)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + I(Temperature + River) + Port
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 95085 -1 -354 372 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoElevPort)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ I(Elevation + Port) + Temperature + River
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 94763 -1 -32.2 33.8 6.3e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoElevRiver)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ I(Elevation + River) + Temperature + Port
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 95966 -1 -1235 1295 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(northlandsSmoothPFit, nlwithNoTempPort)
## Analysis of Variance Table
##
## Model 1: SmoothPop ~ Elevation + River + Port + Temperature
## Model 2: SmoothPop ~ Elevation + River + I(Temperature + Port)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 99322 94731
## 2 99323 98708 -1 -3977 4170 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow = c(2, 2))
plot(northlandsSmoothPFit$res, ylab = "Residuals", main = "Index plot of residuals")
x <- model.matrix(northlandsSmoothPFit)
lev <- hat(x)
plot(lev, ylab = "Leverages", main = "Index plot of Leverages")
results <- summary(northlandsSmoothPFit)
stud <- northlandsSmoothPFit$res/(results$sig * sqrt(1 - lev))
plot(stud, ylab = "Studentized Residuals", main = "Studentized Residuals")
cook <- cooks.distance(northlandsSmoothPFit)
plot(cook, ylab = "Cooks distances")
northlandsSmoothPFit2 <- lm(SmoothPop ~ ., data = northlands, subset = (cook < max(cook)))
summary(northlandsSmoothPFit2)
##
## Call:
## lm(formula = SmoothPop ~ ., data = northlands, subset = (cook <
## max(cook)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70 -0.23 -0.10 0.07 34.27
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.00033 0.00308 -0.11 0.91
## Elevation -0.07966 0.00317 -25.13 < 2e-16 ***
## River 0.11279 0.00399 28.29 < 2e-16 ***
## Port -0.04228 0.00576 -7.34 2.1e-13 ***
## Temperature 0.23287 0.00579 40.23 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.971 on 99321 degrees of freedom
## Multiple R-squared: 0.0465, Adjusted R-squared: 0.0464
## F-statistic: 1.21e+03 on 4 and 99321 DF, p-value: <2e-16
summary(northlandsSmoothPFit)
##
## Call:
## lm(formula = SmoothPop ~ ., data = northlands)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71 -0.24 -0.10 0.07 34.27
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.78e-14 3.10e-03 0.00 1
## Elevation -8.01e-02 3.19e-03 -25.12 <2e-16 ***
## River 1.13e-01 4.01e-03 28.24 <2e-16 ***
## Port -4.26e-02 5.79e-03 -7.35 2e-13 ***
## Temperature 2.34e-01 5.82e-03 40.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.977 on 99322 degrees of freedom
## Multiple R-squared: 0.0463, Adjusted R-squared: 0.0462
## F-statistic: 1.2e+03 on 4 and 99322 DF, p-value: <2e-16
plot(northlandsSmoothPFit$fit, northlandsSmoothPFit$res, xlab = "Fitted", ylab = "Residuals")
abline(h = 0)
Using test:
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(northlandsSmoothPFit)
##
## studentized Breusch-Pagan test
##
## data: northlandsSmoothPFit
## BP = 836.8, df = 4, p-value < 2.2e-16
library(sandwich)
northlandsSmoothPFit$newse <- vcovHC(northlandsSmoothPFit)
coeftest(northlandsSmoothPFit, northlandsSmoothPFit$newse)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.78e-14 3.10e-03 0.0 1
## Elevation -8.01e-02 3.63e-03 -22.1 <2e-16 ***
## River 1.13e-01 3.56e-03 31.8 <2e-16 ***
## Port -4.26e-02 4.02e-03 -10.6 <2e-16 ***
## Temperature 2.34e-01 5.55e-03 42.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow = c(1, 2))
qqnorm(northlandsSmoothPFit$res, ylab = "Raw Residuals")
qqline(northlandsSmoothPFit$res)
qqnorm(rstudent(northlandsSmoothPFit), ylab = "Studentized residuals")
abline(0, 1)
library(car)
coltest = vif(northlandsSmoothPFit) # variance inflation factors
sqrt(coltest) > 2 # problem?
## Elevation River Port Temperature
## FALSE FALSE FALSE FALSE