# Start from an empty global environment. Because the data frame is attach()ed
# below, a leftover global with the same name as a CSV column (e.g. `Year`)
# would silently mask the attached column.
rm(list = ls())
# Load the project data set; the first row of the CSV supplies column names.
vanc <- read.csv("/Users/Buzz/Desktop/HENV665Q/Final_Project/vancouver27.csv",
  header = TRUE)
# NOTE(review): attach() is kept only because later statements refer to the
# columns by bare name (Av_Offpeak, Distance_km, ...). Prefer `data = vanc` in
# new code; the attached copy does not follow later changes made to `vanc`.
attach(vanc)
names(vanc)
## [1] "Cell_ID" "Year" "Av_Offpeak" "Distance_km" "Cat_km"
## [6] "Rings_km"
# The spreadsheet viewer needs an interactive session; skip it when the script
# is run in batch mode or rendered to a report.
if (interactive()) {
  View(vanc)
}
library(lattice)
library(nlme)
# Data Exploration
# Scatterplot matrix of the cell identifier, year, off-peak supply, distance
# and ring columns. Columns are taken from `vanc` explicitly rather than from
# the attached search path.
pairs(Cell_ID ~ Year + Av_Offpeak + Distance_km + Rings_km, data = vanc)
# Linear models
# R. Q.: 1) Does distance to downtown influence off peak transit supply?
# *** (presumably the significance code of the Distance_km slope in the
# recorded summary below, i.e. p < 0.001)
# Simple linear regression of off-peak supply on distance. The columns are
# looked up in `vanc` via `data =` instead of the attach()ed search path, so
# the fit cannot pick up a stray global with the same name.
dist_supply <- lm(Av_Offpeak ~ Distance_km, data = vanc)
summary(dist_supply)
##
## Call:
## lm(formula = Av_Offpeak ~ Distance_km, data = vanc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.6 -6.8 -1.8 2.7 397.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.00096 0.16028 112.3 <2e-16 ***
## Distance_km -0.45831 0.00543 -84.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15 on 39418 degrees of freedom
## Multiple R-squared: 0.153, Adjusted R-squared: 0.153
## F-statistic: 7.14e+03 on 1 and 39418 DF, p-value: <2e-16
# 2) Has off peak transit supply (by cell) increased in the last 30 years?
# *** (presumably the significance code of the Year slope in the recorded
# summary below, i.e. p < 0.001)
# Simple linear regression of off-peak supply on calendar year. The columns
# are looked up in `vanc` via `data =` instead of the attach()ed search path.
# NOTE(review): every cell/year row is treated as an independent observation;
# confirm that is intended given the "by cell" wording of the question.
supply_year <- lm(Av_Offpeak ~ Year, data = vanc)
summary(supply_year)
##
## Call:
## lm(formula = Av_Offpeak ~ Year, data = vanc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.9 -7.0 -3.5 -0.4 406.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.69e+02 1.45e+01 -25.4 <2e-16 ***
## Year 1.88e-01 7.29e-03 25.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.2 on 39418 degrees of freedom
## Multiple R-squared: 0.0166, Adjusted R-squared: 0.0166
## F-statistic: 665 on 1 and 39418 DF, p-value: <2e-16
# 3) Is there significant variation in transit supply between different
# zones (rings)?
# Rings_km enters as a categorical predictor: the recorded summary reports one
# coefficient per ring, each measured against the reference ring absorbed into
# the intercept. The columns are looked up in `vanc` via `data =` instead of
# the attach()ed search path.
rings_supply <- lm(Av_Offpeak ~ Rings_km, data = vanc)
summary(rings_supply)
##
## Call:
## lm(formula = Av_Offpeak ~ Rings_km, data = vanc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.0 -4.3 -1.0 0.2 373.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.963 0.349 120.2 <2e-16 ***
## Rings_kmRing_10km -27.521 0.403 -68.3 <2e-16 ***
## Rings_kmRing_15km -33.661 0.412 -81.8 <2e-16 ***
## Rings_kmRing_20km -35.548 0.405 -87.8 <2e-16 ***
## Rings_kmRing_25km -37.649 0.404 -93.3 <2e-16 ***
## Rings_kmRing_60km -40.914 0.363 -112.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.9 on 39414 degrees of freedom
## Multiple R-squared: 0.276, Adjusted R-squared: 0.276
## F-statistic: 3.01e+03 on 5 and 39414 DF, p-value: <2e-16
# Histograms
# Residual distributions of the three untransformed models, drawn into a
# 2 x 2 plotting grid (the fourth panel stays empty).
par(mfrow = c(2, 2))
invisible(Map(
  function(fit, resid_label) {
    # Title and axis label are spelled out so they read exactly as hist()
    # would label a direct `model$residuals` argument.
    hist(
      fit$residuals,
      main = paste("Histogram of", resid_label),
      xlab = resid_label
    )
  },
  list(supply_year, dist_supply, rings_supply),
  c("supply_year$residuals", "dist_supply$residuals", "rings_supply$residuals")
))
# Log-Transformation of response variable
# log1p(x) computes log(1 + x) directly; the +1 shift maps a supply of 0 to 0
# instead of -Inf.
supplylog <- log1p(vanc$Av_Offpeak)

# Re-running the models
# Each model is refitted with the log-transformed response, then the residual
# histogram of the original fit (left) is drawn next to that of the log fit
# (right). `supplylog` is not a column of `vanc`, so lm() finds it in the
# environment of the formula; the predictors come from `data = vanc`.

# 1) Distance to downtown
dist_supply_log <- lm(supplylog ~ Distance_km, data = vanc)
par(mfrow = c(1, 2))
hist(dist_supply$residuals)
hist(dist_supply_log$residuals)

# 2) Year
supply_year_log <- lm(supplylog ~ Year, data = vanc)
par(mfrow = c(1, 2))
hist(supply_year$residuals)
hist(supply_year_log$residuals)

# 3) Rings
rings_supply_log <- lm(supplylog ~ Rings_km, data = vanc)
par(mfrow = c(1, 2))
hist(rings_supply$residuals)
hist(rings_supply_log$residuals)
# Generalized Linear Model
# Remove every object from the workspace (the data frame, the fitted models
# and supplylog) before the next section starts.
# NOTE(review): rm() does not detach() the `vanc` columns attached near the top
# of the script, so bare column names such as Av_Offpeak still resolve after
# this line. Confirm the following section intends to rely on that.
rm(list = ls())