# Setup: load the packages used by this analysis, read the Vancouver
# transit-supply table, and expose its columns on the search path.
library(lattice)
library(nlme)

# NOTE(review): the previous `rm(list = ls())` was dropped here; wiping the
# caller's whole workspace is a side effect a script should not have.
# NOTE(review): the absolute path below only resolves on the original
# author's machine; adjust it (or make it relative) before running elsewhere.
vanc <- read.csv(
  "/Users/Buzz/Desktop/HENV665Q/Final_Project/vancouver27.csv",
  header = TRUE
)

# attach() is kept because statements further down refer to the columns by
# bare name. Detach any copy left over from a previous run first, so that
# re-running the script does not stack stale copies on the search path.
if ("vanc" %in% search()) {
  detach(vanc)
}
attach(vanc)

names(vanc)
## [1] "Cell_ID"     "Year"        "Av_Offpeak"  "Distance_km" "Cat_km"     
## [6] "Rings_km"

# The spreadsheet viewer is only meaningful in an interactive session.
if (interactive()) {
  View(vanc)
}

# Data Exploration
# Scatterplot matrix of the variables used in the models below. pairs() does
# not treat a left-hand-side "response" specially, so a one-sided formula is
# used; Cell_ID stays first to keep the same panel order. Columns are taken
# from `vanc` explicitly rather than from the search path.
pairs(~ Cell_ID + Year + Av_Offpeak + Distance_km + Rings_km, data = vanc)

## plot of chunk unnamed-chunk-1


# Linear models

# Research question 1: does distance to downtown influence off-peak transit
# supply?
# ***

# Simple linear regression of Av_Offpeak on Distance_km. The columns are
# taken from `vanc` explicitly so the fit does not depend on attach().
dist_supply <- lm(Av_Offpeak ~ Distance_km, data = vanc)
summary(dist_supply)
## 
## Call:
## lm(formula = Av_Offpeak ~ Distance_km, data = vanc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -17.6   -6.8   -1.8    2.7  397.1 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.00096    0.16028   112.3   <2e-16 ***
## Distance_km -0.45831    0.00543   -84.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 15 on 39418 degrees of freedom
## Multiple R-squared: 0.153,   Adjusted R-squared: 0.153 
## F-statistic: 7.14e+03 on 1 and 39418 DF,  p-value: <2e-16

# Reading the output: the slope is negative and highly significant, but
# R-squared is only 0.153 and the residuals are strongly right-skewed
# (max 397.1 against a median of -1.8).

# Research question 2: has off-peak transit supply (by cell) increased in
# the last 30 years?
# ***

# Simple linear regression of Av_Offpeak on Year, with the columns taken
# from `vanc` explicitly so the fit does not depend on attach().
# NOTE(review): the Cell_ID and Year columns suggest each cell is observed
# in several years, so observations are presumably not independent; confirm
# whether a mixed model (nlme is loaded) is the intended next step.
supply_year <- lm(Av_Offpeak ~ Year, data = vanc)
summary(supply_year)
## 
## Call:
## lm(formula = Av_Offpeak ~ Year, data = vanc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##   -8.9   -7.0   -3.5   -0.4  406.3 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.69e+02   1.45e+01   -25.4   <2e-16 ***
## Year         1.88e-01   7.29e-03    25.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 16.2 on 39418 degrees of freedom
## Multiple R-squared: 0.0166,  Adjusted R-squared: 0.0166 
## F-statistic:  665 on 1 and 39418 DF,  p-value: <2e-16

# Reading the output: the Year slope is positive and significant, but Year
# alone explains under 2% of the variance (R-squared 0.0166).

# Research question 3: is there significant variation in transit supply
# between different zones (rings)?

# Rings_km enters as a categorical predictor: the intercept is the mean
# supply of the baseline ring (the level not listed in the coefficient
# table) and every other coefficient is that ring's difference from it.
# Columns are taken from `vanc` explicitly so the fit does not depend on
# attach().
rings_supply <- lm(Av_Offpeak ~ Rings_km, data = vanc)
summary(rings_supply)
## 
## Call:
## lm(formula = Av_Offpeak ~ Rings_km, data = vanc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -42.0   -4.3   -1.0    0.2  373.2 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         41.963      0.349   120.2   <2e-16 ***
## Rings_kmRing_10km  -27.521      0.403   -68.3   <2e-16 ***
## Rings_kmRing_15km  -33.661      0.412   -81.8   <2e-16 ***
## Rings_kmRing_20km  -35.548      0.405   -87.8   <2e-16 ***
## Rings_kmRing_25km  -37.649      0.404   -93.3   <2e-16 ***
## Rings_kmRing_60km  -40.914      0.363  -112.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 13.9 on 39414 degrees of freedom
## Multiple R-squared: 0.276,   Adjusted R-squared: 0.276 
## F-statistic: 3.01e+03 on 5 and 39414 DF,  p-value: <2e-16

# Reading the output: the overall F-test (5 and 39414 DF) is the test of
# "any difference between rings"; every listed ring has lower mean supply
# than the baseline, and the gap widens from Ring_10km to Ring_60km.

# Histograms
# Residual distributions of the three raw-scale models, drawn on a 2 x 2
# grid. par() returns the settings it replaces, so the previous layout can
# be put back afterwards instead of leaking into later plots.
old_par <- par(mfrow = c(2, 2))
hist(residuals(supply_year),
     main = "Residuals: Av_Offpeak ~ Year", xlab = "Residual")
hist(residuals(dist_supply),
     main = "Residuals: Av_Offpeak ~ Distance_km", xlab = "Residual")
hist(residuals(rings_supply),
     main = "Residuals: Av_Offpeak ~ Rings_km", xlab = "Residual")
par(old_par)
rm(old_par)

## plot of chunk unnamed-chunk-1


# Log-transformation of the response variable

# log(x + 1) rather than log(x): the offset is presumably there because some
# cells have zero supply -- confirm against the data. The column is read
# from `vanc` explicitly so this does not depend on attach().
supplylog <- log(vanc$Av_Offpeak + 1)

# Re-running the models
# `supplylog` is not a column of `vanc`; lm() finds it in the workspace,
# while Distance_km comes from `data`.
dist_supply_log <- lm(supplylog ~ Distance_km, data = vanc)

# Side-by-side residual histograms: raw response (left) vs. logged response
# (right). The previous plot layout is restored afterwards.
old_par <- par(mfrow = c(1, 2))
hist(residuals(dist_supply),
     main = "Av_Offpeak ~ Distance_km", xlab = "Residual")
hist(residuals(dist_supply_log),
     main = "log(Av_Offpeak + 1) ~ Distance_km", xlab = "Residual")
par(old_par)
rm(old_par)

## plot of chunk unnamed-chunk-1


# Year model refitted on the log-transformed response. `supplylog` is a
# workspace vector (not a column of `vanc`); Year is taken from `data`, so
# the fit does not depend on attach().
supply_year_log <- lm(supplylog ~ Year, data = vanc)

# Side-by-side residual histograms: raw response (left) vs. logged response
# (right). The previous plot layout is restored afterwards.
old_par <- par(mfrow = c(1, 2))
hist(residuals(supply_year),
     main = "Av_Offpeak ~ Year", xlab = "Residual")
hist(residuals(supply_year_log),
     main = "log(Av_Offpeak + 1) ~ Year", xlab = "Residual")
par(old_par)
rm(old_par)

## plot of chunk unnamed-chunk-1


# Rings model refitted on the log-transformed response. `supplylog` is a
# workspace vector (not a column of `vanc`); Rings_km is taken from `data`,
# so the fit does not depend on attach().
rings_supply_log <- lm(supplylog ~ Rings_km, data = vanc)

# Side-by-side residual histograms: raw response (left) vs. logged response
# (right). The previous plot layout is restored afterwards.
old_par <- par(mfrow = c(1, 2))
hist(residuals(rings_supply),
     main = "Av_Offpeak ~ Rings_km", xlab = "Residual")
hist(residuals(rings_supply_log),
     main = "log(Av_Offpeak + 1) ~ Rings_km", xlab = "Residual")
par(old_par)
rm(old_par)

## plot of chunk unnamed-chunk-1



# Generalized Linear Model




# Clean up after this analysis without touching anything else in the
# workspace: take `vanc` off the search path (it was attach()ed at the top
# of the script) and remove only the objects this script created.
# intersect() skips any that are already gone, so rm() never warns.
if ("vanc" %in% search()) {
  detach(vanc)
}
rm(list = intersect(
  ls(),
  c(
    "vanc", "supplylog",
    "dist_supply", "supply_year", "rings_supply",
    "dist_supply_log", "supply_year_log", "rings_supply_log"
  )
))