Vanc27.R

Buzz — Nov 28, 2012, 10:01 PM

vanc <- read.csv("/Users/Buzz/Desktop/HENV665Q/Final_Project/vancouver27.csv", header = T)
attach(vanc)
names(vanc)
[1] "Cell_ID"     "Year"        "Av_Offpeak"  "Distance_km" "Cat_km"     
[6] "Rings_km"   
View(vanc)
library(lattice)
library(nlme)

# Data Exploration
pairs(Av_Offpeak ~ Distance_km + Rings_km + Year)

plot of chunk unnamed-chunk-1


# Linear models

# R. Q.: 
# 1) Does distance to downtown influence off peak transit supply? ***

supply_dist <- lm(Av_Offpeak ~ Distance_km)
summary(supply_dist)

Call:
lm(formula = Av_Offpeak ~ Distance_km)

Residuals:
   Min     1Q Median     3Q    Max 
 -17.6   -6.8   -1.8    2.7  397.1 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 18.00131    0.16028   112.3   <2e-16 ***
Distance_km -0.45832    0.00543   -84.5   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 15 on 39418 degrees of freedom
Multiple R-squared: 0.153,  Adjusted R-squared: 0.153 
F-statistic: 7.14e+03 on 1 and 39418 DF,  p-value: <2e-16 


# 2) Has off peak transit supply (by cell) increased in the last 30 years? ***
supply_year <- lm(Av_Offpeak ~ Year)
summary(supply_year)

Call:
lm(formula = Av_Offpeak ~ Year)

Residuals:
   Min     1Q Median     3Q    Max 
  -8.9   -7.0   -3.5   -0.4  406.3 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.69e+02   1.45e+01   -25.4   <2e-16 ***
Year         1.88e-01   7.29e-03    25.8   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 16.2 on 39418 degrees of freedom
Multiple R-squared: 0.0166, Adjusted R-squared: 0.0166 
F-statistic:  665 on 1 and 39418 DF,  p-value: <2e-16 


# 3) Is there significant variation in transit supply between different zones (rings)?
supply_rings <- lm(Av_Offpeak ~ Rings_km)
summary(supply_rings)

Call:
lm(formula = Av_Offpeak ~ Rings_km)

Residuals:
   Min     1Q Median     3Q    Max 
 -42.0   -4.3   -1.0    0.2  373.2 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)         41.963      0.349   120.2   <2e-16 ***
Rings_kmRing_10km  -27.521      0.403   -68.3   <2e-16 ***
Rings_kmRing_15km  -33.661      0.412   -81.8   <2e-16 ***
Rings_kmRing_20km  -35.549      0.405   -87.8   <2e-16 ***
Rings_kmRing_25km  -37.649      0.404   -93.3   <2e-16 ***
Rings_kmRing_60km  -40.914      0.363  -112.7   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 13.9 on 39414 degrees of freedom
Multiple R-squared: 0.276,  Adjusted R-squared: 0.276 
F-statistic: 3.01e+03 on 5 and 39414 DF,  p-value: <2e-16 


# Histograms
par(mfrow = c(2,2))
hist(supply_year$residuals)
hist(supply_dist$residuals)
hist(supply_dist$residuals)

plot of chunk unnamed-chunk-1


# Log-Transformation of response variable (Log for right-tailed data)

supplylog <-log(Av_Offpeak + 1)

# Re-running the models
supply_dist_log <- lm(supplylog ~ Distance_km)
par(mfrow = c(1,2))
hist(supply_dist$residuals)
hist(supply_dist_log$residuals)

plot of chunk unnamed-chunk-1


supply_year_log <- lm(supplylog ~ Year)
par(mfrow = c(1,2))
hist(supply_year$residuals)
hist(supply_year_log$residuals)

plot of chunk unnamed-chunk-1


supply_rings_log <- lm(supplylog ~ Rings_km)
par(mfrow = c(1,2))
hist(supply_rings$residuals)
hist(supply_rings_log$residuals)

plot of chunk unnamed-chunk-1


#################################################################################
#################################################################################

# Generalized Linear Model

vancpois <- read.csv("/Users/Buzz/Desktop/HENV665Q/Final_Project/vancouver27.csv", header = T)
attach(vancpois)
The following object(s) are masked from 'vanc':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
names(vancpois)
[1] "Cell_ID"     "Year"        "Av_Offpeak"  "Distance_km" "Cat_km"     
[6] "Rings_km"   
View(vancpois)

# Graphs response variable = Av_Offpeak
hist(Av_Offpeak)
plot(Av_Offpeak)

plot of chunk unnamed-chunk-1


# dist_supply_pois <- glm(Av_Offpeak ~ Distance_km, family = poisson, na.action=na.exclude)
# summary(dist_supply_pois)

# Call:
#  glm(formula = Av_Offpeak ~ Distance_km, family = poisson, na.action = na.exclude)

#  Deviance Residuals: 
#  Min      1Q    Median     3Q     Max  
# -8.818  -2.198  -1.012   0.028  33.784  

#  Coefficients:
#  Estimate Std. Error z value Pr(>|z|)    
#  (Intercept)  3.7571429  0.0035074  1071.2   <2e-16 ***
#  Distance_km -0.1123599  0.0002507  -448.1   <2e-16 ***
#  ---
#  Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 

#  (Dispersion parameter for poisson family taken to be 1)

# Null deviance: 713542  on 39419  degrees of freedom
# Residual deviance: 389383  on 39418  degrees of freedom
# AIC: Inf

# Number of Fisher Scoring iterations: 6


# Residual deviance greater than degrees of freedom (9.878 overdispersion), apply quasipoisson

dist_supply_qpois1 <- glm(Av_Offpeak ~ Distance_km, family = quasipoisson)
summary(dist_supply_qpois1)

Call:
glm(formula = Av_Offpeak ~ Distance_km, family = quasipoisson)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
 -8.82   -2.20   -1.01    0.03   33.78  

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.75714    0.01466     256   <2e-16 ***
Distance_km -0.11236    0.00105    -107   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for quasipoisson family taken to be 17.46)

    Null deviance: 713542  on 39419  degrees of freedom
Residual deviance: 389383  on 39418  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 6

par(mfrow = c(1,1))
plot(Av_Offpeak ~ Distance_km, pch = 19)
interval1 <- seq(0, 60, 10)
prediction1 <- predict(dist_supply_qpois1, list(Distance_km = interval1), type = 'response')
lines(interval1, prediction1, col = "green", lwd = 3)

plot of chunk unnamed-chunk-1


supply_year_qpois2 <- glm(Av_Offpeak ~ Year, family = quasipoisson)
summary(supply_year_qpois2)

Call:
glm(formula = Av_Offpeak ~ Year, family = quasipoisson)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
 -4.29   -3.46   -2.66   -0.18   48.50  

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -61.9242     2.3900   -25.9   <2e-16 ***
Year          0.0319     0.0012    26.7   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for quasipoisson family taken to be 39.21)

    Null deviance: 713542  on 39419  degrees of freedom
Residual deviance: 684406  on 39418  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 6

par(mfrow = c(1,2))
hist(supply_year_qpois2$residuals)
hist(supply_year$residuals)

plot of chunk unnamed-chunk-1


supply_rings_qpois3 <- glm(Av_Offpeak ~ Rings_km, family = quasipoisson)
summary(supply_rings_qpois3)

Call:
glm(formula = Av_Offpeak ~ Rings_km, family = quasipoisson)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
 -9.16   -1.94   -1.45    0.09   34.01  

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)         3.7368     0.0155   240.4   <2e-16 ***
Rings_kmRing_10km  -1.0667     0.0218   -48.9   <2e-16 ***
Rings_kmRing_15km  -1.6203     0.0268   -60.5   <2e-16 ***
Rings_kmRing_20km  -1.8782     0.0280   -67.1   <2e-16 ***
Rings_kmRing_25km  -2.2749     0.0321   -70.9   <2e-16 ***
Rings_kmRing_60km  -3.6889     0.0319  -115.8   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for quasipoisson family taken to be 16.02)

    Null deviance: 713542  on 39419  degrees of freedom
Residual deviance: 400071  on 39414  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 6

par(mfrow = c(1,2))
hist(supply_rings_qpois3$residuals)
hist(supply_rings$residuals)

plot of chunk unnamed-chunk-1


#################################################################################
#################################################################################


# Linear Mixed Effect Model

modelfit <- lm(Av_Offpeak ~ Distance_km * Year)
summary(modelfit)

Call:
lm(formula = Av_Offpeak ~ Distance_km * Year)

Residuals:
   Min     1Q Median     3Q    Max 
 -25.1   -6.1   -1.4    2.6  389.4 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      -1.01e+03   2.81e+01   -35.8   <2e-16 ***
Distance_km       2.44e+01   9.51e-01    25.7   <2e-16 ***
Year              5.13e-01   1.41e-02    36.5   <2e-16 ***
Distance_km:Year -1.25e-02   4.76e-04   -26.2   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 14.7 on 39416 degrees of freedom
Multiple R-squared: 0.184,  Adjusted R-squared: 0.184 
F-statistic: 2.96e+03 on 3 and 39416 DF,  p-value: <2e-16 


# supply_d_y <- lme(Av_Offpeak ~ Distance_km * Year, random = ~ Year | Cell_ID) DOESNT WORK

par(mfrow = c(1,1))
plot(Av_Offpeak ~ Distance_km, pch = 19, col = Year, xlab = "Frequency", ylab = "Distance")

plot of chunk unnamed-chunk-1


supply_d_y <- lme(Av_Offpeak ~ Distance_km * Year, random = ~Year|Cell_ID)
Error: nlminb problem, convergence error code = 1 message = iteration
limit reached without convergence (10)
summary(supply_d_y)
Error: object 'supply_d_y' not found

newsample <- vanc[sample(1:nrow(vanc), 500, replace = FALSE),]
par(mfrow = c(1,1))
plot(Rings_km, Av_Offpeak)

plot of chunk unnamed-chunk-1

View(newsample)

for (i in 1:1000)
{
  randomsamples <- vanc[sample(1:nrow(vanc),1000,replace = FALSE),]
  av <- mean(randomsamples$Distance_km)
  cumsum(av)
}


bb = cumsum(av)/1000
bb
[1] 0.02632
mean(Distance_km)
[1] 26.05
mean(cumsum)
Warning: argument is not numeric or logical: returning NA
[1] NA



supply_d_y <- lme(log(Av_Offpeak+1) ~ Distance_km, random = ~Year|Cell_ID)
summary(supply_d_y)
Linear mixed-effects model fit by REML
 Data: NULL 
    AIC   BIC logLik
  85621 85672 -42804

Random effects:
 Formula: ~Year | Cell_ID
 Structure: General positive-definite, Log-Cholesky parametrization
            StdDev    Corr  
(Intercept) 0.0810444 (Intr)
Year        0.0004407 -0.173
Residual    0.5255458       

Fixed effects: log(Av_Offpeak + 1) ~ Distance_km 
             Value Std.Error    DF t-value p-value
(Intercept)  2.269  0.019384 29565  117.08       0
Distance_km -0.051  0.000656  9853  -77.66       0
 Correlation: 
            (Intr)
Distance_km -0.882

Standardized Within-Group Residuals:
     Min       Q1      Med       Q3      Max 
-5.34596 -0.28617 -0.07044  0.12003  5.78830 

Number of Observations: 39420
Number of Groups: 9855 

#############################################

ring <- subset(vanc, Rings_km=="Ring_05km")
sampled_set<-sample(ring, (nrow(ring)/10), replace=FALSE)
Error: cannot take a sample larger than the population when 'replace =
FALSE'
sampled_set<-ring[sample(1:nrow(ring), (nrow(ring)/10), replace=FALSE),]


attach(sampled_set)
The following object(s) are masked from 'vancpois':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
names(sampled_set)
[1] "Cell_ID"     "Year"        "Av_Offpeak"  "Distance_km" "Cat_km"     
[6] "Rings_km"   
View(sampled_set)

supply_d_y <- lme(log(Av_Offpeak+1) ~ Distance_km, random = ~Year|Cell_ID)
summary(supply_d_y)
Linear mixed-effects model fit by REML
 Data: NULL 
    AIC   BIC logLik
  538.3 556.6 -263.1

Random effects:
 Formula: ~Year | Cell_ID
 Structure: General positive-definite, Log-Cholesky parametrization
            StdDev    Corr  
(Intercept) 0.4906588 (Intr)
Year        0.0006157 -0.628
Residual    0.8292790       

Fixed effects: log(Av_Offpeak + 1) ~ Distance_km 
             Value Std.Error  DF t-value p-value
(Intercept)  3.972   0.28961 138  13.714  0.0000
Distance_km -0.257   0.08268 138  -3.113  0.0023
 Correlation: 
            (Intr)
Distance_km -0.928

Standardized Within-Group Residuals:
    Min      Q1     Med      Q3     Max 
-2.8721 -0.1485  0.1571  0.4109  1.9708 

Number of Observations: 158
Number of Groups: 140 

#############################################

zones <- unique(vanc$Rings_km)

for(z in 1:NROW(zones)){
  ring <- subset(vanc, Rings_km==zones[z])
  sampled_set<-ring[sample(1:nrow(ring), (nrow(ring)/10), replace=FALSE),]


  attach(sampled_set)
#  names(sampled_set)
#  View(sampled_set)

  supply_d_y <- lme(log(Av_Offpeak+1) ~ Distance_km, random = ~Year|Cell_ID)
  summary(supply_d_y)
}
The following object(s) are masked from 'sampled_set (position 3)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 5)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 5)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 6)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 5)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 6)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 7)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 5)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 6)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 7)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 8)':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':

    Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
Error: nlminb problem, convergence error code = 1 message = iteration
limit reached without convergence (10)

#############################################

zone_lme_summary_by_name <- function(zone_name) {
  ring <- subset(vanc, Rings_km==zone_name)
  sampled_set<-ring[sample(1:nrow(ring), (nrow(ring)/10), replace=FALSE),]

  attach(sampled_set)

  supply_d_y <- lme(log(Av_Offpeak+1) ~ Distance_km, random = ~Year|Cell_ID)
  summary(supply_d_y)
}

zone_lme_summary_by_number <- function(zone_number) {
  zones <- unique(vanc$Rings_km)
  zone_lme_summary(zones[zone_number])
}

zone_lme_summary_by_number(1)
Error: could not find function "zone_lme_summary"
#############################################

rm(list=ls())