Buzz — Nov 28, 2012, 10:01 PM
vanc <- read.csv("/Users/Buzz/Desktop/HENV665Q/Final_Project/vancouver27.csv", header = T)
attach(vanc)
names(vanc)
[1] "Cell_ID" "Year" "Av_Offpeak" "Distance_km" "Cat_km"
[6] "Rings_km"
View(vanc)
library(lattice)
library(nlme)
# Data Exploration
pairs(Av_Offpeak ~ Distance_km + Rings_km + Year)
# Linear models
# R. Q.:
# 1) Does distance to downtown influence off peak transit supply? ***
supply_dist <- lm(Av_Offpeak ~ Distance_km)
summary(supply_dist)
Call:
lm(formula = Av_Offpeak ~ Distance_km)
Residuals:
Min 1Q Median 3Q Max
-17.6 -6.8 -1.8 2.7 397.1
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 18.00131 0.16028 112.3 <2e-16 ***
Distance_km -0.45832 0.00543 -84.5 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 15 on 39418 degrees of freedom
Multiple R-squared: 0.153, Adjusted R-squared: 0.153
F-statistic: 7.14e+03 on 1 and 39418 DF, p-value: <2e-16
# 2) Has off peak transit supply (by cell) increased in the last 30 years? ***
supply_year <- lm(Av_Offpeak ~ Year)
summary(supply_year)
Call:
lm(formula = Av_Offpeak ~ Year)
Residuals:
Min 1Q Median 3Q Max
-8.9 -7.0 -3.5 -0.4 406.3
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.69e+02 1.45e+01 -25.4 <2e-16 ***
Year 1.88e-01 7.29e-03 25.8 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 16.2 on 39418 degrees of freedom
Multiple R-squared: 0.0166, Adjusted R-squared: 0.0166
F-statistic: 665 on 1 and 39418 DF, p-value: <2e-16
# 3) Is there significant variation in transit supply between different zones (rings)?
supply_rings <- lm(Av_Offpeak ~ Rings_km)
summary(supply_rings)
Call:
lm(formula = Av_Offpeak ~ Rings_km)
Residuals:
Min 1Q Median 3Q Max
-42.0 -4.3 -1.0 0.2 373.2
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 41.963 0.349 120.2 <2e-16 ***
Rings_kmRing_10km -27.521 0.403 -68.3 <2e-16 ***
Rings_kmRing_15km -33.661 0.412 -81.8 <2e-16 ***
Rings_kmRing_20km -35.549 0.405 -87.8 <2e-16 ***
Rings_kmRing_25km -37.649 0.404 -93.3 <2e-16 ***
Rings_kmRing_60km -40.914 0.363 -112.7 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 13.9 on 39414 degrees of freedom
Multiple R-squared: 0.276, Adjusted R-squared: 0.276
F-statistic: 3.01e+03 on 5 and 39414 DF, p-value: <2e-16
# Histograms
par(mfrow = c(2,2))
hist(supply_year$residuals)
hist(supply_dist$residuals)
hist(supply_dist$residuals)
# Log-Transformation of response variable (Log for right-tailed data)
supplylog <-log(Av_Offpeak + 1)
# Re-running the models
supply_dist_log <- lm(supplylog ~ Distance_km)
par(mfrow = c(1,2))
hist(supply_dist$residuals)
hist(supply_dist_log$residuals)
supply_year_log <- lm(supplylog ~ Year)
par(mfrow = c(1,2))
hist(supply_year$residuals)
hist(supply_year_log$residuals)
supply_rings_log <- lm(supplylog ~ Rings_km)
par(mfrow = c(1,2))
hist(supply_rings$residuals)
hist(supply_rings_log$residuals)
#################################################################################
#################################################################################
# Generalized Linear Model
vancpois <- read.csv("/Users/Buzz/Desktop/HENV665Q/Final_Project/vancouver27.csv", header = T)
attach(vancpois)
The following object(s) are masked from 'vanc':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
names(vancpois)
[1] "Cell_ID" "Year" "Av_Offpeak" "Distance_km" "Cat_km"
[6] "Rings_km"
View(vancpois)
# Graphs response variable = Av_Offpeak
hist(Av_Offpeak)
plot(Av_Offpeak)
# dist_supply_pois <- glm(Av_Offpeak ~ Distance_km, family = poisson, na.action=na.exclude)
# summary(dist_supply_pois)
# Call:
# glm(formula = Av_Offpeak ~ Distance_km, family = poisson, na.action = na.exclude)
# Deviance Residuals:
# Min 1Q Median 3Q Max
# -8.818 -2.198 -1.012 0.028 33.784
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) 3.7571429 0.0035074 1071.2 <2e-16 ***
# Distance_km -0.1123599 0.0002507 -448.1 <2e-16 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# (Dispersion parameter for poisson family taken to be 1)
# Null deviance: 713542 on 39419 degrees of freedom
# Residual deviance: 389383 on 39418 degrees of freedom
# AIC: Inf
# Number of Fisher Scoring iterations: 6
# Residual deviance greater than degrees of freedom (9.878 overdispersion), apply quasipoisson
dist_supply_qpois1 <- glm(Av_Offpeak ~ Distance_km, family = quasipoisson)
summary(dist_supply_qpois1)
Call:
glm(formula = Av_Offpeak ~ Distance_km, family = quasipoisson)
Deviance Residuals:
Min 1Q Median 3Q Max
-8.82 -2.20 -1.01 0.03 33.78
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.75714 0.01466 256 <2e-16 ***
Distance_km -0.11236 0.00105 -107 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 17.46)
Null deviance: 713542 on 39419 degrees of freedom
Residual deviance: 389383 on 39418 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 6
par(mfrow = c(1,1))
plot(Av_Offpeak ~ Distance_km, pch = 19)
interval1 <- seq(0, 60, 10)
prediction1 <- predict(dist_supply_qpois1, list(Distance_km = interval1), type = 'response')
lines(interval1, prediction1, col = "green", lwd = 3)
supply_year_qpois2 <- glm(Av_Offpeak ~ Year, family = quasipoisson)
summary(supply_year_qpois2)
Call:
glm(formula = Av_Offpeak ~ Year, family = quasipoisson)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.29 -3.46 -2.66 -0.18 48.50
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -61.9242 2.3900 -25.9 <2e-16 ***
Year 0.0319 0.0012 26.7 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 39.21)
Null deviance: 713542 on 39419 degrees of freedom
Residual deviance: 684406 on 39418 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 6
par(mfrow = c(1,2))
hist(supply_year_qpois2$residuals)
hist(supply_year$residuals)
supply_rings_qpois3 <- glm(Av_Offpeak ~ Rings_km, family = quasipoisson)
summary(supply_rings_qpois3)
Call:
glm(formula = Av_Offpeak ~ Rings_km, family = quasipoisson)
Deviance Residuals:
Min 1Q Median 3Q Max
-9.16 -1.94 -1.45 0.09 34.01
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.7368 0.0155 240.4 <2e-16 ***
Rings_kmRing_10km -1.0667 0.0218 -48.9 <2e-16 ***
Rings_kmRing_15km -1.6203 0.0268 -60.5 <2e-16 ***
Rings_kmRing_20km -1.8782 0.0280 -67.1 <2e-16 ***
Rings_kmRing_25km -2.2749 0.0321 -70.9 <2e-16 ***
Rings_kmRing_60km -3.6889 0.0319 -115.8 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 16.02)
Null deviance: 713542 on 39419 degrees of freedom
Residual deviance: 400071 on 39414 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 6
par(mfrow = c(1,2))
hist(supply_rings_qpois3$residuals)
hist(supply_rings$residuals)
#################################################################################
#################################################################################
# Linear Mixed Effect Model
modelfit <- lm(Av_Offpeak ~ Distance_km * Year)
summary(modelfit)
Call:
lm(formula = Av_Offpeak ~ Distance_km * Year)
Residuals:
Min 1Q Median 3Q Max
-25.1 -6.1 -1.4 2.6 389.4
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.01e+03 2.81e+01 -35.8 <2e-16 ***
Distance_km 2.44e+01 9.51e-01 25.7 <2e-16 ***
Year 5.13e-01 1.41e-02 36.5 <2e-16 ***
Distance_km:Year -1.25e-02 4.76e-04 -26.2 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 14.7 on 39416 degrees of freedom
Multiple R-squared: 0.184, Adjusted R-squared: 0.184
F-statistic: 2.96e+03 on 3 and 39416 DF, p-value: <2e-16
# supply_d_y <- lme(Av_Offpeak ~ Distance_km * Year, random = ~ Year | Cell_ID) DOESNT WORK
par(mfrow = c(1,1))
plot(Av_Offpeak ~ Distance_km, pch = 19, col = Year, xlab = "Frequency", ylab = "Distance")
supply_d_y <- lme(Av_Offpeak ~ Distance_km * Year, random = ~Year|Cell_ID)
Error: nlminb problem, convergence error code = 1 message = iteration
limit reached without convergence (10)
summary(supply_d_y)
Error: object 'supply_d_y' not found
newsample <- vanc[sample(1:nrow(vanc), 500, replace = FALSE),]
par(mfrow = c(1,1))
plot(Rings_km, Av_Offpeak)
View(newsample)
for (i in 1:1000)
{
randomsamples <- vanc[sample(1:nrow(vanc),1000,replace = FALSE),]
av <- mean(randomsamples$Distance_km)
cumsum(av)
}
bb = cumsum(av)/1000
bb
[1] 0.02632
mean(Distance_km)
[1] 26.05
mean(cumsum)
Warning: argument is not numeric or logical: returning NA
[1] NA
supply_d_y <- lme(log(Av_Offpeak+1) ~ Distance_km, random = ~Year|Cell_ID)
summary(supply_d_y)
Linear mixed-effects model fit by REML
Data: NULL
AIC BIC logLik
85621 85672 -42804
Random effects:
Formula: ~Year | Cell_ID
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.0810444 (Intr)
Year 0.0004407 -0.173
Residual 0.5255458
Fixed effects: log(Av_Offpeak + 1) ~ Distance_km
Value Std.Error DF t-value p-value
(Intercept) 2.269 0.019384 29565 117.08 0
Distance_km -0.051 0.000656 9853 -77.66 0
Correlation:
(Intr)
Distance_km -0.882
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-5.34596 -0.28617 -0.07044 0.12003 5.78830
Number of Observations: 39420
Number of Groups: 9855
#############################################
ring <- subset(vanc, Rings_km=="Ring_05km")
sampled_set<-sample(ring, (nrow(ring)/10), replace=FALSE)
Error: cannot take a sample larger than the population when 'replace =
FALSE'
sampled_set<-ring[sample(1:nrow(ring), (nrow(ring)/10), replace=FALSE),]
attach(sampled_set)
The following object(s) are masked from 'vancpois':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
names(sampled_set)
[1] "Cell_ID" "Year" "Av_Offpeak" "Distance_km" "Cat_km"
[6] "Rings_km"
View(sampled_set)
supply_d_y <- lme(log(Av_Offpeak+1) ~ Distance_km, random = ~Year|Cell_ID)
summary(supply_d_y)
Linear mixed-effects model fit by REML
Data: NULL
AIC BIC logLik
538.3 556.6 -263.1
Random effects:
Formula: ~Year | Cell_ID
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.4906588 (Intr)
Year 0.0006157 -0.628
Residual 0.8292790
Fixed effects: log(Av_Offpeak + 1) ~ Distance_km
Value Std.Error DF t-value p-value
(Intercept) 3.972 0.28961 138 13.714 0.0000
Distance_km -0.257 0.08268 138 -3.113 0.0023
Correlation:
(Intr)
Distance_km -0.928
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.8721 -0.1485 0.1571 0.4109 1.9708
Number of Observations: 158
Number of Groups: 140
#############################################
zones <- unique(vanc$Rings_km)
for(z in 1:NROW(zones)){
ring <- subset(vanc, Rings_km==zones[z])
sampled_set<-ring[sample(1:nrow(ring), (nrow(ring)/10), replace=FALSE),]
attach(sampled_set)
# names(sampled_set)
# View(sampled_set)
supply_d_y <- lme(log(Av_Offpeak+1) ~ Distance_km, random = ~Year|Cell_ID)
summary(supply_d_y)
}
The following object(s) are masked from 'sampled_set (position 3)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 5)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 5)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 6)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 5)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 6)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 7)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 3)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 4)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 5)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 6)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 7)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'sampled_set (position 8)':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vancpois':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
The following object(s) are masked from 'vanc':
Av_Offpeak, Cat_km, Cell_ID, Distance_km, Rings_km, Year
Error: nlminb problem, convergence error code = 1 message = iteration
limit reached without convergence (10)
#############################################
zone_lme_summary_by_name <- function(zone_name) {
ring <- subset(vanc, Rings_km==zone_name)
sampled_set<-ring[sample(1:nrow(ring), (nrow(ring)/10), replace=FALSE),]
attach(sampled_set)
supply_d_y <- lme(log(Av_Offpeak+1) ~ Distance_km, random = ~Year|Cell_ID)
summary(supply_d_y)
}
zone_lme_summary_by_number <- function(zone_number) {
zones <- unique(vanc$Rings_km)
zone_lme_summary(zones[zone_number])
}
zone_lme_summary_by_number(1)
Error: could not find function "zone_lme_summary"
#############################################
rm(list=ls())