The goal of this project was to analyze the daily cyclist counts on the NYC East river bridges and evaluate how weather and day-of-week influence traffic volume. Two Poisson regression models were applied: a counts model predicting total daily cyclists, and a rate model which used an offset to examine relative flow rates after adjusting for exposure.
The dataset contains daily totals for bridge traffic aolng with weather variables (high temp, low temp, precipitation) and calendar information (date, day). All counts were non-negative integers representing the number of cyclists crossing a single bridge per day. Each observation was treated as independent. After cleaning and converting text dates to proper date objects, and factors were created for weekdays and months.
Poisson generalized linear models were used because the response variable represented count data. Two versions of the model were fit: Counts models and rate models. Model selection used AIC, and overdispersion was checked using residual deviance/degrees of freedom.
Weekday effects: Relative to Friday, Wednesday was +20%, Thursday +12%, and Tuesday +10% showed higher expected cyclist counts. Weekend days dropped sharply, with Saturday -18% and Sunday -20%. Weather Effects: Each 1 degree increase in temp raised expected counts by 1.6%. Lowtemp had an opposite effect, and precipitation reduced counts 27% per unit. # Rate Model Using offset(log(total)) produced nearly identical effects but a smaller deviance, although still over dispersed. This model appropriately interpreted coefficients as having effects on rates rather than raw counts.
Cyclists traffic increased mid-week on warm and dry days. Weekend and rainy conditions reduce riders.
The analysis shows how routine weather and temporal variables help explain variability in cyclist volume. Limitations within the study include bridge limitation as only one bridge had data, possible autocorrelation between days, and unmeasured variables like holidays or daylight length. Future work could expand to more bridges or time-series model to address over dispersion and temperature dependence.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyr)
library(ggplot2)
library(broom)
library(knitr)
library(forcats)
data_path <- "C:/Users/rg03/Downloads/sta321/PoissonData.xlsx"
raw <- readxl::read_excel(data_path)
names(raw) <- names(raw) %>%
tolower() %>%
gsub("[[:space:]]+", "*", .) %>%
gsub("[^a-z0-9*]", "", .)
dat0 <- raw
has_total <- "total" %in% names(dat0)
bridge_cols <- names(dat0)[names(dat0) %in% c(
"brooklyn","manhattan","williamsburg","queensborobridge"
)]
date_col <- if ("date" %in% names(dat0)) "date" else NULL
day_col <- if ("day" %in% names(dat0)) "day" else NULL
hitemp_col <- names(dat0)[grepl("^hightemp", names(dat0))][1]
if (is.na(hitemp_col)) hitemp_col <- NULL
lotemp_col <- names(dat0)[grepl("^lowtemp", names(dat0))][1]
if (is.na(lotemp_col)) lotemp_col <- NULL
prcp_col <- if ("precipitation" %in% names(dat0)) "precipitation" else NULL
if (length(bridge_cols) >= 1) {
if (length(bridge_cols) > 1) {
long_counts <- dat0 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(bridge_cols),
names_to = "bridge",
values_to = "count"
)
} else {
bcol <- bridge_cols[1]
long_counts <- dat0 %>%
dplyr::mutate(
bridge = bcol,
count = .data[[bcol]]
)
}
long_counts <- long_counts %>% dplyr::mutate(direction = "total")
if (has_total) {
long_counts$total <- long_counts$total
} else {
long_counts$total <- long_counts$count
}
} else if (has_total) {
long_counts <- dat0 %>%
dplyr::mutate(
bridge = "all",
direction = "total",
count = total
)
} else {
stop("Could not detect counts. Please ensure your sheet has a bridge count column (e.g., QueensboroBridge) or a Total column.")
}
dat <- long_counts %>%
{ if (!is.null(date_col)) dplyr::rename(., date = dplyr::all_of(date_col)) else dplyr::mutate(., date = NA) } %>%
dplyr::mutate(
date = suppressWarnings(lubridate::as_date(date)),
wday_chr = dplyr::case_when(
!is.null(day_col) ~ as.character(.data[[day_col]]),
!is.na(date) ~ as.character(lubridate::wday(date, label = TRUE, abbr = TRUE)),
TRUE ~ NA_character_
),
month_chr = dplyr::case_when(
!is.na(date) ~ as.character(lubridate::month(date, label = TRUE, abbr = TRUE)),
TRUE ~ NA_character_
),
wday = factor(wday_chr),
month = factor(month_chr),
bridge = factor(bridge),
direction = factor(direction),
count = as.numeric(count),
total = as.numeric(total)
) %>%
dplyr::select(-wday_chr, -month_chr)
if (!is.null(hitemp_col)) dat$hightemp <- suppressWarnings(as.numeric(dat0[[hitemp_col]]))
if (!is.null(lotemp_col)) dat$lowtemp <- suppressWarnings(as.numeric(dat0[[lotemp_col]]))
if (!is.null(prcp_col)) dat$precip <- suppressWarnings(as.numeric(dat0[[prcp_col]]))
dat <- dat %>% dplyr::filter(!is.na(count))
knitr::kable(head(dat), caption = "Preview of standardized data")
| date | day | hightemp | lowtemp | precipitation | queensborobridge | total | bridge | count | direction | wday | month | precip |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2025-07-01 | Saturday | 84.9 | 72.0 | 0.23 | 3216 | 11867 | queensborobridge | 3216 | total | Saturday | Jul | 0.23 |
| 2025-07-02 | Sunday | 87.1 | 73.0 | 0.00 | 3579 | 13995 | queensborobridge | 3579 | total | Sunday | Jul | 0.00 |
| 2025-07-03 | Monday | 87.1 | 71.1 | 0.45 | 4230 | 16067 | queensborobridge | 4230 | total | Monday | Jul | 0.45 |
| 2025-07-04 | Tuesday | 82.9 | 70.0 | 0.00 | 3861 | 13925 | queensborobridge | 3861 | total | Tuesday | Jul | 0.00 |
| 2025-07-05 | Wednesday | 84.9 | 71.1 | 0.00 | 5862 | 23110 | queensborobridge | 5862 | total | Wednesday | Jul | 0.00 |
| 2025-07-06 | Thursday | 75.0 | 71.1 | 0.00 | 5251 | 21861 | queensborobridge | 5251 | total | Thursday | Jul | 0.00 |
has_2lvls <- function(x) length(unique(na.omit(x))) > 1
has_var <- function(x) is.numeric(x) && is.finite(sd(x, na.rm = TRUE)) && sd(x, na.rm = TRUE) > 0
rhs_terms <- c()
if ("bridge" %in% names(dat) && has_2lvls(dat$bridge)) rhs_terms <- c(rhs_terms, "bridge")
if ("direction" %in% names(dat) && has_2lvls(dat$direction)) rhs_terms <- c(rhs_terms, "direction")
if ("month" %in% names(dat) && has_2lvls(dat$month)) rhs_terms <- c(rhs_terms, "month")
if ("wday" %in% names(dat) && has_2lvls(dat$wday)) rhs_terms <- c(rhs_terms, "wday")
if ("hightemp" %in% names(dat) && has_var(dat$hightemp)) rhs_terms <- c(rhs_terms, "hightemp")
if ("lowtemp" %in% names(dat) && has_var(dat$lowtemp)) rhs_terms <- c(rhs_terms, "lowtemp")
if ("precip" %in% names(dat) && has_var(dat$precip)) rhs_terms <- c(rhs_terms, "precip")
if (length(rhs_terms) == 0) rhs_terms <- "1"
counts_formula <- as.formula(paste("count ~", paste(rhs_terms, collapse = " + ")))
dat2 <- dat %>% mutate(
bridge = if ("bridge" %in% names(.)) droplevels(bridge) else bridge,
direction = if ("direction" %in% names(.)) droplevels(direction) else direction,
month = if ("month" %in% names(.)) droplevels(month) else month,
wday = if ("wday" %in% names(.)) droplevels(wday) else wday
)
m_counts_full <- glm(counts_formula, family = poisson(link = "log"), data = dat2)
counts_full_tidy <- broom::tidy(m_counts_full, conf.int = TRUE, exponentiate = TRUE)
knitr::kable(counts_full_tidy, digits = 3, caption = "Counts model (IRRs = exp(coef)) — full")
| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| (Intercept) | 5175.666 | 0.048 | 178.508 | 0 | 4711.687 | 5685.036 |
| wdayMonday | 1.056 | 0.011 | 4.991 | 0 | 1.033 | 1.078 |
| wdaySaturday | 0.823 | 0.011 | -17.525 | 0 | 0.805 | 0.841 |
| wdaySunday | 0.800 | 0.012 | -19.211 | 0 | 0.782 | 0.819 |
| wdayThursday | 1.123 | 0.011 | 10.232 | 0 | 1.098 | 1.148 |
| wdayTuesday | 1.101 | 0.011 | 8.506 | 0 | 1.077 | 1.126 |
| wdayWednesday | 1.202 | 0.011 | 16.595 | 0 | 1.176 | 1.228 |
| hightemp | 1.016 | 0.001 | 19.692 | 0 | 1.014 | 1.018 |
| lowtemp | 0.980 | 0.001 | -16.876 | 0 | 0.978 | 0.983 |
| precip | 0.725 | 0.011 | -30.584 | 0 | 0.710 | 0.740 |
m_counts <- tryCatch(step(m_counts_full, trace = 0), error = function(e) m_counts_full)
knitr::kable(broom::tidy(m_counts, conf.int = TRUE, exponentiate = TRUE),
digits = 3, caption = "Counts model (IRRs) — final")
| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| (Intercept) | 5175.666 | 0.048 | 178.508 | 0 | 4711.687 | 5685.036 |
| wdayMonday | 1.056 | 0.011 | 4.991 | 0 | 1.033 | 1.078 |
| wdaySaturday | 0.823 | 0.011 | -17.525 | 0 | 0.805 | 0.841 |
| wdaySunday | 0.800 | 0.012 | -19.211 | 0 | 0.782 | 0.819 |
| wdayThursday | 1.123 | 0.011 | 10.232 | 0 | 1.098 | 1.148 |
| wdayTuesday | 1.101 | 0.011 | 8.506 | 0 | 1.077 | 1.126 |
| wdayWednesday | 1.202 | 0.011 | 16.595 | 0 | 1.176 | 1.228 |
| hightemp | 1.016 | 0.001 | 19.692 | 0 | 1.014 | 1.018 |
| lowtemp | 0.980 | 0.001 | -16.876 | 0 | 0.978 | 0.983 |
| precip | 0.725 | 0.011 | -30.584 | 0 | 0.710 | 0.740 |
knitr::kable(broom::glance(m_counts), digits = 3, caption = "Counts model fit stats")
| null.deviance | df.null | logLik | AIC | BIC | deviance | df.residual | nobs |
|---|---|---|---|---|---|---|---|
| 7891.022 | 30 | -1670.379 | 3360.758 | 3375.098 | 3023.595 | 21 | 31 |
rate_dat <- dat2 %>% dplyr::filter(!is.na(total), total > 0)
if (nrow(rate_dat) >= 2) {
rhs_rate <- c()
if ("bridge" %in% names(rate_dat) && has_2lvls(rate_dat$bridge)) rhs_rate <- c(rhs_rate, "bridge")
if ("direction" %in% names(rate_dat) && has_2lvls(rate_dat$direction)) rhs_rate <- c(rhs_rate, "direction")
if ("month" %in% names(rate_dat) && has_2lvls(rate_dat$month)) rhs_rate <- c(rhs_rate, "month")
if ("wday" %in% names(rate_dat) && has_2lvls(rate_dat$wday)) rhs_rate <- c(rhs_rate, "wday")
if ("hightemp" %in% names(rate_dat) && has_var(rate_dat$hightemp)) rhs_rate <- c(rhs_rate, "hightemp")
if ("lowtemp" %in% names(rate_dat) && has_var(rate_dat$lowtemp)) rhs_rate <- c(rhs_rate, "lowtemp")
if ("precip" %in% names(rate_dat) && has_var(rate_dat$precip)) rhs_rate <- c(rhs_rate, "precip")
if (length(rhs_rate) == 0) rhs_rate <- "1"
rate_formula <- as.formula(paste("count ~", paste(rhs_rate, collapse = " + "),
"+ offset(log(total))"))
rate_dat2 <- rate_dat %>% mutate(
bridge = if ("bridge" %in% names(.)) droplevels(bridge) else bridge,
direction = if ("direction" %in% names(.)) droplevels(direction) else direction,
month = if ("month" %in% names(.)) droplevels(month) else month,
wday = if ("wday" %in% names(.)) droplevels(wday) else wday
)
m_rate_full <- glm(rate_formula, family = poisson(link = "log"), data = rate_dat2)
rate_full_tidy <- broom::tidy(m_rate_full, conf.int = TRUE, exponentiate = TRUE)
knitr::kable(rate_full_tidy, digits = 3, caption = "Rate model (rate ratios) — full")
m_rate <- tryCatch(step(m_rate_full, trace = 0), error = function(e) m_rate_full)
knitr::kable(broom::tidy(m_rate, conf.int = TRUE, exponentiate = TRUE),
digits = 3, caption = "Rate model (rate ratios) — final")
knitr::kable(broom::glance(m_rate), digits = 3, caption = "Rate model fit stats")
} else {
knitr::kable(head(rate_dat), caption = "Rate data unavailable or lacks valid 'total'; skipping rate model")
}
| null.deviance | df.null | logLik | AIC | BIC | deviance | df.residual | nobs |
|---|---|---|---|---|---|---|---|
| 467.325 | 30 | -330.421 | 680.841 | 695.181 | 343.678 | 21 | 31 |