# Ride duration per trip.
# Subtracting two date-times yields a difftime whose units (secs, mins, hours
# or days) are chosen automatically from the data, so a bare as.numeric() does
# not reliably give seconds. Ask for seconds explicitly.
Trips <- Trips |>
  mutate(
    duration_sec = as.numeric(difftime(edate, sdate, units = "secs")),
    duration_min = duration_sec / 60
  )

# Trim the upper tail: keep rides with a positive duration that is no longer
# than the 99th percentile, so a few extreme rides do not flatten the boxes.
q_hi <- quantile(Trips$duration_min, 0.99, na.rm = TRUE)
Trips_f <- Trips |>
  filter(duration_min > 0, duration_min <= q_hi)

# One box per client type; the fill legend would duplicate the x axis, so it
# is hidden.
ggplot(Trips_f, aes(x = client, y = duration_min, fill = client)) +
  geom_boxplot(outlier.alpha = 0.15) +
  labs(
    title = "Bike Rental Duration by Client Type (trimmed at 99th percentile)",
    x = "Client Type",
    y = "Duration (minutes)"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none")
Notes: Casual users typically ride longer than regulars; trimming at the 99th percentile keeps the boxes focused on typical rides rather than a few extreme ones.
# Derive calendar and clock components from each trip's start time (sdate):
# day of year, labelled/abbreviated weekday (week_start = 7), hour and minute.
Trips <- dplyr::mutate(
  Trips,
  yday   = lubridate::yday(sdate),
  wday   = lubridate::wday(
    sdate,
    label = TRUE,
    abbr = TRUE,
    week_start = 7
  ),
  hour   = lubridate::hour(sdate),
  minute = lubridate::minute(sdate)
)
# Density of checkouts over the day of the year (Q4 2014)
Trips |>
  ggplot(aes(x = yday)) +
  geom_density(fill = "gray85", color = NA) +
  theme_minimal(base_size = 12) +
  ggtitle("Checkouts by Day of Year (Q4 2014)") +
  xlab("Day of Year (1–365)") +
  ylab("Density")
# Number of checkouts on each day of the week
Trips |>
  ggplot(aes(x = wday)) +
  geom_bar(fill = "steelblue") +
  theme_minimal(base_size = 12) +
  ggtitle("Checkouts by Day of Week") +
  xlab("Weekday") +
  ylab("Trips")
# Density of checkouts over the hour of the day, with an axis tick every
# two hours
Trips |>
  ggplot(aes(x = hour)) +
  geom_density(fill = "gray85", color = NA) +
  scale_x_continuous(breaks = seq(0, 24, 2)) +
  theme_minimal(base_size = 12) +
  ggtitle("Checkouts by Hour of Day (Density)") +
  xlab("Hour (0–24)") +
  ylab("Density")
# Density of checkouts over the minute within the hour
Trips |>
  ggplot(aes(x = minute)) +
  geom_density(fill = "gray85", color = NA) +
  theme_minimal(base_size = 12) +
  ggtitle("Checkouts by Minute within Hour") +
  xlab("Minute (0–59)") +
  ylab("Density")
# Tally trips for each observed client × weekday × hour combination
Trips_counts <- dplyr::count(Trips, client, wday, hour, name = "n")

# One panel per weekday (each with its own y scale), one line per client type
Trips_counts |>
  ggplot(aes(x = hour, y = n, color = client)) +
  geom_line(linewidth = 0.9) +
  facet_wrap(~ wday, ncol = 4, scales = "free_y") +
  scale_x_continuous(breaks = seq(0, 24, 3)) +
  theme_minimal(base_size = 12) +
  labs(
    title = "Trips by Hour, Weekday, and Client Type",
    x = "Hour of Day",
    y = "Trips",
    color = "Client"
  )
Notes: Regulars peak at commute hours (around 8 a.m. and 5 p.m.), while casuals show broader daytime and weekend use.
# Great-circle distance between two points using the haversine formula.
#
# Arguments:
#   lat1, lon1  latitude / longitude of the first point(s), in degrees
#   lat2, lon2  latitude / longitude of the second point(s), in degrees
#   radius      sphere radius; the result is expressed in the same unit.
#               Defaults to 6371000, i.e. distances come back in meters.
#
# Returns a numeric vector of distances. All coordinate arguments are
# vectorised and follow R's usual recycling rules.
#
# Local names are plain ASCII: non-ASCII identifiers only parse in locales
# that treat those characters as letters, which makes a script non-portable.
haversine <- function(lat1, lon1, lat2, lon2, radius = 6371000) {
  to_rad <- function(deg) deg * pi / 180

  phi1 <- to_rad(lat1)
  phi2 <- to_rad(lat2)
  d_phi <- phi2 - phi1
  d_lambda <- to_rad(lon2) - to_rad(lon1)

  a <- sin(d_phi / 2)^2 + cos(phi1) * cos(phi2) * sin(d_lambda / 2)^2

  # Floating-point rounding can push sqrt(a) slightly above 1, where asin()
  # would return NaN; clamp it.
  2 * radius * asin(pmin(1, sqrt(a)))
}
# Two copies of the station table, one named as the start station and one as
# the end station, so that they can be crossed without column-name clashes.
Simple <- dplyr::select(Stations, sstation = name, lat, long)
Simple2 <- dplyr::select(Simple, estation = sstation, lat2 = lat, long2 = long)

# by = NULL makes merge() return the cartesian product: every start station
# paired with every end station.
StationPairs <- merge(Simple, Simple2, by = NULL)

# Straight-line distance for each pair; haversine() works in meters, so
# divide by 1000 for kilometres.
PairDistances <- StationPairs |>
  dplyr::mutate(
    distance_km = haversine(lat, long, lat2, long2) / 1000
  ) |>
  dplyr::select(sstation, estation, distance_km)

# Histogram of all pairwise station distances (0.25 km bins)
PairDistances |>
  ggplot(aes(x = distance_km)) +
  geom_histogram(binwidth = 0.25, fill = "gray80", color = "white") +
  theme_minimal(base_size = 12) +
  ggtitle("Station-to-Station Straight-Line Distances") +
  xlab("Distance (km)") +
  ylab("Count")
# Attach the straight-line start-to-end distance to every ride. This is an
# inner join, so rides whose (sstation, estation) pair is not present in
# PairDistances are dropped.
Trips_w_dist <- dplyr::inner_join(
  Trips,
  PairDistances,
  by = c("sstation", "estation")
)

# Overlay the two distributions on a common density scale: station-pair
# distances as a histogram, ride distances as a density curve.
ggplot() +
  geom_histogram(
    aes(x = distance_km, y = after_stat(density)),
    data = PairDistances,
    binwidth = 0.25,
    fill = "gray85",
    color = NA
  ) +
  geom_density(
    aes(x = distance_km),
    data = Trips_w_dist,
    color = "blue",
    linewidth = 1
  ) +
  theme_minimal(base_size = 12) +
  labs(
    title = "Ride Distances vs Station Distances",
    subtitle = "Gray: all station pairs | Blue: rides (straight-line start→end)",
    x = "Distance (km)",
    y = "Density"
  )
Notes: Most rides cluster within ~2–3 km; they are generally shorter than many of the possible station-to-station distances.
# Scatter plot of station coordinates.
# With a free aspect ratio the axes are stretched independently to fill the
# plotting area, which distorts the layout. coord_quickmap() fixes the aspect
# ratio with an approximation suited to small areas, so the layout keeps
# roughly its true shape.
ggplot(Stations, aes(x = long, y = lat)) +
  geom_point(color = "red", size = 1.5) +
  coord_quickmap() +
  labs(
    title = "Capital Bikeshare Station Locations",
    x = "Longitude",
    y = "Latitude"
  ) +
  theme_minimal(base_size = 12)
Notes: Dense downtown cluster with several outlying stations.