2023.03.01 CaBi test analysis

library(here)
here() starts at /Users/galenerickson/Desktop/Personal/Analyses/CaPi/2023.03.01 CaBi test analysis
library(readr)
library(ggplot2)
library(leaflet)
library(ggmap)
ℹ Google's Terms of Service: <https://mapsplatform.google.com>
  Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
  OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(fpc)
library(geosphere)


# Read the trip-data CSV; the path is resolved by here() relative to the
# project directory
trips <- read_csv(file = here("202312-capitalbikeshare-tripdata.csv"))
Rows: 277042 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
dbl  (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
dttm (2): started_at, ended_at

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#plot data
library(leaflet)

# The code below expects `trips` to contain the columns start_lat, start_lng, end_lat and end_lng

# If you haven't already, install and then load dplyr
# install.packages("dplyr")
library(dplyr)

Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Keep only classic-bike trips whose start/end coordinates and start/end
# station ids are all present (no time-range filtering happens here)
subset_trips <- trips %>%
  filter(
    rideable_type == "classic_bike",
    !is.na(start_lng),
    !is.na(start_lat),
    !is.na(end_lat),
    !is.na(end_lng),
    !is.na(start_station_id),
    !is.na(end_station_id)
  )
# Tally trips per (start station, end station) pair, order the pairs from most
# to least frequent, and keep the first 10
top_routes <- subset_trips %>%
  count(start_station_id, end_station_id, name = "count", sort = TRUE) %>%
  slice_head(n = 10)

# Attach coordinates to each top route. For every (start, end) station pair the
# coordinates of the first matching trip in subset_trips are used.
# NOTE(review): this assumes a station's coordinates are the same on every
# trip -- confirm against the data.
top_routes_with_coords <- top_routes %>%
  select(start_station_id, end_station_id) %>%
  left_join(
    subset_trips %>%
      select(
        start_station_id, end_station_id,
        start_lat, start_lng, end_lat, end_lng
      ) %>%
      distinct(start_station_id, end_station_id, .keep_all = TRUE),
    by = c("start_station_id", "end_station_id")
  )

# Plot the routes on a map with leaflet.
# Build the base map first; route segments are layered on one at a time below.
routes_map <- leaflet(data = top_routes_with_coords) %>%
  addTiles()

# addPolylines() takes one lng vector and one lat vector; its next positional
# arguments are layerId and group. Passing the end coordinates positionally
# therefore does not draw start-to-end segments (it joins all start points into
# a single line). Each route is added as its own two-point polyline instead.
for (i in seq_len(nrow(top_routes_with_coords))) {
  routes_map <- routes_map %>%
    addPolylines(
      lng = c(
        top_routes_with_coords$start_lng[i],
        top_routes_with_coords$end_lng[i]
      ),
      lat = c(
        top_routes_with_coords$start_lat[i],
        top_routes_with_coords$end_lat[i]
      ),
      color = "blue",
      weight = 4,
      opacity = 0.7
    )
}

# Mark start stations (red) and end stations (green) in toggleable layers.
# Station ids are read as numeric, while popup content is documented as
# character/HTML, so the ids are converted to text.
routes_map %>%
  addCircleMarkers(
    ~start_lng, ~start_lat,
    color = "red",
    radius = 4,
    popup = ~as.character(start_station_id),
    group = "Start Stations"
  ) %>%
  addCircleMarkers(
    ~end_lng, ~end_lat,
    color = "green",
    radius = 4,
    popup = ~as.character(end_station_id),
    group = "End Stations"
  ) %>%
  addLayersControl(
    overlayGroups = c("Start Stations", "End Stations"),
    options = layersControlOptions(collapsed = FALSE)
  )