# dir.create("~/Raw", recursive = TRUE)
# url <-
# "https://s3.amazonaws.com/biketown-tripdata-public/BiketownPublicTripData201804.zip"
# destfile <- "~/Raw/BiketownPublicTripData201804.zip"
# download(url, destfile, quiet = TRUE)
# unzip(destfile, exdir = "~/Raw", overwrite = TRUE)csv_files <- list.files("~/Raw/PublicTripData", full.names = TRUE) %>%
grep(pattern = ".csv", value = TRUE)biketown_raw <- tibble()
for (file in csv_files) {
biketown_raw <- biketown_raw %>% bind_rows(read_csv(
file, progress = FALSE, col_types = cols(
.default = col_character(),
StartLatitude = col_double(),
StartLongitude = col_double(),
StartDate = col_date(format = "%m/%d/%Y"),
StartTime = col_time(format = "%H:%M"),
EndLatitude = col_double(),
EndLongitude = col_double(),
EndDate = col_date(format = "%m/%d/%Y"),
EndTime = col_time(format = "%H:%M"),
Distance_Miles = col_double())))}biketown_cleaned <- biketown_raw %>%
filter(
!is.na(StartLatitude),
!is.na(EndLatitude),
!is.na(Duration),
!is.na(Distance_Miles))biketown_cleaned <- biketown_cleaned %>% mutate(
duration_hours = as.numeric(
as.duration(hms(Duration)), "hours"),
mph = Distance_Miles / duration_hours)high_durations <- biketown_cleaned %>%
top_n(15, duration_hours) %>%
mutate(days = duration_hours / 24) %>%
select(RouteID, duration_hours, days, StartDate, EndDate) %>%
arrange(desc(days))options(tibble.print_max = 20, tibble.print_min = 20)
high_durations## # A tibble: 15 x 5
## RouteID duration_hours days StartDate EndDate
## <chr> <dbl> <dbl> <date> <date>
## 1 2475238 550617. 22942. 2017-03-14 2080-01-05
## 2 2596071 550041. 22918. 2017-04-07 2080-01-05
## 3 2674282 549764. 22907. 2017-04-18 2080-01-05
## 4 3580600 547725. 22822. 2017-07-12 2080-01-05
## 5 3897287 547186. 22799. 2017-08-04 2080-01-05
## 6 3972142 547049. 22794. 2017-08-10 2080-01-05
## 7 4433814 546260. 22761. 2017-09-11 2080-01-05
## 8 4498302 546155. 22756. 2017-09-16 2080-01-05
## 9 4577375 546002. 22750. 2017-09-22 2080-01-05
## 10 2719516 191. 7.97 2017-04-25 2017-05-03
## 11 4611006 97.2 4.05 2017-09-25 2017-09-29
## 12 3240778 96.9 4.04 2017-06-17 2017-06-21
## 13 4296715 87.5 3.64 2017-09-01 2017-09-05
## 14 1478741 81.3 3.39 2016-08-19 2016-08-22
## 15 1429170 70.4 2.93 2016-08-11 2016-08-14
biketown_cleaned <- biketown_cleaned %>% filter(EndDate != '2080-01-05')high_distances <- biketown_cleaned %>%
top_n(15, Distance_Miles) %>%
select(RouteID, Distance_Miles, duration_hours, mph) %>%
arrange(desc(Distance_Miles))high_distances## # A tibble: 15 x 4
## RouteID Distance_Miles duration_hours mph
## <chr> <dbl> <dbl> <dbl>
## 1 3113184 5258. 7.17 733.
## 2 1376084 5253. 0.884 5945.
## 3 1374716 5250. 0.292 17998.
## 4 1819628 5249. 0.163 32137.
## 5 3124476 5249. 0.201 26172.
## 6 5941795 5249. 0.171 30625.
## 7 4298322 5249. 0.129 40811.
## 8 2309921 5247. 0.263 19927.
## 9 3903517 5247. 0.312 16807.
## 10 5039933 5246. 0.103 50909.
## 11 1362610 2587. 5.50 470.
## 12 2712719 1430. 18.1 79.1
## 13 2719516 865. 191. 4.53
## 14 4174266 86.9 15.3 5.69
## 15 3281356 67.8 16.1 4.21
biketown_cleaned <- biketown_cleaned %>% filter(Distance_Miles < 100)biketown_cleaned <- biketown_cleaned %>% filter(
duration_hours > 0,
Distance_Miles > 0
).start_coords <- paste(
"GPS(",
biketown_cleaned$StartLongitude,
",",
biketown_cleaned$StartLatitude,
")", sep = ""
).end_coords <- paste(
"GPS(",
biketown_cleaned$EndLongitude,
",",
biketown_cleaned$EndLatitude,
")", sep = ""
)biketown_cleaned$StartHub <- ifelse(
is.na(biketown_cleaned$StartHub),
.start_coords,
biketown_cleaned$StartHub
)
biketown_cleaned$EndHub <- ifelse(
is.na(biketown_cleaned$EndHub),
.end_coords,
biketown_cleaned$EndHub
)trip_summary <- biketown_cleaned %>% group_by(StartHub, EndHub) %>%
summarize(trips = n()) %>% ungroup() %>% filter(
StartHub != EndHub,
substr(StartHub, 1, 3) == "GPS" |
substr(EndHub, 1, 3) == "GPS") %>%
arrange(desc(trips)) %>% top_n(20, trips)trip_summary## # A tibble: 20 x 3
## StartHub EndHub trips
## <chr> <chr> <int>
## 1 GPS(-122.6736457,45.5154699) SW Salmon at Waterfront Park 228
## 2 NW 13th at Marshall GPS(-122.6852715,45.5360712) 202
## 3 GPS(-122.6717313,45.5000094) SW Moody at Aerial Tram Terminal 158
## 4 NW 23rd at Overton GPS(-122.6842918,45.5264914) 142
## 5 GPS(-122.6734472,45.5245495) NW 2nd at Everett 134
## 6 GPS(-122.6852715,45.5360712) NW 13th at Marshall 120
## 7 GPS(-122.6842918,45.5264914) NW 23rd at Overton 113
## 8 GPS(-122.6844243,45.5300616) GPS(-122.6852715,45.5360712) 104
## 9 GPS(-122.6759187,45.5175854) SW 3rd at Morrison 100
## 10 GPS(-122.6702589,45.522513) SW Naito at Ankeny Plaza 94
## 11 GPS(-122.6799961,45.5186858) SW Morrison at Pioneer Courthouse S… 87
## 12 SW 10th at Columbia GPS(-122.6863885,45.5142926) 87
## 13 GPS(-122.6620427,45.5065432) SE 2nd Pl at Tilikum Way 83
## 14 GPS(-122.6743175,45.5071786) SW River at Montgomery 82
## 15 GPS(-122.6725072,45.5175793) SW Naito at Morrison 80
## 16 GPS(-122.6372902,45.5221371) SE 7th at Burnside 79
## 17 GPS(-122.6485026,45.527931) GPS(-122.6768712,45.5201388) 79
## 18 GPS(-122.6717672,45.4992903) SW Moody at Aerial Tram Terminal 79
## 19 GPS(-122.6823725,45.5293695) NW Johnson at Jamison Square 77
## 20 GPS(-122.6814345,45.5315321) NW 11th at The Fields 76
.start_hubs <- biketown_cleaned %>% select(
hub = StartHub,
longitude = StartLongitude,
latitude = StartLatitude
) %>% unique()
.end_hubs <- biketown_cleaned %>% select(
hub = EndHub,
longitude = EndLongitude,
latitude = EndLatitude
) %>% unique()biketown_hubs <- bind_rows(.start_hubs, .end_hubs) %>% unique()
biketown_hubs_sp <- SpatialPointsDataFrame(
coords = select(biketown_hubs, longitude, latitude),
data = biketown_hubs,
proj4string = CRS("+init=epsg:4326")
)desire_lines <- od2line(
flow = trip_summary,
zones = biketown_hubs_sp
)