library(tidyverse)
library(RColorBrewer)
library(scales)
library(here)
tripdata_summary_weather <- read_csv(here("processed_data/r", "tripdata_summary_weather.csv"))
tripdata_summary_daily <- read_csv(here("processed_data/r", "tripdata_summary_daily.csv"))
tripdata_time_of_day <- read_csv(here("processed_data/r", "tripdata_time_of_day.csv"))
tripdata_stations <- read_csv(here("processed_data/r", "tripdata_stations.csv"))
tripdata_summary <- read_csv(here("processed_data/r", "tripdata_summary.csv"))
station_names <- read_csv(here("processed_data/r", "station_names.csv"))
tripdata_summary %>%
ggplot(aes(x = s_year_month, y = ride_count, color = member_casual)) +
theme_grey(base_size = 12) +
geom_smooth(se = FALSE, size = 1.1) +
scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
scale_color_manual(values=c("purple","gold")) +
scale_y_continuous(labels = scales::comma) +
labs(x = "Month-Year", y = "Ride Count", color = "Member Type") +
theme(legend.position = "bottom")
head(tripdata_summary)
## # A tibble: 6 x 4
## member_casual s_year_month mean_ride_length ride_count
## <chr> <date> <dbl> <dbl>
## 1 casual 2020-04-01 72.5 23553
## 2 casual 2020-05-01 50.6 86635
## 3 casual 2020-06-01 51.3 154186
## 4 casual 2020-07-01 59.3 267977
## 5 casual 2020-08-01 44.5 280498
## 6 casual 2020-09-01 38.5 214102
tripdata_summary %>%
group_by(member_casual) %>%
summarise(total_ride = sum(ride_count)) %>%
ggplot(aes(x = member_casual, y = total_ride, fill = member_casual)) +
theme_grey(base_size = 12) +
theme(axis.title.x=element_blank()) +
geom_col(width = 0.50, show.legend = FALSE) +
scale_y_continuous(labels = scales::comma) +
scale_fill_manual(values=c("purple", "gold")) +
stat_summary(fun = "sum", aes(label = format(..y.., big.mark = ",")),
geom = "text", vjust = 2, size = 3.5) +
ylab("Ride Count") +
scale_x_discrete(labels = c('Casual', 'Member'))
tripdata_summary_weather %>%
ggplot(aes(x = s_year_month, y = ride_count)) +
theme_dark(base_size = 12) +
geom_smooth(color = "gray", se = FALSE, size = 1) +
geom_point(aes(color = avg_temp), size = 3) +
scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
scale_y_continuous(labels = scales::comma) +
scale_color_distiller(name = "Temp (C)", palette = "RdBu") +
labs(x = "Month-Year", y = "Ride Count")
head(tripdata_summary_weather)
## # A tibble: 6 x 4
## s_year_month avg_temp mean_rl ride_count
## <date> <dbl> <dbl> <dbl>
## 1 2020-04-01 10 36 84607
## 2 2020-05-01 18 33 199674
## 3 2020-06-01 23 33 341897
## 4 2020-07-01 26 38 548485
## 5 2020-08-01 25 30 604203
## 6 2020-09-01 19 25 497656
tripdata_summary %>%
ggplot(aes(x = s_year_month, y = mean_ride_length, color = member_casual)) +
theme_grey(base_size = 12) +
geom_smooth(se = FALSE, size = 1.1, show.legend = FALSE) +
scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
scale_color_manual(values=c("purple","gold")) +
ylim(0, 80) +
labs(x = "Month-Year", y = "Average Ride Length (Min)")
tripdata_summary_daily%>%
ggplot(aes(x = factor(day_of_week,level = c('Monday', 'Tuesday', 'Wednesday',
'Thursday', 'Friday', 'Saturday', 'Sunday')),
y = ride_count, fill = member_casual)) +
theme_grey(base_size = 12) +
geom_col(width = 0.50, show.legend = FALSE) +
facet_wrap(~member_casual) +
scale_y_continuous(labels = scales::comma) +
scale_fill_manual(values=c("purple", "gold")) +
labs(x = "Day of Week", y = "Ride Count") +
scale_x_discrete(labels = c('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'))
head(tripdata_summary_daily)
## # A tibble: 6 x 3
## member_casual day_of_week ride_count
## <chr> <chr> <dbl>
## 1 casual Sunday 518410
## 2 casual Monday 292885
## 3 casual Tuesday 281572
## 4 casual Wednesday 286462
## 5 casual Thursday 302551
## 6 casual Friday 391052
tripdata_time_of_day%>%
ggplot(aes(x = started_at_time, y = ride_count, color = member_casual)) +
theme_grey(base_size = 12) +
geom_smooth(se = FALSE, size = 1.1, show.legend = FALSE) +
scale_y_continuous(labels = scales::comma, breaks=seq(0,140000,20000)) +
scale_color_manual(values=c("purple", "gold")) +
labs(x = "Time of Day", y = "Ride Count") +
scale_x_time(breaks = scales::breaks_width("2 hours"),
labels = label_time(format = "%H:%M", tz = "UTC"))
head(tripdata_time_of_day)
## # A tibble: 6 x 3
## member_casual started_at_time ride_count
## <chr> <time> <dbl>
## 1 casual 00:00 14507
## 2 casual 00:30 25044
## 3 casual 01:00 20887
## 4 casual 01:30 17087
## 5 casual 02:00 14243
## 6 casual 02:30 10399
station_names %>%
summarise(total_stations = n_distinct(start_station_name))
## # A tibble: 1 x 1
## total_stations
## <int>
## 1 763
tripdata_stations %>%
ggplot(aes(x = factor(stations, level = c('start_station', 'end_station')),
y = stations_covered, fill = member_casual)) +
theme_grey(base_size = 12) +
theme(axis.title.x=element_blank()) +
geom_col(width = 0.50, position = "dodge", show.legend = FALSE) +
ylim(0, 800) +
scale_fill_manual(values=c("purple", "gold")) +
ylab("Stations Covered") +
scale_x_discrete(labels = c('Start Station', 'End Station')) +
geom_text(aes(label = stations_covered),
position = position_dodge(width = 0.5), vjust = 2, size = 3.5)
head(station_names)
## # A tibble: 6 x 1
## start_station_name
## <chr>
## 1 Eckhart Park
## 2 Drake Ave & Fullerton Ave
## 3 McClurg Ct & Erie St
## 4 California Ave & Division St
## 5 Rush St & Hubbard St
## 6 Mies van der Rohe Way & Chicago Ave