This is an extension of the tidytuesday assignment you have already done. Complete the questions below, using the screencast you chose for the tidytuesday assigment.
library(tidyverse)
library(lubridate)
library(scales)
theme_set(theme_light())
bike_traffic_raw <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-04-02/bike_traffic.csv")
bike_traffic <- bike_traffic_raw %>%
mutate(date = mdy_hms(date)) %>%
filter(bike_count < 2000) %>%
select(-ped_count)
bike_traffic %>%
count(crossing, direction)
## # A tibble: 13 x 3
## crossing direction n
## <chr> <chr> <int>
## 1 39th Ave NE Greenway at NE 62nd St North 38660
## 2 39th Ave NE Greenway at NE 62nd St South 38660
## 3 Broadway Cycle Track North Of E Union St North 44565
## 4 Broadway Cycle Track North Of E Union St South 44565
## 5 Burke Gilman Trail North 42905
## 6 Burke Gilman Trail South 42902
## 7 Elliot Bay Trail North 45234
## 8 Elliot Bay Trail South 45234
## 9 MTS Trail East 45565
## 10 NW 58th St Greenway at 22nd Ave East 44342
## 11 NW 58th St Greenway at 22nd Ave West 44342
## 12 Sealth Trail North 16054
## 13 Sealth Trail South 16054
bike_traffic %>%
ggplot(aes(date, fill = is.na(bike_count))) +
geom_histogram() +
facet_grid(crossing ~ direction)
bike_traffic %>%
group_by(crossing,
hour = hour(date)) %>%
summarize(bike_count = sum(bike_count, na.rm = TRUE)) %>%
mutate(pct_bike = bike_count / sum(bike_count)) %>%
ggplot(aes(hour, pct_bike, color = crossing)) +
geom_line() +
geom_point() +
scale_y_continuous(labels = percent_format()) +
labs(title = "When in the day do people bike through these Seattle crossings?",
subtitle = "Based on crossings from 2014-February 2019",
color = "Crossing",
x = "Time of day (local time)",
y = "% of bike crossings that happen in this hour")
bike_by_time_window <- bike_traffic %>%
mutate(hour = hour(date)) %>%
mutate(time_window = case_when(
between(hour, 7, 10) ~ "Morning Commute",
between(hour, 11, 15) ~ "Midday",
between(hour, 16, 18) ~ "Evening Commute",
TRUE ~ "Night"
)) %>%
group_by(crossing,
time_window) %>%
summarize(number_missing = sum(is.na(bike_count)),
bike_count = sum(bike_count, na.rm = TRUE)) %>%
mutate(pct_bike = bike_count / sum(bike_count))
bike_by_time_window %>%
select(-number_missing, -bike_count) %>%
spread(time_window, pct_bike) %>%
mutate(TotalCommute = `Evening Commute` + `Morning Commute`) %>%
arrange(desc(TotalCommute))
## # A tibble: 7 x 6
## # Groups: crossing [7]
## crossing `Evening Commut… Midday `Morning Commut… Night TotalCommute
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Elliot Bay Trail 0.329 0.250 0.278 0.143 0.607
## 2 39th Ave NE Green… 0.294 0.248 0.288 0.171 0.581
## 3 Sealth Trail 0.266 0.307 0.280 0.147 0.546
## 4 MTS Trail 0.270 0.330 0.271 0.129 0.541
## 5 Burke Gilman Trail 0.271 0.370 0.241 0.117 0.513
## 6 Broadway Cycle Tr… 0.271 0.248 0.213 0.268 0.484
## 7 NW 58th St Greenw… 0.240 0.364 0.202 0.194 0.442
bike_by_time_window %>%
ggplot(aes(time_window, pct_bike)) +
geom_col() +
coord_flip() +
facet_wrap(~ crossing)
bike_by_time_window %>%
group_by(crossing) %>%
summarize(total_bikes = sum(bike_count),
pct_commute = sum(bike_count[str_detect(time_window, "Commute")]) / total_bikes) %>%
ggplot(aes(total_bikes, pct_commute)) +
geom_point() +
scale_x_log10()
bike_traffic %>%
group_by(crossing,
weekday = wday(date, label = TRUE),
hour = hour(date)) %>%
summarize(total_bikes = sum(bike_count, na.rm = TRUE)) %>%
group_by(crossing) %>%
mutate(pct_bike = total_bikes / sum(total_bikes)) %>%
ggplot(aes(hour, pct_bike, color = crossing)) +
geom_line(show.legend = FALSE) +
facet_grid(crossing ~ weekday) +
scale_y_continuous(labels = percent_format()) +
labs(x = "Time of week",
y = "% of bike crossings happening in this hour",
title = "When in the week do people in Seattle bike?",
subtitle = "Based on crossings from 2014-February 2019")
bike_traffic %>%
filter(date < "2018-01-01") %>%
group_by(crossing,
month = fct_relevel(month.name[month(date)], month.name)) %>%
summarize(total_bikes = sum(bike_count, na.rm = TRUE)) %>%
mutate(pct_bike = total_bikes / sum(total_bikes)) %>%
ggplot(aes(month, pct_bike, color = crossing, group = crossing)) +
geom_line() +
expand_limits(y = 0) +
scale_y_continuous(labels = percent_format()) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "What time of year do people bike?",
subtitle = "Based on 2014-2017 bike crossings",
y = "% of yearly trips in this month",
x = "")
bike_by_direction_hour_crossing <- bike_traffic %>%
filter(crossing != "MTS Trail",
!wday(date, label = TRUE) %in% c("Sat", "Sun"),
direction %in% c("North", "South")) %>%
mutate(hour = hour(date)) %>%
group_by(crossing,
direction,
hour) %>%
summarize(bike_count = sum(bike_count, na.rm = TRUE)) %>%
mutate(pct_bike = bike_count / sum(bike_count))
bike_by_direction_hour_crossing %>%
group_by(crossing) %>%
mutate(average_hour = sum((hour * pct_bike)[direction == "North"])) %>%
ungroup() %>%
mutate(crossing = fct_reorder(crossing, average_hour)) %>%
ggplot(aes(hour, pct_bike, color = direction)) +
geom_line() +
facet_grid(crossing ~ .) +
scale_y_continuous(labels = percent_format()) +
labs(x = "Time of day",
y = "% of bike crossings happening in this hour",
title = "In which directions do people commute by bike?",
subtitle = "Based on weekday crossings at six Seattle locations from 2014-February 2019",
color = "Direction")
These data sets are showing all the different times, directions, and frequency people are travelling by bicycle in Seattle. By looking at all of these graphs we can see there is some really interesting correlations in regard to bike traffic in seattle and the time people are cycling the most.
Definition of Variables and Meaning:
Hint: One graph of your choice.
bike_traffic %>%
group_by(crossing,
hour = hour(date)) %>%
summarize(bike_count = sum(bike_count, na.rm = TRUE)) %>%
mutate(pct_bike = bike_count / sum(bike_count)) %>%
ggplot(aes(hour, pct_bike, color = crossing)) +
geom_line() +
geom_point() +
scale_y_continuous(labels = percent_format()) +
labs(title = "When in the day do people bike through these Seattle crossings?",
subtitle = "Based on crossings from 2014-February 2019",
color = "Crossing",
x = "Time of day (local time)",
y = "% of bike crossings that happen in this hour")
When looking at this graph there are many different colors that are representing which streets are being traveled during the day and at what time they are being travelled. The title is very simply describing what we’re looking at here and it’s “When in the day do people bike through these Seattle crossings?” This graph also describes the percentage of bike crossings in each hour specifically and we can definitely see that travelling is more popular in the morning and late afternoon because that is when most people are travelling to and from work or school.
This is my favorite graph from the data set because it has so many different colors to help you look at the data and understand it better. I am a visual learner so having color and labeling helps me remember and absorb content.
All of the streets in Seattle on this graph are travelled at some point during the day, but are most travelled between the hours of 5:00 am and 10:00 am and 3:00 pm and 5:30 pm. These hours are typical commuting hours for anyone working a day job which is why these stats make sense. Burke Gilman trail is the most popular trail travelled throughout the day where as Broadway Cycle Track North is pretty consistently has lower travel throughout the day. What is interesting about this graph is that all of the trails are travelled pretty signficantly at some point in the day.