This is an extension of the tidytuesday assignment you have already done. Complete the questions below, using the screencast you chose for the tidytuesday assigment.

Import data

library(tidyverse)
library(lubridate)
library(scales)
theme_set(theme_light())
bike_traffic_raw <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-04-02/bike_traffic.csv")
bike_traffic <- bike_traffic_raw %>%
  mutate(date = mdy_hms(date)) %>%
  filter(bike_count < 2000) %>%
  select(-ped_count)
bike_traffic %>%
  count(crossing, direction)
## # A tibble: 13 x 3
##    crossing                                 direction     n
##    <chr>                                    <chr>     <int>
##  1 39th Ave NE Greenway at NE 62nd St       North     38660
##  2 39th Ave NE Greenway at NE 62nd St       South     38660
##  3 Broadway Cycle Track North Of E Union St North     44565
##  4 Broadway Cycle Track North Of E Union St South     44565
##  5 Burke Gilman Trail                       North     42905
##  6 Burke Gilman Trail                       South     42902
##  7 Elliot Bay Trail                         North     45234
##  8 Elliot Bay Trail                         South     45234
##  9 MTS Trail                                East      45565
## 10 NW 58th St Greenway at 22nd Ave          East      44342
## 11 NW 58th St Greenway at 22nd Ave          West      44342
## 12 Sealth Trail                             North     16054
## 13 Sealth Trail                             South     16054
bike_traffic %>%
  ggplot(aes(date, fill = is.na(bike_count))) +
  geom_histogram() +
  facet_grid(crossing ~ direction)

When in the day do we see bikers?

bike_traffic %>%
  group_by(crossing,
           hour = hour(date)) %>%
  summarize(bike_count = sum(bike_count, na.rm = TRUE)) %>%
  mutate(pct_bike = bike_count / sum(bike_count)) %>%
  ggplot(aes(hour, pct_bike, color = crossing)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = percent_format()) +
  labs(title = "When in the day do people bike through these Seattle crossings?",
       subtitle = "Based on crossings from 2014-February 2019",
       color = "Crossing",
       x = "Time of day (local time)",
       y = "% of bike crossings that happen in this hour")

bike_by_time_window <- bike_traffic %>%
  mutate(hour = hour(date)) %>%
  mutate(time_window = case_when(
    between(hour, 7, 10) ~ "Morning Commute",
    between(hour, 11, 15) ~ "Midday",
    between(hour, 16, 18) ~ "Evening Commute",
    TRUE ~ "Night"
  )) %>%
  group_by(crossing,
           time_window) %>%
  summarize(number_missing = sum(is.na(bike_count)),
            bike_count = sum(bike_count, na.rm = TRUE)) %>%
  mutate(pct_bike = bike_count / sum(bike_count))
bike_by_time_window %>%
  select(-number_missing, -bike_count) %>%
  spread(time_window, pct_bike) %>%
  mutate(TotalCommute = `Evening Commute` + `Morning Commute`) %>%
  arrange(desc(TotalCommute))
## # A tibble: 7 x 6
## # Groups:   crossing [7]
##   crossing           `Evening Commut… Midday `Morning Commut… Night TotalCommute
##   <chr>                         <dbl>  <dbl>            <dbl> <dbl>        <dbl>
## 1 Elliot Bay Trail              0.329  0.250            0.278 0.143        0.607
## 2 39th Ave NE Green…            0.294  0.248            0.288 0.171        0.581
## 3 Sealth Trail                  0.266  0.307            0.280 0.147        0.546
## 4 MTS Trail                     0.270  0.330            0.271 0.129        0.541
## 5 Burke Gilman Trail            0.271  0.370            0.241 0.117        0.513
## 6 Broadway Cycle Tr…            0.271  0.248            0.213 0.268        0.484
## 7 NW 58th St Greenw…            0.240  0.364            0.202 0.194        0.442
bike_by_time_window %>%
  ggplot(aes(time_window, pct_bike)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~ crossing)

bike_by_time_window %>%
  group_by(crossing) %>%
  summarize(total_bikes = sum(bike_count),
            pct_commute = sum(bike_count[str_detect(time_window, "Commute")]) / total_bikes) %>%
  ggplot(aes(total_bikes, pct_commute)) +
  geom_point() +
  scale_x_log10()

bike_traffic %>%
  group_by(crossing,
           weekday = wday(date, label = TRUE),
           hour = hour(date)) %>%
  summarize(total_bikes = sum(bike_count, na.rm = TRUE)) %>%
  group_by(crossing) %>%
  mutate(pct_bike = total_bikes / sum(total_bikes)) %>%
  ggplot(aes(hour, pct_bike, color = crossing)) +
  geom_line(show.legend = FALSE) +
  facet_grid(crossing ~ weekday) +
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Time of week",
       y = "% of bike crossings happening in this hour",
       title = "When in the week do people in Seattle bike?",
       subtitle = "Based on crossings from 2014-February 2019")

bike_traffic %>%
  filter(date < "2018-01-01") %>%
  group_by(crossing,
           month = fct_relevel(month.name[month(date)], month.name)) %>%
  summarize(total_bikes = sum(bike_count, na.rm = TRUE)) %>%
  mutate(pct_bike = total_bikes / sum(total_bikes)) %>%
  ggplot(aes(month, pct_bike, color = crossing, group = crossing)) +
  geom_line() +
  expand_limits(y = 0) +
  scale_y_continuous(labels = percent_format()) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "What time of year do people bike?",
       subtitle = "Based on 2014-2017 bike crossings",
       y = "% of yearly trips in this month",
       x = "")

What directions do people commute by bike?

bike_by_direction_hour_crossing <- bike_traffic %>%
  filter(crossing != "MTS Trail",
         !wday(date, label = TRUE) %in% c("Sat", "Sun"),
         direction %in% c("North", "South")) %>%
  mutate(hour = hour(date)) %>%
  group_by(crossing,
           direction,
           hour) %>%
  summarize(bike_count = sum(bike_count, na.rm = TRUE)) %>%
  mutate(pct_bike = bike_count / sum(bike_count))
bike_by_direction_hour_crossing %>%
  group_by(crossing) %>%
  mutate(average_hour = sum((hour * pct_bike)[direction == "North"])) %>%
  ungroup() %>%
  mutate(crossing = fct_reorder(crossing, average_hour)) %>%
  ggplot(aes(hour, pct_bike, color = direction)) +
  geom_line() +
  facet_grid(crossing ~ .) +
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Time of day",
       y = "% of bike crossings happening in this hour",
       title = "In which directions do people commute by bike?",
       subtitle = "Based on weekday crossings at six Seattle locations from 2014-February 2019",
       color = "Direction")

Description of the data and definition of variables

These data sets are showing all the different times, directions, and frequency people are travelling by bicycle in Seattle. By looking at all of these graphs we can see there is some really interesting correlations in regard to bike traffic in seattle and the time people are cycling the most.

Definition of Variables and Meaning:

  • crossing - the street crossing/intersection
  • direction - North/South/East/West - varies by crossing
  • data - date of data upload
  • bike_count - number of bikes each hour
  • ped_count - number of pedestrians each hour

Visualize data

Hint: One graph of your choice.

bike_traffic %>%
  group_by(crossing,
           hour = hour(date)) %>%
  summarize(bike_count = sum(bike_count, na.rm = TRUE)) %>%
  mutate(pct_bike = bike_count / sum(bike_count)) %>%
  ggplot(aes(hour, pct_bike, color = crossing)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = percent_format()) +
  labs(title = "When in the day do people bike through these Seattle crossings?",
       subtitle = "Based on crossings from 2014-February 2019",
       color = "Crossing",
       x = "Time of day (local time)",
       y = "% of bike crossings that happen in this hour")

When looking at this graph there are many different colors that are representing which streets are being traveled during the day and at what time they are being travelled. The title is very simply describing what we’re looking at here and it’s “When in the day do people bike through these Seattle crossings?” This graph also describes the percentage of bike crossings in each hour specifically and we can definitely see that travelling is more popular in the morning and late afternoon because that is when most people are travelling to and from work or school.

This is my favorite graph from the data set because it has so many different colors to help you look at the data and understand it better. I am a visual learner so having color and labeling helps me remember and absorb content.

What is the story behind the graph?

All of the streets in Seattle on this graph are travelled at some point during the day, but are most travelled between the hours of 5:00 am and 10:00 am and 3:00 pm and 5:30 pm. These hours are typical commuting hours for anyone working a day job which is why these stats make sense. Burke Gilman trail is the most popular trail travelled throughout the day where as Broadway Cycle Track North is pretty consistently has lower travel throughout the day. What is interesting about this graph is that all of the trails are travelled pretty signficantly at some point in the day.

Hide the messages, but display the code and its results on the webpage.

Write your name for the author at the top.

Use the correct slug.