Version 1

Instructions:

In this course, we have learned the importance of data visualization and how to leverage packages in the Tidyverse for data visualization in R. This project will give you the opportunity to practice those skills in greater depth.

This assignment uses two data sets about fast food restaurants. Specifically, we will create data visualizations using:

Data set 1: annual sales information and number of stores per fast food restaurant in 2018 (data_fastfood_sales.csv). Source: Data originally from Hubworks.

Data set 2: nutrition information about entrees from fast food restaurants (data_fastfood_calories.csv). Source: Data originally from the GitHub repository from the Tidy Tuesday project in R4DS online learning community.

Setting Up the R packages and Data

library(tidyverse)
library(magrittr)
library(ggthemes)
library(ggrepel)
library(cowplot)

# Turn off the informational message summarise() prints about how its
# result is grouped, so the rendered report stays free of console noise.
options(dplyr.summarise.inform = FALSE)

Sales

Annual sales information and number of stores per fast food restaurant in 2018

# Read the sales table; show_col_types = FALSE silences readr's
# column-specification message.
# NOTE(review): the instructions call this file data_fastfood_sales.csv
# (all lower case) -- confirm the on-disk name, because a mixed-case path
# will not resolve on a case-sensitive file system.
sales <- read_csv(
  file = "Data_Fastfood_Sales.csv",
  show_col_types = FALSE
)

Calories

Nutrition information about entrees from fast food restaurants

# Read the entree nutrition table; show_col_types = FALSE silences readr's
# column-specification message.
# NOTE(review): the instructions call this file data_fastfood_calories.csv
# (all lower case) -- confirm the on-disk name, because a mixed-case path
# will not resolve on a case-sensitive file system.
calories <- read_csv(
  file = "Data_Fastfood_Calories.csv",
  show_col_types = FALSE
)

Plots

Plot 1

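Plot 1 compares each restaurant's U.S. sales with its total number of stores, both on log10 scales; point colour shows the proportion of stores that are franchised.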

# Plot 1: U.S. sales against total store count on log10 axes, one point per
# row of `sales`, coloured by the share of stores that are franchised.
sales %>%
  select(restaurant, us_sales, num_franchised_stores, unit_count) %>%
  mutate(prop_franchised_stores = num_franchised_stores / unit_count) %>%
  # No group_by()/summarise() step here: grouping on every column (including
  # us_sales itself) and then summing us_sales only rewrote a grouping
  # column and left the data grouped, without changing any plotted value
  # when each row is already unique.
  ggplot(aes(
    x = us_sales,
    y = unit_count,
    colour = prop_franchised_stores
  )) +
  geom_point(size = 3) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Plot 1: US Sales vs. Number of Stores",
    subtitle = "",
    x = "U.S. sales in millions (log10 scale)",
    # Fixed typo: the label previously read "number if stores".
    y = "Total number of stores (log10 scale)",
    colour = "Proportion of stores\nfranchised"
  ) +
  theme_cowplot()

Plot 2

# Plot 2: average sales per unit store for each restaurant, drawn as
# horizontal bars sorted so the largest value sits at the top, with every
# bar annotated by its rounded dollar value.
sales %>%
  select(restaurant, average_sales) %>%
  group_by(restaurant) %>%
  summarise(average_sales = sum(average_sales)) %>%
  mutate(
    # Text shown at the end of each bar, e.g. "$1234".
    bar_label = paste0("$", round(average_sales, digits = 0)),
    # Factor levels in ascending order of sales => highest bar on top.
    restaurant = fct_reorder(restaurant, average_sales)
  ) %>%
  ggplot(aes(x = average_sales, y = restaurant)) +
  geom_col() +
  # bar_label was computed but never drawn; place it just past the bar end.
  geom_text(aes(label = bar_label), hjust = -0.1) +
  scale_x_continuous(
    labels = scales::dollar_format(),
    # Extra room on the right so the label of the longest bar fits.
    expand = expansion(mult = c(0.05, 0.15))
  ) +
  # clip = "off" lets a label spill outside the panel instead of being cut.
  coord_cartesian(clip = "off") +
  labs(
    title = "Plot 2: Average sales per unit store",
    subtitle = "",
    x = "Average sales per unit store (in thousands)",
    y = "Restaurant"
  ) +
  theme_cowplot()

Note that the bars are ordered by average sales per unit store, so the restaurants with the highest per-store sales appear at the top.

Plot 3

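Plot 3 shows sodium against calories for every entree, with one panel per restaurant. The horizontal line marks 2,300 mg of sodium; entrees above that line are labelled with their item name.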

# Plot 3: sodium vs. calories for every entree, one facet per restaurant.
# A horizontal reference line is drawn at 2300 mg of sodium and every entree
# above it is labelled with its item name.
calories %>%
  select(restaurant, item, calories, sodium) %>%
  ggplot(aes(x = calories, y = sodium)) +
  facet_wrap(~restaurant) +
  geom_point(size = 2, alpha = 0.5) +
  geom_hline(yintercept = 2300) +
  geom_text_repel(
    # Label only the entrees above the reference line.
    data = calories %>% filter(sodium > 2300),
    aes(label = item),
    # `direction` is a geom parameter, not an aesthetic: inside aes() it was
    # ignored (with an "unknown aesthetics" warning), so labels were never
    # restricted to vertical repulsion.
    direction = "y",
    nudge_x = 800,
    nudge_y = 1000
  ) +
  labs(
    title = "Plot 3: Sodium Levels",
    subtitle = "",
    x = "Calories",
    y = "Sodium (mg)"
  ) +
  theme(
    plot.title = element_text(
      color = "#2C3E50",
      face = "bold",
      size = 14,
      margin = margin(t = 10)
    ),
    plot.subtitle = element_text(
      color = "#2C3E50",
      size = 14,
      margin = margin(b = 5)
    ),
    plot.caption = element_text(
      color = "grey60",
      size = 10,
      hjust = .5,
      margin = margin(t = 15, b = 15)
    )
  )

Plot 4

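Plot 4 shows the distribution of calories per entree for each restaurant as box plots, with the individual entrees overlaid as jittered points coloured by whether the item name contains "salad".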

## data
# One row per entree, flagged by whether its name mentions "salad", with
# restaurants turned into a factor ordered by their entrees' calories
# (fct_reorder()'s default summary function).
p4 <- calories %>%
  select(restaurant, item, calories) %>%
  mutate(
    # Case-insensitive match on the item name.
    is_salad = str_detect(str_to_lower(item), "salad"),
    restaurant = fct_reorder(restaurant, calories)
  )

## visualization
# Box plot of calories per restaurant with the raw entrees jittered on top.
p4 %>%
  ggplot(aes(x = calories, y = restaurant)) +
  # Outliers are hidden in the box layer because the jitter layer already
  # draws every entree; showing both would plot those points twice.
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(aes(color = is_salad)) +
  scale_x_log10() +
  # Named labels bind each caption to its logical value instead of relying
  # on the positional order of FALSE/TRUE.
  scale_color_discrete(
    labels = c("FALSE" = "Not a salad", "TRUE" = "Salad")
  ) +
  labs(
    # Title aligned with the "Plot N:" convention used by the other plots
    # (was "Problem 4: Any salad? " with a trailing space).
    title = "Plot 4: Any salad?",
    subtitle = "",
    x = "Calories (log10 scale)",
    y = "Restaurant",
    color = "Is the entree\n a salad?"
  )


Plot 5

## data
# Join sales to entree nutrition, drop Taco Bell, and reduce to one row per
# restaurant / us_sales pair carrying the median sugar of its entrees.
# Restaurants become a factor ordered by that median.
p5 <- inner_join(sales, calories, by = "restaurant") %>%
  select(restaurant, item, calories, sugar, us_sales) %>%
  filter(restaurant != "Taco Bell") %>%
  group_by(restaurant, us_sales) %>%
  # .groups = "drop" returns an ungrouped tibble directly.
  summarise(median_sugar = median(sugar), .groups = "drop") %>%
  arrange(desc(median_sugar)) %>%
  mutate(restaurant = fct_reorder(restaurant, median_sugar))

## visualization
# Bar height is U.S. sales; bar fill encodes median sugar on a viridis scale.
ggplot(p5, aes(x = restaurant, y = us_sales)) +
  geom_col(aes(fill = median_sugar)) +
  scale_x_discrete() +
  scale_y_continuous() +
  scale_fill_viridis_c() +
  labs(
    title = "Plot 5: Sugar Levels ",
    subtitle = "",
    x = "Restaurant",
    y = "U.S. sales (in millions)",
    fill = "Median sugar (grams)\nin fast food entrees"
  ) +
  theme_classic()