This assignment is part of the “Visualizing Data in the Tidyverse” course from Coursera.
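This assignment uses two data sets about fast food restaurants. Specifically, we will create data visualizations using the two data sets described below: 2018 sales data for fast food chains and nutrition data for their entree items.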
Source: Data originally from Hubworks.
The codebook for the data includes 6 variables:
| Field name | Description | Data type |
|---|---|---|
| `restaurant` | Name of the restaurant | character |
| `average_sales` | Average US sales per unit (store) in thousands in 2018 | numeric |
| `us_sales` | U.S. sales in millions in 2018 | numeric |
| `num_company_stores` | Number of company / corporate-owned stores in 2018 | numeric |
| `num_franchised_stores` | Number of franchised stores in 2018 | numeric |
| `unit_count` | Total number of stores (unit counts) in 2018 | numeric |
Source: Data originally from the GitHub repository of the Tidy Tuesday project in the R4DS online learning community.
The codebook for the data includes 16 variables:
| Field name | Description | Data type |
|---|---|---|
| `restaurant` | Name of the restaurant | character |
| `item` | Name of entree item | character |
| `calories` | Calories | numeric |
| `cal_fat` | Calories from fat | numeric |
| `total_fat` | Total fat (g) | numeric |
| `sat_fat` | Saturated fat (g) | numeric |
| `trans_fat` | Trans fat (g) | numeric |
| `cholesterol` | Cholesterol (mg) | numeric |
| `sodium` | Sodium (mg) | numeric |
| `total_carb` | Total Carbohydrate (g) | numeric |
| `fiber` | Dietary fiber (g) | numeric |
| `sugar` | Total sugar (g) | numeric |
| `protein` | Protein (g) | numeric |
| `vit_a` | Vitamin A (mcg) | numeric |
| `vit_c` | Vitamin C (mcg) | numeric |
| `calcium` | Calcium (mg) | numeric |
library(tidyverse)
library(janitor)
library(ggrepel)
library(scales)
library(directlabels)
library(viridis)
# Load the sales data, normalise the column names to snake_case, turn
# `restaurant` into a factor, and derive `prop`: the share of each chain's
# stores that are franchised (franchised stores / total unit count).
# NOTE(review): the URL carries `Expires` / `Signature` query parameters, so it
# looks like a time-limited signed link -- confirm it still resolves, or point
# this at a stable copy of the CSV.
fast_food_sales <-
  read_csv("https://d3c33hcgiwev3.cloudfront.net/ERgbPUSBTj2YGz1Egd49gA_64d91f329b334df59109113c3084708f_data_fastfood_sales.csv?Expires=1678406400&Signature=lUC28zAXtmKZ-JIYBirKOE5SRWTVpJd3RyPlwtjcCAv-uzhpatUqREPX39ex56RZMpKFkBD0vsBa4TXsHlrAbWmxxg8Bgw7BGSr4OMYOYHannixUiqKI-~ZumuSCRBplskZZ8SfGHjPlxHNhuD8FxS2C53OnoxinNhDHlb-o1YA_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A") |>
  clean_names() |>
  mutate(
    restaurant = as_factor(restaurant),
    prop = num_franchised_stores / unit_count
  )
# Plot 1: total number of stores against U.S. sales, both on log10 axes.
# Points are coloured by the franchised share (`prop`) and each point is
# labelled with its restaurant name.
plot_1 <-
  ggplot(
    fast_food_sales,
    aes(
      x = us_sales,
      y = unit_count,
      colour = prop,
      label = restaurant
    )
  ) +
  geom_point() +
  scale_y_log10() +
  scale_x_log10() +
  # Fix the colour bar to the 0-1 range rather than the observed range.
  scale_color_gradient(limits = c(0, 1)) +
  # Constant black text so the labels do not pick up the `prop` colour mapping.
  geom_text_repel(color = "black") +
  labs(
    x = "US sales in millions (log 10 scale)",
    y = "Total number of stores (log 10 scale)",
    color = "Proportion of stores franchised"
  ) +
  theme_bw()

plot_1
# Plot 2: horizontal bar chart of average sales per store, with restaurants
# ordered by `average_sales` and each bar annotated with its rounded value.
plot_2 <-
  ggplot(
    fast_food_sales,
    aes(
      x = fct_reorder(restaurant, average_sales),
      y = average_sales
    )
  ) +
  # geom_col() is the idiomatic way to draw bars whose height is a data value
  # (equivalent to geom_bar(stat = "identity")).
  geom_col() +
  # Flip so restaurant names read horizontally along the vertical axis.
  coord_flip() +
  scale_y_continuous(labels = label_dollar()) +
  # Write the dollar-formatted value next to each bar.
  geom_dl(
    aes(label = dollar(round(average_sales))),
    method = list("last.points", cex = 0.6)
  ) +
  labs(
    x = "Restaurant",
    y = "Average sales per unit store (in the thousands)"
  ) +
  theme_classic()

plot_2
# Load the entree-level nutrition data and normalise the column names.
# NOTE(review): the URL carries `Expires` / `Signature` query parameters, so it
# looks like a time-limited signed link -- confirm it still resolves, or point
# this at a stable copy of the CSV.
fast_food_calories <- clean_names(
  read_csv("https://d3c33hcgiwev3.cloudfront.net/-LjVvbxIR5G41b28SMeReA_1a02baf08f0c4c479cbd944461c41ced_data_fastfood_calories.csv?Expires=1678492800&Signature=jwOwgbKWbeWK6GK7iNdOzAj1q6O03V2cyF4FDOZ~4h6VLIT7I79sgTnPIgNFPLyIMfDmV99pzBBEYm1vLXfDrvgj~zRNJmtcpb6zsPlVH4aQotPoCSiGCUtaTufFczCjb2IHmm0xYhPU~JoE091IFlS3-9iML-L1eHoFwwILTN4_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A")
)
# Plot 3: sodium against calories for every entree, one panel per restaurant.
# A horizontal reference line is drawn at 2300 mg of sodium, and only the
# items at or above that line receive a text label.
plot_3 <-
  ggplot(
    fast_food_calories,
    aes(x = calories, y = sodium, label = item)
  ) +
  geom_hline(yintercept = 2300) +
  geom_point() +
  geom_text_repel(
    # Label layer uses just the rows on or above the reference line.
    data = filter(fast_food_calories, sodium >= 2300),
    size = 1,
    force = 2,
    max.overlaps = Inf,
    nudge_x = 3,
    nudge_y = 1,
    # Constrain where labels may be placed within each panel.
    xlim = c(550, 2500),
    ylim = c(3000, 6500)
  ) +
  labs(x = "Calories", y = "Sodium (mg)") +
  theme_bw() +
  facet_wrap(~ restaurant)

plot_3
# Flag salad entrees: `is_salad` is TRUE when the item name contains "Salad"
# (case-sensitive match) and FALSE otherwise. coalesce() maps a missing item
# name to FALSE, so the column never contains NA.
fast_food_calories <-
  fast_food_calories |>
  mutate(is_salad = coalesce(str_detect(item, "Salad"), FALSE))
# Plot 4: calorie distribution per restaurant (log10 x axis), with restaurants
# ordered by calories and individual entrees coloured by whether they are
# salads.
plot_4 <-
  ggplot(
    fast_food_calories,
    aes(
      x = calories,
      y = fct_reorder(restaurant, calories)
    )
  ) +
  # Hide the boxplot's own outlier markers; the jitter layer below already
  # draws every entree as a point.
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(aes(color = factor(is_salad))) +
  scale_x_log10() +
  # Labels are given in factor-level order: FALSE first, then TRUE.
  scale_color_discrete(labels = c("Not a salad", "Salad")) +
  labs(
    x = "Calories (log10 scale)",
    y = "Restaurant",
    color = "Is the entree a salad?"
  )

plot_4
# Median sugar content per restaurant, computed with Taco Bell rows excluded.
sugar <-
  fast_food_calories |>
  filter(restaurant != "Taco Bell") |>
  group_by(restaurant) |>
  summarise(median_sugar = median(sugar))

# Attach the sales columns; only restaurants present in both tables are kept.
sugar_final <-
  sugar |>
  inner_join(fast_food_sales, by = join_by(restaurant))
# Plot 5: U.S. sales per restaurant, with restaurants ordered by `us_sales`
# and bars filled by the median sugar content of the restaurant's entrees.
plot_5 <-
  ggplot(
    sugar_final,
    aes(
      x = fct_reorder(restaurant, us_sales),
      y = us_sales,
      fill = median_sugar
    )
  ) +
  # geom_col() is the idiomatic way to draw bars whose height is a data value
  # (equivalent to geom_bar(stat = "identity")).
  geom_col() +
  scale_fill_viridis(option = "D") +
  labs(
    x = "Restaurant",
    # `us_sales` is recorded in millions, so state the unit on the axis.
    y = "U.S. sales (in millions)",
    fill = "Median sugar (grams)\nin fast food entrees"
  )

plot_5