# This section records the data source URLs and downloads the corresponding CSV files.
library(tidyverse)
library(ggrepel)
library(scales)
# Data acquisition ----
# Source URLs for the two CSV files (fast-food sales and fast-food calories).
# NOTE(review): both URLs carry an `Expires=` query parameter, so they are
# presumably time-limited signed links -- confirm they are still valid before
# relying on a fresh download.
url1 <- "https://d3c33hcgiwev3.cloudfront.net/ERgbPUSBTj2YGz1Egd49gA_64d91f329b334df59109113c3084708f_data_fastfood_sales.csv?Expires=1649030400&Signature=KlMKgzrmchTaOXz6X9vkDAn3W7devc6Wk07eO~mz3HN3slwd2u7kfwHwOwYQ73CivVbr-hF9CKifxnVJOuUA3oAiVYaqHveVX-GtdaxFRF12HiWdcaGpa50HqgxkGWHN1sAGI89tl6Ay5kVH6XG0fmhfjUohRiqUnyiMNASeFqU_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A"
url2 <- "https://d3c33hcgiwev3.cloudfront.net/-LjVvbxIR5G41b28SMeReA_1a02baf08f0c4c479cbd944461c41ced_data_fastfood_calories.csv?Expires=1649030400&Signature=F8AmOAyISagGKYxUH-ZuV1-uIANJG9KsK9nTwx-3mMLG0tz8h8So03n~AV9wpSztNmb8SdfRzAeeUrTjD3DGQgix8IWCQ7Zyo0skiXqnWeesooV6AYNqGzy~~XSuI9wI9eXLW7tq3EepUE0BbOa-IroSepMUD-eK3TTI446xm5s_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A"
# Download each CSV only when no local copy exists, so that re-running the
# script neither hits the network again nor aborts on a failed download when
# the data is already on disk. Delete the local file to force a re-download.
if (!file.exists("ffsales.csv")) {
  download.file(url1, "ffsales.csv")
}
if (!file.exists("ffcalories.csv")) {
  download.file(url2, "ffcalories.csv")
}
# Read both CSV files into tibbles used by the rest of the script.
sales <- read_csv("ffsales.csv")
caloriess <- read_csv("ffcalories.csv")
# Franchise share ----
# fran_prop is the number of franchised stores divided by the total store
# count of each restaurant.
q1 <- mutate(sales, fran_prop = num_franchised_stores / unit_count)
# Scatter plot of total store count against U.S. sales, both on log10 axes.
# Point colour encodes the franchised share; each point is labelled with the
# restaurant name.
ggplot(data = q1, mapping = aes(x = us_sales, y = unit_count)) +
  geom_point(mapping = aes(color = fran_prop)) +
  # x and y are inherited from the plot-level mapping; only the label is new.
  geom_text_repel(mapping = aes(label = restaurant), size = 3) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "U.S. sales in millions (log10 scale)",
    y = "Total number of stores (log10 scale)",
    color = "Proportion of stores franchised"
  ) +
  theme_bw() +
  # Legend text tweaks must follow theme_bw(), which would otherwise reset them.
  theme(
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8)
  )
# Average sales per store ----
# Horizontal bar chart with one bar per restaurant, ordered by average_sales.
# Each bar carries a "$<rounded value>" label placed just past its end.
ggplot(
  data = sales,
  mapping = aes(
    x = fct_reorder(restaurant, average_sales),
    y = average_sales
  )
) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste0("$", round(average_sales, 0))),
    # Negative hjust pushes the label beyond the end of the bar.
    hjust = -0.1,
    size = 2
  ) +
  # Format the value axis ticks as dollar amounts.
  scale_y_continuous(labels = dollar_format()) +
  # Flip so that restaurants run along the vertical axis.
  coord_flip() +
  labs(
    x = "Restaurant",
    y = "Average sales per unit store (in thousands)"
  ) +
  theme_classic()
# Sodium vs calories ----
# Scatter plot of sodium against calories for every menu item, one panel per
# restaurant, with a horizontal reference line at sodium = 2300.
ggplot(data = caloriess, mapping = aes(x = calories, y = sodium)) +
  geom_point(size = 1) +
  geom_hline(yintercept = 2300) +
  # Label only the items that lie above the reference line.
  geom_text_repel(
    data = filter(caloriess, sodium > 2300),
    mapping = aes(label = item),
    direction = "y",
    nudge_x = 900,
    nudge_y = 900,
    size = 2
  ) +
  facet_wrap(~restaurant) +
  labs(x = "Calories", y = "Sodium (mg)") +
  theme_bw()
# Calories by restaurant, salads highlighted ----
# Flag (not filter) every item whose name contains "salad" or "Salad".
q4 <- mutate(caloriess, is_salad = str_detect(item, "[Ss]alad"))
# Box plot of calories per restaurant on a log10 scale, restaurants ordered by
# their calories, with the individual items overlaid as jittered points
# coloured by the salad flag.
ggplot(
  data = q4,
  mapping = aes(x = fct_reorder(restaurant, calories), y = calories)
) +
  # Outliers are hidden here because every item is drawn by geom_point below.
  geom_boxplot(outlier.shape = NA) +
  geom_point(
    mapping = aes(color = is_salad),
    position = position_jitterdodge()
  ) +
  scale_y_log10() +
  # Legend labels are given in the order FALSE, TRUE of the is_salad flag.
  scale_color_discrete(labels = c("Not Salad", "Salad")) +
  coord_flip() +
  labs(
    x = "Restaurant",
    y = "Calories (log10 scale)",
    color = "Is the entree\n a salad?"
  ) +
  theme_bw()
# U.S. sales coloured by median sugar ----
# Median sugar per restaurant (Taco Bell excluded), joined with that
# restaurant's U.S. sales.
q5 <- caloriess %>%
  # Exclude Taco Bell from the summary.
  filter(restaurant != "Taco Bell") %>%
  group_by(restaurant) %>%
  summarise(med_sugar = median(sugar)) %>%
  # Select the sales columns by name rather than by position so the join does
  # not depend on the CSV column order. inner_join() keeps only restaurants
  # present in both tables, so no separate %in% filter is needed.
  inner_join(select(sales, restaurant, us_sales), by = "restaurant")
# Bar chart of U.S. sales per restaurant, ordered by sales, with the bar fill
# running from green (low median sugar) to red (high median sugar).
ggplot(q5, aes(x = fct_reorder(restaurant, us_sales), y = us_sales)) +
  geom_col(aes(fill = med_sugar)) +
  scale_fill_gradient(low = "#1CA695", high = "#F32A5B") +
  labs(
    x = "Restaurant",
    y = "U.S. sales (in millions)",
    fill = "Median sugar (grams)\n in fast food entrees"
  ) +
  theme_classic()