El presente informe permite conocer la realidad de los restaurantes de comida rápida (fast food) en el año 2018.
# librerias
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(skimr)
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.0.4
library(visdat)
## Warning: package 'visdat' was built under R version 4.0.5
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.0.4
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Warning: package 'viridis' was built under R version 4.0.4
## Loading required package: viridisLite
library(forcats)
library(ggExtra)
## Warning: package 'ggExtra' was built under R version 4.0.4
# Load data
# Sales data, read from a CSV file in the working directory.
ff_sales <- read_csv(file = "data_fastfood_sales.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## restaurant = col_character(),
## average_sales = col_double(),
## us_sales = col_double(),
## num_company_stores = col_double(),
## num_franchised_stores = col_double(),
## unit_count = col_double()
## )
# Nutritional-content data, read from a CSV file in the working directory.
ff_calories <- read_csv(file = "data_fastfood_calories.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## restaurant = col_character(),
## item = col_character(),
## calories = col_double(),
## cal_fat = col_double(),
## total_fat = col_double(),
## sat_fat = col_double(),
## trans_fat = col_double(),
## cholesterol = col_double(),
## sodium = col_double(),
## total_carb = col_double(),
## fiber = col_double(),
## sugar = col_double(),
## protein = col_double(),
## vit_a = col_double(),
## vit_c = col_double(),
## calcium = col_double()
## )
# Make sure both data sets are tibbles.
ff_sales <- as_tibble(ff_sales)
ff_calories <- as_tibble(ff_calories)

# Sales data plus the share of each restaurant's stores that are franchised
# (franchised stores divided by the total unit count).
ff_sales_plot <- mutate(
  ff_sales,
  prop_stores_fran = num_franchised_stores / unit_count
)
# PROBLEM 1 -----
# Scatter plot of U.S. sales against the total number of stores, both on
# log10 axes. Points are coloured by the franchised proportion and every
# point is labelled (in black) with its restaurant name.
ff_sales_plot %>%
  ggplot(aes(x = us_sales, y = unit_count, color = prop_stores_fran)) +
  geom_point() +
  geom_text_repel(aes(label = restaurant), colour = "black") +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  scale_color_continuous("Proportion of stores\n\ franchised") +
  labs(
    x = "U.S sales in millions (log10 scale)",
    y = "Total number of stores (log10 scale)"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold"),
    text = element_text(face = "bold")
  )
# Store the restaurant name as a factor. This modifies ff_sales_plot itself,
# so code further down sees the factor column as well.
ff_sales_plot$restaurant <- as.factor(ff_sales_plot$restaurant)

# Horizontal bar chart of average sales per store. The restaurant factor is
# reordered by average_sales (ascending) so the bars appear sorted.
ff_sales_plot %>%
  mutate(
    # Spell out FALSE: the alias `F` is an ordinary variable that can be
    # reassigned.
    restaurant = fct_reorder(restaurant, average_sales, .desc = FALSE)
  ) %>%
  ggplot(aes(x = restaurant, y = average_sales)) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col() +
  scale_y_continuous(labels = scales::label_dollar()) +
  # Label each bar with its rounded value.
  geom_text_repel(
    aes(label = paste("$", round(average_sales, 0))),
    hjust = 1
  ) +
  coord_flip() +
  labs(x = "Restaurant", y = "Average sales per unit store (in thousands)") +
  theme_classic() +
  theme(axis.title = element_text(face = "bold"))
# Calories vs. sodium for every menu item, one panel per restaurant.
# A horizontal line is drawn at 2300 mg of sodium, and only the items above
# that line are labelled with their name.
ff_calories %>%
  ggplot(aes(x = calories, y = sodium, label = item)) +
  geom_point(size = 1) +
  geom_hline(yintercept = 2300, color = "black", size = 0.5) +
  geom_text_repel(
    data = filter(ff_calories, sodium > 2300),
    nudge_y = 1000,
    hjust = -0.5,
    direction = "y"
  ) +
  facet_wrap(~restaurant) +
  labs(x = "Calories", y = "Sodium(mg)") +
  theme_bw() +
  theme(axis.title = element_text(face = "bold"))
# Step 1: lower-case the item names so the word "salad" is detected
#         regardless of the original capitalisation, and flag those items.
# Step 2: store the salad flag and the restaurant as factors so the
#         restaurant can be reordered afterwards.
ff_calories_plot <- ff_calories %>%
  mutate(
    item = tolower(item),
    is_salad = as_factor(str_detect(item, "salad")),
    restaurant = as_factor(restaurant)
  )

# Step 3: summarise the median calories per restaurant, create `restaurant2`
#         (the restaurant factor reordered by that median, ascending) and
#         inner-join the summary back onto the item-level data.
ff_calories_plot2 <- ff_calories_plot %>%
  group_by(restaurant) %>%
  summarize(median_calories = median(calories)) %>%
  mutate(
    # Spell out FALSE: the alias `F` is an ordinary variable that can be
    # reassigned.
    restaurant2 = fct_reorder(restaurant, median_calories, .desc = FALSE)
  ) %>%
  inner_join(ff_calories_plot, by = c("restaurant2" = "restaurant"))

# Keep the result as a plain data.frame.
ff_calories_plot2 <- as.data.frame(ff_calories_plot2)
# Step 4: build the plot.
# Box plot of calories (log10 axis) per restaurant with the individual items
# jittered on top and coloured by whether the item is a salad.
ff_calories_plot2 %>%
  mutate(
    is_salad = if_else(is_salad == "FALSE", "Not a salad", "Salad")
  ) %>%
  ggplot(aes(x = calories, y = restaurant2)) +
  # Outliers are drawn in white because the jittered points already show
  # every observation.
  geom_boxplot(outlier.colour = "white") +
  # alpha must lie in [0, 1]; the previous value of 5 was out of range.
  # NOTE(review): 0.5 is assumed to be the intended transparency - confirm.
  geom_jitter(aes(color = is_salad), size = 2, alpha = 0.5) +
  scale_x_continuous(trans = "log10") +
  labs(x = "Calories (log10 scale)", y = "Restaurant") +
  # No layer maps a `fill` aesthetic, so only the colour scale is needed.
  scale_color_discrete("Is the entree a salad?") +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold"),
    text = element_text(face = "bold")
  )
# Median sugar per restaurant (Taco Bell excluded), joined with the sales
# data; the restaurant is then turned into a factor ordered by U.S. sales
# (ascending).
ff_calories_p5 <- ff_calories %>%
  filter(restaurant != "Taco Bell") %>%
  group_by(restaurant) %>%
  summarize(median_sugar = median(sugar)) %>%
  inner_join(ff_sales_plot, by = c("restaurant")) %>%
  mutate(
    # Spell out FALSE: the alias `F` is an ordinary variable that can be
    # reassigned.
    restaurant = fct_reorder(restaurant, us_sales, .desc = FALSE)
  )
# Plot: U.S. sales per restaurant, with each bar filled according to the
# median sugar content of the restaurant's items.
ggplot(ff_calories_p5, aes(x = restaurant, y = us_sales)) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col(aes(fill = median_sugar)) +
  labs(x = "Restaurant", y = "U.S. sales (in millions)") +
  # The bars map `fill`, so the legend title has to be set on the fill scale;
  # a colour scale is not used by this plot and its title would never show.
  scale_fill_continuous("Median sugar (grams) \n\ in fast food entrees ") +
  theme_classic() +
  theme(
    axis.title = element_text(face = "bold"),
    text = element_text(face = "bold")
  )