library(ggplot2)

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Fast Food Data

As you embark on an MBA that provides some new time management challenges, here is some food for thought and an analysis task… It may even inform your decision making over food.

# Load the TidyTuesday fast food nutrition table straight from GitHub and
# drop every row that contains a missing value.
FFood <- na.omit(
  read.csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-09-04/fastfood_calories.csv")
)

Basic Summary

A basic summary of the data. An example item can be found here. The data were scraped from this site so the variable names, and the relevant units, should be clear from the labels.

##        X          restaurant            item              calories   
##  Min.   :  1.0   Length:301         Length:301         Min.   :  20  
##  1st Qu.: 80.0   Class :character   Class :character   1st Qu.: 340  
##  Median :189.0   Mode  :character   Mode  :character   Median : 490  
##  Mean   :219.4                                         Mean   : 537  
##  3rd Qu.:351.0                                         3rd Qu.: 680  
##  Max.   :511.0                                         Max.   :2430  
##     cal_fat         total_fat         sat_fat         trans_fat     
##  Min.   :   0.0   Min.   :  0.00   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.: 120.0   1st Qu.: 13.00   1st Qu.: 4.000   1st Qu.:0.0000  
##  Median : 200.0   Median : 22.00   Median : 7.000   Median :0.0000  
##  Mean   : 229.5   Mean   : 25.55   Mean   : 7.849   Mean   :0.4551  
##  3rd Qu.: 310.0   3rd Qu.: 34.00   3rd Qu.:10.000   3rd Qu.:1.0000  
##  Max.   :1270.0   Max.   :141.00   Max.   :36.000   Max.   :4.0000  
##   cholesterol         sodium       total_carb         fiber       
##  Min.   :  0.00   Min.   :  15   Min.   :  0.00   Min.   : 0.000  
##  1st Qu.: 40.00   1st Qu.: 830   1st Qu.: 30.00   1st Qu.: 2.000  
##  Median : 65.00   Median :1120   Median : 44.00   Median : 4.000  
##  Mean   : 75.23   Mean   :1274   Mean   : 47.53   Mean   : 4.196  
##  3rd Qu.: 95.00   3rd Qu.:1550   3rd Qu.: 61.00   3rd Qu.: 5.000  
##  Max.   :475.00   Max.   :6080   Max.   :156.00   Max.   :16.000  
##      sugar           protein           vit_a            vit_c       
##  Min.   : 0.000   Min.   :  1.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 4.000   1st Qu.: 18.00   1st Qu.:  4.00   1st Qu.:  4.00  
##  Median : 7.000   Median : 28.00   Median : 10.00   Median : 10.00  
##  Mean   : 8.176   Mean   : 30.19   Mean   : 18.86   Mean   : 20.12  
##  3rd Qu.:10.000   3rd Qu.: 37.00   3rd Qu.: 20.00   3rd Qu.: 30.00  
##  Max.   :87.000   Max.   :186.00   Max.   :180.00   Max.   :400.00  
##     calcium          salad          
##  Min.   :  0.00   Length:301        
##  1st Qu.:  8.00   Class :character  
##  Median : 20.00   Mode  :character  
##  Mean   : 24.98                     
##  3rd Qu.: 35.00                     
##  Max.   :290.00

The Questions

In Excel, construct a pivot table of the data by fast food enterprise that displays five key quantities: the count of the number of menu items, the mean and sample standard deviation of calories, and the mean and sample standard deviation of sodium.

# Read the "PivotTable" sheet of the FastFood workbook into R.
# NOTE(review): the path points at the author's Desktop, so this only runs on
# a machine where that file exists.
library(readxl)
FastFood <- read_excel(path = "~/Desktop/FastFood.xlsx", sheet = "PivotTable")

Which chain has the highest average calories? McDonald’s: 640
Which chain has the lowest average calories? Chick Fil-A: 384
Which chain has the highest average sodium content? Arby’s: 1515
In R, construct the equivalent pivot table using the tidyverse, the piping operator, and the skim function from the skimr library.

# Per-restaurant mean and sample standard deviation of calories and sodium.
# NOTE(review): the exercise also asks for the number of menu items per
# restaurant; that count is not computed here.
FFood %>%
  group_by(restaurant) %>%
  summarise(
    Mean.calories = mean(calories),
    Sd.calories = sd(calories),
    Mean.sodium = mean(sodium),
    Sd.sodium = sd(sodium)
  )

## `summarise()` ungrouping output (override with `.groups` argument)

## # A tibble: 7 x 5
##   restaurant  Mean.calories Sd.calories Mean.sodium Sd.sodium
##   <chr>               <dbl>       <dbl>       <dbl>     <dbl>
## 1 Arbys                486         181.       1410.      595.
## 2 Chick Fil-A          318.        153.        953.      457.
## 3 Dairy Queen          519.        274.       1114.      674.
## 4 Mcdonalds            640.        411.       1438.     1036.
## 5 Sonic                620         285.       1281.      494.
## 6 Subway               503.        282.       1273.      744.
## 7 Taco Bell            524.        130.       1199.      387.

# Per-restaurant medians of calories and sodium.
FFood %>%
  group_by(restaurant) %>%
  summarise(
    Median.calories = median(calories),
    Median.sodium = median(sodium)
  )

## `summarise()` ungrouping output (override with `.groups` argument)

## # A tibble: 7 x 3
##   restaurant  Median.calories Median.sodium
##   <chr>                 <dbl>         <dbl>
## 1 Arbys                   480          1280
## 2 Chick Fil-A             350           960
## 3 Dairy Queen             470           930
## 4 Mcdonalds               540          1120
## 5 Sonic                   570          1190
## 6 Subway                  460          1130
## 7 Taco Bell               550          1120

Which chain has the highest median calories?
Sonic:570
Which chain has the lowest median calories? Chick Fil-A:350
Which chain has the highest median sodium? Arbys:1280
Which chain has the lowest 3rd quartile of calories?

# quantile() is called without `probs`, so it returns its five default
# quantiles (0%, 25%, 50%, 75%, 100%) and each restaurant contributes five
# rows in that order; the 3rd quartile is the fourth row of each group.
FFood %>%
  group_by(restaurant) %>%
  summarise(quantile.calories = quantile(calories))

## `summarise()` regrouping output by 'restaurant' (override with `.groups` argument)

## # A tibble: 35 x 2
## # Groups:   restaurant [7]
##    restaurant  quantile.calories
##    <chr>                   <dbl>
##  1 Arbys                      70
##  2 Arbys                     360
##  3 Arbys                     480
##  4 Arbys                     600
##  5 Arbys                     840
##  6 Chick Fil-A                70
##  7 Chick Fil-A               190
##  8 Chick Fil-A               350
##  9 Chick Fil-A               450
## 10 Chick Fil-A               540
## # … with 25 more rows

Chick Fil-A: 450

Which chain has the highest 3rd quartile of protein?

# quantile() is called without `probs`, so it returns its five default
# quantiles (0%, 25%, 50%, 75%, 100%) and each restaurant contributes five
# rows in that order; the 3rd quartile is the fourth row of each group.
FFood %>%
  group_by(restaurant) %>%
  summarise(quantile.protein = quantile(protein))

## `summarise()` regrouping output by 'restaurant' (override with `.groups` argument)

## # A tibble: 35 x 2
## # Groups:   restaurant [7]
##    restaurant  quantile.protein
##    <chr>                  <dbl>
##  1 Arbys                      5
##  2 Arbys                     23
##  3 Arbys                     28
##  4 Arbys                     37
##  5 Arbys                     45
##  6 Chick Fil-A               11
##  7 Chick Fil-A               22
##  8 Chick Fil-A               28
##  9 Chick Fil-A               34
## 10 Chick Fil-A               41
## # … with 25 more rows

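Mcdonalds:33 (NOTE: 33 cannot be the highest 3rd quartile, because the output above already shows a 3rd quartile of 37 for Arbys and 34 for Chick Fil-A; this figure should be re-read from the 75% row, i.e. the fourth row, of the Mcdonalds group.)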

In R, provide two graphics – a boxplot and something resembling a density plot or histogram – that shows the distributions of sodium and cholesterol.

# Boxplots of sodium and of cholesterol, one box per restaurant.
# A boxplot summarises the y variable within each level of a discrete x, so
# the restaurant goes on the x axis and each nutrient gets its own plot.
# Mapping cholesterol to x (as before) only positioned the sodium boxes along
# a cholesterol axis and never displayed the cholesterol distribution.
ggplot(FFood, aes(x = restaurant, y = sodium, color = restaurant)) +
  geom_boxplot()

ggplot(FFood, aes(x = restaurant, y = cholesterol, color = restaurant)) +
  geom_boxplot()

# Histograms (30 bins) of sodium and then cholesterol, with the bars filled
# by restaurant.
ggplot(FFood, aes(x = sodium, fill = restaurant)) +
  geom_histogram(bins = 30L) +
  scale_fill_hue() +
  theme_minimal()

ggplot(FFood, aes(x = cholesterol, fill = restaurant)) +
  geom_histogram(bins = 30L) +
  scale_fill_hue() +
  theme_minimal()

In R, provide two graphics – a boxplot and something resembling a density plot – that compares the distributions of total fat and cholesterol by chain.

# Boxplots of total fat and of cholesterol, one box per restaurant, so the
# chains can be compared on each nutrient.
# The restaurant is the discrete x variable; mapping cholesterol to x (as
# before) only positioned the total-fat boxes along a cholesterol axis and
# never displayed the cholesterol distribution.
ggplot(FFood, aes(x = restaurant, y = total_fat, color = restaurant)) +
  geom_boxplot()

ggplot(FFood, aes(x = restaurant, y = cholesterol, color = restaurant)) +
  geom_boxplot()

# Density curves of total fat and then cholesterol, one curve per restaurant.
# The fills are semi-transparent because the curves are drawn on top of each
# other; with opaque fills the restaurants drawn last hide the ones beneath.
ggplot(FFood, aes(x = total_fat, fill = restaurant)) +
  geom_density(adjust = 1L, alpha = 0.4) +
  scale_fill_hue() +
  theme_minimal()

ggplot(FFood, aes(x = cholesterol, fill = restaurant)) +
  geom_density(adjust = 1L, alpha = 0.4) +
  scale_fill_hue() +
  theme_minimal()

Reduce the dataset to exclude items from Sonic and Subway and provide a plot of the distribution of protein [histogram, boxplot, violin, density] by restaurant.

# Keep only the five listed chains (everything except Sonic and Subway in
# this data), then draw a histogram of protein filled by restaurant.
# NOTE(review): the name `F` shadows R's shorthand for FALSE; it is kept
# because the next step of the analysis reads from `F`.
F <- filter(
  FFood,
  restaurant%in%c("Taco Bell","Arbys","Chick Fil-A","Dairy Queen","Mcdonalds")
)
ggplot(F, aes(x = protein, fill = restaurant)) +
  geom_histogram(bins = 30L) +
  scale_fill_hue() +
  theme_minimal()

Reduce the dataset to exclude items from Sonic and Subway and provide a plot of the distribution of sodium in grams [histogram, boxplot, violin, density] by restaurant. This will require that you transform the data from milligrams to grams.

# Add sodium expressed in grams (the exercise states the source column is in
# milligrams, hence the division by 1000) and plot its histogram, filled by
# restaurant.
fastfood <- mutate(F, sodium.grams = sodium / 1000)
ggplot(fastfood, aes(x = sodium.grams, fill = restaurant)) +
  geom_histogram(bins = 30L) +
  scale_fill_hue() +
  theme_minimal()

What item has the highest calories from fat among all items?

# Sort the menu items by calories from fat, largest first, and print the
# first rows; the top row is the item with the most calories from fat.
Cal_fat <- arrange(FFood, desc(cal_fat))
head(Cal_fat)

##     X restaurant                                           item calories
## 1  40  Mcdonalds     20 piece Buttermilk Crispy Chicken Tenders     2430
## 2  45  Mcdonalds                     40 piece Chicken McNuggets     1770
## 3  99      Sonic Super Sonic Bacon Double Cheeseburger (w/mayo)     1280
## 4  39  Mcdonalds     12 piece Buttermilk Crispy Chicken Tenders     1510
## 5 102      Sonic        Super Sonic Double Cheeseburger W/ Mayo     1220
## 6 100      Sonic     Super Sonic Double Cheeseburger W/ Mustard     1120
##   cal_fat total_fat sat_fat trans_fat cholesterol sodium total_carb fiber sugar
## 1    1270       141      24       2.0         475   6080        103     2     3
## 2     960       107      18       0.5         295   3370        105     7     1
## 3     830        92      36       4.0         260   1630         44     2     7
## 4     790        88      15       1.0         295   3770         64     1     2
## 5     780        87      34       4.0         245   1520         45     2     8
## 6     680        76      32       4.0         235   1550         44     2     8
##   protein vit_a vit_c calcium salad
## 1     186     0     2       8 Other
## 2      98     0    15       6 Other
## 3      67    15     6      40 Other
## 4     115     0     2       6 Other
## 5      63    15     8      40 Other
## 6      63    15     8      40 Other

20 piece Buttermilk Crispy Chicken Tenders

Fast Food Calories Exercise

Lauryn Keller

September 10, 2020

Fast Food Data

Basic Summary

The Questions