library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
As you embark on an MBA that provides some new time management challenges, here is some food for thought and an analysis task… It may even inform your decision making over food.
# Pull the TidyTuesday fast-food table straight from GitHub and keep only the
# rows that have a value in every column.
FFood <- na.omit(
  read.csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-09-04/fastfood_calories.csv"
  )
)
A basic summary of the data. An example item can be found here. The data were scraped from this site so the variable names, and the relevant units, should be clear from the labels.
## X restaurant item calories
## Min. : 1.0 Length:301 Length:301 Min. : 20
## 1st Qu.: 80.0 Class :character Class :character 1st Qu.: 340
## Median :189.0 Mode :character Mode :character Median : 490
## Mean :219.4 Mean : 537
## 3rd Qu.:351.0 3rd Qu.: 680
## Max. :511.0 Max. :2430
## cal_fat total_fat sat_fat trans_fat
## Min. : 0.0 Min. : 0.00 Min. : 0.000 Min. :0.0000
## 1st Qu.: 120.0 1st Qu.: 13.00 1st Qu.: 4.000 1st Qu.:0.0000
## Median : 200.0 Median : 22.00 Median : 7.000 Median :0.0000
## Mean : 229.5 Mean : 25.55 Mean : 7.849 Mean :0.4551
## 3rd Qu.: 310.0 3rd Qu.: 34.00 3rd Qu.:10.000 3rd Qu.:1.0000
## Max. :1270.0 Max. :141.00 Max. :36.000 Max. :4.0000
## cholesterol sodium total_carb fiber
## Min. : 0.00 Min. : 15 Min. : 0.00 Min. : 0.000
## 1st Qu.: 40.00 1st Qu.: 830 1st Qu.: 30.00 1st Qu.: 2.000
## Median : 65.00 Median :1120 Median : 44.00 Median : 4.000
## Mean : 75.23 Mean :1274 Mean : 47.53 Mean : 4.196
## 3rd Qu.: 95.00 3rd Qu.:1550 3rd Qu.: 61.00 3rd Qu.: 5.000
## Max. :475.00 Max. :6080 Max. :156.00 Max. :16.000
## sugar protein vit_a vit_c
## Min. : 0.000 Min. : 1.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 4.000 1st Qu.: 18.00 1st Qu.: 4.00 1st Qu.: 4.00
## Median : 7.000 Median : 28.00 Median : 10.00 Median : 10.00
## Mean : 8.176 Mean : 30.19 Mean : 18.86 Mean : 20.12
## 3rd Qu.:10.000 3rd Qu.: 37.00 3rd Qu.: 20.00 3rd Qu.: 30.00
## Max. :87.000 Max. :186.00 Max. :180.00 Max. :400.00
## calcium salad
## Min. : 0.00 Length:301
## 1st Qu.: 8.00 Class :character
## Median : 20.00 Mode :character
## Mean : 24.98
## 3rd Qu.: 35.00
## Max. :290.00
library(readxl)
# Read the "PivotTable" worksheet of the FastFood workbook on the Desktop.
FastFood <- read_excel(
  path = "~/Desktop/FastFood.xlsx",
  sheet = "PivotTable"
)
Which chain has the highest average calories? McDonald’s: 640
Which chain has the lowest average calories? Chick Fil-A: 384
Which chain has the highest average sodium content? Arby’s: 1515
In R, construct the equivalent of the same pivot table using the tidyverse, the pipe operator, and the skim function from the skimr library.
# Per-restaurant mean and standard deviation of calories and of sodium.
FFood %>%
  group_by(restaurant) %>%
  summarise(
    Mean.calories = mean(calories),
    Sd.calories = sd(calories),
    Mean.sodium = mean(sodium),
    Sd.sodium = sd(sodium)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 7 x 5
## restaurant Mean.calories Sd.calories Mean.sodium Sd.sodium
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Arbys 486 181. 1410. 595.
## 2 Chick Fil-A 318. 153. 953. 457.
## 3 Dairy Queen 519. 274. 1114. 674.
## 4 Mcdonalds 640. 411. 1438. 1036.
## 5 Sonic 620 285. 1281. 494.
## 6 Subway 503. 282. 1273. 744.
## 7 Taco Bell 524. 130. 1199. 387.
# Per-restaurant median of calories and of sodium.
FFood %>%
  group_by(restaurant) %>%
  summarise(
    Median.calories = median(calories),
    Median.sodium = median(sodium)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 7 x 3
## restaurant Median.calories Median.sodium
## <chr> <dbl> <dbl>
## 1 Arbys 480 1280
## 2 Chick Fil-A 350 960
## 3 Dairy Queen 470 930
## 4 Mcdonalds 540 1120
## 5 Sonic 570 1190
## 6 Subway 460 1130
## 7 Taco Bell 550 1120
Which chain has the highest median calories?
Sonic:570
Which chain has the lowest median calories? Chick Fil-A:350
Which chain has the highest median sodium? Arbys:1280
Which chain has the lowest 3rd quartile of calories?
# Third quartile (75th percentile) of calories for each restaurant, sorted so
# that the chain with the lowest value is printed first. Asking quantile() for
# a single probability keeps the result at one row per chain, instead of the
# five default quantiles stacked under each other where the wrong row is easy
# to read off.
FFood %>%
  group_by(restaurant) %>%
  summarise(Q3.calories = quantile(calories, probs = 0.75, names = FALSE)) %>%
  arrange(Q3.calories)
## `summarise()` regrouping output by 'restaurant' (override with `.groups` argument)
## # A tibble: 35 x 2
## # Groups: restaurant [7]
## restaurant quantile.calories
## <chr> <dbl>
## 1 Arbys 70
## 2 Arbys 360
## 3 Arbys 480
## 4 Arbys 600
## 5 Arbys 840
## 6 Chick Fil-A 70
## 7 Chick Fil-A 190
## 8 Chick Fil-A 350
## 9 Chick Fil-A 450
## 10 Chick Fil-A 540
## # … with 25 more rows
Chick Fil-A: 450 (350 is its median; the 3rd quartile is the 75% row)
# Default quantiles of protein (minimum, quartiles, maximum), listed chain by
# chain: five rows per restaurant.
FFood %>%
  group_by(restaurant) %>%
  summarise(quantile.protein = quantile(protein))
## `summarise()` regrouping output by 'restaurant' (override with `.groups` argument)
## # A tibble: 35 x 2
## # Groups: restaurant [7]
## restaurant quantile.protein
## <chr> <dbl>
## 1 Arbys 5
## 2 Arbys 23
## 3 Arbys 28
## 4 Arbys 37
## 5 Arbys 45
## 6 Chick Fil-A 11
## 7 Chick Fil-A 22
## 8 Chick Fil-A 28
## 9 Chick Fil-A 34
## 10 Chick Fil-A 41
## # … with 25 more rows
Mcdonalds:33
# Box plot of the sodium distribution within each restaurant. The chain is
# mapped to x because a box plot needs a discrete grouping on that axis; with
# the continuous cholesterol column there, the boxes were positioned along a
# numeric axis instead of being laid out one per chain.
# NOTE(review): if a sodium-vs-cholesterol relationship was intended, a scatter
# plot (geom_point) would be the right geometry instead; confirm the intent.
ggplot(FFood, aes(x = restaurant, y = sodium, color = restaurant)) +
  geom_boxplot()
# 30-bin histogram of sodium, with the bars filled by restaurant.
ggplot(FFood, aes(x = sodium, fill = restaurant)) +
  geom_histogram(bins = 30L) +
  scale_fill_hue() +
  theme_minimal()
# 30-bin histogram of cholesterol, with the bars filled by restaurant.
ggplot(FFood, aes(x = cholesterol, fill = restaurant)) +
  geom_histogram(bins = 30L) +
  scale_fill_hue() +
  theme_minimal()
# Box plot of the total_fat distribution within each restaurant. The chain is
# mapped to x because a box plot needs a discrete grouping on that axis; with
# the continuous cholesterol column there, the boxes were positioned along a
# numeric axis instead of being laid out one per chain.
# NOTE(review): if a total_fat-vs-cholesterol relationship was intended, a
# scatter plot (geom_point) would be the right geometry; confirm the intent.
ggplot(FFood, aes(x = restaurant, y = total_fat, color = restaurant)) +
  geom_boxplot()
# Kernel density of total_fat, one filled curve per restaurant.
ggplot(FFood, aes(x = total_fat, fill = restaurant)) +
  geom_density(adjust = 1L) +
  scale_fill_hue() +
  theme_minimal()
# Kernel density of cholesterol, one filled curve per restaurant.
ggplot(FFood, aes(x = cholesterol, fill = restaurant)) +
  geom_density(adjust = 1L) +
  scale_fill_hue() +
  theme_minimal()
# Restrict the data to five chains for the two histograms that follow. The
# subset is stored under a descriptive name rather than `F`, because assigning
# to `F` masks R's built-in shorthand for FALSE for the rest of the session.
FFood_subset <- FFood %>%
  filter(
    restaurant %in%
      c("Taco Bell", "Arbys", "Chick Fil-A", "Dairy Queen", "Mcdonalds")
  )

# 30-bin histogram of protein for the selected chains, filled by restaurant.
ggplot(FFood_subset, aes(x = protein, fill = restaurant)) +
  geom_histogram(bins = 30L) +
  scale_fill_hue() +
  theme_minimal()

# Add sodium divided by 1000 as a new column (milligrams to grams, going by
# the column name; confirm the source units).
fastfood <- FFood_subset %>%
  mutate(sodium.grams = sodium / 1000)

# 30-bin histogram of the rescaled sodium column, filled by restaurant.
ggplot(fastfood, aes(x = sodium.grams, fill = restaurant)) +
  geom_histogram(bins = 30L) +
  scale_fill_hue() +
  theme_minimal()
# Order the items by the cal_fat column, largest first, and show the top six.
Cal_fat <- arrange(FFood, desc(cal_fat))
head(Cal_fat, n = 6L)
## X restaurant item calories
## 1 40 Mcdonalds 20 piece Buttermilk Crispy Chicken Tenders 2430
## 2 45 Mcdonalds 40 piece Chicken McNuggets 1770
## 3 99 Sonic Super Sonic Bacon Double Cheeseburger (w/mayo) 1280
## 4 39 Mcdonalds 12 piece Buttermilk Crispy Chicken Tenders 1510
## 5 102 Sonic Super Sonic Double Cheeseburger W/ Mayo 1220
## 6 100 Sonic Super Sonic Double Cheeseburger W/ Mustard 1120
## cal_fat total_fat sat_fat trans_fat cholesterol sodium total_carb fiber sugar
## 1 1270 141 24 2.0 475 6080 103 2 3
## 2 960 107 18 0.5 295 3370 105 7 1
## 3 830 92 36 4.0 260 1630 44 2 7
## 4 790 88 15 1.0 295 3770 64 1 2
## 5 780 87 34 4.0 245 1520 45 2 8
## 6 680 76 32 4.0 235 1550 44 2 8
## protein vit_a vit_c calcium salad
## 1 186 0 2 8 Other
## 2 98 0 15 6 Other
## 3 67 15 6 40 Other
## 4 115 0 2 6 Other
## 5 63 15 8 40 Other
## 6 63 15 8 40 Other
20 piece Buttermilk Crispy Chicken Tenders