library(tidyverse)
library(stringr)
# Colours handed to the manual fill / colour scales of the charts.
# Palette found on http://demo.tremormedia.com/proddev/creative/projects/VHA_Analytics/StyleGuide/GraphColors.html
my_palette <- c(
  "#00abbd", "#055499", "#132241", "#9bca3c", "#c3c3c3",
  "#ff5a00", "#e91365", "#ff921f", "#cc0000"
)

# Menu table read from a local CSV file.
datasource <- read_csv("~/R_projects/170423_macdo/menu.csv")
Let’s have a look today at the nutrition data for McDonald’s products. The data is available on Kaggle. I don’t know much about nutrition (such as the difference between saturated fat, trans fat, etc.), so the analysis will be pretty basic.
The data consists of 260 rows and 24 variables, with each row representing one item. For each of the 260 items (for example “Iced Mocha (Medium)”), we have the related category (“Salads”, “Desserts”, etc.). If we exclude the variables “Item”, “Category” and “Serving Size”, all the other variables represent a nutrition value (“Iron”, “Protein”, “Saturated Fat”, etc.).
We will start by checking the distribution of calories within each category. To do this, we can use a box plot.
# Keep columns 1, 2 and 4 of the menu table; the charts built from this
# subset read `Category`, `Item` and `Calories` from it.
calories <- datasource %>%
  select(c(1, 2, 4))

# One box per category showing how calories are spread across its items.
ggplot(data = calories, mapping = aes(x = Category, y = Calories)) +
  geom_boxplot() +
  labs(title = "Calories distribution per category", y = "Calories") +
  theme_bw() +
  theme(legend.position = "none")
First look and first surprise (at least for me): the highest median belongs to the “smoothies & shakes” category.
We have a few outliers in the “breakfast”, “chicken & fish” and “coffee & tea” categories. One item seems to be particularly high in the “chicken & fish” category, with calories a bit below 2000. To put this in perspective, the McDonald’s website uses 2000 calories as the daily basis: “Percent Daily Values (DV) are based on a 2,000 calorie diet” (source).
Let’s now have a look at the top 20 items with the highest calories.
# Sort by calories, highest first, and keep the first 20 rows.
calories <- calories %>%
  arrange(desc(Calories)) %>%
  head(20)

# Horizontal bars ordered by calories and filled by category, with the
# calorie value drawn as a white text label on each bar.
ggplot(
  data = calories,
  mapping = aes(
    x = reorder(Item, Calories),
    y = Calories,
    fill = Category,
    label = Calories
  )
) +
  geom_col() +
  geom_text(hjust = 1.3, colour = "white") +
  coord_flip() +
  scale_fill_manual(values = my_palette) +
  labs(title = "Top 20 products with the most calories", x = "") +
  theme_bw() +
  theme(legend.position = "top")
So the “Chicken McNuggets (40 pieces)” is by far the highest-calorie item. I have never ordered this product myself, but I assume it is meant to be shared between several people (at least I hope so).
Breakfast products are pretty high on the list, followed by a lot of smoothies & shakes. Surprisingly, there are not many burgers: only the “double quarter pounder with cheese” and the “bacon clubhouse burger”.
Speaking of burgers, they are the main products of McDonald’s, so let’s check the “beef & pork” category.
We will focus on the burger size, the calories and the total fat.
# Restrict the menu to the "Beef & Pork" category and derive a numeric `size`
# column from the serving-size text: take the second-to-last word, strip the
# opening parenthesis and convert what is left to a number.
burgers <- datasource %>%
  filter(Category == "Beef & Pork") %>%
  mutate(size = as.numeric(gsub("\\(", "", word(`Serving Size`, -2))))

# Keep the item name plus the three measures (size relabelled "Size (g)"),
# then stack the measures into Type/Value pairs, one row per item and measure.
burgers1 <- burgers %>%
  select(Item, `Size (g)` = size, Calories, `Total Fat`) %>%
  gather(Type, Value, -Item)

# One panel per measure; horizontal bars per item with the value drawn as a
# white text label.
ggplot(
  data = burgers1,
  mapping = aes(x = Item, y = Value, label = Value, fill = Type)
) +
  geom_col() +
  geom_text(hjust = 1.3, colour = "white") +
  facet_wrap(~ Type, scales = "free_x") +
  coord_flip() +
  scale_fill_manual(values = my_palette) +
  labs(
    title = "Burgers comparaison of size, calories and total fat",
    x = ""
  ) +
  theme_bw() +
  theme(legend.position = "none")
No massive differences here compared with what we already know. The “double quarter pounder with cheese” and “bacon clubhouse burger” are the biggest burgers in size and in calories / total fat.
I used to order several basic burgers / cheeseburgers instead of buying a menu. Although it is a good strategy to optimize the size (g), it is clearly not good from a calorie / total fat perspective…
Checking the calories is one thing, but I would assume it does not tell us much about how “bad” an item is. In a “fivethirtyeight.com” article regarding the double down sandwich (source), the author wrote “calorie counts are overrated. We all need to eat, to the tune of about 2,000 calories per day for a healthy adult. It’s not the calories so much as what you do with them. Are you getting a lot of fat, cholesterol, and sodium (bad)? Or lots of fiber and vitamins instead?”. All right, so let’s check the percentage of daily value of McDonald’s products for total fat, cholesterol and sodium.
We will start with food and we will check drinks after.
# Keep columns 1 and 2 (the code below reads `Category` and `Item` from them)
# plus columns 7, 12 and 14, then stack those three measures into Type/Value
# pairs.
unhealthy <- datasource %>%
  select(c(1, 2, 7, 12, 14)) %>%
  gather(Type, Value, -c(1:2))

# Rows whose category is not one of the three drink categories.
unhealthy_food <- unhealthy %>%
  filter(!(Category %in% c("Beverages", "Coffee & Tea", "Smoothies & Shakes")))

# One point per item and measure, with dashed reference lines at 100 (red)
# and 50 (orange) and the value axis drawn at the top of the panel.
ggplot(
  data = unhealthy_food,
  mapping = aes(x = Item, y = Value, colour = Type)
) +
  geom_point(size = 3) +
  geom_hline(yintercept = 100, colour = "red", linetype = 2) +
  geom_hline(yintercept = 50, colour = "orange", linetype = 2) +
  coord_flip() +
  scale_color_manual(values = my_palette) +
  scale_y_continuous(position = "top") +
  labs(
    title =
      "Daily value of McDonald food items for Cholesterol, Sodium and Total Fat",
    x = "",
    y = "% Daily Value"
  ) +
  theme_bw() +
  theme(legend.position = "top")
We have a few items with Cholesterol, Sodium or Total Fat higher than the daily value. This is the case for the Chicken McNuggets (40 pieces) and some of the big breakfasts. If we check the items between 50% and 100%, we can see that the big breakfasts are often represented.
# Rows of `unhealthy` whose category is one of the three drink categories.
unhealthy_drink <- unhealthy %>%
  filter(Category %in% c("Beverages", "Coffee & Tea", "Smoothies & Shakes"))

# One point per item and measure, with dashed reference lines at 100 (red)
# and 50 (orange) and the value axis drawn at the top of the panel.
ggplot(
  data = unhealthy_drink,
  mapping = aes(x = Item, y = Value, colour = Type)
) +
  geom_point(size = 3) +
  geom_hline(yintercept = 100, colour = "red", linetype = 2) +
  geom_hline(yintercept = 50, colour = "orange", linetype = 2) +
  coord_flip() +
  scale_color_manual(values = my_palette) +
  scale_y_continuous(position = "top") +
  labs(
    title =
      "Daily value of McDonald drink items for Cholesterol, Sodium and Total Fat",
    x = "",
    y = "% Daily Value"
  ) +
  theme_bw() +
  theme(legend.position = "top")
Although nothing exceeds 50% of the daily value, we have a few high percentages: McFlurry with Reese’s Peanut Butter Cups (medium) and McFlurry with M&M’s candies (medium) reach 50% of the daily value for total fat (!). The various Frappés also have pretty high Total Fat / Cholesterol values. Sodium, however, tends to be rather low in beverages when compared with the food products.
To finish, we will check the daily value distribution for each category.
# Stack a set of measure columns into long format.
#
# data         : menu table; columns 1 and 2 are kept as identifiers (the plot
#                below reads `Category` from them).
# measure_cols : positions of the measure columns to stack.
#
# Returns the two identifier columns plus `Type` (the source column name with
# its " (% Daily Value)" suffix removed) and `Daily value`.
tidy_daily_value <- function(data, measure_cols) {
  long <- gather(
    data[c(1, 2, measure_cols)],
    key = Type, value = "Daily value", -c(1:2)
  )
  # Literal match including the leading space: no regex escaping to get wrong
  # and no trailing blank left behind in the facet labels.
  long$Type <- gsub(" (% Daily Value)", "", long$Type, fixed = TRUE)
  long
}

# Box plots of `Daily value` per category, one facet row per `Type`.
#
# long_data : output of tidy_daily_value().
# title     : main plot title.
# palette   : colours for the category outlines (defaults to my_palette).
#
# Returns the ggplot object so that a top-level call still auto-prints it.
plot_daily_value <- function(long_data, title, palette = my_palette) {
  ggplot(long_data, aes(x = Category, y = `Daily value`, colour = Category)) +
    geom_boxplot() +
    facet_grid(Type ~ .) +
    ggtitle(title, subtitle = "Daily Value in % per category") +
    theme_bw() +
    ylab("Daily Value in %") +
    scale_color_manual(values = palette) +
    theme(legend.position = "none")
}

# The measures are plotted three at a time. The author's commented-out
# alternative selected columns 7, 12, 14, 16, 18, 21, 22, 23 and 24 in a
# single pass.
Daily_value <- tidy_daily_value(datasource, c(7, 12, 14))
plot_daily_value(
  Daily_value,
  "Cholesterol, Sodium and Total fat distribution"
)

Daily_value <- tidy_daily_value(datasource, c(16, 18, 21))
plot_daily_value(
  Daily_value,
  "Carbohydrates, Dietary Fiber and Vitamin A distribution"
)

Daily_value <- tidy_daily_value(datasource, c(22, 23, 24))
plot_daily_value(
  Daily_value,
  "Vitamin C, Calcium and Iron distribution"
)