library(tidyverse)
library(readr)
library(dplyr)
library(janitor)
library(plotly)
library(forcats)
library(plyr)
library(scales)
I’m the primary stakeholder for this project.
This dataset was provided by Chris Crawford on Kaggle. Latest update was 10-24-2017. Data was downloaded as a .CSV and stored safely on an external hard drive.
# Load the high-potassium food lookup table and the cereal nutrition data.
# Both paths are relative, so the files are looked up in the working directory.
food_group <- read.csv(file = "high_potassium_food.csv")
cereal <- read.csv(file = "cereal.csv")
Used glimpse() to find that we have 77 rows and 16 columns and to see the data types involved, and used n_distinct() to make sure we have no duplicates.
# Show the row/column counts plus each column's type and first few values.
cereal %>% glimpse()
## Rows: 77
## Columns: 16
## $ name <chr> "100% Bran", "100% Natural Bran", "All-Bran", "All-Bran with ~
## $ mfr <chr> "N", "Q", "K", "K", "R", "G", "K", "G", "R", "P", "Q", "G", "~
## $ type <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "~
## $ calories <int> 70, 120, 70, 50, 110, 110, 110, 130, 90, 90, 120, 110, 120, 1~
## $ protein <int> 4, 3, 4, 4, 2, 2, 2, 3, 2, 3, 1, 6, 1, 3, 1, 2, 2, 1, 1, 3, 3~
## $ fat <int> 1, 5, 1, 0, 2, 2, 0, 2, 1, 0, 2, 2, 3, 2, 1, 0, 0, 0, 1, 3, 0~
## $ sodium <int> 130, 15, 260, 140, 200, 180, 125, 210, 200, 210, 220, 290, 21~
## $ fiber <dbl> 10.0, 2.0, 9.0, 14.0, 1.0, 1.5, 1.0, 2.0, 4.0, 5.0, 0.0, 2.0,~
## $ carbo <dbl> 5.0, 8.0, 7.0, 8.0, 14.0, 10.5, 11.0, 18.0, 15.0, 13.0, 12.0,~
## $ sugars <int> 6, 8, 5, 0, 8, 10, 14, 8, 6, 5, 12, 1, 9, 7, 13, 3, 2, 12, 13~
## $ potass <int> 280, 135, 320, 330, -1, 70, 30, 100, 125, 190, 35, 105, 45, 1~
## $ vitamins <int> 25, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25~
## $ shelf <int> 3, 3, 3, 3, 3, 1, 2, 3, 1, 3, 2, 1, 2, 3, 2, 1, 1, 2, 2, 3, 2~
## $ weight <dbl> 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.33, 1.00, 1.00, 1~
## $ cups <dbl> 0.33, 1.00, 0.33, 0.50, 0.75, 0.75, 1.00, 0.75, 0.67, 0.67, 0~
## $ rating <dbl> 68.40297, 33.98368, 59.42551, 93.70491, 34.38484, 29.50954, 3~
# Duplicate check: the count printed here is compared with the 77 rows
# reported by glimpse(); equal values mean no row is repeated.
# NOTE(review): presumably n_distinct() counts distinct rows when given a
# data frame - confirm against the installed dplyr version.
cereal %>% n_distinct()
## [1] 77
# Per-column summary statistics; used to spot out-of-range values such as
# negative minimums.
cereal %>% summary()
## name mfr type calories
## Length:77 Length:77 Length:77 Min. : 50.0
## Class :character Class :character Class :character 1st Qu.:100.0
## Mode :character Mode :character Mode :character Median :110.0
## Mean :106.9
## 3rd Qu.:110.0
## Max. :160.0
## protein fat sodium fiber
## Min. :1.000 Min. :0.000 Min. : 0.0 Min. : 0.000
## 1st Qu.:2.000 1st Qu.:0.000 1st Qu.:130.0 1st Qu.: 1.000
## Median :3.000 Median :1.000 Median :180.0 Median : 2.000
## Mean :2.545 Mean :1.013 Mean :159.7 Mean : 2.152
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:210.0 3rd Qu.: 3.000
## Max. :6.000 Max. :5.000 Max. :320.0 Max. :14.000
## carbo sugars potass vitamins
## Min. :-1.0 Min. :-1.000 Min. : -1.00 Min. : 0.00
## 1st Qu.:12.0 1st Qu.: 3.000 1st Qu.: 40.00 1st Qu.: 25.00
## Median :14.0 Median : 7.000 Median : 90.00 Median : 25.00
## Mean :14.6 Mean : 6.922 Mean : 96.08 Mean : 28.25
## 3rd Qu.:17.0 3rd Qu.:11.000 3rd Qu.:120.00 3rd Qu.: 25.00
## Max. :23.0 Max. :15.000 Max. :330.00 Max. :100.00
## shelf weight cups rating
## Min. :1.000 Min. :0.50 Min. :0.250 Min. :18.04
## 1st Qu.:1.000 1st Qu.:1.00 1st Qu.:0.670 1st Qu.:33.17
## Median :2.000 Median :1.00 Median :0.750 Median :40.40
## Mean :2.208 Mean :1.03 Mean :0.821 Mean :42.67
## 3rd Qu.:3.000 3rd Qu.:1.00 3rd Qu.:1.000 3rd Qu.:50.83
## Max. :3.000 Max. :1.50 Max. :1.500 Max. :93.70
Removed the rows that contain negative values, which left 74 rows and 16 columns.
# Drop every row with a negative value in one of the columns whose minimum
# in summary(cereal) was -1 (carbo, sugars, potass). sugars is tested
# explicitly rather than relying on its -1 row also failing the carbo test.
# subset() also drops rows where the condition evaluates to NA.
cereal_without_neg <- subset(cereal, carbo >= 0 & sugars >= 0 & potass >= 0)
# Confirm the remaining row count and column types.
glimpse(cereal_without_neg)
## Rows: 74
## Columns: 16
## $ name <chr> "100% Bran", "100% Natural Bran", "All-Bran", "All-Bran with ~
## $ mfr <chr> "N", "Q", "K", "K", "G", "K", "G", "R", "P", "Q", "G", "G", "~
## $ type <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "~
## $ calories <int> 70, 120, 70, 50, 110, 110, 130, 90, 90, 120, 110, 120, 110, 1~
## $ protein <int> 4, 3, 4, 4, 2, 2, 3, 2, 3, 1, 6, 1, 3, 1, 2, 2, 1, 1, 3, 2, 2~
## $ fat <int> 1, 5, 1, 0, 2, 0, 2, 1, 0, 2, 2, 3, 2, 1, 0, 0, 0, 1, 3, 0, 1~
## $ sodium <int> 130, 15, 260, 140, 180, 125, 210, 200, 210, 220, 290, 210, 14~
## $ fiber <dbl> 10.0, 2.0, 9.0, 14.0, 1.5, 1.0, 2.0, 4.0, 5.0, 0.0, 2.0, 0.0,~
## $ carbo <dbl> 5.0, 8.0, 7.0, 8.0, 10.5, 11.0, 18.0, 15.0, 13.0, 12.0, 17.0,~
## $ sugars <int> 6, 8, 5, 0, 10, 14, 8, 6, 5, 12, 1, 9, 7, 13, 3, 2, 12, 13, 7~
## $ potass <int> 280, 135, 320, 330, 70, 30, 100, 125, 190, 35, 105, 45, 105, ~
## $ vitamins <int> 25, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25~
## $ shelf <int> 3, 3, 3, 3, 1, 2, 3, 1, 3, 2, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 3~
## $ weight <dbl> 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.33, 1.00, 1.00, 1.00, 1~
## $ cups <dbl> 0.33, 1.00, 0.33, 0.50, 0.75, 1.00, 0.75, 0.67, 0.67, 0.75, 1~
## $ rating <dbl> 68.40297, 33.98368, 59.42551, 93.70491, 29.50954, 33.17409, 3~
# Re-run the summary on the filtered data to verify that no negative
# minimums remain.
cereal_without_neg %>% summary()
## name mfr type calories
## Length:74 Length:74 Length:74 Min. : 50
## Class :character Class :character Class :character 1st Qu.:100
## Mode :character Mode :character Mode :character Median :110
## Mean :107
## 3rd Qu.:110
## Max. :160
## protein fat sodium fiber carbo
## Min. :1.000 Min. :0 Min. : 0.0 Min. : 0.000 Min. : 5.00
## 1st Qu.:2.000 1st Qu.:0 1st Qu.:135.0 1st Qu.: 0.250 1st Qu.:12.00
## Median :2.500 Median :1 Median :180.0 Median : 2.000 Median :14.50
## Mean :2.514 Mean :1 Mean :162.4 Mean : 2.176 Mean :14.73
## 3rd Qu.:3.000 3rd Qu.:1 3rd Qu.:217.5 3rd Qu.: 3.000 3rd Qu.:17.00
## Max. :6.000 Max. :5 Max. :320.0 Max. :14.000 Max. :23.00
## sugars potass vitamins shelf
## Min. : 0.000 Min. : 15.00 Min. : 0.00 Min. :1.000
## 1st Qu.: 3.000 1st Qu.: 41.25 1st Qu.: 25.00 1st Qu.:1.250
## Median : 7.000 Median : 90.00 Median : 25.00 Median :2.000
## Mean : 7.108 Mean : 98.51 Mean : 29.05 Mean :2.216
## 3rd Qu.:11.000 3rd Qu.:120.00 3rd Qu.: 25.00 3rd Qu.:3.000
## Max. :15.000 Max. :330.00 Max. :100.00 Max. :3.000
## weight cups rating
## Min. :0.500 Min. :0.2500 Min. :18.04
## 1st Qu.:1.000 1st Qu.:0.6700 1st Qu.:32.45
## Median :1.000 Median :0.7500 Median :40.25
## Mean :1.031 Mean :0.8216 Mean :42.37
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:50.52
## Max. :1.500 Max. :1.5000 Max. :93.70
Arranged the cereal by rating in descending order
# Sort the cleaned cereals from highest to lowest rating, so the first row
# is the best-rated cereal.
# NOTE(review): plyr is attached after dplyr at the top of the file, so
# arrange() and desc() here resolve to plyr's versions - confirm this is
# intended.
cereal_by_rating <- arrange(cereal_without_neg, desc(rating))
# Standardise the column names of the food lookup table with janitor.
food_group <- food_group %>% clean_names()
As seen below, the higher-rated cereals tend to have lower calorie counts.
# Jittered scatter plot of calories (x) against rating (y), one point per
# cereal.
# NOTE(review): fill is not drawn by the default point shape; it looks like
# it is mapped so that ggplotly shows the cereal name on hover - confirm.
rating_calories <- ggplot(data = cereal_by_rating) +
  geom_jitter(mapping = aes(x = calories, y = rating, fill = name)) +
  labs(
    title = "80 Cereals",
    subtitle = "Calorie vs Rating trend",
    x = "Calories",
    y = "Rating",
    caption = "Data Source: Chris Crawford"
  )
# Convert the static ggplot into an interactive plotly widget and display it.
rating_calorie <- rating_calories %>% ggplotly()
rating_calorie
The World Health Organization (WHO) recommends a reduction in sodium intake to reduce blood pressure and the risk of cardiovascular disease, stroke, and coronary heart disease in adults. WHO recommends a reduction to <2 g/day sodium (5 g/day salt) in adults. Adults include individuals ≥ 16 years of age. I decided to compare this recommendation to the highest-rated cereal on our list to see if it would be a good product to eat. I also converted the sodium content of the cereal to grams.
# Compare the sodium content of the top-rated cereal with the WHO daily limit.
# cereal_by_rating is sorted by descending rating, so row 1 is the top cereal.
cereal_max_rating <- cereal_by_rating[1, ]
# Divide by 1000 to express sodium in grams, the unit of the WHO figure
# (assumes the sodium column is recorded in milligrams - TODO confirm).
cereal_max_rating["sodium_in_grams"] <- cereal_max_rating$sodium / 1000
cereal_max <- select(cereal_max_rating, name, sodium_in_grams)
# WHO recommends less than 2 g of sodium per day, which is equivalent to
# 5 g of salt. The two figures describe the same limit and must not be
# summed, so the sodium reference value is 2.
recommended_sodium <- c("WHO_recommendation", 2)
sodium_intake <- rbind(cereal_max, recommended_sodium)
# recommended_sodium is a character vector, so the column is converted back
# to numeric before plotting.
sodium_intake$sodium_in_grams <- as.double(sodium_intake$sodium_in_grams)
ggplot(data = sodium_intake) +
  geom_col(aes(x = name, y = sodium_in_grams, fill = name)) +
  labs(
    title = "Sodium",
    x = "",
    y = "Sodium (g)",
    subtitle = "All-Bran with Extra Fiber vs WHO recommendation for daily sodium intake 3/27/2022",
    caption = "Data Source: Chris Crawford"
  ) +
  # vjust is a constant, so it is set outside aes() to nudge labels above bars.
  geom_text(
    aes(x = name, y = sodium_in_grams, label = sodium_in_grams),
    vjust = -0.1
  )
A meta-analysis of 21 studies with 21 comparisons found that increased potassium intake resulted in a decrease in resting systolic blood pressure. The results suggest that the greatest impact on blood pressure was achieved when the increased potassium intake was approximately 3,519–4,692 mg/day. Let’s see how the top-rated cereal compares to the WHO recommendation.
# Rank the cleaned cereals by potassium content and keep the five highest.
cereal_by_potassium <- arrange(cereal_by_rating, desc(potass))
potass_top_5 <- cereal_by_potassium %>% head(5)
# Bar chart of those five cereals, with each bar labelled by its value and
# the x-axis labels rotated 45 degrees.
ggplot(data = potass_top_5) +
  geom_col(mapping = aes(x = name, y = potass, fill = name)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = .4)) +
  labs(
    x = " ",
    y = "Potassium (mg)",
    title = "Potassium",
    subtitle = "Top Five brands with the highest amount of Potassium",
    caption = "Data Source: Chris Crawford"
  ) +
  geom_text(aes(x = name, y = potass, label = potass, vjust = -.1))
# Compare the potassium in the top-rated cereal with the reference intake.
cereal_max_potass <- cereal_max_rating %>% select(name, potass)
# 3519 is the value plotted as the recommended daily potassium intake.
recommended_potassium <- c("WHO_recommendation", 3519)
potass_intake <- rbind(cereal_max_potass, recommended_potassium)
# recommended_potassium is a character vector, so the column is converted
# back to numeric before plotting.
potass_intake[["potass"]] <- as.double(potass_intake[["potass"]])
ggplot(data = potass_intake) +
  geom_col(mapping = aes(x = name, y = potass, fill = name)) +
  labs(
    title = "Potassium",
    x = "",
    y = "Potassium (mg)",
    subtitle = "All-Bran with Extra Fiber vs WHO recommendation for daily potassium intake 3/27/2022",
    caption = "Data Source: Chris Crawford"
  ) +
  geom_text(aes(x = name, y = potass, label = potass, vjust = -.1))
The highest-rated cereal may be good for weight loss due to its low calorie and sodium content, but if you plan to eat this cereal I suggest eating foods rich in potassium along with it. Examples:
## food_group potassium_content_mg
## 1 Beans and peas 1300
## 2 Nuts 600
## 3 Green vegetables 550
## 4 Root vegetables 200
## 5 Other vegetables 300
## 6 Fruits 300