Source: Time Magazine
Define the variables from the dataset that you will use in your project, and the types of questions you would like to explore with this dataset.
Attempt to discover HOW the data was collected – describe the methodology, or state clearly that there is no ReadMe (or something similar) file with that information.
Explain why you chose this topic and dataset – what meaning does it have for you?
# load the libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
library(readr)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.5.2
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.5.2
library(highcharter)
## Warning: package 'highcharter' was built under R version 4.5.2
library(RColorBrewer)
# read the menu data; the path is relative, so the file is looked up in the
# current working directory
menu <- read_csv(file = "menu2_.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 266 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Category, Item, Serving Size, Calories
## dbl (20): Calories from Fat, Total Fat, Total Fat (% Daily Value), Saturated...
## lgl (1): Observ
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# preview the first six rows of the raw data
head(menu, n = 6L)
## # A tibble: 6 × 25
## Category Item `Serving Size` Calories `Calories from Fat` `Total Fat`
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Breakfast Egg McMuffin 4.8 oz (136 g) 300cal. 120 13
## 2 Breakfast Egg White D… 4.8 oz (135 g) 250 70 8
## 3 Breakfast Sausage McM… 3.9 oz (111 g) 370 200 23
## 4 Breakfast Sausage McM… 5.7 oz (161 g) 450 250 28
## 5 Breakfast Sausage McM… 5.7 oz (161 g) 400 210 23
## 6 Breakfast Steak & Egg… 6.5 oz (185 g) 430 210 23
## # ℹ 19 more variables: `Total Fat (% Daily Value)` <dbl>,
## # `Saturated Fat` <dbl>, `Saturated Fat (% Daily Value)` <dbl>,
## # `Trans Fat` <dbl>, Cholesterol <dbl>, `Cholesterol (% Daily Value)` <dbl>,
## # Sodium <dbl>, `Sodium (% Daily Value)` <dbl>, Carbohydrates <dbl>,
## # `Carbohydrates (% Daily Value)` <dbl>, `Dietary Fiber` <dbl>,
## # `Dietary Fiber (% Daily Value)` <dbl>, Sugars <dbl>, Protein <dbl>,
## # `Vitamin A (% Daily Value)` <dbl>, `Vitamin C (% Daily Value)` <dbl>, …
# cleaning: lower-case the column names and replace spaces and the characters
# ( ) . / - with underscores, then drop the `observ` column
names(menu) <- names(menu) |>
  tolower() |>
  gsub(pattern = " ", replacement = "_") |>
  gsub(pattern = "[(). //-]", replacement = "_")
mcdonalds <- select(menu, -observ)
head(mcdonalds, n = 6L)
## # A tibble: 6 × 24
## category item serving_size calories calories_from_fat total_fat
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Breakfast Egg McMuffin 4.8 oz (136… 300cal. 120 13
## 2 Breakfast Egg White Delight 4.8 oz (135… 250 70 8
## 3 Breakfast Sausage McMuffin 3.9 oz (111… 370 200 23
## 4 Breakfast Sausage McMuffin … 5.7 oz (161… 450 250 28
## 5 Breakfast Sausage McMuffin … 5.7 oz (161… 400 210 23
## 6 Breakfast Steak & Egg McMuf… 6.5 oz (185… 430 210 23
## # ℹ 18 more variables: `total_fat__%_daily_value_` <dbl>, saturated_fat <dbl>,
## # `saturated_fat__%_daily_value_` <dbl>, trans_fat <dbl>, cholesterol <dbl>,
## # `cholesterol__%_daily_value_` <dbl>, sodium <dbl>,
## # `sodium__%_daily_value_` <dbl>, carbohydrates <dbl>,
## # `carbohydrates__%_daily_value_` <dbl>, dietary_fiber <dbl>,
## # `dietary_fiber__%_daily_value_` <dbl>, sugars <dbl>, protein <dbl>,
## # `vitamin_a__%_daily_value_` <dbl>, `vitamin_c__%_daily_value_` <dbl>, …
# Strip the "cal" / "cal." unit suffix (in any letter case) so that only the
# numeric part of each calorie entry remains. The dot is escaped and optional:
# an unescaped "." is a regex wildcard and would also delete whatever
# character happens to follow "cal".
mcdonalds$calories <- gsub(
  "cal\\.?", "", mcdonalds$calories,
  ignore.case = TRUE
)
head(mcdonalds)
## # A tibble: 6 × 24
## category item serving_size calories calories_from_fat total_fat
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Breakfast Egg McMuffin 4.8 oz (136… 300 120 13
## 2 Breakfast Egg White Delight 4.8 oz (135… 250 70 8
## 3 Breakfast Sausage McMuffin 3.9 oz (111… 370 200 23
## 4 Breakfast Sausage McMuffin … 5.7 oz (161… 450 250 28
## 5 Breakfast Sausage McMuffin … 5.7 oz (161… 400 210 23
## 6 Breakfast Steak & Egg McMuf… 6.5 oz (185… 430 210 23
## # ℹ 18 more variables: `total_fat__%_daily_value_` <dbl>, saturated_fat <dbl>,
## # `saturated_fat__%_daily_value_` <dbl>, trans_fat <dbl>, cholesterol <dbl>,
## # `cholesterol__%_daily_value_` <dbl>, sodium <dbl>,
## # `sodium__%_daily_value_` <dbl>, carbohydrates <dbl>,
## # `carbohydrates__%_daily_value_` <dbl>, dietary_fiber <dbl>,
## # `dietary_fiber__%_daily_value_` <dbl>, sugars <dbl>, protein <dbl>,
## # `vitamin_a__%_daily_value_` <dbl>, `vitamin_c__%_daily_value_` <dbl>, …
# convert the cleaned calorie strings to numbers
mcdonalds[["calories"]] <- as.numeric(mcdonalds[["calories"]])
head(mcdonalds, n = 6L)
## # A tibble: 6 × 24
## category item serving_size calories calories_from_fat total_fat
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Breakfast Egg McMuffin 4.8 oz (136… 300 120 13
## 2 Breakfast Egg White Delight 4.8 oz (135… 250 70 8
## 3 Breakfast Sausage McMuffin 3.9 oz (111… 370 200 23
## 4 Breakfast Sausage McMuffin … 5.7 oz (161… 450 250 28
## 5 Breakfast Sausage McMuffin … 5.7 oz (161… 400 210 23
## 6 Breakfast Steak & Egg McMuf… 6.5 oz (185… 430 210 23
## # ℹ 18 more variables: `total_fat__%_daily_value_` <dbl>, saturated_fat <dbl>,
## # `saturated_fat__%_daily_value_` <dbl>, trans_fat <dbl>, cholesterol <dbl>,
## # `cholesterol__%_daily_value_` <dbl>, sodium <dbl>,
## # `sodium__%_daily_value_` <dbl>, carbohydrates <dbl>,
## # `carbohydrates__%_daily_value_` <dbl>, dietary_fiber <dbl>,
## # `dietary_fiber__%_daily_value_` <dbl>, sugars <dbl>, protein <dbl>,
## # `vitamin_a__%_daily_value_` <dbl>, `vitamin_c__%_daily_value_` <dbl>, …
# number of missing values in each column
mcdonalds |>
  is.na() |>
  colSums()
## category item
## 0 0
## serving_size calories
## 0 3
## calories_from_fat total_fat
## 1 2
## total_fat__%_daily_value_ saturated_fat
## 2 2
## saturated_fat__%_daily_value_ trans_fat
## 1 2
## cholesterol cholesterol__%_daily_value_
## 2 1
## sodium sodium__%_daily_value_
## 2 1
## carbohydrates carbohydrates__%_daily_value_
## 3 1
## dietary_fiber dietary_fiber__%_daily_value_
## 2 0
## sugars protein
## 0 1
## vitamin_a__%_daily_value_ vitamin_c__%_daily_value_
## 1 2
## calcium__%_daily_value_ iron__%_daily_value_
## 1 1
# Mean calories for each menu category (items with no calorie value are left
# out), rounded to one decimal place and ordered from highest to lowest.
avg_calories <- mcdonalds |>
  filter(!is.na(calories)) |>
  group_by(category) |>
  summarise(avg_cal = round(mean(calories, na.rm = TRUE), digits = 1)) |>
  arrange(desc(avg_cal))

# Bar chart of the category averages.
avg_calories |>
  hchart("bar", hcaes(x = category, y = avg_cal)) |>
  hc_colors("#c8102e") |>
  hc_xAxis(title = list(text = "Menu Category")) |>
  hc_yAxis(title = list(text = "Average Calories")) |>
  hc_tooltip(pointFormat = "Avg Calories: <b>{point.y}</b>") |>
  hc_title(text = "Average Calories by McDonald's Menu Category") |>
  hc_caption(text = "Source: McDonald's USA Nutritional Facts") |>
  hc_add_theme(hc_theme_flat())
# Scatter plot of calories against total fat, one point per menu item and
# coloured by category; rows missing either value are dropped before plotting.
ggplot(
  data = filter(mcdonalds, !(is.na(calories) | is.na(total_fat))),
  mapping = aes(x = total_fat, y = calories, colour = category)
) +
  geom_point(alpha = 0.75, size = 2.5) +
  scale_colour_brewer(palette = "Set1") +
  theme_foundation() +
  labs(
    x = "Total Fat (g)",
    y = "Calories",
    colour = "Menu Category",
    title = "Calories vs. Total Fat in McDonald's Menu Items",
    subtitle = "Each point represents one menu item, colored by menu category",
    caption = "Source: McDonald's USA Nutritional Facts"
  )
# Keep only the nutrition variables used for the correlation plot and the
# regression, then drop every row with a missing value in any of the nine
# columns listed in drop_na() (sugars is selected but not checked).
mcdonalds1 <- mcdonalds |>
  select(
    calories, total_fat, saturated_fat, trans_fat, sugars, sodium,
    cholesterol, carbohydrates, dietary_fiber, protein
  ) |>
  drop_na(
    calories, total_fat, saturated_fat, trans_fat, sodium,
    carbohydrates, protein, cholesterol, dietary_fiber
  )
head(mcdonalds1, n = 6L)
## # A tibble: 6 × 10
## calories total_fat saturated_fat trans_fat sugars sodium cholesterol
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 300 13 5 0 3 750 260
## 2 370 23 8 0 2 780 45
## 3 450 28 10 0 2 860 285
## 4 400 23 8 0 2 880 50
## 5 430 23 9 1 3 960 300
## 6 460 26 13 0 3 1300 250
## # ℹ 3 more variables: carbohydrates <dbl>, dietary_fiber <dbl>, protein <dbl>
# correlation plot of the selected nutrition variables
library(DataExplorer)
mcdonalds1 |>
  plot_correlation()
# Multiple linear regression of calories on fat, carbohydrates, protein,
# fiber, sodium, sugars and cholesterol, followed by the model summary.
multiple_model <- lm(
  formula = calories ~ total_fat + carbohydrates + protein + dietary_fiber +
    sodium + sugars + cholesterol,
  data = mcdonalds1
)
summary(multiple_model)
##
## Call:
## lm(formula = calories ~ total_fat + carbohydrates + protein +
## dietary_fiber + sodium + sugars + cholesterol, data = mcdonalds1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.230 -4.097 0.218 3.150 192.292
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.6033480 1.8168448 -0.882 0.378
## total_fat 8.5798042 0.1483025 57.853 <2e-16 ***
## carbohydrates 4.1830134 0.1228323 34.055 <2e-16 ***
## protein 4.2604630 0.1825505 23.339 <2e-16 ***
## dietary_fiber -0.4348191 0.9015951 -0.482 0.630
## sodium -0.0008306 0.0057083 -0.146 0.884
## sugars -0.1719959 0.1273483 -1.351 0.178
## cholesterol 0.0087130 0.0135902 0.641 0.522
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.72 on 245 degrees of freedom
## Multiple R-squared: 0.9969, Adjusted R-squared: 0.9968
## F-statistic: 1.125e+04 on 7 and 245 DF, p-value: < 2.2e-16
# plots of the fitted regression model
plot(x = multiple_model)
References:
Image: https://time.com/4084668/mcdonalds-rebranding-sales-growth/