Source: Time Magazine

Introduction: You MUST include the source of your data in your introduction. Be sure you describe the original data source, not the repository you downloaded the data from. The data source is the individual or organization that collected the data.

Define variables you will use in your project from the dataset and the types of questions you would like to explore about your dataset.

Attempt to discover HOW the data was collected – describe the methodology, or state clearly that there is no ReadMe (or something similar) file with that information.

Explain why you chose this topic and dataset – what meaning does it have for you?

Coding

# load the libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
library(readr)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.5.2
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.5.2
library(highcharter)
## Warning: package 'highcharter' was built under R version 4.5.2
library(RColorBrewer)
# Import the raw menu data. The path is relative, so the CSV must sit in the
# current working directory (this script does not change it).
menu <- read_csv(file = "menu2_.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 266 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): Category, Item, Serving Size, Calories
## dbl (20): Calories from Fat, Total Fat, Total Fat (% Daily Value), Saturated...
## lgl  (1): Observ
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the raw import to inspect column names and types.
head(menu)
## # A tibble: 6 × 25
##   Category  Item         `Serving Size` Calories `Calories from Fat` `Total Fat`
##   <chr>     <chr>        <chr>          <chr>                  <dbl>       <dbl>
## 1 Breakfast Egg McMuffin 4.8 oz (136 g) 300cal.                  120          13
## 2 Breakfast Egg White D… 4.8 oz (135 g) 250                       70           8
## 3 Breakfast Sausage McM… 3.9 oz (111 g) 370                      200          23
## 4 Breakfast Sausage McM… 5.7 oz (161 g) 450                      250          28
## 5 Breakfast Sausage McM… 5.7 oz (161 g) 400                      210          23
## 6 Breakfast Steak & Egg… 6.5 oz (185 g) 430                      210          23
## # ℹ 19 more variables: `Total Fat (% Daily Value)` <dbl>,
## #   `Saturated Fat` <dbl>, `Saturated Fat (% Daily Value)` <dbl>,
## #   `Trans Fat` <dbl>, Cholesterol <dbl>, `Cholesterol (% Daily Value)` <dbl>,
## #   Sodium <dbl>, `Sodium (% Daily Value)` <dbl>, Carbohydrates <dbl>,
## #   `Carbohydrates (% Daily Value)` <dbl>, `Dietary Fiber` <dbl>,
## #   `Dietary Fiber (% Daily Value)` <dbl>, Sugars <dbl>, Protein <dbl>,
## #   `Vitamin A (% Daily Value)` <dbl>, `Vitamin C (% Daily Value)` <dbl>, …
# Cleaning ----
# Normalise the column names in one pass: lower-case them, then turn spaces
# and the punctuation characters "(", ")", ".", "/" and "-" into underscores.
names(menu) <- names(menu) |>
  tolower() |>
  gsub(pattern = " ", replacement = "_") |>
  gsub(pattern = "[(). //-]", replacement = "_")

# Work on a copy of the data without the `observ` column.
mcdonalds <- select(menu, -observ)
head(mcdonalds)
## # A tibble: 6 × 24
##   category  item               serving_size calories calories_from_fat total_fat
##   <chr>     <chr>              <chr>        <chr>                <dbl>     <dbl>
## 1 Breakfast Egg McMuffin       4.8 oz (136… 300cal.                120        13
## 2 Breakfast Egg White Delight  4.8 oz (135… 250                     70         8
## 3 Breakfast Sausage McMuffin   3.9 oz (111… 370                    200        23
## 4 Breakfast Sausage McMuffin … 5.7 oz (161… 450                    250        28
## 5 Breakfast Sausage McMuffin … 5.7 oz (161… 400                    210        23
## 6 Breakfast Steak & Egg McMuf… 6.5 oz (185… 430                    210        23
## # ℹ 18 more variables: `total_fat__%_daily_value_` <dbl>, saturated_fat <dbl>,
## #   `saturated_fat__%_daily_value_` <dbl>, trans_fat <dbl>, cholesterol <dbl>,
## #   `cholesterol__%_daily_value_` <dbl>, sodium <dbl>,
## #   `sodium__%_daily_value_` <dbl>, carbohydrates <dbl>,
## #   `carbohydrates__%_daily_value_` <dbl>, dietary_fiber <dbl>,
## #   `dietary_fiber__%_daily_value_` <dbl>, sugars <dbl>, protein <dbl>,
## #   `vitamin_a__%_daily_value_` <dbl>, `vitamin_c__%_daily_value_` <dbl>, …
# Strip the calorie unit suffix that some rows carry (e.g. "300cal.") so the
# column can be converted to numeric afterwards.
# A single case-insensitive pattern replaces the three separate passes. The
# dot is escaped so that it matches only a literal "." -- the previous pattern
# "cal." treated "." as a wildcard and removed whichever character followed
# "cal". trimws() drops any whitespace left behind next to the number.
mcdonalds$calories <- trimws(
  gsub("cal\\.?", "", mcdonalds$calories, ignore.case = TRUE)
)
head(mcdonalds)
## # A tibble: 6 × 24
##   category  item               serving_size calories calories_from_fat total_fat
##   <chr>     <chr>              <chr>        <chr>                <dbl>     <dbl>
## 1 Breakfast Egg McMuffin       4.8 oz (136… 300                    120        13
## 2 Breakfast Egg White Delight  4.8 oz (135… 250                     70         8
## 3 Breakfast Sausage McMuffin   3.9 oz (111… 370                    200        23
## 4 Breakfast Sausage McMuffin … 5.7 oz (161… 450                    250        28
## 5 Breakfast Sausage McMuffin … 5.7 oz (161… 400                    210        23
## 6 Breakfast Steak & Egg McMuf… 6.5 oz (185… 430                    210        23
## # ℹ 18 more variables: `total_fat__%_daily_value_` <dbl>, saturated_fat <dbl>,
## #   `saturated_fat__%_daily_value_` <dbl>, trans_fat <dbl>, cholesterol <dbl>,
## #   `cholesterol__%_daily_value_` <dbl>, sodium <dbl>,
## #   `sodium__%_daily_value_` <dbl>, carbohydrates <dbl>,
## #   `carbohydrates__%_daily_value_` <dbl>, dietary_fiber <dbl>,
## #   `dietary_fiber__%_daily_value_` <dbl>, sugars <dbl>, protein <dbl>,
## #   `vitamin_a__%_daily_value_` <dbl>, `vitamin_c__%_daily_value_` <dbl>, …
# Convert the cleaned calorie strings to numbers; anything that still cannot
# be parsed becomes NA.
mcdonalds[["calories"]] <- as.numeric(mcdonalds[["calories"]])
head(mcdonalds)
## # A tibble: 6 × 24
##   category  item               serving_size calories calories_from_fat total_fat
##   <chr>     <chr>              <chr>           <dbl>             <dbl>     <dbl>
## 1 Breakfast Egg McMuffin       4.8 oz (136…      300               120        13
## 2 Breakfast Egg White Delight  4.8 oz (135…      250                70         8
## 3 Breakfast Sausage McMuffin   3.9 oz (111…      370               200        23
## 4 Breakfast Sausage McMuffin … 5.7 oz (161…      450               250        28
## 5 Breakfast Sausage McMuffin … 5.7 oz (161…      400               210        23
## 6 Breakfast Steak & Egg McMuf… 6.5 oz (185…      430               210        23
## # ℹ 18 more variables: `total_fat__%_daily_value_` <dbl>, saturated_fat <dbl>,
## #   `saturated_fat__%_daily_value_` <dbl>, trans_fat <dbl>, cholesterol <dbl>,
## #   `cholesterol__%_daily_value_` <dbl>, sodium <dbl>,
## #   `sodium__%_daily_value_` <dbl>, carbohydrates <dbl>,
## #   `carbohydrates__%_daily_value_` <dbl>, dietary_fiber <dbl>,
## #   `dietary_fiber__%_daily_value_` <dbl>, sugars <dbl>, protein <dbl>,
## #   `vitamin_a__%_daily_value_` <dbl>, `vitamin_c__%_daily_value_` <dbl>, …
# Count the missing values in every column.
mcdonalds |>
  is.na() |>
  colSums()
##                      category                          item 
##                             0                             0 
##                  serving_size                      calories 
##                             0                             3 
##             calories_from_fat                     total_fat 
##                             1                             2 
##     total_fat__%_daily_value_                 saturated_fat 
##                             2                             2 
## saturated_fat__%_daily_value_                     trans_fat 
##                             1                             2 
##                   cholesterol   cholesterol__%_daily_value_ 
##                             2                             1 
##                        sodium        sodium__%_daily_value_ 
##                             2                             1 
##                 carbohydrates carbohydrates__%_daily_value_ 
##                             3                             1 
##                 dietary_fiber dietary_fiber__%_daily_value_ 
##                             2                             0 
##                        sugars                       protein 
##                             0                             1 
##     vitamin_a__%_daily_value_     vitamin_c__%_daily_value_ 
##                             1                             2 
##       calcium__%_daily_value_          iron__%_daily_value_ 
##                             1                             1
# Mean calories per menu category (rounded to one decimal), highest first.
# Items without a calorie value are dropped before averaging.
avg_calories <- mcdonalds |>
  drop_na(calories) |>
  group_by(category) |>
  summarise(avg_cal = round(mean(calories, na.rm = TRUE), 1)) |>
  arrange(desc(avg_cal))

# Interactive bar chart of the category averages.
avg_calories |>
  hchart(type = "bar", mapping = hcaes(x = category, y = avg_cal)) |>
  hc_title(text = "Average Calories by McDonald's Menu Category") |>
  hc_caption(text = "Source: McDonald's USA Nutritional Facts") |>
  hc_xAxis(title = list(text = "Menu Category")) |>
  hc_yAxis(title = list(text = "Average Calories")) |>
  hc_tooltip(pointFormat = "Avg Calories: <b>{point.y}</b>") |>
  hc_colors("#c8102e") |>
  hc_add_theme(hc_theme_flat())
# Scatter plot of calories against total fat, one point per menu item,
# coloured by category. Items missing either value are left out.
# NOTE(review): ggthemes describes theme_foundation() as a base for building
# other themes -- confirm it is the intended final look.
mcdonalds |>
  drop_na(calories, total_fat) |>
  ggplot(mapping = aes(x = total_fat, y = calories, color = category)) +
  geom_point(alpha = 0.75, size = 2.5) +
  labs(
    x = "Total Fat (g)",
    y = "Calories",
    color = "Menu Category",
    title = "Calories vs. Total Fat in McDonald's Menu Items",
    subtitle = "Each point represents one menu item, colored by menu category",
    caption = "Source: McDonald's USA Nutritional Facts"
  ) +
  scale_color_brewer(palette = "Set1") +
  theme_foundation()

# Numeric nutrition columns used for the correlation plot and the regression.
# Rows with a missing value in any selected column except `sugars` are
# removed; `sugars` is deliberately left out of the NA check, as before.
mcdonalds1 <- mcdonalds |>
  select(
    calories, total_fat, saturated_fat, trans_fat, sugars,
    sodium, cholesterol, carbohydrates, dietary_fiber, protein
  ) |>
  drop_na(-sugars)
head(mcdonalds1)
## # A tibble: 6 × 10
##   calories total_fat saturated_fat trans_fat sugars sodium cholesterol
##      <dbl>     <dbl>         <dbl>     <dbl>  <dbl>  <dbl>       <dbl>
## 1      300        13             5         0      3    750         260
## 2      370        23             8         0      2    780          45
## 3      450        28            10         0      2    860         285
## 4      400        23             8         0      2    880          50
## 5      430        23             9         1      3    960         300
## 6      460        26            13         0      3   1300         250
## # ℹ 3 more variables: carbohydrates <dbl>, dietary_fiber <dbl>, protein <dbl>
library(DataExplorer)

# Correlation heatmap across the selected nutrition columns.
mcdonalds1 |>
  plot_correlation()

# Multiple linear regression of calories on the macronutrients plus fiber,
# sodium, sugars and cholesterol.
multiple_model <- lm(
  calories ~ total_fat + carbohydrates + protein + dietary_fiber +
    sodium + sugars + cholesterol,
  data = mcdonalds1
)

# Coefficients, standard errors and fit statistics.
summary(multiple_model)
## 
## Call:
## lm(formula = calories ~ total_fat + carbohydrates + protein + 
##     dietary_fiber + sodium + sugars + cholesterol, data = mcdonalds1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.230  -4.097   0.218   3.150 192.292 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.6033480  1.8168448  -0.882    0.378    
## total_fat      8.5798042  0.1483025  57.853   <2e-16 ***
## carbohydrates  4.1830134  0.1228323  34.055   <2e-16 ***
## protein        4.2604630  0.1825505  23.339   <2e-16 ***
## dietary_fiber -0.4348191  0.9015951  -0.482    0.630    
## sodium        -0.0008306  0.0057083  -0.146    0.884    
## sugars        -0.1719959  0.1273483  -1.351    0.178    
## cholesterol    0.0087130  0.0135902   0.641    0.522    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.72 on 245 degrees of freedom
## Multiple R-squared:  0.9969, Adjusted R-squared:  0.9968 
## F-statistic: 1.125e+04 on 7 and 245 DF,  p-value: < 2.2e-16
# Draw the default diagnostic plots for the fitted regression model.
plot(multiple_model)

References:

Image: https://time.com/4084668/mcdonalds-rebranding-sales-growth/