#== Introduction

### Load Libraries
library(tidyverse) 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(dplyr)

# Preview the dataset: evaluating `fastfood` at top level prints the tibble.
# Nothing is read or assigned here; the object is presumably provided by
# library(openintro) above -- TODO confirm.

fastfood
## # A tibble: 515 × 17
##    restaurant item      calories cal_fat total_fat sat_fat trans_fat cholesterol
##    <chr>      <chr>        <dbl>   <dbl>     <dbl>   <dbl>     <dbl>       <dbl>
##  1 Mcdonalds  Artisan …      380      60         7       2       0            95
##  2 Mcdonalds  Single B…      840     410        45      17       1.5         130
##  3 Mcdonalds  Double B…     1130     600        67      27       3           220
##  4 Mcdonalds  Grilled …      750     280        31      10       0.5         155
##  5 Mcdonalds  Crispy B…      920     410        45      12       0.5         120
##  6 Mcdonalds  Big Mac        540     250        28      10       1            80
##  7 Mcdonalds  Cheesebu…      300     100        12       5       0.5          40
##  8 Mcdonalds  Classic …      510     210        24       4       0            65
##  9 Mcdonalds  Double C…      430     190        21      11       1            85
## 10 Mcdonalds  Double Q…      770     400        45      21       2.5         175
## # ℹ 505 more rows
## # ℹ 9 more variables: sodium <dbl>, total_carb <dbl>, fiber <dbl>, sugar <dbl>,
## #   protein <dbl>, vit_a <dbl>, vit_c <dbl>, calcium <dbl>, salad <chr>

#run the following:

# Three highest-calorie items for every restaurant, as a plain data frame.
# Rows tied at the cut-off are all kept (slice_max() default), so a
# restaurant can contribute more than three rows.
Q0.1 <- fastfood %>%
  group_by(restaurant) %>%
  slice_max(order_by = calories, n = 3) %>%
  ungroup() %>%
  select(restaurant, calories, item) %>%
  as.data.frame()
Q0.1
##     restaurant calories                                             item
## 1        Arbys     1030                           Triple Decker Sandwich
## 2        Arbys      980                                     Ultimate BLT
## 3        Arbys      840                     Pecan Chicken Salad Sandwich
## 4  Burger King     1550                          American Brewhouse King
## 5  Burger King     1250                                       Rodeo King
## 6  Burger King     1220                                   Farmhouse King
## 7  Chick Fil-A      970                         30 piece Chicken Nuggets
## 8  Chick Fil-A      860                      Chicken Enchiladas Meal Kit
## 9  Chick Fil-A      720                        Chicken Parmesan Meal Kit
## 10 Dairy Queen     1260    6 Piece Chicken Strip Basket w/ Country Gravy
## 11 Dairy Queen     1050                               Large Cheese Curds
## 12 Dairy Queen     1030    4 Piece Chicken Strip Basket w/ Country Gravy
## 13   Mcdonalds     2430       20 piece Buttermilk Crispy Chicken Tenders
## 14   Mcdonalds     1770                       40 piece Chicken McNuggets
## 15   Mcdonalds     1600 10 piece Sweet N' Spicy Honey BBQ Glazed Tenders
## 16       Sonic     1350 Garlic Parmesan Dunked Ultimate Chicken Sandwich
## 17       Sonic     1280   Super Sonic Bacon Double Cheeseburger (w/mayo)
## 18       Sonic     1220          Super Sonic Double Cheeseburger W/ Mayo
## 19      Subway     1160                        Footlong Big Hot Pastrami
## 20      Subway     1140         Footlong Carved Turkey & Bacon w/ Cheese
## 21      Subway     1140              Footlong Chicken & Bacon Ranch Melt
## 22   Taco Bell      880                 XXL Grilled Stuft Burrito - Beef
## 23   Taco Bell      830              XXL Grilled Stuft Burrito - Chicken
## 24   Taco Bell      820                XXL Grilled Stuft Burrito - Steak

#Introduction run the following:

# Number of items above 1000 calories at each restaurant.
# Restaurants with no such item do not appear in the result.
Q0.2 <- fastfood %>%
  filter(calories > 1000) %>%
  count(restaurant, name = "item_count") %>%
  as.data.frame()
Q0.2
##    restaurant item_count
## 1       Arbys          1
## 2 Burger King          6
## 3 Dairy Queen          3
## 4   Mcdonalds          6
## 5       Sonic          8
## 6      Subway          4

Question1. ### Q1 Looking only at Burger King and Chick-Fil-A, which item has the highest calories?

# Q1
# Single highest-calorie item across Burger King and Chick Fil-A combined.
# The dataset spells the chain "Chick Fil-A" (no hyphen after "Chick"), so
# the previous "Chick-Fil-A" literal silently matched no rows. The data is
# deliberately not grouped by restaurant: the question asks for one overall
# winner, not one winner per chain.
Q1 <- fastfood %>%
  filter(restaurant %in% c("Burger King", "Chick Fil-A")) %>%
  slice_max(calories, n = 1) %>%
  as.data.frame()
Q1
##    restaurant                    item calories cal_fat total_fat sat_fat
## 1 Burger King American Brewhouse King     1550    1134       126      47
##   trans_fat cholesterol sodium total_carb fiber sugar protein vit_a vit_c
## 1         8         805   1820         21     3     7     134    NA    NA
##   calcium salad
## 1      NA Other

Question2. ### Q2 What is the average sugar amount for all items from Subway?

# Q2
# Mean sugar content over all Subway items, ignoring missing values,
# returned as a one-row data frame with column `average_sugar`.
Q2 <- data.frame(
  average_sugar = fastfood %>%
    filter(restaurant == "Subway") %>%
    pull(sugar) %>%
    mean(na.rm = TRUE)
)
Q2
##   average_sugar
## 1      10.09375

Question3. ### Q3 What is the average value of calories for all items from Taco Bell?

# Q3
# Mean calories over all Taco Bell items, ignoring missing values,
# returned as a one-row data frame with column `average_calories`.
Q3 <- data.frame(
  average_calories = with(
    fastfood,
    mean(calories[restaurant == "Taco Bell"], na.rm = TRUE)
  )
)
Q3
##   average_calories
## 1         443.6522

Question4. ### Q4 Create a variable equal to total_fat x sugar called fatXsugar. Produce a dataframe that has the restaurant, item, and fatXsugar for the top 3 items, from highest to lowest.

# Q4
# fatXsugar = total_fat * sugar; report the three largest values across the
# whole dataset, highest first. The data is intentionally NOT grouped by
# restaurant: grouping returned the top three per chain (24 rows) instead of
# the overall top three the question asks for.
# arrange(desc()) places missing fatXsugar values last, so they cannot be
# picked by slice_head().
Q4 <- fastfood %>%
  mutate(fatXsugar = total_fat * sugar) %>%
  arrange(desc(fatXsugar)) %>%
  slice_head(n = 3) %>%
  select(restaurant, item, fatXsugar) %>%
  as.data.frame()
Q4
##    restaurant                                             item fatXsugar
## 1   Mcdonalds 10 piece Sweet N' Spicy Honey BBQ Glazed Tenders      5742
## 2 Dairy Queen                               Large Cheese Curds      2250
## 3   Mcdonalds  6 piece Sweet N' Spicy Honey BBQ Glazed Tenders      2080

Q5 How many restaurants have an average saturated fat over 10?

# Q5
# Count the restaurants whose mean saturated fat (missing values ignored)
# is above 10. Result is a one-row data frame with `restaurant_count`.
Q5 <- fastfood %>%
  summarise(
    average_saturated_fat = mean(sat_fat, na.rm = TRUE),
    .by = restaurant
  ) %>%
  summarise(
    restaurant_count = sum(average_saturated_fat > 10, na.rm = TRUE)
  ) %>%
  as.data.frame()
Q5
##   restaurant_count
## 1                3

--- NEXT SECTION: ### Allowable packages

The only allowable packages are tidyverse, openintro (you will need to install it if you haven’t already), and lm.beta.

Data Set

The data set for this assignment is called fastfood. See the fastfood documentation for more info

Continued -

Load Libraries

library(tidyverse) 
library(openintro) 
library(lm.beta)

Question6. ### Q6 Create a correlation matrix for the relations between calories, total_fat, sugar, and calcium for all items at Sonic, Subway, and Taco Bell, omitting missing values with na.omit().

# Q6
# Pearson correlation matrix of calories, total_fat, sugar and calcium for
# Sonic, Subway and Taco Bell items; rows with any missing value in those
# four columns are dropped by na.omit() before correlating.
# NOTE(review): the result is stored in `Q1`, which overwrites the answer to
# Question 1 earlier in this file -- confirm which variable name is expected.
Q1 <- as.data.frame(
  cor(
    na.omit(
      fastfood[
        fastfood$restaurant %in% c("Sonic", "Subway", "Taco Bell"),
        c("calories", "total_fat", "sugar", "calcium")
      ]
    )
  )
)
Q1
##            calories total_fat     sugar   calcium
## calories  1.0000000 0.8117648 0.4454451 0.6127083
## total_fat 0.8117648 1.0000000 0.1018453 0.2415309
## sugar     0.4454451 0.1018453 1.0000000 0.6690489
## calcium   0.6127083 0.2415309 0.6690489 1.0000000

Question7. ### Q7 Create a regression predicting whether or not a restaurant is McDonalds or Subway based on calories, sodium, and protein. (McDonalds should be 1, Subway 0) Hint: make sure you know how McDonalds is spelled in the dataset.

  • Assign the model coefficients to Q7.
# Q7
# Regression for "item is from McDonalds (1) rather than Subway (0)".
#
# Fixes relative to the earlier version:
#  * the dataset spells the chain "Mcdonalds"; "McDonald's" matched nothing,
#    so the old response was effectively "is Subway" over every restaurant;
#  * only McDonalds and Subway rows are kept, and the response is coded
#    explicitly as 1 / 0;
#  * a binary outcome is modelled with a binomial GLM (logistic regression)
#    rather than lm();
#  * Q7 holds the model coefficients, as the question requests, not the
#    fitted model object (which stays available as `model`).
# NOTE: the printed output that follows was produced before this fix and
# must be regenerated.
q7_data <- fastfood %>%
  filter(restaurant %in% c("Mcdonalds", "Subway")) %>%
  mutate(is_mcdonalds = if_else(restaurant == "Mcdonalds", 1, 0))

model <- glm(
  is_mcdonalds ~ calories + sodium + protein,
  data = q7_data,
  family = binomial
)
Q7 <- coef(model)
Q7 
## 
## Call:
## lm(formula = restaurant %in% c("McDonald's", "Subway") ~ calories + 
##     sodium + protein, data = fastfood)
## 
## Coefficients:
## (Intercept)     calories       sodium      protein  
##   2.097e-01   -5.234e-04    5.063e-05    6.855e-03

Question8. ### Q8 Run the same regression as in Q7 but remove sodium as a predictor. Which is the better model?

  • Use the classical AIC (k=2).
  • Assign the AIC of the better model to Q3.
# Q8: compare the Q7 model with and without sodium using classical AIC.
#
# The response is rebuilt here so this chunk does not depend on Q7's
# objects: keep only McDonalds / Subway rows (the dataset spelling is
# "Mcdonalds") and code McDonalds as 1, Subway as 0. Rows with a missing
# predictor are dropped up front so both models are fitted to exactly the
# same observations -- AIC values are only comparable on identical data.
# NOTE: the printed output that follows was produced before this fix and
# must be regenerated.
q8_data <- fastfood %>%
  filter(restaurant %in% c("Mcdonalds", "Subway")) %>%
  mutate(is_mcdonalds = if_else(restaurant == "Mcdonalds", 1, 0)) %>%
  drop_na(calories, sodium, protein)

# Fit the model with sodium as a predictor (same specification as Q7)
model_with_sodium <- glm(
  is_mcdonalds ~ calories + sodium + protein,
  data = q8_data,
  family = binomial
)

# Fit the model without sodium as a predictor
model_without_sodium <- glm(
  is_mcdonalds ~ calories + protein,
  data = q8_data,
  family = binomial
)

# Calculate AIC for both models
AIC_with_sodium <- AIC(model_with_sodium, k = 2)
AIC_without_sodium <- AIC(model_without_sodium, k = 2)

# Determine the better model based on AIC (lower is better). Plain if/else
# is used because both operands are scalars.
better_model <- if (AIC_with_sodium < AIC_without_sodium) {
  "with_sodium"
} else {
  "without_sodium"
}

# Assign the AIC of the better model to Q3 (one-row data frame, column Q3)
Q3 <- data.frame(Q3 = min(AIC_with_sodium, AIC_without_sodium))
Q3
##         Q3
## 1 478.7177

Q9 Run a regression predicting calories from saturated fat, fiber, and sugar. Based on standardized regression coefficients, identify the strongest predictor.

  • Assign the unstandardized regression coefficient of the strongest predictor to Q9.
  • (You can access the coefficients by indexing the model object.)
# Q9
# Predict calories from saturated fat, fiber and sugar.
# NOTE: the printed output that follows was produced before this fix and
# must be regenerated.
model_q9 <- lm(calories ~ sat_fat + fiber + sugar, data = fastfood)

# Identify the strongest predictor based on *standardized* coefficients.
# The earlier version ranked the raw coefficients, intercept included, and
# therefore always returned "(Intercept)". lm.beta() supplies standardized
# betas; the intercept entry is excluded explicitly because it is not a
# predictor (and is NA or 0 depending on the lm.beta version).
standardized_q9 <- lm.beta::lm.beta(model_q9)$standardized.coefficients
standardized_q9 <- standardized_q9[names(standardized_q9) != "(Intercept)"]
strongest_predictor <- names(which.max(abs(standardized_q9)))

# Access the *unstandardized* regression coefficient of the strongest predictor
Q9 <- coef(model_q9)[strongest_predictor]
Q9 <- as.data.frame(Q9)
Q9
##                   Q9
## (Intercept) 113.3344

Q10 For this question, use data from only restaurants with between 50 and 60 items in the data set. Predict total fat from cholesterol, total carbs, vitamin a, and restaurant. Remove any nonsignificant predictors and run again.

  • Assign the strongest standardized regression coefficient to Q10. Your output should look something like this:
# Q10
# Keep only restaurants that have between 50 and 60 items (inclusive).
# NOTE: the printed output that follows was produced before this fix and
# must be regenerated.
filtered_data <- fastfood %>%
  group_by(restaurant) %>%
  filter(n() >= 50 & n() <= 60) %>%
  ungroup()

# Step 1: full model with every candidate predictor.
full_model_q10 <- lm(
  total_fat ~ cholesterol + total_carb + vit_a + restaurant,
  data = filtered_data
)

# Step 2: drop nonsignificant predictors. drop1() gives one F test per term,
# so the multi-level `restaurant` factor is judged as a whole rather than
# dummy by dummy. The "<none>" row has no p-value and is skipped.
term_tests_q10 <- drop1(full_model_q10, test = "F")
term_p_values <- setNames(term_tests_q10[["Pr(>F)"]], rownames(term_tests_q10))
significant_terms <- names(term_p_values)[
  !is.na(term_p_values) & term_p_values < 0.05
]
if (length(significant_terms) == 0) {
  stop("No predictor is significant at the 0.05 level.", call. = FALSE)
}

# Step 3: refit using only the significant predictors.
model_q10 <- lm(
  reformulate(significant_terms, response = "total_fat"),
  data = filtered_data
)

# Identify and print the strongest standardized regression coefficient.
# coef() has no `standardize` argument (it was silently ignored before), so
# the standardized betas come from lm.beta(); the intercept is excluded
# because it is not a predictor.
std_coefs_q10 <- lm.beta::lm.beta(model_q10)$standardized.coefficients
std_coefs_q10 <- std_coefs_q10[names(std_coefs_q10) != "(Intercept)"]
strongest_predictor_q10 <- names(which.max(abs(std_coefs_q10)))
Q10 <- std_coefs_q10[strongest_predictor_q10]
Q10 <- as.data.frame(Q10)
Q10
##                      Q10
## restaurantSonic 6.433533