#== Introduction
### Load Libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(dplyr)
# Show the fastfood tibble (provided by the openintro package) as a quick
# sanity check of its columns before answering the questions.
print(fastfood)
## # A tibble: 515 × 17
## restaurant item calories cal_fat total_fat sat_fat trans_fat cholesterol
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mcdonalds Artisan … 380 60 7 2 0 95
## 2 Mcdonalds Single B… 840 410 45 17 1.5 130
## 3 Mcdonalds Double B… 1130 600 67 27 3 220
## 4 Mcdonalds Grilled … 750 280 31 10 0.5 155
## 5 Mcdonalds Crispy B… 920 410 45 12 0.5 120
## 6 Mcdonalds Big Mac 540 250 28 10 1 80
## 7 Mcdonalds Cheesebu… 300 100 12 5 0.5 40
## 8 Mcdonalds Classic … 510 210 24 4 0 65
## 9 Mcdonalds Double C… 430 190 21 11 1 85
## 10 Mcdonalds Double Q… 770 400 45 21 2.5 175
## # ℹ 505 more rows
## # ℹ 9 more variables: sodium <dbl>, total_carb <dbl>, fiber <dbl>, sugar <dbl>,
## # protein <dbl>, vit_a <dbl>, vit_c <dbl>, calcium <dbl>, salad <chr>
# Q0.1: for every restaurant, keep the items with the three highest calorie
# counts (ties are kept), reporting restaurant, calories and item.
Q0.1 <- fastfood %>%
  group_by(restaurant) %>%
  slice_max(order_by = calories, n = 3) %>%
  ungroup() %>%
  select(restaurant, calories, item) %>%
  as.data.frame()
print(Q0.1)
## restaurant calories item
## 1 Arbys 1030 Triple Decker Sandwich
## 2 Arbys 980 Ultimate BLT
## 3 Arbys 840 Pecan Chicken Salad Sandwich
## 4 Burger King 1550 American Brewhouse King
## 5 Burger King 1250 Rodeo King
## 6 Burger King 1220 Farmhouse King
## 7 Chick Fil-A 970 30 piece Chicken Nuggets
## 8 Chick Fil-A 860 Chicken Enchiladas Meal Kit
## 9 Chick Fil-A 720 Chicken Parmesan Meal Kit
## 10 Dairy Queen 1260 6 Piece Chicken Strip Basket w/ Country Gravy
## 11 Dairy Queen 1050 Large Cheese Curds
## 12 Dairy Queen 1030 4 Piece Chicken Strip Basket w/ Country Gravy
## 13 Mcdonalds 2430 20 piece Buttermilk Crispy Chicken Tenders
## 14 Mcdonalds 1770 40 piece Chicken McNuggets
## 15 Mcdonalds 1600 10 piece Sweet N' Spicy Honey BBQ Glazed Tenders
## 16 Sonic 1350 Garlic Parmesan Dunked Ultimate Chicken Sandwich
## 17 Sonic 1280 Super Sonic Bacon Double Cheeseburger (w/mayo)
## 18 Sonic 1220 Super Sonic Double Cheeseburger W/ Mayo
## 19 Subway 1160 Footlong Big Hot Pastrami
## 20 Subway 1140 Footlong Carved Turkey & Bacon w/ Cheese
## 21 Subway 1140 Footlong Chicken & Bacon Ranch Melt
## 22 Taco Bell 880 XXL Grilled Stuft Burrito - Beef
## 23 Taco Bell 830 XXL Grilled Stuft Burrito - Chicken
## 24 Taco Bell 820 XXL Grilled Stuft Burrito - Steak
# Q0.2: number of items per restaurant whose calories exceed 1000.
# Restaurants with no such item do not appear in the result.
Q0.2 <- fastfood %>%
  filter(calories > 1000) %>%
  count(restaurant, name = "item_count") %>%
  as.data.frame()
print(Q0.2)
## restaurant item_count
## 1 Arbys 1
## 2 Burger King 6
## 3 Dairy Queen 3
## 4 Mcdonalds 6
## 5 Sonic 8
## 6 Subway 4
### Q1. Looking only at Burger King and Chick-Fil-A, which item has the highest calories?
# Q1
# Single highest-calorie item across Burger King and Chick Fil-A combined.
# The dataset spells the chain "Chick Fil-A" (no hyphen after "Chick"); the
# previous "Chick-Fil-A" literal matched nothing, so that restaurant was
# silently left out of the comparison.
# No group_by() here: the question asks for one item overall, not one per
# restaurant.
Q1 <- fastfood %>%
  filter(restaurant %in% c("Burger King", "Chick Fil-A")) %>%
  slice_max(calories, n = 1) %>%
  as.data.frame()
Q1
## restaurant item calories cal_fat total_fat sat_fat
## 1 Burger King American Brewhouse King 1550 1134 126 47
## trans_fat cholesterol sodium total_carb fiber sugar protein vit_a vit_c
## 1 8 805 1820 21 3 7 134 NA NA
## calcium salad
## 1 NA Other
### Q2. What is the average sugar amount for all items from Subway?
# Q2: mean sugar content over all Subway items, ignoring missing values.
# The result is a one-row data frame with the column `average_sugar`.
Q2 <- fastfood %>%
  filter(restaurant == "Subway") %>%
  pull(sugar) %>%
  mean(na.rm = TRUE) %>%
  data.frame(average_sugar = .)
print(Q2)
## average_sugar
## 1 10.09375
### Q3. What is the average value of calories for all items from Taco Bell?
# Q3: mean calories over all Taco Bell items, ignoring missing values.
# The result is a one-row data frame with the column `average_calories`.
Q3 <- data.frame(
  average_calories = with(
    fastfood,
    mean(calories[restaurant == "Taco Bell"], na.rm = TRUE)
  )
)
print(Q3)
## average_calories
## 1 443.6522
### Q4. Create a variable equal to total_fat x sugar called fatXsugar. Produce a dataframe that has the restaurant, item, and fatXsugar for the top 3 items, from highest to lowest.
# Q4
# Top 3 items overall by fatXsugar (= total_fat * sugar), highest first.
# The question asks for the three highest items in the whole dataset, so the
# ranking is done without grouping by restaurant (grouping returned three rows
# per restaurant, 24 in total).
# with_ties = FALSE guarantees exactly three rows even if values tie.
Q4 <- fastfood %>%
  mutate(fatXsugar = total_fat * sugar) %>%
  slice_max(fatXsugar, n = 3, with_ties = FALSE) %>%
  select(restaurant, item, fatXsugar) %>%
  as.data.frame()
Q4
## restaurant item fatXsugar
## 1 Mcdonalds 10 piece Sweet N' Spicy Honey BBQ Glazed Tenders 5742
## 2 Dairy Queen Large Cheese Curds 2250
## 3 Mcdonalds 6 piece Sweet N' Spicy Honey BBQ Glazed Tenders 2080
# Q5: how many restaurants have a mean saturated fat (missing values ignored)
# above 10. The result is a one-row data frame with `restaurant_count`.
Q5 <- fastfood %>%
  group_by(restaurant) %>%
  summarise(average_saturated_fat = mean(sat_fat, na.rm = TRUE)) %>%
  filter(average_saturated_fat > 10) %>%
  count(name = "restaurant_count") %>%
  as.data.frame()
print(Q5)
## restaurant_count
## 1 3
### Next section: Allowable packages
# tidyverse, openintro (you will need to install it if you haven't already), and lm.beta.
# The data set for this assignment is called fastfood. See the fastfood documentation for more info.
library(tidyverse)
library(openintro)
library(lm.beta)
### Q6. Create a correlation matrix for the relations between calories, total_fat, sugar, and calcium for all items at Sonic, Subway, and Taco Bell, omitting missing values with na.omit().
# Q6
# Pearson correlation matrix of calories, total_fat, sugar and calcium for
# items sold at Sonic, Subway and Taco Bell. Rows with a missing value in any
# of the four columns are dropped by na.omit() before cor() is applied.
# Stored as Q6 (not Q1) so the answer to Q1 computed earlier in this file is
# not overwritten.
Q6 <- fastfood %>%
  filter(restaurant %in% c("Sonic", "Subway", "Taco Bell")) %>%
  select(calories, total_fat, sugar, calcium) %>%
  na.omit() %>%
  cor() %>%
  as.data.frame()
Q6
## calories total_fat sugar calcium
## calories 1.0000000 0.8117648 0.4454451 0.6127083
## total_fat 0.8117648 1.0000000 0.1018453 0.2415309
## sugar 0.4454451 0.1018453 1.0000000 0.6690489
## calcium 0.6127083 0.2415309 0.6690489 1.0000000
### Q7. Create a regression predicting whether or not a restaurant is McDonalds or Subway based on calories, sodium, and protein. (McDonalds should be 1, Subway 0.) Hint: make sure you know how McDonalds is spelled in the dataset.
# Q7
# Regression of "item is from McDonalds" (1) versus "item is from Subway" (0)
# on calories, sodium and protein.
#
# Two fixes compared with the previous version:
#   * the dataset spells the chain "Mcdonalds", so the old "McDonald's" literal
#     never matched and the response was effectively "is Subway";
#   * only McDonalds and Subway items are kept, otherwise every other chain
#     would be coded as 0 alongside Subway.
# NOTE(review): lm() is kept so Q7 stays an lm object (a linear probability
# model); for a binary outcome glm(..., family = binomial) may be what the
# assignment expects - confirm.
# NOTE: the coefficients printed below this chunk were produced by the old,
# mis-specified model; re-run the chunk to refresh them.
q7_data <- fastfood %>%
  filter(restaurant %in% c("Mcdonalds", "Subway")) %>%
  mutate(is_mcdonalds = as.integer(restaurant == "Mcdonalds"))
model <- lm(is_mcdonalds ~ calories + sodium + protein, data = q7_data)
Q7 <- model
Q7
##
## Call:
## lm(formula = restaurant %in% c("McDonald's", "Subway") ~ calories +
## sodium + protein, data = fastfood)
##
## Coefficients:
## (Intercept) calories sodium protein
## 2.097e-01 -5.234e-04 5.063e-05 6.855e-03
### Q8. Run the same regression as in Q7 but remove sodium as a predictor. Which is the better model?
# Q8
# Compare the Q7 regression with and without sodium and keep the AIC of the
# better (lower-AIC) model.
#
# The response is rebuilt here so this chunk stands on its own: only
# McDonalds and Subway items are kept and McDonalds is coded 1, Subway 0.
# The dataset spells the chain "Mcdonalds"; the old "McDonald's" literal never
# matched. Rows with a missing predictor are dropped up front so both models
# are fitted on exactly the same observations, which AIC comparison requires.
q8_data <- fastfood %>%
  filter(restaurant %in% c("Mcdonalds", "Subway")) %>%
  drop_na(calories, sodium, protein) %>%
  mutate(is_mcdonalds = as.integer(restaurant == "Mcdonalds"))
# Fit the model with sodium as a predictor (same specification as Q7)
model_with_sodium <- lm(
  is_mcdonalds ~ calories + sodium + protein,
  data = q8_data
)
# Fit the model without sodium as a predictor
model_without_sodium <- lm(is_mcdonalds ~ calories + protein, data = q8_data)
# Calculate AIC for both models
AIC_with_sodium <- AIC(model_with_sodium, k = 2)
AIC_without_sodium <- AIC(model_without_sodium, k = 2)
# Lower AIC wins; plain if/else because both values are scalars
better_model <- if (AIC_with_sodium < AIC_without_sodium) {
  "with_sodium"
} else {
  "without_sodium"
}
# Stored as Q8 (not Q3) so the Taco Bell answer computed earlier in this file
# is not overwritten.
# NOTE: the value printed below this chunk (labelled Q3) came from the old,
# mis-specified models; re-run the chunk to refresh it.
Q8 <- data.frame(Q8 = min(AIC_with_sodium, AIC_without_sodium))
Q8
## Q3
## 1 478.7177
# Q9
# Regress calories on sat_fat, fiber and sugar, find the strongest predictor
# by *standardized* coefficient, and report that predictor's *unstandardized*
# coefficient.
model_q9 <- lm(calories ~ sat_fat + fiber + sugar, data = fastfood)
# Standardized coefficients come from lm.beta(); raw coef() values are on the
# predictors' own scales and include the intercept, which is why the previous
# version reported "(Intercept)" as the strongest predictor.
std_coefs_q9 <- lm.beta(model_q9)$standardized.coefficients
# The intercept is not a predictor, so exclude it from the ranking
std_coefs_q9 <- std_coefs_q9[names(std_coefs_q9) != "(Intercept)"]
strongest_predictor <- names(which.max(abs(std_coefs_q9)))
# Access the *unstandardized* regression coefficient of the strongest predictor
Q9 <- coef(model_q9)[strongest_predictor]
Q9 <- as.data.frame(Q9)
# NOTE: the output printed below this chunk (the intercept) predates this fix;
# re-run the chunk to refresh it.
Q9
## Q9
## (Intercept) 113.3344
# Q10
# Using only restaurants that have between 50 and 60 items in the dataset,
# regress total_fat on cholesterol, total_carb, vit_a and restaurant, then
# report the strongest standardized regression coefficient.
filtered_data <- fastfood %>%
  group_by(restaurant) %>%
  filter(n() >= 50 & n() <= 60) %>%
  ungroup()
model_q10 <- lm(
  total_fat ~ cholesterol + total_carb + vit_a + restaurant,
  data = filtered_data
)
# coef() on an lm object has no `standardize` argument (it is silently
# swallowed by `...`), so the previous version ranked and reported
# unstandardized coefficients. lm.beta() provides the standardized ones.
std_coefs_q10 <- lm.beta(model_q10)$standardized.coefficients
# The intercept is not a predictor, so exclude it from the ranking
std_coefs_q10 <- std_coefs_q10[names(std_coefs_q10) != "(Intercept)"]
# Identify and print the strongest standardized regression coefficient
strongest_predictor_q10 <- names(which.max(abs(std_coefs_q10)))
Q10 <- std_coefs_q10[strongest_predictor_q10]
Q10 <- as.data.frame(Q10)
# NOTE: the value printed below this chunk is an unstandardized coefficient
# from before this fix; re-run the chunk to refresh it.
Q10
## Q10
## restaurantSonic 6.433533