# Load the fast-food nutrition data from the working directory and show
# the type and first few values of every column.
fastfood <- read.csv(file = "fastfood.csv")
str(object = fastfood)
## 'data.frame':    515 obs. of  17 variables:
##  $ restaurant : chr  "Mcdonalds" "Mcdonalds" "Mcdonalds" "Mcdonalds" ...
##  $ item       : chr  "Artisan Grilled Chicken Sandwich" "Single Bacon Smokehouse Burger" "Double Bacon Smokehouse Burger" "Grilled Bacon Smokehouse Chicken Sandwich" ...
##  $ calories   : int  380 840 1130 750 920 540 300 510 430 770 ...
##  $ cal_fat    : int  60 410 600 280 410 250 100 210 190 400 ...
##  $ total_fat  : int  7 45 67 31 45 28 12 24 21 45 ...
##  $ sat_fat    : num  2 17 27 10 12 10 5 4 11 21 ...
##  $ trans_fat  : num  0 1.5 3 0.5 0.5 1 0.5 0 1 2.5 ...
##  $ cholesterol: int  95 130 220 155 120 80 40 65 85 175 ...
##  $ sodium     : int  1110 1580 1920 1940 1980 950 680 1040 1040 1290 ...
##  $ total_carb : int  44 62 63 62 81 46 33 49 35 42 ...
##  $ fiber      : int  3 2 3 2 4 3 2 3 2 3 ...
##  $ sugar      : int  11 18 18 18 18 9 7 6 7 10 ...
##  $ protein    : int  37 46 70 55 46 25 15 25 25 51 ...
##  $ vit_a      : int  4 6 10 6 6 10 10 0 20 20 ...
##  $ vit_c      : int  20 20 20 25 20 2 2 4 4 6 ...
##  $ calcium    : int  20 20 50 20 20 15 10 2 15 20 ...
##  $ salad      : chr  "Other" "Other" "Other" "Other" ...

Question 1. Use 2 different functions to determine the mean, median, and sd of calories

# Approach 1: base R. Each statistic is computed directly on the calories
# column, ignoring missing values.
mean_calories_base <- mean(fastfood[["calories"]], na.rm = TRUE)
median_calories_base <- median(fastfood[["calories"]], na.rm = TRUE)
sd_calories_base <- sd(fastfood[["calories"]], na.rm = TRUE)

# Approach 2: dplyr. The same three statistics are collected into a
# one-row data frame.
summary_stats <- summarise(
  fastfood,
  mean_calories = mean(calories, na.rm = TRUE),
  median_calories = median(calories, na.rm = TRUE),
  sd_calories = sd(calories, na.rm = TRUE)
)
print(summary_stats)
##   mean_calories median_calories sd_calories
## 1      530.9126             490    282.4361

Question 2. Calculate the z score for calories in a Sonic corn dog

# Standardize the Sonic corn dog's calorie count against the calories
# column of the whole data set: z = (value - mean) / sd.
mean_calories <- mean(fastfood[["calories"]], na.rm = TRUE)
sd_calories <- sd(fastfood[["calories"]], na.rm = TRUE)
sonic_corn_dog_calories <- 210  # calorie value used for the Sonic corn dog
z_score <- (sonic_corn_dog_calories - mean_calories) / sd_calories

# Report the two ingredients first, then the resulting z-score.
print(paste("Mean calories:", mean_calories))
## [1] "Mean calories: 530.912621359223"
print(paste("Standard deviation of calories:", sd_calories))
## [1] "Standard deviation of calories: 282.436147075798"
print(paste("Z-score for Sonic corn dog (calories = 210):", z_score))
## [1] "Z-score for Sonic corn dog (calories = 210): -1.1362307009276"

Question 3. Create a new data object

# Build a slimmed-down data object: keep the identifying columns and the
# calories, then append each item's calorie z-score. mean_calories and
# sd_calories are the values computed earlier in the script.
fastfood_with_zscore <- mutate(
  select(fastfood, restaurant, item, calories),
  z_score = (calories - mean_calories) / sd_calories
)
print(head(fastfood_with_zscore))
##   restaurant                                      item calories     z_score
## 1  Mcdonalds          Artisan Grilled Chicken Sandwich      380 -0.53432474
## 2  Mcdonalds            Single Bacon Smokehouse Burger      840  1.09436197
## 3  Mcdonalds            Double Bacon Smokehouse Burger     1130  2.12114272
## 4  Mcdonalds Grilled Bacon Smokehouse Chicken Sandwich      750  0.77570588
## 5  Mcdonalds  Crispy Bacon Smokehouse Chicken Sandwich      920  1.37761183
## 6  Mcdonalds                                   Big Mac      540  0.03217498

Question 4. Create a density plot

# Density plot of the standardized calorie scores, with vertical reference
# lines at the mean (z = 0) and at +/-1 and +/-2 standard deviations.
#
# The reference lines are given as fixed `xintercept` values instead of
# being mapped inside aes(). A constant mapped through aes() is evaluated
# against the plot data, so the same line is drawn once for every row of
# fastfood_with_zscore; passing it as a parameter draws each line once.
density_plot <- ggplot(fastfood_with_zscore, aes(x = z_score)) +
  geom_density(fill = "skyblue", alpha = 0.5) +  # Density with fill color
  geom_vline(xintercept = 0, color = "red", linetype = "dashed") +  # Mean
  geom_vline(xintercept = c(-1, 1), color = "green",
             linetype = "dotted") +  # 1 SD below / above the mean
  geom_vline(xintercept = c(-2, 2), color = "blue",
             linetype = "dotted") +  # 2 SD below / above the mean
  labs(title = "Density Plot of Standardized Scores of Calories",
       x = "Z-Score",
       y = "Density") +
  theme_minimal()  # Use a minimal theme for better aesthetics
print(density_plot)

Question 5. How many items fall within 1 SD of the mean of standardized calories?

# Items whose standardized calories lie in the closed interval [-1, 1],
# i.e. within one standard deviation of the mean.
within_1_sd <- filter(fastfood_with_zscore, abs(z_score) <= 1)

# Count of such items and their share of all items, as a percentage.
total_items <- nrow(fastfood_with_zscore)
num_within_1_sd <- nrow(within_1_sd)
percentage_within_1_sd <- 100 * (num_within_1_sd / total_items)

cat("Number of items within 1 SD of standardized calories:", num_within_1_sd, "\n")
## Number of items within 1 SD of standardized calories: 381
cat("Percentage of items within 1 SD of standardized calories:", percentage_within_1_sd, "%\n")
## Percentage of items within 1 SD of standardized calories: 73.98058 %