# Set the default chunk option so each chunk's source code is echoed in the
# knitted document.
knitr::opts_chunk$set(echo = TRUE)

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
# Load the fastfood data set from the openintro package and preview its
# first six rows.
data(list = "fastfood", package = "openintro")
head(fastfood, n = 6L)
## # A tibble: 6 × 17
##   restaur…¹ item  calor…² cal_fat total…³ sat_fat trans…⁴ chole…⁵ sodium total…⁶
##   <chr>     <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>  <dbl>   <dbl>
## 1 Mcdonalds Arti…     380      60       7       2     0        95   1110      44
## 2 Mcdonalds Sing…     840     410      45      17     1.5     130   1580      62
## 3 Mcdonalds Doub…    1130     600      67      27     3       220   1920      63
## 4 Mcdonalds Gril…     750     280      31      10     0.5     155   1940      62
## 5 Mcdonalds Cris…     920     410      45      12     0.5     120   1980      81
## 6 Mcdonalds Big …     540     250      28      10     1        80    950      46
## # … with 7 more variables: fiber <dbl>, sugar <dbl>, protein <dbl>,
## #   vit_a <dbl>, vit_c <dbl>, calcium <dbl>, salad <chr>, and abbreviated
## #   variable names ¹​restaurant, ²​calories, ³​total_fat, ⁴​trans_fat,
## #   ⁵​cholesterol, ⁶​total_carb
# Split the menu data by chain so each restaurant can be analysed on its own.
mcdonalds <- filter(fastfood, restaurant == "Mcdonalds")
dairy_queen <- filter(fastfood, restaurant == "Dairy Queen")

# Distribution of calories from fat for each chain.
# cal_fat is a continuous variable, so it is binned with geom_histogram();
# geom_bar() would draw one bar per distinct value and hide the shape of the
# distribution. The same binwidth is used for both plots so they are
# directly comparable.
ggplot(data = dairy_queen, aes(x = cal_fat)) +
  geom_histogram(binwidth = 50)

ggplot(data = mcdonalds, aes(x = cal_fat)) +
  geom_histogram(binwidth = 50)

E1

For both Dairy Queen and McDonalds, most menu items are concentrated below 500 calories from fat.

# Mean and standard deviation of Dairy Queen calories from fat; these
# parameterise the normal curve overlaid on the histogram below.
dqmean <- mean(dairy_queen$cal_fat)
dqsd   <- sd(dairy_queen$cal_fat)

# Density-scaled histogram of cal_fat with the fitted normal density on top.
# after_stat(density) replaces the deprecated ..density.. notation, and the
# extra arguments for dnorm() are passed as a list, as stat_function() expects.
ggplot(data = dairy_queen, aes(x = cal_fat)) +
  geom_blank() +
  geom_histogram(aes(y = after_stat(density))) +
  stat_function(
    fun = dnorm,
    args = list(mean = dqmean, sd = dqsd),
    col = "tomato"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## E2 The distribution of calories from fat does not appear to follow a nearly normal distribution.

# Normal Q-Q plot of the observed Dairy Queen calories from fat.
ggplot(dairy_queen, aes(sample = cal_fat)) +
  geom_line(stat = "qq")

# Draw a sample of the same size from a normal distribution with the same
# mean and sd, then show its Q-Q plot with a reference line for comparison.
sim_norm <- rnorm(nrow(dairy_queen), mean = dqmean, sd = dqsd)
qqnorm(y = sim_norm)
qqline(y = sim_norm)

# Compare the observed Q-Q plot with Q-Q plots of simulated normal data:
# first Dairy Queen calories from fat, then McDonalds total calories.
qqnormsim(data = dairy_queen, sample = cal_fat)

qqnormsim(data = mcdonalds, sample = calories)

## E5 Based on the simulated Q-Q plots above, the calories appear to come from a nearly normal distribution.

# Theoretical P(cal_fat > 600) for Dairy Queen under the fitted normal model.
1 - pnorm(600, mean = dqmean, sd = dqsd)
## [1] 0.01501523
# Empirical counterpart: the share of Dairy Queen items whose calories from
# fat exceed 600, computed as the mean of the logical indicator.
dairy_queen %>%
  summarise(percent = mean(cal_fat > 600))
## # A tibble: 1 × 1
##   percent
##     <dbl>
## 1  0.0476
# Probability that a Dairy Queen item has more than 1000 mg of sodium.
# The normal model must be parameterised with the mean and sd of sodium
# itself; dqmean and dqsd describe cal_fat and previously produced a
# meaningless result (1.145813e-06) that disagreed with the empirical share.
1 - pnorm(
  q = 1000,
  mean = mean(dairy_queen$sodium),
  sd = sd(dairy_queen$sodium)
)
## (output not shown: re-knit to refresh the value for the corrected call)
# Empirical share of Dairy Queen items with more than 1000 mg of sodium.
dairy_queen %>% 
  filter(sodium > 1000) %>%
  summarise(percent = n() / nrow(dairy_queen))
## # A tibble: 1 × 1
##   percent
##     <dbl>
## 1   0.524
# Probability that a McDonalds item has less than 50 g of total carbs.
# "Less than" is the lower tail, so pnorm() is used directly rather than
# 1 - pnorm(), and the model uses the mean and sd of McDonalds total_carb
# instead of the Dairy Queen cal_fat parameters (dqmean, dqsd).
pnorm(
  q = 50,
  mean = mean(mcdonalds$total_carb),
  sd = sd(mcdonalds$total_carb)
)
## (output not shown: re-knit to refresh the value for the corrected call)
# Empirical share of McDonalds items with less than 50 g of total carbs.
mcdonalds %>% 
  filter(total_carb < 50) %>%
  summarise(percent = n() / nrow(mcdonalds))
## # A tibble: 1 × 1
##   percent
##     <dbl>
## 1   0.561
# Check the normality of sodium for each chain by comparing the observed
# Q-Q plot with Q-Q plots of simulated normal data.
qqnormsim(data = mcdonalds, sample = sodium)

qqnormsim(data = dairy_queen, sample = sodium)