knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
# Load the fast-food menu data set from the openintro package and preview
# its first six rows.
data(fastfood, package = "openintro")
head(fastfood, n = 6L)
## # A tibble: 6 × 17
## restaur…¹ item calor…² cal_fat total…³ sat_fat trans…⁴ chole…⁵ sodium total…⁶
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mcdonalds Arti… 380 60 7 2 0 95 1110 44
## 2 Mcdonalds Sing… 840 410 45 17 1.5 130 1580 62
## 3 Mcdonalds Doub… 1130 600 67 27 3 220 1920 63
## 4 Mcdonalds Gril… 750 280 31 10 0.5 155 1940 62
## 5 Mcdonalds Cris… 920 410 45 12 0.5 120 1980 81
## 6 Mcdonalds Big … 540 250 28 10 1 80 950 46
## # … with 7 more variables: fiber <dbl>, sugar <dbl>, protein <dbl>,
## # vit_a <dbl>, vit_c <dbl>, calcium <dbl>, salad <chr>, and abbreviated
## # variable names ¹restaurant, ²calories, ³total_fat, ⁴trans_fat,
## # ⁵cholesterol, ⁶total_carb
# Split the menu data into the two restaurants compared in this analysis.
mcdonalds <- filter(fastfood, restaurant == "Mcdonalds")
dairy_queen <- filter(fastfood, restaurant == "Dairy Queen")
# Distribution of calories from fat for each restaurant.
# cal_fat is numeric, so a histogram (binned counts) is used to show the shape
# of the distribution; geom_bar() would draw one bar per distinct value.
# bins is set explicitly to the ggplot2 default so no "pick better value"
# message is emitted.
ggplot(data = dairy_queen, aes(x = cal_fat)) +
  geom_histogram(bins = 30)
ggplot(data = mcdonalds, aes(x = cal_fat)) +
  geom_histogram(bins = 30)
For both Dairy Queen and McDonald's, most menu items have fewer than 500 calories from fat.
# Sample mean and standard deviation of Dairy Queen's calories from fat;
# these parameterise the normal curve drawn below and the normal-model
# probabilities computed later in the script.
dqmean <- mean(dairy_queen$cal_fat)
dqsd <- sd(dairy_queen$cal_fat)

# Density-scaled histogram of cal_fat with the fitted normal density on top.
# after_stat(density) replaces the deprecated `..density..` notation, and the
# extra arguments for dnorm() are passed as a list, as stat_function() documents.
ggplot(data = dairy_queen, aes(x = cal_fat)) +
  geom_histogram(aes(y = after_stat(density))) +
  stat_function(
    fun = dnorm,
    args = list(mean = dqmean, sd = dqsd),
    colour = "tomato"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## E2
The distribution of calories from fat at Dairy Queen does not appear to
follow a nearly normal distribution.
# Normal Q-Q plot of Dairy Queen's calories from fat, drawn as a line.
dairy_queen %>%
  ggplot(aes(sample = cal_fat)) +
  geom_line(stat = "qq")
# Fix the RNG seed so the simulated samples, and therefore the plots the
# written conclusions refer to, are identical on every run. The seed value
# itself is arbitrary.
set.seed(1234)

# Draw a normal sample with the same size, mean and sd as the Dairy Queen
# cal_fat data, then show its Q-Q plot with a reference line.
sim_norm <- rnorm(n = nrow(dairy_queen), mean = dqmean, sd = dqsd)
qqnorm(sim_norm)
qqline(sim_norm)

# Compare each data set's Q-Q plot with Q-Q plots of simulated normal data.
qqnormsim(sample = cal_fat, data = dairy_queen)
qqnormsim(sample = calories, data = mcdonalds)
## E5
The McDonald's calories appear to come from a normal distribution, according
to the simulations above.
# Normal-model estimate of the probability that a Dairy Queen item has more
# than 600 calories from fat.
1 - pnorm(600, mean = dqmean, sd = dqsd)
## [1] 0.01501523
# Observed share of Dairy Queen items with more than 600 calories from fat:
# the mean of the logical indicator equals (matching rows) / (all rows).
dairy_queen %>%
  summarise(percent = mean(cal_fat > 600))
## # A tibble: 1 × 1
## percent
## <dbl>
## 1 0.0476
# Probability that a Dairy Queen item has more than 1000 mg of sodium.
# The normal model must use the mean and sd of sodium itself; dqmean/dqsd
# describe calories from fat and give a meaningless answer here.
dq_sodium_mean <- mean(dairy_queen$sodium)
dq_sodium_sd <- sd(dairy_queen$sodium)
1 - pnorm(q = 1000, mean = dq_sodium_mean, sd = dq_sodium_sd)
# NOTE: the previously recorded output (1.145813e-06) was computed from the
# calories-from-fat parameters and is no longer valid; re-knit to regenerate.

# Empirical proportion of Dairy Queen items with more than 1000 mg of sodium.
dairy_queen %>%
  filter(sodium > 1000) %>%
  summarise(percent = n() / nrow(dairy_queen))
## # A tibble: 1 × 1
## percent
## <dbl>
## 1 0.524
# Probability that a McDonald's item has less than 50 g of total carbs.
# "Less than" is the lower tail, so pnorm() is used directly (not 1 - pnorm),
# and the normal model uses the mean and sd of McDonald's total_carb so that
# it matches the empirical check below.
mcd_carb_mean <- mean(mcdonalds$total_carb)
mcd_carb_sd <- sd(mcdonalds$total_carb)
pnorm(q = 50, mean = mcd_carb_mean, sd = mcd_carb_sd)
# NOTE: the previously recorded output (0.9106913) was an upper-tail value
# from the Dairy Queen calories-from-fat parameters and is no longer valid;
# re-knit to regenerate.

# Empirical proportion of McDonald's items with less than 50 g of total carbs.
mcdonalds %>%
  filter(total_carb < 50) %>%
  summarise(percent = n() / nrow(mcdonalds))
## # A tibble: 1 × 1
## percent
## <dbl>
## 1 0.561
# Q-Q plots of sodium next to simulated normal samples, first for McDonald's
# and then for Dairy Queen, to judge how close each is to normal.
qqnormsim(data = mcdonalds, sample = sodium)
qqnormsim(data = dairy_queen, sample = sodium)