library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
# Preview the first six rows of the fastfood data to see its columns.
fastfood %>%
  head(n = 6)
## # A tibble: 6 x 17
## restaurant item calories cal_fat total_fat sat_fat trans_fat cholesterol
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mcdonalds Arti~ 380 60 7 2 0 95
## 2 Mcdonalds Sing~ 840 410 45 17 1.5 130
## 3 Mcdonalds Doub~ 1130 600 67 27 3 220
## 4 Mcdonalds Gril~ 750 280 31 10 0.5 155
## 5 Mcdonalds Cris~ 920 410 45 12 0.5 120
## 6 Mcdonalds Big ~ 540 250 28 10 1 80
## # ... with 9 more variables: sodium <dbl>, total_carb <dbl>, fiber <dbl>,
## # sugar <dbl>, protein <dbl>, vit_a <dbl>, vit_c <dbl>, calcium <dbl>,
## # salad <chr>
# Menu items for the two restaurants analysed below, one data frame each.
mcdonalds <- filter(fastfood, restaurant == "Mcdonalds")
dairy_queen <- filter(fastfood, restaurant == "Dairy Queen")
# Histogram of calories from fat for the McDonalds menu (default binning).
mcdonalds %>%
  ggplot(aes(x = cal_fat)) +
  geom_histogram() +
  labs(title = "Calories from Fat on the McDonalds Menu")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# The same histogram for the Dairy Queen menu.
dairy_queen %>%
  ggplot(aes(x = cal_fat)) +
  geom_histogram() +
  labs(title = "Calories from Fat on the Dairy Queen Menu")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Sample mean and standard deviation of Dairy Queen's calories from fat.
# They parameterise the normal curve drawn below and are reused by the
# pnorm() calculation further down.
dqmean <- mean(dairy_queen$cal_fat)
dqsd <- sd(dairy_queen$cal_fat)

# Density-scaled histogram of cal_fat with the fitted normal density overlaid.
# after_stat(density) replaces the `..density..` dot-dot notation, which is
# deprecated as of ggplot2 3.4.0; after_stat() has been available since 3.3.0.
# `args` is given as a list, the form stat_function() documents.
ggplot(data = dairy_queen, aes(x = cal_fat)) +
  geom_blank() +
  geom_histogram(aes(y = after_stat(density))) +
  stat_function(
    fun = dnorm,
    args = list(mean = dqmean, sd = dqsd),
    colour = "tomato"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
### Answer: Based on the histogram above, the Dairy Queen calories-from-fat data appear to follow a nearly normal distribution; the histogram has an identifiable bell shape.
# Normal probability (Q-Q) plot of Dairy Queen's calories from fat.
dairy_queen %>%
  ggplot(aes(sample = cal_fat)) +
  geom_line(stat = "qq") +
  labs(title = "Dairy Queen Menu Probability Plot")
# Draw one normal sample with the same size, mean and sd as the Dairy Queen
# cal_fat data, then plot its Q-Q line as a reference for what truly normal
# data of this size looks like. No seed is set, so the sample differs per run.
sim_norm <- rnorm(nrow(dairy_queen), mean = dqmean, sd = dqsd)
ggplot(dairy_queen, aes(sample = sim_norm)) +
  geom_line(stat = "qq") +
  labs(title = "Dairy Queen Theoretical Probability Plot")

# openintro helper; presumably draws the observed Q-Q plot alongside several
# simulated normal ones -- see the openintro documentation to confirm.
qqnormsim(data = dairy_queen, sample = cal_fat)
# Theoretical P(cal_fat < 300) under a normal model with Dairy Queen's
# sample mean and standard deviation.
pnorm(300, mean = dqmean, sd = dqsd)
## [1] 0.5997007
# Empirical counterpart: the share of Dairy Queen items with fewer than 300
# calories from fat. `percent` holds a proportion between 0 and 1.
dairy_queen %>%
  summarize(percent = sum(cal_fat < 300, na.rm = TRUE) / n())
## # A tibble: 1 x 1
## percent
## <dbl>
## 1 0.667
# Sample mean and standard deviation of McDonalds' calories from fat.
mdmean <- mean(mcdonalds$cal_fat)
mdsd <- sd(mcdonalds$cal_fat)

# Theoretical P(cal_fat > 400) under the fitted normal model.
# lower.tail = FALSE asks pnorm() for the upper tail directly instead of
# computing 1 - pnorm(), which loses precision when the tail is small.
pnorm(q = 400, mean = mdmean, sd = mdsd, lower.tail = FALSE)
## [1] 0.3022921
# Empirical counterpart: the share of McDonalds items with more than 400
# calories from fat. `percent` holds a proportion between 0 and 1.
mcdonalds %>%
  filter(cal_fat > 400) %>%
  summarize(percent = n() / nrow(mcdonalds))
## # A tibble: 1 x 1
## percent
## <dbl>
## 1 0.158
# List the distinct restaurant names, in order of first appearance.
unique(fastfood[["restaurant"]])
## [1] "Mcdonalds" "Chick Fil-A" "Sonic" "Arbys" "Burger King"
## [6] "Dairy Queen" "Subway" "Taco Bell"
# Draw a normal probability (Q-Q) plot of `sodium` for one restaurant's menu.
#
# Arguments:
#   menu  - data frame of menu items containing a `sodium` column.
#   title - plot title (here, the restaurant's name).
# Returns the ggplot object. Called at top level it is auto-printed, exactly
# like the inline plots this helper replaces.
plot_sodium_qq <- function(menu, title) {
  ggplot(data = menu, aes(sample = sodium)) +
    ggtitle(title) +
    geom_line(stat = "qq")
}

# One plot per restaurant. The per-restaurant data frames are still created
# under their original names so any later code that uses them keeps working.
plot_sodium_qq(mcdonalds, "McDonalds")

chick_fil_a <- fastfood %>%
  filter(restaurant == "Chick Fil-A")
plot_sodium_qq(chick_fil_a, "Chick Fil-A")

sonic <- fastfood %>%
  filter(restaurant == "Sonic")
plot_sodium_qq(sonic, "Sonic")

arbys <- fastfood %>%
  filter(restaurant == "Arbys")
plot_sodium_qq(arbys, "Arbys")

burger_king <- fastfood %>%
  filter(restaurant == "Burger King")
plot_sodium_qq(burger_king, "Burger King")

plot_sodium_qq(dairy_queen, "Dairy Queen")

subway <- fastfood %>%
  filter(restaurant == "Subway")
plot_sodium_qq(subway, "Subway")

taco_bell <- fastfood %>%
  filter(restaurant == "Taco Bell")
plot_sodium_qq(taco_bell, "Taco Bell")
### Answer: Of the eight restaurants, Burger King's normal probability plot of sodium is the closest to a straight line, so its sodium distribution is the closest to normal.
# Normal probability (Q-Q) plot of total carbohydrates on the Dairy Queen menu.
dairy_queen %>%
  ggplot(aes(sample = total_carb)) +
  geom_line(stat = "qq")
# Histogram of the same variable, to show the shape of its distribution.
dairy_queen %>%
  ggplot(aes(x = total_carb)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
### Answer: The `total_carb` variable for Dairy Queen is right-skewed.