knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(infer)
table (yrbss$text_while_driving_30d)
##
## 0 1-2 10-19 20-29 3-5
## 4792 925 373 298 493
## 30 6-9 did not drive
## 827 311 4646
yrbss %>%filter(text_while_driving_30d =="30" & helmet_12m =="never") %>%nrow()/nrow(yrbss)
## [1] 0.03408673
0= 4792, 1-2 = 925, 6-9 = 311, 10-19 = 373, 20-29 = 298, 30 = 827, DND = 4646
The proportion is 0.034
p <-yrbss %>%filter(text_while_driving_30d =="30" & yrbss$helmet_12m =="never") %>%nrow()/nrow(yrbss)
ME<-1.96*sqrt(p*(1-p)/nrow(yrbss))
ME
## [1] 0.003051546
ME=0.003
table(yrbss$physically_active_7d)
##
## 0 1 2 3 4 5 6 7
## 2172 962 1270 1451 1265 1728 840 3622
n1 <-yrbss %>%filter( physically_active_7d > 6) %>%nrow(); n1
## [1] 3622
n = nrow(yrbss); n
## [1] 13583
p <- n1/n; p
## [1] 0.2666569
ME<-1.96*sqrt(p*(1-p)/nrow(yrbss))
ME
## [1] 0.007436836
c(p - ME, p + ME)
## [1] 0.2592200 0.2740937
table(yrbss$strength_training_7d)
##
## 0 1 2 3 4 5 6 7
## 3632 1012 1305 1468 1059 1333 513 2085
n1 <-yrbss %>%filter( strength_training_7d > 3) %>%nrow(); n1
## [1] 4990
n = nrow(yrbss); n
## [1] 13583
p <- n1/n; p
## [1] 0.367371
ME<-1.96*sqrt(p*(1-p)/nrow(yrbss))
ME
## [1] 0.008107467
c(p - ME, p + ME)
## [1] 0.3592635 0.3754784
n <- 1000
p <- seq(from = 0, to = 1, by = 0.01)
me <- 2 * sqrt(p * (1 - p)/n)
dd <- data.frame(p = p, me = me)
ggplot(data = dd, aes(x = p, y = me)) +
geom_line() +
labs(x = "Population Proportion", y = "Margin of Error")
n <- 10
p <- seq(from = 0, to = 1, by = 0.01)
me <- 2 * sqrt(p * (1 - p)/n)
dd <- data.frame(p = p, me = me)
ggplot(data = dd, aes(x = p, y = me)) +
geom_line() +
labs(x = "Population Proportion", y = "Margin of Error")
## Exercise 5
The ME seems to increase with the population increase. At 50% it begins to decrease
n <- 300
p <- seq(from = 0, to = 1, by = 0.01)
me <- 2 * sqrt(p * (1 - p)/n)
dd <- data.frame(p = p, me = me)
ggplot(data = dd, aes(x = p, y = me)) +
geom_line() +
labs(x = "Population Proportion", y = "Margin of Error")
The shape appears to be mostly normal
p<-0.1
n<-300
(p*(1-p)/n)^.5
## [1] 0.01732051
.1-(p*(1-p)/n)^.5
## [1] 0.08267949
.1+(p*(1-p)/n)^.5
## [1] 0.1173205
p<-0.1
n<-400
(p*(1-p)/n)^.5
## [1] 0.015
.1-(p*(1-p)/n)^.5
## [1] 0.085
.1+(p*(1-p)/n)^.5
## [1] 0.115
As n increases, p becomes tighter
sleep <- yrbss %>%
filter(school_night_hours_sleep == "10+")
strengthTraining <- yrbss %>%
mutate(text_ind = ifelse(strength_training_7d == "7", "yes", "no"))
strengthTraining %>%
filter(text_ind != "") %>%
specify(response = text_ind, success = "yes") %>%
generate(reps = 1000, type = "bootstrap") %>%
calculate(stat = "prop") %>%
get_ci(level = 0.95)
Type 1 is a false positive, so we can reject the null hypothesis.
ME= 1.96SE = 1.96 (p(1-p)/n)^0.5